Skip to content
This repository has been archived by the owner on Jan 7, 2023. It is now read-only.

Commit

Permalink
Revert "FROMLIST: SQUASH: i965: SIMD32 selection heuristics"
Browse files Browse the repository at this point in the history
This patch is causing visual artifacts on Celadon home screen, which is
troubling because it is supposed to be disabled by default. Reverting until we
find a solution.
  • Loading branch information
strassek committed Jul 25, 2019
1 parent 88b6e29 commit 80f542c
Show file tree
Hide file tree
Showing 9 changed files with 7 additions and 152 deletions.
11 changes: 0 additions & 11 deletions src/intel/compiler/brw_compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,6 @@ struct ra_regs;
struct nir_shader;
struct brw_program;

/**
 * Tunables for the SIMD32 fragment shader selection heuristic.
 *
 * brw_compile_fs() and fs_visitor::run_heuristic() consult these when
 * INTEL_DEBUG has DEBUG_HEUR32 set; the i965 driver fills them in from
 * driconf options in brw_process_driconf_options().
 */
struct brw_simd32_heuristics_control {
/* When true, run_heuristic() rejects SIMD32 if the longest run of
 * consecutive sampler opcodes exceeds max_grouped_sends.
 */
bool grouped_sends_check;
/* Largest run of consecutive sampler opcodes still accepted. */
int max_grouped_sends;
/* When true, SIMD32 dispatch is only enabled if the SIMD32/SIMD16
 * generated instruction count ratio is at most inst_count_ratio.
 */
bool inst_count_check;
/* Upper bound for the SIMD32/SIMD16 instruction count ratio. */
float inst_count_ratio;
/* When true, a SIMD32 compile is only attempted if the number of bits set
 * in the shader's render target write mask is at most max_mrts.
 */
bool mrt_check;
/* Largest number of render target writes still accepted. */
int max_mrts;
};

struct brw_compiler {
const struct gen_device_info *devinfo;

Expand Down Expand Up @@ -127,8 +118,6 @@ struct brw_compiler {
* whether nir_opt_large_constants will be run.
*/
bool supports_shader_constants;

struct brw_simd32_heuristics_control simd32_heuristics_control;
};

/**
Expand Down
63 changes: 6 additions & 57 deletions src/intel/compiler/brw_fs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7933,8 +7933,6 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
char **error_str)
{
const struct gen_device_info *devinfo = compiler->devinfo;
bool simd16_failed = false;
bool simd16_spilled = false;

shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true);
brw_nir_lower_fs_inputs(shader, devinfo, key);
Expand Down Expand Up @@ -8000,30 +7998,20 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
shader_time_index16);
v16.import_uniforms(&v8);
if (!v16.run_fs(allow_spilling, use_rep_send)) {
simd16_failed = true;
compiler->shader_perf_log(log_data,
"SIMD16 shader failed to compile: %s",
v16.fail_msg);
} else {
simd16_spilled = v16.spilled_any_registers;
simd16_cfg = v16.cfg;
prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs;
prog_data->reg_blocks_16 = brw_register_blocks(v16.grf_used);
}
}

/* Currently, the compiler only supports SIMD32 on SNB+ */
const brw_simd32_heuristics_control *ctrl = &compiler->simd32_heuristics_control;
uint64_t mrts = shader->info.outputs_written << FRAG_RESULT_DATA0;

if (v8.max_dispatch_width >= 32 && !use_rep_send &&
compiler->devinfo->gen >= 6 &&
(unlikely(INTEL_DEBUG & DEBUG_DO32) ||
(unlikely(INTEL_DEBUG & DEBUG_HEUR32) &&
!simd16_failed && !simd16_spilled &&
(!ctrl->mrt_check ||
(ctrl->mrt_check &&
u_count_bits64(&mrts) <= ctrl->max_mrts))))) {
unlikely(INTEL_DEBUG & DEBUG_DO32)) {
/* Try a SIMD32 compile */
fs_visitor v32(compiler, log_data, mem_ctx, key,
&prog_data->base, prog, shader, 32,
Expand All @@ -8034,12 +8022,9 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
"SIMD32 shader failed to compile: %s",
v32.fail_msg);
} else {
if (likely(!(INTEL_DEBUG & DEBUG_HEUR32)) ||
v32.run_heuristic(ctrl)) {
simd32_cfg = v32.cfg;
prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs;
prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used);
}
simd32_cfg = v32.cfg;
prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs;
prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used);
}
}

Expand Down Expand Up @@ -8118,49 +8103,13 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
}

if (simd32_cfg) {
uint32_t offset = g.generate_code(simd32_cfg, 32);

if (unlikely(INTEL_DEBUG & DEBUG_DO32) ||
(unlikely(INTEL_DEBUG & DEBUG_HEUR32) &&
(!simd16_cfg ||
(simd16_cfg &&
(!ctrl->inst_count_check ||
(ctrl->inst_count_check &&
(float)g.get_inst_count(32) / (float)g.get_inst_count(16) <= ctrl->inst_count_ratio)))))) {
prog_data->dispatch_32 = true;
prog_data->prog_offset_32 = offset;
}
prog_data->dispatch_32 = true;
prog_data->prog_offset_32 = g.generate_code(simd32_cfg, 32);
}

return g.get_assembly();
}

/**
 * Evaluate the per-shader part of the SIMD32 selection heuristic.
 *
 * Walks every instruction in the CFG and measures the longest run of
 * consecutive instructions whose opcode lies in the range
 * [SHADER_OPCODE_TEX, SHADER_OPCODE_SAMPLEINFO_LOGICAL].
 *
 * \param ctrl  heuristic tunables; only grouped_sends_check and
 *              max_grouped_sends are read here.
 * \return false when the grouped-sends check is enabled and the longest run
 *         exceeds ctrl->max_grouped_sends, true otherwise.
 */
bool
fs_visitor::run_heuristic(const struct brw_simd32_heuristics_control *ctrl)
{
   /* Nothing else depends on the scan, so skip it when the check is off. */
   if (!ctrl->grouped_sends_check)
      return true;

   int grouped_sends = 0;
   int max_grouped_sends = 0;

   foreach_block_and_inst(block, fs_inst, inst, cfg) {
      if (inst->opcode >= SHADER_OPCODE_TEX &&
          inst->opcode <= SHADER_OPCODE_SAMPLEINFO_LOGICAL) {
         ++grouped_sends;

         /* Track the maximum while the run is still open so that a run
          * reaching the last instruction of the program is counted too.
          */
         if (grouped_sends > max_grouped_sends)
            max_grouped_sends = grouped_sends;
      } else {
         grouped_sends = 0;
      }
   }

   return max_grouped_sends <= ctrl->max_grouped_sends;
}

fs_reg *
fs_visitor::emit_cs_work_group_id_setup()
{
Expand Down
4 changes: 0 additions & 4 deletions src/intel/compiler/brw_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -300,8 +300,6 @@ class fs_visitor : public backend_shader
void dump_instruction(backend_instruction *inst);
void dump_instruction(backend_instruction *inst, FILE *file);

bool run_heuristic(const struct brw_simd32_heuristics_control *ctrl);

const void *const key;
const struct brw_sampler_prog_key_data *key_tex;

Expand Down Expand Up @@ -422,7 +420,6 @@ class fs_generator

void enable_debug(const char *shader_name);
int generate_code(const cfg_t *cfg, int dispatch_width);
int get_inst_count(int dispatch_width);
const unsigned *get_assembly();

private:
Expand Down Expand Up @@ -518,7 +515,6 @@ class fs_generator
struct brw_stage_prog_data * const prog_data;

unsigned dispatch_width; /**< 8, 16 or 32 */
int inst_count[3]; /* for 8, 16 and 32 */

exec_list discard_halt_patches;
unsigned promoted_constants;
Expand Down
12 changes: 0 additions & 12 deletions src/intel/compiler/brw_fs_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2256,8 +2256,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
fill_count, promoted_constants, before_size,
after_size);

inst_count[ffs(dispatch_width) - 4] = before_size / 16;

return start_offset;
}

Expand All @@ -2266,13 +2264,3 @@ fs_generator::get_assembly()
{
return brw_get_program(p, &prog_data->program_size);
}

/**
 * Return the instruction count recorded for the given dispatch width.
 *
 * \param dispatch_width  8, 16 or 32.
 * \return the matching inst_count[] entry, or 0 for any other width.
 */
int
fs_generator::get_inst_count(int dispatch_width)
{
   switch (dispatch_width) {
   case 8:
   case 16:
   case 32:
      /* ffs() yields 4, 5, 6 for widths 8, 16, 32 -> slots 0, 1, 2. */
      return inst_count[ffs(dispatch_width) - 4];
   default:
      return 0;
   }
}
1 change: 0 additions & 1 deletion src/intel/dev/gen_debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ static const struct debug_control debug_control[] = {
{ "color", DEBUG_COLOR },
{ "reemit", DEBUG_REEMIT },
{ "soft64", DEBUG_SOFT64 },
{ "heur32", DEBUG_HEUR32 },
{ NULL, 0 }
};

Expand Down
3 changes: 1 addition & 2 deletions src/intel/dev/gen_debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,14 @@ extern uint64_t INTEL_DEBUG;
#define DEBUG_COLOR (1ull << 40)
#define DEBUG_REEMIT (1ull << 41)
#define DEBUG_SOFT64 (1ull << 42)
#define DEBUG_HEUR32 (1ull << 43)

/* These flags are not compatible with the disk shader cache */
#define DEBUG_DISK_CACHE_DISABLE_MASK DEBUG_SHADER_TIME

/* These flags may affect program generation */
#define DEBUG_DISK_CACHE_MASK \
(DEBUG_NO16 | DEBUG_NO_DUAL_OBJECT_GS | DEBUG_NO8 | DEBUG_SPILL_FS | \
DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64 | DEBUG_HEUR32)
DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64)

#ifdef HAVE_ANDROID_PLATFORM
#define LOG_TAG "INTEL-MESA"
Expand Down
13 changes: 0 additions & 13 deletions src/mesa/drivers/dri/i965/brw_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -914,19 +914,6 @@ brw_process_driconf_options(struct brw_context *brw)
ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
driComputeOptionsSha1(&brw->screen->optionCache,
ctx->Const.dri_config_options_sha1);

brw->screen->compiler->simd32_heuristics_control.grouped_sends_check =
driQueryOptionb(&brw->optionCache, "simd32_heuristic_grouped_check");
brw->screen->compiler->simd32_heuristics_control.max_grouped_sends =
driQueryOptioni(&brw->optionCache, "simd32_heuristic_grouped_sends");
brw->screen->compiler->simd32_heuristics_control.inst_count_check =
driQueryOptionb(&brw->optionCache, "simd32_heuristic_inst_check");
brw->screen->compiler->simd32_heuristics_control.inst_count_ratio =
driQueryOptionf(&brw->optionCache, "simd32_heuristic_inst_ratio");
brw->screen->compiler->simd32_heuristics_control.mrt_check =
driQueryOptionb(&brw->optionCache, "simd32_heuristic_mrt_check");
brw->screen->compiler->simd32_heuristics_control.max_mrts =
driQueryOptioni(&brw->optionCache, "simd32_heuristic_max_mrts");
}

GLboolean
Expand Down
27 changes: 0 additions & 27 deletions src/mesa/drivers/dri/i965/intel_screen.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,33 +62,6 @@ DRI_CONF_BEGIN
DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
DRI_CONF_DESC_END
DRI_CONF_OPT_END

DRI_CONF_OPT_BEGIN_B(simd32_heuristic_grouped_check, "true")
DRI_CONF_DESC(en, "Enable/disable grouped texture fetch "
"check in the SIMD32 selection heuristic.")
DRI_CONF_OPT_END
DRI_CONF_OPT_BEGIN_V(simd32_heuristic_grouped_sends, int, 6, "1:999")
DRI_CONF_DESC(en, "How many grouped texture fetches should "
"the SIMD32 selection heuristic allow.")
DRI_CONF_OPT_END
DRI_CONF_OPT_BEGIN_B(simd32_heuristic_inst_check, "true")
DRI_CONF_DESC(en, "Enable/disable SIMD32/SIMD16 instruction "
"count ratio check in the SIMD32 selection "
"heuristic.")
DRI_CONF_OPT_END
DRI_CONF_OPT_BEGIN_V(simd32_heuristic_inst_ratio, float, 2.3, "1:999")
DRI_CONF_DESC(en, "SIMD32/SIMD16 instruction count ratio "
"the SIMD32 selection heuristic should allow.")
DRI_CONF_OPT_END
DRI_CONF_OPT_BEGIN_B(simd32_heuristic_mrt_check, "true")
DRI_CONF_DESC(en, "Enable/disable MRT write check in the "
"SIMD32 selection heuristic.")
DRI_CONF_OPT_END
DRI_CONF_OPT_BEGIN_V(simd32_heuristic_max_mrts, int, 1, "1:8")
DRI_CONF_DESC(en, "How many MRT writes should the SIMD32 "
"selection heuristic allow.")
DRI_CONF_OPT_END

DRI_CONF_MESA_NO_ERROR("false")
DRI_CONF_MESA_GLTHREAD("false")
DRI_CONF_SECTION_END
Expand Down
25 changes: 0 additions & 25 deletions src/util/bitscan.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,31 +112,6 @@ u_bit_scan64(uint64_t *mask)
return i;
}

/* Return the number of bits set in *mask.  The pointed-to value is only
 * read, never modified.
 */
static inline int
u_count_bits(unsigned *mask)
{
   unsigned bits = *mask;

   /* Parallel reduction: 2-bit partial sums, then 4-bit, then per-byte. */
   bits -= (bits >> 1) & 0x55555555;
   bits = ((bits >> 2) & 0x33333333) + (bits & 0x33333333);
   bits = ((bits >> 4) + bits) & 0xF0F0F0F;

   /* The multiply accumulates all byte sums into bits 24..31. */
   return (int)((bits * 0x1010101) >> 24);
}

/* Return the number of bits set in the 64-bit value *mask.  The pointed-to
 * value is only read, never modified.
 */
static inline int
u_count_bits64(uint64_t *mask)
{
   uint64_t bits = *mask;

   /* Parallel reduction: 2-bit partial sums, then 4-bit, then per-byte. */
   bits -= (bits >> 1) & 0x5555555555555555ull;
   bits = ((bits >> 2) & 0x3333333333333333ull) +
          (bits & 0x3333333333333333ull);
   bits = ((bits >> 4) + bits) & 0xF0F0F0F0F0F0F0Full;

   /* The multiply accumulates all byte sums into bits 56..63. */
   return (int)((bits * 0x101010101010101ull) >> 56);
}

/* Determine if an unsigned value is a power of two.
*
* \note
Expand Down

0 comments on commit 80f542c

Please sign in to comment.