VarAction2: Add a JZ operation, use for ternary style mul masking

pull/400/head
Jonathan G Rennison 2 years ago
parent 8687b82dd8
commit 13fcde9ea0

@ -34,6 +34,7 @@ enum NewGRFOptimiserFlags {
NGOF_NO_OPT_VARACT2_EXPENSIVE_VARS = 3,
NGOF_NO_OPT_VARACT2_SIMPLIFY_STORES = 4,
NGOF_NO_OPT_VARACT2_ADJUST_ORDERING = 5,
NGOF_NO_OPT_VARACT2_INSERT_JUMPS = 6,
};
inline bool HasGrfOptimiserFlag(NewGRFOptimiserFlags flag)

@ -136,7 +136,6 @@ public:
btree::btree_map<const SpriteGroup *, VarAction2GroupVariableTracking *> group_temp_store_variable_tracking;
UniformArenaAllocator<sizeof(VarAction2GroupVariableTracking), 1024> group_temp_store_variable_tracking_storage;
std::vector<DeterministicSpriteGroup *> dead_store_elimination_candidates;
std::vector<DeterministicSpriteGroup *> pending_expensive_var_checks;
VarAction2GroupVariableTracking *GetVarAction2GroupVariableTracking(const SpriteGroup *group, bool make_new)
{
@ -166,7 +165,6 @@ public:
this->group_temp_store_variable_tracking.clear();
this->group_temp_store_variable_tracking_storage.EmptyArena();
this->dead_store_elimination_candidates.clear();
this->pending_expensive_var_checks.clear();
}
/**
@ -6625,6 +6623,7 @@ static void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSp
}
if (adjust.operation == DSGA_OP_MUL && adjust.variable != 0x7E) {
state.inference |= VA2AIF_MUL_BOOL;
adjust.adjust_flags |= DSGAF_BOOL_MUL_HINT;
}
}
} else {
@ -6924,6 +6923,7 @@ static void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSp
}
if ((prev_inference & VA2AIF_ONE_OR_ZERO) || (non_const_var_inference & VA2AIF_ONE_OR_ZERO)) {
state.inference |= VA2AIF_MUL_BOOL;
adjust.adjust_flags |= DSGAF_BOOL_MUL_HINT;
}
break;
}
@ -7199,13 +7199,22 @@ static bool OptimiseVarAction2DeterministicSpriteGroupExpensiveVarsInner(Determi
}
int insert_pos = start;
uint32 and_mask = 0;
uint condition_depth = 0;
bool seen_first = false;
for (int j = end; j >= start; j--) {
DeterministicSpriteGroupAdjust &adjust = group->adjusts[j];
if (seen_first && adjust.operation == DSGA_OP_JZ && condition_depth > 0) {
/* Do not insert the STO_NC inside a conditional block when it is also needed outside the block */
condition_depth--;
insert_pos = j;
}
if (seen_first && adjust.adjust_flags & DSGAF_END_BLOCK) condition_depth++;
if (adjust.variable == target_var && adjust.parameter == target_param) {
and_mask |= adjust.and_mask << adjust.shift_num;
adjust.variable = 0x7D;
adjust.parameter = bit;
insert_pos = j;
seen_first = true;
}
}
DeterministicSpriteGroupAdjust load = {};
@ -7386,7 +7395,111 @@ static void OptimiseVarAction2DeterministicSpriteGroupAdjustOrdering(Determinist
});
adjust.operation = DSGA_OP_RST;
adjust.adjust_flags &= ~DSGAF_SKIP_ON_ZERO;
adjust.adjust_flags &= ~(DSGAF_SKIP_ON_ZERO | DSGAF_BOOL_MUL_HINT);
}
}
}
}
/**
 * Try to rewrite an adjust so that it evaluates the source expression of another adjust directly.
 *
 * Three shapes are handled, checked in this order:
 *  - the target is a plain, unshifted read (full mask, or a mask including bit 0 when the source is a constant comparison):
 *    the target takes over the (possibly inverted) source type and all source fields;
 *  - the target is itself a constant comparison against 0 of bit 0 and the source is a constant comparison:
 *    the target takes over the source fields, with the comparison type inverted if the target was DSGA_TYPE_EQ;
 *  - the source is a plain read and the combined shift stays below 32: shifts are added and masks intersected.
 *
 * @param target Adjust to rewrite in place. It is left untouched when false is returned.
 * @param var_src Adjust whose source expression should be merged into the target.
 * @param inverted Treat the source comparison as negated: DSGA_TYPE_EQ and DSGA_TYPE_NEQ are exchanged, any other source type is refused.
 * @return true if the target was rewritten, false if no combination was possible.
 */
static bool TryCombineTempStoreLoadWithStoreSourceAdjust(DeterministicSpriteGroupAdjust &target, const DeterministicSpriteGroupAdjust *var_src, bool inverted)
{
	DeterministicSpriteGroupAdjustType src_type = var_src->type;
	if (inverted) {
		if (src_type == DSGA_TYPE_EQ) {
			src_type = DSGA_TYPE_NEQ;
		} else if (src_type == DSGA_TYPE_NEQ) {
			src_type = DSGA_TYPE_EQ;
		} else {
			/* Only the two constant comparisons can be inverted here */
			return false;
		}
	}

	/* Take over every source-describing field of var_src, the type is handled by the caller */
	auto adopt_source_fields = [&]() {
		target.variable = var_src->variable;
		target.shift_num = var_src->shift_num;
		target.parameter = var_src->parameter;
		target.and_mask = var_src->and_mask;
		target.add_val = var_src->add_val;
		target.divmod_val = var_src->divmod_val;
	};

	const bool target_reads_lsb = (target.and_mask & 1) != 0;

	if (target.type == DSGA_TYPE_NONE && target.shift_num == 0) {
		if (target.and_mask == 0xFFFFFFFF || (IsConstantComparisonAdjustType(src_type) && target_reads_lsb)) {
			/* Target is a straight pass-through of the stored value */
			target.type = src_type;
			adopt_source_fields();
			return true;
		}
	}

	if (IsConstantComparisonAdjustType(target.type) && target.shift_num == 0 && target_reads_lsb && target.add_val == 0 &&
			IsConstantComparisonAdjustType(src_type)) {
		/* With add_val == 0 the target comparison is just a boolean invert (EQ) or convert (NEQ) of the incoming comparison */
		if (target.type == DSGA_TYPE_EQ) {
			target.type = InvertConstantComparisonAdjustType(src_type);
		} else {
			target.type = src_type;
		}
		adopt_source_fields();
		return true;
	}

	if (src_type != DSGA_TYPE_NONE || (target.shift_num + var_src->shift_num) >= 32) return false;

	/* Plain source read: chain the shifts and intersect the masks.
	 * The mask must be narrowed using the target's own shift before that shift is increased. */
	target.variable = var_src->variable;
	target.parameter = var_src->parameter;
	target.and_mask &= var_src->and_mask >> target.shift_num;
	target.shift_num += var_src->shift_num;
	return true;
}
/**
 * Replace multiplications flagged with DSGAF_BOOL_MUL_HINT by a DSGA_OP_JZ placed ahead of the adjusts
 * which compute the other operand, so that those adjusts are skipped when the multiplier is zero.
 *
 * The adjusts are walked from last to first. For each flagged adjust, the preceding adjusts are scanned
 * backwards to find how far the jump can be hoisted. The flagged adjust is then removed, the adjust which
 * preceded it is marked DSGAF_END_BLOCK, and a copy with the operation changed to DSGA_OP_JZ is inserted
 * at the start of the skippable run.
 *
 * Does nothing if NGOF_NO_OPT_VARACT2_INSERT_JUMPS is set.
 *
 * NOTE(review): a JZ which does not jump returns the previous value unmodified, so this rewrite is only
 * equivalent to the multiplication when the multiplier operand is 0 or 1. Confirm that every site which
 * sets DSGAF_BOOL_MUL_HINT guarantees that for the variable operand, not just for the previous value.
 *
 * @param group Group whose adjusts are rewritten in place.
 */
static void OptimiseVarAction2DeterministicSpriteGroupInsertJumps(DeterministicSpriteGroup *group)
{
if (HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_INSERT_JUMPS)) return;
/* Index 0 is never considered: there would be nothing before it to skip */
for (int i = (int)group->adjusts.size() - 1; i >= 1; i--) {
DeterministicSpriteGroupAdjust &adjust = group->adjusts[i];
if (adjust.adjust_flags & DSGAF_BOOL_MUL_HINT) {
/* Temporary storage registers (0 - 0xFF) whose last read lies inside the run scanned so far,
 * stores to these may be skipped along with the rest of the run */
std::bitset<256> ok_stores;
/* After the scan, j is the index of the nearest earlier adjust which must NOT be skipped, or -1 if there is none */
int j = i - 1;
while (j >= 0) {
DeterministicSpriteGroupAdjust &prev = group->adjusts[j];
/* Don't try to skip over: unpredictable or special stores, procedure calls, permanent stores, or another jump */
/* A DSGA_OP_STO is only acceptable when its register index is directly visible in and_mask and is below 0x100
 * (presumably variable 0x1A is the constant-value variable, confirm) */
if (prev.operation == DSGA_OP_STO && (prev.type != DSGA_TYPE_NONE || prev.variable != 0x1A || prev.shift_num != 0 || prev.and_mask >= 0x100)) break;
if (prev.operation == DSGA_OP_STO_NC && prev.divmod_val >= 0x100) break;
if (prev.operation == DSGA_OP_STOP) break;
if (prev.operation == DSGA_OP_JZ) break;
if (prev.variable == 0x7E) break;
/* Reached a store which can't be skipped over because the value is needed later */
/* The index checks above keep both bitset lookups within 0 - 0xFF */
if (prev.operation == DSGA_OP_STO && !ok_stores[prev.and_mask]) break;
if (prev.operation == DSGA_OP_STO_NC && !ok_stores[prev.divmod_val]) break;
if (prev.variable == 0x7D && (prev.adjust_flags & DSGAF_LAST_VAR_READ)) {
/* The stored value is no longer needed after this, we can skip the corresponding store */
ok_stores.set(prev.parameter & 0xFF, true);
}
j--;
}
/* Only insert a jump if at least one adjust (j + 1 .. i - 1) would be skipped by it */
if (j < i - 1) {
/* Work on a copy: the erase below invalidates the reference to the original adjust */
DeterministicSpriteGroupAdjust current = adjust;
current.operation = DSGA_OP_JZ;
current.adjust_flags &= ~(DSGAF_BOOL_MUL_HINT | DSGAF_SKIP_ON_ZERO);
/* The adjust directly before the removed multiplication becomes the last adjust of the conditional block */
group->adjusts[i - 1].adjust_flags |= DSGAF_END_BLOCK;
group->adjusts.erase(group->adjusts.begin() + i);
/* If the jump condition is the final read of a temporary storage register and the adjust which stopped
 * the scan is the DSGA_OP_STO_NC which wrote it, try to evaluate the stored expression in the jump directly */
if (j >= 0 && current.variable == 0x7D && (current.adjust_flags & DSGAF_LAST_VAR_READ)) {
DeterministicSpriteGroupAdjust &prev = group->adjusts[j];
if (prev.operation == DSGA_OP_STO_NC && prev.divmod_val == (current.parameter & 0xFF) &&
TryCombineTempStoreLoadWithStoreSourceAdjust(current, &prev, false)) {
/* Managed to extract source from immediately prior STO_NC, which can now be removed */
group->adjusts.erase(group->adjusts.begin() + j);
/* One fewer adjust ahead of both positions */
j--;
i--;
}
}
/* Place the jump at the start of the skippable run */
group->adjusts.insert(group->adjusts.begin() + j + 1, current);
/* Compensate for the loop decrement so that the next iteration examines the adjust now marked as block end,
 * this lets flagged adjusts inside the new block be processed as well */
i++;
}
}
}
@ -7526,7 +7639,7 @@ static void OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState &
if (state.check_expensive_vars && !HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_EXPENSIVE_VARS)) {
if (dse_candidate) {
_cur.pending_expensive_var_checks.push_back(group);
group->dsg_flags |= DSGF_CHECK_EXPENSIVE_VARS;
} else {
OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(group);
}
@ -7572,51 +7685,7 @@ static std::bitset<256> HandleVarAction2DeadStoreElimination(DeterministicSprite
if (var_src != nullptr && var_src->variable != 0x7C) {
/* Don't use variable 7C as we're not checking for store perms which may clobber the value here */
DeterministicSpriteGroupAdjustType var_src_type = var_src->type;
if (inverted) {
switch (var_src_type) {
case DSGA_TYPE_EQ:
var_src_type = DSGA_TYPE_NEQ;
break;
case DSGA_TYPE_NEQ:
var_src_type = DSGA_TYPE_EQ;
break;
default:
/* Don't try to handle this case */
return false;
}
}
if (target.type == DSGA_TYPE_NONE && target.shift_num == 0 && (target.and_mask == 0xFFFFFFFF || (IsConstantComparisonAdjustType(var_src_type) && (target.and_mask & 1)))) {
target.type = var_src_type;
target.variable = var_src->variable;
target.shift_num = var_src->shift_num;
target.parameter = var_src->parameter;
target.and_mask = var_src->and_mask;
target.add_val = var_src->add_val;
target.divmod_val = var_src->divmod_val;
return true;
} else if (IsConstantComparisonAdjustType(target.type) && target.shift_num == 0 && (target.and_mask & 1) && target.add_val == 0 &&
IsConstantComparisonAdjustType(var_src_type)) {
/* DSGA_TYPE_EQ/NEQ on target are OK if add_val is 0 because this is a boolean invert/convert of the incoming DSGA_TYPE_EQ/NEQ */
if (target.type == DSGA_TYPE_EQ) {
target.type = InvertConstantComparisonAdjustType(var_src_type);
} else {
target.type = var_src_type;
}
target.variable = var_src->variable;
target.shift_num = var_src->shift_num;
target.parameter = var_src->parameter;
target.and_mask = var_src->and_mask;
target.add_val = var_src->add_val;
target.divmod_val = var_src->divmod_val;
return true;
} else if (var_src_type == DSGA_TYPE_NONE && (target.shift_num + var_src->shift_num) < 32) {
target.variable = var_src->variable;
target.parameter = var_src->parameter;
target.and_mask &= var_src->and_mask >> target.shift_num;
target.shift_num += var_src->shift_num;
return true;
}
if (TryCombineTempStoreLoadWithStoreSourceAdjust(target, var_src, inverted)) return true;
}
return false;
};
@ -7778,7 +7847,10 @@ static std::bitset<256> HandleVarAction2DeadStoreElimination(DeterministicSprite
break;
}
}
if (add) substitution_candidates.push_back(adjust.parameter | (i << 8));
if (add) {
substitution_candidates.push_back(adjust.parameter | (i << 8));
const_cast<DeterministicSpriteGroupAdjust &>(adjust).adjust_flags |= DSGAF_LAST_VAR_READ;
}
} else {
bits.set(adjust.parameter, true);
}
@ -7850,10 +7922,12 @@ static void HandleVarAction2OptimisationPasses()
OptimiseVarAction2DeterministicSpriteGroupSimplifyStores(group);
OptimiseVarAction2DeterministicSpriteGroupAdjustOrdering(group);
}
for (DeterministicSpriteGroup *group : _cur.pending_expensive_var_checks) {
OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(group);
if (!(group->dsg_flags & DSGF_NO_DSE)) {
OptimiseVarAction2DeterministicSpriteGroupInsertJumps(group);
}
if (group->dsg_flags & DSGF_CHECK_EXPENSIVE_VARS) {
OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(group);
}
}
}

@ -18,6 +18,7 @@
#include "newgrf_extension.h"
#include "newgrf_industrytiles_analysis.h"
#include "scope.h"
#include "debug_settings.h"
#include "safeguards.h"
@ -152,10 +153,16 @@ static inline uint32 GetVariable(const ResolverObject &object, ScopeResolver *sc
return &this->default_scope;
}
/** Bookkeeping for DSGA_OP_JZ / DSGAF_END_BLOCK conditional blocks while the adjusts of a group are evaluated in order. */
struct ConditionalNestingState {
	uint depth{0};            ///< Current block nesting depth: raised for each DSGA_OP_JZ adjust, lowered for each adjust flagged DSGAF_END_BLOCK.
	uint skip_until_depth{0}; ///< Nesting depth at which skipping ends, set to one less than the current depth when a jump is taken.
	bool skipping{false};     ///< Whether a jump was taken and adjusts are currently being skipped.
};
/* Evaluate an adjustment for a variable of the given size.
* U is the unsigned type and S is the signed type to use. */
template <typename U, typename S>
static U EvalAdjustT(const DeterministicSpriteGroupAdjust &adjust, ScopeResolver *scope, U last_value, uint32 value)
static U EvalAdjustT(const DeterministicSpriteGroupAdjust &adjust, ScopeResolver *scope, U last_value, uint32 value, ConditionalNestingState *cond_nesting_state = nullptr)
{
value >>= adjust.shift_num;
value &= adjust.and_mask;
@ -201,6 +208,17 @@ static U EvalAdjustT(const DeterministicSpriteGroupAdjust &adjust, ScopeResolver
case DSGA_OP_RSUB: return value - last_value;
case DSGA_OP_STO_NC: _temp_store.StoreValue(adjust.divmod_val, (S)value); return last_value;
case DSGA_OP_ABS: return ((S)last_value < 0) ? -((S)last_value) : (S)last_value;
case DSGA_OP_JZ: {
if (value == 0 && cond_nesting_state != nullptr) {
/* Jump */
cond_nesting_state->skip_until_depth = cond_nesting_state->depth - 1;
cond_nesting_state->skipping = true;
return 0;
} else {
/* Don't jump */
return last_value;
}
}
default: return value;
}
}
@ -227,7 +245,19 @@ const SpriteGroup *DeterministicSpriteGroup::Resolve(ResolverObject &object) con
ScopeResolver *scope = object.GetScope(this->var_scope);
ConditionalNestingState conditional_nesting = {};
for (const auto &adjust : this->adjusts) {
if (adjust.adjust_flags & DSGAF_END_BLOCK) {
conditional_nesting.depth--;
if (conditional_nesting.skipping && conditional_nesting.skip_until_depth == conditional_nesting.depth) {
/* End of block that was skipped */
conditional_nesting.skipping = false;
continue;
}
}
if (adjust.operation == DSGA_OP_JZ) conditional_nesting.depth++;
if (conditional_nesting.skipping) continue;
if ((adjust.adjust_flags & DSGAF_SKIP_ON_ZERO) && (last_value == 0)) continue;
if ((adjust.adjust_flags & DSGAF_SKIP_ON_LSB_SET) && (last_value & 1) != 0) continue;
@ -256,9 +286,9 @@ const SpriteGroup *DeterministicSpriteGroup::Resolve(ResolverObject &object) con
}
switch (this->size) {
case DSG_SIZE_BYTE: value = EvalAdjustT<uint8, int8> (adjust, scope, last_value, value); break;
case DSG_SIZE_WORD: value = EvalAdjustT<uint16, int16>(adjust, scope, last_value, value); break;
case DSG_SIZE_DWORD: value = EvalAdjustT<uint32, int32>(adjust, scope, last_value, value); break;
case DSG_SIZE_BYTE: value = EvalAdjustT<uint8, int8> (adjust, scope, last_value, value, &conditional_nesting); break;
case DSG_SIZE_WORD: value = EvalAdjustT<uint16, int16>(adjust, scope, last_value, value, &conditional_nesting); break;
case DSG_SIZE_DWORD: value = EvalAdjustT<uint32, int32>(adjust, scope, last_value, value, &conditional_nesting); break;
default: NOT_REACHED();
}
last_value = value;
@ -685,6 +715,7 @@ static const char *_dsg_op_special_names[] {
"RSUB",
"STO_NC",
"ABS",
"JZ",
};
static_assert(lengthof(_dsg_op_special_names) == DSGA_OP_SPECIAL_END - DSGA_OP_TERNARY);
@ -708,13 +739,18 @@ static char *GetAdjustOperationName(char *str, const char *last, DeterministicSp
return str + seprintf(str, last, "\?\?\?(0x%X)", operation);
}
static char *DumpSpriteGroupAdjust(char *p, const char *last, const DeterministicSpriteGroupAdjust &adjust, int padding, uint32 &highlight_tag)
static char *DumpSpriteGroupAdjust(char *p, const char *last, const DeterministicSpriteGroupAdjust &adjust, int padding, uint32 &highlight_tag, uint &conditional_indent)
{
if (adjust.variable == 0x7D) {
/* Temp storage load */
highlight_tag = (1 << 16) | (adjust.parameter & 0xFFFF);
}
p += seprintf(p, last, "%*s", padding, "");
for (uint i = 0; i < conditional_indent; i++) {
p += seprintf(p, last, "> ");
}
auto append_flags = [&]() {
if (adjust.adjust_flags & DSGAF_SKIP_ON_ZERO) {
p += seprintf(p, last, ", skip on zero");
@ -722,15 +758,31 @@ static char *DumpSpriteGroupAdjust(char *p, const char *last, const Deterministi
if (adjust.adjust_flags & DSGAF_SKIP_ON_LSB_SET) {
p += seprintf(p, last, ", skip on LSB set");
}
if (adjust.adjust_flags & DSGAF_LAST_VAR_READ && HasBit(_misc_debug_flags, MDF_NEWGRF_SG_DUMP_MORE_DETAIL)) {
p += seprintf(p, last, ", last var read");
}
if (adjust.adjust_flags & DSGAF_BOOL_MUL_HINT && HasBit(_misc_debug_flags, MDF_NEWGRF_SG_DUMP_MORE_DETAIL)) {
p += seprintf(p, last, ", bool mul hint");
}
if (adjust.adjust_flags & DSGAF_END_BLOCK) {
p += seprintf(p, last, ", end block");
}
};
if (adjust.operation == DSGA_OP_JZ) {
conditional_indent++;
}
if (adjust.adjust_flags & DSGAF_END_BLOCK) {
conditional_indent--;
}
if (adjust.operation == DSGA_OP_TERNARY) {
p += seprintf(p, last, "%*sTERNARY: true: %X, false: %X", padding, "", adjust.and_mask, adjust.add_val);
p += seprintf(p, last, "TERNARY: true: %X, false: %X", adjust.and_mask, adjust.add_val);
append_flags();
return p;
}
if (adjust.operation == DSGA_OP_ABS) {
p += seprintf(p, last, "%*sABS", padding, "");
p += seprintf(p, last, "ABS");
append_flags();
return p;
}
@ -738,7 +790,7 @@ static char *DumpSpriteGroupAdjust(char *p, const char *last, const Deterministi
/* Temp storage store */
highlight_tag = (1 << 16) | (adjust.and_mask & 0xFFFF);
}
p += seprintf(p, last, "%*svar: %X", padding, "", adjust.variable);
p += seprintf(p, last, "var: %X", adjust.variable);
if (adjust.variable == A2VRI_VEHICLE_CURRENT_SPEED_SCALED) {
p += seprintf(p, last, " (current_speed_scaled)");
} else if (adjust.variable >= 0x100) {
@ -860,12 +912,14 @@ void SpriteGroupDumper::DumpSpriteGroup(const SpriteGroup *sg, int padding, uint
if (dsg->dsg_flags & DSGF_DSE_RECURSIVE_DISABLE) p += seprintf(p, lastof(this->buffer), ", DSE_RD");
if (dsg->dsg_flags & DSGF_VAR_TRACKING_PENDING) p += seprintf(p, lastof(this->buffer), ", VAR_PENDING");
if (dsg->dsg_flags & DSGF_REQUIRES_VAR1C) p += seprintf(p, lastof(this->buffer), ", REQ_1C");
if (dsg->dsg_flags & DSGF_CHECK_EXPENSIVE_VARS) p += seprintf(p, lastof(this->buffer), ", CHECK_EXP_VAR");
}
print();
emit_start();
padding += 2;
uint conditional_indent = 0;
for (const auto &adjust : (*adjusts)) {
DumpSpriteGroupAdjust(this->buffer, lastof(this->buffer), adjust, padding, highlight_tag);
DumpSpriteGroupAdjust(this->buffer, lastof(this->buffer), adjust, padding, highlight_tag, conditional_indent);
print();
if (adjust.variable == 0x7E && adjust.subroutine != nullptr) {
this->DumpSpriteGroup(adjust.subroutine, padding + 5, 0);

@ -206,6 +206,7 @@ enum DeterministicSpriteGroupAdjustOperation : uint8 {
DSGA_OP_RSUB, ///< b - a
DSGA_OP_STO_NC, ///< store b into temporary storage, indexed by c. return a
DSGA_OP_ABS, ///< abs(a)
DSGA_OP_JZ, ///< jump to adjust after DSGAF_END_BLOCK marker (taking into account nesting) if b is zero. return 0 if jumped, return a if not jumped
DSGA_OP_SPECIAL_END,
};
@ -217,6 +218,9 @@ enum DeterministicSpriteGroupAdjustFlags : uint8 {
DSGAF_NONE = 0,
DSGAF_SKIP_ON_ZERO = 1 << 0,
DSGAF_SKIP_ON_LSB_SET = 1 << 1,
DSGAF_LAST_VAR_READ = 1 << 2,
DSGAF_BOOL_MUL_HINT = 1 << 3,
DSGAF_END_BLOCK = 1 << 4,
};
DECLARE_ENUM_AS_BIT_SET(DeterministicSpriteGroupAdjustFlags);
@ -441,6 +445,7 @@ enum DeterministicSpriteGroupFlags : uint8 {
DSGF_DSE_RECURSIVE_DISABLE = 1 << 1,
DSGF_VAR_TRACKING_PENDING = 1 << 2,
DSGF_REQUIRES_VAR1C = 1 << 3,
DSGF_CHECK_EXPENSIVE_VARS = 1 << 4,
};
DECLARE_ENUM_AS_BIT_SET(DeterministicSpriteGroupFlags)

Loading…
Cancel
Save