diff --git a/lslopt/lsldeadcode.py b/lslopt/lsldeadcode.py new file mode 100644 index 0000000..65f8cb5 --- /dev/null +++ b/lslopt/lsldeadcode.py @@ -0,0 +1,504 @@ + +import lslfuncs +from lslfuncs import Key, Vector, Quaternion + +class deadcode(object): + # Functions that cause the rest of the current block to never be executed + # e.g. default { state_entry() { llResetScript(); llSetPos(<3,3,3>); } } + # in that example, llSetPos is dead code because llResetScript does not let + # it execute. llRemoveInventory(llGetScriptName()), llDetachFromAvatar() + # and llDie() come close, but evidence shows that it's not the case and the + # script can execute some more lines before stopping. + # llScriptState(..., FALSE) allows resuming after it, so whatever comes + # next isn't really dead code. + # TODO: check if there are any more than this one. + TerminatorFuncs = ('llResetScript',) + + # TODO: Share with lsloptimizer. + PythonType2LSL = {int: 'integer', float: 'float', + unicode: 'string', Key: 'key', Vector: 'vector', + Quaternion: 'rotation', list: 'list'} + + def MarkReferences(self, node): + """Marks each node it passes through as executed (X), and each variable + as read (R) (with count) and/or written (W) (with node where it is, or + False if written more than once) as appropriate. Traces execution to + determine if any part of the code is never executed. + """ + # The 'X' key, when present, indicates whether a node is executed. + # Its value means whether this instruction will proceed to the next + # (True: it will; False: it won't). + if 'X' in node: + return node['X'] # branch already analyzed + + nt = node['nt'] + child = node['ch'] if 'ch' in node else None + + # Control flow statements + + if nt == 'STSW': + node['X'] = False # Executed but path-breaking. + sym = self.symtab[0][node['name']] + if 'X' not in self.tree[sym['Loc']]: + self.MarkReferences(self.tree[sym['Loc']]) + + return False + + if nt == 'JUMP': + node['X'] = False # Executed but path-breaking. + sym = self.symtab[node['scope']][node['name']] + if 'R' in sym: + sym['R'] += 1 + else: + sym['R'] = 1 + return False + + if nt == 'RETURN': + node['X'] = False # Executed but path-breaking. + if child: + self.MarkReferences(child[0]) + return False + + if nt == 'IF': + # "When you get to a fork in the road, take it." + node['X'] = None # provisional value, refined later + self.MarkReferences(child[0]) + condnode = child[0] + if condnode['nt'] == 'CONST': + if lslfuncs.cond(condnode['value']): + # TRUE - 'then' branch always executed. + node['X'] = self.MarkReferences(child[1]) + return node['X'] + elif len(child) == 3: + # FALSE - 'else' branch always executed. + node['X'] = self.MarkReferences(child[2]) + return node['X'] + # else fall through + else: + cont = self.MarkReferences(child[1]) + if len(child) == 3: + if not cont: + cont = self.MarkReferences(child[2]) + node['X'] = cont + return cont + self.MarkReferences(child[2]) + node['X'] = True + return True + + if nt == 'WHILE': + node['X'] = None # provisional value, refined later + self.MarkReferences(child[0]) + + if child[0]['nt'] == 'CONST': + if lslfuncs.cond(child[0]['value']): + # Infinite loop - unless it returns, it stops + # execution. But it is executed itself. + self.MarkReferences(child[1]) + node['X'] = False + return node['X'] + # else the inside isn't executed at all, so don't mark it + else: + self.MarkReferences(child[1]) + node['X'] = True + return True + + if nt == 'DO': + node['X'] = None # provisional value, refined later + if not self.MarkReferences(child[0]): + node['X'] = False + return False + self.MarkReferences(child[1]) + # It proceeds to the next statement unless it's an infinite loop + node['X'] = not (child[1]['nt'] == 'CONST' and lslfuncs.cond(child[1]['value'])) + return node['X'] + + if nt == 'FOR': + node['X'] = None # provisional value, refined later + self.MarkReferences(child[0]) + self.MarkReferences(child[1]) + if child[1]['nt'] == 'CONST': + if lslfuncs.cond(child[1]['value']): + # Infinite loop - unless it returns, it stops + # execution. But it is executed itself. + node['X'] = False + self.MarkReferences(child[3]) + self.MarkReferences(child[2]) # this can't stop execution + return node['X'] + # else the body and the iterator aren't executed at all, so + # don't mark them + node['X'] = True + else: + node['X'] = True + self.MarkReferences(child[3]) + self.MarkReferences(child[2]) + # Mark the EXPRLIST as always executed, but not the subexpressions. + # That forces the EXPRLIST (which is a syntactic requirement) to be + # kept, while still simplifying the contents properly. + child[2]['X'] = True + return True + + if nt == '{}': + # Go through each statement in turn. If one stops execution, + # continue reading until either we find a used label (and resume + # execution) or reach the end (and return False). Otherwise return + # True. + + continues = True + node['X'] = None # provisional + for stmt in child: + if continues or stmt['nt'] == '@': + continues = self.MarkReferences(stmt) + node['X'] = continues + return continues + + if nt == 'FNCALL': + node['X'] = None # provisional + sym = self.symtab[0][node['name']] + + fdef = self.tree[sym['Loc']] if 'Loc' in sym else None + for idx in xrange(len(child)-1, -1, -1): + # Each element is a "write" on the callee's parameter. + # E.g. f(integer a, integer b) { f(2,3); } means 2, 3 are + # writes to a and b. + self.MarkReferences(child[idx]) + if fdef is not None: + psym = self.symtab[fdef['pscope']][fdef['pnames'][idx]] + if 'W' in psym: + psym['W'] = False + else: + psym['W'] = child[idx] + + if 'Loc' in sym: + if 'X' not in self.tree[sym['Loc']]: + self.MarkReferences(self.tree[sym['Loc']]) + node['X'] = self.tree[sym['Loc']]['X'] + else: + node['X'] = node['name'] not in self.TerminatorFuncs + # Note that JUMP analysis is incomplete. To do it correctly, we + # should follow the jump right to its destination, in order to know + # if that branch leads to a RETURN or completely stops the event. + # With our code structure, following the JUMP is unfeasible. + # For that reason, we can't track whether a branch ends in RETURN + # or in something more powerful like a script reset, in order to + # propagate it through the function definition to the caller. + # + # In practice, this means that the caller can't distinguish this: + # fn() { return; } + # from this: + # fn() { llResetScript(); } + # and therefore, invocations of the function that are followed by + # code can't know whether that code is dead or not. + # + # What does that have to do with jumps? Well, imagine this: + # fn(integer x) { if (x) jump x1; else jump x2; + # @x1; return; @x2; llResetScript(); } + # What of the branches of if() is taken, depends on where the jumps + # lead to. Assuming the last one always is wrong, because it would + # mark in the caller code that may execute, as dead, e.g. here: + # fn2() { fn(); x = 1; } + + + return node['X'] + + if nt == 'DECL': + sym = self.symtab[node['scope']][node['name']] + if child is not None: + sym['W'] = child[0] + else: + sym['W'] = {'nt':'CONST', 't':node['t'], + 'value':self.DefaultValues[node['t']]} + + node['X'] = True + if child is not None: + if 'orig' in child[0]: + self.MarkReferences(child[0]['orig']) + child[0]['X'] = child[0]['orig']['X'] + else: + self.MarkReferences(child[0]) + return True + + # ---- Starting here, all node types return through the bottom + # (except '='). + + node['X'] = None # provisional + if nt in self.assign_ops or nt in ('--V', '++V', 'V++', 'V--'): + ident = node['ch'][0] + if ident['nt'] == 'FLD': + ident = ident['ch'][0] + assert ident['nt'] == 'IDENT' + sym = self.symtab[ident['scope']][ident['name']] + if ident['scope'] == 0: + # Mark the global first. + self.MarkReferences(self.tree[sym['Loc']]) + # In any case, this is at least the second write, so mark it as such + # (SSA would be a plus for this to be optimal) + sym['W'] = False + + if nt == '=': + # Prevent the first node from being mistaken as a read, by + # recursing only on the RHS node. + self.MarkReferences(child[1]) + node['X'] = True + return True + + elif nt == 'FLD': + # Mark this variable as referenced by a Field (recursing will mark + # the ident as read later) + self.symtab[child[0]['scope']][child[0]['name']]['Fld'] = True + + elif nt == 'IDENT': + sym = self.symtab[node['scope']][node['name']] + # Mark global if it's one. + if 'W' not in sym and node['scope'] == 0: + self.MarkReferences(self.tree[sym['Loc']]) + # Increase read counter + if 'R' in sym: + sym['R'] += 1 + else: + sym['R'] = 1 + + node['X'] = True + if child is not None: + for subnode in child: + self.MarkReferences(subnode) + + return True + + def SymbolReplacedOrDeleted(self, curnode): + """If the given node's name must be simplified, that is, replaced or + deleted (deleted if declaration, replaced if identifier), it returns + the symbol table entry. Otherwise it returns False. + """ + # 'W':False means written more than once, i.e. not only in the + # declaration. Variables written more than once can't be + # simplified by this code. + + # If not written more than once: + # For expressions: + # - Remove expressions read only once, replacing the value, but only + # if the readers are not fields of vectors or rotations, and if + # they are side-effect free. + # For constants: + # - Remove lists, vectors and rotations read only once. + # - Floats are removed if their value has no decimals or if + # used no more than N times (for some N). + # - Strings, keys and integers are just removed. + sym = self.symtab[curnode['scope']][curnode['name']] + + if 'R' not in sym: + return True # if not used, it can be removed + + # Event parameters do not have 'W' in sym. + if 'W' not in sym: + return False + + if sym['W'] is not False: + + node = sym['W'] + nt = node['nt'] + if nt == 'CONST': + tcurnode = curnode['t'] + if tcurnode in ('integer', 'string', 'key'): + return sym + + if tcurnode == 'float': + if sym['R'] <= 3 or type(node['value']) == int: + return sym + elif tcurnode == 'vector' \ + or tcurnode == 'list' and len(node['value']) <= 3: + if sym['R'] <= 1: + return sym + elif tcurnode == 'rotation' \ + or tcurnode == 'list' and len(node['value']) <= 4: + if sym['R'] <= 1: + return sym + return False + + # To replace expressions, they MUST be side-effect free, or they + # will be executed at a different time. + # Also, we can't safely replace expressions unless shrinknames is + # active. shrinknames assigns a different identifier to each + # variable, which avoids conflicts. Consider this scenario: + # integer i=2; + # default{state_entry(){integer j=i+1; integer i=4; llSleep(i+j);}} + # Replacing j with i+1 in llOwnerSay will produce wrong code because + # the name i is redefined after j is assigned. shrinknames prevents + # that. + if not self.shrinknames or 'SEF' not in node: + return False + + if nt not in ('VECTOR', 'ROTATION'): + # If it's an expression and the reference is to a field, we + # can't simplify. Consider e.g.: + # vector v = llGetVel(); llOwnerSay((string)v.z); + # However, if it's a field coming from a Vector or Rotation + # expression, we can embed the corresponding component, e.g. + # vector v = ; llOwnerSay((string)v.y); + # can be replaced with: llOwnerSay((string)((float)(i+2))); + + if 'Fld' in sym: + return False + + if sym['R'] == 1: + return sym + + return False + + def CleanNode(self, curnode): + """Recursively checks if the children are used, deleting those that are + not. + """ + if 'ch' not in curnode: + return + # NOTE: Should not depend on 'Loc', since the nodes that are the + # destination of 'Loc' are renumbered as we delete stuff from globals. + + index = int(curnode['nt'] in self.assign_ops) # don't recurse into a lvalue + + while index < len(curnode['ch']): + node = curnode['ch'][index] + + if 'X' not in node: + del curnode['ch'][index] + continue + + nt = node['nt'] + + if nt == 'DECL': + if self.SymbolReplacedOrDeleted(node): + if 'ch' not in node or 'SEF' in node['ch'][0]: + del curnode['ch'][index] + continue + node = curnode['ch'][index] = {'nt':'EXPR', 't':node['t'], + 'ch':[self.Cast(node['ch'][0], node['t'])]} + + elif nt == 'FLD': + sym = self.SymbolReplacedOrDeleted(node['ch'][0]) + if sym: + value = sym['W'] + # Mark as executed, so it isn't optimized out. + value['X'] = True + fieldidx = 'xyzs'.index(node['fld']) + if value['nt'] == 'CONST': + value = value['value'][fieldidx] + value = {'nt':'CONST', 'X':True, 'SEF':True, + 't':self.PythonType2LSL[type(value)], 'value':value} + value = self.Cast(value, 'float') + SEF = True + else: # assumed VECTOR or ROTATION per SymbolReplacedOrDeleted + SEF = 'SEF' in value + value = self.Cast(value['ch'][fieldidx], 'float') + # Replace it, in parentheses + if value['nt'] not in ('CONST','()','FLD','IDENT','FNCALL'): + node = curnode['ch'][index] = \ + {'nt':'()', 'X':True, 't':value['t'], 'ch':[value]} + else: + node = curnode['ch'][index] = value + if SEF: + node['SEF'] = True + + elif nt == 'IDENT': + sym = self.SymbolReplacedOrDeleted(node) + if sym: + # Mark as executed, so it isn't optimized out. + new = sym['W'] + new['X'] = True + + if new['nt'] not in ('CONST','()','FLD','IDENT','FNCALL'): + new = {'nt':'()', 'X':True, 't':sym['W']['t'], + 'ch':[new]} + SEF = 'SEF' in sym['W'] + if SEF: + new['SEF'] = True + if new['t'] != node['t']: + new = self.Cast(new, node['t']) + if new['nt'] not in ('CONST','()','FLD','IDENT','FNCALL'): + new = {'nt':'()', 'X':True, 't':node['t'], 'ch':[new]} + if SEF: + new['SEF'] = True + curnode['ch'][index] = node = new + # Delete orig if present, as we've eliminated the original + #if 'orig' in sym['W']: + # del sym['W']['orig'] + + elif nt in self.assign_ops: + ident = node['ch'][0] + if ident['nt'] == 'FLD': + ident = ident['ch'][0] + sym = self.SymbolReplacedOrDeleted(ident) + if sym: + node = curnode['ch'][index] = self.Cast(node['ch'][1], node['t']) + + elif nt in ('IF', 'WHILE', 'DO', 'FOR'): + # If the mandatory statement is to be removed, replace it + # with a ; to prevent leaving the statement empty. + child = node['ch'] + idx = 3 if nt == 'FOR' else 0 if nt == 'DO' else 1 + if 'X' not in child[idx]: + child[idx] = {'nt':';', 't':None, 'X':True, 'SEF':True} + if nt == 'DO' and 'X' not in child[1]: + # Mandatory condition but not executed - replace + child[1] = {'nt':'CONST','X':True,'SEF':True,'t':'integer', + 'value':0} + + self.CleanNode(node) + index += 1 + + + def RemoveDeadCode(self): + """Simple reference-based dead code removal. It also performs a + simplified form of constant propagation, taking advantage of the fact + that it analyzes the code flow. + """ + # TODO: Converting to SSA first would facilitate DCR. + # The SSA should be followed by constant and expression propagation, + # then constant folding and then dead code removal. + + # Start at state default and mark everything referenced from there. + # At the end, unreferenced globals and states will be removed. + # We assume all events in a state are executed. + # + # This may not be the case, e.g. a target()/no_target() without + # llTarget, sensor()/no_sensor() without llSensor(), listen() without + # llListen, timer without llSetTimerEvent, etc. but we're not that + # sophisticated (yet). + + # TODO: Inlining of functions that are a single 'return' line. + + statedef = self.tree[self.symtab[0]['default']['Loc']] + assert statedef['nt'] == 'STDEF' and statedef['name'] == 'default' + self.MarkReferences(statedef) + + # Track removal of global lines, to reasign locations later. + LocMap = range(len(self.tree)) + + # Perform the removal + idx = 0 + while idx < len(self.tree): + # Globals are special. + # We need to track the locations too. + node = self.tree[idx] + + delete = False + if 'X' not in node: + delete = True + elif node['nt'] == 'DECL': + delete = self.SymbolReplacedOrDeleted(node) + + if delete: + del self.tree[idx] + del LocMap[idx] + else: + idx += 1 + self.CleanNode(node) + + # Reassign locations + for name in self.symtab[0]: + if name != -1: + sym = self.symtab[0][name] + if 'Loc' in sym: + try: + sym['Loc'] = LocMap.index(sym['Loc']) + except ValueError: + # Subtree deleted - delete the Loc + del sym['Loc'] diff --git a/testparser.py b/testparser.py index 5816ce8..6ebdefc 100644 --- a/testparser.py +++ b/testparser.py @@ -302,12 +302,13 @@ class Test03_Optimizer(UnitTestCase): 'extendedglobalexpr', 'allowmultistrings', 'allowkeyconcat'] ) self.opt.optimize(p) + self.opt.optimize(p,['optimize','shrinknames']) self.opt.optimize(p, ()) print self.outscript.output(p) p = self.parser.parse('''string s = llUnescapeURL("%09");default{timer(){float f=llSqrt(-1); - integer i;i++;i=i;-(-(0.0+i));!!(!~~(!(i)));[]+i+s+f;}}''', - ['extendedtypecast','extendedassignment', + integer i;i++;i=i;-(-(0.0+i));!!(!~~(!(i)));[]+i+s+f;llOwnerSay(s);}}''', + ['extendedtypecast', 'extendedassignment', 'extendedglobalexpr', 'allowmultistrings', 'allowkeyconcat'] ) self.opt.optimize(p, ['optimize','foldtabs']) @@ -320,9 +321,7 @@ class Test03_Optimizer(UnitTestCase): if (1) state another;} default { timer() { state another; } } state another { timer() { state default; } touch(integer num_det) {} } - ''', - ['optimize', 'shrinknames'] - ) + ''') self.opt.optimize(p, ['optimize','shrinknames']) print self.outscript.output(p) @@ -348,9 +347,7 @@ class Test03_Optimizer(UnitTestCase): if (1) state another;} default { timer() { state another; } state_exit() {} } state another { timer() { state default; } touch(integer num_det) {} } - ''', - ['optimize', 'shrinknames'] - ) + ''') self.opt.optimize(p, ['optimize','shrinknames']) print self.outscript.output(p) @@ -377,37 +374,66 @@ class Test03_Optimizer(UnitTestCase): if (1) state another;} default { timer() { state another; } state_exit() {} } state another { timer() { state default; } touch(integer num_det) {} } - ''', - ['optimize', 'shrinknames'] - ) + ''') self.opt.optimize(p, ['optimize','shrinknames']) p = self.parser.parse( '''integer i1; integer i2; f(integer a, integer b, integer c, integer d, integer e){} default{timer(){}} - ''', - ['optimize', 'shrinknames'] - ) + ''') self.opt.optimize(p, ['optimize','shrinknames']) + out = self.outscript.output(p) + self.assertEqual(out, 'default\n{\n timer()\n {\n }\n}\n') + + p = self.parser.parse('integer j;integer f(){return 1;}' + 'default{timer(){if (f()) jump x;jump x;@x;\n' + 'if (j) jump x; else return;\n' + '}}', + ['extendedglobalexpr']) + self.opt.optimize(p) print self.outscript.output(p) - # + p = self.parser.parse( + 'rotation v=<1,2,3,4>;rotation w=;\n' + 'integer f(integer x){return 1;}\n' + 'float j=5;\n' + 'default{touch(integer n){\n' + 'v.x = 4; w.x;\n' + 'vector u = llGetVel(); llOwnerSay((string)u.y);\n' + 'vector v1=),2,3>; float fq=v1.x;\n' + 'rotation v2=<4,2,3,4>; v2.y;j;f(3);f(n);\n' + 'while(1) do return; while(0);\n' + '}}', + ['extendedglobalexpr']) + self.opt.optimize(p,['optimize', 'shrinknames']) + print self.outscript.output(p) + + p = self.parser.parse( + 'integer i=2;\n' + 'default{state_entry(){\n' + 'integer j=i+1; integer i=4; llSleep(i+j);\n' + '}}', + ['extendedglobalexpr']) + self.opt.optimize(p, ['optimize', 'shrinknames']) + print self.outscript.output(p) def test_regression(self): p = self.parser.parse(''' - integer a; + integer a; float f = 3; x() { if (1) { string s = "x"; s = s + (string)a; } } - default { timer() { x();a=3;llOwnerSay((string)a); } } + default { timer() { x();a=3;llOwnerSay((string)a); list L; L = [f]; + } } ''', ['extendedassignment']) self.opt.optimize(p) out = self.outscript.output(p) self.assertEqual(out, 'integer a;\nx()\n{\n {\n ' 'string s = "x";\n s = s + (string)a;\n }\n}\n' 'default\n{\n timer()\n {\n x();\n a = 3;\n' - ' llOwnerSay((string)a);\n }\n}\n' + ' llOwnerSay((string)a);\n [((float)(3))];\n' + ' }\n}\n' ) p = self.parser.parse( @@ -417,7 +443,8 @@ class Test03_Optimizer(UnitTestCase): integer i = 0; vector v = ; - default{timer(){f=4;k="";i=0;v=<0,0,0>;L=[];llOwnerSay((string)(L+f+i+v+k));}} + default{timer(){f=4;k="";i=0;v=<0,0,0>;L=[]; + llOwnerSay((string)(L+f+i+v+k));}} ''', ['extendedassignment']) self.opt.optimize(p) out = self.outscript.output(p)