def FindName(self, node, scope=None):
    """Return the name to output for an identifier.

    Two calling conventions are supported:

    * ``FindName(node)``: ``node`` is a syntax tree node (a dict with a
      'name' key and, optionally, a 'scope' key holding an index into
      self.symtab).
    * ``FindName(name, scope)``: ``node`` is the identifier itself and
      ``scope`` is the index of the symbol table it belongs to.

    The symbol's 'NewName' entry is returned when there is one; in every
    other case (node without a scope, symbol without 'NewName', or
    identifier absent from that symbol table) the original name is
    returned unchanged.
    """
    if scope is None:
        # node is a tree node
        name = node['name']
        if 'scope' not in node:
            # Nodes without a scope are never looked up.
            return name
        scope = node['scope']
    else:
        # node is already the bare identifier
        name = node

    # A single tolerant lookup: a missing symbol falls back to the original
    # name instead of raising KeyError.
    return self.symtab[scope].get(name, {}).get('NewName', name)
parser(object): args = self.Parse_optional_expression_list(sym['ParamTypes']) self.expect(')') self.NextToken() - return {'nt':'FNCALL', 't':sym['Type'], 'name':name, - 'scope':self.scopeindex, 'ch':args} + ret = {'nt':'FNCALL', 't':sym['Type'], 'name':name, 'ch':args} + if 'Scope' in sym: + ret['scope'] = sym['Scope'] + return ret if sym['Kind'] != 'v': raise EParseTypeMismatch(self) typ = sym['Type'] @@ -1170,7 +1172,7 @@ class parser(object): x = random.randint(0, 16777215) unique += b64encode(chr(x>>16) + chr((x>>8)&255) + chr(x&255)).replace('+', '_') - if '/' not in unique and unique not in self.locallabels: + if '/' not in unique not in self.locallabels: break else: # Use the existing name. Faster and more readable. @@ -1200,11 +1202,11 @@ class parser(object): # It might still be a forward reference, so we add it to the # list of things to look up when done self.jump_lookups.append((name, self.scopeindex, self.errorpos, jumpnode)) + else: + jumpnode['scope'] = sym['Scope'] self.NextToken() self.expect(';') self.NextToken() - if sym is not None: - jumpnode['scope'] = sym['Scope'] return jumpnode if tok0 == 'STATE': self.NextToken() @@ -1217,7 +1219,7 @@ class parser(object): self.NextToken() self.expect(';') self.NextToken() - return {'nt':'STSW', 't':None, 'name':name} + return {'nt':'STSW', 't':None, 'name':name, 'scope':0} if tok0 == 'RETURN': self.NextToken() if self.tok[0] == ';': @@ -1609,7 +1611,7 @@ class parser(object): events = self.Parse_events() self.expect('}') - self.tree.append({'nt':'STDEF', 't':None, 'name':name, 'ch':events}) + self.tree.append({'nt':'STDEF', 't':None, 'name':name, 'scope':0, 'ch':events}) self.NextToken() def Parse_script(self): @@ -1633,10 +1635,11 @@ class parser(object): # Check the pending jump targets for tgt in self.jump_lookups: self.scopeindex = tgt[1] - if self.FindSymbolPartial(tgt[0], MustBeLabel = True) is None: + sym = self.FindSymbolPartial(tgt[0], MustBeLabel = True) + if sym is None: self.errorpos = tgt[2] 
import random


class renamer(object):
    """Assign replacement names to the identifiers of a parsed script.

    The class reads ``self.symtab`` (a sequence of symbol tables, one dict
    per scope, with the globals at index 0) and ``self.tree`` (the list of
    top-level nodes); both must be set by the class that inherits from this
    one before AssignNewNames() is called. Results are stored as a
    'NewName' entry in each renamed symbol.
    """

    # Characters allowed as the first character of an identifier.
    CharSet1 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_'
    # Characters allowed in the rest of an identifier.
    CharSet2 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_0123456789'
    # Reserved words that must not be generated, indexed by their length.
    # TODO: Derive these from builtins.txt somehow.
    KwByLen = ((), (), ('do', 'if', 'PI'), ('for', 'key', 'EOF'),
               ('jump', 'else', 'list', 'TRUE', 'LOOP'))

    def GetNextShortest(self):
        """Generate the next shortest possible identifier.

        self.WordCntr is the enumeration counter; a value of
        len(CharSet1) corresponds to the first one-character identifier.
        All identifiers of a given length are produced before any longer
        one, and every counter value maps to a different identifier.
        Reserved words listed in KwByLen and names already present in
        self.UsedNames are skipped. The returned name is added to
        self.UsedNames.
        """
        nfirst = len(self.CharSet1)
        nrest = len(self.CharSet2)
        while True:
            idx = self.WordCntr - nfirst
            self.WordCntr += 1

            # Find which length group idx falls in, and make idx relative
            # to the start of that group.
            groupsize = nfirst
            extra = 0  # number of characters after the first one
            while idx >= groupsize:
                idx -= groupsize
                groupsize *= nrest
                extra += 1

            # Decode idx as a mixed-radix number with a fixed digit count,
            # which is what makes the mapping one-to-one.
            idx, digit = divmod(idx, nfirst)
            chars = [self.CharSet1[digit]]
            for _ in range(extra):
                idx, digit = divmod(idx, nrest)
                chars.append(self.CharSet2[digit])
            ret = ''.join(chars)

            # KwByLen only covers short lengths; longer identifiers have no
            # reserved words registered.
            if len(ret) < len(self.KwByLen):
                reserved = self.KwByLen[len(ret)]
            else:
                reserved = ()
            if ret not in reserved and ret not in self.UsedNames:
                self.UsedNames.add(ret)
                return ret

    def _TakeName(self, reusable):
        """Return a name from the set `reusable`, or a new short one.

        Names are removed from `reusable` as they are considered. A
        candidate that has already been handed out is discarded, so that a
        reused name can never coincide with a generated one.
        """
        while reusable:
            candidate = reusable.pop()
            if candidate not in self.UsedNames:
                self.UsedNames.add(candidate)
                return candidate
        return self.GetNextShortest()

    def _NewLocalName(self, taken):
        """Return a random 'L_' + 8 character name not in the set `taken`.

        The 8 characters are drawn from CharSet2. The new name is added to
        `taken`.
        """
        while True:
            unique = 'L_' + ''.join(random.choice(self.CharSet2)
                                    for _ in range(8))
            if unique not in taken:
                taken.add(unique)
                return unique

    def AssignNewNames(self):
        """Store a 'NewName' in every renameable symbol of self.symtab.

        States (other than 'default') and functions that have a 'Loc'
        entry get the shortest names available. Global variables and
        parameters reuse names that are already present for other reasons
        when possible, and get short names otherwise. The remaining local
        variables get long random names. Labels (kind 'l') are left alone.
        """
        self.WordCntr = len(self.CharSet1)  # Initialize to length 1

        # Every name handed out so far.
        self.UsedNames = set()

        # Names that can be reused without penalty. The initial set is there
        # since the beginning. Others (e.g. Key) are created when some kinds
        # of stuff are present, but we don't take so many risks.
        ReusableNames = set(['LslLibrary', 'LslUserScript', 'System'])

        UsedLocals = set()

        # Make a first pass to separate the global symbols into three
        # categories.
        globaldefs = self.symtab[0]
        states = []
        functions = []
        globalvars = []
        for name in globaldefs:
            if name == -1:
                # Not a symbol entry.
                continue
            kind = globaldefs[name]['Kind']
            if kind == 's':
                states.append(name)
            elif kind == 'f':
                # Functions without 'Loc' are not renamed.
                if 'Loc' in globaldefs[name]:
                    functions.append(name)
            elif kind == 'v':
                globalvars.append(name)
            else:
                assert False, 'Invalid kind at this scope: ' + kind

        # We make three passes, one for states, then functions, then
        # globals, in that order.

        for name in states:
            # States have top priority. Here's why. An internal event
            # function name is made by concatenating an 'e', then the state
            # name, then the event name, e.g. edefaultstate_entry. Since a
            # new identifier containing the state name is created for every
            # event in that state, the shorter the state name, the fewer
            # bytes it will use. Furthermore, a state switch instruction
            # adds a Unicode string (all other identifier names use
            # one-byte strings), which is one more reason to shorten it as
            # much as possible.
            #
            # Unfortunately, there isn't much that can be done about
            # 'default'.
            #
            # The good side is that we get to reuse these names for
            # variables without using extra space and without wasting
            # single or double letter identifiers.
            entry = globaldefs[name]
            if name != 'default':
                name = entry['NewName'] = self.GetNextShortest()
            # Find also the event names it uses, to add them for reuse.
            for node in self.tree[entry['Loc']]['ch']:
                assert node['nt'] == 'FNDEF'
                ReusableNames.add('e' + name + node['name'])

        for name in functions:
            # Assign a new name. Internal function names get a 'g'
            # prepended to them, so these are candidates for reuse too.
            #
            # Unfortunately, we won't find any reusable name starting with
            # 'g' this early, so no point in searching.
            short = globaldefs[name]['NewName'] = self.GetNextShortest()
            ReusableNames.add('g' + short)

        for name in globalvars:
            # Use a reusable name if there's one available.
            globaldefs[name]['NewName'] = self._TakeName(ReusableNames)

        # Do the same for function and event parameter names. Pure locals
        # get long distinct names.
        for table in self.symtab[1:]:  # index 0 is the globals; skip it
            for name, sym in table.items():
                if name == -1:
                    continue
                if sym['Kind'] != 'v':
                    assert sym['Kind'] == 'l'
                    continue
                if 'Param' in sym:
                    # Same procedure as for global vars.
                    # Not the best strategy (using locally unique names
                    # would work optimally) but hey. At the time of writing
                    # there's no reference analysis. TODO: Implement.
                    sym['NewName'] = self._TakeName(ReusableNames)
                else:
                    sym['NewName'] = self._NewLocalName(UsedLocals)