Implement the shrinknames option.

Fixes some bugs in the handling of the shrink attribute, and some others in the output of renamed identifiers.
2025-07-01 23:58:20 +00:00 · 2014-08-01 05:07:50 +02:00 · 2014-08-01 05:07:50 +02:00 · 6c248c46e3
commit 6c248c46e3
parent 847d7b1e20
5 changed files with 174 additions and 15 deletions
--- a/lslopt/lsloptimizer.py
+++ b/lslopt/lsloptimizer.py
@ -2,7 +2,9 @@
 import lslfuncs
 from lslparse import warning
-class optimizer(object):
+from lslrenamer import renamer
 class optimizer(renamer):
    # Default values per type when declaring variables
    DefaultValues = {'integer': 0, 'float': 0.0, 'string': u'',
@ -606,6 +608,9 @@ class optimizer(object):
            else:
                self.FoldTree(tree, idx)
        if 'shrinknames' in options:
            self.AssignNewNames()
        treesymtab = (self.tree, self.symtab)
        del self.tree
        del self.symtab
--- a/lslopt/lsloutput.py
+++ b/lslopt/lsloutput.py
@ -135,11 +135,16 @@ class outscript(object):
    def dent(self):
        return self.indent * self.indentlevel
-    def FindName(self, node):
+    def FindName(self, node, scope = None):
-        try:
+        if scope is None:
-            return self.symtab[node['scope']][node['name']]['NewName']
+            # node is a node
-        except KeyError:
+            if 'scope' in node and'NewName' in self.symtab[node['scope']][node['name']]:
                return self.symtab[node['scope']][node['name']]['NewName']
            return node['name']
        # node is a name
        if 'NewName' in self.symtab[scope][node]:
            return self.symtab[scope][node]['NewName']
        return node
    def OutIndented(self, node):
        if node['nt'] != '{}':
@ -300,7 +305,9 @@ class outscript(object):
            if node['t'] is not None:
                ret += node['t'] + ' '
            ret += self.FindName(node) + '('
-            ret += ', '.join(typ + ' ' + name for typ, name in zip(node['ptypes'], node['pnames']))
+            scope = node['pscope']
            ret += ', '.join(typ + ' ' + self.FindName(name, scope)
                             for typ, name in zip(node['ptypes'], node['pnames']))
            return ret + ')\n' + self.OutCode(child[0])
        return self.dent() + self.OutExpr(node) + ';\n'
--- a/lslopt/lslparse.py
+++ b/lslopt/lslparse.py
@ -638,8 +638,10 @@ class parser(object):
            args = self.Parse_optional_expression_list(sym['ParamTypes'])
            self.expect(')')
            self.NextToken()
-            return {'nt':'FNCALL', 't':sym['Type'], 'name':name,
+            ret = {'nt':'FNCALL', 't':sym['Type'], 'name':name, 'ch':args}
-                'scope':self.scopeindex, 'ch':args}
+            if 'Scope' in sym:
                ret['scope'] = sym['Scope']
            return ret
        if sym['Kind'] != 'v':
            raise EParseTypeMismatch(self)
        typ = sym['Type']
@ -1170,7 +1172,7 @@ class parser(object):
                        x = random.randint(0, 16777215)
                        unique += b64encode(chr(x>>16) + chr((x>>8)&255)
                            + chr(x&255)).replace('+', '_')
-                        if '/' not in unique and unique not in self.locallabels:
+                        if '/' not in unique not in self.locallabels:
                            break
                else:
                    # Use the existing name. Faster and more readable.
@ -1200,11 +1202,11 @@ class parser(object):
                # It might still be a forward reference, so we add it to the
                # list of things to look up when done
                self.jump_lookups.append((name, self.scopeindex, self.errorpos, jumpnode))
            else:
                jumpnode['scope'] = sym['Scope']
            self.NextToken()
            self.expect(';')
            self.NextToken()
            if sym is not None:
                jumpnode['scope'] = sym['Scope']
            return jumpnode
        if tok0 == 'STATE':
            self.NextToken()
@ -1217,7 +1219,7 @@ class parser(object):
            self.NextToken()
            self.expect(';')
            self.NextToken()
-            return {'nt':'STSW', 't':None, 'name':name}
+            return {'nt':'STSW', 't':None, 'name':name, 'scope':0}
        if tok0 == 'RETURN':
            self.NextToken()
            if self.tok[0] == ';':
@ -1609,7 +1611,7 @@ class parser(object):
            events = self.Parse_events()
            self.expect('}')
-            self.tree.append({'nt':'STDEF', 't':None, 'name':name, 'ch':events})
+            self.tree.append({'nt':'STDEF', 't':None, 'name':name, 'scope':0, 'ch':events})
            self.NextToken()
    def Parse_script(self):
@ -1633,10 +1635,11 @@ class parser(object):
        # Check the pending jump targets
        for tgt in self.jump_lookups:
            self.scopeindex = tgt[1]
-            if self.FindSymbolPartial(tgt[0], MustBeLabel = True) is None:
+            sym = self.FindSymbolPartial(tgt[0], MustBeLabel = True)
            if sym is None:
                self.errorpos = tgt[2]
                raise EParseUndefined(self)
-            tgt[3]['scope'] = tgt[1]
+            tgt[3]['scope'] = sym['Scope']
        del self.jump_lookups # Finished with it.
--- a/lslopt/lslrenamer.py
+++ b/lslopt/lslrenamer.py
@ -0,0 +1,140 @@
 import random
 from base64 import b64encode
 class renamer(object):
     """Mixin that assigns short replacement names to script identifiers.

     The host object must provide ``self.symtab`` (a sequence of per-scope
     symbol dictionaries, with the global scope at index 0) and ``self.tree``
     (the list of top-level nodes, indexed by a symbol's 'Loc') before
     AssignNewNames() is called. Each renamed symbol receives a 'NewName'
     entry in its symbol table record; nothing else is modified.
     """

     # Characters allowed in the first position of an identifier.
     CharSet1 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_'
     # Characters allowed in every position after the first.
     CharSet2 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_0123456789'
     # Reserved words that must never be generated, indexed by their length.
     # TODO: Derive these from builtins.txt somehow.
     KwByLen = ((), (), ('do', 'if', 'PI'), ('for', 'key', 'EOF'),
         ('jump', 'else', 'list', 'TRUE', 'LOOP'))

     def GetNextShortest(self):
         """Generate the next shortest possible identifier.

         Reads and advances self.WordCntr (one step per candidate tried) and
         skips candidates that are reserved words (KwByLen) or that appear in
         self.UsedNames. A counter value of len(CharSet1) corresponds to the
         first one-character name.

         Candidates are enumerated shortest first and every string over the
         two character sets is produced exactly once, so successive calls
         never return the same identifier twice.
         """
         firstcount = len(self.CharSet1)
         restcount = len(self.CharSet2)
         while True:
             n = self.WordCntr
             self.WordCntr += 1
             ret = self.CharSet1[n % firstcount]
             # What is left selects the tail: 0 means "no more characters".
             n = n // firstcount - 1
             while n > 0:
                 # Bijective numbering: shifting by one before each digit
                 # makes every tail of every length appear exactly once.
                 n -= 1
                 ret += self.CharSet2[n % restcount]
                 n //= restcount
             # KwByLen only covers short words; longer names can't be in it.
             if len(ret) < len(self.KwByLen) and ret in self.KwByLen[len(ret)]:
                 continue
             if ret in self.UsedNames:
                 continue
             return ret

     def AssignNewNames(self):
         """Store a 'NewName' in the symbol table for every renamable symbol.

         States other than 'default', user functions (those having a 'Loc'),
         global variables and parameters get the shortest names available,
         reusing names that exist in the compiled script anyway whenever
         possible. Pure local variables get long random names of the form
         'L_' followed by eight characters. Labels are left untouched.

         Sets self.WordCntr and self.UsedNames as a side effect.
         """
         self.WordCntr = len(self.CharSet1) # Initialize to length 1

         # Names that can be reused without penalty. The initial set is there
         # since the beginning. Others (e.g. Key) are created when some kinds
         # of stuff are present, but we don't take so many risks.
         ReusableNames = set(['LslLibrary', 'LslUserScript', 'System'])

         # Names that GetNextShortest must not hand out. Every reusable name
         # is reserved as soon as it is known, so the generator can never
         # produce an identifier that is later also taken from ReusableNames.
         self.UsedNames = set(ReusableNames)

         UsedLocals = set()

         def AddReusable(name):
             # Make a name available for reuse and reserve it.
             ReusableNames.add(name)
             self.UsedNames.add(name)

         def TakeName():
             # Prefer a name that costs nothing; otherwise generate one.
             if ReusableNames:
                 return ReusableNames.pop()
             return self.GetNextShortest()

         def NewLocal():
             # Long random name, distinct from every other local's name.
             while True:
                 unique = 'L_' + ''.join(random.choice(self.CharSet2)
                                         for _ in range(8))
                 if unique not in UsedLocals:
                     UsedLocals.add(unique)
                     return unique

         # Make a first pass to separate the symbols into three categories.
         globalvars = []
         states = []
         functions = []
         globaldefs = self.symtab[0]
         for name in globaldefs:
             if name == -1: continue
             kind = globaldefs[name]['Kind']
             if kind == 's':
                 states.append(name)
             elif kind == 'f':
                 # Only functions with a location in the tree are renamed.
                 if 'Loc' in globaldefs[name]:
                     functions.append(name)
             elif kind == 'v':
                 globalvars.append(name)
             else:
                 assert False, 'Invalid kind at this scope: ' + kind

         # We make three passes, one for states, then functions, then globals,
         # in that order.

         for name in states:
             # States have top priority. Here's why. An internal event function
             # name is made by concatenating an 'e', then the state name, then
             # the event name, e.g. edefaultstate_entry. Since a new identifier
             # having part of the state name is created for every event in that
             # state, the shorter the state name, the fewer bytes it will use.
             # Furthermore, a state switch instruction further adds a Unicode
             # string (all other identifier names use one-byte strings), which
             # is all the more reason to shorten it as much as possible.
             #
             # Unfortunately, there isn't much that can be done about 'default'.
             #
             # The good side is that we get to reuse these names for variables
             # without using extra space and without wasting single or double
             # letter identifiers.
             entry = globaldefs[name]
             if name != 'default':
                 name = entry['NewName'] = self.GetNextShortest()
             # Find also the event names it uses, to add them for reuse.
             for node in self.tree[entry['Loc']]['ch']:
                 assert node['nt'] == 'FNDEF'
                 AddReusable('e' + name + node['name'])

         for name in functions:
             # Assign a new name. Internal function names get a 'g' prepended
             # to them, so these are candidates for reuse too.
             # Unfortunately, we won't find any reusable name starting with 'g'
             # this early, so no point in searching.
             short = globaldefs[name]['NewName'] = self.GetNextShortest()
             AddReusable('g' + short)

         for name in globalvars:
             globaldefs[name]['NewName'] = TakeName()

         # Do the same for function and event parameter names. Pure locals get
         # long distinct names.
         for scope, table in enumerate(self.symtab):
             if scope == 0:
                 # Skip globals
                 continue
             for name, sym in table.items():
                 if name == -1: continue
                 if sym['Kind'] != 'v':
                     assert sym['Kind'] == 'l'
                     continue
                 if 'Param' in sym:
                     # Same procedure as for global vars
                     # Not the best strategy (using locally unique names would
                     # work optimally) but hey. At the time of writing there's
                     # no reference analysis. TODO: Implement.
                     sym['NewName'] = TakeName()
                 else:
                     sym['NewName'] = NewLocal()
--- a/main.py
+++ b/main.py
@ -50,6 +50,10 @@ Options (+ means active by default, - means inactive by default):
                       will go to the last label with that name). This flag
                       works around that limitation by replacing the names of
                       the labels in the output with unique ones.
  shrinknames        - Reduces script memory by shrinking identifiers. In the
                       process, it turns the script into unreadable gibberish,
                       hard to debug, but this gets big savings for complex
                       scripts.
 Note that the optimizer doesn't reorder expressions to fold constants. This
 means that e.g. a + 3 + 5 is not optimized to a + 8; however a + (3 + 5) is.