LSL-PyOptimizer/lslopt/lslrenamer.py

#    (C) Copyright 2015 Sei Lisa. All rights reserved.
#
#    This file is part of LSL PyOptimizer.
#
#    LSL PyOptimizer is free software: you can redistribute it and/or
#    modify it under the terms of the GNU General Public License as
#    published by the Free Software Foundation, either version 3 of the
#    License, or (at your option) any later version.
#
#    LSL PyOptimizer is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>.

# This module renames all kinds of variables. Globals and function/event
# parameters take memory space, so shrinking the identifiers as much as
# possible ensures their memory usage will be minimized. It also reuses some
# preexisting names when possible. Locals are renamed also so that they don't
# stand in the way of globals.
#
# A side effect of this change is that the script becomes unreadable gibberish.

# TODO: Make a new counter per scope.

class renamer(object):
    """Identifier shrinker implementing the 'shrinknames' option.

    The class keeps no constructor of its own. ShrinkNames() reads two
    attributes that must already be present on the instance:

    - self.symtab: sequence of symbol tables (dicts). Table 0 holds the
      globals; the remaining ones hold parameters, locals and labels.
      Each symbol is a dict with at least a 'Kind' key ('s' state,
      'f' function, 'v' variable, 'l' label). The key -1 is skipped in
      every table.
    - self.tree: sequence of nodes, indexed by the 'Loc' field of a symbol.

    The result is stored in a 'NewName' entry added to each renamed symbol.
    """

    # Characters allowed as the first character of an identifier.
    CharSet1 = '_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    # Characters allowed in any position after the first.
    CharSet2 = '0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    # Short words that must never be generated as identifiers.
    # TODO: Derive these from builtins.txt somehow.
    Kws = (frozenset({'do', 'if', 'PI',
                      'for', 'key', 'EOF',
                      'jump', 'else', 'list', 'TRUE', 'LOOP', 'case'
                     }))

    def GetNextShortest(self):
        """Generate the next shortest possible identifier.

        The generator state is self.WordFirstChar (index into CharSet1) and
        self.WordRestOfChars (list of indices into CharSet2, one per extra
        character). The first character cycles fastest; when it wraps around,
        the remaining characters are incremented like an odometer, and a new
        position is appended once all of them overflow.

        Candidates found in Kws or in self.UsedNames are skipped.

        Returns the identifier as a string.
        """
        while True:
            ret = self.CharSet1[self.WordFirstChar] + ''.join(
                self.CharSet2[idx] for idx in self.WordRestOfChars)

            # Advance the counter before deciding whether to accept `ret`,
            # so a rejected candidate is never produced twice.
            self.WordFirstChar += 1
            if self.WordFirstChar >= len(self.CharSet1):
                self.WordFirstChar = 0
                rest = self.WordRestOfChars
                # range() rather than xrange(): same iteration, and it also
                # exists on Python 3.
                for idx in range(len(rest)):
                    if rest[idx] < len(self.CharSet2) - 1:
                        rest[idx] += 1
                        break
                    rest[idx] = 0
                else:
                    # Every position overflowed: grow the identifier.
                    rest.append(0)

            if ret not in self.Kws and ret not in self.UsedNames:
                return ret

    def ShrinkNames(self):
        """Implements the shrinknames option.

        Assigns a 'NewName' to every user-defined state (except 'default'),
        every function that has a 'Loc' entry, every global variable, every
        parameter and every local variable found in self.symtab. Labels
        (kind 'l') are left alone.

        Resets the identifier generator state (self.WordFirstChar,
        self.WordRestOfChars) and self.UsedNames.
        """
        self.WordFirstChar = 0
        self.WordRestOfChars = []

        # Names that can be reused without penalty. This initial set is there
        # since the beginning. Others are created depending on the code
        # (e.g. Key when there are keys), but we don't take too many risks.
        ReusableNames = set(['LslLibrary', 'Library', 'LslUserScript',
            'System', 'UThread', 'UThreadStackFrame', 'Pop',
            'IsRestoring', 'IsSaveDue', 'ResumeVoid',
            ])

        # Names from ReusableNames that have already been used
        self.UsedNames = set()

        def TakeName():
            # Prefer a reusable name; fall back to a freshly generated one.
            # Names taken from the pool are recorded in UsedNames so that
            # GetNextShortest never hands them out a second time.
            if ReusableNames:
                taken = ReusableNames.pop()
                self.UsedNames.add(taken)
                return taken
            return self.GetNextShortest()

        # Events whose internal name differs from the source name.
        # LSO internally translates run_time_permissions to 'chat', but it
        # doesn't include identifiers in the object anyway, so only the
        # non-LSO translation is applied here.
        EventAliases = {
            'on_rez': 'rez',
            'listen': 'chat',
            'run_time_permissions': 'run_time_perms',
            'remote_data': 'remote_event',
        }

        # Make a first pass to separate the symbols into three categories.
        globalvars = []
        states = []
        functions = []
        globaldefs = self.symtab[0]
        for name in globaldefs:
            if name == -1:
                continue
            kind = globaldefs[name]['Kind']
            if kind == 's':
                states.append(name)
            elif kind == 'f':
                # Only functions carrying a 'Loc' entry are renamed.
                if 'Loc' in globaldefs[name]:
                    functions.append(name)
            elif kind == 'v':
                globalvars.append(name)
            else:
                assert False, 'Invalid kind at this scope: ' \
                    + kind # pragma: no cover

        # We make three passes, one for states, then functions, then globals,
        # in that order.

        for name in states:
            # States have top priority. Here's why. An internal event function
            # name is made by concatenating an 'e', then the state name, then
            # the event name, e.g. edefaultstate_entry. Since a new identifier
            # having part of the state name is created for every event in that
            # state, the shorter the state name, the fewer bytes it will use.
            # Furthermore, a state switch instruction also adds a Unicode
            # string (all other identifier names use one-byte strings), which
            # is all the more reason to shorten it as much as possible.
            #
            # Unfortunately, there isn't much that can be done about 'default'.
            #
            # The good side is that we get to reuse these names for variables
            # without using extra space and without wasting single or double
            # letter identifiers.

            entry = globaldefs[name]
            if name != 'default':
                name = entry['NewName'] = self.GetNextShortest()
            # Find also the event names it uses, to add them for reuse.
            for node in self.tree[entry['Loc']]['ch']:
                assert node['nt'] == 'FNDEF'
                event_name = node['name']
                event_name = EventAliases.get(event_name, event_name)
                ReusableNames.add('e' + name + event_name)

        for name in functions:
            # Assign a new name. Internally, function names get a 'g' prepended
            # to them, so these are candidates for reuse too.

            # Unfortunately, we won't find any reusable name starting with 'g'
            # this early, so no point in searching.

            short = globaldefs[name]['NewName'] = self.GetNextShortest()
            ReusableNames.add('g' + short)

        for name in globalvars:
            globaldefs[name]['NewName'] = TakeName()

        # Do the same for function and event parameter names. Locals get
        # their original name with a fixed prefix instead.
        restart = self.WordFirstChar
        # Table 0 is the globals table, already handled above.
        for table in self.symtab[1:]:
            InParams = False
            # items() rather than iteritems(): only the nested symbol dicts
            # are modified, never the table's keys, so both iterate the same
            # way, and items() also exists on Python 3.
            for name, sym in table.items():
                if name == -1:
                    continue
                if sym['Kind'] != 'v':
                    assert sym['Kind'] == 'l'
                    continue
                if 'Param' in sym:
                    if not InParams:
                        # Restart at every new parameter table.
                        # Parameter tables are isolated from each other.
                        # Note only the first-character index is rewound.
                        InParams = True
                        self.WordFirstChar = restart
                    # Same procedure as for global vars
                    # Not the best strategy (using locally unique names would
                    # do a better job) but hey.
                    sym['NewName'] = TakeName()
                else:
                    # Prepend the four character string 'loc_' to the original
                    # name. This generates identifiers with five chars or
                    # more. We assume that is enough for them to stay safe
                    # from name collisions with globals and parameter names.
                    sym['NewName'] = 'loc_' + name