LSL-PyOptimizer/lslopt/lslrenamer.py
Sei Lisa f2a6243695 Reuse state names for function parameters; restart UsedNames.
Gives us a few more opportunities for catching single-letter identifiers.

UsedNames was not restarted. It's unlikely that this had any detrimental effect on optimization, and it was certainly safe to not restart it. But it looks more correct like this.
2017-10-28 23:39:25 +02:00

211 lines
9.2 KiB
Python

# (C) Copyright 2015-2017 Sei Lisa. All rights reserved.
#
# This file is part of LSL PyOptimizer.
#
# LSL PyOptimizer is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# LSL PyOptimizer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>.
# This module renames all kinds of variables. Globals and function/event
# parameters take memory space, so shrinking the identifiers as much as
# possible ensures their memory usage will be minimized. It also reuses some
# preexisting names when possible. Locals are also renamed so that they don't
# stand in the way of globals.
#
# A side effect of this change is that the script becomes unreadable gibberish.
# TODO: Make a new counter per scope.
# TODO: Reuse used library function names for UDF and event parameters.
class renamer(object):
    """Identifier shrinker implementing the ``shrinknames`` option.

    The methods here read two attributes that this class does not define
    itself and that must be present on the instance:

    - ``self.symtab``: an indexable sequence of symbol tables (dicts mapping
      an identifier to its symbol dict). Table 0 holds the globals; every
      other table is a nested scope. The key ``-1`` is not a symbol and is
      skipped. Each symbol dict has a ``'Kind'`` entry and may have
      ``'Loc'`` and ``'Param'`` entries.
    - ``self.tree``: indexable by a state's ``'Loc'``; the entry's ``'ch'``
      list holds ``FNDEF`` nodes, one per event of that state.
    """

    # Characters allowed as the first character of an identifier.
    CharSet1 = '_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    # Characters allowed in every other position of an identifier.
    CharSet2 = '0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'

    # Short words that must never be generated as identifiers.
    # TODO: Derive these from builtins.txt somehow.
    Kws = frozenset({'do', 'if', 'PI',
                     'for', 'key', 'EOF',
                     'jump', 'else', 'list', 'TRUE', 'LOOP', 'case'
                     })

    # Events whose internal name differs from the name used in the source.
    # LSO internally translates run_time_permissions to 'chat' instead, but
    # it doesn't include identifiers in the object anyway, so only the
    # 'run_time_perms' translation is applied here.
    _EventAliases = {
        'on_rez': 'rez',
        'listen': 'chat',
        'run_time_permissions': 'run_time_perms',
        'remote_data': 'remote_event',
    }

    def GetNextShortest(self):
        """Generate the next shortest possible identifier.

        The generator state is ``self.WordFirstChar`` (an index into
        CharSet1) and ``self.WordRestOfChars`` (a list of indices into
        CharSet2, one per additional character). The first character varies
        fastest; when it wraps around, the remaining characters are
        incremented like a little-endian counter, growing by one character
        when every position overflows.

        Candidates found in ``Kws`` or in ``self.UsedNames`` are skipped.

        Returns the generated identifier as a string.
        """
        while True:
            ret = self.CharSet1[self.WordFirstChar] + ''.join(
                self.CharSet2[idx] for idx in self.WordRestOfChars)

            # Advance the counter for the next call before deciding whether
            # this candidate is acceptable.
            self.WordFirstChar += 1
            if self.WordFirstChar >= len(self.CharSet1):
                self.WordFirstChar = 0
                for idx, digit in enumerate(self.WordRestOfChars):
                    if digit < len(self.CharSet2) - 1:
                        self.WordRestOfChars[idx] = digit + 1
                        break
                    # This position overflowed; carry into the next one.
                    self.WordRestOfChars[idx] = 0
                else:
                    # Every position overflowed (or there were none):
                    # identifiers become one character longer.
                    self.WordRestOfChars.append(0)

            if ret not in self.Kws and ret not in self.UsedNames:
                return ret

    def ShrinkNames(self):
        """Implements the shrinknames option.

        Stores a ``'NewName'`` entry in the symbol dict of every state
        (except ``default``), every global function that has a ``'Loc'``
        entry, every global variable, and every variable found in the
        non-global tables of ``self.symtab``. Parameters get the shortest
        names available; other locals get their old name prefixed with
        ``'loc_'``. Nothing is returned.
        """
        self.WordFirstChar = 0
        self.WordRestOfChars = []

        # Names that can be reused without penalty. This initial set is there
        # since the beginning. Others are created depending on the code
        # (e.g. Key when there are keys), but we don't take too many risks.
        ReusableNames = {'LslLibrary', 'Library', 'LslUserScript',
                         'System', 'UThread', 'UThreadStackFrame', 'Pop',
                         'IsRestoring', 'IsSaveDue', 'ResumeVoid',
                         }

        # Names from ReusableNames that have already been used
        self.UsedNames = set()

        # Make a first pass to separate the symbols into three categories.
        globalvars = []
        states = []
        functions = []
        globaldefs = self.symtab[0]
        for name in globaldefs:
            if name == -1:
                continue
            kind = globaldefs[name]['Kind']
            if kind == 's':
                states.append(name)
            elif kind == 'f':
                # Functions without a 'Loc' entry are left untouched.
                if 'Loc' in globaldefs[name]:
                    functions.append(name)
            elif kind == 'v':
                globalvars.append(name)
            else:
                assert False, 'Invalid kind at this scope: ' \
                    + kind # pragma: no cover

        # We make four passes, one for states, then functions, then globals,
        # then parameter names and locals, in that order.

        # State names are usable as short identifiers for parameters.
        stateNames = []
        for name in states:
            # States have top priority. Here's why. An internal event function
            # name is made by concatenating an 'e', then the state name, then
            # the event name, e.g. edefaultstate_entry. Since a new identifier
            # having part of the state name is created for every event in that
            # state, the shorter the state name, the fewer bytes it will use.
            # Furthermore, a state switch instruction also adds a Unicode
            # string (all other identifier names use one-byte strings), which
            # is the more reason to shorten it as much as possible.
            #
            # Unfortunately, there isn't much that can be done about 'default'.
            #
            # The good side is that we get to reuse these names for variables
            # without using extra space and without wasting single or double
            # letter identifiers.
            entry = globaldefs[name]
            if name != 'default':
                name = entry['NewName'] = self.GetNextShortest()
                stateNames.append(name)

            # Find also the event names it uses, to add them for reuse.
            for node in self.tree[entry['Loc']]['ch']:
                assert node['nt'] == 'FNDEF'
                event_name = node['name']
                # Some events have their names translated.
                event_name = self._EventAliases.get(event_name, event_name)
                ReusableNames.add('e' + name + event_name)
        del states

        for name in functions:
            # Assign a new name. Internally, function names get a 'g' prepended
            # to them, so these are candidates for reuse too.

            # Unfortunately, we won't find any reusable name starting with 'g'
            # this early, so no point in searching.
            short = globaldefs[name]['NewName'] = self.GetNextShortest()
            ReusableNames.add('g' + short)
        del functions

        for name in globalvars:
            # First, check if we have reusable names available.
            if ReusableNames:
                short = ReusableNames.pop()
                self.UsedNames.add(short)
            else:
                short = self.GetNextShortest()
            globaldefs[name]['NewName'] = short

        # Do the same for function and event parameter names. Pure locals get
        # long distinct names.

        # Snapshot the complete allocator state reached after the globals.
        # Both halves of the counter are saved: restoring the first character
        # alone would leave any suffix grown by an earlier parameter table in
        # place, making later tables start at longer names than necessary.
        restartFirst = self.WordFirstChar
        restartRest = self.WordRestOfChars[:]
        restartReusable = ReusableNames
        restartState = stateNames
        restartUsed = self.UsedNames

        # Work on copies so that the snapshots stay pristine.
        ReusableNames = restartReusable.copy()
        stateNames = restartState[:]
        self.UsedNames = restartUsed.copy()

        for scope, table in enumerate(self.symtab):
            if scope == 0:
                # Skip globals
                continue

            InParams = False
            for name, sym in table.items():
                if name == -1:
                    continue
                if sym['Kind'] != 'v':
                    assert sym['Kind'] == 'l'
                    name = name # trick python to not optimize out the jump
                    continue

                if 'Param' in sym:
                    if not InParams:
                        # Restart at every new parameter table.
                        # Parameter tables are isolated from each other.
                        InParams = True
                        self.WordFirstChar = restartFirst
                        self.WordRestOfChars = restartRest[:]
                        ReusableNames = restartReusable.copy()
                        stateNames = restartState[:]
                        self.UsedNames = restartUsed.copy()

                    # Same procedure as for global vars
                    if ReusableNames:
                        short = ReusableNames.pop()
                        self.UsedNames.add(short)
                    elif stateNames:
                        short = stateNames.pop()
                        # No need to add it to UsedNames because
                        # GetNextShortest will always start past it anyway.
                    else:
                        short = self.GetNextShortest()
                    table[name]['NewName'] = short
                else:
                    # Generate new identifier, by prepending the four character
                    # string 'loc_'. This generates identifiers with five chars
                    # or more. We assume that is enough for them to stay safe
                    # from name collisions with globals and parameter names.
                    # Four letters allow for more than 1.4 million identifiers.
                    unique = 'loc_' + name
                    table[name]['NewName'] = unique

        del globalvars