LSL-PyOptimizer/lslopt/lslrenamer.py
Sei Lisa 075d3aba0c Change the AST node type from dict to object
That was long overdue. Obviously, this is a large commit.

The new nr (node record) class has built-in dump capabilities, rather than using print_node().

SEF always exists now, and is a boolean, rather than using the existence of SEF as the flag. This was changed for sanity. However, other flags like 'X' are still possibly absent, and in some cases the absence itself has meaning (in the case of 'X', its absence means that the node has not yet been analyzed).

Similarly, an event is distinguished from a UDF by checking for the existence of the 'scope' attribute. This trick works because events are not in the symbol table and therefore have no scope. However, this should probably be changed in the future to something more rational and faster.

A few minor bugfixes were applied while going through the code.

- Some tabs used as Unicode were written as byte strings. Add the u'\t' prefix.
- After simplifying a%1 -> a&0, fold the node again and return. It was not clear why it didn't return, or whether it depended on subsequent passes (e.g. after DCR) to possibly optimize out the result. Now we're sure.
- A few places lacked a SEF declaration.
- Formatting changes to split lines that spilled the margin.
- Some comment changes.
- Expanded lazy_list_set definition while adapting it to object format. The plan was to re-compress it after done, but decided to leave it in expanded form.
- Added a few TODOs & FIXMEs, resisting the temptation to fix them in the same commit:
  - TODO: ~-~-~-expr  ->  expr + -3.
  - FIXME: Now that we have CompareTrees, we can easily check if expr + -expr cancels out and remove a TODO. Low-hanging fruit.
  - TODO: Check what we can do when comparing non-SEF and non-CONST values in '>' (current code relies on converting '>' to '<' for applying more optimizations, but that may miss some opportunities).
  - FIXME: Could remove one comparison in nt == '&&' or nt == '||'. Low-hanging fruit.
2018-04-01 02:14:00 +02:00

211 lines
9.3 KiB
Python

# (C) Copyright 2015-2018 Sei Lisa. All rights reserved.
#
# This file is part of LSL PyOptimizer.
#
# LSL PyOptimizer is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# LSL PyOptimizer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>.
# This module renames all kinds of variables. Globals and function/event
# parameters take memory space, so shrinking the identifiers as much as
# possible ensures their memory usage will be minimized. It also reuses some
# preexisting names when possible. Locals are renamed also so that they don't
# stand in the way of globals.
#
# A side effect of this change is that the script becomes unreadable gibberish.
class renamer(object):
    """Identifier shrinker implementing the 'shrinknames' option.

    The class does not define ``symtab`` or ``tree`` itself; ShrinkNames
    reads both from ``self``, so they must be provided by whatever
    instantiates or inherits from this class.
    """

    # Alphabet for the first character of a generated identifier.
    CharSet1 = '_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    # Alphabet for every subsequent character of a generated identifier.
    CharSet2 = '0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    # Short words that GetNextShortest must never hand out.
    Kws = frozenset({'do', 'if', 'PI',
                     'for', 'key', 'EOF',
                     'jump', 'else', 'list', 'TRUE', 'LOOP', 'case'
                    })

    # Events whose names are translated when building the internal event
    # function name. Events not listed here keep their own name.
    #
    # LSO internally translates run_time_permissions to 'chat'. But it
    # doesn't include identifiers in the object anyway, so that case is
    # mentioned here for documentation purposes only.
    _InternalEventNames = {
        'on_rez': 'rez',
        'listen': 'chat',
        'run_time_permissions': 'run_time_perms',
        'remote_data': 'remote_event',
    }

    def GetNextShortest(self):
        """Generate the next shortest possible identifier.

        The generator position is kept in self.WordFirstChar (an index into
        CharSet1) and self.WordRestOfChars (a list of indices into CharSet2,
        one per extra character). Candidates that are in Kws or in
        self.UsedNames are skipped.

        Returns the identifier as a string.
        """
        lastDigit = len(self.CharSet2) - 1
        while True:
            ret = self.CharSet1[self.WordFirstChar] + ''.join(
                self.CharSet2[idx] for idx in self.WordRestOfChars)

            # Advance the position, odometer style: the first character is
            # the fastest-moving wheel, then WordRestOfChars[0], and so on.
            self.WordFirstChar += 1
            if self.WordFirstChar >= len(self.CharSet1):
                self.WordFirstChar = 0
                rest = self.WordRestOfChars
                for pos, digit in enumerate(rest):
                    if digit < lastDigit:
                        rest[pos] = digit + 1
                        break
                    rest[pos] = 0
                else:
                    # Every wheel wrapped around (or there were none):
                    # identifiers get one character longer.
                    rest.append(0)

            if ret not in self.Kws and ret not in self.UsedNames:
                return ret

    def ShrinkNames(self, UsableAsGlobals = None, UsableAsParams = None):
        """Implements the shrinknames option.

        Walks self.symtab and stores the replacement identifier of every
        renamed symbol under the 'NewName' key of its symbol table entry.

        UsableAsGlobals: optional iterable of extra names that may be reused
            for globals (leftovers remain available for parameters).
        UsableAsParams: optional iterable of extra names that may be reused
            for function/event parameters only.

        Neither argument is modified. Returns None.
        """
        # Reset the identifier generator.
        self.WordFirstChar = 0
        self.WordRestOfChars = []

        # Names that can be reused without penalty. This initial set is there
        # since the beginning. Others are created depending on the code
        # (e.g. Key when there are keys), but we don't take too many risks.
        ReusableNames = set(['LslLibrary', 'Library', 'LslUserScript',
                             'System', 'UThread', 'UThreadStackFrame', 'Pop',
                             'IsRestoring', 'IsSaveDue', 'ResumeVoid',
                            ])
        if UsableAsGlobals is not None:
            ReusableNames.update(UsableAsGlobals)

        # Names from ReusableNames that have already been used
        self.UsedNames = set()

        # Make a preliminary pass to separate the symbols into three categories
        globalvars = []
        states = []
        functions = []
        globaldefs = self.symtab[0]
        for name in globaldefs:
            if name == -1: continue
            kind = globaldefs[name]['Kind']
            if kind == 's':
                states.append(name)
            elif kind == 'f':
                # Only functions that have a 'Loc' entry are renamed.
                if 'Loc' in globaldefs[name]:
                    functions.append(name)
            elif kind == 'v':
                globalvars.append(name)
            else:
                assert False, 'Invalid kind at this scope: ' \
                    + kind # pragma: no cover

        # We make four passes, one for states, then function names, then
        # global names, then parameter names and locals, in that order.

        # State names are usable as short identifiers for parameters.
        stateNames = []
        for name in states:
            # States have top priority. Here's why. An internal event function
            # name is made by concatenating an 'e', then the state name, then
            # the event name, e.g. edefaultstate_entry. Since a new identifier
            # having part of the state name is created for every event in that
            # state, the shorter the state name, the fewer bytes it will use.
            # Furthermore, a state switch instruction also adds a Unicode
            # string (all other identifier names use one-byte strings), which
            # is the more reason to shorten it as much as possible.
            #
            # Unfortunately, there isn't much that can be done about 'default'.
            #
            # The good side is that we get to reuse these names for variables
            # without using extra space and without wasting single or double
            # letter identifiers.
            entry = globaldefs[name]
            if name != 'default':
                name = entry['NewName'] = self.GetNextShortest()
            stateNames.append(name)

            # Find also the event names it uses, to add them for reuse.
            for node in self.tree[entry['Loc']].ch:
                assert node.nt == 'FNDEF'
                event_name = self._InternalEventNames.get(node.name, node.name)
                ReusableNames.add('e' + name + event_name)

        for name in functions:
            # Assign a new name. Internally, function names get a 'g' prepended
            # to them, so these are candidates for reuse too.
            # Unfortunately, we won't find any reusable name starting with 'g'
            # this early, so no point in searching.
            short = globaldefs[name]['NewName'] = self.GetNextShortest()
            ReusableNames.add('g' + short)

        for name in globalvars:
            # First, check if we have reusable names available.
            if ReusableNames:
                short = ReusableNames.pop()
                self.UsedNames.add(short)
            else:
                short = self.GetNextShortest()
            globaldefs[name]['NewName'] = short

        # Do the same for function and event parameter names. Pure locals get
        # long distinct names.
        if UsableAsParams is not None:
            ReusableNames.update(UsableAsParams)

        # Snapshot everything the parameter pass consumes, so that it can be
        # rewound for each parameter table. The generator position consists
        # of BOTH WordFirstChar and WordRestOfChars; restoring only the former
        # would make tables processed after a very large one start with
        # needlessly long names.
        restartFirstChar = self.WordFirstChar
        restartRestOfChars = self.WordRestOfChars[:]
        restartReusable = ReusableNames
        restartState = stateNames
        restartUsed = self.UsedNames
        # Work on copies so the snapshots stay pristine.
        ReusableNames = restartReusable.copy()
        stateNames = restartState[:]
        self.UsedNames = restartUsed.copy()

        for tableIdx, table in enumerate(self.symtab):
            if tableIdx == 0:
                # Skip globals
                continue

            InParams = False
            # Only the contents of the entries are modified while iterating,
            # never the set of keys, so iterating over items() is safe.
            for name, sym in table.items():
                if name == -1: continue
                if sym['Kind'] != 'v':
                    assert sym['Kind'] == 'l'
                    name = name # trick python to not optimize out the jump
                    continue

                if 'Param' not in sym:
                    # Generate new identifier, by prepending the four character
                    # string 'loc_'. This generates identifiers with five chars
                    # or more. We assume that is enough for them to stay safe
                    # from name collisions with globals and parameter names:
                    # the generator yields over 13 million identifiers of up
                    # to four characters before reaching that length.
                    sym['NewName'] = 'loc_' + name
                    continue

                if not InParams:
                    # Restart at every new parameter table.
                    # Parameter tables are isolated from each other.
                    InParams = True
                    self.WordFirstChar = restartFirstChar
                    self.WordRestOfChars = restartRestOfChars[:]
                    ReusableNames = restartReusable.copy()
                    stateNames = restartState[:]
                    self.UsedNames = restartUsed.copy()

                # Same procedure as for global vars
                if ReusableNames:
                    short = ReusableNames.pop()
                    self.UsedNames.add(short)
                elif stateNames:
                    short = stateNames.pop()
                    # No need to add it to UsedNames because
                    # GetNextShortest will always start past it anyway.
                else:
                    short = self.GetNextShortest()
                sym['NewName'] = short