Implement the shrinknames option.

Fixes some bugs with the treatment of the shrink attribute, some others with the output of renamed stuff.
This commit is contained in:
Sei Lisa 2014-08-01 05:07:50 +02:00
parent 847d7b1e20
commit 6c248c46e3
5 changed files with 174 additions and 15 deletions

View file

@ -2,7 +2,9 @@
import lslfuncs
from lslparse import warning
class optimizer(object):
from lslrenamer import renamer
class optimizer(renamer):
# Default values per type when declaring variables
DefaultValues = {'integer': 0, 'float': 0.0, 'string': u'',
@ -606,6 +608,9 @@ class optimizer(object):
else:
self.FoldTree(tree, idx)
if 'shrinknames' in options:
self.AssignNewNames()
treesymtab = (self.tree, self.symtab)
del self.tree
del self.symtab

View file

@ -135,11 +135,16 @@ class outscript(object):
def dent(self):
return self.indent * self.indentlevel
def FindName(self, node):
try:
def FindName(self, node, scope = None):
if scope is None:
# node is a node
if 'scope' in node and'NewName' in self.symtab[node['scope']][node['name']]:
return self.symtab[node['scope']][node['name']]['NewName']
except KeyError:
return node['name']
# node is a name
if 'NewName' in self.symtab[scope][node]:
return self.symtab[scope][node]['NewName']
return node
def OutIndented(self, node):
if node['nt'] != '{}':
@ -300,7 +305,9 @@ class outscript(object):
if node['t'] is not None:
ret += node['t'] + ' '
ret += self.FindName(node) + '('
ret += ', '.join(typ + ' ' + name for typ, name in zip(node['ptypes'], node['pnames']))
scope = node['pscope']
ret += ', '.join(typ + ' ' + self.FindName(name, scope)
for typ, name in zip(node['ptypes'], node['pnames']))
return ret + ')\n' + self.OutCode(child[0])
return self.dent() + self.OutExpr(node) + ';\n'

View file

@ -638,8 +638,10 @@ class parser(object):
args = self.Parse_optional_expression_list(sym['ParamTypes'])
self.expect(')')
self.NextToken()
return {'nt':'FNCALL', 't':sym['Type'], 'name':name,
'scope':self.scopeindex, 'ch':args}
ret = {'nt':'FNCALL', 't':sym['Type'], 'name':name, 'ch':args}
if 'Scope' in sym:
ret['scope'] = sym['Scope']
return ret
if sym['Kind'] != 'v':
raise EParseTypeMismatch(self)
typ = sym['Type']
@ -1170,7 +1172,7 @@ class parser(object):
x = random.randint(0, 16777215)
unique += b64encode(chr(x>>16) + chr((x>>8)&255)
+ chr(x&255)).replace('+', '_')
if '/' not in unique and unique not in self.locallabels:
if '/' not in unique not in self.locallabels:
break
else:
# Use the existing name. Faster and more readable.
@ -1200,11 +1202,11 @@ class parser(object):
# It might still be a forward reference, so we add it to the
# list of things to look up when done
self.jump_lookups.append((name, self.scopeindex, self.errorpos, jumpnode))
else:
jumpnode['scope'] = sym['Scope']
self.NextToken()
self.expect(';')
self.NextToken()
if sym is not None:
jumpnode['scope'] = sym['Scope']
return jumpnode
if tok0 == 'STATE':
self.NextToken()
@ -1217,7 +1219,7 @@ class parser(object):
self.NextToken()
self.expect(';')
self.NextToken()
return {'nt':'STSW', 't':None, 'name':name}
return {'nt':'STSW', 't':None, 'name':name, 'scope':0}
if tok0 == 'RETURN':
self.NextToken()
if self.tok[0] == ';':
@ -1609,7 +1611,7 @@ class parser(object):
events = self.Parse_events()
self.expect('}')
self.tree.append({'nt':'STDEF', 't':None, 'name':name, 'ch':events})
self.tree.append({'nt':'STDEF', 't':None, 'name':name, 'scope':0, 'ch':events})
self.NextToken()
def Parse_script(self):
@ -1633,10 +1635,11 @@ class parser(object):
# Check the pending jump targets
for tgt in self.jump_lookups:
self.scopeindex = tgt[1]
if self.FindSymbolPartial(tgt[0], MustBeLabel = True) is None:
sym = self.FindSymbolPartial(tgt[0], MustBeLabel = True)
if sym is None:
self.errorpos = tgt[2]
raise EParseUndefined(self)
tgt[3]['scope'] = tgt[1]
tgt[3]['scope'] = sym['Scope']
del self.jump_lookups # Finished with it.

140
lslopt/lslrenamer.py Normal file
View file

@ -0,0 +1,140 @@
import random
from base64 import b64encode
class renamer(object):
    """Mixin that assigns short replacement names to script identifiers.

    The host class must provide ``self.symtab`` (a sequence of symbol
    tables, one dict per scope, with the global scope at index 0) and
    ``self.tree`` (indexed by the 'Loc' field of state symbols). The chosen
    replacement is stored in each symbol's 'NewName' entry; nothing else in
    the symbol table or the tree is modified.
    """

    # Characters allowed as the first character of an identifier.
    CharSet1 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_'
    # Characters allowed in the remaining positions of an identifier.
    CharSet2 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_0123456789'
    # Reserved words that must never be generated, indexed by their length.
    # TODO: Derive these from builtins.txt somehow.
    KwByLen = ((), (), ('do', 'if', 'PI'), ('for', 'key', 'EOF'),
               ('jump', 'else', 'list', 'TRUE', 'LOOP'))

    def GetNextShortest(self):
        """Generate the next shortest possible identifier.

        Identifiers are enumerated in order of non-decreasing length: first
        every one-character name, then every two-character name, and so on.
        ``self.WordCntr`` is the zero-based position in that enumeration and
        is advanced on every attempt. Names listed in KwByLen or already
        present in ``self.UsedNames`` are skipped. The returned name is added
        to ``self.UsedNames`` so it can not be handed out again.
        """
        first = self.CharSet1
        rest = self.CharSet2
        while True:
            n = self.WordCntr
            self.WordCntr += 1

            # Locate the length group that position n falls into. There are
            # len(first) * len(rest)**k names with k trailing characters.
            trailing = 0
            group = len(first)
            while n >= group:
                n -= group
                group *= len(rest)
                trailing += 1

            # Decode the offset within the group as a mixed-radix number,
            # least significant (first) character first.
            n, digit = divmod(n, len(first))
            ret = first[digit]
            for _ in range(trailing):
                n, digit = divmod(n, len(rest))
                ret += rest[digit]

            # KwByLen only covers short lengths; longer names can't clash
            # with any of the listed keywords.
            if len(ret) < len(self.KwByLen):
                reserved = self.KwByLen[len(ret)]
            else:
                reserved = ()
            if ret not in reserved and ret not in self.UsedNames:
                self.UsedNames.add(ret)
                return ret

    def _PickName(self, ReusableNames):
        """Return an unused name, taking it from ReusableNames if possible.

        Entries are removed from ReusableNames as they are examined. An
        entry that was already handed out (for example because the
        generator produced the same string earlier) is discarded. When no
        reusable name is left, a freshly generated one is returned.
        """
        while ReusableNames:
            short = ReusableNames.pop()
            if short not in self.UsedNames:
                self.UsedNames.add(short)
                return short
        return self.GetNextShortest()

    def AssignNewNames(self):
        """Store a 'NewName' in every renameable symbol of self.symtab.

        States (except 'default'), functions that have a 'Loc' entry, global
        variables and parameters receive short names. Other local variables
        receive long random names starting with 'L_'. Labels are left alone.
        """
        self.WordCntr = 0  # Start at the first one-character name.

        # Names that can be reused without penalty. The initial set is there
        # since the beginning. Others (e.g. Key) are created when some kinds
        # of stuff are present, but we don't take so many risks.
        ReusableNames = set(('LslLibrary', 'LslUserScript', 'System'))

        # Every name handed out so far, whether generated or reused.
        self.UsedNames = set()

        # Make a first pass to separate the symbols into three categories.
        globalvars = []
        states = []
        functions = []
        globaldefs = self.symtab[0]
        for name in globaldefs:
            if name == -1:
                continue
            kind = globaldefs[name]['Kind']
            if kind == 's':
                states.append(name)
            elif kind == 'f':
                # Only functions carrying a 'Loc' entry are renamed.
                if 'Loc' in globaldefs[name]:
                    functions.append(name)
            elif kind == 'v':
                globalvars.append(name)
            else:
                assert False, 'Invalid kind at this scope: ' + kind

        # We make three passes, one for states, then functions, then globals,
        # in that order.
        for name in states:
            # States have top priority. Here's why. An internal event function
            # name is made by concatenating an 'e', then the state name, then
            # the event name, e.g. edefaultstate_entry. Since a new identifier
            # having part of the state name is created for every event in that
            # state, the shorter the state name, the fewer bytes it will use.
            # Furthermore, a state switch instruction further adds a Unicode
            # string (all other identifier names use one-byte strings), which
            # is all the more reason to shorten it as much as possible.
            #
            # Unfortunately, there isn't much that can be done about 'default'.
            #
            # The good side is that we get to reuse these names for variables
            # without using extra space and without wasting single or double
            # letter identifiers.
            entry = globaldefs[name]
            if name != 'default':
                name = entry['NewName'] = self.GetNextShortest()
            # Find also the event names it uses, to add them for reuse.
            for node in self.tree[entry['Loc']]['ch']:
                assert node['nt'] == 'FNDEF'
                ReusableNames.add('e' + name + node['name'])

        for name in functions:
            # Assign a new name. Internal function names get a 'g' prepended
            # to them, so these are candidates for reuse too.
            # Unfortunately, we won't find any reusable name starting with 'g'
            # this early, so no point in searching.
            short = globaldefs[name]['NewName'] = self.GetNextShortest()
            ReusableNames.add('g' + short)

        for name in globalvars:
            # Prefer a reusable name; fall back to the next shortest one.
            globaldefs[name]['NewName'] = self._PickName(ReusableNames)

        # Do the same for function and event parameter names. Pure locals get
        # long distinct names. Index 0 holds the globals, already handled.
        for table in self.symtab[1:]:
            for name, sym in table.items():
                if name == -1:
                    continue
                if sym['Kind'] != 'v':
                    assert sym['Kind'] == 'l'
                    continue
                if 'Param' in sym:
                    # Same procedure as for global vars.
                    # Not the best strategy (using locally unique names would
                    # work optimally) but hey. At the time of writing there's
                    # no reference analysis. TODO: Implement.
                    sym['NewName'] = self._PickName(ReusableNames)
                else:
                    # Generate a new random identifier: 'L_' followed by
                    # eight characters drawn from CharSet2.
                    while True:
                        unique = 'L_' + ''.join(random.choice(self.CharSet2)
                                                for _ in range(8))
                        if unique not in self.UsedNames:
                            break
                    self.UsedNames.add(unique)
                    sym['NewName'] = unique

View file

@ -50,6 +50,10 @@ Options (+ means active by default, - means inactive by default):
will go to the last label with that name). This flag
works around that limitation by replacing the names of
the labels in the output with unique ones.
shrinknames - Reduces script memory by shrinking identifiers. In the
process, it turns the script into unreadable gibberish,
hard to debug, but this gets big savings for complex
scripts.
Note that the optimizer doesn't reorder expressions to fold constants. This
means that e.g. a + 3 + 5 is not optimized to a + 8; however a + (3 + 5) is.