mirror of
https://github.com/Sei-Lisa/LSL-PyOptimizer
synced 2025-07-01 23:58:20 +00:00
Implement the shrinknames option.
Fixes some bugs with the treatment of the shrink attribute, some others with the output of renamed stuff.
This commit is contained in:
parent
847d7b1e20
commit
6c248c46e3
5 changed files with 174 additions and 15 deletions
|
@ -2,7 +2,9 @@
|
||||||
import lslfuncs
|
import lslfuncs
|
||||||
from lslparse import warning
|
from lslparse import warning
|
||||||
|
|
||||||
class optimizer(object):
|
from lslrenamer import renamer
|
||||||
|
|
||||||
|
class optimizer(renamer):
|
||||||
|
|
||||||
# Default values per type when declaring variables
|
# Default values per type when declaring variables
|
||||||
DefaultValues = {'integer': 0, 'float': 0.0, 'string': u'',
|
DefaultValues = {'integer': 0, 'float': 0.0, 'string': u'',
|
||||||
|
@ -606,6 +608,9 @@ class optimizer(object):
|
||||||
else:
|
else:
|
||||||
self.FoldTree(tree, idx)
|
self.FoldTree(tree, idx)
|
||||||
|
|
||||||
|
if 'shrinknames' in options:
|
||||||
|
self.AssignNewNames()
|
||||||
|
|
||||||
treesymtab = (self.tree, self.symtab)
|
treesymtab = (self.tree, self.symtab)
|
||||||
del self.tree
|
del self.tree
|
||||||
del self.symtab
|
del self.symtab
|
||||||
|
|
|
@ -135,11 +135,16 @@ class outscript(object):
|
||||||
def dent(self):
|
def dent(self):
|
||||||
return self.indent * self.indentlevel
|
return self.indent * self.indentlevel
|
||||||
|
|
||||||
def FindName(self, node):
|
def FindName(self, node, scope = None):
|
||||||
try:
|
if scope is None:
|
||||||
return self.symtab[node['scope']][node['name']]['NewName']
|
# node is a node
|
||||||
except KeyError:
|
if 'scope' in node and'NewName' in self.symtab[node['scope']][node['name']]:
|
||||||
|
return self.symtab[node['scope']][node['name']]['NewName']
|
||||||
return node['name']
|
return node['name']
|
||||||
|
# node is a name
|
||||||
|
if 'NewName' in self.symtab[scope][node]:
|
||||||
|
return self.symtab[scope][node]['NewName']
|
||||||
|
return node
|
||||||
|
|
||||||
def OutIndented(self, node):
|
def OutIndented(self, node):
|
||||||
if node['nt'] != '{}':
|
if node['nt'] != '{}':
|
||||||
|
@ -300,7 +305,9 @@ class outscript(object):
|
||||||
if node['t'] is not None:
|
if node['t'] is not None:
|
||||||
ret += node['t'] + ' '
|
ret += node['t'] + ' '
|
||||||
ret += self.FindName(node) + '('
|
ret += self.FindName(node) + '('
|
||||||
ret += ', '.join(typ + ' ' + name for typ, name in zip(node['ptypes'], node['pnames']))
|
scope = node['pscope']
|
||||||
|
ret += ', '.join(typ + ' ' + self.FindName(name, scope)
|
||||||
|
for typ, name in zip(node['ptypes'], node['pnames']))
|
||||||
return ret + ')\n' + self.OutCode(child[0])
|
return ret + ')\n' + self.OutCode(child[0])
|
||||||
|
|
||||||
return self.dent() + self.OutExpr(node) + ';\n'
|
return self.dent() + self.OutExpr(node) + ';\n'
|
||||||
|
|
|
@ -638,8 +638,10 @@ class parser(object):
|
||||||
args = self.Parse_optional_expression_list(sym['ParamTypes'])
|
args = self.Parse_optional_expression_list(sym['ParamTypes'])
|
||||||
self.expect(')')
|
self.expect(')')
|
||||||
self.NextToken()
|
self.NextToken()
|
||||||
return {'nt':'FNCALL', 't':sym['Type'], 'name':name,
|
ret = {'nt':'FNCALL', 't':sym['Type'], 'name':name, 'ch':args}
|
||||||
'scope':self.scopeindex, 'ch':args}
|
if 'Scope' in sym:
|
||||||
|
ret['scope'] = sym['Scope']
|
||||||
|
return ret
|
||||||
if sym['Kind'] != 'v':
|
if sym['Kind'] != 'v':
|
||||||
raise EParseTypeMismatch(self)
|
raise EParseTypeMismatch(self)
|
||||||
typ = sym['Type']
|
typ = sym['Type']
|
||||||
|
@ -1170,7 +1172,7 @@ class parser(object):
|
||||||
x = random.randint(0, 16777215)
|
x = random.randint(0, 16777215)
|
||||||
unique += b64encode(chr(x>>16) + chr((x>>8)&255)
|
unique += b64encode(chr(x>>16) + chr((x>>8)&255)
|
||||||
+ chr(x&255)).replace('+', '_')
|
+ chr(x&255)).replace('+', '_')
|
||||||
if '/' not in unique and unique not in self.locallabels:
|
if '/' not in unique not in self.locallabels:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
# Use the existing name. Faster and more readable.
|
# Use the existing name. Faster and more readable.
|
||||||
|
@ -1200,11 +1202,11 @@ class parser(object):
|
||||||
# It might still be a forward reference, so we add it to the
|
# It might still be a forward reference, so we add it to the
|
||||||
# list of things to look up when done
|
# list of things to look up when done
|
||||||
self.jump_lookups.append((name, self.scopeindex, self.errorpos, jumpnode))
|
self.jump_lookups.append((name, self.scopeindex, self.errorpos, jumpnode))
|
||||||
|
else:
|
||||||
|
jumpnode['scope'] = sym['Scope']
|
||||||
self.NextToken()
|
self.NextToken()
|
||||||
self.expect(';')
|
self.expect(';')
|
||||||
self.NextToken()
|
self.NextToken()
|
||||||
if sym is not None:
|
|
||||||
jumpnode['scope'] = sym['Scope']
|
|
||||||
return jumpnode
|
return jumpnode
|
||||||
if tok0 == 'STATE':
|
if tok0 == 'STATE':
|
||||||
self.NextToken()
|
self.NextToken()
|
||||||
|
@ -1217,7 +1219,7 @@ class parser(object):
|
||||||
self.NextToken()
|
self.NextToken()
|
||||||
self.expect(';')
|
self.expect(';')
|
||||||
self.NextToken()
|
self.NextToken()
|
||||||
return {'nt':'STSW', 't':None, 'name':name}
|
return {'nt':'STSW', 't':None, 'name':name, 'scope':0}
|
||||||
if tok0 == 'RETURN':
|
if tok0 == 'RETURN':
|
||||||
self.NextToken()
|
self.NextToken()
|
||||||
if self.tok[0] == ';':
|
if self.tok[0] == ';':
|
||||||
|
@ -1609,7 +1611,7 @@ class parser(object):
|
||||||
events = self.Parse_events()
|
events = self.Parse_events()
|
||||||
|
|
||||||
self.expect('}')
|
self.expect('}')
|
||||||
self.tree.append({'nt':'STDEF', 't':None, 'name':name, 'ch':events})
|
self.tree.append({'nt':'STDEF', 't':None, 'name':name, 'scope':0, 'ch':events})
|
||||||
self.NextToken()
|
self.NextToken()
|
||||||
|
|
||||||
def Parse_script(self):
|
def Parse_script(self):
|
||||||
|
@ -1633,10 +1635,11 @@ class parser(object):
|
||||||
# Check the pending jump targets
|
# Check the pending jump targets
|
||||||
for tgt in self.jump_lookups:
|
for tgt in self.jump_lookups:
|
||||||
self.scopeindex = tgt[1]
|
self.scopeindex = tgt[1]
|
||||||
if self.FindSymbolPartial(tgt[0], MustBeLabel = True) is None:
|
sym = self.FindSymbolPartial(tgt[0], MustBeLabel = True)
|
||||||
|
if sym is None:
|
||||||
self.errorpos = tgt[2]
|
self.errorpos = tgt[2]
|
||||||
raise EParseUndefined(self)
|
raise EParseUndefined(self)
|
||||||
tgt[3]['scope'] = tgt[1]
|
tgt[3]['scope'] = sym['Scope']
|
||||||
|
|
||||||
del self.jump_lookups # Finished with it.
|
del self.jump_lookups # Finished with it.
|
||||||
|
|
||||||
|
|
140
lslopt/lslrenamer.py
Normal file
140
lslopt/lslrenamer.py
Normal file
|
@ -0,0 +1,140 @@
|
||||||
|
import random
|
||||||
|
from base64 import b64encode
|
||||||
|
|
||||||
|
class renamer(object):
|
||||||
|
CharSet1 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_'
|
||||||
|
CharSet2 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_0123456789'
|
||||||
|
# TODO: Derive these from builtins.txt somehow.
|
||||||
|
KwByLen = ((), (), ('do', 'if', 'PI'), ('for', 'key', 'EOF'),
|
||||||
|
('jump', 'else', 'list', 'TRUE', 'LOOP'))
|
||||||
|
def GetNextShortest(self):
|
||||||
|
"""Generate the next shortest possible identifier"""
|
||||||
|
while True:
|
||||||
|
n = self.WordCntr
|
||||||
|
self.WordCntr += 1
|
||||||
|
ret = self.CharSet1[n % 53]
|
||||||
|
n //= 53
|
||||||
|
while n > 1:
|
||||||
|
ret += self.CharSet2[n % 63]
|
||||||
|
n //= 63
|
||||||
|
if ret not in self.KwByLen[len(ret)] and ret not in self.UsedNames:
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def AssignNewNames(self):
|
||||||
|
self.WordCntr = 53 # Initialize to length 1
|
||||||
|
|
||||||
|
# Names that can be reused without penalty. The initial set is there
|
||||||
|
# since the beginning. Others (e.g. Key) are created when some kinds
|
||||||
|
# of stuff are present, but we don't take so many risks.
|
||||||
|
ReusableNames = set(['LslLibrary', 'LslUserScript', 'System'])
|
||||||
|
|
||||||
|
# Names from ReusableNames that have already been used
|
||||||
|
self.UsedNames = set()
|
||||||
|
|
||||||
|
UsedLocals = set()
|
||||||
|
|
||||||
|
# Make a first pass to separate the symbols into three categories.
|
||||||
|
globalvars = []
|
||||||
|
states = []
|
||||||
|
functions = []
|
||||||
|
globaldefs = self.symtab[0]
|
||||||
|
for name in globaldefs:
|
||||||
|
if name == -1: continue
|
||||||
|
kind = globaldefs[name]['Kind']
|
||||||
|
if kind == 's':
|
||||||
|
states.append(name)
|
||||||
|
elif kind == 'f':
|
||||||
|
if 'Loc' in globaldefs[name]:
|
||||||
|
functions.append(name)
|
||||||
|
elif kind == 'v':
|
||||||
|
globalvars.append(name)
|
||||||
|
else:
|
||||||
|
assert False, 'Invalid kind at this scope: ' + kind
|
||||||
|
|
||||||
|
# We make three passes, one for states, then functions, then globals,
|
||||||
|
# in that order.
|
||||||
|
|
||||||
|
for name in states:
|
||||||
|
# States have top priority. Here's why. An internal event function
|
||||||
|
# name is made by concatenating an 'e', then the state name, then
|
||||||
|
# the event name, e.g. edefaultstate_entry. Since a new identifier
|
||||||
|
# having part of the state name is created for every event in that
|
||||||
|
# state, the shortest the state name, the least bytes it will use.
|
||||||
|
# Furthermore, a state switch instruction further adds an Unicode
|
||||||
|
# string (all other identifier names use one-byte strings), which
|
||||||
|
# is the more reason to shorten it as much as possible.
|
||||||
|
#
|
||||||
|
# Unfortunately, there isn't much that can be done about 'default'.
|
||||||
|
#
|
||||||
|
# The good side is that we get to reuse these names for variables
|
||||||
|
# without using extra space and without wasting single or double
|
||||||
|
# letter identifiers.
|
||||||
|
|
||||||
|
entry = globaldefs[name]
|
||||||
|
if name != 'default':
|
||||||
|
name = entry['NewName'] = self.GetNextShortest()
|
||||||
|
# Find also the event names it uses, to add them for reuse.
|
||||||
|
for node in self.tree[entry['Loc']]['ch']:
|
||||||
|
assert node['nt'] == 'FNDEF'
|
||||||
|
ReusableNames.add('e' + name + node['name'])
|
||||||
|
del states
|
||||||
|
|
||||||
|
for name in functions:
|
||||||
|
# Assign a new name. Internal function names get a 'g' prepended
|
||||||
|
# to them, so these are candidates for reuse too.
|
||||||
|
|
||||||
|
# Unfortunately, we won't find any reusable name starting with 'g'
|
||||||
|
# this early, so no point in searching.
|
||||||
|
|
||||||
|
short = globaldefs[name]['NewName'] = self.GetNextShortest()
|
||||||
|
ReusableNames.add('g' + short)
|
||||||
|
del functions
|
||||||
|
|
||||||
|
for name in globalvars:
|
||||||
|
# First, check if we have reusable names available.
|
||||||
|
if ReusableNames:
|
||||||
|
short = ReusableNames.pop()
|
||||||
|
self.UsedNames.add(short)
|
||||||
|
else:
|
||||||
|
short = self.GetNextShortest()
|
||||||
|
globaldefs[name]['NewName'] = short
|
||||||
|
|
||||||
|
# Do the same for function and event parameter names. Pure locals get
|
||||||
|
# long distinct names.
|
||||||
|
First = True
|
||||||
|
for table in self.symtab:
|
||||||
|
if First:
|
||||||
|
First = False
|
||||||
|
# Skip globals
|
||||||
|
continue
|
||||||
|
for name,sym in table.iteritems():
|
||||||
|
if name == -1: continue
|
||||||
|
if sym['Kind'] != 'v':
|
||||||
|
assert sym['Kind'] == 'l'
|
||||||
|
continue
|
||||||
|
if 'Param' in sym:
|
||||||
|
# Same procedure as for global vars
|
||||||
|
# Not the best strategy (using locally unique names would
|
||||||
|
# work optimally) but hey. At the time of writing there's
|
||||||
|
# no reference analysis. TODO: Implement.
|
||||||
|
if ReusableNames:
|
||||||
|
short = ReusableNames.pop()
|
||||||
|
self.UsedNames.add(short)
|
||||||
|
else:
|
||||||
|
short = self.GetNextShortest()
|
||||||
|
table[name]['NewName'] = short
|
||||||
|
else:
|
||||||
|
# Generate new identifier
|
||||||
|
while True:
|
||||||
|
x = random.randint(0, 16777215)
|
||||||
|
unique = 'L_' + b64encode(chr(x>>16) + chr((x>>8)&255)
|
||||||
|
+ chr(x&255)).replace('+', '_')
|
||||||
|
x = random.randint(0, 16777215)
|
||||||
|
unique += b64encode(chr(x>>16) + chr((x>>8)&255)
|
||||||
|
+ chr(x&255)).replace('+', '_')
|
||||||
|
if '/' not in unique not in UsedLocals:
|
||||||
|
break
|
||||||
|
UsedLocals.add(unique)
|
||||||
|
table[name]['NewName'] = unique
|
||||||
|
|
||||||
|
del globalvars
|
4
main.py
4
main.py
|
@ -50,6 +50,10 @@ Options (+ means active by default, - means inactive by default):
|
||||||
will go to the last label with that name). This flag
|
will go to the last label with that name). This flag
|
||||||
works around that limitation by replacing the names of
|
works around that limitation by replacing the names of
|
||||||
the labels in the output with unique ones.
|
the labels in the output with unique ones.
|
||||||
|
shrinknames - Reduces script memory by shrinking identifiers. In the
|
||||||
|
process, it turns the script into unreadable gibberish,
|
||||||
|
hard to debug, but this gets big savings for complex
|
||||||
|
scripts.
|
||||||
|
|
||||||
Note that the optimizer doesn't reorder expressions to fold constants. This
|
Note that the optimizer doesn't reorder expressions to fold constants. This
|
||||||
means that e.g. a + 3 + 5 is not optimized to a + 8; however a + (3 + 5) is.
|
means that e.g. a + 3 + 5 is not optimized to a + 8; however a + (3 + 5) is.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue