mirror of
https://github.com/Sei-Lisa/LSL-PyOptimizer
synced 2024-11-21 06:15:56 -07:00
d03de9a6be
Closes #30, but not the general problem of control characters in the source.
219 lines
9.7 KiB
Python
219 lines
9.7 KiB
Python
# (C) Copyright 2015-2024 Sei Lisa. All rights reserved.
|
|
#
|
|
# This file is part of LSL PyOptimizer.
|
|
#
|
|
# LSL PyOptimizer is free software: you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License as
|
|
# published by the Free Software Foundation, either version 3 of the
|
|
# License, or (at your option) any later version.
|
|
#
|
|
# LSL PyOptimizer is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
# This module renames all kinds of variables. Global variables, user functions,
|
|
# state names, event names and function/event parameters all take memory space,
|
|
# so shrinking the identifiers as much as possible, when possible (i.e. not for
|
|
# events or the default state), ensures their memory usage will be minimized.
|
|
# It also reuses some preexisting names. Locals are renamed also so that they
|
|
# don't stand in the way of globals.
|
|
#
|
|
# A side effect of this change is that the script becomes unreadable gibberish.
|
|
|
|
from strutil import xrange
|
|
|
|
class renamer(object):
    """Identifier-shortening pass (the 'shrinknames' option).

    The methods of this class read self.symtab and self.tree, which are not
    defined here; they must be provided by the object the methods run on.
    """

    # Characters allowed as the first character of a generated identifier.
    CharSet1 = '_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    # Characters allowed in every position after the first one.
    CharSet2 = '0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    # As a special case, 'Pop' is not a keyword, but it's one of the predefined
    # identifiers, therefore it must not be generated by GetNextShortest
    # because it's already in the list of identifiers available from the start.
    Kws = frozenset({'do', 'if', 'PI',
                     'for', 'key', 'EOF', 'Pop',
                     'jump', 'else', 'list', 'TRUE', 'LOOP', 'case'
                     })

    def GetNextShortest(self):
        """Generate the next shortest possible identifier.

        The generator state is self.WordFirstChar (an index into CharSet1)
        and self.WordRestOfChars (a list of indices into CharSet2, one per
        additional character). The first character advances fastest; when it
        wraps around, the remaining characters are incremented like a counter
        whose least significant digit is WordRestOfChars[0], and a new
        character is appended when all of them overflow.

        Candidates found in self.Kws or in self.UsedNames are skipped.

        Requires self.WordFirstChar, self.WordRestOfChars and self.UsedNames
        to be initialized (ShrinkNames does that). Returns the identifier as
        a string and leaves the state pointing at the next candidate.
        """
        while True:
            # Build the candidate for the current state.
            ret = self.CharSet1[self.WordFirstChar]
            for idx in self.WordRestOfChars:
                ret += self.CharSet2[idx]

            # Advance the state to the next candidate.
            self.WordFirstChar += 1
            if self.WordFirstChar >= len(self.CharSet1):
                self.WordFirstChar = 0
                # Propagate the carry through the rest of the characters.
                for idx, value in enumerate(self.WordRestOfChars):
                    if value < len(self.CharSet2)-1:
                        self.WordRestOfChars[idx] = value + 1
                        break
                    self.WordRestOfChars[idx] = 0
                else:
                    # Every position overflowed (or there were none): the
                    # identifiers become one character longer.
                    self.WordRestOfChars.append(0)

            if ret not in self.Kws and ret not in self.UsedNames:
                return ret

    def ShrinkNames(self, UsableAsGlobals = None, UsableAsParams = None):
        """Implements the shrinknames option.

        Walks self.symtab and stores a replacement identifier under the
        'NewName' key of each renamed symbol:

        - States other than 'default', and functions that have a 'Loc' key,
          get names from GetNextShortest.
        - Global variables and function/event parameters first consume the
          pool of reusable names, then (parameters only) the state names,
          then names from GetNextShortest.
        - Other local variables get 'loc_' prepended to their current name.

        UsableAsGlobals: optional set of extra names added to the reusable
            pool before global variables are renamed.
        UsableAsParams: optional set of extra names added to the reusable
            pool before parameters are renamed.

        Returns None; the results are the 'NewName' entries written into the
        symbol tables. Also resets and advances the generator state
        (self.WordFirstChar, self.WordRestOfChars, self.UsedNames).
        """
        self.WordFirstChar = 0
        self.WordRestOfChars = []

        # Names that can be reused without penalty. This initial set is there
        # since the beginning. Others are created depending on the code
        # (e.g. Key when there are keys), but we don't take too many risks.
        builtinReusable = ('LslLibrary', 'Library', 'LslUserScript',
                           'System', 'UThread', 'UThreadStackFrame', 'Pop',
                           'IsRestoring', 'IsSaveDue', 'ResumeVoid',
                           )
        ReusableNames = set(builtinReusable)
        if UsableAsGlobals is not None:
            ReusableNames |= UsableAsGlobals

        # Names from ReusableNames that have already been used
        self.UsedNames = set()

        # Make a preliminary pass to separate the symbols into three categories
        globalvars = []
        states = []
        functions = []
        globaldefs = self.symtab[0]
        for name in globaldefs:
            if name == -1: continue
            kind = globaldefs[name]['Kind']
            if kind == 's':
                states.append(name)
            elif kind == 'f':
                # Only functions that have a 'Loc' entry are renamed.
                if 'Loc' in globaldefs[name]:
                    functions.append(name)
            elif kind == 'v':
                globalvars.append(name)
            elif kind == 'c':
                pass
            else:
                assert False, 'Invalid kind at this scope: ' \
                    + kind # pragma: no cover

        # We make four passes, one for states, then function names, then
        # global names, then parameter names and locals, in that order.

        # State names are usable as short identifiers for parameters.
        stateNames = []

        for name in states:
            # States have top priority. Here's why. An internal event function
            # name is made by concatenating an 'e', then the state name, then
            # the event name, e.g. edefaultstate_entry. Since a new identifier
            # having part of the state name is created for every event in that
            # state, the shorter the state name, the fewer bytes it will use.
            # Furthermore, a state switch instruction also adds a Unicode
            # string (all other identifier names use one-byte strings), which
            # is all the more reason to shorten it as much as possible.
            #
            # Unfortunately, there isn't much that can be done about 'default'.
            #
            # The good side is that we get to reuse these names for variables
            # without using extra space and without wasting single or double
            # letter identifiers.

            entry = globaldefs[name]
            if name != 'default':
                name = entry['NewName'] = self.GetNextShortest()
            stateNames.append(name)
            # Find also the event names it uses, to add them for reuse.
            for node in self.tree[entry['Loc']].ch:
                assert node.nt == 'FNDEF'
                event_name = node.name
                # These events have their names translated.
                if event_name == 'on_rez':
                    event_name = 'rez'
                if event_name == 'listen':
                    event_name = 'chat'
                if event_name == 'run_time_permissions':
                    # LSO internally translates run_time_permissions to 'chat'.
                    # But it doesn't include identifiers in the object anyway,
                    # so this is here for documentation purposes only.
                    #if lslcommon.LSO:
                    #    event_name = 'chat'
                    #else:
                    event_name = 'run_time_perms'
                if event_name == 'remote_data':
                    event_name = 'remote_event'
                ReusableNames.add('e' + name + event_name)
        del states

        for name in functions:
            # Assign a new name. Internally, function names get a 'g' prepended
            # to them, so these are candidates for reuse too.

            # Unfortunately, we won't find any reusable name starting with 'g'
            # this early, so no point in searching.

            short = globaldefs[name]['NewName'] = self.GetNextShortest()
            ReusableNames.add('g' + short)
        del functions

        for name in globalvars:
            # First, check if we have reusable names available.
            if ReusableNames:
                short = ReusableNames.pop()
                self.UsedNames.add(short)
            else:
                short = self.GetNextShortest()
            globaldefs[name]['NewName'] = short

        # Do the same for function and event parameter names. Pure locals get
        # long distinct names.
        if UsableAsParams is not None:
            ReusableNames |= UsableAsParams

        # Snapshot the complete naming state as it is after the globals, so
        # that every parameter table can start over from it. The snapshot
        # must include WordRestOfChars: if a table with many parameters makes
        # the generator roll over to longer identifiers, restoring only
        # WordFirstChar would leave every later table stuck with the longer
        # identifiers instead of restarting at the shortest free ones.
        First = True
        restart = self.WordFirstChar
        restartRest = self.WordRestOfChars[:]
        restartReusable = ReusableNames
        ReusableNames = restartReusable.copy()
        restartState = stateNames
        stateNames = restartState[:]
        restartUsed = self.UsedNames
        self.UsedNames = restartUsed.copy()
        for table in self.symtab:
            if First:
                First = False
                # Skip globals
                continue
            InParams = False
            for name,sym in table.items():
                if name == -1: continue
                if sym['Kind'] != 'v':
                    # Only variables are renamed in local tables; the only
                    # other kind expected here is 'l'.
                    assert sym['Kind'] == 'l'
                    name = name # trick python to not optimize out the jump
                    continue
                if 'Param' in sym:
                    if not InParams:
                        # Restart at every new parameter table.
                        # Parameter tables are isolated from each other.
                        InParams = True
                        self.WordFirstChar = restart
                        self.WordRestOfChars = restartRest[:]
                        ReusableNames = restartReusable.copy()
                        stateNames = restartState[:]
                        self.UsedNames = restartUsed.copy()
                    # Same procedure as for global vars
                    if ReusableNames:
                        short = ReusableNames.pop()
                        self.UsedNames.add(short)
                    elif stateNames:
                        short = stateNames.pop()
                        # No need to add it to UsedNames because
                        # GetNextShortest will always start past it anyway.
                    else:
                        short = self.GetNextShortest()
                    table[name]['NewName'] = short
                else:
                    # Generate new identifier, by prepending the four character
                    # string 'loc_'. This generates identifiers with five chars
                    # or more. We assume that is enough for them to stay safe
                    # from name collisions with globals and parameter names.
                    # NOTE(review): this comment used to add "Four letters
                    # allow for more than 1.4 million identifiers", which does
                    # not describe what the code below does; confirm whether
                    # that remark is still meaningful.
                    unique = 'loc_' + name
                    table[name]['NewName'] = unique

        del globalvars