Progress towards dual Python 2 & 3

Sei Lisa 2020-11-09 02:28:57 +01:00
parent dde9577cea
commit f8cf78dfac
10 changed files with 100 additions and 80 deletions
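
The changes below route nearly all byte/text handling through a small strutil compatibility module (imported in the hunks either by name, as xrange and unicode, or via from strutil import *, which supplies str2u, u2str, b2u, u2b, any2u, any2str and str2b). strutil.py itself is not among the hunks shown here, so the following is only a rough sketch of what such a shim typically looks like, with names and signatures inferred from the call sites in this commit rather than copied from the real file:

import sys

python3 = sys.version_info[0] >= 3   # assumed flag; main.py below tests a name 'python3'

if python3:
    unicode = str     # Python 3 has no separate unicode type
    xrange = range    # Python 3's range is already lazy

def str2u(s, enc='utf8'):
    # native str -> unicode text (decode on Python 2, no-op on Python 3)
    return s if isinstance(s, unicode) else s.decode(enc)

def b2u(b, enc='utf8'):
    # bytes -> unicode text
    return b.decode(enc) if isinstance(b, bytes) else b

def u2b(u, enc='utf8'):
    # unicode text -> bytes
    return u.encode(enc) if isinstance(u, unicode) else u

def u2str(u, enc='utf8'):
    # unicode text -> native str (encode on Python 2, no-op on Python 3)
    return u if python3 else u2b(u, enc)

def str2b(s, enc='utf8'):
    # native str -> bytes (encode on Python 3, no-op on Python 2)
    return s.encode(enc) if python3 else s

def any2u(x, enc='utf8'):
    # bytes or native str -> unicode text
    return b2u(x, enc)

def any2str(x, enc='utf8'):
    # bytes or unicode -> whatever str is on the running interpreter
    return str2u(x, enc) if python3 else u2b(x, enc)

With a shim along these lines, the per-file edits below mostly amount to importing the helpers and replacing direct .decode()/.encode() calls and bytes regex patterns with their native-str equivalents.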

View file

@@ -19,6 +19,7 @@
from lslopt import lslfuncs
from lslopt.lslcommon import nr
from strutil import xrange
class deadcode(object):
@@ -530,7 +531,7 @@ class deadcode(object):
self.MarkReferences(statedef)
# Track removal of global lines, to reassign locations later.
LocMap = range(len(self.tree))
LocMap = list(range(len(self.tree)))
GlobalDeletions = []

View file

@@ -23,6 +23,7 @@ from lslopt import lslfuncs
from lslopt.lslfuncs import ZERO_VECTOR, ZERO_ROTATION
import math
from lslopt.lslfuncopt import OptimizeFunc, OptimizeArgs, FuncOptSetup
from strutil import xrange, unicode
# TODO: Remove special handling of @ within IF,WHILE,FOR,DO

View file

@@ -25,6 +25,7 @@ from lslopt.lslcommon import nr
#import math
#from lslparse import warning
#from lslfuncopt import OptimizeFunc, OptimizeArgs, FuncOptSetup
from strutil import xrange
class rec:
def __init__(self, **init):

View file

@@ -20,6 +20,7 @@
import sys, re
from lslopt.lslcommon import types, warning, Vector, Quaternion
from lslopt import lslcommon, lslfuncs
from strutil import *
def LoadLibrary(builtins = None, fndata = None):
"""Load builtins.txt and fndata.txt (or the given filenames) and return
@@ -40,27 +41,27 @@ def LoadLibrary(builtins = None, fndata = None):
# Library read code
parse_lin_re = re.compile(
br'^\s*([a-z]+)\s+'
br'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*('
br'[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*'
br'(?:\s*,\s*[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*)*'
br')?\s*\)\s*$'
br'|'
br'^\s*const\s+([a-z]+)'
br'\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*?)\s*$'
br'|'
br'^\s*(?:#.*|//.*)?$')
parse_arg_re = re.compile(br'^\s*([a-z]+)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*$')
parse_fp_re = re.compile(br'^\s*(-?(?=[0-9]|\.[0-9])[0-9]*'
br'((?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?))\s*$')
parse_int_re = re.compile(br'^\s*(-?0x[0-9A-Fa-f]+|-?[0-9]+)\s*$')
r'^\s*([a-z]+)\s+'
r'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*('
r'[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*'
r'(?:\s*,\s*[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*)*'
r')?\s*\)\s*$'
r'|'
r'^\s*const\s+([a-z]+)'
r'\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*?)\s*$'
r'|'
r'^\s*(?:#.*|//.*)?$')
parse_arg_re = re.compile(r'^\s*([a-z]+)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*$')
parse_fp_re = re.compile(r'^\s*(-?(?=[0-9]|\.[0-9])[0-9]*'
r'((?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?))\s*$')
parse_int_re = re.compile(r'^\s*(-?0x[0-9A-Fa-f]+|-?[0-9]+)\s*$')
parse_str_re = re.compile(u'^"((?:[^"\\\\]|\\\\.)*)"$')
f = open(builtins, 'rb')
f = open(builtins, 'r')
try:
linenum = 0
try:
ubuiltins = builtins.decode(sys.getfilesystemencoding())
ubuiltins = str2u(builtins, sys.getfilesystemencoding())
except UnicodeDecodeError:
# This is just a guess at the filename encoding.
ubuiltins = builtins.decode('iso-8859-15')
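
The pattern changes in this hunk go hand in hand with opening builtins.txt in text mode ('rb' becomes 'r') in the same hunk: once readline() yields text instead of bytes, a bytes pattern can no longer be applied to it under Python 3. A standalone illustration of that constraint, using a made-up library line:

import re

pat_bytes = re.compile(br'^\s*const\s')
pat_text = re.compile(r'^\s*const\s')

line = 'const integer TRUE = 1'     # what text-mode readline() yields on Python 3
print(bool(pat_text.match(line)))   # True
try:
    pat_bytes.match(line)           # TypeError on Python 3: bytes pattern on str input
except TypeError as e:
    print('bytes pattern rejected:', e)
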
@@ -70,7 +71,7 @@ def LoadLibrary(builtins = None, fndata = None):
if not line: break
if line[-1] == '\n': line = line[:-1]
try:
uline = line.decode('utf8')
uline = str2u(line, 'utf8')
except UnicodeDecodeError:
warning(u"Bad Unicode in %s line %d" % (ubuiltins, linenum))
continue
@@ -153,7 +154,7 @@ def LoadLibrary(builtins = None, fndata = None):
elif typ == 'float':
value = lslfuncs.F32(float(value))
elif typ == 'string':
value = value.decode('utf8')
value = str2u(value, 'utf8')
if parse_str_re.search(value):
esc = False
tmp = value[1:-1]
@@ -242,14 +243,14 @@ def LoadLibrary(builtins = None, fndata = None):
# TODO: "quaternion" doesn't compare equal to "rotation" even if they are
# equivalent. Canonicalize it before comparison, to avoid false
# reports of mismatches.
f = open(fndata, 'rb')
f = open(fndata, 'r')
try:
linenum = 0
curr_fn = None
curr_ty = None
skipping = False
try:
ufndata = fndata.decode(sys.getfilesystemencoding())
ufndata = str2u(fndata, sys.getfilesystemencoding())
except UnicodeDecodeError:
# This is just a guess at the filename encoding.
ufndata = fndata.decode('iso-8859-15')
@@ -259,7 +260,7 @@ def LoadLibrary(builtins = None, fndata = None):
if not line: break
if line[-1] == '\n': line = line[:-1]
try:
uline = line.decode('utf8')
uline = str2u(line, 'utf8')
except UnicodeDecodeError:
warning(u"Bad Unicode in %s line %d" % (ufndata, linenum))
continue
@@ -272,7 +273,7 @@ def LoadLibrary(builtins = None, fndata = None):
if match_fn and (rettype in ('void', 'event') or rettype in types):
skipping = True # until proven otherwise
name = match_fn.group(2)
uname = name.decode('utf8')
uname = str2u(name, 'utf8')
if (rettype == 'event' and name not in events
or rettype != 'event' and name not in functions
):
@@ -347,7 +348,7 @@ def LoadLibrary(builtins = None, fndata = None):
skipping = True
continue
if not skipping:
ucurr_fn = curr_fn.decode('utf8')
ucurr_fn = str2u(curr_fn, 'utf8')
if match_flag.group(1):
# SEF
# We don't handle conditions yet. Take the
@@ -438,7 +439,7 @@ def LoadLibrary(builtins = None, fndata = None):
# Post-checks
for i in functions:
ui = i.decode('utf8')
ui = str2u(i, 'utf8')
if 'NeedsData' in functions[i]:
del functions[i]['NeedsData']
warning(u"Library data, file %s: Function %s has no data."
@@ -455,7 +456,7 @@ def LoadLibrary(builtins = None, fndata = None):
u" delay. Removing SEF." % ui)
del functions[i]['SEF']
for i in events:
ui = i.decode('utf8')
ui = str2u(i, 'utf8')
if 'NeedsData' in events[i]:
del events[i]['NeedsData']
warning(u"Library data, file %s: Event %s has no data."

View file

@@ -21,6 +21,7 @@ from lslopt import lslfuncs
from lslopt import lslcommon
from lslopt.lslcommon import Key, Vector, Quaternion, warning
from math import copysign
from strutil import *
debugScopes = False
@@ -62,7 +63,7 @@ class outscript(object):
" spaces by the viewer when copy-pasting the code"
" (disable this warning by disabling the 'warntabs'"
" option).")
return pfx + '"' + value.encode('utf8').replace('\\','\\\\') \
return pfx + '"' + any2str(value, 'utf8').replace('\\','\\\\') \
.replace('"','\\"').replace('\n','\\n') + '"' + sfx
if tvalue == int:
if value < 0 and not self.globalmode and self.optsigns:

View file

@@ -29,6 +29,10 @@ import re
# Note this module was basically written from bottom to top, which may help
# reading it.
WHITESPACE_CHARS = frozenset({' ', '\r', '\n', '\x0B', '\x0C'})
SINGLE_SYMBOLS = frozenset({'.', ';', '{', '}', ',', '=', '(', ')', '-', '+',
'*', '/', '%', '@', ':', '<', '>', '[', ']', '&', '|', '^', '~', '!'})
def isdigit(c):
return '0' <= c <= '9'
@@ -48,7 +52,7 @@ def GetErrLineCol(parser):
# Find start of current line
lstart = parser.script.rfind('\n', 0, errorpos) + 1
# Find zero-based column number in characters
cno = len(parser.script[lstart:errorpos].decode('utf8'))
cno = len(any2u(parser.script[lstart:errorpos], 'utf8'))
# Find in #line directives list
i = len(parser.linedir)
filename = '<stdin>' # value to return if there's no #line before lno
@@ -75,7 +79,7 @@ class EParse(Exception):
if parser.emap and filename == '<stdin>':
filename = parser.filename
filename = (filename.decode('utf8', 'replace')
filename = (str2u(filename, 'utf8')
.replace(u'\\', u'\\\\')
.replace(u'"', u'\\"')
)
@@ -543,7 +547,7 @@ class parser(object):
# self.linestart is related to the preprocessor, therefore we
# check the characters that are relevant for standard C.
if c not in ' \n\r\x0B\x0C':
if c not in WHITESPACE_CHARS:
self.linestart = False
# Process strings
@@ -584,7 +588,7 @@ class parser(object):
if is_string:
self.pos += 1
return ('STRING_VALUE', lslfuncs.zstr(strliteral.decode('utf8')))
return ('STRING_VALUE', lslfuncs.zstr(str2u(strliteral, 'utf8')))
# fall through (to consider the L or to ignore the ")
if isalpha_(c):
@@ -705,7 +709,7 @@ class parser(object):
return (self.script[self.pos-3:self.pos],)
return (self.script[self.pos-2:self.pos],)
if c in '.;{},=()-+*/%@:<>[]&|^~!' and c != '':
if c in SINGLE_SYMBOLS:
return (c,)
if c == '\n':
@@ -2801,8 +2805,7 @@ list lazy_list_set(list L, integer i, list v)
self.filename = filename
if type(script) is unicode:
script = script.encode('utf8')
script = any2str(script, 'utf8')
self.script = script
self.length = len(script)

View file

@@ -23,6 +23,8 @@
#
# A side effect of this change is that the script becomes unreadable gibberish.
from strutil import xrange
class renamer(object):
CharSet1 = '_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
CharSet2 = '0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'

75  main.py
View file

@@ -188,15 +188,15 @@ def PreparePreproc(script):
def ScriptHeader(script, avname):
if avname:
avname = b' - ' + avname
return (b'//start_unprocessed_text\n/*'
avname = ' - ' + avname
return ('//start_unprocessed_text\n/*'
# + re.sub(r'([*/])(?=[*|/])', r'\1|', script) # FS's algorithm
# HACK: This won't break strings containing ** or /* or // like URLs,
# while still being compatible with FS.
+ re.sub(br'([*/]\||\*(?=/))', br'\1|', script)
+ b'*/\n//end_unprocessed_text\n//nfo_preprocessor_version 0\n'
b'//program_version LSL PyOptimizer v' + str2b(VERSION)
+ str2b(avname) + b'\n//mono\n\n')
+ re.sub(r'([*/]\||\*(?=/))', r'\1|', script)
+ '*/\n//end_unprocessed_text\n//nfo_preprocessor_version 0\n'
'//program_version LSL PyOptimizer v' + VERSION
+ avname + '\n//mono\n\n')
def Usage(progname, about = None):
if about is None:
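
The re.sub in ScriptHeader above embeds the unprocessed script inside a /* ... */ block, so any '*/' inside the script would close that block early. The pattern escapes only '*/' (plus the already-escaped forms '*|' and '/|'), which is why '**', '//' and URLs pass through untouched, as the HACK comment notes. A quick standalone run of the same substitution on a made-up line:

import re

sample = 'x = "http://example.com"; /* a */ b ** c *| d'
print(re.sub(r'([*/]\||\*(?=/))', r'\1|', sample))
# -> x = "http://example.com"; /* a *|/ b ** c *|| d
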
@@ -453,7 +453,7 @@ def main(argv):
if chgfix[1:] not in validoptions:
Usage(argv[0], 'optimizer-options')
werr(u"\nError: Unrecognized"
u" optimizer option: %s\n" % chg.decode('utf8'))
u" optimizer option: %s\n" % str2u(chg, 'utf8'))
return 1
if chgfix[0] == '-':
options.discard(chgfix[1:])
@@ -591,6 +591,28 @@ def main(argv):
f.close()
del f
# Transform to str and check Unicode validity
if type(script) is unicode:
script = u2str(script, 'utf8')
else:
try:
# Try converting the script to Unicode, to report any encoding
# errors with accurate line information.
tmp = UniConvScript(script, options,
fname if fname != '-' else '<stdin>',
emap).to_unicode()
# For Python 2, just report any errors and ignore the result.
# For Python 3, use the Unicode.
if python3:
script = tmp
del tmp
except EParse as e:
# We don't call ReportError to prevent problems due to
# displaying invalid UTF-8
werr(e.args[0] + u"\n")
return 1
# Now script is in native str format.
if script_header:
script_header = ScriptHeader(script, avname)
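
The block above replaces the old "Transform to bytes" step that is removed further down: the source is normalized to the interpreter's native str as early as possible. On Python 2 that means leaving it as UTF-8 bytes and only validating it through UniConvScript; on Python 3 the validated Unicode result is kept. A minimal, helper-free illustration of what "native str" means here (the sample bytes are made up):

import sys
python3 = sys.version_info[0] >= 3      # mirrors the flag tested in the hunk above

raw = b'default{state_entry(){llOwnerSay("\xc3\xa1");}}'   # UTF-8 bytes as read from disk
script = raw.decode('utf8') if python3 else raw
assert type(script) is str              # "native str" on either interpreter
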
@@ -598,7 +620,7 @@ def main(argv):
import time
tmp = time.time()
script_timestamp = time.strftime(
b'// Generated on %Y-%m-%dT%H:%M:%S.{0:06d}Z\n'
'// Generated on %Y-%m-%dT%H:%M:%S.{0:06d}Z\n'
.format(int(tmp % 1 * 1000000)), time.gmtime(tmp))
del tmp
@@ -642,27 +664,11 @@ def main(argv):
# Append user arguments at the end to allow them to override defaults
preproc_cmdline += preproc_user_postargs
# Transform to bytes and check Unicode validity
if type(script) is unicode:
script = script.encode('utf8')
else:
try:
# Try converting the script to Unicode, to report any encoding
# errors with accurate line information. At this point we don't
# need the result.
UniConvScript(script, options,
fname if fname != '-' else '<stdin>',
emap).to_unicode()
except EParse as e:
# We don't call ReportError to prevent problems due to
# displaying invalid UTF-8
werr(e.args[0] + u"\n")
return 1
if preproc != 'none':
# PreparePreproc uses and returns Unicode string encoding.
script = u2b(PreparePreproc(any2u(script, 'utf8')), 'utf8')
# At this point, for the external preprocessor to work we need the
# script as a byte array, not as unicode, but it should be UTF-8.
script = PreparePreproc(script.decode('utf8')).encode('utf8')
if preproc == 'mcpp':
# As a special treatment for mcpp, we force it to output its
# macros so we can read if USE_xxx are defined. With GCC that
@@ -680,6 +686,8 @@ def main(argv):
return status
del p, status
script = any2str(script, 'utf8')
# This method is very imperfect, in several senses. However, since
# it's applied to the output of the preprocessor, all of the
# concerns should be addressed:
@@ -687,13 +695,13 @@ def main(argv):
# - Comments preceding the directive should not cause problems.
# e.g.: /* test */ #directive
# - #directive within a comment or string should be ignored.
for x in re.findall(br'(?:(?<=\n)|^)\s*#\s*define\s+('
br'USE_SWITCHES'
br'|USE_LAZY_LISTS'
br')(?:$|[^A-Za-z0-9_])', script, re.S):
if x == b'USE_SWITCHES':
for x in re.findall(r'(?:(?<=\n)|^)\s*#\s*define\s+('
r'USE_SWITCHES'
r'|USE_LAZY_LISTS'
r')(?:$|[^A-Za-z0-9_])', script, re.S):
if x == 'USE_SWITCHES':
options.add('enableswitch')
elif x == b'USE_LAZY_LISTS':
elif x == 'USE_LAZY_LISTS':
options.add('lazylists')
if not preshow:
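
Because script is a native str by this point (see the any2str call above), the #define scan switches to text patterns as well. The trailing (?:$|[^A-Za-z0-9_]) guard keeps longer identifiers from matching; for example, a hypothetical USE_SWITCHES_FOO would not register as USE_SWITCHES. A standalone run on made-up preprocessor output:

import re

out = ('# 1 "script.lsl"\n'
       '#define USE_SWITCHES\n'
       '  #  define USE_LAZY_LISTS 1\n'
       '#define USE_SWITCHES_FOO\n')
print(re.findall(r'(?:(?<=\n)|^)\s*#\s*define\s+('
                 r'USE_SWITCHES'
                 r'|USE_LAZY_LISTS'
                 r')(?:$|[^A-Za-z0-9_])', out, re.S))
# -> ['USE_SWITCHES', 'USE_LAZY_LISTS']
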
@@ -703,9 +711,10 @@ def main(argv):
lib = lslopt.lslloadlib.LoadLibrary(builtins, libdata)
p = parser(lib)
assert type(script) == str
try:
ts = p.parse(script, options,
fname if fname != '-' else '<stdin>')
'stdin' if fname == '-' else fname)
except EParse as e:
ReportError(script, e)
return 1

View file

@@ -213,9 +213,9 @@ def invokeMain(argv, stdin = None):
lslcommon.IsCalc = False
lslcommon.Bugs.clear()
lslcommon.Bugs.add(6495)
save_stdin = sys.stdin
save_stdout = sys.stdout
save_stderr = sys.stderr
lslcommon.save_stdin = sys.stdin
lslcommon.save_stdout = sys.stdout
lslcommon.save_stderr = sys.stderr
stdout_output = None
stderr_output = None
try:
@@ -231,9 +231,9 @@ def invokeMain(argv, stdin = None):
stdout_output = sys.stdout.getvalue()
stderr_output = sys.stderr.getvalue()
finally:
sys.stdin = save_stdin
sys.stdout = save_stdout
sys.stderr = save_stderr
sys.stdin = lslcommon.save_stdin
sys.stdout = lslcommon.save_stdout
sys.stderr = lslcommon.save_stderr
lslcommon.LSO = False
lslcommon.IsCalc = False
lslcommon.Bugs.clear()
@@ -721,10 +721,9 @@ def generateScriptTests():
try:
if expected_stderr.startswith(b'REGEX\n'):
self.assertIsNotNone(
re.search(expected_stderr[6:],
actual_stderr.decode('utf8')
)
self.assertIsNotNone(re.search(
b2u(expected_stderr[6:], 'utf8'),
b2u(actual_stderr, 'utf8'))
)
else:
self.assertTrue(expected_stderr == actual_stderr)
@@ -734,6 +733,7 @@ def generateScriptTests():
werr(expected_stderr)
werr(u'\n************ actual stderr:\n')
werr(actual_stderr)
# werr(('1' if difflib else '0')+('1' if expected_stderr else '0') + ('1' if actual_stderr else '0'))
if difflib and expected_stderr and actual_stderr \
and not expected_stderr.startswith(b'REGEX\n'):
werr(u'\n************ diff:\n'
@@ -746,8 +746,9 @@ def generateScriptTests():
raise
try:
if expected_stdout.startswith(b'REGEX\n'):
self.assertIsNotNone(re.search(expected_stdout[6:],
actual_stdout))
self.assertIsNotNone(re.search(
b2u(expected_stdout[6:], 'utf8'),
b2u(actual_stdout, 'utf8')))
else:
self.assertTrue(expected_stdout == actual_stdout)
except AssertionError:

View file

@@ -1,2 +1,2 @@
REGEX
IOError: (?:\[Errno 21\] Is a directory|\[Errno 13\] Permission denied): 'unit_tests/coverage.suite/actually-a-dir.d'
Error: (?:\[Errno 21\] Is a directory|\[Errno 13\] Permission denied): 'unit_tests/coverage.suite/actually-a-dir.d'