From fe2dd9a7210abfc0246b25427a12abdd8dfbbbbf Mon Sep 17 00:00:00 2001 From: Sei Lisa Date: Tue, 15 Jan 2019 20:27:02 +0100 Subject: [PATCH] First baby steps towards dual Python2+3 compatibility --- lslopt/lslbasefuncs.py | 54 +++++++++++++---- lslopt/lslcommon.py | 1 + lslopt/lsldeadcode.py | 4 +- lslopt/lslextrafuncs.py | 4 +- lslopt/lslfoldconst.py | 10 ++-- lslopt/lslfuncopt.py | 6 +- lslopt/lslfuncs.py | 6 +- lslopt/lsljson.py | 39 ++++++++---- lslopt/lsllastpass.py | 4 +- lslopt/lslloadlib.py | 34 +++++------ lslopt/lsloptimizer.py | 12 ++-- lslopt/lsloutput.py | 6 +- lslopt/lslparse.py | 9 +-- main.py | 89 ++++++++++++++-------------- run-tests.py | 128 +++++++++++++++++++++------------------- strutil.py | 85 ++++++++++++++++++++++++++ unit_tests/json.py | 3 +- 17 files changed, 319 insertions(+), 175 deletions(-) create mode 100644 strutil.py diff --git a/lslopt/lslbasefuncs.py b/lslopt/lslbasefuncs.py index 20cf12b..e1cae4c 100644 --- a/lslopt/lslbasefuncs.py +++ b/lslopt/lslbasefuncs.py @@ -34,12 +34,13 @@ # The JSON functions have been separated to their own module. import re -from lslcommon import * -import lslcommon +from lslopt.lslcommon import * +from lslopt import lslcommon from ctypes import c_float import math import hashlib from base64 import b64encode, b64decode +from strutil import * # Regular expressions used along the code. They are needed mainly because @@ -58,18 +59,49 @@ from base64 import b64encode, b64decode # as is (vector)"<1,inf,info>". The 1st gives <0,0,0>, the others <1,inf,inf>. # The lookahead (?!i) is essential for parsing them that way without extra code. # Note that '|' in REs is order-sensitive. -float_re = re.compile(ur'^\s*[+-]?(?:0(x)(?:[0-9a-f]+(?:\.[0-9a-f]*)?|\.[0-9a-f]+)(?:p[+-]?[0-9]+)?' - ur'|(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:e[+-]?[0-9]+)?|inf|(nan))', - re.I) -vfloat_re = re.compile(ur'^\s*[+-]?(?:0(x)(?:[0-9a-f]+(?:\.[0-9a-f]*)?|\.[0-9a-f]+)(?:p[+-]?[0-9]+)?' 
- ur'|(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:e[+-]?[0-9]+)?|infinity|inf(?!i)|(nan))', - re.I) +float_re = re.compile(str2u(r''' + ^\s*[+-]?(?: + 0(x)(?: # Hex float or hex int (captures the 'x') + [0-9a-f]+(?:\.[0-9a-f]*)? + |\.[0-9a-f]+ # Hex digits + )(?: + p[+-]?[0-9]+ # Hex float exponent + )? # (optional) + |(?: # Decimal float or decimal int + [0-9]+(?:\.[0-9]*)? + |\.[0-9]+ # Decimal digits + )(?: + e[+-]?[0-9]+ # Decimal float exponent + )? # (optional) + |inf # Infinity + |(nan) # NaN (captured) + ) + '''), re.I | re.X) +vfloat_re = re.compile(str2u(r''' + ^\s*[+-]?(?: + 0(x)(?: # Hex float or hex int (captures the 'x') + [0-9a-f]+(?:\.[0-9a-f]*)? + |\.[0-9a-f]+ # Hex digits + )(?: + p[+-]?[0-9]+ # Hex float exponent + )? # (optional) + |(?: # Decimal float or decimal int + [0-9]+(?:\.[0-9]*)? + |\.[0-9]+ # Decimal digits + )(?: + e[+-]?[0-9]+ # Decimal float exponent + )? # (optional) + |infinity|inf(?!i) # Infinity (the only difference with the above) + |(nan) # NaN (captured) + ) + '''), re.I | re.X) -int_re = re.compile(ur'^0(x)[0-9a-f]+|^\s*[+-]?[0-9]+', re.I) +int_re = re.compile(str2u(r'^0(x)[0-9a-f]+|^\s*[+-]?[0-9]+'), re.I) -key_re = re.compile(ur'^[0-9a-f]{8}(?:-[0-9a-f]{4}){4}[0-9a-f]{8}$', re.I) +key_re = re.compile(str2u(r'^[0-9a-f]{8}(?:-[0-9a-f]{4}){4}[0-9a-f]{8}$'), + re.I) -b64_re = re.compile(ur'^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2,3})?') +b64_re = re.compile(str2u(r'^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2,3})?')) ZERO_VECTOR = Vector((0.0, 0.0, 0.0)) ZERO_ROTATION = Quaternion((0.0, 0.0, 0.0, 1.0)) diff --git a/lslopt/lslcommon.py b/lslopt/lslcommon.py index e0e36c9..4f45604 100644 --- a/lslopt/lslcommon.py +++ b/lslopt/lslcommon.py @@ -18,6 +18,7 @@ # Classes, functions and variables for use of all modules. 
import sys +from strutil import * _exclusions = frozenset(('nt','t','name','value','ch', 'X','SEF')) diff --git a/lslopt/lsldeadcode.py b/lslopt/lsldeadcode.py index 59cf2fa..4f03d9c 100644 --- a/lslopt/lsldeadcode.py +++ b/lslopt/lsldeadcode.py @@ -17,8 +17,8 @@ # Dead Code Removal optimization -import lslfuncs -from lslcommon import nr +from lslopt import lslfuncs +from lslopt.lslcommon import nr class deadcode(object): diff --git a/lslopt/lslextrafuncs.py b/lslopt/lslextrafuncs.py index 8c412fa..a295727 100644 --- a/lslopt/lslextrafuncs.py +++ b/lslopt/lslextrafuncs.py @@ -17,8 +17,8 @@ # Extra functions that have predictable return values for certain arguments. -from lslcommon import Key, Vector #, Quaternion -from lslbasefuncs import ELSLCantCompute, fi,ff,fs,fk,v2f,q2f,fl, \ +from lslopt.lslcommon import Key, Vector #, Quaternion +from lslopt.lslbasefuncs import ELSLCantCompute, fi,ff,fs,fk,v2f,q2f,fl, \ NULL_KEY, ZERO_VECTOR, ZERO_ROTATION, \ TOUCH_INVALID_TEXCOORD, cond ff, q2f # keep pyflakes happy as these are not used diff --git a/lslopt/lslfoldconst.py b/lslopt/lslfoldconst.py index 2a9514d..ce3a0a7 100644 --- a/lslopt/lslfoldconst.py +++ b/lslopt/lslfoldconst.py @@ -17,12 +17,12 @@ # Constant folding and simplification of expressions and statements. 
-import lslcommon -from lslcommon import Vector, Quaternion, warning, nr -import lslfuncs -from lslfuncs import ZERO_VECTOR, ZERO_ROTATION +from lslopt import lslcommon +from lslopt.lslcommon import Vector, Quaternion, warning, nr +from lslopt import lslfuncs +from lslopt.lslfuncs import ZERO_VECTOR, ZERO_ROTATION import math -from lslfuncopt import OptimizeFunc, OptimizeArgs, FuncOptSetup +from lslopt.lslfuncopt import OptimizeFunc, OptimizeArgs, FuncOptSetup # TODO: Remove special handling of @ within IF,WHILE,FOR,DO diff --git a/lslopt/lslfuncopt.py b/lslopt/lslfuncopt.py index 65cf8d9..f4ec0d2 100644 --- a/lslopt/lslfuncopt.py +++ b/lslopt/lslfuncopt.py @@ -18,9 +18,9 @@ # Optimize calls to LSL library functions and parameters where possible # This is dependent on the LSL function library. -import lslcommon -from lslcommon import Key, Vector, Quaternion, nr -import lslfuncs +from lslopt import lslcommon +from lslopt.lslcommon import Key, Vector, Quaternion, nr +from lslopt import lslfuncs def OptimizeArgs(node, sym): """Transform function arguments to shorter equivalents where possible.""" diff --git a/lslopt/lslfuncs.py b/lslopt/lslfuncs.py index 5c6dff1..2263512 100644 --- a/lslopt/lslfuncs.py +++ b/lslopt/lslfuncs.py @@ -17,6 +17,6 @@ # Put all LSL functions together in one single module -from lslbasefuncs import * -from lsljson import * -from lslextrafuncs import * +from lslopt.lslbasefuncs import * +from lslopt.lsljson import * +from lslopt.lslextrafuncs import * diff --git a/lslopt/lsljson.py b/lslopt/lsljson.py index 15ed2c6..7f18e1b 100644 --- a/lslopt/lsljson.py +++ b/lslopt/lsljson.py @@ -19,8 +19,8 @@ import re import math -from lslcommon import * -from lslbasefuncs import llStringTrim, fs, fl, InternalTypecast +from lslopt.lslcommon import * +from lslopt.lslbasefuncs import llStringTrim, fs, fl, InternalTypecast # INCOMPATIBILITY NOTE: The JSON functions in SL have very weird behaviour # in corner cases. 
Despite our best efforts, that behaviour is not replicated @@ -44,8 +44,8 @@ JSON_DELETE = u'\uFDD8' JSON_APPEND = -1 jsonesc_re = re.compile(u'[\x08\x09\x0A\x0C\x0D"/\\\\]') -jsonesc_dict = {u'\x08':ur'\b', u'\x09':ur'\t', u'\x0A':ur'\n', u'\x0C':ur'\f', - u'\x0D':ur'\r', u'"':ur'\"', u'/':ur'\/', u'\\':ur'\\'} +jsonesc_dict = {u'\x08':u'\\b', u'\x09':u'\\t', u'\x0A':u'\\n', u'\x0C':u'\\f', + u'\x0D':u'\\r', u'"':u'\\"', u'/':u'\\/', u'\\':u'\\\\'} jsonunesc_dict = {u'b':u'\x08', u't':u'\x09', u'n':u'\x0A', u'f':u'\x0C', u'r':u'\x0D'} # LSL JSON numbers differ from standard JSON numbers in many respects: @@ -72,18 +72,37 @@ jsonunesc_dict = {u'b':u'\x08', u't':u'\x09', u'n':u'\x0A', u'f':u'\x0C', u'r':u # elements when appropriate. # Real JSON number parser: -#jsonnum_re = re.compile(ur'-?(?:[1-9][0-9]*|0)(?:\.[0-9]+)?(?:[Ee][+-]?[0-9]+)?') +#jsonnum_re = re.compile(str2u( +# r'-?(?:[1-9][0-9]*|0)(?:\.[0-9]+)?(?:[Ee][+-]?[0-9]+)?' +# )) # BUG-6466 active: -jsonnumbug_re = re.compile(ur'-?(?:[0-9]*([Ee])-?[0-9]*\.?[0-9]*|(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*))') +jsonnumbug_re = re.compile(str2u(r''' + -?(?: + [0-9]*([Ee])-?[0-9]*\.?[0-9]* + |(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*) + ) + '''), re.X) # BUG-6466 fixed: # The new RE is just a modified version of the crap, allowing + exponents and # disallowing zeros, sometimes even when legal (e.g. 0e0) -#jsonnum_re = re.compile(ur'-?(?:(?=[1-9]|\.(?:[^e]|$)|0(?:[^0-9e]|$))[0-9]*([Ee])[+-]?[0-9]*\.?[0-9]*|(?=[1-9]|\.(?:[^e]|$)|0(?:[^0-9e]|$))[0-9]*(\.?[0-9]*(?:[Ee][+-]?)?[0-9]*))') +#jsonnum_re = re.compile(str2u(r''' +# -?(?: +# (?=[1-9]|\.(?:[^e]|$) +# |0(?:[^0-9e]|$))[0-9]*([Ee])[+-]?[0-9]*\.?[0-9]* +# |(?=[1-9]|\.(?:[^e]|$) +# |0(?:[^0-9e]|$))[0-9]*(\.?[0-9]*(?:[Ee][+-]?)?[0-9]*) +# ) +# '''), re.X) # They've fixed BUG-6657 by bringing BUG-6466 back to life. 
-jsonnum_re = re.compile(ur'-?(?:[0-9]*([Ee])-?[0-9]*\.?[0-9]*|(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*))') +jsonnum_re = re.compile(str2u(r''' + -?(?: + [0-9]*([Ee])-?[0-9]*\.?[0-9]* + |(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*) + ) + '''), re.X) -jsonstring_re = re.compile(ur'"(?:[^"\\]|\\.)*"') +jsonstring_re = re.compile(str2u(r'"(?:[^"\\]|\\.)*"')) # This might need some explanation. The ] and - are included in the first # set, the ] in the first after the ^ and the - in the last positions of @@ -91,7 +110,7 @@ jsonstring_re = re.compile(ur'"(?:[^"\\]|\\.)*"') # though it confuses things. The set comprises any character not in # -{}[],:"0123456789 # The second set comprises zero or more characters not in ,:]} -#word_re = re.compile(ur'[^][{}0-9",:-][^]},:]*') +#word_re = re.compile(str2u(r'[^][{}0-9",:-][^]},:]*')) # Screw that, we're using just a fallback. jsoncatchall_re = re.compile(u'(.*?)[\x09\x0A\x0B\x0C\x0D ]*(?:[]},]|$)') diff --git a/lslopt/lsllastpass.py b/lslopt/lsllastpass.py index 5d541b4..f950596 100644 --- a/lslopt/lsllastpass.py +++ b/lslopt/lsllastpass.py @@ -17,8 +17,8 @@ # Optimizations that have a negative effect on other stages. -import lslcommon -from lslcommon import nr +from lslopt import lslcommon +from lslopt.lslcommon import nr #from lslcommon import Vector, Quaternion #import lslfuncs #from lslfuncs import ZERO_VECTOR, ZERO_ROTATION diff --git a/lslopt/lslloadlib.py b/lslopt/lslloadlib.py index e755a02..c08f325 100644 --- a/lslopt/lslloadlib.py +++ b/lslopt/lslloadlib.py @@ -18,8 +18,8 @@ # Load the builtins and function properties. 
import sys, re -from lslcommon import types, warning, Vector, Quaternion -import lslcommon, lslfuncs +from lslopt.lslcommon import types, warning, Vector, Quaternion +from lslopt import lslcommon, lslfuncs def LoadLibrary(builtins = None, fndata = None): """Load builtins.txt and fndata.txt (or the given filenames) and return @@ -40,21 +40,21 @@ def LoadLibrary(builtins = None, fndata = None): # Library read code parse_lin_re = re.compile( - r'^\s*([a-z]+)\s+' - r'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*(' - r'[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*' - r'(?:\s*,\s*[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*)*' - r')?\s*\)\s*$' - r'|' - r'^\s*const\s+([a-z]+)' - r'\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*?)\s*$' - r'|' - r'^\s*(?:#.*|//.*)?$') - parse_arg_re = re.compile(r'^\s*([a-z]+)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*$') - parse_fp_re = re.compile(r'^\s*(-?(?=[0-9]|\.[0-9])[0-9]*' - r'((?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?))\s*$') - parse_int_re = re.compile(r'^\s*(-?0x[0-9A-Fa-f]+|-?[0-9]+)\s*$') - parse_str_re = re.compile(ur'^"((?:[^"\\]|\\.)*)"$') + br'^\s*([a-z]+)\s+' + br'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*(' + br'[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*' + br'(?:\s*,\s*[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*)*' + br')?\s*\)\s*$' + br'|' + br'^\s*const\s+([a-z]+)' + br'\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*?)\s*$' + br'|' + br'^\s*(?:#.*|//.*)?$') + parse_arg_re = re.compile(br'^\s*([a-z]+)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*$') + parse_fp_re = re.compile(br'^\s*(-?(?=[0-9]|\.[0-9])[0-9]*' + br'((?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?))\s*$') + parse_int_re = re.compile(br'^\s*(-?0x[0-9A-Fa-f]+|-?[0-9]+)\s*$') + parse_str_re = re.compile(u'^"((?:[^"\\\\]|\\\\.)*)"$') f = open(builtins, 'rb') try: diff --git a/lslopt/lsloptimizer.py b/lslopt/lsloptimizer.py index 0d61af4..3f332de 100644 --- a/lslopt/lsloptimizer.py +++ b/lslopt/lsloptimizer.py @@ -17,13 +17,13 @@ # Optimizer class that wraps and calls the other parts. 
-import lslfuncs +from lslopt import lslfuncs -from lslcommon import nr -from lslfoldconst import foldconst -from lslrenamer import renamer -from lsldeadcode import deadcode -from lsllastpass import lastpass +from lslopt.lslcommon import nr +from lslopt.lslfoldconst import foldconst +from lslopt.lslrenamer import renamer +from lslopt.lsldeadcode import deadcode +from lslopt.lsllastpass import lastpass class optimizer(foldconst, renamer, deadcode, lastpass): diff --git a/lslopt/lsloutput.py b/lslopt/lsloutput.py index 15854c6..1ea9658 100644 --- a/lslopt/lsloutput.py +++ b/lslopt/lsloutput.py @@ -17,9 +17,9 @@ # Convert an abstract syntax tree + symbol table back to a script as text. -import lslfuncs -import lslcommon -from lslcommon import Key, Vector, Quaternion, warning +from lslopt import lslfuncs +from lslopt import lslcommon +from lslopt.lslcommon import Key, Vector, Quaternion, warning from math import copysign debugScopes = False diff --git a/lslopt/lslparse.py b/lslopt/lslparse.py index f99c310..5cf6300 100644 --- a/lslopt/lslparse.py +++ b/lslopt/lslparse.py @@ -20,8 +20,9 @@ # TODO: Add info to be able to propagate error position to the source. 
-from lslcommon import Key, Vector, Quaternion, types, nr -import lslcommon, lslfuncs +from lslopt.lslcommon import Key, Vector, Quaternion, types, nr +from lslopt import lslcommon, lslfuncs +from strutil import * import re # Note this module was basically written from bottom to top, which may help @@ -70,8 +71,8 @@ class EParse(Exception): self.errorpos = parser.errorpos self.lno, self.cno, self.fname = GetErrLineCol(parser) filename = (self.fname.decode('utf8', 'replace') - .replace(u'\\', ur'\\') - .replace(u'"', ur'\"') + .replace(u'\\', u'\\\\') + .replace(u'"', u'\\"') ) if parser.processpre and filename != '': diff --git a/main.py b/main.py index 8a98803..b1c7ce8 100755 --- a/main.py +++ b/main.py @@ -31,6 +31,7 @@ from lslopt.lsloptimizer import optimizer import sys, os, getopt, re import lslopt.lslcommon import lslopt.lslloadlib +from strutil import * VERSION = '0.3.0beta' @@ -44,7 +45,7 @@ def ReportError(script, e): # When the encoding of stderr is unknown (e.g. when redirected to a file), # output will be encoded in UTF-8; otherwise the terminal's encoding will # be used. - enc = sys.stderr.encoding or 'utf8' + enc = getattr(sys.stderr, 'encoding', None) or 'utf8' # Synchronize the UTF-8 encoded line with the output line in the # terminal's encoding. We need to compensate for the fact that the @@ -58,15 +59,15 @@ def ReportError(script, e): # Write the whole line in the target encoding. err_line = script[linestart:lineend] + b'\n' - sys.stderr.write(err_line.decode('utf8').encode(enc, 'backslashreplace')) - sys.stderr.write(u" " * cno + u"^\n") - sys.stderr.write(e.args[0] + u"\n") + werr(err_line.decode('utf8')) + werr(" " * cno + "^\n") + werr(e.args[0] + u"\n") class UniConvScript(object): """Converts the script to Unicode, setting the properties required by EParse to report a meaningful error position. 
""" - def __init__(self, script, options = (), filename = ''): + def __init__(self, script, options = (), filename = b''): self.linedir = [] self.filename = filename # We don't interpret #line here. In case of an encode error, @@ -118,29 +119,29 @@ def PreparePreproc(script): # least surprise seems to suggest to accept valid LSL strings as LSL # instead of reproducing that C quirk. This also matches what FS is doing # currently, so it's good for compatibility. - tok = re.compile( - ur'(?:' - ur'/(?:\?\?/\n|\\\n)*\*.*?\*(?:\?\?/\n|\\\n)*/' - ur'|/(?:\?\?/\n|\\\n)*/(?:\?\?/\n|\\\n|[^\n])*\n' - ur'|[^"]' - ur')+' - ur'|"' - , re.S) + tok = re.compile(str2u( + r'(?:' + r'/(?:\?\?/\n|\\\n)*\*.*?\*(?:\?\?/\n|\\\n)*/' + r'|/(?:\?\?/\n|\\\n)*/(?:\?\?/\n|\\\n|[^\n])*\n' + r'|[^"]' + r')+' + r'|"' + ), re.S) # RE used inside strings. - tok2 = re.compile( - ur'(?:' - ur"\?\?[='()!<>-]" # valid trigraph except ??/ (backslash) - ur"|(?:\?\?/|\\)(?:\?\?[/='()!<>-]|[^\n])" + tok2 = re.compile(str2u( + r'(?:' + r"\?\?[='()!<>-]" # valid trigraph except ??/ (backslash) + r"|(?:\?\?/|\\)(?:\?\?[/='()!<>-]|[^\n])" # backslash trigraph or actual backslash, # followed by any trigraph or non-newline - ur'|(?!\?\?/\n|\\\n|"|\n).' + r'|(?!\?\?/\n|\\\n|"|\n).' # any character that doesn't start a trigraph/ # backslash escape followed by a newline # or is a newline or double quote, as we're # interested in all those individually. 
- ur')+' # as many of those as possible - ur'|\?\?/\n|\\\n|\n|"' # or any of those individually - ) + r')+' # as many of those as possible + r'|\?\?/\n|\\\n|\n|"' # or any of those individually + )) pos = 0 match = tok.search(script, pos) @@ -155,24 +156,24 @@ def PreparePreproc(script): matched2 = match2.group(0) pos += len(matched2) - if matched2 == u'\\\n' or matched2 == u'??/\n': + if matched2 == b'\\\n' or matched2 == b'??/\n': nlines += 1 col = 0 match2 = tok2.search(script, pos) continue - if matched2 == u'"': + if matched2 == b'"': if nlines: - if script[pos:pos+1] == u'\n': + if script[pos:pos+1] == b'\n': col = -1 # don't add spaces if not necessary # col misses the quote added here, so add 1 - s += u'"' + u'\n'*nlines + u' '*(col+1) + s += b'"' + b'\n'*nlines + b' '*(col+1) else: - s += u'"' + s += b'"' break - if matched2 == u'\n': + if matched2 == b'\n': nlines += 1 col = 0 - s += u'\\n' + s += b'\\n' else: col += len(matched2) s += matched2 @@ -186,20 +187,20 @@ def PreparePreproc(script): def ScriptHeader(script, avname): if avname: - avname = ' - ' + avname - return ('//start_unprocessed_text\n/*' + avname = b' - ' + avname + return (b'//start_unprocessed_text\n/*' # + re.sub(r'([*/])(?=[*|/])', r'\1|', script) # FS's algorithm # HACK: This won't break strings containing ** or /* or // like URLs, # while still being compatible with FS. - + re.sub(r'([*/]\||\*(?=/))', r'\1|', script) - + '*/\n//end_unprocessed_text\n//nfo_preprocessor_version 0\n' - '//program_version LSL PyOptimizer v' + VERSION + avname - + '\n//mono\n\n') + + re.sub(br'([*/]\||\*(?=/))', br'\1|', script) + + b'*/\n//end_unprocessed_text\n//nfo_preprocessor_version 0\n' + b'//program_version LSL PyOptimizer v' + str2b(VERSION) + + str2b(avname) + b'\n//mono\n\n') def Usage(progname, about = None): if about is None: - sys.stderr.write( -ur"""LSL optimizer v{version} + werr( +u"""LSL optimizer v{version} (C) Copyright 2015-2019 Sei Lisa. All rights reserved. 
@@ -253,12 +254,12 @@ Preprocessor modes: Normally, running the preprocessor needs the option 'processpre' active, to make the output readable by the optimizer. This option is active by default. -""".format(progname=progname, version=VERSION)) +""".format(progname=str2u(progname), version=str2u(VERSION))) return if about == 'optimizer-options': - sys.stderr.write( -ur""" + werr( +u""" Optimizer control options. + means active by default, - means inactive by default. Case insensitive. @@ -363,7 +364,7 @@ For example: {progname} -O -DCR,+BreakCont scriptname.lsl would turn off dead code removal (which is active by default) and turn on the break/continue syntax extension (which is inactive by default). -""".format(progname=progname)) +""".format(progname=str2u(progname))) return validoptions = frozenset(('extendedglobalexpr','breakcont','extendedtypecast', @@ -405,7 +406,7 @@ def main(argv): 'libdata=')) except getopt.GetoptError as e: Usage(argv[0]) - sys.stderr.write(u"\nError: %s\n" % str(e).decode('utf8', 'replace')) + werr(u"\nError: %s\n" % str(e).decode('utf8', 'replace')) return 1 outfile = '-' @@ -462,7 +463,7 @@ def main(argv): return 0 elif opt == '--version': - sys.stdout.write('LSL PyOptimizer version %s\n' % VERSION) + wout(u'LSL PyOptimizer version %s\n' % str2u(VERSION)) return 0 elif opt in ('-o', '--output'): @@ -558,7 +559,7 @@ def main(argv): fname = args[0] if args else None if fname is None: Usage(argv[0]) - sys.stderr.write(u"\nError: Input file not specified. Use -" + werr(u"\nError: Input file not specified. 
Use -" u" if you want to use stdin.\n") return 1 @@ -644,7 +645,7 @@ def main(argv): except EParse as e: # We don't call ReportError to prevent problems due to # displaying invalid UTF-8 - sys.stderr.write(e.args[0] + u"\n") + werr(e.args[0] + u"\n") return 1 if preproc != 'none': diff --git a/run-tests.py b/run-tests.py index 2f391fb..096616d 100755 --- a/run-tests.py +++ b/run-tests.py @@ -56,9 +56,13 @@ try: import difflib except ImportError: difflib = None -import StringIO as StringStream +if sys.hexversion < 0x3000000: + from StringIO import StringIO as StringStream +else: + from io import BytesIO as StringStream from lslopt import lslcommon,lslfuncs,lslparse,lsloutput,lslloadlib from lslopt.lslcommon import nr +from strutil import * class EArgError(Exception): pass @@ -89,25 +93,25 @@ def parseArgs(s): State = Space p = 0 Len = len(s) - arg = '' + arg = b'' while p < Len: - c = s[p] + c = s[p:p+1] p += 1 if State in (Space, Normal): - if c == '\\': + if c == b'\\': State = NBackslash if State == Normal else SBackslash - elif c == '"': + elif c == b'"': State = DQuote - elif c == "'": + elif c == b"'": State = SQuote - elif c in (' ', '\t'): + elif c in (b' ', b'\t'): if State == Normal: State = Space args.append(arg) - arg = '' + arg = b'' # else remain in the 'Space' state - elif c == '\n': + elif c == b'\n': break else: State = Normal @@ -118,20 +122,20 @@ def parseArgs(s): else Space if State == SBackslash else Normal) else: - if State == DQBackslash and c not in ('"', '`', '$', '\\'): - arg += '\\' + if State == DQBackslash and c not in (b'"', b'`', b'$', b'\\'): + arg += b'\\' arg += c State = DQuote if State == DQBackslash else Normal elif State == DQuote: - if c == '\\': + if c == b'\\': State = DQBackslash # ` and $ are not interpreted by this parser. 
- elif c == '"': + elif c == b'"': State = Normal else: arg += c elif State == SQuote: - if c == "'": + if c == b"'": State = Normal else: arg += c @@ -185,7 +189,7 @@ def parseArgs(s): def tryRead(fn): result = None try: - f = open(fn, 'r') + f = open(fn, 'rb') try: result = f.read() finally: @@ -197,12 +201,9 @@ def tryRead(fn): # In StringIO, mixing unicode and str causes problems with non-ASCII chars. # Avoid it by overriding the write method, to always encode unicode as UTF-8. -class StrUTF8IO(StringStream.StringIO): +class StrUTF8IO(StringStream): def write(self, s): - if type(s) == unicode: - StringStream.StringIO.write(self, s.encode('utf8')) - else: - StringStream.StringIO.write(self, s) + StringStream.write(self, any2b(s)) def invokeMain(argv, stdin = None): """Invoke main.main, substituting stdin, stdout, stderr. @@ -218,7 +219,7 @@ def invokeMain(argv, stdin = None): stdout_output = None stderr_output = None try: - sys.stdin = StringStream.StringIO(stdin) + sys.stdin = StringStream(stdin) sys.stdout = StrUTF8IO() sys.stderr = StrUTF8IO() sys.stdin.encoding = 'utf8' @@ -314,8 +315,10 @@ class UnitTestRegression(UnitTestCase): stdout_output = False stderr_output = False try: - sys.stdout = StringStream.StringIO() - sys.stderr = StringStream.StringIO() + sys.stdout = StringStream() + sys.stdout.encoding = 'utf8' + sys.stderr = StringStream() + sys.stderr.encoding = 'utf8' errs = json.run_tests() stdout_output = sys.stdout.getvalue() stderr_output = sys.stderr.getvalue() @@ -439,7 +442,8 @@ class UnitTestCoverage(UnitTestCase): self.assertEqual(repr(lslfuncs.q2f(lslcommon.Quaternion((1,0,0,0)))), 'Quaternion((1.0, 0.0, 0.0, 0.0))') # Key repr coverage - self.assertEqual(repr(lslcommon.Key(u'')), "Key(u'')") + self.assertEqual(repr(lslcommon.Key(u'')), "Key(u'')" + if str != unicode else "Key('')") # string + key coverage self.assertEqual(lslfuncs.add(u'a', lslcommon.Key(u'b')), u'ab') @@ -684,8 +688,8 @@ def generateScriptTests(): def 
makeTestFunction(fbase, suite): def TestFunction(self): stdin = tryRead(fbase + '.lsl') or '' - expected_stdout = tryRead(fbase + '.out') or '' - expected_stderr = tryRead(fbase + '.err') or '' + expected_stdout = tryRead(fbase + '.out') or b'' + expected_stderr = tryRead(fbase + '.err') or b'' runargs = (parseArgs(tryRead(fbase + '.run')) or (['main.py', '-y', '-'] if suite != 'Expr' else ['main.py', @@ -694,18 +698,18 @@ def generateScriptTests(): ',addstrings,expr', '-y', '-'])) - sys.stderr.write("\nRunning test %s: " % fbase) + werr(u"\nRunning test %s: " % any2u(fbase)) actual_stdout, actual_stderr = invokeMain(runargs, stdin) - actual_stdout = (actual_stdout.replace('\r','\r\n') - .replace('\r\n\n','\n') - .replace('\r\n','\n')) + actual_stdout = (actual_stdout.replace(b'\r',b'\r\n') + .replace(b'\r\n\n',b'\n') + .replace(b'\r\n',b'\n')) - actual_stderr = (actual_stderr.replace('\r','\r\n') - .replace('\r\n\n','\n') - .replace('\r\n','\n')) + actual_stderr = (actual_stderr.replace(b'\r',b'\r\n') + .replace(b'\r\n\n',b'\n') + .replace(b'\r\n',b'\n')) try: - if expected_stderr.startswith('REGEX\n'): + if expected_stderr.startswith(b'REGEX\n'): self.assertIsNotNone( re.search(expected_stderr[6:], actual_stderr.decode('utf8') @@ -714,66 +718,67 @@ def generateScriptTests(): else: self.assertTrue(expected_stderr == actual_stderr) except AssertionError: - sys.stderr.write('Failed' - '\n************ expected stderr:\n') - sys.stderr.write(expected_stderr) - sys.stderr.write('\n************ actual stderr:\n') - sys.stderr.write(actual_stderr) + werr(u'Failed' + u'\n************ expected stderr:\n') + werr(expected_stderr) + werr(u'\n************ actual stderr:\n') + werr(actual_stderr) if difflib and expected_stderr and actual_stderr: - sys.stderr.write('\n************ diff:\n' - + '\n'.join(difflib.unified_diff( - expected_stderr.split('\n'), - actual_stderr.split('\n'), + sys.stderr.write(u'\n************ diff:\n' + + u'\n'.join(difflib.unified_diff( + 
b2u(expected_stderr).split(u'\n'), + b2u(actual_stderr).split(u'\n'), 'expected', 'actual', lineterm='' ))) - sys.stderr.write('\n************ ') + werr(u'\n************ ') raise try: - if expected_stdout.startswith('REGEX\n'): + if expected_stdout.startswith(b'REGEX\n'): self.assertIsNotNone(re.search(expected_stdout[6:], actual_stdout)) else: self.assertTrue(expected_stdout == actual_stdout) except AssertionError: - sys.stderr.write('Failed' - '\n************ expected stdout:\n') - sys.stderr.write(expected_stdout) - sys.stderr.write('\n************ actual stdout:\n') - sys.stderr.write(actual_stdout) + werr(u'Failed' + u'\n************ expected stdout:\n') + werr(expected_stdout) + werr(u'\n************ actual stdout:\n') + werr(actual_stdout) if difflib and expected_stdout and actual_stdout: - sys.stderr.write('\n************ diff:\n' - + '\n'.join(difflib.unified_diff( - expected_stdout.split('\n'), - actual_stdout.split('\n'), + werr(u'\n************ diff:\n' + + u'\n'.join(difflib.unified_diff( + b2u(expected_stdout).split('\n'), + b2u(actual_stdout).split('\n'), 'expected', 'actual', lineterm='' ))) - sys.stderr.write('\n************ ') + sys.stderr.write(u'\n************ ') raise return TestFunction TestFunction = makeTestFunction(fbase, testsuite) # __doc__ is used by Eric - line = '' + line = b'' try: - f = open(fbase + '.lsl') + f = open(fbase + '.lsl', 'rb') try: line = f.readline() - if line.endswith('\r\n'): + if line.endswith(b'\r\n'): line = line[:-2] - elif line[-1:] in ('\r', '\n'): + elif line[-1:] in (b'\r', b'\n'): line = line[:-1] finally: f.close() except IOError as e: if e.errno != 2: raise - TestFunction.__doc__ = line[3:] if line.startswith('// ') else None + TestFunction.__doc__ = (b2u(line[3:]) if line.startswith(b'// ') + else None) TestFunction.__name__ = ('test_' + testsuite + '__' + os.path.basename(fbase).replace('-','_')) fail = tryRead(fbase + '.fail') if fail is not None: if fail: - TestFunction.__doc__ = fail + 
TestFunction.__doc__ = b2u(fail) TestFunction = unittest.expectedFailure(TestFunction) else: skip = tryRead(fbase + '.skp') @@ -786,3 +791,4 @@ def generateScriptTests(): generateScriptTests() if __name__ == '__main__': unittest.main(argv = sys.argv) +#UnitTestRegression().test_Regression__multiline_string() diff --git a/strutil.py b/strutil.py new file mode 100644 index 0000000..aae6159 --- /dev/null +++ b/strutil.py @@ -0,0 +1,85 @@ +# (C) Copyright 2015-2019 Sei Lisa. All rights reserved. +# +# This file is part of LSL PyOptimizer. +# +# LSL PyOptimizer is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# LSL PyOptimizer is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>. + +# String <-> Bytes conversion and output utilities + +import sys +if sys.hexversion >= 0x3000000: + unicode = str + unichr = chr + def str2u(s, enc=None): + """Convert a native Python3 str to Unicode. This is a NOP.""" + return s + + def str2b(s, enc=None): + """Convert a native Python3 str to bytes, with the given encoding.""" + return s.encode(enc if type(enc) == str + else getattr(enc, 'encoding', None) or 'utf8', + 'backslashreplace') + + def u2str(s, enc=None): + """Convert a Unicode string to native Python 3 str. 
This is a NOP.""" + return s + + def b2str(s, enc=None): + """Convert a Bytes string to native Python 3 str.""" + return s.decode(getattr(enc, 'encoding', enc) or 'utf8', + 'backslashreplace') + +else: + def str2u(s, enc=None): + """Convert a native Python2 str to Unicode.""" + return s.decode(getattr(enc, 'encoding', enc) or 'utf8', + 'replace') + + def str2b(s, enc=None): + """Convert a native Python2 str to bytes. This is a NOP.""" + return s + + def u2str(s, enc=None): + """Convert a Unicode string to native Python 2 str.""" + return s.encode(enc if type(enc) == str + else getattr(enc, 'encoding', None) or 'utf8', + 'backslashreplace') + + def b2str(s, enc=None): + """Convert a Bytes string to native Python 2 str. This is a NOP.""" + return s + +def b2u(s, enc=None): + """Bytes to Unicode""" + return str2u(b2str(s, enc), enc) + +def u2b(s, enc=None): + """Unicode to Bytes""" + return u2str(str2b(s, enc), enc) + +def any2b(s, enc=None): + """Bytes or Unicode to Bytes""" + return s if type(s) == bytes else u2b(s, enc) + +def any2u(s, enc=None): + """Bytes or Unicode to Unicode""" + return s if type(s) == unicode else b2u(s, enc) + +def werr(s): + """Write any string to stderr""" + sys.stderr.write(any2u(s, sys.stderr)) + +def wout(s): + """Write any string to stdout""" + sys.stdout.write(any2u(s, sys.stdout)) diff --git a/unit_tests/json.py b/unit_tests/json.py index 30d4012..37864ce 100644 --- a/unit_tests/json.py +++ b/unit_tests/json.py @@ -1,4 +1,5 @@ import sys +from strutil import * from lslopt.lslfuncs import * tests = 0 @@ -7,7 +8,6 @@ errors = 0 # Begin JSON tests from http://wiki.secondlife.com/wiki/Json_usage_in_LSL/TestScript def verify(msg, result, expected): global tests - werr = sys.stderr.write tests += 1 if expected != result: global errors @@ -356,7 +356,6 @@ def test_jira_fixes(): maint3081(); def run_tests(): - werr = sys.stderr.write # JSON tests from the wiki test_types(); test_get_value();