Progress towards dual Python 2 & 3

Sei Lisa 2020-11-09 02:28:57 +01:00
parent dde9577cea
commit f8cf78dfac
10 changed files with 100 additions and 80 deletions
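
The changes below route nearly all byte/text handling through a small strutil compatibility module (imported in the hunks either by name, as xrange and unicode, or via from strutil import *, which supplies str2u, u2str, b2u, u2b, any2u, any2str and str2b). strutil.py itself is not among the hunks shown here, so the following is only a rough sketch of what such a shim typically looks like, with names and signatures inferred from the call sites in this commit rather than copied from the real file:

import sys

python3 = sys.version_info[0] >= 3   # assumed flag; main.py below tests a name 'python3'

if python3:
    unicode = str     # Python 3 has no separate unicode type
    xrange = range    # Python 3's range is already lazy

def str2u(s, enc='utf8'):
    # native str -> unicode text (decode on Python 2, no-op on Python 3)
    return s if isinstance(s, unicode) else s.decode(enc)

def b2u(b, enc='utf8'):
    # bytes -> unicode text
    return b.decode(enc) if isinstance(b, bytes) else b

def u2b(u, enc='utf8'):
    # unicode text -> bytes
    return u.encode(enc) if isinstance(u, unicode) else u

def u2str(u, enc='utf8'):
    # unicode text -> native str (encode on Python 2, no-op on Python 3)
    return u if python3 else u2b(u, enc)

def str2b(s, enc='utf8'):
    # native str -> bytes (encode on Python 3, no-op on Python 2)
    return s.encode(enc) if python3 else s

def any2u(x, enc='utf8'):
    # bytes or native str -> unicode text
    return b2u(x, enc)

def any2str(x, enc='utf8'):
    # bytes or unicode -> whatever str is on the running interpreter
    return str2u(x, enc) if python3 else u2b(x, enc)

With a shim along these lines, the per-file edits below mostly amount to importing the helpers and replacing direct .decode()/.encode() calls and bytes regex patterns with their native-str equivalents.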

View file

@@ -19,6 +19,7 @@
from lslopt import lslfuncs
from lslopt.lslcommon import nr
from strutil import xrange
class deadcode(object):
@@ -530,7 +531,7 @@ class deadcode(object):
self.MarkReferences(statedef)
# Track removal of global lines, to reassign locations later.
LocMap = range(len(self.tree))
LocMap = list(range(len(self.tree)))
GlobalDeletions = []

View file

@@ -23,6 +23,7 @@ from lslopt import lslfuncs
from lslopt.lslfuncs import ZERO_VECTOR, ZERO_ROTATION
import math
from lslopt.lslfuncopt import OptimizeFunc, OptimizeArgs, FuncOptSetup
from strutil import xrange, unicode
# TODO: Remove special handling of @ within IF,WHILE,FOR,DO

View file

@@ -25,6 +25,7 @@ from lslopt.lslcommon import nr
#import math
#from lslparse import warning
#from lslfuncopt import OptimizeFunc, OptimizeArgs, FuncOptSetup
from strutil import xrange
class rec:
def __init__(self, **init):

View file

@@ -20,6 +20,7 @@
import sys, re
from lslopt.lslcommon import types, warning, Vector, Quaternion
from lslopt import lslcommon, lslfuncs
from strutil import *
def LoadLibrary(builtins = None, fndata = None):
"""Load builtins.txt and fndata.txt (or the given filenames) and return
@@ -40,27 +41,27 @@ def LoadLibrary(builtins = None, fndata = None):
# Library read code
parse_lin_re = re.compile(
br'^\s*([a-z]+)\s+'
br'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*('
br'[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*'
br'(?:\s*,\s*[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*)*'
br')?\s*\)\s*$'
br'|'
br'^\s*const\s+([a-z]+)'
br'\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*?)\s*$'
br'|'
br'^\s*(?:#.*|//.*)?$')
parse_arg_re = re.compile(br'^\s*([a-z]+)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*$')
parse_fp_re = re.compile(br'^\s*(-?(?=[0-9]|\.[0-9])[0-9]*'
br'((?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?))\s*$')
parse_int_re = re.compile(br'^\s*(-?0x[0-9A-Fa-f]+|-?[0-9]+)\s*$')
r'^\s*([a-z]+)\s+'
r'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*('
r'[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*'
r'(?:\s*,\s*[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*)*'
r')?\s*\)\s*$'
r'|'
r'^\s*const\s+([a-z]+)'
r'\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*?)\s*$'
r'|'
r'^\s*(?:#.*|//.*)?$')
parse_arg_re = re.compile(r'^\s*([a-z]+)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*$')
parse_fp_re = re.compile(r'^\s*(-?(?=[0-9]|\.[0-9])[0-9]*'
r'((?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?))\s*$')
parse_int_re = re.compile(r'^\s*(-?0x[0-9A-Fa-f]+|-?[0-9]+)\s*$')
parse_str_re = re.compile(u'^"((?:[^"\\\\]|\\\\.)*)"$')
f = open(builtins, 'rb')
f = open(builtins, 'r')
try:
linenum = 0
try:
ubuiltins = builtins.decode(sys.getfilesystemencoding())
ubuiltins = str2u(builtins, sys.getfilesystemencoding())
except UnicodeDecodeError:
# This is just a guess at the filename encoding.
ubuiltins = builtins.decode('iso-8859-15')
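
The pattern changes in this hunk go hand in hand with opening builtins.txt in text mode ('rb' becomes 'r') in the same hunk: once readline() yields text instead of bytes, a bytes pattern can no longer be applied to it under Python 3. A standalone illustration of that constraint, using a made-up library line:

import re

pat_bytes = re.compile(br'^\s*const\s')
pat_text = re.compile(r'^\s*const\s')

line = 'const integer TRUE = 1'     # what text-mode readline() yields on Python 3
print(bool(pat_text.match(line)))   # True
try:
    pat_bytes.match(line)           # TypeError on Python 3: bytes pattern on str input
except TypeError as e:
    print('bytes pattern rejected:', e)
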
@@ -70,7 +71,7 @@ def LoadLibrary(builtins = None, fndata = None):
if not line: break
if line[-1] == '\n': line = line[:-1]
try:
uline = line.decode('utf8')
uline = str2u(line, 'utf8')
except UnicodeDecodeError:
warning(u"Bad Unicode in %s line %d" % (ubuiltins, linenum))
continue
@@ -153,7 +154,7 @@ def LoadLibrary(builtins = None, fndata = None):
elif typ == 'float':
value = lslfuncs.F32(float(value))
elif typ == 'string':
value = value.decode('utf8')
value = str2u(value, 'utf8')
if parse_str_re.search(value):
esc = False
tmp = value[1:-1]
@@ -242,14 +243,14 @@ def LoadLibrary(builtins = None, fndata = None):
# TODO: "quaternion" doesn't compare equal to "rotation" even if they are
# equivalent. Canonicalize it before comparison, to avoid false
# reports of mismatches.
f = open(fndata, 'rb')
f = open(fndata, 'r')
try:
linenum = 0
curr_fn = None
curr_ty = None
skipping = False
try:
ufndata = fndata.decode(sys.getfilesystemencoding())
ufndata = str2u(fndata, sys.getfilesystemencoding())
except UnicodeDecodeError:
# This is just a guess at the filename encoding.
ufndata = fndata.decode('iso-8859-15')
@@ -259,7 +260,7 @@ def LoadLibrary(builtins = None, fndata = None):
if not line: break
if line[-1] == '\n': line = line[:-1]
try:
uline = line.decode('utf8')
uline = str2u(line, 'utf8')
except UnicodeDecodeError:
warning(u"Bad Unicode in %s line %d" % (ufndata, linenum))
continue
@@ -272,7 +273,7 @@ def LoadLibrary(builtins = None, fndata = None):
if match_fn and (rettype in ('void', 'event') or rettype in types):
skipping = True # until proven otherwise
name = match_fn.group(2)
uname = name.decode('utf8')
uname = str2u(name, 'utf8')
if (rettype == 'event' and name not in events
or rettype != 'event' and name not in functions
):
@@ -347,7 +348,7 @@ def LoadLibrary(builtins = None, fndata = None):
skipping = True
continue
if not skipping:
ucurr_fn = curr_fn.decode('utf8')
ucurr_fn = str2u(curr_fn, 'utf8')
if match_flag.group(1):
# SEF
# We don't handle conditions yet. Take the
@@ -438,7 +439,7 @@ def LoadLibrary(builtins = None, fndata = None):
# Post-checks
for i in functions:
ui = i.decode('utf8')
ui = str2u(i, 'utf8')
if 'NeedsData' in functions[i]:
del functions[i]['NeedsData']
warning(u"Library data, file %s: Function %s has no data."
@@ -455,7 +456,7 @@ def LoadLibrary(builtins = None, fndata = None):
u" delay. Removing SEF." % ui)
del functions[i]['SEF']
for i in events:
ui = i.decode('utf8')
ui = str2u(i, 'utf8')
if 'NeedsData' in events[i]:
del events[i]['NeedsData']
warning(u"Library data, file %s: Event %s has no data."

View file

@@ -21,6 +21,7 @@ from lslopt import lslfuncs
from lslopt import lslcommon
from lslopt.lslcommon import Key, Vector, Quaternion, warning
from math import copysign
from strutil import *
debugScopes = False
@@ -62,7 +63,7 @@ class outscript(object):
" spaces by the viewer when copy-pasting the code"
" (disable this warning by disabling the 'warntabs'"
" option).")
return pfx + '"' + value.encode('utf8').replace('\\','\\\\') \
return pfx + '"' + any2str(value, 'utf8').replace('\\','\\\\') \
.replace('"','\\"').replace('\n','\\n') + '"' + sfx
if tvalue == int:
if value < 0 and not self.globalmode and self.optsigns:

View file

@@ -29,6 +29,10 @@ import re
# Note this module was basically written from bottom to top, which may help
# reading it.
WHITESPACE_CHARS = frozenset({' ', '\r', '\n', '\x0B', '\x0C'})
SINGLE_SYMBOLS = frozenset({'.', ';', '{', '}', ',', '=', '(', ')', '-', '+',
'*', '/', '%', '@', ':', '<', '>', '[', ']', '&', '|', '^', '~', '!'})
def isdigit(c):
return '0' <= c <= '9'
@@ -48,7 +52,7 @@ def GetErrLineCol(parser):
# Find start of current line
lstart = parser.script.rfind('\n', 0, errorpos) + 1
# Find zero-based column number in characters
cno = len(parser.script[lstart:errorpos].decode('utf8'))
cno = len(any2u(parser.script[lstart:errorpos], 'utf8'))
# Find in #line directives list
i = len(parser.linedir)
filename = '<stdin>' # value to return if there's no #line before lno
@@ -75,7 +79,7 @@ class EParse(Exception):
if parser.emap and filename == '<stdin>':
filename = parser.filename
filename = (filename.decode('utf8', 'replace')
filename = (str2u(filename, 'utf8')
.replace(u'\\', u'\\\\')
.replace(u'"', u'\\"')
)
@@ -543,7 +547,7 @@ class parser(object):
# self.linestart is related to the preprocessor, therefore we
# check the characters that are relevant for standard C.
if c not in ' \n\r\x0B\x0C':
if c not in WHITESPACE_CHARS:
self.linestart = False
# Process strings
@@ -584,7 +588,7 @@ class parser(object):
if is_string:
self.pos += 1
return ('STRING_VALUE', lslfuncs.zstr(strliteral.decode('utf8')))
return ('STRING_VALUE', lslfuncs.zstr(str2u(strliteral, 'utf8')))
# fall through (to consider the L or to ignore the ")
if isalpha_(c):
@@ -705,7 +709,7 @@ class parser(object):
return (self.script[self.pos-3:self.pos],)
return (self.script[self.pos-2:self.pos],)
if c in '.;{},=()-+*/%@:<>[]&|^~!' and c != '':
if c in SINGLE_SYMBOLS:
return (c,)
if c == '\n':
@@ -2801,8 +2805,7 @@ list lazy_list_set(list L, integer i, list v)
self.filename = filename
if type(script) is unicode:
script = script.encode('utf8')
script = any2str(script, 'utf8')
self.script = script
self.length = len(script)

View file

@@ -23,6 +23,8 @@
#
# A side effect of this change is that the script becomes unreadable gibberish.
from strutil import xrange
class renamer(object):
CharSet1 = '_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
CharSet2 = '0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'

75  main.py
View file

@@ -188,15 +188,15 @@ def PreparePreproc(script):
def ScriptHeader(script, avname):
if avname:
avname = b' - ' + avname
return (b'//start_unprocessed_text\n/*'
avname = ' - ' + avname
return ('//start_unprocessed_text\n/*'
# + re.sub(r'([*/])(?=[*|/])', r'\1|', script) # FS's algorithm
# HACK: This won't break strings containing ** or /* or // like URLs,
# while still being compatible with FS.
+ re.sub(br'([*/]\||\*(?=/))', br'\1|', script)
+ b'*/\n//end_unprocessed_text\n//nfo_preprocessor_version 0\n'
b'//program_version LSL PyOptimizer v' + str2b(VERSION)
+ str2b(avname) + b'\n//mono\n\n')
+ re.sub(r'([*/]\||\*(?=/))', r'\1|', script)
+ '*/\n//end_unprocessed_text\n//nfo_preprocessor_version 0\n'
'//program_version LSL PyOptimizer v' + VERSION
+ avname + '\n//mono\n\n')
def Usage(progname, about = None):
if about is None:
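
The re.sub in ScriptHeader above embeds the unprocessed script inside a /* ... */ block, so any '*/' inside the script would close that block early. The pattern escapes only '*/' (plus the already-escaped forms '*|' and '/|'), which is why '**', '//' and URLs pass through untouched, as the HACK comment notes. A quick standalone run of the same substitution on a made-up line:

import re

sample = 'x = "http://example.com"; /* a */ b ** c *| d'
print(re.sub(r'([*/]\||\*(?=/))', r'\1|', sample))
# -> x = "http://example.com"; /* a *|/ b ** c *|| d
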
@@ -453,7 +453,7 @@ def main(argv):
if chgfix[1:] not in validoptions:
Usage(argv[0], 'optimizer-options')
werr(u"\nError: Unrecognized"
u" optimizer option: %s\n" % chg.decode('utf8'))
u" optimizer option: %s\n" % str2u(chg, 'utf8'))
return 1
if chgfix[0] == '-':
options.discard(chgfix[1:])
@@ -591,6 +591,28 @@ def main(argv):
f.close()
del f
# Transform to str and check Unicode validity
if type(script) is unicode:
script = u2str(script, 'utf8')
else:
try:
# Try converting the script to Unicode, to report any encoding
# errors with accurate line information.
tmp = UniConvScript(script, options,
fname if fname != '-' else '<stdin>',
emap).to_unicode()
# For Python 2, just report any errors and ignore the result.
# For Python 3, use the Unicode.
if python3:
script = tmp
del tmp
except EParse as e:
# We don't call ReportError to prevent problems due to
# displaying invalid UTF-8
werr(e.args[0] + u"\n")
return 1
# Now script is in native str format.
if script_header:
script_header = ScriptHeader(script, avname)
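
The block above replaces the old "Transform to bytes" step that is removed further down: the source is normalized to the interpreter's native str as early as possible. On Python 2 that means leaving it as UTF-8 bytes and only validating it through UniConvScript; on Python 3 the validated Unicode result is kept. A minimal, helper-free illustration of what "native str" means here (the sample bytes are made up):

import sys
python3 = sys.version_info[0] >= 3      # mirrors the flag tested in the hunk above

raw = b'default{state_entry(){llOwnerSay("\xc3\xa1");}}'   # UTF-8 bytes as read from disk
script = raw.decode('utf8') if python3 else raw
assert type(script) is str              # "native str" on either interpreter
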
@@ -598,7 +620,7 @@ def main(argv):
import time
tmp = time.time()
script_timestamp = time.strftime(
b'// Generated on %Y-%m-%dT%H:%M:%S.{0:06d}Z\n'
'// Generated on %Y-%m-%dT%H:%M:%S.{0:06d}Z\n'
.format(int(tmp % 1 * 1000000)), time.gmtime(tmp))
del tmp
@@ -642,27 +664,11 @@ def main(argv):
# Append user arguments at the end to allow them to override defaults
preproc_cmdline += preproc_user_postargs
# Transform to bytes and check Unicode validity
if type(script) is unicode:
script = script.encode('utf8')
else:
try:
# Try converting the script to Unicode, to report any encoding
# errors with accurate line information. At this point we don't
# need the result.
UniConvScript(script, options,
fname if fname != '-' else '<stdin>',
emap).to_unicode()
except EParse as e:
# We don't call ReportError to prevent problems due to
# displaying invalid UTF-8
werr(e.args[0] + u"\n")
return 1
if preproc != 'none':
# PreparePreproc uses and returns Unicode string encoding.
script = u2b(PreparePreproc(any2u(script, 'utf8')), 'utf8')
# At this point, for the external preprocessor to work we need the
# script as a byte array, not as unicode, but it should be UTF-8.
script = PreparePreproc(script.decode('utf8')).encode('utf8')
if preproc == 'mcpp':
# As a special treatment for mcpp, we force it to output its
# macros so we can read if USE_xxx are defined. With GCC that
@@ -680,6 +686,8 @@ def main(argv):
return status
del p, status
script = any2str(script, 'utf8')
# This method is very imperfect, in several senses. However, since
# it's applied to the output of the preprocessor, all of the
# concerns should be addressed:
@@ -687,13 +695,13 @@ def main(argv):
# - Comments preceding the directive should not cause problems.
# e.g.: /* test */ #directive
# - #directive within a comment or string should be ignored.
for x in re.findall(br'(?:(?<=\n)|^)\s*#\s*define\s+('
br'USE_SWITCHES'
br'|USE_LAZY_LISTS'
br')(?:$|[^A-Za-z0-9_])', script, re.S):
if x == b'USE_SWITCHES':
for x in re.findall(r'(?:(?<=\n)|^)\s*#\s*define\s+('
r'USE_SWITCHES'
r'|USE_LAZY_LISTS'
r')(?:$|[^A-Za-z0-9_])', script, re.S):
if x == 'USE_SWITCHES':
options.add('enableswitch')
elif x == b'USE_LAZY_LISTS':
elif x == 'USE_LAZY_LISTS':
options.add('lazylists')
if not preshow:
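
Because script is a native str by this point (see the any2str call above), the #define scan switches to text patterns as well. The trailing (?:$|[^A-Za-z0-9_]) guard keeps longer identifiers from matching; for example, a hypothetical USE_SWITCHES_FOO would not register as USE_SWITCHES. A standalone run on made-up preprocessor output:

import re

out = ('# 1 "script.lsl"\n'
       '#define USE_SWITCHES\n'
       '  #  define USE_LAZY_LISTS 1\n'
       '#define USE_SWITCHES_FOO\n')
print(re.findall(r'(?:(?<=\n)|^)\s*#\s*define\s+('
                 r'USE_SWITCHES'
                 r'|USE_LAZY_LISTS'
                 r')(?:$|[^A-Za-z0-9_])', out, re.S))
# -> ['USE_SWITCHES', 'USE_LAZY_LISTS']
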
@@ -703,9 +711,10 @@ def main(argv):
lib = lslopt.lslloadlib.LoadLibrary(builtins, libdata)
p = parser(lib)
assert type(script) == str
try:
ts = p.parse(script, options,
fname if fname != '-' else '<stdin>')
'stdin' if fname == '-' else fname)
except EParse as e:
ReportError(script, e)
return 1

View file

@@ -213,9 +213,9 @@ def invokeMain(argv, stdin = None):
lslcommon.IsCalc = False
lslcommon.Bugs.clear()
lslcommon.Bugs.add(6495)
save_stdin = sys.stdin
save_stdout = sys.stdout
save_stderr = sys.stderr
lslcommon.save_stdin = sys.stdin
lslcommon.save_stdout = sys.stdout
lslcommon.save_stderr = sys.stderr
stdout_output = None
stderr_output = None
try:
@@ -231,9 +231,9 @@ def invokeMain(argv, stdin = None):
stdout_output = sys.stdout.getvalue()
stderr_output = sys.stderr.getvalue()
finally:
sys.stdin = save_stdin
sys.stdout = save_stdout
sys.stderr = save_stderr
sys.stdin = lslcommon.save_stdin
sys.stdout = lslcommon.save_stdout
sys.stderr = lslcommon.save_stderr
lslcommon.LSO = False
lslcommon.IsCalc = False
lslcommon.Bugs.clear()
@@ -721,10 +721,9 @@ def generateScriptTests():
try:
if expected_stderr.startswith(b'REGEX\n'):
self.assertIsNotNone(
re.search(expected_stderr[6:],
actual_stderr.decode('utf8')
)
self.assertIsNotNone(re.search(
b2u(expected_stderr[6:], 'utf8'),
b2u(actual_stderr, 'utf8'))
)
else:
self.assertTrue(expected_stderr == actual_stderr)
@@ -734,6 +733,7 @@ def generateScriptTests():
werr(expected_stderr)
werr(u'\n************ actual stderr:\n')
werr(actual_stderr)
# werr(('1' if difflib else '0')+('1' if expected_stderr else '0') + ('1' if actual_stderr else '0'))
if difflib and expected_stderr and actual_stderr \
and not expected_stderr.startswith(b'REGEX\n'):
werr(u'\n************ diff:\n'
@@ -746,8 +746,9 @@ def generateScriptTests():
raise
try:
if expected_stdout.startswith(b'REGEX\n'):
self.assertIsNotNone(re.search(expected_stdout[6:],
actual_stdout))
self.assertIsNotNone(re.search(
b2u(expected_stdout[6:], 'utf8'),
b2u(actual_stdout, 'utf8')))
else:
self.assertTrue(expected_stdout == actual_stdout)
except AssertionError:

View file

@@ -1,2 +1,2 @@
REGEX
IOError: (?:\[Errno 21\] Is a directory|\[Errno 13\] Permission denied): 'unit_tests/coverage.suite/actually-a-dir.d'
Error: (?:\[Errno 21\] Is a directory|\[Errno 13\] Permission denied): 'unit_tests/coverage.suite/actually-a-dir.d'