Misc small non-user-visible fixes

Some are Unicode-vs-str fixes, others are style adjustments, others are in unused functions, and so on.
2025-07-05 12:44:30 -07:00 · 2020-11-09 01:51:24 +01:00 · 2020-11-09 01:51:24 +01:00 · d9938f1a37
commit d9938f1a37
parent 59c3f9fc71
5 changed files with 33 additions and 32 deletions
--- a/lslopt/lslinliner.py
+++ b/lslopt/lslinliner.py
@ -18,9 +18,9 @@
 # Expand inlined functions. This could perhaps be made at parse time, but that
 # would obfuscate the source too much.

-from lslcommon import nr
+from lslopt.lslcommon import nr

-# Statement-level nodes that have at most 1 child and is of type expression
+# Statement-level nodes that have at most 1 child and are of type expression
 SINGLE_OPT_EXPR_CHILD_NODES = frozenset({'DECL', 'EXPR', 'RETURN',
    '@', 'STSW', 'JUMP', ';', 'LAMBDA'})

--- a/lslopt/lslparse.py
+++ b/lslopt/lslparse.py
@ -3007,7 +3007,7 @@ list lazy_list_set(list L, integer i, list v)
        del self.scopestack

        if self.enable_inline:
-            import lslinliner
+            from lslopt import lslinliner
            lslinliner.inliner().inline(self.tree, self.symtab)

        treesymtab = self.tree, self.symtab
--- a/main.py
+++ b/main.py
@ -96,7 +96,7 @@ def PreparePreproc(script):
    line up to the point where the string was closed. That will place the next
    token in the same line and column it previously was.
    """
-    s = ''
+    s = u''
    nlines = 0
    col = 0

@ -120,29 +120,29 @@ def PreparePreproc(script):
    # least surprise seems to suggest to accept valid LSL strings as LSL
    # instead of reproducing that C quirk. This also matches what FS is doing
    # currently, so it's good for compatibility.
-    tok = re.compile(str2u(
+    tok = re.compile(str2u(  # Python 3.5 does not recognize ur'...' literals
        r'(?:'
            r'/(?:\?\?/\n|\\\n)*\*.*?\*(?:\?\?/\n|\\\n)*/'
            r'|/(?:\?\?/\n|\\\n)*/(?:\?\?/\n|\\\n|[^\n])*\n'
            r'|[^"]'
        r')+'
        r'|"'
-        ), re.S)
+        , 'utf8'), re.S)
    # RE used inside strings.
    tok2 = re.compile(str2u(
        r'(?:'
            r"\?\?[='()!<>-]"  # valid trigraph except ??/ (backslash)
            r"|(?:\?\?/|\\)(?:\?\?[/='()!<>-]|[^\n])"
-                                # backslash trigraph or actual backslash,
-                                # followed by any trigraph or non-newline
+                               # backslash trigraph or actual backslash,
+                               # followed by any trigraph or non-newline
            r'|(?!\?\?/\n|\\\n|"|\n).'
-                                # any character that doesn't start a trigraph/
-                                # backslash escape followed by a newline
-                                # or is a newline or double quote, as we're
-                                # interested in all those individually.
-        r')+'                   # as many of those as possible
-        r'|\?\?/\n|\\\n|\n|"'   # or any of those individually
-        ))
+                               # any character that doesn't start a trigraph/
+                               # backslash escape followed by a newline
+                               # or is a newline or double quote, as we're
+                               # interested in all those individually.
+        r')+'                  # as many of those as possible
+        r'|\?\?/\n|\\\n|\n|"'  # or any of those individually
+        , 'utf8'))

    pos = 0
    match = tok.search(script, pos)
@ -157,24 +157,24 @@ def PreparePreproc(script):
                matched2 = match2.group(0)
                pos += len(matched2)

-                if matched2 == b'\\\n' or matched2 == b'??/\n':
+                if matched2 == u'\\\n' or matched2 == u'??/\n':
                    nlines += 1
                    col = 0
                    match2 = tok2.search(script, pos)
                    continue
-                if matched2 == b'"':
+                if matched2 == u'"':
                    if nlines:
-                        if script[pos:pos+1] == b'\n':
+                        if script[pos:pos+1] == u'\n':
                            col = -1 # don't add spaces if not necessary
                        # col misses the quote added here, so add 1
-                        s += b'"' + b'\n'*nlines + b' '*(col+1)
+                        s += u'"' + u'\n'*nlines + u' '*(col+1)
                    else:
-                        s += b'"'
+                        s += u'"'
                    break
-                if matched2 == b'\n':
+                if matched2 == u'\n':
                    nlines += 1
                    col = 0
-                    s += b'\\n'
+                    s += u'\\n'
                else:
                    col += len(matched2)
                    s += matched2
@ -453,7 +453,7 @@ def main(argv):
                if chgfix[1:] not in validoptions:
                    Usage(argv[0], 'optimizer-options')
                    werr(u"\nError: Unrecognized"
-                        u" optimizer option: %s\n" % chg.decode('utf8'))
+                         u" optimizer option: %s\n" % chg.decode('utf8'))
                    return 1
                if chgfix[0] == '-':
                    options.discard(chgfix[1:])
@ -642,6 +642,7 @@ def main(argv):
        # Append user arguments at the end to allow them to override defaults
        preproc_cmdline += preproc_user_postargs

+        # Transform to bytes and check Unicode validity
        if type(script) is unicode:
            script = script.encode('utf8')
        else:
--- a/run-tests.py
+++ b/run-tests.py
@ -698,7 +698,7 @@ def generateScriptTests():
            # Create a closure with the test data
            def makeTestFunction(fbase, suite):
                def TestFunction(self):
-                    stdin = tryRead(fbase + '.lsl') or ''
+                    stdin = tryRead(fbase + '.lsl') or b''
                    expected_stdout = tryRead(fbase + '.out') or b''
                    expected_stderr = tryRead(fbase + '.err') or b''
                    runargs = (parseArgs(tryRead(fbase + '.run', Binary=False))
@ -734,8 +734,9 @@ def generateScriptTests():
                        werr(expected_stderr)
                        werr(u'\n************ actual stderr:\n')
                        werr(actual_stderr)
-                        if difflib and expected_stderr and actual_stderr:
-                            sys.stderr.write(u'\n************ diff:\n'
+                        if difflib and expected_stderr and actual_stderr \
+                           and not expected_stderr.startswith(b'REGEX\n'):
+                            werr(u'\n************ diff:\n'
                                 + u'\n'.join(difflib.unified_diff(
                                    b2u(expected_stderr).split(u'\n'),
                                    b2u(actual_stderr).split(u'\n'),
@ -755,7 +756,8 @@ def generateScriptTests():
                        werr(expected_stdout)
                        werr(u'\n************ actual stdout:\n')
                        werr(actual_stdout)
-                        if difflib and expected_stdout and actual_stdout:
+                        if difflib and expected_stdout and actual_stdout \
+                           and not expected_stdout.startswith(b'REGEX\n'):
                            werr(u'\n************ diff:\n'
                                 + u'\n'.join(difflib.unified_diff(
                                    b2u(expected_stdout).split('\n'),
--- a/strutil.py
+++ b/strutil.py
@ -31,8 +31,7 @@ if sys.version_info.major >= 3:

    def str2b(s, enc=None):
        """Convert a native Python3 str to bytes, with the given encoding."""
-        return s.encode(enc if type(enc) == str
-                        else getattr(enc, 'encoding', 'utf8'),
+        return s.encode(getattr(enc, 'encoding', enc) or 'utf8',
                        'backslashreplace')

    def u2str(s, enc=None):
@ -56,8 +55,7 @@ else:

    def u2str(s, enc=None):
        """Convert a Unicode string to native Python 2 str."""
-        return s.encode(enc if type(enc) == str
-                        else getattr(enc, 'encoding', 'utf8'),
+        return s.encode(getattr(enc, 'encoding', enc) or 'utf8',
                        'backslashreplace')

    def b2str(s, enc=None):
@ -70,7 +68,7 @@ def b2u(s, enc=None):

 def u2b(s, enc=None):
    """Unicode to Bytes"""
-    return u2str(str2b(s, enc), enc)
+    return str2b(u2str(s, enc), enc)

 def any2b(s, enc=None):
    """Bytes or Unicode to Bytes"""