First baby steps towards dual Python2+3 compatibility

This commit is contained in:
Sei Lisa 2019-01-15 20:27:02 +01:00
parent 789eb85bfe
commit fe2dd9a721
17 changed files with 319 additions and 175 deletions

View file

@@ -34,12 +34,13 @@
# The JSON functions have been separated to their own module. # The JSON functions have been separated to their own module.
import re import re
from lslcommon import * from lslopt.lslcommon import *
import lslcommon from lslopt import lslcommon
from ctypes import c_float from ctypes import c_float
import math import math
import hashlib import hashlib
from base64 import b64encode, b64decode from base64 import b64encode, b64decode
from strutil import *
# Regular expressions used along the code. They are needed mainly because # Regular expressions used along the code. They are needed mainly because
@@ -58,18 +59,49 @@ from base64 import b64encode, b64decode
# as is (vector)"<1,inf,info>". The 1st gives <0,0,0>, the others <1,inf,inf>. # as is (vector)"<1,inf,info>". The 1st gives <0,0,0>, the others <1,inf,inf>.
# The lookahead (?!i) is essential for parsing them that way without extra code. # The lookahead (?!i) is essential for parsing them that way without extra code.
# Note that '|' in REs is order-sensitive. # Note that '|' in REs is order-sensitive.
float_re = re.compile(ur'^\s*[+-]?(?:0(x)(?:[0-9a-f]+(?:\.[0-9a-f]*)?|\.[0-9a-f]+)(?:p[+-]?[0-9]+)?' float_re = re.compile(str2u(r'''
ur'|(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:e[+-]?[0-9]+)?|inf|(nan))', ^\s*[+-]?(?:
re.I) 0(x)(?: # Hex float or hex int (captures the 'x')
vfloat_re = re.compile(ur'^\s*[+-]?(?:0(x)(?:[0-9a-f]+(?:\.[0-9a-f]*)?|\.[0-9a-f]+)(?:p[+-]?[0-9]+)?' [0-9a-f]+(?:\.[0-9a-f]*)?
ur'|(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:e[+-]?[0-9]+)?|infinity|inf(?!i)|(nan))', |\.[0-9a-f]+ # Hex digits
)(?:
p[+-]?[0-9]+ # Hex float exponent
)? # (optional)
|(?: # Decimal float or decimal int
[0-9]+(?:\.[0-9]*)?
|\.[0-9]+ # Decimal digits
)(?:
e[+-]?[0-9]+ # Decimal float exponent
)? # (optional)
|inf # Infinity
|(nan) # NaN (captured)
)
'''), re.I | re.X)
vfloat_re = re.compile(str2u(r'''
^\s*[+-]?(?:
0(x)(?: # Hex float or hex int (captures the 'x')
[0-9a-f]+(?:\.[0-9a-f]*)?
|\.[0-9a-f]+ # Hex digits
)(?:
p[+-]?[0-9]+ # Hex float exponent
)? # (optional)
|(?: # Decimal float or decimal int
[0-9]+(?:\.[0-9]*)?
|\.[0-9]+ # Decimal digits
)(?:
e[+-]?[0-9]+ # Decimal float exponent
)? # (optional)
|infinity|inf(?!i) # Infinity (the only difference with the above)
|(nan) # NaN (captured)
)
'''), re.I | re.X)
int_re = re.compile(str2u(r'^0(x)[0-9a-f]+|^\s*[+-]?[0-9]+'), re.I)
key_re = re.compile(str2u(r'^[0-9a-f]{8}(?:-[0-9a-f]{4}){4}[0-9a-f]{8}$'),
re.I) re.I)
int_re = re.compile(ur'^0(x)[0-9a-f]+|^\s*[+-]?[0-9]+', re.I) b64_re = re.compile(str2u(r'^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2,3})?'))
key_re = re.compile(ur'^[0-9a-f]{8}(?:-[0-9a-f]{4}){4}[0-9a-f]{8}$', re.I)
b64_re = re.compile(ur'^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2,3})?')
ZERO_VECTOR = Vector((0.0, 0.0, 0.0)) ZERO_VECTOR = Vector((0.0, 0.0, 0.0))
ZERO_ROTATION = Quaternion((0.0, 0.0, 0.0, 1.0)) ZERO_ROTATION = Quaternion((0.0, 0.0, 0.0, 1.0))

View file

@@ -18,6 +18,7 @@
# Classes, functions and variables for use of all modules. # Classes, functions and variables for use of all modules.
import sys import sys
from strutil import *
_exclusions = frozenset(('nt','t','name','value','ch', 'X','SEF')) _exclusions = frozenset(('nt','t','name','value','ch', 'X','SEF'))

View file

@@ -17,8 +17,8 @@
# Dead Code Removal optimization # Dead Code Removal optimization
import lslfuncs from lslopt import lslfuncs
from lslcommon import nr from lslopt.lslcommon import nr
class deadcode(object): class deadcode(object):

View file

@@ -17,8 +17,8 @@
# Extra functions that have predictable return values for certain arguments. # Extra functions that have predictable return values for certain arguments.
from lslcommon import Key, Vector #, Quaternion from lslopt.lslcommon import Key, Vector #, Quaternion
from lslbasefuncs import ELSLCantCompute, fi,ff,fs,fk,v2f,q2f,fl, \ from lslopt.lslbasefuncs import ELSLCantCompute, fi,ff,fs,fk,v2f,q2f,fl, \
NULL_KEY, ZERO_VECTOR, ZERO_ROTATION, \ NULL_KEY, ZERO_VECTOR, ZERO_ROTATION, \
TOUCH_INVALID_TEXCOORD, cond TOUCH_INVALID_TEXCOORD, cond
ff, q2f # keep pyflakes happy as these are not used ff, q2f # keep pyflakes happy as these are not used

View file

@@ -17,12 +17,12 @@
# Constant folding and simplification of expressions and statements. # Constant folding and simplification of expressions and statements.
import lslcommon from lslopt import lslcommon
from lslcommon import Vector, Quaternion, warning, nr from lslopt.lslcommon import Vector, Quaternion, warning, nr
import lslfuncs from lslopt import lslfuncs
from lslfuncs import ZERO_VECTOR, ZERO_ROTATION from lslopt.lslfuncs import ZERO_VECTOR, ZERO_ROTATION
import math import math
from lslfuncopt import OptimizeFunc, OptimizeArgs, FuncOptSetup from lslopt.lslfuncopt import OptimizeFunc, OptimizeArgs, FuncOptSetup
# TODO: Remove special handling of @ within IF,WHILE,FOR,DO # TODO: Remove special handling of @ within IF,WHILE,FOR,DO

View file

@@ -18,9 +18,9 @@
# Optimize calls to LSL library functions and parameters where possible # Optimize calls to LSL library functions and parameters where possible
# This is dependent on the LSL function library. # This is dependent on the LSL function library.
import lslcommon from lslopt import lslcommon
from lslcommon import Key, Vector, Quaternion, nr from lslopt.lslcommon import Key, Vector, Quaternion, nr
import lslfuncs from lslopt import lslfuncs
def OptimizeArgs(node, sym): def OptimizeArgs(node, sym):
"""Transform function arguments to shorter equivalents where possible.""" """Transform function arguments to shorter equivalents where possible."""

View file

@@ -17,6 +17,6 @@
# Put all LSL functions together in one single module # Put all LSL functions together in one single module
from lslbasefuncs import * from lslopt.lslbasefuncs import *
from lsljson import * from lslopt.lsljson import *
from lslextrafuncs import * from lslopt.lslextrafuncs import *

View file

@@ -19,8 +19,8 @@
import re import re
import math import math
from lslcommon import * from lslopt.lslcommon import *
from lslbasefuncs import llStringTrim, fs, fl, InternalTypecast from lslopt.lslbasefuncs import llStringTrim, fs, fl, InternalTypecast
# INCOMPATIBILITY NOTE: The JSON functions in SL have very weird behaviour # INCOMPATIBILITY NOTE: The JSON functions in SL have very weird behaviour
# in corner cases. Despite our best efforts, that behaviour is not replicated # in corner cases. Despite our best efforts, that behaviour is not replicated
@@ -44,8 +44,8 @@ JSON_DELETE = u'\uFDD8'
JSON_APPEND = -1 JSON_APPEND = -1
jsonesc_re = re.compile(u'[\x08\x09\x0A\x0C\x0D"/\\\\]') jsonesc_re = re.compile(u'[\x08\x09\x0A\x0C\x0D"/\\\\]')
jsonesc_dict = {u'\x08':ur'\b', u'\x09':ur'\t', u'\x0A':ur'\n', u'\x0C':ur'\f', jsonesc_dict = {u'\x08':u'\\b', u'\x09':u'\\t', u'\x0A':u'\\n', u'\x0C':u'\\f',
u'\x0D':ur'\r', u'"':ur'\"', u'/':ur'\/', u'\\':ur'\\'} u'\x0D':u'\\r', u'"':u'\\"', u'/':u'\\/', u'\\':u'\\\\'}
jsonunesc_dict = {u'b':u'\x08', u't':u'\x09', u'n':u'\x0A', u'f':u'\x0C', u'r':u'\x0D'} jsonunesc_dict = {u'b':u'\x08', u't':u'\x09', u'n':u'\x0A', u'f':u'\x0C', u'r':u'\x0D'}
# LSL JSON numbers differ from standard JSON numbers in many respects: # LSL JSON numbers differ from standard JSON numbers in many respects:
@@ -72,18 +72,37 @@ jsonunesc_dict = {u'b':u'\x08', u't':u'\x09', u'n':u'\x0A', u'f':u'\x0C', u'r':u
# elements when appropriate. # elements when appropriate.
# Real JSON number parser: # Real JSON number parser:
#jsonnum_re = re.compile(ur'-?(?:[1-9][0-9]*|0)(?:\.[0-9]+)?(?:[Ee][+-]?[0-9]+)?') #jsonnum_re = re.compile(str2u(
# r'-?(?:[1-9][0-9]*|0)(?:\.[0-9]+)?(?:[Ee][+-]?[0-9]+)?'
# ))
# BUG-6466 active: # BUG-6466 active:
jsonnumbug_re = re.compile(ur'-?(?:[0-9]*([Ee])-?[0-9]*\.?[0-9]*|(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*))') jsonnumbug_re = re.compile(str2u(r'''
-?(?:
[0-9]*([Ee])-?[0-9]*\.?[0-9]*
|(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*)
)
'''), re.X)
# BUG-6466 fixed: # BUG-6466 fixed:
# The new RE is just a modified version of the crap, allowing + exponents and # The new RE is just a modified version of the crap, allowing + exponents and
# disallowing zeros, sometimes even when legal (e.g. 0e0) # disallowing zeros, sometimes even when legal (e.g. 0e0)
#jsonnum_re = re.compile(ur'-?(?:(?=[1-9]|\.(?:[^e]|$)|0(?:[^0-9e]|$))[0-9]*([Ee])[+-]?[0-9]*\.?[0-9]*|(?=[1-9]|\.(?:[^e]|$)|0(?:[^0-9e]|$))[0-9]*(\.?[0-9]*(?:[Ee][+-]?)?[0-9]*))') #jsonnum_re = re.compile(str2u(r'''
# -?(?:
# (?=[1-9]|\.(?:[^e]|$)
# |0(?:[^0-9e]|$))[0-9]*([Ee])[+-]?[0-9]*\.?[0-9]*
# |(?=[1-9]|\.(?:[^e]|$)
# |0(?:[^0-9e]|$))[0-9]*(\.?[0-9]*(?:[Ee][+-]?)?[0-9]*)
# )
# '''), re.X)
# They've fixed BUG-6657 by bringing BUG-6466 back to life. # They've fixed BUG-6657 by bringing BUG-6466 back to life.
jsonnum_re = re.compile(ur'-?(?:[0-9]*([Ee])-?[0-9]*\.?[0-9]*|(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*))') jsonnum_re = re.compile(str2u(r'''
-?(?:
[0-9]*([Ee])-?[0-9]*\.?[0-9]*
|(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*)
)
'''), re.X)
jsonstring_re = re.compile(ur'"(?:[^"\\]|\\.)*"') jsonstring_re = re.compile(str2u(r'"(?:[^"\\]|\\.)*"'))
# This might need some explanation. The ] and - are included in the first # This might need some explanation. The ] and - are included in the first
# set, the ] in the first after the ^ and the - in the last positions of # set, the ] in the first after the ^ and the - in the last positions of
@@ -91,7 +110,7 @@ jsonstring_re = re.compile(ur'"(?:[^"\\]|\\.)*"')
# though it confuses things. The set comprises any character not in # though it confuses things. The set comprises any character not in
# -{}[],:"0123456789 # -{}[],:"0123456789
# The second set comprises zero or more characters not in ,:]} # The second set comprises zero or more characters not in ,:]}
#word_re = re.compile(ur'[^][{}0-9",:-][^]},:]*') #word_re = re.compile(str2u(r'[^][{}0-9",:-][^]},:]*'))
# Screw that, we're using just a fallback. # Screw that, we're using just a fallback.
jsoncatchall_re = re.compile(u'(.*?)[\x09\x0A\x0B\x0C\x0D ]*(?:[]},]|$)') jsoncatchall_re = re.compile(u'(.*?)[\x09\x0A\x0B\x0C\x0D ]*(?:[]},]|$)')

View file

@@ -17,8 +17,8 @@
# Optimizations that have a negative effect on other stages. # Optimizations that have a negative effect on other stages.
import lslcommon from lslopt import lslcommon
from lslcommon import nr from lslopt.lslcommon import nr
#from lslcommon import Vector, Quaternion #from lslcommon import Vector, Quaternion
#import lslfuncs #import lslfuncs
#from lslfuncs import ZERO_VECTOR, ZERO_ROTATION #from lslfuncs import ZERO_VECTOR, ZERO_ROTATION

View file

@@ -18,8 +18,8 @@
# Load the builtins and function properties. # Load the builtins and function properties.
import sys, re import sys, re
from lslcommon import types, warning, Vector, Quaternion from lslopt.lslcommon import types, warning, Vector, Quaternion
import lslcommon, lslfuncs from lslopt import lslcommon, lslfuncs
def LoadLibrary(builtins = None, fndata = None): def LoadLibrary(builtins = None, fndata = None):
"""Load builtins.txt and fndata.txt (or the given filenames) and return """Load builtins.txt and fndata.txt (or the given filenames) and return
@@ -40,21 +40,21 @@ def LoadLibrary(builtins = None, fndata = None): def LoadLibrary(builtins = None, fndata = None):
# Library read code # Library read code
parse_lin_re = re.compile( parse_lin_re = re.compile(
r'^\s*([a-z]+)\s+' br'^\s*([a-z]+)\s+'
r'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*(' br'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*('
r'[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*' br'[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*'
r'(?:\s*,\s*[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*)*' br'(?:\s*,\s*[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*)*'
r')?\s*\)\s*$' br')?\s*\)\s*$'
r'|' br'|'
r'^\s*const\s+([a-z]+)' br'^\s*const\s+([a-z]+)'
r'\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*?)\s*$' br'\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*?)\s*$'
r'|' br'|'
r'^\s*(?:#.*|//.*)?$') br'^\s*(?:#.*|//.*)?$')
parse_arg_re = re.compile(r'^\s*([a-z]+)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*$') parse_arg_re = re.compile(br'^\s*([a-z]+)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*$')
parse_fp_re = re.compile(r'^\s*(-?(?=[0-9]|\.[0-9])[0-9]*' parse_fp_re = re.compile(br'^\s*(-?(?=[0-9]|\.[0-9])[0-9]*'
r'((?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?))\s*$') br'((?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?))\s*$')
parse_int_re = re.compile(r'^\s*(-?0x[0-9A-Fa-f]+|-?[0-9]+)\s*$') parse_int_re = re.compile(br'^\s*(-?0x[0-9A-Fa-f]+|-?[0-9]+)\s*$')
parse_str_re = re.compile(ur'^"((?:[^"\\]|\\.)*)"$') parse_str_re = re.compile(u'^"((?:[^"\\\\]|\\\\.)*)"$')
f = open(builtins, 'rb') f = open(builtins, 'rb')
try: try:

View file

@@ -17,13 +17,13 @@
# Optimizer class that wraps and calls the other parts. # Optimizer class that wraps and calls the other parts.
import lslfuncs from lslopt import lslfuncs
from lslcommon import nr from lslopt.lslcommon import nr
from lslfoldconst import foldconst from lslopt.lslfoldconst import foldconst
from lslrenamer import renamer from lslopt.lslrenamer import renamer
from lsldeadcode import deadcode from lslopt.lsldeadcode import deadcode
from lsllastpass import lastpass from lslopt.lsllastpass import lastpass
class optimizer(foldconst, renamer, deadcode, lastpass): class optimizer(foldconst, renamer, deadcode, lastpass):

View file

@@ -17,9 +17,9 @@
# Convert an abstract syntax tree + symbol table back to a script as text. # Convert an abstract syntax tree + symbol table back to a script as text.
import lslfuncs from lslopt import lslfuncs
import lslcommon from lslopt import lslcommon
from lslcommon import Key, Vector, Quaternion, warning from lslopt.lslcommon import Key, Vector, Quaternion, warning
from math import copysign from math import copysign
debugScopes = False debugScopes = False

View file

@@ -20,8 +20,9 @@
# TODO: Add info to be able to propagate error position to the source. # TODO: Add info to be able to propagate error position to the source.
from lslcommon import Key, Vector, Quaternion, types, nr from lslopt.lslcommon import Key, Vector, Quaternion, types, nr
import lslcommon, lslfuncs from lslopt import lslcommon, lslfuncs
from strutil import *
import re import re
# Note this module was basically written from bottom to top, which may help # Note this module was basically written from bottom to top, which may help
@@ -70,8 +71,8 @@ class EParse(Exception): class EParse(Exception):
self.errorpos = parser.errorpos self.errorpos = parser.errorpos
self.lno, self.cno, self.fname = GetErrLineCol(parser) self.lno, self.cno, self.fname = GetErrLineCol(parser)
filename = (self.fname.decode('utf8', 'replace') filename = (self.fname.decode('utf8', 'replace')
.replace(u'\\', ur'\\') .replace(u'\\', u'\\\\')
.replace(u'"', ur'\"') .replace(u'"', u'\\"')
) )
if parser.processpre and filename != '<stdin>': if parser.processpre and filename != '<stdin>':

89
main.py
View file

@@ -31,6 +31,7 @@ from lslopt.lsloptimizer import optimizer import sys, os, getopt, re
import sys, os, getopt, re import sys, os, getopt, re
import lslopt.lslcommon import lslopt.lslcommon
import lslopt.lslloadlib import lslopt.lslloadlib
from strutil import *
VERSION = '0.3.0beta' VERSION = '0.3.0beta'
@@ -44,7 +45,7 @@ def ReportError(script, e):
# When the encoding of stderr is unknown (e.g. when redirected to a file), # When the encoding of stderr is unknown (e.g. when redirected to a file),
# output will be encoded in UTF-8; otherwise the terminal's encoding will # output will be encoded in UTF-8; otherwise the terminal's encoding will
# be used. # be used.
enc = sys.stderr.encoding or 'utf8' enc = getattr(sys.stderr, 'encoding', 'utf8')
# Synchronize the UTF-8 encoded line with the output line in the # Synchronize the UTF-8 encoded line with the output line in the
# terminal's encoding. We need to compensate for the fact that the # terminal's encoding. We need to compensate for the fact that the
@@ -58,15 +59,15 @@ def ReportError(script, e):
# Write the whole line in the target encoding. # Write the whole line in the target encoding.
err_line = script[linestart:lineend] + b'\n' err_line = script[linestart:lineend] + b'\n'
sys.stderr.write(err_line.decode('utf8').encode(enc, 'backslashreplace')) werr(err_line.decode('utf8'))
sys.stderr.write(u" " * cno + u"^\n") werr(" " * cno + "^\n")
sys.stderr.write(e.args[0] + u"\n") werr(e.args[0] + u"\n")
class UniConvScript(object): class UniConvScript(object):
"""Converts the script to Unicode, setting the properties required by """Converts the script to Unicode, setting the properties required by
EParse to report a meaningful error position. EParse to report a meaningful error position.
""" """
def __init__(self, script, options = (), filename = '<stdin>'): def __init__(self, script, options = (), filename = b'<stdin>'):
self.linedir = [] self.linedir = []
self.filename = filename self.filename = filename
# We don't interpret #line here. In case of an encode error, # We don't interpret #line here. In case of an encode error,
@@ -118,29 +119,29 @@ def PreparePreproc(script):
# least surprise seems to suggest to accept valid LSL strings as LSL # least surprise seems to suggest to accept valid LSL strings as LSL
# instead of reproducing that C quirk. This also matches what FS is doing # instead of reproducing that C quirk. This also matches what FS is doing
# currently, so it's good for compatibility. # currently, so it's good for compatibility.
tok = re.compile( tok = re.compile(str2u(
ur'(?:' r'(?:'
ur'/(?:\?\?/\n|\\\n)*\*.*?\*(?:\?\?/\n|\\\n)*/' r'/(?:\?\?/\n|\\\n)*\*.*?\*(?:\?\?/\n|\\\n)*/'
ur'|/(?:\?\?/\n|\\\n)*/(?:\?\?/\n|\\\n|[^\n])*\n' r'|/(?:\?\?/\n|\\\n)*/(?:\?\?/\n|\\\n|[^\n])*\n'
ur'|[^"]' r'|[^"]'
ur')+' r')+'
ur'|"' r'|"'
, re.S) ), re.S)
# RE used inside strings. # RE used inside strings.
tok2 = re.compile( tok2 = re.compile(str2u(
ur'(?:' r'(?:'
ur"\?\?[='()!<>-]" # valid trigraph except ??/ (backslash) r"\?\?[='()!<>-]" # valid trigraph except ??/ (backslash)
ur"|(?:\?\?/|\\)(?:\?\?[/='()!<>-]|[^\n])" r"|(?:\?\?/|\\)(?:\?\?[/='()!<>-]|[^\n])"
# backslash trigraph or actual backslash, # backslash trigraph or actual backslash,
# followed by any trigraph or non-newline # followed by any trigraph or non-newline
ur'|(?!\?\?/\n|\\\n|"|\n).' r'|(?!\?\?/\n|\\\n|"|\n).'
# any character that doesn't start a trigraph/ # any character that doesn't start a trigraph/
# backslash escape followed by a newline # backslash escape followed by a newline
# or is a newline or double quote, as we're # or is a newline or double quote, as we're
# interested in all those individually. # interested in all those individually.
ur')+' # as many of those as possible r')+' # as many of those as possible
ur'|\?\?/\n|\\\n|\n|"' # or any of those individually r'|\?\?/\n|\\\n|\n|"' # or any of those individually
) ))
pos = 0 pos = 0
match = tok.search(script, pos) match = tok.search(script, pos)
@@ -155,24 +156,24 @@ def PreparePreproc(script):
matched2 = match2.group(0) matched2 = match2.group(0)
pos += len(matched2) pos += len(matched2)
if matched2 == u'\\\n' or matched2 == u'??/\n': if matched2 == b'\\\n' or matched2 == b'??/\n':
nlines += 1 nlines += 1
col = 0 col = 0
match2 = tok2.search(script, pos) match2 = tok2.search(script, pos)
continue continue
if matched2 == u'"': if matched2 == b'"':
if nlines: if nlines:
if script[pos:pos+1] == u'\n': if script[pos:pos+1] == b'\n':
col = -1 # don't add spaces if not necessary col = -1 # don't add spaces if not necessary
# col misses the quote added here, so add 1 # col misses the quote added here, so add 1
s += u'"' + u'\n'*nlines + u' '*(col+1) s += b'"' + b'\n'*nlines + b' '*(col+1)
else: else:
s += u'"' s += b'"'
break break
if matched2 == u'\n': if matched2 == b'\n':
nlines += 1 nlines += 1
col = 0 col = 0
s += u'\\n' s += b'\\n'
else: else:
col += len(matched2) col += len(matched2)
s += matched2 s += matched2
@@ -186,20 +187,20 @@ def PreparePreproc(script):
def ScriptHeader(script, avname): def ScriptHeader(script, avname):
if avname: if avname:
avname = ' - ' + avname avname = b' - ' + avname
return ('//start_unprocessed_text\n/*' return (b'//start_unprocessed_text\n/*'
# + re.sub(r'([*/])(?=[*|/])', r'\1|', script) # FS's algorithm # + re.sub(r'([*/])(?=[*|/])', r'\1|', script) # FS's algorithm
# HACK: This won't break strings containing ** or /* or // like URLs, # HACK: This won't break strings containing ** or /* or // like URLs,
# while still being compatible with FS. # while still being compatible with FS.
+ re.sub(r'([*/]\||\*(?=/))', r'\1|', script) + re.sub(br'([*/]\||\*(?=/))', br'\1|', script)
+ '*/\n//end_unprocessed_text\n//nfo_preprocessor_version 0\n' + b'*/\n//end_unprocessed_text\n//nfo_preprocessor_version 0\n'
'//program_version LSL PyOptimizer v' + VERSION + avname b'//program_version LSL PyOptimizer v' + str2b(VERSION)
+ '\n//mono\n\n') + str2b(avname) + b'\n//mono\n\n')
def Usage(progname, about = None): def Usage(progname, about = None):
if about is None: if about is None:
sys.stderr.write( werr(
ur"""LSL optimizer v{version} u"""LSL optimizer v{version}
(C) Copyright 2015-2019 Sei Lisa. All rights reserved. (C) Copyright 2015-2019 Sei Lisa. All rights reserved.
@@ -253,12 +254,12 @@ Preprocessor modes:
Normally, running the preprocessor needs the option 'processpre' active, to Normally, running the preprocessor needs the option 'processpre' active, to
make the output readable by the optimizer. This option is active by default. make the output readable by the optimizer. This option is active by default.
""".format(progname=progname, version=VERSION)) """.format(progname=str2u(progname), version=str2u(VERSION)))
return return
if about == 'optimizer-options': if about == 'optimizer-options':
sys.stderr.write( werr(
ur""" u"""
Optimizer control options. Optimizer control options.
+ means active by default, - means inactive by default. + means active by default, - means inactive by default.
Case insensitive. Case insensitive.
@@ -363,7 +364,7 @@ For example:
{progname} -O -DCR,+BreakCont scriptname.lsl {progname} -O -DCR,+BreakCont scriptname.lsl
would turn off dead code removal (which is active by default) and turn on the would turn off dead code removal (which is active by default) and turn on the
break/continue syntax extension (which is inactive by default). break/continue syntax extension (which is inactive by default).
""".format(progname=progname)) """.format(progname=str2u(progname)))
return return
validoptions = frozenset(('extendedglobalexpr','breakcont','extendedtypecast', validoptions = frozenset(('extendedglobalexpr','breakcont','extendedtypecast',
@@ -405,7 +406,7 @@ def main(argv):
'libdata=')) 'libdata='))
except getopt.GetoptError as e: except getopt.GetoptError as e:
Usage(argv[0]) Usage(argv[0])
sys.stderr.write(u"\nError: %s\n" % str(e).decode('utf8', 'replace')) werr(u"\nError: %s\n" % str(e).decode('utf8', 'replace'))
return 1 return 1
outfile = '-' outfile = '-'
@@ -462,7 +463,7 @@
return 0 return 0
elif opt == '--version': elif opt == '--version':
sys.stdout.write('LSL PyOptimizer version %s\n' % VERSION) wout(u'LSL PyOptimizer version %s\n' % str2u(VERSION))
return 0 return 0
elif opt in ('-o', '--output'): elif opt in ('-o', '--output'):
@@ -558,7 +559,7 @@
fname = args[0] if args else None fname = args[0] if args else None
if fname is None: if fname is None:
Usage(argv[0]) Usage(argv[0])
sys.stderr.write(u"\nError: Input file not specified. Use -" werr(u"\nError: Input file not specified. Use -"
u" if you want to use stdin.\n") u" if you want to use stdin.\n")
return 1 return 1
@@ -644,7 +645,7 @@
except EParse as e: except EParse as e:
# We don't call ReportError to prevent problems due to # We don't call ReportError to prevent problems due to
# displaying invalid UTF-8 # displaying invalid UTF-8
sys.stderr.write(e.args[0] + u"\n") werr(e.args[0] + u"\n")
return 1 return 1
if preproc != 'none': if preproc != 'none':

View file

@@ -56,9 +56,13 @@ try:
import difflib import difflib
except ImportError: except ImportError:
difflib = None difflib = None
import StringIO as StringStream if sys.hexversion < 0x3000000:
from StringIO import StringIO as StringStream
else:
from io import BytesIO as StringStream
from lslopt import lslcommon,lslfuncs,lslparse,lsloutput,lslloadlib from lslopt import lslcommon,lslfuncs,lslparse,lsloutput,lslloadlib
from lslopt.lslcommon import nr from lslopt.lslcommon import nr
from strutil import *
class EArgError(Exception): class EArgError(Exception):
pass pass
@@ -89,25 +93,25 @@ def parseArgs(s):
State = Space State = Space
p = 0 p = 0
Len = len(s) Len = len(s)
arg = '' arg = b''
while p < Len: while p < Len:
c = s[p] c = s[p:p+1]
p += 1 p += 1
if State in (Space, Normal): if State in (Space, Normal):
if c == '\\': if c == b'\\':
State = NBackslash if State == Normal else SBackslash State = NBackslash if State == Normal else SBackslash
elif c == '"': elif c == b'"':
State = DQuote State = DQuote
elif c == "'": elif c == b"'":
State = SQuote State = SQuote
elif c in (' ', '\t'): elif c in (b' ', b'\t'):
if State == Normal: if State == Normal:
State = Space State = Space
args.append(arg) args.append(arg)
arg = '' arg = b''
# else remain in the 'Space' state # else remain in the 'Space' state
elif c == '\n': elif c == b'\n':
break break
else: else:
State = Normal State = Normal
@@ -118,20 +122,20 @@ def parseArgs(s):
else Space if State == SBackslash else Space if State == SBackslash
else Normal) else Normal)
else: else:
if State == DQBackslash and c not in ('"', '`', '$', '\\'): if State == DQBackslash and c not in (b'"', b'`', b'$', b'\\'):
arg += '\\' arg += b'\\'
arg += c arg += c
State = DQuote if State == DQBackslash else Normal State = DQuote if State == DQBackslash else Normal
elif State == DQuote: elif State == DQuote:
if c == '\\': if c == b'\\':
State = DQBackslash State = DQBackslash
# ` and $ are not interpreted by this parser. # ` and $ are not interpreted by this parser.
elif c == '"': elif c == b'"':
State = Normal State = Normal
else: else:
arg += c arg += c
elif State == SQuote: elif State == SQuote:
if c == "'": if c == b"'":
State = Normal State = Normal
else: else:
arg += c arg += c
@@ -185,7 +189,7 @@ def parseArgs(s):
def tryRead(fn): def tryRead(fn):
result = None result = None
try: try:
f = open(fn, 'r') f = open(fn, 'rb')
try: try:
result = f.read() result = f.read()
finally: finally:
@@ -197,12 +201,9 @@ def tryRead(fn):
# In StringIO, mixing unicode and str causes problems with non-ASCII chars. # In StringIO, mixing unicode and str causes problems with non-ASCII chars.
# Avoid it by overriding the write method, to always encode unicode as UTF-8. # Avoid it by overriding the write method, to always encode unicode as UTF-8.
class StrUTF8IO(StringStream.StringIO): class StrUTF8IO(StringStream):
def write(self, s): def write(self, s):
if type(s) == unicode: StringStream.write(self, any2b(s))
StringStream.StringIO.write(self, s.encode('utf8'))
else:
StringStream.StringIO.write(self, s)
def invokeMain(argv, stdin = None): def invokeMain(argv, stdin = None):
"""Invoke main.main, substituting stdin, stdout, stderr. """Invoke main.main, substituting stdin, stdout, stderr.
@@ -218,7 +219,7 @@ def invokeMain(argv, stdin = None):
stdout_output = None stdout_output = None
stderr_output = None stderr_output = None
try: try:
sys.stdin = StringStream.StringIO(stdin) sys.stdin = StringStream(stdin)
sys.stdout = StrUTF8IO() sys.stdout = StrUTF8IO()
sys.stderr = StrUTF8IO() sys.stderr = StrUTF8IO()
sys.stdin.encoding = 'utf8' sys.stdin.encoding = 'utf8'
@@ -314,8 +315,10 @@ class UnitTestRegression(UnitTestCase):
stdout_output = False stdout_output = False
stderr_output = False stderr_output = False
try: try:
sys.stdout = StringStream.StringIO() sys.stdout = StringStream()
sys.stderr = StringStream.StringIO() sys.stdout.encoding = 'utf8'
sys.stderr = StringStream()
sys.stderr.encoding = 'utf8'
errs = json.run_tests() errs = json.run_tests()
stdout_output = sys.stdout.getvalue() stdout_output = sys.stdout.getvalue()
stderr_output = sys.stderr.getvalue() stderr_output = sys.stderr.getvalue()
@@ -439,7 +442,8 @@ class UnitTestCoverage(UnitTestCase):
self.assertEqual(repr(lslfuncs.q2f(lslcommon.Quaternion((1,0,0,0)))), self.assertEqual(repr(lslfuncs.q2f(lslcommon.Quaternion((1,0,0,0)))),
'Quaternion((1.0, 0.0, 0.0, 0.0))') 'Quaternion((1.0, 0.0, 0.0, 0.0))')
# Key repr coverage # Key repr coverage
self.assertEqual(repr(lslcommon.Key(u'')), "Key(u'')") self.assertEqual(repr(lslcommon.Key(u'')), "Key(u'')"
if str != unicode else "Key('')")
# string + key coverage # string + key coverage
self.assertEqual(lslfuncs.add(u'a', lslcommon.Key(u'b')), u'ab') self.assertEqual(lslfuncs.add(u'a', lslcommon.Key(u'b')), u'ab')
@@ -684,8 +688,8 @@ def generateScriptTests():
def makeTestFunction(fbase, suite): def makeTestFunction(fbase, suite):
def TestFunction(self): def TestFunction(self):
stdin = tryRead(fbase + '.lsl') or '' stdin = tryRead(fbase + '.lsl') or ''
expected_stdout = tryRead(fbase + '.out') or '' expected_stdout = tryRead(fbase + '.out') or b''
expected_stderr = tryRead(fbase + '.err') or '' expected_stderr = tryRead(fbase + '.err') or b''
runargs = (parseArgs(tryRead(fbase + '.run')) runargs = (parseArgs(tryRead(fbase + '.run'))
or (['main.py', '-y', '-'] if suite != 'Expr' or (['main.py', '-y', '-'] if suite != 'Expr'
else ['main.py', else ['main.py',
@@ -694,18 +698,18 @@
',addstrings,expr', ',addstrings,expr',
'-y', '-y',
'-'])) '-']))
sys.stderr.write("\nRunning test %s: " % fbase) werr(u"\nRunning test %s: " % any2u(fbase))
actual_stdout, actual_stderr = invokeMain(runargs, stdin) actual_stdout, actual_stderr = invokeMain(runargs, stdin)
actual_stdout = (actual_stdout.replace('\r','\r\n') actual_stdout = (actual_stdout.replace(b'\r',b'\r\n')
.replace('\r\n\n','\n') .replace(b'\r\n\n',b'\n')
.replace('\r\n','\n')) .replace(b'\r\n',b'\n'))
actual_stderr = (actual_stderr.replace('\r','\r\n') actual_stderr = (actual_stderr.replace(b'\r',b'\r\n')
.replace('\r\n\n','\n') .replace(b'\r\n\n',b'\n')
.replace('\r\n','\n')) .replace(b'\r\n',b'\n'))
try: try:
if expected_stderr.startswith('REGEX\n'): if expected_stderr.startswith(b'REGEX\n'):
self.assertIsNotNone( self.assertIsNotNone(
re.search(expected_stderr[6:], re.search(expected_stderr[6:],
actual_stderr.decode('utf8') actual_stderr.decode('utf8')
@ -714,66 +718,67 @@ def generateScriptTests():
else: else:
self.assertTrue(expected_stderr == actual_stderr) self.assertTrue(expected_stderr == actual_stderr)
except AssertionError: except AssertionError:
sys.stderr.write('Failed' werr(u'Failed'
'\n************ expected stderr:\n') u'\n************ expected stderr:\n')
sys.stderr.write(expected_stderr) werr(expected_stderr)
sys.stderr.write('\n************ actual stderr:\n') werr(u'\n************ actual stderr:\n')
sys.stderr.write(actual_stderr) werr(actual_stderr)
if difflib and expected_stderr and actual_stderr: if difflib and expected_stderr and actual_stderr:
sys.stderr.write('\n************ diff:\n' sys.stderr.write(u'\n************ diff:\n'
+ '\n'.join(difflib.unified_diff( + u'\n'.join(difflib.unified_diff(
expected_stderr.split('\n'), b2u(expected_stderr).split(u'\n'),
actual_stderr.split('\n'), b2u(actual_stderr).split(u'\n'),
'expected', 'actual', lineterm='' 'expected', 'actual', lineterm=''
))) )))
sys.stderr.write('\n************ ') werr(u'\n************ ')
raise raise
try: try:
if expected_stdout.startswith('REGEX\n'): if expected_stdout.startswith(b'REGEX\n'):
self.assertIsNotNone(re.search(expected_stdout[6:], self.assertIsNotNone(re.search(expected_stdout[6:],
actual_stdout)) actual_stdout))
else: else:
self.assertTrue(expected_stdout == actual_stdout) self.assertTrue(expected_stdout == actual_stdout)
except AssertionError: except AssertionError:
sys.stderr.write('Failed' werr(u'Failed'
'\n************ expected stdout:\n') u'\n************ expected stdout:\n')
sys.stderr.write(expected_stdout) werr(expected_stdout)
sys.stderr.write('\n************ actual stdout:\n') werr(u'\n************ actual stdout:\n')
sys.stderr.write(actual_stdout) werr(actual_stdout)
if difflib and expected_stdout and actual_stdout: if difflib and expected_stdout and actual_stdout:
sys.stderr.write('\n************ diff:\n' werr(u'\n************ diff:\n'
+ '\n'.join(difflib.unified_diff( + u'\n'.join(difflib.unified_diff(
expected_stdout.split('\n'), b2u(expected_stdout).split('\n'),
actual_stdout.split('\n'), b2u(actual_stdout).split('\n'),
'expected', 'actual', lineterm='' 'expected', 'actual', lineterm=''
))) )))
sys.stderr.write('\n************ ') sys.stderr.write(u'\n************ ')
raise raise
return TestFunction return TestFunction
TestFunction = makeTestFunction(fbase, testsuite) TestFunction = makeTestFunction(fbase, testsuite)
# __doc__ is used by Eric # __doc__ is used by Eric
line = '' line = b''
try: try:
f = open(fbase + '.lsl') f = open(fbase + '.lsl', 'rb')
try: try:
line = f.readline() line = f.readline()
if line.endswith('\r\n'): if line.endswith(b'\r\n'):
line = line[:-2] line = line[:-2]
elif line[-1:] in ('\r', '\n'): elif line[-1:] in (b'\r', b'\n'):
line = line[:-1] line = line[:-1]
finally: finally:
f.close() f.close()
except IOError as e: except IOError as e:
if e.errno != 2: if e.errno != 2:
raise raise
TestFunction.__doc__ = line[3:] if line.startswith('// ') else None TestFunction.__doc__ = (b2u(line[3:]) if line.startswith(b'// ')
else None)
TestFunction.__name__ = ('test_' + testsuite + '__' TestFunction.__name__ = ('test_' + testsuite + '__'
+ os.path.basename(fbase).replace('-','_')) + os.path.basename(fbase).replace('-','_'))
fail = tryRead(fbase + '.fail') fail = tryRead(fbase + '.fail')
if fail is not None: if fail is not None:
if fail: if fail:
TestFunction.__doc__ = fail TestFunction.__doc__ = b2u(fail)
TestFunction = unittest.expectedFailure(TestFunction) TestFunction = unittest.expectedFailure(TestFunction)
else: else:
skip = tryRead(fbase + '.skp') skip = tryRead(fbase + '.skp')
@ -786,3 +791,4 @@ def generateScriptTests():
generateScriptTests() generateScriptTests()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main(argv = sys.argv) unittest.main(argv = sys.argv)
#UnitTestRegression().test_Regression__multiline_string()

85
strutil.py Normal file
View file

@ -0,0 +1,85 @@
# (C) Copyright 2015-2019 Sei Lisa. All rights reserved.
#
# This file is part of LSL PyOptimizer.
#
# LSL PyOptimizer is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# LSL PyOptimizer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>.
# String <-> Bytes conversion and output utilities
import sys
if sys.hexversion >= 0x3000000:
    # Python 3: native str is already Unicode; provide Py2-style aliases
    # so the rest of the codebase can use unicode()/unichr() uniformly.
    unicode = str
    unichr = chr
    def str2u(s, enc=None):
        """Convert a native Python3 str to Unicode. This is a NOP."""
        return s
    def str2b(s, enc=None):
        """Convert a native Python3 str to bytes, with the given encoding.

        enc may be an encoding name (str), a file-like object (its
        .encoding attribute is used) or None (defaults to utf8).
        """
        # Fall back to utf8 when enc is None, has no .encoding attribute,
        # or its .encoding is None (e.g. a binary/detached stream); the
        # original getattr(enc, 'encoding', 'utf8') raised TypeError in
        # the .encoding-is-None case, unlike b2str below.
        return s.encode(enc if type(enc) == str
                        else getattr(enc, 'encoding', None) or 'utf8',
                        'backslashreplace')
    def u2str(s, enc=None):
        """Convert a Unicode string to native Python 3 str. This is a NOP."""
        return s
    def b2str(s, enc=None):
        """Convert a Bytes string to native Python 3 str."""
        return s.decode(getattr(enc, 'encoding', enc) or 'utf8',
                        'backslashreplace')
else:
    def str2u(s, enc=None):
        """Convert a native Python2 str to Unicode."""
        # NOTE(review): 'backslashreplace' is an encode-only error handler
        # in Python 2; decoding invalid input here would raise TypeError
        # rather than substitute escapes — confirm intended behavior.
        return s.decode(getattr(enc, 'encoding', enc) or 'utf8',
                        'backslashreplace')
    def str2b(s, enc=None):
        """Convert a native Python2 str to bytes. This is a NOP."""
        return s
    def u2str(s, enc=None):
        """Convert a Unicode string to native Python 2 str."""
        # Same encoding-fallback rule as the Python 3 str2b branch.
        return s.encode(enc if type(enc) == str
                        else getattr(enc, 'encoding', None) or 'utf8',
                        'backslashreplace')
    def b2str(s, enc=None):
        """Convert a Bytes string to native Python 2 str. This is a NOP."""
        return s
def b2u(s, enc=None):
    """Convert a Bytes string to Unicode, honoring the given encoding."""
    native = b2str(s, enc)
    return str2u(native, enc)
def u2b(s, enc=None):
    """Convert a Unicode string to Bytes, honoring the given encoding."""
    # str2b does the work on Py3, u2str on Py2; the composition covers both.
    encoded = str2b(s, enc)
    return u2str(encoded, enc)
def any2b(s, enc=None):
    """Convert a Bytes-or-Unicode string to Bytes."""
    if type(s) == bytes:
        return s
    return u2b(s, enc)
def any2u(s, enc=None):
    """Convert a Bytes-or-Unicode string to Unicode."""
    if type(s) == unicode:
        return s
    return b2u(s, enc)
def werr(s):
    """Write any string (bytes or unicode) to stderr."""
    stream = sys.stderr
    # Pass the stream itself so any2u picks up its encoding attribute.
    stream.write(any2u(s, stream))
def wout(s):
    """Write any string (bytes or unicode) to stdout."""
    stream = sys.stdout
    # Pass the stream itself so any2u picks up its encoding attribute.
    stream.write(any2u(s, stream))

View file

@ -1,4 +1,5 @@
import sys import sys
from strutil import *
from lslopt.lslfuncs import * from lslopt.lslfuncs import *
tests = 0 tests = 0
@ -7,7 +8,6 @@ errors = 0
# Begin JSON tests from http://wiki.secondlife.com/wiki/Json_usage_in_LSL/TestScript # Begin JSON tests from http://wiki.secondlife.com/wiki/Json_usage_in_LSL/TestScript
def verify(msg, result, expected): def verify(msg, result, expected):
global tests global tests
werr = sys.stderr.write
tests += 1 tests += 1
if expected != result: if expected != result:
global errors global errors
@ -356,7 +356,6 @@ def test_jira_fixes():
maint3081(); maint3081();
def run_tests(): def run_tests():
werr = sys.stderr.write
# JSON tests from the wiki # JSON tests from the wiki
test_types(); test_types();
test_get_value(); test_get_value();