From fe2dd9a7210abfc0246b25427a12abdd8dfbbbbf Mon Sep 17 00:00:00 2001
From: Sei Lisa <sei-lisa@email.fake>
Date: Tue, 15 Jan 2019 20:27:02 +0100
Subject: [PATCH] First baby steps towards dual Python2+3 compatibility

---
 lslopt/lslbasefuncs.py  |  54 +++++++++++++----
 lslopt/lslcommon.py     |   1 +
 lslopt/lsldeadcode.py   |   4 +-
 lslopt/lslextrafuncs.py |   4 +-
 lslopt/lslfoldconst.py  |  10 ++--
 lslopt/lslfuncopt.py    |   6 +-
 lslopt/lslfuncs.py      |   6 +-
 lslopt/lsljson.py       |  39 ++++++++----
 lslopt/lsllastpass.py   |   4 +-
 lslopt/lslloadlib.py    |  34 +++++------
 lslopt/lsloptimizer.py  |  12 ++--
 lslopt/lsloutput.py     |   6 +-
 lslopt/lslparse.py      |   9 +--
 main.py                 |  89 ++++++++++++++--------------
 run-tests.py            | 128 +++++++++++++++++++++-------------------
 strutil.py              |  85 ++++++++++++++++++++++++++
 unit_tests/json.py      |   3 +-
 17 files changed, 319 insertions(+), 175 deletions(-)
 create mode 100644 strutil.py

diff --git a/lslopt/lslbasefuncs.py b/lslopt/lslbasefuncs.py
index 20cf12b..e1cae4c 100644
--- a/lslopt/lslbasefuncs.py
+++ b/lslopt/lslbasefuncs.py
@@ -34,12 +34,13 @@
 # The JSON functions have been separated to their own module.
 
 import re
-from lslcommon import *
-import lslcommon
+from lslopt.lslcommon import *
+from lslopt import lslcommon
 from ctypes import c_float
 import math
 import hashlib
 from base64 import b64encode, b64decode
+from strutil import *
 
 
 # Regular expressions used along the code. They are needed mainly because
@@ -58,18 +59,49 @@ from base64 import b64encode, b64decode
 # as is (vector)"<1,inf,info>". The 1st gives <0,0,0>, the others <1,inf,inf>.
 # The lookahead (?!i) is essential for parsing them that way without extra code.
 # Note that '|' in REs is order-sensitive.
-float_re = re.compile(ur'^\s*[+-]?(?:0(x)(?:[0-9a-f]+(?:\.[0-9a-f]*)?|\.[0-9a-f]+)(?:p[+-]?[0-9]+)?'
-                      ur'|(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:e[+-]?[0-9]+)?|inf|(nan))',
-                      re.I)
-vfloat_re = re.compile(ur'^\s*[+-]?(?:0(x)(?:[0-9a-f]+(?:\.[0-9a-f]*)?|\.[0-9a-f]+)(?:p[+-]?[0-9]+)?'
-                      ur'|(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:e[+-]?[0-9]+)?|infinity|inf(?!i)|(nan))',
-                      re.I)
+float_re  = re.compile(str2u(r'''
+    ^\s*[+-]?(?:
+        0(x)(?:             # Hex float or hex int (captures the 'x')
+            [0-9a-f]+(?:\.[0-9a-f]*)?
+            |\.[0-9a-f]+    # Hex digits
+        )(?:
+            p[+-]?[0-9]+    # Hex float exponent
+        )?                  # (optional)
+        |(?:                # Decimal float or decimal int
+            [0-9]+(?:\.[0-9]*)?
+            |\.[0-9]+       # Decimal digits
+        )(?:
+            e[+-]?[0-9]+    # Decimal float exponent
+        )?                  # (optional)
+        |inf                # Infinity
+        |(nan)              # NaN (captured)
+    )
+    '''), re.I | re.X)
+vfloat_re = re.compile(str2u(r'''
+    ^\s*[+-]?(?:
+        0(x)(?:             # Hex float or hex int (captures the 'x')
+            [0-9a-f]+(?:\.[0-9a-f]*)?
+            |\.[0-9a-f]+    # Hex digits
+        )(?:
+            p[+-]?[0-9]+    # Hex float exponent
+        )?                  # (optional)
+        |(?:                # Decimal float or decimal int
+            [0-9]+(?:\.[0-9]*)?
+            |\.[0-9]+       # Decimal digits
+        )(?:
+            e[+-]?[0-9]+    # Decimal float exponent
+        )?                  # (optional)
+        |infinity|inf(?!i)  # Infinity (the only difference with the above)
+        |(nan)              # NaN (captured)
+    )
+    '''), re.I | re.X)
 
-int_re = re.compile(ur'^0(x)[0-9a-f]+|^\s*[+-]?[0-9]+', re.I)
+int_re = re.compile(str2u(r'^0(x)[0-9a-f]+|^\s*[+-]?[0-9]+'), re.I)
 
-key_re = re.compile(ur'^[0-9a-f]{8}(?:-[0-9a-f]{4}){4}[0-9a-f]{8}$', re.I)
+key_re = re.compile(str2u(r'^[0-9a-f]{8}(?:-[0-9a-f]{4}){4}[0-9a-f]{8}$'),
+                    re.I)
 
-b64_re = re.compile(ur'^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2,3})?')
+b64_re = re.compile(str2u(r'^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2,3})?'))
 
 ZERO_VECTOR      = Vector((0.0, 0.0, 0.0))
 ZERO_ROTATION    = Quaternion((0.0, 0.0, 0.0, 1.0))
diff --git a/lslopt/lslcommon.py b/lslopt/lslcommon.py
index e0e36c9..4f45604 100644
--- a/lslopt/lslcommon.py
+++ b/lslopt/lslcommon.py
@@ -18,6 +18,7 @@
 # Classes, functions and variables for use of all modules.
 
 import sys
+from strutil import *
 
 _exclusions = frozenset(('nt','t','name','value','ch', 'X','SEF'))
 
diff --git a/lslopt/lsldeadcode.py b/lslopt/lsldeadcode.py
index 59cf2fa..4f03d9c 100644
--- a/lslopt/lsldeadcode.py
+++ b/lslopt/lsldeadcode.py
@@ -17,8 +17,8 @@
 
 # Dead Code Removal optimization
 
-import lslfuncs
-from lslcommon import nr
+from lslopt import lslfuncs
+from lslopt.lslcommon import nr
 
 class deadcode(object):
 
diff --git a/lslopt/lslextrafuncs.py b/lslopt/lslextrafuncs.py
index 8c412fa..a295727 100644
--- a/lslopt/lslextrafuncs.py
+++ b/lslopt/lslextrafuncs.py
@@ -17,8 +17,8 @@
 
 # Extra functions that have predictable return values for certain arguments.
 
-from lslcommon import Key, Vector #, Quaternion
-from lslbasefuncs import ELSLCantCompute, fi,ff,fs,fk,v2f,q2f,fl, \
+from lslopt.lslcommon import Key, Vector #, Quaternion
+from lslopt.lslbasefuncs import ELSLCantCompute, fi,ff,fs,fk,v2f,q2f,fl, \
   NULL_KEY, ZERO_VECTOR, ZERO_ROTATION, \
   TOUCH_INVALID_TEXCOORD, cond
 ff, q2f  # keep pyflakes happy as these are not used
diff --git a/lslopt/lslfoldconst.py b/lslopt/lslfoldconst.py
index 2a9514d..ce3a0a7 100644
--- a/lslopt/lslfoldconst.py
+++ b/lslopt/lslfoldconst.py
@@ -17,12 +17,12 @@
 
 # Constant folding and simplification of expressions and statements.
 
-import lslcommon
-from lslcommon import Vector, Quaternion, warning, nr
-import lslfuncs
-from lslfuncs import ZERO_VECTOR, ZERO_ROTATION
+from lslopt import lslcommon
+from lslopt.lslcommon import Vector, Quaternion, warning, nr
+from lslopt import lslfuncs
+from lslopt.lslfuncs import ZERO_VECTOR, ZERO_ROTATION
 import math
-from lslfuncopt import OptimizeFunc, OptimizeArgs, FuncOptSetup
+from lslopt.lslfuncopt import OptimizeFunc, OptimizeArgs, FuncOptSetup
 
 # TODO: Remove special handling of @ within IF,WHILE,FOR,DO
 
diff --git a/lslopt/lslfuncopt.py b/lslopt/lslfuncopt.py
index 65cf8d9..f4ec0d2 100644
--- a/lslopt/lslfuncopt.py
+++ b/lslopt/lslfuncopt.py
@@ -18,9 +18,9 @@
 # Optimize calls to LSL library functions and parameters where possible
 # This is dependent on the LSL function library.
 
-import lslcommon
-from lslcommon import Key, Vector, Quaternion, nr
-import lslfuncs
+from lslopt import lslcommon
+from lslopt.lslcommon import Key, Vector, Quaternion, nr
+from lslopt import lslfuncs
 
 def OptimizeArgs(node, sym):
     """Transform function arguments to shorter equivalents where possible."""
diff --git a/lslopt/lslfuncs.py b/lslopt/lslfuncs.py
index 5c6dff1..2263512 100644
--- a/lslopt/lslfuncs.py
+++ b/lslopt/lslfuncs.py
@@ -17,6 +17,6 @@
 
 # Put all LSL functions together in one single module
 
-from lslbasefuncs import *
-from lsljson import *
-from lslextrafuncs import *
+from lslopt.lslbasefuncs import *
+from lslopt.lsljson import *
+from lslopt.lslextrafuncs import *
diff --git a/lslopt/lsljson.py b/lslopt/lsljson.py
index 15ed2c6..7f18e1b 100644
--- a/lslopt/lsljson.py
+++ b/lslopt/lsljson.py
@@ -19,8 +19,8 @@
 
 import re
 import math
-from lslcommon import *
-from lslbasefuncs import llStringTrim, fs, fl, InternalTypecast
+from lslopt.lslcommon import *
+from lslopt.lslbasefuncs import llStringTrim, fs, fl, InternalTypecast
 
 # INCOMPATIBILITY NOTE: The JSON functions in SL have very weird behaviour
 # in corner cases. Despite our best efforts, that behaviour is not replicated
@@ -44,8 +44,8 @@ JSON_DELETE  = u'\uFDD8'
 JSON_APPEND  = -1
 
 jsonesc_re = re.compile(u'[\x08\x09\x0A\x0C\x0D"/\\\\]')
-jsonesc_dict = {u'\x08':ur'\b', u'\x09':ur'\t', u'\x0A':ur'\n', u'\x0C':ur'\f',
-                u'\x0D':ur'\r', u'"':ur'\"', u'/':ur'\/', u'\\':ur'\\'}
+jsonesc_dict = {u'\x08':u'\\b', u'\x09':u'\\t', u'\x0A':u'\\n', u'\x0C':u'\\f',
+                u'\x0D':u'\\r', u'"':u'\\"', u'/':u'\\/', u'\\':u'\\\\'}
 jsonunesc_dict = {u'b':u'\x08', u't':u'\x09', u'n':u'\x0A', u'f':u'\x0C', u'r':u'\x0D'}
 
 # LSL JSON numbers differ from standard JSON numbers in many respects:
@@ -72,18 +72,37 @@ jsonunesc_dict = {u'b':u'\x08', u't':u'\x09', u'n':u'\x0A', u'f':u'\x0C', u'r':u
 # elements when appropriate.
 
 # Real JSON number parser:
-#jsonnum_re = re.compile(ur'-?(?:[1-9][0-9]*|0)(?:\.[0-9]+)?(?:[Ee][+-]?[0-9]+)?')
+#jsonnum_re = re.compile(str2u(
+#    r'-?(?:[1-9][0-9]*|0)(?:\.[0-9]+)?(?:[Ee][+-]?[0-9]+)?'
+#    ))
 
 # BUG-6466 active:
-jsonnumbug_re = re.compile(ur'-?(?:[0-9]*([Ee])-?[0-9]*\.?[0-9]*|(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*))')
+jsonnumbug_re = re.compile(str2u(r'''
+    -?(?:
+        [0-9]*([Ee])-?[0-9]*\.?[0-9]*
+        |(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*)
+    )
+    '''), re.X)
 # BUG-6466 fixed:
 # The new RE is just a modified version of the crap, allowing + exponents and
 # disallowing zeros, sometimes even when legal (e.g. 0e0)
-#jsonnum_re = re.compile(ur'-?(?:(?=[1-9]|\.(?:[^e]|$)|0(?:[^0-9e]|$))[0-9]*([Ee])[+-]?[0-9]*\.?[0-9]*|(?=[1-9]|\.(?:[^e]|$)|0(?:[^0-9e]|$))[0-9]*(\.?[0-9]*(?:[Ee][+-]?)?[0-9]*))')
+#jsonnum_re = re.compile(str2u(r'''
+#    -?(?:
+#        (?=[1-9]|\.(?:[^e]|$)
+#            |0(?:[^0-9e]|$))[0-9]*([Ee])[+-]?[0-9]*\.?[0-9]*
+#        |(?=[1-9]|\.(?:[^e]|$)
+#            |0(?:[^0-9e]|$))[0-9]*(\.?[0-9]*(?:[Ee][+-]?)?[0-9]*)
+#    )
+#    '''), re.X)
 # They've fixed BUG-6657 by bringing BUG-6466 back to life.
-jsonnum_re = re.compile(ur'-?(?:[0-9]*([Ee])-?[0-9]*\.?[0-9]*|(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*))')
+jsonnum_re = re.compile(str2u(r'''
+    -?(?:
+        [0-9]*([Ee])-?[0-9]*\.?[0-9]*
+        |(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*)
+    )
+    '''), re.X)
 
-jsonstring_re = re.compile(ur'"(?:[^"\\]|\\.)*"')
+jsonstring_re = re.compile(str2u(r'"(?:[^"\\]|\\.)*"'))
 
 # This might need some explanation. The ] and - are included in the first
 # set, the ] in the first after the ^ and the - in the last positions of
@@ -91,7 +110,7 @@ jsonstring_re = re.compile(ur'"(?:[^"\\]|\\.)*"')
 # though it confuses things. The set comprises any character not in
 # -{}[],:"0123456789
 # The second set comprises zero or more characters not in ,:]}
-#word_re = re.compile(ur'[^][{}0-9",:-][^]},:]*')
+#word_re = re.compile(str2u(r'[^][{}0-9",:-][^]},:]*'))
 # Screw that, we're using just a fallback.
 jsoncatchall_re = re.compile(u'(.*?)[\x09\x0A\x0B\x0C\x0D ]*(?:[]},]|$)')
 
diff --git a/lslopt/lsllastpass.py b/lslopt/lsllastpass.py
index 5d541b4..f950596 100644
--- a/lslopt/lsllastpass.py
+++ b/lslopt/lsllastpass.py
@@ -17,8 +17,8 @@
 
 # Optimizations that have a negative effect on other stages.
 
-import lslcommon
-from lslcommon import nr
+from lslopt import lslcommon
+from lslopt.lslcommon import nr
 #from lslcommon import Vector, Quaternion
 #import lslfuncs
 #from lslfuncs import ZERO_VECTOR, ZERO_ROTATION
diff --git a/lslopt/lslloadlib.py b/lslopt/lslloadlib.py
index e755a02..c08f325 100644
--- a/lslopt/lslloadlib.py
+++ b/lslopt/lslloadlib.py
@@ -18,8 +18,8 @@
 # Load the builtins and function properties.
 
 import sys, re
-from lslcommon import types, warning, Vector, Quaternion
-import lslcommon, lslfuncs
+from lslopt.lslcommon import types, warning, Vector, Quaternion
+from lslopt import lslcommon, lslfuncs
 
 def LoadLibrary(builtins = None, fndata = None):
     """Load builtins.txt and fndata.txt (or the given filenames) and return
@@ -40,21 +40,21 @@ def LoadLibrary(builtins = None, fndata = None):
     # Library read code
 
     parse_lin_re = re.compile(
-        r'^\s*([a-z]+)\s+'
-        r'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*('
-            r'[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*'
-            r'(?:\s*,\s*[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*)*'
-        r')?\s*\)\s*$'
-        r'|'
-        r'^\s*const\s+([a-z]+)'
-        r'\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*?)\s*$'
-        r'|'
-        r'^\s*(?:#.*|//.*)?$')
-    parse_arg_re = re.compile(r'^\s*([a-z]+)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*$')
-    parse_fp_re  = re.compile(r'^\s*(-?(?=[0-9]|\.[0-9])[0-9]*'
-                              r'((?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?))\s*$')
-    parse_int_re = re.compile(r'^\s*(-?0x[0-9A-Fa-f]+|-?[0-9]+)\s*$')
-    parse_str_re = re.compile(ur'^"((?:[^"\\]|\\.)*)"$')
+        br'^\s*([a-z]+)\s+'
+        br'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*('
+            br'[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*'
+            br'(?:\s*,\s*[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*)*'
+        br')?\s*\)\s*$'
+        br'|'
+        br'^\s*const\s+([a-z]+)'
+        br'\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*?)\s*$'
+        br'|'
+        br'^\s*(?:#.*|//.*)?$')
+    parse_arg_re = re.compile(br'^\s*([a-z]+)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*$')
+    parse_fp_re  = re.compile(br'^\s*(-?(?=[0-9]|\.[0-9])[0-9]*'
+                              br'((?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?))\s*$')
+    parse_int_re = re.compile(br'^\s*(-?0x[0-9A-Fa-f]+|-?[0-9]+)\s*$')
+    parse_str_re = re.compile(u'^"((?:[^"\\\\]|\\\\.)*)"$')
 
     f = open(builtins, 'rb')
     try:
diff --git a/lslopt/lsloptimizer.py b/lslopt/lsloptimizer.py
index 0d61af4..3f332de 100644
--- a/lslopt/lsloptimizer.py
+++ b/lslopt/lsloptimizer.py
@@ -17,13 +17,13 @@
 
 # Optimizer class that wraps and calls the other parts.
 
-import lslfuncs
+from lslopt import lslfuncs
 
-from lslcommon import nr
-from lslfoldconst import foldconst
-from lslrenamer import renamer
-from lsldeadcode import deadcode
-from lsllastpass import lastpass
+from lslopt.lslcommon import nr
+from lslopt.lslfoldconst import foldconst
+from lslopt.lslrenamer import renamer
+from lslopt.lsldeadcode import deadcode
+from lslopt.lsllastpass import lastpass
 
 class optimizer(foldconst, renamer, deadcode, lastpass):
 
diff --git a/lslopt/lsloutput.py b/lslopt/lsloutput.py
index 15854c6..1ea9658 100644
--- a/lslopt/lsloutput.py
+++ b/lslopt/lsloutput.py
@@ -17,9 +17,9 @@
 
 # Convert an abstract syntax tree + symbol table back to a script as text.
 
-import lslfuncs
-import lslcommon
-from lslcommon import Key, Vector, Quaternion, warning
+from lslopt import lslfuncs
+from lslopt import lslcommon
+from lslopt.lslcommon import Key, Vector, Quaternion, warning
 from math import copysign
 
 debugScopes = False
diff --git a/lslopt/lslparse.py b/lslopt/lslparse.py
index f99c310..5cf6300 100644
--- a/lslopt/lslparse.py
+++ b/lslopt/lslparse.py
@@ -20,8 +20,9 @@
 
 # TODO: Add info to be able to propagate error position to the source.
 
-from lslcommon import Key, Vector, Quaternion, types, nr
-import lslcommon, lslfuncs
+from lslopt.lslcommon import Key, Vector, Quaternion, types, nr
+from lslopt import lslcommon, lslfuncs
+from strutil import *
 import re
 
 # Note this module was basically written from bottom to top, which may help
@@ -70,8 +71,8 @@ class EParse(Exception):
         self.errorpos = parser.errorpos
         self.lno, self.cno, self.fname = GetErrLineCol(parser)
         filename = (self.fname.decode('utf8', 'replace')
-                    .replace(u'\\', ur'\\')
-                    .replace(u'"', ur'\"')
+                    .replace(u'\\', u'\\\\')
+                    .replace(u'"', u'\\"')
                    )
 
         if parser.processpre and filename != '<stdin>':
diff --git a/main.py b/main.py
index 8a98803..b1c7ce8 100755
--- a/main.py
+++ b/main.py
@@ -31,6 +31,7 @@ from lslopt.lsloptimizer import optimizer
 import sys, os, getopt, re
 import lslopt.lslcommon
 import lslopt.lslloadlib
+from strutil import *
 
 
 VERSION = '0.3.0beta'
@@ -44,7 +45,7 @@ def ReportError(script, e):
     # When the encoding of stderr is unknown (e.g. when redirected to a file),
     # output will be encoded in UTF-8; otherwise the terminal's encoding will
     # be used.
-    enc = sys.stderr.encoding or 'utf8'
+    enc = getattr(sys.stderr, 'encoding', None) or 'utf8'
 
     # Synchronize the UTF-8 encoded line with the output line in the
     # terminal's encoding. We need to compensate for the fact that the
@@ -58,15 +59,15 @@ def ReportError(script, e):
 
     # Write the whole line in the target encoding.
     err_line = script[linestart:lineend] + b'\n'
-    sys.stderr.write(err_line.decode('utf8').encode(enc, 'backslashreplace'))
-    sys.stderr.write(u" " * cno + u"^\n")
-    sys.stderr.write(e.args[0] + u"\n")
+    werr(err_line.decode('utf8'))
+    werr(" " * cno + "^\n")
+    werr(e.args[0] + u"\n")
 
 class UniConvScript(object):
     """Converts the script to Unicode, setting the properties required by
     EParse to report a meaningful error position.
     """
-    def __init__(self, script, options = (), filename = '<stdin>'):
+    def __init__(self, script, options = (), filename = b'<stdin>'):
         self.linedir = []
         self.filename = filename
         # We don't interpret #line here. In case of an encode error,
@@ -118,29 +119,29 @@ def PreparePreproc(script):
     # least surprise seems to suggest to accept valid LSL strings as LSL
     # instead of reproducing that C quirk. This also matches what FS is doing
     # currently, so it's good for compatibility.
-    tok = re.compile(
-        ur'(?:'
-            ur'/(?:\?\?/\n|\\\n)*\*.*?\*(?:\?\?/\n|\\\n)*/'
-            ur'|/(?:\?\?/\n|\\\n)*/(?:\?\?/\n|\\\n|[^\n])*\n'
-            ur'|[^"]'
-        ur')+'
-        ur'|"'
-        , re.S)
+    tok = re.compile(str2u(
+        r'(?:'
+            r'/(?:\?\?/\n|\\\n)*\*.*?\*(?:\?\?/\n|\\\n)*/'
+            r'|/(?:\?\?/\n|\\\n)*/(?:\?\?/\n|\\\n|[^\n])*\n'
+            r'|[^"]'
+        r')+'
+        r'|"'
+        ), re.S)
     # RE used inside strings.
-    tok2 = re.compile(
-        ur'(?:'
-            ur"\?\?[='()!<>-]"  # valid trigraph except ??/ (backslash)
-            ur"|(?:\?\?/|\\)(?:\?\?[/='()!<>-]|[^\n])"
+    tok2 = re.compile(str2u(
+        r'(?:'
+            r"\?\?[='()!<>-]"  # valid trigraph except ??/ (backslash)
+            r"|(?:\?\?/|\\)(?:\?\?[/='()!<>-]|[^\n])"
                                 # backslash trigraph or actual backslash,
                                 # followed by any trigraph or non-newline
-            ur'|(?!\?\?/\n|\\\n|"|\n).'
+            r'|(?!\?\?/\n|\\\n|"|\n).'
                                 # any character that doesn't start a trigraph/
                                 # backslash escape followed by a newline
                                 # or is a newline or double quote, as we're
                                 # interested in all those individually.
-        ur')+'                  # as many of those as possible
-        ur'|\?\?/\n|\\\n|\n|"'  # or any of those individually
-        )
+        r')+'                   # as many of those as possible
+        r'|\?\?/\n|\\\n|\n|"'   # or any of those individually
+        ))
 
     pos = 0
     match = tok.search(script, pos)
@@ -155,24 +156,24 @@ def PreparePreproc(script):
                 matched2 = match2.group(0)
                 pos += len(matched2)
 
-                if matched2 == u'\\\n' or matched2 == u'??/\n':
+                if matched2 == u'\\\n' or matched2 == u'??/\n':
                     nlines += 1
                     col = 0
                     match2 = tok2.search(script, pos)
                     continue
-                if matched2 == u'"':
+                if matched2 == u'"':
                     if nlines:
-                        if script[pos:pos+1] == u'\n':
+                        if script[pos:pos+1] == u'\n':
                             col = -1 # don't add spaces if not necessary
                         # col misses the quote added here, so add 1
-                        s += u'"' + u'\n'*nlines + u' '*(col+1)
+                        s += u'"' + u'\n'*nlines + u' '*(col+1)
                     else:
-                        s += u'"'
+                        s += u'"'
                     break
-                if matched2 == u'\n':
+                if matched2 == u'\n':
                     nlines += 1
                     col = 0
-                    s += u'\\n'
+                    s += u'\\n'
                 else:
                     col += len(matched2)
                     s += matched2
@@ -186,20 +187,20 @@ def PreparePreproc(script):
 
 def ScriptHeader(script, avname):
     if avname:
-        avname = ' - ' + avname
-    return ('//start_unprocessed_text\n/*'
+        avname = ' - ' + avname  # stays native str; str2b() converts it below
+    return (b'//start_unprocessed_text\n/*'
         # + re.sub(r'([*/])(?=[*|/])', r'\1|', script) # FS's algorithm
         # HACK: This won't break strings containing ** or /* or // like URLs,
         # while still being compatible with FS.
-        + re.sub(r'([*/]\||\*(?=/))', r'\1|', script)
-        + '*/\n//end_unprocessed_text\n//nfo_preprocessor_version 0\n'
-          '//program_version LSL PyOptimizer v' + VERSION + avname
-        + '\n//mono\n\n')
+        + re.sub(br'([*/]\||\*(?=/))', br'\1|', script)
+        + b'*/\n//end_unprocessed_text\n//nfo_preprocessor_version 0\n'
+          b'//program_version LSL PyOptimizer v' + str2b(VERSION)
+        + str2b(avname) + b'\n//mono\n\n')
 
 def Usage(progname, about = None):
     if about is None:
-        sys.stderr.write(
-ur"""LSL optimizer v{version}
+        werr(
+u"""LSL optimizer v{version}
 
     (C) Copyright 2015-2019 Sei Lisa. All rights reserved.
 
@@ -253,12 +254,12 @@ Preprocessor modes:
 
 Normally, running the preprocessor needs the option 'processpre' active, to
 make the output readable by the optimizer. This option is active by default.
-""".format(progname=progname, version=VERSION))
+""".format(progname=str2u(progname), version=str2u(VERSION)))
         return
 
     if about == 'optimizer-options':
-        sys.stderr.write(
-ur"""
+        werr(
+u"""
 Optimizer control options.
 + means active by default, - means inactive by default.
 Case insensitive.
@@ -363,7 +364,7 @@ For example:
    {progname} -O -DCR,+BreakCont scriptname.lsl
 would turn off dead code removal (which is active by default) and turn on the
 break/continue syntax extension (which is inactive by default).
-""".format(progname=progname))
+""".format(progname=str2u(progname)))
         return
 
 validoptions = frozenset(('extendedglobalexpr','breakcont','extendedtypecast',
@@ -405,7 +406,7 @@ def main(argv):
             'libdata='))
     except getopt.GetoptError as e:
         Usage(argv[0])
-        sys.stderr.write(u"\nError: %s\n" % str(e).decode('utf8', 'replace'))
+        werr(u"\nError: %s\n" % str2u(str(e)))  # str has no .decode in py3
         return 1
 
     outfile = '-'
@@ -462,7 +463,7 @@ def main(argv):
             return 0
 
         elif opt == '--version':
-            sys.stdout.write('LSL PyOptimizer version %s\n' % VERSION)
+            wout(u'LSL PyOptimizer version %s\n' % str2u(VERSION))
             return 0
 
         elif opt in ('-o', '--output'):
@@ -558,7 +559,7 @@ def main(argv):
         fname = args[0] if args else None
         if fname is None:
             Usage(argv[0])
-            sys.stderr.write(u"\nError: Input file not specified. Use -"
+            werr(u"\nError: Input file not specified. Use -"
                 u" if you want to use stdin.\n")
             return 1
 
@@ -644,7 +645,7 @@ def main(argv):
             except EParse as e:
                 # We don't call ReportError to prevent problems due to
                 # displaying invalid UTF-8
-                sys.stderr.write(e.args[0] + u"\n")
+                werr(e.args[0] + u"\n")
                 return 1
 
         if preproc != 'none':
diff --git a/run-tests.py b/run-tests.py
index 2f391fb..096616d 100755
--- a/run-tests.py
+++ b/run-tests.py
@@ -56,9 +56,13 @@ try:
     import difflib
 except ImportError:
     difflib = None
-import StringIO as StringStream
+if sys.hexversion < 0x3000000:
+    from StringIO import StringIO as StringStream
+else:
+    from io import BytesIO as StringStream
 from lslopt import lslcommon,lslfuncs,lslparse,lsloutput,lslloadlib
 from lslopt.lslcommon import nr
+from strutil import *
 
 class EArgError(Exception):
     pass
@@ -89,25 +93,25 @@ def parseArgs(s):
     State = Space
     p = 0
     Len = len(s)
-    arg = ''
+    arg = b''
 
     while p < Len:
-        c = s[p]
+        c = s[p:p+1]
         p += 1
         if State in (Space, Normal):
-            if c == '\\':
+            if c == b'\\':
                 State = NBackslash if State == Normal else SBackslash
-            elif c == '"':
+            elif c == b'"':
                 State = DQuote
-            elif c == "'":
+            elif c == b"'":
                 State = SQuote
-            elif c in (' ', '\t'):
+            elif c in (b' ', b'\t'):
                 if State == Normal:
                     State = Space
                     args.append(arg)
-                    arg = ''
+                    arg = b''
                 # else remain in the 'Space' state
-            elif c == '\n':
+            elif c == b'\n':
                 break
             else:
                 State = Normal
@@ -118,20 +122,20 @@ def parseArgs(s):
                          else Space if State == SBackslash
                          else Normal)
             else:
-                if State == DQBackslash and c not in ('"', '`', '$', '\\'):
-                    arg += '\\'
+                if State == DQBackslash and c not in (b'"', b'`', b'$', b'\\'):
+                    arg += b'\\'
                 arg += c
                 State = DQuote if State == DQBackslash else Normal
         elif State == DQuote:
-            if c == '\\':
+            if c == b'\\':
                 State = DQBackslash
             # ` and $ are not interpreted by this parser.
-            elif c == '"':
+            elif c == b'"':
                 State = Normal
             else:
                 arg += c
         elif State == SQuote:
-            if c == "'":
+            if c == b"'":
                 State = Normal
             else:
                 arg += c
@@ -185,7 +189,7 @@ def parseArgs(s):
 def tryRead(fn):
     result = None
     try:
-        f = open(fn, 'r')
+        f = open(fn, 'rb')
         try:
             result = f.read()
         finally:
@@ -197,12 +201,9 @@ def tryRead(fn):
 
 # In StringIO, mixing unicode and str causes problems with non-ASCII chars.
 # Avoid it by overriding the write method, to always encode unicode as UTF-8.
-class StrUTF8IO(StringStream.StringIO):
+class StrUTF8IO(StringStream):
     def write(self, s):
-        if type(s) == unicode:
-            StringStream.StringIO.write(self, s.encode('utf8'))
-        else:
-            StringStream.StringIO.write(self, s)
+        StringStream.write(self, any2b(s))
 
 def invokeMain(argv, stdin = None):
     """Invoke main.main, substituting stdin, stdout, stderr.
@@ -218,7 +219,7 @@ def invokeMain(argv, stdin = None):
     stdout_output = None
     stderr_output = None
     try:
-        sys.stdin = StringStream.StringIO(stdin)
+        sys.stdin = StringStream(stdin)
         sys.stdout = StrUTF8IO()
         sys.stderr = StrUTF8IO()
         sys.stdin.encoding = 'utf8'
@@ -314,8 +315,10 @@ class UnitTestRegression(UnitTestCase):
         stdout_output = False
         stderr_output = False
         try:
-            sys.stdout = StringStream.StringIO()
-            sys.stderr = StringStream.StringIO()
+            sys.stdout = StringStream()
+            sys.stdout.encoding = 'utf8'
+            sys.stderr = StringStream()
+            sys.stderr.encoding = 'utf8'
             errs = json.run_tests()
             stdout_output = sys.stdout.getvalue()
             stderr_output = sys.stderr.getvalue()
@@ -439,7 +442,8 @@ class UnitTestCoverage(UnitTestCase):
         self.assertEqual(repr(lslfuncs.q2f(lslcommon.Quaternion((1,0,0,0)))),
                          'Quaternion((1.0, 0.0, 0.0, 0.0))')
         # Key repr coverage
-        self.assertEqual(repr(lslcommon.Key(u'')), "Key(u'')")
+        self.assertEqual(repr(lslcommon.Key(u'')), "Key(u'')"
+            if str != unicode else "Key('')")
 
         # string + key coverage
         self.assertEqual(lslfuncs.add(u'a', lslcommon.Key(u'b')), u'ab')
@@ -684,8 +688,8 @@ def generateScriptTests():
             def makeTestFunction(fbase, suite):
                 def TestFunction(self):
                     stdin = tryRead(fbase + '.lsl') or ''
-                    expected_stdout = tryRead(fbase + '.out') or ''
-                    expected_stderr = tryRead(fbase + '.err') or ''
+                    expected_stdout = tryRead(fbase + '.out') or b''
+                    expected_stderr = tryRead(fbase + '.err') or b''
                     runargs = (parseArgs(tryRead(fbase + '.run'))
                                or (['main.py', '-y', '-'] if suite != 'Expr'
                                    else ['main.py',
@@ -694,18 +698,18 @@ def generateScriptTests():
                                                ',addstrings,expr',
                                          '-y',
                                          '-']))
-                    sys.stderr.write("\nRunning test %s: " % fbase)
+                    werr(u"\nRunning test %s: " % any2u(fbase))
                     actual_stdout, actual_stderr = invokeMain(runargs, stdin)
-                    actual_stdout = (actual_stdout.replace('\r','\r\n')
-                                     .replace('\r\n\n','\n')
-                                     .replace('\r\n','\n'))
+                    actual_stdout = (actual_stdout.replace(b'\r',b'\r\n')
+                                     .replace(b'\r\n\n',b'\n')
+                                     .replace(b'\r\n',b'\n'))
 
-                    actual_stderr = (actual_stderr.replace('\r','\r\n')
-                                     .replace('\r\n\n','\n')
-                                     .replace('\r\n','\n'))
+                    actual_stderr = (actual_stderr.replace(b'\r',b'\r\n')
+                                     .replace(b'\r\n\n',b'\n')
+                                     .replace(b'\r\n',b'\n'))
 
                     try:
-                        if expected_stderr.startswith('REGEX\n'):
+                        if expected_stderr.startswith(b'REGEX\n'):
                             self.assertIsNotNone(
                                 re.search(expected_stderr[6:],
                                           actual_stderr.decode('utf8')
@@ -714,66 +718,67 @@ def generateScriptTests():
                         else:
                             self.assertTrue(expected_stderr == actual_stderr)
                     except AssertionError:
-                        sys.stderr.write('Failed'
-                                         '\n************ expected stderr:\n')
-                        sys.stderr.write(expected_stderr)
-                        sys.stderr.write('\n************ actual stderr:\n')
-                        sys.stderr.write(actual_stderr)
+                        werr(u'Failed'
+                             u'\n************ expected stderr:\n')
+                        werr(expected_stderr)
+                        werr(u'\n************ actual stderr:\n')
+                        werr(actual_stderr)
                         if difflib and expected_stderr and actual_stderr:
-                            sys.stderr.write('\n************ diff:\n'
-                                 + '\n'.join(difflib.unified_diff(
-                                    expected_stderr.split('\n'),
-                                    actual_stderr.split('\n'),
+                            sys.stderr.write(u'\n************ diff:\n'
+                                 + u'\n'.join(difflib.unified_diff(
+                                    b2u(expected_stderr).split(u'\n'),
+                                    b2u(actual_stderr).split(u'\n'),
                                     'expected', 'actual', lineterm=''
                             )))
-                        sys.stderr.write('\n************ ')
+                        werr(u'\n************ ')
                         raise
                     try:
-                        if expected_stdout.startswith('REGEX\n'):
+                        if expected_stdout.startswith(b'REGEX\n'):
                             self.assertIsNotNone(re.search(expected_stdout[6:],
                                                            actual_stdout))
                         else:
                             self.assertTrue(expected_stdout == actual_stdout)
                     except AssertionError:
-                        sys.stderr.write('Failed'
-                                         '\n************ expected stdout:\n')
-                        sys.stderr.write(expected_stdout)
-                        sys.stderr.write('\n************ actual stdout:\n')
-                        sys.stderr.write(actual_stdout)
+                        werr(u'Failed'
+                             u'\n************ expected stdout:\n')
+                        werr(expected_stdout)
+                        werr(u'\n************ actual stdout:\n')
+                        werr(actual_stdout)
                         if difflib and expected_stdout and actual_stdout:
-                            sys.stderr.write('\n************ diff:\n'
-                                 + '\n'.join(difflib.unified_diff(
-                                    expected_stdout.split('\n'),
-                                    actual_stdout.split('\n'),
+                            werr(u'\n************ diff:\n'
+                                 + u'\n'.join(difflib.unified_diff(
+                                    b2u(expected_stdout).split('\n'),
+                                    b2u(actual_stdout).split('\n'),
                                     'expected', 'actual', lineterm=''
                             )))
-                        sys.stderr.write('\n************ ')
+                        sys.stderr.write(u'\n************ ')
                         raise
                 return TestFunction
             TestFunction = makeTestFunction(fbase, testsuite)
             # __doc__ is used by Eric
-            line = ''
+            line = b''
             try:
-                f = open(fbase + '.lsl')
+                f = open(fbase + '.lsl', 'rb')
                 try:
                     line = f.readline()
-                    if line.endswith('\r\n'):
+                    if line.endswith(b'\r\n'):
                         line = line[:-2]
-                    elif line[-1:] in ('\r', '\n'):
+                    elif line[-1:] in (b'\r', b'\n'):
                         line = line[:-1]
                 finally:
                     f.close()
             except IOError as e:
                 if e.errno != 2:
                     raise
-            TestFunction.__doc__ = line[3:] if line.startswith('// ') else None
+            TestFunction.__doc__ = (b2u(line[3:]) if line.startswith(b'// ')
+                                    else None)
 
             TestFunction.__name__ = ('test_' + testsuite + '__'
                 + os.path.basename(fbase).replace('-','_'))
             fail = tryRead(fbase + '.fail')
             if fail is not None:
                 if fail:
-                    TestFunction.__doc__ = fail
+                    TestFunction.__doc__ = b2u(fail)
                 TestFunction = unittest.expectedFailure(TestFunction)
             else:
                 skip = tryRead(fbase + '.skp')
@@ -786,3 +791,4 @@ def generateScriptTests():
 generateScriptTests()
 if __name__ == '__main__':
     unittest.main(argv = sys.argv)
+#UnitTestRegression().test_Regression__multiline_string()
diff --git a/strutil.py b/strutil.py
new file mode 100644
index 0000000..aae6159
--- /dev/null
+++ b/strutil.py
@@ -0,0 +1,85 @@
+#    (C) Copyright 2015-2019 Sei Lisa. All rights reserved.
+#
+#    This file is part of LSL PyOptimizer.
+#
+#    LSL PyOptimizer is free software: you can redistribute it and/or
+#    modify it under the terms of the GNU General Public License as
+#    published by the Free Software Foundation, either version 3 of the
+#    License, or (at your option) any later version.
+#
+#    LSL PyOptimizer is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>.
+
+# String <-> Bytes conversion and output utilities
+
+import sys
+if sys.hexversion >= 0x3000000:
+    unicode = str
+    unichr = chr
+    def str2u(s, enc=None):
+        """Return s unchanged: a native Python 3 str is already Unicode."""
+        return s  # enc is ignored
+
+    def str2b(s, enc=None):
+        """Convert a native Python3 str to bytes, with the given encoding."""
+        # enc: codec name or stream with .encoding; None/missing means UTF-8.
+        return s.encode(getattr(enc, 'encoding', enc) or 'utf8',
+                        'backslashreplace')
+
+    def u2str(s, enc=None):
+        """Return s unchanged: Unicode is the native str type on Python 3."""
+        return s  # enc is ignored
+
+    def b2str(s, enc=None):
+        """Decode bytes to native Python 3 str; enc is a codec name or stream."""
+        return s.decode(getattr(enc, 'encoding', enc) or 'utf8',  # UTF-8 if unset
+                        'backslashreplace')  # NOTE(review): needs py>=3.5?
+
+else:
+    def str2u(s, enc=None):
+        """Convert a native Python2 str to Unicode (bad bytes become U+FFFD)."""
+        return s.decode(getattr(enc, 'encoding', enc) or 'utf8',
+                        'replace')  # py2 'backslashreplace' is encode-only
+
+    def str2b(s, enc=None):
+        """Return s unchanged: the native Python 2 str is already bytes."""
+        return s  # enc is ignored
+
+    def u2str(s, enc=None):
+        """Convert a Unicode string to native Python 2 str."""
+        # enc: codec name or stream with .encoding; None/missing means UTF-8.
+        return s.encode(getattr(enc, 'encoding', enc) or 'utf8',
+                        'backslashreplace')
+
+    def b2str(s, enc=None):
+        """Return s unchanged: bytes is the native str type on Python 2."""
+        return s  # enc is ignored
+
+def b2u(s, enc=None):
+    """Convert bytes to Unicode by way of the native str type."""
+    return str2u(b2str(s, enc), enc)  # enc is handed to both conversion steps
+
+def u2b(s, enc=None):
+    """Convert Unicode to bytes by way of the native str type."""
+    return u2str(str2b(s, enc), enc)  # enc is handed to both conversion steps
+
+def any2b(s, enc=None):
+    """Bytes or Unicode to Bytes; bytes (and subclasses) pass through as is."""
+    return s if isinstance(s, bytes) else u2b(s, enc)
+
+def any2u(s, enc=None):
+    """Bytes or Unicode to Unicode; Unicode (and subclasses) pass through."""
+    return s if isinstance(s, unicode) else b2u(s, enc)
+
+def werr(s):
+    """Write a bytes or Unicode string to stderr, converted with any2u."""
+    sys.stderr.write(any2u(s, sys.stderr))  # the stream is passed as enc
+
+def wout(s):
+    """Write a bytes or Unicode string to stdout, converted with any2u."""
+    sys.stdout.write(any2u(s, sys.stdout))  # the stream is passed as enc
diff --git a/unit_tests/json.py b/unit_tests/json.py
index 30d4012..37864ce 100644
--- a/unit_tests/json.py
+++ b/unit_tests/json.py
@@ -1,4 +1,5 @@
 import sys
+from strutil import *
 from lslopt.lslfuncs import *
 
 tests = 0
@@ -7,7 +8,6 @@ errors = 0
 # Begin JSON tests from http://wiki.secondlife.com/wiki/Json_usage_in_LSL/TestScript
 def verify(msg, result, expected):
     global tests
-    werr = sys.stderr.write
     tests += 1
     if expected != result:
         global errors
@@ -356,7 +356,6 @@ def test_jira_fixes():
     maint3081();
 
 def run_tests():
-    werr = sys.stderr.write
     # JSON tests from the wiki
     test_types();
     test_get_value();