Revamp encoding handling in lslloadlib

There were multiple encoding issues, especially on Windows. Address them by loading the file in binary and converting to Unicode for internal use. This means that the function/event tables are now Unicode.

Originally reported by @Tonaie (see #20).
This commit is contained in:
Sei Lisa 2022-12-08 13:48:11 +01:00
parent 0b7d04e5ff
commit cc55c4c23e

View file

@ -40,7 +40,7 @@ def LoadLibrary(builtins = None, fndata = None):
# Library read code # Library read code
parse_lin_re = re.compile( parse_lin_re = re.compile(str2u(
r'^\s*([a-z]+)\s+' r'^\s*([a-z]+)\s+'
r'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*(' r'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*('
r'[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*' r'[a-z]+\s+[a-zA-Z_][a-zA-Z0-9_]*'
@ -50,14 +50,15 @@ def LoadLibrary(builtins = None, fndata = None):
r'^\s*const\s+([a-z]+)' r'^\s*const\s+([a-z]+)'
r'\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*?)\s*$' r'\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*?)\s*$'
r'|' r'|'
r'^\s*(?:#.*|//.*)?$') r'^\s*(?:#.*|//.*)?$'))
parse_arg_re = re.compile(r'^\s*([a-z]+)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*$') parse_arg_re = re.compile(str2u(
parse_fp_re = re.compile(r'^\s*(-?(?=[0-9]|\.[0-9])[0-9]*' r'^\s*([a-z]+)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*$'))
r'((?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?))\s*$') parse_fp_re = re.compile(str2u(r'^\s*(-?(?=[0-9]|\.[0-9])[0-9]*'
parse_int_re = re.compile(r'^\s*(-?0x[0-9A-Fa-f]+|-?[0-9]+)\s*$') r'((?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?))\s*$'))
parse_str_re = re.compile(u'^"((?:[^"\\\\]|\\\\.)*)"$') parse_int_re = re.compile(str2u(r'^\s*(-?0x[0-9A-Fa-f]+|-?[0-9]+)\s*$'))
parse_str_re = re.compile(str2u(r'^"((?:[^"\\]|\\.)*)"$'))
f = open(builtins, 'r') f = open(builtins, 'rb')
try: try:
linenum = 0 linenum = 0
try: try:
@ -69,9 +70,10 @@ def LoadLibrary(builtins = None, fndata = None):
linenum += 1 linenum += 1
line = f.readline() line = f.readline()
if not line: break if not line: break
if line[-1] == '\n': line = line[:-1] if line.endswith(b'\r\n'): line = line[:-2]
if line.endswith(b'\n'): line = line[:-1]
try: try:
uline = str2u(line, 'utf8') line = b2u(line, 'utf8')
except UnicodeDecodeError: except UnicodeDecodeError:
warning(u"Bad Unicode in %s line %d" % (ubuiltins, linenum)) warning(u"Bad Unicode in %s line %d" % (ubuiltins, linenum))
continue continue
@ -98,10 +100,8 @@ def LoadLibrary(builtins = None, fndata = None):
for arg in arglist: for arg in arglist:
argtyp = parse_arg_re.search(arg).group(1) argtyp = parse_arg_re.search(arg).group(1)
if argtyp not in types: if argtyp not in types:
uargtyp = str2u(argtyp, 'utf8')
warning(u"Invalid type in %s, line %d: %s" warning(u"Invalid type in %s, line %d: %s"
% (ubuiltins, linenum, uargtyp)) % (ubuiltins, linenum, argtyp))
del uargtyp
bad = True bad = True
break break
args.append(argtyp) args.append(argtyp)
@ -110,22 +110,18 @@ def LoadLibrary(builtins = None, fndata = None):
name = match.group(2) name = match.group(2)
if typ == 'event': if typ == 'event':
if name in events: if name in events:
uname = str2u(name, 'utf8')
warning(u"Event at line %d was already defined in %s," warning(u"Event at line %d was already defined in %s,"
u" overwriting: %s" u" overwriting: %s"
% (linenum, ubuiltins, uname)) % (linenum, ubuiltins, name))
del uname
events[name] = {'pt':tuple(args), 'NeedsData':True} events[name] = {'pt':tuple(args), 'NeedsData':True}
else: else:
# Library functions go to the functions table. If # Library functions go to the functions table. If
# they are implemented in lslfuncs.*, they get a # they are implemented in lslfuncs.*, they get a
# reference to the implementation; otherwise None. # reference to the implementation; otherwise None.
if name in functions: if name in functions:
uname = str2u(name, 'utf8')
warning(u"Function at line %d was already defined" warning(u"Function at line %d was already defined"
u" in %s, overwriting: %s" u" in %s, overwriting: %s"
% (linenum, ubuiltins, uname)) % (linenum, ubuiltins, name))
del uname
fn = getattr(lslfuncs, name, None) fn = getattr(lslfuncs, name, None)
functions[name] = {'Kind':'f', 'Type':typ, 'uns':True, functions[name] = {'Kind':'f', 'Type':typ, 'uns':True,
'ParamTypes':args, 'NeedsData':True} 'ParamTypes':args, 'NeedsData':True}
@ -135,16 +131,12 @@ def LoadLibrary(builtins = None, fndata = None):
# constant # constant
name = match.group(5) name = match.group(5)
if name in constants: if name in constants:
uname = str2u(name, 'utf8')
warning(u"Global at line %d was already defined in %s," warning(u"Global at line %d was already defined in %s,"
u" overwriting: %s" % (linenum, ubuiltins, uname)) u" overwriting: %s" % (linenum, ubuiltins, name))
del uname
typ = match.group(4) typ = match.group(4)
if typ not in types: if typ not in types:
utyp = str2u(typ, 'utf8')
warning(u"Invalid type in %s, line %d: %s" warning(u"Invalid type in %s, line %d: %s"
% (ubuiltins, linenum, utyp)) % (ubuiltins, linenum, typ))
del utyp
continue continue
if typ == 'quaternion': if typ == 'quaternion':
typ = 'rotation' typ = 'rotation'
@ -154,7 +146,6 @@ def LoadLibrary(builtins = None, fndata = None):
elif typ == 'float': elif typ == 'float':
value = lslfuncs.F32(float(value)) value = lslfuncs.F32(float(value))
elif typ == 'string': elif typ == 'string':
value = str2u(value, 'utf8')
if parse_str_re.search(value): if parse_str_re.search(value):
esc = False esc = False
tmp = value[1:-1] tmp = value[1:-1]
@ -175,11 +166,11 @@ def LoadLibrary(builtins = None, fndata = None):
# value = Key(value) # value = Key(value)
else: else:
warning(u"Invalid string in %s line %d: %s" warning(u"Invalid string in %s line %d: %s"
% (ubuiltins, linenum, uline)) % (ubuiltins, linenum, line))
value = None value = None
elif typ == 'key': elif typ == 'key':
warning(u"Key constants not supported in %s, line %d: %s" warning(u"Key constants not supported in %s, line %d: %s"
% (ubuiltins, linenum, uline)) % (ubuiltins, linenum, line))
value = None value = None
elif typ in ('vector', 'rotation'): elif typ in ('vector', 'rotation'):
try: try:
@ -210,16 +201,16 @@ def LoadLibrary(builtins = None, fndata = None):
value = Quaternion(value) value = Quaternion(value)
except ValueError: except ValueError:
warning(u"Invalid vector/rotation syntax in %s" warning(u"Invalid vector/rotation syntax in %s"
u" line %d: %s" % (ubuiltins, linenum, uline)) u" line %d: %s" % (ubuiltins, linenum, line))
else: else:
assert typ == 'list' assert typ == 'list'
if value[0:1] != '[' or value[-1:] != ']': if value[0:1] != '[' or value[-1:] != ']':
warning(u"Invalid list value in %s, line %d: %s" warning(u"Invalid list value in %s, line %d: %s"
% (ubuiltins, linenum, uline)) % (ubuiltins, linenum, line))
elif value[1:-1].strip() != '': elif value[1:-1].strip() != '':
warning(u"Non-empty list constants not supported" warning(u"Non-empty list constants not supported"
u" in %s, line %d: %s" u" in %s, line %d: %s"
% (ubuiltins, linenum, uline)) % (ubuiltins, linenum, line))
value = None value = None
else: else:
value = [] value = []
@ -231,19 +222,19 @@ def LoadLibrary(builtins = None, fndata = None):
# Load the function data table as well. # Load the function data table as well.
parse_flag_re = re.compile(r'^\s*-\s+(?:(?:(sef)|return\s+' parse_flag_re = re.compile(str2u(r'^\s*-\s+(?:(?:(sef)|return\s+'
r'("(?:\\.|[^"])*"|<[^>]+>|[-+0-9x.e]+' # strings, vectors, numbers r'("(?:\\.|[^"])*"|<[^>]+>|[-+0-9x.e]+' # strings, vectors, numbers
r'|\[(?:[^]"]|"(?:\\.|[^"])*")*\]))' # lists r'|\[(?:[^]"]|"(?:\\.|[^"])*")*\]))' # lists
r'(?:\s+if\s+(.*\S))?' r'(?:\s+if\s+(.*\S))?'
r'|(unstable|stop|strlen|detect|touch|grab)' r'|(unstable|stop|strlen|detect|touch|grab)'
r'|(min|max|delay)\s+([-0-9.]+)' r'|(min|max|delay)\s+([-0-9.]+)'
r'|listto\s+(integer|float|string|key|vector|rotation|list)' r'|listto\s+(integer|float|string|key|vector|rotation|list)'
r')\s*$', re.I) r')\s*$'), re.I)
# TODO: "quaternion" doesn't compare equal to "rotation" even if they are # TODO: "quaternion" doesn't compare equal to "rotation" even if they are
# equivalent. Canonicalize it before comparison, to avoid false # equivalent. Canonicalize it before comparison, to avoid false
# reports of mismatches. # reports of mismatches.
f = open(fndata, 'r') f = open(fndata, 'rb')
try: try:
linenum = 0 linenum = 0
curr_fn = None curr_fn = None
@ -258,9 +249,10 @@ def LoadLibrary(builtins = None, fndata = None):
linenum += 1 linenum += 1
line = f.readline() line = f.readline()
if not line: break if not line: break
if line[-1] == '\n': line = line[:-1] if line.endswith(b'\r\n'): line = line[:-2]
if line.endswith(b'\n'): line = line[:-1]
try: try:
uline = str2u(line, 'utf8') line = b2u(line, 'utf8')
except UnicodeDecodeError: except UnicodeDecodeError:
warning(u"Bad Unicode in %s line %d" % (ufndata, linenum)) warning(u"Bad Unicode in %s line %d" % (ufndata, linenum))
continue continue
@ -270,23 +262,22 @@ def LoadLibrary(builtins = None, fndata = None):
continue continue
rettype = match_fn.group(1) if match_fn else None rettype = match_fn.group(1) if match_fn else None
if match_fn and (rettype in ('void', 'event') or rettype in types): if match_fn and (rettype in (u'void', u'event') or rettype in types):
skipping = True # until proven otherwise skipping = True # until proven otherwise
name = match_fn.group(2) name = match_fn.group(2)
uname = str2u(name, 'utf8') if (rettype == u'event' and name not in events
if (rettype == 'event' and name not in events or rettype != u'event' and name not in functions
or rettype != 'event' and name not in functions
): ):
warning(u"%s %s is not in builtins, in %s line %d," warning(u"%s %s is not in builtins, in %s line %d,"
u" skipping." u" skipping."
% (u"Function" if rettype != 'event' else u"Event", % (u"Function" if rettype != 'event' else u"Event",
uname, ufndata, linenum)) name, ufndata, linenum))
continue continue
rettype = rettype if rettype != 'void' else None rettype = rettype if rettype != 'void' else None
if 'event' != rettype != functions[name]['Type']: if 'event' != rettype != functions[name]['Type']:
warning(u"Function %s returns invalid type, in %s line %d," warning(u"Function %s returns invalid type, in %s line %d,"
u" skipping." u" skipping."
% (uname, ufndata, linenum)) % (name, ufndata, linenum))
continue continue
argnames = [] argnames = []
arglist = match_fn.group(3) arglist = match_fn.group(3)
@ -294,13 +285,13 @@ def LoadLibrary(builtins = None, fndata = None):
if rettype != 'event' if rettype != 'event'
else events[name]['pt']) else events[name]['pt'])
if arglist: if arglist:
arglist = arglist.split(',') arglist = arglist.split(u',')
if len(current_args) != len(arglist): if len(current_args) != len(arglist):
warning(u"Parameter list mismatch in %s line %d," warning(u"Parameter list mismatch in %s line %d,"
u" %s %s. Skipping." u" %s %s. Skipping."
% (ufndata, linenum, % (ufndata, linenum,
u"function" if rettype != 'event' u"function" if rettype != u'event'
else u"event", uname)) else u"event", name))
continue continue
bad = False # used to 'continue' at this loop level bad = False # used to 'continue' at this loop level
@ -312,8 +303,8 @@ def LoadLibrary(builtins = None, fndata = None):
warning(u"Parameter list mismatch in %s line %d," warning(u"Parameter list mismatch in %s line %d,"
u" %s %s. Skipping." u" %s %s. Skipping."
% (ufndata, linenum, % (ufndata, linenum,
u"function" if rettype != 'event' u"function" if rettype != u'event'
else u"event", uname)) else u"event", name))
bad = True bad = True
break break
argnames.append(argname) argnames.append(argname)
@ -326,7 +317,7 @@ def LoadLibrary(builtins = None, fndata = None):
else events[name]): else events[name]):
warning(u"Duplicate %s %s in %s line %d. Skipping." warning(u"Duplicate %s %s in %s line %d. Skipping."
% (u"function" if rettype != 'event' else u"event", % (u"function" if rettype != 'event' else u"event",
uname, ufndata, linenum)) name, ufndata, linenum))
continue continue
# passed all tests # passed all tests
@ -344,11 +335,10 @@ def LoadLibrary(builtins = None, fndata = None):
if curr_fn is None and not skipping: if curr_fn is None and not skipping:
warning(u"Flags present before any function or event" warning(u"Flags present before any function or event"
u" in %s line %d: %s" u" in %s line %d: %s"
% (ufndata, linenum, uline)) % (ufndata, linenum, line))
skipping = True skipping = True
continue continue
if not skipping: if not skipping:
ucurr_fn = str2u(curr_fn, 'utf8')
if match_flag.group(1): if match_flag.group(1):
# SEF # SEF
# We don't handle conditions yet. Take the # We don't handle conditions yet. Take the
@ -358,7 +348,7 @@ def LoadLibrary(builtins = None, fndata = None):
warning(u"Events do not support conditions" warning(u"Events do not support conditions"
u" in SEF flags, in line %d, event %s." u" in SEF flags, in line %d, event %s."
u" Omitting: %s" u" Omitting: %s"
% (linenum, ucurr_fn, uline)) % (linenum, curr_fn, line))
continue continue
elif curr_ty == 'event': elif curr_ty == 'event':
events[curr_fn]['SEF'] = True events[curr_fn]['SEF'] = True
@ -375,7 +365,7 @@ def LoadLibrary(builtins = None, fndata = None):
warning(u"Events only support a few flags" warning(u"Events only support a few flags"
u", in line %d, event %s." u", in line %d, event %s."
u" Omitting: %s" u" Omitting: %s"
% (linenum, ucurr_fn, uline)) % (linenum, curr_fn, line))
continue continue
elif match_flag.group(2): elif match_flag.group(2):
pass # return not handled yet pass # return not handled yet
@ -405,14 +395,14 @@ def LoadLibrary(builtins = None, fndata = None):
else: else:
warning(u"Type mismatch or value error in %s" warning(u"Type mismatch or value error in %s"
u" line %d: %s" u" line %d: %s"
% (ufndata, linenum, uline)) % (ufndata, linenum, line))
continue continue
else: # delay else: # delay
value = parse_fp_re.search(match_flag.group(6)) value = parse_fp_re.search(match_flag.group(6))
if not value: if not value:
warning(u"Invalid delay value in %s" warning(u"Invalid delay value in %s"
u" line %d: %s" u" line %d: %s"
% (ufndata, linenum, uline)) % (ufndata, linenum, line))
continue continue
value = float(value.group(1)) # no need to F32 value = float(value.group(1)) # no need to F32
@ -420,7 +410,7 @@ def LoadLibrary(builtins = None, fndata = None):
warning(u"Side-effect-free function" warning(u"Side-effect-free function"
u" %s contradicts delay, in %s" u" %s contradicts delay, in %s"
u" line %d" u" line %d"
% (ucurr_fn, ufndata, linenum)) % (curr_fn, ufndata, linenum))
continue continue
functions[curr_fn]['delay'] = value functions[curr_fn]['delay'] = value
@ -431,7 +421,7 @@ def LoadLibrary(builtins = None, fndata = None):
pass pass
else: else:
warning(u"Syntax error in %s line %d, skipping: %s" warning(u"Syntax error in %s line %d, skipping: %s"
% (ufndata, linenum, uline)) % (ufndata, linenum, line))
continue continue
finally: finally:
@ -439,27 +429,25 @@ def LoadLibrary(builtins = None, fndata = None):
# Post-checks # Post-checks
for i in functions: for i in functions:
ui = str2u(i, 'utf8')
if 'NeedsData' in functions[i]: if 'NeedsData' in functions[i]:
del functions[i]['NeedsData'] del functions[i]['NeedsData']
warning(u"Library data, file %s: Function %s has no data." warning(u"Library data, file %s: Function %s has no data."
% (ufndata, ui)) % (ufndata, i))
if 'min' in functions[i] and 'max' in functions[i]: if 'min' in functions[i] and 'max' in functions[i]:
if functions[i]['min'] > functions[i]['max']: if functions[i]['min'] > functions[i]['max']:
warning(u"Library data: Function %s has min > max:" warning(u"Library data: Function %s has min > max:"
u" min=%s max=%s, removing both." u" min=%s max=%s, removing both."
% (ui, str2u(repr(functions[i]['min']), 'utf8'), % (i, str2u(repr(functions[i]['min']), 'utf8'),
repr(functions[i]['max']))) repr(functions[i]['max'])))
del functions[i]['min'], functions[i]['max'] del functions[i]['min'], functions[i]['max']
if 'SEF' in functions[i] and 'delay' in functions[i]: if 'SEF' in functions[i] and 'delay' in functions[i]:
warning(u"Library data: Side-effect-free function %s contradicts" warning(u"Library data: Side-effect-free function %s contradicts"
u" delay. Removing SEF." % ui) u" delay. Removing SEF." % i)
del functions[i]['SEF'] del functions[i]['SEF']
for i in events: for i in events:
ui = str2u(i, 'utf8')
if 'NeedsData' in events[i]: if 'NeedsData' in events[i]:
del events[i]['NeedsData'] del events[i]['NeedsData']
warning(u"Library data, file %s: Event %s has no data." warning(u"Library data, file %s: Event %s has no data."
% (ufndata, ui)) % (ufndata, i))
return events, constants, functions return events, constants, functions