# (C) Copyright 2015-2021 Sei Lisa. All rights reserved.
#
# This file is part of LSL PyOptimizer.
#
# LSL PyOptimizer is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# LSL PyOptimizer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>.

# JSON functions

import re
import math
from lslopt.lslcommon import *
from lslopt.lslbasefuncs import llStringTrim, fs, fl, InternalTypecast

# INCOMPATIBILITY NOTE: The JSON functions in SL have very weird behaviour
# in corner cases. Despite our best efforts, that behaviour is not replicated
# here, as doing so proved to be too difficult to investigate and implement.
# The functions in here behave somewhat more sanely in these corner cases than
# in SL, and may therefore fail to reproduce the same results as SL does.
# If you wish to maintain compatibility, you can disable the JSON functions
# by commenting out the 'from lsljson import *' line in lslfuncs.py.

JSON_INVALID = u'\uFDD0'
JSON_OBJECT = u'\uFDD1'
JSON_ARRAY = u'\uFDD2'
JSON_NUMBER = u'\uFDD3'
JSON_STRING = u'\uFDD4'
JSON_NULL = u'\uFDD5'
JSON_TRUE = u'\uFDD6'
JSON_FALSE = u'\uFDD7'
JSON_DELETE = u'\uFDD8'

JSON_APPEND = -1

# Characters that InternalJsonQuote escapes, and their escaped forms.
jsonesc_re = re.compile(u'[\x08\x09\x0A\x0C\x0D"/\\\\]')
jsonesc_dict = {u'\x08':u'\\b', u'\x09':u'\\t', u'\x0A':u'\\n', u'\x0C':u'\\f',
                u'\x0D':u'\\r', u'"':u'\\"', u'/':u'\\/', u'\\':u'\\\\'}
# Letters with a special meaning after a backslash when unquoting; any other
# escaped character stands for itself.
jsonunesc_dict = {u'b':u'\x08', u't':u'\x09', u'n':u'\x0A', u'f':u'\x0C',
                  u'r':u'\x0D'}

# LSL JSON numbers differ from standard JSON numbers in many respects:
# Numbers starting with 0 are allowed, e.g. 01.3e4, 00042
# .5 is allowed.
# 1e+0 is NOT allowed (the + after the e, to be precise). BUG-6466.
# . is allowed, as is -.e-0 etc.
# 1E is allowed.
# E.2 is allowed.
# E is allowed.
# 1E-1.2 is allowed.
# In general, the rule seems to be: at most one 'E' (optionally followed by a
# '-') and one '.', with optional digits interspersed and an optional initial
# minus sign.
#
# Our RE below checks for the two possible orders of '.' and 'E'. One branch
# must have a mandatory 'E'; in the other everything is optional but it must
# have at least 1 character (done by the lookahead assertion).
#
# The capturing groups serve to check whether the first variant was taken, and
# whether there is something after the digits in the second variant. If both
# are empty, then the match is just one or more digits preceded by an optional
# minus sign (i.e. an integer). That's used by llJson2List to return integer
# elements when appropriate.

# Real JSON number parser:
#jsonnum_re = re.compile(str2u(
#    r'-?(?:[1-9][0-9]*|0)(?:\.[0-9]+)?(?:[Ee][+-]?[0-9]+)?'
#    ))

# BUG-6466 active:
jsonnumbug_re = re.compile(str2u(r'''
    -?(?:
      [0-9]*([Ee])-?[0-9]*\.?[0-9]*
      |(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*)
      )
    '''), re.X)

# BUG-6466 fixed:
# The new RE is just a modified version of the crap, allowing + exponents and
# disallowing zeros, sometimes even when legal (e.g. 0e0)
#jsonnum_re = re.compile(str2u(r'''
#    -?(?:
#      (?=[1-9]|\.(?:[^e]|$)
#       |0(?:[^0-9e]|$))[0-9]*([Ee])[+-]?[0-9]*\.?[0-9]*
#      |(?=[1-9]|\.(?:[^e]|$)
#       |0(?:[^0-9e]|$))[0-9]*(\.?[0-9]*(?:[Ee][+-]?)?[0-9]*)
#      )
#    '''), re.X)
# They've fixed BUG-6657 by bringing BUG-6466 back to life.
jsonnum_re = re.compile(str2u(r'''
    -?(?:
      [0-9]*([Ee])-?[0-9]*\.?[0-9]*
      |(?=[0-9Ee.])[0-9]*(\.?[0-9]*(?:[Ee]-?)?[0-9]*)
      )
    '''), re.X)

jsonstring_re = re.compile(str2u(r'"(?:[^"\\]|\\.)*"'))

# This might need some explanation. The ] and - are included in the first
# set, the ] in the first after the ^ and the - in the last positions of
# the set as required by RE syntax. The [ is part of it and isn't special,
# though it confuses things. The set comprises any character not in
#  -{}[],:"0123456789
# The second set comprises zero or more characters not in ,:]}
#word_re = re.compile(str2u(r'[^][{}0-9",:-][^]},:]*'))
# Screw that, we're using just a fallback.
jsoncatchall_re = re.compile(u'(.*?)[\x09\x0A\x0B\x0C\x0D ]*(?:[]},]|$)')

# Up to nine leading digits: the numeric prefix of a string path element.
digits_re = re.compile(u'[0-9]{1,9}')


class EInternalJsonInvalid(Exception):
    """Used to force return of JSON_INVALID from child functions"""


def InternalJsonQuote(s):
    """Return s wrapped in double quotes, with the characters listed in
    jsonesc_dict replaced by their backslash escapes.
    """
    escaped = jsonesc_re.sub(lambda m: jsonesc_dict[m.group()], s)
    return u'"' + escaped + u'"'


def InternalJsonUnquote(s):
    r"""Relaxed unquote with LSL rules.

    Assumes the string starts and ends in a double quote (and is at least
    two characters long); it may contain bare double quotes and may end in
    an escaped quote too (i.e. be malformed). For instance the input
    "a"b\"  is accepted, and the result is  a"b\  (the dangling backslash
    is kept verbatim).
    """
    assert s != u''
    assert s[0] == s[-1] == u'"' and s[1:2]

    ret = u''
    esc = False
    for c in s[1:-1]:
        if esc:
            # Known escape letters are translated; anything else is literal.
            ret += jsonunesc_dict.get(c, c)
            esc = False
        elif c == u'\\':
            esc = True
        else:
            ret += c
    if esc:
        # The closing quote was itself escaped: keep the backslash.
        return ret + u'\\'
    return ret


def InternalJsonUnquoteX(s):
    """Rigorous unquote; checks for quotes at the beginning and end only.

    Returns the unquoted, unescaped contents when s is exactly one quoted
    string; otherwise returns s unchanged.
    """
    esc = last = False
    first = True

    ret = u''
    for c in s:
        if last:
            # Something follows the closing quote: malformed.
            break
        if esc:
            # Only a missing key is expected here, so use .get() rather than
            # a catch-all handler.
            ret += jsonunesc_dict.get(c, c)
            esc = False
            first = False
        elif first:
            if c != u'"':
                break
            first = False
        elif c == u'"':
            last = True
            first = False
        elif c == u'\\':
            esc = True
        else:
            ret += c
    else:
        # Loop ran to completion (no break): valid only if the closing quote
        # was the very last character.
        if not first and last:
            return ret
    return s # malformed string, return the original


def InternalJsonF2S(f):
    """Format a float for JSON output: 'nan', 'inf', '-inf' or six decimals."""
    if math.isnan(f):
        return u'nan'
    if math.isinf(f):
        return u'inf' if f > 0 else u'-inf'
    return u'%.6f' % f


def InternalJsonScanMatching(json, idx):
    """Shortcut: scan for a matching pair of {} or [] with proper nesting and
    string handling, with no validity check other than well-formedness,
    meaning all {} or [] must match.

    json[idx] must be the opening character. Returns the index just past the
    matching closing character, or None if there is no match. Only the same
    kind of bracket as the opening one is tracked.
    """
    opener = json[idx]
    closer = u'}' if opener == u'{' else u']'
    level = 1
    in_string = False
    esc = False
    for i in xrange(idx+1, len(json)):
        c = json[i]
        if in_string:
            if esc:
                esc = False
            elif c == u'\\':
                esc = True
            elif c == u'"':
                in_string = False
        elif c == u'"':
            in_string = True
        elif c == opener:
            level += 1
        elif c == closer:
            level -= 1
            if not level:
                return i+1
    return None


def InternalElement2Json(elem, ParseNumbers = True):
    """Convert a list element to its JSON text representation.

    Strings are trimmed first; then the JSON_* constants, already-quoted
    strings, the literals null/false/true and anything delimited by [] or {}
    are passed through (or translated), numbers are passed through only when
    ParseNumbers is true, and everything else is quoted.
    """
    telem = type(elem)
    # Exact type checks on purpose: a Key is handled separately below.
    if telem == unicode:
        elem = llStringTrim(elem, 3) # STRING_TRIM
        if elem == u'':
            return u'""'
        # Yes, these are checked after trimming. Don't facepalm too hard.
        if elem == JSON_NULL:
            return u'null'
        if elem == JSON_TRUE:
            return u'true'
        if elem == JSON_FALSE:
            return u'false'
        if (elem[0] == elem[-1] == u'"' and elem[1:2]
            or elem in ('null','false','true')
            or elem[0] == u'[' and elem[-1] == u']'
            or elem[0] == u'{' and elem[-1] == u'}'
           ):
            return elem

        if ParseNumbers:
            match = (jsonnumbug_re if 6466 in Bugs else jsonnum_re).match(elem)
            if match and match.end() == len(elem):
                return elem

        if elem == JSON_INVALID:
            return u''

        return InternalJsonQuote(elem)

    if telem == Key:
        return u'"' + unicode(elem) + u'"'
    if telem in (Vector, Quaternion):
        return u'"<' + u', '.join([InternalJsonF2S(x) for x in elem]) + u'>"'
    if telem == float:
        return InternalJsonF2S(elem)
    # Integer
    return unicode(elem)


def InternalJsonGetToken(json, idx):
    """Read the token that starts at or after position idx, skipping leading
    whitespace.

    Returns a tuple (start, end, kind). kind is None at end of input, the
    character itself for one of  , : { } [ ]  or one of JSON_STRING,
    JSON_NUMBER, JSON_NULL, JSON_TRUE, JSON_FALSE, JSON_INVALID.
    """
    num_re = jsonnumbug_re if 6466 in Bugs else jsonnum_re

    L = len(json)
    while idx < L and json[idx] in u'\x09\x0A\x0B\x0C\x0D ':
        idx += 1

    if idx >= L:
        return (idx, idx, None)

    c = json[idx]
    if c in u',:{}[]':
        return (idx, idx+1, c)

    match = jsonstring_re.match(json, idx)
    if match:
        return (idx, match.end(), JSON_STRING)

    match = num_re.match(json, idx)
    if match:
        return (idx, match.end(), JSON_NUMBER)

    # Fallback; meant to match always, even if with an empty group 1.
    match = jsoncatchall_re.match(json, idx)
    s = match.group(1)
    if s == u'null':
        return (idx, match.end(1), JSON_NULL)
    if s == u'true':
        return (idx, match.end(1), JSON_TRUE)
    if s == u'false':
        return (idx, match.end(1), JSON_FALSE)
    return (idx, match.end(1), JSON_INVALID)


def InternalJsonGetTokenFull(json, idx):
    """Like InternalJsonGetToken, but an opening { or [ that has a matching
    closing character is returned as a single JSON_OBJECT or JSON_ARRAY token
    spanning the whole container.
    """
    ret = InternalJsonGetToken(json, idx)
    if ret[2] in (u'{', u'['):
        match = InternalJsonScanMatching(json, ret[0])
        if match is not None:
            return (ret[0], match, JSON_OBJECT if ret[2] == u'{' else JSON_ARRAY)
    return ret


def InternalJsonPathMatches(key, pathelem):
    """Tell whether the current key (array index or object key) matches a
    path element.

    Raises EInternalJsonInvalid when an object key is compared to an integer
    path element, or when an array index is compared to a string that does
    not start with a digit.
    """
    if (type(key) == type(pathelem) == int
        or type(key) == unicode and isinstance(pathelem, unicode)
       ):
        return key == pathelem
    if type(key) == unicode and type(pathelem) == int:
        raise EInternalJsonInvalid

    # one combo remains - key is numeric and pathelem is unicode or Key
    match = digits_re.match(pathelem)
    if not match:
        raise EInternalJsonInvalid
    return key == int(match.group())


def InternalJsonFindValue(json, tgtpath, ReturnsToken, SetRules = False):
    """Locate the value that the path tgtpath designates within json.

    Returns JSON_INVALID on failure. On success, returns the token tuple
    (start, end, kind) if ReturnsToken is true; otherwise the value: the
    unquoted string, the text of a number, object or array, or one of
    JSON_TRUE, JSON_FALSE, JSON_NULL. SetRules is currently unused.
    """

    # Building a function that meets the strange requisites of LL's json is not easy.
    # These requisites include syntax-checking of all items at the current level,
    # but not of items at a deeper nesting level.

    # Making it one-pass iterative O(len) instead of recursive O(depth*len) is even
    # more of a challenge, especially with these constraints.

    token = InternalJsonGetToken(json, 0)

    if tgtpath == []:
        # No nesting receives special treatment.
        if token[2] in (JSON_NUMBER, JSON_STRING, JSON_NULL, JSON_TRUE, JSON_FALSE, JSON_INVALID):
            if InternalJsonGetToken(json, token[1])[2] is None:
                if ReturnsToken:
                    return token
                if token[2] == JSON_NUMBER:
                    return json[token[0]:token[1]]
                if token[2] == JSON_STRING:
                    return InternalJsonUnquote(json[token[0]:token[1]])
                if token[2] == JSON_INVALID:
                    # Accept malformed strings if they start and end in quotes
                    s = json[token[0]:token[1]]
                    if s[1:2] and s[0] == s[-1] == u'"':
                        return InternalJsonUnquote(s)
                return token[2]
            return JSON_INVALID
        if token[2] not in (u'{', u'['):
            return JSON_INVALID

        json = llStringTrim(json, 2) # STRING_TRIM_RIGHT
        if json[-1] == u'}' and token[2] == u'{':
            if ReturnsToken:
                return (token[0], len(json), JSON_OBJECT)
            return json[token[0]:]
        if json[-1] == u']' and token[2] == u'[':
            if ReturnsToken:
                return (token[0], len(json), JSON_ARRAY)
            return json[token[0]:]
        return JSON_INVALID

    # This would be the code if there was proper scanning.
    #match = InternalJsonScanMatching(json, token[0])
    #if match is None or InternalJsonGetToken(json, match)[2] is not None:
    #    return JSON_INVALID
    #if ReturnsType: # this has been changed tho' - review if ever used
    #    return JSON_OBJECT if token[2] == u'{' else JSON_ARRAY
    #return json[token[0]:match]

    if token[2] not in (u'{', u'['):
        return JSON_INVALID

    # Follow the path
    L = len(tgtpath)
    # For the current position, matchlvl keeps track of how many levels are
    # matched. When matchlvl == L, we are at the item of interest.
    # For example: if we're at the ! in [1.0, "y", true, [1, ![6], {"a":5}]]
    # and the path is [3, 2, "a"], matchlvl will be 1 (meaning the first level
    # of the path, i.e. position 3, is matched, but we're not in sub-position
    # 2 yet).
    matchlvl = 0
    ret = None # the target token, if found, or None if not

    # Keeps track of what we have opened so far.
    stk = [token[2]]

    # This tracks the current key within an array or object. Here we assume
    # it's an array; if it's an object, the item key will replace it anyway.
    curkey = 0

    just_open = True
    just_closed = False

    # Load next token
    token = InternalJsonGetToken(json, token[1])

    try:
        while True:
            # Process value if it can be present
            kind = token[2]
            if not (just_closed or just_open and kind in (u'}', u']')):
                # Item processing.
                # Not entering here immediately after a } or ] (just_closed)
                # or after a { or [ followed by } or ] (just_open...)
                just_open = False
                # kind is None when the input ends prematurely; it must be
                # tested first because 'None in <string>' raises TypeError.
                if kind is None or kind in u':,]}' or kind == JSON_INVALID:
                    return JSON_INVALID
                if stk[-1] == u'{':
                    # Read the current key
                    if kind != JSON_STRING:
                        return JSON_INVALID
                    colon = InternalJsonGetToken(json, token[1])
                    if colon[2] != u':':
                        return JSON_INVALID
                    curkey = InternalJsonUnquote(json[token[0]:token[1]])
                    token = InternalJsonGetToken(json, colon[1])
                    kind = token[2]
                    if kind is None:
                        # Input ends right after the colon.
                        return JSON_INVALID
                if matchlvl < L and InternalJsonPathMatches(curkey, tgtpath[matchlvl]):
                    # Descend to this level
                    matchlvl += 1
                    ret = None # because e.g. llJsonGetValue("{\"a\":[1],\"a\":2}",["a",0])==JSON_INVALID
                    if matchlvl == L:
                        if kind in u'{[':
                            match = InternalJsonScanMatching(json, token[0])
                            if match is None:
                                return JSON_INVALID
                            token = (token[0], match, JSON_OBJECT if token[2] == u'{' else JSON_ARRAY)
                        ret = token
                        matchlvl -= 1
                    elif kind in u'{[':
                        stk.append(token[2])
                        curkey = 0
                        just_open = True
                        token = InternalJsonGetToken(json, token[1])
                        continue
                    else:
                        # The path wants to go deeper but this item is not a
                        # container, so nothing was entered. Undo the match
                        # level; otherwise the following siblings would be
                        # compared against the next path element.
                        matchlvl -= 1
                else:
                    # We're skipping the element
                    if kind in u'[{':
                        match = InternalJsonScanMatching(json, token[0])
                        if match is None:
                            return JSON_INVALID
                        token = (None, match) # HACK: shortcut to: (token[0], match, JSON_OBJECT if kind == u'{' else JSON_ARRAY)
                        just_closed = True

                token = InternalJsonGetToken(json, token[1]) # prepare next token
                kind = token[2]

            just_closed = False

            # Process comma if it can be present
            if not just_open:
                if kind == u',':
                    token = InternalJsonGetToken(json, token[1]) # load next token
                    if stk[-1] == u'[':
                        curkey += 1
                    continue

            if kind == u'}' and stk[-1] == u'{' or kind == u']' and stk[-1] == u'[':
                stk = stk[:-1]
                matchlvl -= 1
                if stk == []:
                    if InternalJsonGetToken(json, token[1])[2] is None:
                        break # Yay! end of job!
                    return JSON_INVALID # No yay - something at end of string
                just_closed = True
                token = InternalJsonGetToken(json, token[1])
                continue

            return JSON_INVALID

    except EInternalJsonInvalid:
        return JSON_INVALID

    if ret is None:
        return JSON_INVALID
    if ReturnsToken:
        return ret
    if ret[2] == JSON_STRING:
        return InternalJsonUnquote(json[ret[0]:ret[1]])
    if ret[2] in (JSON_NUMBER, JSON_OBJECT, JSON_ARRAY):
        return json[ret[0]:ret[1]]
    return ret[2] # JSON_TRUE, JSON_FALSE, JSON_NULL


def InternalJson2Elem(json):
    """Convert the text of a single JSON value into a list element.

    null/false/true become the JSON_* constants, integers are clamped to the
    signed 32-bit range, other numbers go through InternalTypecast as float,
    quote-delimited text is unquoted and anything else is returned verbatim.
    """
    if json == u'': # checking this now lets us check for json[0] and json[-1] later
        return u''

    if json == u'null':
        return JSON_NULL

    if json == u'false':
        return JSON_FALSE

    if json == u'true':
        return JSON_TRUE

    match = (jsonnumbug_re if 6466 in Bugs else jsonnum_re).match(json)
    if match and match.end() == len(json):
        # HACK: Use our RE to know if the number is an integer
        if not match.group(1) and not match.group(2):
            # we have just digits with optional minus sign, i.e. an integer
            if len(json) > 11: # surely overflown
                if json[0] == u'-':
                    return -2147483648
                return 2147483647
            # a bit harder to test; we could check in ASCII to avoid conversion
            # to long in 32 bit systems, but it's probably not worth the effort
            elem = int(json)
            if elem > 2147483647:
                return 2147483647
            if elem < -2147483648:
                return -2147483648
            return elem
        return InternalTypecast(json, float, InList=False, f32=True)

    # Malformed strings are valid, e.g. "a\" (final \" is converted into a \)
    if json[0] == json[-1] == u'"' and json[1:2]: # the latter check ensures len(json) > 1
        return InternalJsonUnquote(json)

    return json


def llJson2List(json):
    """Convert the top level of a JSON array or object into a list.

    An array yields its elements; an object yields alternating keys and
    values; anything else yields a one-element list. On a syntax error the
    result is always the list [JSON_INVALID].
    """
    json = fs(json)
    json = llStringTrim(json, 3) # STRING_TRIM

    if json == u'':
        return []

    if json[0] == u'[' and json[-1] == u']':
        # Array can of worms. Not all LSL quirks are implemented.
        ret = []
        token = InternalJsonGetTokenFull(json, 1)
        if token[2] == u']' and token[1] == len(json):
            return ret
        if token[2] == u':':
            return [JSON_INVALID]
        if token[2] == u',':
            ret.append(u'')
        else:
            ret.append(InternalJson2Elem(json[token[0]:token[1]]))
            token = InternalJsonGetTokenFull(json, token[1])
        while True:
            if token[2] == u']' and token[1] == len(json):
                break
            elif token[2] != u',':
                return [JSON_INVALID]
            token = InternalJsonGetTokenFull(json, token[1])
            if token[2] == u',' or token[2] == u']' and token[1] == len(json):
                # Empty element between commas or before the closing bracket.
                ret.append(u'')
            else:
                if token[2] == u':':
                    # Same result as every other error path in this
                    # function: a list, not the bare constant.
                    return [JSON_INVALID]
                ret.append(InternalJson2Elem(json[token[0]:token[1]]))
                token = InternalJsonGetTokenFull(json, token[1])
        return ret

    if json[0] == u'{' and json[-1] == u'}':
        # Object can of worms. Worse than array. Not all LSL quirks are implemented.

        # Parse this grammar:
        # object: '{' complete_list incomplete_element '}' $
        # complete_list: <empty> | complete_list complete_element ','
        # complete_element: nonempty_string ':' value
        # incomplete_element: <empty> | value | string ':' value
        # string: '"' '"' | nonempty_string
        #
        # That allows:
        # {"a":1,"b":2,} # incomplete_element is empty
        # {"a":1,"b":2}  # "b" is an incomplete_element
        # {2}            # complete_list empty
        # {}             # both empty
        # etc.

        ret = []
        token = InternalJsonGetTokenFull(json, 1)
        if token[2] == u'}' and token[1] == len(json):
            return ret
        if token[2] in (u':', u','):
            return [JSON_INVALID]

        while True:
            k = u''
            if token[2] == u'}' and token[1] == len(json):
                ret.append(k)
                ret.append(k)
                return ret
            if token[2] == JSON_STRING:
                colon = InternalJsonGetTokenFull(json, token[1])
                if colon[2] == u':':
                    k = InternalJsonUnquote(json[token[0]:token[1]])
                    token = InternalJsonGetTokenFull(json, colon[1])
            if token[2] in (u',', u':'):
                return [JSON_INVALID]
            ret.append(k)
            ret.append(InternalJson2Elem(json[token[0]:token[1]]))
            token = InternalJsonGetTokenFull(json, token[1])
            if token[2] == u'}' and token[1] == len(json):
                return ret
            if token[2] != u',' or k == u'':
                return [JSON_INVALID]
            token = InternalJsonGetTokenFull(json, token[1])

    return [InternalJson2Elem(json)]


def llJsonGetValue(json, lst):
    """Return the value found in json at the path lst, or JSON_INVALID."""
    json = fs(json)
    lst = fl(lst)
    return InternalJsonFindValue(json, lst, ReturnsToken=False)


# llJsonSetValue was finally not implemented. This is a failed attempt
# at tackling it in the way that LSL does it. The attempt is kept inside a
# string literal, so none of it is executed.
'''def InternalJsonRecuriveSetValue(json, lst, val):
    # We give up and make it recursive

    if lst == []:
        if val == JSON_DELETE:
            return val
        return InternalElement2Json(val, ParseNumbers=True)

    ret = None
    lst0 = lst[0]
    tlst0 = type(lst0)
    if tlst0 == Key:
        tlst0 = unicode

    if val != JSON_DELETE:

        json = llStringTrim(json, 3) # STRING_TRIM
        if tlst0 == int and json[0:1] == u'[' and json[-1:] == u']':
            ret = []
            close = u']'
        if tlst0 == unicode and json[0:1] == u'{' and json[-1:] == u'}':
            ret = {}
            close = u'}'
        if ret is not None:
            if close: pass

def llJsonSetValue(json, lst, val):
    json = fs(json)
    lst = fl(lst)
    val = fs(val)
    if lst == []:
        # [] replaces the entire string no matter if it was invalid
        if val == JSON_DELETE:
            return val # this is a special case for SetValue with []
        return InternalElement2Json(val, ParseNumbers=True)
    # Needs to cope with JSON_APPEND, JSON_DELETE, lastindex+1.
    # Needs to do deep assignment.

    # Recursive works best here
    return InternalJsonRecursiveSetValue(json, lst, val)

    return u"----unimplemented----"
'''


def llJsonValueType(json, lst):
    """Return the JSON_* type constant of the value found in json at the
    path lst, or JSON_INVALID.
    """
    json = fs(json)
    lst = fl(lst)
    ret = InternalJsonFindValue(json, lst, ReturnsToken=True)
    if ret == JSON_INVALID:
        return ret
    return ret[2]


def llList2Json(kind, lst):
    """Build the JSON text of an object or array from a list.

    For JSON_OBJECT the list holds alternating keys and values, and an odd
    length gives JSON_INVALID. For JSON_ARRAY every element becomes an array
    item. Any other kind gives JSON_INVALID. The list is not modified.
    """
    kind = fs(kind)
    lst = fl(lst)

    if kind == JSON_OBJECT:
        if len(lst) & 1:
            return JSON_INVALID
        pairs = [InternalJsonQuote(lst[i]) + u':'
                 + InternalElement2Json(lst[i+1], ParseNumbers=False)
                 for i in xrange(0, len(lst), 2)]
        return u'{' + u','.join(pairs) + u'}'

    if kind == JSON_ARRAY:
        # Built with join so that the input list is left untouched (deleting
        # its first element would alter the list object received).
        items = [InternalElement2Json(elem, ParseNumbers=False)
                 for elem in lst]
        return u'[' + u','.join(items) + u']'

    return JSON_INVALID