Fix UTF-8 sequences in the surrogate range being accepted as valid.

Our UTF-8 validity checker failed to recognize that characters in the surrogate range (U+D800-U+DFFF) are invalid. Fortunately, Python 2 tolerates such sequences, so it doesn't crash (Python 3 fixed its handling of that range as well). Unfortunately, SL does not tolerate them, so we fix the checker. Added corresponding unit tests.
2025-07-01 23:58:20 +00:00 · 2016-12-13 03:10:01 +01:00 · 2016-12-13 03:10:01 +01:00 · b593141f9f
commit b593141f9f
parent ae984169ad
2 changed files with 12 additions and 1 deletions
--- a/lslopt/lslbasefuncs.py
+++ b/lslopt/lslbasefuncs.py
@ -460,8 +460,9 @@ def InternalUTF8toString(s):
        if partialchar:
            if 0x80 <= o < 0xC0 and (
                    partialchar[1:2]
-                    or b'\xC2' <= partialchar < b'\xF4' and partialchar not in b'\xE0\xF0'
+                    or b'\xC2' <= partialchar < b'\xF4' and partialchar not in b'\xE0\xED\xF0'
                    or partialchar == b'\xE0' and o >= 0xA0
+                    or partialchar == b'\xED' and o < 0xA0
                    or partialchar == b'\xF0' and o >= 0x90
                    or partialchar == b'\xF4' and o < 0x90
                    ):
--- a/testfuncs.py
+++ b/testfuncs.py
@ -1173,11 +1173,18 @@ def do_tests():
    test('llEscapeURL(llUnescapeURL(u"%E0%80%80x"))', u'%3F%3F%3Fx') # aliased range begin (U+0000)
    test('llEscapeURL(llUnescapeURL(u"%E0%9F%BFx"))', u'%3F%3F%3Fx') # aliased range end   (U+07FF)
    test('llEscapeURL(llUnescapeURL(u"%E0%A0%80x"))', u'%E0%A0%80x') # U+0800 (3-byte range start)
+    test('llEscapeURL(llUnescapeURL(u"%ED%9F%BFx"))', u'%ED%9F%BFx') # U+D7FF (right before first UTF-16 high surrogate)
+    test('llEscapeURL(llUnescapeURL(u"%EE%80%80x"))', u'%EE%80%80x') # U+E000 (right after last UTF-16 low surrogate)
    test('llEscapeURL(llUnescapeURL(u"%EF%BF%BFx"))', u'%EF%BF%BFx') # U+FFFF (3-byte range end)
    test('llEscapeURL(llUnescapeURL(u"%F0%80%80%80x"))', u'%3F%3F%3F%3Fx') # aliased range begin (U+0000)
    test('llEscapeURL(llUnescapeURL(u"%F0%8F%BF%BFx"))', u'%3F%3F%3F%3Fx') # aliased range end   (U+FFFF)
    test('llEscapeURL(llUnescapeURL(u"%F0%90%80%80x"))', u'%F0%90%80%80x') # U+10000 (4-byte range start)
    test('llEscapeURL(llUnescapeURL(u"%F4%8F%BF%BFx"))', u'%F4%8F%BF%BFx') # U+10FFFF (valid 4-byte range end)
+    # excluded because they are used for UTF-16 surrogates, not valid characters
+    test('llEscapeURL(llUnescapeURL(u"%ED%A0%80"))', u'%3F%3F%3F') # D800 - first high surrogate
+    test('llEscapeURL(llUnescapeURL(u"%ED%AF%BF"))', u'%3F%3F%3F') # DBFF - last high surrogate
+    test('llEscapeURL(llUnescapeURL(u"%ED%B0%80"))', u'%3F%3F%3F') # DC00 - first low surrogate
+    test('llEscapeURL(llUnescapeURL(u"%ED%BF%BF"))', u'%3F%3F%3F') # DFFF - last low  surrogate
    # excluded because of truncation to U+10FFFF
    test('llEscapeURL(llUnescapeURL(u"%F4%90%80%80x"))', u'%3F%3F%3F%3Fx') # U+110000 (invalid 4-byte range start)
    test('llEscapeURL(llUnescapeURL(u"%F7%BF%BF%BFx"))', u'%3F%3F%3F%3Fx') # U+1FFFFF (invalid 4-byte range end)
@ -1302,6 +1309,9 @@ def do_tests():
    test('llGetEnv(u"yadda")', u'')
    shouldexcept('llGetEnv(u"agent_limit")', ELSLCantCompute)

+
+
+
    # JSON tests - Here be dragons.
    print("9 errors expected past here -------------------------------------------------")
    test('''llEscapeURL(llList2Json(JSON_OBJECT, [llUnescapeURL(