mirror of
https://github.com/Sei-Lisa/LSL-PyOptimizer
synced 2025-07-01 07:38:21 +00:00
Make PreparePreproc Unicode-aware.
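Fixes mismatches in the column numbers reported after a multiline string when the last line of the string contains non-ASCII Unicode characters. The script was previously processed as a UTF-8 byte string, so each such character was counted as several columns; it is now decoded to Unicode before processing and re-encoded to UTF-8 afterwards.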
This commit is contained in:
parent
eba4df6903
commit
ed05a2e022
1 changed file with 21 additions and 21 deletions
42
main.py
42
main.py
|
@ -106,27 +106,27 @@ def PreparePreproc(script):
|
||||||
# instead of reproducing that C quirk. This also matches what FS is doing
|
# instead of reproducing that C quirk. This also matches what FS is doing
|
||||||
# currently, so it's good for compatibility.
|
# currently, so it's good for compatibility.
|
||||||
tok = re.compile(
|
tok = re.compile(
|
||||||
r'(?:'
|
ur'(?:'
|
||||||
r'/(?:\?\?/\n|\\\n)*\*.*?\*(?:\?\?/\n|\\\n)*/'
|
ur'/(?:\?\?/\n|\\\n)*\*.*?\*(?:\?\?/\n|\\\n)*/'
|
||||||
r'|/(?:\?\?/\n|\\\n)*/(?:\?\?/\n|\\\n|[^\n])*\n'
|
ur'|/(?:\?\?/\n|\\\n)*/(?:\?\?/\n|\\\n|[^\n])*\n'
|
||||||
r'|[^"]'
|
ur'|[^"]'
|
||||||
r')+'
|
ur')+'
|
||||||
r'|"'
|
ur'|"'
|
||||||
, re.S)
|
, re.S)
|
||||||
# RE used inside strings.
|
# RE used inside strings.
|
||||||
tok2 = re.compile(
|
tok2 = re.compile(
|
||||||
r'(?:'
|
ur'(?:'
|
||||||
r"\?\?[='()!<>-]" # valid trigraph except ??/ (backslash)
|
ur"\?\?[='()!<>-]" # valid trigraph except ??/ (backslash)
|
||||||
r"|(?:\?\?/|\\)(?:\?\?[/='()!<>-]|[^\n])"
|
ur"|(?:\?\?/|\\)(?:\?\?[/='()!<>-]|[^\n])"
|
||||||
# backslash trigraph or actual backslash,
|
# backslash trigraph or actual backslash,
|
||||||
# followed by any trigraph or non-newline
|
# followed by any trigraph or non-newline
|
||||||
r'|(?!\?\?/\n|\\\n|"|\n).'
|
ur'|(?!\?\?/\n|\\\n|"|\n).'
|
||||||
# any character that doesn't start a trigraph/
|
# any character that doesn't start a trigraph/
|
||||||
# backslash escape followed by a newline
|
# backslash escape followed by a newline
|
||||||
# or is a newline or double quote, as we're
|
# or is a newline or double quote, as we're
|
||||||
# interested in all those individually.
|
# interested in all those individually.
|
||||||
r')+' # as many of those as possible
|
ur')+' # as many of those as possible
|
||||||
r'|\?\?/\n|\\\n|\n|"' # or any of those individually
|
ur'|\?\?/\n|\\\n|\n|"' # or any of those individually
|
||||||
)
|
)
|
||||||
|
|
||||||
pos = 0
|
pos = 0
|
||||||
|
@ -134,7 +134,7 @@ def PreparePreproc(script):
|
||||||
while match:
|
while match:
|
||||||
matched = match.group(0)
|
matched = match.group(0)
|
||||||
pos += len(matched)
|
pos += len(matched)
|
||||||
if matched == '"':
|
if matched == u'"':
|
||||||
s += matched
|
s += matched
|
||||||
nlines = col = 0
|
nlines = col = 0
|
||||||
match2 = tok2.search(script, pos)
|
match2 = tok2.search(script, pos)
|
||||||
|
@ -142,24 +142,24 @@ def PreparePreproc(script):
|
||||||
matched2 = match2.group(0)
|
matched2 = match2.group(0)
|
||||||
pos += len(matched2)
|
pos += len(matched2)
|
||||||
|
|
||||||
if matched2 == '\\\n' or matched2 == '??/\n':
|
if matched2 == u'\\\n' or matched2 == u'??/\n':
|
||||||
nlines += 1
|
nlines += 1
|
||||||
col = 0
|
col = 0
|
||||||
match2 = tok2.search(script, pos)
|
match2 = tok2.search(script, pos)
|
||||||
continue
|
continue
|
||||||
if matched2 == '"':
|
if matched2 == u'"':
|
||||||
if nlines:
|
if nlines:
|
||||||
if script[pos:pos+1] == '\n':
|
if script[pos:pos+1] == u'\n':
|
||||||
col = -1 # don't add spaces if not necessary
|
col = -1 # don't add spaces if not necessary
|
||||||
# col misses the quote added here, so add 1
|
# col misses the quote added here, so add 1
|
||||||
s += '"' + '\n'*nlines + ' '*(col+1)
|
s += u'"' + u'\n'*nlines + u' '*(col+1)
|
||||||
else:
|
else:
|
||||||
s += '"'
|
s += u'"'
|
||||||
break
|
break
|
||||||
if matched2 == '\n':
|
if matched2 == u'\n':
|
||||||
nlines += 1
|
nlines += 1
|
||||||
col = 0
|
col = 0
|
||||||
s += '\\n'
|
s += u'\\n'
|
||||||
else:
|
else:
|
||||||
col += len(matched2)
|
col += len(matched2)
|
||||||
s += matched2
|
s += matched2
|
||||||
|
@ -628,7 +628,7 @@ def main(argv):
|
||||||
if preproc != 'none':
|
if preproc != 'none':
|
||||||
# At this point, for the external preprocessor to work we need the
|
# At this point, for the external preprocessor to work we need the
|
||||||
# script as a byte array, not as unicode, but it should be UTF-8.
|
# script as a byte array, not as unicode, but it should be UTF-8.
|
||||||
script = PreparePreproc(script)
|
script = PreparePreproc(script.decode('utf8')).encode('utf8')
|
||||||
if preproc == 'mcpp':
|
if preproc == 'mcpp':
|
||||||
# As a special treatment for mcpp, we force it to output its
|
# As a special treatment for mcpp, we force it to output its
|
||||||
# macros so we can read if USE_xxx are defined. With GCC that
|
# macros so we can read if USE_xxx are defined. With GCC that
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue