mirror of
https://github.com/Sei-Lisa/LSL-PyOptimizer
synced 2025-07-01 23:58:20 +00:00
Revamp and fixes of REs for pre-preprocessing.
The new REs are easier to follow and to maintain. This fixes known and potential bugs, plus a potential lack of greediness.
This commit is contained in:
parent
cd1826e9e0
commit
47eee0312b
1 changed file with 31 additions and 22 deletions
53
main.py
53
main.py
|
@ -51,30 +51,39 @@ def PreparePreproc(script):
|
||||||
col = 0
|
col = 0
|
||||||
|
|
||||||
# Trigraphs make our life really difficult.
|
# Trigraphs make our life really difficult.
|
||||||
# We join lines with \<return> or ??/<return> inside strings,
|
# We join lines that have \<return> or ??/<return> inside strings,
|
||||||
# and count <return>s to add them back at the end of the string,
|
# and we also replace regular <return> inside strings with \n, counting how
|
||||||
# as well as spaces.
|
# many lines we join, to add them back at the end of the string in order to
|
||||||
# We skip as much as possible in one go every time, only stopping to
|
# keep the line count exact prior to preprocessing. We also preserve the
|
||||||
# analyze critical substrings.
|
# original column after the string, by adding as many spaces as necessary.
|
||||||
tok = re.compile(r'[^"/]+|"|/(?:\?\?\/\n)*\*.*?\*(?:\?\?\/\n)*/'
|
# We could let the preprocessor do the line joining on backslash-newline,
|
||||||
r'|/(?:\?\?\/\n)*/(?:\?\?\/.|\\.|.)*?\n'
|
# but by eliminating all newlines, we have control over the output column
|
||||||
|
# of the text that follows the string and can report an accurate column
|
||||||
|
# position in case of error.
|
||||||
|
# The REs skip as much as possible in one go every time, only stopping to
|
||||||
|
# analyze critical tokens.
|
||||||
|
tok = re.compile(
|
||||||
|
r'(?:'
|
||||||
|
r'/(?:\?\?/\n|\\\n)*\*.*?\*(?:\?\?/\n|\\\n)*/'
|
||||||
|
r'|/(?:\?\?/\n|\\\n)*/(?:\?\?/\n|\\\n|[^\n])*\n'
|
||||||
|
r'|[^"]'
|
||||||
|
r')+'
|
||||||
|
r'|"'
|
||||||
, re.S)
|
, re.S)
|
||||||
#tok2 = re.compile(r'(?:(?!\?\?/.|\\.|"|\n).)+|\\.|\?\?/.|.', re.S)
|
# RE used inside strings.
|
||||||
tok2 = re.compile(
|
tok2 = re.compile(
|
||||||
r"\\\n|\?\?/\n|" '"' r"|\n|"
|
r'(?:'
|
||||||
r"(?:"
|
r"\?\?[='()!<>-]" # valid trigraph except ??/ (backslash)
|
||||||
# negative match for the above - tough
|
r"|(?:\?\?/|\\)(?:\?\?[/='()!<>-]|[^\n])"
|
||||||
# eat as a unit:
|
# backslash trigraph or actual backslash,
|
||||||
# - a backslash or corresponding trigraph followed by any trigraph
|
# followed by any trigraph or non-newline
|
||||||
# or by any non-newline character
|
r'|(?!\?\?/\n|\\\n|"|\n).'
|
||||||
# - any trigraph other than ??/
|
# any character that doesn't start a trigraph/
|
||||||
# - any character that is not a newline, double quote, backslash
|
# backslash escape followed by a newline
|
||||||
# or the start of a trigraph
|
# or is a newline or double quote, as we're
|
||||||
# - any trigraph-like sequence that is not a trigraph
|
# interested in all those individually.
|
||||||
r"(?:\\|\?\?/)(?:\?\?[=/'()!<>\-]|[^\n])"
|
r')+' # as many of those as possible
|
||||||
r"|\?\?[='()!<>\-]"
|
r'|\?\?/\n|\\\n|\n|"' # or any of those individually
|
||||||
r"|[^\n" '"' r"\\?]|\?(?!\?[=/'()!<>\-])"
|
|
||||||
r")+"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
pos = 0
|
pos = 0
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue