Implement accurate error reporting through #line directives.

Also simplify and fix the matching expression for #line (gcc inserts numeric flags at the end).

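The implementation still has several problems. Recording the directives is O(n^2), because every #line directive recounts the newlines from the start of the script. The line/file lookup is performed on every EParse, even though EParse can be raised and then ignored while scanning vectors or globals. UniConvScript doesn't read #line at all, so it fails to report a meaningful input line for encoding errors. But at least it's a start.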
This commit is contained in:
Sei Lisa 2017-10-11 05:04:13 +02:00
parent 4ba0518353
commit 1071941301
2 changed files with 65 additions and 17 deletions

View file

@ -46,18 +46,44 @@ def ishex(c):
def GetErrLineCol(parser):
    """Translate parser.errorpos into a reportable source position.

    Reads parser.script (the UTF-8 encoded script), parser.errorpos (a byte
    offset into it) and parser.linedir (a list of
    (actual line, referenced line, filename) tuples, one per processed
    #line directive, in order of appearance).

    Returns a tuple (line, column, filename). Line and column are one-based
    and the column is counted in characters, not bytes. When a #line
    directive precedes the error, the line is expressed relative to the
    last such directive and filename is the one it names; otherwise the
    physical line is returned together with '<stdin>'.
    """
    errorpos = parser.errorpos
    script = parser.script
    # Find zero-based line number
    lno = script.count('\n', 0, errorpos)
    # Find start of current line
    lstart = script.rfind('\n', 0, errorpos) + 1
    # Find zero-based column number in characters
    cno = len(script[lstart:errorpos].decode('utf8'))

    # Value to return if there's no #line directive before lno
    filename = '<stdin>'
    # Walk the #line directives from last to first, looking for the closest
    # one that precedes the error line.
    for actual, referenced, fname in reversed(parser.linedir):
        # We wouldn't know where to report the error in this case:
        assert lno != actual, \
            "Error position is in processed #line directive?!"
        if actual < lno:
            # The directive sits on zero-based line `actual`, and the line
            # right after it is one-based line `referenced`. Hence the
            # zero-based translated line is
            #   (lno - actual - 1) + (referenced - 1).
            lno = lno - actual + referenced - 2
            filename = fname
            break

    return (lno + 1, cno + 1, filename)
class EParse(Exception):
    """Parse error carrying the position at which it was detected.

    Attributes set on construction:
    errorpos -- copy of parser.errorpos
    lno, cno -- one-based line and column as returned by GetErrLineCol
    fname    -- file name as returned by GetErrLineCol ('<stdin>' when no
                #line directive precedes the error)

    The exception message is msg prefixed with the position. The file name
    is only included when parser.processpre is true and the name is not
    '<stdin>'.
    """
    def __init__(self, parser, msg):
        self.errorpos = parser.errorpos
        self.lno, self.cno, self.fname = GetErrLineCol(parser)
        # The name is shown between double quotes, so escape backslashes
        # and double quotes. Undecodable bytes are replaced, not raised.
        filename = (self.fname.decode('utf8', 'replace')
                    .replace(u'\\', u'\\\\')
                    .replace(u'"', u'\\"'))

        if parser.processpre and filename != '<stdin>':
            msg = u"(Line %d char %d): ERROR in \"%s\": %s" % (
                self.lno, self.cno, filename, msg)
        else:
            msg = u"(Line %d char %d): ERROR: %s" % (self.lno, self.cno, msg)
        super(EParse, self).__init__(msg)
class EParseUEOF(EParse):
@ -385,10 +411,11 @@ class parser(object):
if self.parse_directive_re is None:
self.parse_directive_re = re.compile(
r'^#\s*(?:'
r'(?:[Ll][Ii][Nn][Ee]\s+)?(\d+)(?:\s+("(?:[^"\\]|\\.)*"))?'
r'(?:line)?\s+(\d+)(?:\s+("(?:\\.|[^"])*")(?:\s+\d+)*)?'
r'|'
r'([A-Za-z0-9_]+)\s+([A-Za-z0-9_]+)\s+([-+,A-Za-z0-9_]+)'
r'([a-z0-9_]+)\s+([a-z0-9_]+)\s+([-+,a-z0-9_]+)'
r')\s*$'
, re.I
)
match = self.parse_directive_re.search(directive)
if match is not None:
@ -403,14 +430,17 @@ class parser(object):
filename = literal_eval(match.group(2))
else:
filename = match.group(2)[1:-1]
# TODO: what do we do with the filename?
filename # keep pyflakes happy
self.lastFILE = filename
else:
filename = self.lastFILE
del filename
linenum = int(match.group(1))
linenum # keep pyflakes happy
# TODO: process line number
del linenum
# Referenced line number (in the #line directive)
reflinenum = int(match.group(1))
# Actual line number (where the #line directive itself is)
# FIXME: this is O(n^2); track line number instead of this hack
actlinenum = self.script.count('\n', 0, self.pos)
self.linedir.append((actlinenum, reflinenum, filename))
del actlinenum, reflinenum, filename
else:
assert match.group(3) is not None
if match.group(3).lower() == 'pragma' and match.group(4) == 'OPT':
@ -2496,12 +2526,14 @@ list lazy_list_set(list L, integer i, list v)
self.NextToken()
def parse(self, script, options = ()):
def parse(self, script, options = (), filename = '<stdin>'):
"""Parse the given stream with the given options.
This function also builds the temporary globals table.
"""
self.filename = filename
if type(script) is unicode:
script = script.encode('utf8')
@ -2607,6 +2639,12 @@ list lazy_list_set(list L, integer i, list v)
self.symtab[0][-1] = None
self.scopeindex = 0
# Last preprocessor __FILE__. <stdin> means the current file.
self.lastFILE = '<stdin>'
# List of preprocessor #line directives.
self.linedir = []
# This is a small hack to prevent circular definitions in globals when
# extended expressions are enabled. When false (default), forward
# globals are allowed; if true, only already seen globals are permitted.

16
main.py
View file

@ -59,7 +59,15 @@ class UniConvScript(object):
"""Converts the script to Unicode, setting the properties required by
EParse to report a meaningful error position.
"""
def __init__(self, script):
def __init__(self, script, options = (), filename = '<stdin>'):
self.linedir = []
self.filename = filename
# We don't interpret #line here. In case of an encode error,
# we're in the dark about which file it comes from. User needs
# --preshow to view the #line directives and find the correspondence
# themselves.
#self.processpre = 'processpre' in options
self.processpre = False
self.script = script
def to_unicode(self):
@ -596,7 +604,8 @@ def main(argv):
# Try converting the script to Unicode, to report any encoding
# errors with accurate line information. At this point we don't
# need the result.
UniConvScript(script).to_unicode()
UniConvScript(script, options,
fname if fname != '-' else '<stdin>').to_unicode()
except EParse as e:
# We don't call ReportError to prevent problems due to
# displaying invalid UTF-8
@ -644,7 +653,8 @@ def main(argv):
p = parser(builtins, seftable)
try:
ts = p.parse(script, options)
ts = p.parse(script, options,
fname if fname != '-' else '<stdin>')
except EParse as e:
ReportError(script, e)
return 1