Implement accurate error reporting through #line directives.

Also simplify and fix the matching expression for #line (gcc inserts numeric flags at the end).

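This implementation still has many problems. Recording #line directives is O(n^2), because the actual line number of each directive is found by counting newlines from the start of the script. The error line, column and file name are recalculated every time an EParse is constructed, and EParse can be triggered and ignored while scanning vectors or globals. UniConvScript doesn't read #line at all, so it fails to report a meaningful input line. But at least it's a start.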
This commit is contained in:
Sei Lisa 2017-10-11 05:04:13 +02:00
parent 4ba0518353
commit 1071941301
2 changed files with 65 additions and 17 deletions

View file

@ -46,17 +46,43 @@ def ishex(c):
def GetErrLineCol(parser): def GetErrLineCol(parser):
errorpos = parser.errorpos errorpos = parser.errorpos
# Find zero-based line number
lno = parser.script.count('\n', 0, errorpos) lno = parser.script.count('\n', 0, errorpos)
# Find start of current line
lstart = parser.script.rfind('\n', 0, errorpos) + 1 lstart = parser.script.rfind('\n', 0, errorpos) + 1
# Find column number in characters # Find zero-based column number in characters
cno = len(parser.script[lstart:errorpos].decode('utf8')) cno = len(parser.script[lstart:errorpos].decode('utf8'))
return (lno + 1, cno + 1) # Find in #line directives list
i = len(parser.linedir)
filename = '<stdin>' # value to return if there's no #line before lno
while i:
i -= 1
line = parser.linedir[i]
# We wouldn't know where to report the error in this case:
assert lno != line[0], \
"Error position is in processed #line directive?!"
if line[0] < lno: # found the last #line directive before lno
# replace the value of lno
lno = lno - line[0] + line[1] - 2
filename = line[2]
break
return (lno + 1, cno + 1, filename)
class EParse(Exception): class EParse(Exception):
def __init__(self, parser, msg): def __init__(self, parser, msg):
self.errorpos = parser.errorpos self.errorpos = parser.errorpos
self.lno, self.cno = GetErrLineCol(parser) self.lno, self.cno, self.fname = GetErrLineCol(parser)
filename = (self.fname.decode('utf8', 'replace')
.replace(u'\\', ur'\\')
.replace(u'"', ur'\"')
)
if parser.processpre and filename != '<stdin>':
msg = u"(Line %d char %d): ERROR in \"%s\": %s" % (self.lno,
self.cno, filename, msg)
else:
msg = u"(Line %d char %d): ERROR: %s" % (self.lno, self.cno, msg) msg = u"(Line %d char %d): ERROR: %s" % (self.lno, self.cno, msg)
super(EParse, self).__init__(msg) super(EParse, self).__init__(msg)
@ -385,10 +411,11 @@ class parser(object):
if self.parse_directive_re is None: if self.parse_directive_re is None:
self.parse_directive_re = re.compile( self.parse_directive_re = re.compile(
r'^#\s*(?:' r'^#\s*(?:'
r'(?:[Ll][Ii][Nn][Ee]\s+)?(\d+)(?:\s+("(?:[^"\\]|\\.)*"))?' r'(?:line)?\s+(\d+)(?:\s+("(?:\\.|[^"])*")(?:\s+\d+)*)?'
r'|' r'|'
r'([A-Za-z0-9_]+)\s+([A-Za-z0-9_]+)\s+([-+,A-Za-z0-9_]+)' r'([a-z0-9_]+)\s+([a-z0-9_]+)\s+([-+,a-z0-9_]+)'
r')\s*$' r')\s*$'
, re.I
) )
match = self.parse_directive_re.search(directive) match = self.parse_directive_re.search(directive)
if match is not None: if match is not None:
@ -403,14 +430,17 @@ class parser(object):
filename = literal_eval(match.group(2)) filename = literal_eval(match.group(2))
else: else:
filename = match.group(2)[1:-1] filename = match.group(2)[1:-1]
# TODO: what do we do with the filename? self.lastFILE = filename
filename # keep pyflakes happy else:
filename = self.lastFILE
del filename # Referenced line number (in the #line directive)
linenum = int(match.group(1)) reflinenum = int(match.group(1))
linenum # keep pyflakes happy # Actual line number (where the #line directive itself is)
# TODO: process line number # FIXME: this is O(n^2); track line number instead of this hack
del linenum actlinenum = self.script.count('\n', 0, self.pos)
self.linedir.append((actlinenum, reflinenum, filename))
del actlinenum, reflinenum, filename
else: else:
assert match.group(3) is not None assert match.group(3) is not None
if match.group(3).lower() == 'pragma' and match.group(4) == 'OPT': if match.group(3).lower() == 'pragma' and match.group(4) == 'OPT':
@ -2496,12 +2526,14 @@ list lazy_list_set(list L, integer i, list v)
self.NextToken() self.NextToken()
def parse(self, script, options = ()): def parse(self, script, options = (), filename = '<stdin>'):
"""Parse the given stream with the given options. """Parse the given stream with the given options.
This function also builds the temporary globals table. This function also builds the temporary globals table.
""" """
self.filename = filename
if type(script) is unicode: if type(script) is unicode:
script = script.encode('utf8') script = script.encode('utf8')
@ -2607,6 +2639,12 @@ list lazy_list_set(list L, integer i, list v)
self.symtab[0][-1] = None self.symtab[0][-1] = None
self.scopeindex = 0 self.scopeindex = 0
# Last preprocessor __FILE__. <stdin> means the current file.
self.lastFILE = '<stdin>'
# List of preprocessor #line directives.
self.linedir = []
# This is a small hack to prevent circular definitions in globals when # This is a small hack to prevent circular definitions in globals when
# extended expressions are enabled. When false (default), forward # extended expressions are enabled. When false (default), forward
# globals are allowed; if true, only already seen globals are permitted. # globals are allowed; if true, only already seen globals are permitted.

16
main.py
View file

@ -59,7 +59,15 @@ class UniConvScript(object):
"""Converts the script to Unicode, setting the properties required by """Converts the script to Unicode, setting the properties required by
EParse to report a meaningful error position. EParse to report a meaningful error position.
""" """
def __init__(self, script): def __init__(self, script, options = (), filename = '<stdin>'):
self.linedir = []
self.filename = filename
# We don't interpret #line here. In case of an encode error,
# we're in the dark about which file it comes from. User needs
# --preshow to view the #line directives and find the correspondence
# themselves.
#self.processpre = 'processpre' in options
self.processpre = False
self.script = script self.script = script
def to_unicode(self): def to_unicode(self):
@ -596,7 +604,8 @@ def main(argv):
# Try converting the script to Unicode, to report any encoding # Try converting the script to Unicode, to report any encoding
# errors with accurate line information. At this point we don't # errors with accurate line information. At this point we don't
# need the result. # need the result.
UniConvScript(script).to_unicode() UniConvScript(script, options,
fname if fname != '-' else '<stdin>').to_unicode()
except EParse as e: except EParse as e:
# We don't call ReportError to prevent problems due to # We don't call ReportError to prevent problems due to
# displaying invalid UTF-8 # displaying invalid UTF-8
@ -644,7 +653,8 @@ def main(argv):
p = parser(builtins, seftable) p = parser(builtins, seftable)
try: try:
ts = p.parse(script, options) ts = p.parse(script, options,
fname if fname != '-' else '<stdin>')
except EParse as e: except EParse as e:
ReportError(script, e) ReportError(script, e)
return 1 return 1