From 107194130182e154fbcd58ad68f1ee60fdb693a8 Mon Sep 17 00:00:00 2001 From: Sei Lisa Date: Wed, 11 Oct 2017 05:04:13 +0200 Subject: [PATCH] Implement accurate error reporting through #line directives. Also simplify and fix the matching expression for #line (gcc inserts numeric flags at the end). It still has many problems. It's O(n^2). It's calculated at every EParse, and EParse can be triggered and ignored while scanning vectors or globals. UniConvScript doesn't read #line at all, thus failing to report a meaningful input line. But at least it's a start. --- lslopt/lslparse.py | 66 ++++++++++++++++++++++++++++++++++++---------- main.py | 16 ++++++++--- 2 files changed, 65 insertions(+), 17 deletions(-) diff --git a/lslopt/lslparse.py b/lslopt/lslparse.py index 1badd3d..9e6296c 100644 --- a/lslopt/lslparse.py +++ b/lslopt/lslparse.py @@ -46,18 +46,44 @@ def ishex(c): def GetErrLineCol(parser): errorpos = parser.errorpos + # Find zero-based line number lno = parser.script.count('\n', 0, errorpos) + # Find start of current line lstart = parser.script.rfind('\n', 0, errorpos) + 1 - # Find column number in characters + # Find zero-based column number in characters cno = len(parser.script[lstart:errorpos].decode('utf8')) - return (lno + 1, cno + 1) + # Find in #line directives list + i = len(parser.linedir) + filename = '' # value to return if there's no #line before lno + while i: + i -= 1 + line = parser.linedir[i] + # We wouldn't know where to report the error in this case: + assert lno != line[0], \ + "Error position is in processed #line directive?!" 
+ + if line[0] < lno: # found the last #line directive before lno + # replace the value of lno + lno = lno - line[0] + line[1] - 2 + filename = line[2] + break + + return (lno + 1, cno + 1, filename) class EParse(Exception): def __init__(self, parser, msg): self.errorpos = parser.errorpos - self.lno, self.cno = GetErrLineCol(parser) + self.lno, self.cno, self.fname = GetErrLineCol(parser) + filename = (self.fname.decode('utf8', 'replace') + .replace(u'\\', ur'\\') + .replace(u'"', ur'\"') + ) - msg = u"(Line %d char %d): ERROR: %s" % (self.lno, self.cno, msg) + if parser.processpre and filename != '': + msg = u"(Line %d char %d): ERROR in \"%s\": %s" % (self.lno, + self.cno, filename, msg) + else: + msg = u"(Line %d char %d): ERROR: %s" % (self.lno, self.cno, msg) super(EParse, self).__init__(msg) class EParseUEOF(EParse): @@ -385,10 +411,11 @@ class parser(object): if self.parse_directive_re is None: self.parse_directive_re = re.compile( r'^#\s*(?:' - r'(?:[Ll][Ii][Nn][Ee]\s+)?(\d+)(?:\s+("(?:[^"\\]|\\.)*"))?' + r'(?:line)?\s+(\d+)(?:\s+("(?:\\.|[^"])*")(?:\s+\d+)*)?' r'|' - r'([A-Za-z0-9_]+)\s+([A-Za-z0-9_]+)\s+([-+,A-Za-z0-9_]+)' + r'([a-z0-9_]+)\s+([a-z0-9_]+)\s+([-+,a-z0-9_]+)' r')\s*$' + , re.I ) match = self.parse_directive_re.search(directive) if match is not None: @@ -403,14 +430,17 @@ class parser(object): filename = literal_eval(match.group(2)) else: filename = match.group(2)[1:-1] - # TODO: what do we do with the filename? 
- filename # keep pyflakes happy + self.lastFILE = filename + else: + filename = self.lastFILE - del filename - linenum = int(match.group(1)) - linenum # keep pyflakes happy - # TODO: process line number - del linenum + # Referenced line number (in the #line directive) + reflinenum = int(match.group(1)) + # Actual line number (where the #line directive itself is) + # FIXME: this is O(n^2); track line number instead of this hack + actlinenum = self.script.count('\n', 0, self.pos) + self.linedir.append((actlinenum, reflinenum, filename)) + del actlinenum, reflinenum, filename else: assert match.group(3) is not None if match.group(3).lower() == 'pragma' and match.group(4) == 'OPT': @@ -2496,12 +2526,14 @@ list lazy_list_set(list L, integer i, list v) self.NextToken() - def parse(self, script, options = ()): + def parse(self, script, options = (), filename = ''): """Parse the given stream with the given options. This function also builds the temporary globals table. """ + self.filename = filename + if type(script) is unicode: script = script.encode('utf8') @@ -2607,6 +2639,12 @@ list lazy_list_set(list L, integer i, list v) self.symtab[0][-1] = None self.scopeindex = 0 + # Last preprocessor __FILE__; the initial value means the current file. + self.lastFILE = '' + + # List of preprocessor #line directives. + self.linedir = [] + # This is a small hack to prevent circular definitions in globals when # extended expressions are enabled. When false (default), forward # globals are allowed; if true, only already seen globals are permitted. diff --git a/main.py b/main.py index 52e55af..e5a46cc 100755 --- a/main.py +++ b/main.py @@ -59,7 +59,15 @@ class UniConvScript(object): """Converts the script to Unicode, setting the properties required by EParse to report a meaningful error position. """ - def __init__(self, script): + def __init__(self, script, options = (), filename = ''): + self.linedir = [] + self.filename = filename + # We don't interpret #line here. 
In case of an encode error, + # we're in the dark about which file it comes from. User needs + # --preshow to view the #line directives and find the correspondence + # themselves. + #self.processpre = 'processpre' in options + self.processpre = False self.script = script def to_unicode(self): @@ -596,7 +604,8 @@ def main(argv): # Try converting the script to Unicode, to report any encoding # errors with accurate line information. At this point we don't # need the result. - UniConvScript(script).to_unicode() + UniConvScript(script, options, + fname if fname != '-' else '').to_unicode() except EParse as e: # We don't call ReportError to prevent problems due to # displaying invalid UTF-8 @@ -644,7 +653,8 @@ def main(argv): p = parser(builtins, seftable) try: - ts = p.parse(script, options) + ts = p.parse(script, options, + fname if fname != '-' else '') except EParse as e: ReportError(script, e) return 1