Implement accurate error reporting through #line directives.

Also simplify and fix the matching expression for #line (gcc inserts numeric flags at the end). The implementation still has many problems. Finding the actual line number of each #line directive is O(n^2). The error line and column are calculated at every EParse, and EParse can be triggered and ignored while scanning vectors or globals. UniConvScript doesn't read #line at all, thus failing to report a meaningful input line. But at least it's a start.
2025-07-01 07:38:21 +00:00 · 2017-10-11 05:04:13 +02:00 · 2017-10-11 05:04:13 +02:00 · 1071941301
commit 1071941301
parent 4ba0518353
2 changed files with 65 additions and 17 deletions
--- a/lslopt/lslparse.py
+++ b/lslopt/lslparse.py
@ -46,18 +46,44 @@ def ishex(c):

 def GetErrLineCol(parser):
    errorpos = parser.errorpos
+    # Find zero-based line number
    lno = parser.script.count('\n', 0, errorpos)
+    # Find start of current line
    lstart = parser.script.rfind('\n', 0, errorpos) + 1
-    # Find column number in characters
+    # Find zero-based column number in characters
    cno = len(parser.script[lstart:errorpos].decode('utf8'))
-    return (lno + 1, cno + 1)
+    # Find in #line directives list
+    i = len(parser.linedir)
+    filename = '<stdin>'  # value to return if there's no #line before lno
+    while i:
+        i -= 1
+        line = parser.linedir[i]
+        # We wouldn't know where to report the error in this case:
+        assert lno != line[0], \
+            "Error position is in processed #line directive?!"
+
+        if line[0] < lno:  # found the last #line directive before lno
+            # replace the value of lno
+            lno = lno - line[0] + line[1] - 2
+            filename = line[2]
+            break
+
+    return (lno + 1, cno + 1, filename)

 class EParse(Exception):
    def __init__(self, parser, msg):
        self.errorpos = parser.errorpos
-        self.lno, self.cno = GetErrLineCol(parser)
+        self.lno, self.cno, self.fname = GetErrLineCol(parser)
+        filename = (self.fname.decode('utf8', 'replace')
+                 .replace(u'\\', ur'\\')
+                 .replace(u'"', ur'\"')
+                )

-        msg = u"(Line %d char %d): ERROR: %s" % (self.lno, self.cno, msg)
+        if parser.processpre and filename != '<stdin>':
+            msg = u"(Line %d char %d): ERROR in \"%s\": %s" % (self.lno,
+                self.cno, filename, msg)
+        else:
+            msg = u"(Line %d char %d): ERROR: %s" % (self.lno, self.cno, msg)
        super(EParse, self).__init__(msg)

 class EParseUEOF(EParse):
@ -385,10 +411,11 @@ class parser(object):
        if self.parse_directive_re is None:
            self.parse_directive_re = re.compile(
                r'^#\s*(?:'
-                    r'(?:[Ll][Ii][Nn][Ee]\s+)?(\d+)(?:\s+("(?:[^"\\]|\\.)*"))?'
+                    r'(?:line)?\s+(\d+)(?:\s+("(?:\\.|[^"])*")(?:\s+\d+)*)?'
                    r'|'
-                    r'([A-Za-z0-9_]+)\s+([A-Za-z0-9_]+)\s+([-+,A-Za-z0-9_]+)'
+                    r'([a-z0-9_]+)\s+([a-z0-9_]+)\s+([-+,a-z0-9_]+)'
                r')\s*$'
+                , re.I
            )
        match = self.parse_directive_re.search(directive)
        if match is not None:
@ -403,14 +430,17 @@ class parser(object):
                        filename = literal_eval(match.group(2))
                    else:
                        filename = match.group(2)[1:-1]
-                    # TODO: what do we do with the filename?
-                    filename # keep pyflakes happy
+                    self.lastFILE = filename
+                else:
+                    filename = self.lastFILE

-                    del filename
-                linenum = int(match.group(1))
-                linenum # keep pyflakes happy
-                # TODO: process line number
-                del linenum
+                # Referenced line number (in the #line directive)
+                reflinenum = int(match.group(1))
+                # Actual line number (where the #line directive itself is)
+                # FIXME: this is O(n^2); track line number instead of this hack
+                actlinenum = self.script.count('\n', 0, self.pos)
+                self.linedir.append((actlinenum, reflinenum, filename))
+                del actlinenum, reflinenum, filename
            else:
                assert match.group(3) is not None
                if match.group(3).lower() == 'pragma' and match.group(4) == 'OPT':
@ -2496,12 +2526,14 @@ list lazy_list_set(list L, integer i, list v)
                self.NextToken()


-    def parse(self, script, options = ()):
+    def parse(self, script, options = (), filename = '<stdin>'):
        """Parse the given stream with the given options.

        This function also builds the temporary globals table.
        """

+        self.filename = filename
+
        if type(script) is unicode:
            script = script.encode('utf8')

@ -2607,6 +2639,12 @@ list lazy_list_set(list L, integer i, list v)
        self.symtab[0][-1] = None
        self.scopeindex = 0

+        # Last preprocessor __FILE__. <stdin> means the current file.
+        self.lastFILE = '<stdin>'
+
+        # List of preprocessor #line directives.
+        self.linedir = []
+
        # This is a small hack to prevent circular definitions in globals when
        # extended expressions are enabled. When false (default), forward
        # globals are allowed; if true, only already seen globals are permitted.
--- a/main.py
+++ b/main.py
@ -59,7 +59,15 @@ class UniConvScript(object):
    """Converts the script to Unicode, setting the properties required by
    EParse to report a meaningful error position.
    """
-    def __init__(self, script):
+    def __init__(self, script, options = (), filename = '<stdin>'):
+        self.linedir = []
+        self.filename = filename
+        # We don't interpret #line here. In case of an encode error,
+        # we're in the dark about which file it comes from. User needs
+        # --preshow to view the #line directives and find the correspondence
+        # themselves.
+        #self.processpre = 'processpre' in options
+        self.processpre = False
        self.script = script

    def to_unicode(self):
@ -596,7 +604,8 @@ def main(argv):
                # Try converting the script to Unicode, to report any encoding
                # errors with accurate line information. At this point we don't
                # need the result.
-                UniConvScript(script).to_unicode()
+                UniConvScript(script, options,
+                              fname if fname != '-' else '<stdin>').to_unicode()
            except EParse as e:
                # We don't call ReportError to prevent problems due to
                # displaying invalid UTF-8
@ -644,7 +653,8 @@ def main(argv):

            p = parser(builtins, seftable)
            try:
-                ts = p.parse(script, options)
+                ts = p.parse(script, options,
+                             fname if fname != '-' else '<stdin>')
            except EParse as e:
                ReportError(script, e)
                return 1