From 107194130182e154fbcd58ad68f1ee60fdb693a8 Mon Sep 17 00:00:00 2001
From: Sei Lisa <sei-lisa@email.fake>
Date: Wed, 11 Oct 2017 05:04:13 +0200
Subject: [PATCH] Implement accurate error reporting through #line directives.

Also simplify and fix the matching expression for #line (gcc inserts numeric flags at the end).

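This implementation still has many problems. Recording the #line directives is O(n^2), because the actual line number is recounted from the start of the script for every directive. The error location is calculated every time an EParse is constructed, even though an EParse can be raised and then ignored while scanning vectors or globals. UniConvScript doesn't read #line at all, so it fails to report a meaningful input line. But at least it's a start.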
---
 lslopt/lslparse.py | 66 ++++++++++++++++++++++++++++++++++++----------
 main.py            | 16 ++++++++---
 2 files changed, 65 insertions(+), 17 deletions(-)
diff --git a/lslopt/lslparse.py b/lslopt/lslparse.py
index 1badd3d..9e6296c 100644
--- a/lslopt/lslparse.py
+++ b/lslopt/lslparse.py
@@ -46,18 +46,44 @@ def ishex(c):
 
 def GetErrLineCol(parser):
     errorpos = parser.errorpos
+    # Find zero-based line number
     lno = parser.script.count('\n', 0, errorpos)
+    # Find start of current line
     lstart = parser.script.rfind('\n', 0, errorpos) + 1
-    # Find column number in characters
+    # Find zero-based column number in characters
     cno = len(parser.script[lstart:errorpos].decode('utf8'))
-    return (lno + 1, cno + 1)
+    # Find in #line directives list
+    i = len(parser.linedir)
+    filename = '<stdin>'  # value to return if there's no #line before lno
+    while i:
+        i -= 1
+        line = parser.linedir[i]
+        # We wouldn't know where to report the error in this case:
+        assert lno != line[0], \
+            "Error position is in processed #line directive?!"
+
+        if line[0] < lno:  # found the last #line directive before lno
+            # replace the value of lno
+            lno = lno - line[0] + line[1] - 2
+            filename = line[2]
+            break
+
+    return (lno + 1, cno + 1, filename)
 
 class EParse(Exception):
     def __init__(self, parser, msg):
         self.errorpos = parser.errorpos
-        self.lno, self.cno = GetErrLineCol(parser)
+        self.lno, self.cno, self.fname = GetErrLineCol(parser)
+        filename = (self.fname.decode('utf8', 'replace')
+                 .replace(u'\\', ur'\\')
+                 .replace(u'"', ur'\"')
+                )
 
-        msg = u"(Line %d char %d): ERROR: %s" % (self.lno, self.cno, msg)
+        if parser.processpre and filename != '<stdin>':
+            msg = u"(Line %d char %d): ERROR in \"%s\": %s" % (self.lno,
+                self.cno, filename, msg)
+        else:
+            msg = u"(Line %d char %d): ERROR: %s" % (self.lno, self.cno, msg)
         super(EParse, self).__init__(msg)
 
 class EParseUEOF(EParse):
@@ -385,10 +411,11 @@ class parser(object):
         if self.parse_directive_re is None:
             self.parse_directive_re = re.compile(
                 r'^#\s*(?:'
-                    r'(?:[Ll][Ii][Nn][Ee]\s+)?(\d+)(?:\s+("(?:[^"\\]|\\.)*"))?'
+                    r'(?:line)?\s+(\d+)(?:\s+("(?:\\.|[^"])*")(?:\s+\d+)*)?'
                     r'|'
-                    r'([A-Za-z0-9_]+)\s+([A-Za-z0-9_]+)\s+([-+,A-Za-z0-9_]+)'
+                    r'([a-z0-9_]+)\s+([a-z0-9_]+)\s+([-+,a-z0-9_]+)'
                 r')\s*$'
+                , re.I
             )
         match = self.parse_directive_re.search(directive)
         if match is not None:
@@ -403,14 +430,17 @@ class parser(object):
                         filename = literal_eval(match.group(2))
                     else:
                         filename = match.group(2)[1:-1]
-                    # TODO: what do we do with the filename?
-                    filename # keep pyflakes happy
+                    self.lastFILE = filename
+                else:
+                    filename = self.lastFILE
 
-                    del filename
-                linenum = int(match.group(1))
-                linenum # keep pyflakes happy
-                # TODO: process line number
-                del linenum
+                # Referenced line number (in the #line directive)
+                reflinenum = int(match.group(1))
+                # Actual line number (where the #line directive itself is)
+                # FIXME: this is O(n^2); track line number instead of this hack
+                actlinenum = self.script.count('\n', 0, self.pos)
+                self.linedir.append((actlinenum, reflinenum, filename))
+                del actlinenum, reflinenum, filename
             else:
                 assert match.group(3) is not None
                 if match.group(3).lower() == 'pragma' and match.group(4) == 'OPT':
@@ -2496,12 +2526,14 @@ list lazy_list_set(list L, integer i, list v)
                 self.NextToken()
 
 
-    def parse(self, script, options = ()):
+    def parse(self, script, options = (), filename = '<stdin>'):
         """Parse the given stream with the given options.
 
         This function also builds the temporary globals table.
         """
 
+        self.filename = filename
+
         if type(script) is unicode:
             script = script.encode('utf8')
 
@@ -2607,6 +2639,12 @@ list lazy_list_set(list L, integer i, list v)
         self.symtab[0][-1] = None
         self.scopeindex = 0
 
+        # Last preprocessor __FILE__. <stdin> means the current file.
+        self.lastFILE = '<stdin>'
+
+        # List of preprocessor #line directives.
+        self.linedir = []
+
         # This is a small hack to prevent circular definitions in globals when
         # extended expressions are enabled. When false (default), forward
         # globals are allowed; if true, only already seen globals are permitted.
diff --git a/main.py b/main.py
index 52e55af..e5a46cc 100755
--- a/main.py
+++ b/main.py
@@ -59,7 +59,15 @@ class UniConvScript(object):
     """Converts the script to Unicode, setting the properties required by
     EParse to report a meaningful error position.
     """
-    def __init__(self, script):
+    def __init__(self, script, options = (), filename = '<stdin>'):
+        self.linedir = []
+        self.filename = filename
+        # We don't interpret #line here. In case of an encode error,
+        # we're in the dark about which file it comes from. User needs
+        # --preshow to view the #line directives and find the correspondence
+        # themselves.
+        #self.processpre = 'processpre' in options
+        self.processpre = False
         self.script = script
 
     def to_unicode(self):
@@ -596,7 +604,8 @@ def main(argv):
                 # Try converting the script to Unicode, to report any encoding
                 # errors with accurate line information. At this point we don't
                 # need the result.
-                UniConvScript(script).to_unicode()
+                UniConvScript(script, options,
+                              fname if fname != '-' else '<stdin>').to_unicode()
             except EParse as e:
                 # We don't call ReportError to prevent problems due to
                 # displaying invalid UTF-8
@@ -644,7 +653,8 @@ def main(argv):
 
             p = parser(builtins, seftable)
             try:
-                ts = p.parse(script, options)
+                ts = p.parse(script, options,
+                             fname if fname != '-' else '<stdin>')
             except EParse as e:
                 ReportError(script, e)
                 return 1