From fb832797069a7794e64e137757cb552e38db2bb8 Mon Sep 17 00:00:00 2001
From: Sei Lisa <sei-lisa@email.fake>
Date: Mon, 27 Jun 2016 16:50:09 +0200
Subject: [PATCH] Add infrastructure to process preprocessor directives.

Also check that they appear at the beginning of the line.
---
 lslopt/lslparse.py | 80 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 73 insertions(+), 7 deletions(-)

diff --git a/lslopt/lslparse.py b/lslopt/lslparse.py
index 988d099..572d45b 100644
--- a/lslopt/lslparse.py
+++ b/lslopt/lslparse.py
@@ -168,6 +168,11 @@ class EParseInvalidCont(EParse):
         super(EParseInvalidCont, self).__init__(parser,
             u"'continue' used outside a loop")
 
+class EParseInvalidBackslash(EParse):
+    def __init__(self, parser):
+        super(EParseInvalidBackslash, self).__init__(parser,
+            u"Preprocessor directive can't end in backslash")
+
 class EInternal(Exception):
     """This exception is a construct to allow a different function to cause an
     immediate return of EOF from parser.GetToken().
@@ -310,6 +315,45 @@ class parser(object):
         if self.pos >= self.length:
             raise EInternal() # force GetToken to return EOF
 
+    def ProcessDirective(self, directive):
+        """Process a given preprocessor directive during parsing."""
+
+        if directive[len(directive)-1:] == '\\':
+            raise EParseInvalidBackslash(self)
+
+        if self.parse_directive_re is None:
+            self.parse_directive_re = re.compile(
+                r'^#\s*(?:'
+                    r'(?:[Ll][Ii][Nn][Ee]\s+)?(\d+)(?:\s+("(?:[^"\\]|\\.)*"))?'
+                    r'|'
+                    r'([A-Za-z0-9_]+\s+(.+?))'
+                r')\s*$'
+            )
+        match = self.parse_directive_re.search(directive)
+        if match is not None:
+            # Something parsed
+            if match.group(1) is not None:
+                #line directive
+                if match.group(2) is not None:
+                    # filename included
+                    if match.group(2).find('\\') != -1:
+                        # interpret escapes
+                        from ast import literal_eval
+                        filename = literal_eval(match.group(2))
+                    else:
+                        filename = match.group(2)[1:-1]
+                    # TODO: what do we do with the filename?
+
+                    del filename
+                linenum = int(match.group(1))
+                # TODO: process line number
+                del linenum
+            else:
+                assert match.group(3) is not None
+                if match.group(3) == 'pragma':
+                    # TODO: process #pragma
+                    pass
+
     def GetToken(self):
         """Lexer"""
 
@@ -322,18 +366,27 @@ class parser(object):
                 self.pos += 1
 
                 # Process comments
-                if c == '#' and self.skippreproc:
-                    # Preprocessor directives act like single line comments.
-                    # Most are not supposed to reach us but cpp also generates
-                    # lines in the output like: # 123 "file.lsl"
-                    # and other preprocessors including Boost Wave and mcpp
-                    # generate lines like: #line 123 "file.lsl" (Firestorm
-                    # comments these out and outputs //#line 123 "file.lsl")
+                if self.skippreproc and self.linestart and c == '#':
+                    # Preprocessor directive.
+                    # Most are not supposed to reach us but some do:
+                    # - gcpp generates lines in the output like:
+                    #       # 123 "file.lsl"
+                    # - other preprocessors including Boost Wave and mcpp
+                    #   generate lines like:
+                    #       #line 123 "file.lsl"
+                    #   Firestorm comments these out and instead outputs
+                    #   //#line 123 "file.lsl"
+                    # - #pragma directives
+                    # - #define directives from mcpp's #pragma MCPP put_defines
+                    #   or from gcpp's -dN option, that we use to detect some
+                    #   definitions.
                     self.ceof()
                     while self.script[self.pos] != '\n':
                         self.pos += 1
                         self.ceof() # A preprocessor command at EOF is not unexpected EOF.
 
+                    self.ProcessDirective(self.script[self.errorpos:self.pos])
+
                     self.pos += 1
                     self.ceof()
                     continue
@@ -346,6 +399,7 @@ class parser(object):
                             self.pos += 1
                             self.ceof() # A single-line comment at EOF is not unexpected EOF.
 
+                        self.linestart = True
                         self.pos += 1
                         self.ceof()
                         continue
@@ -360,6 +414,11 @@ class parser(object):
                         self.ceof()
                         continue
 
+                # self.linestart is related to the preprocessor, therefore we
+                # check the characters that are relevant for standard C.
+                if c not in ' \n\r\x0B\x0C':
+                    self.linestart = False
+
                 # Process strings
                 if c == '"' or c == 'L' and self.script[self.pos:self.pos+1] == '"':
                     strliteral = ''
@@ -388,6 +447,7 @@ class parser(object):
                                 # '\' followed by a newline; it's not a string.
                                 self.pos = savepos
                                 is_string = False
+                                self.linestart = True
                                 break
                             else:
                                 strliteral += self.script[self.pos]
@@ -521,6 +581,8 @@ class parser(object):
                 if c in '.;{},=()-+*/%@:<>[]&|^~!' and c != '':
                     return (c,)
 
+                if c == '\n':
+                    self.linestart = True
                 # We eat spacers AND any other character, so the following is not needed,
                 # although the lex file includes it (the lex file does not count() invalid characters
                 # for the purpose of error reporting).
@@ -2429,6 +2491,7 @@ list lazy_list_set(list L, integer i, list v)
         # function arguments. And that's what we do.
 
         self.pos = self.errorpos = 0
+        self.linestart = True
         self.tok = self.GetToken()
 
         self.globals = self.BuildTempGlobalsTable()
@@ -2436,6 +2499,7 @@ list lazy_list_set(list L, integer i, list v)
         # Restart
 
         self.pos = self.errorpos = 0
+        self.linestart = True
         self.tok = self.GetToken()
 
         # Reserve spots at the beginning for functions we add
@@ -2471,6 +2535,8 @@ list lazy_list_set(list L, integer i, list v)
         self.constants = {}
         self.funclibrary = {}
 
+        self.parse_directive_re = None
+
         # Library read code
 
         parse_lin_re = re.compile(