From 6a0eebf157ba426675c2f0bbbfcffa8db0f932d0 Mon Sep 17 00:00:00 2001
From: Sei Lisa <sei-lisa@email.fake>
Date: Sun, 8 Mar 2015 16:53:58 +0100
Subject: [PATCH] Implement a couple subtleties of the lexer in the handling of
 strings.

First, a string does not have its own lexer context; it is matched by a single regular expression. Therefore, if there is no matching closing quote before EOF, it is not an unterminated string: it plainly *isn't* a string.

Second, the string lexer searches for either the regexp '[^"\\]' (any character except a double quote or a backslash) or the regexp '\\.', that is, a backslash followed by the "any character" metacharacter. But the "any character" metacharacter does NOT match a newline. The consequence is that a '\' followed by a newline makes the match fail, and the whole thing is not considered a string.

Isolated double quotes are then simply ignored, like many other characters.

An example illustrating both cases:
```
default
{
    state_entry()
    {
        string msg = "Hello, Avatar!";
        llOwnerSay"(msg); // the opening quote will be ignored
        // the following backslash at EOL cancels the previous double quote: \
        llOwnerSay(llGetKey(")); // Unterminated string; unmatched so ignored.
    }
}
```
---
 lslopt/lslparse.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/lslopt/lslparse.py b/lslopt/lslparse.py
index e362f2c..fbee4c0 100644
--- a/lslopt/lslparse.py
+++ b/lslopt/lslparse.py
@@ -356,8 +356,16 @@ class parser(object):
                         self.pos += 1
                         strliteral = '"'
 
+                    savepos = self.pos # we may need to backtrack
+                    is_string = True # by default
+
                     while self.script[self.pos:self.pos+1] != '"':
-                        self.ueof()
+                        # per the grammar, on EOF, it's not considered a string
+                        if self.pos >= self.length:
+                            self.pos = savepos
+                            is_string = False
+                            break
+
                         if self.script[self.pos] == '\\':
                             self.pos += 1
                             self.ueof()
@@ -365,14 +373,21 @@ class parser(object):
                                 strliteral += '\n'
                             elif self.script[self.pos] == 't':
                                 strliteral += '    '
+                            elif self.script[self.pos] == '\n':
+                                # '\' followed by a newline; it's not a string.
+                                self.pos = savepos
+                                is_string = False
+                                break
                             else:
                                 strliteral += self.script[self.pos]
                         else:
                             strliteral += self.script[self.pos]
                         self.pos += 1
 
-                    self.pos += 1
-                    return ('STRING_VALUE', strliteral.decode('utf8'))
+                    if is_string:
+                        self.pos += 1
+                        return ('STRING_VALUE', strliteral.decode('utf8'))
+                    # fall through (to consider the L or to ignore the ")
 
                 if isalpha_(c):
                     # Identifier or reserved