From 6a0eebf157ba426675c2f0bbbfcffa8db0f932d0 Mon Sep 17 00:00:00 2001 From: Sei Lisa Date: Sun, 8 Mar 2015 16:53:58 +0100 Subject: [PATCH] Implement a couple of subtleties of the lexer in the handling of strings. First, a string does not have its own lexer context; it is matched by a single regular expression. Therefore, if there isn't a matching closing quote before EOF, it's not an unterminated string. It plainly *isn't* a string. Second, the string lexer searches for either the regexp '[^"\\]' (any character except a double quote or a backslash) or the regexp '\\.', that is, a backslash followed by an "any character" metacharacter. But the "any character" metacharacter does NOT match a newline. The consequence is that a '\' followed by a newline cancels the match, and the whole thing is not considered a string. Isolated double quotes are then simply ignored, like many other characters. An example illustrating both cases: ``` default { state_entry() { string msg = "Hello, Avatar!"; llOwnerSay"(msg); // the opening quote will be ignored // the following backslash at EOL cancels the previous double quote: \ llOwnerSay(llGetKey(")); // Unterminated string; unmatched so ignored. 
} } ``` --- lslopt/lslparse.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/lslopt/lslparse.py b/lslopt/lslparse.py index e362f2c..fbee4c0 100644 --- a/lslopt/lslparse.py +++ b/lslopt/lslparse.py @@ -356,8 +356,16 @@ class parser(object): self.pos += 1 strliteral = '"' + savepos = self.pos # we may need to backtrack + is_string = True # by default + while self.script[self.pos:self.pos+1] != '"': - self.ueof() + # per the grammar, on EOF, it's not considered a string + if self.pos >= self.length: + self.pos = savepos + is_string = False + break + if self.script[self.pos] == '\\': self.pos += 1 self.ueof() @@ -365,14 +373,21 @@ class parser(object): strliteral += '\n' elif self.script[self.pos] == 't': strliteral += ' ' + elif self.script[self.pos] == '\n': + # '\' followed by a newline; it's not a string. + self.pos = savepos + is_string = False + break else: strliteral += self.script[self.pos] else: strliteral += self.script[self.pos] self.pos += 1 - self.pos += 1 - return ('STRING_VALUE', strliteral.decode('utf8')) + if is_string: + self.pos += 1 + return ('STRING_VALUE', strliteral.decode('utf8')) + # fall through (to consider the L or to ignore the ") if isalpha_(c): # Identifier or reserved