LSL-PyOptimizer/lslopt/lslparse.py
Sei Lisa cef3e626a9 Associate identifiers with their direct location.
Instead of returning the scope level at which search can start, return the scope level at which the symbol actually is.
2014-07-27 01:49:44 +02:00

1913 lines
77 KiB
Python

from lslcommon import Key, Vector, Quaternion
import lslfuncs
import sys, re
# Note this module was basically written from bottom to top, which may help
# reading it.
def warning(txt):
    """Write a warning message, followed by a newline, to standard error.

    txt must be a unicode string; this is enforced with an assertion.
    """
    # isinstance() instead of an exact type comparison, so that subclasses
    # of unicode are accepted as well.
    assert isinstance(txt, unicode)
    sys.stderr.write(txt + u'\n')
def isdigit(c):
    """Tell whether c compares within the ASCII range '0'..'9'."""
    return c >= '0' and c <= '9'
def isalpha_(c):
    """Tell whether c is an underscore or an ASCII letter (A-Z, a-z)."""
    if c == '_':
        return True
    if 'A' <= c <= 'Z':
        return True
    return 'a' <= c <= 'z'
def isalphanum_(c):
    """Tell whether c is an underscore, an ASCII letter or a decimal digit."""
    if isalpha_(c):
        return True
    return isdigit(c)
def ishex(c):
    """Tell whether c is an ASCII hexadecimal digit (0-9, A-F, a-f)."""
    if '0' <= c <= '9':
        return True
    if 'A' <= c <= 'F':
        return True
    return 'a' <= c <= 'f'
def fieldpos(inp, sep, n):
    """Return the starting position of field n in a string inp that has
    zero or more fields separated by sep.

    Fields are numbered from 0, so field 0 always starts at position 0.
    Returns -1 if inp contains fewer than n separators.
    """
    pos = -1
    # range() instead of xrange(): works on both Python 2 and Python 3, and
    # the loop variable no longer shadows the parameter n.
    for _ in range(n):
        pos = inp.find(sep, pos + 1)
        if pos < 0:
            return pos
    return pos + 1
class EParse(Exception):
    """Base class of all parse errors.

    The message is prefixed with the line and column (both 1-based) that
    correspond to parser.errorpos within parser.script.
    """
    def __init__(self, parser, msg):
        script = parser.script
        errorpos = parser.errorpos
        lno = script.count('\n', 0, errorpos)
        # Start of the line containing errorpos: one past the last newline
        # before it, or 0 if there is none (rfind returns -1 in that case).
        linestart = script.rfind('\n', 0, errorpos) + 1
        cno = errorpos - linestart
        # Note the column number reported is in bytes.
        msg = u"(Line %d char %d): ERROR: %s" % (lno + 1, cno + 1, msg)
        super(EParse, self).__init__(msg)

# The subclasses below name their own class explicitly in super().
# super(self.__class__, self) would recurse without end as soon as one of them
# is subclassed, because self.__class__ is then the derived class.

class EParseUEOF(EParse):
    """Unexpected end of file. Moves parser.errorpos to the end of the script."""
    def __init__(self, parser):
        parser.errorpos = len(parser.script)
        super(EParseUEOF, self).__init__(parser, u"Unexpected EOF")

class EParseSyntax(EParse):
    """Syntax error."""
    def __init__(self, parser):
        super(EParseSyntax, self).__init__(parser, u"Syntax error")

class EParseAlreadyDefined(EParse):
    """A name is declared twice within the same scope."""
    def __init__(self, parser):
        super(EParseAlreadyDefined, self).__init__(parser,
            u"Name previously declared within scope")

class EParseUndefined(EParse):
    """A name is used that is not defined within scope."""
    def __init__(self, parser):
        super(EParseUndefined, self).__init__(parser,
            u"Name not defined within scope")

class EParseUnexpected(EParse):
    """Internal error that is not expected to happen."""
    def __init__(self, parser):
        super(EParseUnexpected, self).__init__(parser,
            u"Unexpected internal error")

class EParseTypeMismatch(EParse):
    """Operand or argument types are not valid for the operation."""
    def __init__(self, parser):
        super(EParseTypeMismatch, self).__init__(parser, u"Type mismatch")

class EParseReturnShouldBeEmpty(EParse):
    """Raised with the message below; the raising code is outside this
    section of the file."""
    def __init__(self, parser):
        super(EParseReturnShouldBeEmpty, self).__init__(parser,
            u"Return statement type doesn't match function return type")

class EParseReturnIsEmpty(EParse):
    """A function that returns a value has a return statement without one."""
    def __init__(self, parser):
        super(EParseReturnIsEmpty, self).__init__(parser,
            u"Function returns a value but return statement doesn't")

# This error message may sound funny, for good reasons.
class EParseInvalidField(EParse):
    """A field (.x, .y, .z, .s) is applied to a type that doesn't have it."""
    def __init__(self, parser):
        super(EParseInvalidField, self).__init__(parser,
            u"Use of vector or quaternion method on incorrect type")

class EParseFunctionMismatch(EParse):
    """A function call has the wrong number or types of arguments."""
    def __init__(self, parser):
        super(EParseFunctionMismatch, self).__init__(parser,
            u"Function type mismatches type or number of arguments")

class EParseDeclarationScope(EParse):
    """A declaration appears where a new scope is required."""
    def __init__(self, parser):
        super(EParseDeclarationScope, self).__init__(parser,
            u"Declaration requires a new scope -- use { and }")
class EInternal(Exception):
    """Control-flow exception rather than a user-facing error.

    Raising it from a helper makes parser.GetToken() return the EOF token
    immediately. It is also reused elsewhere to detect parsing errors.
    """
# Every string that can appear in the parse tree is listed once here and then
# mapped to itself, so that all uses share a single string object. This saves
# memory in interpreters that don't intern strings.
S = (
    'integer', 'float', 'string', 'key', 'vector', 'rotation', 'quaternion',
    'list', 'IDENT', 'x', 'y', 'z', 's', 'CAST', '<', '<=', '>=', '>',
    'CONSTANT', 'VECTOR', 'ROTATION', 'LIST', 'PRINT', 'FUNCTION', 'FIELD',
    'EXPR', 'V++', 'V--', '=', '+=', '-=', '*=', '/=', '%=', '&=', '|=',
    '^=', '<<=', '>>=', 'NEG', '!', '~', '++V', '--V', '()', '*', '/', '%',
    '+', '-', '<<', '>>', '==', '!=', '&', '^', '|', '&&', '||', '@', 'JUMP',
    'STATE', 'RETURN', 'IF', 'WHILE', 'DO', 'FOR', 'DECL', '{}', ';',
    'Label', 'State', 'TRUE', 'FALSE', 'default', 'DEFAULT',
)
S = dict((name, name) for name in S)
class parser(object):
    # Lexer and recursive-descent expression parser.
    # NOTE(review): the class continues beyond this section of the file; the
    # instance attributes it relies on (script, pos, symtab, ...) are
    # presumably set up there.

    # Assignment operators that are always accepted.
    assignment_ops = frozenset(('=', '+=', '-=', '*=', '/=', '%='))
    # Assignment operators accepted only when self.extendedassignment is set.
    extassignment_ops = frozenset(('|=', '&=', '^=', '<<=', '>>='))

    # Two-character operators recognized by the lexer.
    double_ops = frozenset(('++', '--', '+=', '-=', '*=', '/=', '%=', '==',
        '!=', '>=', '<=', '&&', '||', '<<', '>>'))
    # Two-character operators recognized only when self.extendedassignment
    # is set ('<<=' and '>>=' are handled separately by the lexer).
    extdouble_ops = frozenset(('|=', '&=', '^='))

    # These are hardcoded because additions or modifications imply
    # important changes to the code anyway.
    # The lexer returns each of these as a token named by its uppercase form.
    keywords = frozenset((S['default'], 'state', 'event', 'jump', 'return', 'if',
        'else', 'for', 'do', 'while', 'print', S['TRUE'], S['FALSE']))
    # Type names; the lexer normalizes 'quaternion' to 'rotation'.
    types = frozenset((S['integer'],S['float'],S['string'],S['key'],S['vector'],
        S['quaternion'],S['rotation'],S['list']))

    # Default values per type when declaring variables
    DefaultValues = {S['integer']: 0, S['float']: 0.0, S['string']: u'',
        S['key']: Key(u''), S['vector']: lslfuncs.ZERO_VECTOR,
        S['rotation']: lslfuncs.ZERO_ROTATION, S['list']: []
        }

    # Python type of a value -> LSL type name.
    PythonType2LSL = {int: S['integer'], float: S['float'],
        unicode: S['string'], Key: S['key'], Vector: S['vector'],
        Quaternion: S['rotation'], list: S['list']}

    # Python type of a value -> token type; used by the lexer to turn a
    # named constant into a literal token.
    PythonType2LSLToken = {int:'INTEGER_VALUE', float:'FLOAT_VALUE',
        unicode:'STRING_VALUE', Key:'KEY_VALUE', Vector:'VECTOR_VALUE',
        Quaternion:'ROTATION_VALUE', list:'LIST_VALUE'}
def PushScope(self):
    """Open a new scope level and make it the current one.

    A fresh symbol table is appended to self.symtab; the index of the
    enclosing scope is stored in it under the key -1.
    """
    enclosing = self.scopeindex
    self.symtab.append({-1: enclosing})
    self.scopeindex = len(self.symtab) - 1
def PopScope(self):
    """Make the enclosing scope level the current one.

    Raises EParseUnexpected if the current scope has no enclosing scope.
    """
    enclosing = self.symtab[self.scopeindex][-1]
    self.scopeindex = enclosing
    if enclosing is None:
        raise EParseUnexpected(self)
def FindSymbolPartial(self, symbol, MustBeLabel = False):
    """Look a symbol up in every visible scope, innermost first.

    Returns the symbol table entry, or None if the symbol is not found.
    With MustBeLabel set, entries that are not labels are skipped.

    Labels have special scope rules: other identifiers with the same
    name that are not labels are invisible to JUMP statements. Example:

        default{timer(){ @x; {integer x; jump x;} }}

    finds the label at the outer block. However:

        default{timer(){ @x; integer x; }}

    gives an identifier already defined error. On the other hand, labels
    hide other types (but that's dealt with in the caller to this function):

        default{timer(){ integer a; { @a; a++; } }}

    gives a Name Not Defined error.
    """
    level = self.scopeindex
    while level is not None:
        table = self.symtab[level]
        if symbol in table:
            entry = table[symbol]
            if not MustBeLabel or entry[1] == 'Label':
                return entry
        # table is a dict, not a list; -1 is the key of the parent index
        level = table[-1]
    return None
# No labels or states allowed here (but functions are)
def FindSymbolFull(self, symbol):
    """Return the type of a symbol, or None if it is not defined.

    Local scopes are searched innermost first and yield element 1 of the
    symbol table entry. If no local scope has the symbol, the result is
    whatever self.globals holds for it: either a string with the LSL type,
    or a tuple if it's a function.
    """
    level = self.scopeindex
    while level:
        table = self.symtab[level]
        if symbol in table:
            # Functions can't be local, so the entry found here never needs
            # the special treatment of a function.
            return table[symbol][1]
        level = table[-1]
    return self.globals[symbol] if symbol in self.globals else None
def FindScopeIndex(self, symbol, MustBeLabel = False):
    """Return the scope level at which a symbol is found.

    Works like FindSymbolPartial, except that the search stops when it
    reaches a false scope index (the loop ends at index 0 or None), and
    that index is returned if no searched scope has the symbol.
    """
    level = self.scopeindex
    while level:
        table = self.symtab[level]
        if symbol in table:
            if not MustBeLabel or table[symbol][1] == 'Label':
                return level
        level = table[-1]
    return level
def ValidateField(self, typ, field):
    """Check that field is a valid component of type typ.

    Vectors have x, y and z; rotations have x, y, z and s. Any other
    combination raises EParseInvalidField.
    """
    if typ == 'vector':
        allowed = ('x', 'y', 'z')
    elif typ == 'rotation':
        allowed = ('x', 'y', 'z', 's')
    else:
        allowed = ()
    if field not in allowed:
        raise EParseInvalidField(self)
def order(self):
    """Increment self.dictorder and return its new value."""
    following = self.dictorder + 1
    self.dictorder = following
    return following
def autocastcheck(self, value, typ):
    """Check that value can be implicitly converted to type typ.

    Implicit conversion is possible between string and key, and from
    integer to float. Returns value itself when the types already match,
    or when the conversion is possible and self.explicitcast is not set;
    returns a CAST node wrapping value when it is set. Raises
    EParseTypeMismatch for any other combination.
    """
    source = value[1]
    if source == typ:
        return value

    textual = ('string', 'key')
    convertible = (source in textual and typ in textual
        or source == 'integer' and typ == 'float')
    if not convertible:
        raise EParseTypeMismatch(self)

    if self.explicitcast:
        return [S['CAST'], S[typ], value]
    return value
def ueof(self):
    """Raise EParseUEOF if the read position is at or past the end."""
    if self.pos < self.length:
        return
    raise EParseUEOF(self)
def ceof(self):
    """Raise EInternal if the read position is at or past the end.

    This is the check for a normal (expected) EOF: the exception makes
    GetToken return the EOF token.
    """
    if self.pos < self.length:
        return
    raise EInternal() # force GetToken to return EOF
def GetToken(self):
    """Lexer: read the next token from self.script, starting at self.pos.

    Returns a tuple whose first element is the token type:
      ('STRING_VALUE', unicode), ('INTEGER_VALUE', int),
      ('FLOAT_VALUE', float), ('TYPE', name), ('EVENT_NAME', name),
      ('IDENT', name), a 1-tuple with the uppercased keyword, a 1-tuple with
      the operator or punctuation, a (token, value) pair for a named
      constant, or ('EOF',) at the end of the script.

    Comments, whitespace and any character that starts no token are skipped.
    Raises EParseUEOF on an unterminated string or multiline comment.
    """
    # Keep track of the current position. If an error occurs, it will happen
    # at the start of this token.
    self.errorpos = self.pos

    try:
        while self.pos < self.length:
            c = self.script[self.pos]
            self.pos += 1

            # Process comments
            if c == '/':
                if self.script[self.pos:self.pos+1] == '/':
                    self.pos += 1
                    self.ceof()
                    while self.script[self.pos] != '\n':
                        self.pos += 1
                        self.ceof() # A single-line comment at EOF is not unexpected EOF.
                    self.pos += 1
                    self.ceof()
                    continue
                elif self.script[self.pos:self.pos+1] == '*':
                    # Skip the '*' and one more character, so that the '*'
                    # that opens the comment can't also close it ('/*/').
                    self.pos += 2
                    while self.script[self.pos-1:self.pos+1] != '*/':
                        self.pos += 1
                        self.ueof() # An unterminated multiline comment *is* unexpected EOF.
                    self.pos += 1
                    self.ceof()
                    continue

            # Process strings
            if c == '"' or c == 'L' and self.script[self.pos:self.pos+1] == '"':
                strliteral = ''
                if c == 'L':
                    # With the L prefix, the string starts with a literal
                    # double quote character.
                    self.pos += 1
                    strliteral = '"'
                while self.script[self.pos:self.pos+1] != '"':
                    self.ueof()
                    if self.script[self.pos] == '\\':
                        self.pos += 1
                        self.ueof()
                        if self.script[self.pos] == 'n':
                            strliteral += '\n'
                        elif self.script[self.pos] == 't':
                            # LSL expands \t to four spaces, not to a tab
                            # and not to a single space.
                            strliteral += '    '
                        else:
                            # Any other escaped character stands for itself.
                            strliteral += self.script[self.pos]
                    else:
                        strliteral += self.script[self.pos]
                    self.pos += 1
                self.pos += 1 # skip the closing quote
                return ('STRING_VALUE', strliteral.decode('utf8'))

            if isalpha_(c):
                # Identifier or reserved
                ident = c
                while isalphanum_(self.script[self.pos:self.pos+1]):
                    ident += self.script[self.pos]
                    self.pos += 1

                # Got an identifier - check if it's a reserved word
                if ident in self.keywords:
                    return (ident.upper(),)
                if ident in self.types:
                    if ident == 'quaternion':
                        ident = 'rotation' # Normalize types
                    return ('TYPE',ident)
                if ident in self.events:
                    return ('EVENT_NAME',ident)
                if ident in self.constants:
                    value = self.constants[ident]
                    return (self.PythonType2LSLToken[type(value)], value)

                return ('IDENT', ident)

            # Process numbers: float, hex integer, dec integer
            if c == '.' or isdigit(c):
                number = ''
                if c != '.':
                    # We have a digit, which means we have for sure either
                    # an integer or a float.

                    # Eat as many decimal digits as possible
                    number = c
                    while isdigit(self.script[self.pos:self.pos+1]):
                        number += self.script[self.pos]
                        self.pos += 1

                    if number == '0' and self.script[self.pos:self.pos+1] in ('x','X') \
                       and ishex(self.script[self.pos+1:self.pos+2]):
                        # We don't need the 0x prefix.
                        self.pos += 1
                        # Eat leading zeros to know the real length.
                        while self.script[self.pos:self.pos+1] == '0':
                            self.pos += 1
                        number = ''
                        while ishex(self.script[self.pos:self.pos+1]):
                            if len(number) < 9: # don't let it grow more than necessary
                                number += self.script[self.pos]
                            self.pos += 1
                        if number == '':
                            # We know there was at least a valid digit so it
                            # must've been all zeros.
                            number = '0'
                        if len(number) > 8:
                            # More than 32 bits: the result is -1.
                            number = -1
                        else:
                            number = lslfuncs.S32(int(number, 16))
                        return ('INTEGER_VALUE', number)

                    # Add the dot if present
                    if self.script[self.pos:self.pos+1] == '.':
                        number += '.'
                        self.pos += 1
                else:
                    number = c

                while isdigit(self.script[self.pos:self.pos+1]):
                    number += self.script[self.pos]
                    self.pos += 1

                # At this point, number contains as many digits as there are
                # before the dot, the dot if present, and as many digits as
                # there are after the dot.
                if number != '.': # A dot alone can't be a number so we rule it out here.
                    exp = ''
                    if self.script[self.pos:self.pos+1] in ('e','E'):
                        epos = self.pos # Temporary position tracker, made permanent only if the match succeeds
                        exp = self.script[epos]
                        epos += 1
                        if self.script[epos:epos+1] in ('+','-'):
                            exp += self.script[epos]
                            epos += 1
                        if isdigit(self.script[epos:epos+1]):
                            # Now we *do* have an exponent.
                            exp += self.script[epos]
                            epos += 1
                            while isdigit(self.script[epos:epos+1]):
                                exp += self.script[epos]
                                epos += 1
                            self.pos = epos # "Commit" the new position
                        else:
                            exp = '' # No cigar. Rollback and backtrack. Invalidate exp.

                    if exp != '' or '.' in number: # Float
                        if '.' in number:
                            # Eat the 'F' if present
                            if self.script[self.pos:self.pos+1] in ('f','F'):
                                # Python doesn't like the 'F' so don't return it
                                self.pos += 1
                        return ('FLOAT_VALUE', lslfuncs.F32(float(number + exp)))

                    # Decimal integer. Values above 4294967295 give -1; the
                    # rest are wrapped to 32 bits by S32.
                    if len(number) > 10 or len(number) == 10 and number > '4294967295':
                        number = -1
                    else:
                        number = lslfuncs.S32(int(number))

                    return ('INTEGER_VALUE', number)

            # Two-character operators (and the three-character '<<=' and
            # '>>=' when extended assignment is enabled).
            if self.script[self.pos-1:self.pos+1] in self.double_ops \
               or self.extendedassignment and self.script[self.pos-1:self.pos+1] in self.extdouble_ops:
                self.pos += 1
                if self.extendedassignment and self.script[self.pos-2:self.pos+1] in ('<<=', '>>='):
                    self.pos += 1
                    return (self.script[self.pos-3:self.pos],)
                return (self.script[self.pos-2:self.pos],)

            if c in '.;{},=()-+*/%@:<>[]&|^~!' and c != '':
                return (c,)

            # We eat spacers AND any other character, so no explicit check
            # for whitespace is needed here, although the lex file includes
            # one (the lex file does not count() invalid characters for the
            # purpose of error reporting).

    except EInternal:
        pass # clear the exception and fall through

    return ('EOF',)
def NextToken(self):
    """Fetch the next token with GetToken and store it in self.tok."""
    token = self.GetToken()
    self.tok = token
# Recursive-descent parser. The result is a symbol table.
def expect(self, toktype):
    """Check that the current token is of the given type.

    Raises EParseUEOF if the current token is EOF instead, and
    EParseSyntax if it is anything else.
    """
    found = self.tok[0]
    if found == toktype:
        return
    if found == 'EOF':
        raise EParseUEOF(self)
    raise EParseSyntax(self)
def Parse_vector_rotation_tail(self):
    """(See Parse_unary_postfix_expression for context)

    Parse what follows the second comma of a vector or rotation literal, up
    to and including the closing '>'. Returns a list with the remaining
    components: one for a vector, two for a rotation.

    To our advantage, the precedence of the closing '>' in a vector or
    rotation literal is that of an inequality. Our strategy will thus be
    to perform the job of an inequality, calling the lower level 'shift'
    rule and building the inequalities if they are not '>'. When we find a
    '>', we check whether the next token makes sense as beginning an
    inequality; if not, we finally close the vector or rotation.

    But first, a quaternion _may_ have a full expression at the third
    component, so we tentatively parse this position as an expression, and
    backtrack if it causes an error. This is the only point where this
    parser backtracks.
    """
    ret = []
    pos = self.pos
    errorpos = self.errorpos
    tok = self.tok
    try:
        component3 = self.Parse_expression()

        # Checking here for '>' might parse a different grammar, because
        # it might allow e.g. <1,2,3==3>; as a vector, which is not valid.
        # Not too sure about that, but we're cautious and don't do it.

        self.expect(',')
        self.NextToken()
    except EParse:
        # Backtrack. Any parse error counts, not just syntax errors: e.g.
        # <0,0,0>-v; (v being a vector) fails the tentative parse with a
        # type mismatch in 0 > -v, yet it is a valid vector followed by a
        # subtraction. If the input is really wrong, the second attempt
        # below raises the appropriate error.
        self.pos = pos
        self.errorpos = errorpos
        self.tok = tok
    else:
        # Keep the component only if the tentative parse succeeded in full.
        # An expression that parsed but wasn't followed by a comma (e.g. the
        # 3 > -1 in <1,2,3> - 1) must not remain in the result.
        ret.append(component3)

    # OK, here we are.
    inequality = self.Parse_shift() # shift is the descendant of inequality
    while self.tok[0] in ('<', '<=', '>=', '>'):
        op = self.tok[0]
        self.NextToken()
        if op == '>':
            # Check if the current token can be a part of a comparison.
            # If not, it's a vector/quaternion terminator.
            if self.tok[0] not in (
                # List adapted from this section of the bison report:
                #state 570
                #
                #  176 expression: expression '>' . expression
                #  214 quaternion_initializer: '<' expression ',' expression ',' expression ',' expression '>' .
                'IDENT', 'INTEGER_VALUE', 'FLOAT_VALUE', 'STRING_VALUE',
                'KEY_VALUE', 'VECTOR_VALUE', 'ROTATION_VALUE', 'LIST_VALUE',
                'TRUE', 'FALSE', '++', '--', 'PRINT', '!', '~', '(', '['
                ):
                ret.append(inequality)
                return ret
        # This is basically a copy/paste of the Parse_inequality handler
        type1 = inequality[1]
        if type1 not in ('integer', 'float'):
            raise EParseTypeMismatch(self)
        value = self.Parse_shift()
        type2 = value[1]
        if type2 not in ('integer', 'float'):
            raise EParseTypeMismatch(self)
        if type1 != type2:
            if type2 == 'float':
                inequality = self.autocastcheck(inequality, type2)
            else:
                value = self.autocastcheck(value, type1)
        inequality = [S[op], S['integer'], inequality, value]

    # Reaching this means an operator of lower precedence happened,
    # e.g. <1,1,1,2==2> (that's syntax error in ==)
    raise EParseSyntax(self)
def Parse_unary_postfix_expression(self, AllowAssignment = True):
    """Grammar parsed here:

    unary_postfix_expression: INTEGER_VALUE | FLOAT_VALUE
        | STRING_VALUE | KEY_VALUE | VECTOR_VALUE | ROTATION_VALUE
        | LIST_VALUE | TRUE | FALSE | vector_literal | rotation_literal | list_literal
        | PRINT '(' expression ')' | IDENT '(' expression_list ')'
        | lvalue '++' | lvalue '--' | assignment %if allowed
        | lvalue
    vector_literal: '<' expression ',' expression ',' expression '>'
    rotation_literal: '<' expression ',' expression ',' expression
        ',' expression '>'
    list_literal: '[' optional_expression_list ']'
    assignment: lvalue '=' expression | lvalue '+=' expression
        | lvalue '-=' expression | lvalue '*=' expression
        | lvalue '/=' expression | lvalue '%=' expression
        %EXTENDED RULES:
        | lvalue '|=' expression | lvalue '&=' expression
        | lvalue '^=' expression
        | lvalue '<<=' expression | lvalue '>>=' expression
    lvalue: IDENT | IDENT '.' IDENT

    AllowAssignment tells whether the assignment rule is enabled.

    Returns a parse tree node: a list whose first element is the node
    kind and whose second element is the LSL type of the result.

    Raises EParseSyntax or EParseUEOF on unexpected tokens, EParseUndefined
    for unknown names, and EParseTypeMismatch for invalid operand types.
    """
    tok0 = self.tok[0]
    val = self.tok[1] if len(self.tok) > 1 else None
    self.NextToken()
    CONSTANT = S['CONSTANT']
    if tok0 == '-' and self.tok[0] in ('INTEGER_VALUE', 'FLOAT_VALUE'):
        # Negative numeric constant
        val = self.tok[1]
        self.NextToken()
        if type(val) == int:
            # Wrap to 32 bits: negating -2147483648 must give -2147483648
            # again, not a value out of the range of an LSL integer.
            return [CONSTANT, S['integer'], lslfuncs.S32(-val)]
        return [CONSTANT, S['float'], -val]
    if tok0 == 'INTEGER_VALUE':
        return [CONSTANT, S['integer'], val]
    if tok0 == 'FLOAT_VALUE':
        return [CONSTANT, S['float'], val]
    if tok0 == 'STRING_VALUE':
        if self.allowmultistrings:
            # Adjacent string literals are concatenated.
            while self.tok[0] == 'STRING_VALUE':
                val += self.tok[1]
                self.NextToken()
        return [CONSTANT, S['string'], val]
    # Key constants are not currently supported - use string
    #if tok0 == 'KEY_VALUE':
    #    return [CONSTANT, S['key'], val]
    if tok0 == 'VECTOR_VALUE':
        return [CONSTANT, S['vector'], val]
    if tok0 == 'ROTATION_VALUE':
        return [CONSTANT, S['rotation'], val]
    if tok0 == 'LIST_VALUE':
        return [CONSTANT, S['list'], val]
    if tok0 in ('TRUE', 'FALSE'):
        return [CONSTANT, S['integer'], 1 if tok0 == 'TRUE' else 0]
    if tok0 == '<':
        val = [self.Parse_expression()]
        self.expect(',')
        self.NextToken()
        val.append(self.Parse_expression())
        self.expect(',')
        self.NextToken()

        # It would be cute if the rest were as simple as parsing an
        # expression and checking for '>' or ','. Alas, it isn't. The
        # closing angle bracket of a vector '>' conflicts with the
        # inequality operator '>' in unexpected ways.
        # Example: <2,2,2> * 2 will trigger the problem:
        # the expression parser tries to parse the inequality 2 > *2,
        # choking at the *. To make things worse, LSL admits things such as
        # <2,2,2 > 2> (but not things like <2,2,2 == 2> because the == has
        # lower precedence than the '>' and thus it forces termination of
        # the vector constant). And to make things even worse, it also
        # admits things such as <2,2,2 == 2, 2> because the comma is not in
        # the precedence scale, so it's quite complex to handle.

        # We defer it to a separate function.
        val += self.Parse_vector_rotation_tail()

        if len(val) == 3:
            return [S['VECTOR'], S['vector']] + val
        return [S['ROTATION'], S['rotation']] + val

    if tok0 == '[':
        val = self.Parse_optional_expression_list()
        self.expect(']')
        self.NextToken()
        return [S['LIST'], S['list']] + val
    if tok0 == 'PRINT':
        self.expect('(')
        self.NextToken()
        val = self.Parse_expression()
        if val[1] not in self.types:
            raise EParseTypeMismatch(self) if val[1] is None else EParseUndefined(self)
        self.expect(')')
        self.NextToken()
        return [S['PRINT'], None, val]

    if tok0 != 'IDENT':
        if tok0 == 'EOF':
            raise EParseUEOF(self)
        raise EParseSyntax(self)
    typ = self.FindSymbolFull(val)
    if typ is None:
        raise EParseUndefined(self)
    # Note this may fail to do interning of the string from the symbol table.
    # Doing so with a dictionary key may affect performance.
    name = val

    # Course of action decided here.
    tok0 = self.tok[0]
    if tok0 == '(':
        # Function call
        self.NextToken()
        if type(typ) != tuple:
            # The name is not that of a function.
            raise EParseUndefined(self)
        args = self.Parse_optional_expression_list(typ[1])
        self.expect(')')
        self.NextToken()
        return [S['FUNCTION'], None if typ[0] is None else S[typ[0]], name, args, self.scopeindex]
    if typ not in self.types:
        raise EParseTypeMismatch(self)
    typ = S[typ]
    lvalue = [S['IDENT'], typ, name, self.FindScopeIndex(name)]
    if tok0 == '.':
        self.NextToken()
        self.expect('IDENT')
        self.ValidateField(typ, self.tok[1])
        lvalue = [S['FIELD'], S['float'], lvalue, S[self.tok[1]]]
        self.NextToken()
        tok0 = self.tok[0]
        # From here on, the lvalue is the field, which is a float. Without
        # this, assignments like v.x = 1.0 would be type-checked against
        # the vector or rotation type and rejected.
        typ = S['float']

    if tok0 in ('++', '--'):
        self.NextToken()
        if lvalue[1] not in ('integer', 'float'):
            raise EParseTypeMismatch(self)
        return [S['V'+tok0], lvalue[1], lvalue]
    if AllowAssignment and (tok0 in self.assignment_ops
                            or self.extendedassignment and tok0 in self.extassignment_ops):
        self.NextToken()
        expr = self.Parse_expression()
        rtyp = expr[1]
        if rtyp not in self.types:
            raise EParseTypeMismatch(self)
        if typ in ('integer', 'float'):
            # LSL admits integer *= float (go figger).
            # It acts like: lhs = (integer)((float)lhs * rhs)
            # That would trigger an error without this check.
            if tok0 != '*=' or typ == 'float':
                expr = self.autocastcheck(expr, typ)
                rtyp = typ
        # Lots of drama for checking types. This is pretty much like
        # addition, subtraction, multiply, divide, etc. all in one go.
        if tok0 == '=':
            if typ == 'list' != rtyp:
                # Anything can be assigned to a list.
                if self.explicitcast:
                    expr = [S['CAST'], typ, expr]
            else:
                expr = self.autocastcheck(expr, typ)

            return [S['='], typ, lvalue, expr]

        if tok0 == '+=':
            if typ == 'float':
                expr = self.autocastcheck(expr, typ)
            if rtyp != typ != 'list' or typ == rtyp == 'key':
                # key + key is the only disallowed combo of equals
                raise EParseTypeMismatch(self)
            if self.explicitcast:
                if typ == 'list' != rtyp:
                    expr = [S['CAST'], S[typ], expr]
            return [S[tok0], typ, lvalue, expr]

        if tok0 == '-=':
            if typ == rtyp in ('integer', 'float', 'vector', 'rotation'):
                return [S[tok0], typ, lvalue, expr]
            raise EParseTypeMismatch(self)

        if tok0 in ('*=', '/='):
            # There is a special case dealt with in advance.
            if tok0 == '*=' and typ == 'integer' and rtyp == 'float':
                return [S[tok0], typ, lvalue, expr]

            if (typ == rtyp or typ == 'vector') and rtyp in ('integer', 'float', 'rotation'):
                if typ == 'vector' and rtyp == 'integer':
                    expr = self.autocastcheck(expr, 'float')
                return [S[tok0], typ, lvalue, expr]
            raise EParseTypeMismatch(self)

        if tok0 == '%=':
            if typ == rtyp in ('integer', 'vector'):
                return [S[tok0], typ, lvalue, expr]
            raise EParseTypeMismatch(self)

        # Rest take integer operands only
        if typ == rtyp == 'integer':
            return [S[tok0], typ, lvalue, expr]

        # The operator and its right hand side have been consumed already,
        # so returning the bare lvalue here would silently drop an invalid
        # assignment.
        raise EParseTypeMismatch(self)

    return lvalue
def Parse_unary_expression(self, AllowAssignment = True):
    """Grammar parsed here:

    unary_expression: '-' factor | '!' unary_expression | '~' unary_expression
        # we expand lvalue here to facilitate parsing
        | '++' IDENT | '++' IDENT '.' IDENT
        | '--' IDENT | '--' IDENT '.' IDENT
        | '(' TYPE ')' typecast_expression | '(' expression ')'
        | unary_postfix_expression
    %NORMAL RULES ONLY:
    typecast_expression: '(' expression ')' | unary_postfix_expression %except assignment
    %EXTENDED RULES ONLY:
    typecast_expression: unary_expression %except assignment

    AllowAssignment is passed on to Parse_unary_postfix_expression when
    none of the prefix forms applies.

    Returns a parse tree node: a list whose first element is the node kind
    and whose second element is the LSL type of the result.
    """
    tok0 = self.tok[0]
    if tok0 == '-':
        # Unary minus
        self.NextToken()
        value = self.Parse_factor()
        if value[1] not in ('integer', 'float', 'vector', 'rotation'):
            raise EParseTypeMismatch(self)
        return [S['NEG'], value[1], value]
    if tok0 in ('!', '~'):
        # Unary logic and bitwise NOT - applies to integers only
        self.NextToken()
        value = self.Parse_unary_expression()
        if value[1] != 'integer':
            raise EParseTypeMismatch(self)
        return [S[tok0], S['integer'], value]
    if tok0 in ('++', '--'):
        # Pre-increment / pre-decrement
        self.NextToken()
        self.expect('IDENT')
        name = self.tok[1]
        typ = self.FindSymbolFull(name)
        if typ not in self.types:
            # Pretend it doesn't exist
            raise EParseUndefined(self)
        typ = S[typ]

        ret = [S['IDENT'], typ, name, self.FindScopeIndex(name)]
        self.NextToken()
        if self.tok[0] == '.':
            # Field of a vector or rotation; the resulting node is a float.
            self.NextToken()
            self.expect('IDENT')
            self.ValidateField(typ, self.tok[1])
            ret = [S['FIELD'], S['float'], ret, S[self.tok[1]]]
            self.NextToken()

        # Type of the variable, or float if a field was selected.
        typ = ret[1]
        if typ not in ('integer', 'float'):
            raise EParseTypeMismatch(self)

        return [S[tok0+'V'], typ, ret]

    if tok0 == '(':
        # Parenthesized expression or typecast

        self.NextToken()
        if self.tok[0] != 'TYPE':
            # Parenthesized expression
            expr = self.Parse_expression()
            self.expect(')')
            self.NextToken()
            return [S['()'], expr[1], expr]

        # Typecast
        typ = S[self.tok[1]]
        self.NextToken()
        self.expect(')')
        self.NextToken()

        if self.extendedtypecast:
            # Allow any unary expression (except assignment). The type cast
            # acts as a prefix operator.
            expr = self.Parse_unary_expression(AllowAssignment = False)
        else:
            if self.tok[0] == '(':
                self.NextToken()
                expr = self.Parse_expression()
                self.expect(')')
                self.NextToken()
                expr = [S['()'], expr[1], expr]
            else:
                expr = self.Parse_unary_postfix_expression(AllowAssignment = False)
        basetype = expr[1]
        # Valid casts: anything to list; integer and float to integer,
        # float or string; string to anything; any type to itself or to
        # string.
        if typ == 'list' and basetype in self.types \
           or basetype in ('integer', 'float') and typ in ('integer', 'float', 'string') \
           or basetype == 'string' and typ in self.types \
           or basetype == 'key' and typ in ('string', 'key') \
           or basetype == 'vector' and typ in ('string', 'vector') \
           or basetype == 'rotation' and typ in ('string', 'rotation') \
           or basetype == 'list' and typ == 'string':
            return [S['CAST'], typ, expr]
        raise EParseTypeMismatch(self)

    # Must be a postfix expression.
    return self.Parse_unary_postfix_expression(AllowAssignment)
def Parse_factor(self):
    """Grammar parsed here:

    factor: unary_expression | factor '*' unary_expression
        | factor '/' unary_expression | factor '%' unary_expression

    Returns a parse tree node. Operators are left-associative: each
    iteration wraps the node built so far as the left operand.

    Raises EParseTypeMismatch when the operand types are not valid for
    the operator.
    """
    factor = self.Parse_unary_expression()
    while self.tok[0] in ('*', '/', '%'):
        op = self.tok[0]
        type1 = factor[1]
        # Acceptable types for LHS
        if op in ('*', '/') and type1 not in ('integer', 'float',
           'vector', 'rotation') \
           or op == '%' and type1 not in ('integer', 'vector'):
            raise EParseTypeMismatch(self)
        self.NextToken()
        value = self.Parse_unary_expression()
        type2 = value[1]
        # Mod is easier to check for
        if op == '%' and type1 != type2:
            raise EParseTypeMismatch(self)
        if op == '%' or type1 == type2 == 'integer':
            # Deal with the special cases first (it's easy)
            factor = [S[op], S[type1], factor, value]
        else:
            # Any integer must be promoted to float now
            if type1 == 'integer':
                type1 = 'float'
                factor = self.autocastcheck(factor, type1)
            if type2 == 'integer':
                type2 = 'float'
                value = self.autocastcheck(value, type2)
            if type1 == 'float' and type2 in ('float', 'vector') \
               or type1 == 'vector' and type2 in ('float', 'vector', 'rotation') \
               or type1 == type2 == 'rotation':
                if op == '/' and type2 == 'vector':
                    # Division by vector isn't valid
                    raise EParseTypeMismatch(self)
                # The rest are valid
                if type1 == 'float' and type2 == 'vector':
                    # float * vector gives a vector
                    resulttype = type2
                elif type1 == type2 == 'vector':
                    # vector * vector gives a float
                    resulttype = 'float'
                else:
                    # Otherwise the result has the type of the LHS
                    resulttype = type1
                factor = [S[op], S[resulttype], factor, value]
            else:
                raise EParseTypeMismatch(self)
    return factor
def Parse_term(self):
    """Grammar parsed here:

    term: factor | term '+' factor | term '-' factor

    Returns a parse tree node. Operators are left-associative: each
    iteration wraps the node built so far as the left operand.

    Raises EParseTypeMismatch when the operand types are not valid for
    the operator.
    """
    term = self.Parse_factor()
    while self.tok[0] in ('+', '-'):
        op = self.tok[0]
        type1 = term[1]
        if op == '+' and type1 not in self.types \
           or op == '-' and type1 not in ('integer', 'float',
                                          'vector', 'rotation'):
            raise EParseTypeMismatch(self)
        self.NextToken()
        value = self.Parse_factor()
        type2 = value[1]
        # This is necessary, but the reason is subtle.
        # The types must match in principle (except integer/float), so it
        # doesn't seem necessary to check type2. But there's the case
        # where the first element is a list, where the types don't need to
        # match but the second type must make sense.
        # (For '-', an invalid type2 is caught by the checks further below.)
        if op == '+' and type2 not in self.types:
            raise EParseTypeMismatch(self)
        # Isolate the additions where the types match to make our life easier later
        if op == '+' and (type1 == type2 or type1 == 'list' or type2 == 'list'):
            if type1 == type2 == 'key':
                # key + key is the only disallowed combo of equals
                raise EParseTypeMismatch(self)
            if type1 == 'list' != type2:
                # list + nonlist
                if self.explicitcast:
                    value = [S['CAST'], S[type1], value]
            elif type2 == 'list' != type1:
                # nonlist + list
                if self.explicitcast:
                    term = [S['CAST'], S[type2], term]
                # The result is a list whether or not the cast is made
                # explicit; without this, the node would be given the type
                # of the non-list operand.
                type1 = type2
            term = [S[op], S[type1], term, value]
            # Note that although list + nonlist is semantically the same as
            # list + (list)nonlist and same goes for nonlist + list, they
            # don't compile to the same thing, but the optimizer should deal
            # with typecast removal anyway.
        elif self.allowkeyconcat and op == '+' \
             and type1 in ('key', 'string') and type2 in ('key', 'string'):
            # Allow string+key addition (but add explicit cast)
            if type1 == 'key':
                term = [S[op], S[type2], [S['CAST'], S[type2], term], value]
            else:
                term = [S[op], S[type1], term, [S['CAST'], S[type1], value]]
        elif type1 == 'key' or type2 == 'key':
            # Only list + key or key + list is allowed, otherwise keys can't
            # be added or subtracted with anything.
            raise EParseTypeMismatch(self)
        else:
            if type1 == 'float':
                # Promote value to float
                term = [S[op], S[type1], term, self.autocastcheck(value, type1)]
            else:
                # Convert LHS to type2 if possible (note no keys arrive here)
                term = [S[op], S[type2], self.autocastcheck(term, type2), value]
    return term
def Parse_shift(self):
    """Grammar parsed here:

    shift: term | shift '<<' term | shift '>>' term

    Both operands must be integers, otherwise EParseTypeMismatch is
    raised. Returns a parse tree node.
    """
    node = self.Parse_term()
    while True:
        op = self.tok[0]
        if op not in ('<<', '>>'):
            return node
        if node[1] != 'integer':
            raise EParseTypeMismatch(self)
        self.NextToken()
        operand = self.Parse_term()
        node = [S[op], S['integer'], node, operand]
        if operand[1] != 'integer':
            raise EParseTypeMismatch(self)
def Parse_inequality(self):
    """Grammar parsed here:

    inequality: shift | inequality '<' shift | inequality '<=' shift
        | inequality '>' shift | inequality '>=' shift

    Operands must be integers or floats; when they differ, the integer one
    goes through autocastcheck to float. The result type is integer.
    Raises EParseTypeMismatch for any other operand type.
    """
    numeric = ('integer', 'float')
    left = self.Parse_shift()
    while self.tok[0] in ('<', '<=', '>', '>='):
        op = self.tok[0]
        ltype = left[1]
        if ltype not in numeric:
            raise EParseTypeMismatch(self)
        self.NextToken()
        right = self.Parse_shift()
        rtype = right[1]
        if rtype not in numeric:
            raise EParseTypeMismatch(self)
        if ltype != rtype:
            # One side is integer and the other float: promote the integer
            if rtype == 'float':
                left = self.autocastcheck(left, rtype)
            else:
                right = self.autocastcheck(right, ltype)
        left = [S[op], S['integer'], left, right]
    return left
def Parse_comparison(self):
    """Grammar parsed here:

    comparison: inequality | comparison '==' inequality
        | comparison '!=' inequality

    The result type is integer. Raises EParseTypeMismatch if the left
    operand has no valid type or the operand types can't be reconciled.
    """
    left = self.Parse_inequality()
    while self.tok[0] in ('==', '!='):
        op = self.tok[0]
        ltype = left[1]
        if ltype not in self.types:
            raise EParseTypeMismatch(self)
        self.NextToken()
        right = self.Parse_inequality()
        if ltype == 'float':
            right = self.autocastcheck(right, ltype)
        else:
            # For string & key, RHS mandates the conversion
            # (that's room for optimization: always compare strings)
            left = self.autocastcheck(left, right[1])
        left = [S[op], S['integer'], left, right]
    return left
def Parse_bitbool_factor(self):
    """Grammar parsed here:

    bitbool_factor: comparison | bitbool_factor '&' comparison

    Both operands must be integers, otherwise EParseTypeMismatch is
    raised. Returns a parse tree node.
    """
    node = self.Parse_comparison()
    while True:
        op = self.tok[0]
        if op != '&':
            return node
        if node[1] != 'integer':
            raise EParseTypeMismatch(self)
        self.NextToken()
        operand = self.Parse_comparison()
        node = [S[op], S['integer'], node, operand]
        if operand[1] != 'integer':
            raise EParseTypeMismatch(self)
def Parse_bitxor_term(self):
    """Grammar parsed here:

    bitxor_term: bitbool_factor | bitxor_term '^' bitbool_factor

    Both operands must be integers, otherwise EParseTypeMismatch is
    raised. Returns a parse tree node.
    """
    node = self.Parse_bitbool_factor()
    while True:
        op = self.tok[0]
        if op != '^':
            return node
        if node[1] != 'integer':
            raise EParseTypeMismatch(self)
        self.NextToken()
        operand = self.Parse_bitbool_factor()
        node = [S[op], S['integer'], node, operand]
        if operand[1] != 'integer':
            raise EParseTypeMismatch(self)
def Parse_bitbool_term(self):
    """Grammar parsed here:

    bitbool_term: bitxor_term | bitbool_term '|' bitxor_term

    Both operands must be integers, otherwise EParseTypeMismatch is
    raised. Returns a parse tree node.
    """
    node = self.Parse_bitxor_term()
    while True:
        op = self.tok[0]
        if op != '|':
            return node
        if node[1] != 'integer':
            raise EParseTypeMismatch(self)
        self.NextToken()
        operand = self.Parse_bitxor_term()
        node = [S[op], S['integer'], node, operand]
        if operand[1] != 'integer':
            raise EParseTypeMismatch(self)
def Parse_expression(self):
    """Parse a full expression and return it wrapped in an EXPR node.

    Grammar parsed here:

    expression: bitbool_term | expression '||' bitbool_term
        | expression '&&' bitbool_term

    Most operators with same priority, in general, are executed in
    right-to-left order but calculated with precedence left-to-right.
    That is, the tree is generated LTR but traversed RTL (in post-order).

    E.g. a-b+c is calculated (in RPN notation) as: c, b, a, swap, -, +
    i.e. c is evaluated first and a last, but the operation is still
    (a-b)+c which is normal LTR.

    At this point we're just constructing the tree, so we follow normal
    precedence rules.

    Both operands of '&&' and '||' must be of type integer, otherwise
    EParseTypeMismatch is raised. The returned EXPR node carries the type
    of the expression it wraps.
    """
    tree = self.Parse_bitbool_term()
    while self.tok[0] == '&&' or self.tok[0] == '||':
        if tree[1] != 'integer':
            raise EParseTypeMismatch(self)
        opsym = S[self.tok[0]]
        self.NextToken()
        rhs = self.Parse_bitbool_term()
        if rhs[1] != 'integer':
            raise EParseTypeMismatch(self)
        tree = [opsym, S['integer'], tree, rhs]
    return [S['EXPR'], tree[1], tree]
def Parse_optional_expression_list(self, expected_types = None):
    """Parse a possibly empty, comma-separated list of expressions.

    Grammar parsed here:

    optional_expression_list: LAMBDA | expression_list
    expression_list: expression | expression_list ',' expression

    When expected_types is given, there must be exactly one expression per
    entry and each one is passed through autocastcheck against the
    matching entry; any discrepancy raises EParseFunctionMismatch. When it
    is None, each expression only needs a type present in self.types, or
    EParseTypeMismatch is raised.

    Returns the list of parsed expressions.
    """
    # This is a maze of which we get out with a dirty hack.
    # optional_expression_list is used by FOR statements (closed by ';' or
    # ')'), list constants (closed by ']') and function arguments (closed
    # by ')'). If it's not the right token, we'll err anyway, in
    # Parse_expression or upon return.
    exprs = []
    if self.tok[0] not in (']', ')', ';'):
        more = True
        while more:
            expr = self.Parse_expression()
            if expected_types is None:
                if expr[1] not in self.types:
                    raise EParseTypeMismatch(self)
            else:
                # Position of this expression within the list.
                pos = len(exprs)
                if pos >= len(expected_types):
                    raise EParseFunctionMismatch(self)
                try:
                    expr = self.autocastcheck(expr, expected_types[pos])
                except EParseTypeMismatch:
                    raise EParseFunctionMismatch(self)
            exprs.append(expr)
            more = self.tok[0] == ','
            if more:
                self.NextToken()
    if expected_types is not None and len(exprs) != len(expected_types):
        raise EParseFunctionMismatch(self)
    return exprs
def Parse_statement(self, ReturnType, AllowDecl = False):
    """Parse one statement and return its tree node.

    Grammar parsed here:

    statement: ';' | single_statement | code_block
    single_statement: if_statement | while_statement | do_statement
        | for_statement | jump_statement | state_statement | label_statement
        | return_statement | declaration_statement | expression ';'
    if_statement: IF '(' expression ')' statement ELSE statement
        | IF '(' expression ')' statement
    while_statement: WHILE '(' expression ')' statement
    do_statement: DO statement WHILE '(' expression ')' ';'
    for_statement: FOR '(' optional_expression_list ';' expression ';'
        optional_expression_list ')' statement
    jump_statement: JUMP IDENT ';'
    state_statement: STATE DEFAULT ';' | STATE IDENT ';'
    label_statement: '@' IDENT ';'
    return_statement: RETURN ';' | RETURN expression ';'
    declaration_statement: TYPE IDENT ';' | TYPE IDENT '=' expression ';'

    There's a restriction: a *single* statement can not be a declaration.

    ReturnType is the type RETURN statements must produce, or None when
    they must not return a value. AllowDecl tells whether a declaration is
    acceptable at this position; when it is False, a declaration raises
    EParseDeclarationScope. The sub-statements of IF, WHILE, DO and FOR are
    parsed with AllowDecl left at False.
    """
    tok0 = self.tok[0]

    if tok0 == '{':
        return self.Parse_code_block(ReturnType)

    if tok0 == ';':
        self.NextToken()
        return [';', None]

    if tok0 == '@':
        self.NextToken()
        self.expect('IDENT')
        name = self.tok[1]
        if name in self.symtab[self.scopeindex]:
            raise EParseAlreadyDefined(self)
        # Labels live in the symbol table of the current scope.
        self.symtab[self.scopeindex][name] = (self.order(), S['Label'])
        self.NextToken()
        self.expect(';')
        self.NextToken()
        return [S['@'], None, name]

    if tok0 == 'JUMP':
        self.NextToken()
        self.expect('IDENT')
        name = self.tok[1]
        tmp = self.FindSymbolPartial(name, MustBeLabel=True)
        if not tmp or tmp[1] != 'Label':
            # It might still be a forward reference, so we add it to the
            # list of things to look up when done
            self.jump_lookups.append((name, self.scopeindex, self.errorpos))
        self.NextToken()
        self.expect(';')
        self.NextToken()
        # S['IDENT'] is used here like in the other IDENT nodes built by
        # this method.
        return [S['JUMP'], None, [S['IDENT'], S['Label'], name, self.FindScopeIndex(name, MustBeLabel=True)]]

    if tok0 == 'STATE':
        self.NextToken()
        if self.tok[0] not in ('DEFAULT', 'IDENT'):
            raise EParseSyntax(self)
        # States are only searched in the global scope
        name = self.tok[1] if self.tok[0] == 'IDENT' else 'default'
        # A state may be defined later in the script, so the temporary
        # globals table is consulted when the symbol table lacks the name.
        if name not in self.symtab[0] and (name not in self.globals or self.globals[name] != 'State'):
            raise EParseUndefined(self)
        self.NextToken()
        self.expect(';')
        self.NextToken()
        return [S['STATE'], None,
            [S['IDENT'], S['State'], name, 0] if name != 'default' else S['DEFAULT']]

    if tok0 == 'RETURN':
        self.NextToken()
        if self.tok[0] == ';':
            value = None
        else:
            value = self.Parse_expression()
        self.expect(';')
        self.NextToken()
        if ReturnType is None and value is not None:
            raise EParseReturnShouldBeEmpty(self)
        if ReturnType is not None and value is None:
            raise EParseReturnIsEmpty(self)
        if value is None:
            return [S['RETURN'], None, None]
        return [S['RETURN'], None, self.autocastcheck(value, ReturnType)]

    if tok0 == 'IF':
        self.NextToken()
        self.expect('(')
        self.NextToken()
        condition = self.Parse_expression()
        self.expect(')')
        self.NextToken()
        then_branch = self.Parse_statement(ReturnType)
        else_branch = None
        if self.tok[0] == 'ELSE':
            self.NextToken()
            else_branch = self.Parse_statement(ReturnType)
        # The ELSE branch is only appended to the node when present.
        return [S['IF'], None, condition, then_branch] + ([else_branch] if else_branch is not None else [])

    if tok0 == 'WHILE':
        self.NextToken()
        self.expect('(')
        self.NextToken()
        condition = self.Parse_expression()
        self.expect(')')
        self.NextToken()
        return [S['WHILE'], None, condition, self.Parse_statement(ReturnType)]

    if tok0 == 'DO':
        self.NextToken()
        stmt = self.Parse_statement(ReturnType)
        self.expect('WHILE')
        self.NextToken()
        self.expect('(')
        self.NextToken()
        condition = self.Parse_expression()
        self.expect(')')
        self.NextToken()
        self.expect(';')
        self.NextToken()
        return [S['DO'], None, stmt, condition]

    if tok0 == 'FOR':
        self.NextToken()
        self.expect('(')
        self.NextToken()
        initializer = self.Parse_optional_expression_list()
        self.expect(';')
        self.NextToken()
        condition = self.Parse_expression()
        self.expect(';')
        self.NextToken()
        iterator = self.Parse_optional_expression_list()
        self.expect(')')
        self.NextToken()
        stmt = self.Parse_statement(ReturnType)
        return [S['FOR'], None, initializer, condition, iterator, stmt]

    if tok0 == 'TYPE':
        if not AllowDecl:
            raise EParseDeclarationScope(self)
        typ = S[self.tok[1]]
        self.NextToken()
        self.expect('IDENT')
        name = self.tok[1]
        if name in self.symtab[self.scopeindex]:
            raise EParseAlreadyDefined(self)
        self.NextToken()
        value = None
        if self.tok[0] == '=':
            self.NextToken()
            # The initializer must be compatible with the declared type,
            # just like global initializers and RETURN values are checked.
            value = self.autocastcheck(self.Parse_expression(), typ)
        self.expect(';')
        self.NextToken()
        # The symbol is added only after its initializer has been parsed,
        # so the initializer can't see the name being declared.
        self.symtab[self.scopeindex][name] = (self.order(), typ, value)
        return [S['DECL'], None, name, self.scopeindex]

    # If none of the above, it must be an expression.
    value = self.Parse_expression()
    self.expect(';')
    self.NextToken()
    return value
def Parse_code_block(self, ReturnType):
    """Parse a brace-delimited block of statements.

    Grammar parsed here:

    code_block: '{' statements '}'
    statements: LAMBDA | statements statement

    It receives the return type to expect for return statements. The
    statements are parsed inside a scope of their own, with declarations
    allowed. Returns a '{}' node followed by the parsed statements.
    """
    self.expect('{')
    self.NextToken()
    self.PushScope()
    block = [S['{}'], None]
    while self.tok[0] != '}':
        block.append(self.Parse_statement(ReturnType, AllowDecl = True))
    self.PopScope()
    self.expect('}')
    self.NextToken()
    return block
def Parse_simple_expr(self, List=False):
    """Parse the restricted kind of expression used by global initializers.

    Grammar parsed here:

    simple_expr: simple_expr_except_list | list_simple_expr
    simple_expr_except_list: STRING_VALUE | KEY_VALUE | VECTOR_VALUE
        | ROTATION_VALUE | TRUE | FALSE | IDENT | number_value
        | '<' simple_expr ',' simple_expr ',' simple_expr '>'
        | '<' simple_expr ',' simple_expr ',' simple_expr ',' simple_expr '>'
    number_value: FLOAT_VALUE | INTEGER_VALUE | '-' FLOAT_VALUE | '-' INTEGER_VALUE
    list_simple_expr: '[' ']' | '[' list_simple_expr_items ']'
    list_simple_expr_items: simple_expr_except_list
        | list_simple_expr_items ',' simple_expr_except_list

    List is True while the elements of a list are being parsed, which
    prevents a list from appearing directly inside another list.

    Constants are returned as plain values (1/0 for TRUE/FALSE, the token
    value, a Vector, a Quaternion or a Python list). An identifier is
    returned as an (IDENT, type, name, scope) tuple; it must resolve to a
    symbol entry of at most three fields whose type is in self.types, or
    EParseUndefined is raised. Anything else raises EParseSyntax.
    """
    def parse_component():
        # Parse one vector/rotation component and check that it can be
        # used where a float is expected.
        comp = self.Parse_simple_expr()
        if type(comp) == tuple:
            # An identifier node already carries its type in element 1;
            # it has no entry of its own in PythonType2LSL.
            self.autocastcheck(comp, 'float')
        else:
            self.autocastcheck((0, self.PythonType2LSL[type(comp)]), 'float')
        return comp

    tok = self.tok
    self.NextToken()
    if tok[0] == 'TRUE': # TRUE and FALSE don't admit sign in globals
        return 1
    if tok[0] == 'FALSE':
        return 0
    if tok[0] in ('STRING_VALUE', 'KEY_VALUE', 'VECTOR_VALUE', 'ROTATION_VALUE', 'LIST_VALUE'):
        val = tok[1]
        if tok[0] == 'STRING_VALUE' and self.allowmultistrings:
            # Adjacent string literals are concatenated into one.
            while self.tok[0] == 'STRING_VALUE':
                val += self.tok[1]
                self.NextToken()
        return val
    if tok[0] == 'IDENT':
        tmp = self.FindSymbolPartial(tok[1])
        if tmp is None or len(tmp) > 3 or tmp[1] not in self.types:
            raise EParseUndefined(self)
        return (S['IDENT'], S[tmp[1]], tok[1], self.FindScopeIndex(tok[1]))
    if tok[0] == '<':
        value = [parse_component()]
        self.expect(',')
        self.NextToken()
        value.append(parse_component())
        self.expect(',')
        self.NextToken()
        value.append(parse_component())
        if self.tok[0] == '>':
            # Three components: it's a vector.
            self.NextToken()
            return Vector(value)
        # Otherwise a fourth component must follow: it's a rotation.
        self.expect(',')
        self.NextToken()
        value.append(parse_component())
        self.expect('>')
        self.NextToken()
        return Quaternion(value)
    if tok[0] == '[' and not List:
        value = []
        if self.tok[0] == ']':
            self.NextToken()
            return value
        while True:
            value.append(self.Parse_simple_expr(List=True))
            if self.tok[0] == ']':
                self.NextToken()
                return value
            self.expect(',')
            self.NextToken()
    # Only an optionally negated number is acceptable from here on.
    neg = False
    if tok[0] == '-':
        neg = True
        tok = self.tok
        self.NextToken()
    if tok[0] not in ('INTEGER_VALUE', 'FLOAT_VALUE'):
        raise EParseSyntax(self)
    if neg:
        if tok[0] == 'INTEGER_VALUE':
            if tok[1] == -2147483648:
                # This value is returned as is rather than negated.
                return -2147483648
        return -tok[1]
    return tok[1]
def Parse_optional_param_list(self):
    """Parse the parameter list of a function or event definition.

    Grammar parsed here:

    optional_param_list: LAMBDA | param_list
    param_list: TYPE IDENT | param_list ',' TYPE IDENT

    Each parameter is added to the symbol table of the current scope;
    a repeated name raises EParseAlreadyDefined. Returns a tuple with the
    parameter names in declaration order.
    """
    names = []
    if self.tok[0] != 'TYPE':
        return tuple(names)
    while True:
        ptype = S[self.tok[1]]
        self.NextToken()
        self.expect('IDENT')
        pname = self.tok[1]
        if pname in self.symtab[self.scopeindex]:
            raise EParseAlreadyDefined(self)
        names.append(pname)
        # Value is not predefined
        self.symtab[self.scopeindex][pname] = (self.order(), ptype, None)
        self.NextToken()
        if self.tok[0] != ',':
            return tuple(names)
        self.NextToken()
        self.expect('TYPE')
def Parse_events(self):
    """Parse the event handlers of a state.

    Grammar parsed here:

    events: event | events event
    event: EVENT_NAME '(' optional_parameter_list ')' code_block

    At least one event is required. The types of the declared parameters
    must equal the ones registered in self.events for that event name, or
    EParseSyntax is raised. Returns a dictionary that maps each event name
    to a tuple (order, None, code block, parameter names, parameter scope).
    """
    self.expect('EVENT_NAME') # mandatory
    handlers = {}
    while self.tok[0] == 'EVENT_NAME':
        evname = self.tok[1]
        self.NextToken()
        self.expect('(')
        self.NextToken()
        # Function parameters go to a dedicated symbol table.
        self.PushScope()
        params = self.Parse_optional_param_list()
        # NOTE: Parse_events: This is a bit crude, as the error is given at
        # the end of the param list. To do it correctly, we can pass the
        # parameter list to Parse_optional_param_list().
        declared = tuple(self.symtab[self.scopeindex][p][1] for p in params)
        if declared != self.events[evname]:
            raise EParseSyntax(self)
        self.expect(')')
        self.NextToken()
        body = tuple(self.Parse_code_block(None))
        handlers[evname] = (self.order(), None, body, params, self.scopeindex)
        self.PopScope()
    return handlers
def Parse_globals(self):
    """Parse the global variables and user functions of the script.

    Grammar parsed here:

    globals: LAMBDA | globals var_def | globals func_def
    var_def: TYPE IDENT ';' | TYPE IDENT '=' simple_expr ';'
    func_def: optional_type IDENT '(' optional_param_list ')' code_block
    optional_type: LAMBDA | TYPE

    Every definition is stored in the symbol table of the current scope.
    Variables are stored as (order, type, value) and functions as
    (order, type, code block, parameter names, parameter scope).
    """
    while self.tok[0] == 'TYPE' or self.tok[0] == 'IDENT':
        if self.tok[0] == 'TYPE':
            gtype = S[self.tok[1]]
            self.NextToken()
            self.expect('IDENT')
        else:
            gtype = None
        gname = self.tok[1]
        if gname in self.symtab[self.scopeindex]:
            raise EParseAlreadyDefined(self)
        self.NextToken()
        follower = self.tok[0]

        if follower == '(':
            # This is a function definition
            self.NextToken()
            self.PushScope() # Parameter names don't conflict with globals.
            params = self.Parse_optional_param_list()
            self.expect(')')
            self.NextToken()
            body = tuple(self.Parse_code_block(gtype))
            paramscope = self.scopeindex
            self.PopScope()
            self.symtab[self.scopeindex][gname] = (self.order(), gtype, body, params, paramscope)
            continue

        if follower != '=' and follower != ';':
            raise EParseSyntax(self)

        # This is a variable definition
        if gtype is None: # Typeless variables are not allowed
            raise EParseSyntax(self)
        if follower == ';':
            self.NextToken()
            init = None
        else:
            self.NextToken()
            if self.extendedglobalexpr:
                # Use advanced expression evaluation.
                init = tuple(self.Parse_expression())
            else:
                # Use LSL's dull global expression.
                init = self.Parse_simple_expr()
            self.expect(';')
            self.NextToken()
        if init is not None:
            if type(init) != tuple and not self.extendedglobalexpr:
                # Plain constant: derive its LSL type from the Python type.
                self.autocastcheck((0, self.PythonType2LSL[type(init)]), gtype)
            else:
                self.autocastcheck(init, gtype)
        self.symtab[self.scopeindex][gname] = (self.order(), gtype, init)
def Parse_states(self):
    """Parse all the states of the script.

    Grammar parsed here:

    states: LAMBDA | states state
    state: state_header '{' events '}'
    state_header: DEFAULT | STATE IDENT

    (but we enforce DEFAULT to be the first token found, meaning there will
    be at least one state and the first must be DEFAULT as in the original
    grammar)

    Each state is stored in the symbol table of the current scope as
    (order, 'State', events). A repeated state name raises
    EParseAlreadyDefined.
    """
    self.expect('DEFAULT')
    while self.tok[0] in ('DEFAULT', 'STATE'):
        if self.tok[0] == 'STATE':
            self.NextToken()
            if self.tok[0] != 'IDENT':
                raise EParseSyntax(self)
            sname = self.tok[1]
        else:
            sname = S['default']
        if sname in self.symtab[self.scopeindex]:
            raise EParseAlreadyDefined(self)
        # The entry is created first and expanded with the events later.
        self.symtab[self.scopeindex][sname] = (self.order(), S['State'])
        self.NextToken()
        self.expect('{')
        self.NextToken()
        events = self.Parse_events()
        self.expect('}')
        self.symtab[self.scopeindex][sname] += (events,)
        self.NextToken()
def Parse_script(self):
    """Parses the whole LSL script

    Grammar parsed here:

    script: globals states EOF

    After parsing, every jump that could not be resolved when it was found
    is looked up again from the scope it appeared in; if the label still
    can't be found, EParseUndefined is raised at the position recorded for
    the jump.
    """
    self.Parse_globals()
    self.Parse_states()
    self.expect('EOF')

    # Check the pending jump targets
    for label, scope, position in self.jump_lookups:
        self.scopeindex = scope
        found = self.FindSymbolPartial(label, MustBeLabel = True)
        if found is None:
            self.errorpos = position
            raise EParseUndefined(self)
def BuildTempGlobalsTable(self):
    """Build an approximate globals table.

    If the script syntax is correct, the globals table will be accurate.
    If it is not, it may contain too many or too few symbols (normally the
    latter). This globals table is not the normal globals in the symbol
    table; it's just needed to resolve which names are declared at all as
    globals and their type. It's temporary.

    The grammar is approximately:

    script: globals states
    globals: [global [global [...]]]
    global: [TYPE] IDENT '(' TYPE anytoken [',' TYPE anytoken [...]]
        anytoken_except_comma balanced_braces_or_anything_else
        | TYPE IDENT [anytoken_except_semicolon [...]] ';'
    states: state [state [...]]
    state: (DEFAULT | STATE IDENT) balanced_braces_or_anything_else

    Returns a dictionary, seeded with a copy of self.functions, that maps
    each variable to its type, each function to (type, parameter types)
    and each state to 'State'.
    """
    def skip_braces():
        # Skip tokens until the brace just entered is balanced or the
        # input ends.
        bracelevel = 1
        while bracelevel and self.tok[0] != 'EOF':
            if self.tok[0] == '{':
                bracelevel += 1
            elif self.tok[0] == '}':
                bracelevel -= 1
            self.NextToken()

    ret = self.functions.copy() # The library functions go here too.

    # If there's a syntax error, that's not our business. We just return
    # what we have so far. Doing a proper parse will determine the exact
    # location and cause.

    # Here we don't even care if it's duplicate - that will be caught
    # when adding to the real symbol table.

    # Scan globals
    try:
        while self.tok[0] not in ('DEFAULT', 'EOF'):
            typ = None
            if self.tok[0] == 'TYPE':
                typ = S[self.tok[1]]
                self.NextToken()
            if self.tok[0] != 'IDENT':
                return ret
            name = self.tok[1]
            self.NextToken()
            if self.tok[0] == '(':
                # Function definition
                self.NextToken()
                params = []
                if self.tok[0] != ')':
                    while True:
                        if self.tok[0] != 'TYPE':
                            return ret
                        params.append(S[self.tok[1]])
                        self.NextToken()
                        self.NextToken() # not interested in parameter names
                        if self.tok[0] != ',':
                            break
                        self.NextToken()
                self.NextToken() # Skip what should be the closing ')'
                if self.tok[0] != '{':
                    return ret
                self.NextToken() # Enter the first brace
                skip_braces()
                ret[name] = (typ, tuple(params))
            elif typ is None:
                return ret # A variable needs a type
            else:
                ret[name] = typ
                # Don't stop to analyze what's before the ending ';'
                while self.tok[0] != ';':
                    if self.tok[0] == 'EOF':
                        # Unterminated definition: stop here, so that we
                        # can't spin forever if the tokenizer keeps
                        # returning EOF.
                        return ret
                    self.NextToken()
                self.NextToken()
    except EParseUEOF:
        return ret

    # Scan states
    while True:
        if self.tok[0] not in ('DEFAULT', 'STATE'):
            return ret # includes EOF i.e. this is the normal return
        if self.tok[0] == 'STATE':
            self.NextToken()
            if self.tok[0] != 'IDENT':
                return ret
            name = self.tok[1]
        else:
            name = S['default']
        ret[name] = S['State']
        self.NextToken()
        if self.tok[0] != '{':
            return ret
        self.NextToken() # Enter the first brace
        skip_braces()
def parse(self, script, options = frozenset()):
    """Parse the given stream with the given options.

    This function also builds the temporary globals table.

    Each recognized option becomes a boolean attribute of the same name,
    True when the name is present in options. Returns the symbol table
    list (self.symtab).
    """
    self.script = script
    self.length = len(script)

    # Recognized options:
    # extendedglobalexpr: Extended expressions in globals (needs support
    #     from the optimizer to work)
    # extendedtypecast: Extended typecast syntax (typecast as a regular
    #     unary operator)
    # extendedassignment: Extended assignment operators: |= &= <<= >>=
    # explicitcast: Add explicit type casts when implicit (the output
    #     module takes care of the correctness of the output)
    # allowkeyconcat: Allow string + key = string and
    #     key + string = string
    # allowmultistrings: Allow C style string composition of strings:
    #     "blah" "blah" = "blahblah"
    for flag in ('extendedglobalexpr', 'extendedtypecast',
                 'extendedassignment', 'explicitcast', 'allowkeyconcat',
                 'allowmultistrings'):
        setattr(self, flag, flag in options)

    # TODO: Allow pure C-style string parsing. This is low-priority.
    #self.allowcescapes = 'allowcescapes' in options

    # TODO: Enable switch statements.
    #self.enableswitch = 'enableswitch' in options

    # TODO: Enable brackets for list elements e.g. (float)mylist[3], or mylist[5]=4
    #self.lazylists = 'lazylists' in options

    del options # no longer needed

    # Symbol table:
    # This is a list of all local and global symbol tables.
    # The first element (0) is the global scope. Each symbol table is a
    # dictionary. Element -1 of the dictionary is the parent index. The
    # entries are lists of three or four values. The first one is the
    # order; the second is the type, the third is the value, and if it's
    # a function, the fourth is the parameter list. Functions contain a
    # parse tree as their value.
    self.symtab = [{-1: None}]
    self.scopeindex = 0
    self.dictorder = 0

    # Globals and labels can be referenced before they are defined. That
    # includes states.
    #
    # Our first approach was going to be to build a list that keeps track of
    # undefined references, to check them after parsing. But that has a big
    # problem: expressions need to know the types of the arguments in order
    # to give appropriate errors if they don't suit the operand, and in
    # order to mark and check the types appropriately. But we don't know the
    # types of the globals that haven't been found yet. Therefore, sticking
    # to this approach would mean to scan the tree for every expression with
    # a pending reference, fixing up every node upstream with the correct
    # type with the possibility to find a type mismatch in a place for which
    # we have no location info.
    #
    # For that reason, we change the strategy. We still don't want to do
    # two full or almost full passes of the parser, nitpicking on every
    # detail. But given LSL's structure, it's relatively easy to do a fast
    # incomplete parsing pass, gathering globals with their types and
    # function arguments. And that's what we do.

    # First pass: quick scan that only collects the globals.
    self.pos = 0
    self.tok = self.GetToken()
    self.globals = self.BuildTempGlobalsTable()

    # We need a table of undefined jump references anyway, to check later,
    # as jumps are local, not global, and allow forward definitions.
    self.jump_lookups = []

    # Second pass: restart from the beginning and do the parsing proper.
    self.pos = 0
    self.tok = self.GetToken()
    self.Parse_script()

    # No longer needed. The data that is not in self.functions is in
    # self.symtab[0].
    del self.globals
    del self.jump_lookups # Already used.

    return self.symtab
def parsefile(self, filename, options = frozenset()):
    """Convenience function to parse a file

    Reads the whole file in binary mode and passes its contents to
    parse() together with options. Returns whatever parse() returns.
    """
    # The default is an immutable set, like in parse(), so that no mutable
    # object is shared between calls.
    with open(filename, 'rb') as f:
        script = f.read()
    return self.parse(script, options)
def __init__(self):
"""Reads the library."""
self.events = {}
self.constants = {}
self.functions = {}
# Library read code
parse_lin_re = re.compile(
r'^\s*(event|void|integer|float|string|key|vector|quaternion|rotation|list)\s+'
r'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\s*('
r'(?:integer|float|string|key|vector|quaternion|rotation|list)\s+[a-zA-Z_][a-zA-Z0-9_]*'
r'(?:\s*,\s*(?:integer|float|string|key|vector|quaternion|rotation|list)\s+[a-zA-Z_][a-zA-Z0-9_]*)*'
r')?\s*\)\s*$'
r'|'
r'^\s*const\s+(integer|float|string|key|vector|quaternion|rotation|list)'
r'\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*?)\s*$'
r'|'
r'^\s*(?:#.*|//.*)?$')
parse_arg_re = re.compile(r'^\s*([a-z]+)\s+[a-zA-Z_][a-zA-Z0-9_]*\s*$')
parse_num_re = re.compile(r'^\s*(-?(?=[0-9]|\.[0-9])[0-9]*((?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?))\s*$')
parse_str_re = re.compile(ur'^"((?:[^"\\]|\\.)*)"$')
f = open('builtins.txt', 'rb')
try:
while True:
line = f.readline()
if not line: break
match = parse_lin_re.match(line)
if not match:
warning(u'Syntax error in builtins.txt: ' + line.decode('utf8'))
continue
if match.group(1):
# event or function
typ = match.group(1)
if typ == 'quaternion':
typ = 'rotation'
if typ == 'void':
typ = None
args = []
arglist = match.group(3)
if arglist:
arglist = arglist.split(',')
for arg in arglist:
args.append(parse_arg_re.match(arg).group(1))
name = match.group(2)
if typ == 'event':
if name in self.events:
warning(u'Event already defined in bultins.txt, overwriting: ' + name.decode('utf8'))
self.events[name] = tuple(args)
else:
# Library functions go to the functions table. If
# they are implemented in lslfuncs.*, they get a
# reference to the implementation; otherwise None.
if name in self.functions:
warning(u'Function already defined in bultins.txt, overwriting: ' + name.decode('utf8'))
self.functions[name] = (typ, tuple(args), getattr(lslfuncs, name, None))
elif match.group(4):
# constant
name = match.group(5)
if name in self.constants:
warning(u'Global already defined in bultins.txt, overwriting: ' + name.decode('utf8'))
try:
typ = match.group(4)
if typ == 'quaternion':
typ = 'rotation'
val = match.group(6)
if typ == 'integer':
val = int(val, 0)
elif typ == 'float':
val = lslfuncs.F32(float(val))
elif typ == 'string':
val = val.decode('utf8')
if not parse_str_re.match(val):
raise EInternal
esc = False
tmp = val[1:-1]
val = u''
for c in tmp:
if esc:
if c == u'n':
c = u'\n'
elif c == u't':
c = u' '
val += c
esc = False
elif c == u'\\':
esc = True
else:
val += c
#if typ == 'key':
# val = Key(val)
elif typ == 'key':
warning(u'Key constants not supported: ' + line.decode('utf8'))
val = None
elif typ in ('vector', 'rotation'):
if val[0:1] != '<' or val[-1:] != '>':
raise ValueError
val = val[1:-1].split(',')
if len(val) != (3 if typ == 'vector' else 4):
raise ValueError
num = parse_num_re.match(val[0])
if not num:
raise ValueError
val[0] = lslfuncs.F32(float(num.group(1)))
num = parse_num_re.match(val[1])
if not num:
raise ValueError
val[1] = lslfuncs.F32(float(num.group(1)))
num = parse_num_re.match(val[2])
if not num:
raise ValueError
val[2] = lslfuncs.F32(float(num.group(1)))
if typ != 'vector':
num = parse_num_re.match(val[3])
if not num:
raise ValueError
val[3] = lslfuncs.F32(float(num.group(1)))
val = Quaternion(val)
else:
val = Vector(val)
else:
assert typ == 'list'
warning(u'List constants not supported: ' + line.decode('utf8'))
val = None
if val is not None:
self.constants[name] = val
except EInternal:
warning(u'Invalid string in builtins.txt: ' + line.decode('utf8'))
except ValueError:
warning(u'Invalid vector syntax in builtins.txt: ' + line.decode('utf8'))
finally:
f.close()