# (C) Copyright 2015-2019 Sei Lisa. All rights reserved.
#
# This file is part of LSL PyOptimizer.
#
# LSL PyOptimizer is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# LSL PyOptimizer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>.
#
# This file includes an excerpt from PCPP, by Niall Douglas and David
# Beazley. PCPP is available here: https://github.com/ned14/pcpp and
# distributed under the following conditions:
#
# (C) Copyright 2018-2019 Niall Douglas http://www.nedproductions.biz/
# (C) Copyright 2007-2019 David Beazley http://www.dabeaz.com/
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# (End of terms and conditions for the PCPP excerpt)
#
# The particular excerpt used is this one:
# https://github.com/ned14/pcpp/blob/e1219ce157b4dfcfee3181faa6ec5129c3a41e78/pcpp/preprocessor.py#L873-L935
#
# The following fragments of code are hereby irrevocably donated to the
# public domain:
# - The Evaluator class in its entirety.
# - The evalexpr method in its entirety except for the excerpt mentioned
#   above, which remains copyright of its authors.
# - Every line between this one and the Evaluator class.

import sys
import os
import re
import copy

# Make the bundled 'pcpp' directory importable, remembering the search
# path that was active before the insertion.
oldsyspath = sys.path
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                'pcpp'))
from pcpp import preprocessor
# NOTE(review): this binds the module-level name 'path' but does not undo
# the insert above ('oldsyspath' aliases the same list object as sys.path,
# so the inserted entry remains visible through both).  It looks like
# 'sys.path = oldsyspath' over a copy was intended -- confirm before
# changing, as later imports may rely on the pcpp dir staying in the path.
path = oldsyspath

# Define the number of bits to work with in expression evaluation
# (per the standard, this should be the bits in uintmax_t).
INTMAXBITS = 64
# Extreme values of uintmax_t / intmax_t in a two's-complement
# representation of INTMAXBITS bits.
UINTMAX_MAX = (1 << INTMAXBITS) - 1
INTMAX_MIN = -(1 << (INTMAXBITS - 1))

# Two-character C symbols that PCPP does not tokenize as a unit; see the
# workaround in Evaluator.nextToken.
DSYMBOLS = {'->', '-=', '--', '==', '<<', '<=', '>>', '>=', '||', '|=',
            '&&', '&=', '!=', '^=', '*=', '/=', '%=', '+=', '++'}

# C digraphs and their single-character equivalents.
DIGRAPHS = {'<:':'[', ':>':']', '<%':'{', '%>':'}', '%:':'#'}

# Simple C escape sequences and the character codes they denote.
ESCAPES = {'a':7,'b':8,'f':12,'n':10,'r':13,'t':9,'v':11, '"':34,
           '\\':92, '\'':39, '?':63}

# Exception to report an evaluation error
class EvalError(Exception):
    pass

try:
    long
except NameError:
    long = int  # Python 3 compatibility: 'long' no longer exists there

# Tag classes used to carry the signedness of a value through the
# evaluation; every intermediate result is one of these two.
class uint(long): pass
class sint(long): pass


class Evaluator(object):
    """Recursive descendent parser to evaluate C preprocessor expressions."""

    # Int parser
    resolve_int_regex = re.compile(
        # Group 1: Hex
        # Group 2: Oct
        # Group 3: Dec
        # Group 4: Unsigned
        r'^(?:(0x[0-9a-f]+)|(0[0-7]*)|([1-9][0-9]*))'
        r'(?:(u(?:ll?)?|(?:ll?)?u)|(?:ll?)?)$', re.I | re.S)

    # Char parser (without the quotes)
    ctoken_regex = re.compile(
        r'\\(?:'
            r'[\?' r"'" r'"\\abfnrtv]|[Xx][0-9a-fA-F]+|[0-7]{1,3}'
            r'|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}'
        r')'
        r'|.', re.S)

    def __init__(self, tokens):
        """tokens is the non-empty list of PCPP tokens to evaluate."""
        assert tokens, "Empty tokens list???"
        self.tokens = tokens
        self.ptr = 0
        self.evaluating = True
        # Dispatch table to renormalize a Python int into wrapped
        # unsigned/signed range, keyed by the result's tag class.
        self.conv = {uint: self.to_uint, sint: self.to_sint}
        self.nextToken()

    def to_uint(self, i):
        """Wrap i to the range of uintmax_t and tag it unsigned."""
        return uint(i & UINTMAX_MAX)

    def to_sint(self, i):
        """Wrap i to the range of intmax_t and tag it signed."""
        return sint(((i - INTMAX_MIN) & UINTMAX_MAX) + INTMAX_MIN)

    def nextToken(self):
        """Sets self.token to the next token and advances the token
        pointer.  Skips whitespace tokens.  Sets a CPP_WS token with a
        newline as value if there's no next token.

        Synthesizes tokens for multichar symbols not currently handled
        by PCPP.
        """
        try:
            while True:
                tok = self.token = self.tokens[self.ptr]
                self.ptr += 1
                # Whitespace without a newline is skipped; a newline
                # marks the end of the expression.
                if tok.type != 'CPP_WS' or '\n' in tok.value:
                    break
        except IndexError:
            # Synthesize a new CPP_WS token with a newline, to signal
            # end-of-text (we copy it from the last one in the token
            # stream).
            self.token = copy.copy(self.tokens[-1])
            self.token.type = 'CPP_WS'
            self.token.value = '\n'
            return

        # Work around a lexing problem in PCPP
        #
        # PCPP doesn't tokenize multichar tokens except ##, so we do that
        # job here, to ease processing and report more errors (e.g. 5--3
        # should be reported as an error because it uses the
        # post-decrement operator, instead of evaluating to 8, which is
        # the correct result for 5- -3).  The tokens processed here are
        # those in the C standard missed by PCPP:
        #   -> -= -- << <= >> >= || |= && &= == != ^= *= /= += ++ %=
        #   >>= <<=
        #   ...
        #   <: :> <% %> %:
        #   %:%:
        # This is already a single token, therefore it's not processed
        # here:
        #   ##
        try:
            nxt = self.tokens[self.ptr]
        except IndexError:
            return
        s = tok.type + nxt.type
        if s in DSYMBOLS:
            tok = self.token = copy.copy(tok)
            tok.type = s
            tok.value += nxt.value
            self.ptr += 1
            if s in ('<<', '>>'):
                # check for <<= >>=
                try:
                    next2 = self.tokens[self.ptr]
                    if next2.type == '=':
                        tok.type += next2.type
                        tok.value += next2.value
                        self.ptr += 1
                except IndexError:
                    pass
            return
        if s in DIGRAPHS:
            # digraph or DPOUND
            tok = self.token = copy.copy(tok)
            tok.type = DIGRAPHS[s]
            tok.value += nxt.value
            self.ptr += 1
            try:
                next2 = self.tokens[self.ptr]
                next3 = self.tokens[self.ptr + 1]
                if next2.type == '%' and next3.type == ':':
                    # %:%: is the digraph spelling of ##
                    tok.type = '##'
                    tok.value += next2.value + next3.value
                    self.ptr += 2
            except IndexError:
                pass
            return
        if s == '..':
            # Possible ellipsis '...'
            try:
                next2 = self.tokens[self.ptr + 1]
                if next2.type == '.':
                    tok = self.token = copy.copy(tok)
                    tok.type = '...'
                    tok.value += nxt.value + next2.value
                    self.ptr += 2
            except IndexError:
                pass
        return

    def eat(self, *toktypes):
        """Return True and advance pointer if the current token matches."""
        if self.token.type in toktypes:
            self.nextToken()
            return True
        return False

    def expect(self, toktype):
        """Checks an expected token and eats it"""
        expect = toktype
        if toktype == 'END' and '\n' in self.token.value:
            # End-of-expression is signalled by a whitespace token that
            # contains a newline.
            expect = 'CPP_WS'
        if not self.eat(expect):
            raise EvalError(
                "Unexpected token %s (%s) in expression, expected %s"
                % (repr(self.token.value), self.token.type, toktype))

    def conversions(self, op1, op2):
        """Perform usual arithmetic conversions on two operands."""
        assert type(op1) in (sint, uint) and type(op2) in (sint, uint)
        # If signedness differs, both operands convert to unsigned.
        if type(op1) != type(op2):
            return uint(op1), uint(op2)
        return op1, op2

    def primary_expression(self, evaluating):
        """Non-terminal: primary_expression.

        primary_expression: IDENTIFIER
                          | STRING_LITERAL
                          | CHAR_LITERAL
                          | INTEGER
                          | '(' expression ')'
        """
        tok = self.token
        if self.eat('('):
            ret = self.expression(evaluating)
            self.expect(')')
            return ret
        #if self.eat('CPP_STRING'):
        #    return tok.value
        if self.eat('CPP_CHAR'):
            charstr = tok.value
            is_wide = False  # renamed from 'unicode' (shadowed builtin)
            if tok.value.startswith('L'):
                is_wide = True
                charstr = charstr[2:-1]
            else:
                charstr = charstr[1:-1]
            onechar = False
            for ctok in self.ctoken_regex.finditer(charstr):
                if onechar:
                    raise EvalError("Multiple characters in char literal")
                onechar = True
                c = ctok.group(0)
                if c == '\\':
                    # A lone backslash: no escape alternative matched, so
                    # the regex fell back to '.' on the backslash itself.
                    raise EvalError("Invalid escape sequence in char literal")
                if c.startswith('\\'):
                    if c.startswith('\\u') or c.startswith('\\U'):
                        result = int(c[2:], 16)
                        # Reject codepoints the C standard disallows in
                        # universal character names.
                        if ((result < 0xA0
                             and result not in (0x24, 0x40, 0x60))
                            or 0xD800 <= result <= 0xDFFF
                           ):
                            raise EvalError("Invalid universal character %s" % c)
                        if result > 0xFF and not is_wide:
                            raise EvalError("Char literal out of range")
                    elif c.startswith('\\x') or c.startswith('\\X'):
                        result = int(c[2:], 16)
                        if result > 0xFF:
                            raise EvalError("Hex literal out of range")
                    elif c[1] in ESCAPES:
                        result = ESCAPES[c[1]]
                    else:
                        result = int(c[1:], 8)  # octal escape
                else:
                    assert len(c) == 1 and c != '\''
                    # Tag as signed: a bare Python int here would break
                    # conversions()'s type assertion on the first use of
                    # the literal with any binary operator.
                    return sint(ord(c))
            if not onechar:
                # An empty literal ('') would otherwise hit an undefined
                # 'result' below.
                raise EvalError("Empty char literal")
            # This may need reconsideration if INTMAXBITS is < 22 (the
            # bits necessary to fit a Unicode codepoint in a signed
            # integer).
            return sint(result)
        if tok.type == 'CPP_ID':
            # Unknown identifiers evaluate to 0, as in standard C.
            tok = self.token = copy.copy(tok)
            tok.type = 'CPP_INTEGER'
            tok.value = '0'
            # fall through to process it as CPP_INTEGER
        if self.eat('CPP_INTEGER'):
            m = self.resolve_int_regex.search(tok.value)
            if not m:
                raise EvalError("Invalid integer literal")
            val = (int(m.group(2), 8) if m.group(2)
                   else int(m.group(1) or m.group(3), 0))
            # A 'u' suffix makes the literal unsigned.
            val = self.to_uint(val) if m.group(4) else self.to_sint(val)
            return val
        if tok.type == 'CPP_STRING':
            raise EvalError("Strings are not allowed in expressions")
        if tok.type == 'CPP_WS' and '\n' in tok.value:
            raise EvalError('Unexpected end of expression')
        self.expect('CPP_INTEGER')  # guaranteed to raise at this point

    def factor_expression(self, evaluating):
        """Non-terminal: factor_expression

        factor_expression: primary_expression
                         | unary_operator factor_expression
        """
        # Avoid recursing for unary operators: collect them, then apply
        # them after evaluation, innermost first.
        k = None
        while True:
            toktype = self.token.type
            if not self.eat('-', '+', '~', '!'):
                break
            if toktype != '+':
                # Unary '+' is a no-op and is simply skipped.  (The
                # previous version broke out of the loop after a '+',
                # wrongly rejecting valid C such as +-5.)
                k = k or []
                k.append(toktype)
        result = self.primary_expression(evaluating)
        while k:
            operation = k.pop()
            if operation == '!':
                result = sint(0 if result else 1)
            else:
                result = self.conv[type(result)](
                    -result if operation == '-' else ~result)
        return result

    def term_expression(self, evaluating):
        """Non-terminal: term_expression

        term_expression: factor_expression
                       | term_expression '*' factor_expression
                       | term_expression '/' factor_expression
                       | term_expression '%' factor_expression
        """
        result = self.factor_expression(evaluating)
        while True:
            toktype = self.token.type
            if not self.eat('*', '/', '%'):
                return result
            operand = self.factor_expression(evaluating)
            if evaluating and operand == 0 and toktype != '*':
                raise EvalError("Division by zero")
            result, operand = self.conversions(result, operand)
            if not evaluating:
                value = result
            elif toktype == '*':
                value = result * operand
            else:
                # C division truncates towards zero, while Python's //
                # floors.  Compute on magnitudes and restore the sign so
                # that e.g. -7/2 == -3 and -7%2 == -1, as in C.
                q = abs(result) // abs(operand)
                if (result < 0) != (operand < 0):
                    q = -q
                value = q if toktype == '/' else result - q * operand
            result = self.conv[type(result)](value)

    def arithmetic_expression(self, evaluating):
        """Non-terminal: arithmetic_expression

        arithmetic_expression: term_expression
                             | arithmetic_expression '+' term_expression
                             | arithmetic_expression '-' term_expression
        """
        result = self.term_expression(evaluating)
        while True:
            toktype = self.token.type
            if not self.eat('+', '-'):
                return result
            operand = self.term_expression(evaluating)
            result, operand = self.conversions(result, operand)
            result = self.conv[type(result)](
                result + operand if toktype == '+' else result - operand)

    def shift_expression(self, evaluating):
        """Non-terminal: shift_expression

        shift_expression: arithmetic_expression
                        | shift_expression '<<' arithmetic_expression
                        | shift_expression '>>' arithmetic_expression
        """
        result = self.arithmetic_expression(evaluating)
        while True:
            tok = self.token
            if not self.eat('<<', '>>'):
                return result
            operand = self.arithmetic_expression(evaluating)
            # Clamp the shift count: counts above INTMAXBITS are capped
            # to avoid a huge intermediate result (DoS prevention), and
            # negative counts (undefined behaviour in C) are clamped to
            # 0 instead of letting Python raise ValueError on a negative
            # shift, which previously crashed e.g. 1 << -1.
            count = min(max(operand, 0), INTMAXBITS)
            # No usual arithmetic conversions here: as in C, the result
            # has the type of the (promoted) left operand.
            result = self.conv[type(result)](
                result << count if tok.type == '<<' else result >> count)

    def relational_expression(self, evaluating):
        """Non-terminal: relational_expression

        relational_expression: shift_expression
                             | relational_expression '>' shift_expression
                             | relational_expression '<' shift_expression
                             | relational_expression '>=' shift_expression
                             | relational_expression '<=' shift_expression
        """
        result = self.shift_expression(evaluating)
        while True:
            tok = self.token
            if not self.eat('<', '>', '<=', '>='):
                return result
            operand = self.shift_expression(evaluating)
            result, operand = self.conversions(result, operand)
            # Use the fact that a < b <-> b > a
            # Use the fact that a < b <-> !(a >= b)
            if tok.type == '>' or tok.type == '<=':
                result, operand = operand, result
            result = sint(1 if (result < operand)
                               == (tok.type in ('<', '>')) else 0)

    def equality_expression(self, evaluating):
        """Non-terminal: equality_expression

        equality_expression: relational_expression
                           | equality_expression '==' relational_expression
                           | equality_expression '!=' relational_expression
        """
        result = self.relational_expression(evaluating)
        while True:
            tok = self.token
            if not self.eat('==', '!='):
                return result
            operand = self.relational_expression(evaluating)
            result, operand = self.conversions(result, operand)
            result = sint(1 if (result == operand) == (tok.type == '==')
                          else 0)

    def bitwise_and_expression(self, evaluating):
        """Non-terminal: bitwise_and_expression

        bitwise_and_expression: equality_expression
                  | bitwise_and_expression '&' equality_expression
        """
        result = self.equality_expression(evaluating)
        while True:
            if not self.eat('&'):
                return result
            operand = self.equality_expression(evaluating)
            result, operand = self.conversions(result, operand)
            result = self.conv[type(result)](result & operand)

    def bitwise_xor_expression(self, evaluating):
        """Non-terminal: bitwise_xor_expression

        bitwise_xor_expression: bitwise_and_expression
                  | bitwise_xor_expression '^' bitwise_and_expression
        """
        result = self.bitwise_and_expression(evaluating)
        while True:
            if not self.eat('^'):
                return result
            operand = self.bitwise_and_expression(evaluating)
            result, operand = self.conversions(result, operand)
            result = self.conv[type(result)](result ^ operand)

    def bitwise_or_expression(self, evaluating):
        """Non-terminal: bitwise_or_expression

        bitwise_or_expression: bitwise_xor_expression
                  | bitwise_or_expression '|' bitwise_xor_expression
        """
        result = self.bitwise_xor_expression(evaluating)
        while True:
            if not self.eat('|'):
                return result
            operand = self.bitwise_xor_expression(evaluating)
            result, operand = self.conversions(result, operand)
            result = self.conv[type(result)](result | operand)

    def logical_and_expression(self, evaluating):
        """Non-terminal: logical_and_expression

        logical_and_expression: bitwise_or_expression
                  | logical_and_expression '&&' bitwise_or_expression
        """
        result = self.bitwise_or_expression(evaluating)
        while True:
            if not self.eat('&&'):
                return result
            # Short-circuit: once the left side is false, keep parsing
            # the right side but stop evaluating it.
            evaluating = evaluating and not not result
            operand = self.bitwise_or_expression(evaluating)
            result = sint(1 if result and (not evaluating or operand)
                          else 0)

    def logical_or_expression(self, evaluating):
        """Non-terminal: logical_or_expression

        logical_or_expression: logical_and_expression
                  | logical_or_expression '||' logical_and_expression
        """
        result = self.logical_and_expression(evaluating)
        while True:
            if not self.eat('||'):
                return result
            # Short-circuit: once the left side is true, keep parsing
            # the right side but stop evaluating it.
            evaluating = evaluating and not result
            operand = self.logical_and_expression(evaluating)
            result = sint(1 if result or (evaluating and operand) else 0)

    def conditional_expression(self, evaluating):
        """Non-terminal: conditional_expression.

        conditional_expression: logical_or_expression
            | logical_or_expression '?' expression ':' conditional_expression
        """
        result = self.logical_or_expression(evaluating)
        if self.eat('?'):
            if result:
                result = self.expression(evaluating)
                self.expect(':')
                # Parse the dead branch without evaluating it.
                operand = self.conditional_expression(False)
            else:
                operand = self.expression(False)
                self.expect(':')
                result = self.conditional_expression(evaluating)
            # The chosen value still undergoes the usual arithmetic
            # conversions against the other branch's type.
            result, operand = self.conversions(result, operand)
        return result

    def expression(self, evaluating = True):
        """Non-terminal: expression.

        expression: conditional_expression (always)
                  | expression conditional_expression (if not evaluating)
        """
        if evaluating:
            return self.conditional_expression(evaluating)
        # The comma operator is only accepted while not evaluating.
        while True:
            result = self.conditional_expression(evaluating)
            if not self.eat(','):
                return result

    def evaluate(self):
        """Parse and evaluate the whole token list; return the result."""
        result = self.expression(True)
        # Did we eat all tokens?
        self.expect('END')
        return result
class Preproc(preprocessor.Preprocessor):
    """PCPP-based preprocessor that evaluates #if expressions with our
    own safe Evaluator instead of Python's eval().
    """

    def __init__(self, input, defines=(), sysincpaths=(), incpaths=()):
        """input is the source text to preprocess; defines is a sequence
        of (name, value) pairs; sysincpaths and incpaths are sequences
        of include search directories.
        """
        super(Preproc, self).__init__()
        self.auto_pragma_once_enabled = False
        for define in defines:
            self.define('%s %s' % define)
        for v in sysincpaths:
            self.add_path(v)
        for v in incpaths:
            self.add_path(v)
        self.ignore = set()
        self.parser = self.parsegen(input, '', '')

    def get(self):
        """Run the preprocessor; return (output_text, macros_dict)."""
        try:
            import StringIO  # Python 2
        except ImportError:
            import io as StringIO  # Python 3
        ret = StringIO.StringIO()
        self.write(ret)
        return (ret.getvalue(), self.macros)

    def on_include_not_found(self, is_system_include, curdir, includepath):
        """Don't pass through the #include line if the file does not exist"""
        self.on_error(self.lastdirective.source, self.lastdirective.lineno,
                      "Include file not found: %s" % includepath)

    def evalexpr(self, tokens):
        """Evaluate a sequence of tokens as an expression.  The original
        uses eval(), which is unsafe for web usage.  This one uses our
        own recursive-descendent parser.
        """
        # ****************************************************
        # Start of fragment copied from PCPP's preprocessor.py
        """Evaluate an expression token sequence for the purposes of evaluating integral expressions."""
        if not tokens:
            self.on_error('unknown', 0, "Empty expression")
            return (0, None)
        # tokens = tokenize(line)
        # Search for defined macros
        evalfuncts = {'defined' : lambda x: True}
        evalvars = {}
        def replace_defined(tokens):
            i = 0
            while i < len(tokens):
                if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
                    j = i + 1
                    needparen = False
                    result = "0L"
                    while j < len(tokens):
                        if tokens[j].type in self.t_WS:
                            j += 1
                            continue
                        elif tokens[j].type == self.t_ID:
                            if tokens[j].value in self.macros:
                                result = "1L"
                            else:
                                repl = self.on_unknown_macro_in_defined_expr(tokens[j])
                                if repl is None:
                                    # Add this identifier to a dictionary of variables
                                    evalvars[tokens[j].value] = 0
                                    result = 'defined('+tokens[j].value+')'
                                else:
                                    result = "1L" if repl else "0L"
                            if not needparen: break
                        elif tokens[j].value == '(':
                            needparen = True
                        elif tokens[j].value == ')':
                            break
                        else:
                            self.on_error(tokens[i].source,tokens[i].lineno,"Malformed defined()")
                        j += 1
                    if result.startswith('defined'):
                        tokens[i].type = self.t_ID
                        tokens[i].value = result
                    else:
                        tokens[i].type = self.t_INTEGER
                        tokens[i].value = self.t_INTEGER_TYPE(result)
                    del tokens[i+1:j+1]
                i += 1
            return tokens
        # Replace any defined(macro) before macro expansion
        tokens = replace_defined(tokens)
        tokens = self.expand_macros(tokens)
        # Replace any defined(macro) after macro expansion
        tokens = replace_defined(tokens)
        if not tokens:
            return (0, None)
        for i,t in enumerate(tokens):
            if t.type == self.t_ID:
                repl = self.on_unknown_macro_in_expr(copy.copy(t))
                if repl is None:
                    # Add this identifier to a dictionary of variables
                    evalvars[t.value] = 0
                else:
                    tokens[i] = t = repl
        # End of fragment copied from PCPP's preprocessor.py
        # **************************************************

        del evalfuncts  # we don't use this
        evaluator = Evaluator(tokens)
        try:
            result = evaluator.evaluate()
        except EvalError as e:
            # str(e) instead of e.message: BaseException.message was
            # removed in Python 3 (and deprecated since Python 2.6).
            self.on_error(evaluator.token.source, evaluator.token.lineno,
                          str(e))
            return (0, None)
        del evaluator
        return (result, tokens) if evalvars else (result, None)