diff --git a/cpreproc.py b/cpreproc.py index 877fd54..fceff74 100644 --- a/cpreproc.py +++ b/cpreproc.py @@ -14,567 +14,18 @@ # # You should have received a copy of the GNU General Public License # along with LSL PyOptimizer. If not, see . -# -# This file includes an excerpt from PCPP, by Niall Douglas and David -# Beazley. PCPP is available here: https://github.com/ned14/pcpp and -# the fragment used here was distributed under the following conditions: -# -# (C) Copyright 2018-2019 Niall Douglas http://www.nedproductions.biz/ -# (C) Copyright 2007-2019 David Beazley http://www.dabeaz.com/ -# -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of the David Beazley or Dabeaz LLC may be used to -# endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# (End of terms and conditions for the PCPP excerpt) -# -# The particular excerpt used is this one: -# https://github.com/ned14/pcpp/blob/e1219ce157b4dfcfee3181faa6ec5129c3a41e78/pcpp/preprocessor.py#L873-L935 -# The license that applies, reproduced above, is this one: -# https://raw.githubusercontent.com/ned14/pcpp/e1219ce157b4dfcfee3181faa6ec5129c3a41e78/LICENSE.txt -# -# The following fragments of code are hereby irrevokably donated to the -# public domain: -# - The Evaluator class in its entirety. -# - The evalexpr method in its entirety except for the excerpt mentioned -# above, which remains copyright of its authors. -# - Every line between this one and the Evaluator class. -import sys, os, re, copy +# Interface for Niall Douglas' and David M. Beazley's PCPP (a C preprocessor) + +import sys, os oldsyspath = sys.path sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'pcpp')) -from pcpp import preprocessor +from pcpp import preprocessor, OutputDirective, Action path = oldsyspath -# Define the number of bits to work with in expression evaluation -# (per the standard, this should be the bits in uintmax_t). -INTMAXBITS = 64 - -UINTMAX_MAX = (1 << INTMAXBITS) - 1 -INTMAX_MIN = -(1 << (INTMAXBITS - 1)) - -DSYMBOLS = {'->', '-=', '--', '==', '<<', '<=', '>>', '>=', '||', '|=', - '&&', '&=', '!=', '^=', '*=', '/=', '%=', '+=', '++'} -DIGRAPHS = {'<:':'[', ':>':']', '<%':'{', '%>':'}', '%:':'#'} -ESCAPES = {'a':7,'b':8,'f':12,'n':10,'r':13,'t':9,'v':11, - '"':34, '\\':92, '\'':39, '?':63} - -# Exception to report an evaluation error -class EvalError(Exception): pass - -class uint(long): pass -class sint(long): pass - -class Evaluator(object): - """Recursive descendent parser to evaluate C preprocessor expressions.""" - - # Int parser - resolve_int_regex = re.compile( - # Group 1: Hex - # Group 2: Oct - # Group 3: Dec - # Group 4: Unsigned - r'^(?:(0x[0-9a-f]+)|(0[0-7]*)|([1-9][0-9]*))' - r'(?:(u(?:ll?)?|(?:ll?)?u)|(?:ll?)?)$', re.I | re.S) - - # Char parser (without the quotes) - ctoken_regex = re.compile( - r'\\(?:' - r'[\?' r"'" r'"\\abfnrtv]|[Xx][0-9a-fA-F]+|[0-7]{1,3}' - r'|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}' - r')' - r'|.', re.S) - - def __init__(self, tokens): - assert tokens, "Empty tokens list???" - self.tokens = tokens - self.ptr = 0 - self.evaluating = True - self.conv = {uint: self.to_uint, sint: self.to_sint} - self.nextToken() - - def to_uint(self, i): - return uint(i & UINTMAX_MAX) - - def to_sint(self, i): - return sint(((i - INTMAX_MIN) & UINTMAX_MAX) + INTMAX_MIN) - - def nextToken(self): - """Sets self.token to the next token and advances the token pointer. - Skips whitespace tokens. Returns a CPP_WS token with value '\n' if - there's no next token. Returns synthesized tokens for multichar tokens - not currently handled by PCPP. - """ - try: - while True: - tok = self.token = self.tokens[self.ptr] - self.ptr += 1 - # Eat whitespace except newlines, and /* */ comments - if (tok.type == 'CPP_WS' and '\n' not in tok.value - or tok.type == 'CPP_COMMENT1' - ): - continue - break - - except IndexError: - # Synthesize a new CPP_WS token with a newline, to signal - # end-of-text (we copy it from the last one in the token stream). - tok = self.token = copy.copy(self.tokens[-1]) - tok.type = 'CPP_WS' - tok.value = '\n' - return - - # Single-line comments are line terminators; convert them - if tok.type == 'CPP_COMMENT2': - tok = self.token = copy.copy(tok) - tok.type = 'CPP_WS' - tok.value = '\n' - return - - # Work around a lexing problem in PCPP - # - # PCPP doesn't tokenize multichar tokens except ##, so we do that job - # here, to ease processing and report more errors (e.g. 5--3 should be - # reported as an error because it uses the post-decrement operator, - # instead of evaluating to 8, which is the correct result for 5- -3). - # The tokens processed here are those in the C standard missed by PCPP: - # -> -= -- << <= >> >= || |= && &= == != ^= *= /= += ++ %= - # >>= <<= - # ... - # <: :> <% %> %: - # %:%: - # - # This is already a single token, therefore it's not processed here: - # ## - - try: - next = self.tokens[self.ptr] - except IndexError: - return - - s = tok.type + next.type - - if s in DSYMBOLS: - tok = self.token = copy.copy(tok) - tok.type = s - tok.value += next.value - self.ptr += 1 - if s in ('<<', '>>'): - # check for <<= >>= - try: - next2 = self.tokens[self.ptr] - if next2.type == '=': - tok.type += next2.type - tok.value += next2.value - self.ptr += 1 - except IndexError: - pass - return - - if s in DIGRAPHS: - # digraph or DPOUND - tok = self.token = copy.copy(tok) - tok.type = DIGRAPHS[s] - tok.value += next.value - self.ptr += 1 - try: - next2 = self.tokens[self.ptr] - next3 = self.tokens[self.ptr + 1] - if next2.type == '%' and next3.type == ':': - tok.type = '##' - tok.value += next2.value + next3.value - self.ptr += 2 - except IndexError: - pass - return - - if s == '..': - try: - next2 = self.tokens[self.ptr + 1] - if next2.type == '.': - tok = self.token = copy.copy(tok) - tok.type = '...' - tok.value += next.value + next2.value - self.ptr += 2 - except IndexError: - pass - return - - def eat(self, *toktypes): - """Return True and advance pointer if the current token matches. """ - if self.token.type in toktypes: - self.nextToken() - return True - return False - - def expect(self, toktype): - """Checks an expected token and eats it""" - expect = toktype - if toktype == 'END' and '\n' in self.token.value: - expect = 'CPP_WS' - if not self.eat(expect): - raise EvalError( - "Unexpected token %s (%s) in expression, expected %s" - % (repr(self.token.value), self.token.type, toktype)) - - def conversions(self, op1, op2): - """Perform usual arithmetic conversions on two operands.""" - assert type(op1) in (sint, uint) and type(op2) in (sint, uint) - if type(op1) != type(op2): - return self.to_uint(op1), self.to_uint(op2) - return op1, op2 - - def primary_expression(self, evaluating): - """Non-terminal: primary_expression. - - primary_expression: - IDENTIFIER | STRING_LITERAL | CHAR_LITERAL | INTEGER - | '(' expression ')' - """ - tok = self.token - if self.eat('('): - ret = self.expression(evaluating) - self.expect(')') - return ret - - #if self.eat('CPP_STRING'): - # return tok.value - - if self.eat('CPP_CHAR'): - charstr = tok.value - unicode = False - if tok.value.startswith('L'): - unicode = True - charstr = charstr[2:-1] - else: - charstr = charstr[1:-1] - onechar = False - for ctok in self.ctoken_regex.finditer(charstr): - if onechar: - raise EvalError("Multiple characters in char literal") - onechar = True - c = ctok.group(0) - if c == '\\': - raise EvalError("Invalid escape sequence in char literal") - if c.startswith('\\'): - if c.startswith('\\u') or c.startswith('\\U'): - result = int(c[2:], 16) - if ((result < 0xA0 and result not in (0x24,0x40,0x60)) - or 0xD800 <= result <= 0xDFFF - ): - raise EvalError("Invalid universal character %s" - % c) - if result > 0xFF and not unicode: - raise EvalError("Char literal out of range") - elif c.startswith('\\x') or c.startswith('\\X'): - result = int(c[2:], 16) - if result > 0xFF: - raise EvalError("Hex literal out of range") - elif c[1] in ESCAPES: - result = ESCAPES[c[1]] - else: - result = int(c[1:], 8) - else: - assert len(c) == 1 and c != '\'' - return ord(c) - - # This may need reconsideration if INTMAXBITS is < 22 (the bits - # necessary to fit a Unicode codepoint in a signed integer). - return sint(result) # our char is unsigned - - if tok.type == 'CPP_ID': - tok = self.token = copy.copy(tok) - tok.type = 'CPP_INTEGER' - tok.value = '0' - # fall through to process it as CPP_INTEGER - - if self.eat('CPP_INTEGER'): - m = self.resolve_int_regex.search(tok.value) - if not m: - raise EvalError("Invalid integer literal") - val = (int(m.group(2), 8) if m.group(2) - else int(m.group(1) or m.group(3), 0)) - val = (self.to_uint(val) - if m.group(4) - or val >= -INTMAX_MIN and m.group(3) is None - else self.to_sint(val)) - return val - - if tok.type == 'CPP_STRING': - raise EvalError("Strings are not allowed in expressions") - - if tok.type == 'CPP_WS' and '\n' in tok.value: - raise EvalError('Unexpected end of expression') - - self.expect('CPP_INTEGER') - - def factor_expression(self, evaluating): - """Non-terminal: factor_expression - - factor_expression: - primary_expression - | unary_operator factor_expression - """ - # Avoid recursing for unary operators. Apply them post-evaluation. - k = None - while True: - toktype = self.token.type - if self.eat('-', '+', '~', '!') and toktype != '+': - k = k or [] - k.append(toktype) - else: - break - result = self.primary_expression(evaluating) - while k: - operation = k.pop() - if operation == '!': - result = sint(0 if result else 1) - else: - result = self.conv[type(result)](-result if operation == '-' - else ~result) - return result - - def term_expression(self, evaluating): - """Non-terminal: term_expression - - term_expression: - factor_expression - | term_expression '*' factor_expression - | term_expression '/' factor_expression - | term_expression '%' factor_expression - """ - result = self.factor_expression(evaluating) - while True: - toktype = self.token.type - if not self.eat('*', '/', '%'): - return result - operand = self.factor_expression(evaluating) - if evaluating and operand == 0 and toktype != '*': - raise EvalError("Division by zero") - result, operand = self.conversions(result, operand) - result = self.conv[type(result)](result if not evaluating - else result * operand if toktype == '*' - else result // operand if toktype == '/' - else result % operand) - - def arithmetic_expression(self, evaluating): - """Non-terminal: arithmetic_expression - - arithmetic_expression: - term_expression - | arithmetic_expression '+' term_expression - | arithmetic_expression '-' term_expression - """ - result = self.term_expression(evaluating) - while True: - toktype = self.token.type - if not self.eat('+', '-'): - return result - operand = self.term_expression(evaluating) - result, operand = self.conversions(result, operand) - result = self.conv[type(result)](result + operand if toktype == '+' - else result - operand) - - def shift_expression(self, evaluating): - """Non-terminal: shift_expression - - shift_expression: - arithmetic_expression - | shift_expression '<<' arithmetic_expression - | shift_expression '>>' arithmetic_expression - """ - result = self.arithmetic_expression(evaluating) - while True: - tok = self.token - if not self.eat('<<', '>>'): - return result - operand = self.arithmetic_expression(evaluating) - # We don't want a too large intermediate result, to prevent DoS - result = self.conv[type(result)](result << min(operand, INTMAXBITS) - if tok.type == '<<' else result >> max(operand, 0)) - - def relational_expression(self, evaluating): - """Non-terminal: relational_expression - - relational_expression: - shift_expression - | relational_expression '>' shift_expression - | relational_expression '<' shift_expression - | relational_expression '>=' shift_expression - | relational_expression '<=' shift_expression - """ - result = self.shift_expression(evaluating) - while True: - tok = self.token - if not self.eat('<', '>', '<=', '>='): - return result - operand = self.shift_expression(evaluating) - result, operand = self.conversions(result, operand) - # Use the fact that a < b <-> b > a - # Use the fact that a < b <-> !(a >= b) - if tok.type == '>' or tok.type == '<=': - result, operand = operand, result - result = sint(1 if (result < operand) == (tok.type in ('<', '>')) - else 0) - - def equality_expression(self, evaluating): - """Non-terminal: equality_expression - - equality_expression: - relational_expression - | equality_expression '==' relational_expression - | equality_expression '!=' relational_expression - """ - result = self.relational_expression(evaluating) - while True: - tok = self.token - if not self.eat('==', '!='): - return result - operand = self.relational_expression(evaluating) - result, operand = self.conversions(result, operand) - result = sint(1 if (result == operand) == (tok.type == '==') - else 0) - - def bitwise_and_expression(self, evaluating): - """Non-terminal: bitwise_and_expression - - bitwise_and_expression: - equality_expression - | bitwise_and_expression '&' equality_expression - """ - result = self.equality_expression(evaluating) - while True: - if not self.eat('&'): - return result - operand = self.equality_expression(evaluating) - result, operand = self.conversions(result, operand) - result = self.conv[type(result)](result & operand) - - def bitwise_xor_expression(self, evaluating): - """Non-terminal: bitwise_xor_expression - - bitwise_xor_expression: - bitwise_and_expression - | bitwise_xor_expression '^' bitwise_and_expression - """ - result = self.bitwise_and_expression(evaluating) - while True: - if not self.eat('^'): - return result - operand = self.bitwise_and_expression(evaluating) - result, operand = self.conversions(result, operand) - result = self.conv[type(result)](result ^ operand) - - def bitwise_or_expression(self, evaluating): - """Non-terminal: bitwise_or_expression - - bitwise_or_expression: - bitwise_xor_expression - | bitwise_or_expression '|' bitwise_xor_expression - """ - result = self.bitwise_xor_expression(evaluating) - while True: - if not self.eat('|'): - return result - operand = self.bitwise_xor_expression(evaluating) - result, operand = self.conversions(result, operand) - result = self.conv[type(result)](result | operand) - - def logical_and_expression(self, evaluating): - """Non-terminal: logical_and_expression - - logical_and_expression: - bitwise_or_expression - | logical_and_expression '&&' bitwise_or_expression - """ - result = self.bitwise_or_expression(evaluating) - while True: - if not self.eat('&&'): - return result - evaluating = evaluating and not not result - operand = self.bitwise_or_expression(evaluating) - result = sint(1 if result and (not evaluating or operand) else 0) - - def logical_or_expression(self, evaluating): - """Non-terminal: logical_or_expression - - logical_or_expression: - logical_and_expression - | logical_or_expression '||' logical_and_expression - """ - result = self.logical_and_expression(evaluating) - while True: - if not self.eat('||'): - return result - evaluating = evaluating and not result - operand = self.logical_and_expression(evaluating) - result = sint(1 if result or (evaluating and operand) else 0) - - def conditional_expression(self, evaluating): - """Non-terminal: conditional_expression. - - conditional_expression: - logical_or_expression - | logical_or_expression '?' expression ':' conditional_expression - """ - result = self.logical_or_expression(evaluating) - if self.eat('?'): - if result: - result = self.expression(evaluating) - self.expect(':') - operand = self.conditional_expression(False) - else: - operand = self.expression(False) - self.expect(':') - result = self.conditional_expression(evaluating) - result, operand = self.conversions(result, operand) - return result - - def expression(self, evaluating = True): - """Non-terminal: expression. - - expression: - conditional_expression (always) - | expression conditional_expression (if not evaluating) - """ - if evaluating: - return self.conditional_expression(evaluating) - while True: - result = self.conditional_expression(evaluating) - if not self.eat(','): - return result - - def evaluate(self): - result = self.expression(True) - - # Did we eat all tokens? - self.expect('END') - return result +DIRECTIVES_PASSED_THROUGH = {'warning', 'pragma', 'line'} class Preproc(preprocessor.Preprocessor): def __init__(self, input, params=()): @@ -622,95 +73,22 @@ class Preproc(preprocessor.Preprocessor): self.errors_present = True return super(Preproc, self).on_error(*args, **kwargs) - def on_include_not_found(self, is_system_include, curdir, includepath): + def on_include_not_found(self, is_malformed, is_system_include, curdir, + includepath): """Don't pass through the #include line if the file does not exist.""" - self.on_error(self.lastdirective.source, self.lastdirective.lineno, - "Include file not found: %s" % includepath) + if is_malformed: + self.on_error(self.lastdirective.source, self.lastdirective.lineno, + "Malformed include file directive") + else: + self.on_error(self.lastdirective.source, self.lastdirective.lineno, + "Include file not found: %s" % includepath) + raise OutputDirective(Action.IgnoreAndRemove) - def evalexpr(self, tokens): - """Evaluate a sequence of tokens as an expression. - - The original uses eval(), which is unsafe for web usage. This one uses - our own recursive-descendent parser. - """ - - # **************************************************** - # Start of fragment copied from PCPP's preprocessor.py - """Evaluate an expression token sequence for the purposes of evaluating - integral expressions.""" - if not tokens: - self.on_error('unknown', 0, "Empty expression") - return (0, None) - # tokens = tokenize(line) - # Search for defined macros - evalfuncts = {'defined' : lambda x: True} - evalvars = {} - def replace_defined(tokens): - i = 0 - while i < len(tokens): - if tokens[i].type == self.t_ID and tokens[i].value == 'defined': - j = i + 1 - needparen = False - result = "0L" - while j < len(tokens): - if tokens[j].type in self.t_WS: - j += 1 - continue - elif tokens[j].type == self.t_ID: - if tokens[j].value in self.macros: - result = "1L" - else: - repl = self.on_unknown_macro_in_defined_expr(tokens[j]) - if repl is None: - # Add this identifier to a dictionary of variables - evalvars[tokens[j].value] = 0 - result = 'defined('+tokens[j].value+')' - else: - result = "1L" if repl else "0L" - if not needparen: break - elif tokens[j].value == '(': - needparen = True - elif tokens[j].value == ')': - break - else: - self.on_error(tokens[i].source,tokens[i].lineno,"Malformed defined()") - j += 1 - if result.startswith('defined'): - tokens[i].type = self.t_ID - tokens[i].value = result - else: - tokens[i].type = self.t_INTEGER - tokens[i].value = self.t_INTEGER_TYPE(result) - del tokens[i+1:j+1] - i += 1 - return tokens - # Replace any defined(macro) before macro expansion - tokens = replace_defined(tokens) - tokens = self.expand_macros(tokens) - # Replace any defined(macro) after macro expansion - tokens = replace_defined(tokens) - if not tokens: - return (0, None) - for i,t in enumerate(tokens): - if t.type == self.t_ID: - repl = self.on_unknown_macro_in_expr(copy.copy(t)) - if repl is None: - # Add this identifier to a dictionary of variables - evalvars[t.value] = 0 - else: - tokens[i] = t = repl - # End of fragment copied from PCPP's preprocessor.py - # ************************************************** - - del evalfuncts # we don't use this - - evaluator = Evaluator(tokens) - try: - result = int(evaluator.evaluate()) - except EvalError as e: - self.on_error(evaluator.token.source, evaluator.token.lineno, - e.message) - return (0, None) - del evaluator - - return (result, tokens) if evalvars else (result, None) + def on_directive_unknown(self, directive, toks, ifpassthru, precedingtoks): + """pcpp does not process #error/#warning/#pragma/#line; do it here.""" + if directive.value == 'error': + self.on_error(directive.source, directive.lineno, + "Error directive: \"%s\"" % ''.join(i.value for i in toks)) + elif directive.value not in DIRECTIVES_PASSED_THROUGH: + self.on_error(directive.source, directive.lineno, + "Unknown directive: \"%s\"" % directive.value) diff --git a/pcpp b/pcpp index ed3b3f0..18d5bc4 160000 --- a/pcpp +++ b/pcpp @@ -1 +1 @@ -Subproject commit ed3b3f02e8f97c9112e2f6cd82115864ee056e21 +Subproject commit 18d5bc4cdb594c6d76a67f4e76fd5250015f6700