From 5e88adcff385200fe3407ae276c5a8a2b3a128b9 Mon Sep 17 00:00:00 2001 From: Sei Lisa Date: Thu, 29 Oct 2020 20:24:00 +0100 Subject: [PATCH] Update to latest pcpp, remove expression evaluator PCPP now includes a decent expression evaluator instead of relying on Python's, therefore our evaluator is removed. The corresponding license text no longer applies, because the snippet that used it has been removed. This patch also corrects the handling of include files, which was supposed to raise an exception or return something (and now it needs an extra parameter). We just report an error and raise the special exception that pcpp uses to ignore and remove the directive. This patch also handles #error and invalid directives, which were passed through in previous versions. #warning and #line are also passed through. --- cpreproc.py | 666 ++-------------------------------------------------- pcpp | 2 +- 2 files changed, 23 insertions(+), 645 deletions(-) diff --git a/cpreproc.py b/cpreproc.py index 877fd54..fceff74 100644 --- a/cpreproc.py +++ b/cpreproc.py @@ -14,567 +14,18 @@ # # You should have received a copy of the GNU General Public License # along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>. -# -# This file includes an excerpt from PCPP, by Niall Douglas and David -# Beazley. PCPP is available here: https://github.com/ned14/pcpp and -# the fragment used here was distributed under the following conditions: -# -# (C) Copyright 2018-2019 Niall Douglas http://www.nedproductions.biz/ -# (C) Copyright 2007-2019 David Beazley http://www.dabeaz.com/ -# -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of the David Beazley or Dabeaz LLC may be used to -# endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# (End of terms and conditions for the PCPP excerpt) -# -# The particular excerpt used is this one: -# https://github.com/ned14/pcpp/blob/e1219ce157b4dfcfee3181faa6ec5129c3a41e78/pcpp/preprocessor.py#L873-L935 -# The license that applies, reproduced above, is this one: -# https://raw.githubusercontent.com/ned14/pcpp/e1219ce157b4dfcfee3181faa6ec5129c3a41e78/LICENSE.txt -# -# The following fragments of code are hereby irrevokably donated to the -# public domain: -# - The Evaluator class in its entirety. -# - The evalexpr method in its entirety except for the excerpt mentioned -# above, which remains copyright of its authors. -# - Every line between this one and the Evaluator class. -import sys, os, re, copy +# Interface for Niall Douglas' and David M. 
Beazley's PCPP (a C preprocessor) + +import sys, os oldsyspath = sys.path sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'pcpp')) -from pcpp import preprocessor +from pcpp import preprocessor, OutputDirective, Action path = oldsyspath -# Define the number of bits to work with in expression evaluation -# (per the standard, this should be the bits in uintmax_t). -INTMAXBITS = 64 - -UINTMAX_MAX = (1 << INTMAXBITS) - 1 -INTMAX_MIN = -(1 << (INTMAXBITS - 1)) - -DSYMBOLS = {'->', '-=', '--', '==', '<<', '<=', '>>', '>=', '||', '|=', - '&&', '&=', '!=', '^=', '*=', '/=', '%=', '+=', '++'} -DIGRAPHS = {'<:':'[', ':>':']', '<%':'{', '%>':'}', '%:':'#'} -ESCAPES = {'a':7,'b':8,'f':12,'n':10,'r':13,'t':9,'v':11, - '"':34, '\\':92, '\'':39, '?':63} - -# Exception to report an evaluation error -class EvalError(Exception): pass - -class uint(long): pass -class sint(long): pass - -class Evaluator(object): - """Recursive descendent parser to evaluate C preprocessor expressions.""" - - # Int parser - resolve_int_regex = re.compile( - # Group 1: Hex - # Group 2: Oct - # Group 3: Dec - # Group 4: Unsigned - r'^(?:(0x[0-9a-f]+)|(0[0-7]*)|([1-9][0-9]*))' - r'(?:(u(?:ll?)?|(?:ll?)?u)|(?:ll?)?)$', re.I | re.S) - - # Char parser (without the quotes) - ctoken_regex = re.compile( - r'\\(?:' - r'[\?' r"'" r'"\\abfnrtv]|[Xx][0-9a-fA-F]+|[0-7]{1,3}' - r'|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}' - r')' - r'|.', re.S) - - def __init__(self, tokens): - assert tokens, "Empty tokens list???" - self.tokens = tokens - self.ptr = 0 - self.evaluating = True - self.conv = {uint: self.to_uint, sint: self.to_sint} - self.nextToken() - - def to_uint(self, i): - return uint(i & UINTMAX_MAX) - - def to_sint(self, i): - return sint(((i - INTMAX_MIN) & UINTMAX_MAX) + INTMAX_MIN) - - def nextToken(self): - """Sets self.token to the next token and advances the token pointer. - Skips whitespace tokens. Returns a CPP_WS token with value '\n' if - there's no next token. 
Returns synthesized tokens for multichar tokens - not currently handled by PCPP. - """ - try: - while True: - tok = self.token = self.tokens[self.ptr] - self.ptr += 1 - # Eat whitespace except newlines, and /* */ comments - if (tok.type == 'CPP_WS' and '\n' not in tok.value - or tok.type == 'CPP_COMMENT1' - ): - continue - break - - except IndexError: - # Synthesize a new CPP_WS token with a newline, to signal - # end-of-text (we copy it from the last one in the token stream). - tok = self.token = copy.copy(self.tokens[-1]) - tok.type = 'CPP_WS' - tok.value = '\n' - return - - # Single-line comments are line terminators; convert them - if tok.type == 'CPP_COMMENT2': - tok = self.token = copy.copy(tok) - tok.type = 'CPP_WS' - tok.value = '\n' - return - - # Work around a lexing problem in PCPP - # - # PCPP doesn't tokenize multichar tokens except ##, so we do that job - # here, to ease processing and report more errors (e.g. 5--3 should be - # reported as an error because it uses the post-decrement operator, - # instead of evaluating to 8, which is the correct result for 5- -3). - # The tokens processed here are those in the C standard missed by PCPP: - # -> -= -- << <= >> >= || |= && &= == != ^= *= /= += ++ %= - # >>= <<= - # ... 
- # <: :> <% %> %: - # %:%: - # - # This is already a single token, therefore it's not processed here: - # ## - - try: - next = self.tokens[self.ptr] - except IndexError: - return - - s = tok.type + next.type - - if s in DSYMBOLS: - tok = self.token = copy.copy(tok) - tok.type = s - tok.value += next.value - self.ptr += 1 - if s in ('<<', '>>'): - # check for <<= >>= - try: - next2 = self.tokens[self.ptr] - if next2.type == '=': - tok.type += next2.type - tok.value += next2.value - self.ptr += 1 - except IndexError: - pass - return - - if s in DIGRAPHS: - # digraph or DPOUND - tok = self.token = copy.copy(tok) - tok.type = DIGRAPHS[s] - tok.value += next.value - self.ptr += 1 - try: - next2 = self.tokens[self.ptr] - next3 = self.tokens[self.ptr + 1] - if next2.type == '%' and next3.type == ':': - tok.type = '##' - tok.value += next2.value + next3.value - self.ptr += 2 - except IndexError: - pass - return - - if s == '..': - try: - next2 = self.tokens[self.ptr + 1] - if next2.type == '.': - tok = self.token = copy.copy(tok) - tok.type = '...' - tok.value += next.value + next2.value - self.ptr += 2 - except IndexError: - pass - return - - def eat(self, *toktypes): - """Return True and advance pointer if the current token matches. 
""" - if self.token.type in toktypes: - self.nextToken() - return True - return False - - def expect(self, toktype): - """Checks an expected token and eats it""" - expect = toktype - if toktype == 'END' and '\n' in self.token.value: - expect = 'CPP_WS' - if not self.eat(expect): - raise EvalError( - "Unexpected token %s (%s) in expression, expected %s" - % (repr(self.token.value), self.token.type, toktype)) - - def conversions(self, op1, op2): - """Perform usual arithmetic conversions on two operands.""" - assert type(op1) in (sint, uint) and type(op2) in (sint, uint) - if type(op1) != type(op2): - return self.to_uint(op1), self.to_uint(op2) - return op1, op2 - - def primary_expression(self, evaluating): - """Non-terminal: primary_expression. - - primary_expression: - IDENTIFIER | STRING_LITERAL | CHAR_LITERAL | INTEGER - | '(' expression ')' - """ - tok = self.token - if self.eat('('): - ret = self.expression(evaluating) - self.expect(')') - return ret - - #if self.eat('CPP_STRING'): - # return tok.value - - if self.eat('CPP_CHAR'): - charstr = tok.value - unicode = False - if tok.value.startswith('L'): - unicode = True - charstr = charstr[2:-1] - else: - charstr = charstr[1:-1] - onechar = False - for ctok in self.ctoken_regex.finditer(charstr): - if onechar: - raise EvalError("Multiple characters in char literal") - onechar = True - c = ctok.group(0) - if c == '\\': - raise EvalError("Invalid escape sequence in char literal") - if c.startswith('\\'): - if c.startswith('\\u') or c.startswith('\\U'): - result = int(c[2:], 16) - if ((result < 0xA0 and result not in (0x24,0x40,0x60)) - or 0xD800 <= result <= 0xDFFF - ): - raise EvalError("Invalid universal character %s" - % c) - if result > 0xFF and not unicode: - raise EvalError("Char literal out of range") - elif c.startswith('\\x') or c.startswith('\\X'): - result = int(c[2:], 16) - if result > 0xFF: - raise EvalError("Hex literal out of range") - elif c[1] in ESCAPES: - result = ESCAPES[c[1]] - else: - result = 
int(c[1:], 8) - else: - assert len(c) == 1 and c != '\'' - return ord(c) - - # This may need reconsideration if INTMAXBITS is < 22 (the bits - # necessary to fit a Unicode codepoint in a signed integer). - return sint(result) # our char is unsigned - - if tok.type == 'CPP_ID': - tok = self.token = copy.copy(tok) - tok.type = 'CPP_INTEGER' - tok.value = '0' - # fall through to process it as CPP_INTEGER - - if self.eat('CPP_INTEGER'): - m = self.resolve_int_regex.search(tok.value) - if not m: - raise EvalError("Invalid integer literal") - val = (int(m.group(2), 8) if m.group(2) - else int(m.group(1) or m.group(3), 0)) - val = (self.to_uint(val) - if m.group(4) - or val >= -INTMAX_MIN and m.group(3) is None - else self.to_sint(val)) - return val - - if tok.type == 'CPP_STRING': - raise EvalError("Strings are not allowed in expressions") - - if tok.type == 'CPP_WS' and '\n' in tok.value: - raise EvalError('Unexpected end of expression') - - self.expect('CPP_INTEGER') - - def factor_expression(self, evaluating): - """Non-terminal: factor_expression - - factor_expression: - primary_expression - | unary_operator factor_expression - """ - # Avoid recursing for unary operators. Apply them post-evaluation. 
- k = None - while True: - toktype = self.token.type - if self.eat('-', '+', '~', '!') and toktype != '+': - k = k or [] - k.append(toktype) - else: - break - result = self.primary_expression(evaluating) - while k: - operation = k.pop() - if operation == '!': - result = sint(0 if result else 1) - else: - result = self.conv[type(result)](-result if operation == '-' - else ~result) - return result - - def term_expression(self, evaluating): - """Non-terminal: term_expression - - term_expression: - factor_expression - | term_expression '*' factor_expression - | term_expression '/' factor_expression - | term_expression '%' factor_expression - """ - result = self.factor_expression(evaluating) - while True: - toktype = self.token.type - if not self.eat('*', '/', '%'): - return result - operand = self.factor_expression(evaluating) - if evaluating and operand == 0 and toktype != '*': - raise EvalError("Division by zero") - result, operand = self.conversions(result, operand) - result = self.conv[type(result)](result if not evaluating - else result * operand if toktype == '*' - else result // operand if toktype == '/' - else result % operand) - - def arithmetic_expression(self, evaluating): - """Non-terminal: arithmetic_expression - - arithmetic_expression: - term_expression - | arithmetic_expression '+' term_expression - | arithmetic_expression '-' term_expression - """ - result = self.term_expression(evaluating) - while True: - toktype = self.token.type - if not self.eat('+', '-'): - return result - operand = self.term_expression(evaluating) - result, operand = self.conversions(result, operand) - result = self.conv[type(result)](result + operand if toktype == '+' - else result - operand) - - def shift_expression(self, evaluating): - """Non-terminal: shift_expression - - shift_expression: - arithmetic_expression - | shift_expression '<<' arithmetic_expression - | shift_expression '>>' arithmetic_expression - """ - result = self.arithmetic_expression(evaluating) - while True: 
- tok = self.token - if not self.eat('<<', '>>'): - return result - operand = self.arithmetic_expression(evaluating) - # We don't want a too large intermediate result, to prevent DoS - result = self.conv[type(result)](result << min(operand, INTMAXBITS) - if tok.type == '<<' else result >> max(operand, 0)) - - def relational_expression(self, evaluating): - """Non-terminal: relational_expression - - relational_expression: - shift_expression - | relational_expression '>' shift_expression - | relational_expression '<' shift_expression - | relational_expression '>=' shift_expression - | relational_expression '<=' shift_expression - """ - result = self.shift_expression(evaluating) - while True: - tok = self.token - if not self.eat('<', '>', '<=', '>='): - return result - operand = self.shift_expression(evaluating) - result, operand = self.conversions(result, operand) - # Use the fact that a < b <-> b > a - # Use the fact that a < b <-> !(a >= b) - if tok.type == '>' or tok.type == '<=': - result, operand = operand, result - result = sint(1 if (result < operand) == (tok.type in ('<', '>')) - else 0) - - def equality_expression(self, evaluating): - """Non-terminal: equality_expression - - equality_expression: - relational_expression - | equality_expression '==' relational_expression - | equality_expression '!=' relational_expression - """ - result = self.relational_expression(evaluating) - while True: - tok = self.token - if not self.eat('==', '!='): - return result - operand = self.relational_expression(evaluating) - result, operand = self.conversions(result, operand) - result = sint(1 if (result == operand) == (tok.type == '==') - else 0) - - def bitwise_and_expression(self, evaluating): - """Non-terminal: bitwise_and_expression - - bitwise_and_expression: - equality_expression - | bitwise_and_expression '&' equality_expression - """ - result = self.equality_expression(evaluating) - while True: - if not self.eat('&'): - return result - operand = 
self.equality_expression(evaluating) - result, operand = self.conversions(result, operand) - result = self.conv[type(result)](result & operand) - - def bitwise_xor_expression(self, evaluating): - """Non-terminal: bitwise_xor_expression - - bitwise_xor_expression: - bitwise_and_expression - | bitwise_xor_expression '^' bitwise_and_expression - """ - result = self.bitwise_and_expression(evaluating) - while True: - if not self.eat('^'): - return result - operand = self.bitwise_and_expression(evaluating) - result, operand = self.conversions(result, operand) - result = self.conv[type(result)](result ^ operand) - - def bitwise_or_expression(self, evaluating): - """Non-terminal: bitwise_or_expression - - bitwise_or_expression: - bitwise_xor_expression - | bitwise_or_expression '|' bitwise_xor_expression - """ - result = self.bitwise_xor_expression(evaluating) - while True: - if not self.eat('|'): - return result - operand = self.bitwise_xor_expression(evaluating) - result, operand = self.conversions(result, operand) - result = self.conv[type(result)](result | operand) - - def logical_and_expression(self, evaluating): - """Non-terminal: logical_and_expression - - logical_and_expression: - bitwise_or_expression - | logical_and_expression '&&' bitwise_or_expression - """ - result = self.bitwise_or_expression(evaluating) - while True: - if not self.eat('&&'): - return result - evaluating = evaluating and not not result - operand = self.bitwise_or_expression(evaluating) - result = sint(1 if result and (not evaluating or operand) else 0) - - def logical_or_expression(self, evaluating): - """Non-terminal: logical_or_expression - - logical_or_expression: - logical_and_expression - | logical_or_expression '||' logical_and_expression - """ - result = self.logical_and_expression(evaluating) - while True: - if not self.eat('||'): - return result - evaluating = evaluating and not result - operand = self.logical_and_expression(evaluating) - result = sint(1 if result or (evaluating and 
operand) else 0) - - def conditional_expression(self, evaluating): - """Non-terminal: conditional_expression. - - conditional_expression: - logical_or_expression - | logical_or_expression '?' expression ':' conditional_expression - """ - result = self.logical_or_expression(evaluating) - if self.eat('?'): - if result: - result = self.expression(evaluating) - self.expect(':') - operand = self.conditional_expression(False) - else: - operand = self.expression(False) - self.expect(':') - result = self.conditional_expression(evaluating) - result, operand = self.conversions(result, operand) - return result - - def expression(self, evaluating = True): - """Non-terminal: expression. - - expression: - conditional_expression (always) - | expression conditional_expression (if not evaluating) - """ - if evaluating: - return self.conditional_expression(evaluating) - while True: - result = self.conditional_expression(evaluating) - if not self.eat(','): - return result - - def evaluate(self): - result = self.expression(True) - - # Did we eat all tokens? 
- self.expect('END') - return result +DIRECTIVES_PASSED_THROUGH = {'warning', 'pragma', 'line'} class Preproc(preprocessor.Preprocessor): def __init__(self, input, params=()): @@ -622,95 +73,22 @@ class Preproc(preprocessor.Preprocessor): self.errors_present = True return super(Preproc, self).on_error(*args, **kwargs) - def on_include_not_found(self, is_system_include, curdir, includepath): + def on_include_not_found(self, is_malformed, is_system_include, curdir, + includepath): """Don't pass through the #include line if the file does not exist.""" - self.on_error(self.lastdirective.source, self.lastdirective.lineno, - "Include file not found: %s" % includepath) + if is_malformed: + self.on_error(self.lastdirective.source, self.lastdirective.lineno, + "Malformed include file directive") + else: + self.on_error(self.lastdirective.source, self.lastdirective.lineno, + "Include file not found: %s" % includepath) + raise OutputDirective(Action.IgnoreAndRemove) - def evalexpr(self, tokens): - """Evaluate a sequence of tokens as an expression. - - The original uses eval(), which is unsafe for web usage. This one uses - our own recursive-descendent parser. 
- """ - - # **************************************************** - # Start of fragment copied from PCPP's preprocessor.py - """Evaluate an expression token sequence for the purposes of evaluating - integral expressions.""" - if not tokens: - self.on_error('unknown', 0, "Empty expression") - return (0, None) - # tokens = tokenize(line) - # Search for defined macros - evalfuncts = {'defined' : lambda x: True} - evalvars = {} - def replace_defined(tokens): - i = 0 - while i < len(tokens): - if tokens[i].type == self.t_ID and tokens[i].value == 'defined': - j = i + 1 - needparen = False - result = "0L" - while j < len(tokens): - if tokens[j].type in self.t_WS: - j += 1 - continue - elif tokens[j].type == self.t_ID: - if tokens[j].value in self.macros: - result = "1L" - else: - repl = self.on_unknown_macro_in_defined_expr(tokens[j]) - if repl is None: - # Add this identifier to a dictionary of variables - evalvars[tokens[j].value] = 0 - result = 'defined('+tokens[j].value+')' - else: - result = "1L" if repl else "0L" - if not needparen: break - elif tokens[j].value == '(': - needparen = True - elif tokens[j].value == ')': - break - else: - self.on_error(tokens[i].source,tokens[i].lineno,"Malformed defined()") - j += 1 - if result.startswith('defined'): - tokens[i].type = self.t_ID - tokens[i].value = result - else: - tokens[i].type = self.t_INTEGER - tokens[i].value = self.t_INTEGER_TYPE(result) - del tokens[i+1:j+1] - i += 1 - return tokens - # Replace any defined(macro) before macro expansion - tokens = replace_defined(tokens) - tokens = self.expand_macros(tokens) - # Replace any defined(macro) after macro expansion - tokens = replace_defined(tokens) - if not tokens: - return (0, None) - for i,t in enumerate(tokens): - if t.type == self.t_ID: - repl = self.on_unknown_macro_in_expr(copy.copy(t)) - if repl is None: - # Add this identifier to a dictionary of variables - evalvars[t.value] = 0 - else: - tokens[i] = t = repl - # End of fragment copied from PCPP's 
preprocessor.py - # ************************************************** - - del evalfuncts # we don't use this - - evaluator = Evaluator(tokens) - try: - result = int(evaluator.evaluate()) - except EvalError as e: - self.on_error(evaluator.token.source, evaluator.token.lineno, - e.message) - return (0, None) - del evaluator - - return (result, tokens) if evalvars else (result, None) + def on_directive_unknown(self, directive, toks, ifpassthru, precedingtoks): + """pcpp does not process #error/#warning/#pragma/#line; do it here.""" + if directive.value == 'error': + self.on_error(directive.source, directive.lineno, + "Error directive: \"%s\"" % ''.join(i.value for i in toks)) + elif directive.value not in DIRECTIVES_PASSED_THROUGH: + self.on_error(directive.source, directive.lineno, + "Unknown directive: \"%s\"" % directive.value) diff --git a/pcpp b/pcpp index ed3b3f0..18d5bc4 160000 --- a/pcpp +++ b/pcpp @@ -1 +1 @@ -Subproject commit ed3b3f02e8f97c9112e2f6cd82115864ee056e21 +Subproject commit 18d5bc4cdb594c6d76a67f4e76fd5250015f6700