Add files necessary for an internal preprocessor (not implemented yet)

Includes PCPP as a submodule (which in turn pulls in PLY as a submodule, so be sure to initialize submodules recursively). Also includes a file that interfaces PCPP with the optimizer, patching its behaviour to suit our needs.
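
The usual way to fetch PCPP together with its nested PLY submodule after cloning is:

    git submodule update --init --recursive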

Special thanks to Niall Douglas and David Beazley for authoring PCPP.
Sei Lisa 2019-01-11 21:21:36 +01:00
parent 352f410d12
commit c0168c8a34
3 changed files with 680 additions and 0 deletions

.gitmodules vendored Normal file

@ -0,0 +1,3 @@
[submodule "pcpp"]
path = pcpp
url = https://github.com/ned14/pcpp

cpreproc.py Normal file

@ -0,0 +1,676 @@
# (C) Copyright 2015-2019 Sei Lisa. All rights reserved.
#
# This file is part of LSL PyOptimizer.
#
# LSL PyOptimizer is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# LSL PyOptimizer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>.
#
# This file includes an excerpt from PCPP, by Niall Douglas and David
# Beazley, available here:
# https://github.com/ned14/pcpp/blob/e1219ce157b4dfcfee3181faa6ec5129c3a41e78/pcpp/preprocessor.py#L873-L935
# which is distributed under the following conditions:
#
# (C) Copyright 2018-2019 Niall Douglas http://www.nedproductions.biz/
# (C) Copyright 2007-2019 David Beazley http://www.dabeaz.com/
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
# endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# (End of terms and conditions for the PCPP excerpt)
#
# The following fragments of code are hereby irrevocably donated to the
# public domain:
# - The Evaluator class in its entirety.
# - The evalexpr method in its entirety except for the excerpt mentioned
# above, which remains copyright of its authors.
# - Every line between this one and the Evaluator class.
import sys, os, re, copy
oldsyspath = sys.path[:]  # take a copy so the restore below is effective
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)),
'pcpp'))
from pcpp import preprocessor
sys.path = oldsyspath  # restore the original module search path
# Define the number of bits to work with in expression evaluation
# (per the standard, this should be the bits in uintmax_t).
INTMAXBITS = 64
UINTMAX_MAX = (1 << INTMAXBITS) - 1
INTMAX_MIN = -(1 << (INTMAXBITS - 1))
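# With the default INTMAXBITS = 64, for example, UINTMAX_MAX is
# 0xFFFFFFFFFFFFFFFF (2**64 - 1) and INTMAX_MIN is -0x8000000000000000 (-2**63).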
DSYMBOLS = {'->', '-=', '--', '==', '<<', '<=', '>>', '>=', '||', '|=',
'&&', '&=', '!=', '^=', '*=', '/=', '%=', '+=', '++'}
DIGRAPHS = {'<:':'[', ':>':']', '<%':'{', '%>':'}', '%:':'#'}
ESCAPES = {'a':7,'b':8,'f':12,'n':10,'r':13,'t':9,'v':11,
'"':34, '\\':92, '\'':39, '?':63}
# Exception to report an evaluation error
class EvalError(Exception): pass
class uint(long): pass  # tags a value as unsigned (uintmax_t) in expressions
class sint(long): pass  # tags a value as signed (intmax_t) in expressions
class Evaluator(object):
"""Recursive descendent parser to evaluate C preprocessor expressions."""
# Int parser
resolve_int_regex = re.compile(
# Group 1: Hex
# Group 2: Oct
# Group 3: Dec
# Group 4: Unsigned
r'^(?:(0x[0-9a-f]+)|(0[0-7]*)|([1-9][0-9]*))'
r'(?:(u(?:ll?)?|(?:ll?)?u)|(?:ll?)?)$', re.I | re.S)
# Char parser (without the quotes)
ctoken_regex = re.compile(
r'\\(?:'
r'[\?' r"'" r'"\\abfnrtv]|[Xx][0-9a-fA-F]+|[0-7]{1,3}'
r'|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}'
r')'
r'|.', re.S)
def __init__(self, tokens):
assert tokens, "Empty tokens list???"
self.tokens = tokens
self.ptr = 0
self.evaluating = True
self.conv = {uint: self.to_uint, sint: self.to_sint}
self.nextToken()
def to_uint(self, i):
return uint(i & UINTMAX_MAX)
def to_sint(self, i):
return sint(((i - INTMAX_MIN) & UINTMAX_MAX) + INTMAX_MIN)
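# For example, to_uint(-1) wraps to UINTMAX_MAX and to_sint(UINTMAX_MAX) wraps
# back to -1, mirroring conversion to uintmax_t/intmax_t in C.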
def nextToken(self):
"""Sets self.token to the next token and advances the token pointer.
Skips whitespace tokens, except those containing a newline (which mark
the end of the expression). Sets self.token to a CPP_WS token with value
'\n' if there's no next token, and to a synthesized token for multichar
operators not currently handled by PCPP.
"""
try:
while True:
tok = self.token = self.tokens[self.ptr]
self.ptr += 1
if tok.type != 'CPP_WS' or '\n' in tok.value:
break
except IndexError:
# Synthesize a new CPP_WS token with a newline, to signal
# end-of-text (we copy it from the last one in the token stream).
self.token = copy.copy(self.tokens[-1])
self.token.type = 'CPP_WS'
self.token.value = '\n'
return
# Work around a lexing problem in PCPP
#
# PCPP doesn't tokenize multichar tokens except ##, so we do that job
# here, to ease processing and report more errors (e.g. 5--3 should be
# reported as an error because it uses the post-decrement operator,
# instead of evaluating to 8, which is the correct result for 5- -3).
# The tokens processed here are those in the C standard missed by PCPP:
# -> -= -- << <= >> >= || |= && &= == != ^= *= /= += ++ %=
# >>= <<=
# ...
# <: :> <% %> %:
# %:%:
#
# This is already a single token, therefore it's not processed here:
# ##
try:
next = self.tokens[self.ptr]
except IndexError:
return
s = tok.type + next.type
if s in DSYMBOLS:
tok = self.token = copy.copy(tok)
tok.type = s
tok.value += next.value
self.ptr += 1
if s in ('<<', '>>'):
# check for <<= >>=
try:
next2 = self.tokens[self.ptr]
if next2.type == '=':
tok.type += next2.type
tok.value += next2.value
self.ptr += 1
except IndexError:
pass
return
if s in DIGRAPHS:
# digraph or DPOUND
tok = self.token = copy.copy(tok)
tok.type = DIGRAPHS[s]
tok.value += next.value
self.ptr += 1
try:
next2 = self.tokens[self.ptr]
next3 = self.tokens[self.ptr + 1]
# only '%:' followed by '%:' forms the '%:%:' spelling of '##'
if tok.type == '#' and next2.type == '%' and next3.type == ':':
tok.type = '##'
tok.value += next2.value + next3.value
self.ptr += 2
except IndexError:
pass
return
if s == '..':
try:
next2 = self.tokens[self.ptr + 1]
if next2.type == '.':
tok = self.token = copy.copy(tok)
tok.type = '...'
tok.value += next.value + next2.value
self.ptr += 2
except IndexError:
pass
return
def eat(self, *toktypes):
"""Return True and advance pointer if the current token matches. """
if self.token.type in toktypes:
self.nextToken()
return True
return False
def expect(self, toktype):
"""Checks an expected token and eats it"""
expect = toktype
if toktype == 'END' and '\n' in self.token.value:
expect = 'CPP_WS'
if not self.eat(expect):
raise EvalError(
"Unexpected token %s (%s) in expression, expected %s"
% (repr(self.token.value), self.token.type, toktype))
def conversions(self, op1, op2):
"""Perform usual arithmetic conversions on two operands."""
assert type(op1) in (sint, uint) and type(op2) in (sint, uint)
if type(op1) != type(op2):
# Mixed signedness: both operands convert to unsigned, wrapping
# negative values modulo 2**INTMAXBITS as C requires.
return self.to_uint(op1), self.to_uint(op2)
return op1, op2
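# Example of the rule above: in "-1 < 1u" the signed -1 is converted to
# unsigned, becoming UINTMAX_MAX, so the comparison yields 0, as in C.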
def primary_expression(self, evaluating):
"""Non-terminal: primary_expression.
primary_expression:
IDENTIFIER | STRING_LITERAL | CHAR_LITERAL | INTEGER
| '(' expression ')'
"""
tok = self.token
if self.eat('('):
ret = self.expression(evaluating)
self.expect(')')
return ret
#if self.eat('CPP_STRING'):
# return tok.value
if self.eat('CPP_CHAR'):
charstr = tok.value
unicode = False
if tok.value.startswith('L'):
unicode = True
charstr = charstr[2:-1]
else:
charstr = charstr[1:-1]
onechar = False
for ctok in self.ctoken_regex.finditer(charstr):
if onechar:
raise EvalError("Multiple characters in char literal")
onechar = True
c = ctok.group(0)
if c == '\\':
raise EvalError("Invalid escape sequence in char literal")
if c.startswith('\\'):
if c.startswith('\\u') or c.startswith('\\U'):
result = int(c[2:], 16)
if ((result < 0xA0 and result not in (0x24,0x40,0x60))
or 0xD800 <= result <= 0xDFFF
):
raise EvalError("Invalid universal character %s"
% c)
if result > 0xFF and not unicode:
raise EvalError("Char literal out of range")
elif c.startswith('\\x') or c.startswith('\\X'):
result = int(c[2:], 16)
if result > 0xFF:
raise EvalError("Hex literal out of range")
elif c[1] in 'abfnrtv"?\'\\':
result = ESCAPES[c[1]]
else:
result = int(c[1:], 8)
else:
assert len(c) == 1 and c != '\''
result = ord(c)  # plain character; falls through to the sint() below
# This may need reconsideration if INTMAXBITS is < 22 (the bits
# necessary to fit a Unicode codepoint in a signed integer).
return sint(result) # our char is unsigned
if tok.type == 'CPP_ID':
tok = self.token = copy.copy(tok)
tok.type = 'CPP_INTEGER'
tok.value = '0'
# fall through to process it as CPP_INTEGER
if self.eat('CPP_INTEGER'):
m = self.resolve_int_regex.search(tok.value)
if not m:
raise EvalError("Invalid integer literal")
val = (int(m.group(2), 8) if m.group(2)
else int(m.group(1) or m.group(3), 0))
val = self.to_uint(val) if m.group(4) else self.to_sint(val)
return val
if tok.type == 'CPP_STRING':
raise EvalError("Strings are not allowed in expressions")
if tok.type == 'CPP_WS' and '\n' in tok.value:
raise EvalError('Unexpected end of expression')
self.expect('CPP_INTEGER')
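# Examples of primary expressions handled above: "0x10" and "020" both
# evaluate to 16, "10u" becomes unsigned, "'A'" evaluates to 65, and any
# identifier left after macro expansion (an undefined macro) evaluates to 0,
# as the C standard requires for #if expressions.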
def factor_expression(self, evaluating):
"""Non-terminal: factor_expression
factor_expression:
primary_expression
| unary_operator factor_expression
"""
# Avoid recursing for unary operators. Apply them post-evaluation.
k = None
while True:
toktype = self.token.type
if not self.eat('-', '+', '~', '!'):
break
if toktype != '+':  # unary '+' is a no-op; consume it and continue
k = k or []
k.append(toktype)
result = self.primary_expression(evaluating)
while k:
operation = k.pop()
if operation == '!':
result = sint(0 if result else 1)
else:
result = self.conv[type(result)](-result if operation == '-'
else ~result)
return result
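# For example, "!0" and "-~0" both evaluate to 1, while a unary '+' is
# consumed but leaves the value unchanged.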
def term_expression(self, evaluating):
"""Non-terminal: term_expression
term_expression:
factor_expression
| term_expression '*' factor_expression
| term_expression '/' factor_expression
| term_expression '%' factor_expression
"""
result = self.factor_expression(evaluating)
while True:
toktype = self.token.type
if not self.eat('*', '/', '%'):
return result
operand = self.factor_expression(evaluating)
if evaluating and operand == 0 and toktype != '*':
raise EvalError("Division by zero")
result, operand = self.conversions(result, operand)
result = self.conv[type(result)](result if not evaluating
else result * operand if toktype == '*'
else result // operand if toktype == '/'
else result % operand)
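# For example, "7 / 2" evaluates to 3 and "7 % 2" to 1; "1 / 0" raises
# EvalError, but only when the division is actually evaluated (it is
# tolerated inside short-circuited or unselected branches).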
def arithmetic_expression(self, evaluating):
"""Non-terminal: arithmetic_expression
arithmetic_expression:
term_expression
| arithmetic_expression '+' term_expression
| arithmetic_expression '-' term_expression
"""
result = self.term_expression(evaluating)
while True:
toktype = self.token.type
if not self.eat('+', '-'):
return result
operand = self.term_expression(evaluating)
result, operand = self.conversions(result, operand)
result = self.conv[type(result)](result + operand if toktype == '+'
else result - operand)
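# Arithmetic wraps modulo 2**INTMAXBITS; for example, with 64-bit evaluation
# "0x7FFFFFFFFFFFFFFF + 1" wraps around to INTMAX_MIN.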
def shift_expression(self, evaluating):
"""Non-terminal: shift_expression
shift_expression:
arithmetic_expression
| shift_expression '<<' arithmetic_expression
| shift_expression '>>' arithmetic_expression
"""
result = self.arithmetic_expression(evaluating)
while True:
tok = self.token
if not self.eat('<<', '>>'):
return result
operand = self.arithmetic_expression(evaluating)
# Clamp the shift count: avoid huge intermediate results (to prevent DoS)
# and Python's ValueError on a negative left-shift count.
result = self.conv[type(result)](
result << min(max(operand, 0), INTMAXBITS)
if tok.type == '<<' else result >> max(operand, 0))
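# For example, "1 << 3" evaluates to 8, while "1 << 100" overflows the 64-bit
# range and masks down to 0; '>>' on a negative signed value behaves as an
# arithmetic (sign-propagating) shift here.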
def relational_expression(self, evaluating):
"""Non-terminal: relational_expression
relational_expression:
shift_expression
| relational_expression '>' shift_expression
| relational_expression '<' shift_expression
| relational_expression '>=' shift_expression
| relational_expression '<=' shift_expression
"""
result = self.shift_expression(evaluating)
while True:
tok = self.token
if not self.eat('<', '>', '<=', '>='):
return result
operand = self.shift_expression(evaluating)
result, operand = self.conversions(result, operand)
# Use the fact that a < b <-> b > a
# Use the fact that a < b <-> !(a >= b)
if tok.type == '>' or tok.type == '<=':
result, operand = operand, result
result = sint(1 if (result < operand) == (tok.type in ('<', '>'))
else 0)
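# The swap above reduces all four operators to a single '<' test: for example,
# "2 <= 2" swaps its operands, finds 2 < 2 false, and since '<=' expects the
# negated result, the expression evaluates to 1.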
def equality_expression(self, evaluating):
"""Non-terminal: equality_expression
equality_expression:
relational_expression
| equality_expression '==' relational_expression
| equality_expression '!=' relational_expression
"""
result = self.relational_expression(evaluating)
while True:
tok = self.token
if not self.eat('==', '!='):
return result
operand = self.relational_expression(evaluating)
result, operand = self.conversions(result, operand)
result = sint(1 if (result == operand) == (tok.type == '==')
else 0)
def bitwise_and_expression(self, evaluating):
"""Non-terminal: bitwise_and_expression
bitwise_and_expression:
equality_expression
| bitwise_and_expression '&' equality_expression
"""
result = self.equality_expression(evaluating)
while True:
if not self.eat('&'):
return result
operand = self.equality_expression(evaluating)
result, operand = self.conversions(result, operand)
result = self.conv[type(result)](result & operand)
def bitwise_xor_expression(self, evaluating):
"""Non-terminal: bitwise_xor_expression
bitwise_xor_expression:
bitwise_and_expression
| bitwise_xor_expression '^' bitwise_and_expression
"""
result = self.bitwise_and_expression(evaluating)
while True:
if not self.eat('^'):
return result
operand = self.bitwise_and_expression(evaluating)
result, operand = self.conversions(result, operand)
result = self.conv[type(result)](result ^ operand)
def bitwise_or_expression(self, evaluating):
"""Non-terminal: bitwise_or_expression
bitwise_or_expression:
bitwise_xor_expression
| bitwise_or_expression '|' bitwise_xor_expression
"""
result = self.bitwise_xor_expression(evaluating)
while True:
if not self.eat('|'):
return result
operand = self.bitwise_xor_expression(evaluating)
result, operand = self.conversions(result, operand)
result = self.conv[type(result)](result | operand)
def logical_and_expression(self, evaluating):
"""Non-terminal: logical_and_expression
logical_and_expression:
bitwise_or_expression
| logical_and_expression '&&' bitwise_or_expression
"""
result = self.bitwise_or_expression(evaluating)
while True:
if not self.eat('&&'):
return result
evaluating = evaluating and not not result
operand = self.bitwise_or_expression(evaluating)
result = sint(1 if result and (not evaluating or operand) else 0)
def logical_or_expression(self, evaluating):
"""Non-terminal: logical_or_expression
logical_or_expression:
logical_and_expression
| logical_or_expression '||' logical_and_expression
"""
result = self.logical_and_expression(evaluating)
while True:
if not self.eat('||'):
return result
evaluating = evaluating and not result
operand = self.logical_and_expression(evaluating)
result = sint(1 if result or (evaluating and operand) else 0)
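# Short-circuiting is achieved by parsing the right-hand side with
# evaluating=False; for example, "0 && (1 / 0)" evaluates to 0 and
# "1 || (1 / 0)" to 1, without reporting a division by zero.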
def conditional_expression(self, evaluating):
"""Non-terminal: conditional_expression.
conditional_expression:
logical_or_expression
| logical_or_expression '?' expression ':' conditional_expression
"""
result = self.logical_or_expression(evaluating)
if self.eat('?'):
if result:
result = self.expression(evaluating)
self.expect(':')
operand = self.conditional_expression(False)
else:
operand = self.expression(False)
self.expect(':')
result = self.conditional_expression(evaluating)
result, operand = self.conversions(result, operand)
return result
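# Only the selected branch is evaluated; for example, "1 ? 2 : (1 / 0)"
# evaluates to 2 because the unselected branch is parsed with
# evaluating=False.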
def expression(self, evaluating = True):
"""Non-terminal: expression.
expression:
conditional_expression (always)
| expression ',' conditional_expression (if not evaluating)
"""
if evaluating:
return self.conditional_expression(evaluating)
while True:
result = self.conditional_expression(evaluating)
if not self.eat(','):
return result
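# The comma operator is only tolerated where it is never evaluated (C forbids
# it in evaluated constant expressions); for example, "0 ? (1, 2) : 3" parses
# fine, whereas a top-level "1, 2" is reported as an error.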
def evaluate(self):
result = self.expression(True)
# Did we eat all tokens?
self.expect('END')
return result
class Preproc(preprocessor.Preprocessor):
def __init__(self, input, defines=(), sysincpaths=(), incpaths=()):
super(Preproc, self).__init__()
self.auto_pragma_once_enabled = False
for define in defines:
self.define('%s %s' % define)
for v in sysincpaths:
self.add_path(v)
for v in incpaths:
self.add_path(v)
self.ignore = set()
self.parser = self.parsegen(input, '<stdin>', '<stdin>')
def get(self):
try:
import StringIO
except ImportError:
import io as StringIO
ret = StringIO.StringIO()
self.write(ret)
return (ret.getvalue(), self.macros)
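# A minimal usage sketch (hypothetical input, based on the interface above):
#   pp = Preproc('#if defined(FOO)\nyes\n#else\nno\n#endif\n',
#                defines=(('FOO', '1'),), incpaths=('include',))
#   output, macros = pp.get()
# Each element of 'defines' is a (name, value) pair, expanded into a
# "#define name value" line; get() returns the preprocessed text and the
# final macro table.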
def on_include_not_found(self, is_system_include, curdir, includepath):
"""Don't pass through the #include line if the file does not exist"""
self.on_error(self.lastdirective.source, self.lastdirective.lineno,
"Include file not found: %s" % includepath)
def evalexpr(self, tokens):
"""Evaluate a sequence of tokens as an expression.
The original uses eval(), which is unsafe for web usage. This one uses
our own recursive descent parser.
"""
# ****************************************************
# Start of fragment copied from PCPP's preprocessor.py
"""Evaluate an expression token sequence for the purposes of evaluating
integral expressions."""
if not tokens:
self.on_error('unknown', 0, "Empty expression")
return (0, None)
# tokens = tokenize(line)
# Search for defined macros
evalfuncts = {'defined' : lambda x: True}
evalvars = {}
def replace_defined(tokens):
i = 0
while i < len(tokens):
if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
j = i + 1
needparen = False
result = "0L"
while j < len(tokens):
if tokens[j].type in self.t_WS:
j += 1
continue
elif tokens[j].type == self.t_ID:
if tokens[j].value in self.macros:
result = "1L"
else:
repl = self.on_unknown_macro_in_defined_expr(tokens[j])
if repl is None:
# Add this identifier to a dictionary of variables
evalvars[tokens[j].value] = 0
result = 'defined('+tokens[j].value+')'
else:
result = "1L" if repl else "0L"
if not needparen: break
elif tokens[j].value == '(':
needparen = True
elif tokens[j].value == ')':
break
else:
self.on_error(tokens[i].source,tokens[i].lineno,"Malformed defined()")
j += 1
if result.startswith('defined'):
tokens[i].type = self.t_ID
tokens[i].value = result
else:
tokens[i].type = self.t_INTEGER
tokens[i].value = self.t_INTEGER_TYPE(result)
del tokens[i+1:j+1]
i += 1
return tokens
# Replace any defined(macro) before macro expansion
tokens = replace_defined(tokens)
tokens = self.expand_macros(tokens)
# Replace any defined(macro) after macro expansion
tokens = replace_defined(tokens)
if not tokens:
return (0, None)
for i,t in enumerate(tokens):
if t.type == self.t_ID:
repl = self.on_unknown_macro_in_expr(copy.copy(t))
if repl is None:
# Add this identifier to a dictionary of variables
evalvars[t.value] = 0
else:
tokens[i] = t = repl
# End of fragment copied from PCPP's preprocessor.py
# **************************************************
del evalfuncts # we don't use this
evaluator = Evaluator(tokens)
try:
result = evaluator.evaluate()
except EvalError as e:
self.on_error(evaluator.token.source, evaluator.token.lineno,
e.message)
return (0, None)
del evaluator
return (result, tokens) if evalvars else (result, None)
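# To illustrate the flow above with a directive such as
# "#if defined(FOO) && FOO > 1": replace_defined() first turns "defined(FOO)"
# into 0L or 1L (or records FOO as an unknown variable), the remaining macros
# are expanded, and the resulting token list is handed to Evaluator. If any
# unknown identifiers were involved, the token list is returned alongside the
# result so that PCPP can pass the directive through unevaluated if it is
# configured to do so.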

pcpp Submodule

@ -0,0 +1 @@
Subproject commit e1219ce157b4dfcfee3181faa6ec5129c3a41e78