#!/usr/bin/env python2
#
# (C) Copyright 2015-2024 Sei Lisa. All rights reserved.
#
# This file is part of LSL PyOptimizer.
#
# LSL PyOptimizer is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# LSL PyOptimizer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>.
# NOTE(review): the URL was missing between "see" and "." in this copy;
# angle-bracketed text appears stripped. Restored per the standard GPLv3
# header boilerplate.
#
# If you choose to use a later version of the GPL, please modify the text
# in the Usage() function appropriately to indicate the correct version.
#
# Sei Lisa is the author's username in the Second Life(R) virtual world.
# Second Life is a registered trademark of Linden Research, Inc.

# This is the main executable program that imports the libraries.

from lslopt.lslparse import parser,EParse
from lslopt.lsloutput import outscript
from lslopt.lsloptimizer import optimizer
import sys, os, getopt, re
import lslopt.lslcommon
import lslopt.lslloadlib
from strutil import *

VERSION = '0.3.0beta'


def ReportError(script, e):
    # Print to stderr the offending source line, a caret pointing at the
    # error column, and the error message carried by the EParse exception.
    linestart = script.rfind('\n', 0, e.errorpos) + 1
    lineend = script.find('\n', e.errorpos)
    if lineend == -1: lineend = len(script)  # may hit EOF

    # When the encoding of stderr is unknown (e.g. when redirected to a file),
    # output will be encoded in UTF-8; otherwise the terminal's encoding will
    # be used.
    enc = getattr(sys.stderr, 'encoding', 'utf8') or 'utf8'
    # Synchronize the UTF-8 encoded line with the output line in the
    # terminal's encoding. We need to compensate for the fact that the
    # reported column applies to the UTF-8 version of the script.
    # 1. Trim the UTF-8 line.
    err_frag = script[linestart:e.errorpos]
    # 2. Convert to Unicode; encode in the target encoding with replacing.
    err_frag = str2u(err_frag, 'utf8').encode(enc, 'backslashreplace')
    # 3. Collect our prize: the length of that in characters.
    cno = len(err_frag.decode(enc))
    # Write the whole line in the target encoding.
    err_line = script[linestart:lineend] + '\n'
    werr(err_line)
    werr(u" " * cno + u"^\n")
    werr(e.args[0] + u"\n")


class UniConvScript(object):
    """Converts the script to Unicode, setting the properties required by
    EParse to report a meaningful error position.
    """
    def __init__(self, script, options=(), filename=b'', emap=False):
        # NOTE(review): the filename default looks truncated in this copy;
        # angle-bracketed text appears stripped (likely b'<stdin>') - confirm
        # against upstream before relying on it.
        self.linedir = []
        self.filename = filename
        self.emap = emap
        # We don't interpret #line here. In case of an encode error,
        # we're in the dark about which file it comes from. User needs
        # --preshow to view the #line directives and find the correspondence
        # themselves.
        #self.processpre = 'processpre' in options
        self.processpre = False
        self.script = script

    def to_unicode(self):
        """Return the script decoded as Unicode.

        If the stored script is not already unicode, decode it as UTF-8;
        on failure, record the byte offset of the bad sequence in
        self.errorpos and raise EParse so the caller can report it.
        """
        if type(self.script) is not unicode:
            try:
                self.script = self.script.decode('utf8')
            except UnicodeDecodeError as e:
                # EParse requires str
                self.script = b2str(self.script, 'utf8')
                self.errorpos = e.start
                raise EParse(self, u"Invalid UTF-8 in script")
        return self.script


def PreparePreproc(script):
    """LSL accepts multiline strings, but the preprocessor doesn't. Fix that
    by converting newlines to "\\n". But in order to report accurate line and
    column numbers for text past that point, insert blank lines to fill the
    space previously occupied by the string, and spaces in the last line up
    to the point where the string was closed. That will place the next token
    in the same line and column it previously was.
    """
    s = u''
    nlines = 0
    col = 0

    # Trigraphs make our life really difficult.
    # We join lines that have \ or ??/ inside strings,
    # and we also replace regular <newline> inside strings with \n, counting
    # how many lines we join, to add them back at the end of the string in
    # order to keep the line count exact prior to preprocessing.
    # We also preserve the
    # original column of the text after the string, by adding as many spaces
    # as necessary.

    # We could let the preprocessor do the line joining on backslash-newline,
    # but by eliminating all newlines, we have control over the output column
    # of the text that follows the string and can report an accurate column
    # and line position in case of error.

    # The REs skip as much as possible in one go every time, only stopping to
    # analyze critical tokens.

    # We don't follow the C convention that backslash-return is analyzed first.
    # In c, the string "a\\nb" is the same as "a\nb" which prints as
    # ab. But in LSL, forgetting about the preprocessor, the string
    # "a\\nb" is valid and stands for a\nb. The principle of
    # least surprise seems to suggest to accept valid LSL strings as LSL
    # instead of reproducing that C quirk. This also matches what FS is doing
    # currently, so it's good for compatibility.

    # Outer scanner: consumes block/line comments (allowing spliced lines
    # inside them) and runs of non-quote characters in one match; stops at
    # each double quote, which hands control to tok2 below.
    tok = re.compile(str2u(
        # Python 3.5 does not recognize ur'...' literals
        r'(?:'
            r'/(?:\?\?/\n|\\\n)*\*.*?\*(?:\?\?/\n|\\\n)*/'
            r'|/(?:\?\?/\n|\\\n)*/(?:\?\?/\n|\\\n|[^\n])*\n'
            r'|[^"]'
        r')+'
        r'|"'
        , 'utf8'), re.S)
    # RE used inside strings.
    tok2 = re.compile(str2u(
        r'(?:'
            r"\?\?[='()!<>-]"  # valid trigraph except ??/ (backslash)
            r"|(?:\?\?/|\\)(?:\?\?[/='()!<>-]|[^\n])"
                # backslash trigraph or actual backslash,
                # followed by any trigraph or non-newline
            r'|(?!\?\?/\n|\\\n|"|\n).'
                # any character that doesn't start a trigraph/
                # backslash escape followed by a newline
                # or is a newline or double quote, as we're
                # interested in all those individually.
        r')'  # as many of those as possible
        r'|\?\?/\n|\\\n|\n|"'  # or any of those individually
        , 'utf8'))

    pos = 0
    match = tok.search(script, pos)
    while match:
        matched = match.group(0)
        pos += len(matched)
        if matched == u'"':
            # Opening quote: scan the string body with tok2.
            s += matched
            nlines = col = 0
            match2 = tok2.search(script, pos)
            while match2:
                matched2 = match2.group(0)
                pos += len(matched2)

                if matched2 == u'\\\n' or matched2 == u'??/\n':
                    # Line splice inside a string: join the lines, but count
                    # them so they can be re-added after the string closes.
                    nlines += 1
                    col = 0
                    match2 = tok2.search(script, pos)
                    continue
                if matched2 == u'"':
                    # Closing quote: emit the newlines that the string
                    # swallowed, plus padding spaces so the following token
                    # keeps its original column.
                    if nlines:
                        if script[pos:pos+1] == u'\n':
                            col = -1  # don't add spaces if not necessary
                        # col misses the quote added here, so add 1
                        s += u'"' + u'\n'*nlines + u' '*(col+1)
                    else:
                        s += u'"'
                    break
                if matched2 == u'\n':
                    # Raw newline inside the string becomes the \n escape.
                    nlines += 1
                    col = 0
                    s += u'\\n'
                else:
                    col += len(matched2)
                    s += matched2
                match2 = tok2.search(script, pos)
        else:
            s += matched
        match = tok.search(script, pos)
    return s


def ScriptHeader(script, avname):
    # Wrap the original source in the unprocessed-text comment header used
    # by preprocessed scripts, staying compatible with Firestorm's format.
    if avname:
        avname = ' - ' + avname
    return ('//start_unprocessed_text\n/*'
        # + re.sub(r'([*/])(?=[*|/])', r'\1|', script) # FS's algorithm
        # HACK: This won't break strings containing ** or /* or // like URLs,
        # while still being compatible with FS.
        + re.sub(r'([*/]\||\*(?=/))', r'\1|', script)
        + '*/\n//end_unprocessed_text\n//nfo_preprocessor_version 0\n'
        '//program_version LSL PyOptimizer v' + VERSION + avname
        + '\n//mono\n\n')


# NOTE(review): Usage() continues beyond this chunk; its help string is cut
# off mid-content and the original line breaks inside the string are unknown,
# so the visible fragment is preserved verbatim.
def Usage(progname, about = None):
    if about is None:
        werr(
u"""LSL optimizer v{version} (C) Copyright 2015-2024 Sei Lisa. All rights reserved. This program comes with ABSOLUTELY NO WARRANTY. This is free software, and you are welcome to redistribute it under certain conditions; see the file COPYING for details. This program is licensed under the GNU General Public License version 3. Usage: {progname} [-O|--optimizer-options=[+|-]