Rewrite ReportError() and change EParse to report columns in chars.

ReportError() did not account for terminal encodings that cannot represent the characters being printed; it now does. It also reported an inaccurate column number, and therefore a misplaced marker, because the column was counted in bytes rather than in characters. Both problems have been fixed.

Now EParse.__init__() calls a new function GetErrLineCol() that calculates the line and column corresponding to an error position.

The algorithm for finding the start of the line has also been changed in both ReportError() and EParse.__init__(); as a result, the function fieldpos() is no longer used and has been removed.

The exception's lno and cno fields have been changed to be 1-based, rather than 0-based.

Thanks to @Jomik for the report. Fixes #5.
This commit is contained in:
Sei Lisa 2017-10-02 00:40:59 +02:00
parent 08c69eee0f
commit c544b51e37
3 changed files with 35 additions and 24 deletions

View file

@ -44,26 +44,20 @@ def isalphanum_(c):
def ishex(c):
return '0' <= c <= '9' or 'A' <= c <= 'F' or 'a' <= c <= 'f'
def fieldpos(inp, sep, n):
    """Return the starting position of field n in a string.

    inp is treated as zero or more fields separated by sep, and n is the
    0-based index of the field to locate.

    Returns the index in inp of the first character of field n, or -1 if
    inp has fewer than n separators (i.e. the field does not exist).
    Field 0 always starts at position 0.
    """
    pos = -1
    # Skip over n separators; the field starts right after the last one.
    # A separate counter is used so the parameter n is not rebound.
    remaining = n
    while remaining > 0:
        pos = inp.find(sep, pos + 1)
        if pos < 0:
            # Ran out of separators before reaching field n.
            return pos
        remaining -= 1
    return pos + 1
def GetErrLineCol(parser):
    """Return the 1-based (line, column) of parser.errorpos in parser.script.

    parser must expose two attributes: script (the source being parsed) and
    errorpos (an index into script). The column is counted in characters,
    not bytes: when script is a byte string, the part of the line before
    errorpos is decoded as UTF-8 before being measured.

    Decoding is tolerant so that computing the position can never raise and
    hide the parse error being reported: invalid bytes count as one
    character each, and a multi-byte sequence cut in half by errorpos is not
    counted (the column then points at that character itself).
    """
    import codecs

    errorpos = parser.errorpos
    script = parser.script
    is_bytes = isinstance(script, bytes)
    # Use a newline literal of the same type as the script so that both
    # byte strings and text strings can be searched.
    newline = b'\n' if is_bytes else u'\n'

    lno = script.count(newline, 0, errorpos)
    # rfind() returns -1 when on the first line, so +1 yields 0.
    lstart = script.rfind(newline, 0, errorpos) + 1

    fragment = script[lstart:errorpos]
    if is_bytes:
        # An incremental decoder keeps an incomplete trailing sequence
        # buffered instead of failing on it or emitting a replacement.
        decoder = codecs.getincrementaldecoder('utf8')('replace')
        fragment = decoder.decode(fragment)
    return (lno + 1, len(fragment) + 1)
class EParse(Exception):
def __init__(self, parser, msg):
self.errorpos = parser.errorpos
self.lno = parser.script.count('\n', 0, self.errorpos)
self.cno = self.errorpos - fieldpos(parser.script, '\n', self.lno)
# Note the column number reported is in bytes.
self.lno, self.cno = GetErrLineCol(parser)
msg = u"(Line %d char %d): ERROR: %s" % (self.lno + 1, self.cno + 1, msg)
msg = u"(Line %d char %d): ERROR: %s" % (self.lno, self.cno, msg)
super(EParse, self).__init__(msg)
class EParseUEOF(EParse):

30
main.py
View file

@ -19,7 +19,7 @@
# This is the main executable program that imports the libraries.
from lslopt.lslparse import parser,EParse,fieldpos
from lslopt.lslparse import parser,EParse
from lslopt.lsloutput import outscript
from lslopt.lsloptimizer import optimizer
import sys, os, getopt, re
@ -30,11 +30,29 @@ VERSION = '0.2.1beta'
def ReportError(script, e):
lastpos = fieldpos(script, '\n', e.lno+1)-1
assert lastpos != -1
if lastpos < -1: lastpos = len(script) # may hit EOF
sys.stderr.write(script[fieldpos(script, '\n', e.lno):lastpos].decode('utf8') + u"\n")
sys.stderr.write(u" " * e.cno + u"^\n")
linestart = script.rfind(b'\n', 0, e.errorpos) + 1
lineend = script.find(b'\n', e.errorpos)
if lineend == -1: lineend = len(script) # may hit EOF
# When the encoding of stderr is unknown (e.g. when redirected to a file),
# output will be encoded in UTF-8; otherwise the terminal's encoding will
# be used.
enc = sys.stderr.encoding if sys.stderr.encoding is not None else 'utf8'
# Synchronize the UTF-8 encoded line with the output line in the
# terminal's encoding. We need to compensate for the fact that the
# reported column applies to the UTF-8 version of the script.
# 1. Trim the UTF-8 line.
err_frag = script[linestart:e.errorpos]
# 2. Convert to Unicode; encode in the target encoding with replacing.
err_frag = err_frag.decode('utf8').encode(enc, 'backslashreplace')
# 3. Collect our prize: the length of that in characters.
cno = len(err_frag.decode(enc))
# Write the whole line in the target encoding.
err_line = script[linestart:lineend] + b'\n'
sys.stderr.write(err_line.decode('utf8').encode(enc, 'backslashreplace'))
sys.stderr.write(u" " * cno + u"^\n")
sys.stderr.write(e.args[0] + u"\n")
class UniConvScript(object):

View file

@ -21,7 +21,7 @@
from lslopt.lslparse import parser,EParseSyntax,EParseUEOF,EParseAlreadyDefined,\
EParseUndefined,EParseTypeMismatch,EParseReturnShouldBeEmpty,EParseReturnIsEmpty,\
EParseInvalidField,EParseFunctionMismatch,EParseDeclarationScope,\
EParseDuplicateLabel,EParseCantChangeState,EParseCodePathWithoutRet,fieldpos
EParseDuplicateLabel,EParseCantChangeState,EParseCodePathWithoutRet
from lslopt.lsloutput import outscript
from lslopt.lsloptimizer import optimizer
from lslopt import lslfuncs
@ -217,7 +217,6 @@ class Test02_Parser(UnitTestCase):
))
print self.parser.scopeindex
self.assertEqual(fieldpos("a,b", ",", 3), -1)
self.assertEqual(self.outscript.Value2LSL(lslfuncs.Key(u'')), '((key)"")')
self.assertRaises(AssertionError, self.outscript.Value2LSL, '')
@ -528,7 +527,7 @@ class Test03_Optimizer(UnitTestCase):
self.assertFalse(True)
except EParseSyntax as e:
# should err before first closing brace
self.assertEqual(e.cno, 27)
self.assertEqual(e.cno, 28)
except:
# should raise no other exception
self.assertFalse(True)