Rewrite ReportError() and change EParse to report columns in chars.

ReportError() now accounts for terminal encodings that cannot represent the characters being printed. It was also reporting an inaccurate column number, and therefore an inaccurate marker position, because the count was in bytes rather than in characters; that has been fixed as well.

Now EParse.__init__() calls a new function GetErrLineCol() that calculates the line and column corresponding to an error position.

The algorithm for finding the start of the line has also been changed in both ReportError() and EParse.__init__(); as a result, the function fieldpos() is no longer used and has been removed.

The exception's lno and cno fields have been changed to be 1-based, rather than 0-based.

Thanks to @Jomik for the report. Fixes #5.
This commit is contained in:
Sei Lisa 2017-10-02 00:40:59 +02:00
parent 08c69eee0f
commit c544b51e37
3 changed files with 35 additions and 24 deletions

View file

@ -44,26 +44,20 @@ def isalphanum_(c):
def ishex(c): def ishex(c):
return '0' <= c <= '9' or 'A' <= c <= 'F' or 'a' <= c <= 'f' return '0' <= c <= '9' or 'A' <= c <= 'F' or 'a' <= c <= 'f'
def fieldpos(inp, sep, n): def GetErrLineCol(parser):
"""Return the starting position of field n in a string inp that has zero or errorpos = parser.errorpos
more fields separated by sep lno = parser.script.count('\n', 0, errorpos)
""" lstart = parser.script.rfind('\n', 0, errorpos) + 1
i = -1 # Find column number in characters
for n in xrange(n): cno = len(parser.script[lstart:errorpos].decode('utf8'))
i = inp.find(sep, i + 1) return (lno + 1, cno + 1)
if i < 0:
return i
return i + 1
class EParse(Exception): class EParse(Exception):
def __init__(self, parser, msg): def __init__(self, parser, msg):
self.errorpos = parser.errorpos self.errorpos = parser.errorpos
self.lno = parser.script.count('\n', 0, self.errorpos) self.lno, self.cno = GetErrLineCol(parser)
self.cno = self.errorpos - fieldpos(parser.script, '\n', self.lno)
# Note the column number reported is in bytes.
msg = u"(Line %d char %d): ERROR: %s" % (self.lno + 1, self.cno + 1, msg) msg = u"(Line %d char %d): ERROR: %s" % (self.lno, self.cno, msg)
super(EParse, self).__init__(msg) super(EParse, self).__init__(msg)
class EParseUEOF(EParse): class EParseUEOF(EParse):

30
main.py
View file

@ -19,7 +19,7 @@
# This is the main executable program that imports the libraries. # This is the main executable program that imports the libraries.
from lslopt.lslparse import parser,EParse,fieldpos from lslopt.lslparse import parser,EParse
from lslopt.lsloutput import outscript from lslopt.lsloutput import outscript
from lslopt.lsloptimizer import optimizer from lslopt.lsloptimizer import optimizer
import sys, os, getopt, re import sys, os, getopt, re
@ -30,11 +30,29 @@ VERSION = '0.2.1beta'
def ReportError(script, e): def ReportError(script, e):
lastpos = fieldpos(script, '\n', e.lno+1)-1 linestart = script.rfind(b'\n', 0, e.errorpos) + 1
assert lastpos != -1 lineend = script.find(b'\n', e.errorpos)
if lastpos < -1: lastpos = len(script) # may hit EOF if lineend == -1: lineend = len(script) # may hit EOF
sys.stderr.write(script[fieldpos(script, '\n', e.lno):lastpos].decode('utf8') + u"\n")
sys.stderr.write(u" " * e.cno + u"^\n") # When the encoding of stderr is unknown (e.g. when redirected to a file),
# output will be encoded in UTF-8; otherwise the terminal's encoding will
# be used.
enc = sys.stderr.encoding if sys.stderr.encoding is not None else 'utf8'
# Synchronize the UTF-8 encoded line with the output line in the
# terminal's encoding. We need to compensate for the fact that the
# reported column applies to the UTF-8 version of the script.
# 1. Trim the UTF-8 line.
err_frag = script[linestart:e.errorpos]
# 2. Convert to Unicode; encode in the target encoding with replacing.
err_frag = err_frag.decode('utf8').encode(enc, 'backslashreplace')
# 3. Collect our prize: the length of that in characters.
cno = len(err_frag.decode(enc))
# Write the whole line in the target encoding.
err_line = script[linestart:lineend] + b'\n'
sys.stderr.write(err_line.decode('utf8').encode(enc, 'backslashreplace'))
sys.stderr.write(u" " * cno + u"^\n")
sys.stderr.write(e.args[0] + u"\n") sys.stderr.write(e.args[0] + u"\n")
class UniConvScript(object): class UniConvScript(object):

View file

@ -21,7 +21,7 @@
from lslopt.lslparse import parser,EParseSyntax,EParseUEOF,EParseAlreadyDefined,\ from lslopt.lslparse import parser,EParseSyntax,EParseUEOF,EParseAlreadyDefined,\
EParseUndefined,EParseTypeMismatch,EParseReturnShouldBeEmpty,EParseReturnIsEmpty,\ EParseUndefined,EParseTypeMismatch,EParseReturnShouldBeEmpty,EParseReturnIsEmpty,\
EParseInvalidField,EParseFunctionMismatch,EParseDeclarationScope,\ EParseInvalidField,EParseFunctionMismatch,EParseDeclarationScope,\
EParseDuplicateLabel,EParseCantChangeState,EParseCodePathWithoutRet,fieldpos EParseDuplicateLabel,EParseCantChangeState,EParseCodePathWithoutRet
from lslopt.lsloutput import outscript from lslopt.lsloutput import outscript
from lslopt.lsloptimizer import optimizer from lslopt.lsloptimizer import optimizer
from lslopt import lslfuncs from lslopt import lslfuncs
@ -217,7 +217,6 @@ class Test02_Parser(UnitTestCase):
)) ))
print self.parser.scopeindex print self.parser.scopeindex
self.assertEqual(fieldpos("a,b", ",", 3), -1)
self.assertEqual(self.outscript.Value2LSL(lslfuncs.Key(u'')), '((key)"")') self.assertEqual(self.outscript.Value2LSL(lslfuncs.Key(u'')), '((key)"")')
self.assertRaises(AssertionError, self.outscript.Value2LSL, '') self.assertRaises(AssertionError, self.outscript.Value2LSL, '')
@ -528,7 +527,7 @@ class Test03_Optimizer(UnitTestCase):
self.assertFalse(True) self.assertFalse(True)
except EParseSyntax as e: except EParseSyntax as e:
# should err before first closing brace # should err before first closing brace
self.assertEqual(e.cno, 27) self.assertEqual(e.cno, 28)
except: except:
# should raise no other exception # should raise no other exception
self.assertFalse(True) self.assertFalse(True)