Rewrite ReportError() and change EParse to report columns in chars.

ReportError() needed to account for terminal encodings that don't support the characters being printed. It also reported an inaccurate column number, and therefore a misplaced marker, because the column was counted in bytes rather than in characters; that has been fixed. EParse.__init__() now calls a new function, GetErrLineCol(), which calculates the line and column corresponding to an error position. The algorithm for finding the start of the line has also been changed in both ReportError() and EParse.__init__(); as a result, the function fieldpos() has been removed. The exception's lno and cno fields are now 1-based rather than 0-based. Thanks to @Jomik for the report. Fixes #5.
2025-07-01 07:38:21 +00:00 · 2017-10-02 00:40:59 +02:00 · 2017-10-02 00:40:59 +02:00 · c544b51e37
commit c544b51e37
parent 08c69eee0f
3 changed files with 35 additions and 24 deletions
--- a/lslopt/lslparse.py
+++ b/lslopt/lslparse.py
@@ -44,26 +44,20 @@ def isalphanum_(c):
 def ishex(c):
    return '0' <= c <= '9' or 'A' <= c <= 'F' or 'a' <= c <= 'f'

-def fieldpos(inp, sep, n):
-    """Return the starting position of field n in a string inp that has zero or
-    more fields separated by sep
-    """
-    i = -1
-    for n in xrange(n):
-        i = inp.find(sep, i + 1)
-        if i < 0:
-            return i
-    return i + 1
+def GetErrLineCol(parser):
+    errorpos = parser.errorpos
+    lno = parser.script.count('\n', 0, errorpos)
+    lstart = parser.script.rfind('\n', 0, errorpos) + 1
+    # Find column number in characters
+    cno = len(parser.script[lstart:errorpos].decode('utf8'))
+    return (lno + 1, cno + 1)

 class EParse(Exception):
-
    def __init__(self, parser, msg):
        self.errorpos = parser.errorpos
-        self.lno = parser.script.count('\n', 0, self.errorpos)
-        self.cno = self.errorpos - fieldpos(parser.script, '\n', self.lno)
-        # Note the column number reported is in bytes.
+        self.lno, self.cno = GetErrLineCol(parser)

-        msg = u"(Line %d char %d): ERROR: %s" % (self.lno + 1, self.cno + 1, msg)
+        msg = u"(Line %d char %d): ERROR: %s" % (self.lno, self.cno, msg)
        super(EParse, self).__init__(msg)

 class EParseUEOF(EParse):
--- a/main.py
+++ b/main.py
@@ -19,7 +19,7 @@

 # This is the main executable program that imports the libraries.

-from lslopt.lslparse import parser,EParse,fieldpos
+from lslopt.lslparse import parser,EParse
 from lslopt.lsloutput import outscript
 from lslopt.lsloptimizer import optimizer
 import sys, os, getopt, re
@@ -30,11 +30,29 @@ VERSION = '0.2.1beta'


 def ReportError(script, e):
-    lastpos = fieldpos(script, '\n', e.lno+1)-1
-    assert lastpos != -1
-    if lastpos < -1: lastpos = len(script) # may hit EOF
-    sys.stderr.write(script[fieldpos(script, '\n', e.lno):lastpos].decode('utf8') + u"\n")
-    sys.stderr.write(u" " * e.cno + u"^\n")
+    linestart = script.rfind(b'\n', 0, e.errorpos) + 1
+    lineend = script.find(b'\n', e.errorpos)
+    if lineend == -1: lineend = len(script) # may hit EOF
+
+    # When the encoding of stderr is unknown (e.g. when redirected to a file),
+    # output will be encoded in UTF-8; otherwise the terminal's encoding will
+    # be used.
+    enc = sys.stderr.encoding if sys.stderr.encoding is not None else 'utf8'
+
+    # Synchronize the UTF-8 encoded line with the output line in the
+    # terminal's encoding. We need to compensate for the fact that the
+    # reported column applies to the UTF-8 version of the script.
+    # 1. Trim the UTF-8 line.
+    err_frag = script[linestart:e.errorpos]
+    # 2. Convert to Unicode; encode in the target encoding with replacing.
+    err_frag = err_frag.decode('utf8').encode(enc, 'backslashreplace')
+    # 3. Collect our prize: the length of that in characters.
+    cno = len(err_frag.decode(enc))
+
+    # Write the whole line in the target encoding.
+    err_line = script[linestart:lineend] + b'\n'
+    sys.stderr.write(err_line.decode('utf8').encode(enc, 'backslashreplace'))
+    sys.stderr.write(u" " * cno + u"^\n")
    sys.stderr.write(e.args[0] + u"\n")

 class UniConvScript(object):
--- a/testparser.py
+++ b/testparser.py
@@ -21,7 +21,7 @@
 from lslopt.lslparse import parser,EParseSyntax,EParseUEOF,EParseAlreadyDefined,\
    EParseUndefined,EParseTypeMismatch,EParseReturnShouldBeEmpty,EParseReturnIsEmpty,\
    EParseInvalidField,EParseFunctionMismatch,EParseDeclarationScope,\
-    EParseDuplicateLabel,EParseCantChangeState,EParseCodePathWithoutRet,fieldpos
+    EParseDuplicateLabel,EParseCantChangeState,EParseCodePathWithoutRet
 from lslopt.lsloutput import outscript
 from lslopt.lsloptimizer import optimizer
 from lslopt import lslfuncs
@@ -217,7 +217,6 @@ class Test02_Parser(UnitTestCase):
            ))
        print self.parser.scopeindex

-        self.assertEqual(fieldpos("a,b", ",", 3), -1)
        self.assertEqual(self.outscript.Value2LSL(lslfuncs.Key(u'')), '((key)"")')
        self.assertRaises(AssertionError, self.outscript.Value2LSL, '')

@@ -528,7 +527,7 @@ class Test03_Optimizer(UnitTestCase):
            self.assertFalse(True)
        except EParseSyntax as e:
            # should err before first closing brace
-            self.assertEqual(e.cno, 27)
+            self.assertEqual(e.cno, 28)
        except:
            # should raise no other exception
            self.assertFalse(True)