2024-04-14 02:40:21 -07:00
|
|
|
# (C) Copyright 2015-2024 Sei Lisa. All rights reserved.
|
2019-01-15 12:27:02 -07:00
|
|
|
#
|
|
|
|
# This file is part of LSL PyOptimizer.
|
|
|
|
#
|
|
|
|
# LSL PyOptimizer is free software: you can redistribute it and/or
|
|
|
|
# modify it under the terms of the GNU General Public License as
|
|
|
|
# published by the Free Software Foundation, either version 3 of the
|
|
|
|
# License, or (at your option) any later version.
|
|
|
|
#
|
|
|
|
# LSL PyOptimizer is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
# String <-> Bytes conversion and output utilities
|
|
|
|
|
2019-01-31 03:49:31 -07:00
|
|
|
# Microsoft again not following standards: Windows names its UTF-8 code page
# 'cp65001'. Register that name as an alias of the UTF-8 codec. Sigh.
|
|
|
|
import codecs
|
|
|
|
def _cp65001_search(name):
    """Codec search hook: resolve 'cp65001' to the UTF-8 codec, else decline."""
    if name != 'cp65001':
        # Returning None tells the codec registry to keep looking elsewhere.
        return None
    return codecs.lookup('utf8')


codecs.register(_cp65001_search)
|
|
|
|
|
2019-01-15 12:27:02 -07:00
|
|
|
import sys
|
2022-12-11 12:39:44 -07:00
|
|
|
|
2023-02-03 13:48:33 -07:00
|
|
|
# Default; flipped to True below only on a narrow (UTF-16) Python 2 build.
python2Narrow = False

if sys.version_info.major >= 3:
    # Python 3: native str is already Unicode, so the Python 2 names become
    # plain aliases of their Python 3 counterparts.
    unicode = str
    unichr = chr
    xrange = range
    python3 = True
    python2 = False
    uniwrap = unicode
    bytewrap = bytes

    def str2u(s, enc=None):
        """Convert a native Python 3 str to Unicode. This is a NOP."""
        return s

    def str2b(s, enc=None):
        """Convert a native Python 3 str to bytes, with the given encoding.

        enc is either an encoding name or an object carrying an `encoding`
        attribute (such as a stream); when it yields nothing, UTF-8 is used.
        Characters the encoding can't represent become backslash escapes.
        """
        codec = getattr(enc, 'encoding', enc) or 'utf8'
        return s.encode(codec, 'backslashreplace')

    def u2str(s, enc=None):
        """Convert a Unicode string to native Python 3 str. This is a NOP."""
        return s

    def b2str(s, enc=None):
        """Convert a Bytes string to native Python 3 str.

        enc is interpreted as in str2b. Undecodable bytes are substituted
        with the Unicode replacement character.
        """
        codec = getattr(enc, 'encoding', enc) or 'utf8'
        return s.decode(codec, 'replace')

    def any2str(s, enc=None):
        """Convert Bytes or Unicode to native Python 3 str.

        Anything that already is a str (subclasses included) is returned
        untouched; everything else is decoded through b2str.
        """
        # isinstance rather than an exact type match: a str subclass has no
        # decode() method, so sending it to b2str would raise.
        return s if isinstance(s, str) else b2str(s, enc)

else:
    # Python 2: keep the builtins, re-exported under the same names.
    unicode = unicode
    unichr = unichr
    xrange = xrange
    python2 = True
    python3 = False
    uniwrap = unicode
    bytewrap = bytearray

    def str2u(s, enc=None):
        """Convert a native Python 2 str to Unicode.

        enc is either an encoding name or an object carrying an `encoding`
        attribute (such as a stream); when it yields nothing, UTF-8 is used.
        Undecodable bytes are substituted with the replacement character.
        """
        codec = getattr(enc, 'encoding', enc) or 'utf8'
        return s.decode(codec, 'replace')

    def str2b(s, enc=None):
        """Convert a native Python 2 str to bytes. This is a NOP."""
        return s

    def u2str(s, enc=None):
        """Convert a Unicode string to native Python 2 str.

        enc is interpreted as in str2u. Characters the encoding can't
        represent become backslash escapes.
        """
        codec = getattr(enc, 'encoding', enc) or 'utf8'
        return s.encode(codec, 'backslashreplace')

    def b2str(s, enc=None):
        """Convert a Bytes string to native Python 2 str. This is a NOP."""
        return s

    def any2str(s, enc=None):
        """Convert Bytes or Unicode to native Python 2 str.

        Anything that already is a str (subclasses included) is returned
        untouched; everything else is encoded through u2str.
        """
        # isinstance rather than an exact type match: encoding a byte string
        # would first decode it implicitly as ASCII.
        return s if isinstance(s, str) else u2str(s, enc)

    if len(u'\U00010001') == 2:
        # Narrow character build (UTF-16 strings): a non-BMP character takes
        # two code units. Monkey-patch the relevant functions so that they
        # work in terms of whole code points.
        python2Narrow = True
        _unichr = unichr
        _ord = ord
        _len = len

        def unichr(n):
            """Like the builtin unichr, but also handles non-BMP code points."""
            if 65536 <= n < 0x110000:
                # Build the character (a surrogate pair on this build) by
                # decoding an eight-digit escape sequence.
                return ('\\U%08X' % n).decode('unicode-escape')
            return _unichr(n)

        def ord(x):
            """Like the builtin ord, but maps a surrogate pair to its code point."""
            if isinstance(x, unicode) and _len(x) == 2:
                # Work on a plain unicode object; presumably this is so that
                # x[0] and x[1] below address code units, not uniwrap items.
                x = unicode(x)
                if 0xD800 <= _ord(x[0]) < 0xDC00:
                    return 65536 + ((_ord(x[0]) & 0x3FF) << 10
                                    | (_ord(x[1]) & 0x3FF))
            return _ord(x)

        def len(x):
            """Like the builtin len, but counts code points in unicode strings."""
            if not isinstance(x, unicode):
                return _len(x)
            # UTF-32 spends exactly four bytes on every code point.
            return _len(x.encode('utf-32le')) >> 2

        # Alas, we can't monkey-patch the unicode class' __getitem__ and
        # __getslice__ methods; we need a workaround.
        class uniwrap(unicode):
            """unicode subclass whose indexing and slicing count code points.

            Every operation goes through a UTF-32 encoding of the string, in
            which code point number i occupies bytes 4*i .. 4*i+3. Results
            are plain unicode objects.
            """

            def __getslice__(self, start, stop):
                """Return self[start:stop], with offsets in code points."""
                # Offsets are scaled to byte offsets; anything that would not
                # fit in a native int after scaling is pinned to maxint.
                lim = sys.maxint >> 2
                start = max(start, 0)
                stop = max(stop, 0)
                start = start << 2 if start < lim else sys.maxint
                stop = stop << 2 if stop < lim else sys.maxint
                raw = self.encode('utf-32le')
                return raw[start:stop].decode('utf-32le')

            def __getitem__(self, item):
                """Return the code point or slice selected by item.

                Raises IndexError for an integer index outside the string,
                in either direction.
                """
                raw = self.encode('utf-32le')
                if isinstance(item, slice):
                    if item.step is None:
                        # Contiguous range: scale the bounds to byte offsets
                        # and let byte-string slicing handle None/negatives.
                        start, stop = item.start, item.stop
                        if start is not None:
                            start <<= 2
                        if stop is not None:
                            stop <<= 2
                        return raw[start:stop].decode('utf-32le')
                    # Stepped slice: a byte-level step would pick single
                    # bytes rather than 4-byte groups, so select the code
                    # points one by one instead.
                    count = _len(raw) >> 2
                    return u''.join(
                        raw[i << 2:(i + 1) << 2].decode('utf-32le')
                        for i in xrange(*item.indices(count)))

                # Integer index. Scale first so that non-integers still fail
                # with the TypeError raised by the shift.
                pos = item << 2
                size = _len(raw)
                if pos < 0:
                    pos += size
                if not 0 <= pos < size:
                    # Slicing would silently yield an empty string here.
                    raise IndexError('string index out of range')
                return raw[pos:pos + 4].decode('utf-32le')
|
2020-11-08 18:12:29 -07:00
|
|
|
|
2019-01-15 12:27:02 -07:00
|
|
|
def b2u(s, enc=None):
    """Bytes to Unicode

    Goes through the native str type: b2str first, then str2u, handing the
    same enc to both steps.
    """
    native = b2str(s, enc)
    return str2u(native, enc)
|
|
|
|
|
|
|
|
def u2b(s, enc=None):
    """Unicode to Bytes

    Goes through the native str type: u2str first, then str2b, handing the
    same enc to both steps.
    """
    native = u2str(s, enc)
    return str2b(native, enc)
|
2019-01-15 12:27:02 -07:00
|
|
|
|
|
|
|
def any2b(s, enc=None):
    """Bytes or Unicode to Bytes

    A bytes object (subclasses included) is returned untouched; anything
    else is treated as Unicode and converted with u2b, passing enc along.
    """
    # isinstance rather than an exact type match, so that bytes subclasses
    # are not mistaken for Unicode and pushed through the encoder.
    return s if isinstance(s, bytes) else u2b(s, enc)
|
|
|
|
|
|
|
|
def any2u(s, enc=None):
    """Bytes or Unicode to Unicode

    A unicode object (subclasses such as uniwrap included) is returned
    untouched; anything else is treated as bytes and converted with b2u,
    passing enc along.
    """
    # isinstance rather than an exact type match, so that unicode subclasses
    # are not mistaken for bytes and pushed through the decoder.
    return s if isinstance(s, unicode) else b2u(s, enc)
|
|
|
|
|
|
|
|
def werr(s):
    """Write any string to stderr

    The string is converted to native str with any2str; the stream itself is
    passed as the encoding hint.
    """
    stream = sys.stderr
    stream.write(any2str(s, stream))
|
2019-01-15 12:27:02 -07:00
|
|
|
|
|
|
|
def wout(s):
    """Write any string to stdout

    The string is converted to native str with any2str; the stream itself is
    passed as the encoding hint.
    """
    stream = sys.stdout
    stream.write(any2str(s, stream))
|
2019-01-18 15:41:45 -07:00
|
|
|
|
|
|
|
# Marker flag; nothing in the visible part of this file reads it. Presumably
# importing modules reference it so the import counts as used - TODO confirm
# against callers.
strutil_used = True
|