LSL-PyOptimizer/strutil.py
2024-04-14 11:40:21 +02:00

176 lines
5.7 KiB
Python

# (C) Copyright 2015-2024 Sei Lisa. All rights reserved.
#
# This file is part of LSL PyOptimizer.
#
# LSL PyOptimizer is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# LSL PyOptimizer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>.
# String <-> Bytes conversion and output utilities
# Microsoft again not following standards. Sigh.
import codecs
codecs.register(lambda x: codecs.lookup('utf8') if x == 'cp65001' else None)
import sys
python2Narrow = False
if sys.version_info.major >= 3:
unicode = str
unichr = chr
xrange = range
python3 = True
python2 = False
uniwrap = unicode
bytewrap = bytes
def str2u(s, enc=None):
"""Convert a native Python3 str to Unicode. This is a NOP."""
return s
def str2b(s, enc=None):
"""Convert a native Python3 str to bytes, with the given encoding."""
return s.encode(getattr(enc, 'encoding', enc) or 'utf8',
'backslashreplace')
def u2str(s, enc=None):
"""Convert a Unicode string to native Python 3 str. This is a NOP."""
return s
def b2str(s, enc=None):
"""Convert a Bytes string to native Python 3 str."""
return s.decode(getattr(enc, 'encoding', enc) or 'utf8',
'replace')
def any2str(s, enc=None):
"""Convert Bytes or Unicode to native Python 3 str."""
return s if type(s) == str else b2str(s, enc)
else:
unicode = unicode
unichr = unichr
xrange = xrange
python2 = True
python3 = False
uniwrap = unicode
bytewrap = bytearray
def str2u(s, enc=None):
"""Convert a native Python2 str to Unicode."""
return s.decode(getattr(enc, 'encoding', enc) or 'utf8',
'replace')
def str2b(s, enc=None):
"""Convert a native Python2 str to bytes. This is a NOP."""
return s
def u2str(s, enc=None):
"""Convert a Unicode string to native Python 2 str."""
return s.encode(getattr(enc, 'encoding', enc) or 'utf8',
'backslashreplace')
def b2str(s, enc=None):
"""Convert a Bytes string to native Python 2 str. This is a NOP."""
return s
def any2str(s, enc=None):
"""Convert Bytes or Unicode to native Python 2 str."""
return s if type(s) == str else u2str(s, enc)
if len(u'\U00010001') == 2:
# Narrow character build (UTF-16 strings)
# Monkey-patch the relevant functions
python2Narrow = True
_unichr = unichr
_ord = ord
_len = len
def unichr(n):
if not (65536 <= n < 0x110000):
return _unichr(n)
return ('\\U%08X' % n).decode('unicode-escape')
def ord(x):
if isinstance(x, unicode) and _len(x) == 2:
x = unicode(x)
if 0xD800 <= _ord(x[0]) < 0xDC00:
return 65536 + ((_ord(x[0]) & 0x3FF) << 10
| (_ord(x[1]) & 0x3FF))
return _ord(x)
def len(x):
if isinstance(x, unicode):
return _len(x.encode('utf-32le')) >> 2
return _len(x)
# Alas, we can't monkey-patch the unicode class' __getitem__ and
# __getslice__ methods; we need a workaround.
class uniwrap(unicode):
def __getslice__(self, start, stop):
lim = sys.maxint >> 2
if start < 0: start = 0
if stop < 0: stop = 0
if start < lim:
start <<= 2
else:
start = sys.maxint
if stop < lim:
stop <<= 2
else:
stop = sys.maxint
return self.encode('utf-32le')[start:stop].decode(
'utf-32le')
def __getitem__(self, item):
if type(item) == slice:
start = item.start
stop = item.stop
step = item.step
if start is not None:
start <<= 2
if stop is not None:
stop <<= 2
if step is not None:
step <<= 2
return self.encode('utf-32le')[start:stop:step].decode(
'utf-32le')
u = self.encode('utf-32le')
item <<= 2
if item >= _len(u):
return u[item] # raise IndexError, as slicing doesn't
return u[item:(item+4 if item != -4 else None)].decode(
'utf-32le')
def b2u(s, enc=None):
"""Bytes to Unicode"""
return str2u(b2str(s, enc), enc)
def u2b(s, enc=None):
"""Unicode to Bytes"""
return str2b(u2str(s, enc), enc)
def any2b(s, enc=None):
"""Bytes or Unicode to Bytes"""
return s if type(s) == bytes else u2b(s, enc)
def any2u(s, enc=None):
"""Bytes or Unicode to Unicode"""
return s if type(s) == unicode else b2u(s, enc)
def werr(s):
"""Write any string to stderr"""
sys.stderr.write(any2str(s, sys.stderr))
def wout(s):
"""Write any string to stdout"""
sys.stdout.write(any2str(s, sys.stdout))
strutil_used = True