Minimize the impact of implicit conversion errors (between unicode, bytes and str): provide a new universal helper `uni_string`, which uses safe explicit conversion to string (even if the default encoding is ASCII), and avoid conversion errors caused by invalid characters when replacing tags.

pull/2171/head
sebres 2018-07-04 16:51:18 +02:00
parent 227550684a
commit 85fd1854bc
2 changed files with 65 additions and 46 deletions

View File

@ -49,11 +49,18 @@ if sys.version_info < (3,): # python >= 2.6
def __resetDefaultEncoding(encoding): def __resetDefaultEncoding(encoding):
global PREFER_ENC global PREFER_ENC
ode = sys.getdefaultencoding().upper() ode = sys.getdefaultencoding().upper()
if ode == 'ASCII' or ode != PREFER_ENC.upper(): if ode == 'ASCII' and ode != PREFER_ENC.upper():
# setdefaultencoding is normally deleted after site initialized, so hack-in using load of sys-module: # setdefaultencoding is normally deleted after site initialized, so hack-in using load of sys-module:
from imp import load_dynamic as __ldm _sys = sys
_sys = __ldm('_sys', 'sys') if not hasattr(_sys, "setdefaultencoding"):
_sys.setdefaultencoding(encoding) try:
from imp import load_dynamic as __ldm
_sys = __ldm('_sys', 'sys')
except ImportError: # pragma: no cover (only if load_dynamic fails)
reload(sys)
_sys = sys
if hasattr(_sys, "setdefaultencoding"):
_sys.setdefaultencoding(encoding)
# override to PREFER_ENC: # override to PREFER_ENC:
__resetDefaultEncoding(PREFER_ENC) __resetDefaultEncoding(PREFER_ENC)
del __resetDefaultEncoding del __resetDefaultEncoding
@ -61,11 +68,58 @@ if sys.version_info < (3,): # python >= 2.6
# todo: rewrite explicit (and implicit) str-conversions via encode/decode with IO-encoding (sys.stdout.encoding), # todo: rewrite explicit (and implicit) str-conversions via encode/decode with IO-encoding (sys.stdout.encoding),
# e. g. inside tags-replacement by command-actions, etc. # e. g. inside tags-replacement by command-actions, etc.
#
# The following "uni_decode" and "uni_string" functions provide unified,
# Python-version-independent conversion of any value to a string.
#
# Typical example (or use case) for understanding the encoding/decoding issues:
#
# [isinstance('', str), isinstance(b'', str), isinstance(u'', str)]
# [True, True, False]; # -- python2
# [True, False, True]; # -- python3
#
if sys.version_info >= (3,):
    def uni_decode(x, enc=PREFER_ENC, errors='strict'):
        """ Return `x` as a string: bytes are decoded using `enc`, any other value is returned unchanged.

        If a conversion with errors='strict' fails, it is repeated once with
        errors='replace'; with any other `errors` mode the exception propagates.
        """
        if not isinstance(x, bytes):
            return x
        try:
            return x.decode(enc, errors)
        except (UnicodeDecodeError, UnicodeEncodeError): # pragma: no cover - unsure if reachable
            if errors == 'strict':
                # second attempt: substitute the characters that cannot be converted
                return uni_decode(x, enc, 'replace')
            raise

    def uni_string(x):
        """ Convert `x` to a string: bytes go through `uni_decode`, anything else through `str`. """
        return uni_decode(x) if isinstance(x, bytes) else str(x)
else:
    def uni_decode(x, enc=PREFER_ENC, errors='strict'):
        """ Return `x` as a string: unicode is encoded using `enc`, any other value is returned unchanged.

        If a conversion with errors='strict' fails, it is repeated once with
        errors='replace'; with any other `errors` mode the exception propagates.
        """
        if not isinstance(x, unicode):
            return x
        try:
            return x.encode(enc, errors)
        except (UnicodeDecodeError, UnicodeEncodeError): # pragma: no cover - unsure if reachable
            if errors == 'strict':
                # second attempt: substitute the characters that cannot be converted
                return uni_decode(x, enc, 'replace')
            raise

    if sys.getdefaultencoding().upper() == 'UTF-8':
        # default encoding is already UTF-8 - plain `str` is used as is:
        uni_string = str
    else:
        def uni_string(x):
            """ Convert `x` to a string: unicode goes through `uni_decode`, anything else through `str`. """
            return uni_decode(x) if isinstance(x, unicode) else str(x)
def _as_bool(val):
    """ Interpret `val` as a boolean.

    A string is true only if its lower-cased form is one of '1', 'on', 'true'
    or 'yes'; any other value is converted with `bool`.
    """
    if isinstance(val, basestring):
        return val.lower() in ('1', 'on', 'true', 'yes')
    return bool(val)
def formatExceptionInfo(): def formatExceptionInfo():
""" Consistently format exception information """ """ Consistently format exception information """
cla, exc = sys.exc_info()[:2] cla, exc = sys.exc_info()[:2]
return (cla.__name__, str(exc)) return (cla.__name__, uni_string(exc))
# #
@ -235,41 +289,6 @@ else:
r.update(y) r.update(y)
return r return r
#
# The following "uni_decode" function provides unified, Python-version-independent conversion of any value to a string.
#
# Typical example (or use case) for understanding the encoding/decoding issues:
#
# [isinstance('', str), isinstance(b'', str), isinstance(u'', str)]
# [True, True, False]; # -- python2
# [True, False, True]; # -- python3
#
if sys.version_info >= (3,):
    def uni_decode(x, enc=PREFER_ENC, errors='strict'):
        """ Return `x` as a string: bytes are decoded using `enc`, any other value is returned unchanged.

        If a conversion with errors='strict' fails, it is repeated once with
        errors='replace'; with any other `errors` mode the exception propagates.
        """
        if not isinstance(x, bytes):
            return x
        try:
            return x.decode(enc, errors)
        except (UnicodeDecodeError, UnicodeEncodeError): # pragma: no cover - unsure if reachable
            if errors == 'strict':
                # second attempt: substitute the characters that cannot be converted
                return uni_decode(x, enc, 'replace')
            raise
else:
    def uni_decode(x, enc=PREFER_ENC, errors='strict'):
        """ Return `x` as a string: unicode is encoded using `enc`, any other value is returned unchanged.

        If a conversion with errors='strict' fails, it is repeated once with
        errors='replace'; with any other `errors` mode the exception propagates.
        """
        if not isinstance(x, unicode):
            return x
        try:
            return x.encode(enc, errors)
        except (UnicodeDecodeError, UnicodeEncodeError): # pragma: no cover - unsure if reachable
            if errors == 'strict':
                # second attempt: substitute the characters that cannot be converted
                return uni_decode(x, enc, 'replace')
            raise
def _as_bool(val):
    """ Interpret `val` as a boolean.

    A string is true only if its lower-cased form is one of '1', 'on', 'true'
    or 'yes'; any other value is converted with `bool`.
    """
    if isinstance(val, basestring):
        return val.lower() in ('1', 'on', 'true', 'yes')
    return bool(val)
# #
# Following function used for parse options from parameter (e.g. `name[p1=0, p2="..."][p3='...']`). # Following function used for parse options from parameter (e.g. `name[p1=0, p2="..."][p3='...']`).
# #
@ -347,7 +366,7 @@ def substituteRecursiveTags(inptags, conditional='',
if tag in ignore or tag in done: continue if tag in ignore or tag in done: continue
# ignore replacing callable items from calling map - should be converted on demand only (by get): # ignore replacing callable items from calling map - should be converted on demand only (by get):
if noRecRepl and callable(tags.getRawItem(tag)): continue if noRecRepl and callable(tags.getRawItem(tag)): continue
value = orgval = str(tags[tag]) value = orgval = uni_string(tags[tag])
# search and replace all tags within value, that can be interpolated using other tags: # search and replace all tags within value, that can be interpolated using other tags:
m = tre_search(value) m = tre_search(value)
refCounts = {} refCounts = {}
@ -382,7 +401,7 @@ def substituteRecursiveTags(inptags, conditional='',
m = tre_search(value, m.end()) m = tre_search(value, m.end())
continue continue
# if calling map - be sure we've string: # if calling map - be sure we've string:
if noRecRepl: repl = str(repl) if noRecRepl: repl = uni_string(repl)
value = value.replace('<%s>' % rtag, repl) value = value.replace('<%s>' % rtag, repl)
#logSys.log(5, 'value now: %s' % value) #logSys.log(5, 'value now: %s' % value)
# increment reference count: # increment reference count:

View File

@ -36,7 +36,7 @@ from .failregex import mapTag2Opt
from .ipdns import asip, DNSUtils from .ipdns import asip, DNSUtils
from .mytime import MyTime from .mytime import MyTime
from .utils import Utils from .utils import Utils
from ..helpers import getLogger, _merge_copy_dicts, substituteRecursiveTags, TAG_CRE, MAX_TAG_REPLACE_COUNT from ..helpers import getLogger, _merge_copy_dicts, uni_string, substituteRecursiveTags, TAG_CRE, MAX_TAG_REPLACE_COUNT
# Gets the instance of the logger. # Gets the instance of the logger.
logSys = getLogger(__name__) logSys = getLogger(__name__)
@ -608,7 +608,7 @@ class CommandAction(ActionBase):
if value is None: if value is None:
# fallback (no or default replacement) # fallback (no or default replacement)
return ADD_REPL_TAGS_CM.get(tag, m.group()) return ADD_REPL_TAGS_CM.get(tag, m.group())
value = str(value) # assure string value = uni_string(value) # assure string
if tag in cls._escapedTags: if tag in cls._escapedTags:
# That one needs to be escaped since its content is # That one needs to be escaped since its content is
# out of our control # out of our control
@ -687,7 +687,7 @@ class CommandAction(ActionBase):
except KeyError: except KeyError:
# fallback (no or default replacement) # fallback (no or default replacement)
return ADD_REPL_TAGS_CM.get(tag, m.group()) return ADD_REPL_TAGS_CM.get(tag, m.group())
value = str(value) # assure string value = uni_string(value) # assure string
# replacement for tag: # replacement for tag:
return escapeVal(tag, value) return escapeVal(tag, value)
@ -701,7 +701,7 @@ class CommandAction(ActionBase):
def substTag(m): def substTag(m):
tag = mapTag2Opt(m.groups()[0]) tag = mapTag2Opt(m.groups()[0])
try: try:
value = str(tickData[tag]) value = uni_string(tickData[tag])
except KeyError: except KeyError:
return "" return ""
return escapeVal("F_"+tag, value) return escapeVal("F_"+tag, value)