Try to deal with non-Latin encodings

pull/87/head
Andrew Krasichkov 2018-09-28 10:55:31 +03:00
parent 84f79a3e04
commit c0dd214e61
9 changed files with 107 additions and 10 deletions

17
gixy/formatters/_jinja.py Normal file
View File

@ -0,0 +1,17 @@
from __future__ import absolute_import
from jinja2 import Environment, PackageLoader
from gixy.utils.text import to_text
def load_template(name):
    """Load a formatter template by file name.

    Templates are looked up in the ``formatters/templates`` directory of the
    ``gixy`` package. The environment trims block newlines and strips leading
    whitespace before block tags, and exposes ``to_text_filter`` to templates
    under the filter name ``to_text``.

    :param name: template file name to load.
    :return: the template returned by the environment's ``get_template``.
    """
    loader = PackageLoader('gixy', 'formatters/templates')
    environment = Environment(
        loader=loader,
        trim_blocks=True,
        lstrip_blocks=True,
    )
    # Register the filter before loading, so templates can use ``| to_text``.
    environment.filters['to_text'] = to_text_filter
    return environment.get_template(name)
def to_text_filter(text):
    """Jinja2 filter that tries to undo a Latin-1 mis-decoding of UTF-8 text.

    The value is encoded back to Latin-1 and the resulting bytes are decoded
    as UTF-8. When either step fails, the value is passed to ``to_text``
    instead of letting the error abort template rendering.

    :param text: value received from the template expression.
    :return: the re-decoded text, or ``to_text(text)`` as a fallback.
    """
    try:
        return text.encode('latin1').decode('utf-8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # UnicodeEncodeError: the text holds characters outside Latin-1.
        # UnicodeDecodeError: the Latin-1 bytes are not valid UTF-8
        # (e.g. a genuine u'\xe9'), so there is nothing to repair.
        return to_text(text)

View File

@ -1,14 +1,13 @@
from __future__ import absolute_import
from jinja2 import Environment, PackageLoader
from gixy.formatters.base import BaseFormatter
from gixy.formatters._jinja import load_template
class ConsoleFormatter(BaseFormatter):
    """Formatter that renders reports with the ``console.j2`` template."""

    def __init__(self):
        super(ConsoleFormatter, self).__init__()
        # Load the template once, through the shared helper. The previous
        # inline Environment was built without the ``to_text`` filter and its
        # template was immediately overwritten by this assignment.
        self.template = load_template('console.j2')

    def format_reports(self, reports, stats):
        """Render ``reports`` and ``stats`` through the console template.

        :param reports: passed to the template as ``reports``.
        :param stats: passed to the template as ``stats``.
        :return: the rendered template output.
        """
        return self.template.render(reports=reports, stats=stats)

View File

@ -22,7 +22,7 @@ Additional info: {{ issue.help_url }}
Reason: {{ issue.reason }}
{% endif %}
Pseudo config:
{{ issue.config }}
{{ issue.config | to_text }}
{% if not loop.last %}
------------------------------------------------

View File

@ -22,7 +22,7 @@ Additional info: {{ issue.help_url }}
Reason: {{ issue.reason }}
{% endif %}
Pseudo config:
{{ issue.config }}
{{ issue.config | to_text }}
{% if not loop.last %}
------------------------------------------------

View File

@ -1,14 +1,13 @@
from __future__ import absolute_import
from jinja2 import Environment, PackageLoader
from gixy.formatters.base import BaseFormatter
from gixy.formatters._jinja import load_template
class TextFormatter(BaseFormatter):
    """Formatter that renders reports with the ``text.j2`` template."""

    def __init__(self):
        super(TextFormatter, self).__init__()
        # Load the template once, through the shared helper. The previous
        # inline Environment was built without the ``to_text`` filter and its
        # template was immediately overwritten by this assignment.
        self.template = load_template('text.j2')

    def format_reports(self, reports, stats):
        """Render ``reports`` and ``stats`` through the text template.

        :param reports: passed to the template as ``reports``.
        :param stats: passed to the template as ``stats``.
        :return: the rendered template output.
        """
        return self.template.render(reports=reports, stats=stats)

View File

@ -7,6 +7,7 @@ from pyparsing import ParseException
from gixy.core.exceptions import InvalidConfiguration
from gixy.parser import raw_parser
from gixy.directives import block, directive
from gixy.utils.text import to_native
LOG = logging.getLogger(__name__)
@ -69,14 +70,14 @@ class NginxParser(object):
return None
if klass.is_block:
args = [str(v).strip() for v in parsed_args[0]]
args = [to_native(v).strip() for v in parsed_args[0]]
children = parsed_args[1]
inst = klass(parsed_name, args)
self.parse_block(children, inst)
return inst
else:
args = [str(v).strip() for v in parsed_args]
args = [to_native(v).strip() for v in parsed_args]
return klass(parsed_name, args)
def _get_directive_class(self, parsed_type, parsed_name):

0
gixy/utils/__init__.py Normal file
View File

72
gixy/utils/text.py Normal file
View File

@ -0,0 +1,72 @@
from __future__ import absolute_import
from six import PY3, text_type, binary_type
def to_bytes(obj, encoding='latin1', errors='strict', nonstring='replace'):
    """Coerce ``obj`` to a byte string.

    Byte strings are returned unchanged. Text strings are encoded with
    ``encoding``/``errors``; if encoding raises ``UnicodeEncodeError`` the
    placeholder ``b'failed_to_encode'`` is returned instead.

    For any other object the ``nonstring`` strategy decides the result:

    * ``'simplerepr'`` - convert ``str(obj)`` (or ``repr(obj)`` if ``str``
      raises ``UnicodeError``) to bytes;
    * ``'passthru'`` - return ``obj`` untouched;
    * ``'replace'`` - return ``b'failed_to_encode'``;
    * ``'strict'`` - raise ``TypeError``.

    :raises TypeError: for non-string input with ``nonstring='strict'``, or
        when ``nonstring`` is not one of the values above.
    """
    placeholder = b'failed_to_encode'

    if isinstance(obj, binary_type):
        return obj

    if isinstance(obj, text_type):
        try:
            encoded = obj.encode(encoding, errors)
        except UnicodeEncodeError:
            encoded = placeholder
        return encoded

    # From here on ``obj`` is neither bytes nor text.
    if nonstring == 'passthru':
        return obj
    if nonstring == 'replace':
        return placeholder
    if nonstring == 'strict':
        raise TypeError('obj must be a string type')
    if nonstring != 'simplerepr':
        raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring)

    # 'simplerepr': stringify the object, then run the result back through
    # the string branches above.
    try:
        rendered = str(obj)
    except UnicodeError:
        try:
            rendered = repr(obj)
        except UnicodeError:
            # Neither str() nor repr() worked; give up.
            return placeholder
    return to_bytes(rendered, encoding, errors)
def to_text(obj, encoding='latin1', errors='strict', nonstring='replace'):
    """Coerce ``obj`` to a text (unicode) string.

    Text strings are returned unchanged. Byte strings are decoded with
    ``encoding``/``errors``; if decoding fails with a ``UnicodeError`` the
    placeholder ``u'failed_to_encode'`` is returned instead.

    For any other object the ``nonstring`` strategy decides the result:

    * ``'simplerepr'`` - convert ``str(obj)`` (or ``repr(obj)`` if ``str``
      raises ``UnicodeError``) to text;
    * ``'passthru'`` - return ``obj`` untouched;
    * ``'replace'`` - return ``u'failed_to_encode'``;
    * ``'strict'`` - raise ``TypeError``.

    :raises TypeError: for non-string input with ``nonstring='strict'``, or
        when ``nonstring`` is not one of the values above.
    """
    if isinstance(obj, text_type):
        return obj

    if isinstance(obj, binary_type):
        try:
            return obj.decode(encoding, errors)
        except UnicodeError:
            # Decoding raises UnicodeDecodeError, which the previous
            # ``except UnicodeEncodeError`` never matched. Catching the
            # common base class makes the placeholder fallback reachable.
            return u'failed_to_encode'

    if nonstring == 'simplerepr':
        try:
            value = str(obj)
        except UnicodeError:
            try:
                value = repr(obj)
            except UnicodeError:
                # Neither str() nor repr() worked; give up.
                return u'failed_to_encode'
    elif nonstring == 'passthru':
        return obj
    elif nonstring == 'replace':
        return u'failed_to_encode'
    elif nonstring == 'strict':
        raise TypeError('obj must be a string type')
    else:
        raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring)

    # ``value`` is a native string here; run it back through the string
    # branches above so the result is always text.
    return to_text(value, encoding, errors)
# ``to_native`` converts to the text converter when ``PY3`` is true and to
# the bytes converter otherwise.
to_native = to_text if PY3 else to_bytes

View File

@ -99,6 +99,15 @@ server {
assert_equal(listen.args, ['80'])
def test_encoding():
    """Configs with non-ASCII characters in an argument must parse without raising."""
    configs = [
        'bar "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";'
    ]

    # Only successful parsing is checked; the parsed tree is not inspected.
    for config in configs:
        _parse(config)
def assert_config(config, expected):
tree = _parse(config)
assert_is_instance(tree, Directive)