diff --git a/gixy/formatters/_jinja.py b/gixy/formatters/_jinja.py
new file mode 100644
index 0000000..d93b853
--- /dev/null
+++ b/gixy/formatters/_jinja.py
@@ -0,0 +1,31 @@
+from __future__ import absolute_import
+from jinja2 import Environment, PackageLoader
+
+from gixy.utils.text import to_text
+
+
+def load_template(name):
+    """Return template ``name`` from gixy's ``formatters/templates`` package directory.
+
+    The environment is created with ``trim_blocks`` and ``lstrip_blocks``
+    enabled and has the ``to_text`` filter registered.
+    """
+    env = Environment(loader=PackageLoader('gixy', 'formatters/templates'), trim_blocks=True, lstrip_blocks=True)
+    env.filters['to_text'] = to_text_filter
+    return env.get_template(name)
+
+
+def to_text_filter(text):
+    """Jinja filter that undoes a latin1 decoding of UTF-8 data.
+
+    ``text`` is encoded back to bytes as latin1 and those bytes are decoded
+    as UTF-8. When either step fails the value is passed to ``to_text``
+    unchanged instead.
+    """
+    try:
+        return text.encode('latin1').decode('utf-8')
+    except UnicodeError:
+        # UnicodeError covers both UnicodeEncodeError (characters outside
+        # latin1) and UnicodeDecodeError (bytes that are not valid UTF-8);
+        # catching only the former let the latter abort rendering.
+        return to_text(text)
diff --git a/gixy/formatters/console.py b/gixy/formatters/console.py index 18016c7..b99d28b 100644 --- a/gixy/formatters/console.py +++ b/gixy/formatters/console.py @@ -1,14 +1,13 @@ from __future__ import absolute_import -from jinja2 import Environment, PackageLoader from gixy.formatters.base import BaseFormatter +from gixy.formatters._jinja import load_template class ConsoleFormatter(BaseFormatter): def __init__(self): super(ConsoleFormatter, self).__init__() - env = Environment(loader=PackageLoader('gixy', 'formatters/templates'), trim_blocks=True, lstrip_blocks=True) - self.template = env.get_template('console.j2') + self.template = load_template('console.j2') def format_reports(self, reports, stats): return self.template.render(reports=reports, stats=stats) diff --git a/gixy/formatters/templates/console.j2 b/gixy/formatters/templates/console.j2 index 182d107..d1b73fe 100644 --- a/gixy/formatters/templates/console.j2 +++ b/gixy/formatters/templates/console.j2 @@ -22,7 +22,7 @@ Additional info: {{ issue.help_url }} Reason: {{ issue.reason }} {% endif %} Pseudo config: -{{ issue.config }} +{{ issue.config | to_text }} {% if not loop.last %} ------------------------------------------------ diff --git a/gixy/formatters/templates/text.j2 b/gixy/formatters/templates/text.j2 index 59b830d..39c6c71 100644 --- 
a/gixy/formatters/templates/text.j2 +++ b/gixy/formatters/templates/text.j2 @@ -22,7 +22,7 @@ Additional info: {{ issue.help_url }} Reason: {{ issue.reason }} {% endif %} Pseudo config: -{{ issue.config }} +{{ issue.config | to_text }} {% if not loop.last %} ------------------------------------------------ diff --git a/gixy/formatters/text.py b/gixy/formatters/text.py index 9520d05..4d9938f 100644 --- a/gixy/formatters/text.py +++ b/gixy/formatters/text.py @@ -1,14 +1,13 @@ from __future__ import absolute_import -from jinja2 import Environment, PackageLoader from gixy.formatters.base import BaseFormatter +from gixy.formatters._jinja import load_template class TextFormatter(BaseFormatter): def __init__(self): super(TextFormatter, self).__init__() - env = Environment(loader=PackageLoader('gixy', 'formatters/templates'), trim_blocks=True, lstrip_blocks=True) - self.template = env.get_template('text.j2') + self.template = load_template('text.j2') def format_reports(self, reports, stats): return self.template.render(reports=reports, stats=stats) diff --git a/gixy/parser/nginx_parser.py b/gixy/parser/nginx_parser.py index 38c3f8b..039b134 100644 --- a/gixy/parser/nginx_parser.py +++ b/gixy/parser/nginx_parser.py @@ -7,6 +7,7 @@ from pyparsing import ParseException from gixy.core.exceptions import InvalidConfiguration from gixy.parser import raw_parser from gixy.directives import block, directive +from gixy.utils.text import to_native LOG = logging.getLogger(__name__) @@ -69,14 +70,14 @@ class NginxParser(object): return None if klass.is_block: - args = [str(v).strip() for v in parsed_args[0]] + args = [to_native(v).strip() for v in parsed_args[0]] children = parsed_args[1] inst = klass(parsed_name, args) self.parse_block(children, inst) return inst else: - args = [str(v).strip() for v in parsed_args] + args = [to_native(v).strip() for v in parsed_args] return klass(parsed_name, args) def _get_directive_class(self, parsed_type, parsed_name): diff --git 
a/gixy/utils/__init__.py b/gixy/utils/__init__.py new file mode 100644 index 0000000..e69de29
diff --git a/gixy/utils/text.py b/gixy/utils/text.py
new file mode 100644
index 0000000..2ed7d66
--- /dev/null
+++ b/gixy/utils/text.py
@@ -0,0 +1,88 @@
+from __future__ import absolute_import
+from six import PY3, text_type, binary_type
+
+
+def to_bytes(obj, encoding='latin1', errors='strict', nonstring='replace'):
+    """Coerce ``obj`` to a byte string.
+
+    Byte strings are returned unchanged; text is encoded with ``encoding``
+    and ``errors``, yielding ``b'failed_to_encode'`` if that fails. For any
+    other object ``nonstring`` decides: ``'simplerepr'`` converts ``str(obj)``
+    (falling back to ``repr(obj)``), ``'passthru'`` returns ``obj`` as is,
+    ``'replace'`` returns ``b'failed_to_encode'`` and ``'strict'`` raises
+    ``TypeError``; an unknown value also raises ``TypeError``.
+    """
+    if isinstance(obj, binary_type):
+        return obj
+
+    if isinstance(obj, text_type):
+        try:
+            # Try this first as it's the fastest
+            return obj.encode(encoding, errors)
+        except UnicodeEncodeError:
+            return b'failed_to_encode'
+
+    if nonstring == 'simplerepr':
+        try:
+            value = str(obj)
+        except UnicodeError:
+            try:
+                value = repr(obj)
+            except UnicodeError:
+                # Giving up
+                return b'failed_to_encode'
+    elif nonstring == 'passthru':
+        return obj
+    elif nonstring == 'replace':
+        return b'failed_to_encode'
+    elif nonstring == 'strict':
+        raise TypeError('obj must be a string type')
+    else:
+        raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring)
+
+    return to_bytes(value, encoding, errors)
+
+
+def to_text(obj, encoding='latin1', errors='strict', nonstring='replace'):
+    """Coerce ``obj`` to a text (unicode) string.
+
+    Text is returned unchanged; byte strings are decoded with ``encoding``
+    and ``errors``, yielding ``u'failed_to_encode'`` if that fails. Other
+    objects are handled according to ``nonstring`` exactly as in
+    ``to_bytes``, with text results instead of bytes.
+    """
+    if isinstance(obj, text_type):
+        return obj
+
+    if isinstance(obj, binary_type):
+        try:
+            return obj.decode(encoding, errors)
+        except UnicodeDecodeError:
+            # decode() raises UnicodeDecodeError, not UnicodeEncodeError
+            return u'failed_to_encode'
+
+    if nonstring == 'simplerepr':
+        try:
+            value = str(obj)
+        except UnicodeError:
+            try:
+                value = repr(obj)
+            except UnicodeError:
+                # Giving up
+                return u'failed_to_encode'
+    elif nonstring == 'passthru':
+        return obj
+    elif nonstring == 'replace':
+        return u'failed_to_encode'
+    elif nonstring == 'strict':
+        raise TypeError('obj must be a string type')
+    else:
+        raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring)
+
+    return to_text(value, encoding, errors)
+
+
+if PY3:
+    to_native = to_text
+else:
+    to_native = to_bytes
diff --git 
a/tests/parser/test_nginx_parser.py b/tests/parser/test_nginx_parser.py index d82ffad..0f42e56 100644 --- a/tests/parser/test_nginx_parser.py +++ b/tests/parser/test_nginx_parser.py @@ -99,6 +99,15 @@ server { assert_equal(listen.args, ['80']) +def test_encoding(): + configs = [ + 'bar "\xD1\x82\xD0\xB5\xD1\x81\xD1\x82";' + ] + + for i, config in enumerate(configs): + _parse(config) + + def assert_config(config, expected): tree = _parse(config) assert_is_instance(tree, Directive)