Merge pull request #44 from yandex/nonstrict-encoding

Autodetect config encoding
pull/52/head
Andrew Krasichkov 2017-05-16 23:05:28 +03:00 committed by GitHub
commit 2ea357ea7b
3 changed files with 36 additions and 7 deletions

View File

@ -150,10 +150,10 @@ def main():
with Gixy(config=config) as yoda:
if path == '-':
with os.fdopen(sys.stdin.fileno(), 'r') as fdata:
with os.fdopen(sys.stdin.fileno(), 'rb') as fdata:
yoda.audit('<stdin>', fdata, is_stdin=True)
else:
with open(path, mode='r') as fdata:
with open(path, mode='rb') as fdata:
yoda.audit(path, fdata, is_stdin=False)
formatted = formatters()[config.output_format]().format(yoda)

View File

@ -1,4 +1,6 @@
import logging
import codecs
import six
from cached_property import cached_property
from pyparsing import (
@ -27,11 +29,19 @@ class RawParser(object):
"""
Returns the parsed tree.
"""
content = data.strip()
if isinstance(data, six.binary_type):
if data[:3] == codecs.BOM_UTF8:
encoding = 'utf-8-sig'
else:
encoding = 'latin1'
content = data.decode(encoding).strip()
else:
content = data.strip()
if not content:
return ParseResults()
return self.script.parseString(data, parseAll=True)
return self.script.parseString(content, parseAll=True)
@cached_property
def script(self):

View File

@ -1,7 +1,4 @@
from nose.tools import assert_equals
import mock
from six import StringIO
from six.moves import builtins
from gixy.parser.raw_parser import *
@ -527,6 +524,28 @@ def test_empty_config():
assert_config(config, expected)
def test_utfbom_decoding():
    """Check that a bytes config starting with a UTF-8 BOM parses cleanly.

    The input begins with the three BOM bytes; the expected tree holds only
    the ``add_header`` directive, so the BOM must not show up as a token.
    """
    bom_prefixed_config = b'''\xef\xbb\xbf
add_header X-Test "Windows-1251";
'''
    only_directive = ['add_header', 'X-Test', 'Windows-1251']
    assert_config(bom_prefixed_config, [only_directive])
def test_national_comment_decoding():
    """Check that non-ASCII bytes inside a comment do not break parsing.

    The comment line carries raw single-byte national characters (no BOM).
    The parsed tree is expected to have two top-level entries -- presumably
    the comment and the ``add_header`` directive.
    """
    raw_config = b'''
# \xeb\xff-\xeb\xff-\xeb\xff = Lya-lya-lya
add_header X-Test "Windows-1251";
'''
    tree = RawParser().parse(raw_config)
    entries = tree.asList()
    assert_equals(len(entries), 2)
def assert_config(config, expected):
    """Parse ``config`` with a fresh ``RawParser`` and compare the result.

    :param config: raw nginx configuration passed to ``RawParser.parse``;
        the tests in this file pass bytes literals.
    :param expected: nested list that the parsed tree, converted with
        ``asList()``, must equal.
    """
    actual = RawParser().parse(config)
    assert_equals(actual.asList(), expected)