mirror of https://github.com/yandex/gixy
185 lines
5.6 KiB
Python
185 lines
5.6 KiB
Python
import logging
|
|
import codecs
|
|
import six
|
|
from cached_property import cached_property
|
|
|
|
from pyparsing import (
|
|
Literal, Suppress, White, Word, alphanums, Forward, Group, Optional, Combine,
|
|
Keyword, OneOrMore, ZeroOrMore, Regex, QuotedString, nestedExpr, ParseResults)
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
class NginxQuotedString(QuotedString):
|
|
def __init__(self, quoteChar):
|
|
super(NginxQuotedString, self).__init__(quoteChar, escChar='\\', multiline=True)
|
|
# Nginx parse quoted values in special manner:
|
|
# '^https?:\/\/yandex\.ru\/\00\'\"' -> ^https?:\/\/yandex\.ru\/\00'"
|
|
# TODO(buglloc): research and find another special characters!
|
|
|
|
self.escCharReplacePattern = '\\\\(\'|")'
|
|
|
|
|
|
class RawParser(object):
|
|
"""
|
|
A class that parses nginx configuration with pyparsing
|
|
"""
|
|
|
|
def parse(self, data):
|
|
"""
|
|
Returns the parsed tree.
|
|
"""
|
|
if isinstance(data, six.binary_type):
|
|
if data[:3] == codecs.BOM_UTF8:
|
|
encoding = 'utf-8-sig'
|
|
else:
|
|
encoding = 'latin1'
|
|
content = data.decode(encoding).strip()
|
|
else:
|
|
content = data.strip()
|
|
|
|
if not content:
|
|
return ParseResults()
|
|
|
|
return self.script.parseString(content, parseAll=True)
|
|
|
|
@cached_property
|
|
def script(self):
|
|
# constants
|
|
left_bracket = Suppress("{")
|
|
right_bracket = Suppress("}")
|
|
semicolon = Suppress(";")
|
|
space = White().suppress()
|
|
keyword = Word(alphanums + ".+-_/")
|
|
path = Word(alphanums + ".-_/")
|
|
variable = Word("$_-" + alphanums)
|
|
value_wq = Regex(r'(?:\([^\s;]*\)|\$\{\w+\}|[^\s;(){}])+')
|
|
value_sq = NginxQuotedString(quoteChar="'")
|
|
value_dq = NginxQuotedString(quoteChar='"')
|
|
value = (value_dq | value_sq | value_wq)
|
|
# modifier for location uri [ = | ~ | ~* | ^~ ]
|
|
location_modifier = (
|
|
Keyword("=") |
|
|
Keyword("~*") | Keyword("~") |
|
|
Keyword("^~"))
|
|
# modifier for if statement
|
|
if_modifier = Combine(Optional("!") + (
|
|
Keyword("=") |
|
|
Keyword("~*") | Keyword("~") |
|
|
(Literal("-") + (Literal("f") | Literal("d") | Literal("e") | Literal("x")))))
|
|
# This ugly workaround needed to parse unquoted regex with nested parentheses
|
|
# so we capture all content between parentheses and then parse it :(
|
|
# TODO(buglloc): may be use something better?
|
|
condition_body = (
|
|
(if_modifier + Optional(space) + value) |
|
|
(variable + Optional(space + if_modifier + Optional(space) + value))
|
|
)
|
|
condition = Regex(r'\((?:[^()\n\r\\]|(?:\(.*\))|(?:\\.))+?\)')\
|
|
.setParseAction(lambda s, l, t: condition_body.parseString(t[0][1:-1]))
|
|
|
|
# rules
|
|
include = (
|
|
Keyword("include") +
|
|
space +
|
|
value +
|
|
semicolon
|
|
)("include")
|
|
|
|
directive = (
|
|
keyword +
|
|
ZeroOrMore(space + value) +
|
|
semicolon
|
|
)("directive")
|
|
|
|
file_delimiter = (
|
|
Suppress("# configuration file ") +
|
|
path +
|
|
Suppress(":")
|
|
)("file_delimiter")
|
|
|
|
comment = (
|
|
Regex(r"#.*")
|
|
)("comment").setParseAction(_fix_comment)
|
|
|
|
hash_value = Group(
|
|
value +
|
|
ZeroOrMore(space + value) +
|
|
semicolon
|
|
)("hash_value")
|
|
|
|
generic_block = Forward()
|
|
if_block = Forward()
|
|
location_block = Forward()
|
|
hash_block = Forward()
|
|
unparsed_block = Forward()
|
|
|
|
sub_block = OneOrMore(Group(if_block |
|
|
location_block |
|
|
hash_block |
|
|
generic_block |
|
|
include |
|
|
directive |
|
|
file_delimiter |
|
|
comment |
|
|
unparsed_block))
|
|
|
|
if_block << (
|
|
Keyword("if") +
|
|
Group(condition) +
|
|
Group(
|
|
left_bracket +
|
|
Optional(sub_block) +
|
|
right_bracket)
|
|
)("block")
|
|
|
|
location_block << (
|
|
Keyword("location") +
|
|
Group(
|
|
Optional(space + location_modifier) +
|
|
Optional(space) + value) +
|
|
Group(
|
|
left_bracket +
|
|
Optional(sub_block) +
|
|
right_bracket)
|
|
)("block")
|
|
|
|
hash_block << (
|
|
keyword +
|
|
Group(OneOrMore(space + value)) +
|
|
Group(
|
|
left_bracket +
|
|
Optional(OneOrMore(hash_value)) +
|
|
right_bracket)
|
|
)("block")
|
|
|
|
generic_block << (
|
|
keyword +
|
|
Group(ZeroOrMore(space + value)) +
|
|
Group(
|
|
left_bracket +
|
|
Optional(sub_block) +
|
|
right_bracket)
|
|
)("block")
|
|
|
|
unparsed_block << (
|
|
keyword +
|
|
Group(ZeroOrMore(space + value)) +
|
|
nestedExpr(opener="{", closer="}")
|
|
)("unparsed_block")
|
|
|
|
return sub_block
|
|
|
|
|
|
def _fix_comment(string, location, tokens):
|
|
"""
|
|
Returns "cleared" comment text
|
|
|
|
:param string: original parse string
|
|
:param location: location in the string where matching started
|
|
:param tokens: list of the matched tokens, packaged as a ParseResults_ object
|
|
:return: list of the cleared comment tokens
|
|
"""
|
|
|
|
comment = tokens[0][1:].strip()
|
|
return [comment]
|