# ~*~ coding: utf-8 ~*~
#

import chardet
import unicodecsv

from common.utils import lazyproperty
from .base import BaseFileParser
from ..const import CSV_FILE_ESCAPE_CHARS


class CSVFileParser(BaseFileParser):
    """Parse uploaded CSV data into rows, un-escaping quote-prefixed cells."""

    media_type = 'text/csv'

    @lazyproperty
    def match_escape_chars(self):
        """Return the tuple of prefixes that mark an escaped cell.

        For every character in ``CSV_FILE_ESCAPE_CHARS`` two prefixes are
        produced, in this order: the character preceded by a double quote,
        then the character preceded by a single quote.
        """
        return tuple(
            '{}{}'.format(quote, c)
            for c in CSV_FILE_ESCAPE_CHARS
            for quote in ('"', "'")
        )

    @staticmethod
    def _universal_newlines(stream):
        """Yield the lines of ``stream`` as if read in universal-newlines mode.

        ``splitlines()`` drops the line terminators, whichever style
        the data uses.
        """
        for line in stream.splitlines():
            yield line

    def __parse_row(self, row):
        """Return a copy of ``row`` with escape quotes removed from cells.

        A string cell whose whitespace-stripped value starts with one of
        ``match_escape_chars`` has its leading single quotes, then its
        leading double quotes, removed. Other cells are passed through
        unchanged.
        """
        row_escape = []
        for d in row:
            if isinstance(d, str) and d.strip().startswith(self.match_escape_chars):
                # NOTE(review): the prefix test runs on the stripped value but
                # the quote removal runs on the original one, so a cell with
                # leading whitespace matches yet keeps its quote.
                d = d.lstrip("'").lstrip('"')
            row_escape.append(d)
        return row_escape

    def generate_rows(self, stream_data):
        """Yield the parsed rows of the CSV document in ``stream_data``.

        The encoding is guessed with ``chardet``; when detection fails the
        data is decoded as UTF-8.
        """
        detect_result = chardet.detect(stream_data)
        # chardet always returns the "encoding" key and sets it to None when
        # it cannot decide (e.g. empty input), so a ``.get`` default would
        # never apply; fall back explicitly instead.
        encoding = detect_result.get("encoding") or "utf-8"
        lines = self._universal_newlines(stream_data)
        csv_reader = unicodecsv.reader(lines, encoding=encoding)
        for row in csv_reader:
            yield self.__parse_row(row)