2019-05-21 08:24:01 +00:00
|
|
|
|
# ~*~ coding: utf-8 ~*~
|
|
|
|
|
#
|
|
|
|
|
|
|
|
|
|
import codecs
import json

import chardet
import unicodecsv
from django.utils.translation import ugettext as _
from django.utils.translation import ugettext_lazy
from rest_framework import status
from rest_framework.exceptions import ParseError, APIException
from rest_framework.parsers import BaseParser

from common.utils import get_logger
|
2019-05-21 08:24:01 +00:00
|
|
|
|
|
|
|
|
|
# Module-level logger. NOTE(review): get_logger is a project helper; it is
# passed __file__ here, so presumably it keys loggers by file path — confirm
# against common.utils.
logger = get_logger(__file__)
|
|
|
|
|
|
|
|
|
|
|
2020-06-28 11:02:20 +00:00
|
|
|
|
class CsvDataTooBig(APIException):
    """Raised when an uploaded CSV payload exceeds the configured size limit."""
    status_code = status.HTTP_400_BAD_REQUEST
    default_code = 'csv_data_too_big'
    # Use lazy translation: with plain ugettext this class attribute would be
    # translated exactly once, at module import time, in whatever locale is
    # active then — not in the requesting user's locale.
    default_detail = ugettext_lazy('The max size of CSV is %d bytes')
|
|
|
|
|
|
|
|
|
|
|
2019-05-21 08:24:01 +00:00
|
|
|
|
class JMSCSVParser(BaseParser):
    """
    Parse an uploaded CSV file into serializer data (a list of dicts keyed
    by the view serializer's field names).
    """
    # Reject uploads larger than 10 MB
    CSV_UPLOAD_MAX_SIZE = 1024 * 1024 * 10

    media_type = 'text/csv'

    @staticmethod
    def _universal_newlines(stream):
        """
        Yield the stream's lines split on any newline convention
        (emulates universal-newlines mode for in-memory data).
        """
        for line in stream.splitlines():
            yield line

    @staticmethod
    def _gen_rows(csv_data, charset='utf-8', **kwargs):
        """
        Yield decoded CSV rows, skipping rows whose cells are all empty.

        :param csv_data: iterable of raw (bytes) CSV lines
        :param charset: encoding passed through to unicodecsv
        """
        csv_reader = unicodecsv.reader(csv_data, encoding=charset, **kwargs)
        for row in csv_reader:
            if not any(row):  # blank row
                continue
            yield row

    @staticmethod
    def _get_fields_map(serializer_cls):
        """
        Build a mapping from both a field's label and its name to the field
        name, so CSV headers may use either form.
        """
        fields = serializer_cls().fields
        fields_map = {}
        fields_map.update({v.label: k for k, v in fields.items()})
        # NB: keep `k in fields`, not `k, _ in fields.items()` — the latter
        # would shadow the module-level ugettext alias `_`.
        fields_map.update({k: k for k in fields})
        return fields_map

    @staticmethod
    def _replace_chinese_quot(str_):
        """
        Normalize full-width (Chinese) quotation marks and ASCII single
        quotes to ASCII double quotes so json.loads can parse the value.
        """
        trans_table = str.maketrans({
            '“': '"',
            '”': '"',
            '‘': '"',
            '’': '"',
            '\'': '"'
        })
        return str_.translate(trans_table)

    @classmethod
    def _process_row(cls, row):
        """
        Pre-process one row before building JSON data: cells that look like
        JSON lists or dicts are parsed into Python objects.
        """
        _row = []
        for col in row:
            # list conversion
            if isinstance(col, str) and col.startswith('[') and col.endswith(']'):
                col = cls._replace_chinese_quot(col)
                col = json.loads(col)
            # dict conversion
            if isinstance(col, str) and col.startswith("{") and col.endswith("}"):
                col = cls._replace_chinese_quot(col)
                col = json.loads(col)
            _row.append(col)
        return _row

    @staticmethod
    def _process_row_data(row_data):
        """
        Post-process one row dict after building JSON data: keep list/dict
        values unconditionally; keep string values only when both the key
        and the value are non-blank (drops cells for unrecognized headers).
        """
        _row_data = {}
        for k, v in row_data.items():
            # `and` binds tighter than `or`: the strip() checks apply only
            # to the str branch.
            if isinstance(v, list) or isinstance(v, dict)\
                    or isinstance(v, str) and k.strip() and v.strip():
                _row_data[k] = v
        return _row_data

    def parse(self, stream, media_type=None, parser_context=None):
        """
        Parse the request body as CSV.

        :param stream: file-like object with the raw request body (bytes)
        :return: list of dicts, one per non-empty data row
        :raises CsvDataTooBig: when Content-Length exceeds CSV_UPLOAD_MAX_SIZE
        :raises ParseError: when the view has no serializer or the CSV is
            malformed
        """
        parser_context = parser_context or {}
        try:
            view = parser_context['view']
            meta = view.request.META
            serializer_cls = view.get_serializer_class()
        except Exception as e:
            logger.debug(e, exc_info=True)
            raise ParseError('The resource does not support imports!')

        content_length = int(meta.get('CONTENT_LENGTH', meta.get('HTTP_CONTENT_LENGTH', 0)))
        if content_length > self.CSV_UPLOAD_MAX_SIZE:
            msg = CsvDataTooBig.default_detail % self.CSV_UPLOAD_MAX_SIZE
            logger.error(msg)
            raise CsvDataTooBig(msg)

        try:
            stream_data = stream.read()
            # Remove a UTF-8 BOM if present. bytes.strip(BOM_UTF8) would be
            # wrong here: strip() treats its argument as a *set* of byte
            # values and removes them from BOTH ends, which can corrupt data
            # that legitimately starts or ends with \xef/\xbb/\xbf.
            if stream_data.startswith(codecs.BOM_UTF8):
                stream_data = stream_data[len(codecs.BOM_UTF8):]
            detect_result = chardet.detect(stream_data)
            # chardet reports {'encoding': None} when detection fails (e.g.
            # empty body); dict.get's default does NOT cover that, so use
            # `or` to fall back to utf-8 explicitly.
            encoding = detect_result.get("encoding") or "utf-8"
            binary = self._universal_newlines(stream_data)
            rows = self._gen_rows(binary, charset=encoding)

            header = next(rows)
            fields_map = self._get_fields_map(serializer_cls)
            # Headers may carry a leading '*' (required-field marker);
            # unknown headers map to '' and are dropped later by
            # _process_row_data's blank-key check.
            header = [fields_map.get(name.strip('*'), '') for name in header]

            data = []
            for row in rows:
                row = self._process_row(row)
                row_data = dict(zip(header, row))
                row_data = self._process_row_data(row_data)
                data.append(row_data)
            return data
        except Exception as e:
            logger.error(e, exc_info=True)
            raise ParseError('CSV parse error!')
|