2020-12-07 07:23:05 +00:00
|
|
|
|
import abc
|
|
|
|
|
import codecs
|
2023-03-10 07:52:07 +00:00
|
|
|
|
import json
|
|
|
|
|
import re
|
|
|
|
|
|
2020-12-07 07:23:05 +00:00
|
|
|
|
from django.utils.translation import ugettext_lazy as _
|
2023-03-10 07:52:07 +00:00
|
|
|
|
from rest_framework import serializers
|
2020-12-07 07:23:05 +00:00
|
|
|
|
from rest_framework import status
|
|
|
|
|
from rest_framework.exceptions import ParseError, APIException
|
2023-03-10 07:52:07 +00:00
|
|
|
|
from rest_framework.parsers import BaseParser
|
|
|
|
|
|
|
|
|
|
from common.serializers.fields import ObjectRelatedField
|
2020-12-07 07:23:05 +00:00
|
|
|
|
from common.utils import get_logger
|
|
|
|
|
|
|
|
|
|
# Module-level logger, obtained from the project's `get_logger` helper using this file's path.
logger = get_logger(__file__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FileContentOverflowedError(APIException):
    """API exception reported as HTTP 400 when the uploaded content is too large.

    ``default_detail`` carries a ``{}`` placeholder; callers are expected to
    ``.format()`` it with the maximum allowed length (in bytes) before
    passing the result as the exception detail.
    """

    default_detail = _('The file content overflowed (The maximum length `{}` bytes)')
    default_code = 'file_content_overflowed'
    status_code = status.HTTP_400_BAD_REQUEST
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BaseFileParser(BaseParser):
    """Base parser that converts an uploaded tabular file into a list of row dicts.

    Subclasses supply :meth:`generate_rows`, which turns the raw request body
    into an iterator of rows; the first row is treated as the column titles.
    Titles are mapped to serializer field names (by field label or by field
    name) and every following non-empty row becomes a ``{field_name: value}``
    dict whose values are pre-processed according to the serializer field type.
    """

    # Largest declared request body accepted, in bytes (10 MiB).
    FILE_CONTENT_MAX_LENGTH = 1024 * 1024 * 10

    # Both are filled in by `parse()` from the view handling the request.
    serializer_cls = None
    serializer_fields = None

    # Matches cells written as `<name>(<id>)`; group 1 is the name, group 2 the id.
    obj_pattern = re.compile(r'^(.+)\(([a-z0-9-]+)\)$')

    # Maps curly quotes and the ASCII single quote to an ASCII double quote.
    # Built once here instead of on every `_replace_chinese_quote` call.
    _QUOTE_TRANS_TABLE = str.maketrans({
        '“': '"',
        '”': '"',
        '‘': '"',
        '’': '"',
        '\'': '"'
    })

    def check_content_length(self, meta):
        """Reject requests whose declared body size exceeds the limit.

        :param meta: mapping of request metadata (``request.META``).
        :raises FileContentOverflowedError: when the declared length is
            greater than ``FILE_CONTENT_MAX_LENGTH``.
        """
        raw_length = meta.get('CONTENT_LENGTH', meta.get('HTTP_CONTENT_LENGTH', 0))
        try:
            content_length = int(raw_length)
        except (ValueError, TypeError):
            # An empty or malformed header must not crash the request; treat
            # it as "length unknown" and let the parsing step decide.
            content_length = 0

        if content_length > self.FILE_CONTENT_MAX_LENGTH:
            msg = FileContentOverflowedError.default_detail.format(self.FILE_CONTENT_MAX_LENGTH)
            logger.error(msg)
            raise FileContentOverflowedError(msg)

    @staticmethod
    def get_stream_data(stream):
        """Read the whole stream and drop a leading UTF-8 byte order mark.

        :param stream: object whose ``read()`` returns the request body.
        :return: the body without a leading UTF-8 BOM.
        """
        stream_data = stream.read()
        # Only remove the BOM as a prefix. `bytes.strip(BOM)` would treat the
        # BOM as a *set* of bytes and also eat matching bytes from the end of
        # the payload, corrupting a trailing multi-byte character.
        if stream_data.startswith(codecs.BOM_UTF8):
            stream_data = stream_data[len(codecs.BOM_UTF8):]
        return stream_data

    @abc.abstractmethod
    def generate_rows(self, stream_data):
        """Return an iterator of rows built from ``stream_data``.

        Must be implemented by subclasses; the first yielded row is consumed
        as the column titles.
        """
        raise NotImplementedError

    def get_column_titles(self, rows):
        """Consume and return the first row, which holds the column titles."""
        return next(rows)

    def convert_to_field_names(self, column_titles):
        """Translate column titles into serializer field names.

        A title matches a writable field either by its label or by its field
        name; ``*`` characters around the title are ignored. Titles without a
        match are mapped to an empty string.

        :param column_titles: iterable of title strings.
        :return: list of field names, aligned with ``column_titles``.
        """
        fields_map = {}
        for name, field in self.serializer_fields.items():
            if field.read_only:
                continue
            fields_map[field.label] = name
            fields_map[name] = name

        return [
            fields_map.get(column_title.strip('*'), '')
            for column_title in column_titles
        ]

    @staticmethod
    def _replace_chinese_quote(s):
        """Replace curly quotes and ASCII single quotes with ``"``.

        Non-string values are returned unchanged.
        """
        if not isinstance(s, str):
            return s
        return s.translate(BaseFileParser._QUOTE_TRANS_TABLE)

    @classmethod
    def load_row(cls, row):
        """Pre-process a raw row before it is turned into a dict.

        Quotes are normalised in every string cell, and cells that look like a
        JSON list or object are decoded. A cell that fails to decode is kept
        as its (quote-normalised) string.

        :param row: iterable of cell values.
        :return: list of processed cell values.
        """
        new_row = []
        for col in row:
            # Normalise Chinese (curly) quotes.
            col = cls._replace_chinese_quote(col)
            # Decode list / dict literals.
            if isinstance(col, str) and (
                    (col.startswith('[') and col.endswith(']')) or
                    (col.startswith("{") and col.endswith("}"))
            ):
                try:
                    col = json.loads(col)
                except json.JSONDecodeError as e:
                    # The placeholders are required: without them the logging
                    # module cannot merge the argument into the message.
                    logger.error('Json load error: %s', e)
                    logger.error('col: %s', col)
            new_row.append(col)
        return new_row

    def id_name_to_obj(self, v):
        """Convert a ``name(id)`` string into ``{'pk': id, 'name': name}``.

        Values that are empty, not strings, or do not match ``obj_pattern``
        are returned unchanged. Ids shorter than 36 characters are converted
        to ``int`` when they are numeric; any other id is kept as a string.
        """
        if not v or not isinstance(v, str):
            return v
        matched = self.obj_pattern.match(v)
        if not matched:
            return v
        obj_name, obj_id = matched.groups()
        if len(obj_id) < 36:
            try:
                obj_id = int(obj_id)
            except ValueError:
                # The pattern also admits letters and dashes; keep such ids
                # as strings instead of aborting the whole import.
                pass
        return {'pk': obj_id, 'name': obj_name}

    def parse_value(self, field, value):
        """Pre-process one cell value according to its serializer field.

        :param field: serializer field for the column, or ``None`` when the
            column is not mapped to a field.
        :param value: cell value as produced by :meth:`load_row`.
        :return: the converted value; unchanged when no rule applies.
        """
        if value == '-' and field and field.allow_null:
            return None
        elif hasattr(field, 'to_file_internal_value'):
            value = field.to_file_internal_value(value)
        elif isinstance(field, serializers.BooleanField):
            # Only strings need normalising; other values are passed through
            # untouched rather than failing on `.lower()`.
            if isinstance(value, str):
                value = value.lower() in ['true', '1', 'yes']
        elif isinstance(field, serializers.ChoiceField):
            # Passed through as-is; the branch stops the chain here so that
            # none of the later branches are applied to choice fields.
            pass
        elif isinstance(field, ObjectRelatedField):
            if field.many:
                value = [self.id_name_to_obj(v) for v in value]
            else:
                value = self.id_name_to_obj(value)
        elif isinstance(field, serializers.ListSerializer):
            value = [self.parse_value(field.child, v) for v in value]
        elif isinstance(field, serializers.Serializer):
            value = self.id_name_to_obj(value)
        elif isinstance(field, serializers.ManyRelatedField):
            value = [self.parse_value(field.child_relation, v) for v in value]
        elif isinstance(field, serializers.ListField):
            value = [self.parse_value(field.child, v) for v in value]

        return value

    def process_row_data(self, row_data):
        """Post-process a row after it has been turned into a dict.

        :param row_data: ``{field_name: raw_value}`` mapping for one row.
        :return: new dict with every value passed through :meth:`parse_value`.
        """
        new_row = {}
        for k, v in row_data.items():
            field = self.serializer_fields.get(k)
            new_row[k] = self.parse_value(field, v)
        return new_row

    def generate_data(self, fields_name, rows):
        """Build the list of row dicts from the remaining rows.

        :param fields_name: field names aligned with the file's columns.
        :param rows: iterator of rows (titles already consumed).
        :return: list of processed row dicts.
        """
        data = []
        for row in rows:
            # Empty rows are skipped.
            if not any(row):
                continue
            row = self.load_row(row)
            row_data = dict(zip(fields_name, row))
            row_data = self.process_row_data(row_data)
            data.append(row_data)
        return data

    def parse(self, stream, media_type=None, parser_context=None):
        """Parse the uploaded file into a list of row dicts.

        :param stream: request body stream.
        :param media_type: unused here.
        :param parser_context: must contain the handling ``view``.
        :return: list of ``{field_name: value}`` dicts, one per non-empty row.
        :raises ParseError: when the view exposes no usable serializer, or
            when reading / converting the file fails.
        :raises FileContentOverflowedError: when the declared content length
            exceeds ``FILE_CONTENT_MAX_LENGTH``.
        """
        assert parser_context is not None, '`parser_context` should not be `None`'

        view = parser_context['view']
        request = view.request

        try:
            meta = request.META
            self.serializer_cls = view.get_serializer_class()
            self.serializer_fields = self.serializer_cls().fields
        except Exception as e:
            logger.debug(e, exc_info=True)
            raise ParseError('The resource does not support imports!')

        self.check_content_length(meta)
        try:
            stream_data = self.get_stream_data(stream)
            rows = self.generate_rows(stream_data)
            column_titles = self.get_column_titles(rows)
            field_names = self.convert_to_field_names(column_titles)

            # Provided for `common.mixins.api.RenderToJsonMixin`; the two are
            # coupled for now.
            column_title_field_pairs = list(zip(column_titles, field_names))
            column_title_field_pairs = [(k, v) for k, v in column_title_field_pairs if k and v]
            if not hasattr(request, 'jms_context'):
                request.jms_context = {}
            request.jms_context['column_title_field_pairs'] = column_title_field_pairs

            data = self.generate_data(field_names, rows)
            return data
        except Exception as e:
            logger.error(e, exc_info=True)
            raise ParseError(_('Parse file error: {}').format(e))
|