You've already forked favicon-api-async
init
This commit is contained in:
318
favicon_app/utils/file_util.py
Normal file
318
favicon_app/utils/file_util.py
Normal file
@@ -0,0 +1,318 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any, Optional, Union
|
||||
|
||||
import urllib3
|
||||
|
||||
# Logging setup: create this module's logger, route the ``warnings`` module
# through ``logging`` and silence urllib3's warnings process-wide.
logger = logging.getLogger(__name__)
logging.captureWarnings(True)
urllib3.disable_warnings()
|
||||
|
||||
|
||||
class FileUtil:
    """Utility helpers for common file and directory operations.

    All methods are static. Failures are reported through the module logger
    and signalled with a neutral return value (``None``, ``False``, an empty
    list or an empty dict) rather than by raising.
    """

    @staticmethod
    def _validate_path(path: str) -> bool:
        """Return True when *path* is non-empty and exists; log an error otherwise."""
        if not path or not os.path.exists(path):
            logger.error(f"路径不存在: {path}")
            return False
        return True

    @staticmethod
    def _match_pattern(filename: str, pattern: str) -> bool:
        """Match *filename* against *pattern*.

        A pattern without ``*`` or ``?`` must equal the file name exactly;
        any other pattern is matched with ``fnmatch.fnmatch``.
        """
        if '*' not in pattern and '?' not in pattern:
            return filename == pattern
        import fnmatch
        return fnmatch.fnmatch(filename, pattern)

    @staticmethod
    def _process_file(
            root: str,
            filename: str,
            min_size: int,
            include_size: bool,
            result: List[Any]
    ) -> None:
        """Append one file to *result* if it is at least *min_size* bytes.

        Appends a ``{'name', 'path', 'size'}`` dict when *include_size* is
        true, otherwise just the bare file name. Files whose size cannot be
        read are logged and skipped.
        """
        file_path = os.path.join(root, filename)
        try:
            size = os.path.getsize(file_path)
        except OSError as e:
            logger.warning(f"无法访问文件: {file_path}, 错误: {e}")
            return
        if size < min_size:
            return
        if include_size:
            result.append({'name': filename, 'path': file_path, 'size': size})
        else:
            result.append(filename)

    @staticmethod
    def list_files(
            path: str,
            recursive: bool = True,
            include_size: bool = False,
            min_size: int = 0,
            pattern: Optional[str] = None
    ) -> Union[List[str], List[Dict[str, Any]]]:
        """List the files below a directory with optional filtering.

        Args:
            path: Directory to scan.
            recursive: Whether to descend into sub-directories.
            include_size: Whether to return size information.
            min_size: Minimum file size in bytes (default 0).
            pattern: Optional file-name pattern with simple wildcards
                (for example ``*.txt``).

        Returns:
            A list of file names when *include_size* is false, otherwise a
            list of dicts with ``name``, ``path`` and ``size`` keys. An empty
            list is returned when the path is missing or cannot be listed.
        """
        if not FileUtil._validate_path(path):
            return []

        logger.info(f"开始遍历目录: {path}, 递归: {recursive}, 最小文件大小: {min_size}字节")
        result: List[Any] = []

        if recursive:
            for root, _, files in os.walk(path):
                for filename in files:
                    if pattern and not FileUtil._match_pattern(filename, pattern):
                        continue
                    FileUtil._process_file(root, filename, min_size, include_size, result)
        else:
            # os.listdir raises when `path` is a regular file or unreadable;
            # honour the "empty list on failure" contract instead of crashing.
            try:
                entries = os.listdir(path)
            except OSError as e:
                logger.error(f"无法读取目录: {path}, 错误: {e}")
                return []
            for filename in entries:
                if not os.path.isfile(os.path.join(path, filename)):
                    continue
                if pattern and not FileUtil._match_pattern(filename, pattern):
                    continue
                FileUtil._process_file(path, filename, min_size, include_size, result)

        logger.info(f"目录遍历完成: {path}, 找到文件数: {len(result)}")
        return result

    @staticmethod
    def get_file_dict(
            path: str,
            key_by_name: bool = True,
            include_size: bool = True,
            recursive: bool = True,
            min_size: int = 0
    ) -> Dict[str, Any]:
        """Build a mapping of the files below a directory.

        Args:
            path: Directory to scan.
            key_by_name: Use the file name as key (otherwise the full path).
                With name keys, files sharing a name in different
                sub-directories overwrite each other.
            include_size: Whether each value carries the file size.
            recursive: Whether to descend into sub-directories.
            min_size: Minimum file size in bytes.

        Returns:
            Dict keyed by file name or full path. Each value is the file path,
            or a ``{'path', 'size'}`` dict when *include_size* is true.
        """
        if not FileUtil._validate_path(path):
            return {}

        logger.info(f"开始构建文件字典: {path}")
        file_dict: Dict[str, Any] = {}

        for root, _, files in os.walk(path):
            for filename in files:
                file_path = os.path.join(root, filename)
                try:
                    size = os.path.getsize(file_path)
                except OSError as e:
                    logger.warning(f"无法访问文件: {file_path}, 错误: {e}")
                    continue
                if size < min_size:
                    continue
                key = filename if key_by_name else file_path
                if include_size:
                    file_dict[key] = {'path': file_path, 'size': size}
                else:
                    file_dict[key] = file_path

            # os.walk yields the top directory first; stop there when not recursing.
            if not recursive:
                break

        logger.info(f"文件字典构建完成: {path}, 文件数: {len(file_dict)}")
        return file_dict

    @staticmethod
    def read_file(
            file_path: str,
            mode: str = 'r',
            encoding: str = 'utf-8',
            max_size: Optional[int] = None
    ) -> Optional[Union[str, bytes]]:
        """Read a file's content with an optional size limit.

        Args:
            file_path: File to read.
            mode: Open mode; a mode containing ``'b'`` returns bytes.
            encoding: Text encoding (text modes only).
            max_size: Maximum allowed file size in bytes; a larger file
                yields ``None``. ``None`` or ``0`` disables the limit.

        Returns:
            The file content, or ``None`` on any failure.
        """
        if not os.path.isfile(file_path):
            logger.error(f"文件不存在: {file_path}")
            return None

        try:
            # Inside the try: the file may vanish between the check above and here.
            file_size = os.path.getsize(file_path)
            if max_size and file_size > max_size:
                logger.error(f"文件大小超出限制: {file_path}, 大小: {file_size}字节, 限制: {max_size}字节")
                return None

            open_kwargs = {} if 'b' in mode else {'encoding': encoding}
            with open(file_path, mode, **open_kwargs) as f:
                return f.read(max_size) if max_size else f.read()
        except UnicodeDecodeError:
            logger.error(f"文件编码错误: {file_path}, 请尝试使用二进制模式读取")
        except PermissionError:
            logger.error(f"没有权限读取文件: {file_path}")
        except Exception as e:
            logger.error(f"读取文件失败: {file_path}, 错误: {e}")
        return None

    @staticmethod
    def write_file(
            file_path: str,
            content: Union[str, bytes],
            mode: str = 'w',
            encoding: str = 'utf-8',
            atomic: bool = False
    ) -> bool:
        """Write content to a file, optionally atomically.

        Missing parent directories are created first.

        Args:
            file_path: Destination file.
            content: Data to write.
            mode: Open mode; a mode containing ``'b'`` writes bytes.
            encoding: Text encoding (text modes only).
            atomic: Write to ``<file_path>.tmp`` first and move it over the
                destination with ``os.replace`` once the write succeeded.

        Returns:
            True on success, False on failure.
        """
        try:
            dir_path = os.path.dirname(file_path)
            if dir_path:
                os.makedirs(dir_path, exist_ok=True)

            open_kwargs = {} if 'b' in mode else {'encoding': encoding}

            if atomic:
                temp_path = f"{file_path}.tmp"
                try:
                    with open(temp_path, mode, **open_kwargs) as f:
                        f.write(content)
                    os.replace(temp_path, file_path)
                finally:
                    # After a successful replace the temp file is already gone;
                    # otherwise remove the leftover. Cleanup is best effort and
                    # must never mask the original error.
                    try:
                        os.remove(temp_path)
                    except FileNotFoundError:
                        pass
                    except OSError as e:
                        logger.debug(f"清理临时文件失败: {temp_path}, 错误: {e}")
            else:
                with open(file_path, mode, **open_kwargs) as f:
                    f.write(content)

            return True
        except PermissionError:
            logger.error(f"没有权限写入文件: {file_path}")
        except Exception as e:
            logger.error(f"写入文件失败: {file_path}, 错误: {e}")
        return False

    @staticmethod
    def get_file_info(file_path: str) -> Optional[Dict[str, Any]]:
        """Return metadata about a file.

        Args:
            file_path: File to inspect.

        Returns:
            Dict with ``path``, ``name``, ``size``, ``created_time``,
            ``modified_time``, ``access_time`` and ``is_readonly`` keys, or
            ``None`` when the file is missing or cannot be inspected.
        """
        if not os.path.isfile(file_path):
            logger.error(f"文件不存在: {file_path}")
            return None

        try:
            stat_info = os.stat(file_path)
            return {
                'path': file_path,
                'name': os.path.basename(file_path),
                'size': stat_info.st_size,
                # NOTE: st_ctime is the creation time on Windows but the last
                # metadata-change time on Unix.
                'created_time': stat_info.st_ctime,
                'modified_time': stat_info.st_mtime,
                'access_time': stat_info.st_atime,
                'is_readonly': not os.access(file_path, os.W_OK)
            }
        except Exception as e:
            logger.error(f"获取文件信息失败: {file_path}, 错误: {e}")
            return None
|
||||
|
||||
|
||||
# 保持向后兼容性的函数
|
||||
|
||||
def read_file(
        file_path: str,
        mode: str = 'r',
        encoding: str = 'utf-8'
) -> Optional[Union[str, bytes]]:
    """Backward-compatible wrapper: read a file through ``FileUtil.read_file``.

    No size limit is applied; returns the content, or ``None`` on failure.
    """
    content = FileUtil.read_file(file_path, mode=mode, encoding=encoding)
    return content
|
||||
|
||||
|
||||
def write_file(
        file_path: str,
        content: Union[str, bytes],
        mode: str = 'w',
        encoding: str = 'utf-8'
) -> bool:
    """Backward-compatible wrapper: write a file through ``FileUtil.write_file``.

    The write is non-atomic; returns True on success and False on failure.
    """
    succeeded = FileUtil.write_file(file_path, content, mode=mode, encoding=encoding)
    return succeeded
|
||||
|
||||
|
||||
def find_project_root(
        current_file: str,
        markers=("main.py", ".env", "requirements.txt")
) -> Path:
    """Locate the project root by walking up from a file's directory.

    The search starts at the directory containing *current_file* and moves
    towards the filesystem root. The first directory that contains any of
    *markers* wins.

    Args:
        current_file: Path of a file inside the project (typically ``__file__``).
        markers: File or directory names that identify the project root.

    Returns:
        The nearest directory containing a marker, or the directory of
        *current_file* when no marker is found.
    """
    start_dir = Path(current_file).parent
    # Include start_dir itself: Path.parents begins one level above it, so a
    # marker sitting next to current_file would otherwise be missed.
    for candidate in (start_dir, *start_dir.parents):
        if any((candidate / marker).exists() for marker in markers):
            return candidate
    return start_dir
|
||||
# PROJECT_ROOT = find_project_root(__file__)
|
||||
# sys.path.append(str(PROJECT_ROOT))
|
||||
4
favicon_app/utils/filetype/__init__.py
Normal file
4
favicon_app/utils/filetype/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from .filetype import guess_mime
|
||||
from .helpers import is_image
|
||||
188
favicon_app/utils/filetype/filetype.py
Normal file
188
favicon_app/utils/filetype/filetype.py
Normal file
@@ -0,0 +1,188 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from .helpers import IMAGE_MAGIC_NUMBERS, MIN_READ_BYTES
|
||||
|
||||
# 常见文件类型的MIME映射
|
||||
# Maps a MIME type to its usual file extension (without the leading dot).
MIME_TYPES = {
    # Images
    'image/jpeg': 'jpg',
    'image/png': 'png',
    'image/gif': 'gif',
    'image/bmp': 'bmp',
    'image/x-ms-bmp': 'bmp',  # legacy alias still sent by some servers
    'image/x-icon': 'ico',
    'image/vnd.microsoft.icon': 'ico',  # IANA-registered name for ICO
    'image/webp': 'webp',
    'image/svg+xml': 'svg',
    'image/tiff': 'tiff',
    'image/jp2': 'jp2',
    'image/avif': 'avif',
    # Documents
    'application/pdf': 'pdf',
    'application/msword': 'doc',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
    'application/vnd.ms-excel': 'xls',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
    'application/vnd.ms-powerpoint': 'ppt',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
    # Archives
    'application/zip': 'zip',
    'application/x-rar-compressed': 'rar',
    'application/gzip': 'gz',
    'application/x-tar': 'tar',
    # Audio
    'audio/mpeg': 'mp3',
    'audio/wav': 'wav',
    'audio/ogg': 'ogg',
    'audio/flac': 'flac',
    # Video
    'video/mp4': 'mp4',
    'video/avi': 'avi',
    'video/mpeg': 'mpeg',
    'video/quicktime': 'mov',
    # Text
    'text/plain': 'txt',
    'text/html': 'html',
    'text/css': 'css',
    'application/javascript': 'js',
    'text/javascript': 'js',
    'application/json': 'json',
    'text/xml': 'xml',
}
|
||||
|
||||
|
||||
# 猜测文件的MIME类型
|
||||
def guess_mime(data: bytes) -> str:
    """Guess the MIME type of binary data from its leading bytes.

    Args:
        data: Binary content to inspect.

    Returns:
        The guessed MIME type, or an empty string when it cannot be determined.
    """
    if not data or len(data) < 4:
        return ''

    # Most signatures live in the first few bytes.
    sample = data[:MIN_READ_BYTES]

    # ISO base media files (AVIF, MP4) start with a 4-byte box size; the
    # "ftyp" tag and brand follow at offset 4, so a startswith() test on the
    # sample can never match them.
    brand = data[4:12]
    if brand in (b'ftypavif', b'ftypavis'):
        return 'image/avif'
    if brand in (b'ftypisom', b'ftypmp42'):
        return 'video/mp4'

    # Known image signatures.
    for magic, mime_type in IMAGE_MAGIC_NUMBERS.items():
        if not sample.startswith(magic):
            continue
        if callable(mime_type):
            # Callable entries run an extra verification on the full data.
            if mime_type(data):
                if magic == b'RIFF':
                    return 'image/webp'
                if magic == b'ftypavif':
                    return 'image/avif'
            continue
        # An XML declaration alone does not make the document an SVG.
        if magic == b'<?xml' and b'<svg' not in data:
            continue
        return mime_type

    # Other common formats.
    if sample.startswith(b'%PDF'):
        return 'application/pdf'
    if sample.startswith((b'PK\x03\x04', b'PK\x05\x06', b'PK\x07\x08')):
        return 'application/zip'
    if sample.startswith(b'Rar!'):
        return 'application/x-rar-compressed'
    if sample.startswith(b'\x1f\x8b'):
        return 'application/gzip'
    # The ustar magic sits at offset 257, beyond the sample, so look at `data`.
    if data[257:262] == b'ustar':
        return 'application/x-tar'
    # MP3 with an ID3v2 tag.
    if sample.startswith(b'ID3'):
        return 'audio/mpeg'

    sample_str = sample.decode('utf-8', errors='ignore')

    # JSON: use the sample only as a cheap pre-filter, then parse the complete
    # payload -- the truncated sample is almost never a valid document.
    if sample_str.lstrip('\ufeff').lstrip()[:1] in ('{', '['):
        import json
        try:
            json.loads(data)
            return 'application/json'
        except (ValueError, RecursionError):
            pass

    # XML (simple check).
    if sample_str.startswith('<?xml') or (sample_str.startswith('<') and '>' in sample_str):
        return 'text/xml'

    # Plain text heuristic: valid UTF-8 with few control characters. An
    # incremental decoder tolerates a multi-byte character cut at the sample edge.
    import codecs
    try:
        codecs.getincrementaldecoder('utf-8')().decode(sample, final=len(data) <= len(sample))
    except UnicodeDecodeError:
        return ''
    control_chars = sum(1 for c in sample if c < 32 and c not in (9, 10, 13))
    if control_chars / len(sample) < 0.3:
        return 'text/plain'

    return ''
|
||||
|
||||
|
||||
# 获取文件扩展名
|
||||
def get_extension(mime_type: str) -> str:
    """Return the usual file extension for a MIME type.

    Parameters such as ``; charset=utf-8`` and letter case are ignored.

    Args:
        mime_type: MIME type string.

    Returns:
        The extension without the leading dot, or an empty string when the
        type is empty or unknown.
    """
    if not mime_type:
        return ''
    base_type = mime_type.split(';', 1)[0].strip().lower()
    return MIME_TYPES.get(base_type, '')
|
||||
|
||||
|
||||
# 猜测文件扩展名
|
||||
def guess_extension(data: bytes) -> str:
    """Guess a file extension from binary data.

    Args:
        data: Binary content to inspect.

    Returns:
        The guessed extension without the leading dot, or an empty string
        when it cannot be determined.
    """
    return get_extension(guess_mime(data))
|
||||
|
||||
|
||||
# 检测是否为特定类型的文件
|
||||
def is_type(data: bytes, mime_type: str) -> bool:
    """Check whether binary data is detected as a given MIME type.

    Args:
        data: Binary content to inspect.
        mime_type: MIME type to compare against (exact string match).

    Returns:
        True when the detected type equals *mime_type*, otherwise False.
    """
    detected = guess_mime(data)
    return mime_type == detected
|
||||
104
favicon_app/utils/filetype/helpers.py
Normal file
104
favicon_app/utils/filetype/helpers.py
Normal file
@@ -0,0 +1,104 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import struct
|
||||
|
||||
# 图片文件的魔术数字(文件头)
|
||||
IMAGE_MAGIC_NUMBERS = {
|
||||
# JPEG
|
||||
b'\xff\xd8\xff': 'image/jpeg',
|
||||
# PNG
|
||||
b'\x89PNG\r\n\x1a\n': 'image/png',
|
||||
# GIF
|
||||
b'GIF87a': 'image/gif',
|
||||
b'GIF89a': 'image/gif',
|
||||
# BMP
|
||||
b'BM': 'image/bmp',
|
||||
# ICO
|
||||
b'\x00\x00\x01\x00': 'image/x-icon',
|
||||
# WebP
|
||||
b'RIFF': lambda data: _is_webp(data) if len(data) >= 12 else False,
|
||||
# SVG (基于XML)
|
||||
b'<?xml': 'image/svg+xml',
|
||||
b'<svg': 'image/svg+xml',
|
||||
# TIFF
|
||||
b'II\x2a\x00': 'image/tiff',
|
||||
b'MM\x00\x2a': 'image/tiff',
|
||||
# JPEG2000
|
||||
b'\x00\x00\x00\x0cjP\x1a\x00\x00\x00\x00\x00': 'image/jp2',
|
||||
# AVIF
|
||||
b'ftypavif': lambda data: _is_avif(data) if len(data) >= 12 else False,
|
||||
}
|
||||
|
||||
# 最小需要读取的字节数,确保能检测所有支持的文件类型
|
||||
MIN_READ_BYTES = 32
|
||||
|
||||
|
||||
# 检测是否为WebP文件
|
||||
def _is_webp(data: bytes) -> bool:
|
||||
if len(data) < 12:
|
||||
return False
|
||||
# WebP文件格式:RIFF[4字节长度]WEBP
|
||||
return data[8:12] == b'WEBP'
|
||||
|
||||
|
||||
# 检测是否为AVIF文件
|
||||
def _is_avif(data: bytes) -> bool:
|
||||
if len(data) < 12:
|
||||
return False
|
||||
# AVIF文件格式:ftypavif[4字节版本]...
|
||||
return data[4:12] == b'ftypavif' or data[4:12] == b'ftypavis'
|
||||
|
||||
|
||||
# 检测数据是否为图片文件
|
||||
def is_image(data: bytes) -> bool:
|
||||
"""
|
||||
检测给定的二进制数据是否为图片文件
|
||||
|
||||
Args:
|
||||
data: 要检测的二进制数据
|
||||
|
||||
Returns:
|
||||
bool: 如果是图片文件返回True,否则返回False
|
||||
"""
|
||||
if not data or len(data) < 4:
|
||||
return False
|
||||
|
||||
# 截取足够长的数据用于检测
|
||||
sample = data[:MIN_READ_BYTES]
|
||||
|
||||
# 检查所有已知的图片文件头
|
||||
for magic, mime_type in IMAGE_MAGIC_NUMBERS.items():
|
||||
# 检查数据长度是否足够
|
||||
if len(sample) < len(magic):
|
||||
continue
|
||||
|
||||
# 检查文件头是否匹配
|
||||
if sample.startswith(magic):
|
||||
# 如果是函数(如WebP和AVIF的特殊检测),则调用函数进行进一步验证
|
||||
if callable(mime_type):
|
||||
if mime_type(data):
|
||||
return True
|
||||
else:
|
||||
return True
|
||||
|
||||
# 检查是否为某些特殊格式的图片
|
||||
# 例如一些可能缺少标准文件头的图片
|
||||
try:
|
||||
# 检查是否为常见图片宽度/高度字段的位置
|
||||
# 这是一个启发式方法,不是100%准确
|
||||
if len(data) >= 24:
|
||||
# 检查JPEG的SOF marker后的尺寸信息
|
||||
for i in range(4, len(data) - 16):
|
||||
if data[i] == 0xFF and data[i + 1] in [0xC0, 0xC1, 0xC2, 0xC3, 0xC5, 0xC6, 0xC7, 0xC9, 0xCA, 0xCB, 0xCD,
|
||||
0xCE, 0xCF]:
|
||||
# 找到SOF marker,尝试读取高度和宽度
|
||||
if i + 8 < len(data):
|
||||
height = struct.unpack('!H', data[i + 5:i + 7])[0]
|
||||
width = struct.unpack('!H', data[i + 7:i + 9])[0]
|
||||
# 合理的图片尺寸
|
||||
if 1 <= height <= 10000 and 1 <= width <= 10000:
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return False
|
||||
274
favicon_app/utils/header.py
Normal file
274
favicon_app/utils/header.py
Normal file
@@ -0,0 +1,274 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
import random
|
||||
import threading
|
||||
from typing import Dict, Optional
|
||||
|
||||
# 配置日志
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class HeaderConfig:
    """HTTP request-header manager.

    Builds header dictionaries from named templates, adds a random
    User-Agent, and layers instance-wide and per-call custom headers on top.
    """

    # Default User-Agent pool; each instance works on its own copy.
    _USER_AGENTS = [
        # Firefox
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:112.0) Gecko/20100101 Firefox/112.0',
        'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:110.0) Gecko/20100101 Firefox/110.0',
        # Chrome
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',
        # Edge
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.63',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.70',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36 Edg/103.0.1264.77',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.62',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.78',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.58',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
        # macOS
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
        # iOS
        'Mozilla/5.0 (iPhone; CPU iPhone OS 17_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Mobile/15E148 Safari/604.1',
        'Mozilla/5.0 (iPad; CPU OS 17_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Mobile/15E148 Safari/604.1',
        # Android
        'Mozilla/5.0 (Linux; Android 13; SM-G998B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 14; Pixel 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Mobile Safari/537.36'
    ]

    # Content types accepted as "image" responses.
    IMAGE_TYPES = [
        'image/gif',
        'image/jpeg',
        'image/png',
        'image/svg+xml',
        'image/tiff',
        'image/vnd.wap.wbmp',
        'image/webp',
        'image/x-icon',
        'image/x-jng',
        'image/x-ms-bmp',
        'image/vnd.microsoft.icon',
        'image/vnd.dwg',
        'image/vnd.dxf',
        'image/jpx',
        'image/jp2',
        'image/apng',
        'image/bmp',
        'image/vnd.ms-photo',
        'image/vnd.adobe.photoshop',
        'image/heic',
        'image/avif',
        'image/jfif',
        'image/pjpeg',
        'image/vnd.adobe.illustrator',
        'application/pdf',
        'application/x-pdf'
    ]

    # Default content type.
    CONTENT_TYPE = 'application/json; charset=utf-8'

    # Header templates for different request scenarios.
    _HEADER_TEMPLATES = {
        'default': {
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'Accept-Encoding': 'gzip, deflate',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
            'Connection': 'keep-alive'
        },
        'image': {
            'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive'
        },
        'api': {
            'Accept': 'application/json, application/xml',
            'Content-Type': CONTENT_TYPE,
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive'
        }
    }

    def __init__(self):
        # Re-entrant lock guarding the mutable state below.
        self._lock = threading.RLock()
        # Custom headers applied to every generated header set.
        self._custom_headers = {}
        # Per-instance copy so add_user_agent() never mutates the class-level list.
        self._user_agents = list(self._USER_AGENTS)

    def get_random_user_agent(self) -> str:
        """Return a random User-Agent string from this instance's pool."""
        with self._lock:
            return random.choice(self._user_agents)

    def get_headers(
            self,
            template: str = 'default',
            include_user_agent: bool = True,
            custom_headers: Optional[Dict[str, str]] = None
    ) -> Dict[str, str]:
        """Build a request-header dictionary.

        Args:
            template: Template name: ``'default'``, ``'image'`` or ``'api'``.
                Unknown names fall back to ``'default'``.
            include_user_agent: Whether to add a random User-Agent.
            custom_headers: Per-call headers; they override everything else.

        Returns:
            A new dict made of the template, the random User-Agent, the
            instance-wide custom headers and *custom_headers*, applied in
            that order.
        """
        with self._lock:
            base = self._HEADER_TEMPLATES.get(template, self._HEADER_TEMPLATES['default'])
            headers = base.copy()

            if include_user_agent:
                headers['User-Agent'] = self.get_random_user_agent()
            if self._custom_headers:
                headers.update(self._custom_headers)
            if custom_headers:
                headers.update(custom_headers)

            return headers

    def set_custom_header(self, key: str, value: str) -> None:
        """Set a custom header applied to all subsequently generated headers.

        Empty keys or values are rejected with a warning.
        """
        if not key or not value:
            logger.warning("尝试设置空的请求头键或值")
            return

        with self._lock:
            self._custom_headers[key] = value
            logger.debug(f"已设置自定义请求头: {key} = {value}")

    def remove_custom_header(self, key: str) -> None:
        """Remove a custom header; unknown keys are ignored."""
        with self._lock:
            if key in self._custom_headers:
                del self._custom_headers[key]
                logger.debug(f"已移除自定义请求头: {key}")

    def clear_custom_headers(self) -> None:
        """Remove all custom headers."""
        with self._lock:
            self._custom_headers.clear()
            logger.debug("已清除所有自定义请求头")

    def is_image_content_type(self, content_type: str) -> bool:
        """Return True when *content_type* names one of ``IMAGE_TYPES``.

        Parameters such as ``; charset=utf-8`` and letter case are ignored.
        """
        if not content_type:
            return False

        base_type = content_type.split(';')[0].strip().lower()
        return base_type in self.IMAGE_TYPES

    def add_user_agent(self, user_agent: str) -> None:
        """Add a User-Agent to this instance's pool; empty and duplicate values are ignored."""
        if not user_agent:
            return

        with self._lock:
            # Membership test and append happen under the same lock so two
            # threads cannot both add the same value.
            if user_agent in self._user_agents:
                return
            self._user_agents.append(user_agent)
            logger.debug("已添加自定义User-Agent")

    def get_specific_headers(
            self,
            url: Optional[str] = None,
            referer: Optional[str] = None,
            content_type: Optional[str] = None
    ) -> Dict[str, str]:
        """Build default headers tailored to one request.

        Args:
            url: Target URL, used to set ``Host``.
            referer: Referring page URL.
            content_type: Value for ``Content-Type``.

        Returns:
            The default-template headers plus the requested extras.
        """
        headers = self.get_headers()

        if url:
            try:
                from urllib.parse import urlparse
                # Drop any "user:password@" prefix: credentials must not be
                # sent in the Host header.
                host = urlparse(url).netloc.rpartition('@')[2]
                if host:
                    headers['Host'] = host
            except Exception as e:
                logger.warning(f"解析URL失败: {e}")

        if referer:
            headers['Referer'] = referer

        if content_type:
            headers['Content-Type'] = content_type

        return headers
|
||||
|
||||
|
||||
# 创建全局HeaderConfig实例,用于向后兼容
|
||||
# Backward-compatible constants re-exported from HeaderConfig.
content_type = HeaderConfig.CONTENT_TYPE
image_type = HeaderConfig.IMAGE_TYPES

# Shared HeaderConfig instance used by the legacy helper functions.
_header_config = HeaderConfig()

# Most recently generated header dict; holds a placeholder until get_header() runs.
_headers = {'User-Agent': '-'}
|
||||
|
||||
|
||||
def get_header():
    """Backward-compatible helper: build default headers.

    The result is also stored in the module-level ``_headers`` dict.
    """
    global _headers
    fresh_headers = _header_config.get_headers(template='default')
    _headers = fresh_headers
    return fresh_headers
|
||||
|
||||
|
||||
def set_header(key: str, value: str):
    """Backward-compatible helper: register a custom header on the shared config.

    Empty keys or values are ignored.
    """
    if not (key and value):
        return
    _header_config.set_custom_header(key, value)
|
||||
|
||||
|
||||
def del_header(key: str):
    """Backward-compatible helper: remove a custom header from the shared config."""
    _header_config.remove_custom_header(key=key)
|
||||
|
||||
|
||||
def get_user_agent():
    """Backward-compatible helper: return the User-Agent of the last generated headers.

    Reads the module-level ``_headers`` dict; returns an empty string when
    it has no ``User-Agent`` entry.
    """
    user_agent = _headers.get('User-Agent', '')
    return user_agent
|
||||
|
||||
|
||||
def set_user_agent(ua: str):
    """Backward-compatible helper: pin the User-Agent as a custom header.

    An empty value is ignored.
    """
    if not ua:
        return
    _header_config.set_custom_header('User-Agent', ua)
|
||||
143
favicon_app/utils/redis_pool.py
Normal file
143
favicon_app/utils/redis_pool.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
from typing import AsyncGenerator, Optional
|
||||
|
||||
from redis.asyncio import ConnectionPool, Redis
|
||||
|
||||
import setting
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Connection URL taken from the project settings module.
REDIS_URL = setting.REDIS_URL

# Redis key prefixes.
FAILED_DOMAINS_PREFIX = "favicon:failed_domain:"  # failed domains
TASK_QUEUE_PREFIX = "favicon:task_queue:"  # task queues
PROCESSING_SET_PREFIX = "favicon:processing:"  # sets of in-progress tasks
ICON_QUEUE_PREFIX = "favicon:icon_queue:"

# Shared connection pool; responses are decoded to str.
pool = ConnectionPool.from_url(
    REDIS_URL,
    decode_responses=True,
    max_connections=200,
)
|
||||
|
||||
|
||||
async def get_redis() -> AsyncGenerator[Redis, None]:
    """Yield one Redis client bound to the shared connection pool.

    The client's async context is exited when the generator is resumed
    after the yield or closed.
    """
    client = Redis(connection_pool=pool)
    async with client as conn:
        yield conn
|
||||
|
||||
|
||||
async def set_cache(key: str, value: str | int, ttl: Optional[int] = None) -> None:
    """Store a value in Redis.

    Args:
        key: Redis key; an empty key is ignored.
        value: Value to store.
        ttl: Expiry in seconds; ``None`` stores the value without expiry.

    Errors are logged and not raised.
    """
    if not key:
        return

    try:
        # Use the client as a context manager directly so it is cleaned up
        # deterministically, including when the command raises.
        async with Redis(connection_pool=pool) as redis:
            await redis.set(key, value, ex=ttl)
    except Exception as e:
        logger.error(f"存入redis时出错:{e}")
|
||||
|
||||
|
||||
async def get_cache(key: str) -> Optional[str | int]:
    """Read a value from Redis.

    Args:
        key: Redis key; an empty key yields ``None``.

    Returns:
        The stored value, or ``None`` when the key is missing or the lookup
        failed (the error is logged).
    """
    if not key:
        return None

    try:
        # Returning from inside `async for` over an async generator would
        # leave it suspended until garbage collection; the context manager
        # releases the client on every exit path.
        async with Redis(connection_pool=pool) as redis:
            return await redis.get(key)
    except Exception as e:
        logger.error(f"读取redis时出错:{e}")
    return None
|
||||
|
||||
|
||||
async def exist_cache(key: str) -> bool:
    """Return True when *key* exists in Redis.

    An empty key or a failed lookup (logged) yields False.
    """
    if not key:
        return False

    try:
        # Context manager instead of `async for` + return: the client is
        # released on every exit path rather than at garbage collection.
        async with Redis(connection_pool=pool) as redis:
            return await redis.exists(key) > 0
    except Exception as e:
        logger.error(f"读取redis时出错:{e}")
    return False
|
||||
|
||||
|
||||
async def remove_cache(key: str) -> None:
    """Delete *key* from Redis.

    An empty key is ignored; errors are logged and not raised.
    """
    if not key:
        return

    try:
        # Context manager so the client is also released when delete() raises.
        async with Redis(connection_pool=pool) as redis:
            await redis.delete(key)
    except Exception as e:
        logger.error(f"删除redis时出错:{e}")
|
||||
|
||||
|
||||
async def get_cache_size(cache_name: str = "default") -> int:
    """Return the length of the Redis list stored at *cache_name*.

    Returns 0 when the lookup fails (the error is logged).
    """
    try:
        # Context manager instead of `async for` + return: the client is
        # released on every exit path rather than at garbage collection.
        async with Redis(connection_pool=pool) as redis:
            return await redis.llen(cache_name)
    except Exception as e:
        logger.error(f"获取队列大小时出错:{e}")
    return 0
|
||||
|
||||
|
||||
async def set_failed_domain(domain: str, expire_seconds: int = setting.time_of_7_days) -> None:
    """Record a failed domain in Redis with an expiry.

    Args:
        domain: The domain that failed; an empty value is ignored.
        expire_seconds: Expiry in seconds; defaults to ``setting.time_of_7_days``.

    Errors are logged and not raised.
    """
    if not domain:
        return

    redis_key = f"{FAILED_DOMAINS_PREFIX}{domain}"
    try:
        # Context manager so the client is also released when set() raises.
        async with Redis(connection_pool=pool) as redis:
            await redis.set(redis_key, domain, ex=expire_seconds)
            logger.debug(f"已将失败域名 {domain} 存入Redis,过期时间:{expire_seconds}秒")
    except Exception as e:
        logger.error(f"将失败域名存入Redis时出错:{e}")
|
||||
|
||||
|
||||
async def is_domain_failed(domain: str) -> bool:
    """Check whether a domain is recorded as failed in Redis.

    Args:
        domain: Domain to check.

    Returns:
        True when the domain is recorded as failed. False when it is not,
        when *domain* is empty, or when the Redis query failed (logged).
    """
    if not domain:
        return False

    redis_key = f"{FAILED_DOMAINS_PREFIX}{domain}"
    try:
        # Context manager instead of `async for` + return: the client is
        # released on every exit path rather than at garbage collection.
        async with Redis(connection_pool=pool) as redis:
            return await redis.exists(redis_key) > 0
    except Exception as e:
        logger.error(f"检查域名是否失败时出错:{e}")
    return False
|
||||
|
||||
|
||||
async def delete_failed_domain(domain: str) -> None:
    """Remove a failed-domain record from Redis.

    Args:
        domain: Domain whose record is removed; an empty value is ignored.

    Errors are logged and not raised.
    """
    if not domain:
        return

    redis_key = f"{FAILED_DOMAINS_PREFIX}{domain}"
    try:
        # Context manager so the client is also released when delete() raises.
        async with Redis(connection_pool=pool) as redis:
            await redis.delete(redis_key)
            logger.debug(f"已从Redis删除失败域名 {domain}")
    except Exception as e:
        logger.error(f"从Redis删除失败域名时出错:{e}")
|
||||
Reference in New Issue
Block a user