favicon-api-v3/favicon_app/utils/header.py

275 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
import logging
import random
import threading
from typing import Dict, Optional
# Module-level logger, named after this module; used by the code in this file.
logger = logging.getLogger(__name__)
class HeaderConfig:
    """Build HTTP request headers from templates, a User-Agent pool and overrides.

    A header dict is assembled in this order, later steps overriding earlier
    ones: the chosen template, a random User-Agent (optional), the custom
    headers stored on the instance, and finally the per-call custom headers.
    Access to the instance's custom headers is guarded by a re-entrant lock.
    """

    # Pool of browser User-Agent strings that get_random_user_agent() draws from.
    # NOTE: this is a class attribute, so add_user_agent() extends the pool for
    # every HeaderConfig instance, not just the one it was called on.
    _USER_AGENTS = [
        # Firefox
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:112.0) Gecko/20100101 Firefox/112.0',
        'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:110.0) Gecko/20100101 Firefox/110.0',
        # Chrome
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',
        # Edge
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.63',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.70',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36 Edg/103.0.1264.77',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.62',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.78',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.58',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
        # macOS
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Safari/605.1.15',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
        # iOS
        'Mozilla/5.0 (iPhone; CPU iPhone OS 17_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Mobile/15E148 Safari/604.1',
        'Mozilla/5.0 (iPad; CPU OS 17_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Mobile/15E148 Safari/604.1',
        # Android
        'Mozilla/5.0 (Linux; Android 13; SM-G998B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 14; Pixel 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Mobile Safari/537.36'
    ]

    # Content types that is_image_content_type() accepts. Merged from two
    # earlier versions of the list plus more common image formats; note that
    # it also contains the two PDF types at the end.
    IMAGE_TYPES = [
        'image/gif',
        'image/jpeg',
        'image/png',
        'image/svg+xml',
        'image/tiff',
        'image/vnd.wap.wbmp',
        'image/webp',
        'image/x-icon',
        'image/x-jng',
        'image/x-ms-bmp',
        'image/vnd.microsoft.icon',
        'image/vnd.dwg',
        'image/vnd.dxf',
        'image/jpx',
        'image/apng',
        'image/bmp',
        'image/vnd.ms-photo',
        'image/vnd.adobe.photoshop',
        'image/heic',
        'image/avif',
        'image/jfif',
        'image/pjpeg',
        'image/vnd.adobe.illustrator',
        'application/pdf',
        'application/x-pdf'
    ]

    # Default content type (used by the 'api' template below).
    CONTENT_TYPE = 'application/json; charset=utf-8'

    # Header templates for the supported scenarios, keyed by template name.
    _HEADER_TEMPLATES = {
        'default': {
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'Accept-Encoding': 'gzip, deflate',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
            'Connection': 'keep-alive'
        },
        'image': {
            'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive'
        },
        'api': {
            'Accept': 'application/json, application/xml',
            'Content-Type': CONTENT_TYPE,
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive'
        }
    }

    def __init__(self):
        """Create a config with no custom headers and its own re-entrant lock."""
        # Re-entrant because get_headers() calls get_random_user_agent() while
        # already holding the lock.
        self._lock = threading.RLock()
        # Custom headers applied to every dict produced by get_headers().
        self._custom_headers = {}

    def get_random_user_agent(self) -> str:
        """Return a User-Agent string picked at random from the pool."""
        with self._lock:
            return random.choice(self._USER_AGENTS)

    def get_headers(
        self,
        template: str = 'default',
        include_user_agent: bool = True,
        custom_headers: Optional[Dict[str, str]] = None
    ) -> Dict[str, str]:
        """Build a new request-header dict.

        Args:
            template: Template name: 'default', 'image' or 'api'. An unknown
                name falls back to the 'default' template.
            include_user_agent: Whether to add a random 'User-Agent' header.
            custom_headers: Per-call headers; these override everything else.

        Returns:
            A new dict; the class-level templates are never modified.
        """
        with self._lock:
            # Shallow copy is enough: template values are plain strings.
            base = self._HEADER_TEMPLATES.get(template, self._HEADER_TEMPLATES['default'])
            headers = base.copy()
            if include_user_agent:
                headers['User-Agent'] = self.get_random_user_agent()
            # Instance-wide custom headers first, then the per-call ones so
            # that the caller's values win.
            if self._custom_headers:
                headers.update(self._custom_headers)
            if custom_headers:
                headers.update(custom_headers)
            return headers

    def set_custom_header(self, key: str, value: str) -> None:
        """Store a custom header that is applied to all subsequently built headers.

        An empty key or value is rejected with a warning and nothing is stored.
        """
        if not key or not value:
            logger.warning("尝试设置空的请求头键或值")
            return
        with self._lock:
            self._custom_headers[key] = value
        logger.debug("已设置自定义请求头: %s = %s", key, value)

    def remove_custom_header(self, key: str) -> None:
        """Remove a stored custom header; a key that is not stored is ignored."""
        with self._lock:
            if key not in self._custom_headers:
                return
            del self._custom_headers[key]
        logger.debug("已移除自定义请求头: %s", key)

    def clear_custom_headers(self) -> None:
        """Remove every stored custom header."""
        with self._lock:
            self._custom_headers.clear()
        logger.debug("已清除所有自定义请求头")

    def is_image_content_type(self, content_type: str) -> bool:
        """Return True if ``content_type`` is one of ``IMAGE_TYPES``.

        Parameters after ';' (e.g. 'image/png; charset=utf-8') are ignored and
        the comparison is case-insensitive. An empty value returns False.
        """
        if not content_type:
            return False
        base_type = content_type.split(';')[0].strip().lower()
        return base_type in self.IMAGE_TYPES

    def add_user_agent(self, user_agent: str) -> None:
        """Add a User-Agent to the pool; empty or already-present values are ignored."""
        if not user_agent:
            return
        with self._lock:
            # Membership test and append happen under the same lock so two
            # threads using this instance cannot both add the same value.
            # NOTE: the pool is class-level while the lock is per instance.
            if user_agent in self._USER_AGENTS:
                return
            self._USER_AGENTS.append(user_agent)
        logger.debug("已添加自定义User-Agent")

    def get_specific_headers(
        self,
        url: Optional[str] = None,
        referer: Optional[str] = None,
        content_type: Optional[str] = None,
        template: str = 'default'
    ) -> Dict[str, str]:
        """Build headers and fill in Host / Referer / Content-Type when given.

        Args:
            url: Target URL; its host[:port] part becomes the 'Host' header.
                A URL that cannot be parsed is logged and 'Host' is left unset.
            referer: Value for the 'Referer' header.
            content_type: Value for the 'Content-Type' header.
            template: Template passed to get_headers(); defaults to 'default'.

        Returns:
            The resulting header dict.
        """
        # Local import: urlparse is only needed by this method.
        from urllib.parse import urlparse

        headers = self.get_headers(template=template)

        if url:
            try:
                # netloc may carry 'user:password@' userinfo, which must not be
                # sent in the Host header; keep only what follows the last '@'.
                host = urlparse(url).netloc.rpartition('@')[2]
            except (ValueError, TypeError, AttributeError) as exc:
                # Best effort: a malformed or non-string URL only skips Host.
                logger.warning("解析URL失败: %s", exc)
            else:
                if host:
                    headers['Host'] = host

        if referer:
            headers['Referer'] = referer
        if content_type:
            headers['Content-Type'] = content_type
        return headers
# Module-level HeaderConfig instance, kept for backward compatibility
_header_config = HeaderConfig()
# Module-level headers dict, kept for backward compatibility
_headers = {'User-Agent': '-'}
# Backward-compatible constants and functions
# (image_type refers to the same list object as HeaderConfig.IMAGE_TYPES)
content_type = HeaderConfig.CONTENT_TYPE
image_type = HeaderConfig.IMAGE_TYPES
def get_header():
    """Backward-compatible helper: build and return default-template headers.

    The freshly built dict also replaces the module-level ``_headers``.
    """
    global _headers
    _headers = _header_config.get_headers('default')
    return _headers
def set_header(key: str, value: str):
    """Backward-compatible helper: store a custom header on the shared config.

    Does nothing when either ``key`` or ``value`` is empty.
    """
    if not key or not value:
        return
    _header_config.set_custom_header(key, value)
def del_header(key: str):
    """Backward-compatible helper: remove a custom header from the shared config."""
    _header_config.remove_custom_header(key)
def get_user_agent():
    """Backward-compatible helper: return the User-Agent held in ``_headers``.

    Returns an empty string when ``_headers`` has no 'User-Agent' entry.
    """
    current_ua = _headers.get('User-Agent', '')
    return current_ua
def set_user_agent(ua: str):
    """Backward-compatible helper: store ``ua`` as the custom 'User-Agent' header.

    Does nothing when ``ua`` is empty.
    """
    if not ua:
        return
    _header_config.set_custom_header('User-Agent', ua)