25.09.10
parent
f0b8929035
commit
cb9b9c2d35
|
@ -22,6 +22,7 @@ urllib3.disable_warnings()
|
||||||
logging.captureWarnings(True)
|
logging.captureWarnings(True)
|
||||||
# 配置日志
|
# 配置日志
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
# warnings.filterwarnings("ignore", category=RuntimeWarning)
|
||||||
|
|
||||||
# 创建requests会话池
|
# 创建requests会话池
|
||||||
requests_session = requests.Session()
|
requests_session = requests.Session()
|
||||||
|
@ -103,7 +104,7 @@ class Favicon:
|
||||||
self.scheme = 'http'
|
self.scheme = 'http'
|
||||||
|
|
||||||
# 检查域名合法性
|
# 检查域名合法性
|
||||||
if self.domain and not self._check_url(self.domain):
|
if self.domain and not _check_url(self.domain):
|
||||||
self.domain = None
|
self.domain = None
|
||||||
|
|
||||||
# 生成域名MD5哈希值
|
# 生成域名MD5哈希值
|
||||||
|
@ -165,6 +166,21 @@ class Favicon:
|
||||||
self._get_icon_url(icon_path)
|
self._get_icon_url(icon_path)
|
||||||
return self.icon_url
|
return self.icon_url
|
||||||
|
|
||||||
|
def get_base_url(self) -> Optional[str]:
    """Build the site's base URL in the form ``scheme://domain[:port]``.

    Returns:
        The base URL, or ``None`` when ``self.domain`` is empty or contains
        no dot.
    """
    if not self.domain or '.' not in self.domain:
        return None

    base = f"{self.scheme}://{self.domain}"
    # Drop the port only when it is the default for *this* scheme. An explicit
    # 443 on http (or 80 on https) has to stay in the URL, otherwise the
    # request would be sent to a different port than the one parsed.
    default_port = {'http': 80, 'https': 443}.get(self.scheme)
    if self.port and self.port != default_port:
        base += f":{self.port}"
    return base
|
||||||
|
|
||||||
async def get_icon_file(self, icon_path: str, default: bool = False) -> Tuple[Optional[bytes], Optional[str]]:
|
async def get_icon_file(self, icon_path: str, default: bool = False) -> Tuple[Optional[bytes], Optional[str]]:
|
||||||
"""获取图标文件内容和类型
|
"""获取图标文件内容和类型
|
||||||
|
|
||||||
|
@ -189,7 +205,7 @@ class Favicon:
|
||||||
_content = base64.b64decode(data_uri[-1])
|
_content = base64.b64decode(data_uri[-1])
|
||||||
_ct = data_uri[0].split(';')[0].split(':')[-1]
|
_ct = data_uri[0].split(';')[0].split(':')[-1]
|
||||||
else:
|
else:
|
||||||
_content, _ct = await self._req_get(self.icon_url, domain=self.domain)
|
_content, _ct = await _req_get(self.icon_url, domain=self.domain)
|
||||||
|
|
||||||
# 验证是否为图片
|
# 验证是否为图片
|
||||||
# image/* application/x-ico
|
# image/* application/x-ico
|
||||||
|
@ -204,21 +220,6 @@ class Favicon:
|
||||||
|
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
def get_base_url(self) -> Optional[str]:
    """Build the site's base URL in the form ``scheme://domain[:port]``.

    Returns:
        The base URL, or ``None`` when ``self.domain`` is empty or contains
        no dot.
    """
    if not self.domain or '.' not in self.domain:
        return None

    base = f"{self.scheme}://{self.domain}"
    # Drop the port only when it is the default for *this* scheme. An explicit
    # 443 on http (or 80 on https) has to stay in the URL, otherwise the
    # request would be sent to a different port than the one parsed.
    default_port = {'http': 80, 'https': 443}.get(self.scheme)
    if self.port and self.port != default_port:
        base += f":{self.port}"
    return base
|
|
||||||
|
|
||||||
async def req_get(self) -> Optional[bytes]:
|
async def req_get(self) -> Optional[bytes]:
|
||||||
"""获取网站首页内容
|
"""获取网站首页内容
|
||||||
|
|
||||||
|
@ -229,7 +230,7 @@ class Favicon:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
_url = self.get_base_url()
|
_url = self.get_base_url()
|
||||||
_content, _ct = await self._req_get(_url, domain=self.domain)
|
_content, _ct = await _req_get(_url, domain=self.domain)
|
||||||
|
|
||||||
# 验证类型并检查大小
|
# 验证类型并检查大小
|
||||||
if _ct and ('text' in _ct or 'html' in _ct or 'xml' in _ct):
|
if _ct and ('text' in _ct or 'html' in _ct or 'xml' in _ct):
|
||||||
|
@ -240,13 +241,51 @@ class Favicon:
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
async def _req_get(
|
def _check_internal(domain: str) -> bool:
|
||||||
url: str,
|
"""检查网址是否非内网地址
|
||||||
|
|
||||||
|
Args:
|
||||||
|
domain: 域名
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True: 非内网;False: 是内网/无法解析
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# 检查是否为IP地址
|
||||||
|
if domain.replace('.', '').isdigit():
|
||||||
|
return not ipaddress.ip_address(domain).is_private
|
||||||
|
else:
|
||||||
|
# 解析域名获取IP地址
|
||||||
|
ips = socket.getaddrinfo(domain, None)
|
||||||
|
for ip_info in ips:
|
||||||
|
ip = ip_info[4][0]
|
||||||
|
if '.' in ip:
|
||||||
|
if not ipaddress.ip_address(ip).is_private:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
redis_pool.set_failed_domain(domain, setting.time_of_1_days)
|
||||||
|
logger.error('解析域名出错: %s, 错误: %s', domain, str(e))
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _check_url(domain: str) -> bool:
    """Check that a domain is well-formed and not an internal address.

    Args:
        domain: Host name to validate.

    Returns:
        ``True`` only when ``domain`` matches ``_pattern_domain`` and
        ``_check_internal(domain)`` is truthy; ``False`` otherwise.
    """
    # The regex test runs first, so _check_internal is skipped for names that
    # fail the pattern. bool() keeps the declared return type: Pattern.match
    # yields a Match object or None, never a bool.
    return bool(_pattern_domain.match(domain)) and _check_internal(domain)
|
||||||
|
|
||||||
|
|
||||||
|
async def _req_get(url: str,
|
||||||
domain: str,
|
domain: str,
|
||||||
retries: int = DEFAULT_RETRIES,
|
retries: int = DEFAULT_RETRIES,
|
||||||
timeout: int = DEFAULT_TIMEOUT
|
timeout: int = DEFAULT_TIMEOUT) -> Tuple[Optional[bytes], Optional[str]]:
|
||||||
) -> Tuple[Optional[bytes], Optional[str]]:
|
|
||||||
"""异步发送HTTP GET请求获取内容
|
"""异步发送HTTP GET请求获取内容
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -296,63 +335,24 @@ class Favicon:
|
||||||
content = await resp.read()
|
content = await resp.read()
|
||||||
return content, ct_type
|
return content, ct_type
|
||||||
else:
|
else:
|
||||||
await redis_pool.set_failed_domain(domain, setting.time_of_7_days)
|
await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
|
||||||
logger.error('异步请求失败: %d, URL: %s', resp.status, url)
|
logger.error('异步请求失败: %d, URL: %s', resp.status, url)
|
||||||
break
|
break
|
||||||
except (aiohttp.ClientConnectorError, aiohttp.ServerTimeoutError) as e:
|
except (aiohttp.ClientConnectorError, aiohttp.ServerTimeoutError) as e:
|
||||||
retry_count += 1
|
retry_count += 1
|
||||||
if retry_count > retries:
|
if retry_count > retries:
|
||||||
|
await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
|
||||||
logger.error('异步请求超时: %s, URL: %s', str(e), url)
|
logger.error('异步请求超时: %s, URL: %s', str(e), url)
|
||||||
else:
|
else:
|
||||||
logger.warning('异步请求超时,正在重试(%d/%d): %s', retry_count, retries, url)
|
logger.warning('异步请求超时,正在重试(%d/%d): %s', retry_count, retries, url)
|
||||||
continue
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
await redis_pool.set_failed_domain(domain, setting.time_of_7_days)
|
await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
|
||||||
logger.error('异步请求异常: %s, URL: %s', str(e), url)
|
logger.error('异步请求异常: %s, URL: %s', str(e), url)
|
||||||
break
|
break
|
||||||
|
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
@staticmethod
def _check_url(domain: str) -> bool:
    """Check that a domain is well-formed and not an internal address.

    Args:
        domain: Host name to validate.

    Returns:
        ``True`` only when ``domain`` matches ``_pattern_domain`` and
        ``Favicon.check_internal(domain)`` is truthy; ``False`` otherwise.
    """
    # Run the regex first: it is a pure string test, whereas check_internal
    # may call socket.getaddrinfo. Short-circuiting means names that fail the
    # pattern are never passed to the resolver. bool() keeps the declared
    # return type (Pattern.match yields a Match object or None).
    return bool(_pattern_domain.match(domain)) and Favicon.check_internal(domain)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def check_internal(domain: str) -> bool:
|
|
||||||
"""检查网址是否非内网地址
|
|
||||||
|
|
||||||
Args:
|
|
||||||
domain: 域名
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
True: 非内网;False: 是内网/无法解析
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# 检查是否为IP地址
|
|
||||||
if domain.replace('.', '').isdigit():
|
|
||||||
return not ipaddress.ip_address(domain).is_private
|
|
||||||
else:
|
|
||||||
# 解析域名获取IP地址
|
|
||||||
ips = socket.getaddrinfo(domain, None)
|
|
||||||
for ip_info in ips:
|
|
||||||
ip = ip_info[4][0]
|
|
||||||
if '.' in ip:
|
|
||||||
if not ipaddress.ip_address(ip).is_private:
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
except Exception as e:
|
|
||||||
redis_pool.set_failed_domain(domain, setting.time_of_7_days)
|
|
||||||
logger.error('解析域名出错: %s, 错误: %s', domain, str(e))
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
# 域名验证正则表达式
|
# 域名验证正则表达式
|
||||||
_pattern_domain = re.compile(
|
_pattern_domain = re.compile(
|
||||||
|
|
|
@ -19,9 +19,6 @@ logger = logging.getLogger(__name__)
|
||||||
_icon_root_path = setting.icon_root_path
|
_icon_root_path = setting.icon_root_path
|
||||||
_default_icon_path = setting.default_icon_path
|
_default_icon_path = setting.default_icon_path
|
||||||
|
|
||||||
# 创建全局服务实例
|
|
||||||
_service = favicon_service.FaviconService()
|
|
||||||
|
|
||||||
# 创建FastAPI路由器
|
# 创建FastAPI路由器
|
||||||
favicon_router = APIRouter(prefix="", tags=["favicon"])
|
favicon_router = APIRouter(prefix="", tags=["favicon"])
|
||||||
|
|
||||||
|
@ -35,13 +32,13 @@ async def get_favicon(
|
||||||
refresh: Optional[str] = Query(None, include_in_schema=False),
|
refresh: Optional[str] = Query(None, include_in_schema=False),
|
||||||
):
|
):
|
||||||
"""获取网站图标"""
|
"""获取网站图标"""
|
||||||
return await _service.get_favicon_handler(request, bg_tasks, url, refresh)
|
return await favicon_service.get_favicon_handler(request, bg_tasks, url, refresh)
|
||||||
|
|
||||||
|
|
||||||
@favicon_router.get('/icon/default')
|
@favicon_router.get('/icon/default')
|
||||||
async def get_default_icon():
|
async def get_default_icon():
|
||||||
"""获取默认图标"""
|
"""获取默认图标"""
|
||||||
return _service.get_default()
|
return favicon_service.get_default()
|
||||||
|
|
||||||
|
|
||||||
@favicon_router.get('/icon/referer', include_in_schema=False)
|
@favicon_router.get('/icon/referer', include_in_schema=False)
|
||||||
|
|
|
@ -7,7 +7,7 @@ import random
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import warnings
|
import warnings
|
||||||
from typing import Tuple, List, Optional
|
from typing import Tuple, Optional
|
||||||
|
|
||||||
import bs4
|
import bs4
|
||||||
import urllib3
|
import urllib3
|
||||||
|
@ -30,272 +30,104 @@ warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
|
||||||
_current_dir = os.path.dirname(os.path.abspath(__file__))
|
_current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
class FaviconService:
|
async def get_favicon_handler(request: Request,
|
||||||
"""图标服务类,封装所有与图标获取、缓存和处理相关的功能"""
|
bg_tasks: BackgroundTasks,
|
||||||
|
url: Optional[str] = None,
|
||||||
|
refresh: Optional[str] = None) -> dict[str, str] | Response:
|
||||||
|
"""异步处理获取图标的请求"""
|
||||||
|
|
||||||
def __init__(self):
|
logger.info(
|
||||||
# 全局计数器
|
f"队列大小(异步) queue/failed:"
|
||||||
self.url_count = 0
|
f"{await redis_pool.get_cache_size(prefix=redis_pool.ICON_QUEUE_PREFIX)} "
|
||||||
self.request_icon_count = 0
|
f"| {await redis_pool.get_cache_size(prefix=redis_pool.FAILED_DOMAINS_PREFIX)}")
|
||||||
self.request_cache_count = 0
|
|
||||||
|
|
||||||
# 预编译正则表达式,提高性能
|
# 验证URL参数
|
||||||
self.pattern_icon = re.compile(r'(icon|shortcut icon|alternate icon|apple-touch-icon)+', re.I)
|
if not url:
|
||||||
self.pattern_link = re.compile(r'(<link[^>]+rel=.(icon|shortcut icon|alternate icon|apple-touch-icon)[^>]+>)',
|
return {"message": "请提供url参数"}
|
||||||
re.I)
|
|
||||||
|
|
||||||
# 计算默认图标的MD5值
|
|
||||||
self.default_icon_md5 = self._initialize_default_icon_md5()
|
|
||||||
|
|
||||||
def _initialize_default_icon_md5(self) -> List[str]:
    """Build the list of MD5 digests that identify a default icon.

    Returns:
        The digest of the file at ``setting.default_icon_path`` (left out
        when ``_get_file_md5`` returns a falsy value) followed by eight
        hard-coded digests.
    """
    known_digests = [
        '05231fb6b69aff47c3f35efe09c11ba0',
        '3ca64f83fdcf25135d87e08af65e68c9',
        'db470fd0b65c8c121477343c37f74f02',
        '52419f3f4f7d11945d272facc76c9e6a',
        'b8a0bf372c762e966cc99ede8682bc71',
        '71e9c45f29eadfa2ec5495302c22bcf6',
        'ababc687adac587b8a06e580ee79aaa1',
        '43802bddf65eeaab643adb8265bfbada',
    ]
    local_digest = self._get_file_md5(setting.default_icon_path)
    # Skip the local digest when it could not be computed (None).
    return ([local_digest] if local_digest else []) + known_digests
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _get_file_md5(file_path: str) -> Optional[str]:
|
|
||||||
"""计算文件的MD5值"""
|
|
||||||
try:
|
|
||||||
md5 = hashlib.md5()
|
|
||||||
with open(file_path, 'rb') as f:
|
|
||||||
while True:
|
|
||||||
buffer = f.read(1024 * 8)
|
|
||||||
if not buffer:
|
|
||||||
break
|
|
||||||
md5.update(buffer)
|
|
||||||
return md5.hexdigest().lower()
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"计算文件MD5失败 {file_path}: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _is_default_icon_md5(self, icon_md5: str) -> bool:
|
|
||||||
"""检查图标MD5是否为默认图标"""
|
|
||||||
return icon_md5 in self.default_icon_md5
|
|
||||||
|
|
||||||
def _is_default_icon_file(self, file_path: str) -> bool:
|
|
||||||
"""检查文件是否为默认图标"""
|
|
||||||
if os.path.exists(file_path) and os.path.isfile(file_path):
|
|
||||||
md5 = self._get_file_md5(file_path)
|
|
||||||
return md5 in self.default_icon_md5 if md5 else False
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _is_default_icon_byte(self, file_content: bytes) -> bool:
|
|
||||||
"""检查字节内容是否为默认图标"""
|
|
||||||
try:
|
|
||||||
md5 = hashlib.md5(file_content).hexdigest().lower()
|
|
||||||
return md5 in self.default_icon_md5
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"计算字节内容MD5失败: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _get_cache_file(self, domain: str, refresh: bool = False) -> Tuple[Optional[bytes], Optional[bytes]]:
    """Load a cached icon from disk and decide whether it is still fresh.

    Args:
        domain: File stem of the cache entry; ``<domain>.png`` is looked up
            under ``setting.icon_root_path/data/icon``.
        refresh: Whether the caller asked for the icon to be refreshed.

    Returns:
        A ``(cached, fresh)`` pair:

        * ``(None, None)``: no non-empty cache file, the content is not an
          image, or reading failed.
        * ``(icon, icon)``: the cache may be served as is.
        * ``(icon, None)``: the cache exists but should be refreshed.
    """
    cache_path = os.path.join(setting.icon_root_path, 'data', 'icon', domain + '.png')
    # isfile() is already False for a missing path.
    if not (os.path.isfile(cache_path) and os.path.getsize(cache_path) > 0):
        return None, None

    try:
        cached_icon = FileUtil.read_file(cache_path, mode='rb')
        file_time = int(os.path.getmtime(cache_path))

        # Reject cache entries that are not valid images.
        if not helpers.is_image(cached_icon):
            logger.warning(f"缓存的图标不是有效图片: {cache_path}")
            return None, None

        # Age in seconds, computed once so every threshold sees the same value.
        age = int(time.time()) - file_time

        if refresh:
            # A refresh is honoured only when the file is older than 12 hours.
            if age <= setting.time_of_12_hours:
                logger.info(f"缓存文件修改时间在有效期内,不执行刷新: {cache_path}")
                return cached_icon, cached_icon
            return cached_icon, None

        # Hard expiry after 30 days.
        if age > setting.time_of_30_days:
            logger.info(f"图标缓存过期(>30天): {cache_path}")
            return cached_icon, None

        # Cached default icons expire after a random 1-7 days. The bytes are
        # already in memory, so hash them instead of re-reading the file.
        if (age > setting.time_of_1_days * random.randint(1, 7)
                and self._is_default_icon_byte(cached_icon)):
            logger.info(f"默认图标缓存过期: {cache_path}")
            return cached_icon, None

        return cached_icon, cached_icon
    except Exception as e:
        logger.error(f"读取缓存文件失败 {cache_path}: {e}")
        return None, None
|
|
||||||
|
|
||||||
def _get_cache_icon(self, domain_md5: str, refresh: bool = False) -> Tuple[Optional[bytes], Optional[bytes]]:
|
|
||||||
"""获取缓存的图标"""
|
|
||||||
_cached, cached_icon = self._get_cache_file(domain_md5, refresh)
|
|
||||||
|
|
||||||
# 替换默认图标
|
|
||||||
if _cached and self._is_default_icon_byte(_cached):
|
|
||||||
_cached = setting.default_icon_file
|
|
||||||
if cached_icon and self._is_default_icon_byte(cached_icon):
|
|
||||||
cached_icon = setting.default_icon_file
|
|
||||||
|
|
||||||
return _cached, cached_icon
|
|
||||||
|
|
||||||
def _get_header(self, content_type: str, cache_time: int = None) -> dict:
|
|
||||||
"""生成响应头"""
|
|
||||||
if cache_time is None:
|
|
||||||
cache_time = setting.time_of_7_days
|
|
||||||
|
|
||||||
_ct = 'image/x-icon'
|
|
||||||
if content_type and content_type in header.image_type:
|
|
||||||
_ct = content_type
|
|
||||||
|
|
||||||
cache_control = 'no-store, no-cache, must-revalidate, max-age=0' if cache_time == 0 else f'public, max-age={cache_time}'
|
|
||||||
|
|
||||||
return {
|
|
||||||
'Content-Type': _ct,
|
|
||||||
'Cache-Control': cache_control,
|
|
||||||
'X-Robots-Tag': 'noindex, nofollow'
|
|
||||||
}
|
|
||||||
|
|
||||||
def _parse_html(self, content: Optional[bytes], entity: Favicon) -> Optional[str]:
|
|
||||||
"""从HTML内容中解析图标URL"""
|
|
||||||
if not content:
|
|
||||||
return None
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 尝试将bytes转换为字符串
|
entity = Favicon(url)
|
||||||
# str(content).encode('utf-8', 'replace').decode('utf-8', 'replace')
|
|
||||||
content_str = content.decode('utf-8', 'replace')
|
|
||||||
|
|
||||||
# 使用更高效的解析器
|
# 验证域名
|
||||||
bs = bs4.BeautifulSoup(content_str, features='lxml', parse_only=SoupStrainer("link"))
|
if not entity.domain:
|
||||||
if len(bs) == 0:
|
logger.warning(f"无效的URL: {url}")
|
||||||
bs = bs4.BeautifulSoup(content_str, features='html.parser', parse_only=SoupStrainer("link"))
|
return get_default(setting.time_of_7_days)
|
||||||
|
|
||||||
html_links = bs.find_all("link", rel=self.pattern_icon)
|
# 检查缓存中的失败URL
|
||||||
|
if await redis_pool.is_domain_failed(entity.domain):
|
||||||
|
return get_default(setting.time_of_1_days)
|
||||||
|
|
||||||
# 如果没有找到,尝试使用正则表达式直接匹配
|
# 检查缓存
|
||||||
if not html_links or len(html_links) == 0:
|
_cached, cached_icon = _get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1'])
|
||||||
content_links = self.pattern_link.findall(content_str)
|
|
||||||
c_link = ''.join([_links[0] for _links in content_links])
|
|
||||||
bs = bs4.BeautifulSoup(c_link, features='lxml')
|
|
||||||
html_links = bs.find_all("link", rel=self.pattern_icon)
|
|
||||||
|
|
||||||
if html_links and len(html_links) > 0:
|
if _cached or cached_icon:
|
||||||
# 优先查找指定rel类型的图标
|
# 使用缓存图标
|
||||||
icon_url = (self._get_link_rel(html_links, entity, 'shortcut icon') or
|
icon_content = cached_icon if cached_icon else _cached
|
||||||
self._get_link_rel(html_links, entity, 'icon') or
|
|
||||||
self._get_link_rel(html_links, entity, 'alternate icon') or
|
|
||||||
self._get_link_rel(html_links, entity, ''))
|
|
||||||
|
|
||||||
if icon_url:
|
# 确定内容类型和缓存时间
|
||||||
logger.info(f"-> 从HTML获取图标URL: {icon_url}")
|
content_type = filetype.guess_mime(icon_content) if icon_content else ""
|
||||||
|
cache_time = setting.time_of_12_hours \
|
||||||
|
if _is_default_icon_byte(icon_content) else setting.time_of_7_days
|
||||||
|
|
||||||
return icon_url
|
# 乐观缓存机制:检查缓存是否已过期但仍有缓存内容
|
||||||
except Exception as e:
|
# _cached 存在但 cached_icon 为 None 表示缓存已过期
|
||||||
logger.error(f"解析HTML失败: {e}")
|
if _cached and not cached_icon:
|
||||||
|
# 缓存已过期,后台刷新缓存
|
||||||
|
logger.info(f"缓存已过期,加入后台队列刷新(异步): {entity.domain}")
|
||||||
|
await redis_pool.set_cache(
|
||||||
|
f"{entity.domain}",
|
||||||
|
entity.domain,
|
||||||
|
setting.time_of_2_hours,
|
||||||
|
prefix=redis_pool.ICON_QUEUE_PREFIX
|
||||||
|
)
|
||||||
|
bg_tasks.add_task(get_icon_async, entity, _cached)
|
||||||
|
|
||||||
return None
|
return Response(content=icon_content,
|
||||||
|
media_type=content_type if content_type else "image/x-icon",
|
||||||
@staticmethod
|
headers=_get_header(content_type, cache_time))
|
||||||
def _get_link_rel(links, entity: Favicon, _rel: str) -> Optional[str]:
|
|
||||||
"""从链接列表中查找指定rel类型的图标URL"""
|
|
||||||
if not links:
|
|
||||||
return None
|
|
||||||
|
|
||||||
for link in links:
|
|
||||||
r = link.get('rel')
|
|
||||||
_r = ' '.join(r) if isinstance(r, list) else r
|
|
||||||
_href = link.get('href')
|
|
||||||
|
|
||||||
if _rel:
|
|
||||||
if _r.lower() == _rel:
|
|
||||||
return entity.get_icon_url(str(_href))
|
|
||||||
else:
|
else:
|
||||||
return entity.get_icon_url(str(_href))
|
# 开始图标处理,加入队列
|
||||||
|
await redis_pool.set_cache(
|
||||||
|
f"{entity.domain}",
|
||||||
|
entity.domain,
|
||||||
|
setting.time_of_2_hours,
|
||||||
|
prefix=redis_pool.ICON_QUEUE_PREFIX
|
||||||
|
)
|
||||||
|
|
||||||
return None
|
# 没有缓存,实时处理,检查队列大小
|
||||||
|
_queue_size = await redis_pool.get_cache_size(prefix=redis_pool.ICON_QUEUE_PREFIX)
|
||||||
def get_default(self, cache_time: int = None) -> Response:
|
if _queue_size >= setting.MAX_QUEUE_SIZE:
|
||||||
if cache_time is None:
|
# 加入后台队列并返回默认图片
|
||||||
cache_time = setting.time_of_1_days
|
logger.info(
|
||||||
return Response(content=setting.default_icon_file,
|
f"队列大小({_queue_size})>={setting.MAX_QUEUE_SIZE},返回默认图片并加入后台队列(异步): {entity.domain}")
|
||||||
media_type="image/png",
|
bg_tasks.add_task(get_icon_async, entity, _cached)
|
||||||
headers=self._get_header("image/png", cache_time))
|
return get_default(0)
|
||||||
|
|
||||||
def get_icon_sync(self, entity: Favicon, _cached: bytes = None) -> Optional[bytes]:
|
|
||||||
"""同步获取图标"""
|
|
||||||
icon_content = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
# 尝试从网站获取HTML内容
|
|
||||||
html_content = entity.req_get()
|
|
||||||
if html_content:
|
|
||||||
icon_url = self._parse_html(html_content, entity)
|
|
||||||
else:
|
else:
|
||||||
icon_url = None
|
# 队列<MAX_QUEUE_SIZE,实时处理
|
||||||
|
logger.info(f"队列大小({_queue_size})<{setting.MAX_QUEUE_SIZE},实时处理(异步): {entity.domain}")
|
||||||
|
|
||||||
# 尝试不同的图标获取策略
|
# 始终使用异步方法获取图标
|
||||||
strategies = [
|
icon_content = await get_icon_async(entity, _cached)
|
||||||
# 1. 从原始网页标签链接中获取
|
|
||||||
lambda: (icon_url, "原始网页标签") if icon_url else (None, None),
|
|
||||||
# 2. 从 gstatic.cn 接口获取
|
|
||||||
lambda: (
|
|
||||||
f'https://t3.gstatic.cn/faviconV2?client=SOCIAL&fallback_opts=TYPE,SIZE,URL&type=FAVICON&size=128&url={entity.get_base_url()}',
|
|
||||||
"gstatic接口"),
|
|
||||||
# 3. 从网站默认位置获取
|
|
||||||
lambda: ('', "网站默认位置/favicon.ico"),
|
|
||||||
# 4. 从其他api接口获取
|
|
||||||
lambda: (f'https://ico.kucat.cn/get.php?url={entity.get_base_url()}', "第三方API"),
|
|
||||||
# 99. 最后的尝试,cloudflare workers
|
|
||||||
# lambda: (f'https://favicon.cary.cc/?url={entity.get_base_url()}', "cloudflare"),
|
|
||||||
]
|
|
||||||
|
|
||||||
for strategy in strategies:
|
if not icon_content:
|
||||||
if icon_content:
|
# 获取失败,返回默认图标
|
||||||
break
|
return get_default()
|
||||||
|
|
||||||
strategy_url, strategy_name = strategy()
|
# 确定内容类型和缓存时间
|
||||||
if strategy_url is not None:
|
content_type = filetype.guess_mime(icon_content) if icon_content else ""
|
||||||
logger.debug(f"-> 尝试从 {strategy_name} 获取图标")
|
cache_time = setting.time_of_12_hours \
|
||||||
icon_content, icon_type = entity.get_icon_file(strategy_url, strategy_url == '')
|
if _is_default_icon_byte(icon_content) else setting.time_of_7_days
|
||||||
|
|
||||||
# 图标获取失败,或图标不是支持的图片格式,写入默认图标
|
return Response(content=icon_content,
|
||||||
if (not icon_content) or (not helpers.is_image(icon_content) or self._is_default_icon_byte(icon_content)):
|
media_type=content_type if content_type else "image/x-icon",
|
||||||
logger.warning(f"-> 获取图标失败,使用默认图标: {entity.domain}")
|
headers=_get_header(content_type, cache_time))
|
||||||
icon_content = _cached if _cached else setting.default_icon_file
|
|
||||||
|
|
||||||
if icon_content:
|
|
||||||
cache_path = os.path.join(setting.icon_root_path, 'data', 'icon', entity.domain_md5 + '.png')
|
|
||||||
md5_path = os.path.join(setting.icon_root_path, 'data', 'text', entity.domain_md5 + '.txt')
|
|
||||||
|
|
||||||
try:
|
|
||||||
# 确保目录存在
|
|
||||||
os.makedirs(os.path.dirname(cache_path), exist_ok=True)
|
|
||||||
os.makedirs(os.path.dirname(md5_path), exist_ok=True)
|
|
||||||
|
|
||||||
# 写入缓存文件
|
|
||||||
FileUtil.write_file(cache_path, icon_content, mode='wb')
|
|
||||||
FileUtil.write_file(md5_path, entity.domain, mode='w')
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"写入缓存文件失败: {e}")
|
logger.error(f"处理图标请求时发生错误 {url}: {e}")
|
||||||
|
# 返回默认图标
|
||||||
|
return get_default()
|
||||||
|
|
||||||
self.request_icon_count += 1
|
|
||||||
|
|
||||||
return icon_content
|
async def get_icon_async(entity: Favicon, _cached: bytes = None) -> Optional[bytes]:
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"获取图标时发生错误 {entity.domain}: {e}")
|
|
||||||
return _cached or setting.default_icon_file
|
|
||||||
finally:
|
|
||||||
redis_pool.remove_cache(f"{redis_pool.ICON_QUEUE_PREFIX}{entity.domain}")
|
|
||||||
|
|
||||||
async def get_icon_async(self, entity: Favicon, _cached: bytes = None) -> Optional[bytes]:
|
|
||||||
"""异步获取图标"""
|
"""异步获取图标"""
|
||||||
icon_content = None
|
icon_content = None
|
||||||
|
|
||||||
|
@ -303,7 +135,7 @@ class FaviconService:
|
||||||
# 尝试从网站异步获取HTML内容
|
# 尝试从网站异步获取HTML内容
|
||||||
html_content = await entity.req_get()
|
html_content = await entity.req_get()
|
||||||
if html_content:
|
if html_content:
|
||||||
icon_url = self._parse_html(html_content, entity)
|
icon_url = _parse_html(html_content, entity)
|
||||||
else:
|
else:
|
||||||
icon_url = None
|
icon_url = None
|
||||||
|
|
||||||
|
@ -331,7 +163,7 @@ class FaviconService:
|
||||||
icon_content, icon_type = await entity.get_icon_file(strategy_url, strategy_url == '')
|
icon_content, icon_type = await entity.get_icon_file(strategy_url, strategy_url == '')
|
||||||
|
|
||||||
# 图标获取失败,或图标不是支持的图片格式,写入默认图标
|
# 图标获取失败,或图标不是支持的图片格式,写入默认图标
|
||||||
if (not icon_content) or (not helpers.is_image(icon_content) or self._is_default_icon_byte(icon_content)):
|
if (not icon_content) or (not helpers.is_image(icon_content) or _is_default_icon_byte(icon_content)):
|
||||||
logger.warning(f"-> 异步获取图标失败,使用默认图标: {entity.domain}")
|
logger.warning(f"-> 异步获取图标失败,使用默认图标: {entity.domain}")
|
||||||
icon_content = _cached if _cached else setting.default_icon_file
|
icon_content = _cached if _cached else setting.default_icon_file
|
||||||
|
|
||||||
|
@ -350,109 +182,204 @@ class FaviconService:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"异步写入缓存文件失败: {e}")
|
logger.error(f"异步写入缓存文件失败: {e}")
|
||||||
|
|
||||||
self.request_icon_count += 1
|
|
||||||
|
|
||||||
return icon_content
|
return icon_content
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"异步获取图标时发生错误 {entity.domain}: {e}")
|
logger.error(f"异步获取图标时发生错误 {entity.domain}: {e}")
|
||||||
return _cached or setting.default_icon_file
|
return _cached or setting.default_icon_file
|
||||||
finally:
|
finally:
|
||||||
await redis_pool.remove_cache(f"{redis_pool.ICON_QUEUE_PREFIX}{entity.domain}")
|
await redis_pool.remove_cache(f"{entity.domain}", prefix=redis_pool.ICON_QUEUE_PREFIX)
|
||||||
|
|
||||||
async def get_favicon_handler(
|
|
||||||
self,
|
|
||||||
request: Request,
|
|
||||||
bg_tasks: BackgroundTasks,
|
|
||||||
url: Optional[str] = None,
|
|
||||||
refresh: Optional[str] = None,
|
|
||||||
) -> dict[str, str] | Response:
|
|
||||||
"""异步处理获取图标的请求"""
|
|
||||||
|
|
||||||
logger.info(
|
# 预编译正则表达式,提高性能
|
||||||
f"队列大小(异步) queue/failed:{await redis_pool.get_cache_size(f"{redis_pool.ICON_QUEUE_PREFIX}")} | {await redis_pool.get_cache_size(f"{redis_pool.FAILED_DOMAINS_PREFIX}")}")
|
pattern_icon = re.compile(r'(icon|shortcut icon|alternate icon|apple-touch-icon)+', re.I)
|
||||||
|
pattern_link = re.compile(r'(<link[^>]+rel=.(icon|shortcut icon|alternate icon|apple-touch-icon)[^>]+>)', re.I)
|
||||||
|
|
||||||
self.url_count += 1
|
|
||||||
|
|
||||||
# 验证URL参数
|
def _get_link_rel(links, entity: Favicon, _rel: str) -> Optional[str]:
|
||||||
if not url:
|
"""从链接列表中查找指定rel类型的图标URL"""
|
||||||
return {"message": "请提供url参数"}
|
if not links:
|
||||||
|
return None
|
||||||
|
|
||||||
|
for link in links:
|
||||||
|
r = link.get('rel')
|
||||||
|
_r = ' '.join(r) if isinstance(r, list) else r
|
||||||
|
_href = link.get('href')
|
||||||
|
|
||||||
|
if _rel:
|
||||||
|
if _r.lower() == _rel:
|
||||||
|
return entity.get_icon_url(str(_href))
|
||||||
|
else:
|
||||||
|
return entity.get_icon_url(str(_href))
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_html(content: Optional[bytes], entity: Favicon) -> Optional[str]:
|
||||||
|
"""从HTML内容中解析图标URL"""
|
||||||
|
if not content:
|
||||||
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
entity = Favicon(url)
|
# 尝试将bytes转换为字符串
|
||||||
|
# str(content).encode('utf-8', 'replace').decode('utf-8', 'replace')
|
||||||
|
# content_str = content.decode('utf-8', 'replace')
|
||||||
|
content_str = str(content).encode('utf-8', 'replace').decode('utf-8', 'replace')
|
||||||
|
|
||||||
# 验证域名
|
# 使用更高效的解析器
|
||||||
if not entity.domain:
|
bs = bs4.BeautifulSoup(content_str, features='lxml', parse_only=SoupStrainer("link"))
|
||||||
logger.warning(f"无效的URL: {url}")
|
if len(bs) == 0:
|
||||||
return self.get_default(setting.time_of_7_days)
|
bs = bs4.BeautifulSoup(content_str, features='html.parser', parse_only=SoupStrainer("link"))
|
||||||
|
|
||||||
# 检查缓存中的失败URL
|
html_links = bs.find_all("link", rel=pattern_icon)
|
||||||
if await redis_pool.is_domain_failed(entity.domain):
|
|
||||||
return self.get_default(setting.time_of_7_days)
|
|
||||||
|
|
||||||
# 检查缓存
|
# 如果没有找到,尝试使用正则表达式直接匹配
|
||||||
_cached, cached_icon = self._get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1'])
|
if not html_links or len(html_links) == 0:
|
||||||
|
content_links = pattern_link.findall(content_str)
|
||||||
|
c_link = ''.join([_links[0] for _links in content_links])
|
||||||
|
bs = bs4.BeautifulSoup(c_link, features='lxml')
|
||||||
|
html_links = bs.find_all("link", rel=pattern_icon)
|
||||||
|
|
||||||
if _cached or cached_icon:
|
if html_links and len(html_links) > 0:
|
||||||
# 使用缓存图标
|
# 优先查找指定rel类型的图标
|
||||||
icon_content = cached_icon if cached_icon else _cached
|
icon_url = (_get_link_rel(html_links, entity, 'shortcut icon') or
|
||||||
self.request_cache_count += 1
|
_get_link_rel(html_links, entity, 'icon') or
|
||||||
|
_get_link_rel(html_links, entity, 'alternate icon') or
|
||||||
|
_get_link_rel(html_links, entity, ''))
|
||||||
|
|
||||||
# 确定内容类型和缓存时间
|
if icon_url:
|
||||||
content_type = filetype.guess_mime(icon_content) if icon_content else ""
|
logger.debug(f"-> 从HTML获取图标URL: {icon_url}")
|
||||||
cache_time = setting.time_of_12_hours \
|
|
||||||
if self._is_default_icon_byte(icon_content) else setting.time_of_7_days
|
|
||||||
|
|
||||||
# 乐观缓存机制:检查缓存是否已过期但仍有缓存内容
|
return icon_url
|
||||||
# _cached 存在但 cached_icon 为 None 表示缓存已过期
|
|
||||||
if _cached and not cached_icon:
|
|
||||||
# 缓存已过期,后台刷新缓存
|
|
||||||
logger.info(f"缓存已过期,加入后台队列刷新(异步): {entity.domain}")
|
|
||||||
await redis_pool.set_cache(
|
|
||||||
f"{redis_pool.ICON_QUEUE_PREFIX}{entity.domain}",
|
|
||||||
entity.domain,
|
|
||||||
setting.time_of_2_hours
|
|
||||||
)
|
|
||||||
bg_tasks.add_task(self.get_icon_sync, entity, _cached)
|
|
||||||
|
|
||||||
return Response(content=icon_content,
|
|
||||||
media_type=content_type if content_type else "image/x-icon",
|
|
||||||
headers=self._get_header(content_type, cache_time))
|
|
||||||
else:
|
|
||||||
# 开始图标处理,加入队列
|
|
||||||
await redis_pool.set_cache(
|
|
||||||
f"{redis_pool.ICON_QUEUE_PREFIX}{entity.domain}",
|
|
||||||
entity.domain,
|
|
||||||
setting.time_of_2_hours
|
|
||||||
)
|
|
||||||
|
|
||||||
# 没有缓存,实时处理,检查队列大小
|
|
||||||
_queue_size = await redis_pool.get_cache_size(f"{redis_pool.ICON_QUEUE_PREFIX}")
|
|
||||||
if _queue_size >= setting.MAX_QUEUE_SIZE:
|
|
||||||
# 加入后台队列并返回默认图片
|
|
||||||
logger.info(
|
|
||||||
f"队列大小({_queue_size})>={setting.MAX_QUEUE_SIZE},返回默认图片并加入后台队列(异步): {entity.domain}")
|
|
||||||
bg_tasks.add_task(self.get_icon_sync, entity, _cached)
|
|
||||||
return self.get_default(0)
|
|
||||||
else:
|
|
||||||
# 队列<MAX_QUEUE_SIZE,实时处理
|
|
||||||
logger.info(f"队列大小({_queue_size})<{setting.MAX_QUEUE_SIZE},实时处理(异步): {entity.domain}")
|
|
||||||
|
|
||||||
# 始终使用异步方法获取图标
|
|
||||||
icon_content = await self.get_icon_async(entity, _cached)
|
|
||||||
|
|
||||||
if not icon_content:
|
|
||||||
# 获取失败,返回默认图标
|
|
||||||
return self.get_default()
|
|
||||||
|
|
||||||
# 确定内容类型和缓存时间
|
|
||||||
content_type = filetype.guess_mime(icon_content) if icon_content else ""
|
|
||||||
cache_time = setting.time_of_12_hours \
|
|
||||||
if self._is_default_icon_byte(icon_content) else setting.time_of_7_days
|
|
||||||
|
|
||||||
return Response(content=icon_content,
|
|
||||||
media_type=content_type if content_type else "image/x-icon",
|
|
||||||
headers=self._get_header(content_type, cache_time))
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"处理图标请求时发生错误 {url}: {e}")
|
logger.error(f"解析HTML失败: {e}")
|
||||||
# 返回默认图标
|
|
||||||
return self.get_default()
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_file_md5(file_path: str) -> Optional[str]:
|
||||||
|
"""计算文件的MD5值"""
|
||||||
|
try:
|
||||||
|
md5 = hashlib.md5()
|
||||||
|
with open(file_path, 'rb') as f:
|
||||||
|
while True:
|
||||||
|
buffer = f.read(1024 * 8)
|
||||||
|
if not buffer:
|
||||||
|
break
|
||||||
|
md5.update(buffer)
|
||||||
|
return md5.hexdigest().lower()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"计算文件MD5失败 {file_path}: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
default_icon_md5 = [
|
||||||
|
_get_file_md5(setting.default_icon_path),
|
||||||
|
'05231fb6b69aff47c3f35efe09c11ba0',
|
||||||
|
'3ca64f83fdcf25135d87e08af65e68c9',
|
||||||
|
'db470fd0b65c8c121477343c37f74f02',
|
||||||
|
'52419f3f4f7d11945d272facc76c9e6a',
|
||||||
|
'b8a0bf372c762e966cc99ede8682bc71',
|
||||||
|
'71e9c45f29eadfa2ec5495302c22bcf6',
|
||||||
|
'ababc687adac587b8a06e580ee79aaa1',
|
||||||
|
'43802bddf65eeaab643adb8265bfbada',
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _get_header(content_type: str, cache_time: int = None) -> dict:
|
||||||
|
"""生成响应头"""
|
||||||
|
if cache_time is None:
|
||||||
|
cache_time = setting.time_of_7_days
|
||||||
|
|
||||||
|
_ct = 'image/x-icon'
|
||||||
|
if content_type and content_type in header.image_type:
|
||||||
|
_ct = content_type
|
||||||
|
|
||||||
|
cache_control = 'no-store, no-cache, must-revalidate, max-age=0' if cache_time == 0 else f'public, max-age={cache_time}'
|
||||||
|
|
||||||
|
return {
|
||||||
|
'Content-Type': _ct,
|
||||||
|
'Cache-Control': cache_control,
|
||||||
|
'X-Robots-Tag': 'noindex, nofollow'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_default(cache_time: int = None) -> Response:
|
||||||
|
if cache_time is None:
|
||||||
|
cache_time = setting.time_of_1_days
|
||||||
|
return Response(content=setting.default_icon_file,
|
||||||
|
media_type="image/png",
|
||||||
|
headers=_get_header("image/png", cache_time))
|
||||||
|
|
||||||
|
|
||||||
|
def _is_default_icon_md5(icon_md5: str) -> bool:
|
||||||
|
"""检查图标MD5是否为默认图标"""
|
||||||
|
return icon_md5 in default_icon_md5
|
||||||
|
|
||||||
|
|
||||||
|
def _is_default_icon_file(file_path: str) -> bool:
|
||||||
|
"""检查文件是否为默认图标"""
|
||||||
|
if os.path.exists(file_path) and os.path.isfile(file_path):
|
||||||
|
md5 = _get_file_md5(file_path)
|
||||||
|
return md5 in default_icon_md5 if md5 else False
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _is_default_icon_byte(file_content: bytes) -> bool:
|
||||||
|
"""检查字节内容是否为默认图标"""
|
||||||
|
try:
|
||||||
|
md5 = hashlib.md5(file_content).hexdigest().lower()
|
||||||
|
return md5 in default_icon_md5
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"计算字节内容MD5失败: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _get_cache_file(domain: str, refresh: bool = False) -> Tuple[Optional[bytes], Optional[bytes]]:
|
||||||
|
"""从缓存中获取图标文件"""
|
||||||
|
cache_path = os.path.join(setting.icon_root_path, 'data', 'icon', domain + '.png')
|
||||||
|
if os.path.exists(cache_path) and os.path.isfile(cache_path) and os.path.getsize(cache_path) > 0:
|
||||||
|
try:
|
||||||
|
cached_icon = FileUtil.read_file(cache_path, mode='rb')
|
||||||
|
file_time = int(os.path.getmtime(cache_path))
|
||||||
|
|
||||||
|
# 验证是否为有效的图片文件
|
||||||
|
if not helpers.is_image(cached_icon):
|
||||||
|
logger.warning(f"缓存的图标不是有效图片: {cache_path}")
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
# 处理刷新请求或缓存过期情况
|
||||||
|
if refresh:
|
||||||
|
if int(time.time()) - file_time <= setting.time_of_12_hours:
|
||||||
|
logger.info(f"缓存文件修改时间在有效期内,不执行刷新: {cache_path}")
|
||||||
|
return cached_icon, cached_icon
|
||||||
|
return cached_icon, None
|
||||||
|
|
||||||
|
# 检查缓存是否过期(最大30天)
|
||||||
|
if int(time.time()) - file_time > setting.time_of_30_days:
|
||||||
|
logger.info(f"图标缓存过期(>30天): {cache_path}")
|
||||||
|
return cached_icon, None
|
||||||
|
|
||||||
|
# 默认图标,使用随机的缓存时间
|
||||||
|
if (int(time.time()) - file_time > setting.time_of_1_days * random.randint(1, 7)
|
||||||
|
and _is_default_icon_file(cache_path)):
|
||||||
|
logger.info(f"默认图标缓存过期: {cache_path}")
|
||||||
|
return cached_icon, None
|
||||||
|
|
||||||
|
return cached_icon, cached_icon
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"读取缓存文件失败 {cache_path}: {e}")
|
||||||
|
return None, None
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_cache_icon(domain_md5: str, refresh: bool = False) -> Tuple[Optional[bytes], Optional[bytes]]:
|
||||||
|
"""获取缓存的图标"""
|
||||||
|
_cached, cached_icon = _get_cache_file(domain_md5, refresh)
|
||||||
|
|
||||||
|
# 替换默认图标
|
||||||
|
if _cached and _is_default_icon_byte(_cached):
|
||||||
|
_cached = setting.default_icon_file
|
||||||
|
if cached_icon and _is_default_icon_byte(cached_icon):
|
||||||
|
cached_icon = setting.default_icon_file
|
||||||
|
|
||||||
|
return _cached, cached_icon
|
||||||
|
|
|
@ -29,34 +29,43 @@ async def get_redis() -> AsyncGenerator[Redis, None]:
|
||||||
yield conn
|
yield conn
|
||||||
|
|
||||||
|
|
||||||
async def set_cache(key: str, value: [str | int], ttl: int = None) -> None:
|
async def set_cache(key: str, value: [str | int], ttl: int = None, prefix: str = None) -> None:
|
||||||
if not key:
|
if not key:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async for redis in get_redis():
|
async for redis in get_redis():
|
||||||
await redis.set(key, value, ex=ttl)
|
_key = key
|
||||||
|
if prefix:
|
||||||
|
_key = f"{prefix}{key}"
|
||||||
|
await redis.sadd(prefix, key)
|
||||||
|
await redis.expire(prefix, ttl)
|
||||||
|
await redis.set(_key, value, ex=ttl)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"存入redis时出错:{e}")
|
logger.error(f"存入redis时出错:{e}")
|
||||||
|
|
||||||
|
|
||||||
async def get_cache(key: str) -> Optional[str | int]:
|
async def get_cache(key: str, prefix: str = None) -> Optional[str | int]:
|
||||||
if not key:
|
if not key:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async for redis in get_redis():
|
async for redis in get_redis():
|
||||||
|
if prefix:
|
||||||
|
key = f"{prefix}{key}"
|
||||||
return await redis.get(key)
|
return await redis.get(key)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"读取redis时出错:{e}")
|
logger.error(f"读取redis时出错:{e}")
|
||||||
|
|
||||||
|
|
||||||
async def exist_cache(key: str) -> bool:
|
async def exist_cache(key: str, prefix: str = None) -> bool:
|
||||||
if not key:
|
if not key:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async for redis in get_redis():
|
async for redis in get_redis():
|
||||||
|
if prefix:
|
||||||
|
key = f"{prefix}{key}"
|
||||||
result = await redis.exists(key)
|
result = await redis.exists(key)
|
||||||
return result > 0
|
return result > 0
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -64,80 +73,62 @@ async def exist_cache(key: str) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
async def remove_cache(key: str) -> None:
|
async def remove_cache(key: str, prefix: str = None) -> None:
|
||||||
if not key:
|
if not key:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async for redis in get_redis():
|
async for redis in get_redis():
|
||||||
await redis.delete(key)
|
_key = key
|
||||||
|
if prefix:
|
||||||
|
_key = f"{prefix}{key}"
|
||||||
|
await redis.srem(prefix, key)
|
||||||
|
await redis.delete(_key)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"删除redis时出错:{e}")
|
logger.error(f"删除redis时出错:{e}")
|
||||||
|
|
||||||
|
|
||||||
async def get_cache_size(cache_name: str = "default") -> int:
|
async def get_cache_size(prefix: str = None) -> int:
|
||||||
|
"""根据前缀统计数量,用于统计Set集合
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
async for redis in get_redis():
|
async for redis in get_redis():
|
||||||
return await redis.llen(cache_name)
|
return await redis.scard(prefix)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"获取队列大小时出错:{e}")
|
logger.error(f"获取队列大小时出错:{e}")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
async def set_failed_domain(domain: str, expire_seconds: int = setting.time_of_7_days) -> None:
|
async def set_failed_domain(domain: str, expire_seconds: int = None) -> None:
|
||||||
"""将失败的域名存入Redis,并设置过期时间
|
|
||||||
|
|
||||||
Args:
|
|
||||||
domain: 失败的域名
|
|
||||||
expire_seconds: 过期时间(秒),默认为7天
|
|
||||||
"""
|
|
||||||
if not domain:
|
if not domain:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async for redis in get_redis():
|
await set_cache(f"{domain}", domain, ttl=expire_seconds, prefix=FAILED_DOMAINS_PREFIX)
|
||||||
redis_key = f"{FAILED_DOMAINS_PREFIX}{domain}"
|
|
||||||
await redis.set(redis_key, domain, ex=expire_seconds)
|
|
||||||
logger.debug(f"已将失败域名 {domain} 存入Redis,过期时间:{expire_seconds}秒")
|
logger.debug(f"已将失败域名 {domain} 存入Redis,过期时间:{expire_seconds}秒")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"将失败域名存入Redis时出错:{e}")
|
logger.error(f"将失败域名存入Redis时出错:{e}")
|
||||||
|
|
||||||
|
|
||||||
async def is_domain_failed(domain: str) -> bool:
|
async def is_domain_failed(domain: str) -> bool:
|
||||||
"""检查域名是否在Redis的失败列表中
|
|
||||||
|
|
||||||
Args:
|
|
||||||
domain: 要检查的域名
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
True: 域名在失败列表中;False: 不在或Redis查询失败
|
|
||||||
"""
|
|
||||||
if not domain:
|
if not domain:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async for redis in get_redis():
|
return await exist_cache(domain, prefix=FAILED_DOMAINS_PREFIX)
|
||||||
redis_key = f"{FAILED_DOMAINS_PREFIX}{domain}"
|
|
||||||
result = await redis.exists(redis_key)
|
|
||||||
return result > 0
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"检查域名是否失败时出错:{e}")
|
logger.error(f"检查域名是否失败时出错:{e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
async def delete_failed_domain(domain: str) -> None:
|
async def delete_failed_domain(domain: str) -> None:
|
||||||
"""从Redis中删除失败域名记录
|
|
||||||
|
|
||||||
Args:
|
|
||||||
domain: 要删除的域名
|
|
||||||
"""
|
|
||||||
if not domain:
|
if not domain:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async for redis in get_redis():
|
await remove_cache(domain, prefix=FAILED_DOMAINS_PREFIX)
|
||||||
redis_key = f"{FAILED_DOMAINS_PREFIX}{domain}"
|
|
||||||
await redis.delete(redis_key)
|
|
||||||
logger.debug(f"已从Redis删除失败域名 {domain}")
|
logger.debug(f"已从Redis删除失败域名 {domain}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"从Redis删除失败域名时出错:{e}")
|
logger.error(f"从Redis删除失败域名时出错:{e}")
|
||||||
|
|
|
@ -18,7 +18,7 @@ default_icon_file = FileUtil.read_file(default_icon_path, mode='rb')
|
||||||
referer_log_file = os.path.join(icon_root_path, 'data', 'referer.txt')
|
referer_log_file = os.path.join(icon_root_path, 'data', 'referer.txt')
|
||||||
|
|
||||||
# 队列阈值常量配置
|
# 队列阈值常量配置
|
||||||
MAX_QUEUE_SIZE = 3
|
MAX_QUEUE_SIZE = 10
|
||||||
|
|
||||||
# 时间常量
|
# 时间常量
|
||||||
time_of_1_minus = 1 * 60
|
time_of_1_minus = 1 * 60
|
||||||
|
|
Loading…
Reference in New Issue