From 5972366caea154f6be128660da412df2ed4ac75a Mon Sep 17 00:00:00 2001 From: jinql Date: Sat, 6 Sep 2025 12:45:58 +0800 Subject: [PATCH] 25.09.06 --- favicon_app/models/favicon.py | 35 ++++++++++++++++++--------- favicon_app/routes/favicon_service.py | 14 ++++++++++- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/favicon_app/models/favicon.py b/favicon_app/models/favicon.py index a0cb38e..819e6ef 100644 --- a/favicon_app/models/favicon.py +++ b/favicon_app/models/favicon.py @@ -6,7 +6,8 @@ import ipaddress import logging import re import socket -from typing import Tuple, Optional, Any +import time +from typing import Tuple, Optional, Dict from urllib.parse import urlparse import requests @@ -31,6 +32,13 @@ requests_session.verify = False DEFAULT_TIMEOUT = 10 DEFAULT_RETRIES = 2 +# 时间常量 +time_of_1_days = 1 * 24 * 60 * 60 +time_of_7_days = 7 * time_of_1_days + +# 存储失败的URL,值为缓存过期时间戳 +failed_urls: Dict[str, int] = dict() + class Favicon: """Favicon类,用于处理网站图标的获取和解析 @@ -74,8 +82,7 @@ class Favicon: elif not (url.startswith('https://') or url.startswith('http://')): self._parse('http://' + url) except Exception as e: - logger.error(e) - logger.error('初始化错误: %s', url) + logger.error('初始化错误: %s, URL: %s', str(e), url) def _parse(self, url: str): """解析URL,提取协议、域名、路径和端口 @@ -104,10 +111,10 @@ class Favicon: if self.domain: self.domain_md5 = hashlib.md5(self.domain.encode("utf-8")).hexdigest() except Exception as e: + failed_url_cache(self.domain, time_of_1_days) self.scheme = None self.domain = None - logger.error(e) - logger.error('URL解析错误: %s', url) + logger.error('URL解析错误: %s, URL: %s', str(e), url) def _get_icon_url(self, icon_path: str): """根据图标路径生成完整的图标URL @@ -183,7 +190,7 @@ class Favicon: _content = base64.b64decode(data_uri[-1]) _ct = data_uri[0].split(';')[0].split(':')[-1] else: - _content, _ct = self._req_get(self.icon_url) + _content, _ct = self._req_get(self.icon_url, domain=self.domain) # 验证是否为图片 # image/* application/x-ico @@ -194,8 +201,7 @@ 
class Favicon: logger.warning('图片过大: %d bytes, 域名: %s', len(_content), self.domain) return _content, filetype.guess_mime(_content) or _ct except Exception as e: - logger.error(e) - logger.error('获取图标文件失败: %s', self.icon_url) + logger.error('获取图标文件失败: %s, URL: %s', str(e), self.icon_url) return None, None @@ -224,7 +230,7 @@ class Favicon: return None _url = self.get_base_url() - _content, _ct = self._req_get(_url) + _content, _ct = self._req_get(_url, domain=self.domain) # 验证类型并检查大小 if _ct and ('text' in _ct or 'html' in _ct or 'xml' in _ct): @@ -238,6 +244,7 @@ class Favicon: @staticmethod def _req_get( url: str, + domain: str, retries: int = DEFAULT_RETRIES, timeout: int = DEFAULT_TIMEOUT ) -> Tuple[Optional[bytes], Optional[str]]: @@ -283,6 +290,7 @@ class Favicon: return req.content, ct_type else: + failed_url_cache(domain, time_of_7_days) logger.error('请求失败: %d, URL: %s', req.status_code, url) break except (ConnectTimeoutError, ReadTimeoutError) as e: @@ -296,6 +304,7 @@ class Favicon: logger.error('重定向次数过多: %s, URL: %s', str(e), url) break except Exception as e: + failed_url_cache(domain, time_of_7_days) logger.error('请求异常: %s, URL: %s', str(e), url) break @@ -337,6 +346,7 @@ class Favicon: return True return False except Exception as e: + failed_url_cache(domain, time_of_7_days) logger.error('解析域名出错: %s, 错误: %s', domain, str(e)) return False @@ -347,5 +357,8 @@ _pattern_domain = re.compile( re.I) -def _check_url(domain: str) -> Optional[Any]: - return Favicon.check_internal(domain) and _pattern_domain.match(domain) +def failed_url_cache(_domain: str, _time: int): + if _domain: + _current_time = int(time.time()) + if (not failed_urls.get(_domain)) or (_current_time <= failed_urls[_domain]): + failed_urls[_domain] = _current_time + _time diff --git a/favicon_app/routes/favicon_service.py b/favicon_app/routes/favicon_service.py index ed7cd71..4a34016 100644 --- a/favicon_app/routes/favicon_service.py +++ b/favicon_app/routes/favicon_service.py @@ -16,7 +16,7 @@ 
from bs4 import SoupStrainer from fastapi import Request, BackgroundTasks from fastapi.responses import Response -from favicon_app.models import Favicon +from favicon_app.models import Favicon, favicon from favicon_app.utils import header from favicon_app.utils.file_util import FileUtil from favicon_app.utils.filetype import helpers, filetype @@ -368,6 +368,9 @@ class FaviconService: sync: Optional[str] = None ) -> dict[str, str] | Response: """处理获取图标的请求""" + + logger.info(f"队列大小:{self.icon_queue.qsize()} | {self.total_queue.qsize()}") + with self._lock: self.url_count += 1 @@ -383,6 +386,15 @@ class FaviconService: logger.warning(f"无效的URL: {url}") return self.get_default(self.time_of_7_days) + # 检查内存缓存中的失败URL + with self._lock: + if entity.domain in favicon.failed_urls: + _expire_time = favicon.failed_urls[entity.domain] + if int(time.time()) <= _expire_time: + return self.get_default(self.time_of_7_days) + else: + del favicon.failed_urls[entity.domain] + # 检查缓存 _cached, cached_icon = self._get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1', 'True'])