This commit is contained in:
jinql
2025-09-07 21:23:42 +08:00
parent 3102ce8b8e
commit d78476e78a
9 changed files with 484 additions and 106 deletions

View File

@@ -8,6 +8,8 @@ import urllib3
from fastapi import APIRouter, Request, Query, BackgroundTasks
from fastapi.responses import Response
import setting
from favicon_app.asyncs import favicon_service_async
from favicon_app.routes import favicon_service
from favicon_app.utils.file_util import FileUtil
@@ -15,12 +17,12 @@ urllib3.disable_warnings()
logging.captureWarnings(True)
logger = logging.getLogger(__name__)
_icon_root_path = favicon_service.icon_root_path
_default_icon_path = favicon_service.default_icon_path
_default_icon_content = favicon_service.default_icon_content
_icon_root_path = setting.icon_root_path
_default_icon_path = setting.default_icon_path
# 创建全局服务实例
_service = favicon_service.FaviconService()
_async_service = favicon_service_async.FaviconServiceAsync()
# 创建FastAPI路由器
favicon_router = APIRouter(prefix="", tags=["favicon"])
@@ -33,18 +35,24 @@ async def get_favicon(
bg_tasks: BackgroundTasks,
url: Optional[str] = Query(None, description="网址eg. https://www.baidu.com"),
refresh: Optional[str] = Query(None, include_in_schema=False),
sync: Optional[str] = Query('false', description="是否使用同步方式获取")
sync: Optional[str] = Query(setting.sync, description="是否使用同步方式获取"),
):
"""获取网站图标"""
return await _service.get_favicon_handler(request, bg_tasks, url, refresh, sync)
# 根据参数决定使用同步还是异步处理
use_async = (not (sync in ['true', '1']))
if use_async:
# 使用异步方式
return await _async_service.get_favicon_handler_async(request, bg_tasks, url, refresh)
else:
# 使用同步方式
return _service.get_favicon_handler(request, bg_tasks, url, refresh)
@favicon_router.get('/icon/default')
async def get_default_icon(cache_time: int = Query(_service.time_of_1_days, include_in_schema=False)):
async def get_default_icon():
"""获取默认图标"""
return Response(content=_default_icon_content,
media_type="image/png",
headers=_service.get_header("image/png", cache_time))
return _service.get_default()
@favicon_router.get('/icon/count')

View File

@@ -6,15 +6,18 @@ import os
import random
import re
import time
import warnings
from queue import Queue
from typing import Optional, Tuple, Dict, List
import bs4
import urllib3
from bs4 import SoupStrainer
from bs4 import XMLParsedAsHTMLWarning
from fastapi import Request, BackgroundTasks
from fastapi.responses import Response
import setting
from favicon_app.models import Favicon, favicon
from favicon_app.utils import header
from favicon_app.utils.file_util import FileUtil
@@ -23,13 +26,10 @@ from favicon_app.utils.filetype import helpers, filetype
urllib3.disable_warnings()
logging.captureWarnings(True)
logger = logging.getLogger(__name__)
warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
# 获取当前所在目录的绝对路径
current_dir = os.path.dirname(os.path.abspath(__file__))
# icon 存储的绝对路径,上两级目录
icon_root_path = os.path.abspath(os.path.join(current_dir, '..', '..'))
default_icon_path = os.path.join(icon_root_path, 'favicon.png')
default_icon_content = FileUtil.read_file(default_icon_path, mode='rb')
_current_dir = os.path.dirname(os.path.abspath(__file__))
class FaviconService:
@@ -44,29 +44,29 @@ class FaviconService:
# 初始化队列
# 实时处理的任务数量
self.icon_queue = Queue()
# self.icon_queue = Queue()
# 所有正在处理的任务数量
self.total_queue = Queue()
# 队列阈值常量配置
self.MAX_QUEUE_SIZE = 3
# 时间常量
self.time_of_1_minus = 1 * 60
self.time_of_5_minus = 5 * self.time_of_1_minus
self.time_of_10_minus = 10 * self.time_of_1_minus
self.time_of_30_minus = 30 * self.time_of_1_minus
self.time_of_1_hours = 1 * 60 * 60
self.time_of_2_hours = 2 * self.time_of_1_hours
self.time_of_3_hours = 3 * self.time_of_1_hours
self.time_of_6_hours = 6 * self.time_of_1_hours
self.time_of_12_hours = 12 * self.time_of_1_hours
self.time_of_1_days = 1 * 24 * 60 * 60
self.time_of_7_days = 7 * self.time_of_1_days
self.time_of_15_days = 15 * self.time_of_1_days
self.time_of_30_days = 30 * self.time_of_1_days
# # 队列阈值常量配置
# self.MAX_QUEUE_SIZE = 5
#
# # 时间常量
# self.time_of_1_minus = 1 * 60
# self.time_of_5_minus = 5 * self.time_of_1_minus
# self.time_of_10_minus = 10 * self.time_of_1_minus
# self.time_of_30_minus = 30 * self.time_of_1_minus
#
# self.time_of_1_hours = 1 * 60 * 60
# self.time_of_2_hours = 2 * self.time_of_1_hours
# self.time_of_3_hours = 3 * self.time_of_1_hours
# self.time_of_6_hours = 6 * self.time_of_1_hours
# self.time_of_12_hours = 12 * self.time_of_1_hours
#
# self.time_of_1_days = 1 * 24 * 60 * 60
# self.time_of_7_days = 7 * self.time_of_1_days
# self.time_of_15_days = 15 * self.time_of_1_days
# self.time_of_30_days = 30 * self.time_of_1_days
# 预编译正则表达式,提高性能
self.pattern_icon = re.compile(r'(icon|shortcut icon|alternate icon|apple-touch-icon)+', re.I)
@@ -78,7 +78,7 @@ class FaviconService:
def _initialize_default_icon_md5(self) -> List[str]:
"""初始化默认图标MD5值列表"""
md5_list = [self._get_file_md5(default_icon_path),
md5_list = [self._get_file_md5(setting.default_icon_path),
'05231fb6b69aff47c3f35efe09c11ba0',
'3ca64f83fdcf25135d87e08af65e68c9',
'db470fd0b65c8c121477343c37f74f02',
@@ -128,7 +128,7 @@ class FaviconService:
def _get_cache_file(self, domain: str, refresh: bool = False) -> Tuple[Optional[bytes], Optional[bytes]]:
"""从缓存中获取图标文件"""
cache_path = os.path.join(icon_root_path, 'data', 'icon', domain + '.png')
cache_path = os.path.join(setting.icon_root_path, 'data', 'icon', domain + '.png')
if os.path.exists(cache_path) and os.path.isfile(cache_path) and os.path.getsize(cache_path) > 0:
try:
cached_icon = FileUtil.read_file(cache_path, mode='rb')
@@ -141,18 +141,18 @@ class FaviconService:
# 处理刷新请求或缓存过期情况
if refresh:
if int(time.time()) - file_time <= self.time_of_12_hours:
if int(time.time()) - file_time <= setting.time_of_12_hours:
logger.info(f"缓存文件修改时间在有效期内,不执行刷新: {cache_path}")
return cached_icon, cached_icon
return cached_icon, None
# 检查缓存是否过期最大30天
if int(time.time()) - file_time > self.time_of_30_days:
if int(time.time()) - file_time > setting.time_of_30_days:
logger.info(f"图标缓存过期(>30天): {cache_path}")
return cached_icon, None
# 默认图标,使用随机的缓存时间
if int(time.time()) - file_time > self.time_of_1_days * random.randint(1, 7) and self._is_default_icon_file(cache_path):
if int(time.time()) - file_time > setting.time_of_1_days * random.randint(1, 7) and self._is_default_icon_file(cache_path):
logger.info(f"默认图标缓存过期: {cache_path}")
return cached_icon, None
@@ -168,16 +168,16 @@ class FaviconService:
# 替换默认图标
if _cached and self._is_default_icon_byte(_cached):
_cached = default_icon_content
_cached = setting.default_icon_file
if cached_icon and self._is_default_icon_byte(cached_icon):
cached_icon = default_icon_content
cached_icon = setting.default_icon_file
return _cached, cached_icon
def _get_header(self, content_type: str, cache_time: int = None) -> dict:
"""生成响应头"""
if cache_time is None:
cache_time = self.time_of_7_days
cache_time = setting.time_of_7_days
_ct = 'image/x-icon'
if content_type and content_type in header.image_type:
@@ -194,10 +194,10 @@ class FaviconService:
def _queue_pull(self, is_pull: bool = True, _queue: Queue = None) -> None:
"""从队列中取出元素,用于任务完成后移除队列中的记录
- is_pull: 是否执行取出操作
- _queue: 要操作的队列,默认为icon_queue
- _queue: 要操作的队列,默认为 total_queue
"""
if _queue is None:
_queue = self.icon_queue
_queue = self.total_queue
if is_pull and not _queue.empty():
try:
@@ -272,7 +272,7 @@ class FaviconService:
try:
if entity.domain in self.domain_list:
self._queue_pull(True, self.total_queue)
return _cached or default_icon_content
return _cached or setting.default_icon_file
else:
self.domain_list.append(entity.domain)
@@ -311,11 +311,11 @@ class FaviconService:
# 图标获取失败,或图标不是支持的图片格式,写入默认图标
if (not icon_content) or (not helpers.is_image(icon_content) or self._is_default_icon_byte(icon_content)):
logger.warning(f"-> 获取图标失败,使用默认图标: {entity.domain}")
icon_content = _cached if _cached else default_icon_content
icon_content = _cached if _cached else setting.default_icon_file
if icon_content:
cache_path = os.path.join(icon_root_path, 'data', 'icon', entity.domain_md5 + '.png')
md5_path = os.path.join(icon_root_path, 'data', 'text', entity.domain_md5 + '.txt')
cache_path = os.path.join(setting.icon_root_path, 'data', 'icon', entity.domain_md5 + '.png')
md5_path = os.path.join(setting.icon_root_path, 'data', 'text', entity.domain_md5 + '.txt')
try:
# 确保目录存在
@@ -333,12 +333,11 @@ class FaviconService:
return icon_content
except Exception as e:
logger.error(f"获取图标时发生错误 {entity.domain}: {e}")
return _cached or default_icon_content
return _cached or setting.default_icon_file
finally:
if entity.domain in self.domain_list:
self.domain_list.remove(entity.domain)
# 任务完成,从两个队列中移出元素
self._queue_pull(True, self.icon_queue)
self._queue_pull(True, self.total_queue)
def get_count(self) -> Dict[str, int]:
@@ -347,22 +346,21 @@ class FaviconService:
'url_count': self.url_count,
'request_icon_count': self.request_icon_count,
'request_cache_count': self.request_cache_count,
'queue_size': self.icon_queue.qsize(),
'total_queue_size': self.total_queue.qsize(),
'domain_list': self.domain_list,
'queue_size': self.total_queue.qsize(),
'domain_list': len(self.domain_list),
}
async def get_favicon_handler(
def get_favicon_handler(
self,
request: Request,
bg_tasks: BackgroundTasks,
url: Optional[str] = None,
refresh: Optional[str] = None,
sync: Optional[str] = None
# sync: Optional[str] = None
) -> dict[str, str] | Response:
"""处理获取图标的请求"""
logger.info(f"队列大小 icon/total/failed{self.icon_queue.qsize()} | {self.total_queue.qsize()} | {len(favicon.failed_urls)}")
logger.info(f"队列大小 queue/failed{self.total_queue.qsize()} | {len(favicon.failed_urls)}")
self.url_count += 1
@@ -376,13 +374,12 @@ class FaviconService:
# 验证域名
if not entity.domain:
logger.warning(f"无效的URL: {url}")
return self.get_default(self.time_of_7_days)
return self.get_default(setting.time_of_7_days)
# 检查内存缓存中的失败URL
if entity.domain in favicon.failed_urls:
_expire_time = favicon.failed_urls.get(entity.domain)
if int(time.time()) <= _expire_time:
return self.get_default(self.time_of_7_days)
if int(time.time()) <= favicon.failed_urls.get(entity.domain):
return self.get_default(setting.time_of_7_days)
else:
del favicon.failed_urls[entity.domain]
@@ -396,7 +393,7 @@ class FaviconService:
# 确定内容类型和缓存时间
content_type = filetype.guess_mime(icon_content) if icon_content else ""
cache_time = self.time_of_12_hours if self._is_default_icon_byte(icon_content) else self.time_of_7_days
cache_time = setting.time_of_12_hours if self._is_default_icon_byte(icon_content) else setting.time_of_7_days
# 乐观缓存机制:检查缓存是否已过期但仍有缓存内容
# _cached 存在但 cached_icon 为 None 表示缓存已过期
@@ -404,7 +401,6 @@ class FaviconService:
# 缓存已过期,后台刷新缓存
logger.info(f"缓存已过期,加入后台队列刷新: {entity.domain}")
# 开始图标处理,加入两个队列
self.icon_queue.put(entity.domain)
self.total_queue.put(entity.domain)
bg_tasks.add_task(self.get_icon_sync, entity, _cached)
@@ -413,40 +409,33 @@ class FaviconService:
headers=self._get_header(content_type, cache_time))
else:
# 开始图标处理,加入两个队列
self.icon_queue.put(entity.domain)
self.total_queue.put(entity.domain)
# 检查sync参数
is_sync = sync in ['true', '1']
if (not is_sync) or (not check_referer(request)):
# 返回默认图片并加入后台队列
logger.info(f"返回默认图片并加入后台队列: {entity.domain}")
# 没有缓存,实时处理,检查队列大小
_queue_size = self.total_queue.qsize()
if _queue_size >= setting.MAX_QUEUE_SIZE:
# 加入后台队列并返回默认图片
logger.info(f"队列大小({_queue_size})>={setting.MAX_QUEUE_SIZE}返回默认图片并加入后台队列: {entity.domain}")
bg_tasks.add_task(self.get_icon_sync, entity, _cached)
return self.get_default(0)
else:
# 没有缓存,实时处理,检查队列大小
queue_size = self.icon_queue.qsize()
if queue_size >= self.MAX_QUEUE_SIZE:
# 加入后台队列并返回默认图片
logger.info(f"队列大小({queue_size})>={self.MAX_QUEUE_SIZE},返回默认图片并加入后台队列: {entity.domain}")
bg_tasks.add_task(self.get_icon_sync, entity, _cached)
return self.get_default(0)
else:
# 队列<MAX_QUEUE_SIZE实时处理
logger.info(f"队列大小({queue_size})<{self.MAX_QUEUE_SIZE},实时处理: {entity.domain}")
icon_content = self.get_icon_sync(entity, _cached)
# 队列<MAX_QUEUE_SIZE实时处理
logger.info(f"队列大小({_queue_size})<{setting.MAX_QUEUE_SIZE},实时处理: {entity.domain}")
if not icon_content:
# 获取失败,返回默认图标
return self.get_default()
# 使用同步方法获取图标
icon_content = self.get_icon_sync(entity, _cached)
# 确定内容类型和缓存时间
content_type = filetype.guess_mime(icon_content) if icon_content else ""
cache_time = self.time_of_12_hours if self._is_default_icon_byte(icon_content) else self.time_of_7_days
if not icon_content:
# 获取失败,返回默认图标
return self.get_default()
return Response(content=icon_content,
media_type=content_type if content_type else "image/x-icon",
headers=self._get_header(content_type, cache_time))
# 确定内容类型和缓存时间
content_type = filetype.guess_mime(icon_content) if icon_content else ""
cache_time = setting.time_of_12_hours if self._is_default_icon_byte(icon_content) else setting.time_of_7_days
return Response(content=icon_content,
media_type=content_type if content_type else "image/x-icon",
headers=self._get_header(content_type, cache_time))
except Exception as e:
logger.error(f"处理图标请求时发生错误 {url}: {e}")
# 返回默认图标
@@ -457,8 +446,8 @@ class FaviconService:
def get_default(self, cache_time: int = None) -> Response:
if cache_time is None:
cache_time = self.time_of_1_days
return Response(content=default_icon_content,
cache_time = setting.time_of_1_days
return Response(content=setting.default_icon_file,
media_type="image/png",
headers=self._get_header("image/png", cache_time))