diff --git a/Dockerfile b/Dockerfile index 0ef8b51..5371c5a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.13-slim +FROM python:3.12-slim WORKDIR /app diff --git a/favicon_app/routes/favicon_routes.py b/favicon_app/routes/favicon_routes.py index 08c6f87..7575c19 100644 --- a/favicon_app/routes/favicon_routes.py +++ b/favicon_app/routes/favicon_routes.py @@ -5,7 +5,7 @@ import os from typing import Optional import urllib3 -from fastapi import APIRouter, Request, Query +from fastapi import APIRouter, Request, Query, BackgroundTasks from fastapi.responses import Response from favicon_app.routes import favicon_service @@ -31,11 +31,13 @@ favicon_router = APIRouter(prefix="", tags=["favicon"]) @favicon_router.get('/') async def get_favicon( request: Request, + bg_tasks: BackgroundTasks, url: Optional[str] = Query(None, description="网址:eg. https://www.baidu.com"), - refresh: Optional[str] = Query(None, include_in_schema=False) + refresh: Optional[str] = Query(None, include_in_schema=False), + sync: Optional[str] = Query(False, description="是否使用同步方式获取") ): """获取网站图标""" - return await _service.get_favicon_handler(request, url, refresh) + return await _service.get_favicon_handler(request, bg_tasks, url, refresh, sync) @favicon_router.get('/icon/default') diff --git a/favicon_app/routes/favicon_service.py b/favicon_app/routes/favicon_service.py index b2b3626..22b95d6 100644 --- a/favicon_app/routes/favicon_service.py +++ b/favicon_app/routes/favicon_service.py @@ -6,7 +6,6 @@ import os import random import re import time -from concurrent.futures import ThreadPoolExecutor from queue import Queue from threading import Lock from typing import Optional, Tuple, Dict, Set, List @@ -14,7 +13,7 @@ from typing import Optional, Tuple, Dict, Set, List import bs4 import urllib3 from bs4 import SoupStrainer -from fastapi import Request +from fastapi import Request, BackgroundTasks from fastapi.responses import Response from favicon_app.models import Favicon @@ -51,9 +50,6 @@ class FaviconService: self.icon_queue = Queue() self.total_queue = Queue() - # 初始化线程池(FastAPI默认已使用异步,但保留线程池用于CPU密集型任务) - self.executor = ThreadPoolExecutor(15) - # 时间常量 self.time_of_1_minus = 1 * 60 self.time_of_5_minus = 5 * self.time_of_1_minus @@ -144,6 +140,9 @@ class FaviconService: # 处理刷新请求或缓存过期情况 if refresh: + if int(time.time()) - file_time <= self.time_of_12_hours: + logger.info(f"缓存文件修改时间在有效期内,不执行刷新: {cache_path}") + return cached_icon, cached_icon return cached_icon, None # 检查缓存是否过期(最大30天) @@ -151,7 +150,7 @@ class FaviconService: logger.info(f"图标缓存过期(>30天): {cache_path}") return cached_icon, None - # 对于默认图标,使用随机的缓存时间 + # 默认图标,使用随机的缓存时间 if int(time.time()) - file_time > self.time_of_1_days * random.randint(1, 7) and self._is_default_icon_file(cache_path): logger.info(f"默认图标缓存过期: {cache_path}") return cached_icon, None @@ -364,10 +363,6 @@ class FaviconService: self.domain_list.remove(entity.domain) self._queue_pull(True, self.total_queue) - def get_icon_background(self, entity: Favicon, _cached: bytes = None) -> None: - """在后台线程中获取图标""" - self.executor.submit(self.get_icon_sync, entity, _cached) - def get_count(self) -> Dict[str, int]: """获取统计数据""" with self._lock: @@ -383,8 +378,10 @@ class FaviconService: async def get_favicon_handler( self, request: Request, + bg_tasks: BackgroundTasks, url: Optional[str] = None, - refresh: Optional[str] = None + refresh: Optional[str] = None, + sync: Optional[str] = None ) -> dict[str, str] | Response: """处理获取图标的请求""" with self._lock: @@ -405,46 +402,62 @@ class FaviconService: # 检测并记录referer await self._referer(request) - # 检查队列大小 - if self.icon_queue.qsize() > 100: - logger.warning(f'-> 警告: 队列大小已达到 => {self.icon_queue.qsize()}') - # 检查缓存 - _cached, cached_icon = self._get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1']) + _cached, cached_icon = self._get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1', 'True']) if cached_icon: # 使用缓存图标 icon_content = cached_icon with self._lock: self.request_cache_count += 1 + + # 确定内容类型和缓存时间 + content_type = filetype.guess_mime(icon_content) if icon_content else "" + cache_time = self.time_of_1_hours * random.randint(1, 6) if self._is_default_icon_byte(icon_content) else self.time_of_7_days + + # 乐观缓存机制:检查缓存是否已过期但仍有缓存内容 + # _cached 存在但 cached_icon 为 None 表示缓存已过期 + if _cached and not cached_icon: + # 缓存已过期,后台刷新缓存 + logger.info(f"缓存已过期,加入后台队列刷新: {entity.domain}") + bg_tasks.add_task(self.get_icon_sync, entity, _cached) + + return Response(content=icon_content, + media_type=content_type if content_type else "image/x-icon", + headers=self._get_header(content_type, cache_time)) else: - # 将域名加入队列 - self.icon_queue.put(entity.domain) - self.total_queue.put(entity.domain) - - if self.icon_queue.qsize() > 10: - # 如果队列较大,使用后台任务处理 - self.get_icon_background(entity, _cached) - self._queue_pull(True) - - # 返回默认图标,不缓存 + # 检查sync参数 + is_sync = sync in ['true', '1', 'True'] + + if not is_sync: + # 返回默认图片并加入后台队列 + logger.info(f"返回默认图片并加入后台队列: {entity.domain}") + bg_tasks.add_task(self.get_icon_sync, entity, _cached) return self.get_default(0) else: - # 直接处理请求 - icon_content = self.get_icon_sync(entity, _cached) - self._queue_pull(True) - - if not icon_content: - # 获取失败,返回默认图标,不缓存 + # 没有缓存,实时处理,检查队列大小 + queue_size = self.icon_queue.qsize() + if queue_size >= 16: + # 加入后台队列并返回默认图片 + logger.info(f"队列大小({queue_size})>=16,返回默认图片并加入后台队列: {entity.domain}") + bg_tasks.add_task(self.get_icon_sync, entity, _cached) return self.get_default(0) + else: + # 队列<16,实时处理 + logger.info(f"队列大小({queue_size})<16,实时处理: {entity.domain}") + icon_content = self.get_icon_sync(entity, _cached) + + if not icon_content: + # 获取失败,返回默认图标,不缓存 + return self.get_default(0) + + # 确定内容类型和缓存时间 + content_type = filetype.guess_mime(icon_content) if icon_content else "" + cache_time = self.time_of_1_hours * random.randint(1, 6) if self._is_default_icon_byte(icon_content) else self.time_of_7_days - # 确定内容类型和缓存时间 - content_type = filetype.guess_mime(icon_content) if icon_content else "" - cache_time = self.time_of_1_hours * random.randint(1, 6) if self._is_default_icon_byte(icon_content) else self.time_of_7_days - - return Response(content=icon_content, - media_type=content_type if content_type else "image/x-icon", - headers=self._get_header(content_type, cache_time)) + return Response(content=icon_content, + media_type=content_type if content_type else "image/x-icon", + headers=self._get_header(content_type, cache_time)) except Exception as e: logger.error(f"处理图标请求时发生错误 {url}: {e}") # 返回默认图标 diff --git a/gunicorn.conf.py b/gunicorn.conf.py index fd83a73..a68f262 100644 --- a/gunicorn.conf.py +++ b/gunicorn.conf.py @@ -11,6 +11,7 @@ worker_class = "uvicorn.workers.UvicornWorker" # 可选:日志级别 loglevel = "info" +# loglevel = "warning" # 可选:访问日志和错误日志输出到控制台(Docker 常用) accesslog = "-" diff --git a/run.py b/run.py index f62c62f..bdd2ced 100644 --- a/run.py +++ b/run.py @@ -9,6 +9,7 @@ if __name__ == "__main__": port=8000, reload=False, log_level="info", + # log_level="warning", workers=1, ) server = uvicorn.Server(config)