From 0adc1e717d9bd8db353c2b73385f2d7d219e522e Mon Sep 17 00:00:00 2001
From: jinql
Date: Wed, 10 Sep 2025 15:36:18 +0800
Subject: [PATCH] 25.09.10

---
 Dockerfile                            |   4 +-
 docker-compose.yml                    |  17 ----
 favicon_app/models/favicon.py         |  24 ++---
 favicon_app/routes/favicon_service.py |  74 +++++---------
 favicon_app/utils/redis_pool.py       | 134 --------------------------
 main.py                               |  14 +--
 requirements.txt                      |  27 +++---
 setting.py                            |   7 --
 8 files changed, 48 insertions(+), 253 deletions(-)
 delete mode 100644 favicon_app/utils/redis_pool.py

diff --git a/Dockerfile b/Dockerfile
index d689c2b..d6d445a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.12-slim AS builder
+FROM python:3.13-slim AS builder
 
 WORKDIR /app
 
@@ -9,7 +9,7 @@ COPY . .
 
 RUN python -m compileall -b .
 
-FROM python:3.12-slim
+FROM python:3.13-slim
 
 WORKDIR /app
 
diff --git a/docker-compose.yml b/docker-compose.yml
index e2dc6cb..be7a9eb 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -4,11 +4,8 @@ services:
     container_name: favicon-api
     ports:
       - 8001:8000
-    env_file:
-      - .env
     environment:
       TZ: Asia/Shanghai
-      REDIS_URL: redis://redis:6379/0
     volumes:
       - /usr/share/zoneinfo/Asia/Shanghai:/usr/share/zoneinfo/Asia/Shanghai:ro
      - /etc/localtime:/etc/localtime:ro
@@ -17,17 +14,3 @@
       - ./conf:/app/conf:rw
       - ./logs:/app/logs:rw
     restart: unless-stopped
-    networks:
-      - favicon_network
-    depends_on:
-      - redis
-
-  redis:
-    image: redis:7-alpine
-    container_name: favicon-redis
-    networks:
-      - favicon_network
-
-networks:
-  favicon_network:
-    driver: bridge
diff --git a/favicon_app/models/favicon.py b/favicon_app/models/favicon.py
index 351ee46..d041e76 100644
--- a/favicon_app/models/favicon.py
+++ b/favicon_app/models/favicon.py
@@ -6,7 +6,8 @@ import ipaddress
 import logging
 import re
 import socket
-from typing import Tuple, Optional
+import time
+from typing import Tuple, Optional, Dict
 from urllib.parse import urlparse
 
 import aiohttp
@@ -14,7 +15,7 @@ import requests
 import urllib3
 
 import setting
-from favicon_app.utils import header, redis_pool
+from favicon_app.utils import header
 from favicon_app.utils.filetype import helpers, filetype
 
 # 禁用SSL警告
@@ -22,7 +23,6 @@ urllib3.disable_warnings()
 logging.captureWarnings(True)
 # 配置日志
 logger = logging.getLogger(__name__)
-# warnings.filterwarnings("ignore", category=RuntimeWarning)
 
 # 创建requests会话池
 requests_session = requests.Session()
@@ -34,7 +34,7 @@ DEFAULT_TIMEOUT = 10
 DEFAULT_RETRIES = 2
 
 # 存储失败的URL,值为缓存过期时间戳
-# failed_urls: Dict[str, int] = dict()
+failed_urls: Dict[str, int] = dict()
 
 # 创建aiohttp客户端会话池
 _aiohttp_client = None
@@ -111,7 +111,7 @@ class Favicon:
             if self.domain:
                 self.domain_md5 = hashlib.md5(self.domain.encode("utf-8")).hexdigest()
         except Exception as e:
-            redis_pool.set_failed_domain(self.domain, setting.time_of_1_days)
+            failed_urls[self.domain] = setting.time_of_1_days + int(time.time())
             self.scheme = None
             self.domain = None
             logger.error('URL解析错误: %s, URL: %s', str(e), url)
@@ -265,7 +265,7 @@ def _check_internal(domain: str) -> bool:
                 return True
         return False
     except Exception as e:
-        redis_pool.set_failed_domain(domain, setting.time_of_1_days)
+        failed_urls[domain] = setting.time_of_1_days + int(time.time())
         logger.error('解析域名出错: %s, 错误: %s', domain, str(e))
         return False
 
@@ -335,19 +335,19 @@ async def _req_get(url: str,
                     content = await resp.read()
                     return content, ct_type
                 else:
-                    await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
+                    failed_urls[domain] = setting.time_of_1_hours + int(time.time())
                     logger.error('异步请求失败: %d, URL: %s',
                                  resp.status, url)
                     break
         except (aiohttp.ClientConnectorError, aiohttp.ServerTimeoutError) as e:
             retry_count += 1
             if retry_count > retries:
-                await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
+                failed_urls[domain] = setting.time_of_5_minus + int(time.time())
                 logger.error('异步请求超时: %s, URL: %s', str(e), url)
             else:
                 logger.warning('异步请求超时,正在重试(%d/%d): %s', retry_count, retries, url)
                 continue
         except Exception as e:
-            await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
+            failed_urls[domain] = setting.time_of_1_hours + int(time.time())
             logger.error('异步请求异常: %s, URL: %s', str(e), url)
             break
@@ -358,9 +358,3 @@ async def _req_get(url: str,
 _pattern_domain = re.compile(
     r'[a-zA-Z0-9\u4E00-\u9FA5][-a-zA-Z0-9\u4E00-\u9FA5]{0,62}(\.[a-zA-Z0-9\u4E00-\u9FA5][-a-zA-Z0-9\u4E00-\u9FA5]{0,62})+\.?',
     re.I)
-
-# def failed_url_cache(_domain: str, _time: int):
-#     if _domain:
-#         _current_time = int(time.time())
-#         if (not failed_urls.get(_domain)) or (_current_time <= failed_urls.get(_domain)):
-#             failed_urls[_domain] = _current_time + _time
diff --git a/favicon_app/routes/favicon_service.py b/favicon_app/routes/favicon_service.py
index 574a4fb..5abee88 100644
--- a/favicon_app/routes/favicon_service.py
+++ b/favicon_app/routes/favicon_service.py
@@ -16,8 +16,8 @@ from fastapi import Request, BackgroundTasks
 from fastapi.responses import Response
 
 import setting
-from favicon_app.models import Favicon
-from favicon_app.utils import header, redis_pool
+from favicon_app.models import Favicon, favicon
+from favicon_app.utils import header
 from favicon_app.utils.file_util import FileUtil
 from favicon_app.utils.filetype import helpers, filetype
 
@@ -36,11 +36,6 @@ async def get_favicon_handler(request: Request,
                               refresh: Optional[str] = None) -> dict[str, str] | Response:
     """异步处理获取图标的请求"""
 
-    logger.info(
-        f"队列大小(异步) queue/failed:"
-        f"{await redis_pool.get_cache_size(prefix=redis_pool.ICON_QUEUE_PREFIX)} "
-        f"| {await redis_pool.get_cache_size(prefix=redis_pool.FAILED_DOMAINS_PREFIX)}")
-
     # 验证URL参数
     if not url:
         return {"message": "请提供url参数"}
@@ -48,14 +43,19 @@ async def get_favicon_handler(request: Request,
     try:
         entity = Favicon(url)
 
+        logger.info(f"-> failed url size: {len(favicon.failed_urls)}")
+
         # 验证域名
         if not entity.domain:
             logger.warning(f"无效的URL: {url}")
-            return get_default(setting.time_of_7_days)
+            return get_default(setting.time_of_1_days)
 
         # 检查缓存中的失败URL
-        if await redis_pool.is_domain_failed(entity.domain):
-            return get_default(setting.time_of_1_days)
+        if entity.domain in favicon.failed_urls:
+            if int(time.time()) <= favicon.failed_urls.get(entity.domain):
+                return get_default(setting.time_of_1_days)
+            else:
+                del favicon.failed_urls[entity.domain]
 
         # 检查缓存
         _cached, cached_icon = _get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1'])
@@ -74,53 +74,27 @@ async def get_favicon_handler(request: Request,
         if _cached and not cached_icon:
             # 缓存已过期,后台刷新缓存
             logger.info(f"缓存已过期,加入后台队列刷新(异步): {entity.domain}")
-            await redis_pool.set_cache(
-                f"{entity.domain}",
-                entity.domain,
-                setting.time_of_2_hours,
-                prefix=redis_pool.ICON_QUEUE_PREFIX
-            )
 
             bg_tasks.add_task(get_icon_async, entity, _cached)
             return Response(content=icon_content,
                             media_type=content_type if content_type else "image/x-icon",
                             headers=_get_header(content_type, cache_time))
         else:
-            # 开始图标处理,加入队列
-            await redis_pool.set_cache(
-                f"{entity.domain}",
-                entity.domain,
-                setting.time_of_2_hours,
-                prefix=redis_pool.ICON_QUEUE_PREFIX
-            )
+            # 没有缓存,开始图标处理,始终使用异步方法获取图标
+            icon_content = await get_icon_async(entity, _cached)
 
-            # 没有缓存,实时处理,检查队列大小
-            _queue_size = await redis_pool.get_cache_size(prefix=redis_pool.ICON_QUEUE_PREFIX)
-            if _queue_size >= setting.MAX_QUEUE_SIZE:
-                # 加入后台队列并返回默认图片
-                logger.info(
-                    f"队列大小({_queue_size})>={setting.MAX_QUEUE_SIZE},返回默认图片并加入后台队列(异步): {entity.domain}")
-                bg_tasks.add_task(get_icon_async, entity, _cached)
-                return get_default(0)
-            else:
-                # 队列 Optional[byt
         # 图标获取失败,或图标不是支持的图片格式,写入默认图标
         if (not icon_content) or (not helpers.is_image(icon_content) or _is_default_icon_byte(icon_content)):
-            logger.warning(f"-> 异步获取图标失败,使用默认图标: {entity.domain}")
+            logger.debug(f"-> 异步获取图标失败,使用默认图标: {entity.domain}")
 
             icon_content = _cached if _cached else setting.default_icon_file
 
         if icon_content:
@@ -186,8 +160,6 @@ async def get_icon_async(entity: Favicon, _cached: bytes = None) -> Optional[byt
         except Exception as e:
             logger.error(f"异步获取图标时发生错误 {entity.domain}: {e}")
             return _cached or setting.default_icon_file
-        finally:
-            await redis_pool.remove_cache(f"{entity.domain}", prefix=redis_pool.ICON_QUEUE_PREFIX)
 
 
 # 预编译正则表达式,提高性能
diff --git a/favicon_app/utils/redis_pool.py b/favicon_app/utils/redis_pool.py
deleted file mode 100644
index 89103eb..0000000
--- a/favicon_app/utils/redis_pool.py
+++ /dev/null
@@ -1,134 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import logging
-from typing import AsyncGenerator, Optional
-
-from redis.asyncio import ConnectionPool, Redis
-
-import setting
-
-logger = logging.getLogger(__name__)
-
-REDIS_URL = setting.REDIS_URL
-
-# Redis键前缀
-FAILED_DOMAINS_PREFIX = "favicon:failed_domain:"  # 失败域名前缀
-TASK_QUEUE_PREFIX = "favicon:task_queue:"  # 任务队列前缀
-PROCESSING_SET_PREFIX = "favicon:processing:"  # 处理中任务集合前缀
-ICON_QUEUE_PREFIX = "favicon:icon_queue:"
-
-pool = ConnectionPool.from_url(
-    REDIS_URL,
-    max_connections=200,
-    decode_responses=True,
-)
-
-
-async def get_redis() -> AsyncGenerator[Redis, None]:
-    async with Redis(connection_pool=pool) as conn:
-        yield conn
-
-
-async def set_cache(key: str, value: [str | int], ttl: int = None, prefix: str = None) -> None:
-    if not key:
-        return
-
-    try:
-        async for redis in get_redis():
-            _key = key
-            if prefix:
-                _key = f"{prefix}{key}"
-                await redis.sadd(prefix, key)
-                await redis.expire(prefix, ttl)
-            await redis.set(_key, value, ex=ttl)
-    except Exception as e:
-        logger.error(f"存入redis时出错:{e}")
-
-
-async def get_cache(key: str, prefix: str = None) -> Optional[str | int]:
-    if not key:
-        return None
-
-    try:
-        async for redis in get_redis():
-            if prefix:
-                key = f"{prefix}{key}"
-            return await redis.get(key)
-    except Exception as e:
-        logger.error(f"读取redis时出错:{e}")
-
-
-async def exist_cache(key: str, prefix: str = None) -> bool:
-    if not key:
-        return False
-
-    try:
-        async for redis in get_redis():
-            if prefix:
-                key = f"{prefix}{key}"
-            result = await redis.exists(key)
-            return result > 0
-    except Exception as e:
-        logger.error(f"读取redis时出错:{e}")
-        return False
-
-
-async def remove_cache(key: str, prefix: str = None) -> None:
-    if not key:
-        return
-
-    try:
-        async for redis in get_redis():
-            _key = key
-            if prefix:
-                _key = f"{prefix}{key}"
-                await redis.srem(prefix, key)
-            await redis.delete(_key)
-    except Exception as e:
-        logger.error(f"删除redis时出错:{e}")
-
-
-async def get_cache_size(prefix: str = None) -> int:
-    """根据前缀统计数量,用于统计Set集合
-    """
-    try:
-        async for redis in get_redis():
-            return await redis.scard(prefix)
-    except Exception as e:
-        logger.error(f"获取队列大小时出错:{e}")
-        return 0
-
-
-async def set_failed_domain(domain: str, expire_seconds: int = None) -> None:
-    if not domain:
-        return
-
-    try:
-        await set_cache(f"{domain}", domain, ttl=expire_seconds, prefix=FAILED_DOMAINS_PREFIX)
-
-        logger.debug(f"已将失败域名 {domain} 存入Redis,过期时间:{expire_seconds}秒")
-    except Exception as e:
-        logger.error(f"将失败域名存入Redis时出错:{e}")
-
-
-async def is_domain_failed(domain: str) -> bool:
-    if not domain:
-        return False
-
-    try:
-        return await exist_cache(domain, prefix=FAILED_DOMAINS_PREFIX)
-    except Exception as e:
-        logger.error(f"检查域名是否失败时出错:{e}")
-        return False
-
-
-async def delete_failed_domain(domain: str) -> None:
-    if not domain:
-        return
-
-    try:
-        await remove_cache(domain, prefix=FAILED_DOMAINS_PREFIX)
-
-        logger.debug(f"已从Redis删除失败域名 {domain}")
-    except Exception as e:
-        logger.error(f"从Redis删除失败域名时出错:{e}")
diff --git a/main.py b/main.py
index f2afab2..b93a5a6 100644
--- a/main.py
+++ b/main.py
@@ -2,14 +2,12 @@
 
 import logging
 import os
-from contextlib import asynccontextmanager
 
 from fastapi import FastAPI, Request
 from fastapi.responses import Response
 
 import setting
 from favicon_app.routes import favicon_router
-from favicon_app.utils import redis_pool
 from favicon_app.utils.file_util import FileUtil
 
 logger = logging.getLogger(__name__)
@@ -23,18 +21,8 @@ default_icon_file = setting.default_icon_file
 
 # referer日志文件路径
 referer_log_file = setting.referer_log_file
 
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """应用级生命周期:启动/清理。"""
-    logger.warning("Redis pool ready.")
-    yield
-    await redis_pool.pool.aclose()
-    logger.warning("Redis pool closed.")
-
-
 # fastapi
-app = FastAPI(lifespan=lifespan, title="Favicon API", description="获取网站favicon图标", version="3.0")
+app = FastAPI(title="Favicon API", description="获取网站favicon图标", version="3.0")
 
 app.include_router(favicon_router)
diff --git a/requirements.txt b/requirements.txt
index fabefd6..69dfaf7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,17 +1,16 @@
 --index https://mirrors.xinac.net/pypi/simple
 --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple
-fastapi~=0.116
-pydantic~=2.11
-pydantic_core~=2.33
-starlette~=0.47
-requests~=2.32
-aiohttp~=3.10.10
-bs4~=0.0
-beautifulsoup4~=4.13
-lxml~=6.0
-PyYAML~=6.0
-uvicorn~=0.35
-uvicorn-worker~=0.3
-gunicorn~=23.0
-redis[hiredis]~=6.4
+fastapi~=0.116.1
+pydantic~=2.11.7
+pydantic_core~=2.33.2
+starlette~=0.47.3
+requests~=2.32.5
+aiohttp~=3.12.15
+bs4~=0.0.2
+beautifulsoup4~=4.13.5
+lxml~=6.0.1
+PyYAML~=6.0.2
+uvicorn~=0.35.0
+uvicorn-worker~=0.3.0
+gunicorn~=23.0.0
diff --git a/setting.py b/setting.py
index 6f8aa1d..ac32ecf 100644
--- a/setting.py
+++ b/setting.py
@@ -17,9 +17,6 @@ default_icon_file = FileUtil.read_file(default_icon_path, mode='rb')
 # 定义referer日志文件路径
 referer_log_file = os.path.join(icon_root_path, 'data', 'referer.txt')
 
-# 队列阈值常量配置
-MAX_QUEUE_SIZE = 10
-
 # 时间常量
 time_of_1_minus = 1 * 60
 time_of_5_minus = 5 * time_of_1_minus
@@ -36,7 +33,3 @@ time_of_1_days = 1 * 24 * 60 * 60
 time_of_7_days = 7 * time_of_1_days
 time_of_15_days = 15 * time_of_1_days
 time_of_30_days = 30 * time_of_1_days
-
-# 是否使用同步方式
-# REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
-REDIS_URL = os.getenv("REDIS_URL", "redis://192.168.31.254:6379/0")