master
jinql 2025-09-10 15:36:18 +08:00
parent cb9b9c2d35
commit 0adc1e717d
8 changed files with 48 additions and 253 deletions

View File

@@ -1,4 +1,4 @@
-FROM python:3.12-slim AS builder
+FROM python:3.13-slim AS builder
 WORKDIR /app
@@ -9,7 +9,7 @@ COPY . .
 RUN python -m compileall -b .
-FROM python:3.12-slim
+FROM python:3.13-slim
 WORKDIR /app

View File

@@ -4,11 +4,8 @@ services:
     container_name: favicon-api
     ports:
       - 8001:8000
-    env_file:
-      - .env
     environment:
       TZ: Asia/Shanghai
-      REDIS_URL: redis://redis:6379/0
     volumes:
       - /usr/share/zoneinfo/Asia/Shanghai:/usr/share/zoneinfo/Asia/Shanghai:ro
       - /etc/localtime:/etc/localtime:ro
@@ -17,17 +14,3 @@ services:
       - ./conf:/app/conf:rw
       - ./logs:/app/logs:rw
     restart: unless-stopped
-    networks:
-      - favicon_network
-    depends_on:
-      - redis
-  redis:
-    image: redis:7-alpine
-    container_name: favicon-redis
-    networks:
-      - favicon_network
-networks:
-  favicon_network:
-    driver: bridge

View File

@@ -6,7 +6,8 @@ import ipaddress
 import logging
 import re
 import socket
-from typing import Tuple, Optional
+import time
+from typing import Tuple, Optional, Dict
 from urllib.parse import urlparse

 import aiohttp
@@ -14,7 +15,7 @@ import requests
 import urllib3

 import setting
-from favicon_app.utils import header, redis_pool
+from favicon_app.utils import header
 from favicon_app.utils.filetype import helpers, filetype

 # 禁用SSL警告
@@ -22,7 +23,6 @@ urllib3.disable_warnings()
 logging.captureWarnings(True)
 # 配置日志
 logger = logging.getLogger(__name__)
-# warnings.filterwarnings("ignore", category=RuntimeWarning)

 # 创建requests会话池
 requests_session = requests.Session()
@@ -34,7 +34,7 @@ DEFAULT_TIMEOUT = 10
 DEFAULT_RETRIES = 2

 # 存储失败的URL,值为缓存过期时间戳
-# failed_urls: Dict[str, int] = dict()
+failed_urls: Dict[str, int] = dict()

 # 创建aiohttp客户端会话池
 _aiohttp_client = None
@@ -111,7 +111,7 @@ class Favicon:
             if self.domain:
                 self.domain_md5 = hashlib.md5(self.domain.encode("utf-8")).hexdigest()
         except Exception as e:
-            redis_pool.set_failed_domain(self.domain, setting.time_of_1_days)
+            failed_urls[self.domain] = setting.time_of_1_days + int(time.time())
             self.scheme = None
             self.domain = None
             logger.error('URL解析错误: %s, URL: %s', str(e), url)
@@ -265,7 +265,7 @@ def _check_internal(domain: str) -> bool:
                 return True
         return False
     except Exception as e:
-        redis_pool.set_failed_domain(domain, setting.time_of_1_days)
+        failed_urls[domain] = setting.time_of_1_days + int(time.time())
         logger.error('解析域名出错: %s, 错误: %s', domain, str(e))
         return False
@@ -335,19 +335,19 @@ async def _req_get(url: str,
                     content = await resp.read()
                     return content, ct_type
                 else:
-                    await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
+                    failed_urls[domain] = setting.time_of_1_hours + int(time.time())
                     logger.error('异步请求失败: %d, URL: %s', resp.status, url)
                     break
         except (aiohttp.ClientConnectorError, aiohttp.ServerTimeoutError) as e:
             retry_count += 1
             if retry_count > retries:
-                await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
+                failed_urls[domain] = setting.time_of_5_minus + int(time.time())
                 logger.error('异步请求超时: %s, URL: %s', str(e), url)
             else:
                 logger.warning('异步请求超时,正在重试(%d/%d): %s', retry_count, retries, url)
                 continue
         except Exception as e:
-            await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
+            failed_urls[domain] = setting.time_of_1_hours + int(time.time())
             logger.error('异步请求异常: %s, URL: %s', str(e), url)
             break
@@ -358,9 +358,3 @@ async def _req_get(url: str,
 _pattern_domain = re.compile(
     r'[a-zA-Z0-9\u4E00-\u9FA5][-a-zA-Z0-9\u4E00-\u9FA5]{0,62}(\.[a-zA-Z0-9\u4E00-\u9FA5][-a-zA-Z0-9\u4E00-\u9FA5]{0,62})+\.?',
     re.I)
-
-# def failed_url_cache(_domain: str, _time: int):
-#     if _domain:
-#         _current_time = int(time.time())
-#         if (not failed_urls.get(_domain)) or (_current_time <= failed_urls.get(_domain)):
-#             failed_urls[_domain] = _current_time + _time
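
With redis_pool gone, this module keeps its failure cache in the module-level failed_urls dict, mapping a domain to the epoch second at which the entry expires. A minimal self-contained sketch of the pattern as the diff applies it (the mark_failed/is_failed helper names are illustrative, not functions in this repo):

    import time
    from typing import Dict

    # domain -> epoch second after which the failure entry lapses
    failed_urls: Dict[str, int] = {}


    def mark_failed(domain: str, ttl_seconds: int) -> None:
        """Record a failed domain with an absolute expiry timestamp."""
        if domain:
            failed_urls[domain] = int(time.time()) + ttl_seconds


    def is_failed(domain: str) -> bool:
        """Expire-on-read: evict the entry once its timestamp has passed."""
        expires_at = failed_urls.get(domain)
        if expires_at is None:
            return False
        if int(time.time()) <= expires_at:
            return True
        del failed_urls[domain]
        return False

Unlike the Redis-backed cache it replaces, this dict is per-process and lost on restart; under gunicorn each worker keeps its own copy, and an entry is only evicted when its domain is looked up again. The commit evidently accepts those trade-offs in exchange for dropping the Redis dependency.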

View File

@@ -16,8 +16,8 @@ from fastapi import Request, BackgroundTasks
 from fastapi.responses import Response

 import setting
-from favicon_app.models import Favicon
-from favicon_app.utils import header, redis_pool
+from favicon_app.models import Favicon, favicon
+from favicon_app.utils import header
 from favicon_app.utils.file_util import FileUtil
 from favicon_app.utils.filetype import helpers, filetype
@@ -36,11 +36,6 @@ async def get_favicon_handler(request: Request,
                               refresh: Optional[str] = None) -> dict[str, str] | Response:
     """异步处理获取图标的请求"""
-    logger.info(
-        f"队列大小(异步) queue/failed:"
-        f"{await redis_pool.get_cache_size(prefix=redis_pool.ICON_QUEUE_PREFIX)} "
-        f"| {await redis_pool.get_cache_size(prefix=redis_pool.FAILED_DOMAINS_PREFIX)}")
     # 验证URL参数
     if not url:
         return {"message": "请提供url参数"}
@@ -48,14 +43,19 @@
     try:
         entity = Favicon(url)
+        logger.info(f"-> failed url size: {len(favicon.failed_urls)}")

         # 验证域名
         if not entity.domain:
             logger.warning(f"无效的URL: {url}")
-            return get_default(setting.time_of_7_days)
+            return get_default(setting.time_of_1_days)

         # 检查缓存中的失败URL
-        if await redis_pool.is_domain_failed(entity.domain):
-            return get_default(setting.time_of_1_days)
+        if entity.domain in favicon.failed_urls:
+            if int(time.time()) <= favicon.failed_urls.get(entity.domain):
+                return get_default(setting.time_of_1_days)
+            else:
+                del favicon.failed_urls[entity.domain]

         # 检查缓存
         _cached, cached_icon = _get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1'])
@@ -74,39 +74,13 @@
         if _cached and not cached_icon:
             # 缓存已过期,后台刷新缓存
             logger.info(f"缓存已过期,加入后台队列刷新(异步): {entity.domain}")
-            await redis_pool.set_cache(
-                f"{entity.domain}",
-                entity.domain,
-                setting.time_of_2_hours,
-                prefix=redis_pool.ICON_QUEUE_PREFIX
-            )
             bg_tasks.add_task(get_icon_async, entity, _cached)
             return Response(content=icon_content,
                             media_type=content_type if content_type else "image/x-icon",
                             headers=_get_header(content_type, cache_time))
         else:
-            # 开始图标处理,加入队列
-            await redis_pool.set_cache(
-                f"{entity.domain}",
-                entity.domain,
-                setting.time_of_2_hours,
-                prefix=redis_pool.ICON_QUEUE_PREFIX
-            )
-            # 没有缓存,实时处理,检查队列大小
-            _queue_size = await redis_pool.get_cache_size(prefix=redis_pool.ICON_QUEUE_PREFIX)
-            if _queue_size >= setting.MAX_QUEUE_SIZE:
-                # 加入后台队列并返回默认图片
-                logger.info(
-                    f"队列大小({_queue_size})>={setting.MAX_QUEUE_SIZE},返回默认图片并加入后台队列(异步): {entity.domain}")
-                bg_tasks.add_task(get_icon_async, entity, _cached)
-                return get_default(0)
-            else:
-                # 队列<MAX_QUEUE_SIZE,实时处理
-                logger.info(f"队列大小({_queue_size})<{setting.MAX_QUEUE_SIZE},实时处理(异步): {entity.domain}")
-                # 始终使用异步方法获取图标
+            # 没有缓存,开始图标处理,始终使用异步方法获取图标
             icon_content = await get_icon_async(entity, _cached)

             if not icon_content:
@@ -164,7 +138,7 @@ async def get_icon_async(entity: Favicon, _cached: bytes = None) -> Optional[byt
     # 图标获取失败,或图标不是支持的图片格式,写入默认图标
     if (not icon_content) or (not helpers.is_image(icon_content) or _is_default_icon_byte(icon_content)):
-        logger.warning(f"-> 异步获取图标失败,使用默认图标: {entity.domain}")
+        logger.debug(f"-> 异步获取图标失败,使用默认图标: {entity.domain}")
         icon_content = _cached if _cached else setting.default_icon_file

     if icon_content:
@@ -186,8 +160,6 @@
     except Exception as e:
         logger.error(f"异步获取图标时发生错误 {entity.domain}: {e}")
         return _cached or setting.default_icon_file
-    finally:
-        await redis_pool.remove_cache(f"{entity.domain}", prefix=redis_pool.ICON_QUEUE_PREFIX)

 # 预编译正则表达式,提高性能
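
With the Redis icon queue and the MAX_QUEUE_SIZE gate removed, stale-cache refreshes now rest entirely on FastAPI's BackgroundTasks, which runs a scheduled coroutine only after the response has been sent. A minimal sketch of that mechanism, with illustrative handler and task names standing in for this repo's get_favicon_handler/get_icon_async:

    from fastapi import BackgroundTasks, FastAPI

    app = FastAPI()


    async def refresh_icon(domain: str) -> None:
        # stand-in for get_icon_async: re-fetch the icon and rewrite the cache
        ...


    @app.get("/icon")
    async def icon_handler(domain: str, bg_tasks: BackgroundTasks):
        # respond immediately with whatever is cached...
        bg_tasks.add_task(refresh_icon, domain)
        # ...the task runs only after this response has been sent
        return {"status": "stale icon served, refresh scheduled"}

Because these tasks run in-process, a burst of requests for expired domains now fans out as concurrent fetches instead of being throttled by the old queue-size check, another simplification this commit accepts.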

View File

@@ -1,134 +0,0 @@
-# -*- coding: utf-8 -*-
-import logging
-from typing import AsyncGenerator, Optional
-
-from redis.asyncio import ConnectionPool, Redis
-
-import setting
-
-logger = logging.getLogger(__name__)
-
-REDIS_URL = setting.REDIS_URL
-
-# Redis键前缀
-FAILED_DOMAINS_PREFIX = "favicon:failed_domain:"  # 失败域名前缀
-TASK_QUEUE_PREFIX = "favicon:task_queue:"  # 任务队列前缀
-PROCESSING_SET_PREFIX = "favicon:processing:"  # 处理中任务集合前缀
-ICON_QUEUE_PREFIX = "favicon:icon_queue:"
-
-pool = ConnectionPool.from_url(
-    REDIS_URL,
-    max_connections=200,
-    decode_responses=True,
-)
-
-
-async def get_redis() -> AsyncGenerator[Redis, None]:
-    async with Redis(connection_pool=pool) as conn:
-        yield conn
-
-
-async def set_cache(key: str, value: [str | int], ttl: int = None, prefix: str = None) -> None:
-    if not key:
-        return
-    try:
-        async for redis in get_redis():
-            _key = key
-            if prefix:
-                _key = f"{prefix}{key}"
-                await redis.sadd(prefix, key)
-                await redis.expire(prefix, ttl)
-            await redis.set(_key, value, ex=ttl)
-    except Exception as e:
-        logger.error(f"存入redis时出错:{e}")
-
-
-async def get_cache(key: str, prefix: str = None) -> Optional[str | int]:
-    if not key:
-        return None
-    try:
-        async for redis in get_redis():
-            if prefix:
-                key = f"{prefix}{key}"
-            return await redis.get(key)
-    except Exception as e:
-        logger.error(f"读取redis时出错:{e}")
-
-
-async def exist_cache(key: str, prefix: str = None) -> bool:
-    if not key:
-        return False
-    try:
-        async for redis in get_redis():
-            if prefix:
-                key = f"{prefix}{key}"
-            result = await redis.exists(key)
-            return result > 0
-    except Exception as e:
-        logger.error(f"读取redis时出错:{e}")
-    return False
-
-
-async def remove_cache(key: str, prefix: str = None) -> None:
-    if not key:
-        return
-    try:
-        async for redis in get_redis():
-            _key = key
-            if prefix:
-                _key = f"{prefix}{key}"
-                await redis.srem(prefix, key)
-            await redis.delete(_key)
-    except Exception as e:
-        logger.error(f"删除redis时出错:{e}")
-
-
-async def get_cache_size(prefix: str = None) -> int:
-    """根据前缀统计数量,用于统计Set集合
-    """
-    try:
-        async for redis in get_redis():
-            return await redis.scard(prefix)
-    except Exception as e:
-        logger.error(f"获取队列大小时出错:{e}")
-    return 0
-
-
-async def set_failed_domain(domain: str, expire_seconds: int = None) -> None:
-    if not domain:
-        return
-    try:
-        await set_cache(f"{domain}", domain, ttl=expire_seconds, prefix=FAILED_DOMAINS_PREFIX)
-        logger.debug(f"已将失败域名 {domain} 存入Redis,过期时间:{expire_seconds}")
-    except Exception as e:
-        logger.error(f"将失败域名存入Redis时出错:{e}")
-
-
-async def is_domain_failed(domain: str) -> bool:
-    if not domain:
-        return False
-    try:
-        return await exist_cache(domain, prefix=FAILED_DOMAINS_PREFIX)
-    except Exception as e:
-        logger.error(f"检查域名是否失败时出错:{e}")
-    return False
-
-
-async def delete_failed_domain(domain: str) -> None:
-    if not domain:
-        return
-    try:
-        await remove_cache(domain, prefix=FAILED_DOMAINS_PREFIX)
-        logger.debug(f"已从Redis删除失败域名 {domain}")
-    except Exception as e:
-        logger.error(f"从Redis删除失败域名时出错:{e}")
main.py
View File

@@ -2,14 +2,12 @@
 import logging
 import os
-from contextlib import asynccontextmanager

 from fastapi import FastAPI, Request
 from fastapi.responses import Response

 import setting
 from favicon_app.routes import favicon_router
-from favicon_app.utils import redis_pool
 from favicon_app.utils.file_util import FileUtil

 logger = logging.getLogger(__name__)
@@ -23,18 +21,8 @@ default_icon_file = setting.default_icon_file
 # referer日志文件路径
 referer_log_file = setting.referer_log_file

-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """应用级生命周期:启动/清理。"""
-    logger.warning("Redis pool ready.")
-    yield
-    await redis_pool.pool.aclose()
-    logger.warning("Redis pool closed.")
-
 # fastapi
-app = FastAPI(lifespan=lifespan, title="Favicon API", description="获取网站favicon图标", version="3.0")
+app = FastAPI(title="Favicon API", description="获取网站favicon图标", version="3.0")
 app.include_router(favicon_router)

View File

@@ -1,17 +1,16 @@
 --index https://mirrors.xinac.net/pypi/simple
 --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple
-fastapi~=0.116
+fastapi~=0.116.1
-pydantic~=2.11
+pydantic~=2.11.7
-pydantic_core~=2.33
+pydantic_core~=2.33.2
-starlette~=0.47
+starlette~=0.47.3
-requests~=2.32
+requests~=2.32.5
-aiohttp~=3.10.10
+aiohttp~=3.12.15
-bs4~=0.0
+bs4~=0.0.2
-beautifulsoup4~=4.13
+beautifulsoup4~=4.13.5
-lxml~=6.0
+lxml~=6.0.1
-PyYAML~=6.0
+PyYAML~=6.0.2
-uvicorn~=0.35
+uvicorn~=0.35.0
-uvicorn-worker~=0.3
+uvicorn-worker~=0.3.0
-gunicorn~=23.0
+gunicorn~=23.0.0
-redis[hiredis]~=6.4
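
Note that alongside dropping redis[hiredis], the pins tighten semantically: under PEP 440 compatible-release rules, fastapi~=0.116 permits any 0.x release from 0.116 upward, while fastapi~=0.116.1 restricts the range to >=0.116.1,<0.117.0.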

View File

@@ -17,9 +17,6 @@ default_icon_file = FileUtil.read_file(default_icon_path, mode='rb')
 # 定义referer日志文件路径
 referer_log_file = os.path.join(icon_root_path, 'data', 'referer.txt')

-# 队列阈值常量配置
-MAX_QUEUE_SIZE = 10
-
 # 时间常量
 time_of_1_minus = 1 * 60
 time_of_5_minus = 5 * time_of_1_minus
@@ -36,7 +33,3 @@ time_of_1_days = 1 * 24 * 60 * 60
 time_of_7_days = 7 * time_of_1_days
 time_of_15_days = 15 * time_of_1_days
 time_of_30_days = 30 * time_of_1_days
-
-# 是否使用同步方式
-# REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
-REDIS_URL = os.getenv("REDIS_URL", "redis://192.168.31.254:6379/0")