master
jinql 2025-09-10 15:36:18 +08:00
parent cb9b9c2d35
commit 0adc1e717d
8 changed files with 48 additions and 253 deletions

View File

@ -1,4 +1,4 @@
FROM python:3.12-slim AS builder
FROM python:3.13-slim AS builder
WORKDIR /app
@ -9,7 +9,7 @@ COPY . .
RUN python -m compileall -b .
FROM python:3.12-slim
FROM python:3.13-slim
WORKDIR /app

View File

@ -4,11 +4,8 @@ services:
container_name: favicon-api
ports:
- 8001:8000
env_file:
- .env
environment:
TZ: Asia/Shanghai
REDIS_URL: redis://redis:6379/0
volumes:
- /usr/share/zoneinfo/Asia/Shanghai:/usr/share/zoneinfo/Asia/Shanghai:ro
- /etc/localtime:/etc/localtime:ro
@ -17,17 +14,3 @@ services:
- ./conf:/app/conf:rw
- ./logs:/app/logs:rw
restart: unless-stopped
networks:
- favicon_network
depends_on:
- redis
redis:
image: redis:7-alpine
container_name: favicon-redis
networks:
- favicon_network
networks:
favicon_network:
driver: bridge

View File

@ -6,7 +6,8 @@ import ipaddress
import logging
import re
import socket
from typing import Tuple, Optional
import time
from typing import Tuple, Optional, Dict
from urllib.parse import urlparse
import aiohttp
@ -14,7 +15,7 @@ import requests
import urllib3
import setting
from favicon_app.utils import header, redis_pool
from favicon_app.utils import header
from favicon_app.utils.filetype import helpers, filetype
# 禁用SSL警告
@ -22,7 +23,6 @@ urllib3.disable_warnings()
logging.captureWarnings(True)
# 配置日志
logger = logging.getLogger(__name__)
# warnings.filterwarnings("ignore", category=RuntimeWarning)
# 创建requests会话池
requests_session = requests.Session()
@ -34,7 +34,7 @@ DEFAULT_TIMEOUT = 10
DEFAULT_RETRIES = 2
# 存储失败的URL值为缓存过期时间戳
# failed_urls: Dict[str, int] = dict()
failed_urls: Dict[str, int] = dict()
# 创建aiohttp客户端会话池
_aiohttp_client = None
@ -111,7 +111,7 @@ class Favicon:
if self.domain:
self.domain_md5 = hashlib.md5(self.domain.encode("utf-8")).hexdigest()
except Exception as e:
redis_pool.set_failed_domain(self.domain, setting.time_of_1_days)
failed_urls[self.domain] = setting.time_of_1_days + int(time.time())
self.scheme = None
self.domain = None
logger.error('URL解析错误: %s, URL: %s', str(e), url)
@ -265,7 +265,7 @@ def _check_internal(domain: str) -> bool:
return True
return False
except Exception as e:
redis_pool.set_failed_domain(domain, setting.time_of_1_days)
failed_urls[domain] = setting.time_of_1_days + int(time.time())
logger.error('解析域名出错: %s, 错误: %s', domain, str(e))
return False
@ -335,19 +335,19 @@ async def _req_get(url: str,
content = await resp.read()
return content, ct_type
else:
await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
failed_urls[domain] = setting.time_of_1_hours + int(time.time())
logger.error('异步请求失败: %d, URL: %s', resp.status, url)
break
except (aiohttp.ClientConnectorError, aiohttp.ServerTimeoutError) as e:
retry_count += 1
if retry_count > retries:
await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
failed_urls[domain] = setting.time_of_5_minus + int(time.time())
logger.error('异步请求超时: %s, URL: %s', str(e), url)
else:
logger.warning('异步请求超时,正在重试(%d/%d): %s', retry_count, retries, url)
continue
except Exception as e:
await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
failed_urls[domain] = setting.time_of_1_hours + int(time.time())
logger.error('异步请求异常: %s, URL: %s', str(e), url)
break
@ -358,9 +358,3 @@ async def _req_get(url: str,
_pattern_domain = re.compile(
r'[a-zA-Z0-9\u4E00-\u9FA5][-a-zA-Z0-9\u4E00-\u9FA5]{0,62}(\.[a-zA-Z0-9\u4E00-\u9FA5][-a-zA-Z0-9\u4E00-\u9FA5]{0,62})+\.?',
re.I)
# def failed_url_cache(_domain: str, _time: int):
# if _domain:
# _current_time = int(time.time())
# if (not failed_urls.get(_domain)) or (_current_time <= failed_urls.get(_domain)):
# failed_urls[_domain] = _current_time + _time

View File

@ -16,8 +16,8 @@ from fastapi import Request, BackgroundTasks
from fastapi.responses import Response
import setting
from favicon_app.models import Favicon
from favicon_app.utils import header, redis_pool
from favicon_app.models import Favicon, favicon
from favicon_app.utils import header
from favicon_app.utils.file_util import FileUtil
from favicon_app.utils.filetype import helpers, filetype
@ -36,11 +36,6 @@ async def get_favicon_handler(request: Request,
refresh: Optional[str] = None) -> dict[str, str] | Response:
"""异步处理获取图标的请求"""
logger.info(
f"队列大小(异步) queue/failed"
f"{await redis_pool.get_cache_size(prefix=redis_pool.ICON_QUEUE_PREFIX)} "
f"| {await redis_pool.get_cache_size(prefix=redis_pool.FAILED_DOMAINS_PREFIX)}")
# 验证URL参数
if not url:
return {"message": "请提供url参数"}
@ -48,14 +43,19 @@ async def get_favicon_handler(request: Request,
try:
entity = Favicon(url)
logger.info(f"-> failed url size: {len(favicon.failed_urls)}")
# 验证域名
if not entity.domain:
logger.warning(f"无效的URL: {url}")
return get_default(setting.time_of_7_days)
return get_default(setting.time_of_1_days)
# 检查缓存中的失败URL
if await redis_pool.is_domain_failed(entity.domain):
if entity.domain in favicon.failed_urls:
if int(time.time()) <= favicon.failed_urls.get(entity.domain):
return get_default(setting.time_of_1_days)
else:
del favicon.failed_urls[entity.domain]
# 检查缓存
_cached, cached_icon = _get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1'])
@ -74,39 +74,13 @@ async def get_favicon_handler(request: Request,
if _cached and not cached_icon:
# 缓存已过期,后台刷新缓存
logger.info(f"缓存已过期,加入后台队列刷新(异步): {entity.domain}")
await redis_pool.set_cache(
f"{entity.domain}",
entity.domain,
setting.time_of_2_hours,
prefix=redis_pool.ICON_QUEUE_PREFIX
)
bg_tasks.add_task(get_icon_async, entity, _cached)
return Response(content=icon_content,
media_type=content_type if content_type else "image/x-icon",
headers=_get_header(content_type, cache_time))
else:
# 开始图标处理,加入队列
await redis_pool.set_cache(
f"{entity.domain}",
entity.domain,
setting.time_of_2_hours,
prefix=redis_pool.ICON_QUEUE_PREFIX
)
# 没有缓存,实时处理,检查队列大小
_queue_size = await redis_pool.get_cache_size(prefix=redis_pool.ICON_QUEUE_PREFIX)
if _queue_size >= setting.MAX_QUEUE_SIZE:
# 加入后台队列并返回默认图片
logger.info(
f"队列大小({_queue_size})>={setting.MAX_QUEUE_SIZE},返回默认图片并加入后台队列(异步): {entity.domain}")
bg_tasks.add_task(get_icon_async, entity, _cached)
return get_default(0)
else:
# 队列<MAX_QUEUE_SIZE实时处理
logger.info(f"队列大小({_queue_size})<{setting.MAX_QUEUE_SIZE},实时处理(异步): {entity.domain}")
# 始终使用异步方法获取图标
# 没有缓存,开始图标处理,始终使用异步方法获取图标
icon_content = await get_icon_async(entity, _cached)
if not icon_content:
@ -164,7 +138,7 @@ async def get_icon_async(entity: Favicon, _cached: bytes = None) -> Optional[byt
# 图标获取失败,或图标不是支持的图片格式,写入默认图标
if (not icon_content) or (not helpers.is_image(icon_content) or _is_default_icon_byte(icon_content)):
logger.warning(f"-> 异步获取图标失败,使用默认图标: {entity.domain}")
logger.debug(f"-> 异步获取图标失败,使用默认图标: {entity.domain}")
icon_content = _cached if _cached else setting.default_icon_file
if icon_content:
@ -186,8 +160,6 @@ async def get_icon_async(entity: Favicon, _cached: bytes = None) -> Optional[byt
except Exception as e:
logger.error(f"异步获取图标时发生错误 {entity.domain}: {e}")
return _cached or setting.default_icon_file
finally:
await redis_pool.remove_cache(f"{entity.domain}", prefix=redis_pool.ICON_QUEUE_PREFIX)
# 预编译正则表达式,提高性能

View File

@ -1,134 +0,0 @@
# -*- coding: utf-8 -*-
import logging
from typing import AsyncGenerator, Optional
from redis.asyncio import ConnectionPool, Redis
import setting
logger = logging.getLogger(__name__)
REDIS_URL = setting.REDIS_URL
# Redis key prefixes — each prefix doubles as the name of a Set that tracks
# its members (see set_cache/remove_cache), so queue sizes can be read via SCARD.
FAILED_DOMAINS_PREFIX = "favicon:failed_domain:"  # domains whose fetch recently failed
TASK_QUEUE_PREFIX = "favicon:task_queue:"  # pending task queue
PROCESSING_SET_PREFIX = "favicon:processing:"  # tasks currently being processed
ICON_QUEUE_PREFIX = "favicon:icon_queue:"
# Shared async connection pool; decode_responses=True means values come back as str.
pool = ConnectionPool.from_url(
REDIS_URL,
max_connections=200,
decode_responses=True,
)
async def get_redis() -> AsyncGenerator[Redis, None]:
    """Yield a Redis client bound to the shared connection pool.

    Used as ``async for redis in get_redis():`` by the cache helpers below;
    the ``async with`` block releases the connection back to the pool.
    """
    async with Redis(connection_pool=pool) as client:
        yield client
async def set_cache(key: str, value: str | int, ttl: int = None, prefix: str = None) -> None:
    """Store *value* under *key*, optionally namespaced by *prefix*.

    When *prefix* is given, the key is stored as ``f"{prefix}{key}"`` and the
    bare *key* is also added to a Set named *prefix*, so get_cache_size(prefix)
    can report how many entries live under that namespace.

    Fix: the original annotated ``value: [str | int]`` — a list literal, not a
    type — and always called ``expire(prefix, ttl)`` even when ``ttl`` was
    None, which raises inside redis-py (silently swallowed here).

    Errors are logged, never raised — callers treat caching as best-effort.
    """
    if not key:
        return
    try:
        async for redis in get_redis():
            _key = f"{prefix}{key}" if prefix else key
            if prefix:
                # Track membership so SCARD on the prefix reflects queue size.
                await redis.sadd(prefix, key)
                if ttl is not None:
                    await redis.expire(prefix, ttl)
            await redis.set(_key, value, ex=ttl)
    except Exception as e:
        logger.error(f"存入redis时出错{e}")
async def get_cache(key: str, prefix: str = None) -> Optional[str | int]:
    """Return the value stored under *key* (prefixed when *prefix* is given).

    Returns None for an empty key, a missing key, or on any Redis error
    (errors are logged, not raised).
    """
    if not key:
        return None
    lookup = f"{prefix}{key}" if prefix else key
    try:
        async for redis in get_redis():
            return await redis.get(lookup)
    except Exception as e:
        logger.error(f"读取redis时出错{e}")
async def exist_cache(key: str, prefix: str = None) -> bool:
    """Return True if the (optionally prefixed) key exists in Redis.

    Falls back to False on empty key or any Redis error (logged, not raised).
    """
    if not key:
        return False
    lookup = f"{prefix}{key}" if prefix else key
    try:
        async for redis in get_redis():
            return (await redis.exists(lookup)) > 0
    except Exception as e:
        logger.error(f"读取redis时出错{e}")
    return False
async def remove_cache(key: str, prefix: str = None) -> None:
    """Delete *key* from Redis and drop its membership from the prefix Set.

    Mirrors set_cache(): when *prefix* is given, the stored key is
    ``f"{prefix}{key}"`` and the bare *key* is removed from the Set named
    *prefix* so size tracking stays consistent. Errors are logged, not raised.
    """
    if not key:
        return
    try:
        async for redis in get_redis():
            if prefix:
                # Keep the size-tracking Set in sync with set_cache().
                await redis.srem(prefix, key)
                await redis.delete(f"{prefix}{key}")
            else:
                await redis.delete(key)
    except Exception as e:
        logger.error(f"删除redis时出错{e}")
async def get_cache_size(prefix: str = None) -> int:
    """Return the member count of the Set named *prefix* (see set_cache).

    Fix: the original passed ``prefix=None`` straight into ``scard``, which
    raises inside redis-py and was swallowed/logged as an error; an absent
    prefix now short-circuits to 0.

    Returns 0 on missing prefix or any Redis error (logged, not raised).
    """
    if not prefix:
        # scard(None) would raise; an unnamed queue is simply empty.
        return 0
    try:
        async for redis in get_redis():
            return await redis.scard(prefix)
    except Exception as e:
        logger.error(f"获取队列大小时出错:{e}")
    return 0
async def set_failed_domain(domain: str, expire_seconds: int = None) -> None:
    """Record *domain* as failed for *expire_seconds* under FAILED_DOMAINS_PREFIX.

    Thin wrapper over set_cache(); errors are logged, never raised.
    """
    if not domain:
        return
    try:
        await set_cache(domain, domain, ttl=expire_seconds, prefix=FAILED_DOMAINS_PREFIX)
        logger.debug(f"已将失败域名 {domain} 存入Redis过期时间{expire_seconds}")
    except Exception as e:
        logger.error(f"将失败域名存入Redis时出错{e}")
async def is_domain_failed(domain: str) -> bool:
    """Return True if *domain* is currently marked as failed.

    Thin wrapper over exist_cache(); empty domain or any error yields False.
    """
    if not domain:
        return False
    try:
        return await exist_cache(domain, prefix=FAILED_DOMAINS_PREFIX)
    except Exception as e:
        logger.error(f"检查域名是否失败时出错:{e}")
        return False
async def delete_failed_domain(domain: str) -> None:
    """Clear the failed-domain mark for *domain*, if any.

    Thin wrapper over remove_cache(); errors are logged, never raised.
    """
    if not domain:
        return
    try:
        await remove_cache(domain, prefix=FAILED_DOMAINS_PREFIX)
        logger.debug(f"已从Redis删除失败域名 {domain}")
    except Exception as e:
        logger.error(f"从Redis删除失败域名时出错{e}")

14
main.py
View File

@ -2,14 +2,12 @@
import logging
import os
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request
from fastapi.responses import Response
import setting
from favicon_app.routes import favicon_router
from favicon_app.utils import redis_pool
from favicon_app.utils.file_util import FileUtil
logger = logging.getLogger(__name__)
@ -23,18 +21,8 @@ default_icon_file = setting.default_icon_file
# referer日志文件路径
referer_log_file = setting.referer_log_file
@asynccontextmanager
async def lifespan(app: FastAPI):
"""应用级生命周期:启动/清理。"""
logger.warning("Redis pool ready.")
yield
await redis_pool.pool.aclose()
logger.warning("Redis pool closed.")
# fastapi
app = FastAPI(lifespan=lifespan, title="Favicon API", description="获取网站favicon图标", version="3.0")
app = FastAPI(title="Favicon API", description="获取网站favicon图标", version="3.0")
app.include_router(favicon_router)

View File

@ -1,17 +1,16 @@
--index https://mirrors.xinac.net/pypi/simple
--extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple
fastapi~=0.116
pydantic~=2.11
pydantic_core~=2.33
starlette~=0.47
requests~=2.32
aiohttp~=3.10.10
bs4~=0.0
beautifulsoup4~=4.13
lxml~=6.0
PyYAML~=6.0
uvicorn~=0.35
uvicorn-worker~=0.3
gunicorn~=23.0
redis[hiredis]~=6.4
fastapi~=0.116.1
pydantic~=2.11.7
pydantic_core~=2.33.2
starlette~=0.47.3
requests~=2.32.5
aiohttp~=3.12.15
bs4~=0.0.2
beautifulsoup4~=4.13.5
lxml~=6.0.1
PyYAML~=6.0.2
uvicorn~=0.35.0
uvicorn-worker~=0.3.0
gunicorn~=23.0.0

View File

@ -17,9 +17,6 @@ default_icon_file = FileUtil.read_file(default_icon_path, mode='rb')
# 定义referer日志文件路径
referer_log_file = os.path.join(icon_root_path, 'data', 'referer.txt')
# 队列阈值常量配置
MAX_QUEUE_SIZE = 10
# 时间常量
time_of_1_minus = 1 * 60
time_of_5_minus = 5 * time_of_1_minus
@ -36,7 +33,3 @@ time_of_1_days = 1 * 24 * 60 * 60
time_of_7_days = 7 * time_of_1_days
time_of_15_days = 15 * time_of_1_days
time_of_30_days = 30 * time_of_1_days
# 是否使用同步方式
# REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
REDIS_URL = os.getenv("REDIS_URL", "redis://192.168.31.254:6379/0")