25.09.10
parent cb9b9c2d35
commit 0adc1e717d
@@ -1,4 +1,4 @@
-FROM python:3.12-slim AS builder
+FROM python:3.13-slim AS builder
 
 WORKDIR /app
 
@@ -9,7 +9,7 @@ COPY . .
 RUN python -m compileall -b .
 
-FROM python:3.12-slim
+FROM python:3.13-slim
 
 WORKDIR /app
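The builder stage's `RUN python -m compileall -b .` byte-compiles the copied sources, writing legacy-layout `.pyc` files next to the `.py` files before the runtime stage copies them over. A rough Python equivalent of that step, for illustration only:

```python
import compileall

# Byte-compile the current tree; legacy=True writes .pyc files next to the sources,
# the same layout that `python -m compileall -b .` produces in the builder stage.
compileall.compile_dir(".", legacy=True, quiet=1)
```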
@@ -4,11 +4,8 @@ services:
     container_name: favicon-api
     ports:
      - 8001:8000
-    env_file:
-      - .env
     environment:
       TZ: Asia/Shanghai
-      REDIS_URL: redis://redis:6379/0
     volumes:
      - /usr/share/zoneinfo/Asia/Shanghai:/usr/share/zoneinfo/Asia/Shanghai:ro
      - /etc/localtime:/etc/localtime:ro
@@ -17,17 +14,3 @@ services:
      - ./conf:/app/conf:rw
      - ./logs:/app/logs:rw
     restart: unless-stopped
-    networks:
-      - favicon_network
-    depends_on:
-      - redis
-
-  redis:
-    image: redis:7-alpine
-    container_name: favicon-redis
-    networks:
-      - favicon_network
-
-networks:
-  favicon_network:
-    driver: bridge
@@ -6,7 +6,8 @@ import ipaddress
 import logging
 import re
 import socket
-from typing import Tuple, Optional
+import time
+from typing import Tuple, Optional, Dict
 from urllib.parse import urlparse
 
 import aiohttp
@@ -14,7 +15,7 @@ import requests
 import urllib3
 
 import setting
-from favicon_app.utils import header, redis_pool
+from favicon_app.utils import header
 from favicon_app.utils.filetype import helpers, filetype
 
 # Disable SSL warnings
@@ -22,7 +23,6 @@ urllib3.disable_warnings()
 logging.captureWarnings(True)
 # Configure logging
 logger = logging.getLogger(__name__)
 # warnings.filterwarnings("ignore", category=RuntimeWarning)
 
 # Create the requests session pool
 requests_session = requests.Session()
@@ -34,7 +34,7 @@ DEFAULT_TIMEOUT = 10
 DEFAULT_RETRIES = 2
 
 # Store failed URLs; the value is the cache-expiry timestamp
-# failed_urls: Dict[str, int] = dict()
+failed_urls: Dict[str, int] = dict()
 
 # Create the aiohttp client session pool
 _aiohttp_client = None
@@ -111,7 +111,7 @@ class Favicon:
             if self.domain:
                 self.domain_md5 = hashlib.md5(self.domain.encode("utf-8")).hexdigest()
         except Exception as e:
-            redis_pool.set_failed_domain(self.domain, setting.time_of_1_days)
+            failed_urls[self.domain] = setting.time_of_1_days + int(time.time())
             self.scheme = None
             self.domain = None
             logger.error('URL parse error: %s, URL: %s', str(e), url)
@@ -265,7 +265,7 @@ def _check_internal(domain: str) -> bool:
             return True
         return False
     except Exception as e:
-        redis_pool.set_failed_domain(domain, setting.time_of_1_days)
+        failed_urls[domain] = setting.time_of_1_days + int(time.time())
         logger.error('Error resolving domain: %s, error: %s', domain, str(e))
         return False
@@ -335,19 +335,19 @@ async def _req_get(url: str,
                     content = await resp.read()
                     return content, ct_type
                 else:
-                    await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
+                    failed_urls[domain] = setting.time_of_1_hours + int(time.time())
                     logger.error('Async request failed: %d, URL: %s', resp.status, url)
                     break
         except (aiohttp.ClientConnectorError, aiohttp.ServerTimeoutError) as e:
             retry_count += 1
             if retry_count > retries:
-                await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
+                failed_urls[domain] = setting.time_of_5_minus + int(time.time())
                 logger.error('Async request timed out: %s, URL: %s', str(e), url)
             else:
                 logger.warning('Async request timed out, retrying (%d/%d): %s', retry_count, retries, url)
                 continue
         except Exception as e:
-            await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
+            failed_urls[domain] = setting.time_of_1_hours + int(time.time())
             logger.error('Async request exception: %s, URL: %s', str(e), url)
             break
@@ -358,9 +358,3 @@ async def _req_get(url: str,
 _pattern_domain = re.compile(
     r'[a-zA-Z0-9\u4E00-\u9FA5][-a-zA-Z0-9\u4E00-\u9FA5]{0,62}(\.[a-zA-Z0-9\u4E00-\u9FA5][-a-zA-Z0-9\u4E00-\u9FA5]{0,62})+\.?',
     re.I)
-
-# def failed_url_cache(_domain: str, _time: int):
-#     if _domain:
-#         _current_time = int(time.time())
-#         if (not failed_urls.get(_domain)) or (_current_time <= failed_urls.get(_domain)):
-#             failed_urls[_domain] = _current_time + _time
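Across these hunks the Redis-backed failure cache (`redis_pool.set_failed_domain`) is replaced by the module-level `failed_urls` dict, whose values are absolute expiry timestamps. A minimal sketch of the resulting pattern; the `mark_failed`/`is_failed` helpers are illustrative only, the diff writes to the dict inline at each call site:

```python
import time
from typing import Dict

# domain -> epoch-seconds timestamp after which the entry no longer counts as failed
failed_urls: Dict[str, int] = dict()


def mark_failed(domain: str, ttl: int) -> None:
    """Remember a failing domain for ttl seconds (e.g. setting.time_of_1_days)."""
    if domain:
        failed_urls[domain] = int(time.time()) + ttl


def is_failed(domain: str) -> bool:
    """True while the entry is still fresh; expired entries are dropped lazily."""
    expires_at = failed_urls.get(domain)
    if expires_at is None:
        return False
    if int(time.time()) <= expires_at:
        return True
    del failed_urls[domain]  # expired: forget the domain and allow a retry
    return False
```

Unlike the deleted redis_pool module, this cache is per-process and empty after every restart, which is presumably acceptable for a negative cache with day-scale TTLs.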
@@ -16,8 +16,8 @@ from fastapi import Request, BackgroundTasks
 from fastapi.responses import Response
 
 import setting
-from favicon_app.models import Favicon
-from favicon_app.utils import header, redis_pool
+from favicon_app.models import Favicon, favicon
+from favicon_app.utils import header
 from favicon_app.utils.file_util import FileUtil
 from favicon_app.utils.filetype import helpers, filetype
@@ -36,11 +36,6 @@ async def get_favicon_handler(request: Request,
                               refresh: Optional[str] = None) -> dict[str, str] | Response:
     """Handle the icon request asynchronously."""
-
-    logger.info(
-        f"Queue size (async) queue/failed:"
-        f"{await redis_pool.get_cache_size(prefix=redis_pool.ICON_QUEUE_PREFIX)} "
-        f"| {await redis_pool.get_cache_size(prefix=redis_pool.FAILED_DOMAINS_PREFIX)}")
 
     # Validate the URL parameter
     if not url:
         return {"message": "Please provide the url parameter"}
@@ -48,14 +43,19 @@
     try:
         entity = Favicon(url)
 
+        logger.info(f"-> failed url size: {len(favicon.failed_urls)}")
+
         # Validate the domain
         if not entity.domain:
             logger.warning(f"Invalid URL: {url}")
-            return get_default(setting.time_of_7_days)
+            return get_default(setting.time_of_1_days)
 
         # Check the failed-URL cache
-        if await redis_pool.is_domain_failed(entity.domain):
-            return get_default(setting.time_of_1_days)
+        if entity.domain in favicon.failed_urls:
+            if int(time.time()) <= favicon.failed_urls.get(entity.domain):
+                return get_default(setting.time_of_1_days)
+            else:
+                del favicon.failed_urls[entity.domain]
 
         # Check the cache
         _cached, cached_icon = _get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1'])
@@ -74,53 +74,27 @@
         if _cached and not cached_icon:
             # Cache expired: refresh it in the background
             logger.info(f"Cache expired, queueing background refresh (async): {entity.domain}")
-            await redis_pool.set_cache(
-                f"{entity.domain}",
-                entity.domain,
-                setting.time_of_2_hours,
-                prefix=redis_pool.ICON_QUEUE_PREFIX
-            )
             bg_tasks.add_task(get_icon_async, entity, _cached)
 
             return Response(content=icon_content,
                             media_type=content_type if content_type else "image/x-icon",
                             headers=_get_header(content_type, cache_time))
         else:
-            # Start icon processing and add it to the queue
-            await redis_pool.set_cache(
-                f"{entity.domain}",
-                entity.domain,
-                setting.time_of_2_hours,
-                prefix=redis_pool.ICON_QUEUE_PREFIX
-            )
-
-            # No cache: process in real time, checking the queue size first
-            _queue_size = await redis_pool.get_cache_size(prefix=redis_pool.ICON_QUEUE_PREFIX)
-            if _queue_size >= setting.MAX_QUEUE_SIZE:
-                # Enqueue a background task and return the default image
-                logger.info(
-                    f"Queue size ({_queue_size}) >= {setting.MAX_QUEUE_SIZE}, returning the default image and enqueueing a background task (async): {entity.domain}")
-                bg_tasks.add_task(get_icon_async, entity, _cached)
-                return get_default(0)
-            else:
-                # Queue < MAX_QUEUE_SIZE: process in real time
-                logger.info(f"Queue size ({_queue_size}) < {setting.MAX_QUEUE_SIZE}, processing in real time (async): {entity.domain}")
-
-                # Always fetch the icon asynchronously
-                icon_content = await get_icon_async(entity, _cached)
-
-                if not icon_content:
-                    # Fetch failed: return the default icon
-                    return get_default()
-
-                # Determine the content type and cache time
-                content_type = filetype.guess_mime(icon_content) if icon_content else ""
-                cache_time = setting.time_of_12_hours \
-                    if _is_default_icon_byte(icon_content) else setting.time_of_7_days
-
-                return Response(content=icon_content,
-                                media_type=content_type if content_type else "image/x-icon",
-                                headers=_get_header(content_type, cache_time))
+            # No cache: start icon processing, always fetching the icon asynchronously
+            icon_content = await get_icon_async(entity, _cached)
+
+            if not icon_content:
+                # Fetch failed: return the default icon
+                return get_default()
+
+            # Determine the content type and cache time
+            content_type = filetype.guess_mime(icon_content) if icon_content else ""
+            cache_time = setting.time_of_12_hours \
+                if _is_default_icon_byte(icon_content) else setting.time_of_7_days
+
+            return Response(content=icon_content,
+                            media_type=content_type if content_type else "image/x-icon",
+                            headers=_get_header(content_type, cache_time))
     except Exception as e:
         logger.error(f"Error while handling icon request {url}: {e}")
         # Return the default icon
@@ -164,7 +138,7 @@ async def get_icon_async(entity: Favicon, _cached: bytes = None) -> Optional[byt
 
     # Icon fetch failed, or the icon is not a supported image format: use the default icon
     if (not icon_content) or (not helpers.is_image(icon_content) or _is_default_icon_byte(icon_content)):
-        logger.warning(f"-> Async icon fetch failed, using the default icon: {entity.domain}")
+        logger.debug(f"-> Async icon fetch failed, using the default icon: {entity.domain}")
         icon_content = _cached if _cached else setting.default_icon_file
 
     if icon_content:
@@ -186,8 +160,6 @@ async def get_icon_async(entity: Favicon, _cached: bytes = None) -> Optional[byt
     except Exception as e:
         logger.error(f"Error while fetching icon asynchronously {entity.domain}: {e}")
         return _cached or setting.default_icon_file
-    finally:
-        await redis_pool.remove_cache(f"{entity.domain}", prefix=redis_pool.ICON_QUEUE_PREFIX)
 
 
 # Precompile regular expressions for performance
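With the queue gate and Redis bookkeeping gone, a request is served from the local cache, from an inline async fetch, or with the default icon. A quick smoke test against a running instance; the port follows the compose mapping 8001:8000 above, while the route path "/" is a placeholder, since the actual path lives in favicon_router and is not shown in this diff (passing refresh=true or refresh=1 forces a cache refresh, per the `refresh in ['true', '1']` check above):

```python
import requests

# Assumes the service is reachable on localhost:8001 (compose maps 8001 -> 8000).
# The route path "/" is a placeholder; the real path is defined in favicon_router.
resp = requests.get(
    "http://localhost:8001/",
    params={"url": "https://example.com"},
    timeout=10,
)
print(resp.status_code, resp.headers.get("Content-Type"), len(resp.content), "bytes")
```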
@@ -1,134 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import logging
-from typing import AsyncGenerator, Optional
-
-from redis.asyncio import ConnectionPool, Redis
-
-import setting
-
-logger = logging.getLogger(__name__)
-
-REDIS_URL = setting.REDIS_URL
-
-# Redis key prefixes
-FAILED_DOMAINS_PREFIX = "favicon:failed_domain:"  # failed-domain prefix
-TASK_QUEUE_PREFIX = "favicon:task_queue:"  # task-queue prefix
-PROCESSING_SET_PREFIX = "favicon:processing:"  # in-progress task set prefix
-ICON_QUEUE_PREFIX = "favicon:icon_queue:"
-
-pool = ConnectionPool.from_url(
-    REDIS_URL,
-    max_connections=200,
-    decode_responses=True,
-)
-
-
-async def get_redis() -> AsyncGenerator[Redis, None]:
-    async with Redis(connection_pool=pool) as conn:
-        yield conn
-
-
-async def set_cache(key: str, value: [str | int], ttl: int = None, prefix: str = None) -> None:
-    if not key:
-        return
-
-    try:
-        async for redis in get_redis():
-            _key = key
-            if prefix:
-                _key = f"{prefix}{key}"
-                await redis.sadd(prefix, key)
-                await redis.expire(prefix, ttl)
-            await redis.set(_key, value, ex=ttl)
-    except Exception as e:
-        logger.error(f"Error writing to redis: {e}")
-
-
-async def get_cache(key: str, prefix: str = None) -> Optional[str | int]:
-    if not key:
-        return None
-
-    try:
-        async for redis in get_redis():
-            if prefix:
-                key = f"{prefix}{key}"
-            return await redis.get(key)
-    except Exception as e:
-        logger.error(f"Error reading from redis: {e}")
-
-
-async def exist_cache(key: str, prefix: str = None) -> bool:
-    if not key:
-        return False
-
-    try:
-        async for redis in get_redis():
-            if prefix:
-                key = f"{prefix}{key}"
-            result = await redis.exists(key)
-            return result > 0
-    except Exception as e:
-        logger.error(f"Error reading from redis: {e}")
-        return False
-
-
-async def remove_cache(key: str, prefix: str = None) -> None:
-    if not key:
-        return
-
-    try:
-        async for redis in get_redis():
-            _key = key
-            if prefix:
-                _key = f"{prefix}{key}"
-                await redis.srem(prefix, key)
-            await redis.delete(_key)
-    except Exception as e:
-        logger.error(f"Error deleting from redis: {e}")
-
-
-async def get_cache_size(prefix: str = None) -> int:
-    """Count entries by prefix; used for Set collections.
-    """
-    try:
-        async for redis in get_redis():
-            return await redis.scard(prefix)
-    except Exception as e:
-        logger.error(f"Error getting queue size: {e}")
-        return 0
-
-
-async def set_failed_domain(domain: str, expire_seconds: int = None) -> None:
-    if not domain:
-        return
-
-    try:
-        await set_cache(f"{domain}", domain, ttl=expire_seconds, prefix=FAILED_DOMAINS_PREFIX)
-
-        logger.debug(f"Stored failed domain {domain} in Redis, expires in {expire_seconds} s")
-    except Exception as e:
-        logger.error(f"Error storing failed domain in Redis: {e}")
-
-
-async def is_domain_failed(domain: str) -> bool:
-    if not domain:
-        return False
-
-    try:
-        return await exist_cache(domain, prefix=FAILED_DOMAINS_PREFIX)
-    except Exception as e:
-        logger.error(f"Error checking whether the domain has failed: {e}")
-        return False
-
-
-async def delete_failed_domain(domain: str) -> None:
-    if not domain:
-        return
-
-    try:
-        await remove_cache(domain, prefix=FAILED_DOMAINS_PREFIX)
-
-        logger.debug(f"Deleted failed domain {domain} from Redis")
-    except Exception as e:
-        logger.error(f"Error deleting failed domain from Redis: {e}")
main.py
@@ -2,14 +2,12 @@
 
 import logging
 import os
-from contextlib import asynccontextmanager
 
 from fastapi import FastAPI, Request
 from fastapi.responses import Response
 
 import setting
 from favicon_app.routes import favicon_router
-from favicon_app.utils import redis_pool
 from favicon_app.utils.file_util import FileUtil
 
 logger = logging.getLogger(__name__)
@@ -23,18 +21,8 @@ default_icon_file = setting.default_icon_file
 # Path of the referer log file
 referer_log_file = setting.referer_log_file
 
 
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Application-level lifespan: startup/cleanup."""
-    logger.warning("Redis pool ready.")
-    yield
-    await redis_pool.pool.aclose()
-    logger.warning("Redis pool closed.")
-
-
 # fastapi
-app = FastAPI(lifespan=lifespan, title="Favicon API", description="Fetch website favicon icons", version="3.0")
+app = FastAPI(title="Favicon API", description="Fetch website favicon icons", version="3.0")
 app.include_router(favicon_router)
@@ -1,17 +1,16 @@
 --index https://mirrors.xinac.net/pypi/simple
 --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple
 
-fastapi~=0.116
-pydantic~=2.11
-pydantic_core~=2.33
-starlette~=0.47
-requests~=2.32
-aiohttp~=3.10.10
-bs4~=0.0
-beautifulsoup4~=4.13
-lxml~=6.0
-PyYAML~=6.0
-uvicorn~=0.35
-uvicorn-worker~=0.3
-gunicorn~=23.0
-redis[hiredis]~=6.4
+fastapi~=0.116.1
+pydantic~=2.11.7
+pydantic_core~=2.33.2
+starlette~=0.47.3
+requests~=2.32.5
+aiohttp~=3.12.15
+bs4~=0.0.2
+beautifulsoup4~=4.13.5
+lxml~=6.0.1
+PyYAML~=6.0.2
+uvicorn~=0.35.0
+uvicorn-worker~=0.3.0
+gunicorn~=23.0.0
@@ -17,9 +17,6 @@ default_icon_file = FileUtil.read_file(default_icon_path, mode='rb')
 # Define the path of the referer log file
 referer_log_file = os.path.join(icon_root_path, 'data', 'referer.txt')
 
-# Queue threshold constant
-MAX_QUEUE_SIZE = 10
-
 # Time constants
 time_of_1_minus = 1 * 60
 time_of_5_minus = 5 * time_of_1_minus
@@ -36,7 +33,3 @@ time_of_1_days = 1 * 24 * 60 * 60
 time_of_7_days = 7 * time_of_1_days
 time_of_15_days = 15 * time_of_1_days
 time_of_30_days = 30 * time_of_1_days
-
-# Whether to use the synchronous mode
-# REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
-REDIS_URL = os.getenv("REDIS_URL", "redis://192.168.31.254:6379/0")
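The remaining time constants feed both the in-memory failure cache and the response cache headers. A small worked example of how the modules above combine them with the current time; the values are restated here for illustration, in the code they come from `setting`:

```python
import time

# Values as defined in setting.py, in seconds.
time_of_1_minus = 1 * 60
time_of_5_minus = 5 * time_of_1_minus      # 300
time_of_1_days = 1 * 24 * 60 * 60          # 86400
time_of_7_days = 7 * time_of_1_days        # 604800

# favicon.py stores absolute expiry timestamps in failed_urls:
expires_at = time_of_1_days + int(time.time())
print(expires_at - int(time.time()))  # 86400: the domain stays "failed" for one day
```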