master
jinql 2025-09-04 21:41:05 +08:00
parent ae73b2cd4a
commit 38c681b553
6 changed files with 32 additions and 156 deletions

View File

@ -17,3 +17,5 @@ __pycache__/
# 忽略指定目录 # 忽略指定目录
data/ data/
logs/
conf/

View File

@ -8,7 +8,7 @@ import yaml
bind = "0.0.0.0:8000" bind = "0.0.0.0:8000"
# Worker 进程数(推荐 CPU 核心数 * 2 + 1 # Worker 进程数(推荐 CPU 核心数 * 2 + 1
workers = 4 workers = 2
# 工作模式sync、gevent、uvicorn.workers.UvicornWorker # 工作模式sync、gevent、uvicorn.workers.UvicornWorker
worker_class = "uvicorn.workers.UvicornWorker" worker_class = "uvicorn.workers.UvicornWorker"
@ -17,6 +17,9 @@ worker_class = "uvicorn.workers.UvicornWorker"
log_dir = Path("logs") log_dir = Path("logs")
log_dir.mkdir(exist_ok=True) log_dir.mkdir(exist_ok=True)
# 允许来自这些IP的代理转发
forwarded_allow_ips = "*"
# 日志配置 # 日志配置
with open(Path(__file__).with_name("logging.yaml"), "r", encoding="utf-8") as f: with open(Path(__file__).with_name("logging.yaml"), "r", encoding="utf-8") as f:
logconfig_dict = yaml.safe_load(f) logconfig_dict = yaml.safe_load(f)
@ -36,10 +39,10 @@ raw_env = [
"UVICORN_ACCESS_LOGFORMAT=%(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\" %(D)s" "UVICORN_ACCESS_LOGFORMAT=%(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\" %(D)s"
] ]
# 可选:超时时间(秒) # 超时时间(秒)
timeout = 120 timeout = 120
# Keep - Alive超时 # Keep-Alive超时
keepalive = 5 keepalive = 5
# 进程名ps aux 中显示) # 进程名ps aux 中显示)

View File

@ -8,7 +8,7 @@ import yaml
bind = "0.0.0.0:8000" bind = "0.0.0.0:8000"
# Worker 进程数(推荐 CPU 核心数 * 2 + 1 # Worker 进程数(推荐 CPU 核心数 * 2 + 1
workers = 4 workers = 2
# 工作模式sync、gevent、uvicorn.workers.UvicornWorker # 工作模式sync、gevent、uvicorn.workers.UvicornWorker
worker_class = "uvicorn.workers.UvicornWorker" worker_class = "uvicorn.workers.UvicornWorker"
@ -17,6 +17,9 @@ worker_class = "uvicorn.workers.UvicornWorker"
log_dir = Path("logs") log_dir = Path("logs")
log_dir.mkdir(exist_ok=True) log_dir.mkdir(exist_ok=True)
# 允许来自这些IP的代理转发
forwarded_allow_ips = "*"
# 日志配置 # 日志配置
with open(Path(__file__).with_name("logging.yaml"), "r", encoding="utf-8") as f: with open(Path(__file__).with_name("logging.yaml"), "r", encoding="utf-8") as f:
logconfig_dict = yaml.safe_load(f) logconfig_dict = yaml.safe_load(f)
@ -36,10 +39,10 @@ raw_env = [
"UVICORN_ACCESS_LOGFORMAT=%(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\" %(D)s" "UVICORN_ACCESS_LOGFORMAT=%(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\" %(D)s"
] ]
# 可选:超时时间(秒) # 超时时间(秒)
timeout = 120 timeout = 120
# Keep - Alive超时 # Keep-Alive超时
keepalive = 5 keepalive = 5
# 进程名ps aux 中显示) # 进程名ps aux 中显示)

View File

@ -28,7 +28,6 @@ favicon_router = APIRouter(prefix="", tags=["favicon"])
@favicon_router.get('/icon/') @favicon_router.get('/icon/')
@favicon_router.get('/icon') @favicon_router.get('/icon')
@favicon_router.get('/')
async def get_favicon( async def get_favicon(
request: Request, request: Request,
bg_tasks: BackgroundTasks, bg_tasks: BackgroundTasks,
@ -54,14 +53,14 @@ async def get_count():
return _service.get_count() return _service.get_count()
@favicon_router.get('/icon/referrer', include_in_schema=False) @favicon_router.get('/icon/referer', include_in_schema=False)
async def get_referrer(): async def get_referrer():
"""获取请求来源信息""" """获取请求来源信息"""
content = 'None' content = 'None'
path = os.path.join(_icon_root_path, 'conf', 'referrer.txt') path = os.path.join(_icon_root_path, 'data', 'referer.txt')
if os.path.exists(path): if os.path.exists(path):
try: try:
content = FileUtil.read_file(path, mode='r') or 'None' content = FileUtil.read_file(path, mode='r') or 'None'
except Exception as e: except Exception as e:
logger.error(f"读取referrer文件失败: {e}") logger.error(f"读取referer文件失败: {e}")
return Response(content=content, media_type="text/plain") return Response(content=content, media_type="text/plain")

View File

@ -3,13 +3,12 @@
import hashlib import hashlib
import logging import logging
import os import os
import platform
import random import random
import re import re
import time import time
from queue import Queue from queue import Queue
from threading import Lock from threading import Lock
from typing import Optional, Tuple, Dict, Set, List from typing import Optional, Tuple, Dict, List
import bs4 import bs4
import urllib3 import urllib3
@ -22,15 +21,6 @@ from favicon_app.utils import header
from favicon_app.utils.file_util import FileUtil from favicon_app.utils.file_util import FileUtil
from favicon_app.utils.filetype import helpers, filetype from favicon_app.utils.filetype import helpers, filetype
if platform.system() == 'Windows':
import msvcrt
else:
import fcntl
# Directory for the lock files used for multi-process locking; created at import time.
LOCKS_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    '..',
    '..',
    'data',
    'locks',
)
os.makedirs(LOCKS_DIR, exist_ok=True)
urllib3.disable_warnings() urllib3.disable_warnings()
logging.captureWarnings(True) logging.captureWarnings(True)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -53,7 +43,6 @@ class FaviconService:
self.url_count = 0 self.url_count = 0
self.request_icon_count = 0 self.request_icon_count = 0
self.request_cache_count = 0 self.request_cache_count = 0
self.href_referrer: Set[str] = set()
self.domain_list: List[str] = list() self.domain_list: List[str] = list()
# 初始化队列 # 初始化队列
@ -275,137 +264,11 @@ class FaviconService:
return None return None
@staticmethod
def _lock_file(file_handle, lock_type='exclusive'):
    """Lock an open file, using msvcrt on Windows and fcntl elsewhere.

    Args:
        file_handle: Open file object; its ``fileno()`` is used on Windows
            and the object itself is passed to ``fcntl.flock`` elsewhere.
        lock_type: ``'exclusive'`` selects ``fcntl.LOCK_EX``; any other value
            selects ``fcntl.LOCK_SH``. Ignored on the Windows branch, which
            always locks one byte with ``msvcrt.locking``.

    Returns:
        True when the lock was taken. False only on Windows, when both the
        ``LK_LOCK`` attempt and the follow-up ``LK_NBLCK`` attempt fail.

    Raises:
        Whatever ``fcntl.flock`` raises on the non-Windows branch; it is not
        caught here.
    """
    if platform.system() != 'Windows':
        flock_mode = fcntl.LOCK_EX if lock_type == 'exclusive' else fcntl.LOCK_SH
        fcntl.flock(file_handle, flock_mode)
        return True

    # NOTE(review): msvcrt.locking locks relative to the current file
    # position, so the unlock call must run at the same position -- confirm
    # that callers do not move the position in between.
    try:
        msvcrt.locking(file_handle.fileno(), msvcrt.LK_LOCK, 1)
        return True
    except Exception:
        # Brief pause, then one non-blocking retry before giving up.
        time.sleep(0.01)
    try:
        msvcrt.locking(file_handle.fileno(), msvcrt.LK_NBLCK, 1)
        return True
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit; only ordinary errors mean "could not lock".
        return False
@staticmethod
def _unlock_file(file_handle):
    """Release the lock held on ``file_handle``.

    Uses ``fcntl.flock(..., LOCK_UN)`` off Windows and
    ``msvcrt.locking(..., LK_UNLCK, 1)`` on Windows. Failures are logged and
    never raised to the caller.
    """
    if platform.system() != 'Windows':
        try:
            fcntl.flock(file_handle, fcntl.LOCK_UN)
        except Exception as e:
            logger.error(f"释放Unix文件锁失败: {e}")
        return

    try:
        msvcrt.locking(file_handle.fileno(), msvcrt.LK_UNLCK, 1)
    except Exception as e:
        logger.error(f"释放Windows文件锁失败: {e}")
def _get_domain_lock_path(self, domain: str) -> str:
    """Return the lock-file path for ``domain``.

    The file name is the hex MD5 digest of the UTF-8 encoded domain with a
    ``.lock`` suffix, placed inside ``LOCKS_DIR``.
    """
    digest = hashlib.md5(domain.encode('utf-8')).hexdigest()
    lock_name = f"{digest}.lock"
    return os.path.join(LOCKS_DIR, lock_name)
def _acquire_domain_lock(self, domain: str, timeout: float = 5.0) -> Optional[str]:
    """Take the lock file for ``domain``, polling until ``timeout`` elapses.

    The lock is the exclusive creation (``O_CREAT | O_EXCL``) of the path
    returned by ``_get_domain_lock_path``; while that file exists, creation
    fails and the call sleeps 0.1 s before trying again.

    Args:
        domain: Domain whose lock should be taken.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The lock-file path on success (pass it to ``_release_domain_lock``),
        or None when the lock could not be taken within ``timeout``.
    """
    lock_path = self._get_domain_lock_path(domain)
    # Monotonic clock: a wall-clock adjustment must not shorten or stretch
    # the wait.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            time.sleep(0.1)
        else:
            # The file's existence is the lock; the descriptor is not needed.
            os.close(fd)
            return lock_path

    # NOTE(review): nothing here expires an existing lock file, so one that
    # is never removed makes every later call for this domain time out.
    logger.warning(f"获取域名锁超时: {domain}")
    return None
def _release_domain_lock(self, lock_path: str) -> None:
    """Release a domain lock by deleting its lock file.

    Args:
        lock_path: Path previously returned by ``_acquire_domain_lock``.

    A lock file that is already gone is treated as released. Any other
    failure is logged and not raised.
    """
    try:
        # Remove directly instead of checking existence first: the file can
        # vanish between the check and the removal.
        os.remove(lock_path)
    except FileNotFoundError:
        pass
    except Exception as e:
        logger.error(f"释放锁文件失败 {lock_path}: {e}")
async def _referer(self, req: Request) -> None:
"""Record the request's referrer in a text file and in an in-memory set.

Reads the ``referrer`` header, falling back to ``referer``. A value not yet
in ``self.href_referrer`` is appended to ``<icon_root_path>/conf/referrer.txt``
under a file lock, unless the file already lists it, and is then added to
the set. Errors while handling the file are logged, not raised.

NOTE(review): indentation was lost in this view, so the nesting of the
statements below is not visible here -- confirm against the real file.
"""
# Header lookup: 'referrer' first, then 'referer'.
_referrer = req.headers.get('referrer') or req.headers.get('referer')
if _referrer:
logger.debug(f"-> Referrer: {_referrer}")
_path = os.path.join(icon_root_path, 'conf', 'referrer.txt')
os.makedirs(os.path.dirname(_path), exist_ok=True)
try:
# Already recorded in the in-memory set: nothing to do.
if _referrer in self.href_referrer:
return
# 'a+' allows both reading the existing entries and appending a new one.
with open(_path, 'a+', encoding='utf-8') as f:
try:
locked = self._lock_file(f, 'exclusive')
if not locked:
logger.warning(f"无法获取文件锁跳过referrer记录: {_referrer}")
return
# Re-read the file from the start to see what is already listed in it.
f.seek(0)
existing_referrers = {line.strip() for line in f.readlines()}
if _referrer not in existing_referrers:
f.seek(0, os.SEEK_END)
f.write(f'{_referrer}\n')
f.flush()
# fsync is skipped on Windows.
if platform.system() != 'Windows':
os.fsync(f.fileno())
logger.debug(f"成功添加新referrer: {_referrer}")
self.href_referrer.add(_referrer)
else:
# Already in the file: only add it to the in-memory set.
if _referrer not in self.href_referrer:
self.href_referrer.add(_referrer)
finally:
# NOTE(review): this appears to run even when the lock was not obtained -- confirm.
self._unlock_file(f)
except Exception as e:
logger.error(f"处理referrer文件失败: {e}")
# Reload the set from the file when it exceeds 1000 entries, or on a 1% random draw.
if len(self.href_referrer) > 1000 or random.random() < 0.01:
await self._refresh_referrer_cache(_path)
async def _refresh_referrer_cache(self, file_path: str) -> None:
    """Reload ``self.href_referrer`` from the referrer file.

    Args:
        file_path: Path of the text file holding one referrer per line.

    When the file exists and a shared lock can be taken, the set is replaced
    by the file's non-blank lines with surrounding whitespace stripped.
    Otherwise the set is left unchanged. Errors are logged, not raised.
    """
    try:
        if not os.path.exists(file_path):
            return
        with open(file_path, 'r', encoding='utf-8') as f:
            # Lock outside the try/finally so the lock is released only if it
            # was actually taken; the original released it unconditionally.
            if not self._lock_file(f, 'shared'):
                return
            try:
                stripped_lines = (line.strip() for line in f)
                self.href_referrer = {entry for entry in stripped_lines if entry}
            finally:
                self._unlock_file(f)
    except Exception as e:
        logger.error(f"刷新referrer缓存失败: {e}")
def get_icon_sync(self, entity: Favicon, _cached: bytes = None) -> Optional[bytes]: def get_icon_sync(self, entity: Favicon, _cached: bytes = None) -> Optional[bytes]:
"""同步获取图标""" """同步获取图标"""
domain_lock = None
icon_content = None icon_content = None
try: try:
domain_lock = self._acquire_domain_lock(entity.domain)
if not domain_lock:
logger.warning(f"无法获取域名锁,跳过获取图标: {entity.domain}")
return _cached or default_icon_content
with self._lock: with self._lock:
if entity.domain in self.domain_list: if entity.domain in self.domain_list:
@ -474,9 +337,6 @@ class FaviconService:
logger.error(f"获取图标时发生错误 {entity.domain}: {e}") logger.error(f"获取图标时发生错误 {entity.domain}: {e}")
return _cached or default_icon_content return _cached or default_icon_content
finally: finally:
if domain_lock:
self._release_domain_lock(domain_lock)
with self._lock: with self._lock:
if entity.domain in self.domain_list: if entity.domain in self.domain_list:
self.domain_list.remove(entity.domain) self.domain_list.remove(entity.domain)
@ -491,7 +351,6 @@ class FaviconService:
'request_cache_count': self.request_cache_count, 'request_cache_count': self.request_cache_count,
'queue_size': self.icon_queue.qsize(), 'queue_size': self.icon_queue.qsize(),
'total_queue_size': self.total_queue.qsize(), 'total_queue_size': self.total_queue.qsize(),
'href_referrer': len(self.href_referrer),
} }
async def get_favicon_handler( async def get_favicon_handler(
@ -519,9 +378,6 @@ class FaviconService:
logger.warning(f"无效的URL: {url}") logger.warning(f"无效的URL: {url}")
return self.get_default(self.time_of_7_days) return self.get_default(self.time_of_7_days)
# 检测并记录referer
await self._referer(request)
# 检查缓存 # 检查缓存
_cached, cached_icon = self._get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1', 'True']) _cached, cached_icon = self._get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1', 'True'])

15
main.py
View File

@ -4,7 +4,7 @@ import logging
import os import os
import sys import sys
from fastapi import FastAPI from fastapi import FastAPI, Request
from fastapi.responses import Response from fastapi.responses import Response
from favicon_app.routes import favicon_router from favicon_app.routes import favicon_router
@ -20,11 +20,24 @@ favicon_icon_file = FileUtil.read_file(os.path.join(current_dir, 'favicon.ico'),
# 默认的站点图标 # 默认的站点图标
default_icon_file = FileUtil.read_file(os.path.join(current_dir, 'favicon.png'), mode='rb') default_icon_file = FileUtil.read_file(os.path.join(current_dir, 'favicon.png'), mode='rb')
# 定义referer日志文件路径
referer_log_file = os.path.join(current_dir, 'data', 'referer.txt')
# fastapi # fastapi
app = FastAPI(title="Favicon API", description="获取网站favicon图标", version="3.0.0") app = FastAPI(title="Favicon API", description="获取网站favicon图标", version="3.0.0")
app.include_router(favicon_router) app.include_router(favicon_router)
@app.middleware("http")
async def log_referer(request: Request, call_next):
    """Record the request's referer header, then hand the request on.

    The header is looked up as ``referrer`` first and ``referer`` second. A
    non-empty value is passed to ``FileUtil.write_file`` in append mode
    against ``referer_log_file``. Recording is best-effort: a failed write is
    logged and the request is still served.

    Args:
        request: The incoming request.
        call_next: Callable that produces the response for ``request``.

    Returns:
        The response returned by ``call_next``.
    """
    import logging  # local import keeps this block self-contained

    referer = request.headers.get('referrer') or request.headers.get('referer')
    if referer:
        try:
            # NOTE(review): no newline is appended to the value here --
            # confirm that write_file separates entries itself.
            FileUtil.write_file(referer_log_file, referer, mode='a', atomic=True)
        except Exception:
            # Referer bookkeeping must not turn a servable request into an error.
            logging.getLogger(__name__).exception("Failed to record referer %r", referer)
    return await call_next(request)
# @app.get("/") # @app.get("/")
async def root(): async def root():
return {"message": "Welcome to Favicon API! Use /icon/?url=example.com to get favicon."} return {"message": "Welcome to Favicon API! Use /icon/?url=example.com to get favicon."}