master
jinql 2025-09-04 21:41:05 +08:00
parent ae73b2cd4a
commit 38c681b553
6 changed files with 32 additions and 156 deletions

View File

@ -17,3 +17,5 @@ __pycache__/
# 忽略指定目录
data/
logs/
conf/

View File

@ -8,7 +8,7 @@ import yaml
bind = "0.0.0.0:8000"
# Worker 进程数(推荐 CPU 核心数 * 2 + 1)
workers = 4
workers = 2
# 工作模式:sync、gevent、uvicorn.workers.UvicornWorker
worker_class = "uvicorn.workers.UvicornWorker"
@ -17,6 +17,9 @@ worker_class = "uvicorn.workers.UvicornWorker"
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)
# 允许来自这些IP的代理转发
forwarded_allow_ips = "*"
# 日志配置
with open(Path(__file__).with_name("logging.yaml"), "r", encoding="utf-8") as f:
logconfig_dict = yaml.safe_load(f)
@ -36,10 +39,10 @@ raw_env = [
"UVICORN_ACCESS_LOGFORMAT=%(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\" %(D)s"
]
# 可选:超时时间(秒)
# 超时时间(秒)
timeout = 120
# Keep - Alive超时
# Keep-Alive超时
keepalive = 5
# 进程名(ps aux 中显示)

View File

@ -8,7 +8,7 @@ import yaml
bind = "0.0.0.0:8000"
# Worker 进程数(推荐 CPU 核心数 * 2 + 1)
workers = 4
workers = 2
# 工作模式:sync、gevent、uvicorn.workers.UvicornWorker
worker_class = "uvicorn.workers.UvicornWorker"
@ -17,6 +17,9 @@ worker_class = "uvicorn.workers.UvicornWorker"
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)
# 允许来自这些IP的代理转发
forwarded_allow_ips = "*"
# 日志配置
with open(Path(__file__).with_name("logging.yaml"), "r", encoding="utf-8") as f:
logconfig_dict = yaml.safe_load(f)
@ -36,10 +39,10 @@ raw_env = [
"UVICORN_ACCESS_LOGFORMAT=%(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\" %(D)s"
]
# 可选:超时时间(秒)
# 超时时间(秒)
timeout = 120
# Keep - Alive超时
# Keep-Alive超时
keepalive = 5
# 进程名(ps aux 中显示)

View File

@ -28,7 +28,6 @@ favicon_router = APIRouter(prefix="", tags=["favicon"])
@favicon_router.get('/icon/')
@favicon_router.get('/icon')
@favicon_router.get('/')
async def get_favicon(
request: Request,
bg_tasks: BackgroundTasks,
@ -54,14 +53,14 @@ async def get_count():
return _service.get_count()
# Serves the stored referer list as plain text; the route is excluded from the
# generated API schema (include_in_schema=False).
# NOTE(review): this span carries two spellings ('referrer' / 'referer') of the
# route, the file path and the log message. As written, both routes are
# registered, the second `path` assignment overrides the first, and a read
# failure is logged twice. This looks like the before/after lines of a single
# change — confirm which set is meant to remain.
@favicon_router.get('/icon/referrer', include_in_schema=False)
@favicon_router.get('/icon/referer', include_in_schema=False)
async def get_referrer():
"""Return the recorded request-referer data as a text/plain response.

The body is the content of the referer file, or the literal 'None' when the
file is missing, empty, or cannot be read.
"""
# Fallback body used whenever nothing can be read.
content = 'None'
path = os.path.join(_icon_root_path, 'conf', 'referrer.txt')
path = os.path.join(_icon_root_path, 'data', 'referer.txt')
if os.path.exists(path):
try:
# A falsy result from read_file also maps to the 'None' fallback.
content = FileUtil.read_file(path, mode='r') or 'None'
except Exception as e:
# Read errors are logged, and the fallback body is still returned.
logger.error(f"读取referrer文件失败: {e}")
logger.error(f"读取referer文件失败: {e}")
return Response(content=content, media_type="text/plain")

View File

@ -3,13 +3,12 @@
import hashlib
import logging
import os
import platform
import random
import re
import time
from queue import Queue
from threading import Lock
from typing import Optional, Tuple, Dict, Set, List
from typing import Optional, Tuple, Dict, List
import bs4
import urllib3
@ -22,15 +21,6 @@ from favicon_app.utils import header
from favicon_app.utils.file_util import FileUtil
from favicon_app.utils.filetype import helpers, filetype
# Pick the platform's file-locking module: msvcrt on Windows, fcntl elsewhere.
if platform.system() == 'Windows':
import msvcrt
else:
import fcntl
# Multi-process locking: lock files live in <this file's directory>/../../data/locks.
LOCKS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', 'data', 'locks')
# Created when this module is imported, so the directory exists before any lock is taken.
os.makedirs(LOCKS_DIR, exist_ok=True)
urllib3.disable_warnings()
logging.captureWarnings(True)
logger = logging.getLogger(__name__)
@ -53,7 +43,6 @@ class FaviconService:
self.url_count = 0
self.request_icon_count = 0
self.request_cache_count = 0
self.href_referrer: Set[str] = set()
self.domain_list: List[str] = list()
# 初始化队列
@ -275,137 +264,11 @@ class FaviconService:
return None
@staticmethod
def _lock_file(file_handle, lock_type='exclusive'):
"""跨平台文件锁"""
if platform.system() == 'Windows':
try:
msvcrt.locking(file_handle.fileno(), msvcrt.LK_LOCK, 1)
return True
except Exception:
time.sleep(0.01)
try:
msvcrt.locking(file_handle.fileno(), msvcrt.LK_NBLCK, 1)
return True
except:
return False
else:
if lock_type == 'exclusive':
fcntl.flock(file_handle, fcntl.LOCK_EX)
else:
fcntl.flock(file_handle, fcntl.LOCK_SH)
return True
@staticmethod
def _unlock_file(file_handle):
"""释放文件锁"""
if platform.system() == 'Windows':
try:
msvcrt.locking(file_handle.fileno(), msvcrt.LK_UNLCK, 1)
except Exception as e:
logger.error(f"释放Windows文件锁失败: {e}")
else:
try:
fcntl.flock(file_handle, fcntl.LOCK_UN)
except Exception as e:
logger.error(f"释放Unix文件锁失败: {e}")
def _get_domain_lock_path(self, domain: str) -> str:
    """Return the lock-file path for ``domain``.

    The file name is the MD5 hex digest of the UTF-8 encoded domain with a
    ``.lock`` suffix, placed under ``LOCKS_DIR``.
    """
    digest = hashlib.md5(domain.encode('utf-8')).hexdigest()
    lock_name = f"{digest}.lock"
    return os.path.join(LOCKS_DIR, lock_name)
def _acquire_domain_lock(self, domain: str, timeout: float = 5.0) -> Optional[str]:
"""获取域名锁防止多进程同时获取同一个域名的favicon"""
lock_path = self._get_domain_lock_path(domain)
start_time = time.time()
while time.time() - start_time < timeout:
try:
fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
os.close(fd)
return lock_path
except FileExistsError:
time.sleep(0.1)
logger.warning(f"获取域名锁超时: {domain}")
return None
def _release_domain_lock(self, lock_path: str) -> None:
"""释放域名锁"""
try:
if os.path.exists(lock_path):
os.remove(lock_path)
except Exception as e:
logger.error(f"释放锁文件失败 {lock_path}: {e}")
async def _referer(self, req: Request) -> None:
    """Record the request's referer in ``conf/referrer.txt`` and the in-memory set.

    Reads the ``referrer`` header, falling back to ``referer``. A value already
    in ``self.href_referrer`` is skipped without touching the file. Otherwise
    the file is locked exclusively, scanned, and the value appended when it is
    not already present. File errors are logged and never propagate.

    Args:
        req: Incoming request; only ``req.headers`` is read.
    """
    _referrer = req.headers.get('referrer') or req.headers.get('referer')
    if not _referrer:
        return

    logger.debug(f"-> Referrer: {_referrer}")

    _path = os.path.join(icon_root_path, 'conf', 'referrer.txt')
    os.makedirs(os.path.dirname(_path), exist_ok=True)

    try:
        # In-memory hit: nothing to write, and the cache refresh is skipped too.
        if _referrer in self.href_referrer:
            return

        with open(_path, 'a+', encoding='utf-8') as f:
            locked = False
            try:
                locked = self._lock_file(f, 'exclusive')
                if not locked:
                    logger.warning(f"无法获取文件锁跳过referrer记录: {_referrer}")
                    return

                # 'a+' allows reading: rewind and load what is already stored.
                f.seek(0)
                existing_referrers = {line.strip() for line in f.readlines()}

                if _referrer not in existing_referrers:
                    f.seek(0, os.SEEK_END)
                    f.write(f'{_referrer}\n')
                    f.flush()
                    if platform.system() != 'Windows':
                        os.fsync(f.fileno())
                    logger.debug(f"成功添加新referrer: {_referrer}")

                # Newly written or already on disk, remember it in memory.
                self.href_referrer.add(_referrer)
            finally:
                # Release only a lock that was actually taken; unlocking after
                # a failed acquisition produced a spurious unlock error.
                if locked:
                    self._unlock_file(f)
    except Exception as e:
        logger.error(f"处理referrer文件失败: {e}")

    # Re-sync the in-memory set from the file when it grows large, or at random (1%).
    if len(self.href_referrer) > 1000 or random.random() < 0.01:
        await self._refresh_referrer_cache(_path)
async def _refresh_referrer_cache(self, file_path: str) -> None:
"""刷新内存中的referrer缓存"""
try:
if os.path.exists(file_path):
with open(file_path, 'r', encoding='utf-8') as f:
try:
locked = self._lock_file(f, 'shared')
if locked:
self.href_referrer = {line.strip() for line in f.readlines() if line.strip()}
finally:
self._unlock_file(f)
except Exception as e:
logger.error(f"刷新referrer缓存失败: {e}")
def get_icon_sync(self, entity: Favicon, _cached: bytes = None) -> Optional[bytes]:
"""同步获取图标"""
domain_lock = None
icon_content = None
try:
domain_lock = self._acquire_domain_lock(entity.domain)
if not domain_lock:
logger.warning(f"无法获取域名锁,跳过获取图标: {entity.domain}")
return _cached or default_icon_content
with self._lock:
if entity.domain in self.domain_list:
@ -474,9 +337,6 @@ class FaviconService:
logger.error(f"获取图标时发生错误 {entity.domain}: {e}")
return _cached or default_icon_content
finally:
if domain_lock:
self._release_domain_lock(domain_lock)
with self._lock:
if entity.domain in self.domain_list:
self.domain_list.remove(entity.domain)
@ -491,7 +351,6 @@ class FaviconService:
'request_cache_count': self.request_cache_count,
'queue_size': self.icon_queue.qsize(),
'total_queue_size': self.total_queue.qsize(),
'href_referrer': len(self.href_referrer),
}
async def get_favicon_handler(
@ -519,9 +378,6 @@ class FaviconService:
logger.warning(f"无效的URL: {url}")
return self.get_default(self.time_of_7_days)
# 检测并记录referer
await self._referer(request)
# 检查缓存
_cached, cached_icon = self._get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1', 'True'])

15
main.py
View File

@ -4,7 +4,7 @@ import logging
import os
import sys
from fastapi import FastAPI
from fastapi import FastAPI, Request
from fastapi.responses import Response
from favicon_app.routes import favicon_router
@ -20,11 +20,24 @@ favicon_icon_file = FileUtil.read_file(os.path.join(current_dir, 'favicon.ico'),
# Default site icon: favicon.png under current_dir, read with mode='rb'.
default_icon_file = FileUtil.read_file(os.path.join(current_dir, 'favicon.png'), mode='rb')
# Path of the referer log file: data/referer.txt under current_dir.
referer_log_file = os.path.join(current_dir, 'data', 'referer.txt')
# FastAPI application, with the favicon routes registered on it.
app = FastAPI(title="Favicon API", description="获取网站favicon图标", version="3.0.0")
app.include_router(favicon_router)
@app.middleware("http")
async def log_referer(request: Request, call_next):
    """HTTP middleware that appends the request's referer to the referer log.

    Reads the ``referrer`` header, falling back to ``referer``; when one is
    present it is written to ``referer_log_file`` in append mode. The request
    is then passed on and the downstream response returned unchanged.

    Args:
        request: Incoming request; only its headers are read here.
        call_next: Callable that runs the rest of the request pipeline.
    """
    referer = request.headers.get('referrer') or request.headers.get('referer')
    if referer:
        try:
            # NOTE(review): whether write_file adds a line separator in 'a'
            # mode is not visible here — confirm entries do not run together.
            FileUtil.write_file(referer_log_file, referer, mode='a', atomic=True)
        except Exception as e:
            # Referer logging is best-effort: a failed write (missing
            # directory, full disk, ...) must not fail the request itself.
            logging.getLogger(__name__).error(f"记录referer失败: {e}")
    return await call_next(request)
# @app.get("/")
async def root():
    """Return the welcome payload: a dict with a single ``message`` key."""
    welcome = "Welcome to Favicon API! Use /icon/?url=example.com to get favicon."
    return {"message": welcome}