diff --git a/.dockerignore b/.dockerignore index ec8daad..c08a4f3 100644 --- a/.dockerignore +++ b/.dockerignore @@ -17,3 +17,5 @@ __pycache__/ # 忽略指定目录 data/ +logs/ +conf/ diff --git a/conf.default/gunicorn_conf_py b/conf.default/gunicorn_conf_py index 389fafd..6ef9c56 100644 --- a/conf.default/gunicorn_conf_py +++ b/conf.default/gunicorn_conf_py @@ -8,7 +8,7 @@ import yaml bind = "0.0.0.0:8000" # Worker 进程数(推荐 CPU 核心数 * 2 + 1) -workers = 4 +workers = 2 # 工作模式(sync、gevent、uvicorn.workers.UvicornWorker) worker_class = "uvicorn.workers.UvicornWorker" @@ -17,6 +17,9 @@ worker_class = "uvicorn.workers.UvicornWorker" log_dir = Path("logs") log_dir.mkdir(exist_ok=True) +# 允许来自这些IP的代理转发 +forwarded_allow_ips = "*" + # 日志配置 with open(Path(__file__).with_name("logging.yaml"), "r", encoding="utf-8") as f: logconfig_dict = yaml.safe_load(f) @@ -36,10 +39,10 @@ raw_env = [ "UVICORN_ACCESS_LOGFORMAT=%(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\" %(D)s" ] -# 可选:超时时间(秒) +# 超时时间(秒) timeout = 120 -# Keep - Alive超时 +# Keep-Alive超时 keepalive = 5 # 进程名(ps aux 中显示) diff --git a/conf/gunicorn.conf.py b/conf/gunicorn.conf.py index 389fafd..6ef9c56 100644 --- a/conf/gunicorn.conf.py +++ b/conf/gunicorn.conf.py @@ -8,7 +8,7 @@ import yaml bind = "0.0.0.0:8000" # Worker 进程数(推荐 CPU 核心数 * 2 + 1) -workers = 4 +workers = 2 # 工作模式(sync、gevent、uvicorn.workers.UvicornWorker) worker_class = "uvicorn.workers.UvicornWorker" @@ -17,6 +17,9 @@ worker_class = "uvicorn.workers.UvicornWorker" log_dir = Path("logs") log_dir.mkdir(exist_ok=True) +# 允许来自这些IP的代理转发 +forwarded_allow_ips = "*" + # 日志配置 with open(Path(__file__).with_name("logging.yaml"), "r", encoding="utf-8") as f: logconfig_dict = yaml.safe_load(f) @@ -36,10 +39,10 @@ raw_env = [ "UVICORN_ACCESS_LOGFORMAT=%(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\" %(D)s" ] -# 可选:超时时间(秒) +# 超时时间(秒) timeout = 120 -# Keep - Alive超时 +# Keep-Alive超时 keepalive = 5 # 进程名(ps aux 中显示) diff --git a/favicon_app/routes/favicon_routes.py b/favicon_app/routes/favicon_routes.py index 11cd0c8..e26b61c 100644 --- a/favicon_app/routes/favicon_routes.py +++ b/favicon_app/routes/favicon_routes.py @@ -28,7 +28,6 @@ favicon_router = APIRouter(prefix="", tags=["favicon"]) @favicon_router.get('/icon/') @favicon_router.get('/icon') -@favicon_router.get('/') async def get_favicon( request: Request, bg_tasks: BackgroundTasks, @@ -54,14 +53,14 @@ async def get_count(): return _service.get_count() -@favicon_router.get('/icon/referrer', include_in_schema=False) +@favicon_router.get('/icon/referer', include_in_schema=False) async def get_referrer(): """获取请求来源信息""" content = 'None' - path = os.path.join(_icon_root_path, 'conf', 'referrer.txt') + path = os.path.join(_icon_root_path, 'data', 'referer.txt') if os.path.exists(path): try: content = FileUtil.read_file(path, mode='r') or 'None' except Exception as e: - logger.error(f"读取referrer文件失败: {e}") + logger.error(f"读取referer文件失败: {e}") return Response(content=content, media_type="text/plain") diff --git a/favicon_app/routes/favicon_service.py b/favicon_app/routes/favicon_service.py index c68ea52..f04a72a 100644 --- a/favicon_app/routes/favicon_service.py +++ b/favicon_app/routes/favicon_service.py @@ -3,13 +3,12 @@ import hashlib import logging import os -import platform import random import re import time from queue import Queue from threading import Lock -from typing import Optional, Tuple, Dict, Set, List +from typing import Optional, Tuple, Dict, List import bs4 import urllib3 @@ -22,15 +21,6 @@ from favicon_app.utils import header from favicon_app.utils.file_util import FileUtil from favicon_app.utils.filetype import helpers, filetype -if platform.system() == 'Windows': - import msvcrt -else: - import fcntl - -# 多进程加锁 -LOCKS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', 'data', 'locks') -os.makedirs(LOCKS_DIR, exist_ok=True) - urllib3.disable_warnings() logging.captureWarnings(True) logger = logging.getLogger(__name__) @@ -53,7 +43,6 @@ class FaviconService: self.url_count = 0 self.request_icon_count = 0 self.request_cache_count = 0 - self.href_referrer: Set[str] = set() self.domain_list: List[str] = list() # 初始化队列 @@ -275,137 +264,11 @@ class FaviconService: return None - @staticmethod - def _lock_file(file_handle, lock_type='exclusive'): - """跨平台文件锁""" - if platform.system() == 'Windows': - try: - msvcrt.locking(file_handle.fileno(), msvcrt.LK_LOCK, 1) - return True - except Exception: - time.sleep(0.01) - try: - msvcrt.locking(file_handle.fileno(), msvcrt.LK_NBLCK, 1) - return True - except: - return False - else: - if lock_type == 'exclusive': - fcntl.flock(file_handle, fcntl.LOCK_EX) - else: - fcntl.flock(file_handle, fcntl.LOCK_SH) - return True - - @staticmethod - def _unlock_file(file_handle): - """释放文件锁""" - if platform.system() == 'Windows': - try: - msvcrt.locking(file_handle.fileno(), msvcrt.LK_UNLCK, 1) - except Exception as e: - logger.error(f"释放Windows文件锁失败: {e}") - else: - try: - fcntl.flock(file_handle, fcntl.LOCK_UN) - except Exception as e: - logger.error(f"释放Unix文件锁失败: {e}") - - def _get_domain_lock_path(self, domain: str) -> str: - """获取域名对应的锁文件路径""" - domain_hash = hashlib.md5(domain.encode('utf-8')).hexdigest() - return os.path.join(LOCKS_DIR, f"{domain_hash}.lock") - - def _acquire_domain_lock(self, domain: str, timeout: float = 5.0) -> Optional[str]: - """获取域名锁,防止多进程同时获取同一个域名的favicon""" - lock_path = self._get_domain_lock_path(domain) - start_time = time.time() - - while time.time() - start_time < timeout: - try: - fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY) - os.close(fd) - return lock_path - except FileExistsError: - time.sleep(0.1) - - logger.warning(f"获取域名锁超时: {domain}") - return None - - def _release_domain_lock(self, lock_path: str) -> None: - """释放域名锁""" - try: - if os.path.exists(lock_path): - os.remove(lock_path) - except Exception as e: - logger.error(f"释放锁文件失败 {lock_path}: {e}") - - async def _referer(self, req: Request) -> None: - """记录请求来源""" - _referrer = req.headers.get('referrer') or req.headers.get('referer') - - if _referrer: - logger.debug(f"-> Referrer: {_referrer}") - - _path = os.path.join(icon_root_path, 'conf', 'referrer.txt') - os.makedirs(os.path.dirname(_path), exist_ok=True) - - try: - if _referrer in self.href_referrer: - return - - with open(_path, 'a+', encoding='utf-8') as f: - try: - locked = self._lock_file(f, 'exclusive') - if not locked: - logger.warning(f"无法获取文件锁,跳过referrer记录: {_referrer}") - return - - f.seek(0) - existing_referrers = {line.strip() for line in f.readlines()} - - if _referrer not in existing_referrers: - f.seek(0, os.SEEK_END) - f.write(f'{_referrer}\n') - f.flush() - if platform.system() != 'Windows': - os.fsync(f.fileno()) - logger.debug(f"成功添加新referrer: {_referrer}") - self.href_referrer.add(_referrer) - else: - if _referrer not in self.href_referrer: - self.href_referrer.add(_referrer) - finally: - self._unlock_file(f) - except Exception as e: - logger.error(f"处理referrer文件失败: {e}") - - if len(self.href_referrer) > 1000 or random.random() < 0.01: - await self._refresh_referrer_cache(_path) - - async def _refresh_referrer_cache(self, file_path: str) -> None: - """刷新内存中的referrer缓存""" - try: - if os.path.exists(file_path): - with open(file_path, 'r', encoding='utf-8') as f: - try: - locked = self._lock_file(f, 'shared') - if locked: - self.href_referrer = {line.strip() for line in f.readlines() if line.strip()} - finally: - self._unlock_file(f) - except Exception as e: - logger.error(f"刷新referrer缓存失败: {e}") - def get_icon_sync(self, entity: Favicon, _cached: bytes = None) -> Optional[bytes]: """同步获取图标""" - domain_lock = None icon_content = None try: - domain_lock = self._acquire_domain_lock(entity.domain) - if not domain_lock: - logger.warning(f"无法获取域名锁,跳过获取图标: {entity.domain}") - return _cached or default_icon_content with self._lock: if entity.domain in self.domain_list: @@ -474,9 +337,6 @@ class FaviconService: logger.error(f"获取图标时发生错误 {entity.domain}: {e}") return _cached or default_icon_content finally: - if domain_lock: - self._release_domain_lock(domain_lock) - with self._lock: if entity.domain in self.domain_list: self.domain_list.remove(entity.domain) @@ -491,7 +351,6 @@ class FaviconService: 'request_cache_count': self.request_cache_count, 'queue_size': self.icon_queue.qsize(), 'total_queue_size': self.total_queue.qsize(), - 'href_referrer': len(self.href_referrer), } async def get_favicon_handler( @@ -519,9 +378,6 @@ class FaviconService: logger.warning(f"无效的URL: {url}") return self.get_default(self.time_of_7_days) - # 检测并记录referer - await self._referer(request) - # 检查缓存 _cached, cached_icon = self._get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1', 'True']) diff --git a/main.py b/main.py index 864edb5..9a291f7 100644 --- a/main.py +++ b/main.py @@ -4,7 +4,7 @@ import logging import os import sys -from fastapi import FastAPI +from fastapi import FastAPI, Request from fastapi.responses import Response from favicon_app.routes import favicon_router @@ -20,11 +20,24 @@ favicon_icon_file = FileUtil.read_file(os.path.join(current_dir, 'favicon.ico'), # 默认的站点图标 default_icon_file = FileUtil.read_file(os.path.join(current_dir, 'favicon.png'), mode='rb') +# 定义referer日志文件路径 +referer_log_file = os.path.join(current_dir, 'data', 'referer.txt') + # fastapi app = FastAPI(title="Favicon API", description="获取网站favicon图标", version="3.0.0") app.include_router(favicon_router) +@app.middleware("http") +async def log_referer(request: Request, call_next): + referer = request.headers.get('referrer') or request.headers.get('referer') + if referer: + FileUtil.write_file(referer_log_file, referer, mode='a', atomic=True) + response = await call_next(request) + + return response + + # @app.get("/") async def root(): return {"message": "Welcome to Favicon API! Use /icon/?url=example.com to get favicon."}