25.09.04
parent
ae73b2cd4a
commit
38c681b553
|
@ -17,3 +17,5 @@ __pycache__/
|
||||||
|
|
||||||
# 忽略指定目录
|
# 忽略指定目录
|
||||||
data/
|
data/
|
||||||
|
logs/
|
||||||
|
conf/
|
||||||
|
|
|
@ -8,7 +8,7 @@ import yaml
|
||||||
bind = "0.0.0.0:8000"
|
bind = "0.0.0.0:8000"
|
||||||
|
|
||||||
# Worker 进程数(推荐 CPU 核心数 * 2 + 1)
|
# Worker 进程数(推荐 CPU 核心数 * 2 + 1)
|
||||||
workers = 4
|
workers = 2
|
||||||
|
|
||||||
# 工作模式(sync、gevent、uvicorn.workers.UvicornWorker)
|
# 工作模式(sync、gevent、uvicorn.workers.UvicornWorker)
|
||||||
worker_class = "uvicorn.workers.UvicornWorker"
|
worker_class = "uvicorn.workers.UvicornWorker"
|
||||||
|
@ -17,6 +17,9 @@ worker_class = "uvicorn.workers.UvicornWorker"
|
||||||
log_dir = Path("logs")
|
log_dir = Path("logs")
|
||||||
log_dir.mkdir(exist_ok=True)
|
log_dir.mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
# 允许来自这些IP的代理转发
|
||||||
|
forwarded_allow_ips = "*"
|
||||||
|
|
||||||
# 日志配置
|
# 日志配置
|
||||||
with open(Path(__file__).with_name("logging.yaml"), "r", encoding="utf-8") as f:
|
with open(Path(__file__).with_name("logging.yaml"), "r", encoding="utf-8") as f:
|
||||||
logconfig_dict = yaml.safe_load(f)
|
logconfig_dict = yaml.safe_load(f)
|
||||||
|
@ -36,10 +39,10 @@ raw_env = [
|
||||||
"UVICORN_ACCESS_LOGFORMAT=%(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\" %(D)s"
|
"UVICORN_ACCESS_LOGFORMAT=%(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\" %(D)s"
|
||||||
]
|
]
|
||||||
|
|
||||||
# 可选:超时时间(秒)
|
# 超时时间(秒)
|
||||||
timeout = 120
|
timeout = 120
|
||||||
|
|
||||||
# Keep - Alive超时
|
# Keep-Alive超时
|
||||||
keepalive = 5
|
keepalive = 5
|
||||||
|
|
||||||
# 进程名(ps aux 中显示)
|
# 进程名(ps aux 中显示)
|
||||||
|
|
|
@ -8,7 +8,7 @@ import yaml
|
||||||
bind = "0.0.0.0:8000"
|
bind = "0.0.0.0:8000"
|
||||||
|
|
||||||
# Worker 进程数(推荐 CPU 核心数 * 2 + 1)
|
# Worker 进程数(推荐 CPU 核心数 * 2 + 1)
|
||||||
workers = 4
|
workers = 2
|
||||||
|
|
||||||
# 工作模式(sync、gevent、uvicorn.workers.UvicornWorker)
|
# 工作模式(sync、gevent、uvicorn.workers.UvicornWorker)
|
||||||
worker_class = "uvicorn.workers.UvicornWorker"
|
worker_class = "uvicorn.workers.UvicornWorker"
|
||||||
|
@ -17,6 +17,9 @@ worker_class = "uvicorn.workers.UvicornWorker"
|
||||||
log_dir = Path("logs")
|
log_dir = Path("logs")
|
||||||
log_dir.mkdir(exist_ok=True)
|
log_dir.mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
# 允许来自这些IP的代理转发
|
||||||
|
forwarded_allow_ips = "*"
|
||||||
|
|
||||||
# 日志配置
|
# 日志配置
|
||||||
with open(Path(__file__).with_name("logging.yaml"), "r", encoding="utf-8") as f:
|
with open(Path(__file__).with_name("logging.yaml"), "r", encoding="utf-8") as f:
|
||||||
logconfig_dict = yaml.safe_load(f)
|
logconfig_dict = yaml.safe_load(f)
|
||||||
|
@ -36,10 +39,10 @@ raw_env = [
|
||||||
"UVICORN_ACCESS_LOGFORMAT=%(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\" %(D)s"
|
"UVICORN_ACCESS_LOGFORMAT=%(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\" %(D)s"
|
||||||
]
|
]
|
||||||
|
|
||||||
# 可选:超时时间(秒)
|
# 超时时间(秒)
|
||||||
timeout = 120
|
timeout = 120
|
||||||
|
|
||||||
# Keep - Alive超时
|
# Keep-Alive超时
|
||||||
keepalive = 5
|
keepalive = 5
|
||||||
|
|
||||||
# 进程名(ps aux 中显示)
|
# 进程名(ps aux 中显示)
|
||||||
|
|
|
@ -28,7 +28,6 @@ favicon_router = APIRouter(prefix="", tags=["favicon"])
|
||||||
|
|
||||||
@favicon_router.get('/icon/')
|
@favicon_router.get('/icon/')
|
||||||
@favicon_router.get('/icon')
|
@favicon_router.get('/icon')
|
||||||
@favicon_router.get('/')
|
|
||||||
async def get_favicon(
|
async def get_favicon(
|
||||||
request: Request,
|
request: Request,
|
||||||
bg_tasks: BackgroundTasks,
|
bg_tasks: BackgroundTasks,
|
||||||
|
@ -54,14 +53,14 @@ async def get_count():
|
||||||
return _service.get_count()
|
return _service.get_count()
|
||||||
|
|
||||||
|
|
||||||
@favicon_router.get('/icon/referrer', include_in_schema=False)
|
@favicon_router.get('/icon/referer', include_in_schema=False)
|
||||||
async def get_referrer():
|
async def get_referrer():
|
||||||
"""获取请求来源信息"""
|
"""获取请求来源信息"""
|
||||||
content = 'None'
|
content = 'None'
|
||||||
path = os.path.join(_icon_root_path, 'conf', 'referrer.txt')
|
path = os.path.join(_icon_root_path, 'data', 'referer.txt')
|
||||||
if os.path.exists(path):
|
if os.path.exists(path):
|
||||||
try:
|
try:
|
||||||
content = FileUtil.read_file(path, mode='r') or 'None'
|
content = FileUtil.read_file(path, mode='r') or 'None'
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"读取referrer文件失败: {e}")
|
logger.error(f"读取referer文件失败: {e}")
|
||||||
return Response(content=content, media_type="text/plain")
|
return Response(content=content, media_type="text/plain")
|
||||||
|
|
|
@ -3,13 +3,12 @@
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import platform
|
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from queue import Queue
|
from queue import Queue
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
from typing import Optional, Tuple, Dict, Set, List
|
from typing import Optional, Tuple, Dict, List
|
||||||
|
|
||||||
import bs4
|
import bs4
|
||||||
import urllib3
|
import urllib3
|
||||||
|
@ -22,15 +21,6 @@ from favicon_app.utils import header
|
||||||
from favicon_app.utils.file_util import FileUtil
|
from favicon_app.utils.file_util import FileUtil
|
||||||
from favicon_app.utils.filetype import helpers, filetype
|
from favicon_app.utils.filetype import helpers, filetype
|
||||||
|
|
||||||
if platform.system() == 'Windows':
|
|
||||||
import msvcrt
|
|
||||||
else:
|
|
||||||
import fcntl
|
|
||||||
|
|
||||||
# 多进程加锁
|
|
||||||
LOCKS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', 'data', 'locks')
|
|
||||||
os.makedirs(LOCKS_DIR, exist_ok=True)
|
|
||||||
|
|
||||||
urllib3.disable_warnings()
|
urllib3.disable_warnings()
|
||||||
logging.captureWarnings(True)
|
logging.captureWarnings(True)
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -53,7 +43,6 @@ class FaviconService:
|
||||||
self.url_count = 0
|
self.url_count = 0
|
||||||
self.request_icon_count = 0
|
self.request_icon_count = 0
|
||||||
self.request_cache_count = 0
|
self.request_cache_count = 0
|
||||||
self.href_referrer: Set[str] = set()
|
|
||||||
self.domain_list: List[str] = list()
|
self.domain_list: List[str] = list()
|
||||||
|
|
||||||
# 初始化队列
|
# 初始化队列
|
||||||
|
@ -275,137 +264,11 @@ class FaviconService:
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _lock_file(file_handle, lock_type='exclusive'):
|
|
||||||
"""跨平台文件锁"""
|
|
||||||
if platform.system() == 'Windows':
|
|
||||||
try:
|
|
||||||
msvcrt.locking(file_handle.fileno(), msvcrt.LK_LOCK, 1)
|
|
||||||
return True
|
|
||||||
except Exception:
|
|
||||||
time.sleep(0.01)
|
|
||||||
try:
|
|
||||||
msvcrt.locking(file_handle.fileno(), msvcrt.LK_NBLCK, 1)
|
|
||||||
return True
|
|
||||||
except:
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
if lock_type == 'exclusive':
|
|
||||||
fcntl.flock(file_handle, fcntl.LOCK_EX)
|
|
||||||
else:
|
|
||||||
fcntl.flock(file_handle, fcntl.LOCK_SH)
|
|
||||||
return True
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _unlock_file(file_handle):
|
|
||||||
"""释放文件锁"""
|
|
||||||
if platform.system() == 'Windows':
|
|
||||||
try:
|
|
||||||
msvcrt.locking(file_handle.fileno(), msvcrt.LK_UNLCK, 1)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"释放Windows文件锁失败: {e}")
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
fcntl.flock(file_handle, fcntl.LOCK_UN)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"释放Unix文件锁失败: {e}")
|
|
||||||
|
|
||||||
def _get_domain_lock_path(self, domain: str) -> str:
|
|
||||||
"""获取域名对应的锁文件路径"""
|
|
||||||
domain_hash = hashlib.md5(domain.encode('utf-8')).hexdigest()
|
|
||||||
return os.path.join(LOCKS_DIR, f"{domain_hash}.lock")
|
|
||||||
|
|
||||||
def _acquire_domain_lock(self, domain: str, timeout: float = 5.0) -> Optional[str]:
|
|
||||||
"""获取域名锁,防止多进程同时获取同一个域名的favicon"""
|
|
||||||
lock_path = self._get_domain_lock_path(domain)
|
|
||||||
start_time = time.time()
|
|
||||||
|
|
||||||
while time.time() - start_time < timeout:
|
|
||||||
try:
|
|
||||||
fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
|
|
||||||
os.close(fd)
|
|
||||||
return lock_path
|
|
||||||
except FileExistsError:
|
|
||||||
time.sleep(0.1)
|
|
||||||
|
|
||||||
logger.warning(f"获取域名锁超时: {domain}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _release_domain_lock(self, lock_path: str) -> None:
|
|
||||||
"""释放域名锁"""
|
|
||||||
try:
|
|
||||||
if os.path.exists(lock_path):
|
|
||||||
os.remove(lock_path)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"释放锁文件失败 {lock_path}: {e}")
|
|
||||||
|
|
||||||
async def _referer(self, req: Request) -> None:
|
|
||||||
"""记录请求来源"""
|
|
||||||
_referrer = req.headers.get('referrer') or req.headers.get('referer')
|
|
||||||
|
|
||||||
if _referrer:
|
|
||||||
logger.debug(f"-> Referrer: {_referrer}")
|
|
||||||
|
|
||||||
_path = os.path.join(icon_root_path, 'conf', 'referrer.txt')
|
|
||||||
os.makedirs(os.path.dirname(_path), exist_ok=True)
|
|
||||||
|
|
||||||
try:
|
|
||||||
if _referrer in self.href_referrer:
|
|
||||||
return
|
|
||||||
|
|
||||||
with open(_path, 'a+', encoding='utf-8') as f:
|
|
||||||
try:
|
|
||||||
locked = self._lock_file(f, 'exclusive')
|
|
||||||
if not locked:
|
|
||||||
logger.warning(f"无法获取文件锁,跳过referrer记录: {_referrer}")
|
|
||||||
return
|
|
||||||
|
|
||||||
f.seek(0)
|
|
||||||
existing_referrers = {line.strip() for line in f.readlines()}
|
|
||||||
|
|
||||||
if _referrer not in existing_referrers:
|
|
||||||
f.seek(0, os.SEEK_END)
|
|
||||||
f.write(f'{_referrer}\n')
|
|
||||||
f.flush()
|
|
||||||
if platform.system() != 'Windows':
|
|
||||||
os.fsync(f.fileno())
|
|
||||||
logger.debug(f"成功添加新referrer: {_referrer}")
|
|
||||||
self.href_referrer.add(_referrer)
|
|
||||||
else:
|
|
||||||
if _referrer not in self.href_referrer:
|
|
||||||
self.href_referrer.add(_referrer)
|
|
||||||
finally:
|
|
||||||
self._unlock_file(f)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"处理referrer文件失败: {e}")
|
|
||||||
|
|
||||||
if len(self.href_referrer) > 1000 or random.random() < 0.01:
|
|
||||||
await self._refresh_referrer_cache(_path)
|
|
||||||
|
|
||||||
async def _refresh_referrer_cache(self, file_path: str) -> None:
|
|
||||||
"""刷新内存中的referrer缓存"""
|
|
||||||
try:
|
|
||||||
if os.path.exists(file_path):
|
|
||||||
with open(file_path, 'r', encoding='utf-8') as f:
|
|
||||||
try:
|
|
||||||
locked = self._lock_file(f, 'shared')
|
|
||||||
if locked:
|
|
||||||
self.href_referrer = {line.strip() for line in f.readlines() if line.strip()}
|
|
||||||
finally:
|
|
||||||
self._unlock_file(f)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"刷新referrer缓存失败: {e}")
|
|
||||||
|
|
||||||
def get_icon_sync(self, entity: Favicon, _cached: bytes = None) -> Optional[bytes]:
|
def get_icon_sync(self, entity: Favicon, _cached: bytes = None) -> Optional[bytes]:
|
||||||
"""同步获取图标"""
|
"""同步获取图标"""
|
||||||
domain_lock = None
|
|
||||||
icon_content = None
|
icon_content = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
domain_lock = self._acquire_domain_lock(entity.domain)
|
|
||||||
if not domain_lock:
|
|
||||||
logger.warning(f"无法获取域名锁,跳过获取图标: {entity.domain}")
|
|
||||||
return _cached or default_icon_content
|
|
||||||
|
|
||||||
with self._lock:
|
with self._lock:
|
||||||
if entity.domain in self.domain_list:
|
if entity.domain in self.domain_list:
|
||||||
|
@ -474,9 +337,6 @@ class FaviconService:
|
||||||
logger.error(f"获取图标时发生错误 {entity.domain}: {e}")
|
logger.error(f"获取图标时发生错误 {entity.domain}: {e}")
|
||||||
return _cached or default_icon_content
|
return _cached or default_icon_content
|
||||||
finally:
|
finally:
|
||||||
if domain_lock:
|
|
||||||
self._release_domain_lock(domain_lock)
|
|
||||||
|
|
||||||
with self._lock:
|
with self._lock:
|
||||||
if entity.domain in self.domain_list:
|
if entity.domain in self.domain_list:
|
||||||
self.domain_list.remove(entity.domain)
|
self.domain_list.remove(entity.domain)
|
||||||
|
@ -491,7 +351,6 @@ class FaviconService:
|
||||||
'request_cache_count': self.request_cache_count,
|
'request_cache_count': self.request_cache_count,
|
||||||
'queue_size': self.icon_queue.qsize(),
|
'queue_size': self.icon_queue.qsize(),
|
||||||
'total_queue_size': self.total_queue.qsize(),
|
'total_queue_size': self.total_queue.qsize(),
|
||||||
'href_referrer': len(self.href_referrer),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async def get_favicon_handler(
|
async def get_favicon_handler(
|
||||||
|
@ -519,9 +378,6 @@ class FaviconService:
|
||||||
logger.warning(f"无效的URL: {url}")
|
logger.warning(f"无效的URL: {url}")
|
||||||
return self.get_default(self.time_of_7_days)
|
return self.get_default(self.time_of_7_days)
|
||||||
|
|
||||||
# 检测并记录referer
|
|
||||||
await self._referer(request)
|
|
||||||
|
|
||||||
# 检查缓存
|
# 检查缓存
|
||||||
_cached, cached_icon = self._get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1', 'True'])
|
_cached, cached_icon = self._get_cache_icon(entity.domain_md5, refresh=refresh in ['true', '1', 'True'])
|
||||||
|
|
||||||
|
|
15
main.py
15
main.py
|
@ -4,7 +4,7 @@ import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI, Request
|
||||||
from fastapi.responses import Response
|
from fastapi.responses import Response
|
||||||
|
|
||||||
from favicon_app.routes import favicon_router
|
from favicon_app.routes import favicon_router
|
||||||
|
@ -20,11 +20,24 @@ favicon_icon_file = FileUtil.read_file(os.path.join(current_dir, 'favicon.ico'),
|
||||||
# 默认的站点图标
|
# 默认的站点图标
|
||||||
default_icon_file = FileUtil.read_file(os.path.join(current_dir, 'favicon.png'), mode='rb')
|
default_icon_file = FileUtil.read_file(os.path.join(current_dir, 'favicon.png'), mode='rb')
|
||||||
|
|
||||||
|
# 定义referer日志文件路径
|
||||||
|
referer_log_file = os.path.join(current_dir, 'data', 'referer.txt')
|
||||||
|
|
||||||
# fastapi
|
# fastapi
|
||||||
app = FastAPI(title="Favicon API", description="获取网站favicon图标", version="3.0.0")
|
app = FastAPI(title="Favicon API", description="获取网站favicon图标", version="3.0.0")
|
||||||
app.include_router(favicon_router)
|
app.include_router(favicon_router)
|
||||||
|
|
||||||
|
|
||||||
|
@app.middleware("http")
|
||||||
|
async def log_referer(request: Request, call_next):
|
||||||
|
referer = request.headers.get('referrer') or request.headers.get('referer')
|
||||||
|
if referer:
|
||||||
|
FileUtil.write_file(referer_log_file, referer, mode='a', atomic=True)
|
||||||
|
response = await call_next(request)
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
# @app.get("/")
|
# @app.get("/")
|
||||||
async def root():
|
async def root():
|
||||||
return {"message": "Welcome to Favicon API! Use /icon/?url=example.com to get favicon."}
|
return {"message": "Welcome to Favicon API! Use /icon/?url=example.com to get favicon."}
|
||||||
|
|
Loading…
Reference in New Issue