25.09.03
parent
452e110ee5
commit
e54e69b940
|
@ -34,4 +34,4 @@ VOLUME ["/app/data", "/app/conf", "/app/logs"]
|
||||||
ENTRYPOINT ["entrypoint.sh"]
|
ENTRYPOINT ["entrypoint.sh"]
|
||||||
|
|
||||||
# CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
# CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
CMD ["gunicorn", "-c", "conf/gunicorn.conf.pyc", "main:app"]
|
CMD ["gunicorn", "-c", "conf/gunicorn.conf.py", "main:app"]
|
||||||
|
|
|
@ -0,0 +1,49 @@
|
||||||
|
# -*- coding: utf-8 -*-
"""Gunicorn settings for serving ``main:app`` with Uvicorn workers.

Gunicorn executes this module at start-up and reads the module-level names
below as settings.  The logging configuration is loaded from the
``logging.yaml`` file that sits next to this module.
"""

from pathlib import Path

import yaml

# Address and port to bind to.
bind = "0.0.0.0:8000"

# Number of worker processes (rule of thumb: CPU cores * 2 + 1).
workers = 4

# Worker type (sync, gevent, uvicorn.workers.UvicornWorker).
worker_class = "uvicorn.workers.UvicornWorker"

# Log directory, relative to the process working directory; it must exist
# before the file handlers/access log below try to open files inside it.
log_dir = Path("logs")
log_dir.mkdir(parents=True, exist_ok=True)

# Logging configuration, read from the sibling logging.yaml.
# yaml.safe_load() returns None for an empty document (the container
# entrypoint creates an empty logging.yaml when no default is available);
# fall back to an empty dict so the setting is always a dictionary.
with open(Path(__file__).with_name("logging.yaml"), "r", encoding="utf-8") as f:
    logconfig_dict = yaml.safe_load(f) or {}

# Log level (debug, info, warning, error, critical); the YAML configuration
# takes precedence.
loglevel = "info"

# Access log file ("-" means stdout); the YAML configuration takes precedence.
accesslog = "logs/access.log"

# Error log file; the YAML configuration takes precedence.
errorlog = "-"

# access_log_format only applies to sync workers and is not available with
# UvicornWorker; the YAML configuration takes precedence.
# access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" %(D)s'
raw_env = [
    "UVICORN_ACCESS_LOGFORMAT=%(h)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\" %(D)s"
]

# Optional: worker timeout in seconds.
timeout = 120

# Keep-Alive timeout in seconds.
keepalive = 5

# Process name (as shown by `ps aux`).
# proc_name = "gunicorn"

# Run as a daemon (in the background; default False).
# daemon = True
|
|
@ -0,0 +1,60 @@
|
||||||
|
# Python logging dictConfig schema, loaded by gunicorn.conf.py.
version: 1
disable_existing_loggers: false

formatters:
  default:
    format: "[%(levelname)-7s] %(asctime)s [%(process)d] -[%(name)s:%(lineno)d] %(message)s"
    datefmt: "%Y-%m-%d %H:%M:%S"

handlers:
  # Everything at INFO and above goes to stdout.
  console:
    class: logging.StreamHandler
    level: INFO
    formatter: default
    stream: ext://sys.stdout

  # Daily-rotated file for INFO and above, seven backups kept.
  file_info:
    class: logging.handlers.TimedRotatingFileHandler
    level: INFO
    formatter: default
    filename: logs/info.log
    when: midnight
    interval: 1
    backupCount: 7
    encoding: utf8
    delay: true

  # Daily-rotated file for ERROR and above, seven backups kept.
  file_error:
    class: logging.handlers.TimedRotatingFileHandler
    level: ERROR
    formatter: default
    filename: logs/error.log
    when: midnight
    interval: 1
    backupCount: 7
    encoding: utf8
    delay: true

loggers:
  uvicorn:
    level: INFO
    handlers: [console, file_info]
    propagate: false

  uvicorn.error:
    level: INFO
    handlers: [console, file_error]
    propagate: false

  uvicorn.access:
    level: INFO
    handlers: [console, file_info]
    propagate: false

root:
  level: INFO
  handlers: [console, file_info, file_error]
|
|
@ -2,7 +2,38 @@
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
# 首次启动时把镜像里的默认配置拷到挂载点
|
mkdir -p /app/conf
|
||||||
[ -z "$(ls -A /app/conf)" ] && cp -r /app/conf.default/* /app/conf/
|
|
||||||
|
default_conf_dir="/app/conf.default/"
gunicorn_conf="/app/conf/gunicorn.conf.py"
logging_conf="/app/conf/logging.yaml"

# ensure_conf NAME TARGET
# Make sure TARGET exists. When it is missing, copy NAME from
# $default_conf_dir; if that default is missing as well, create an empty
# TARGET so later steps still find a file. The file is set to mode 644.
ensure_conf() {
    conf_name="$1"
    conf_target="$2"

    # An existing file (e.g. supplied through the mounted volume) is left alone.
    if [ -f "$conf_target" ]; then
        return 0
    fi

    echo "复制默认的${conf_name}配置文件..."
    if [ -f "$default_conf_dir/$conf_name" ]; then
        cp "$default_conf_dir/$conf_name" "$conf_target"
        chmod 644 "$conf_target"
        echo "已成功复制${conf_name}"
    else
        echo "警告:默认配置文件 $default_conf_dir/$conf_name 不存在,创建空文件"
        touch "$conf_target"
        chmod 644 "$conf_target"
    fi
}

ensure_conf "gunicorn.conf.py" "$gunicorn_conf"
ensure_conf "logging.yaml" "$logging_conf"

# Runtime directories for logs and cached data.
mkdir -p /app/logs /app/data/icon /app/data/text
|
||||||
|
|
||||||
exec "$@"
|
exec "$@"
|
||||||
|
|
|
@ -34,7 +34,7 @@ async def get_favicon(
|
||||||
bg_tasks: BackgroundTasks,
|
bg_tasks: BackgroundTasks,
|
||||||
url: Optional[str] = Query(None, description="网址:eg. https://www.baidu.com"),
|
url: Optional[str] = Query(None, description="网址:eg. https://www.baidu.com"),
|
||||||
refresh: Optional[str] = Query(None, include_in_schema=False),
|
refresh: Optional[str] = Query(None, include_in_schema=False),
|
||||||
sync: Optional[str] = Query('false', description="是否使用同步方式获取")
|
sync: Optional[str] = Query('true', description="是否使用同步方式获取")
|
||||||
):
|
):
|
||||||
"""获取网站图标"""
|
"""获取网站图标"""
|
||||||
return await _service.get_favicon_handler(request, bg_tasks, url, refresh, sync)
|
return await _service.get_favicon_handler(request, bg_tasks, url, refresh, sync)
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import platform
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
@ -21,6 +22,15 @@ from favicon_app.utils import header
|
||||||
from favicon_app.utils.file_util import FileUtil
|
from favicon_app.utils.file_util import FileUtil
|
||||||
from favicon_app.utils.filetype import helpers, filetype
|
from favicon_app.utils.filetype import helpers, filetype
|
||||||
|
|
||||||
|
if platform.system() == 'Windows':
|
||||||
|
import msvcrt
|
||||||
|
else:
|
||||||
|
import fcntl
|
||||||
|
|
||||||
|
# 多进程加锁
|
||||||
|
LOCKS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', 'data', 'locks')
|
||||||
|
os.makedirs(LOCKS_DIR, exist_ok=True)
|
||||||
|
|
||||||
urllib3.disable_warnings()
|
urllib3.disable_warnings()
|
||||||
logging.captureWarnings(True)
|
logging.captureWarnings(True)
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -50,6 +60,9 @@ class FaviconService:
|
||||||
self.icon_queue = Queue()
|
self.icon_queue = Queue()
|
||||||
self.total_queue = Queue()
|
self.total_queue = Queue()
|
||||||
|
|
||||||
|
# 队列阈值常量配置
|
||||||
|
self.MAX_QUEUE_SIZE = 3
|
||||||
|
|
||||||
# 时间常量
|
# 时间常量
|
||||||
self.time_of_1_minus = 1 * 60
|
self.time_of_1_minus = 1 * 60
|
||||||
self.time_of_5_minus = 5 * self.time_of_1_minus
|
self.time_of_5_minus = 5 * self.time_of_1_minus
|
||||||
|
@ -127,7 +140,7 @@ class FaviconService:
|
||||||
|
|
||||||
def _get_cache_file(self, domain: str, refresh: bool = False) -> Tuple[Optional[bytes], Optional[bytes]]:
|
def _get_cache_file(self, domain: str, refresh: bool = False) -> Tuple[Optional[bytes], Optional[bytes]]:
|
||||||
"""从缓存中获取图标文件"""
|
"""从缓存中获取图标文件"""
|
||||||
cache_path = os.path.join(icon_root_path, 'data/icon', domain + '.png')
|
cache_path = os.path.join(icon_root_path, 'data', 'icon', domain + '.png')
|
||||||
if os.path.exists(cache_path) and os.path.isfile(cache_path) and os.path.getsize(cache_path) > 0:
|
if os.path.exists(cache_path) and os.path.isfile(cache_path) and os.path.getsize(cache_path) > 0:
|
||||||
try:
|
try:
|
||||||
cached_icon = FileUtil.read_file(cache_path, mode='rb')
|
cached_icon = FileUtil.read_file(cache_path, mode='rb')
|
||||||
|
@ -262,6 +275,70 @@ class FaviconService:
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@staticmethod
def _lock_file(file_handle, lock_type='exclusive'):
    """Acquire a lock on an open file, on Windows or POSIX.

    Args:
        file_handle: An open file object exposing ``fileno()``.
        lock_type: ``'exclusive'`` for an exclusive lock; any other value
            requests a shared lock.  Only honoured on the ``fcntl`` branch;
            the Windows branch always uses ``msvcrt.LK_LOCK``/``LK_NBLCK``.

    Returns:
        True when the lock was obtained, False when the Windows branch could
        not obtain it.

    Raises:
        OSError: Propagated from ``fcntl.flock`` on non-Windows platforms.
    """
    if platform.system() == 'Windows':
        # msvcrt.locking() locks a byte range that starts at the *current*
        # file position.  Always lock byte 0 so that every caller (whatever
        # mode the file was opened in, wherever its position is) contends on
        # the same region and _unlock_file can release that same region.
        try:
            position = file_handle.tell()
            file_handle.seek(0)
        except (OSError, ValueError):
            return False
        try:
            try:
                msvcrt.locking(file_handle.fileno(), msvcrt.LK_LOCK, 1)
                return True
            except OSError:
                # One more non-blocking attempt after a short pause.
                time.sleep(0.01)
                try:
                    msvcrt.locking(file_handle.fileno(), msvcrt.LK_NBLCK, 1)
                    return True
                except OSError:
                    return False
        finally:
            # Give the caller its file position back.
            file_handle.seek(position)
    else:
        operation = fcntl.LOCK_EX if lock_type == 'exclusive' else fcntl.LOCK_SH
        fcntl.flock(file_handle, operation)
        return True


@staticmethod
def _unlock_file(file_handle):
    """Release a lock taken with ``_lock_file``.

    Best effort: failures are logged and never raised.

    Args:
        file_handle: The open file object that was passed to ``_lock_file``.
    """
    if platform.system() == 'Windows':
        try:
            # Unlock the same byte that _lock_file locked (byte 0), then
            # restore the caller's file position.
            position = file_handle.tell()
            file_handle.seek(0)
            try:
                msvcrt.locking(file_handle.fileno(), msvcrt.LK_UNLCK, 1)
            finally:
                file_handle.seek(position)
        except Exception as e:
            logger.error(f"释放Windows文件锁失败: {e}")
    else:
        try:
            fcntl.flock(file_handle, fcntl.LOCK_UN)
        except Exception as e:
            logger.error(f"释放Unix文件锁失败: {e}")
|
||||||
|
|
||||||
|
def _get_domain_lock_path(self, domain: str) -> str:
    """Return the path of the lock file that belongs to ``domain``.

    The file name is the hex MD5 digest of the UTF-8 encoded domain with a
    ``.lock`` suffix, placed inside ``LOCKS_DIR``.
    """
    digest = hashlib.md5(domain.encode('utf-8'))
    lock_name = digest.hexdigest() + ".lock"
    return os.path.join(LOCKS_DIR, lock_name)
|
||||||
|
|
||||||
|
def _acquire_domain_lock(self, domain: str, timeout: float = 5.0,
                         stale_after: Optional[float] = 300.0) -> Optional[str]:
    """Take the per-domain lock so only one process fetches a domain's favicon.

    The lock is a file created with ``O_CREAT | O_EXCL``; whoever creates it
    owns the lock until ``_release_domain_lock`` removes it.

    Args:
        domain: Domain whose lock should be taken.
        timeout: Maximum number of seconds to keep retrying.  At least one
            attempt is always made, even when ``timeout`` is zero or negative.
        stale_after: Age in seconds (by modification time) after which an
            existing lock file is treated as abandoned (for example because
            its owner was killed before releasing it) and removed.  ``None``
            disables this recovery.

    Returns:
        The lock file path on success, or ``None`` if the lock could not be
        obtained within ``timeout``.
    """
    lock_path = self._get_domain_lock_path(domain)
    # Monotonic clock: the timeout must not be affected by wall-clock jumps.
    deadline = time.monotonic() + max(timeout, 0.0)

    while True:
        try:
            fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            pass
        else:
            os.close(fd)
            return lock_path

        # Someone holds the lock; check whether the holder abandoned it.
        try:
            age = time.time() - os.path.getmtime(lock_path)
        except OSError:
            age = None  # lock vanished (just released) or cannot be inspected
        if stale_after is not None and age is not None and age > stale_after:
            logger.warning(f"清理过期的域名锁: {lock_path}")
            try:
                os.remove(lock_path)
            except FileNotFoundError:
                pass  # another process already cleared it
            except OSError as e:
                logger.error(f"清理过期的域名锁失败 {lock_path}: {e}")
            else:
                continue  # retry immediately now that the stale file is gone

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(0.1, remaining))

    logger.warning(f"获取域名锁超时: {domain}")
    return None
|
||||||
|
|
||||||
|
def _release_domain_lock(self, lock_path: str) -> None:
    """Release a domain lock by deleting its lock file.

    Best effort: a lock file that is already gone is not an error, and any
    other failure is logged rather than raised.

    Args:
        lock_path: Path returned by ``_acquire_domain_lock``.  An empty or
            ``None`` value is ignored.
    """
    if not lock_path:
        return
    try:
        # Remove directly instead of checking existence first: the file may
        # disappear between the check and the removal.
        os.remove(lock_path)
    except FileNotFoundError:
        pass
    except Exception as e:
        logger.error(f"释放锁文件失败 {lock_path}: {e}")
|
||||||
|
|
||||||
async def _referer(self, req: Request) -> None:
|
async def _referer(self, req: Request) -> None:
|
||||||
"""记录请求来源"""
|
"""记录请求来源"""
|
||||||
_referrer = req.headers.get('referrer') or req.headers.get('referer')
|
_referrer = req.headers.get('referrer') or req.headers.get('referer')
|
||||||
|
@ -270,40 +347,79 @@ class FaviconService:
|
||||||
logger.debug(f"-> Referrer: {_referrer}")
|
logger.debug(f"-> Referrer: {_referrer}")
|
||||||
|
|
||||||
_path = os.path.join(icon_root_path, 'conf', 'referrer.txt')
|
_path = os.path.join(icon_root_path, 'conf', 'referrer.txt')
|
||||||
|
os.makedirs(os.path.dirname(_path), exist_ok=True)
|
||||||
|
|
||||||
with self._lock:
|
try:
|
||||||
# 首次加载现有referrer数据
|
if _referrer in self.href_referrer:
|
||||||
if len(self.href_referrer) == 0 and os.path.exists(_path):
|
return
|
||||||
try:
|
|
||||||
with open(_path, 'r', encoding='utf-8') as ff:
|
|
||||||
self.href_referrer = {line.strip() for line in ff.readlines()}
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"读取referrer文件失败: {e}")
|
|
||||||
|
|
||||||
# 添加新的referrer
|
with open(_path, 'a+', encoding='utf-8') as f:
|
||||||
if _referrer not in self.href_referrer:
|
|
||||||
self.href_referrer.add(_referrer)
|
|
||||||
try:
|
try:
|
||||||
FileUtil.write_file(_path, f'{_referrer}\n', mode='a')
|
locked = self._lock_file(f, 'exclusive')
|
||||||
except Exception as e:
|
if not locked:
|
||||||
logger.error(f"写入referrer文件失败: {e}")
|
logger.warning(f"无法获取文件锁,跳过referrer记录: {_referrer}")
|
||||||
|
return
|
||||||
|
|
||||||
|
f.seek(0)
|
||||||
|
existing_referrers = {line.strip() for line in f.readlines()}
|
||||||
|
|
||||||
|
if _referrer not in existing_referrers:
|
||||||
|
f.seek(0, os.SEEK_END)
|
||||||
|
f.write(f'{_referrer}\n')
|
||||||
|
f.flush()
|
||||||
|
if platform.system() != 'Windows':
|
||||||
|
os.fsync(f.fileno())
|
||||||
|
logger.debug(f"成功添加新referrer: {_referrer}")
|
||||||
|
self.href_referrer.add(_referrer)
|
||||||
|
else:
|
||||||
|
if _referrer not in self.href_referrer:
|
||||||
|
self.href_referrer.add(_referrer)
|
||||||
|
finally:
|
||||||
|
self._unlock_file(f)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"处理referrer文件失败: {e}")
|
||||||
|
|
||||||
|
if len(self.href_referrer) > 1000 or random.random() < 0.01:
|
||||||
|
await self._refresh_referrer_cache(_path)
|
||||||
|
|
||||||
|
async def _refresh_referrer_cache(self, file_path: str) -> None:
    """Reload the in-memory referrer set from the referrer file.

    The file is read under a shared lock; ``self.href_referrer`` is replaced
    with the set of non-blank, stripped lines.  If the file does not exist or
    the lock cannot be obtained, the current cache is left untouched.  Other
    failures are logged, never raised.

    Args:
        file_path: Path of the referrer file, one referrer per line.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            if not self._lock_file(f, 'shared'):
                return
            # Only unlock what was actually locked.
            try:
                stripped = (line.strip() for line in f)
                self.href_referrer = {entry for entry in stripped if entry}
            finally:
                self._unlock_file(f)
    except FileNotFoundError:
        # No referrer file yet: nothing to refresh.
        return
    except Exception as e:
        logger.error(f"刷新referrer缓存失败: {e}")
|
||||||
|
|
||||||
def get_icon_sync(self, entity: Favicon, _cached: bytes = None) -> Optional[bytes]:
|
def get_icon_sync(self, entity: Favicon, _cached: bytes = None) -> Optional[bytes]:
|
||||||
"""同步获取图标"""
|
"""同步获取图标"""
|
||||||
with self._lock:
|
domain_lock = None
|
||||||
if entity.domain in self.domain_list:
|
icon_content = None
|
||||||
self._queue_pull(True, self.total_queue)
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
self.domain_list.append(entity.domain)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
icon_url, icon_content = None, None
|
domain_lock = self._acquire_domain_lock(entity.domain)
|
||||||
|
if not domain_lock:
|
||||||
|
logger.warning(f"无法获取域名锁,跳过获取图标: {entity.domain}")
|
||||||
|
return _cached or default_icon_content
|
||||||
|
|
||||||
|
with self._lock:
|
||||||
|
if entity.domain in self.domain_list:
|
||||||
|
self._queue_pull(True, self.total_queue)
|
||||||
|
return _cached or default_icon_content
|
||||||
|
else:
|
||||||
|
self.domain_list.append(entity.domain)
|
||||||
|
|
||||||
# 尝试从网站获取HTML内容
|
# 尝试从网站获取HTML内容
|
||||||
html_content = entity.req_get()
|
html_content = entity.req_get()
|
||||||
if html_content:
|
if html_content:
|
||||||
icon_url = self._parse_html(html_content, entity)
|
icon_url = self._parse_html(html_content, entity)
|
||||||
|
else:
|
||||||
|
icon_url = None
|
||||||
|
|
||||||
# 尝试不同的图标获取策略
|
# 尝试不同的图标获取策略
|
||||||
strategies = [
|
strategies = [
|
||||||
|
@ -336,8 +452,8 @@ class FaviconService:
|
||||||
icon_content = _cached if _cached else default_icon_content
|
icon_content = _cached if _cached else default_icon_content
|
||||||
|
|
||||||
if icon_content:
|
if icon_content:
|
||||||
cache_path = os.path.join(icon_root_path, 'data/icon', entity.domain_md5 + '.png')
|
cache_path = os.path.join(icon_root_path, 'data', 'icon', entity.domain_md5 + '.png')
|
||||||
md5_path = os.path.join(icon_root_path, 'data/text', entity.domain_md5 + '.txt')
|
md5_path = os.path.join(icon_root_path, 'data', 'text', entity.domain_md5 + '.txt')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 确保目录存在
|
# 确保目录存在
|
||||||
|
@ -356,8 +472,11 @@ class FaviconService:
|
||||||
return icon_content
|
return icon_content
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"获取图标时发生错误 {entity.domain}: {e}")
|
logger.error(f"获取图标时发生错误 {entity.domain}: {e}")
|
||||||
return None
|
return _cached or default_icon_content
|
||||||
finally:
|
finally:
|
||||||
|
if domain_lock:
|
||||||
|
self._release_domain_lock(domain_lock)
|
||||||
|
|
||||||
with self._lock:
|
with self._lock:
|
||||||
if entity.domain in self.domain_list:
|
if entity.domain in self.domain_list:
|
||||||
self.domain_list.remove(entity.domain)
|
self.domain_list.remove(entity.domain)
|
||||||
|
@ -438,14 +557,14 @@ class FaviconService:
|
||||||
else:
|
else:
|
||||||
# 没有缓存,实时处理,检查队列大小
|
# 没有缓存,实时处理,检查队列大小
|
||||||
queue_size = self.icon_queue.qsize()
|
queue_size = self.icon_queue.qsize()
|
||||||
if queue_size >= 16:
|
if queue_size >= self.MAX_QUEUE_SIZE:
|
||||||
# 加入后台队列并返回默认图片
|
# 加入后台队列并返回默认图片
|
||||||
logger.info(f"队列大小({queue_size})>=16,返回默认图片并加入后台队列: {entity.domain}")
|
logger.info(f"队列大小({queue_size})>={self.MAX_QUEUE_SIZE},返回默认图片并加入后台队列: {entity.domain}")
|
||||||
bg_tasks.add_task(self.get_icon_sync, entity, _cached)
|
bg_tasks.add_task(self.get_icon_sync, entity, _cached)
|
||||||
return self.get_default(0)
|
return self.get_default(0)
|
||||||
else:
|
else:
|
||||||
# 队列<16,实时处理
|
# 队列<MAX_QUEUE_SIZE,实时处理
|
||||||
logger.info(f"队列大小({queue_size})<16,实时处理: {entity.domain}")
|
logger.info(f"队列大小({queue_size})<{self.MAX_QUEUE_SIZE},实时处理: {entity.domain}")
|
||||||
icon_content = self.get_icon_sync(entity, _cached)
|
icon_content = self.get_icon_sync(entity, _cached)
|
||||||
|
|
||||||
if not icon_content:
|
if not icon_content:
|
||||||
|
|
Loading…
Reference in New Issue