You've already forked favicon-api-async
1. API外部接口可配置
2. 失败的url保存到文件中
This commit is contained in:
@@ -4,6 +4,7 @@ import base64
|
||||
import hashlib
|
||||
import ipaddress
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
import time
|
||||
@@ -35,6 +36,8 @@ DEFAULT_RETRIES = 2
|
||||
|
||||
# 存储失败的URL,值为缓存过期时间戳
|
||||
failed_urls: Dict[str, int] = dict()
|
||||
# 记录上次保存失败URL的时间
|
||||
_last_saved_failed_urls = time.time()
|
||||
|
||||
# 创建aiohttp客户端会话池
|
||||
_aiohttp_client = None
|
||||
@@ -111,7 +114,8 @@ class Favicon:
|
||||
if self.domain:
|
||||
self.domain_md5 = hashlib.md5(self.domain.encode("utf-8")).hexdigest()
|
||||
except Exception as e:
|
||||
failed_urls[self.domain] = setting.time_of_1_days + int(time.time())
|
||||
# failed_urls[self.domain] = setting.time_of_1_days + int(time.time())
|
||||
add_failed_url(self.domain, setting.time_of_1_days + int(time.time()))
|
||||
self.scheme = None
|
||||
self.domain = None
|
||||
logger.error('URL解析错误: %s, URL: %s', str(e), url)
|
||||
@@ -271,7 +275,8 @@ def _check_internal(domain: str) -> bool:
|
||||
return True
|
||||
return False
|
||||
except Exception as e:
|
||||
failed_urls[domain] = setting.time_of_1_days + int(time.time())
|
||||
# failed_urls[domain] = setting.time_of_1_days + int(time.time())
|
||||
add_failed_url(domain, setting.time_of_1_days + int(time.time()))
|
||||
logger.error('解析域名出错: %s, 错误: %s', domain, str(e))
|
||||
return False
|
||||
|
||||
@@ -341,25 +346,153 @@ async def _req_get(url: str,
|
||||
content = await resp.read()
|
||||
return content, ct_type
|
||||
else:
|
||||
failed_urls[domain] = setting.time_of_1_hours + int(time.time())
|
||||
# failed_urls[domain] = setting.time_of_1_hours + int(time.time())
|
||||
add_failed_url(domain, setting.time_of_1_hours + int(time.time()))
|
||||
logger.error('异步请求失败: %d, URL: %s', resp.status, url)
|
||||
break
|
||||
except (aiohttp.ClientConnectorError, aiohttp.ServerTimeoutError) as e:
|
||||
retry_count += 1
|
||||
if retry_count > retries:
|
||||
failed_urls[domain] = setting.time_of_5_minus + int(time.time())
|
||||
# failed_urls[domain] = setting.time_of_5_minus + int(time.time())
|
||||
add_failed_url(domain, setting.time_of_5_minus + int(time.time()))
|
||||
logger.error('异步请求超时: %s, URL: %s', str(e), url)
|
||||
else:
|
||||
logger.warning('异步请求超时,正在重试(%d/%d): %s', retry_count, retries, url)
|
||||
continue
|
||||
except Exception as e:
|
||||
failed_urls[domain] = setting.time_of_1_hours + int(time.time())
|
||||
# failed_urls[domain] = setting.time_of_1_hours + int(time.time())
|
||||
add_failed_url(domain, setting.time_of_1_hours + int(time.time()))
|
||||
logger.error('异步请求异常: %s, URL: %s', str(e), url)
|
||||
break
|
||||
|
||||
return None, None
|
||||
|
||||
|
||||
def add_failed_url(domain: str, expire_time: int):
    """Record a failed URL and periodically persist the cache to disk.

    Args:
        domain: Domain that failed; empty/falsy values are ignored.
        expire_time: Unix timestamp after which this failure entry expires.
    """
    global failed_urls

    # Guard: never store an empty key in the cache.
    if not domain:
        return

    failed_urls[domain] = expire_time

    # Flush to disk whenever the cache size reaches a multiple of the save
    # threshold.  The original code also tested whether the count grew by
    # >= FAILED_URLS_SAVE_THRESHOLD in a single call, but one dict insert
    # grows the size by at most 1, so that clause could never fire and has
    # been removed.
    if len(failed_urls) % setting.FAILED_URLS_SAVE_THRESHOLD == 0:
        save_failed_urls()
|
||||
|
||||
|
||||
def save_failed_urls():
    """Merge the in-memory failed-URL cache with the on-disk file and persist it.

    File format: one ``domain<TAB>expiry_timestamp`` entry per line.  Entries
    already on disk are merged with ``failed_urls`` keeping the later expiry
    per domain; only not-yet-expired entries are written back.  On success the
    in-memory cache is replaced with the merged mapping and the last-saved
    timestamp is refreshed.  All failures are logged, never raised.
    """
    global failed_urls, _last_saved_failed_urls

    try:
        # Load what is already on disk so entries from earlier runs survive.
        # (The original also called os.makedirs here before *reading* — the
        # directory necessarily exists when the file does, so that was dead
        # work and has been dropped.)
        existing_urls = {}
        if os.path.exists(setting.failed_urls_file):
            try:
                with open(setting.failed_urls_file, 'r', encoding='utf-8') as f:
                    for line in f:
                        line = line.strip()
                        if line and '\t' in line:
                            domain, _, timestamp_str = line.partition('\t')
                            try:
                                existing_urls[domain] = int(timestamp_str)
                            except ValueError:
                                # Skip malformed timestamps instead of
                                # aborting the whole save (was a bare except).
                                continue
            except Exception as e:
                logger.error('读取失败URL文件出错: %s', str(e))

        # Merge, keeping the later expiry for each domain.
        merged_urls = {**existing_urls}
        for domain, timestamp in failed_urls.items():
            if domain not in merged_urls or timestamp > merged_urls[domain]:
                merged_urls[domain] = timestamp

        # Write back only entries whose expiry is still in the future.
        os.makedirs(os.path.dirname(setting.failed_urls_file), exist_ok=True)
        now = time.time()
        with open(setting.failed_urls_file, 'w', encoding='utf-8') as f:
            for domain, timestamp in merged_urls.items():
                if timestamp > now:
                    f.write(f"{domain}\t{timestamp}\n")

        # NOTE(review): merged_urls may still contain already-expired entries;
        # the original kept them in memory too, so that behavior is preserved.
        failed_urls = merged_urls
        _last_saved_failed_urls = time.time()

        logger.info(f'成功保存{len(merged_urls)}个失败URL到文件')
    except Exception as e:
        logger.error('保存失败URL到文件出错: %s', str(e))
|
||||
|
||||
|
||||
def load_failed_urls():
    """Load not-yet-expired failed URLs from disk into ``failed_urls``.

    Intended to be called when the in-memory cache is empty (e.g. at import
    time).  Reads ``setting.failed_urls_file`` (one ``domain<TAB>timestamp``
    per line), skips malformed lines and expired entries, and merges the rest
    into the cache.  All failures are logged, never raised.
    """
    global failed_urls

    try:
        if not os.path.exists(setting.failed_urls_file):
            logger.info('失败URL文件不存在,无需加载')
            return

        # (The original called os.makedirs before reading — unnecessary since
        # the file, and therefore its directory, already exists; removed.)
        loaded_urls = {}
        current_time = time.time()
        with open(setting.failed_urls_file, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line and '\t' in line:
                    domain, _, timestamp_str = line.partition('\t')
                    try:
                        timestamp = int(timestamp_str)
                    except ValueError:
                        # Ignore malformed timestamps rather than failing
                        # the whole load (was a bare except).
                        continue
                    # Keep only entries that have not yet expired.
                    if timestamp > current_time:
                        loaded_urls[domain] = timestamp

        if loaded_urls:
            failed_urls.update(loaded_urls)
            logger.info(f'成功从文件加载{len(loaded_urls)}个未过期的失败URL')
        else:
            logger.info('文件中没有未过期的失败URL需要加载')
    except Exception as e:
        logger.error('从文件加载失败URL出错: %s', str(e))
|
||||
|
||||
|
||||
# Bootstrap: populate the in-memory cache from disk the first time this
# module is imported (i.e. while failed_urls is still empty).
if not failed_urls:
    load_failed_urls()
|
||||
|
||||
# 域名验证正则表达式
|
||||
_pattern_domain = re.compile(
|
||||
r'[a-zA-Z0-9\u4E00-\u9FA5][-a-zA-Z0-9\u4E00-\u9FA5]{0,62}(\.[a-zA-Z0-9\u4E00-\u9FA5][-a-zA-Z0-9\u4E00-\u9FA5]{0,62})+\.?',
|
||||
|
||||
@@ -332,18 +332,14 @@ async def get_icon_async(entity: Favicon, _cached: bytes = None) -> Optional[byt
|
||||
strategies = [
|
||||
# 0. 从原始网页标签链接中获取
|
||||
lambda: (icon_url, "原始网页标签") if icon_url else (None, None),
|
||||
# 从 gstatic.cn 接口获取
|
||||
lambda: (
|
||||
f'https://t3.gstatic.cn/faviconV2?client=SOCIAL&fallback_opts=TYPE,SIZE,URL&type=FAVICON&size=128&url={entity.get_base_url()}',
|
||||
"gstatic接口"),
|
||||
# 从网站默认位置获取
|
||||
lambda: ('', "网站默认位置/favicon.ico"),
|
||||
# 从其他api接口获取
|
||||
lambda: (f'https://favicon.is/{entity.domain}', "第三方API"),
|
||||
lambda: (f'https://ico.kucat.cn/get.php?url={entity.get_base_url()}', "第三方API"),
|
||||
# 99. 最后的尝试,cloudflare workers
|
||||
# lambda: (f'https://favicon.cary.cc/?url={entity.get_base_url()}', "cloudflare"),
|
||||
]
|
||||
|
||||
# 2. 从配置文件加载其他图标获取接口
|
||||
for _template, _name in setting.FAVICON_APIS:
|
||||
strategies.append(
|
||||
lambda template=_template, name=_name:
|
||||
(template.format(domain=entity.domain, base_url=entity.get_base_url()), name)
|
||||
)
|
||||
|
||||
for strategy in strategies:
|
||||
if icon_content:
|
||||
|
||||
20
setting.py
20
setting.py
@@ -16,6 +16,10 @@ default_icon_path = os.path.join(icon_root_path, 'favicon.png')
|
||||
default_icon_file = FileUtil.read_file(default_icon_path, mode='rb')
|
||||
# 定义referer日志文件路径
|
||||
referer_log_file = os.path.join(icon_root_path, 'data', 'referer.txt')
|
||||
# 定义失败URL日志文件路径
|
||||
failed_urls_file = os.path.join(icon_root_path, 'data', 'failedurls.txt')
|
||||
# 失败URL保存阈值,当失败URL数量达到此值的倍数时保存到文件
|
||||
FAILED_URLS_SAVE_THRESHOLD = 10
|
||||
|
||||
# 时间常量
|
||||
time_of_1_minus = 1 * 60
|
||||
@@ -33,3 +37,19 @@ time_of_1_days = 1 * 24 * 60 * 60
|
||||
time_of_7_days = 7 * time_of_1_days
|
||||
time_of_15_days = 15 * time_of_1_days
|
||||
time_of_30_days = 30 * time_of_1_days
|
||||
|
||||
# 图标获取接口配置
|
||||
# 格式: (模板URL, 名称)
|
||||
# 支持的变量: {domain} - 域名, {base_url} - 基础URL
|
||||
FAVICON_APIS = [
|
||||
# gstatic.cn 接口
|
||||
('https://t3.gstatic.cn/faviconV2?client=SOCIAL&fallback_opts=TYPE,SIZE,URL&type=FAVICON&size=128&url={base_url}',
|
||||
'gstatic接口'),
|
||||
|
||||
# 第三方API
|
||||
('https://favicon.is/{domain}', '第三方API'),
|
||||
('https://ico.kucat.cn/get.php?url={base_url}', '第三方API'),
|
||||
|
||||
# 网站默认位置,放在最后
|
||||
('', '网站默认位置/favicon.ico'),
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user