修改失败URL处理逻辑

2025-09-21 00:12:01 +08:00 · 2025-09-21 00:12:01 +08:00 · 634cb504da
parent 1b9a50ec71
commit 634cb504da
16 changed files with 93 additions and 151 deletions
--- a/favicon_app/models/favicon.py
+++ b/favicon_app/models/favicon.py
@ -8,7 +8,7 @@ import os
 import re
 import socket
 import time
-from typing import Tuple, Optional, Dict
+from typing import Tuple, Optional
 from urllib.parse import urlparse, unquote

 import aiohttp
@ -17,12 +17,11 @@ import urllib3

 import setting
 from favicon_app.utils import header
+from favicon_app.utils.file_util import FileUtil
 from favicon_app.utils.filetype import helpers, filetype

-# 禁用SSL警告
 urllib3.disable_warnings()
 logging.captureWarnings(True)
-# 配置日志
 logger = logging.getLogger(__name__)

 # 创建requests会话池
@ -34,10 +33,8 @@ requests_session.verify = False
 DEFAULT_TIMEOUT = 10
 DEFAULT_RETRIES = 2

-# 存储失败的URL，值为缓存过期时间戳
-failed_urls: Dict[str, int] = dict()
-# 记录上次保存失败URL的时间
-_last_saved_failed_urls = time.time()
+# 临时存储域名和对应的MD5值
+domain_md5_mapping = dict()

 # 创建aiohttp客户端会话池
 _aiohttp_client = None
@ -114,8 +111,7 @@ class Favicon:
            if self.domain:
                self.domain_md5 = hashlib.md5(self.domain.encode("utf-8")).hexdigest()
        except Exception as e:
-            # failed_urls[self.domain] = setting.time_of_1_days + int(time.time())
-            add_failed_url(self.domain, setting.time_of_1_days + int(time.time()))
+            add_failed_url(self.domain)
            self.scheme = None
            self.domain = None
            logger.error('URL解析错误: %s, URL: %s', str(e), url)
@ -275,8 +271,7 @@ def _check_internal(domain: str) -> bool:
                        return True
            return False
    except Exception as e:
-        # failed_urls[domain] = setting.time_of_1_days + int(time.time())
-        add_failed_url(domain, setting.time_of_1_days + int(time.time()))
+        add_failed_url(domain)
        logger.error('解析域名出错: %s, 错误: %s', domain, str(e))
        return False

@ -346,153 +341,100 @@ async def _req_get(url: str,
                    content = await resp.read()
                    return content, ct_type
                else:
-                    # failed_urls[domain] = setting.time_of_1_hours + int(time.time())
-                    add_failed_url(domain, setting.time_of_1_hours + int(time.time()))
+                    add_failed_url(domain)
                    logger.error('异步请求失败: %d, URL: %s', resp.status, url)
                    break
        except (aiohttp.ClientConnectorError, aiohttp.ServerTimeoutError) as e:
            retry_count += 1
            if retry_count > retries:
-                # failed_urls[domain] = setting.time_of_5_minus + int(time.time())
-                add_failed_url(domain, setting.time_of_5_minus + int(time.time()))
+                add_failed_url(domain)
                logger.error('异步请求超时: %s, URL: %s', str(e), url)
            else:
                logger.warning('异步请求超时，正在重试(%d/%d): %s', retry_count, retries, url)
                continue
        except Exception as e:
-            # failed_urls[domain] = setting.time_of_1_hours + int(time.time())
-            add_failed_url(domain, setting.time_of_1_hours + int(time.time()))
+            add_failed_url(domain)
            logger.error('异步请求异常: %s, URL: %s', str(e), url)
            break

    return None, None


-def add_failed_url(domain: str, expire_time: int):
-    """添加失败的URL，并在数量达到10的倍数时保存到文件
+def add_failed_url(domain: str):
+    """Record a failed domain by writing a marker file named after the domain's MD5.

    Args:
        domain: 域名
-        expire_time: 过期时间戳
    """
-    global failed_urls
-
-    # 添加或更新失败URL
-    if not domain:  # 确保域名不为空
+    # Skip empty or None domains; there is nothing to record.
+    if not domain:
        return

-    old_count = len(failed_urls)
-    failed_urls[domain] = expire_time
-    new_count = len(failed_urls)
-
-    # 检查是否需要保存到文件（当新增了指定数量的URL或数量是指定阈值的倍数）
-    if (new_count % setting.FAILED_URLS_SAVE_THRESHOLD == 0
-            or (new_count - old_count) >= setting.FAILED_URLS_SAVE_THRESHOLD):
-        save_failed_urls()
-
-
-def save_failed_urls():
-    """保存失败的URL到文件，每增加10个URL触发一次"""
-    global failed_urls, _last_saved_failed_urls
-
    try:
-        # 读取现有文件内容
-        existing_urls = {}
-        if os.path.exists(setting.failed_urls_file):
-            try:
-                # 确保目录存在
-                os.makedirs(os.path.dirname(setting.failed_urls_file), exist_ok=True)
+        # Create the marker directory on demand (no-op when it already exists).
+        os.makedirs(setting.failed_urls_dir, exist_ok=True)

-                # 读取文件内容
-                with open(setting.failed_urls_file, 'r', encoding='utf-8') as f:
-                    lines = f.readlines()
+        # The marker file is "<md5 hex digest of the domain>.txt" inside that directory.
+        domain_md5 = hashlib.md5(domain.encode("utf-8")).hexdigest()
+        file_path = os.path.join(setting.failed_urls_dir, f"{domain_md5}.txt")

-                # 解析文件内容
-                for line in lines:
-                    line = line.strip()
-                    if line and '\t' in line:
-                        try:
-                            domain, timestamp_str = line.split('\t', 1)
-                            timestamp = int(timestamp_str)
-                            existing_urls[domain] = timestamp
-                        except:
-                            continue
-            except Exception as e:
-                logger.error('读取失败URL文件出错: %s', str(e))
+        # Human-readable local timestamp; is_failed_url() checks the file mtime, not this text.
+        formatted_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())

-        # 合并当前失败URL和文件中的URL，保留最新的过期时间
-        merged_urls = {**existing_urls}
-        for domain, timestamp in failed_urls.items():
-            # 只保留过期时间更晚的条目
-            if domain not in merged_urls or timestamp > merged_urls[domain]:
-                merged_urls[domain] = timestamp
+        # Marker content is "<domain>--<timestamp>".
+        FileUtil.write_file(file_path, f"{domain}--{formatted_time}")

-        # 保存合并后的结果
-        os.makedirs(os.path.dirname(setting.failed_urls_file), exist_ok=True)
-        with open(setting.failed_urls_file, 'w', encoding='utf-8') as f:
-            for domain, timestamp in merged_urls.items():
-                # 只保留未过期的URL（时间戳大于当前时间）
-                if timestamp > time.time():
-                    f.write(f"{domain}\t{timestamp}\n")
+        # Remember the domain -> MD5 pair in the module-level mapping.
+        domain_md5_mapping[domain] = domain_md5

-        # 更新内存中的failed_urls为合并和去重后的结果
-        failed_urls = merged_urls
-        _last_saved_failed_urls = time.time()
-
-        logger.info(f'成功保存{len(merged_urls)}个失败URL到文件')
+        logger.debug('成功添加失败URL到文件: %s', domain)
    except Exception as e:
-        logger.error('保存失败URL到文件出错: %s', str(e))
+        logger.error('添加失败URL到文件出错: %s, 域名: %s', str(e), domain)


-def load_failed_urls():
-    """从文件加载失败的URL到内存中
+def is_failed_url(domain: str) -> bool:
+    """Report whether ``domain`` has an unexpired failure marker file.
    
-    当failed_urls为空时调用，从failed_urls_file读取数据并加载到failed_urls字典中
-    只加载未过期的URL
+    Args:
+        domain: Domain name to look up; a falsy value is never a failed URL.
+
+    Returns:
+        True if the marker file exists and was modified no more than
+        ``setting.FAILED_URL_EXPIRE_TIME`` seconds ago, otherwise False.
    """
-    global failed_urls
-
+    # Hashing None would raise, and an empty domain can have no marker.
+    if not domain:
+        return False
+
    try:
-        if not os.path.exists(setting.failed_urls_file):
-            logger.info('失败URL文件不存在，无需加载')
-            return
-
-        # 确保目录存在
-        os.makedirs(os.path.dirname(setting.failed_urls_file), exist_ok=True)
-
-        # 读取文件内容
-        with open(setting.failed_urls_file, 'r', encoding='utf-8') as f:
-            lines = f.readlines()
-
-        # 解析文件内容，只加载未过期的URL
-        loaded_urls = {}
-        current_time = time.time()
-        for line in lines:
-            line = line.strip()
-            if line and '\t' in line:
-                try:
-                    domain, timestamp_str = line.split('\t', 1)
-                    timestamp = int(timestamp_str)
-                    # 只加载未过期的URL
-                    if timestamp > current_time:
-                        loaded_urls[domain] = timestamp
-                except:
-                    continue
-
-        # 更新内存中的failed_urls
-        if loaded_urls:
-            failed_urls.update(loaded_urls)
-            logger.info(f'成功从文件加载{len(loaded_urls)}个未过期的失败URL')
+        # The MD5 is recomputed on every lookup: caching each requested domain
+        # would grow the module-level mapping without bound.
+        domain_md5 = hashlib.md5(domain.encode("utf-8")).hexdigest()
+        file_path = os.path.join(setting.failed_urls_dir, f"{domain_md5}.txt")
+
+        # Read the mtime directly (no exists() pre-check) so a marker that is
+        # missing, or removed by a concurrent request, just means "not failed".
+        try:
+            file_mtime = os.path.getmtime(file_path)
+        except OSError:
+            return False
+
+        # A marker is valid for FAILED_URL_EXPIRE_TIME seconds after its last modification.
+        if time.time() - file_mtime <= setting.FAILED_URL_EXPIRE_TIME:
+            return True
        else:
-            logger.info('文件中没有未过期的失败URL需要加载')
+            # Expired: drop the cached entry, then remove the marker (best effort).
+            domain_md5_mapping.pop(domain, None)
+            try:
+                os.remove(file_path)
+            except OSError as e:
+                logger.debug('删除过期失败URL文件出错: %s, 域名: %s', str(e), domain)
+            return False
    except Exception as e:
-        logger.error('从文件加载失败URL出错: %s', str(e))
+        logger.error('检查失败URL出错: %s, 域名: %s', str(e), domain)
+        return False


-# 初始化时，如果failed_urls为空，则从文件加载
-if not failed_urls:
-    load_failed_urls()
-
 # 域名验证正则表达式
 _pattern_domain = re.compile(
    r'[a-zA-Z0-9\u4E00-\u9FA5][-a-zA-Z0-9\u4E00-\u9FA5]{0,62}(\.[a-zA-Z0-9\u4E00-\u9FA5][-a-zA-Z0-9\u4E00-\u9FA5]{0,62})+\.?',
--- a/favicon_app/routes/favicon_routes.py
+++ b/favicon_app/routes/favicon_routes.py
@ -20,11 +20,10 @@ _icon_root_path = setting.icon_root_path
 _default_icon_path = setting.default_icon_path

 # 创建FastAPI路由器
-favicon_router = APIRouter(prefix="", tags=["favicon"])
+favicon_router = APIRouter(prefix="/icon", tags=["favicon"])


-@favicon_router.get('/icon/')
-@favicon_router.get('/icon')
+@favicon_router.get('/')
 async def get_favicon(
        request: Request,
        bg_tasks: BackgroundTasks,
@ -37,13 +36,13 @@ async def get_favicon(
    return await favicon_service.get_favicon_handler(request, bg_tasks, url, refresh)


-@favicon_router.get('/icon/default')
+@favicon_router.get('/default')
 async def get_default_icon():
    """获取默认图标"""
    return favicon_service.get_default()


-@favicon_router.get('/icon/referer', include_in_schema=False)
+@favicon_router.get('/referer', include_in_schema=False)
 async def get_referrer(unique: Optional[str] = Query(None)):
    """获取请求来源信息，带unique参数时会进行去重处理"""
    content = 'None'
--- a/favicon_app/routes/favicon_service.py
+++ b/favicon_app/routes/favicon_service.py
@ -258,15 +258,11 @@ async def get_favicon_handler(request: Request,
            return get_default(setting.time_of_1_days)

        # 检查缓存中的失败URL
-        if entity.domain in favicon.failed_urls:
-            if int(time.time()) <= favicon.failed_urls.get(entity.domain):
-                return get_default(setting.time_of_1_days)
-            else:
-                del favicon.failed_urls[entity.domain]
+        if favicon.is_failed_url(entity.domain):
+            return get_default(setting.time_of_1_days)

-        logger.info(
-            f"-> count (failed/cached/icon/url): "
-            f"{len(favicon.failed_urls)}/{_cache_count}/{_icon_count}/{_url_count}"
+        logger.debug(
+            f"-> count (cached/icon/url): "f"{_cache_count}/{_icon_count}/{_url_count}"
        )

        # 检查缓存
@ -333,7 +329,7 @@ async def get_icon_async(entity: Favicon, _cached: bytes = None) -> Optional[byt
            # 0. 从原始网页标签链接中获取
            lambda: (icon_url, "原始网页标签") if icon_url else (None, None),
        ]
-        
+
        # 2. 从配置文件加载其他图标获取接口
        for _template, _name in setting.FAVICON_APIS:
            strategies.append(
--- a/main.py
+++ b/main.py
@ -5,7 +5,6 @@ import os

 from fastapi import FastAPI, Request
 from fastapi.responses import Response
-from fastapi.staticfiles import StaticFiles

 import setting
 from favicon_app.routes import favicon_router
@ -24,7 +23,6 @@ referer_log_file = setting.referer_log_file

 # fastapi
 app = FastAPI(title="Favicon API", description="获取网站favicon图标", version="3.0")
-app.mount("/static", StaticFiles(directory="static"), name="static")
 app.include_router(favicon_router)


--- a/setting.py
+++ b/setting.py
@ -18,8 +18,8 @@ default_icon_file = FileUtil.read_file(default_icon_path, mode='rb')
 referer_log_file = os.path.join(icon_root_path, 'data', 'referer.txt')
 # 定义失败URL日志文件路径
 failed_urls_file = os.path.join(icon_root_path, 'data', 'failedurls.txt')
-# 失败URL保存阈值，当失败URL数量达到此值的倍数时保存到文件
-FAILED_URLS_SAVE_THRESHOLD = 10
+# 定义失败URL存储目录
+failed_urls_dir = os.path.join(icon_root_path, 'data', 'failed_urls')

 # 时间常量
 time_of_1_minus = 1 * 60
@ -38,6 +38,9 @@ time_of_7_days = 7 * time_of_1_days
 time_of_15_days = 15 * time_of_1_days
 time_of_30_days = 30 * time_of_1_days

+# 失败URL默认失效时间
+FAILED_URL_EXPIRE_TIME = time_of_6_hours
+
 # 图标获取接口配置
 # 格式: (模板URL, 名称)
 # 支持的变量: {domain} - 域名, {base_url} - 基础URL
--- a/static/css/app.6e1a9be5bf9dacce170f04018b053469.css
+++ b/static/css/app.6e1a9be5bf9dacce170f04018b053469.css
--- a/static/css/app.6e1a9be5bf9dacce170f04018b053469.css.map
+++ b/static/css/app.6e1a9be5bf9dacce170f04018b053469.css.map
--- a/static/fonts/element-icons.535877f.woff
+++ b/static/fonts/element-icons.535877f.woff
--- a/static/fonts/element-icons.732389d.ttf
+++ b/static/fonts/element-icons.732389d.ttf
--- a/static/js/app.c97f95b50095b442df6d.js
+++ b/static/js/app.c97f95b50095b442df6d.js
@ -1,2 +0,0 @@
-webpackJsonp([1],{Lptq:function(e,t){},NHnr:function(e,t,n){"use strict";Object.defineProperty(t,"__esModule",{value:!0});var i=n("zxxf"),a=n("yebG"),o=(n("Lptq"),{name:"App",data:function(){return{height:600,marginTop:200,year:(new Date).getFullYear(),iconUrl:"",iconImg:"https://api.xinac.net/icon/default",defaultIcon:"https://api.xinac.net/icon/default",placeholder:"https://www.xinac.net",headerLink:"https://api.xinac.net"}},methods:{handle:function(){console.log("表单提交的数据：",this.iconUrl),this.iconUrl?this.iconImg="https://api.xinac.net/icon/?url="+this.iconUrl:this.iconImg=this.defaultIcon},resize:function(){this.height=document.documentElement.clientHeight,this.marginTop=(this.height-300)/2},linkTo:function(){window.location.href=this.headerLink}},mounted:function(){var e=this;this.resize(),window.onresize=function(){e.resize()}}}),l={render:function(){var e=this,t=e.$createElement,n=e._self._c||t;return n("div",{attrs:{id:"app"}},[n("el-container",{style:{height:e.height+"px"}},[n("el-header",[n("h2",{on:{click:e.linkTo}},[e._v(e._s(e.headerLink))])]),e._v(" "),n("el-main",{style:{marginTop:e.marginTop+"px"}},[n("div",[n("el-input",{attrs:{placeholder:e.placeholder,clearable:"",autofocus:""},on:{change:e.handle},model:{value:e.iconUrl,callback:function(t){e.iconUrl=t},expression:"iconUrl"}},[n("template",{attrs:{slot:"prepend"},slot:"prepend"},[e._v("https://api.xinac.net/icon/?url=")]),e._v(" "),n("template",{attrs:{slot:"append"},slot:"append"},[n("el-image",{staticStyle:{width:"32px",height:"32px"},attrs:{src:e.iconImg}})],1)],2)],1)]),e._v(" "),n("el-footer",[e._v("© "+e._s(e.year)+" "),n("a",{attrs:{href:e.placeholder}},[e._v("xinac.net")])])],1)],1)},staticRenderFns:[]};var 
u=n("lp4z")(o,l,!1,function(e){n("jQJW")},null,null).exports;i.default.config.productionTip=!1,i.default.use(a.Autocomplete),i.default.use(a.Input),i.default.use(a.Select),i.default.use(a.Button),i.default.use(a.ButtonGroup),i.default.use(a.Tooltip),i.default.use(a.Form),i.default.use(a.FormItem),i.default.use(a.Icon),i.default.use(a.Container),i.default.use(a.Header),i.default.use(a.Aside),i.default.use(a.Main),i.default.use(a.Footer),i.default.use(a.Image),new i.default({el:"#app",render:function(e){return e(u)}})},jQJW:function(e,t){}},["NHnr"]);
-//# sourceMappingURL=app.c97f95b50095b442df6d.js.map
--- a/static/js/app.c97f95b50095b442df6d.js.map
+++ b/static/js/app.c97f95b50095b442df6d.js.map
--- a/static/js/manifest.3c886754347e2a8c0b4d.js
+++ b/static/js/manifest.3c886754347e2a8c0b4d.js
@ -1,2 +0,0 @@
-!function(e){var n=window.webpackJsonp;window.webpackJsonp=function(r,c,a){for(var i,u,f,s=0,l=[];s<r.length;s++)u=r[s],t[u]&&l.push(t[u][0]),t[u]=0;for(i in c)Object.prototype.hasOwnProperty.call(c,i)&&(e[i]=c[i]);for(n&&n(r,c,a);l.length;)l.shift()();if(a)for(s=0;s<a.length;s++)f=o(o.s=a[s]);return f};var r={},t={2:0};function o(n){if(r[n])return r[n].exports;var t=r[n]={i:n,l:!1,exports:{}};return e[n].call(t.exports,t,t.exports,o),t.l=!0,t.exports}o.e=function(e){var n=t[e];if(0===n)return new Promise(function(e){e()});if(n)return n[2];var r=new Promise(function(r,o){n=t[e]=[r,o]});n[2]=r;var c=document.getElementsByTagName("head")[0],a=document.createElement("script");a.type="text/javascript",a.charset="utf-8",a.async=!0,a.timeout=12e4,o.nc&&a.setAttribute("nonce",o.nc),a.src=o.p+"static/js/"+e+"."+{0:"c377031f0b4534916d3a",1:"c97f95b50095b442df6d"}[e]+".js";var i=setTimeout(u,12e4);function u(){a.onerror=a.onload=null,clearTimeout(i);var n=t[e];0!==n&&(n&&n[1](new Error("Loading chunk "+e+" failed.")),t[e]=void 0)}return a.onerror=a.onload=u,c.appendChild(a),r},o.m=e,o.c=r,o.d=function(e,n,r){o.o(e,n)||Object.defineProperty(e,n,{configurable:!1,enumerable:!0,get:r})},o.n=function(e){var n=e&&e.__esModule?function(){return e.default}:function(){return e};return o.d(n,"a",n),n},o.o=function(e,n){return Object.prototype.hasOwnProperty.call(e,n)},o.p="/",o.oe=function(e){throw console.error(e),e}}([]);
-//# sourceMappingURL=manifest.3c886754347e2a8c0b4d.js.map
--- a/static/js/manifest.3c886754347e2a8c0b4d.js.map
+++ b/static/js/manifest.3c886754347e2a8c0b4d.js.map
--- a/static/js/vendor.c377031f0b4534916d3a.js
+++ b/static/js/vendor.c377031f0b4534916d3a.js
--- a/static/js/vendor.c377031f0b4534916d3a.js.map
+++ b/static/js/vendor.c377031f0b4534916d3a.js.map
--- a/templates/index.html
+++ b/templates/index.html
@ -1 +1,17 @@
-<!DOCTYPE html><html><head><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><title>favicon图标获取API</title><link href=/static/css/app.6e1a9be5bf9dacce170f04018b053469.css rel=stylesheet></head><body><div id=app></div><script type=text/javascript src=/static/js/manifest.3c886754347e2a8c0b4d.js></script><script type=text/javascript src=/static/js/vendor.c377031f0b4534916d3a.js></script><script type=text/javascript src=/static/js/app.c97f95b50095b442df6d.js></script></body></html>
+<!DOCTYPE html>
+<html lang="zh" translate="no">
+<head>
+    <meta charset="utf-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width,initial-scale=1">
+    <title>favicon图标获取API</title>
+    <!-- Assets are loaded from absolute api.xinac.net URLs; the app itself mounts no /static route. -->
+    <link href="https://api.xinac.net/static/css/app.6e1a9be5bf9dacce170f04018b053469.css" rel="stylesheet">
+</head>
+<body>
+<div id="app"></div>
+<script src="https://api.xinac.net/static/js/manifest.3c886754347e2a8c0b4d.js"></script>
+<script src="https://api.xinac.net/static/js/vendor.c377031f0b4534916d3a.js"></script>
+<script src="https://api.xinac.net/static/js/app.c97f95b50095b442df6d.js"></script>
+</body>
+</html>