This commit is contained in:
jinql
2025-09-10 15:36:18 +08:00
parent cb9b9c2d35
commit 0adc1e717d
8 changed files with 48 additions and 253 deletions

View File

@@ -6,7 +6,8 @@ import ipaddress
import logging
import re
import socket
from typing import Tuple, Optional
import time
from typing import Tuple, Optional, Dict
from urllib.parse import urlparse
import aiohttp
@@ -14,7 +15,7 @@ import requests
import urllib3
import setting
from favicon_app.utils import header, redis_pool
from favicon_app.utils import header
from favicon_app.utils.filetype import helpers, filetype
# 禁用SSL警告
@@ -22,7 +23,6 @@ urllib3.disable_warnings()
logging.captureWarnings(True)
# 配置日志
logger = logging.getLogger(__name__)
# warnings.filterwarnings("ignore", category=RuntimeWarning)
# 创建requests会话池
requests_session = requests.Session()
@@ -34,7 +34,7 @@ DEFAULT_TIMEOUT = 10
DEFAULT_RETRIES = 2
# Failed domains: maps a domain to its cache-expiry timestamp (int(time.time()) plus a TTL from `setting`)
# failed_urls: Dict[str, int] = dict()
failed_urls: Dict[str, int] = dict()
# 创建aiohttp客户端会话池
_aiohttp_client = None
@@ -111,7 +111,7 @@ class Favicon:
if self.domain:
self.domain_md5 = hashlib.md5(self.domain.encode("utf-8")).hexdigest()
except Exception as e:
redis_pool.set_failed_domain(self.domain, setting.time_of_1_days)
failed_urls[self.domain] = setting.time_of_1_days + int(time.time())
self.scheme = None
self.domain = None
logger.error('URL解析错误: %s, URL: %s', str(e), url)
@@ -265,7 +265,7 @@ def _check_internal(domain: str) -> bool:
return True
return False
except Exception as e:
redis_pool.set_failed_domain(domain, setting.time_of_1_days)
failed_urls[domain] = setting.time_of_1_days + int(time.time())
logger.error('解析域名出错: %s, 错误: %s', domain, str(e))
return False
@@ -335,19 +335,19 @@ async def _req_get(url: str,
content = await resp.read()
return content, ct_type
else:
await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
failed_urls[domain] = setting.time_of_1_hours + int(time.time())
logger.error('异步请求失败: %d, URL: %s', resp.status, url)
break
except (aiohttp.ClientConnectorError, aiohttp.ServerTimeoutError) as e:
retry_count += 1
if retry_count > retries:
await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
failed_urls[domain] = setting.time_of_5_minus + int(time.time())
logger.error('异步请求超时: %s, URL: %s', str(e), url)
else:
logger.warning('异步请求超时,正在重试(%d/%d): %s', retry_count, retries, url)
continue
except Exception as e:
await redis_pool.set_failed_domain(domain, setting.time_of_5_minus)
failed_urls[domain] = setting.time_of_1_hours + int(time.time())
logger.error('异步请求异常: %s, URL: %s', str(e), url)
break
@@ -358,9 +358,3 @@ async def _req_get(url: str,
# Pre-compiled, case-insensitive pattern for a dotted domain name.
# A label starts with an ASCII letter/digit or a character in U+4E00-U+9FA5,
# followed by up to 62 more such characters or hyphens (so 1-63 per label).
# At least two labels separated by '.' are required; one trailing '.' is
# allowed. The pattern carries no ^/$ anchors, so whether it must cover the
# whole input depends on the call site (match/search/fullmatch).
_pattern_domain = re.compile(
r'[a-zA-Z0-9\u4E00-\u9FA5][-a-zA-Z0-9\u4E00-\u9FA5]{0,62}(\.[a-zA-Z0-9\u4E00-\u9FA5][-a-zA-Z0-9\u4E00-\u9FA5]{0,62})+\.?',
re.I)
# def failed_url_cache(_domain: str, _time: int):
# if _domain:
# _current_time = int(time.time())
# if (not failed_urls.get(_domain)) or (_current_time <= failed_urls.get(_domain)):
# failed_urls[_domain] = _current_time + _time