diff --git a/entrypoint.sh b/entrypoint.sh index 24dfab7..cf6d02f 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -34,6 +34,6 @@ if [ ! -f "$logging_conf" ]; then fi fi -mkdir -p /app/logs /app/data/icon /app/data/text +mkdir -p /app/logs /app/data exec "$@" diff --git a/favicon_app/routes/favicon_routes.py b/favicon_app/routes/favicon_routes.py index e26b61c..e81f0dd 100644 --- a/favicon_app/routes/favicon_routes.py +++ b/favicon_app/routes/favicon_routes.py @@ -54,13 +54,21 @@ async def get_count(): @favicon_router.get('/icon/referer', include_in_schema=False) -async def get_referrer(): - """获取请求来源信息""" +async def get_referrer(unique: Optional[str] = Query(None)): + """获取请求来源信息,带unique参数时会进行去重处理""" content = 'None' path = os.path.join(_icon_root_path, 'data', 'referer.txt') + if os.path.exists(path): try: content = FileUtil.read_file(path, mode='r') or 'None' + + if unique in ['true', '1', 'True']: + lines = [line.strip() for line in content.split('\n') if line.strip()] + unique_lines = list(set(lines)) + unique_content = '\n'.join(unique_lines) + FileUtil.write_file(path, unique_content, mode='w') + content = unique_content except Exception as e: logger.error(f"读取referer文件失败: {e}") return Response(content=content, media_type="text/plain") diff --git a/favicon_app/routes/favicon_service.py b/favicon_app/routes/favicon_service.py index f04a72a..fbbebcd 100644 --- a/favicon_app/routes/favicon_service.py +++ b/favicon_app/routes/favicon_service.py @@ -39,6 +39,7 @@ class FaviconService: def __init__(self): # 使用锁保证线程安全 self._lock = Lock() + # 全局计数器和集合 self.url_count = 0 self.request_icon_count = 0 @@ -46,7 +47,9 @@ class FaviconService: self.domain_list: List[str] = list() # 初始化队列 + # 实时处理的任务数量 self.icon_queue = Queue() + # 所有正在处理的任务数量 self.total_queue = Queue() # 队列阈值常量配置 @@ -193,12 +196,14 @@ class FaviconService: } def _queue_pull(self, is_pull: bool = True, _queue: Queue = None) -> None: - """从队列中取出元素""" + """从队列中取出元素,用于任务完成后移除队列中的记录 + - is_pull: 是否执行取出操作 + - _queue: 要操作的队列,默认为icon_queue + """ if _queue is None: _queue = self.icon_queue if is_pull and not _queue.empty(): - # _queue.get() try: _queue.get_nowait() _queue.task_done() @@ -269,7 +274,6 @@ class FaviconService: icon_content = None try: - with self._lock: if entity.domain in self.domain_list: self._queue_pull(True, self.total_queue) @@ -340,6 +344,8 @@ class FaviconService: with self._lock: if entity.domain in self.domain_list: self.domain_list.remove(entity.domain) + # 同时从两个队列中移除元素(处理完成) + self._queue_pull(True, self.icon_queue) self._queue_pull(True, self.total_queue) def get_count(self) -> Dict[str, int]: @@ -369,7 +375,6 @@ class FaviconService: if not url: return {"message": "请提供url参数"} - logger.info('##########################################################') try: entity = Favicon(url) @@ -396,6 +401,8 @@ class FaviconService: if _cached and not cached_icon: # 缓存已过期,后台刷新缓存 logger.info(f"缓存已过期,加入后台队列刷新: {entity.domain}") + # 只增加总队列计数,不增加实时队列计数(后台任务) + self.total_queue.put(entity.domain) bg_tasks.add_task(self.get_icon_sync, entity, _cached) return Response(content=icon_content, @@ -408,6 +415,8 @@ class FaviconService: if not is_sync: # 返回默认图片并加入后台队列 logger.info(f"返回默认图片并加入后台队列: {entity.domain}") + # 只增加总队列计数,不增加实时队列计数(后台任务) + self.total_queue.put(entity.domain) bg_tasks.add_task(self.get_icon_sync, entity, _cached) return self.get_default(0) else: @@ -416,11 +425,16 @@ class FaviconService: if queue_size >= self.MAX_QUEUE_SIZE: # 加入后台队列并返回默认图片 logger.info(f"队列大小({queue_size})>={self.MAX_QUEUE_SIZE},返回默认图片并加入后台队列: {entity.domain}") + # 只增加总队列计数,不增加实时队列计数(后台任务) + self.total_queue.put(entity.domain) bg_tasks.add_task(self.get_icon_sync, entity, _cached) return self.get_default(0) else: # 队列