From 6c564e6e9929563089c91d5b1f9f892df6e292be Mon Sep 17 00:00:00 2001 From: jinql Date: Wed, 10 Sep 2025 22:56:31 +0800 Subject: [PATCH] update --- favicon_app/routes/favicon_service.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/favicon_app/routes/favicon_service.py b/favicon_app/routes/favicon_service.py index 4f170a1..a6a0c16 100644 --- a/favicon_app/routes/favicon_service.py +++ b/favicon_app/routes/favicon_service.py @@ -173,6 +173,7 @@ def _get_link_rel(links, entity: Favicon, _rel: str) -> Optional[str]: if not links: return None + _result = None for link in links: r = link.get('rel') _r = ' '.join(r) if isinstance(r, list) else r @@ -180,11 +181,11 @@ def _get_link_rel(links, entity: Favicon, _rel: str) -> Optional[str]: if _rel: if _r.lower() == _rel: - return entity.get_icon_url(str(_href)) + _result = entity.get_icon_url(str(_href)) else: - return entity.get_icon_url(str(_href)) + _result = entity.get_icon_url(str(_href)) - return None + return _result def _parse_html(content: bytes, entity: Favicon) -> Optional[str]: @@ -203,6 +204,13 @@ def _parse_html(content: bytes, entity: Favicon) -> Optional[str]: html_links = bs.find_all("link", rel=pattern_icon) + # 处理问题 + base_soup = bs4.BeautifulSoup(content_str, 'lxml', parse_only=SoupStrainer("base")) + if base_soup: + _base = base_soup.select_one('base[href]') + if _base: + logger.warning(f"-> 页面检测到标签:{_base['href']} | {entity.domain} <-") + # 如果没有找到,尝试使用正则表达式直接匹配 if not html_links or len(html_links) == 0: content_links = pattern_link.findall(content_str)