update
parent
d3b21d6a11
commit
6c564e6e99
|
@ -173,6 +173,7 @@ def _get_link_rel(links, entity: Favicon, _rel: str) -> Optional[str]:
|
|||
if not links:
|
||||
return None
|
||||
|
||||
_result = None
|
||||
for link in links:
|
||||
r = link.get('rel')
|
||||
_r = ' '.join(r) if isinstance(r, list) else r
|
||||
|
@ -180,11 +181,11 @@ def _get_link_rel(links, entity: Favicon, _rel: str) -> Optional[str]:
|
|||
|
||||
if _rel:
|
||||
if _r.lower() == _rel:
|
||||
return entity.get_icon_url(str(_href))
|
||||
_result = entity.get_icon_url(str(_href))
|
||||
else:
|
||||
return entity.get_icon_url(str(_href))
|
||||
_result = entity.get_icon_url(str(_href))
|
||||
|
||||
return None
|
||||
return _result
|
||||
|
||||
|
||||
def _parse_html(content: bytes, entity: Favicon) -> Optional[str]:
|
||||
|
@ -203,6 +204,13 @@ def _parse_html(content: bytes, entity: Favicon) -> Optional[str]:
|
|||
|
||||
html_links = bs.find_all("link", rel=pattern_icon)
|
||||
|
||||
# 处理<base>问题
|
||||
base_soup = bs4.BeautifulSoup(content_str, 'lxml', parse_only=SoupStrainer("base"))
|
||||
if base_soup:
|
||||
_base = base_soup.select_one('base[href]')
|
||||
if _base:
|
||||
logger.warning(f"-> 页面检测到<base>标签:{_base['href']} | {entity.domain} <-")
|
||||
|
||||
# 如果没有找到,尝试使用正则表达式直接匹配
|
||||
if not html_links or len(html_links) == 0:
|
||||
content_links = pattern_link.findall(content_str)
|
||||
|
|
Loading…
Reference in New Issue