diff --git a/Favicon.php b/Favicon.php index 7791c9e..056db28 100644 --- a/Favicon.php +++ b/Favicon.php @@ -134,7 +134,7 @@ class Favicon * 获取过程结束 * 计算时间及内存的占用信息 */ - $time_end = microtime(TRUE); + $time_end = microtime(TRUE); $this->_last_time_spend = $time_end - $time_start; $this->_last_memory_usage = ((!function_exists('memory_get_usage')) ? '0' : round(memory_get_usage() / 1024 / 1024, 2)) . 'MB'; @@ -187,7 +187,8 @@ class Favicon return array( 'X-Robots-Tag: noindex, nofollow', 'Content-type: image/x-icon', - 'Cache-Control: public, max-age=604800' + 'Cache-Control: public, max-age=86400', + 'Expires: ' . gmdate('D, d M Y H:i:s', time() + 86400) . ' GMT' ); } @@ -228,7 +229,7 @@ class Favicon * @return bool|string */ protected function getData() - { + { // 尝试匹配映射 $this->data = $this->_match_file_map(); @@ -263,7 +264,7 @@ class Favicon //解析HTML中的相对URL 路径 $match_url[2] = $this->filterRelativeUrl(trim($match_url[2]), $this->params['origin_url']); - $icon = $this->getFile($match_url[2],true); + $icon = $this->getFile($match_url[2], true); if ($icon && $icon['status'] == 'OK') { @@ -285,7 +286,7 @@ class Favicon //未能从LINK标签中获取图标(可能是网址无法打开,或者指定的文件无法打开,或未定义图标地址) //将使用网站根目录的文件代替 - $data = $this->getFile($this->full_host . '/favicon.ico',true); + $data = $this->getFile($this->full_host . '/favicon.ico', true); if ($data && $data['status'] == 'OK') { $this->_log_message("Success get icon from website root: {$this->full_host}/favicon.ico"); @@ -297,7 +298,7 @@ class Favicon if ($ret) { //最后的尝试,从重定向后的网址根目录获取favicon文件 - $data = $this->getFile($this->full_host . '/favicon.ico',true); + $data = $this->getFile($this->full_host . '/favicon.ico', true); if ($data && $data['status'] == 'OK') { $this->_log_message("Success get icon from redirect file: {$this->full_host}/favicon.ico"); @@ -306,17 +307,17 @@ class Favicon } } - + /** * 从其他api最后获取图像 ----------------------------------------------------- * t3.gstatic.com 国内可用 t3.gstatic.cn */ if ($this->data == NULL) { - $thrurl='http://t3.gstatic.cn/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&size=128&url='.$this->full_host; - $icon = file_get_contents($thrurl); - if($icon){ + $thrurl = 'http://t3.gstatic.cn/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&size=128&url=' . $this->full_host; + $icon = file_get_contents($thrurl); + if ($icon) { $this->_log_message("--https://t3.gstatic.com/{$this->full_host}/favicon.ico"); - $this->data = $icon; + $this->data = $icon; } } @@ -347,16 +348,15 @@ class Favicon */ $parsed_url = parse_url($url); - if (!isset($parsed_url['host']) || !$parsed_url['host']) { + if ($parsed_url === false || !isset($parsed_url['host']) || !$parsed_url['host']) { //在URL的前面加上http:// - // add the prefix if (!preg_match('/^https?:\/\/.*/', $url)) $url = 'http://' . $url; //解析URL并将结果保存到 $this->url $parsed_url = parse_url($url); - if ($parsed_url == FALSE) { - return FALSE; + if ($parsed_url === false) { + return false; } else { /** * 能成功解析的话就可以设置原始URL为这个添加过http://前缀的URL @@ -423,7 +423,7 @@ class Favicon //STEP5.2: 使用'/'分割URL字符串以获取目录的每一部分进行判断 $URI_full_dir = ltrim($URI_dir . '/' . $url, '/'); - $URL_arr = explode('/', $URI_full_dir); + $URL_arr = explode('/', $URI_full_dir); // 这里为了解决有些网站在根目录下的文件也使用 ../img/favicon.ico 这种形式的错误, // 对这种本来不合理的路径予以通过, 并忽略掉前面的两个点 (没错, 我说的是 gruntjs 的官网) @@ -439,7 +439,7 @@ class Favicon while (TRUE) { if (isset($dst_arr[$i - $j]) && $dst_arr[$i - $j] != FALSE) { $dst_arr[$i - $j] = FALSE; - $dst_arr[$i] = FALSE; + $dst_arr[$i] = FALSE; break; } else { $j++; @@ -472,6 +472,12 @@ class Favicon private function getFile($url, $isimg = false, $timeout = 2) { $ch = curl_init($url); + + //添加以下设置: + curl_setopt($ch, CURLOPT_TIMEOUT, 5);//设置总体超时5秒 + curl_setopt($ch, CURLOPT_NOSIGNAL, 1);//在多线程下使用超时选项 + curL_setopt($ch, CURLOPT_TCP_NODELAY, 1);//不延迟传输 + curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout); /* * 2019-06-20 @@ -484,29 +490,35 @@ class Favicon /** @var mixed 只获取500kb的数据,如果目标图片超过500kb,则丢弃 */ $request_headers = array('Range: bytes=0-512000'); //500 KB - curl_setopt( $ch, CURLOPT_FORBID_REUSE, true ); + curl_setopt($ch, CURLOPT_FORBID_REUSE, true); $request_headers[] = 'Connection: close'; - curl_setopt( $ch, CURLOPT_HTTPHEADER, $request_headers ); + curl_setopt($ch, CURLOPT_HTTPHEADER, $request_headers); curl_setopt($ch, CURLOPT_FAILONERROR, 1); //执行重定向获取 $ret = $this->curlExecFollow($ch, 2); - if($isimg){ - $mime=curl_getinfo($ch, CURLINFO_CONTENT_TYPE); - $mimeArray=explode('/',$mime); + if ($isimg) { + $img_info = @getimagesize($url); + if (empty($img_info)) { + $ret = ''; + $this->_log_message("不是图片:{$url}"); + } + $mime = curl_getinfo($ch, CURLINFO_CONTENT_TYPE); + $mimeArray = explode('/', $mime); } $arr = array( 'status' => 'FAIL', 'data' => '', 'real_url' => '' ); - if(!$isimg || $mimeArray[0] == 'image'){ + if (!$isimg || $mimeArray[0] == 'image') { if ($ret != false) { $status = curl_getinfo($ch, CURLINFO_HTTP_CODE); $arr = array( - 'status' => ($status >= 200 && $status <= 299) ? 'OK' : 'FAIL', + 'code' => $status, + 'status' => ($status >= 200 && $status <= 399) ? "OK" : "FAIL", 'data' => $ret, 'real_url' => curl_getinfo($ch, CURLINFO_EFFECTIVE_URL) ); @@ -515,7 +527,7 @@ class Favicon curl_close($ch); return $arr; - }else{ + } else { $this->_log_message("不是图片:{$url}"); return $arr; } @@ -531,33 +543,34 @@ class Favicon * @param int $maxredirect 最大允许的重定向次数 * @return string */ - private function curlExecFollow( &$ch, $maxredirect = null) { - $mr = $maxredirect === null ? 5 : intval($maxredirect); - if (ini_get('open_basedir') == '' && ini_get('safe_mode' == 'Off')) { - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $mr > 0); - curl_setopt($ch, CURLOPT_MAXREDIRS, $mr); - } else { - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false); - if ($mr > 0) { - $newurl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); - - $rch = curl_copy_handle($ch); - curl_setopt($rch, CURLOPT_HEADER, true); - curl_setopt($rch, CURLOPT_NOBODY, true); + private function curlExecFollow(&$ch, $maxredirect = null) + { + $mr = $maxredirect === null ? 5 : intval($maxredirect); + if (ini_get('open_basedir') == '' && ini_get('safe_mode' == 'Off')) { + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $mr > 0); + curl_setopt($ch, CURLOPT_MAXREDIRS, $mr); + } else { + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false); + if ($mr > 0) { + $newurl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); + + $rch = curl_copy_handle($ch); + curl_setopt($rch, CURLOPT_HEADER, true); + curl_setopt($rch, CURLOPT_NOBODY, true); curl_setopt($rch, CURLOPT_NOSIGNAL, 1); - curl_setopt($rch, CURLOPT_CONNECTTIMEOUT_MS, 800); - curl_setopt($rch, CURLOPT_FORBID_REUSE, false); - curl_setopt($rch, CURLOPT_RETURNTRANSFER, true); - do { - curl_setopt($rch, CURLOPT_URL, $newurl); - $header = curl_exec($rch); - if (curl_errno($rch)) { - $code = 0; - } else { - $code = curl_getinfo($rch, CURLINFO_HTTP_CODE); - if ($code == 301 || $code == 302) { - preg_match('/Location:(.*?)\n/', $header, $matches); - $newurl = trim(array_pop($matches)); + curl_setopt($rch, CURLOPT_CONNECTTIMEOUT_MS, 800); + curl_setopt($rch, CURLOPT_FORBID_REUSE, false); + curl_setopt($rch, CURLOPT_RETURNTRANSFER, true); + do { + curl_setopt($rch, CURLOPT_URL, $newurl); + $header = curl_exec($rch); + if (curl_errno($rch)) { + $code = 0; + } else { + $code = curl_getinfo($rch, CURLINFO_HTTP_CODE); + if ($code == 301 || $code == 302) { + preg_match('/Location:(.*?)\n/', $header, $matches); + $newurl = trim(array_pop($matches)); /** * 这里由于部分网站返回的 Location 的值可能是相对网址, 所以还需要做一步 * 转换成完整地址的操作 @@ -565,25 +578,25 @@ class Favicon * @since v2.2.2 */ $newurl = $this->filterRelativeUrl($newurl, $this->params['origin_url']); - } else { - $code = 0; - } - } - } while ($code && --$mr); - curl_close($rch); - if (!$mr) { - if ($maxredirect === null) { - trigger_error('Too many redirects. When following redirects, libcurl hit the maximum amount.', E_USER_WARNING); - } else { - $maxredirect = 0; - } - return false; - } - curl_setopt($ch, CURLOPT_URL, $newurl); - } - } - return curl_exec($ch); - } + } else { + $code = 0; + } + } + } while ($code && --$mr); + curl_close($rch); + if (!$mr) { + if ($maxredirect === null) { + trigger_error('Too many redirects. When following redirects, libcurl hit the maximum amount.', E_USER_WARNING); + } else { + $maxredirect = 0; + } + return false; + } + curl_setopt($ch, CURLOPT_URL, $newurl); + } + } + return curl_exec($ch); + } /** * 在设定的映射条件中循环并尝试匹配每一条规则, @@ -611,7 +624,7 @@ class Favicon private function _log_message($message) { if ($this->debug_mode) { - error_log(date('d/m/Y H:i:s : ').$message.PHP_EOL,3, "./my-errors.log"); + error_log(date('d/m/Y H:i:s : ') . $message . PHP_EOL, 3, "./my-errors.log"); } } diff --git a/config.php b/config.php new file mode 100644 index 0000000..bc5ab02 --- /dev/null +++ b/config.php @@ -0,0 +1,6 @@ +formatUrl($url); -if($formatUrl){ - if($expire == 0){ +if ($formatUrl) { + if ($expire == 0) { $favicon->getFavicon($formatUrl, false); exit; } else { $defaultMD5 = md5(file_get_contents($defaultIco)); + $cache = new Cache($hash_key, $cache_dir); /** * 2023-02-20 * 增加刷新缓存参数:refresh=true 如:https://域名?url=www.iowen.cn&refresh=true */ - if( !isset($_GET['refresh']) || ( isset($_GET['refresh']) && $_GET['refresh']!='true' ) ){ - $data = Cache::get($formatUrl,$defaultMD5,$expire); + if (!isset($_GET['refresh']) || (isset($_GET['refresh']) && $_GET['refresh'] != 'true')) { + $data = $cache->get($formatUrl, $defaultMD5, $expire); if ($data !== NULL) { foreach ($favicon->getHeader() as $header) { @header($header); } + header('X-Cache-Type: IO'); echo $data; exit; } @@ -64,13 +68,9 @@ if($formatUrl){ /** * 缓存中没有指定的内容时, 重新获取内容并缓存起来 */ - $content = $favicon->getFavicon($formatUrl, TRUE); + $content = $favicon->getFavicon($formatUrl, true); - if( md5($content) == $defaultMD5 ){ - $expire = 43200; //如果返回默认图标,设置过期时间为12小时。Cache::get 方法中需同时修改 - } - - Cache::set($formatUrl, $content, $expire); + $cache->set($formatUrl, $content); foreach ($favicon->getHeader() as $header) { @header($header); @@ -79,7 +79,7 @@ if($formatUrl){ echo $content; exit; } -}else{ +} else { return http_response_code(404); } @@ -88,64 +88,70 @@ if($formatUrl){ */ class Cache { + public $dir = 'cache'; //图标缓存目录 + + public $hash_key = 'iowen'; // 哈希密钥 + + public function __construct($hash_key, $dir = 'cache') + { + $this->hash_key = $hash_key; + $this->dir = $dir; + } + /** * 获取缓存的值, 不存在时返回 null * - * @param $key - * @param $default 默认图片 - * @param $expire 过期时间 - * @return string + * @param string $key 缓存键(URL) + * @param string $default 默认图片 + * @param int $expire 过期时间 + * @return mixed */ - public static function get($key, $default, $expire) + public function get($key, $default, $expire) { - $dir = 'cache'; //图标缓存目录 - - //$f = md5( strtolower( $key ) ); - $f = parse_url($key)['host']; + $host = strtolower(parse_url($key)['host']); + $hash = substr(hash_hmac('sha256', $host, $this->hash_key), 8, 16); + $f = $host . '_' . $hash . '.txt'; + $path = $this->dir . '/' . $f; - $a = $dir . '/' . $f . '.txt'; - - if(is_file($a)){ - $data = file_get_contents($a); - if( md5($data) == $default ){ + if (is_file($path)) { + $data = file_get_contents($path); + if (md5($data) == $default) { $expire = 43200; //如果返回默认图标,过期时间为12小时。 } - if( (time() - filemtime($a)) > $expire ){ + if ((time() - filemtime($path)) > $expire) { return null; - } - else{ + } else { return $data; } - } - else{ + } else { return null; } } /** * 设置缓存 + * 保存图标到缓存目录 * - * @param $key - * @param $value - * @param $expire 过期时间 + * @param string $key 缓存键(URL) + * @param string $value 缓存值(图标) */ - public static function set($key, $value, $expire) + public function set($key, $value) { - $dir = 'cache'; //图标缓存目录 - - //$f = md5( strtolower( $key ) ); - $f = parse_url($key)['host']; - - $a = $dir . '/' . $f . '.txt'; - //如果缓存目录不存在则创建 - if (!is_dir($dir)) mkdir($dir,0777,true) or die('创建缓存目录失败!'); - - if ( !is_file($a) || (time() - filemtime($a)) > $expire ) { - $imgdata = fopen($a, "w") or die("Unable to open file!"); //w 重写 a追加 - fwrite($imgdata, $value); - fclose($imgdata); - clearstatcache(); + if (!is_dir($this->dir)) { + mkdir($this->dir, 0755, true) or die('创建缓存目录失败!'); } + + $host = strtolower(parse_url($key)['host']); + $hash = substr(hash_hmac('sha256', $host, $this->hash_key), 8, 16); + $f = $host . '_' . $hash . '.txt'; + $path = $this->dir . '/' . $f; + + $imgdata = fopen($path, "w") or die("Unable to open file!"); + if (flock($imgdata, LOCK_EX)) { // 获取排他锁 + fwrite($imgdata, $value); + flock($imgdata, LOCK_UN); // 释放锁 + } + fclose($imgdata); } }