diff --git a/.htaccess b/.htaccess index cbc7868..33d32f7 100644 --- a/.htaccess +++ b/.htaccess @@ -1,8 +1,12 @@ Options +FollowSymlinks -Multiviews - RewriteEngine On - + RewriteEngine on + RewriteCond %{REQUEST_FILENAME} !-d RewriteCond %{REQUEST_FILENAME} !-f - RewriteRule ^(.*)$ index.php/$1 [QSA,PT,L] - + RewriteRule ^app/(\w+)/(.*)$ app/$1/index.php [QSA,PT,L,E=PATH_INFO:$2] + + RewriteCond %{REQUEST_FILENAME} !-d + RewriteCond %{REQUEST_FILENAME} !-f + RewriteRule ^(.*)$ index.php [QSA,PT,L,E=PATH_INFO:$1] + \ No newline at end of file diff --git a/SkycaijiApp/admin/behavior/Init.php b/SkycaijiApp/admin/behavior/Init.php index cd0e9b2..43386a4 100644 --- a/SkycaijiApp/admin/behavior/Init.php +++ b/SkycaijiApp/admin/behavior/Init.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -19,7 +19,25 @@ class Init{ $curController=strtolower(request()->controller()); if('store'==$curController){ - header('Access-Control-Allow-Origin:http://www.skycaiji.com'); + $httpOrigin=strtolower($_SERVER['HTTP_ORIGIN']); + $httpOrigin=rtrim($httpOrigin,'/'); + + $allowOrigins=array('http://www.skycaiji.com','https://www.skycaiji.com'); + + $allowOrigin=''; + if(in_array($httpOrigin,$allowOrigins)){ + + $allowOrigin=$httpOrigin; + }else{ + + if(model('Provider')->where(array('domain'=>$httpOrigin,'enable'=>1))->count()>0){ + + $allowOrigin=$httpOrigin; + } + } + + + header('Access-Control-Allow-Origin:'.$allowOrigin); header('Access-Control-Allow-Credentials:true'); @@ -54,27 +72,39 @@ class Init{ $GLOBALS['user']['group']=model('Usergroup')->getById($GLOBALS['user']['groupid']); if(!empty($GLOBALS['user']['group'])){ $GLOBALS['user']['group']=$GLOBALS['user']['group']->toArray(); + if(model('Usergroup')->is_admin($GLOBALS['user']['group'])){ + session('is_admin',true); + }else{ + session('is_admin',null); + } } } } - /*用户未登录或者不是管理员用户*/ - if(empty($GLOBALS['user'])||(empty($GLOBALS['user']['group']['founder'])&&empty($GLOBALS['user']['group']['admin']))){ + if(!empty($GLOBALS['user'])&&session('is_admin')){ + /*是管理员,进行下列操作*/ + if('index'==$curController&&'index'==strtolower(request()->action())){ + + $url=null; + if(input('?_referer')){ + + $url=input('_referer','','trim'); + } + $url=empty($url)?url('Admin/Backstage/index',null,null,true):$url; + + $baseContr=new BaseController(); + $baseContr->success(lang('user_auto_login'),$url); + } + config('dispatch_error_tmpl','common:error_admin'); + config('dispatch_success_tmpl','common:success_admin'); + }else{ + if(!in_array($curController, array('index','api'))){ 
$baseContr=new BaseController(); $baseContr->dispatchJump(false,lang('user_error_is_not_admin'),url('Admin/Index/index',null,null,true)); exit(); } - }else{ - /*是管理员,进行下列操作*/ - if('index'==$curController&&'index'==strtolower(request()->action())){ - - $baseContr=new BaseController(); - $baseContr->success(lang('user_auto_login'),url('Admin/Backstage/index',null,null,true)); - } - config('dispatch_error_tmpl','common:error_admin'); - config('dispatch_success_tmpl','common:success_admin'); } /*通用操作,全局变量*/ $mconfig=model('Config'); diff --git a/SkycaijiApp/admin/command/Collect.php b/SkycaijiApp/admin/command/Collect.php index 2e23134..ebcb3a3 100644 --- a/SkycaijiApp/admin/command/Collect.php +++ b/SkycaijiApp/admin/command/Collect.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/common.php b/SkycaijiApp/admin/common.php index c2ab696..86d37ae 100644 --- a/SkycaijiApp/admin/common.php +++ b/SkycaijiApp/admin/common.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -149,4 +149,11 @@ function cli_command_exec($paramStr){ exit(); +} +function is_official_url($url){ + if(preg_match('/skycaiji\.com/i', $url)){ + return true; + }else{ + return false; + } } \ No newline at end of file diff --git a/SkycaijiApp/admin/config.php b/SkycaijiApp/admin/config.php index e6e5a14..d732a05 100644 --- a/SkycaijiApp/admin/config.php +++ b/SkycaijiApp/admin/config.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/controller/Api.php b/SkycaijiApp/admin/controller/Api.php index fb20832..16d0236 100644 --- a/SkycaijiApp/admin/controller/Api.php +++ b/SkycaijiApp/admin/controller/Api.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/controller/App.php b/SkycaijiApp/admin/controller/App.php new file mode 100644 index 0000000..7d4c74d --- /dev/null +++ b/SkycaijiApp/admin/controller/App.php @@ -0,0 +1,233 @@ +error('app标识错误'); + } + $mapp=model('App'); + $appData=$mapp->getByApp($app); + if(empty($appData)){ + $this->error('应用未安装'); + } + $appUrl=config('root_website').'/app/'.$app.'/'; + + $navPacks=array(); + if(is_array($appData['config']['packs'])){ + $manageUrl=url('App/manage?app='.$app); + $manageUrl.=strpos($manageUrl,'?')===false?'?':'&'; + foreach ($appData['config']['packs'] as $k=>$v){ + if($v['type']=='nav'){ + $v['nav_link']=str_replace(array('{app}','{apps}'), array(config('root_website').'/app/'.$app.'/',config('root_website').'/app/'),$v['nav_link']); + if(!preg_match('/^\w+\:\/\//', $v['nav_link'])){ + + $v['nav_link']=$appUrl.$v['nav_link']; + } + + if(isset($navid)&&$navid==$k){ + + $v['is_current']=true; + } + $navPacks[$k]=$v; + } + } + } + $provData=null; + $mprov=model('Provider'); + if($appData['provider_id']>0){ + $provData=$mprov->where('id',$appData['provider_id'])->find(); + } + + $appClass=$mapp->app_class($app); + if(is_object($appClass)){ + if(version_compare($appClass->config['version'], $appData['config']['version'],'>')===true){ + + $this->assign('newest_version',$appClass->config['version']); + } + + $appData['app_class']=$mapp->get_class_vars($appClass); + } + + + + + $this->assign('app',$app); + $this->assign('appUrl',$appUrl); + $this->assign('navid',$navid); + $this->assign('navPacks',$navPacks); + $this->assign('appData',$appData); + $this->assign('provData',$provData); + return $this->fetch(); + } + /*协议*/ + public function agreementAction(){ + $app=input('app'); + 
$appClass=model('App')->app_class($app); + $this->assign('app',$app); + $this->assign('name',$appClass->config['name']); + $this->assign('agreement',$appClass->config['agreement']); + + return $this->fetch('agreement'); + } + /*安装*/ + public function installAction(){ + $app=input('app'); + $success=input('success'); + if(empty($app)){ + $this->error('app标识错误'); + } + $mapp=model('App'); + + if(!$mapp->right_app($app)){ + $this->error('抱歉,app标识不规范!'); + } + if($mapp->where('app',$app)->count()>0){ + $this->success('该应用已安装!','Mystore/app'); + } + $appClass=$mapp->app_class($app); + if(!is_object($appClass)||empty($appClass->install)){ + $this->error('不存在安装接口!'); + } + if(!empty($appClass->config['phpv'])){ + + if(version_compare(PHP_VERSION, $appClass->config['phpv'],'<')){ + $this->error('抱歉,该应用要求PHP版本最低'.$appClass->config['phpv']); + } + } + + if($appClass->install!='1'){ + + if(!$success){ + + $apiUrl=config('root_url').'/app/'.$app.'/'.$appClass->install; + $this->assign('app',$app); + $this->assign('op','install'); + $this->assign('apiUrl',$apiUrl); + return $this->fetch('apiop'); + } + } + $newData=array( + 'app'=>$app, + 'addtime'=>time(), + 'uptime'=>time(), + 'provider_id'=>model('Provider')->getIdByUrl($appClass->config['website']) + ); + $mapp->isUpdate(false)->allowField(true)->save($newData); + if($mapp->id>0){ + $mapp->set_config($app,$appClass->config); + $this->success('恭喜!安装成功','Mystore/app'); + }else{ + $this->error('安装失败!'); + } + } + /*卸载应用*/ + public function uninstallAction(){ + $app=input('app'); + $success=input('success'); + if(empty($app)){ + $this->error('app标识错误'); + } + $mapp=model('App'); + + if($mapp->where('app',$app)->count()<=0){ + $this->success('该应用已卸载!','Mystore/app'); + } + $appClass=$mapp->app_class($app); + if(!is_object($appClass)){ + + $mapp->deleteByApp($app); + $this->success('卸载成功'); + } + if(empty($appClass->uninstall)){ + $this->error('不存在卸载接口!'); + } + + if($appClass->uninstall!='1'){ + + if(!$success){ + + 
$apiUrl=config('root_url').'/app/'.$app.'/'.$appClass->uninstall; + $this->assign('app',$app); + $this->assign('op','uninstall'); + $this->assign('apiUrl',$apiUrl); + return $this->fetch('apiop'); + } + } + + $mapp->deleteByApp($app); + + $this->success('卸载成功,您可以手动删除app/'.$app.'目录彻底清除应用'); + } + /*升级应用*/ + public function upgradeAction(){ + $app=input('app'); + $success=input('success'); + if(empty($app)){ + $this->error('app标识错误'); + } + $mapp=model('App'); + + $appData=$mapp->getByApp($app); + if(empty($appData)){ + $this->success('请先安装应用!','Mystore/app'); + } + $appClass=$mapp->app_class($app); + if(!is_object($appClass)||empty($appClass->upgrade)){ + $this->error('不存在升级接口!'); + } + $referer=\think\Request::instance()->server('HTTP_REFERER',null,null); + if(version_compare($appClass->config['version'], $appData['config']['version'],'=')===true){ + + $this->success('已升级!',$referer); + } + + if($appClass->upgrade!='1'){ + + if(!$success){ + + $apiUrl=config('root_url').'/app/'.$app.'/'.$appClass->upgrade; + $this->assign('app',$app); + $this->assign('op','upgrade'); + $this->assign('apiUrl',$apiUrl); + return $this->fetch('apiop'); + } + } + + $mapp->strict(false)->where('app',$app)->update(array( + 'uptime'=>time(), + 'provider_id'=>model('Provider')->getIdByUrl($appClass->config['website']) + )); + $mapp->set_config($app,$appClass->config); + + $this->success('恭喜!升级成功',$referer); + } + /*开启、关闭应用*/ + public function enableAction(){ + $app=input('app'); + $enable=input('enable/d',0); + if(empty($app)){ + $this->error('app标识错误'); + } + $mapp=model('App'); + + $enable=$enable?1:0; + + $mapp->set_config($app,array('enable'=>$enable)); + + $referer=\think\Request::instance()->server('HTTP_REFERER',null,null); + $this->success('应用已'.($enable?'开启':'关闭'),$referer); + } +} \ No newline at end of file diff --git a/SkycaijiApp/admin/controller/Backstage.php b/SkycaijiApp/admin/controller/Backstage.php index 1f95285..1d8ee55 100644 --- 
a/SkycaijiApp/admin/controller/Backstage.php +++ b/SkycaijiApp/admin/controller/Backstage.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -26,7 +26,7 @@ class Backstage extends BaseController{ $runInfo['task_other']=model('Task')->where('`auto`=0')->count(); /*服务器信息*/ - $serverInfo=array( + $serverData=array( 'os'=>php_uname('s').' '.php_uname('r'), 'php'=>PHP_VERSION, 'db'=>config('database.type'), @@ -35,9 +35,9 @@ class Backstage extends BaseController{ 'upload_max'=>ini_get('upload_max_filesize') ); - if(stripos($serverInfo['db'],'mysql')!==false){ + if(stripos($serverData['db'],'mysql')!==false){ $dbVersion=db()->query('SELECT VERSION() as v;'); - $serverInfo['db'].=' '.($dbVersion[0]?$dbVersion[0]['v']:''); + $serverData['db'].=' '.($dbVersion[0]?$dbVersion[0]['v']:''); } $runInfo['auto_status']='良好'; @@ -48,28 +48,28 @@ class Backstage extends BaseController{ $taskAutoCount=model('Task')->where('auto',1)->count(); if($taskAutoCount<=0){ - $serverInfo['caiji']='未设置自动采集任务'; + $serverData['caiji']='未设置自动采集任务'; $runInfo['auto_status']='无任务'; }else{ if($lastTime>0){ $runInfo['auto_status']='运行良好'; - $serverInfo['caiji']='最近采集:'.date('Y-m-d H:i:s',$lastTime).'  '; + $serverData['caiji']='最近采集:'.date('Y-m-d H:i:s',$lastTime).'  '; if($GLOBALS['config']['caiji']['run']=='backstage'){ if(NOW_TIME-$lastTime>60*($GLOBALS['config']['caiji']['interval']+15)){ - $serverInfo['caiji'].='

自动采集似乎停止了,请自动采集似乎停止了,请重新保存设置以便激活采集

'; $runInfo['auto_status']='停止运行'; } } } - $serverInfo['caiji'].='实时采集'; + $serverData['caiji'].='实时采集'; } }else{ $runInfo['auto_status']='已停止'; - $serverInfo['caiji']='未开启自动采集'; + $serverData['caiji']='未开启自动采集'; } $upgradeDb=false; @@ -93,7 +93,7 @@ class Backstage extends BaseController{ $GLOBALS['breadcrumb']=breadcrumb(array('首页')); $this->assign('runInfo',$runInfo); - $this->assign('serverInfo',$serverInfo); + $this->assign('serverData',$serverData); $this->assign('upgradeDb',$upgradeDb); return $this->fetch('backstage/index'); @@ -106,7 +106,7 @@ class Backstage extends BaseController{ /*获取推送消息*/ public function adminIndexAction(){ $callback=input('?'.config('var_jsonp_handler'))?input(config('var_jsonp_handler')):config('default_jsonp_handler'); - $html=get_html('http://www.skycaiji.com/Store/Client/adminIndex?v='.SKYCAIJI_VERSION.'&'.config('var_jsonp_handler').'='.rawurlencode($callback),null,null,'utf-8'); + $html=get_html('https://www.skycaiji.com/store/client/adminIndex?v='.SKYCAIJI_VERSION.'&'.config('var_jsonp_handler').'='.rawurlencode($callback),null,null,'utf-8'); header('Content-Type:application/json;charset=utf-8'); exit($html); } diff --git a/SkycaijiApp/admin/controller/BaseController.php b/SkycaijiApp/admin/controller/BaseController.php index de8f399..03585ff 100644 --- a/SkycaijiApp/admin/controller/BaseController.php +++ b/SkycaijiApp/admin/controller/BaseController.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/controller/Collected.php b/SkycaijiApp/admin/controller/Collected.php index a2b8eb4..2638594 100644 --- a/SkycaijiApp/admin/controller/Collected.php +++ b/SkycaijiApp/admin/controller/Collected.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/controller/Collector.php b/SkycaijiApp/admin/controller/Collector.php index b6f4ddb..2bf6067 100644 --- a/SkycaijiApp/admin/controller/Collector.php +++ b/SkycaijiApp/admin/controller/Collector.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/controller/Cpattern.php b/SkycaijiApp/admin/controller/Cpattern.php index c00ef8e..ce70722 100644 --- a/SkycaijiApp/admin/controller/Cpattern.php +++ b/SkycaijiApp/admin/controller/Cpattern.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -188,7 +188,7 @@ class Cpattern extends BaseController { 'rule' =>array('rule','rule_multi','rule_multi_type','rule_multi_str','rule_merge'), 'auto' =>'auto', 'xpath' =>array('xpath','xpath_multi','xpath_multi_type','xpath_multi_str','xpath_attr','xpath_attr_custom'), - 'json' =>array('json','json_arr','json_arr_implode'), + 'json' =>array('json','json_arr','json_arr_implode','json_loop'), 'page' =>array('page','page_rule','page_rule_merge','page_rule_multi','page_rule_multi_str'), 'words' =>'words', 'num' => array('num_start','num_end'), @@ -237,6 +237,7 @@ class Cpattern extends BaseController { }elseif($op=='sub'){ $process=input('process/a',null,'trim'); + $process=array_array_map('trim', $process); if(empty($process)){ $process=''; }else{ @@ -246,7 +247,6 @@ class Cpattern extends BaseController { } } } - $objid=input('objid',''); 
$this->success('',null,array('process'=>$process,'process_json'=>empty($process)?'':json_encode($process),'objid'=>$objid)); } @@ -257,6 +257,7 @@ class Cpattern extends BaseController { }elseif($op=='load'){ $process=input('process/a',null,'trim'); + $process=array_array_map('trim', $process); $this->assign('process',$process); return $this->fetch('process_load'); } @@ -382,7 +383,7 @@ class Cpattern extends BaseController { $curLevel=input('level/d',0); $curLevel=$curLevel>0?$curLevel:0; - $levelData=$eCpattern->get_level_urls($source_url,$curLevel); + $levelData=$eCpattern->collLevelUrls($source_url,$curLevel); $eCpattern->success('',null,array('urls'=>$levelData['urls'],'levelName'=>$levelData['levelName'],'nextLevel'=>$levelData['nextLevel'])); }elseif('cont_url'==$op){ @@ -445,19 +446,36 @@ class Cpattern extends BaseController { } $val_list=$eCpattern->getFields($cont_url); - if(empty($eCpattern->first_loop_field)){ $val_list=array($val_list); } + $md5Url=md5($cont_url); + $msg=''; + if(isset($eCpattern->exclude_cont_urls[$md5Url])){ + if(empty($eCpattern->first_loop_field)){ + + $msg=reset($eCpattern->exclude_cont_urls[$md5Url]); + $msg=$eCpattern->exclude_url_msg($msg); + $this->error('中断采集 > '.$msg); + }else{ + + $num=0; + foreach ($eCpattern->exclude_cont_urls[$md5Url] as $k=>$v){ + $num+=count($v); + } + $msg='通过数据处理排除了'.$num.'条数据'; + } + } + foreach ($val_list as $v_k=>$vals){ foreach ($vals as $k=>$v){ $vals[$k]=$v['value']; } $val_list[$v_k]=$vals; } - $eCpattern->success('',null,$val_list); + $eCpattern->success($msg,null,$val_list); }elseif('get_paging_urls'==$op){ $paging_urls=$eCpattern->getPagingUrls($cont_url,$html,true); diff --git a/SkycaijiApp/admin/controller/Develop.php b/SkycaijiApp/admin/controller/Develop.php index 71add21..385f100 100644 --- a/SkycaijiApp/admin/controller/Develop.php +++ b/SkycaijiApp/admin/controller/Develop.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji 
(蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -23,7 +23,18 @@ class Develop extends BaseController { 'select_func' => '选择函数返回值(select)' ); + public static $packTypes=array( + 'nav'=>'后台导航' + ); + public static $frameworks=array( + 'thinkphp'=>array('6.0','5.1','5.0'), + 'laravel'=>array('5.5','5.1'), + ); + + public function pluginAction(){ + $this->redirect('develop/releaseCms'); + } public function releaseCmsAction(){ $mapp=model('ReleaseApp'); if(request()->isPost()){ @@ -84,6 +95,7 @@ class Develop extends BaseController { $appName=input('app'); $appName=ucfirst($appName); $config=array(); + if($appName){ $cmsData=$mapp->where(array('module'=>'cms','app'=>$appName))->find(); @@ -150,7 +162,7 @@ class Develop extends BaseController { } } - $GLOBALS['content_header']='开发CMS发布插件 '; + $GLOBALS['content_header']='开发CMS发布插件 '; $GLOBALS['breadcrumb']=breadcrumb(array('开发工具','开发CMS发布插件')); $this->assign('config',$config); $this->assign('is_old_plugin',$is_old_plugin); @@ -208,6 +220,7 @@ class Develop extends BaseController { $mapp=model('ReleaseApp'); $cmsData=$mapp->where(array('module'=>'cms','app'=>$appData['app']))->find(); + if(!$is_edit&&!empty($cmsData)){ $this->error('抱歉,已存在'.$appData['app'].'插件'); @@ -369,4 +382,439 @@ EOF; $this->error('代码错误'); } } + + + /*开发应用*/ + public function appAction(){ + $app=input('app'); + $app=strtolower($app); + $mapp=model('App'); + $appData=null; + if($app){ + $appData=$mapp->getByApp($app); + } + if(request()->isPost()){ + + $is_edit=input('edit'); + if($is_edit&&empty($appData)){ + $this->error('修改失败,该应用不存在!'); + } 
+ $framework=input('framework'); + $frameworkVersion=input('framework_version/a'); + $frameworkVersion=$frameworkVersion[$framework]; + + $config=array( + 'name'=>input('name'), + 'version'=>input('version'), + 'desc'=>input('desc','','trim'), + 'author'=>input('author'), + 'website'=>input('website','','trim'), + 'phpv'=>input('phpv'), + 'agreement'=>input('agreement','','trim') + ); + + $install=input('install','','trim'); + $uninstall=input('uninstall','','trim'); + $upgrade=input('upgrade','','trim'); + + if(!empty($framework)&&empty($frameworkVersion)){ + $this->error('请选择框架版本'); + } + + $packs=input('packs/a'); + if(empty($config['name'])){ + $this->error('请输入应用名称'); + } + if(!$mapp->right_name($config['name'])){ + $this->error('应用名称只能由汉字、字母、数字和下划线组成'); + } + if(!$is_edit){ + + if(!$mapp->right_app($app)){ + $this->error('app标识不规范'); + } + if($mapp->where('app',$app)->count()>0){ + + $this->error('抱歉,已存在'.$app.'应用'); + } + } + if(!$mapp->right_version($config['version'])){ + $this->error('版本号格式错误'); + } + + if(is_array($packs)){ + foreach ($packs as $k=>$v){ + + $v=json_decode(url_b64decode($v),true); + $packs[$k]=array( + 'name'=>$v['name'], + 'type'=>$v['type'], + 'nav_link'=>$v['nav_link'], + 'target'=>$v['target'], + ); + } + }else{ + $packs=array(); + } + + $config['framework']=$framework; + $config['framework_version']=$frameworkVersion; + $config['packs']=$packs; + + $config=$mapp->clear_config($config); + + $provId=model('Provider')->getIdByUrl($config['website']); + + $tplAppPhp=file_get_contents(config('app_path').'/public/app/skycaiji_php.tpl'); + + $tplParams=$config; + $tplParams['app']=$app; + $tplParams['install']=$install; + $tplParams['uninstall']=$uninstall; + $tplParams['upgrade']=$upgrade; + foreach ($tplParams as $k=>$v){ + if(is_array($v)){ + + $v=$this->_format_array($v,"\t"); + } + $tplAppPhp=str_replace('{$'.$k.'}',$v, $tplAppPhp); + } + unset($tplParams); + + $tplAppPhp=preg_replace('/\{\$[^\{\}]+\}/', '', $tplAppPhp); + 
if(!$is_edit){ + + + $createFiles = array ( + 'index.php'=>file_get_contents(config('app_path').'/public/app/index_php.tpl'), + $app.'.php'=>$tplAppPhp, + ); + + foreach ($createFiles as $filename=>$filecode){ + write_dir_file(config('apps_path')."/{$app}/{$filename}",$filecode); + } + + $mapp->isUpdate(false)->allowField(true)->save(array( + 'app'=>$app, + 'addtime'=>time(), + 'uptime'=>time(), + 'provider_id'=>$provId + )); + if($mapp->id>0){ + $mapp->set_config($app,$config); + $this->success('应用创建成功','Develop/app?app='.$app); + }else{ + $this->success('应用创建失败'); + } + }else{ + + $appFilename=$mapp->app_class_file($app); + + $codeAppPhp=file_get_contents($appFilename); + + if(!empty($codeAppPhp)){ + + $appClass=$mapp->app_class($app); + $appConfig=array(); + if(is_object($appClass)){ + $appConfig=is_array($appClass->config)?$appClass->config:array(); + } + $appConfig=array_merge($appConfig,$config); + + $replaceVars=array('config'=>$appConfig,'install'=>$install,'uninstall'=>$uninstall,'upgrade'=>$upgrade); + $replaceVars=array_reverse($replaceVars); + foreach ($replaceVars as $reVar=>$reCont){ + + $matchVar='/[a-z]+\s*\$'.$reVar.'\s*=(?:([^\'\"\r\n]+?;)|([\s\S]+?[\]\)\'\"]\s*;))/i'; + + if(!preg_match($matchVar,$codeAppPhp)){ + + $codeAppPhp=preg_replace('/class\s*\w+\s*extends\s*skycaiji\s*\{/i', "$0\r\n\tpublic \$".$reVar."='';", $codeAppPhp); + } + if(is_array($reCont)){ + + $reCont=$this->_format_array($reCont); + $codeAppPhp=preg_replace($matchVar, 'public $'.$reVar.'='.$reCont.';', $codeAppPhp); + }else{ + + preg_match_all($matchVar,$codeAppPhp,$asd); + + $codeAppPhp=preg_replace($matchVar, 'public $'.$reVar."='".addslashes($reCont)."';", $codeAppPhp); + } + } + + write_dir_file($appFilename, $codeAppPhp); + }else{ + + write_dir_file($appFilename, $tplAppPhp); + } + + $mapp->strict(false)->where('id',$appData['id'])->update(array( + 'uptime'=>time(), + 'provider_id'=>$provId + )); + 
if(version_compare($config['version'],$appData['config']['version'],'<=')===true){ + + $mapp->set_config($app,$config); + } + + $this->success('修改成功','Develop/app?app='.$app); + } + }else{ + $GLOBALS['content_header']='开发应用程序 '; + + if($appData){ + $GLOBALS['breadcrumb']=breadcrumb(array(array('url'=>url('App/manage?app='.$appData['app']),'title'=>$appData['config']['name']),'开发应用')); + }else{ + $GLOBALS['breadcrumb']=breadcrumb(array('开发工具','应用程序')); + } + + $appClass=$mapp->app_class($app); + + if(is_object($appClass)){ + if(version_compare($appClass->config['version'], $appData['config']['version'],'>')===true){ + + $this->assign('newest_version',$appClass->config['version']); + } + + $appFrameworkPath=$appClass->appFrameworkPath(); + if(is_dir($appFrameworkPath)){ + + $this->assign('appFrameworkPath',$appFrameworkPath); + } + + $appData['app_class']=$mapp->get_class_vars($appClass); + } + + $this->assign('appData',$appData); + $this->assign('frameworks',self::$frameworks); + $this->assign('packTypes',self::$packTypes); + return $this->fetch(); + } + } + + /*添加扩展*/ + public function appAddPackAction(){ + if(request()->isPost()){ + $pack=input('pack/a','','trim'); + $pack['name']=strip_tags($pack['name']); + $pack['type']=strip_tags($pack['type']); + $pack['target']=intval($pack['target']); + $pack=array_array_map('trim', $pack); + if(empty($pack['name'])){ + $this->error('请输入名称'); + } + if(!model('App')->right_name($pack['name'])){ + $this->error('名称只能由汉字、字母、数字和下划线组成'); + } + + if(empty($pack['type'])){ + $this->error('请选择类型'); + } + if(empty($pack['nav_link'])){ + $this->error('请输入链接'); + } + $pack['pack_json']=json_encode($pack); + $pack['type_name']=self::$packTypes[$pack['type']]; + $this->success('',null,$pack); + }else{ + $objid=input('objid'); + $pack=input('pack','','url_b64decode'); + $pack=$pack?json_decode($pack,true):''; + + $this->assign('objid',$objid); + $this->assign('pack',$pack); + $this->assign('packTypes',self::$packTypes); + return 
/**
 * Download and install the framework package that an app declares in its config.
 *
 * The "op" request parameter selects one step of a chunked download:
 *  - "files":   probe the remote zip and answer with its size and the byte ranges to fetch;
 *  - "down":    fetch one byte range (unless that chunk is already cached) and cache it on disk;
 *  - "install": when every chunk is cached, join them into framework.zip and extract it
 *               into the app path.
 *
 * Every outcome is reported through $this->success() / $this->error().
 */
public function installFrameworkAction(){
	$app=input('app');
	$op=input('op');

	if(empty($app)){
		$this->error('应用app标识错误');
	}
	$mapp=model('App');
	$appClass=$mapp->app_class($app);
	if(!is_object($appClass)){
		$this->error('应用配置错误');
	}
	if(empty($appClass->config['framework'])){
		$this->error('框架不能为空');
	}
	if(empty($appClass->config['framework_version'])){
		$this->error('框架版本错误');
	}

	$appFrameworkPath=$appClass->appFrameworkPath();
	if(is_dir($appFrameworkPath)){
		$this->error('该应用已有框架,如需重新设置框架,请先删除:'.$appFrameworkPath);
	}

	// Size in bytes of each downloaded chunk.
	$eachSize=1024*100;
	$fileUrl='https://www.skycaiji.com/download/framework/'.$appClass->config['framework'].'/'.$appClass->config['framework_version'].'.zip';
	// Directory where the chunks (named 1..N) and the joined zip are cached.
	$filePath=RUNTIME_PATH.'/cache_framework/'.$appClass->config['framework'].$appClass->config['framework_version'].'/';

	if('files'==$op){
		$headers=get_headers($fileUrl,true);
		// get_headers() yields false when the request itself fails.
		if(!is_array($headers)||!isset($headers[0])||!preg_match('/\s+20\d\s+ok/i',$headers[0])){
			$this->error('文件获取失败');
		}
		// Header names are matched case-insensitively; a repeated header arrives as an array.
		$headers=array_change_key_case($headers,CASE_LOWER);
		$fileSize=isset($headers['content-length'])?$headers['content-length']:0;
		if(is_array($fileSize)){
			$fileSize=end($fileSize);
		}
		$fileSize=intval($fileSize);
		if($fileSize<=0){
			// Without a usable size no chunk list can be built.
			$this->error('文件获取失败');
		}

		$count=ceil($fileSize/$eachSize);
		$list=array();
		for($i=0;$i<$count;$i++){
			// Inclusive byte range of chunk $i, clamped to the last byte of the file.
			$list[$i]=array(
				'id'=>$i+1,
				'start'=>$i*$eachSize,
				'end'=>min(($i+1)*$eachSize,$fileSize)-1,
			);
		}
		$this->success(true,'',array('size'=>$fileSize,'list'=>$list));
	}elseif('down'==$op){
		$startSize=input('start_size/d');
		$endSize=input('end_size/d');
		$id=input('id/d');

		// Read the cached chunk only when it exists, so the first download of a chunk
		// does not raise a missing-file warning.
		$fileCont=is_file($filePath.$id)?file_get_contents($filePath.$id):'';
		if(is_string($fileCont)&&$fileCont!==''){
			// Chunk already cached.
			$this->success();
		}else{
			$blockData=$this->_down_file($fileUrl,null,"{$startSize}-{$endSize}");
			if(!is_string($blockData)||$blockData===''){
				$this->error();
			}else{
				write_dir_file($filePath.$id, $blockData);
				$this->success();
			}
		}
	}elseif('install'==$op){
		$fileSize=input('size/d');
		$count=ceil($fileSize/$eachSize);

		// All chunks 1..$count must be present before joining.
		$is_end=true;
		for($i=1;$i<=$count;$i++){
			if(!file_exists($filePath.$i)){
				$is_end=false;
				break;
			}
		}
		if($is_end){
			$error='';
			$allData='';
			for($i=1;$i<=$count;$i++){
				$allData.=file_get_contents($filePath.$i);
			}
			write_dir_file($filePath.'framework.zip', $allData);

			if(!class_exists('ZipArchive')){
				// A missing class is not reported as an \Exception, so it has to be detected up front.
				$error='您的服务器不支持ZipArchive解压';
			}else{
				try {
					$zipClass=new \ZipArchive();
					if($zipClass->open($filePath.'framework.zip')===TRUE){
						// extractTo() reports failure through its return value.
						if(!$zipClass->extractTo($appClass->appPath)){
							$error='解压失败';
						}
						$zipClass->close();
					}else{
						$error='解压失败';
					}
				}catch(\Exception $ex){
					$error='您的服务器不支持ZipArchive解压';
				}
			}
			if(!empty($error)){
				$error.=',请自行将文件'.$filePath.'framework.zip 解压到'.$appClass->appPath.'里';
			}
			if($error){
				$this->error($error);
			}else{
				clear_dir($filePath);
				$this->success('安装成功','Develop/app?app='.$app);
			}
		}
		$this->error();
	}
}

/**
 * Render an array as re-indented PHP source text.
 *
 * An array is first exported with var_export(); a non-array value is used as given.
 * The leading whitespace of every line is replaced by $headStr, one tab, plus one tab
 * for every two whitespace characters (rounded up). Whitespace in front of "array (" is
 * then collapsed so the result reads "array(".
 *
 * @param mixed  $arr     array to export, or already exported text
 * @param string $headStr prefix put in front of every line
 * @return string
 */
public function _format_array($arr,$headStr=''){
	if(is_array($arr)){
		$arr=var_export($arr,true);
	}
	$arr=preg_replace_callback('/^\s*/m', function($matches) use ($headStr){
		$tabs=1+(int)ceil(strlen($matches[0])/2);
		return $headStr.str_repeat("\t",$tabs);
	}, $arr);
	$arr=preg_replace('/\s+array\s*\(/i', 'array(', $arr);
	return $arr;
}

/**
 * Recursively copy the content of directory $fromPath into $toPath.
 *
 * Nothing is copied when $fromPath is not a directory.
 *
 * @param string $fromPath source directory
 * @param string $toPath   destination directory
 * @return false|null false when either path is empty, otherwise nothing
 */
public function _copy_files($fromPath,$toPath){
	if(empty($fromPath)||empty($toPath)){
		return false;
	}
	if(!is_dir($fromPath)){
		return;
	}
	$fileList=scandir($fromPath);
	if(!is_array($fileList)){
		// scandir() yields false when the directory cannot be read.
		return;
	}
	foreach($fileList as $file){
		if('.'==$file||'..'==$file){
			continue;
		}
		$fileName=$fromPath.'/'.$file;
		if(!file_exists($fileName)){
			continue;
		}
		$toFile=$toPath.'/'.$file;
		if(is_dir($fileName)){
			// mkdir() warns on an existing directory, so create it only when missing.
			if(!is_dir($toFile)){
				mkdir($toFile,0777,true);
			}
			$this->_copy_files($fileName, $toFile);
		}elseif(is_file($fileName)){
			write_dir_file($toFile,file_get_contents($fileName));
		}
	}
}

/**
 * Fetch a URL (optionally only a byte range of it) with cURL.
 *
 * @param string      $url    address to fetch
 * @param array|null  $header extra request headers, passed to CURLOPT_HTTPHEADER when not empty
 * @param string|null $size   byte range such as "0-102399"; the whole resource is fetched when omitted
 * @return string|false the response body, or false when curl_exec() fails
 */
public function _down_file($url, $header=null, $size=null) {
	// $size now has a default: a required parameter after an optional one is deprecated in PHP 8.
	$useragent="Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";

	$ch = curl_init ();
	curl_setopt ( $ch, CURLOPT_URL, $url );
	curl_setopt ( $ch, CURLOPT_TIMEOUT, 100 );
	curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, 1 );
	curl_setopt ( $ch, CURLOPT_FOLLOWLOCATION, 1 );
	curl_setopt ( $ch, CURLOPT_HEADER, 0 );
	curl_setopt ( $ch, CURLOPT_USERAGENT, $useragent);

	// NOTE(review): certificate and host verification are switched off although the
	// downloaded data is later extracted as application code - consider enabling them.
	curl_setopt ( $ch, CURLOPT_SSL_VERIFYPEER, FALSE );
	curl_setopt ( $ch, CURLOPT_SSL_VERIFYHOST, FALSE );

	if (! empty ( $header )) {
		curl_setopt ( $ch, CURLOPT_HTTPHEADER, $header );
	}
	if ($size !== null && $size !== '') {
		curl_setopt ( $ch, CURLOPT_RANGE, $size );
	}

	$bytes = curl_exec ( $ch );
	curl_close ( $ch );
	return $bytes;
}
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -34,13 +34,25 @@ class Index extends BaseController{ cache('caiji_auto_backstage_runtime',$runtime); }else{ - $cahce_runtime=cache('caiji_auto_backstage_runtime'); - $cahce_runtime=intval($cahce_runtime); - if($runtime<$cahce_runtime){ + $cache_runtime=cache('caiji_auto_backstage_runtime'); + $cache_runtime=intval($cache_runtime); + if($runtime<$cache_runtime){ $this->error('终止旧进程'); } } + + $curlCname='caiji_auto_curltime_'.$runtime; + if(input('?curltime')){ + + $cacheCurl=cache($curlCname); + if(!empty($cacheCurl)&&$cacheCurl>input('curltime')){ + + $this->error('终止过期进程'); + } + cache($curlCname,input('curltime')); + } + ignore_user_abort(true); set_time_limit(0); @@ -52,19 +64,55 @@ class Index extends BaseController{ $this->error('自动采集已停止'); } - $collectTime1=time(); try{ - @get_html(url('Admin/Api/collect?backstage=1',null,false,true),null,array('timeout'=>3)); + + $ch = curl_init (); + curl_setopt ( $ch, CURLOPT_URL, url('Admin/Api/collect?backstage=1',null,false,true) ); + curl_setopt ( $ch, CURLOPT_TIMEOUT, 3 ); + curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, 1 ); + curl_setopt ( $ch, CURLOPT_FOLLOWLOCATION, 1 ); + curl_setopt ( $ch, CURLOPT_HEADER, 1 ); + curl_setopt ( $ch, CURLOPT_NOBODY, 1 ); + curl_setopt ( $ch, CURLOPT_SSL_VERIFYPEER, FALSE ); + curl_setopt ( $ch, CURLOPT_SSL_VERIFYHOST, FALSE ); + curl_exec ( $ch ); + curl_close ( $ch ); }catch(\Exception $ex){ } - sleep(20); + sleep(15); if($GLOBALS['config']['caiji']['auto']){ try{ - @get_html(url('Admin/Index/backstage?autorun=1&runtime='.$runtime,null,false,true),null,array('timeout'=>3)); + + do { + + $curltime=time(); + + $ch = curl_init (); + curl_setopt ( $ch, CURLOPT_URL, 
url('Admin/Index/backstage?autorun=1&runtime='.$runtime.'&curltime='.$curltime,null,false,true) ); + curl_setopt ( $ch, CURLOPT_TIMEOUT, 2 ); + curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, 1 ); + curl_setopt ( $ch, CURLOPT_FOLLOWLOCATION, 1 ); + curl_setopt ( $ch, CURLOPT_HEADER, 1 ); + curl_setopt ( $ch, CURLOPT_NOBODY, 1 ); + curl_setopt ( $ch, CURLOPT_SSL_VERIFYPEER, FALSE ); + curl_setopt ( $ch, CURLOPT_SSL_VERIFYHOST, FALSE ); + curl_exec ( $ch ); + curl_close ( $ch ); + + sleep(1); + + $cacheCurl=cache($curlCname); + + $continue=false; + if(empty($cacheCurl)||$cacheCurl<$curltime){ + + $continue=true; + } + }while($continue); }catch(\Exception $ex){ } @@ -128,7 +176,7 @@ class Index extends BaseController{ $muser=new \skycaiji\admin\model\User(); $userData=$muser->where('username',$username)->find(); - if(empty($userData)||$userData['password']!=pwd_encrypt($pwd)){ + if(empty($userData)||$userData['password']!=\skycaiji\admin\model\User::pwd_encrypt($pwd,$userData['salt'])){ if(!empty($config_login['limit'])){ @@ -160,13 +208,28 @@ class Index extends BaseController{ cookie('login_history',$username.'|'.md5($username.$userData['password']),array('expire'=>3600*24*15)); } session('user_id',$userData['uid']); + + $userGroup=model('Usergroup')->getById($userData['groupid']); - $serverinfo=input('serverinfo'); + if(model('Usergroup')->is_admin($userGroup)){ + session('is_admin',true); + }else{ + session('is_admin',null); + } + + $serverinfo=input('_serverinfo'); if(empty($serverinfo)){ - $this->success(lang('user_login_in'),'Admin/Backstage/index'); + $url=null; + if(input('?_referer')){ + + $url=input('_referer','','trim'); + } + $url=empty($url)?'Admin/Backstage/index':$url; + + $this->success(lang('user_login_in'),$url); }else{ - $this->success(lang('user_login_in'),null,array('js'=>'window.parent.postMessage("login_success","http://www.skycaiji.com");')); + 
$this->success(lang('user_login_in'),null,array('js'=>'window.parent.postMessage("login_success","*");')); } }else{ $this->error(lang('user_error_sublogin')); @@ -180,6 +243,7 @@ class Index extends BaseController{ \think\Cookie::delete('login_history'); unset($GLOBALS['user']); session('user_id',null); + session('is_admin',null); $this->success(lang('op_success'),'Admin/Index/index'); } /*验证码*/ @@ -291,7 +355,10 @@ class Index extends BaseController{ $this->error($check['msg']); } - $muser->strict(false)->where(array('username'=>$stepSession['user']['username']))->update(array('password'=>pwd_encrypt($pwd))); + $salt=\skycaiji\admin\model\User::rand_salt(); + $pwd=\skycaiji\admin\model\User::pwd_encrypt($pwd,$salt); + + $muser->strict(false)->where(array('username'=>$stepSession['user']['username']))->update(array('password'=>$pwd,'salt'=>$salt)); session($stepSname,null); $this->success(lang('find_pwd_success'),'Admin/Index/index'); }else{ @@ -330,6 +397,11 @@ class Index extends BaseController{ } } + $newPwd='skycaiji123'; + $newPwdEncrypt=\skycaiji\admin\model\User::pwd_encrypt($newPwd,$stepSession['user']['salt']); + + $this->assign('newPwd',$newPwd); + $this->assign('newPwdEncrypt',$newPwdEncrypt); $this->assign('emailStatus',$emailStatus); } diff --git a/SkycaijiApp/admin/controller/Mystore.php b/SkycaijiApp/admin/controller/Mystore.php index 77067e0..8bedc7a 100644 --- a/SkycaijiApp/admin/controller/Mystore.php +++ b/SkycaijiApp/admin/controller/Mystore.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -22,11 +22,11 @@ class Mystore extends BaseController { - $this->redirect('Mystore/collect'); + $this->redirect('Mystore/rule'); } - public function collectAction(){ + public function ruleAction(){ $mrule=model('Rule'); - $type='collect'; + $type=input('type','collect'); $module=input('module'); $page=max(1,input('p/d',0)); $cond=array('type'=>$type); @@ -50,6 +50,21 @@ class Mystore extends BaseController { $pagenav = $ruleList->render(); $this->assign('pagenav',$pagenav); $ruleList=$ruleList->all(); + if(!empty($ruleList)){ + $provList=array(); + foreach ($ruleList as $k=>$v){ + $provList[$v['provider_id']]=$v['provider_id']; + } + $provList=model('Provider')->where('id','in',$provList)->column('*','id'); + foreach ($ruleList as $k=>$v){ + $url='https://www.skycaiji.com'; + if(!empty($v['provider_id'])&&!empty($provList[$v['provider_id']])){ + + $url=$provList[$v['provider_id']]['url']; + } + $ruleList[$k]['store_url']=$url.'/client/rule/detail?id='.$v['store_id']; + } + } $GLOBALS['content_header']='已下载'; $GLOBALS['breadcrumb']=breadcrumb(array(array('url'=>url('Mystore/index'),'title'=>'已下载'),lang('rule_'.$type))); @@ -66,8 +81,8 @@ class Mystore extends BaseController { $id=input('id/d',0); $op=input('op'); - $ops=array('item'=>array('delete'),'list'=>array('deleteall','check_store_update')); - if(!in_array($op,$ops['item'])&&!in_array($op,$ops['list'])){ + $ops=array('item'=>array('delete'),'list'=>array('deleteall','check_store_update'),'else'=>array('auto_check')); + if(!in_array($op,$ops['item'])&&!in_array($op,$ops['list'])&&!in_array($op,$ops['else'])){ $this->error(lang('invalid_op')); } @@ -82,33 +97,76 @@ class Mystore extends BaseController { if(is_array($ids)&&count($ids)>0){ 
$mrule->where(array('id'=>array('in',$ids)))->delete(); } - $this->success(lang('op_success'),'Mystore/collect'); + $this->success(lang('op_success'),'Mystore/rule'); + }elseif($op=='auto_check'){ + + $auto=input('auto/d',0); + model('Config')->setConfig('store_auto_check_rule',$auto); + if($auto){ + $this->success('设置为自动检测更新'); + }else{ + $this->error('设置为手动检测更新'); + } }elseif($op=='check_store_update'){ $ids=input('ids/a'); if(!empty($ids)){ - $ruleList=model('Rule')->where(array('id'=>array('in',$ids)))->column('*','store_id'); + $ruleList=model('Rule')->where(array('id'=>array('in',$ids)))->select(); + $ruleList1=array(); + foreach ($ruleList as $k=>$v){ + $ruleList1[$v['store_id'].'_'.$v['provider_id']]=$v; + } + $ruleList=$ruleList1; + unset($ruleList1); }else{ $ruleList=array(); } $uptimeList=array(); + $updateList=array(); if(!empty($ruleList)){ - $storeIds=implode(',', array_keys($ruleList)); - $uptimeList=get_html('http://www.skycaiji.com/Store/Client/collectUpdate?ids='.rawurlencode($storeIds)); - $uptimeList=json_decode($uptimeList,true); - } - - if(!empty($uptimeList)){ - $updateList=array(); + $provList=array(); + $provStoreIds=array(); + foreach ($ruleList as $v){ + $provList[$v['provider_id']]=$v['provider_id']; + $provStoreIds[$v['provider_id']][$v['store_id']]=$v['store_id']; + } + if(!empty($provList)){ + $provList=model('Provider')->where('id','in',$provList)->column('*','id'); + }else{ + $provList=array(); + } - foreach ($uptimeList as $storeId=>$storeUptime){ - if($storeUptime>0&&$storeUptime>$ruleList[$storeId]['uptime']){ + foreach ($provStoreIds as $provId=>$storeIds){ + $url=''; + $storeIds=implode(',',$storeIds); + $storeIds=rawurlencode($storeIds); + if(empty($provId)){ - $updateList[]=$ruleList[$storeId]['id']; + $url='https://www.skycaiji.com'; + }elseif(!empty($provList[$provId])){ + + $url=$provList[$provId]['url']; + } + $url.='/client/rule/update?ids='.$storeIds; + + $uptimeList=get_html($url,null,array('timeout'=>2)); + 
$uptimeList=json_decode($uptimeList,true); + + if(!empty($uptimeList)){ + + foreach ($uptimeList as $storeId=>$storeUptime){ + if($storeUptime>0&&$storeUptime>$ruleList[$storeId.'_'.$provId]['uptime']){ + + $updateList[]=$ruleList[$storeId.'_'.$provId]['id']; + } + } } } + } + + if(!empty($updateList)){ $this->success('',null,$updateList); }else{ $this->error(); @@ -135,6 +193,26 @@ class Mystore extends BaseController { $this->assign('pagenav',$pagenav); $appList=$appList->all(); + if(!empty($appList)){ + $provList=array(); + foreach ($appList as $k=>$v){ + if(!empty($v['provider_id'])){ + + $provList[$v['provider_id']]=$v['provider_id']; + } + } + $provList=model('Provider')->where('id','in',$provList)->column('*','id'); + + foreach ($appList as $k=>$v){ + $url='https://www.skycaiji.com'; + if(!empty($v['provider_id'])&&!empty($provList[$v['provider_id']])){ + + $url=$provList[$v['provider_id']]['url']; + } + $appList[$k]['store_url']=$url.'/client/plugin/detail?app='.$v['app']; + } + } + $GLOBALS['content_header']='已下载'; $GLOBALS['breadcrumb']=breadcrumb(array(array('url'=>url('Mystore/index'),'title'=>'已下载'),'发布插件')); @@ -145,8 +223,8 @@ class Mystore extends BaseController { $id=input('id/d',0); $op=input('op'); - $ops=array('item'=>array('delete'),'list'=>array('deleteall','check_store_update')); - if(!in_array($op,$ops['item'])&&!in_array($op,$ops['list'])){ + $ops=array('item'=>array('delete'),'list'=>array('deleteall','check_store_update'),'else'=>array('auto_check')); + if(!in_array($op,$ops['item'])&&!in_array($op,$ops['list'])&&!in_array($op,$ops['else'])){ $this->error(lang('invalid_op')); } @@ -162,31 +240,244 @@ class Mystore extends BaseController { $mapp->where(array('id'=>array('in',$ids)))->delete(); } $this->success(lang('op_success'),'Mystore/ReleaseApp'); + }elseif($op=='auto_check'){ + + $auto=input('auto/d'); + model('Config')->setConfig('store_auto_check_plugin',$auto); + if($auto){ + $this->success('设置为自动检测更新'); + }else{ + 
/**
 * List applications: those recorded in the database (paginated) and those that only
 * exist as directories under the configured apps path.
 *
 * Assigns pagenav, dbApps, pathApps and provList to the view.
 */
public function appAction(){
	$mapp=model('App');
	$mprov=model('Provider');

	$page=$mapp->order('uptime desc')->paginate(20);
	$pagenav=$page->render();

	// Database apps keyed by app identifier, plus the provider ids they reference.
	$dbApps=array();
	$provIds=array();
	foreach ($page->all() as $row){
		$row=$row->toArray();
		$row['config']=$mapp->get_config($row['app']);
		$dbApps[$row['app']]=$row;

		try {
			$appClass=$mapp->app_class($row['app'],false);
		}catch (\Exception $ex ){
			$appClass=null;
		}
		if(is_object($appClass)){
			// The class on disk carries a higher version than the stored config.
			if(version_compare($appClass->config['version'], $row['config']['version'],'>')===true){
				$dbApps[$row['app']]['newest_version']=$appClass->config['version'];
			}
		}
		if($row['provider_id']>0){
			$provIds[$row['provider_id']]=$row['provider_id'];
		}
	}

	// Apps found on disk that are not on the current database page.
	$appsPath=config('apps_path');
	$dirApps=is_dir($appsPath)?scandir($appsPath):array();
	$pathApps=array();
	if(!empty($dirApps)){
		foreach( $dirApps as $dirApp ){
			// "." and ".." are directory links, never app folders.
			if('.'==$dirApp||'..'==$dirApp||isset($dbApps[$dirApp])){
				continue;
			}
			try {
				$appClass=$mapp->app_class($dirApp,false);
			}catch (\Exception $ex ){
				$appClass=null;
			}
			if(is_object($appClass)){
				$pathApp=array('config'=>$mapp->clear_config($appClass->config));
				if(!empty($pathApp['config']['website'])){
					$pathApp['provider_id']=$mprov->getIdByUrl($pathApp['config']['website']);
					if($pathApp['provider_id']>0){
						$provIds[$pathApp['provider_id']]=$pathApp['provider_id'];
					}
				}
				$pathApps[$dirApp]=$pathApp;
			}
		}
	}

	$provList=array();
	if($provIds){
		$provList=$mprov->where('id','in',$provIds)->column('*','id');
	}

	if($pathApps){
		// Drop directory apps that do have a database record (on another page).
		$existApps=$mapp->where('app','in',array_keys($pathApps))->column('*','app');
		foreach ($pathApps as $k=>$v){
			if(!empty($existApps[$k])){
				unset($pathApps[$k]);
			}
		}
	}

	$GLOBALS['content_header']='应用程序';
	$GLOBALS['breadcrumb']=breadcrumb(array('应用程序'));

	$this->assign('pagenav',$pagenav);
	$this->assign('dbApps',$dbApps);
	$this->assign('pathApps',$pathApps);
	$this->assign('provList',$provList);

	return $this->fetch();
}

/**
 * Application operations selected by the "op" request parameter:
 *  - "auto_check":         store the automatic update check switch;
 *  - "check_store_update": ask each app's store for its latest version and answer with
 *                          the apps whose store version is higher than the local one.
 */
public function appOpAction(){
	$op=input('op');
	$mapp=model('App');
	if($op=='auto_check'){
		$auto=input('auto/d');
		model('Config')->setConfig('store_auto_check_app',$auto);
		if($auto){
			$this->success('设置为自动检测更新');
		}else{
			$this->error('设置为手动检测更新');
		}
	}elseif($op=='check_store_update'){
		$apps=input('apps/a');

		$mprov=model('Provider');
		$appList=array();   // app => local provider id and version
		$provIds=array();   // provider ids referenced by the apps
		$provApps=array();  // provider id => apps of that provider
		if(!empty($apps)){
			foreach( $apps as $app ){
				try {
					$appClass=$mapp->app_class($app,false);
				}catch (\Exception $ex ){
					$appClass=null;
				}
				if(is_object($appClass)){
					$website=isset($appClass->config['website'])?$appClass->config['website']:null;
					$provId=$mprov->getIdByUrl($website);
					$provIds[$provId]=$provId;
					$appList[$app]=array('provider_id'=>$provId,'version'=>$appClass->config['version']);
					$provApps[$provId][$app]=$app;
				}
			}
		}
		$updateList=array();

		// Query providers only when there is something to look up, as appAction does.
		$provList=array();
		if(!empty($provIds)){
			$provList=$mprov->where('id','in',$provIds)->column('*','id');
		}
		foreach($provApps as $provId=>$provAppNames){
			$isProv=false;
			if(!empty($provList[$provId])){
				// App belongs to a registered third-party provider.
				$appUrl=$provList[$provId]['url'];
				$isProv=true;
			}else{
				$appUrl='https://www.skycaiji.com';
			}
			$url=$appUrl.'/client/app/update?apps='.rawurlencode(implode(',',$provAppNames));

			$storeList=get_html($url,null,array('timeout'=>2));
			$storeList=json_decode($storeList,true);
			if(empty($storeList)||!is_array($storeList)){
				continue;
			}
			foreach ($storeList as $storeApp=>$storeVer){
				// Compare against the local version of the SAME app the store reports on;
				// apps that were not asked about are ignored.
				if(empty($storeVer)||!isset($appList[$storeApp])){
					continue;
				}
				if(version_compare($storeVer,$appList[$storeApp]['version'],'>')){
					$updateList[]=array('app'=>$storeApp,'is_provider'=>$isProv,'app_url'=>$appUrl.'/client/app/detail?app='.rawurlencode($storeApp));
				}
			}
		}

		if(!empty($updateList)){
			$this->success('',null,$updateList);
		}else{
			$this->error('无更新');
		}
	}
}
/**
 * Delete the provider whose id is given by the "id" request parameter.
 */
public function deleteAction(){
	$id=input('id/d');
	if(empty($id)){
		$this->error('id不存在');
	}
	$mprovider=model('Provider');
	$mprovider->where('id',$id)->delete();

	$this->success();
}

/**
 * Set the "enable" flag of the provider whose id is given by the "id" request parameter.
 */
public function enableAction(){
	$id=input('id/d');
	$enable=input('enable/d');
	if(empty($id)){
		$this->error('id不存在');
	}
	$mprovider=model('Provider');
	$mprovider->strict(false)->where('id',$id)->update(array('enable'=>$enable));

	$this->success();
}

/**
 * Add a provider, or edit the one selected by the "id" request parameter.
 *
 * A non-POST request renders the form. A POST request validates the URL, rejects a
 * duplicate URL or domain, fills an empty title from the page's <title> element and
 * then inserts or updates the record.
 */
public function saveAction(){
	$id=input('id/d');
	$mprovider=model('Provider');
	// Stays null when adding, or when the given id matches no record.
	$proData=null;
	if($id>0){
		$proData=$mprovider->where('id',$id)->find();
		if(!empty($proData)){
			$proData=$proData->toArray();
		}
		$this->assign('proData',$proData);
	}
	if(request()->isPost()){
		$url=input('url','','strip_tags');
		$title=input('title');
		$sort=input('sort/d',0);
		$enable=input('enable/d',0);

		$domain=\skycaiji\admin\model\Provider::matchDomain($url);
		if(empty($domain)){
			$this->error('网址格式错误');
		}

		// The URL has to be unique, unless it is the unchanged URL of the edited record.
		if(empty($proData)||strcasecmp($proData['url'], $url)!==0){
			if($mprovider->where('url',$url)->count()>0){
				$this->error('该网址已存在');
			}
		}

		// The domain has to be unique among all other records.
		$domainCond=array(
			'domain'=>$domain
		);
		if(!empty($proData)){
			$domainCond['id']=array('<>',$proData['id']);
		}
		if($mprovider->where($domainCond)->count()>0){
			$this->error($domain.' 域名已存在');
		}

		if(empty($title)){
			// No title given: take it from the <title> element of the provider's page.
			$html=get_html($url,null,array('timeout'=>3));
			if(preg_match('/<title[^>]*>(.*?)<\/title>/is', $html,$title)){
				$title=trim(strip_tags($title[1]));
			}else{
				$title='';
			}
		}

		$newData=array(
			'url'=>$url,
			'title'=>$title,
			'domain'=>$domain,
			'enable'=>$enable,
			'sort'=>$sort
		);
		if(empty($proData)){
			$mprovider->isUpdate(false)->allowField(true)->save($newData);
			$this->success('添加成功','Provider/list');
		}else{
			$mprovider->strict(false)->where('id',$id)->update($newData);
			$this->success('修改成功','Provider/list');
		}
	}else{
		return $this->fetch();
	}
}

/**
 * Save the sort values posted as "newsort" (provider id => sort value).
 */
public function saveallAction(){
	$newsort=input('newsort/a');
	$mprovider=model('Provider');
	if(is_array($newsort)&&count($newsort)>0){
		foreach ($newsort as $key=>$val){
			$mprovider->strict(false)->where('id',intval($key))->update(array('sort'=>intval($val)));
		}
	}
	$this->success('保存成功','Provider/list');
}
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/controller/Setting.php b/SkycaijiApp/admin/controller/Setting.php index eb3e389..86dffa3 100644 --- a/SkycaijiApp/admin/controller/Setting.php +++ b/SkycaijiApp/admin/controller/Setting.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -317,7 +317,7 @@ class Setting extends BaseController { } $this->success(lang('op_success'),'Setting/page_render'); }else{ - $GLOBALS['content_header']='页面渲染设置 '; + $GLOBALS['content_header']='页面渲染设置 '; $GLOBALS['breadcrumb']=breadcrumb(array(array('url'=>url('Setting/page_render'),'title'=>'页面渲染设置'))); $config=$mconfig->getConfig('page_render','data'); $this->assign('config',$config); diff --git a/SkycaijiApp/admin/controller/Store.php b/SkycaijiApp/admin/controller/Store.php index 4be64ba..b5bd22b 100644 --- a/SkycaijiApp/admin/controller/Store.php +++ b/SkycaijiApp/admin/controller/Store.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
/**
 * Render the store page.
 *
 * The "url" request parameter may name a store address. An address that is not an
 * official one must belong to a registered, enabled provider; the provider's URL is then
 * used with a "clientinfo" query parameter appended. Without any address the default
 * store URL is used.
 */
public function indexAction(){
	$storeUrl=input('url','','strip_tags');

	if(!empty($storeUrl)&&!is_official_url($storeUrl)){
		$provData=model('Provider')->where('url',$storeUrl)->find();
		if(empty($provData)){
			$this->error($storeUrl.' 平台未添加');
		}
		if(empty($provData['enable'])){
			$this->error($storeUrl.' 已设置为拒绝访问');
		}

		// Append clientinfo with the separator that fits the provider's URL.
		$separator=(strpos($provData['url'], '?')===false)?'?':'&';
		$storeUrl=$provData['url'].$separator.'clientinfo='.urlencode($GLOBALS['clientinfo']);

		$this->assign('provData',$provData);
	}elseif(empty($storeUrl)){
		$storeUrl='https://www.skycaiji.com/store';
	}

	$GLOBALS['content_header']=lang('store');
	$GLOBALS['breadcrumb']=breadcrumb(array(lang('store')));

	$this->assign('url',$storeUrl);
	return $this->fetch();
}
/**
 * Install a plugin posted by the store as base64 encoded JSON in "plugin".
 *
 * Only plugins of type "release" are installed (through ReleaseApp::addCms); every
 * other type is answered with a failure.
 */
public function installPluginAction(){
	$plugin=json_decode(base64_decode(input('post.plugin')),true);
	if(!is_array($plugin)){
		// Undecodable payload: continue with an empty set so the checks below reject it.
		$plugin=array();
	}
	$plugin['code']=isset($plugin['code'])?base64_decode($plugin['code']):'';
	if(empty($plugin['app'])){
		$this->dispatchJump(false,'标识错误');
	}
	if(empty($plugin['name'])){
		$this->dispatchJump(false,'名称错误');
	}
	if(empty($plugin['type'])){
		$this->dispatchJump(false,'类型错误');
	}
	if(empty($plugin['module'])){
		$this->dispatchJump(false,'模块错误');
	}
	if(empty($plugin['code'])){
		$this->dispatchJump(false,'不是可用的程序');
	}
	// Optional fields default to null when the store did not send them.
	$tpl=!empty($plugin['tpl'])?base64_decode($plugin['tpl']):(isset($plugin['tpl'])?$plugin['tpl']:null);
	$newData=array(
		'app'=>$plugin['app'],
		'name'=>$plugin['name'],
		'desc'=>isset($plugin['desc'])?$plugin['desc']:null,
		'uptime'=>isset($plugin['uptime'])?$plugin['uptime']:null,
	);
	$newData['provider_id']=$this->_getStoreProvid(isset($plugin['store_url'])?$plugin['store_url']:null);

	if($plugin['type']=='release'){
		model('ReleaseApp')->addCms($newData,$plugin['code'],$tpl);
		$this->dispatchJump(true);
	}else{
		$this->dispatchJump(false);
	}
}

/**
 * Install an application posted by the store as base64 encoded JSON in "app".
 *
 * The zip data arrives either in one piece or in "block" numbered parts ("no").
 * Parts are cached on disk; once all are present they are joined and the zip is
 * extracted into the app's directory under the configured apps path.
 */
public function installAppAction(){
	$app=json_decode(base64_decode(input('post.app')),true);
	if(!is_array($app)){
		// Undecodable payload: continue with an empty set so the checks below reject it.
		$app=array();
	}
	if(empty($app['app'])){
		$this->dispatchJump(false,'app标识错误');
	}
	if(!preg_match('/^[\w\-]+$/',$app['app'])){
		$this->dispatchJump(false,'app标识不规范');
	}
	if(empty($app['data'])){
		$this->dispatchJump(false,'数据错误');
	}
	$app['data']=base64_decode($app['data']);

	// Directory where the parts and the joined zip are cached.
	$filePath=RUNTIME_PATH.'/cache_app_zip/'.$app['app'].'/';
	$zipFile=$filePath.$app['app'].'.zip';
	$blockTotal=isset($app['block'])?intval($app['block']):0;

	$complete=false;
	if($blockTotal>0){
		// Multi-part upload: store this part, then check whether all parts have arrived.
		$app['no']=isset($app['no'])?intval($app['no']):0;
		write_dir_file($filePath.$app['no'],$app['data']);

		$blockComplete=true;
		for($i=1;$i<=$blockTotal;$i++){
			if(!file_exists($filePath.$i)){
				$blockComplete=false;
				break;
			}
		}
		if($blockComplete){
			$data='';
			for($i=1;$i<=$blockTotal;$i++){
				$data.=file_get_contents($filePath.$i);
			}
			write_dir_file($zipFile,$data);
			$complete=true;
			unset($data);
		}
	}else{
		// Single-part upload.
		write_dir_file($zipFile,$app['data']);
		$complete=true;
	}

	if($complete){
		$error='';
		if(!class_exists('ZipArchive')){
			// A missing class is not reported as an \Exception, so it has to be detected up front.
			$error='您的服务器不支持ZipArchive解压';
		}else{
			try {
				$zipClass=new \ZipArchive();
				if($zipClass->open($zipFile)===TRUE){
					// extractTo() reports failure through its return value.
					if(!$zipClass->extractTo(config('apps_path').'/'.$app['app'])){
						$error='解压失败';
					}
					$zipClass->close();
				}else{
					$error='解压失败';
				}
			}catch(\Exception $ex){
				$error='您的服务器不支持ZipArchive解压';
			}
		}

		if($error){
			$this->dispatchJump(false,$error);
		}else{
			clear_dir($filePath);
			$this->dispatchJump(true);
		}
	}else{
		// More parts are still expected.
		$this->dispatchJump(true);
	}
}

/**
 * Report the locally installed state of rules, plugins and apps to the store.
 *
 * "store_ids" and "store_apps" are comma separated lists of "<type>_<identifier>".
 * The answer is a JSONP payload whose "data" maps each type to identifier => uptime
 * (rules, release plugins) or identifier => version (apps).
 */
public function updateAction(){
	$storeIds=explode(',', (string)input('store_ids'));
	$storeApps=explode(',', (string)input('store_apps'));

	// NOTE(review): both groups are greedy \w+, so a value holding several underscores is
	// split at its LAST underscore - confirm identifiers never contain one.
	$storeIdList=array();
	foreach ($storeIds as $item){
		if(preg_match('/^(\w+)_(\w+)$/',$item,$m)){
			$storeIdList[$m[1]][$m[2]]=$m[2];
		}
	}

	$storeAppList=array();
	foreach ($storeApps as $item){
		if(preg_match('/^(\w+)_(\w+)$/',$item,$m)){
			$storeAppList[$m[1]][$m[2]]=$m[2];
		}
	}

	$provId=$this->_getStoreProvid(input('store_url'));

	$updateList=array('status'=>1,'data'=>array());

	foreach ($storeIdList as $type=>$ids){
		$cond=array('store_id'=>array('in',$ids),'provider_id'=>$provId,'type'=>$type);
		$list=model('Rule')->field('`id`,`store_id`,`uptime`')->where($cond)->column('uptime','store_id');
		$updateList['data'][$type]=is_array($list)?$list:array();
	}

	foreach ($storeAppList as $type=>$apps){
		if(empty($type)){
			continue;
		}
		$list=array();
		if($type=='release'||$type=='cms'){
			$cond=array('app'=>array('in',$apps),'provider_id'=>$provId);
			$list=model('ReleaseApp')->where($cond)->column('uptime','app');
		}elseif($type=='app'){
			$mapp=model('App');
			foreach ($apps as $app){
				// Guard the lookup like the other app_class() call sites do; an app that
				// cannot be loaded is left out of the answer.
				try {
					$appClass=$mapp->app_class($app,false);
				}catch (\Exception $ex ){
					$appClass=null;
				}
				if(is_object($appClass)&&isset($appClass->config['version'])){
					$list[$app]=$appClass->config['version'];
				}
			}
		}
		$updateList['data'][$type]=is_array($list)?$list:array();
	}
	return jsonp($updateList);
}
siteCertificationAction(){ @@ -142,4 +273,13 @@ class Store extends BaseController { $this->dispatchJump(false,'操作错误!'); } } + /*获取平台域名Id*/ + protected function _getStoreProvid($storeUrl=null){ + $referer=request()->server('HTTP_REFERER'); + if(!empty($referer)){ + $storeUrl=$referer; + } + $provId=model('Provider')->getIdByUrl($storeUrl); + return $provId; + } } \ No newline at end of file diff --git a/SkycaijiApp/admin/controller/Task.php b/SkycaijiApp/admin/controller/Task.php index 3648344..a604d61 100644 --- a/SkycaijiApp/admin/controller/Task.php +++ b/SkycaijiApp/admin/controller/Task.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/controller/Taskgroup.php b/SkycaijiApp/admin/controller/Taskgroup.php index 443e27c..8152950 100644 --- a/SkycaijiApp/admin/controller/Taskgroup.php +++ b/SkycaijiApp/admin/controller/Taskgroup.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/controller/Tool.php b/SkycaijiApp/admin/controller/Tool.php index 4e78c2f..d6e526e 100644 --- a/SkycaijiApp/admin/controller/Tool.php +++ b/SkycaijiApp/admin/controller/Tool.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -425,10 +425,21 @@ class Tool extends BaseController { public function json_treeAction(){ if(request()->isPost()){ $url=input('url','','trim'); + $html=input('html','','trim'); $json=''; + $eCpattern=controller('admin/Cpattern','event'); if(!empty($url)){ - $json=get_html($url); + + $html=get_html($url); } + if(!empty($html)){ + + if(preg_match($eCpattern::$jsonpRegExp,$html,$json)){ + + $json=trim($json['json']).'}'; + } + } + $this->success('','',array('json'=>$json)); }else{ $GLOBALS['content_header']='JSON解析'; @@ -436,5 +447,4 @@ class Tool extends BaseController { return $this->fetch(); } } - } \ No newline at end of file diff --git a/SkycaijiApp/admin/controller/Upgrade.php b/SkycaijiApp/admin/controller/Upgrade.php index 5afa43c..79e7932 100644 --- a/SkycaijiApp/admin/controller/Upgrade.php +++ b/SkycaijiApp/admin/controller/Upgrade.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) 
|-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -22,7 +22,7 @@ class Upgrade extends BaseController{ } /*检测更新*/ public function newVersionAction(){ - $version=get_html('http://www.skycaiji.com/upgrade/program/version?v='.SKYCAIJI_VERSION,null,null,'utf-8'); + $version=get_html('https://www.skycaiji.com/upgrade/program/version?v='.SKYCAIJI_VERSION,null,null,'utf-8'); $version=json_decode($version,true); $new_version=trim($version['new_version']); $cur_version=$GLOBALS['config']['version']; @@ -88,8 +88,8 @@ class Upgrade extends BaseController{ $this->success(); } } - $fileUrl='http://www.skycaiji.com/upgrade/program/getFile?filename='.rawurlencode(base64_encode($fileName)); - $result=\Requests::get($fileUrl,array(),array('timeout'=>100)); + $fileUrl='https://www.skycaiji.com/upgrade/program/getFile?filename='.rawurlencode(base64_encode($fileName)); + $result=\Requests::get($fileUrl,array(),array('timeout'=>100,'verify'=>false)); if(200==$result->status_code){ $newFile=$result->body; @@ -133,7 +133,7 @@ class Upgrade extends BaseController{ unset($md5Files); - $newFileList=get_html('http://www.skycaiji.com/upgrade/program/files',null,array('timeout'=>100),'utf-8'); + $newFileList=get_html('https://www.skycaiji.com/upgrade/program/files',null,array('timeout'=>100),'utf-8'); $newFileList=json_decode($newFileList,true); $downFileList=array(); diff --git a/SkycaijiApp/admin/controller/User.php b/SkycaijiApp/admin/controller/User.php index 4571cdd..2ac353b 100644 --- a/SkycaijiApp/admin/controller/User.php +++ b/SkycaijiApp/admin/controller/User.php @@ -3,9 +3,9 @@ 
|-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -61,7 +61,8 @@ class User extends BaseController { if(!$check['success']){ $this->error($check['msg']); } - $newData['password']=pwd_encrypt($newData['password']); + $newData['salt']=\skycaiji\admin\model\User::rand_salt(); + $newData['password']=\skycaiji\admin\model\User::pwd_encrypt($newData['password'],$newData['salt']); $newGroup=$musergroup->getById($newData['groupid']); if($musergroup->user_level_limit($newGroup['level'])){ $this->error('您不能添加“'.$GLOBALS['user']['group']['name'].'”用户组'); @@ -129,7 +130,8 @@ class User extends BaseController { $this->error($check['msg']); } if(!empty($newData['password'])){ - $newData['password']=pwd_encrypt($newData['password']); + $newData['salt']=\skycaiji\admin\model\User::rand_salt(); + $newData['password']=\skycaiji\admin\model\User::pwd_encrypt($newData['password'],$newData['salt']); } $newGroup=$musergroup->getById($newData['groupid']); if($musergroup->user_level_limit($newGroup['level'])){ diff --git a/SkycaijiApp/admin/event/Collector.php b/SkycaijiApp/admin/event/Collector.php index b370e4a..5987bcd 100644 --- a/SkycaijiApp/admin/event/Collector.php +++ b/SkycaijiApp/admin/event/Collector.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. 
+ | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/event/Cpattern.php b/SkycaijiApp/admin/event/Cpattern.php index 572886a..6902486 100644 --- a/SkycaijiApp/admin/event/Cpattern.php +++ b/SkycaijiApp/admin/event/Cpattern.php @@ -3,53 +3,177 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ namespace skycaiji\admin\event; use skycaiji\admin\model\CacheModel; -class Cpattern extends Collector{ - public $collector; - public $config; - public $release; - public $first_loop_field=null; - public $field_val_list=array(); - public $collect_num=0; - public $collected_field_list=array(); - public $used_source_urls=array(); - public $used_level_urls=array(); - public $used_cont_urls=array(); - public $original_source_urls=null; - public $level_urls_list=array(); - public $cont_urls_list=array(); - public $relation_url_list=array(); - public $used_paging_urls=array(); - public $cur_level_urls=array(); - public $cur_source_url=''; - public $html_cache_list=array(); - public $show_opened_tools=false; +class Cpattern extends CpatternBase{ + /*采集,return false表示终止采集*/ + public function collect($num=10){ + if(!defined('IS_COLLECTING')){ + define('IS_COLLECTING', 1); + } + @session_start(); + 
\think\Session::pause(); - /*对象销毁时处理*/ - public function __destruct(){ - if(!empty($this->used_cont_urls)){ + if(!$this->show_opened_tools){ + $opened_tools=array(); + if($this->config['page_render']){ + $opened_tools[]='页面渲染'; + } + if($GLOBALS['config']['caiji']['download_img']){ + $opened_tools[]='图片本地化'; + } + if($GLOBALS['config']['proxy']['open']){ + $opened_tools[]='代理'; + } + if(!empty($opened_tools)){ + $this->echo_msg('开启功能:'.implode('、', $opened_tools),'black'); + } + if($num>0){ + $this->echo_msg('预计采集'.$num.'条数据','black'); + } + + $this->show_opened_tools=true; + } + + $this->collect_num=$num; + $this->collected_field_list=array(); + + $source_is_url=intval($this->config['source_is_url']); + if(!isset($this->original_source_urls)){ - $usedContUrls=array_keys($this->used_cont_urls); - if(!empty($usedContUrls)&&is_array($usedContUrls)){ - $total=count($usedContUrls); - $limit=800; - $batch=ceil($total/$limit); - for($i=1;$i<=$batch;$i++){ - - $list=array_slice($usedContUrls,($i-1)*$limit,$limit); - if(!empty($list)){ - CacheModel::getInstance('cont_url')->db()->where('cname','in',$list)->delete(); + $this->original_source_urls=array(); + foreach ( $this->config ['source_url'] as $k => $v ) { + if(empty($v)){ + continue; + } + $return_s_urls = $this->convert_source_url ( $v ); + if (is_array ( $return_s_urls )) { + foreach ($return_s_urls as $r_s_u){ + $this->original_source_urls[md5($r_s_u)]=$r_s_u; } + } else { + $this->original_source_urls[md5($return_s_urls)]=$return_s_urls; } } } + if(empty($this->original_source_urls)){ + $this->echo_msg('没有起始页网址!'); + return 'completed'; + } + + if($source_is_url){ + + if(isset($this->used_source_urls['_source_is_url_'])){ + $this->echo_msg('所有起始页采集完毕!','green'); + return 'completed'; + } + }else{ + if(count($this->original_source_urls)<=count($this->used_source_urls)){ + $this->echo_msg('所有起始页采集完毕!','green'); + return 'completed'; + } + } + + $source_interval=$GLOBALS['config']['caiji']['interval']*60; + 
$time_interval_list=array(); + + $source_urls=array(); + $mcacheSource=CacheModel::getInstance('source_url'); + if($source_is_url){ + + $source_urls=$this->original_source_urls; + }else{ + $cacheSources=$mcacheSource->db()->where(array('cname'=>array('in',array_keys($this->original_source_urls))))->column('dateline','cname'); + if(!empty($cacheSources)){ + $count_db_used=0; + $sortSources=array('undb'=>array(),'db'=>array()); + + foreach ($this->original_source_urls as $sKey=>$sVal){ + if(!isset($cacheSources[$sKey])){ + + $sortSources['undb'][$sKey]=$sVal; + }else{ + + $time_interval=abs(NOW_TIME-$cacheSources[$sKey]); + if($time_interval<$source_interval){ + + $this->used_source_urls[$sVal]=1; + $count_db_used++; + $time_interval_list[]=$time_interval; + }else{ + $sortSources['db'][$sKey]=$sVal; + } + } + } + if($count_db_used>0){ + $this->echo_msg($count_db_used.'条已采集起始网址被过滤,下次采集需等待'.($source_interval-max($time_interval_list)).'秒,设置间隔','black'); + if(count($this->original_source_urls)<=count($this->used_source_urls)){ + $this->echo_msg('所有起始页采集完毕!','green'); + return 'completed'; + } + } + $source_urls=array_merge($sortSources['undb'],$sortSources['db']); + unset($sortSources); + unset($cacheSources); + }else{ + $source_urls=$this->original_source_urls; + } + } + $mcollected=model('Collected'); + + if($source_is_url){ + + $this->cont_urls_list['_source_is_url_']=array_values($source_urls); + $source_urls=array('_source_is_url_'=>'_source_is_url_'); + } + + + foreach ($source_urls as $key_source_url=>$source_url){ + $this->cur_source_url=$source_url; + if(array_key_exists($source_url,$this->used_source_urls)){ + + continue; + } + if($source_is_url){ + $this->echo_msg("起始页已转换为内容页网址",'black'); + }else{ + $this->echo_msg("采集起始页:{$source_url}",'green'); + } + if($source_is_url){ + + $this->_collect_fields(); + }else{ + + if(!empty($this->config['level_urls'])){ + + + $this->echo_msg('开始分析多级网址','black'); + $return_msg=$this->_collect_level($source_url,1); + 
if($return_msg=='completed'){ + return $return_msg; + } + }else{ + + $cont_urls=$this->getContUrls($source_url); + $this->cont_urls_list[$source_url]=$this->_collect_unused_cont_urls($cont_urls); + $this->_collect_fields(); + } + } + + if($this->collect_num>0&&count($this->collected_field_list)>=$this->collect_num){ + break; + } + } + + + return $this->collected_field_list; } /** * 优化设置页面post过来的config @@ -57,7 +181,7 @@ class Cpattern extends Collector{ */ public function setConfig($config){ $config['url_complete']=intval($config['url_complete']); - + $config['url_reverse']=intval($config['url_reverse']); $config['page_render']=intval($config['page_render']); $config['url_repeat']=intval($config['url_repeat']); @@ -69,7 +193,7 @@ class Cpattern extends Collector{ if(!is_array($config['request_headers']['custom_vals'])){ $config['request_headers']['custom_vals']=array(); } - + foreach ($config['request_headers']['custom_names'] as $k=>$v){ if(empty($v)){ @@ -80,7 +204,7 @@ class Cpattern extends Collector{ $config['request_headers']['custom_names']=array_values($config['request_headers']['custom_names']); $config['request_headers']['custom_vals']=array_values($config['request_headers']['custom_vals']); } - + foreach ($config['source_url'] as $k=>$v){ if(preg_match('/[\r\n]/', $v)){ @@ -105,8 +229,8 @@ class Cpattern extends Collector{ $config['source_url']=array_unique($config['source_url']); $config['source_url']=array_filter($config['source_url']); $config['source_url']=array_values($config['source_url']); - - + + if(!empty($config['field_list'])){ foreach ($config['field_list'] as $k=>$v){ @@ -122,7 +246,7 @@ class Cpattern extends Collector{ } $config['common_process']=input('process/a',null,'trim'); $config['common_process']=$this->setProcess($config['common_process']); - + if(!empty($config['paging_fields'])){ foreach ($config['paging_fields'] as $k=>$v){ @@ -143,7 +267,7 @@ class Cpattern extends Collector{ 
$config['relation_urls'][$k]=json_decode(url_b64decode($v),true); } } - + $config['url_post']=intval($config['url_post']); @@ -166,7 +290,7 @@ class Cpattern extends Collector{ $config['url_posts']['names']=array_values($config['url_posts']['names']); $config['url_posts']['vals']=array_values($config['url_posts']['vals']); } - + return $config; } public function init($collData){ @@ -213,7 +337,7 @@ class Cpattern extends Collector{ $newConfig['reg_source_cont']=$config['area']; } - + if(empty($config['url_rule_module'])){ @@ -247,7 +371,7 @@ class Cpattern extends Collector{ $newConfig['url_must']=preg_replace('/\\\*([\'\/])/', "\\\\$1", $config['url_must']); $newConfig['url_must']=str_replace('(*)', '[\s\S]*?', $newConfig['url_must']); } - + if(!empty($config['url_ban'])){ @@ -299,21 +423,21 @@ class Cpattern extends Collector{ $luv['url_merge']=$this->set_merge_default('(?P.+)', $luv['url_merge']); } $luv['reg_url_module']=$luv['url_rule_module']; - + if(!empty($luv['url_must'])){ $luv['url_must']=preg_replace('/\\\*([\'\/])/', "\\\\$1", $luv['url_must']); $luv['url_must']=str_replace('(*)', '[\s\S]*?', $luv['url_must']); } - + if(!empty($luv['url_ban'])){ $luv['url_ban']=preg_replace('/\\\*([\'\/])/', "\\\\$1", $luv['url_ban']); $luv['url_ban']=str_replace('(*)', '[\s\S]*?', $luv['url_ban']); } - + $config['level_urls'][$luk]=$luv; $config['new_level_urls'][$luv['name']]=$luv; } @@ -323,6 +447,19 @@ class Cpattern extends Collector{ if(!empty($config['relation_urls'])){ foreach ($config['relation_urls'] as $ruv){ + if(!empty($ruv['area'])){ + if(empty($ruv['area_module'])){ + + $ruv['reg_area']=$this->convert_sign_match($ruv['area']); + $ruv['reg_area']=preg_replace('/\\\*([\'\/])/', "\\\\$1", $ruv['reg_area']); + $ruv['reg_area']=str_replace('(*)', '[\s\S]*?', $ruv['reg_area']); + }else{ + + $ruv['reg_area']=$ruv['area']; + } + $ruv['reg_area_module']=$ruv['area_module']; + } + if(empty($ruv['url_rule_module'])){ 
$ruv['reg_url']=$this->convert_sign_match($ruv['url_rule']); @@ -336,7 +473,7 @@ class Cpattern extends Collector{ $ruv['url_merge']=$this->set_merge_default('(?P.+)', $ruv['url_merge']); } $ruv['reg_url_module']=$ruv['url_rule_module']; - + $relation_urls[$ruv['name']]=$ruv; } } @@ -365,7 +502,7 @@ class Cpattern extends Collector{ } $rDepth++; }while(!empty($rFuPage)); - + if($passRelation){ continue; @@ -381,7 +518,7 @@ class Cpattern extends Collector{ $config['new_relation_urls']=array_merge($config['new_relation_urls'],$rurls); } } - + if(!empty($config['field_list'])){ foreach ($config['field_list'] as $fk=>$fv){ @@ -438,7 +575,7 @@ class Cpattern extends Collector{ $config['paging']['reg_url']=$this->convert_sign_match($config['paging']['url_rule']); $config['paging']['reg_url']=preg_replace('/\\\*([\'\/])/', "\\\\$1", $config['paging']['reg_url']); $config['paging']['reg_url']=str_replace ( '(*)', '[\s\S]*?', $config['paging']['reg_url'] ); - + $config['paging']['url_merge']=$this->set_merge_default($config['paging']['reg_url'], $config['paging']['url_merge']); if(empty($config['paging']['url_merge'])){ @@ -453,22 +590,22 @@ class Cpattern extends Collector{ } $config['paging']['reg_url_module']=$config['paging']['url_rule_module']; } - + if(!empty($config['paging']['url_must'])){ $config['paging']['url_must']=preg_replace('/\\\*([\'\/])/', "\\\\$1", $config['paging']['url_must']); $config['paging']['url_must']=str_replace('(*)', '[\s\S]*?', $config['paging']['url_must']); } - + if(!empty($config['paging']['url_ban'])){ $config['paging']['url_ban']=preg_replace('/\\\*([\'\/])/', "\\\\$1", $config['paging']['url_ban']); $config['paging']['url_ban']=str_replace('(*)', '[\s\S]*?', $config['paging']['url_ban']); } - - + + $module_normal_fields=array(); $module_extract_fields=array(); @@ -491,15 +628,36 @@ class Cpattern extends Collector{ } $config['new_field_list']=array_merge($module_normal_fields,$module_extract_fields,$module_merge_fields); - - + + 
$new_paging_fields=array( - 'normal'=>array(), - 'extract'=>array(), - 'merge'=>array(), + 'normal'=>array(), + 'extract'=>array(), + 'merge'=>array(), ); if(!empty($config['paging_fields'])){ + $pagingFields=array(); + foreach ($config['paging_fields'] as $pfield){ + + $pagingFields[$pfield['field']]=$pfield; + } + if(!empty($pagingFields['::all'])){ + + $fieldAllParams=$pagingFields['::all']; + unset($pagingFields['::all']); + foreach ($config['new_field_list'] as $k=>$v){ + + if(empty($pagingFields[$k])){ + + $fieldAllParams['field']=$k; + $pagingFields[$k]=$fieldAllParams; + } + } + } + $config['paging_fields']=$pagingFields; + unset($pagingFields); + foreach ($config['paging_fields'] as $pfk=>$pfield){ $pfield['delimiter']=str_replace(array('\r','\n'), array("\r","\n"), $pfield['delimiter']); $config['paging_fields'][$pfk]=$pfield; @@ -517,120 +675,34 @@ class Cpattern extends Collector{ } $config['new_paging_fields']=array_merge($new_paging_fields['normal'],$new_paging_fields['extract'],$new_paging_fields['merge']); - + $config=array_merge($config,$newConfig); return $config; } - /*统一:获取网址列表*/ - public function _get_urls($source_url,$config,$is_level=false){ - $is_level=$is_level?'多级':''; - - $html=$this->get_html($source_url); - if(empty($html)){ - return $this->error($is_level.'页面为空'); - } - $base_url=$this->match_base_url($source_url, $html); - $domain_url=$this->match_domain_url($source_url); - - if(!empty($config['reg_area'])){ - if(empty($config['reg_area_module'])){ - - if(preg_match('/'.$config['reg_area'].'/i',$html,$source_cont)){ - if(isset($source_cont['match'])){ - $html=$source_cont['match']; - }else{ - $html=$source_cont[0]; - } - }else{ - $html=''; - } - }elseif('json'==$config['reg_area_module']){ - $html=$this->rule_module_json_data(array('json'=>$config['reg_area'],'json_arr'=>'jsonencode'),json_decode($html,true)); - }elseif('xpath'==$config['reg_area_module']){ - 
$html=$this->rule_module_xpath_data(array('xpath'=>$config['reg_area'],'xpath_attr'=>'outerHtml'),$html); - }else{ - $html=''; - } - if(empty($html)){ - return $this->error("未提取到{$is_level}区域内容!"); - } - } - $cont_urls=$this->rule_match_urls($config, $html); - $cont_urls1=array(); - - - if(isset($this->config['url_op'])){ + /*采集级别网址*/ + public function collLevelUrls($source_url,$curLevel=1){ + $curLevel=$curLevel>0?$curLevel:0; + if($curLevel>0){ - $op_not_complete=in_array('not_complete',$this->config['url_op'])?true:false; - }else{ - if(isset($this->config['url_complete'])){ + $nextLevel=0; + if(!empty($this->config['level_urls'])){ - $op_not_complete=$this->config['url_complete']?false:true; - }else{ - - $op_not_complete=false; - } - } - - foreach ($cont_urls as $cont_url){ - if(!$op_not_complete){ - - $cont_url=$this->create_complete_url($cont_url, $base_url, $domain_url); - } - if(!empty($config['url_must'])){ - - if(!preg_match('/'.$config['url_must'].'/i', $cont_url)){ - continue; - } - } - - if(!empty($config['url_ban'])){ - - if(preg_match('/'.$config['url_ban'].'/i', $cont_url)){ - continue; - } - } - if(!empty($cont_url)){ - if(strpos($cont_url,' ')==false){ + if(!empty($this->config['level_urls'][$curLevel-1])){ - - - $cont_urls1[]=$cont_url; - } - } - } - $cont_urls=$cont_urls1; - unset($cont_urls1); - - if(empty($cont_urls)){ - return $this->error("未获取到".($is_level?$is_level:'内容')."网址!"); - }else{ - if(!empty($this->config['url_reverse'])){ - - $cont_urls=array_reverse($cont_urls); - } - if(!empty($this->config['url_post'])){ - - $postParams=array(); - if(!empty($this->config['url_posts']['names'])){ - foreach ($this->config['url_posts']['names'] as $k=>$v){ - if (!empty($v)){ - $postParams[]=$v.'='.rawurlencode($this->config['url_posts']['vals'][$k]); - } - } - } - if(!empty($postParams)){ - - $postParams=implode('&', $postParams); - foreach ($cont_urls as $k=>$v){ - $v.=strpos($v,'?')===false?'?':'&'; - $v.=$postParams; - $cont_urls[$k]=$v; + 
if(!empty($this->config['level_urls'][$curLevel])){ + + $nextLevel=$curLevel+1; } } } - return array_values($cont_urls); + + $cont_urls=$this->getLevelUrls($source_url,$curLevel); + }else{ + + $cont_urls=$this->getContUrls($source_url); } + + return array('urls'=>$cont_urls,'levelName'=>$this->config['level_urls'][$curLevel-1]['name'],'nextLevel'=>$nextLevel); } /*获取内容网址*/ @@ -640,13 +712,13 @@ class Cpattern extends Collector{ } $config=array( - 'reg_area'=>$this->config['reg_source_cont'], - 'reg_area_module'=>$this->config['area_module'], - 'reg_url'=>$this->config['reg_source_cont_url'], - 'reg_url_module'=>$this->config['url_rule_module'], - 'url_merge'=>$this->config['url_merge'], - 'url_must'=>$this->config['url_must'], - 'url_ban'=>$this->config['url_ban'], + 'reg_area'=>$this->config['reg_source_cont'], + 'reg_area_module'=>$this->config['area_module'], + 'reg_url'=>$this->config['reg_source_cont_url'], + 'reg_url_module'=>$this->config['url_rule_module'], + 'url_merge'=>$this->config['url_merge'], + 'url_must'=>$this->config['url_must'], + 'url_ban'=>$this->config['url_ban'], ); return $this->_get_urls($source_url, $config); } @@ -660,114 +732,13 @@ class Cpattern extends Collector{ if(empty($config['reg_url'])){ return $this->error('必须填写多级“提取网址规则”'); } - + if(empty($parent_url)){ return $this->error('请输入父级网址'); } return $this->_get_urls($parent_url, $config,true); } - /** - * 规则匹配网址 - * @param array $config 配置参数 - * @param string $html 源码 - * @param bool $whole 完全匹配模式 - * - */ - public function rule_match_urls($config,$html,$whole=false){ - $cont_urls=array(); - if(!empty($config['reg_url'])&&!empty($config['url_merge'])){ - - $sign_match=$this->sign_addslashes(cp_sign('match','(?P\d*)')); - if(preg_match_all('/'.$sign_match.'/i', $config['url_merge'],$match_signs)){ - - $url_merge=true; - if(empty($config['reg_url_module'])){ - - if(preg_match('/\(\?P/i', $config['reg_url'])){ - - 
if(preg_match_all('/'.$config['reg_url'].'/i',$html,$cont_urls,PREG_SET_ORDER)){ - if($config['url_merge']==cp_sign('match')){ - - $url_merge=false; - foreach ($cont_urls as $k=>$v){ - $cont_urls[$k]=$v['match']; - } - } - } - }else{ - - if($whole){ - - if(preg_match_all('/'.$config['reg_url'].'/i',$html,$cont_urls)){ - $cont_urls=$cont_urls[0]; - - if($config['url_merge']==cp_sign('match')){ - - $url_merge=false; - }else{ - - foreach ($cont_urls as $k=>$v){ - $cont_urls[$k]=array( - 'match'=>$v - ); - } - } - } - } - } - }elseif(in_array($config['reg_url_module'],array('xpath','json'))){ - - if('xpath'==$config['reg_url_module']){ - - $cont_urls=$this->rule_module_xpath_data ( array ( - 'xpath' => $config['reg_url'], - 'xpath_attr' => 'href', - 'xpath_multi'=>true, - 'xpath_multi_type'=>'loop' - ),$html); - $cont_urls=is_array($cont_urls)?$cont_urls:array(); - }elseif('json'==$config['reg_url_module']){ - - $cont_urls=$this->rule_module_json_data(array('json'=>$config['reg_url'],'json_arr'=>'_original_'),json_decode($html,true)); - if(empty($cont_urls)){ - $cont_urls=array(); - }elseif(!is_array($cont_urls)){ - $cont_urls=array($cont_urls); - } - } - - if($config['url_merge']==cp_sign('match')){ - - $url_merge=false; - }else{ - - foreach ($cont_urls as $k=>$v){ - $cont_urls[$k]=array( - 'match'=>$v - ); - } - } - } - - if($url_merge){ - - foreach ($cont_urls as $k=>$v){ - $re_match=array(); - foreach($match_signs['num'] as $ms_k=>$ms_v){ - - $re_match[$ms_k]=$v['match'.$ms_v]; - } - - $cont_urls[$k]=str_replace($match_signs[0], $re_match, $config['url_merge']); - } - } - } - } - $cont_urls=is_array($cont_urls)?array_unique($cont_urls):array(); - $cont_urls=array_values($cont_urls); - return $cont_urls; - } - + /*获取分页链接*/ public function getPagingUrls($from_url,$html,$is_test=false){ $paging_urls=array(); @@ -776,14 +747,14 @@ class Cpattern extends Collector{ if(empty($html)){ $html=$this->get_html($from_url); } - + if(!empty($this->config['paging']['reg_url'])){ 
if(!empty($this->config['new_paging_fields'])){ $base_url=$this->match_base_url($from_url, $html); $domain_url=$this->match_domain_url($from_url); - + $paging_area=''; if(!empty($this->config['paging']['reg_area'])){ @@ -807,7 +778,7 @@ class Cpattern extends Collector{ } } }elseif('json'==$this->config['paging']['reg_area_module']){ - $paging_area=$this->rule_module_json_data(array('json'=>$this->config['paging']['reg_area'],'json_arr'=>'jsonencode'),json_decode($html,true)); + $paging_area=$this->rule_module_json_data(array('json'=>$this->config['paging']['reg_area'],'json_arr'=>'jsonencode'),$html); }elseif('xpath'==$this->config['paging']['reg_area_module']){ $paging_area=$this->rule_module_xpath_data(array('xpath'=>$this->config['paging']['reg_area'],'xpath_attr'=>'outerHtml'),$html); } @@ -817,7 +788,7 @@ class Cpattern extends Collector{ } if(!empty($paging_area)){ - + if(!empty($this->config['paging']['url_complete'])){ @@ -826,9 +797,9 @@ class Cpattern extends Collector{ return \skycaiji\admin\event\Cpattern::create_complete_url($matche_p_a[1], $base_url, $domain_url); },$paging_area); } - + $m_paging_urls=$this->rule_match_urls($this->config['paging'],$paging_area,true); - + foreach ($m_paging_urls as $purl){ if(!empty($this->config['paging']['url_must'])){ @@ -843,19 +814,19 @@ class Cpattern extends Collector{ continue; } } - + if($from_url==$purl){ continue; } - + if(strpos($purl,' ')==false){ $paging_urls[]=$purl; } - + } - + if(!empty($paging_urls)){ $paging_urls=array_filter($paging_urls); @@ -891,13 +862,13 @@ class Cpattern extends Collector{ } /*设置字段值*/ - public function setField($field_config,$cont_url,$html){ - $cont_url_md5=md5($cont_url); - + public function setField($field_config,$cur_url,$html,$cont_url){ + $cur_url_md5=md5($cur_url); + $field_process=$field_config['process']; $field_params=$field_config['field']; $module=strtolower($field_params['module']); - + if(!empty($field_params['source'])&&in_array($module, 
array('rule','xpath','json','auto'))){ $field_source_url=''; @@ -909,7 +880,7 @@ class Cpattern extends Collector{ }elseif(preg_match('/^relation_url:(.+)$/i', $field_params['source'],$relationName)){ $relationName=$relationName[1]; - $field_source_url=$this->getRelationUrl($relationName, $cont_url, $html); + $field_source_url=$this->getRelationUrl($relationName, $cur_url, $html); $source_echo_msg.="关联页“{$relationName}”"; }elseif(preg_match('/^level_url:(.+)$/i', $field_params['source'],$levelName)){ @@ -930,8 +901,8 @@ class Cpattern extends Collector{ return; } - if($field_source_url!=$cont_url){ - $cont_url=$field_source_url; + if($field_source_url!=$cur_url){ + $cur_url=$field_source_url; $this->echo_msg($source_echo_msg.":{$field_source_url}",'black'); $html=$this->get_html($field_source_url,true); } @@ -940,17 +911,17 @@ class Cpattern extends Collector{ static $fieldArr2=array('auto','json'); static $baseUrls=array(); static $domainUrls=array(); - - $urlMd5=md5($cont_url); + + $urlMd5=md5($cur_url); if(empty($baseUrls[$urlMd5])){ - $baseUrls[$urlMd5]=$this->match_base_url($cont_url, $html); + $baseUrls[$urlMd5]=$this->match_base_url($cur_url, $html); } if(empty($domainUrls[$urlMd5])){ - $domainUrls[$urlMd5]=$this->match_domain_url($cont_url); + $domainUrls[$urlMd5]=$this->match_domain_url($cur_url); } $base_url=$baseUrls[$urlMd5]; $domain_url=$domainUrls[$urlMd5]; - + $val=''; $field_func='field_module_'.$module; if(method_exists($this, $field_func)){ @@ -958,21 +929,21 @@ class Cpattern extends Collector{ if('extract'==$module){ - if(is_array($this->field_val_list[$field_params['extract']]['values'][$cont_url_md5])){ + if(is_array($this->field_val_list[$field_params['extract']]['values'][$cur_url_md5])){ $val=array(); - foreach ($this->field_val_list[$field_params['extract']]['values'][$cont_url_md5] as $k=>$v){ + foreach ($this->field_val_list[$field_params['extract']]['values'][$cur_url_md5] as $k=>$v){ $extract_field_val=array( 'value'=>$v, - 
'img'=>$this->field_val_list[$field_params['extract']]['imgs'][$cont_url_md5][$k], + 'img'=>$this->field_val_list[$field_params['extract']]['imgs'][$cur_url_md5][$k], ); $val[$k]=$this->field_module_extract($field_params, $extract_field_val, $base_url, $domain_url); } }else{ $extract_field_val=array( - 'value'=>$this->field_val_list[$field_params['extract']]['values'][$cont_url_md5], - 'img'=>$this->field_val_list[$field_params['extract']]['imgs'][$cont_url_md5], + 'value'=>$this->field_val_list[$field_params['extract']]['values'][$cur_url_md5], + 'img'=>$this->field_val_list[$field_params['extract']]['imgs'][$cur_url_md5], ); $val=$this->field_module_extract($field_params, $extract_field_val, $base_url, $domain_url); } @@ -983,8 +954,8 @@ class Cpattern extends Collector{ $cur_field_val_list=array(); foreach ($this->field_val_list as $k=>$v){ $cur_field_val_list[$k]=array( - 'value'=>$v['values'][$cont_url_md5], - 'img'=>$v['imgs'][$cont_url_md5] + 'value'=>$v['values'][$cur_url_md5], + 'img'=>$v['imgs'][$cur_url_md5] ); } $val=$this->field_module_merge($field_params,$cur_field_val_list); @@ -992,27 +963,43 @@ class Cpattern extends Collector{ $val=array(); - foreach ($this->field_val_list[$this->first_loop_field]['values'][$cont_url_md5] as $v_k=>$v_v){ + foreach ($this->field_val_list[$this->first_loop_field]['values'][$cur_url_md5] as $v_k=>$v_v){ $cur_field_val_list=array(); foreach ($this->field_val_list as $k=>$v){ $cur_field_val_list[$k]=array( - 'value'=>(is_array($v['values'][$cont_url_md5])?$v['values'][$cont_url_md5][$v_k]:$v['values'][$cont_url_md5]), - 'img'=>(is_array($v['imgs'][$cont_url_md5][$v_k])?$v['imgs'][$cont_url_md5][$v_k]:$v['imgs'][$cont_url_md5]) + 'value'=>(is_array($v['values'][$cur_url_md5])?$v['values'][$cur_url_md5][$v_k]:$v['values'][$cur_url_md5]), + 'img'=>(is_array($v['imgs'][$cur_url_md5][$v_k])?$v['imgs'][$cur_url_md5][$v_k]:$v['imgs'][$cur_url_md5]) ); } $val[$v_k]=$this->field_module_merge($field_params,$cur_field_val_list); 
} } - }elseif(in_array($module,$fieldArr1)){ - $val=$this->$field_func($field_params); + + if($module=='words'){ + + $val=$this->$field_func($field_params); + }else{ + + if(empty($this->first_loop_field)){ + + $val=$this->$field_func($field_params); + }else{ + + $val=array(); + + foreach ($this->field_val_list[$this->first_loop_field]['values'][$cur_url_md5] as $v_k=>$v_v){ + $val[$v_k]=$this->$field_func($field_params); + } + } + } }elseif(in_array($module,$fieldArr2)){ - $val=$this->$field_func($field_params,$html,$cont_url); + $val=$this->$field_func($field_params,$html,$cur_url); }else{ $val=$this->$field_func($field_params,$html); } } - + $vals=null; if(is_array($val)){ @@ -1023,23 +1010,66 @@ class Cpattern extends Collector{ $is_loop=false; $vals=array($val); } - + $field_name=$field_params['name']; if(!isset($this->field_val_list[$field_name])){ $this->field_val_list[$field_name]=array('values'=>array(),'imgs'=>array()); } + + $cont_url_md5=empty($cont_url)?$cur_url_md5:md5($cont_url); foreach ($vals as $v_k=>$val){ + $loopIndex=$is_loop?$v_k:-1; if(!empty($field_process)){ - $val=$this->processField($val,$field_process); + $val=$this->process_field($val,$field_process,$cur_url_md5,$loopIndex,$cont_url_md5); } if(!empty($this->config['common_process'])){ - $val=$this->processField($val,$this->config['common_process']); + $val=$this->process_field($val,$this->config['common_process'],$cur_url_md5,$loopIndex,$cont_url_md5); } - + if(isset($this->exclude_cont_urls[$cont_url_md5][$cur_url_md5])){ + + if(empty($this->first_loop_field)){ + + foreach ($this->field_val_list as $f_k=>$f_v){ + + unset($this->field_val_list[$f_k]['values'][$cur_url_md5]); + unset($this->field_val_list[$f_k]['imgs'][$cur_url_md5]); + } + return; + }else{ + + if(isset($this->exclude_cont_urls[$cont_url_md5][$cur_url_md5][$loopIndex])){ + + if(!$is_loop){ + + foreach ($this->field_val_list as $f_k=>$f_v){ + + unset($this->field_val_list[$f_k]['values'][$cur_url_md5]); + 
unset($this->field_val_list[$f_k]['imgs'][$cur_url_md5]); + } + return; + }else{ + + foreach ($this->field_val_list as $f_k=>$f_v){ + + if(is_array($this->field_val_list[$f_k]['values'][$cur_url_md5])){ + + unset($this->field_val_list[$f_k]['values'][$cur_url_md5][$v_k]); + } + if(is_array($this->field_val_list[$f_k]['imgs'][$cur_url_md5])){ + + unset($this->field_val_list[$f_k]['imgs'][$cur_url_md5][$v_k]); + } + } + continue; + } + } + } + } + $val=preg_replace_callback('/(?<=\bhref\=[\'\"])([^\'\"]*)(?=[\'\"])/i',function($matche) use ($base_url,$domain_url){ @@ -1048,17 +1078,17 @@ class Cpattern extends Collector{ $val=preg_replace_callback('/(?<=\bsrc\=[\'\"])([^\'\"]*)(?=[\'\"])/i',function($matche) use ($base_url,$domain_url){ return \skycaiji\admin\event\Cpattern::create_complete_url($matche[1], $base_url, $domain_url); },$val); - + if($is_loop){ - if(!isset($this->field_val_list[$field_name]['values'][$cont_url_md5])){ - $this->field_val_list[$field_name]['values'][$cont_url_md5]=array(); - $this->field_val_list[$field_name]['imgs'][$cont_url_md5]=array(); + if(!isset($this->field_val_list[$field_name]['values'][$cur_url_md5])){ + $this->field_val_list[$field_name]['values'][$cur_url_md5]=array(); + $this->field_val_list[$field_name]['imgs'][$cur_url_md5]=array(); } - $this->field_val_list[$field_name]['values'][$cont_url_md5][$v_k]=$val; + $this->field_val_list[$field_name]['values'][$cur_url_md5][$v_k]=$val; }else{ - $this->field_val_list[$field_name]['values'][$cont_url_md5]=$val; + $this->field_val_list[$field_name]['values'][$cur_url_md5]=$val; } if(!empty($GLOBALS['config']['caiji']['download_img'])&&!empty($val)){ @@ -1076,35 +1106,35 @@ class Cpattern extends Collector{ $valImgs[]=$matche[1]; return $matche[1]; },$val); - + if($noImgVal!=$val){ if($is_loop){ - $this->field_val_list[$field_name]['values'][$cont_url_md5][$v_k]=$noImgVal; + $this->field_val_list[$field_name]['values'][$cur_url_md5][$v_k]=$noImgVal; }else{ - 
$this->field_val_list[$field_name]['values'][$cont_url_md5]=$noImgVal; + $this->field_val_list[$field_name]['values'][$cur_url_md5]=$noImgVal; } } - + if(!empty($valImgs)){ $valImgs=array_unique($valImgs); $valImgs=array_values($valImgs); if($is_loop){ - $this->field_val_list[$field_name]['imgs'][$cont_url_md5][$v_k]=$valImgs; + $this->field_val_list[$field_name]['imgs'][$cur_url_md5][$v_k]=$valImgs; }else{ - $this->field_val_list[$field_name]['imgs'][$cont_url_md5]=$valImgs; + $this->field_val_list[$field_name]['imgs'][$cur_url_md5]=$valImgs; } } - } + } } } /*设置分页的字段列表值*/ public function setPagingFields($cont_url,$page_url){ $contMd5=md5($cont_url); $pageMd5=md5($page_url); - + if(empty($page_url)){ return $this->error('请输入分页网址'); } @@ -1115,18 +1145,18 @@ class Cpattern extends Collector{ $this->set_html_interval(); $this->echo_msg("——采集分页:{$page_url}",'black'); - + $html=$this->get_html($page_url); if(empty($html)){ return $this->error('分页获取失败:'.$page_url); } - + if(!isset($this->used_paging_urls[$contMd5][$pageMd5])){ $this->used_paging_urls[$contMd5][$pageMd5]=$page_url; foreach ($this->config['new_paging_fields'] as $v){ - $this->setField($this->config['new_field_list'][$v['field']],$page_url,$html); + $this->setField($this->config['new_field_list'][$v['field']],$page_url,$html,$cont_url); } } @@ -1147,37 +1177,7 @@ class Cpattern extends Collector{ } } } - - - public function match_rule($html,$rule,$merge,$multi=false,$multi_str=''){ - $val=''; - $sign_match=$this->sign_addslashes(cp_sign('match','(?P\d*)')); - if(!empty($rule)&&preg_match_all('/'.$sign_match.'/i',$merge,$match_signs)){ - - $multiStr=''; - if(!empty($multi)){ - - preg_match_all('/'.$rule.'/i',$html,$match_conts,PREG_SET_ORDER); - $multiStr=str_replace(array('\r','\n'), array("\r","\n"), $multi_str); - }else{ - if(preg_match('/'.$rule.'/i', $html,$match_cont)){ - $match_conts=array($match_cont); - } - } - $curI=0; - foreach ($match_conts as $match_cont){ - $curI++; - - $re_match=array(); - 
foreach($match_signs['num'] as $ms_k=>$ms_v){ - $re_match[$ms_k]=$match_cont['match'.$ms_v]; - } - $val.=($curI<=1?'':$multiStr).str_replace($match_signs[0], $re_match, $merge); - } - } - return $val; - } - + /** * 获取关联页网址 * @param unknown $name 关联页名称 @@ -1198,9 +1198,15 @@ class Cpattern extends Collector{ return ''; } + if(empty($relation_url['page'])){ if(!isset($this->relation_url_list[$cont_url][$name])){ + $html=$this->rule_match_area($relation_url, $html); + if(empty($html)){ + + return ''; + } $relationUrl=$this->rule_match_urls($relation_url, $html); $relationUrl=(is_array($relationUrl)&&!empty($relationUrl))?reset($relationUrl):''; $this->relation_url_list[$cont_url][$name]=$relationUrl; @@ -1233,13 +1239,18 @@ class Cpattern extends Collector{ return ''; } - + krsort($depth_pages); $contPage=reset($depth_pages); $relationUrl=''; if(isset($contPage)){ if(!isset($this->relation_url_list[$cont_url][$contPage])){ + $html=$this->rule_match_area($this->config['new_relation_urls'][$contPage], $html); + if(empty($html)){ + + return ''; + } $relationUrl=$this->rule_match_urls($this->config['new_relation_urls'][$contPage], $html); $relationUrl=(is_array($relationUrl)&&!empty($relationUrl))?reset($relationUrl):''; $this->relation_url_list[$cont_url][$contPage]=$relationUrl; @@ -1250,7 +1261,7 @@ class Cpattern extends Collector{ $depth_pages=array_slice($depth_pages, 1); $depth_pages=is_array($depth_pages)?$depth_pages:array(); $depth_pages[]=$relation_url['name']; - + foreach ($depth_pages as $page){ if(empty($relationUrl)){ @@ -1259,26 +1270,27 @@ class Cpattern extends Collector{ if(!isset($this->relation_url_list[$cont_url][$page])){ $relationHtml=$this->get_html($relationUrl,true); + $relationHtml=$this->rule_match_area($this->config['new_relation_urls'][$page], $relationHtml); if(empty($relationHtml)){ return ''; } $relationUrl=$this->rule_match_urls($this->config['new_relation_urls'][$page],$relationHtml); 
$relationUrl=(is_array($relationUrl)&&!empty($relationUrl))?reset($relationUrl):''; - + $this->relation_url_list[$cont_url][$page]=$relationUrl; }else{ $relationUrl=$this->relation_url_list[$cont_url][$page]; } } } + + + + + - - - - - return $relationUrl; } @@ -1286,7 +1298,7 @@ class Cpattern extends Collector{ public function getFields($cont_url){ $this->field_val_list=array(); $this->first_loop_field=null; - + if(empty($cont_url)){ return $this->error('请输入内容页网址'); } @@ -1298,14 +1310,14 @@ class Cpattern extends Collector{ return $this->error('抓取页面失败'); } foreach($this->config['new_field_list'] as $field_config){ - $this->setField($field_config,$cont_url,$html); + $this->setField($field_config,$cont_url,$html,$cont_url); } $paging_urls=$this->getPagingUrls($cont_url,$html); if(!empty($paging_urls)){ $this->setPagingFields($cont_url,reset($paging_urls)); } - + $val_list=array(); if(!empty($this->field_val_list)){ if(empty($this->first_loop_field)){ @@ -1313,7 +1325,7 @@ class Cpattern extends Collector{ foreach ($this->field_val_list as $fieldName=>$fieldVal){ $val_values=array_filter($fieldVal['values']); $val_values=implode($this->config['new_paging_fields'][$fieldName]['delimiter'], $val_values); - + $val_imgs=array(); if(!empty($fieldVal['imgs'])){ foreach ($fieldVal['imgs'] as $v){ @@ -1369,509 +1381,10 @@ class Cpattern extends Collector{ } return $val_list?$val_list:array(); } - /** - * 规则匹配,方法可调用,$field_params传入规则参数 - * @param array $field_params - * @param string $html - * @return string - */ - public function field_module_rule($field_params,&$html){ - - $val=''; - $sign_match=$this->sign_addslashes(cp_sign('match','(?P\d*)')); - if(!empty($field_params['reg_rule'])&&preg_match_all('/'.$sign_match.'/i', $field_params['rule_merge'],$match_signs)){ - - $multiStr=''; - $is_loop=false; - if(!empty($field_params['rule_multi'])){ - - preg_match_all('/'.$field_params['reg_rule'].'/i',$html,$match_conts,PREG_SET_ORDER); - 
$is_loop='loop'==$field_params['rule_multi_type']?true:false; - if($is_loop){ - if(empty($this->first_loop_field)){ - - $this->first_loop_field=$field_params['name']; - } - $val=array(); - }else{ - $multiStr=str_replace(array('\r','\n'), array("\r","\n"), $field_params['rule_multi_str']); - } - }else{ - if(preg_match('/'.$field_params['reg_rule'].'/i', $html,$match_cont)){ - $match_conts=array($match_cont); - } - } - - $curI=0; - if(is_array($match_conts)){ - foreach ($match_conts as $match_cont){ - $curI++; - - $re_match=array(); - foreach($match_signs['num'] as $ms_k=>$ms_v){ - $re_match[$ms_k]=$match_cont['match'.$ms_v]; - } - $contVal=str_replace($match_signs[0], $re_match, $field_params['rule_merge']); - if($is_loop){ - - $val[]=$contVal; - }else{ - - $val.=($curI<=1?'':$multiStr).$contVal; - } - } - } - } - return $val; - } - /** - * xpath规则,方法可调用,$field_params传入规则参数 - * @param array $field_params - * @param string $html - * @return string - */ - public function field_module_xpath($field_params,$html){ - if(!empty($field_params['xpath_multi'])){ - - if('loop'==$field_params['xpath_multi_type']){ - - if(empty($this->first_loop_field)){ - - $this->first_loop_field=$field_params['name']; - } - } - } - return $this->rule_module_xpath_data($field_params,$html); - } - public function rule_module_xpath_data($field_params,$html){ - $vals=''; - if(!empty($field_params['xpath'])){ - $dom=new \DOMDocument; - $libxml_previous_state = libxml_use_internal_errors(true); - @$dom->loadHTML(''.$html); - - $dom->normalize(); - - $xPath = new \DOMXPath($dom); - - $xpath_attr=strtolower($field_params['xpath_attr']); - $xpath_attr='custom'==$xpath_attr?strtolower($field_params['xpath_attr_custom']):$xpath_attr; - - $normal_attr=true; - if(in_array($xpath_attr,array('innerhtml','outerhtml','text'))){ - - $normal_attr=false; - } - $xpath_q=trim($field_params['xpath']); - if(!empty($xpath_attr)){ - - if(preg_match('/\/\@[\w\-]+$/', $xpath_q)){ - - 
$xpath_q=preg_replace('/\@[\w\-]+$/', '', $xpath_q); - } - if($normal_attr){ - - $xpath_q=$xpath_q.(preg_match('/\/$/', $xpath_q)?'':'/').'@'.$xpath_attr; - } - }else{ - - if(!preg_match('/\/\@[\w\-]+$/', $xpath_q)){ - - $xpath_attr='innerhtml'; - $normal_attr=false; - } - } - - $nodes = $xPath->query($xpath_q); - - $multiStr=''; - $is_loop=false; - if(!empty($field_params['xpath_multi'])){ - - $is_loop='loop'==$field_params['xpath_multi_type']?true:false; - if($is_loop){ - - - - - $vals=array(); - }else{ - - $multiStr=str_replace(array('\r','\n'), array("\r","\n"), $field_params['xpath_multi_str']); - } - } - - $curI=0; - foreach ($nodes as $node){ - $curI++; - $val=($curI<=1?'':$multiStr); - if($normal_attr){ - - $val.=$node->nodeValue; - }else{ - - switch ($xpath_attr){ - case 'innerhtml': - $nchilds = $node->childNodes; - foreach ($nchilds as $nchild){ - $val .= $nchild->ownerDocument->saveHTML($nchild); - } - break; - case 'outerhtml':$val.=$node->ownerDocument->saveHTML($node);break; - case 'text': - - - $nchilds = $node->childNodes; - foreach ($nchilds as $nchild){ - $val .= $nchild->ownerDocument->saveHTML($nchild); - } - $val=$this->filter_html_tags($val, array('style','script','object')); - $val=strip_tags($val); - break; - } - } - - if($is_loop){ - - $vals[]=$val; - }else{ - $vals.=$val; - } - - if(empty($field_params['xpath_multi'])){ - - break; - } - } - - libxml_clear_errors(); - - } - return $vals; - } - - /*自动获取*/ - public function field_module_auto($field_params,&$html,$cur_url){ - switch (strtolower($field_params['auto'])){ - case 'title':$val=$this->get_title($html);break; - case 'content':$val=$this->get_content($html);break; - case 'keywords':$val=$this->get_keywords($html);break; - case 'description':$val=$this->get_description($html);break; - case 'url':$val=$cur_url;break; - } - return $val; - } - public function field_module_words($field_params){ - - return $field_params['words']; - } - public function field_module_num($field_params){ - - 
$start=intval($field_params['num_start']); - $end=intval($field_params['num_end']); - return rand($start, $end); - } - public function field_module_time($field_params){ - $val=''; - $start=empty($field_params['time_start'])?NOW_TIME:strtotime($field_params['time_start']); - $end=empty($field_params['time_end'])?NOW_TIME:strtotime($field_params['time_end']); - $time=rand($start, $end); - if(empty($field_params['time_stamp'])){ - - $fmt=empty($field_params['time_format'])?'Y-m-d H:i': - str_replace(array('[年]','[月]','[日]','[时]','[分]','[秒]'), array('Y','m','d','H','i','s'), $field_params['time_format']); - $val=date($fmt,$time); - }else{ - $val=$time; - } - return $val; - } - public function field_module_list($field_params){ - $val=''; - if(preg_match_all('/[^\r\n]+/', $field_params['list'],$str_list)){ - $str_list=$str_list[0]; - $randi=array_rand($str_list,1); - $val=$str_list[$randi]; - } - return $val; - } - public function field_module_merge($field_params,$val_list){ - $val=''; - - if(preg_match_all('/\[\x{5b57}\x{6bb5}\:(.+?)\]/u', $field_params['merge'],$match_fields)){ - $val=$field_params['merge']; - - for($i=0;$irule_module_json_data($field_params,$jsonList[$jsonKey]); - return $val; - } - public function rule_module_json_data($field_params,$jsonArr){ - $val=''; - if(!empty($jsonArr)){ - if(!empty($field_params['json'])){ - - $jsonFmt=str_replace(array('"',"'",'[',' '), '', $field_params['json']); - $jsonFmt=str_replace(']','.',$jsonFmt); - $jsonFmt=trim($jsonFmt,'.'); - $jsonFmt=explode('.', $jsonFmt); - $jsonFmt=array_values($jsonFmt); - if(!empty($jsonFmt)){ - - $val=$jsonArr; - $prevKey=''; - foreach ($jsonFmt as $i=>$key){ - if($prevKey=='*'){ - - $new_field_params=$field_params; - $new_field_params['json']=array_slice($jsonFmt, $i); - $new_field_params['json']=implode('.', $new_field_params['json']); - - foreach ($val as $vk=>$vv){ - - $val[$vk]=$this->rule_module_json_data($new_field_params,$vv); - } - break; - }else{ - if($key!='*'){ - - 
$val=$val[$key]; - } - } - $prevKey=$key; - } - } - } - } - if(is_array($val)){ - - $json_arr=strtolower($field_params['json_arr']); - if(empty($json_arr)){ - $json_arr='implode'; - } - switch ($json_arr){ - case 'implode':$arrImplode=str_replace(array('\r','\n'), array("\r","\n"), $field_params['json_arr_implode']);$val=array_implode($arrImplode,$val);break; - case 'jsonencode':$val=json_encode($val);break; - case 'serialize':$val=serialize($val);break; - case '_original_': break; - } - } - return $val; - } - - /*字段提取内容*/ - public function field_module_extract($field_params,$extract_field_val,$base_url,$domain_url){ - $field_html=$extract_field_val['value']; - if(empty($field_html)){ - return ''; - } - $val=''; - $extract_module=strtolower($field_params['extract_module']); - switch ($extract_module){ - case 'cover': - - if(!empty($extract_field_val['img'])){ - $val=reset($extract_field_val['img']); - }else{ - if(preg_match('/]*\bsrc=[\'\"](?P[^\'\"]+?)[\'\"]/i',$field_html,$cover)){ - $cover=$cover['url']; - $cover=$this->create_complete_url($cover, $base_url, $domain_url); - $val=$cover; - } - } - break; - case 'phone': - - $field_html=$this->filter_html_tags($field_html,'style,script,object'); - $field_html=strip_tags($field_html); - if(preg_match('/\d{11}/', $field_html,$phone)){ - $val=$phone[0]; - } - break; - case 'email': - $field_html=$this->filter_html_tags($field_html,'style,script,object'); - $field_html=strip_tags($field_html); - if(preg_match('/[\w\-]+\@[\w\-\.]+/i', $field_html,$email)){ - $val=$email[0]; - } - break; - case 'rule': - - $val=$this->field_module_rule(array('reg_rule'=>$field_params['reg_extract_rule']), $field_html); - if(empty($val)){ - - if(preg_match('/'.$field_params['reg_extract_rule'].'/i', $field_html,$val)){ - $val=$val[0]; - } - } - break; - case 'xpath': - 
$val=$this->field_module_xpath(array('xpath'=>$field_params['extract_xpath'],'xpath_attr'=>$field_params['extract_xpath_attr'],'xpath_attr_custom'=>$field_params['extract_xpath_attr_custom']), $field_html); - break; - case 'json': - $val=$this->field_module_json(array('json'=>$field_params['extract_json'],'json_arr'=>$field_params['extract_json_arr'],'json_arr_implode'=>$field_params['extract_json_arr_implode']), $field_html); - break; - } - return $val; - } - /*数据处理*/ - public function processField($fieldVal,$process){ - if(empty($fieldVal)||empty($process)){ - return $fieldVal; - } - foreach ($process as $params){ - if('html'==$params['module']){ - $htmlAllow=array_filter(explode(',',$params['html_allow'])); - $htmlFilter=array_filter(explode(',',$params['html_filter'])); - if(!empty($htmlAllow)){ - - $htmlAllowStr=''; - foreach ($htmlAllow as $v){ - $htmlAllowStr.='<'.$v.'>'; - } - $fieldVal=strip_tags($fieldVal,$htmlAllowStr); - } - if(!empty($htmlFilter)){ - - if(in_array('all', $htmlFilter)){ - - $fieldVal=$this->filter_html_tags($fieldVal, array('style','script','object')); - $fieldVal=strip_tags($fieldVal); - }else{ - $fieldVal=$this->filter_html_tags($fieldVal, $htmlFilter); - } - } - }elseif('replace'==$params['module']){ - $fieldVal=preg_replace('/'.$params['replace_from'].'/i',$params['replace_to'], $fieldVal); - }elseif('filter'==$params['module']){ - if(!empty($params['filter_list'])){ - - $filterList=explode("\r\n", $params['filter_list']); - $filterList=array_filter($filterList); - if(!empty($params['filter_pass'])){ - - foreach ($filterList as $filterStr){ - if(stripos($fieldVal,$filterStr)!==false){ - - $fieldVal=''; - break; - } - } - }else{ - - $fieldVal=str_ireplace($filterList, $params['filter_replace'], $fieldVal); - } - } - }elseif('tool'==$params['module']){ - - if(in_array('format', $params['tool_list'])){ - - $fieldVal=$this->filter_html_tags($fieldVal,array('style','script')); - 
$fieldVal=preg_replace('/\b(style|width|height|align)\s*=\s*([\'\"])[^\<\>\'\"]+?\\2(?=\s|$|\/|>)/i', ' ', $fieldVal); - } - if(in_array('trim', $params['tool_list'])){ - - $fieldVal=trim($fieldVal); - } - if(in_array('is_img', $params['tool_list'])){ - - if(!empty($GLOBALS['config']['caiji']['download_img'])){ - - $fieldVal=preg_replace('/(\bhttp[s]{0,1}\:\/\/[^\s]+)/i','{[img]}'."$1".'{[/img]}',$fieldVal); - } - } - }elseif('translate'==$params['module']){ - - if(!empty($GLOBALS['config']['translate'])&&!empty($GLOBALS['config']['translate']['open'])){ - - $fieldVal=\util\Translator::translate($fieldVal, $params['translate_from'], $params['translate_to']); - } - }elseif('batch'==$params['module']){ - - static $batch_list=array(); - if(!empty($params['batch_list'])){ - $listMd5=md5($params['batch_list']); - if(!isset($batch_list[$listMd5])){ - - if(preg_match_all('/([^\r\n]+?)\=([^\r\n]+)/', $params['batch_list'],$mlist)){ - $batch_re=$mlist[1]; - $batch_to=$mlist[2]; - $batch_list[$listMd5]=array($batch_re,$batch_to); - } - }else{ - $batch_re=$batch_list[$listMd5][0]; - $batch_to=$batch_list[$listMd5][1]; - } - $batch_re=is_array($batch_re)?$batch_re:null; - $batch_to=is_array($batch_to)?$batch_to:null; - if(!empty($batch_re)&&count($batch_re)==count($batch_to)){ - - $fieldVal=str_replace($batch_re, $batch_to, $fieldVal); - } - } - }elseif('substr'==$params['module']){ - $params['substr_len']=intval($params['substr_len']); - if($params['substr_len']>0){ - if(mb_strlen($fieldVal,'utf-8')>$params['substr_len']){ - - $fieldVal=mb_substr($fieldVal,0,$params['substr_len'],'utf-8').$params['substr_end']; - } - } - }elseif('func'==$params['module']){ - - if(!empty($params['func_name'])&&function_exists($params['func_name'])){ - - if(array_key_exists($params['func_name'], config('allow_process_func'))||array_key_exists($params['func_name'], config('EXTEND_PROCESS_FUNC'))){ - - static $func_param_list=array(); - $funcParam=null; - if(empty($params['func_param'])){ - - 
$funcParam=array($fieldVal); - }else{ - $fparamMd5=md5($params['func_param']); - if(!isset($func_param_list[$fparamMd5])){ - if(preg_match_all('/[^\r\n]+/', $params['func_param'],$mfuncParam)){ - $func_param_list[$fparamMd5]=$mfuncParam[0]; - } - } - $funcParam=$func_param_list[$fparamMd5]; - foreach ($funcParam as $k=>$v){ - $funcParam[$k]=str_replace('###', $fieldVal, $v); - } - } - if(!empty($funcParam)&&is_array($funcParam)){ - try { - $fieldVal=call_user_func_array($params['func_name'], $funcParam); - }catch (\Exception $ex){ - - } - } - } - } - } - } - return $fieldVal; - } /*设置数据处理,保存config时使用*/ public function setProcess($processList){ - if(!empty($processList)){ + if(is_array($processList)){ + $processList=array_array_map('trim',$processList); foreach ($processList as $k=>$v){ $v['module']=strtolower($v['module']); if(!empty($v['title'])){ @@ -1909,30 +1422,98 @@ class Cpattern extends Collector{ return $processList; } - /*采集级别网址*/ - public function get_level_urls($source_url,$curLevel=1){ - $curLevel=$curLevel>0?$curLevel:0; - if($curLevel>0){ + /*统一:获取网址列表*/ + public function _get_urls($source_url,$config,$is_level=false){ + $is_level=$is_level?'多级':''; + + $html=$this->get_html($source_url); + if(empty($html)){ + return $this->error($is_level.'页面为空'); + } + $base_url=$this->match_base_url($source_url, $html); + $domain_url=$this->match_domain_url($source_url); + + $html=$this->rule_match_area($config, $html); + if(empty($html)){ + return $this->error("未提取到{$is_level}区域内容!"); + } + + $cont_urls=$this->rule_match_urls($config, $html); + $cont_urls1=array(); + + + if(isset($this->config['url_op'])){ - $nextLevel=0; - if(!empty($this->config['level_urls'])){ + $op_not_complete=in_array('not_complete',$this->config['url_op'])?true:false; + }else{ + if(isset($this->config['url_complete'])){ - if(!empty($this->config['level_urls'][$curLevel-1])){ + $op_not_complete=$this->config['url_complete']?false:true; + }else{ + + $op_not_complete=false; + } + } + + 
foreach ($cont_urls as $cont_url){ + if(!$op_not_complete){ + + $cont_url=$this->create_complete_url($cont_url, $base_url, $domain_url); + } + if(!empty($config['url_must'])){ + + if(!preg_match('/'.$config['url_must'].'/i', $cont_url)){ + continue; + } + } + + if(!empty($config['url_ban'])){ + + if(preg_match('/'.$config['url_ban'].'/i', $cont_url)){ + continue; + } + } + if(!empty($cont_url)){ + if(strpos($cont_url,' ')==false){ - if(!empty($this->config['level_urls'][$curLevel])){ - - $nextLevel=$curLevel+1; + + + $cont_urls1[]=$cont_url; + } + } + } + $cont_urls=$cont_urls1; + unset($cont_urls1); + + if(empty($cont_urls)){ + return $this->error("未获取到".($is_level?$is_level:'内容')."网址!"); + }else{ + if(!empty($this->config['url_reverse'])){ + + $cont_urls=array_reverse($cont_urls); + } + if(!empty($this->config['url_post'])){ + + $postParams=array(); + if(!empty($this->config['url_posts']['names'])){ + foreach ($this->config['url_posts']['names'] as $k=>$v){ + if (!empty($v)){ + $postParams[]=$v.'='.rawurlencode($this->config['url_posts']['vals'][$k]); + } + } + } + if(!empty($postParams)){ + + $postParams=implode('&', $postParams); + foreach ($cont_urls as $k=>$v){ + $v.=strpos($v,'?')===false?'?':'&'; + $v.=$postParams; + $cont_urls[$k]=$v; } } } - - $cont_urls=$this->getLevelUrls($source_url,$curLevel); - }else{ - - $cont_urls=$this->getContUrls($source_url); + return array_values($cont_urls); } - - return array('urls'=>$cont_urls,'levelName'=>$this->config['level_urls'][$curLevel-1]['name'],'nextLevel'=>$nextLevel); } /*执行采集返回未使用的网址*/ public function _collect_unused_cont_urls($cont_urls=array(),$echo_str=''){ @@ -1972,7 +1553,7 @@ class Cpattern extends Collector{ /*执行级别采集*/ public function _collect_level($source_url,$level=1){ $end_echo=''; - + $level=max(1,$level); $level_str=''; for($i=1;$i<$level;$i++){ @@ -1984,25 +1565,25 @@ class Cpattern extends Collector{ $this->cur_level_urls=array(); } $this->echo_msg('','',true,'
'); - - $level_data=$this->get_level_urls($source_url,$level); + + $level_data=$this->collLevelUrls($source_url,$level); $this->echo_msg($level_str.'抓取到'.$level.'级“'.$this->config['level_urls'][$level-1]['name'].'”网址'.count($level_data['urls']).'条','black'); - + $mcollected=model('Collected'); $mcacheLevel=CacheModel::getInstance('level_url'); - + if(!empty($level_data['urls'])){ $level_urls=array(); foreach ($level_data['urls'] as $level_url){ $level_urls["level_{$level}:{$level_url}"]=$level_url; } - + $level_interval=$GLOBALS['config']['caiji']['interval']*60; $time_interval_list=array(); $cacheLevels=$mcacheLevel->db()->where(array('cname'=>array('in',array_map('md5', array_keys($level_urls)))))->column('dateline','cname'); - + if(!empty($cacheLevels)){ $count_db_used=0; $sortLevels=array('undb'=>array(),'db'=>array()); @@ -2027,7 +1608,7 @@ class Cpattern extends Collector{ } if($count_db_used>0){ $this->echo_msg($level_str.$count_db_used.'条已采集网址被过滤,下次采集需等待'.($level_interval-max($time_interval_list)).'秒,设置间隔','black'); + .url('Admin/Setting/caiji').'" target="_blank">设置间隔','black'); if(count($level_urls)<=$count_db_used){ $this->echo_msg($level_str.$level.'级“'.$this->config['level_urls'][$level-1]['name'].'”网址采集完毕!','green',true,$end_echo); return 'completed'; @@ -2039,7 +1620,7 @@ class Cpattern extends Collector{ } $level_data['urls']=$level_urls; } - + $finished_source=true; $cur_level_i=0;; @@ -2109,7 +1690,7 @@ class Cpattern extends Collector{ }else{ $source_type=1; } - + if($source_type==2){ if(array_key_exists($cont_key,$this->used_level_urls)){ @@ -2121,7 +1702,7 @@ class Cpattern extends Collector{ continue; } } - + $finished_cont=true; $cur_c_i=0; foreach ($cont_urls as $cont_url){ @@ -2144,7 +1725,7 @@ class Cpattern extends Collector{ } } } - + if(input('?backstage')){ @@ -2155,17 +1736,17 @@ class Cpattern extends Collector{ exit('终止进程'); } } - + if($mcacheCont->getCount($md5_cont_url)>0){ $this->used_cont_urls[$md5_cont_url]=1; continue; } 
$mcacheCont->setCache($md5_cont_url, 1); - + $this->echo_msg($echo_str."采集内容页:{$cont_url}",'black'); $field_vals_list=$this->getFields($cont_url); - + $is_loop=empty($this->first_loop_field)?false:true; if(!empty($field_vals_list)){ $is_real_time=false; @@ -2194,38 +1775,56 @@ class Cpattern extends Collector{ unset($field_vals_list[$k]); } } - $field_vals_list=array_values($field_vals_list); $this->echo_msg($echo_str.'已过滤'.count($loop_exists_urls).'条重复数据','black'); } } + if(isset($this->exclude_cont_urls[$md5_cont_url])){ + + $excludeNum=0; + foreach($this->exclude_cont_urls[$md5_cont_url] as $k=>$v){ + + $excludeNum+=count($v); + } + + $this->echo_msg($echo_str.'通过数据处理排除了'.$excludeNum.'条数据','black'); + } + $field_vals_list=array_values($field_vals_list); } + foreach ($field_vals_list as $field_vals){ + $collected_error=''; $collected_data=array('url'=>$cont_url,'fields'=>$field_vals); if($is_loop){ $collected_data['url'].='#'.md5(serialize($field_vals)); - } - $collected_error=''; - - if(!empty($this->config['field_title'])){ + }else{ - $collected_data['title']=$field_vals[$this->config['field_title']]['value']; - } - if(!empty($collected_data['title'])){ - - if($mcollected->getCountByTitle($collected_data['title'])>0){ + if(isset($this->exclude_cont_urls[$md5_cont_url])){ - $collected_error='标题重复:'.mb_substr($collected_data['title'],0,300,'utf-8'); + $collected_error=reset($this->exclude_cont_urls[$md5_cont_url]); + $collected_error=$this->exclude_url_msg($collected_error); + } + } + if(empty($collected_error)){ + if(!empty($this->config['field_title'])){ + + $collected_data['title']=$field_vals[$this->config['field_title']]['value']; + } + if(!empty($collected_data['title'])){ + + if($mcollected->getCountByTitle($collected_data['title'])>0){ + + $collected_error='标题重复:'.mb_substr($collected_data['title'],0,300,'utf-8'); + } } } - if(empty($collected_error)){ if($is_real_time){ $GLOBALS['real_time_release']->export(array($collected_data)); - + 
unset($collected_data['fields']); unset($collected_data['title']); } @@ -2233,9 +1832,15 @@ class Cpattern extends Collector{ $this->collected_field_list[]=$collected_data; }else{ - controller('ReleaseBase','event')->record_collected($collected_data['url'], - array('id'=>0,'error'=>$collected_error),array('task_id'=>$this->collector['task_id'],'module'=>$this->release['module']) - ); + if(!$this->config['url_repeat']){ + + controller('ReleaseBase','event')->record_collected($collected_data['url'], + array('id'=>0,'error'=>$collected_error),array('task_id'=>$this->collector['task_id'],'module'=>$this->release['module']) + ); + }else{ + + $this->echo_msg($collected_error); + } } } } @@ -2244,12 +1849,15 @@ class Cpattern extends Collector{ controller('ReleaseBase','event')->record_collected( - $cont_url,array('id'=>1,'target'=>'','desc'=>'循环入库'),array('task_id'=>$this->collector['task_id'],'module'=>$this->release['module']),null,false + $cont_url,array('id'=>1,'target'=>'','desc'=>'循环入库'),array('task_id'=>$this->collector['task_id'],'module'=>$this->release['module']),null,false ); } + }else{ + + $this->echo_msg('已采集过该网址','black'); } $this->used_cont_urls[$md5_cont_url]=1; - + if($this->collect_num>0){ if(count($this->collected_field_list)>=$this->collect_num){ @@ -2262,7 +1870,7 @@ class Cpattern extends Collector{ } } } - + if($finished_cont){ if($source_type==1){ @@ -2272,7 +1880,7 @@ class Cpattern extends Collector{ $mcacheLevel->setCache(md5($cont_key),$cont_key); } - + if($source_type==2){ $this->used_level_urls[$cont_key]=1; @@ -2281,481 +1889,11 @@ class Cpattern extends Collector{ $this->used_source_urls[$cont_key]=1; } } - + if($this->collect_num>0&&count($this->collected_field_list)>=$this->collect_num){ break; } } } - - /*采集,return false表示终止采集*/ - public function collect($num=10){ - if(!defined('IS_COLLECTING')){ - define('IS_COLLECTING', 1); - } - @session_start(); - \think\Session::pause(); - - if(!$this->show_opened_tools){ - $opened_tools=array(); - 
if($this->config['page_render']){ - $opened_tools[]='页面渲染'; - } - if($GLOBALS['config']['caiji']['download_img']){ - $opened_tools[]='图片本地化'; - } - if($GLOBALS['config']['proxy']['open']){ - $opened_tools[]='代理'; - } - if(!empty($opened_tools)){ - $this->echo_msg('开启功能:'.implode('、', $opened_tools),'black'); - } - if($num>0){ - $this->echo_msg('预计采集'.$num.'条数据','black'); - } - - $this->show_opened_tools=true; - } - - $this->collect_num=$num; - $this->collected_field_list=array(); - - $source_is_url=intval($this->config['source_is_url']); - if(!isset($this->original_source_urls)){ - - $this->original_source_urls=array(); - foreach ( $this->config ['source_url'] as $k => $v ) { - if(empty($v)){ - continue; - } - $return_s_urls = $this->convert_source_url ( $v ); - if (is_array ( $return_s_urls )) { - foreach ($return_s_urls as $r_s_u){ - $this->original_source_urls[md5($r_s_u)]=$r_s_u; - } - } else { - $this->original_source_urls[md5($return_s_urls)]=$return_s_urls; - } - } - } - if(empty($this->original_source_urls)){ - $this->echo_msg('没有起始页网址!'); - return 'completed'; - } - - if($source_is_url){ - - if(isset($this->used_source_urls['_source_is_url_'])){ - $this->echo_msg('所有起始页采集完毕!','green'); - return 'completed'; - } - }else{ - if(count($this->original_source_urls)<=count($this->used_source_urls)){ - $this->echo_msg('所有起始页采集完毕!','green'); - return 'completed'; - } - } - - $source_interval=$GLOBALS['config']['caiji']['interval']*60; - $time_interval_list=array(); - - $source_urls=array(); - $mcacheSource=CacheModel::getInstance('source_url'); - if($source_is_url){ - - $source_urls=$this->original_source_urls; - }else{ - $cacheSources=$mcacheSource->db()->where(array('cname'=>array('in',array_keys($this->original_source_urls))))->column('dateline','cname'); - if(!empty($cacheSources)){ - $count_db_used=0; - $sortSources=array('undb'=>array(),'db'=>array()); - - foreach ($this->original_source_urls as $sKey=>$sVal){ - if(!isset($cacheSources[$sKey])){ - - 
$sortSources['undb'][$sKey]=$sVal; - }else{ - - $time_interval=abs(NOW_TIME-$cacheSources[$sKey]); - if($time_interval<$source_interval){ - - $this->used_source_urls[$sVal]=1; - $count_db_used++; - $time_interval_list[]=$time_interval; - }else{ - $sortSources['db'][$sKey]=$sVal; - } - } - } - if($count_db_used>0){ - $this->echo_msg($count_db_used.'条已采集起始网址被过滤,下次采集需等待'.($source_interval-max($time_interval_list)).'秒,设置间隔','black'); - if(count($this->original_source_urls)<=count($this->used_source_urls)){ - $this->echo_msg('所有起始页采集完毕!','green'); - return 'completed'; - } - } - $source_urls=array_merge($sortSources['undb'],$sortSources['db']); - unset($sortSources); - unset($cacheSources); - }else{ - $source_urls=$this->original_source_urls; - } - } - $mcollected=model('Collected'); - - if($source_is_url){ - - $this->cont_urls_list['_source_is_url_']=array_values($source_urls); - $source_urls=array('_source_is_url_'=>'_source_is_url_'); - } - - - foreach ($source_urls as $key_source_url=>$source_url){ - $this->cur_source_url=$source_url; - if(array_key_exists($source_url,$this->used_source_urls)){ - - continue; - } - if($source_is_url){ - $this->echo_msg("起始页已转换为内容页网址",'black'); - }else{ - $this->echo_msg("采集起始页:{$source_url}",'green'); - } - if($source_is_url){ - - $this->_collect_fields(); - }else{ - - if(!empty($this->config['level_urls'])){ - - - $this->echo_msg('开始分析多级网址','black'); - $return_msg=$this->_collect_level($source_url,1); - if($return_msg=='completed'){ - return $return_msg; - } - }else{ - - $cont_urls=$this->getContUrls($source_url); - $this->cont_urls_list[$source_url]=$this->_collect_unused_cont_urls($cont_urls); - $this->_collect_fields(); - } - } - - if($this->collect_num>0&&count($this->collected_field_list)>=$this->collect_num){ - break; - } - } - - - return $this->collected_field_list; - } - /** - * 拼接默认设置 - * @param unknown $reg 规则 - * @param unknown $merge 拼接字符串 - */ - public function set_merge_default($reg,$merge){ - if(empty($merge)){ - 
$merge=''; - if(!empty($reg)){ - - if(preg_match_all('/\\d*)\>/i', $reg,$match_signs)){ - foreach ($match_signs['num'] as $snum){ - $merge.=cp_sign('match',$snum); - } - } - } - } - return $merge; - } - /** - * 转换起始网址 - * @param string $url - * @return multitype:mixed |unknown - */ - public function convert_source_url($url){ - $urls=array(); - if(preg_match('/\{param\:(?P[a-z]+)\,(?P.*?)\}/i', $url,$match)){ - - $fmtUrl=preg_replace('/\{param\:.*?\}/i', '__set:param__', $url); - $type=strtolower($match['type']); - $val=explode("\t", $match['val']); - if($type=='num'){ - - $num_start = intval($val[0]); - $num_end = intval($val[1]); - $num_end = max ($num_start,$num_end); - $num_inc = max ( 1, intval($val[2])); - $num_desc =$val[3]?1:0; - - if($num_desc){ - - for($i=$num_end;$i>=$num_start;$i--){ - $urls[]=str_replace('__set:param__', $num_start+($i-$num_start)*$num_inc, $fmtUrl); - } - }else{ - for($i=$num_start;$i<=$num_end;$i++){ - $urls[]=str_replace('__set:param__', $num_start+($i-$num_start)*$num_inc, $fmtUrl); - } - } - }elseif($type=='letter'){ - - $letter_start=ord($val[0]); - $letter_end=ord($val[1]); - $letter_end=max($letter_start,$letter_end); - $letter_desc=$val[2]?1:0; - - if($letter_desc){ - - for($i=$letter_end;$i>=$letter_start;$i--) { - $urls[]=str_replace('__set:param__', chr($i), $fmtUrl); - } - }else{ - for($i=$letter_start;$i<=$letter_end;$i++) { - $urls[]=str_replace('__set:param__', chr($i), $fmtUrl); - } - } - }elseif($type=='custom'){ - - foreach ($val as $v){ - $urls[]=str_replace('__set:param__', $v, $fmtUrl); - } - } - return $urls; - }if(preg_match('/\{json\:([^\}]*)\}/i',$url,$match)){ - - $url=preg_replace('/\{json\:([^\}]*)\}/i','',$url); - $jsonRule=trim($match[1]); - if(is_null($jsonRule)||$jsonRule==''){ - $jsonRule='*'; - } - $jsonData=$this->get_html($url); - $jsonData=json_decode($jsonData,true); - if(!empty($jsonData)&&is_array($jsonData)){ - - 
$urls=$this->rule_module_json_data(array('json'=>$jsonRule,'json_arr'=>'_original_'),$jsonData); - if(!is_array($urls)){ - $urls=array($urls); - } - - foreach ($urls as $k=>$v){ - if(!is_string($v)||!preg_match('/^\w+\:\/\//i', $v)){ - - unset($urls[$k]); - } - } - if(!empty($urls)&&is_array($urls)){ - $urls=array_unique($urls); - $urls=array_values($urls); - } - return $urls; - } - }elseif(preg_match('/[\r\n]/', $url)){ - - if(preg_match_all('/^\w+\:\/\/[^\r\n]+/im',$url,$urls)){ - - $urls=array_unique($urls[0]); - $urls=array_values($urls); - } - return $urls; - }else{ - - return $url; - } - } - /*转换(*)通配符*/ - public function convert_sign_wildcard($str){ - return str_replace(lang('sign_wildcard'), '[\s\S]*?', $str); - } - /*转换[参数]*/ - public function convert_sign_match($str){ - $str=preg_replace('/\(\?<(content|match)/i', '(?Psign_addslashes(cp_sign('match','(?P\d*)')); - $str=preg_replace_callback('/(\={0,1})(\s*)([\'\"]{0,1})'.$sign_match.'\3/', function($matches){ - $ruleStr=$matches[1].$matches[2].$matches[3].'(?P'; - if(!empty($matches[1])&&!empty($matches[3])){ - - $ruleStr.='[^\<\>]*?)'; - }else{ - $ruleStr.='[\s\S]*?)'; - } - $ruleStr.=$matches[3]; - return $ruleStr; - }, $str); - return $str; - } - public function sign_addslashes($str){ - $str=str_replace(array('[',']'), array('\[','\]'), $str); - return $str; - } - /*过滤html标签*/ - public function filter_html_tags($content,$tags){ - $tags=$this->clear_tags($tags); - $arr1=$arr2=array(); - foreach ($tags as $tag){ - $tag=strtolower($tag); - if($tag=='script'||$tag=='style'||$tag=='object'){ - $arr1[$tag]=$tag; - }else{ - $arr2[$tag]=$tag; - } - } - - if($arr1){ - $content=preg_replace('/<('.implode('|', $arr1).')[^<>]*>[\s\S]*?<\/\1>/i', '', $content); - } - - if($arr2){ - $content=preg_replace('/<[\/]*('.implode('|', $arr2).')[^<>]*>/i', '', $content); - } - return $content; - } - /*过滤标签*/ - public function clear_tags($tags){ - if(!is_array($tags)){ - $tags = preg_replace('/[\s\,\x{ff0c}]+/u', ',', 
$tags); - $tags=explode(',', $tags); - } - if(!empty($tags)&&is_array($tags)){ - - $tags=array_filter($tags); - $tags=array_unique($tags); - $tags=array_values($tags); - }else{ - $tags=array(); - } - return $tags; - } - /*获取源码*/ - public function get_html($url,$open_cache=false,$is_post=false){ - if($open_cache&&!empty($this->html_cache_list[$url])){ - - return $this->html_cache_list[$url]; - } - $pageRenderTool=null; - if($this->config['page_render']){ - $pageRenderTool=$GLOBALS['config']['page_render']['tool']; - if(empty($pageRenderTool)){ - - $this->error('页面渲染未设置,请检查渲染设置','Setting/page_render'); - return null; - } - } - - $html=null; - $headers=array(); - $options=array(); - if($this->config['request_headers']['open']){ - - if(!empty($this->config['request_headers']['useragent'])){ - - $options['useragent']=$this->config['request_headers']['useragent']; - } - if(!empty($this->config['request_headers']['cookie'])){ - $headers['cookie']=$this->config['request_headers']['cookie']; - } - if(!empty($this->config['request_headers']['referer'])){ - $headers['referer']=$this->config['request_headers']['referer']; - } - - if(!empty($this->config['request_headers']['custom_names'])){ - foreach ($this->config['request_headers']['custom_names'] as $k=>$v){ - if(!empty($v)){ - $headers[$v]=$this->config['request_headers']['custom_vals'][$k]; - } - } - } - } - $mproxy=model('Proxyip'); - $proxy_ip=null; - if(!empty($GLOBALS['config']['proxy']['open'])){ - - $proxy_ip=$mproxy->get_usable_ip(); - $proxyIp=$mproxy->to_proxy_ip($proxy_ip); - - if(!empty($proxyIp)){ - - $options['proxy']=$proxyIp; - } - } - $urlPost=null; - if($is_post){ - - $urlPost=strpos($url, '?'); - if($urlPost!==false){ - $urlPost=substr($url, $urlPost+1); - $url=preg_replace('/\?.*$/', '', $url); - }else{ - $urlPost=''; - } - } - - if($pageRenderTool){ - - if(!empty($options['useragent'])){ - - $headers['user-agent']=$options['useragent']; - unset($options['useragent']); - } - 
if(!empty($options['proxy'])){ - - $options['proxy']=$proxy_ip; - } - - if($pageRenderTool=='chrome'){ - $chromeConfig=$GLOBALS['config']['page_render']['chrome']; - try { - $chromeSocket=new \util\ChromeSocket($chromeConfig['host'],$chromeConfig['port'],$GLOBALS['config']['page_render']['timeout'],$chromeConfig['filename']); - $chromeSocket->newTab(); - $chromeSocket->websocket(null); - if($is_post){ - - $html=$chromeSocket->getRenderHtml($url,$headers,$options,$this->config['charset'],$urlPost); - }else{ - $html=$chromeSocket->getRenderHtml($url,$headers,$options); - } - }catch (\Exception $ex){ - $this->error('页面渲染失败,请检查渲染设置','Setting/page_render'); - return null; - } - }else{ - $this->error('渲染工具不可用,请检查渲染设置','Setting/page_render'); - return null; - } - }else{ - if($is_post){ - $html=get_html($url,$headers,$options,$this->config['charset'],$urlPost); - }else{ - $html=get_html($url,$headers,$options,$this->config['charset']); - } - } - - if($html==null){ - - if(!empty($proxy_ip)){ - $mproxy->set_ip_failed($proxy_ip); - } - return null; - } - - if($this->config['url_complete']){ - - $base_url=$this->match_base_url($url, $html); - $domain_url=$this->match_domain_url($url, $html); - $html=preg_replace_callback('/(?<=\bhref\=[\'\"])([^\'\"]*)(?=[\'\"])/i',function($matche) use ($base_url,$domain_url){ - - return \skycaiji\admin\event\Cpattern::create_complete_url($matche[1], $base_url, $domain_url); - },$html); - $html=preg_replace_callback('/(?<=\bsrc\=[\'\"])([^\'\"]*)(?=[\'\"])/i',function($matche) use ($base_url,$domain_url){ - return \skycaiji\admin\event\Cpattern::create_complete_url($matche[1], $base_url, $domain_url); - },$html); - } - if($open_cache){ - $this->html_cache_list[$url]=$html; - } - return $html; - } } ?> \ No newline at end of file diff --git a/SkycaijiApp/admin/event/CpatternBase.php b/SkycaijiApp/admin/event/CpatternBase.php new file mode 100644 index 0000000..8a9a273 --- /dev/null +++ b/SkycaijiApp/admin/event/CpatternBase.php @@ -0,0 +1,1402 
@@ +used_cont_urls)){ + + $usedContUrls=array_keys($this->used_cont_urls); + if(!empty($usedContUrls)&&is_array($usedContUrls)){ + $total=count($usedContUrls); + $limit=800; + $batch=ceil($total/$limit); + for($i=1;$i<=$batch;$i++){ + + $list=array_slice($usedContUrls,($i-1)*$limit,$limit); + if(!empty($list)){ + CacheModel::getInstance('cont_url')->db()->where('cname','in',$list)->delete(); + } + } + } + } + } + + /*规则匹配区域*/ + public function rule_match_area($config,$html){ + if(!empty($config['reg_area'])){ + if(empty($config['reg_area_module'])){ + + if(preg_match('/'.$config['reg_area'].'/i',$html,$area_cont)){ + if(isset($area_cont['match'])){ + $html=$area_cont['match']; + }else{ + $html=$area_cont[0]; + } + }else{ + $html=''; + } + }elseif('json'==$config['reg_area_module']){ + $html=$this->rule_module_json_data(array('json'=>$config['reg_area'],'json_arr'=>'jsonencode'),$html); + }elseif('xpath'==$config['reg_area_module']){ + $html=$this->rule_module_xpath_data(array('xpath'=>$config['reg_area'],'xpath_attr'=>'outerHtml'),$html); + }else{ + $html=''; + } + } + return $html; + } + /** + * 规则匹配网址 + * @param array $config 配置参数 + * @param string $html 源码 + * @param bool $whole 完全匹配模式 + * + */ + public function rule_match_urls($config,$html,$whole=false){ + $cont_urls=array(); + if(!empty($config['reg_url'])&&!empty($config['url_merge'])){ + + $sign_match=$this->sign_addslashes(cp_sign('match','(?P\d*)')); + if(preg_match_all('/'.$sign_match.'/i', $config['url_merge'],$match_signs)){ + + $url_merge=true; + if(empty($config['reg_url_module'])){ + + if(preg_match('/\(\?P/i', $config['reg_url'])){ + + if(preg_match_all('/'.$config['reg_url'].'/i',$html,$cont_urls,PREG_SET_ORDER)){ + if($config['url_merge']==cp_sign('match')){ + + $url_merge=false; + foreach ($cont_urls as $k=>$v){ + $cont_urls[$k]=$v['match']; + } + } + } + }else{ + + if($whole){ + + if(preg_match_all('/'.$config['reg_url'].'/i',$html,$cont_urls)){ + $cont_urls=$cont_urls[0]; + + 
if($config['url_merge']==cp_sign('match')){ + + $url_merge=false; + }else{ + + foreach ($cont_urls as $k=>$v){ + $cont_urls[$k]=array( + 'match'=>$v + ); + } + } + } + } + } + }elseif(in_array($config['reg_url_module'],array('xpath','json'))){ + + if('xpath'==$config['reg_url_module']){ + + $cont_urls=$this->rule_module_xpath_data ( array ( + 'xpath' => $config['reg_url'], + 'xpath_attr' => 'href', + 'xpath_multi'=>true, + 'xpath_multi_type'=>'loop' + ),$html); + $cont_urls=is_array($cont_urls)?$cont_urls:array(); + }elseif('json'==$config['reg_url_module']){ + + $cont_urls=$this->rule_module_json_data(array('json'=>$config['reg_url'],'json_arr'=>'_original_'),$html); + if(empty($cont_urls)){ + $cont_urls=array(); + }elseif(!is_array($cont_urls)){ + $cont_urls=array($cont_urls); + } + } + + if($config['url_merge']==cp_sign('match')){ + + $url_merge=false; + }else{ + + foreach ($cont_urls as $k=>$v){ + $cont_urls[$k]=array( + 'match'=>$v + ); + } + } + } + + if($url_merge){ + + foreach ($cont_urls as $k=>$v){ + $re_match=array(); + foreach($match_signs['num'] as $ms_k=>$ms_v){ + + $re_match[$ms_k]=$v['match'.$ms_v]; + } + + $cont_urls[$k]=str_replace($match_signs[0], $re_match, $config['url_merge']); + } + } + } + } + $cont_urls=is_array($cont_urls)?array_unique($cont_urls):array(); + $cont_urls=array_values($cont_urls); + return $cont_urls; + } + + + public function match_rule($html,$rule,$merge,$multi=false,$multi_str=''){ + $val=''; + $sign_match=$this->sign_addslashes(cp_sign('match','(?P\d*)')); + if(!empty($rule)&&preg_match_all('/'.$sign_match.'/i',$merge,$match_signs)){ + + $multiStr=''; + if(!empty($multi)){ + + preg_match_all('/'.$rule.'/i',$html,$match_conts,PREG_SET_ORDER); + $multiStr=str_replace(array('\r','\n'), array("\r","\n"), $multi_str); + }else{ + if(preg_match('/'.$rule.'/i', $html,$match_cont)){ + $match_conts=array($match_cont); + } + } + $curI=0; + foreach ($match_conts as $match_cont){ + $curI++; + + $re_match=array(); + 
foreach($match_signs['num'] as $ms_k=>$ms_v){ + $re_match[$ms_k]=$match_cont['match'.$ms_v]; + } + $val.=($curI<=1?'':$multiStr).str_replace($match_signs[0], $re_match, $merge); + } + } + return $val; + } + + /** + * 规则匹配,方法可调用,$field_params传入规则参数 + * @param array $field_params + * @param string $html + * @return string + */ + public function field_module_rule($field_params,&$html){ + + $val=''; + $sign_match=$this->sign_addslashes(cp_sign('match','(?P\d*)')); + if(!empty($field_params['reg_rule'])&&preg_match_all('/'.$sign_match.'/i', $field_params['rule_merge'],$match_signs)){ + + $multiStr=''; + $is_loop=false; + if(!empty($field_params['rule_multi'])){ + + preg_match_all('/'.$field_params['reg_rule'].'/i',$html,$match_conts,PREG_SET_ORDER); + $is_loop='loop'==$field_params['rule_multi_type']?true:false; + if($is_loop){ + if(empty($this->first_loop_field)){ + + $this->first_loop_field=$field_params['name']; + } + $val=array(); + }else{ + $multiStr=str_replace(array('\r','\n'), array("\r","\n"), $field_params['rule_multi_str']); + } + }else{ + if(preg_match('/'.$field_params['reg_rule'].'/i', $html,$match_cont)){ + $match_conts=array($match_cont); + } + } + + $curI=0; + if(is_array($match_conts)){ + foreach ($match_conts as $match_cont){ + $curI++; + + $re_match=array(); + foreach($match_signs['num'] as $ms_k=>$ms_v){ + $re_match[$ms_k]=$match_cont['match'.$ms_v]; + } + $contVal=str_replace($match_signs[0], $re_match, $field_params['rule_merge']); + if($is_loop){ + + $val[]=$contVal; + }else{ + + $val.=($curI<=1?'':$multiStr).$contVal; + } + } + } + } + return $val; + } + /** + * xpath规则,方法可调用,$field_params传入规则参数 + * @param array $field_params + * @param string $html + * @return string + */ + public function field_module_xpath($field_params,$html){ + if(!empty($field_params['xpath_multi'])){ + + if('loop'==$field_params['xpath_multi_type']){ + + if(empty($this->first_loop_field)){ + + $this->first_loop_field=$field_params['name']; + } + } + } + return 
$this->rule_module_xpath_data($field_params,$html); + } + public function rule_module_xpath_data($field_params,$html){ + $vals=''; + if(!empty($field_params['xpath'])){ + $dom=new \DOMDocument; + $libxml_previous_state = libxml_use_internal_errors(true); + @$dom->loadHTML(''.$html); + + $dom->normalize(); + + $xPath = new \DOMXPath($dom); + + $xpath_attr=strtolower($field_params['xpath_attr']); + $xpath_attr='custom'==$xpath_attr?strtolower($field_params['xpath_attr_custom']):$xpath_attr; + + $normal_attr=true; + if(in_array($xpath_attr,array('innerhtml','outerhtml','text'))){ + + $normal_attr=false; + } + $xpath_q=trim($field_params['xpath']); + if(!empty($xpath_attr)){ + + if(preg_match('/\/\@[\w\-]+$/', $xpath_q)){ + + $xpath_q=preg_replace('/\@[\w\-]+$/', '', $xpath_q); + } + if($normal_attr){ + + $xpath_q=$xpath_q.(preg_match('/\/$/', $xpath_q)?'':'/').'@'.$xpath_attr; + } + }else{ + + if(!preg_match('/\/\@[\w\-]+$/', $xpath_q)){ + + $xpath_attr='innerhtml'; + $normal_attr=false; + } + } + + $nodes = $xPath->query($xpath_q); + + $multiStr=''; + $is_loop=false; + if(!empty($field_params['xpath_multi'])){ + + $is_loop='loop'==$field_params['xpath_multi_type']?true:false; + if($is_loop){ + + + + + $vals=array(); + }else{ + + $multiStr=str_replace(array('\r','\n'), array("\r","\n"), $field_params['xpath_multi_str']); + } + } + + $curI=0; + foreach ($nodes as $node){ + $curI++; + $val=($curI<=1?'':$multiStr); + if($normal_attr){ + + $val.=$node->nodeValue; + }else{ + + switch ($xpath_attr){ + case 'innerhtml': + $nchilds = $node->childNodes; + foreach ($nchilds as $nchild){ + $val .= $nchild->ownerDocument->saveHTML($nchild); + } + break; + case 'outerhtml':$val.=$node->ownerDocument->saveHTML($node);break; + case 'text': + + + $nchilds = $node->childNodes; + foreach ($nchilds as $nchild){ + $val .= $nchild->ownerDocument->saveHTML($nchild); + } + $val=$this->filter_html_tags($val, array('style','script','object')); + $val=strip_tags($val); + break; + } + } + + 
if($is_loop){ + + $vals[]=$val; + }else{ + $vals.=$val; + } + + if(empty($field_params['xpath_multi'])){ + + break; + } + } + + libxml_clear_errors(); + + } + return $vals; + } + + /*自动获取*/ + public function field_module_auto($field_params,&$html,$cur_url){ + switch (strtolower($field_params['auto'])){ + case 'title':$val=$this->get_title($html);break; + case 'content':$val=$this->get_content($html);break; + case 'keywords':$val=$this->get_keywords($html);break; + case 'description':$val=$this->get_description($html);break; + case 'url':$val=$cur_url;break; + } + return $val; + } + public function field_module_words($field_params){ + + return $field_params['words']; + } + public function field_module_num($field_params){ + + $start=intval($field_params['num_start']); + $end=intval($field_params['num_end']); + return rand($start, $end); + } + public function field_module_time($field_params){ + $val=''; + $start=empty($field_params['time_start'])?NOW_TIME:strtotime($field_params['time_start']); + $end=empty($field_params['time_end'])?NOW_TIME:strtotime($field_params['time_end']); + $time=rand($start, $end); + if(empty($field_params['time_stamp'])){ + + $fmt=empty($field_params['time_format'])?'Y-m-d H:i': + str_replace(array('[年]','[月]','[日]','[时]','[分]','[秒]'), array('Y','m','d','H','i','s'), $field_params['time_format']); + $val=date($fmt,$time); + }else{ + $val=$time; + } + return $val; + } + public function field_module_list($field_params){ + static $list=array(); + $key=md5($field_params['list']); + if(!isset($list[$key])){ + + if(preg_match_all('/[^\r\n]+/', $field_params['list'],$str_list)){ + $str_list=$str_list[0]; + }else{ + $str_list=array(); + } + $list[$key]=$str_list; + } + $str_list=$list[$key]; + $val=''; + if(!empty($str_list)){ + $randi=array_rand($str_list,1); + $val=$str_list[$randi]; + } + return $val; + } + public function field_module_merge($field_params,$val_list){ + $val=''; + + if(preg_match_all('/\[\x{5b57}\x{6bb5}\:(.+?)\]/u', 
$field_params['merge'],$match_fields)){ + $val=$field_params['merge']; + + for($i=0;$irule_module_json_data($field_params,$jsonList[$jsonKey]); + if($field_params['json_loop']){ + + if(is_array($val)){ + $field_params['json_arr']=$jsonArrType; + foreach ($val as $k=>$v){ + $val[$k]=$this->rule_module_json_data_convert($v,$field_params); + } + + if(empty($this->first_loop_field)){ + + $this->first_loop_field=$field_params['name']; + } + } + } + return $val; + } + public function rule_module_json_data($field_params,$jsonArrOrStr){ + $jsonArr=array(); + if(is_array($jsonArrOrStr)){ + $jsonArr=&$jsonArrOrStr; + }else{ + + $jsonArr=json_decode($jsonArrOrStr,true); + if(empty($jsonArr)&&preg_match(self::$jsonpRegExp,$jsonArrOrStr,$jsonArrOrStr)){ + + $jsonArr=trim($jsonArrOrStr['json']).'}'; + $jsonArr=json_decode($jsonArr,true); + } + unset($jsonArrOrStr); + } + $val=''; + if(!empty($jsonArr)){ + if(!empty($field_params['json'])){ + + $jsonFmt=str_replace(array('"',"'",'[',' '), '', $field_params['json']); + $jsonFmt=str_replace(']','.',$jsonFmt); + $jsonFmt=trim($jsonFmt,'.'); + $jsonFmt=explode('.', $jsonFmt); + $jsonFmt=array_values($jsonFmt); + if(!empty($jsonFmt)){ + + $val=$jsonArr; + $prevKey=''; + foreach ($jsonFmt as $i=>$key){ + if($prevKey=='*'){ + + $new_field_params=$field_params; + $new_field_params['json']=array_slice($jsonFmt, $i); + $new_field_params['json']=implode('.', $new_field_params['json']); + + foreach ($val as $vk=>$vv){ + + $val[$vk]=$this->rule_module_json_data($new_field_params,$vv); + } + break; + }else{ + if($key!='*'){ + + $val=$val[$key]; + } + } + $prevKey=$key; + } + } + } + } + + return $this->rule_module_json_data_convert($val, $field_params); + } + public function rule_module_json_data_convert($val,$field_params){ + if(is_array($val)){ + + $json_arr=strtolower($field_params['json_arr']); + if(empty($json_arr)){ + $json_arr='implode'; + } + switch ($json_arr){ + case 'implode':$arrImplode=str_replace(array('\r','\n'), 
array("\r","\n"), $field_params['json_arr_implode']);$val=array_implode($arrImplode,$val);break; + case 'jsonencode':$val=json_encode($val);break; + case 'serialize':$val=serialize($val);break; + case '_original_': break; + } + } + return $val; + } + + /*字段提取内容*/ + public function field_module_extract($field_params,$extract_field_val,$base_url,$domain_url){ + $field_html=$extract_field_val['value']; + if(empty($field_html)){ + return ''; + } + $val=''; + $extract_module=strtolower($field_params['extract_module']); + switch ($extract_module){ + case 'cover': + + if(!empty($extract_field_val['img'])){ + $val=reset($extract_field_val['img']); + }else{ + if(preg_match('/]*\bsrc=[\'\"](?P[^\'\"]+?)[\'\"]/i',$field_html,$cover)){ + $cover=$cover['url']; + $cover=$this->create_complete_url($cover, $base_url, $domain_url); + $val=$cover; + } + } + break; + case 'phone': + + $field_html=$this->filter_html_tags($field_html,'style,script,object'); + $field_html=strip_tags($field_html); + if(preg_match('/\d{11}/', $field_html,$phone)){ + $val=$phone[0]; + } + break; + case 'email': + $field_html=$this->filter_html_tags($field_html,'style,script,object'); + $field_html=strip_tags($field_html); + if(preg_match('/[\w\-]+\@[\w\-\.]+/i', $field_html,$email)){ + $val=$email[0]; + } + break; + case 'rule': + + $val=$this->field_module_rule(array('reg_rule'=>$field_params['reg_extract_rule']), $field_html); + if(empty($val)){ + + if(preg_match('/'.$field_params['reg_extract_rule'].'/i', $field_html,$val)){ + $val=$val[0]; + } + } + break; + case 'xpath': + $val=$this->field_module_xpath(array('xpath'=>$field_params['extract_xpath'],'xpath_attr'=>$field_params['extract_xpath_attr'],'xpath_attr_custom'=>$field_params['extract_xpath_attr_custom']), $field_html); + break; + case 'json': + $val=$this->field_module_json(array('json'=>$field_params['extract_json'],'json_arr'=>$field_params['extract_json_arr'],'json_arr_implode'=>$field_params['extract_json_arr_implode']), $field_html); + 
break; + } + return $val; + } + /*数据处理方法*/ + public function process_f_html($fieldVal,$params){ + $htmlAllow=array_filter(explode(',',$params['html_allow'])); + $htmlFilter=array_filter(explode(',',$params['html_filter'])); + if(!empty($htmlAllow)){ + + $htmlAllowStr=''; + foreach ($htmlAllow as $v){ + $htmlAllowStr.='<'.$v.'>'; + } + $fieldVal=strip_tags($fieldVal,$htmlAllowStr); + } + if(!empty($htmlFilter)){ + + if(in_array('all', $htmlFilter)){ + + $fieldVal=$this->filter_html_tags($fieldVal, array('style','script','object')); + $fieldVal=strip_tags($fieldVal); + }else{ + $fieldVal=$this->filter_html_tags($fieldVal, $htmlFilter); + } + } + return $fieldVal; + } + public function process_f_replace($fieldVal,$params){ + return preg_replace('/'.$params['replace_from'].'/i',$params['replace_to'], $fieldVal); + } + public function process_f_tool($fieldVal,$params){ + + if(in_array('format', $params['tool_list'])){ + + $fieldVal=$this->filter_html_tags($fieldVal,array('style','script')); + $fieldVal=preg_replace('/\b(id|class|style|width|height|align)\s*=\s*([\'\"])[^\<\>\'\"]+?\\2(?=\s|$|\/|>)/i', ' ', $fieldVal); + } + if(in_array('trim', $params['tool_list'])){ + + $fieldVal=trim($fieldVal); + } + if(in_array('is_img', $params['tool_list'])){ + + if(!empty($GLOBALS['config']['caiji']['download_img'])){ + + $fieldVal=preg_replace('/(\bhttp[s]{0,1}\:\/\/[^\s]+)/i','{[img]}'."$1".'{[/img]}',$fieldVal); + } + } + return $fieldVal; + } + public function process_f_translate($fieldVal,$params){ + + if(!empty($GLOBALS['config']['translate'])&&!empty($GLOBALS['config']['translate']['open'])){ + + $fieldVal=\util\Translator::translate($fieldVal, $params['translate_from'], $params['translate_to']); + } + return $fieldVal; + } + public function process_f_batch($fieldVal,$params){ + + static $batch_list=array(); + if(!empty($params['batch_list'])){ + $listMd5=md5($params['batch_list']); + if(!isset($batch_list[$listMd5])){ + + if(preg_match_all('/([^\r\n]+?)\=([^\r\n]+)/', 
$params['batch_list'],$mlist)){ + $batch_re=$mlist[1]; + $batch_to=$mlist[2]; + $batch_list[$listMd5]=array($batch_re,$batch_to); + } + }else{ + $batch_re=$batch_list[$listMd5][0]; + $batch_to=$batch_list[$listMd5][1]; + } + $batch_re=is_array($batch_re)?$batch_re:null; + $batch_to=is_array($batch_to)?$batch_to:null; + if(!empty($batch_re)&&count($batch_re)==count($batch_to)){ + + $fieldVal=str_replace($batch_re, $batch_to, $fieldVal); + } + } + return $fieldVal; + } + public function process_f_substr($fieldVal,$params){ + $params['substr_len']=intval($params['substr_len']); + if($params['substr_len']>0){ + if(mb_strlen($fieldVal,'utf-8')>$params['substr_len']){ + + $fieldVal=mb_substr($fieldVal,0,$params['substr_len'],'utf-8').$params['substr_end']; + } + } + return $fieldVal; + } + public function process_f_func($fieldVal,$params){ + + if(!empty($params['func_name'])){ + if(!function_exists($params['func_name'])){ + + $this->error('数据处理》无效的函数:'.$params['func_name']); + }else{ + + if(array_key_exists($params['func_name'], config('allow_process_func'))||array_key_exists($params['func_name'], config('EXTEND_PROCESS_FUNC'))){ + + static $func_param_list=array(); + $funcParam=null; + if(empty($params['func_param'])){ + + $funcParam=array($fieldVal); + }else{ + $fparamMd5=md5($params['func_param']); + if(!isset($func_param_list[$fparamMd5])){ + if(preg_match_all('/[^\r\n]+/', $params['func_param'],$mfuncParam)){ + $func_param_list[$fparamMd5]=$mfuncParam[0]; + } + } + $funcParam=$func_param_list[$fparamMd5]; + foreach ($funcParam as $k=>$v){ + $funcParam[$k]=str_replace('###', $fieldVal, $v); + } + } + if(!empty($funcParam)&&is_array($funcParam)){ + try { + $fieldVal=call_user_func_array($params['func_name'], $funcParam); + }catch (\Exception $ex){ + + } + } + }else{ + $this->error('数据处理》未配置函数:'.$params['func_name']); + } + } + } + return $fieldVal; + } + public function process_f_filter($fieldVal,$params,$curUrlMd5,$loopIndex,$contUrlMd5){ + static $key_list=array(); 
+ if(!empty($params['filter_list'])){ + $listMd5=md5($params['filter_list']); + if(!isset($key_list[$listMd5])){ + $filterList=explode("\r\n", $params['filter_list']); + $filterList=array_filter($filterList); + $key_list[$listMd5]=$filterList; + }else{ + $filterList=$key_list[$listMd5]; + } + $filterList=is_array($filterList)?$filterList:array(); + + + if(!empty($params['filter_pass'])){ + if($params['filter_pass']=='1'){ + + foreach ($filterList as $filterStr){ + if(stripos($fieldVal,$filterStr)!==false){ + + $fieldVal=''; + break; + } + } + }elseif($params['filter_pass']=='2'){ + + foreach ($filterList as $filterStr){ + if(stripos($fieldVal,$filterStr)!==false){ + + if(!isset($this->exclude_cont_urls[$contUrlMd5])){ + $this->exclude_cont_urls[$contUrlMd5]=array(); + } + + if(empty($this->first_loop_field)){ + + $this->exclude_cont_urls[$contUrlMd5][$curUrlMd5]='filter:'.$filterStr; + }else{ + + if(!isset($this->exclude_cont_urls[$contUrlMd5][$curUrlMd5])){ + $this->exclude_cont_urls[$contUrlMd5][$curUrlMd5]=array(); + } + $this->exclude_cont_urls[$contUrlMd5][$curUrlMd5][$loopIndex]='filter:'.$filterStr; + } + break; + } + } + }elseif($params['filter_pass']=='3'){ + + $hasKey=false; + foreach ($filterList as $filterStr){ + if(stripos($fieldVal,$filterStr)!==false){ + + $hasKey=true; + break; + } + } + if(!$hasKey){ + $fieldVal=''; + } + }elseif($params['filter_pass']=='4'){ + + $hasKey=false; + foreach ($filterList as $filterStr){ + if(stripos($fieldVal,$filterStr)!==false){ + + $hasKey=true; + break; + } + } + if(!$hasKey){ + + if(!isset($this->exclude_cont_urls[$contUrlMd5])){ + $this->exclude_cont_urls[$contUrlMd5]=array(); + } + + if(empty($this->first_loop_field)){ + + $this->exclude_cont_urls[$contUrlMd5][$curUrlMd5]='filter:'; + }else{ + + if(!isset($this->exclude_cont_urls[$contUrlMd5][$curUrlMd5])){ + $this->exclude_cont_urls[$contUrlMd5][$curUrlMd5]=array(); + } + $this->exclude_cont_urls[$contUrlMd5][$curUrlMd5][$loopIndex]='filter:'; + } + } + } + 
}else{ + + $fieldVal=str_ireplace($filterList, $params['filter_replace'], $fieldVal); + } + } + return $fieldVal; + } + public function process_f_if($fieldVal,$params,$curUrlMd5,$loopIndex,$contUrlMd5){ + static $func_list=array(); + + if(is_array($params['if_logic'])&&!empty($params['if_logic'])){ + + $resultOr=array(); + $resultAnd=array(); + foreach($params['if_logic'] as $ifk=>$iflv){ + if(empty($iflv)||empty($params['if_cond'][$ifk])){ + + continue; + } + $ifVal=$params['if_val'][$ifk]; + $ifCond=$params['if_cond'][$ifk]; + $result=false; + switch($ifCond){ + case 'regexp': + if(preg_match('/'.$ifVal.'/', $fieldVal)){ + $result=true; + } + break; + case 'func': + if(!empty($ifVal)){ + + $funcMd5=md5($ifVal); + if(!isset($func_list[$funcMd5])){ + if(preg_match_all('/[^\r\n]+/',$ifVal,$funcParam)){ + + $funcParam=$funcParam[0]; + }else{ + + $funcParam=array($ifVal); + } + $func_list[$funcMd5]=$funcParam; + }else{ + $funcParam=$func_list[$funcMd5]; + } + $funcName=$funcParam[0]; + $isTurn=false; + if(strpos($funcName,'!')===0){ + + $funcName=substr($funcName, 1); + $isTurn=true; + } + unset($funcParam[0]); + if(empty($funcParam)){ + + $funcParam=array($fieldVal); + }else{ + foreach($funcParam as $k=>$v){ + $funcParam[$k]=str_replace('###', $fieldVal, $v); + } + } + + if(!function_exists($funcName)){ + + $this->error('数据处理》条件判断》无效的函数:'.$funcName); + }else{ + if(array_key_exists($funcName, config('allow_process_if'))||array_key_exists($funcName, config('EXTEND_PROCESS_IF'))){ + + try { + $result=call_user_func_array($funcName, $funcParam); + if($isTurn){ + + $result=$result?false:true; + } + }catch (\Exception $ex){ + + $this->error('数据处理》条件判断》函数'.$funcName.'运行错误,'.$ex->getMessage()); + } + }else{ + $this->error('数据处理》条件判断》未配置函数:'.$funcName); + } + } + } + break; + case 'has':$result=stripos($fieldVal,$ifVal)!==false?true:false;break; + case 'nhas':$result=stripos($fieldVal,$ifVal)===false?true:false;break; + case 'eq':$result=$fieldVal==$ifVal?true:false;break; + 
case 'neq':$result=$fieldVal!=$ifVal?true:false;break; + case 'heq':$result=$fieldVal===$ifVal?true:false;break; + case 'nheq':$result=$fieldVal!==$ifVal?true:false;break; + case 'gt':$result=$fieldVal>$ifVal?true:false;break; + case 'egt':$result=$fieldVal>=$ifVal?true:false;break; + case 'lt':$result=$fieldVal<$ifVal?true:false;break; + case 'elt':$result=$fieldVal<=$ifVal?true:false;break; + case 'time_eq': + case 'time_egt': + case 'time_elt': + $fieldTime=is_numeric($fieldVal)?$fieldVal:strtotime($fieldVal); + $valTime=is_numeric($ifVal)?$ifVal:strtotime($ifVal); + if($ifCond=='time_eq'){ + + $result=$fieldTime==$valTime?true:false; + }elseif($ifCond=='time_egt'){ + + $result=$fieldTime>=$valTime?true:false; + }elseif($ifCond=='time_elt'){ + + $result=$fieldTime<=$valTime?true:false; + } + break; + } + if('or'==$iflv){ + if(!empty($resultAnd)){ + + $resultOr[]=$resultAnd; + } + $resultAnd=array(); + $resultOr[]=$result; + }elseif('and'==$iflv){ + + $resultAnd[]=$result; + } + } + if(!empty($resultAnd)){ + + $resultOr[]=$resultAnd; + } + if(is_array($resultOr)&&!empty($resultOr)){ + $isTrue=false; + foreach ($resultOr as $results){ + if(is_array($results)){ + + $andResult=true; + foreach ($results as $result){ + if(!$result){ + + $andResult=false; + break; + } + } + $results=$andResult; + } + if($results){ + + $isTrue=true; + break; + } + } + + $exclude=''; + + switch ($params['if_type']){ + case '1':$exclude=$isTrue?'':'if:1';break; + case '2':$exclude=$isTrue?'if:2':'';break; + case '3':$exclude=!$isTrue?'':'if:3';break; + case '4':$exclude=!$isTrue?'if:4':'';break; + } + + if($exclude){ + + if(!isset($this->exclude_cont_urls[$contUrlMd5])){ + $this->exclude_cont_urls[$contUrlMd5]=array(); + } + + if(empty($this->first_loop_field)){ + + $this->exclude_cont_urls[$contUrlMd5][$curUrlMd5]=$exclude; + }else{ + + if(!isset($this->exclude_cont_urls[$contUrlMd5][$curUrlMd5])){ + $this->exclude_cont_urls[$contUrlMd5][$curUrlMd5]=array(); + } + 
$this->exclude_cont_urls[$contUrlMd5][$curUrlMd5][$loopIndex]=$exclude; + } + } + } + } + return $fieldVal; + } + /*数据处理*/ + public function process_field($fieldVal,$process,$curUrlMd5,$loopIndex,$contUrlMd5){ + if(empty($process)){ + return $fieldVal; + } + static $funcs=array('filter','if'); + foreach ($process as $params){ + + if(empty($this->first_loop_field)){ + + if(isset($this->exclude_cont_urls[$contUrlMd5][$curUrlMd5])){ + return $fieldVal; + } + }else{ + + if(isset($this->exclude_cont_urls[$contUrlMd5][$curUrlMd5][$loopIndex])){ + return $fieldVal; + } + } + $funcName='process_f_'.$params['module']; + if(method_exists($this, $funcName)){ + if(in_array($params['module'],$funcs)){ + $fieldVal=$this->$funcName($fieldVal,$params,$curUrlMd5,$loopIndex,$contUrlMd5); + }else{ + $fieldVal=$this->$funcName($fieldVal,$params); + } + } + } + return $fieldVal; + } + + + /** + * 拼接默认设置 + * @param unknown $reg 规则 + * @param unknown $merge 拼接字符串 + */ + public function set_merge_default($reg,$merge){ + if(empty($merge)){ + $merge=''; + if(!empty($reg)){ + + if(preg_match_all('/\\d*)\>/i', $reg,$match_signs)){ + foreach ($match_signs['num'] as $snum){ + $merge.=cp_sign('match',$snum); + } + } + } + } + return $merge; + } + /** + * 转换起始网址 + * @param string $url + * @return multitype:mixed |unknown + */ + public function convert_source_url($url){ + $urls=array(); + if(preg_match('/\{param\:(?P[a-z]+)\,(?P.*?)\}/i', $url,$match)){ + + $fmtUrl=preg_replace('/\{param\:.*?\}/i', '__set:param__', $url); + $type=strtolower($match['type']); + $val=explode("\t", $match['val']); + if($type=='num'){ + + $num_start = intval($val[0]); + $num_end = intval($val[1]); + $num_end = max ($num_start,$num_end); + $num_inc = max ( 1, intval($val[2])); + $num_desc =$val[3]?1:0; + + if($num_desc){ + + for($i=$num_end;$i>=$num_start;$i--){ + $urls[]=str_replace('__set:param__', $num_start+($i-$num_start)*$num_inc, $fmtUrl); + } + }else{ + for($i=$num_start;$i<=$num_end;$i++){ + 
$urls[]=str_replace('__set:param__', $num_start+($i-$num_start)*$num_inc, $fmtUrl); + } + } + }elseif($type=='letter'){ + + $letter_start=ord($val[0]); + $letter_end=ord($val[1]); + $letter_end=max($letter_start,$letter_end); + $letter_desc=$val[2]?1:0; + + if($letter_desc){ + + for($i=$letter_end;$i>=$letter_start;$i--) { + $urls[]=str_replace('__set:param__', chr($i), $fmtUrl); + } + }else{ + for($i=$letter_start;$i<=$letter_end;$i++) { + $urls[]=str_replace('__set:param__', chr($i), $fmtUrl); + } + } + }elseif($type=='custom'){ + + foreach ($val as $v){ + $urls[]=str_replace('__set:param__', $v, $fmtUrl); + } + } + return $urls; + }if(preg_match('/\{json\:([^\}]*)\}/i',$url,$match)){ + + $url=preg_replace('/\{json\:([^\}]*)\}/i','',$url); + $jsonRule=trim($match[1]); + if(is_null($jsonRule)||$jsonRule==''){ + $jsonRule='*'; + } + $jsonData=$this->get_html($url); + if(!empty($jsonData)){ + + $urls=$this->rule_module_json_data(array('json'=>$jsonRule,'json_arr'=>'_original_'),$jsonData); + if(empty($urls)){ + $urls=array(); + } + if(!is_array($urls)){ + $urls=array($urls); + } + + foreach ($urls as $k=>$v){ + if(!is_string($v)||!preg_match('/^\w+\:\/\//i', $v)){ + + unset($urls[$k]); + } + } + if(!empty($urls)&&is_array($urls)){ + $urls=array_unique($urls); + $urls=array_values($urls); + } + return $urls; + } + }elseif(preg_match('/[\r\n]/', $url)){ + + if(preg_match_all('/^\w+\:\/\/[^\r\n]+/im',$url,$urls)){ + + $urls=array_unique($urls[0]); + $urls=array_values($urls); + } + return $urls; + }else{ + + return $url; + } + } + /*排除内容网址的提示信息*/ + public function exclude_url_msg($val){ + $val=explode(':', $val); + $type=''; + if(is_array($val)){ + $type=$val[0]; + $val=$val[1]; + }else{ + $type=$val; + $val=''; + } + $msg='排除网址'; + if($type=='filter'){ + + if(empty($val)){ + $msg='关键词过滤'; + }else{ + $msg='关键词过滤:'.$val; + } + }elseif($type=='if'){ + $msg='条件'; + + switch ($val){ + case '1':$msg.='假';break; + case '2':$msg.='真';break; + case '3':$msg.='假';break; + case 
'4':$msg.='真';break; + } + if(lang('?p_m_if_'.$val)){ + $msg.=':'.lang('p_m_if_'.$val); + } + } + return $msg; + } + + /*转换(*)通配符*/ + public function convert_sign_wildcard($str){ + return str_replace(lang('sign_wildcard'), '[\s\S]*?', $str); + } + /*转换[参数]*/ + public function convert_sign_match($str){ + $str=preg_replace('/\(\?<(content|match)/i', '(?Psign_addslashes(cp_sign('match','(?P\d*)')); + $str=preg_replace_callback('/(\={0,1})(\s*)([\'\"]{0,1})'.$sign_match.'\3/', function($matches){ + $ruleStr=$matches[1].$matches[2].$matches[3].'(?P'; + if(!empty($matches[1])&&!empty($matches[3])){ + + $ruleStr.='[^\<\>]*?)'; + }else{ + $ruleStr.='[\s\S]*?)'; + } + $ruleStr.=$matches[3]; + return $ruleStr; + }, $str); + return $str; + } + public function sign_addslashes($str){ + $str=str_replace(array('[',']'), array('\[','\]'), $str); + return $str; + } + /*过滤html标签*/ + public function filter_html_tags($content,$tags){ + $tags=$this->clear_tags($tags); + $arr1=$arr2=array(); + foreach ($tags as $tag){ + $tag=strtolower($tag); + if($tag=='script'||$tag=='style'||$tag=='object'){ + $arr1[$tag]=$tag; + }else{ + $arr2[$tag]=$tag; + } + } + + if($arr1){ + $content=preg_replace('/<('.implode('|', $arr1).')[^<>]*>[\s\S]*?<\/\1>/i', '', $content); + } + + if($arr2){ + $content=preg_replace('/<[\/]*('.implode('|', $arr2).')[^<>]*>/i', '', $content); + } + return $content; + } + /*过滤标签*/ + public function clear_tags($tags){ + if(!is_array($tags)){ + $tags = preg_replace('/[\s\,\x{ff0c}]+/u', ',', $tags); + $tags=explode(',', $tags); + } + if(!empty($tags)&&is_array($tags)){ + + $tags=array_filter($tags); + $tags=array_unique($tags); + $tags=array_values($tags); + }else{ + $tags=array(); + } + return $tags; + } + /*获取源码*/ + public function get_html($url,$open_cache=false,$is_post=false){ + if($open_cache&&!empty($this->html_cache_list[$url])){ + + return $this->html_cache_list[$url]; + } + $pageRenderTool=null; + if($this->config['page_render']){ + 
$pageRenderTool=$GLOBALS['config']['page_render']['tool']; + if(empty($pageRenderTool)){ + + $this->error('页面渲染未设置,请检查渲染设置','Setting/page_render'); + return null; + } + } + + $html=null; + $headers=array(); + $options=array(); + if($this->config['request_headers']['open']){ + + if(!empty($this->config['request_headers']['useragent'])){ + + $options['useragent']=$this->config['request_headers']['useragent']; + } + if(!empty($this->config['request_headers']['cookie'])){ + $headers['cookie']=$this->config['request_headers']['cookie']; + } + if(!empty($this->config['request_headers']['referer'])){ + $headers['referer']=$this->config['request_headers']['referer']; + } + + if(!empty($this->config['request_headers']['custom_names'])){ + foreach ($this->config['request_headers']['custom_names'] as $k=>$v){ + if(!empty($v)){ + $headers[$v]=$this->config['request_headers']['custom_vals'][$k]; + } + } + } + } + $mproxy=model('Proxyip'); + $proxy_ip=null; + if(!empty($GLOBALS['config']['proxy']['open'])){ + + $proxy_ip=$mproxy->get_usable_ip(); + $proxyIp=$mproxy->to_proxy_ip($proxy_ip); + + if(!empty($proxyIp)){ + + $options['proxy']=$proxyIp; + } + } + $urlPost=null; + if($is_post){ + + $urlPost=strpos($url, '?'); + if($urlPost!==false){ + $urlPost=substr($url, $urlPost+1); + $url=preg_replace('/\?.*$/', '', $url); + }else{ + $urlPost=''; + } + } + + if($pageRenderTool){ + + if(!empty($options['useragent'])){ + + $headers['user-agent']=$options['useragent']; + unset($options['useragent']); + } + if(!empty($options['proxy'])){ + + $options['proxy']=$proxy_ip; + } + + if($pageRenderTool=='chrome'){ + $chromeConfig=$GLOBALS['config']['page_render']['chrome']; + try { + $chromeSocket=new \util\ChromeSocket($chromeConfig['host'],$chromeConfig['port'],$GLOBALS['config']['page_render']['timeout'],$chromeConfig['filename']); + $chromeSocket->newTab(); + $chromeSocket->websocket(null); + if($is_post){ + + 
$html=$chromeSocket->getRenderHtml($url,$headers,$options,$this->config['charset'],$urlPost); + }else{ + $html=$chromeSocket->getRenderHtml($url,$headers,$options); + } + }catch (\Exception $ex){ + $this->error('页面渲染失败,请检查渲染设置','Setting/page_render'); + return null; + } + }else{ + $this->error('渲染工具不可用,请检查渲染设置','Setting/page_render'); + return null; + } + }else{ + if($is_post){ + $html=get_html($url,$headers,$options,$this->config['charset'],$urlPost); + }else{ + $html=get_html($url,$headers,$options,$this->config['charset']); + } + } + + if($html==null){ + + if(!empty($proxy_ip)){ + $mproxy->set_ip_failed($proxy_ip); + } + return null; + } + + if($this->config['url_complete']){ + + $base_url=$this->match_base_url($url, $html); + $domain_url=$this->match_domain_url($url, $html); + $html=preg_replace_callback('/(?<=\bhref\=[\'\"])([^\'\"]*)(?=[\'\"])/i',function($matche) use ($base_url,$domain_url){ + + return \skycaiji\admin\event\Cpattern::create_complete_url($matche[1], $base_url, $domain_url); + },$html); + $html=preg_replace_callback('/(?<=\bsrc\=[\'\"])([^\'\"]*)(?=[\'\"])/i',function($matche) use ($base_url,$domain_url){ + return \skycaiji\admin\event\Cpattern::create_complete_url($matche[1], $base_url, $domain_url); + },$html); + } + if($open_cache){ + $this->html_cache_list[$url]=$html; + } + return $html; + } +} +?> \ No newline at end of file diff --git a/SkycaijiApp/admin/event/Rapi.php b/SkycaijiApp/admin/event/Rapi.php index 79ab9ea..cce2318 100644 --- a/SkycaijiApp/admin/event/Rapi.php +++ b/SkycaijiApp/admin/event/Rapi.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/event/Rcms.php b/SkycaijiApp/admin/event/Rcms.php index 7ebfb41..0847681 100644 --- a/SkycaijiApp/admin/event/Rcms.php +++ b/SkycaijiApp/admin/event/Rcms.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/event/Rdb.php b/SkycaijiApp/admin/event/Rdb.php index d3f4de8..814de11 100644 --- a/SkycaijiApp/admin/event/Rdb.php +++ b/SkycaijiApp/admin/event/Rdb.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -115,11 +115,11 @@ class Rdb extends Release{ $table=strtolower($table); foreach ($fields as $k=>$v){ - if(preg_match('/^auto_id\@([^\s]+)$/i', $v,$autoidTbName)){ + $fields[$k]=preg_replace_callback('/auto_id\@([^\s\#]+)[\#]{0,1}/i',function($autoidTbName)use($autoidList){ $autoidTbName=trim($autoidTbName[1]); $autoidTbName=strtolower($autoidTbName); - $fields[$k]=$autoidList[$autoidTbName]; - } + return $autoidList[$autoidTbName]; + },$v); } try { if('oracle'==$db_config['db_type']){ diff --git a/SkycaijiApp/admin/event/Rdiy.php b/SkycaijiApp/admin/event/Rdiy.php index 2e603b7..a7610ab 100644 --- a/SkycaijiApp/admin/event/Rdiy.php +++ b/SkycaijiApp/admin/event/Rdiy.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/event/Release.php b/SkycaijiApp/admin/event/Release.php index ddf9bfc..8347de7 100644 --- a/SkycaijiApp/admin/event/Release.php +++ b/SkycaijiApp/admin/event/Release.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. 
+ | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/event/ReleaseBase.php b/SkycaijiApp/admin/event/ReleaseBase.php index aadaf6b..99eef00 100644 --- a/SkycaijiApp/admin/event/ReleaseBase.php +++ b/SkycaijiApp/admin/event/ReleaseBase.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/event/Rfile.php b/SkycaijiApp/admin/event/Rfile.php index 6c07533..8db9589 100644 --- a/SkycaijiApp/admin/event/Rfile.php +++ b/SkycaijiApp/admin/event/Rfile.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/lang/zh-cn.php b/SkycaijiApp/admin/lang/zh-cn.php index c59793d..adbd73e 100644 --- a/SkycaijiApp/admin/lang/zh-cn.php +++ b/SkycaijiApp/admin/lang/zh-cn.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -159,13 +159,19 @@ return array( 'process_module_html'=>'html标签过滤', 'process_module_replace'=>'内容替换', - 'process_module_filter'=>'过滤词', + 'process_module_filter'=>'关键词过滤', + 'process_module_if'=>'条件判断', 'process_module_translate'=>'翻译', 'process_module_tool'=>'工具箱', 'process_module_batch'=>'批量替换', 'process_module_substr'=>'截取字符串', 'process_module_func'=>'使用函数', - + + 'p_m_if_1'=>'满足条件采集', + 'p_m_if_2'=>'满足条件不采集', + 'p_m_if_3'=>'不满足条件采集', + 'p_m_if_4'=>'不满足条件不采集', + 'rele_set'=>'发布设置', 'rele_error_detect_null'=>'没有检测到本地CMS程序,您可以手动绑定数据', 'rele_error_empty_rele'=>'发布设置不存在', @@ -296,6 +302,6 @@ return array( 'tips_match_url'=>'示例:<a href="http://demo.com/[内容1]/[内容2]">(*)</a>', 'tips_matchn_url'=>'示例:http://www.demo.com/[内容1]-[内容2].html', - 'release_upgrade'=>'插件版本过低,请升级插件 升级教程', + 'release_upgrade'=>'插件版本过低,请升级插件 升级教程', ); ?> \ No newline at end of file diff --git a/SkycaijiApp/admin/model/App.php b/SkycaijiApp/admin/model/App.php new file mode 100644 index 0000000..f9c7d99 --- /dev/null +++ 
b/SkycaijiApp/admin/model/App.php @@ -0,0 +1,193 @@ +right_app($app)){ + + $path=realpath(config('apps_path').'/'.$app); + if(!empty($path)&&is_dir($path)){ + $appFilename=$this->app_class_file($app); + if(!in_array($app,$passPaths)&&file_exists($appFilename)){ + + if($includeClass){ + + include $appFilename; + $appClass=new $app(); + }else{ + + $appFile=file_get_contents($appFilename); + if(!empty($appFile)){ + $appClass=new \stdClass(); + if(preg_match('/public\s*\$config\s*=(\s*[\s\S]+?[\]\)]\s*\;)/i', $appFile,$config)){ + + set_error_handler(null); + + $config=trim($config[1]); + try { + $config=@eval('return '.$config); + }catch(\Exception $e){ + $config=array(); + } + $appClass->config=is_array($config)?$config:array(); + } + } + } + + if($appClass){ + $appClass->config=$this->clear_config($appClass->config); + return $appClass; + } + } + } + } + return false; + } + public function getByApp($app){ + $data=$this->where('app',$app)->find(); + if(!empty($data)){ + $data=$data->toArray(); + $data['config']=$this->get_config($app); + }else{ + $data=array(); + } + return $data; + } + public function deleteByApp($app){ + if($app){ + $this->where('app',$app)->delete(); + $this->delete_config($app); + } + } + /*应用配置文件名*/ + public function app_class_file($app){ + return realpath(config('apps_path')).DIRECTORY_SEPARATOR.$app.DIRECTORY_SEPARATOR.$app.'.php'; + } + + /*应用命名规范*/ + public function right_app($app){ + + if(preg_match('/^[a-z]+[a-z\_0-9]*$/', $app)){ + return strlen($app)<3?false:true; + }else{ + return false; + } + } + + /*版本号格式*/ + public function right_version($version){ + if(preg_match('/^\d+(\.\d{1,2}){1,2}$/', $version)){ + return true; + }else{ + return false; + } + } + /*名称只能由汉字、字母、数字和下划线组成*/ + public function right_name($name){ + if(preg_match('/^[\w+\x{4e00}-\x{9fa5}]+$/iu', $name)){ + return true; + }else{ + return false; + } + } + /*清理描述html*/ + public function clear_desc($desc){ + $desc=strip_tags($desc,'


'); + $desc=preg_replace('/<(p|br|b|i)\s+.*?>/i', "<$1>", $desc); + $desc=preg_replace('/[\r\n]+/', ' ', $desc); + $desc=trim($desc); + return $desc; + } + /*清理配置信息*/ + public function clear_config($arr){ + $arr=is_array($arr)?$arr:array(); + $desc=$this->clear_desc($arr['desc']); + $arr=$this->_array_map('strip_tags', $arr); + $arr['desc']=$desc; + if(!empty($arr['agreement'])){ + $arr['agreement']=preg_replace('/^[\s]+/m', '', $arr['agreement']); + } + return $arr; + } + /*缓存配置*/ + public function set_config($app,$config){ + if(empty($app)){ + return; + } + $config=is_array($config)?$config:array(); + $filename=$this->config_filename($app); + $oldConfig=$this->get_config($app); + $oldConfig=is_array($oldConfig)?$oldConfig:array(); + + $config=array_merge($oldConfig,$config); + $config=$this->clear_config($config); + + $config=var_export($config,true); + $config=''; + + write_dir_file($filename, $config); + } + /*读取配置*/ + public function get_config($app){ + $filename=$this->config_filename($app); + $config=array(); + if(file_exists($filename)){ + + $config=include $filename; + $config=is_array($config)?$config:array(); + } + $config=$this->clear_config($config); + + return $config; + } + public function delete_config($app){ + $filename=$this->config_filename($app); + unlink($filename); + } + private function _array_map($callback, $arr1){ + if(is_array($arr1)){ + $arr=array(); + foreach ($arr1 as $k=>$v){ + if(!is_array($v)){ + $arr[$k]=call_user_func($callback, $v); + }else{ + $arr[$k]=$this->_array_map($callback,$v); + } + } + } + return $arr; + } + public function config_filename($app){ + return config('apps_path').'/app/config/'.$app.'.php'; + } + /*获取应用类的变量*/ + public function get_class_vars($appClass){ + if(!is_object($appClass)){ + return null; + } + $class=new \ReflectionClass($appClass); + $vars=$class->getProperties(); + $values=array(); + if(is_array($vars)){ + foreach ($vars as $var){ + $var=$var->name; + $values[$var]=$appClass->$var; + } + } + return 
$values; + } +} + +?> \ No newline at end of file diff --git a/SkycaijiApp/admin/model/BaseModel.php b/SkycaijiApp/admin/model/BaseModel.php index 6f937ca..5f6b203 100644 --- a/SkycaijiApp/admin/model/BaseModel.php +++ b/SkycaijiApp/admin/model/BaseModel.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/model/CacheModel.php b/SkycaijiApp/admin/model/CacheModel.php index e499f8d..9507bad 100644 --- a/SkycaijiApp/admin/model/CacheModel.php +++ b/SkycaijiApp/admin/model/CacheModel.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/model/Collected.php b/SkycaijiApp/admin/model/Collected.php index f1cfea0..f1ef6e9 100644 --- a/SkycaijiApp/admin/model/Collected.php +++ b/SkycaijiApp/admin/model/Collected.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/model/Collector.php b/SkycaijiApp/admin/model/Collector.php index 1619484..8fbfa6b 100644 --- a/SkycaijiApp/admin/model/Collector.php +++ b/SkycaijiApp/admin/model/Collector.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/model/Config.php b/SkycaijiApp/admin/model/Config.php index 705746e..729b450 100644 --- a/SkycaijiApp/admin/model/Config.php +++ b/SkycaijiApp/admin/model/Config.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/model/DbCommon.php b/SkycaijiApp/admin/model/DbCommon.php index ed968ad..e7183a5 100644 --- a/SkycaijiApp/admin/model/DbCommon.php +++ b/SkycaijiApp/admin/model/DbCommon.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/model/Provider.php b/SkycaijiApp/admin/model/Provider.php new file mode 100644 index 0000000..732dbe0 --- /dev/null +++ b/SkycaijiApp/admin/model/Provider.php @@ -0,0 +1,41 @@ +where('domain',$url)->value('id'); + $id=intval($id); + } + return $id; + } +} + +?> \ No newline at end of file diff --git a/SkycaijiApp/admin/model/Proxyip.php b/SkycaijiApp/admin/model/Proxyip.php index 250e52e..fd00797 100644 --- a/SkycaijiApp/admin/model/Proxyip.php +++ b/SkycaijiApp/admin/model/Proxyip.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/model/QueryCompatible.php b/SkycaijiApp/admin/model/QueryCompatible.php index 507e3ba..b055738 100644 --- a/SkycaijiApp/admin/model/QueryCompatible.php +++ b/SkycaijiApp/admin/model/QueryCompatible.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/model/Release.php b/SkycaijiApp/admin/model/Release.php index 569f50d..337aa33 100644 --- a/SkycaijiApp/admin/model/Release.php +++ b/SkycaijiApp/admin/model/Release.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/model/ReleaseApp.php b/SkycaijiApp/admin/model/ReleaseApp.php index 408b699..821fd2e 100644 --- a/SkycaijiApp/admin/model/ReleaseApp.php +++ b/SkycaijiApp/admin/model/ReleaseApp.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -22,12 +22,12 @@ class ReleaseApp extends BaseModel{ $cms['module']='cms'; $cms['uptime']=$cms['uptime']>0?$cms['uptime']:NOW_TIME; - $cmsData=$this->where(array('module'=>'cms','app'=>$cms['app']))->find(); + $cmsData=$this->where('app',$cms['app'])->find(); $success=false; if(!empty($cmsData)){ - $this->strict(false)->where(array('module'=>'cms','app'=>$cms['app']))->update($cms); + $this->strict(false)->where('app',$cms['app'])->update($cms); $success=true; }else{ diff --git a/SkycaijiApp/admin/model/Rule.php b/SkycaijiApp/admin/model/Rule.php index 2ed3d7b..57984b2 100644 --- a/SkycaijiApp/admin/model/Rule.php +++ b/SkycaijiApp/admin/model/Rule.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/model/Task.php b/SkycaijiApp/admin/model/Task.php index a0f752c..4e13550 100644 --- a/SkycaijiApp/admin/model/Task.php +++ b/SkycaijiApp/admin/model/Task.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/model/Taskgroup.php b/SkycaijiApp/admin/model/Taskgroup.php index b3e9cad..0989710 100644 --- a/SkycaijiApp/admin/model/Taskgroup.php +++ b/SkycaijiApp/admin/model/Taskgroup.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/model/User.php b/SkycaijiApp/admin/model/User.php index a676536..fc0a086 100644 --- a/SkycaijiApp/admin/model/User.php +++ b/SkycaijiApp/admin/model/User.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -97,6 +97,24 @@ class User extends BaseModel{ } return $check; } + /*获取随机盐*/ + public static function rand_salt($len=20){ + $salt="QWERTYUIOPASDFGHJKLZXCVBNM1234567890qwertyuiopasdfghjklzxcvbnm"; + $salt=str_shuffle($salt); + if($len>=strlen($salt)){ + return $salt; + }else{ + return substr($salt,mt_rand(0,strlen($salt)-$len-1),$len); + } + } + /*密码加密*/ + public static function pwd_encrypt($pwd,$salt=''){ + $pwd=sha1($pwd); + if(!empty($salt)){ + $pwd.=$salt; + } + return md5($pwd); + } /*检测用户名正确且是否存在*/ public function checkUsername($username){ diff --git a/SkycaijiApp/admin/model/Usergroup.php b/SkycaijiApp/admin/model/Usergroup.php index 6facc58..f1265eb 100644 --- a/SkycaijiApp/admin/model/Usergroup.php +++ b/SkycaijiApp/admin/model/Usergroup.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ @@ -28,6 +28,16 @@ class Usergroup extends BaseModel{ return false; } } + /*是管理员账号*/ + public function is_admin($userGroup){ + if(empty($userGroup)){ + return false; + } + if(!empty($userGroup['founder'])||!empty($userGroup['admin'])){ + return true; + } + return false; + } } ?> \ No newline at end of file diff --git a/SkycaijiApp/admin/tags.php b/SkycaijiApp/admin/tags.php index 34f97dc..fa8cdb7 100644 --- a/SkycaijiApp/admin/tags.php +++ b/SkycaijiApp/admin/tags.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/validate/Task.php b/SkycaijiApp/admin/validate/Task.php index f473321..b484220 100644 --- a/SkycaijiApp/admin/validate/Task.php +++ b/SkycaijiApp/admin/validate/Task.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. 
|-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/validate/Taskgroup.php b/SkycaijiApp/admin/validate/Taskgroup.php index 94d61c2..576fa64 100644 --- a/SkycaijiApp/admin/validate/Taskgroup.php +++ b/SkycaijiApp/admin/validate/Taskgroup.php @@ -3,9 +3,9 @@ |-------------------------------------------------------------------------- | SkyCaiji (蓝天采集器) |-------------------------------------------------------------------------- - | Copyright (c) 2018 http://www.skycaiji.com All rights reserved. + | Copyright (c) 2018 https://www.skycaiji.com All rights reserved. |-------------------------------------------------------------------------- - | 使用协议 http://www.skycaiji.com/licenses + | 使用协议 https://www.skycaiji.com/licenses |-------------------------------------------------------------------------- */ diff --git a/SkycaijiApp/admin/view/app/agreement.html b/SkycaijiApp/admin/view/app/agreement.html new file mode 100644 index 0000000..16acaa4 --- /dev/null +++ b/SkycaijiApp/admin/view/app/agreement.html @@ -0,0 +1,11 @@ + + \ No newline at end of file diff --git a/SkycaijiApp/admin/view/app/apiop.html b/SkycaijiApp/admin/view/app/apiop.html new file mode 100644 index 0000000..5213908 --- /dev/null +++ b/SkycaijiApp/admin/view/app/apiop.html @@ -0,0 +1,59 @@ + \ No newline at end of file diff --git a/SkycaijiApp/admin/view/app/manage.html b/SkycaijiApp/admin/view/app/manage.html new file mode 100644 index 0000000..9babbc5 --- /dev/null +++ b/SkycaijiApp/admin/view/app/manage.html @@ -0,0 +1,68 @@ +{extend name="common:main" /} +{block name="cssjs"} + + +{/block} +{block name="content"} +

+{if !isset($navid)} +
+
启用
+
+ {$appData['config']['enable']?'已开启':'已关闭'} +
+
名称
+
{$appData['config']['name']}
+
标识
+
{$appData['app']}
+
版本
+
+ {$appData['config']['version']} + {if $newest_version} +   新版本{$newest_version} + {/if} +
+
框架
+
{$appData['config']['framework']?($appData['config']['framework'].$appData['config']['framework_version']):'自定义'}
+ {if !empty($appData['config']['phpv'])} +
PHP
+
最低要求php{$appData['config']['phpv']}版本
+ {/if} +
首页
+
+ 应用首页 +
+ {if !empty($appData['config']['author'])} +
作者
+
{$appData['config']['author']}
+ {/if} + {if !empty($provData)} +
平台
+
第三方平台
+ {elseif !empty($appData['config']['website'])} +
网站
+
作者网站
+ {/if} + {if $appData['config']['desc']} +
描述
+
{$appData['config']['desc']}
+ {/if} +
开发
+
开发应用
+
+{else /} +
+
 
页面加载中...
+ +
+{/if} + +{/block} \ No newline at end of file diff --git a/SkycaijiApp/admin/view/backstage/index.html b/SkycaijiApp/admin/view/backstage/index.html index 11cdc03..08e2240 100644 --- a/SkycaijiApp/admin/view/backstage/index.html +++ b/SkycaijiApp/admin/view/backstage/index.html @@ -66,12 +66,12 @@ {/if} 当前版本 - SkyCaiji V{$serverInfo['version']} + SkyCaiji V{$serverData['version']} - {if condition="isset($serverInfo['caiji'])"} + {if condition="isset($serverData['caiji'])"} 采集状态 - {$serverInfo['caiji']} + {$serverData['caiji']} {/if} @@ -90,23 +90,23 @@ 操作系统 - {$serverInfo['os']} + {$serverData['os']} 运行环境 - {$serverInfo['server']} + {$serverData['server']} 数据库 - {$serverInfo['db']} + {$serverData['db']} PHP版本 - {$serverInfo['php']} + {$serverData['php']} 上传限制 - {$serverInfo['upload_max']} + {$serverData['upload_max']} diff --git a/SkycaijiApp/admin/view/collector/save2store.html b/SkycaijiApp/admin/view/collector/save2store.html index 7d2c9f7..622f467 100644 --- a/SkycaijiApp/admin/view/collector/save2store.html +++ b/SkycaijiApp/admin/view/collector/save2store.html @@ -1,7 +1,7 @@ {if condition="!empty($collData)"}
-
+
+{/block} +{block name="content"} +
+
+
+ {if !empty($appData)} + + + {/if} + + {if $newest_version} + + {/if} + +
+ + {if !empty($appData)} + + {else /} + + {/if} +

标识可由小写字母、下划线、数字组成,长度3个字符以上且以字母开头,建议加入自己的版权以区别他人的应用

+
+
+ + {if $appFrameworkPath} + + + +

如需修改框架,请先删除:{$appFrameworkPath}

+ {else /} + + {foreach $frameworks as $fmw=>$vers} + + {/foreach} +
+ 使用php框架开发该应用 + {if !empty($appData['config']['framework'])&&empty($appFrameworkPath)} + 注意安装框架将会初始化应用,如应用中存在项目文件请先备份以防丢失! +
+ {/if} +
+ {/if} +
+
+ + +

应用的中文名称

+
+
+ + +

应用版本号标准格式:x.x或x.x.x(x为数字最多两位数)版本号高于旧版本时会提示升级

+
+
+ + +

可以使用<p><br><b><i><a>标签

+
+
+ + +
+
+ + +

第三方平台网址,必须包含前缀http://或https://

+
+
+ + +

运行该应用要求最低PHP版本,留空则无要求

+
+
+ + +

安装时提示协议

+
+ +
+ + + + + + + + + + + + + + + + + + + + + +
程序操作相对于应用的链接(开头不加/)
安装 + +
卸载 + +
升级 + +
+
+
+
+ + +
+
+
+
+ + + + + + + + + + + +
名称类型链接操作
+
+
+
+ +
+
+
+
+ +{/block} \ No newline at end of file diff --git a/SkycaijiApp/admin/view/develop/appAddPack.html b/SkycaijiApp/admin/view/develop/appAddPack.html new file mode 100644 index 0000000..ff9fd42 --- /dev/null +++ b/SkycaijiApp/admin/view/develop/appAddPack.html @@ -0,0 +1,42 @@ +
+ +
+ + +
+
+ + + +
+
+ +
+ + +
+

是否在新窗口打开连接

+
+
+ + +
+ 可使用参数:{app}当前应用的根网址,{apps}所有应用的根网址
+
+
+
+ +
+
+ \ No newline at end of file diff --git a/SkycaijiApp/admin/view/index/find_password.html b/SkycaijiApp/admin/view/index/find_password.html index a852fe1..0c254f1 100644 --- a/SkycaijiApp/admin/view/index/find_password.html +++ b/SkycaijiApp/admin/view/index/find_password.html @@ -50,8 +50,8 @@
{if condition="!$emailStatus['success']"}
- 手动修改:在数据库{:config('database.prefix')}user表中,将username为“{$username}”的password值改为“{:pwd_encrypt('skycaiji123')}” - 即密码为“skycaiji123”,再登录后台修改密码 + 手动修改:在数据库{:config('database.prefix')}user表中,将username为“{$username}”的password值改为“{$newPwdEncrypt}” + 即密码为“{$newPwd}”,再登录后台修改密码
{/if} {elseif condition="$step eq 3"/} diff --git a/SkycaijiApp/admin/view/index/index.html b/SkycaijiApp/admin/view/index/index.html index c497f0e..677555e 100644 --- a/SkycaijiApp/admin/view/index/index.html +++ b/SkycaijiApp/admin/view/index/index.html @@ -6,14 +6,18 @@ 蓝天采集器