mirror of https://gitee.com/zorlan/skycaiji
49 lines
19 KiB
PHP
49 lines
19 KiB
PHP
/*
 |--------------------------------------------------------------------------
 | SkyCaiji (蓝天采集器)
 |--------------------------------------------------------------------------
 | Copyright (c) 2018 https://www.skycaiji.com All rights reserved.
 |--------------------------------------------------------------------------
 | 使用协议 https://www.skycaiji.com/licenses
 |--------------------------------------------------------------------------
 */
'use strict';

/**
 * State holder for the "test source URLs" dialog.
 *
 * The constructor only stores its arguments; all DOM wiring happens in
 * the prototype's init().
 *
 * @param {*} collId      Stored as `coll_id`; sent as the `coll_id` query value in requests.
 * @param {*} maxNum      Stored as `max_num`; upper bound on how many URLs are expanded per level.
 * @param {*} sourceIsUrl Stored as `source_is_url`; when truthy init() skips loading content URLs.
 * @param {*} hasLevels   Stored as `has_levels`; selects multi-level vs. single-level loading in init().
 * @constructor
 */
function CpTestSourceUrls(collId, maxNum, sourceIsUrl, hasLevels) {
    this.coll_id = collId;
    this.max_num = maxNum;
    this.source_is_url = sourceIsUrl;
    this.has_levels = hasLevels;

    // Selector of the dialog root element every lookup is scoped to.
    this.box_id = '#win_test_source_urls';

    // Requests started by get_cont_urls(), kept so they can be aborted later.
    this.url_ajax_requests = [];
}
/**
 * Behaviour of the "test source URLs" dialog.
 *
 * init() wires the dialog's click handlers and starts loading the URL
 * lists; get_cont_urls() fetches the URLs found under one source/level
 * URL and renders them, recursing into the next level when the current
 * one is a level page (level > 0).
 *
 * Relies on page-level helpers that are not defined in this block:
 * $, ajaxOpen, ulink, toastr, htmlspecialchars, isObject, cpBrowserUrl
 * and cpEasyBrowser.
 */
CpTestSourceUrls.prototype = (function () {
    var hasOwn = Object.prototype.hasOwnProperty;

    /**
     * Collects the data-url of every ancestor `.source_url` element of `el`.
     *
     * Ancestors carrying a data-level attribute are stored under
     * `level<N>_url`; the one without a level is stored under `source_url`.
     *
     * @param {Element} el Element whose ancestors are inspected.
     * @returns {Object} Map of parameter name to URL.
     */
    function collectAncestorUrls(el) {
        var found = {};
        $(el).parents('.source_url').each(function () {
            var node = $(this);
            var ancestorUrl = node.attr('data-url');
            if (!ancestorUrl) {
                return;
            }
            var ancestorLevel = node.attr('data-level');
            if (ancestorLevel) {
                found['level' + ancestorLevel + '_url'] = ancestorUrl;
            } else {
                found.source_url = ancestorUrl;
            }
        });
        return found;
    }

    /**
     * Normalises a server value into a real array.
     *
     * Arrays pass through; plain objects contribute their own values in
     * key order; anything else (including a missing value) becomes [].
     * This keeps `.length` meaningful for every response shape.
     *
     * @param {*} value Value taken from the response.
     * @returns {Array}
     */
    function toList(value) {
        if (Array.isArray(value)) {
            return value;
        }
        var out = [];
        if (value && typeof value === 'object') {
            for (var key in value) {
                if (hasOwn.call(value, key)) {
                    out.push(value[key]);
                }
            }
        }
        return out;
    }

    return {
        constructor: CpTestSourceUrls,

        /**
         * Binds the dialog's event handlers and starts loading the URL
         * list of each `.source_url` element already in the dialog.
         */
        init: function () {
            var self = this;
            var box = self.box_id;

            // Abort every request still tracked when the modal is being hidden.
            $('#myModal').on('hide.bs.modal', function () {
                var pending = self.url_ajax_requests || [];
                for (var i = 0; i < pending.length; i++) {
                    if (pending[i] && typeof pending[i].abort === 'function') {
                        pending[i].abort();
                    }
                }
                // Drop the aborted handles so the list does not grow without bound.
                self.url_ajax_requests = [];
            });

            // Send the number typed into the "set test num" input to the backend.
            $(box).on('click', '.set-test-num .set-num-btn', function () {
                var testNum = $(box + ' .set-test-num').find('.set-num').val();
                ajaxOpen({
                    type: 'get',
                    dataType: 'json',
                    url: ulink('cpattern_test/level_num'),
                    data: {num: testNum},
                    async: true,
                    success: function (data) {
                        toastr.success(data.msg);
                    }
                });
            });

            // "测试" link: open the test page for this URL in a new tab,
            // passing along the URLs of all enclosing levels.
            $(box).on('click', '.cont_ceshi', function () {
                var target = ulink('cpattern_test/test_url?coll_id=_collid_&test_url=_url_', {
                    '_collid_': self.coll_id,
                    '_url_': $(this).attr('data-url')
                });
                var ancestors = collectAncestorUrls(this);
                for (var name in ancestors) {
                    if (hasOwn.call(ancestors, name)) {
                        target += '&' + name + '=' + encodeURIComponent(ancestors[name]);
                    }
                }
                window.open(target, '_blank');
            });

            // "分析" link: open the URL in the browser tool. When the current
            // page URL carries an `easymode=` parameter cpEasyBrowser is used
            // instead of opening a new tab.
            $(box).on('click', '.cont_element', function () {
                var ancestors = collectAncestorUrls(this);
                var pageUrl = $(this).attr('data-url');
                if ((/\beasymode\s*\=/i).test(window.self.location.href) && window.top) {
                    cpEasyBrowser(pageUrl, 'url', ancestors);
                } else {
                    window.open(cpBrowserUrl(self.coll_id, 'url', pageUrl, ancestors), '_blank');
                }
            });

            // Expand/collapse toggle; the link's `status` attribute holds the
            // current state ("1" = expanded).
            $(box).on('click', '.cont_urls_num a', function () {
                var toggle = $(this);
                var list = toggle.parents('.cont_urls_num').eq(0).siblings('.cont_urls_list');
                if (String(toggle.attr('status')) === '1') {
                    toggle.html('[展开]');
                    toggle.attr('status', 0);
                    list.hide();
                } else {
                    toggle.html('[收起]');
                    toggle.attr('status', 1);
                    list.show();
                }
            });

            // Initial load for every source URL rendered into the dialog.
            $(box + ' .source_url').each(function (index) {
                if (self.source_is_url) {
                    return;
                }
                var node = $(this);
                var sourceUrl = node.attr('data-url');
                if (self.has_levels) {
                    $(box + ' .set-test-num').show().find('.set-num').val(self.max_num);
                    // Only the first max_num source URLs are expanded.
                    if (index < self.max_num) {
                        self.get_cont_urls(sourceUrl, node, 1);
                    }
                } else {
                    self.get_cont_urls(sourceUrl, node, 0);
                }
            });
        },

        /**
         * Requests the URLs found under `source_url` and renders them into
         * `curObj`'s `.cont_urls_list` / `.cont_urls_num` children.
         *
         * For level > 0 each rendered item is itself a `.source_url` and is
         * loaded recursively with the response's nextLevel/sourceUrl/level.
         * For level 0 the items are final URLs with "测试"/"分析" links and
         * the list starts collapsed.
         *
         * @param {string} source_url   URL to query; nothing happens when empty.
         * @param {jQuery} curObj       Container element of this URL.
         * @param {number} level        Level being fetched; 0 means content URLs.
         * @param {string} [parentUrl]   Sent as `parent_url`; defaults to ''.
         * @param {*}      [parentLevel] Sent as `parent_level`; defaults to ''.
         */
        get_cont_urls: function (source_url, curObj, level, parentUrl, parentLevel) {
            var self = this;
            if (!source_url) {
                return;
            }
            parentUrl = parentUrl ? parentUrl : '';
            parentLevel = parentLevel ? parentLevel : '';

            curObj.children('.cont_urls_list').html('<div class="loading-sm"></div>').show();

            var request = ajaxOpen({
                type: 'post',
                url: ulink('cpattern_test/cont_urls?coll_id=_collid_', {'_collid_': self.coll_id}),
                data: {
                    source_url: source_url,
                    level: level,
                    parent_url: parentUrl,
                    parent_level: parentLevel
                },
                dataType: 'json',
                async: true,
                success: function (resp) {
                    var listBox = curObj.children('.cont_urls_list');

                    // Loose comparison kept: it is not visible here whether the
                    // backend sends the code as a number or a numeric string.
                    if (resp.code != 1) {
                        // NOTE(review): msg is inserted as HTML, as before — confirm
                        // the backend never echoes untrusted text here.
                        listBox.html('<span style="color:red">' + resp.msg + '</span>').show();
                        return;
                    }

                    var data = resp.data ? resp.data : {};
                    // A response without `urls` used to crash on `urls.length`.
                    var urls = toList(data.urls);
                    var urlMsgLinks = isObject(data.urlMsgLinks) ? data.urlMsgLinks : {};

                    var html = '<ul style="list-style-type:' + (level > 0 ? 'decimal' : 'square')
                        + ';padding-left:15px;white-space:normal;overflow-x:scroll;">';
                    var i, msgLink, safeUrl;

                    if (level > 0) {
                        for (i = 0; i < urls.length; i++) {
                            // Entries beyond max_num are counted but not rendered.
                            if (!(i < self.max_num)) {
                                break;
                            }
                            // Message links are keyed by the raw (unescaped) URL.
                            msgLink = urlMsgLinks[urls[i]] ? urlMsgLinks[urls[i]] : '';
                            safeUrl = htmlspecialchars(urls[i]);
                            html += '<li class="source_url" data-url="' + safeUrl + '" data-level="' + level
                                + '"><p><b>抓取第' + level + '级页面 “' + data.levelName + '”:</b>'
                                + data.levelOpened + safeUrl + msgLink
                                + '</p><p class="cont_urls_num"></p><div class="cont_urls_list"></div></li>';
                        }
                        html += '</ul>';
                        listBox.html(html).show();
                        curObj.children('.cont_urls_num').html(
                            '获取到' + urls.length + '条网址'
                            + (urls.length > self.max_num ? '(只测试前' + self.max_num + '条)' : '')
                            + ' <a href="javascript:;" status="1">[收起]</a>'
                        );
                        // Descend into every item that was just rendered.
                        listBox.find('.source_url').each(function () {
                            var item = $(this);
                            self.get_cont_urls(item.attr('data-url'), item, data.nextLevel, data.sourceUrl, data.level);
                        });
                    } else {
                        for (i = 0; i < urls.length; i++) {
                            msgLink = urlMsgLinks[urls[i]] ? urlMsgLinks[urls[i]] : '';
                            safeUrl = htmlspecialchars(urls[i]);
                            html += '<li>[<a href="javascript:;" class="cont_ceshi" data-url="' + safeUrl + '">测试</a>] '
                                + '[<a href="javascript:;" class="cont_element" data-url="' + safeUrl + '">分析</a>] '
                                + data.urlOpened + safeUrl + msgLink + '</li>';
                        }
                        html += '</ul>';
                        // Content URL lists start collapsed.
                        listBox.html(html).hide();
                        curObj.children('.cont_urls_num').html(
                            '获取到' + urls.length + '条网址 <a href="javascript:;" status="0">[展开]</a>'
                        );
                    }
                }
            });

            self.url_ajax_requests.push(request);
        }
    };
})();
/**
 * State holder for the "test URL" dialog.
 *
 * Only stores its arguments and defaults; event wiring happens in the
 * prototype's init().
 *
 * @param {*}      collId       Stored as `coll_id`; sent as the `coll_id` query value in requests.
 * @param {string} [testType]   Initial test operation; falls back to 'get_fields' when falsy.
 * @param {string} [pageSource] Initial page source; falls back to '' when falsy.
 * @constructor
 */
function CpTestUrl(collId, testType, pageSource) {
    this.coll_id = collId;
    this.test = testType || 'get_fields';

    // Whether test requests run asynchronously; init() switches this per operation.
    this.test_async = true;

    this.page_source = pageSource || '';

    // Selector of the dialog root element every lookup is scoped to.
    this.box_id = '#win_test_url';

    // Requests started through ajax(), kept so abort_test() can cancel them.
    this.test_ajax_requests = [];
}
/**
 * Behaviour of the "test URL" dialog.
 *
 * The dialog lets the user pick a test operation from a dropdown, enter
 * a URL, run the operation against the backend and see the rendered
 * result in `.field-vals`.
 *
 * Relies on page-level helpers that are not defined in this block:
 * $, ajaxOpen, ulink, toastr, htmlspecialchars, isObject, isNull,
 * dataIsJson, dataIsHtml, generateUUID, visualizeData, execVarFuncs and
 * cpBrowserUrl.
 */
CpTestUrl.prototype = (function () {
    var hasOwn = Object.prototype.hasOwnProperty;

    // Operations for which the page-source selector is shown.
    var PAGE_SOURCE_TESTS = {get_html: true, get_browser: true, get_signs: true, get_pagination: true};

    /** Escapes any value for insertion into HTML text or attributes. */
    function escText(value) {
        return htmlspecialchars('' + value);
    }

    /** Returns the message link registered for `url`, or '' when there is none. */
    function linkFor(links, url) {
        return links[url] ? links[url] : '';
    }

    /** Calls fn(value, key) for every own enumerable property of obj. */
    function forEachOwn(obj, fn) {
        for (var key in obj) {
            if (hasOwn.call(obj, key)) {
                fn(obj[key], key);
            }
        }
    }

    /** Reads every named control inside #win_input_urls into a name → value map. */
    function collectInputUrls() {
        var inputs = {};
        $('#win_input_urls').find('[name]').each(function () {
            inputs[$(this).attr('name')] = $(this).val();
        });
        return inputs;
    }

    /** Fresh lookup of the result container. */
    function fieldVals(self) {
        return $(self.box_id + ' .field-vals');
    }

    /**
     * Builds the result markup of a `get_pagination` test.
     *
     * URLs are escaped before insertion. NOTE(review): this assumes the
     * backend sends them as plain text, the way the source-URL dialog
     * treats its URLs — confirm.
     */
    function renderPagination(payload) {
        var pnType = payload.pagination_type;
        var pnUrls = payload.urls;
        var links = isObject(payload.pn_links) ? payload.pn_links : {};
        var html = '';

        if (!pnType) {
            html = '<div class="page-header"><b>成功抓取到分页链接</b></div><ul>';
            forEachOwn(pnUrls, function (pageUrl) {
                html += '<li>' + escText(pageUrl) + linkFor(links, pageUrl) + '</li>';
            });
            html += '</ul>';
        } else if (pnType === 'next') {
            html = '<div class="page-header"><b>成功抓取到分页链接</b>';
            if (pnUrls && pnUrls.length >= 10) {
                html += '(测试时最多抓取到第10个分页)';
            }
            html += '</div><ul>';
            forEachOwn(pnUrls, function (pn) {
                html += '<li>当前页:' + escText(pn.cur) + linkFor(links, pn.cur)
                    + '<br>抓取到的分页:' + escText(pn.next) + linkFor(links, pn.next) + '</li>';
            });
            html += '</ul><div class="page-header"><b>循环抓取到的分页链接</b></div><ul>';
            forEachOwn(pnUrls, function (pn) {
                html += '<li>' + escText(pn.next) + linkFor(links, pn.next) + '</li>';
            });
            html += '</ul>';
        }
        return html;
    }

    /**
     * Builds the result markup of a `get_fields` test.
     *
     * More than one row renders as a table (`.table-test-loop`), exactly
     * one row renders as a textarea per field, and no rows raises an
     * error toast. A truthy `msg` is shown as a success toast first.
     */
    function renderFields(payload, msg) {
        if (msg) {
            toastr.success(msg);
        }
        var rows = payload.val_list ? payload.val_list : [];
        var html = '';

        if (rows.length > 1) {
            // Per-field column settings; only `.width` is read here.
            var widths = isNull(payload.loop_table) ? {} : payload.loop_table;
            html += '<div class="table-responsive"><table class="table table-bordered table-hover table-test-loop"><thead><tr><td></td>';
            forEachOwn(rows[0], function (unused, fieldName) {
                var saved = isNull(widths[fieldName]) ? {} : widths[fieldName];
                var style = isNull(saved.width) ? '' : (' style="width:' + parseInt(saved.width, 10) + 'px;"');
                html += '<th data-val="' + escText(fieldName) + '"><div' + style + '>' + escText(fieldName) + '</div></th>';
            });
            html += '</tr></thead><tbody>';
            for (var r = 0; r < rows.length; r++) {
                html += '<tr><td>' + (r + 1) + '</td>';
                forEachOwn(rows[r], function (cell) {
                    var val = cell ? cell : '';
                    if (!val) {
                        html += '<td></td>';
                    } else if (dataIsJson(val) || dataIsHtml(val)) {
                        // JSON/HTML values get an id plus a preview icon.
                        var eleId = generateUUID();
                        html += '<td class="view-box"><input type="text" value="' + htmlspecialchars(val) + '" id="' + eleId
                            + '" class="view-ipt" /> <a class="glyphicon glyphicon-eye-open view-btn" href="javascript:;" data-test-visualize="'
                            + eleId + '"></a></td>';
                    } else {
                        html += '<td><input type="text" value="' + htmlspecialchars(val) + '" /></td>';
                    }
                });
                html += '</tr>';
            }
            // The table and its wrapper were previously left unclosed.
            html += '</tbody></table></div>';
        } else if (rows.length == 1) {
            forEachOwn(rows[0], function (cell, fieldName) {
                var raw = cell ? cell : '';
                var preview = '';
                var eleId = generateUUID();
                if (dataIsJson(raw) || dataIsHtml(raw)) {
                    preview = '<a href="javascript:;" data-test-visualize="' + eleId + '">预览</a>';
                }
                html += '<div style="margin-bottom:10px;"><label>' + escText(fieldName) + ':</label>' + preview
                    + '<textarea id="' + eleId + '" class="form-control" rows="4">' + htmlspecialchars(raw) + '</textarea></div>';
            });
        } else {
            toastr.error('没有抓取到数据');
        }
        return html;
    }

    /**
     * Builds the result markup of a `get_relation_urls` test.
     * NOTE(review): names and URLs are escaped on the assumption that the
     * backend sends plain text — confirm.
     */
    function renderRelationUrls(payload) {
        var html = '<div class="page-header"><b>关联页网址</b></div><ul>';
        forEachOwn(payload, function (relUrl, relName) {
            html += '<li>' + escText(relName) + ':' + escText(relUrl) + '</li>';
        });
        return html + '</ul>';
    }

    /** Builds the result table of a `get_signs` test. */
    function renderSigns(self, payload, msg) {
        var html = '<table class="table table-bordered table-hover"><thead><tr><th>页面</th><th>区域规则</th><th>网址规则</th><th>提取内容</th></tr></thead><tbody>';
        if (payload.cur) {
            html += self.get_signs_html(payload.cur);
        }
        if (isObject(payload.list) && payload.list.length > 0) {
            html += '<tr><td colspan="4">' + msg + '</td></tr>';
            for (var i = 0; i < payload.list.length; i++) {
                html += self.get_signs_html(payload.list[i]);
            }
        }
        return html + '</tbody></table>';
    }

    /**
     * Makes the header cells of the multi-row result table resizable by
     * dragging, and reports the final width to the backend on mouseup.
     * Drag state lives in window.win_test_table_thead_mousedown.
     */
    function bindColumnResize(self) {
        $(self.box_id).find('.table-test-loop thead th div').on('mousedown', function (e) {
            $(this).css('background', '#f1f1f1');
            window.win_test_table_thead_mousedown = {
                table_width: $(self.box_id + ' .table-test-loop').width(),
                th_width: $(this).width(),
                page_x: e.pageX
            };
            $(this).off('mousemove').on('mousemove', function (e) {
                var drag = window.win_test_table_thead_mousedown;
                if (drag) {
                    $(this).width(drag.th_width + (e.pageX - drag.page_x));
                }
            });
            $(this).off('mouseout mouseup').on('mouseout mouseup', function (e) {
                $(this).css('background', '');
                window.win_test_table_thead_mousedown = null;
                if (e.type == 'mouseup') {
                    var width = $(this).width();
                    var field = $(this).parents('th').eq(0).attr('data-val');
                    // Columns are never stored narrower than 100px.
                    if (width < 100) {
                        width = 100;
                        $(this).width(width);
                    }
                    self.ajax({
                        type: 'get',
                        url: ulink('cpattern_test/loop_table?coll_id=_collid_&field=_field_&width=_width_', {
                            '_collid_': self.coll_id,
                            '_field_': field,
                            '_width_': width
                        }),
                        dataType: 'json',
                        success: function () {}
                    });
                }
            });
        });
    }

    /**
     * Success handler of the `get_html` test.
     *
     * A JSON response is treated as a message envelope and only its `msg`
     * is shown. Otherwise the body is shown as source: as a JSON tree
     * when it parses as JSON after the replace() below, else as raw
     * source with preview / analyse links.
     */
    function showRawHtml(self, data) {
        fieldVals(self).html('');
        data = data ? data : '';

        if (dataIsJson(data)) {
            var json = JSON.parse(data);
            fieldVals(self).html(json && json.msg ? json.msg : '');
            return;
        }

        // NOTE(review): the search string is reproduced exactly as it appears
        // in the source; it may originally have been an invisible marker
        // character rather than a plain space — confirm.
        data = data.replace(' ', '');

        if (dataIsJson(data)) {
            fieldVals(self).html(
                '<label>源码:</label><textarea class="form-control" rows="5">' + htmlspecialchars(data)
                + '</textarea><label style="margin-top:15px;">JSON解析:</label><div id="win_json_tree"></div>'
            );
            var loadTree = function () {
                window.tool_json_tree.treeId = '#win_json_tree';
                window.tool_json_tree.load(data);
            };
            // Load the tree viewer script on first use.
            if (window.tool_json_tree) {
                loadTree();
            } else {
                $.getScript(window.site_config.pub + '/static/js/admin/tool_json_tree.js', loadTree);
            }
            return;
        }

        var eleId = generateUUID();
        data = htmlspecialchars(data);
        fieldVals(self).html(
            '<label>源码:</label> <a href="javascript:;" data-test-visualize="' + eleId
            + '">预览</a><span style="margin-left:5px;margin-right:5px;">/</span><a href="javascript:;" data-test-browser="1">分析网页</a><textarea id="'
            + eleId + '" class="form-control" rows="20">' + data + '</textarea>'
        );
        $(self.box_id).find('[data-test-browser]').on('click', function () {
            var pageSource = $(self.box_id + ' [name="page_source"]').val();
            var testUrl = $(self.box_id + ' input[name="test_url"]').val();
            window.open(cpBrowserUrl(self.coll_id, pageSource, testUrl, collectInputUrls()), '_blank');
        });
    }

    /**
     * Success handler of every test other than `get_html`.
     *
     * Text wrapped in <!--[echo_msg]--> … <!--[/echo_msg]--> markers is
     * moved into the echo message panel; the remainder is parsed as the
     * JSON result and rendered according to `testOp`.
     */
    function showTestResult(self, testOp, raw) {
        fieldVals(self).html('');
        raw = raw ? raw : '';

        if (raw.indexOf('<!--[echo_msg]-->') > -1) {
            var echoMsg = '';
            raw = raw.replace(/<\!--\[echo_msg\]-->([\s\S]*?)<\!--\[\/echo_msg\]-->/ig, function (whole, inner) {
                echoMsg += inner;
                return '';
            });
            if (echoMsg) {
                $(self.box_id + ' .echo-msg-box').show().find('.panel-body').html(echoMsg);
            }
        }

        var data = dataIsJson(raw) ? (JSON.parse(raw) || {}) : {};

        // Loose comparison kept: it is not visible here whether the backend
        // sends the code as a number or a numeric string.
        if (data.code != 1) {
            fieldVals(self).html(data.msg);
            return;
        }

        // A success response without `data` used to throw on property access.
        var payload = data.data ? data.data : {};
        var html = '';
        if (testOp === 'get_pagination') {
            html = renderPagination(payload);
        } else if (testOp === 'get_fields') {
            html = renderFields(payload, data.msg);
        } else if (testOp === 'get_relation_urls') {
            html = renderRelationUrls(payload);
        } else if (testOp === 'get_signs') {
            html = renderSigns(self, payload, data.msg);
        } else if (testOp === 'get_browser') {
            html = '正在执行操作...';
            if (data.url) {
                window.location.href = data.url;
            }
        }

        fieldVals(self).html(html);
        if (testOp === 'get_fields') {
            bindColumnResize(self);
        }
    }

    /**
     * Error handler of a test request: shows a toast and renders the raw
     * response body inside an iframe sized to its content. Aborted
     * requests are ignored.
     */
    function showRequestError(self, jqXHR, textStatus) {
        if (textStatus == 'abort') {
            return;
        }
        toastr.error('抓取失败');
        fieldVals(self).html('<iframe id="ifm_test_error" style="width:100%;border:1px solid #ddd;"></iframe>');
        var iframeObj = document.getElementById('ifm_test_error');
        $(iframeObj).contents().find('body').html(jqXHR.responseText);
        var height = (iframeObj.Document
            ? iframeObj.Document.body.scrollHeight
            : iframeObj.contentDocument.body.offsetHeight);
        $(iframeObj).attr('height', height + 'px');
    }

    return {
        constructor: CpTestUrl,

        /**
         * Starts a request through ajaxOpen and remembers it for abort_test().
         *
         * @param {Object} params Options passed unchanged to ajaxOpen.
         */
        ajax: function (params) {
            this.test_ajax_requests.push(ajaxOpen(params));
        },

        /** Aborts every remembered request and clears the list. */
        abort_test: function () {
            var pending = this.test_ajax_requests;
            if (!(pending && pending.length > 0)) {
                return;
            }
            for (var i = 0; i < pending.length; i++) {
                if (pending[i] && typeof pending[i].abort === 'function') {
                    pending[i].abort();
                }
            }
            this.test_ajax_requests = [];
        },

        /**
         * Reloads the extra URL inputs (#win_input_urls) for the current
         * test and page source, then runs `func` through execVarFuncs.
         *
         * @param {*} [func] Passed to execVarFuncs once the inputs are updated.
         */
        load_input_url: function (func) {
            var self = this;
            var urlParams = $(self.box_id + ' [name="url_params"]').val();
            var inputedUrls = collectInputUrls();
            var pageSource = $(self.box_id + ' [name="page_source"]').val();
            var url = ulink('cpattern_test/input_url?test=_test_&coll_id=_collid_&page_source=_source_', {
                '_test_': self.test,
                '_collid_': self.coll_id,
                '_source_': pageSource
            });

            if ($(self.box_id + ' button.dropdown-toggle').attr('data-test') == 'get_signs') {
                if ($('#win_test_signs').find('[name="signs_cur_all"]').is(':checked')) {
                    url += '&signs_cur_all=1';
                }
            }

            self.ajax({
                type: 'post',
                url: url,
                dataType: 'html',
                async: self.test_async,
                data: {url_params: urlParams, inputed_urls: inputedUrls},
                success: function (html) {
                    if (html) {
                        $('#win_input_urls').html(html).show();
                    } else {
                        $('#win_input_urls').html('').hide();
                    }

                    // Mirror the "page opened" fragment of the response into the dialog.
                    var pageOpenedObj = $('#win_input_urls').find('#input_url_page_opened');
                    if (pageOpenedObj.length > 0) {
                        $(self.box_id + ' .test-page-opened').html(pageOpenedObj.html()).show();
                    } else {
                        $(self.box_id + ' .test-page-opened').hide();
                    }

                    execVarFuncs(func);
                }
            });
        },

        /**
         * Binds the dialog's event handlers, selects the initial test
         * operation and, for `get_fields` with a pre-filled URL, runs the
         * test immediately.
         */
        init: function () {
            var self = this;
            var box = self.box_id;

            // Dropdown entry: switch the active test operation.
            $(box + ' .dropdown-menu a[data-test]').on('click', function () {
                self.abort_test();
                var item = $(this);
                var toggleBtn = item.parents('.dropdown-menu').eq(0).siblings('button.dropdown-toggle').eq(0);
                var testName = item.attr('data-test');
                toggleBtn.attr('data-test', testName);
                toggleBtn.find('.test-tips').text(item.attr('data-title'));

                var usesPageSource = hasOwn.call(PAGE_SOURCE_TESTS, testName);
                $(box + ' [name="test_url"]').attr('placeholder', usesPageSource ? '输入网址' : '输入内容页网址');
                $(box + ' .test-page-sources').toggle(usesPageSource);

                fieldVals(self).html('');
                self.test = testName;
                $('#win_test_signs').toggle(testName === 'get_signs');
                $('#win_test_pagination').toggle(testName === 'get_pagination');
                self.load_input_url();
            });

            $(box + ' [name="page_source"]').on('change', function () {
                self.abort_test();
                self.load_input_url();
            });

            // "代码" links: show the referenced control's value as escaped source.
            $(box).off('click', '[data-test-code]').on('click', '[data-test-code]', function () {
                var objId = '#' + $(this).attr('data-test-code');
                visualizeData('<pre>' + htmlspecialchars($(objId).val()) + '</pre>');
            });

            // "预览" links: pass the referenced control's value to visualizeData.
            $(box).off('click', '[data-test-visualize]').on('click', '[data-test-visualize]', function () {
                var objId = '#' + $(this).attr('data-test-visualize');
                visualizeData($(objId).val());
            });

            // Run the selected test.
            $(box + ' button.test-sub').on('click', function () {
                self.abort_test();
                var testOp = $(box + ' button.dropdown-toggle').attr('data-test') || 'get_fields';
                // get_browser is the only operation run synchronously.
                self.test_async = testOp !== 'get_browser';

                self.load_input_url(function () {
                    var testUrl = $(box + ' input[name="test_url"]').val();
                    var pageSource = $(box + ' [name="page_source"]').val();
                    if (!testUrl) {
                        toastr.error('请输入网址');
                        return;
                    }

                    fieldVals(self).html('<div class="loading"></div>');
                    $(box + ' .echo-msg-box').hide();

                    var url = ulink('cpattern_test/_op_?coll_id=_collid_&page_source=_source_&test_url=_url_', {
                        '_op_': testOp,
                        '_source_': pageSource,
                        '_collid_': self.coll_id,
                        '_url_': testUrl
                    });
                    // Appended control by control (not via a map) so repeated
                    // names are all sent, as before.
                    $('#win_input_urls').find('[name]').each(function () {
                        url += '&' + $(this).attr('name') + '=' + encodeURIComponent($(this).val());
                    });

                    if (testOp === 'get_html') {
                        self.ajax({
                            type: 'get',
                            url: url,
                            dataType: 'html',
                            success: function (data) {
                                showRawHtml(self, data);
                            }
                        });
                        return;
                    }

                    if (testOp === 'get_signs') {
                        if ($('#win_test_signs').find('[name="signs_cur_all"]').is(':checked')) {
                            url += '&signs_cur_all=1';
                        }
                    } else if (testOp === 'get_pagination') {
                        url += '&pagination_type=' + encodeURIComponent(
                            $('#win_test_pagination').find('[name="pagination_type"]:checked').val()
                        );
                    }

                    self.ajax({
                        type: 'get',
                        url: url,
                        dataType: 'html',
                        async: self.test_async,
                        success: function (data) {
                            showTestResult(self, testOp, data);
                        },
                        error: function (jqXHR, textStatus) {
                            showRequestError(self, jqXHR, textStatus);
                        }
                    });
                });
            });

            // Enter in the URL field runs the test.
            $(box + ' input[name="test_url"]').on('keyup', function (event) {
                if (Number(event.keyCode) === 13) {
                    $(box + ' button.test-sub').trigger('click');
                }
            });

            // Activate the initially requested operation.
            $(box + ' .dropdown-menu a[data-test="' + self.test + '"]').trigger('click');

            if (self.test == 'get_fields') {
                // val() is undefined when the input is absent; guard before reading length.
                var presetUrl = $(box + ' input[name="test_url"]').val();
                if (presetUrl && presetUrl.length > 0) {
                    $(box + ' button.test-sub').trigger('click');
                }
            }

            if (self.page_source) {
                $(box + ' [name="page_source"]').val(self.page_source).trigger('change');
            }
        },

        /**
         * Renders one table row for a `get_signs` result entry: the entry
         * name followed by one cell each for its `area`, `url` and
         * `content` groups ('无' when a group has nothing to show).
         *
         * @param {Object} match Result entry with `name` and optional group objects.
         * @returns {string} Markup of one complete <tr>.
         */
        get_signs_html: function (match) {
            var html = '<tr><td>' + match.name + '</td>';
            var signKeys = ['area', 'url', 'content'];

            for (var k = 0; k < signKeys.length; k++) {
                var group = match[signKeys[k]];
                var cell = '';

                if (isObject(group)) {
                    for (var key in group) {
                        if (!hasOwn.call(group, key)) {
                            continue;
                        }
                        var eleId = generateUUID();
                        var val = group[key] ? group[key] : '';
                        // A leading "match" in the key becomes "[内容"; "]" is always appended.
                        var label = key.replace(/^match/, '[内容') + ']';
                        if (dataIsJson(val) || dataIsHtml(val)) {
                            label += ' <a href="javascript:;" data-test-visualize="' + eleId
                                + '">预览</a> <a href="javascript:;" data-test-code="' + eleId + '">代码</a>';
                        }
                        cell += '<div style="margin-bottom:5px;"><div style="margin-bottom:5px;">' + label
                            + '</div><textarea id="' + eleId + '" class="form-control" rows="2" style="resize:none">'
                            + htmlspecialchars(val) + '</textarea></div>';
                    }
                }

                html += '<td>' + (cell ? cell : '无') + '</td>';
            }

            // The row was previously returned without its closing tag.
            return html + '</tr>';
        }
    };
})();
/**
 * Wires up the "test match" dialog (`#win_test_match`).
 *
 * Switches the visible input and rule panels when their selectors
 * change, lets the result be visualised, initialises the rule module via
 * cpRuleModuleInit, and submits the form through ajaxOpen instead of a
 * normal page submit.
 */
function CpTestMatch() {
    var boxId = '#win_test_match';

    /** Removes the loading overlay added on submit; harmless when none exists. */
    function removeLoading() {
        $(boxId).find('.test-loading').remove();
    }

    // Show only the input panel matching the chosen input type.
    $(boxId + ' [name="input_type"]').on('change', function () {
        $(boxId + ' #input_type_url').hide();
        $(boxId + ' #input_type_content').hide();
        $(boxId + ' #input_type_' + $(this).val()).show();
    });

    // Show only the rule panel matching the chosen rule type.
    $(boxId + ' [name="type"]').on('change', function () {
        $(boxId + ' #type_rule').hide();
        $(boxId + ' #type_xpath').hide();
        $(boxId + ' #type_json').hide();
        $(boxId + ' #type_' + $(this).val()).show();
    });

    $(boxId + ' .test-match-val .visualize').on('click', function () {
        visualizeData($('#test_match_val').val());
    });

    cpRuleModuleInit(boxId, 'field', '');

    $(boxId + ' form').on('submit', function () {
        $(boxId).append('<div class="test-loading loading"></div>');
        ajaxOpen({
            type: 'post',
            url: $(this).attr('action'),
            dataType: 'json',
            data: $(this).serialize(),
            success: function (data) {
                removeLoading();
                if (data.code == 1) {
                    var valObj = $(boxId + ' .test-match-val');
                    valObj.show();
                    // The visualise link is only offered for HTML results.
                    if (dataIsHtml(data.msg)) {
                        valObj.find('.visualize').show();
                    } else {
                        valObj.find('.visualize').hide();
                    }
                    valObj.find('#test_match_val').val(data.msg);
                } else {
                    toastr.error(data.msg);
                }
            },
            // Also clear the overlay when the request fails, so it cannot get
            // stuck on screen. Assumes ajaxOpen forwards `complete` to
            // jQuery.ajax — TODO confirm; the removal in `success` is kept so
            // nothing regresses if it does not.
            complete: removeLoading
        });
        // Suppress the browser's own form submission.
        return false;
    });
}