You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
12306/agency/cdn_utils.py

110 lines
3.6 KiB

7 years ago
# encoding=utf8
7 years ago
import collections
7 years ago
import json
import re
7 years ago
import sys
import csv
7 years ago
import requests
from config import urlConf
# Python 2 only: ``sys.setdefaultencoding`` is removed from ``sys`` during interpreter
# start-up, so the module is reloaded to get it back before forcing UTF-8 as the
# implicit str/unicode codec. Python 3 has no builtin ``reload`` and already defaults
# to UTF-8, so the hack is skipped there instead of raising NameError at import time.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf-8')
class CDNProxy:
    """Collect candidate CDN nodes for kyfw.12306.cn and keep the fast ones.

    The endpoints are read from ``urlConf.urls`` (keys ``cdn_host`` and ``cdn_list``).
    Results are written to / read from a ``cdn_list`` file in the current working
    directory.

    NOTE(review): the response handling treats ``response.content`` as text, which
    matches Python 2 byte strings -- confirm before running under Python 3.
    """

    def __init__(self, host=None):
        """Store the target host and the default request settings.

        :param host: host name posted by ``get_city_id``; when falsy that method
            does nothing.
        """
        self.host = host
        self.urlConf = urlConf.urls
        # The ``requests`` module itself is used as the HTTP client.
        self.httpClint = requests
        self.city_list = []
        # Per-request timeout in seconds, passed to every ``post`` call.
        self.timeout = 5

    def _set_header(self):
        """Return the fixed HTTP headers sent with every request."""
        return {
            "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
            "X-Requested-With": "xmlHttpRequest",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
            "Referer": "https://kyfw.12306.cn/otn/login/init",
            "Accept": "*/*",
        }

    def get_city_id(self, max_retries=None):
        """Fetch the per-city id parameters for ``self.host`` into ``self.city_list``.

        Posts ``self.host`` to the ``cdn_host`` endpoint and collects the ``id``
        attribute of every ``<li ... class="PingListCent PingRLlist`` element found
        in the response. The request is repeated until at least one id is found;
        the found list is printed.

        Any exception (network error, missing config key, ...) ends the attempt:
        it is printed and ``self.city_list`` keeps whatever value it had.

        :param max_retries: maximum number of requests to send. ``None`` (the
            default) retries until ids are found, which is the historical behaviour.
        :return: None
        """
        if not self.host:
            return
        try:
            # Loop-invariant request pieces are built once.
            url = self.urlConf["cdn_host"]["req_url"]
            data = {"host": self.host, "lintType": "电信,多线,联通,移动"}
            city_re = re.compile(r"<li id=\"(\S+)\" class=\"PingListCent PingRLlist")
            attempts = 0
            while max_retries is None or attempts < max_retries:
                attempts += 1
                rep = self.httpClint.post(url, data, headers=self._set_header(), timeout=self.timeout)
                self.city_list = city_re.findall(rep.content)
                if self.city_list:
                    print(self.city_list)
                    break
        except Exception as e:
            # Best effort: report the failure instead of silently hiding it.
            print(e)

    def open_cdn_file(self):
        """Open ``./cdn_list`` in append/read mode and return the file object.

        The caller is responsible for closing the returned file.
        """
        f = open("./cdn_list", "a+")
        return f

    @staticmethod
    def _parse_cdn_info(cdn_info):
        """Turn one raw probe response into an ordered ``key -> value`` mapping.

        Takes the text after the first ``(``, strips trailing ``)`` and all braces,
        then splits on ``,`` and ``:``. For the ``result`` entry the third
        ``:``-separated field is stored, for every other entry the second one.
        Presumably the payload looks like ``cb({state:1,result:{ip:'x',...}})`` --
        TODO confirm against the real service. Values are kept verbatim (including
        any quote characters).

        :raises IndexError: when the text has no ``(`` or an entry lacks the
            expected ``:``-separated fields.
        """
        flat = cdn_info.split("(")[1].rstrip(")").replace("{", "").replace("}", "")
        local_dict = collections.OrderedDict()
        for item in flat.split(","):
            fields = item.split(":")
            local_dict[fields[0]] = fields[2] if fields[0] == "result" else fields[1]
        return local_dict

    @staticmethod
    def _is_fast_node(local_dict, max_ms=100):
        """Return True when the probe reports state "1" and a response time under ``max_ms``.

        The response time must contain the unit "毫秒" (milliseconds); its digits are
        joined and compared as an integer.

        :raises ValueError: when the response time contains the unit but no digits.
        """
        if local_dict.get("state") != "1":
            return False
        response_time = local_dict.get("responsetime")
        if response_time is None or "毫秒" not in response_time:
            return False
        # Portable replacement for ``int(filter(str.isdigit, ...))``, which only
        # yields a string on Python 2.
        digits = "".join(ch for ch in response_time if ch.isdigit())
        return int(digits) < max_ms

    def get_cdn_list(self):
        """Probe every id in ``self.city_list`` and save the fast nodes.

        Empties the file returned by ``open_cdn_file``, then for each id posts to
        the ``cdn_list`` endpoint, prints the raw response, and appends one JSON
        line per node that passes ``_is_fast_node``. A failure for one id is printed
        and does not stop the remaining ids. Finally prints the number of nodes
        written.

        :return: None
        """
        f = self.open_cdn_file()
        # Start from zero so the reported total equals the number of lines written.
        num = 0
        try:
            url = self.urlConf["cdn_list"]["req_url"]
            # The file is opened in append mode; rewind and truncate to start fresh.
            f.seek(0)
            f.truncate()
            for guid in self.city_list:
                data = {"guid": guid,
                        "host": "kyfw.12306.cn",
                        "ishost": 0,
                        "encode": "HJXhdRqjh5yCF6G/AZ6EDk9faB1oSk5r",
                        "checktype": 0}
                try:
                    cdn_info = self.httpClint.post(url, data, headers=self._set_header(), timeout=self.timeout).content
                    print(cdn_info)
                    if not cdn_info:
                        continue
                    local_dict = self._parse_cdn_info(cdn_info)
                    if local_dict and self._is_fast_node(local_dict):
                        f.write(json.dumps(local_dict) + "\n")
                        num += 1
                except Exception as e:
                    # ``e.message`` is deprecated and often empty; print the exception itself.
                    print(e)
        finally:
            # Always release the file handle, even if the config lookup or I/O fails.
            f.close()
        print(u"本次cdn获取完成总个数{0}".format(num))

    def all_cdn(self):
        """Return the lines of the ``cdn_list`` file, newlines included.

        :raises IOError: when the file does not exist or cannot be read.
        """
        with open('cdn_list', 'r') as f:
            return f.readlines()

    def par_csv(self):
        """Print the host part of every usable row in ``../cdn1.csv``.

        A row is usable when its first column contains
        ``https://<host>/otn/index/init`` and its third column is ``"200"``.
        Rows with fewer than three columns (e.g. blank lines) are skipped.
        """
        cdn_re = re.compile(r'https://(\S+)/otn/index/init')
        with open("../cdn1.csv", "r") as csv_file:
            for row in csv.reader(csv_file):
                if len(row) < 3:
                    continue
                cdn_ip = cdn_re.findall(row[0])
                if cdn_ip and row[2] == "200":
                    print(cdn_ip[0])
7 years ago
7 years ago
7 years ago
if __name__ == '__main__':
    # Script entry point: build a helper without a host and regenerate the CDN list file.
    CDNProxy().get_cdn_list()