You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
12306/agency/cdn_utils.py

107 lines
3.6 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# encoding=utf8
import collections
import json
import re
import sys
import csv
import requests
from config import urlConf
reload(sys)
sys.setdefaultencoding('utf-8')
class CDNProxy:
def __init__(self, host=None):
self.host = host
self.urlConf = urlConf.urls
self.httpClint = requests
self.city_list = []
self.timeout = 5
def _set_header(self):
"""设置header"""
return {
"Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
"X-Requested-With": "xmlHttpRequest",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
"Referer": "https://kyfw.12306.cn/otn/login/init",
"Accept": "*/*",
}
def get_city_id(self):
"""
获取所有城市md5参数
:return:
"""
try:
if self.host:
while True:
url = self.urlConf["cdn_host"]["req_url"]
data = {"host": self.host, "lintType": "电信,多线,联通,移动"}
rep = self.httpClint.post(url, data, headers=self._set_header(), timeout=self.timeout)
city_re = re.compile(r"<li id=\"(\S+)\" class=\"PingListCent PingRLlist")
self.city_list = re.findall(city_re, rep.content)
if self.city_list:
print self.city_list
break
else:
pass
except:
pass
def open_cdn_file(self):
f = open("./cdn_list", "a+")
return f
def get_cdn_list(self):
"""
筛选代理
:return:
"""
f = self.open_cdn_file()
url = self.urlConf["cdn_list"]["req_url"]
num = 1
f.seek(0)
f.truncate()
for guid in self.city_list:
data = {"guid": guid,
"host": "kyfw.12306.cn",
"ishost": 0,
"encode": "HJXhdRqjh5yCF6G/AZ6EDk9faB1oSk5r",
"checktype": 0}
try:
cdn_info = self.httpClint.post(url, data, headers=self._set_header(), timeout=self.timeout).content
print(cdn_info)
if cdn_info:
split_cdn = cdn_info.split("(")[1].rstrip(")").replace("{", "").replace("}", "").split(",")
local_dict = collections.OrderedDict()
for i in split_cdn:
splits = i.split(":")
local_dict[splits[0]] = splits[2] if splits[0] == "result" else splits[1]
if local_dict and "state" in local_dict and local_dict["state"] == "1":
if "responsetime" in local_dict and local_dict["responsetime"].find("毫秒") != -1 and int(filter(str.isdigit, local_dict["responsetime"])) < 100:
f.write(json.dumps(local_dict)+"\n")
num += 1
except Exception as e:
print(e.message)
print(u"本次cdn获取完成总个数{0}".format(num))
def all_cdn(self):
"""获取cdn列表"""
with open('../cdn_list', 'r') as f:
cdn = f.readlines()
return cdn
def par_csv(self):
cdn_csv = csv.reader(open("../cdn1.csv", "r"))
for c in cdn_csv:
cdn_re = re.compile(r'https://(\S+)/otn/index/init')
cdn_ip = re.findall(cdn_re, c[0])
if cdn_ip and c[2] == "200":
print(cdn_ip[0])
if __name__ == '__main__':
cdn = CDNProxy()
cdn.get_cdn_list()