12306/agency/cdn_utils.py

# encoding=utf8
import collections
import json
import re
import sys
import csv
import requests
from config import urlConf
# Python 2 only: the interpreter's site module removes
# sys.setdefaultencoding() during startup, so `sys` is reloaded to get the
# function back, and the implicit str <-> unicode conversion codec is then
# switched to UTF-8. Neither call exists in this form on Python 3.
reload(sys)
sys.setdefaultencoding('utf-8')


class CDNProxy:
    """Collect candidate CDN nodes for kyfw.12306.cn through a ping service.

    Typical flow: ``get_city_id()`` fills ``city_list`` with probe-node ids,
    then ``get_cdn_list()`` queries every node and stores the fast ones in
    ``./cdn_list`` (one JSON object per line).
    """

    # Extracts the ``id`` attribute of every probe-node <li> in the ping page.
    _CITY_ID_RE = re.compile(r"<li id=\"(\S+)\" class=\"PingListCent PingRLlist")
    # Extracts the host part of a 12306 index URL found in the CSV export.
    _CDN_HOST_RE = re.compile(r'https://(\S+)/otn/index/init')
    # A node is kept only when its reported response time is below this (ms).
    _MAX_RESPONSE_MS = 100

    def __init__(self, host=None):
        """Create a proxy helper.

        :param host: host name submitted to the ping service by
            ``get_city_id``; when falsy, ``get_city_id`` does nothing.
        """
        self.host = host
        # URL table from the project configuration; this class reads the
        # "cdn_host" and "cdn_list" entries (each with a "req_url" key).
        self.urlConf = urlConf.urls
        # HTTP client; anything exposing a requests-compatible ``post`` works.
        self.httpClint = requests
        # Probe-node ids discovered by get_city_id().
        self.city_list = []
        # Per-request timeout, in seconds.
        self.timeout = 5

    def _set_header(self):
        """Return the HTTP headers sent with every request."""
        return {
            "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
            "X-Requested-With": "xmlHttpRequest",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
            "Referer": "https://kyfw.12306.cn/otn/login/init",
            "Accept": "*/*",
        }

    def get_city_id(self):
        """Fetch the probe-node ids for ``self.host`` into ``self.city_list``.

        The request is repeated until at least one id is found. Any error
        (missing configuration, network failure, ...) ends the attempt; it is
        reported on stdout instead of being silently discarded.

        :return: None
        """
        if not self.host:
            return
        try:
            url = self.urlConf["cdn_host"]["req_url"]
            data = {"host": self.host, "lintType": "电信,多线,联通,移动"}
            # NOTE(review): this retries without delay or upper bound while
            # the page contains no node ids -- consider a back-off or limit.
            while True:
                rep = self.httpClint.post(url, data, headers=self._set_header(), timeout=self.timeout)
                self.city_list = self._CITY_ID_RE.findall(rep.content)
                if self.city_list:
                    print(self.city_list)
                    break
        except Exception as e:
            # Best effort: report the failure and leave city_list as it is.
            print(e)

    def open_cdn_file(self):
        """Open ``./cdn_list`` in ``a+`` mode and return the file object.

        The caller owns the returned file and is responsible for closing it.
        """
        f = open("./cdn_list", "a+")
        return f

    @staticmethod
    def _parse_cdn_info(cdn_info):
        """Parse one ping reply of the form ``name({k:v,...,result:{k:v,...}})``.

        The reply is not valid JSON, so it is split by hand: everything after
        the first "(" is taken, trailing ")" and all braces are dropped, and
        the remainder is split on "," and ":". Because the braces are gone,
        the first nested pair shows up as ``result:key:value``; its value is
        stored under "result". Values keep any quotes present in the reply.

        :param cdn_info: raw reply text.
        :return: ``collections.OrderedDict`` of the fields, in reply order.
        :raises IndexError: when the text does not have the expected shape.
        """
        payload = cdn_info.split("(")[1].rstrip(")").replace("{", "").replace("}", "")
        parsed = collections.OrderedDict()
        for item in payload.split(","):
            parts = item.split(":")
            parsed[parts[0]] = parts[2] if parts[0] == "result" else parts[1]
        return parsed

    @classmethod
    def _is_fast_cdn(cls, cdn):
        """Tell whether a parsed reply describes a reachable, fast node.

        A node qualifies when ``state`` is "1" and ``responsetime`` is given
        in milliseconds ("毫秒") with a value below ``_MAX_RESPONSE_MS``.
        All digits of the field are concatenated before conversion.

        :param cdn: mapping produced by ``_parse_cdn_info``.
        :return: True when the node should be kept, else False.
        """
        if cdn.get("state") != "1":
            return False
        response_time = cdn.get("responsetime", "")
        if "毫秒" not in response_time:
            return False
        digits = "".join(ch for ch in response_time if ch.isdigit())
        return bool(digits) and int(digits) < cls._MAX_RESPONSE_MS

    def get_cdn_list(self):
        """Probe every node in ``self.city_list`` and store the fast ones.

        ``./cdn_list`` is emptied first; each accepted reply is then written
        as one JSON line. A failure for a single node is printed and does not
        stop the run. The number of stored nodes is printed at the end.

        :return: None
        """
        url = self.urlConf["cdn_list"]["req_url"]
        num = 0  # number of entries actually written
        # The context manager guarantees the file is closed on every path.
        with self.open_cdn_file() as f:
            # Discard the previous run's results.
            f.seek(0)
            f.truncate()
            for guid in self.city_list:
                # NOTE(review): "encode" looks like a token expected by the
                # ping service -- confirm it is still valid.
                data = {"guid": guid,
                        "host": "kyfw.12306.cn",
                        "ishost": 0,
                        "encode": "HJXhdRqjh5yCF6G/AZ6EDk9faB1oSk5r",
                        "checktype": 0}
                try:
                    cdn_info = self.httpClint.post(url, data, headers=self._set_header(), timeout=self.timeout).content
                    print(cdn_info)
                    if not cdn_info:
                        continue
                    local_dict = self._parse_cdn_info(cdn_info)
                    if self._is_fast_cdn(local_dict):
                        f.write(json.dumps(local_dict) + "\n")
                        num += 1
                except Exception as e:
                    # Best effort per node: report and move on to the next.
                    print(e)
        print(u"本次cdn获取完成，总个数{0}".format(num))

    def all_cdn(self):
        """Return the lines of ``../cdn_list`` as a list of strings.

        NOTE(review): ``get_cdn_list`` writes ``./cdn_list`` while this reads
        ``../cdn_list`` -- presumably the two are run from different working
        directories; confirm.
        """
        with open('../cdn_list', 'r') as f:
            return f.readlines()

    def par_csv(self):
        """Print the host of every row in ``../cdn1.csv`` whose status is "200".

        Column 0 is expected to hold an ``https://<host>/otn/index/init`` URL
        and column 2 the status. Rows with fewer than three columns (for
        example blank lines) are skipped.
        """
        with open("../cdn1.csv", "r") as csv_file:
            for row in csv.reader(csv_file):
                if len(row) < 3:
                    continue
                cdn_ip = self._CDN_HOST_RE.findall(row[0])
                if cdn_ip and row[2] == "200":
                    print(cdn_ip[0])


if __name__ == '__main__':
    # Script entry point: rebuild ./cdn_list with a default-configured proxy.
    # NOTE(review): get_city_id() is never called on this path, so city_list
    # is still empty here and the run only truncates the file -- confirm
    # whether a host and a get_city_id() call are missing.
    CDNProxy().get_cdn_list()