2018-01-10 01:42:25 +00:00
|
|
|
|
# -*- coding: utf8 -*-
|
2018-01-20 15:23:51 +00:00
|
|
|
|
import json
|
2019-10-17 01:51:16 +00:00
|
|
|
|
import random
|
2018-01-20 15:23:51 +00:00
|
|
|
|
import socket
|
2018-06-11 15:03:23 +00:00
|
|
|
|
from collections import OrderedDict
|
2018-01-23 09:48:09 +00:00
|
|
|
|
from time import sleep
|
2018-01-10 01:42:25 +00:00
|
|
|
|
import requests
|
2019-09-12 04:34:51 +00:00
|
|
|
|
from fake_useragent import UserAgent
|
|
|
|
|
import TickerConfig
|
2019-01-08 09:04:40 +00:00
|
|
|
|
from agency.agency_tools import proxy
|
2018-01-24 04:56:18 +00:00
|
|
|
|
from config import logger
|
2018-01-25 06:15:47 +00:00
|
|
|
|
|
2019-01-08 01:48:18 +00:00
|
|
|
|
|
2018-06-11 15:03:23 +00:00
|
|
|
|
def _set_header_default():
    """
    Build the default headers applied to every new or reset session.

    The mapping is ordered; keys are inserted in the order listed below.
    An ``Accept`` header is intentionally not sent (it was disabled in the
    original code).

    :return: an ``OrderedDict`` of header name -> header value
    """
    default_pairs = [
        ("Accept-Encoding", "gzip, deflate"),
        ("User-Agent", _set_user_agent()),
        ("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"),
        ("Origin", "https://kyfw.12306.cn"),
        ("Connection", "keep-alive"),
    ]
    return OrderedDict(default_pairs)
|
|
|
|
|
|
|
|
|
|
|
2019-09-12 04:34:51 +00:00
|
|
|
|
def _set_user_agent():
|
2019-12-20 05:06:49 +00:00
|
|
|
|
# try:
|
|
|
|
|
# user_agent = UserAgent(verify_ssl=False).random
|
|
|
|
|
# return user_agent
|
|
|
|
|
# except:
|
|
|
|
|
# print("请求头设置失败,使用默认请求头")
|
|
|
|
|
# return 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.' + str(
|
|
|
|
|
# random.randint(5000, 7000)) + '.0 Safari/537.36'
|
|
|
|
|
return "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
|
2019-09-12 04:34:51 +00:00
|
|
|
|
|
|
|
|
|
|
2018-01-10 01:42:25 +00:00
|
|
|
|
class HTTPClient(object):
    """Wrapper around a ``requests.Session`` adding retries, CDN host switching and an optional proxy."""

    def __init__(self, is_proxy, cdnList=None):
        """
        Create the underlying session and optionally configure a proxy.

        :param is_proxy: when equal to 1, a proxy object is created and the
            result of its ``setProxy()`` is passed as ``proxies`` on every request
        :param cdnList: optional list of CDN hosts that ``send`` switches to
            when a response comes back empty. Per the original note, this list
            should exclude the query CDN so that the query CDN does not
            pollute the CDN used for login and ordering.
        """
        self.initS()
        self._cdn = None
        self.cdnList = cdnList
        self._proxies = None
        # Compare by value: ``is 1`` relies on int caching and warns on Python 3.8+.
        if is_proxy == 1:
            self.proxy = proxy()
            self._proxies = self.proxy.setProxy()

    def initS(self):
        """
        (Re)create the session and load the default headers into it.

        :return: self, to allow chaining
        """
        self._s = requests.Session()
        self._s.headers.update(_set_header_default())
        return self

    def set_cookies(self, kwargs):
        """
        Set cookies on the session.

        :param kwargs: an iterable of dicts; every key/value pair of every
            dict is stored as a cookie
        :return: None
        """
        for cookie_dict in kwargs:
            for name, value in cookie_dict.items():
                self._s.cookies.set(name, value)

    def get_cookies(self):
        """
        Get the cookies of the session.

        :return: the values of all cookies currently stored in the session
        """
        return self._s.cookies.values()

    def del_cookies(self):
        """
        Delete every cookie stored in the session.

        :return: None
        """
        self._s.cookies.clear()

    def del_cookies_by_key(self, key):
        """
        Delete the cookie stored under the given key.

        :param key: name of the cookie; its value is set to ``None``
        :return: None
        """
        self._s.cookies.set(key, None)

    def setHeaders(self, headers):
        """
        Merge the given headers into the session headers.

        :param headers: mapping of header name -> value
        :return: self, to allow chaining
        """
        self._s.headers.update(headers)
        return self

    def resetHeaders(self):
        """Drop all session headers and restore the defaults."""
        self._s.headers.clear()
        self._s.headers.update(_set_header_default())

    def getHeadersHost(self):
        """Return the session's ``Host`` header (raises ``KeyError`` if unset)."""
        return self._s.headers["Host"]

    def setHeadersHost(self, host):
        """
        Set the session's ``Host`` header.

        :return: self, to allow chaining
        """
        self._s.headers.update({"Host": host})
        return self

    def setHeadersUserAgent(self):
        """Refresh the session's ``User-Agent`` header from ``_set_user_agent()``."""
        self._s.headers.update({"User-Agent": _set_user_agent()})

    def getHeadersUserAgent(self):
        """Return the session's ``User-Agent`` header."""
        return self._s.headers["User-Agent"]

    def getHeadersReferer(self):
        """Return the session's ``Referer`` header (raises ``KeyError`` if unset)."""
        return self._s.headers["Referer"]

    def setHeadersReferer(self, referer):
        """
        Set the session's ``Referer`` header.

        :return: self, to allow chaining
        """
        self._s.headers.update({"Referer": referer})
        return self

    @property
    def cdn(self):
        """The CDN host used by ``send`` for CDN-enabled URLs, or ``None``."""
        return self._cdn

    @cdn.setter
    def cdn(self, cdn):
        self._cdn = cdn

    def _pop_cdn(self):
        """
        Remove and return a random host from the first five entries of ``cdnList``.

        The index is clamped to the list length so that a list with fewer
        than five entries no longer raises ``IndexError``. The caller must
        ensure ``cdnList`` is non-empty.

        :return: the CDN host that was removed from ``cdnList``
        """
        last_index = min(4, len(self.cdnList) - 1)
        return self.cdnList.pop(random.randint(0, last_index))

    def send(self, urls, data=None, **kwargs):
        """
        Send a request described by ``urls``, retrying up to ``urls["re_try"]`` times.

        :param urls: dict describing the endpoint. Required keys: ``Referer``,
            ``Host``, ``is_json`` (read when a non-empty body is returned) and
            ``re_time`` (read when the status is neither 200 nor 302).
            Optional keys: ``req_url``, ``re_try``, ``s_time``, ``is_logger``,
            ``is_cdn``, ``is_test_cdn``, ``httpType``, ``not_decode``.
        :param data: request body; a truthy value makes the request a POST,
            otherwise a GET is sent and the session headers are reset
        :param kwargs: extra keyword arguments forwarded to ``Session.request``
        :return: raw bytes if ``not_decode`` is set; the parsed JSON object if
            ``is_json`` is truthy; otherwise the body decoded as UTF-8. When
            all attempts fail, ``{"code": 99999, "message": ...}`` is returned.
        """
        allow_redirects = False
        is_logger = urls.get("is_logger", False)
        req_url = urls.get("req_url", "")
        re_try = urls.get("re_try", 0)
        s_time = urls.get("s_time", 0)
        is_cdn = urls.get("is_cdn", False)
        is_test_cdn = urls.get("is_test_cdn", False)
        error_data = {"code": 99999, "message": u"重试次数达到上限"}

        if data:
            method = "post"
            # NOTE(review): for a dict ``data`` this is the number of keys, not
            # the body size; requests presumably recomputes Content-Length
            # when it prepares the body - confirm.
            self.setHeaders({"Content-Length": "{0}".format(len(data))})
        else:
            method = "get"
            self.resetHeaders()

        # Compare by value: ``is 1`` relies on int caching and warns on Python 3.8+.
        if TickerConfig.RANDOM_AGENT == 1:
            self.setHeadersUserAgent()
        self.setHeadersReferer(urls["Referer"])
        if is_logger:
            logger.log(
                u"url: {0}\n入参: {1}\n请求方式: {2}\n".format(req_url, data, method))
        self.setHeadersHost(urls["Host"])

        # The Host header always carries the real host; only the address that
        # is actually connected to may be swapped for a CDN host.
        if is_test_cdn:
            url_host = self._cdn
        elif is_cdn and self._cdn:
            url_host = self._cdn
        else:
            url_host = urls["Host"]
        http = urls.get("httpType") or "https"

        # Requests are sent with verify=False; silence the resulting urllib3
        # warnings once, best-effort, instead of on every retry.
        try:
            requests.packages.urllib3.disable_warnings()
        except Exception:
            pass

        for _ in range(re_try):
            try:
                sleep(s_time)
                response = self._s.request(method=method,
                                           timeout=5,
                                           proxies=self._proxies,
                                           url=http + "://" + url_host + req_url,
                                           data=data,
                                           allow_redirects=allow_redirects,
                                           verify=False,
                                           **kwargs)
                if response.status_code not in (200, 302):
                    sleep(urls["re_time"])
                    continue
                if urls.get("not_decode", False):
                    return response.content
                if not response.content:
                    print(f"url: {urls['req_url']}返回参数为空, 接口状态码: {response.status_code}")

                    logger.log(
                        u"url: {} 返回参数为空".format(urls["req_url"]))
                    if self.cdnList:
                        # If ordering or login hits a CDN 302, switch CDN immediately.
                        url_host = self._pop_cdn()
                    continue
                if is_logger:
                    # Lenient decode: a log line must not be able to abort the request.
                    logger.log(
                        u"出参:{0}".format(response.content.decode("utf8", "replace")))
                if urls["is_json"]:
                    try:
                        result = json.loads(
                            response.content.decode() if isinstance(response.content, bytes) else response.content)
                    except ValueError:
                        # Body is not valid UTF-8/JSON (e.g. an HTML error page): retry.
                        continue
                    return result
                return response.content.decode("utf8", "ignore") if isinstance(response.content,
                                                                               bytes) else response.content
            except (requests.exceptions.Timeout, requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError):
                # Network-level failure: fall through to the next attempt.
                pass
            except socket.error:
                pass
        return error_data
|