12306/myUrllib/httpUtils.py

190 lines
6.4 KiB
Python
Raw Normal View History

# -*- coding: utf8 -*-
import json
import socket
from collections import OrderedDict
from time import sleep
import requests
2019-09-12 04:34:51 +00:00
from fake_useragent import UserAgent
import TickerConfig
2019-01-08 09:04:40 +00:00
from agency.agency_tools import proxy
2018-01-24 04:56:18 +00:00
from config import logger
2018-01-25 06:15:47 +00:00
2019-01-08 01:48:18 +00:00
def _set_header_default():
    """Build the default request headers (insertion order preserved).

    The User-Agent is freshly randomized on every call via ``_set_user_agent``.
    """
    defaults = (
        ("Accept-Encoding", "gzip, deflate"),
        ("User-Agent", _set_user_agent()),
        ("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"),
        ("Origin", "https://kyfw.12306.cn"),
        ("Connection", "keep-alive"),
    )
    return OrderedDict(defaults)
def _set_user_agent():
    """Return a random User-Agent string from fake_useragent's pool."""
    return UserAgent(verify_ssl=False).random
class HTTPClient(object):
    """Thin wrapper around a ``requests.Session`` preconfigured for 12306 calls.

    Holds default headers, optional proxy settings, an optional CDN host
    override, and a retrying ``send`` helper.
    """

    def __init__(self, is_proxy):
        """
        :param is_proxy: 1 to route traffic through a proxy obtained from
                         ``agency.agency_tools.proxy``; any other value
                         connects directly.
        """
        self.initS()
        self._cdn = None
        self._proxies = None
        # `== 1` (was `is 1`): identity comparison with an int literal relies
        # on CPython small-int caching and emits SyntaxWarning on 3.8+.
        if is_proxy == 1:
            self.proxy = proxy()
            self._proxies = self.proxy.setProxy()

    def initS(self):
        """(Re)create the underlying session with the default headers."""
        self._s = requests.Session()
        self._s.headers.update(_set_header_default())
        return self

    def set_cookies(self, kwargs):
        """
        Set cookies on the session.

        :param kwargs: iterable of dicts, each mapping cookie name -> value
        :return: None
        """
        for kwarg in kwargs:
            for k, v in kwarg.items():
                self._s.cookies.set(k, v)

    def get_cookies(self):
        """
        Return the values of all cookies currently held by the session.

        :return: list of cookie values
        """
        return self._s.cookies.values()

    def del_cookies(self):
        """
        Remove every cookie from the session.

        :return: None
        """
        self._s.cookies.clear()

    def del_cookies_by_key(self, key):
        """
        Remove the cookie with the given name.

        Setting a cookie to None deletes it in requests' cookie jar.

        :return: None
        """
        self._s.cookies.set(key, None)

    def setHeaders(self, headers):
        """Merge *headers* into the session headers; returns self for chaining."""
        self._s.headers.update(headers)
        return self

    def resetHeaders(self):
        """Drop all session headers and restore the defaults."""
        self._s.headers.clear()
        self._s.headers.update(_set_header_default())

    def getHeadersHost(self):
        """Return the current Host header (KeyError if never set)."""
        return self._s.headers["Host"]

    def setHeadersHost(self, host):
        """Set the Host header; returns self for chaining."""
        self._s.headers.update({"Host": host})
        return self

    def setHeadersUserAgent(self):
        """Replace the User-Agent header with a fresh random one."""
        self._s.headers.update({"User-Agent": _set_user_agent()})

    def getHeadersUserAgent(self):
        """Return the current User-Agent header."""
        return self._s.headers["User-Agent"]

    def getHeadersReferer(self):
        """Return the current Referer header (KeyError if never set)."""
        return self._s.headers["Referer"]

    def setHeadersReferer(self, referer):
        """Set the Referer header; returns self for chaining."""
        self._s.headers.update({"Referer": referer})
        return self

    @property
    def cdn(self):
        # Optional CDN host that replaces urls["Host"] in send().
        return self._cdn

    @cdn.setter
    def cdn(self, cdn):
        self._cdn = cdn

    def send(self, urls, data=None, **kwargs):
        """send request to url.If response 200,return response, else return None.

        :param urls: dict describing the endpoint. Recognized keys: req_url,
                     Host, Referer, is_json, and optional re_try, s_time,
                     re_time, is_cdn, is_test_cdn, not_decode, is_logger,
                     httpType.
        :param data: POST body; when falsy, a GET is issued instead.
        :return: parsed JSON when is_json is truthy, raw bytes when
                 not_decode is truthy, otherwise decoded text; the
                 ``error_data`` dict when every retry fails.
        """
        allow_redirects = False
        is_logger = urls.get("is_logger", False)
        req_url = urls.get("req_url", "")
        re_try = urls.get("re_try", 0)
        s_time = urls.get("s_time", 0)
        is_cdn = urls.get("is_cdn", False)
        is_test_cdn = urls.get("is_test_cdn", False)
        error_data = {"code": 99999, "message": u"重试次数达到上限"}
        if data:
            method = "post"
            # NOTE(review): len(data) counts characters for str bodies, not
            # bytes; requests recomputes Content-Length itself, so this is
            # kept only for parity with historical behavior.
            self.setHeaders({"Content-Length": "{0}".format(len(data))})
        else:
            method = "get"
            self.resetHeaders()
        # `== 1` (was `is 1`): see __init__ — identity test is fragile here.
        if TickerConfig.RANDOM_AGENT == 1:
            self.setHeadersUserAgent()
        self.setHeadersReferer(urls["Referer"])
        if is_logger:
            logger.log(
                u"url: {0}\n入参: {1}\n请求方式: {2}\n".format(req_url, data, method))
        self.setHeadersHost(urls["Host"])
        # Resolve which host to hit: explicit CDN test > configured CDN > Host.
        if is_test_cdn:
            url_host = self._cdn
        elif is_cdn and self._cdn:
            url_host = self._cdn
        else:
            url_host = urls["Host"]
        http = urls.get("httpType") or "https"
        for _ in range(re_try):
            try:
                sleep(s_time)
                try:
                    # Best-effort: silence urllib3's InsecureRequestWarning
                    # (verify=False below); attribute layout varies across
                    # requests versions, hence the guard (was a bare except).
                    requests.packages.urllib3.disable_warnings()
                except Exception:
                    pass
                response = self._s.request(method=method,
                                           timeout=5,
                                           proxies=self._proxies,
                                           url=http + "://" + url_host + req_url,
                                           data=data,
                                           allow_redirects=allow_redirects,
                                           verify=False,
                                           **kwargs)
                if response.status_code in (200, 302):
                    if urls.get("not_decode", False):
                        return response.content
                    if response.content:
                        if is_logger:
                            logger.log(
                                u"出参:{0}".format(response.content.decode()))
                        # .get avoids an uncaught KeyError when "is_json" is
                        # missing from the urls dict (was urls["is_json"]).
                        if urls.get("is_json", False):
                            return json.loads(
                                response.content.decode() if isinstance(response.content, bytes)
                                else response.content)
                        return response.content.decode("utf8", "ignore") \
                            if isinstance(response.content, bytes) else response.content
                    print(f"url: {urls['req_url']}返回参数为空, 接口状态码: {response.status_code}")
                    logger.log(
                        u"url: {} 返回参数为空".format(urls["req_url"]))
                    continue
                else:
                    # Back off before the next attempt; fall back to s_time
                    # when re_time is absent (urls["re_time"] used to raise an
                    # uncaught KeyError out of send()).
                    sleep(urls.get("re_time", s_time))
            except (requests.exceptions.Timeout,
                    requests.exceptions.ReadTimeout,
                    requests.exceptions.ConnectionError):
                pass
            except socket.error:
                pass
        return error_data