130 lines
5.3 KiB
Python
130 lines
5.3 KiB
Python
# -*- coding: utf-8 -*-
|
|
# This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
|
|
# License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later
|
|
# version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
|
|
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along with this program; if not, write to the Free
|
|
# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
#
|
|
# Author: Mauro Soria
|
|
|
|
|
|
import urllib.parse
|
|
import socket
|
|
import urllib.request, urllib.parse, urllib.error
|
|
from thirdparty.urllib3 import *
|
|
from thirdparty.urllib3.exceptions import *
|
|
from .Response import *
|
|
from .RequestException import *
|
|
|
|
|
|
class Requester(object):
    """HTTP client bound to one target (protocol, host, port and base path).

    Requests go through a urllib3 connection pool that is created lazily on
    first use, either straight to the target IP address or through a proxy.
    """

    # Default request headers. __init__ gives every instance its own copy, so
    # per-target values (Host, Cookie, User-agent) are not shared between
    # instances through this class-level dict.
    headers = {
        'User-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
        'Accept-Language': 'en-us',
        'Accept-Encoding': 'identity',
        'Keep-Alive': '300',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
    }

    def __init__(self, url, cookie=None, useragent=None, maxPool=1, maxRetries=5, timeout=30, ip=None, proxy=None,
                 redirect=False):
        """Parse the target URL and store the connection settings.

        :param url: target URL; 'http://' is assumed when the URL does not
            start with an http or https scheme, and a trailing '/' is added
            when missing.
        :param cookie: value for the Cookie header, or None to send none.
        :param useragent: value replacing the default User-agent header, or
            None to keep the default.
        :param maxPool: ``maxsize`` passed to the connection pool.
        :param maxRetries: number of extra attempts ``request`` makes after a
            timeout or retry error (``maxRetries + 1`` attempts in total).
        :param timeout: timeout passed to the connection pool.
        :param ip: address to connect to; when None the host name is resolved
            with ``socket.gethostbyname``.
        :param proxy: proxy URL, or None for a direct connection.
        :param redirect: forwarded to ``urlopen`` as its ``redirect`` argument.
        :raises RequestException: if the host name cannot be resolved.
        """
        # Work on a private copy so that header changes made for this target
        # do not modify the class-level defaults.
        self.headers = dict(self.headers)

        # If there is no trailing slash, append one (also covers an empty URL)
        if not url.endswith('/'):
            url = url + '/'
        parsed = urllib.parse.urlparse(url)

        # If no http/https protocol is specified, use http by default. After
        # re-parsing with the prefix the scheme is always 'http'.
        if parsed.scheme not in ('http', 'https'):
            parsed = urllib.parse.urlparse('http://' + url)
        self.basePath = parsed.path
        self.protocol = parsed.scheme

        netloc_parts = parsed.netloc.split(':')
        self.host = netloc_parts[0]

        # Resolve DNS once up front to decrease proxychains overhead
        if ip is not None:
            self.ip = ip
        else:
            try:
                self.ip = socket.gethostbyname(self.host)
            except socket.gaierror as e:
                raise RequestException({'message': "Couldn't resolve DNS"}) from e
        self.headers['Host'] = self.host

        # If no port is specified (or it is empty), use the default (80, 443).
        # An explicit port is kept as the string found in the URL.
        if len(netloc_parts) > 1 and netloc_parts[1]:
            self.port = netloc_parts[1]
        else:
            self.port = 443 if self.protocol == 'https' else 80

        # Set cookie and user-agent headers
        if cookie is not None:
            self.setHeader('Cookie', cookie)
        if useragent is not None:
            self.setHeader('User-agent', useragent)

        self.maxRetries = maxRetries
        self.maxPool = maxPool
        self.timeout = timeout
        self.pool = None
        self.proxy = proxy
        self.redirect = redirect

    def setHeader(self, header, content):
        """Set (or overwrite) the request header ``header`` to ``content``."""
        self.headers[header] = content

    @property
    def connection(self):
        """Return the connection pool, creating it on first access."""
        if self.pool is None:
            if self.proxy is not None:
                return self.connectionWithProxy()
            if self.protocol == 'https':
                self.pool = HTTPSConnectionPool(self.ip, port=self.port, timeout=self.timeout, maxsize=self.maxPool,
                                                block=True, cert_reqs='CERT_NONE', assert_hostname=False)
            else:
                self.pool = HTTPConnectionPool(self.ip, port=self.port, timeout=self.timeout, maxsize=self.maxPool,
                                               block=True)
        return self.pool

    def connectionWithProxy(self):
        """Create the pool from the proxy URL, store it in ``self.pool`` and return it."""
        if self.protocol == 'https':
            self.pool = proxy_from_url(self.proxy, timeout=self.timeout, maxsize=self.maxPool, block=True,
                                       cert_reqs='CERT_NONE', assert_hostname=False)
        else:
            self.pool = proxy_from_url(self.proxy, timeout=self.timeout, maxsize=self.maxPool, block=True)
        return self.pool

    def request(self, path, method='GET', params='', data=''):
        """Request ``path`` (relative to the base path) and return a Response.

        :param path: path appended to the base path of the target.
        :param method: HTTP method passed to ``urlopen``.
        :param params: query string appended after '?'.
        :param data: accepted for interface compatibility; it is not sent
            with the request.
        :returns: a ``Response`` built from the status, reason, headers and
            body of the reply.
        :raises RequestException: on a proxy error, or when every one of the
            ``maxRetries + 1`` attempts ends in a timeout or retry error.
        """
        # The URL does not change between attempts, so build it once. Through
        # a proxy the absolute URL is used; otherwise only the path is sent.
        if self.proxy is None:
            url = '{0}{1}?{2}'.format(self.basePath, path, params)
        else:
            url = '{0}://{1}:{2}{3}{4}?{5}'.format(self.protocol, self.host, self.port, self.basePath, path,
                                                   params)

        for _attempt in range(self.maxRetries + 1):
            try:
                response = self.connection.urlopen(method, url, headers=self.headers, redirect=self.redirect,
                                                   assert_same_host=False)
                # Returning here means a success on the final attempt is
                # never mistaken for an exhausted retry budget.
                return Response(response.status, response.reason, response.headers, response.data)
            except ProxyError as e:
                raise RequestException({'message': 'Error with the proxy: {0}'.format(e)}) from e
            except (MaxRetryError, ReadTimeoutError, ConnectTimeoutError):
                continue

        raise RequestException({'message': 'CONNECTION TIMEOUT: There was a problem in the request to: {0}'.format(path)})
|
|
|
|
|