A script for all our HTTP functions, such as building URLs and making requests.
This commit is contained in:
38
torrentSearch/http_utils.py
Normal file
38
torrentSearch/http_utils.py
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env python3.6
|
||||
|
||||
from urllib import parse, request
|
||||
from urllib.error import URLError
|
||||
import logging
|
||||
|
||||
def build_url(ssl, baseUrl, path, args_dict=None):
    """Build a URL from a base URL, a path and optional query arguments.

    :param ssl: when truthy the scheme is set to ``https``, otherwise
        ``http``; any scheme already present in ``baseUrl`` is overwritten.
    :param str baseUrl: URL to start from; it is split with ``urlparse``.
    :param path: replacement path, either a string or a list/tuple of
        segments that are joined with ``/``.
    :param args_dict: mapping (or sequence of two-item tuples) that is
        encoded as the query string; ``None`` or an empty value produces
        no query string.
    :return: the assembled URL.
    :rtype: str

    """
    # urlparse yields (scheme, netloc, path, params, query, fragment).
    url_parts = list(parse.urlparse(baseUrl))
    url_parts[0] = 'https' if ssl else 'http'
    # isinstance also accepts tuples and list subclasses as path segments.
    url_parts[2] = '/'.join(path) if isinstance(path, (list, tuple)) else path
    # A None default replaces the former mutable ``[]`` default argument.
    url_parts[4] = parse.urlencode(args_dict or {})
    return parse.urlunparse(url_parts)
|
||||
|
||||
def convert_query_to_percent_encoded_octets(input_query):
    """Convert an input string or list to a percent-encoded string.

    This is for encoding information in a Uniform Resource Identifier
    (URI) using urllib.

    :param input_query: a string, or a list/tuple of strings that are
        joined with single spaces before encoding.
    :return: the percent-encoded query as produced by
        ``urllib.parse.quote``.
    :rtype: str

    """
    # isinstance (rather than an exact type check) also accepts tuples
    # and list subclasses.
    if isinstance(input_query, (list, tuple)):
        input_query = ' '.join(input_query)

    return parse.quote(input_query)
|
||||
|
||||
def fetch_url(url):
    """Open ``url`` with a browser-like User-Agent and return the response.

    :param str url: the URL to request.
    :return: the object returned by ``urllib.request.urlopen``, or ``None``
        when the request fails with a ``URLError``; the failure is logged.

    """
    req = request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    try:
        response = request.urlopen(req)
    except URLError as e:
        # HTTPError is a URLError subclass that has both ``code`` and
        # ``reason``, so ``code`` has to be tested first; otherwise the
        # HTTP-status branch can never be reached.
        if hasattr(e, 'code'):
            logging.error('The server couldn\'t fulfill the request.')
            # The format string needs a placeholder for the status code.
            logging.error('Error code: %s', e.code)
        elif hasattr(e, 'reason'):
            logging.error('We failed to reach a server with request: %s', req.full_url)
            logging.error('Reason: %s', e.reason)
        return None
    return response
|
||||
@@ -1,47 +0,0 @@
|
||||
#!/usr/bin/env python3.6
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Author: KevinMidboe
|
||||
# @Date: 2017-11-01 15:57:23
|
||||
# @Last Modified by: KevinMidboe
|
||||
# @Last Modified time: 2017-11-01 16:11:32
|
||||
|
||||
def sanitize(string, ignore_characters=None, replace_characters=None):
    """Sanitize a string by replacing the given characters.

    :param str string: the string to sanitize.
    :param set ignore_characters: characters to strip from the string;
        when empty or ``None`` the string is returned unchanged.
    :param str replace_characters: replacement inserted for every stripped
        character; defaults to the empty string.
    :return: the sanitized string, or ``None`` when ``string`` is ``None``.
    :rtype: str

    """
    # Imported locally because the module has no top-level ``import re``.
    import re

    # only deal with strings
    if string is None:
        return None

    if not ignore_characters:
        return string

    replacement = replace_characters or ''
    # Escape the characters so they are taken literally in the class.
    pattern = r'[%s]' % re.escape(''.join(ignore_characters))
    return re.sub(pattern, replacement, string)
|
||||
|
||||
def return_re_match(string, re_statement):
    """Return the first match of ``re_statement`` in ``string``.

    :param str string: the text to search.
    :param re_statement: the regular expression handed to ``re.search``.
    :return: ``None`` when ``string`` is ``None`` or nothing matches; the
        current date formatted as ``'%m-%d %Y'`` when the match contains
        ``'Y-day'``; otherwise the matched text with non-breaking spaces
        replaced by ordinary spaces.

    """
    # Imported locally because the module has no top-level imports.
    import datetime
    import re

    if string is None:
        return None

    m = re.search(re_statement, string)
    # re.search returns None when nothing matches; calling .group() on it
    # would raise AttributeError.
    if m is None:
        return None

    matched = m.group()
    if 'Y-day' in matched:
        # NOTE(review): 'Y-day' presumably means "yesterday", yet today's
        # date is returned - confirm this is intended.
        return datetime.datetime.now().strftime('%m-%d %Y')
    return sanitize(matched, '\xa0', ' ')
|
||||
|
||||
|
||||
# Can maybe be moved away from this class
def pagesToCount(multiple, total):
    """Limit the requested number of pages to the pages available.

    :param multiple: the number of pages asked for.
    :param total: the total number of pages.
    :return: ``multiple``, or ``total`` when ``multiple`` exceeds it.

    """
    return min(multiple, total)
|
||||
Reference in New Issue
Block a user