Added docstring to all functions. 🎉

This commit is contained in:
2017-12-22 12:52:38 +01:00
parent 6bdf904a40
commit 98c05a380d
4 changed files with 195 additions and 160 deletions

View File

@@ -8,26 +8,41 @@ from urllib.error import URLError
logger = logging.getLogger('torrentSearch')
def build_url(ssl, baseUrl, path, args_dict=None):
    """
    Join the given parameters together into a complete url.

    :param bool ssl: whether the https scheme should be used instead of http
    :param path: the rest of the url path, either as an already-joined string
        or as a list of segments (e.g. ['search', 'lucifer', '0'])
    :type path: list or str
    :param str baseUrl: the start of the url (e.g. 'http://thepiratebay.org')
    :param dict args_dict: query parameters to append to the url
    :return: complete url based on the inputs
    :rtype: str
    """
    # A None default avoids the shared-mutable-default pitfall that the
    # previous {} (and the earlier []) default argument had.
    if args_dict is None:
        args_dict = {}
    url_parts = list(parse.urlparse(baseUrl))
    url_parts[0] = 'https' if ssl else 'http'
    # isinstance is the idiomatic type check (handles subclasses too)
    url_parts[2] = '/'.join(path) if isinstance(path, list) else path
    url_parts[4] = parse.urlencode(args_dict)
    return parse.urlunparse(url_parts)
def convert_query_to_percent_encoded_octets(input_query):
    """
    Percent-encode a query so it can be embedded in a URI.

    A list input is first joined with single spaces; urllib's quote() then
    encodes spaces (and other non-safe characters) as percent-encoded
    octets, e.g. ' ' becomes '%20'.

    :param input_query: the raw query, as a string or a list of words
    :type input_query: str or list
    :return: the percent-encoded query string
    :rtype: str
    """
    if isinstance(input_query, list):
        input_query = ' '.join(input_query)
    return parse.quote(input_query)
def fetch_url(url):
"""
Call and get output for a given url
:param str url: the url we want to make a request to
:return: a response object with contents and status code of the request
:rtype: http.client.HTTPResponse
"""
logger.debug('Fetching query: {}'.format(url))
req = request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
try:

View File

@@ -21,27 +21,25 @@ class Jackett(object):
self.page_limit = limit
self.ssl = ssl
def get_apikey(self):
    """
    Return the api key that was set in the initiator.

    :return: the configured api key
    """
    key = self.apikey
    logger.debug('Using api key: {}'.format(key))
    return key
def get_path(self):
    """
    Return the path that was set in the initiator.

    :return: the configured path
    :rtype: str
    """
    return self.path
def get_page_limit(self):
    """
    Return the page_limit that was set in the initiator.

    :return: the configured page limit
    """
    limit = self.page_limit
    logger.debug('Current page limit: {} pages'.format(limit))
    return limit
# Starts the call to getting result from our indexer
# query [string]
# returns [List of Torrent objects]
def search(self, query):
"""
Starts the call to getting result from our indexer
:param jackett.Jackett self: object instance
:param str query: query we want to search for
:return: list of results we found from scraping jackett output based on query
:rtype: list
"""
baseUrl = 'http://' + self.host
path = self.get_path()
url_args = {
@@ -57,10 +55,15 @@ class Jackett(object):
return self.parse_xml_for_torrents(res.read())
# def __init__(self, name, magnet=None, size=None, uploader=None, date=None,
# seed_count=None, leech_count=None, url=None):
def find_xml_attribute(self, xml_element, attr):
"""
Finds a specific XML attribute given a element name
:param jackett.Jackett self: object instance
:param xml.etree.ElementTree.Element xml_element: the xml tree we want to search
:param str attr: the attribute/element name we want to find in the xml tree
:return: the value of the element given the attr/element name
:rtype: str
"""
value = xml_element.find(attr)
if (value != None):
logger.debug('Found attribute: {}'.format(attr))
@@ -70,6 +73,13 @@ class Jackett(object):
return ''
def parse_xml_for_torrents(self, raw_xml):
"""
Finds a specific XML attribute given a element name
:param jackett.Jackett self: object instance
:param bytes raw_xml: the xml page returned by querying jackett
:return: all the torrents we found in the xml page
:rtype: list
"""
tree = ET.fromstring(raw_xml)
channel = tree.find('channel')
results = []

View File

@@ -21,20 +21,20 @@ class Piratebay(object):
self.page = 0
self.total_pages = -1
def get_path(self):
    """
    Return the path that was set in the initiator.

    :return: the configured path
    :rtype: str
    """
    return self.path
def get_page_limit(self):
    """
    Return the page_limit that was set in the initiator.

    :return: the configured page limit
    """
    return self.page_limit
# Starts the call to getting result from our indexer
# query [string]
# returns [List of Torrent objects]
def search(self, query):
"""
Starts the call to getting result from our thepiratebay site
:param piratebay.Piratebay self: object instance
:param str query: query we want to search for
:return: list of results we found from scraping thepiratebay site based on query
:rtype: list
"""
search_query = convert_query_to_percent_encoded_octets(query)
baseUrl = 'http://' + self.host
@@ -45,7 +45,6 @@ class Piratebay(object):
return self.parse_raw_page_for_torrents(res.read())
def removeHeader(self, bs4_element):
if ('header' in bs4_element['class']):
return bs4_element.find_next('tr')

View File

@@ -79,7 +79,9 @@ def main():
def getConfig():
"""
Read path and get configuration file with site settings
Returns config [configparser]
:return: config settings read from 'config.ini'
:rtype: configparser.ConfigParser
"""
config = configparser.ConfigParser()
config_dir = os.path.join(BASE_DIR, 'config.ini')
@@ -90,7 +92,10 @@ def getConfig():
def createJSONList(torrents):
"""
Iterates over all torrent objects in torrents and gets all attributes which are appended to a list
Returns: List of torrents with all their info in a JSON format
:param list torrents: list of torrent objects to serialize
:return: List of torrents with all their info in a JSON format
:rtype: str
"""
jsonList = []
for torrent in torrents:
@@ -129,7 +134,13 @@ def chooseCandidate(torrent_list):
def searchTorrentSite(config, query, site, print_result):
"""
Selects site based on input and finds torrents for that site based on query
Returns json list with results. If print_results is True in args then also prints the output to terminal
:param configparser.ConfigParser config: configuration with the site settings
:param str query: query to search search torrents for
:param str site: the site we want to index/scrape
:param boolean print_result: if the results should be printed to the terminal
:return: json list with results
:rtype: str
"""
logger.debug('Searching for query {} at {}'.format(query, site))