Added docstring to all functions. 🎉

This commit is contained in:
2017-12-22 12:52:38 +01:00
parent 6bdf904a40
commit 98c05a380d
4 changed files with 195 additions and 160 deletions

View File

@@ -8,26 +8,41 @@ from urllib.error import URLError
logger = logging.getLogger('torrentSearch') logger = logging.getLogger('torrentSearch')
def build_url(ssl, baseUrl, path, args_dict=[]): def build_url(ssl, baseUrl, path, args_dict={}):
"""
Given the parameters, joins them together into a complete url
:param bool ssl: if ssl is to be used or not
:param str baseUrl: the start of the url (http://thepiratebay.org)
:param list path: the rest of the path to the url (['search', 'lucifer', '0'])
:param dict args_dict: a dict with the query element we want to append to the url
:return: complete url based on the inputs
:rtype: str
"""
url_parts = list(parse.urlparse(baseUrl)) url_parts = list(parse.urlparse(baseUrl))
url_parts[0] = 'https' if ssl else 'http' url_parts[0] = 'https' if ssl else 'http'
if type(path) is list:
url_parts[2] = '/'.join(path) url_parts[2] = '/'.join(path)
else:
url_parts[2] = path
url_parts[4] = parse.urlencode(args_dict) url_parts[4] = parse.urlencode(args_dict)
return parse.urlunparse(url_parts) return parse.urlunparse(url_parts)
# Converts a input string or list to percent-encoded string,
# this is for encoding information in a Uniform Resource
# Identifier (URI) using urllib
def convert_query_to_percent_encoded_octets(input_query): def convert_query_to_percent_encoded_octets(input_query):
"""
Percent-encodes the input query for use in a URL (e.g. spaces become '%20')
:param str input_query:
:return: the percent-encoded query string
:rtype: str
"""
if type(input_query) is list: if type(input_query) is list:
input_query = ' '.join(input_query) input_query = ' '.join(input_query)
return parse.quote(input_query) return parse.quote(input_query)
def fetch_url(url): def fetch_url(url):
"""
Call and get output for a given url
:param str url: the url we want to make a request to
:return: a response object with contents and status code of the request
:rtype: http.client.HTTPResponse
"""
logger.debug('Fetching query: {}'.format(url)) logger.debug('Fetching query: {}'.format(url))
req = request.Request(url, headers={'User-Agent': 'Mozilla/5.0'}) req = request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
try: try:

View File

@@ -21,27 +21,25 @@ class Jackett(object):
self.page_limit = limit self.page_limit = limit
self.ssl = ssl self.ssl = ssl
# Returns the api key set in the initiator
# return [string]
def get_apikey(self): def get_apikey(self):
logger.debug('Using api key: {}'.format(self.apikey)) logger.debug('Using api key: {}'.format(self.apikey))
return self.apikey return self.apikey
# Returns the path set in the initiator
# return [string]
def get_path(self): def get_path(self):
return self.path return self.path
# Returns the page_limit set in the initiator
# return [string]
def get_page_limit(self): def get_page_limit(self):
logger.debug('Current page limit: {} pages'.format(self.page_limit)) logger.debug('Current page limit: {} pages'.format(self.page_limit))
return self.page_limit return self.page_limit
# Starts the call to getting result from our indexer
# query [string]
# returns [List of Torrent objects]
def search(self, query): def search(self, query):
"""
Starts the call to get results from our indexer
:param jackett.Jackett self: object instance
:param str query: query we want to search for
:return: list of results we found from scraping jackett output based on query
:rtype: list
"""
baseUrl = 'http://' + self.host baseUrl = 'http://' + self.host
path = self.get_path() path = self.get_path()
url_args = { url_args = {
@@ -57,10 +55,15 @@ class Jackett(object):
return self.parse_xml_for_torrents(res.read()) return self.parse_xml_for_torrents(res.read())
# def __init__(self, name, magnet=None, size=None, uploader=None, date=None,
# seed_count=None, leech_count=None, url=None):
def find_xml_attribute(self, xml_element, attr): def find_xml_attribute(self, xml_element, attr):
"""
Finds a specific XML attribute given a element name
:param jackett.Jackett self: object instance
:param xml.etree.ElementTree.Element xml_element: the xml tree we want to search
:param str attr: the attribute/element name we want to find in the xml tree
:return: the value of the element given the attr/element name
:rtype: str
"""
value = xml_element.find(attr) value = xml_element.find(attr)
if (value != None): if (value != None):
logger.debug('Found attribute: {}'.format(attr)) logger.debug('Found attribute: {}'.format(attr))
@@ -70,6 +73,13 @@ class Jackett(object):
return '' return ''
def parse_xml_for_torrents(self, raw_xml): def parse_xml_for_torrents(self, raw_xml):
"""
Parses the raw XML output from jackett and extracts the torrents it contains
:param jackett.Jackett self: object instance
:param bytes raw_xml: the xml page returned by querying jackett
:return: all the torrents we found in the xml page
:rtype: list
"""
tree = ET.fromstring(raw_xml) tree = ET.fromstring(raw_xml)
channel = tree.find('channel') channel = tree.find('channel')
results = [] results = []

View File

@@ -21,20 +21,20 @@ class Piratebay(object):
self.page = 0 self.page = 0
self.total_pages = -1 self.total_pages = -1
# Returns the path set in the initiator
# return [string]
def get_path(self): def get_path(self):
return self.path return self.path
# Returns the page_limit set in the initiator
# return [string]
def get_page_limit(self): def get_page_limit(self):
return self.page_limit return self.page_limit
# Starts the call to getting result from our indexer
# query [string]
# returns [List of Torrent objects]
def search(self, query): def search(self, query):
"""
Starts the call to get results from the thepiratebay site
:param piratebay.Piratebay self: object instance
:param str query: query we want to search for
:return: list of results we found from scraping thepiratebay site based on query
:rtype: list
"""
search_query = convert_query_to_percent_encoded_octets(query) search_query = convert_query_to_percent_encoded_octets(query)
baseUrl = 'http://' + self.host baseUrl = 'http://' + self.host
@@ -45,7 +45,6 @@ class Piratebay(object):
return self.parse_raw_page_for_torrents(res.read()) return self.parse_raw_page_for_torrents(res.read())
def removeHeader(self, bs4_element): def removeHeader(self, bs4_element):
if ('header' in bs4_element['class']): if ('header' in bs4_element['class']):
return bs4_element.find_next('tr') return bs4_element.find_next('tr')

View File

@@ -79,7 +79,9 @@ def main():
def getConfig(): def getConfig():
""" """
Read path and get configuration file with site settings Read path and get configuration file with site settings
Returns config [configparser]
:return: config settings read from 'config.ini'
:rtype: configparser.ConfigParser
""" """
config = configparser.ConfigParser() config = configparser.ConfigParser()
config_dir = os.path.join(BASE_DIR, 'config.ini') config_dir = os.path.join(BASE_DIR, 'config.ini')
@@ -90,7 +92,10 @@ def getConfig():
def createJSONList(torrents): def createJSONList(torrents):
""" """
Iterates over all torrent objects in torrents and gets all attributes which are appended to a list Iterates over all torrent objects in torrents and gets all attributes which are appended to a list
Returns: List of torrents with all their info in a JSON format
:param list torrents: list of torrent objects to serialize
:return: List of torrents with all their info in a JSON format
:rtype: str
""" """
jsonList = [] jsonList = []
for torrent in torrents: for torrent in torrents:
@@ -129,7 +134,13 @@ def chooseCandidate(torrent_list):
def searchTorrentSite(config, query, site, print_result): def searchTorrentSite(config, query, site, print_result):
""" """
Selects site based on input and finds torrents for that site based on query Selects site based on input and finds torrents for that site based on query
Returns json list with results. If print_results is True in args then also prints the output to terminal
:param configparser.ConfigParser config: configuration with site settings
:param str query: query to search search torrents for
:param str site: the site we want to index/scrape
:param bool print_result: whether the results should be printed to the terminal
:return: json list with results
:rtype: str
""" """
logger.debug('Searching for query {} at {}'.format(query, site)) logger.debug('Searching for query {} at {}'.format(query, site))