Renamed pirate_search to torrent_search

2017-11-01 16:09:06 +01:00
parent 736fefbcad
commit c1c1ca3ad4
10 changed files with 44 additions and 6 deletions
--- a/torrentSearch/api_torznab.py
+++ b/torrentSearch/api_torznab.py
@@ -0,0 +1,13 @@
+
+def build_api_url():
+    # Unfinished stub: reads two settings from the application config into
+    # locals and returns nothing yet.
+    # NOTE(review): the keys are spelled 'JACKET_*' here while the example
+    # config in this commit uses 'JACKETT_*' -- confirm which spelling
+    # app.config actually carries.
+    base_url = app.config['JACKET_HOST']
+    base_port = app.config['JACKET_PORT']
+    
+
+def call_jackett_api():
+    # Unfinished stub: reads the same two config settings as build_api_url
+    # and does not use them yet.
+    base_url = app.config['JACKET_HOST']
+    base_port = app.config['JACKET_PORT']
+    # NOTE(review): `base_p` is a bare name that is not assigned in this
+    # function; unless it exists at module level, calling this raises
+    # NameError. Looks like a line that was cut off mid-word.
+    base_p
+
+def handle_request_by_args(query, mode=None):
+    # NOTE(review): the statement below is not valid Python (stray ':' after
+    # `api`); the function appears to have been committed unfinished.
+    apikey = api:
--- a/torrentSearch/config.example.ini
+++ b/torrentSearch/config.example.ini
@@ -0,0 +1,9 @@
+[DEFAULT]
+
+
+[JACKETT]
+JACKETT_HOST = '10.0.0.41'
+JACKETT_PORT = 9117
+JACKETT_MAX_SEARCH_RESULT = 1000
+JACKETT_SSL = False
+RESULTS_PER_PAGE = 75
--- a/torrentSearch/config.example.py
+++ b/torrentSearch/config.example.py
@@ -0,0 +1,8 @@
+[DEFAULT]
+
+
+[JACKETT]
+JACKETT_HOST = '10.0.0.41'
+JACKETT_PORT = '9117'
+JACKETT_MAX_SEARCH_RESULT = 1000
+RESULTS_PER_PAGE = 75
--- a/torrentSearch/jackett.py
+++ b/torrentSearch/jackett.py
@@ -0,0 +1,13 @@
+
+class Jackett(object):
+	"""Holds the values of one Jackett request: url, query, apikey and limit."""
+
+	def __init__(self, url, query, apikey, limit):
+		"""Store the four request values unchanged on the instance."""
+		super(Jackett, self).__init__()
+		self.url = url
+		self.query = query
+		self.apikey = apikey
+		self.limit = limit
+
+	@classmethod
+	def build_search_url(cls, url, query, apikey, limit):
+		"""Alternate constructor: return ``cls(url, query, apikey, limit)``.
+
+		A classmethod receives the class as its first argument; without the
+		``cls`` parameter every call failed with a TypeError.
+		"""
+		return cls(url, query, apikey, limit)
--- a/torrentSearch/piratebay.py
+++ b/torrentSearch/piratebay.py
@@ -0,0 +1,176 @@
+# Should maybe not be able to set values without checking if they are valid?
+class piratebay(object):
+	"""Scraper for the search result pages of thepiratebay.org.
+
+	An instance keeps the state of one search (query, page, numeric sort and
+	category codes, number of result pages) and turns fetched pages into
+	``Torrent`` objects.
+
+	NOTE(review): the class relies on module-level names (``BeautifulSoup``,
+	``parse``, ``request``, ``URLError``, ``logging``, ``re``, ``Torrent``,
+	``stringTime``, ``pagesToCount``, ``return_re_match``) that are expected
+	to be imported elsewhere in this module -- confirm they are.
+	"""
+
+	def __init__(self, query=None, page=0, sort=None, category=None):
+		"""Store the request parameters; nothing is fetched here."""
+		# This should be moved to a config file
+		self.url = 'https://thepiratebay.org/search'
+		self.sortTypes = {
+			'size': 5,
+			'seed_count': 99
+		}
+		self.categoryTypes = {
+			'movies': 207,
+			'porn_movies': 505,
+		}
+		# - - -
+
+		# Req params
+		self.query = query
+		self.page = page
+		self.sort = sort
+		self.category = category
+		self.total_pages = 0
+		self.headers = {'User-Agent': 'Mozilla/5.0'}
+
+	def build_URL_request(self):
+		"""Return a Request for ``<url>/<quoted query>/<page>/<sort>/<category>``."""
+		url = '/'.join([self.url, parse.quote(self.query), str(self.page), str(self.sort), str(self.category)])
+		return request.Request(url, headers=self.headers)
+
+	def next_page(self):
+		"""Advance ``self.page`` by one, fetch that page and return its torrents.
+
+		No upper bound is enforced here; callers decide how many pages to walk.
+		"""
+		self.page += 1
+		print(self.page)
+		raw_page = self.callPirateBaT()
+		return self.parse_raw_page_for_torrents(raw_page)
+
+	def set_total_pages(self, raw_page):
+		"""Set ``self.total_pages`` to the number of paging links in *raw_page*.
+
+		The links are the ``<a>`` tags inside the first ``align="center"``
+		element following the element with id ``SearchResults``. When either
+		element is missing the count is left at 0 instead of crashing.
+		"""
+		# body-id:searchResults-id:content-align:center
+		soup = BeautifulSoup(raw_page, 'html.parser')
+		content_searchResult = soup.body.find(id='SearchResults')
+		if content_searchResult is None:
+			self.total_pages = 0
+			return
+
+		page_div = content_searchResult.find_next(attrs={"align": "center"})
+		if page_div is None:
+			self.total_pages = 0
+			return
+
+		self.total_pages = len(page_div.find_all('a'))
+
+	def callPirateBaT(self):
+		"""Fetch the page for the current request state and return its raw body.
+
+		Raises:
+			ValueError: when the request failed (``fetchURL`` returned None).
+		"""
+		req = self.build_URL_request()
+
+		# fetchURL returns None on failure, so check before calling .read();
+		# otherwise the failure surfaces as an AttributeError.
+		response = self.fetchURL(req)
+		if response is None:
+			raise ValueError('Search result returned no content. Please check log for error reason.')
+
+		raw_page = response.read()
+		logging.info('Finished searching piratebay for query | %s' % stringTime())
+
+		# Value comparison: `is 0` only works through int caching.
+		if self.total_pages == 0:
+			self.set_total_pages(raw_page)
+
+		return raw_page
+
+	def search(self, query, multiple_pages=1, page=0, sort=None, category=None):
+		"""Run a new search and return the torrents found as a list.
+
+		Args:
+			query: search text.
+			multiple_pages: passed with the detected page total to
+				``pagesToCount`` to decide how many pages are fetched.
+			page: first page to fetch; only applied when it is a
+				non-negative int, otherwise the previous value is kept.
+			sort: key of ``self.sortTypes``; anything else, including the
+				default None, is rejected.
+			category: key of ``self.categoryTypes``; other values leave the
+				previous category untouched.
+
+		Raises:
+			ValueError: for an unknown sort, or when a page cannot be fetched.
+		"""
+		# This should not be logged here, but in loop. Something else here maybe?
+		logging.info('Searching piratebay with query: %r, sort: %s and category: %s | %s' %
+			(query, sort, category, stringTime()))
+
+		if sort is None or sort not in self.sortTypes:
+			raise ValueError('Invalid sort category for piratebay search')
+		self.sort = self.sortTypes[sort]
+
+		# Verify input? and reset total_pages
+		self.query = query
+		self.total_pages = 0
+
+		# Exact type check on purpose: bool is an int subclass and must not pass.
+		if type(page) is int and page >= 0:
+			self.page = page
+
+		# TODO add category list
+		if category is not None and category in self.categoryTypes:
+			self.category = self.categoryTypes[category]
+
+		# TODO Pull most of this logic out bc it needs to also be done in next_page
+		raw_page = self.callPirateBaT()
+		torrents_found = self.parse_raw_page_for_torrents(raw_page)
+		print(self.page)
+
+		# Fetch in parallel
+		n = pagesToCount(multiple_pages, self.total_pages)
+		while n > 1:
+			torrents_found.extend(self.next_page())
+			n -= 1
+
+		return torrents_found
+
+	def removeHeader(self, bs4_element):
+		"""Return the next ``tr`` when *bs4_element* has class ``header``, else the element itself."""
+		# .get() so that an element without a class attribute is simply not a header.
+		if 'header' in (bs4_element.get('class') or []):
+			return bs4_element.find_next('tr')
+
+		return bs4_element
+
+	def has_magnet(self, href):
+		"""Filter for ``find(href=...)``: truthy when *href* contains 'magnet'."""
+		return href and re.search('magnet', href)
+
+	def parse_raw_page_for_torrents(self, content):
+		"""Parse a result page and return a list of ``Torrent`` objects.
+
+		Returns an empty list when the page has no element with id
+		``searchResult``, so callers can always iterate or extend the result.
+		Cells without a magnet link are skipped.
+		"""
+		soup = BeautifulSoup(content, 'html.parser')
+		content_searchResult = soup.body.find(id='searchResult')
+
+		if content_searchResult is None:
+			logging.info('No torrents found for the search criteria.')
+			return []
+
+		torrentWrapper = self.removeHeader(content_searchResult.tr)
+
+		torrents_found = []
+		for torrentElement in torrentWrapper.find_all_next('td'):
+			if not torrentElement.find_all("div", class_='detName'):
+				continue
+
+			# COULD NOT FIND HREF!
+			magnet = torrentElement.find(href=self.has_magnet)
+			if magnet is None:
+				continue
+
+			detail_link = torrentElement.find('a', class_='detLink')
+			name = detail_link.get_text()
+			url = detail_link['href']
+
+			uploader = torrentElement.find('a', class_='detDesc')
+			if uploader is None:
+				uploader = torrentElement.find('i')
+			uploader = uploader.get_text()
+
+			info_text = torrentElement.find('font', class_='detDesc').get_text()
+			date = return_re_match(info_text, r"(\d+\-\d+\s\d+)|(Y\-day\s\d{2}\:\d{2})")
+			size = return_re_match(info_text, r"(\d+(\.\d+)?\s[a-zA-Z]+)")
+
+			# Only the first two right-aligned elements are used; limit the
+			# lookup instead of collecting every later one in the document.
+			seed_and_leech = torrentElement.find_all_next(attrs={"align": "right"}, limit=2)
+			seed = seed_and_leech[0].get_text()
+			leech = seed_and_leech[1].get_text()
+
+			torrents_found.append(
+				Torrent(name, magnet['href'], size, uploader, date, seed, leech, url))
+
+		logging.info('Found %s torrents for given search criteria.' % len(torrents_found))
+		return torrents_found
+
+	def fetchURL(self, req):
+		"""Open *req* and return the response, or None after logging a URLError."""
+		try:
+			response = request.urlopen(req)
+		except URLError as e:
+			if hasattr(e, 'reason'):
+				logging.error('We failed to reach a server with request: %s' % req.full_url)
+				logging.error('Reason: %s' % e.reason)
+			elif hasattr(e, 'code'):
+				logging.error('The server couldn\'t fulfill the request.')
+				# The message needs a placeholder for the extra argument.
+				logging.error('Error code: %s', e.code)
+			return None
+		else:
+			return response
--- a/torrentSearch/search.py
+++ b/torrentSearch/search.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3.6
+import configparser
+
+def create_jackett_client():
+	"""Read the Jackett connection settings from ``config.example.ini``.
+
+	The file name is relative, so it is looked up in the current working
+	directory. Returns a list ``[host, port, use_ssl]`` with the values of
+	JACKETT_HOST, JACKETT_PORT and JACKETT_SSL from the [JACKETT] section,
+	as the strings configparser hands back.
+	"""
+	parser = configparser.ConfigParser()
+	parser.read('config.example.ini')
+	jackett_section = parser['JACKETT']
+	option_names = ('JACKETT_HOST', 'JACKETT_PORT', 'JACKETT_SSL')
+	return [jackett_section[option] for option in option_names]
+
+def search(term='', user=None, sort='date', order='desc', category='0_0',
+		quality_filter='0', page='1', per_page=75):
+    """Forward a search to ``search_jackett`` and return its result.
+
+    All arguments except ``quality_filter`` (accepted but not forwarded) are
+    passed on as keyword arguments, together with ``max_search_results``
+    taken from ``app.config['MAX_SEARCH_RESULT']`` (1000 when unset).
+    """
+    result_cap = app.config.get('MAX_SEARCH_RESULT', 1000)
+    return search_jackett(
+        term=term,
+        user=user,
+        sort=sort,
+        order=order,
+        category=category,
+        page=page,
+        per_page=per_page,
+        max_search_results=result_cap,
+    )
+
+# This should be done front_end!
+# I.E. filtering like this should be done in another script
+# and should be done with the shared standard for types. 
+# PS: Is it the right move to use a shared standard? What
+# happens if it is no longer public?
+def chooseCandidate(torrent_list):
+	"""Return the torrents of *torrent_list* that pass the Blu-ray filter.
+
+	A torrent is kept (and printed) when its ``find_release_type()`` shares
+	at least one entry with the wanted release types, its seed count is
+	above zero and its size is more than 4 with the unit 'GiB'. The size
+	string is split into number and unit at the first space.
+	"""
+	wanted_release_types = {
+		'bdremux', 'brremux', 'remux', 'bdrip', 'brrip',
+		'blu-ray', 'bluray', 'bdmv', 'bdr', 'bd5',
+	}
+
+	accepted = []
+	for candidate in torrent_list:
+		shared_types = wanted_release_types.intersection(candidate.find_release_type())
+		amount, _, unit = candidate.size.partition(' ')
+
+		# Guards run in the same order as the original and-chain.
+		if not shared_types:
+			continue
+		if not int(candidate.seed_count) > 0:
+			continue
+		if not (float(amount) > 4 and unit == 'GiB'):
+			continue
+
+		print('{} : {} : {} {}'.format(candidate.name, candidate.size, candidate.seed_count, candidate.magnet))
+		accepted.append(candidate)
+
+	return accepted
+
+
+def searchTorrentSite(query, site='piratebay'):
+	"""Debug entry point: search *site* for *query*, print the results, exit.
+
+	Prints every torrent found and then the subset chosen by
+	``chooseCandidate``, and finally ends the process with exit status 0.
+
+	Args:
+		query: search text handed to the site's ``search``.
+		site: 'piratebay' (default) or 'jackett'.
+
+	Raises:
+		NotImplementedError: for 'jackett', which has no working client yet.
+		ValueError: for any other unknown site name.
+	"""
+	# Local import: pprint is not imported in the visible module header.
+	from pprint import pprint
+
+	# Compare strings by value; `is` only matched through literal interning.
+	if site == 'piratebay':
+		pirate = piratebay()
+		torrents_found = pirate.search(query, page=0, multiple_pages=5, sort='size')
+	elif site == 'jackett':
+		# The old branch rebound `jackett` to itself and then searched through
+		# the never-assigned `pirate`, so it could only fail with a NameError.
+		raise NotImplementedError('Searching through jackett is not implemented yet')
+	else:
+		# Previously fell through to an unbound `torrents_found`.
+		raise ValueError('Unknown torrent site: %r' % site)
+
+	pprint(torrents_found)
+	candidates = chooseCandidate(torrents_found)
+	pprint(candidates)
+
+	# Same effect as the former exit(0). A second, movies-only search
+	# (category='movies') and a length comparison used to follow here but
+	# could never run; re-add them before this line if they are wanted.
+	# Can autocall to next_page in a looped way to get more if nothing is found
+	# and there is more pages to be looked at
+	raise SystemExit(0)
+
+def main():
+	"""Command-line entry point: search for the query given as first argument.
+
+	Exits with a usage message when no query is supplied.
+	"""
+	# Local import: sys is not imported in the visible module header.
+	import sys
+
+	if len(sys.argv) < 2:
+		sys.exit('usage: {} <query>'.format(sys.argv[0]))
+
+	searchTorrentSite(sys.argv[1])
+
+if __name__ == '__main__':
+	main()
--- a/torrentSearch/utils.py
+++ b/torrentSearch/utils.py