Renamed pirate_search to torrent_search
This commit is contained in:
13
torrentSearch/api_torznab.py
Normal file
13
torrentSearch/api_torznab.py
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
def build_api_url():
    """Build the ``host:port`` address of the Jackett instance.

    The config keys are spelled ``JACKETT_*`` to match the example
    configuration files; the previous ``JACKET_*`` spelling does not exist
    there.

    NOTE(review): ``app`` is not defined in the visible part of this module;
    presumably it is the application object exposing ``config`` -- confirm.
    NOTE(review): scheme and API path are not added here; only host and port
    are joined.

    Returns:
        str: ``"<host>:<port>"`` built from the configured values.

    Raises:
        KeyError: if either key is missing from ``app.config``.
    """
    base_url = app.config['JACKETT_HOST']
    base_port = app.config['JACKETT_PORT']
    return '{}:{}'.format(base_url, base_port)
|
||||
|
||||
|
||||
def call_jackett_api():
    """Read the Jackett host and port from the application config.

    The request to Jackett itself is not implemented yet. The stray,
    half-typed ``base_p`` expression was removed because evaluating it
    raised ``NameError``. The config keys use the ``JACKETT_*`` spelling
    found in the example configuration files.

    NOTE(review): ``app`` is not defined in the visible part of this module;
    confirm where it comes from.

    Returns:
        None

    Raises:
        KeyError: if either key is missing from ``app.config``.
    """
    base_url = app.config['JACKETT_HOST']
    base_port = app.config['JACKETT_PORT']
    # TODO: perform the actual API call using base_url and base_port.
|
||||
|
||||
def handle_request_by_args(query, mode=None):
|
||||
apikey = api:
|
||||
9
torrentSearch/config.example.ini
Normal file
9
torrentSearch/config.example.ini
Normal file
@@ -0,0 +1,9 @@
|
||||
[DEFAULT]
|
||||
|
||||
|
||||
[JACKETT]
|
||||
JACKETT_HOST = 10.0.0.41
|
||||
JACKETT_PORT = 9117
|
||||
JACKETT_MAX_SEARCH_RESULT = 1000
|
||||
JACKETT_SSL = False
|
||||
RESULTS_PER_PAGE = 75
|
||||
8
torrentSearch/config.example.py
Normal file
8
torrentSearch/config.example.py
Normal file
@@ -0,0 +1,8 @@
|
||||
[DEFAULT]
|
||||
|
||||
|
||||
[JACKETT]
|
||||
JACKETT_HOST = '10.0.0.41'
|
||||
JACKETT_PORT = '9117'
|
||||
JACKETT_MAX_SEARCH_RESULT = 1000
|
||||
RESULTS_PER_PAGE = 75
|
||||
13
torrentSearch/jackett.py
Normal file
13
torrentSearch/jackett.py
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
class Jackett(object):
    """Parameters describing one search against a Jackett instance.

    The values are stored exactly as given; nothing is validated or sent
    over the network by this class.
    """

    def __init__(self, url, query, apikey, limit):
        """Store the search parameters.

        Args:
            url: address of the Jackett instance.
            query: search term.
            apikey: API key used to authenticate.
            limit: maximum number of results requested.
        """
        super(Jackett, self).__init__()
        self.url = url
        self.query = query
        self.apikey = apikey
        self.limit = limit

    @classmethod
    def build_search_url(cls, url, query, apikey, limit):
        """Alternate constructor: return a new instance for the given search.

        ``cls`` must be the first parameter of a classmethod; without it the
        call failed because the class itself was bound to ``url``.
        """
        return cls(url, query, apikey, limit)
|
||||
176
torrentSearch/piratebay.py
Normal file
176
torrentSearch/piratebay.py
Normal file
@@ -0,0 +1,176 @@
|
||||
# Should maybe not be able to set values without checking if they are valid?
|
||||
class piratebay(object):
    """Scraper for The Pirate Bay HTML search result pages.

    An instance keeps the state of the current search (query, page, sort,
    category and the number of pager links seen on the first result page) so
    that ``next_page`` can continue where ``search`` stopped.
    """

    def __init__(self, query=None, page=0, sort=None, category=None):
        """Store the request parameters as given; nothing is validated here."""
        # This should be moved to a config file
        self.url = 'https://thepiratebay.org/search'
        self.sortTypes = {
            'size': 5,
            'seed_count': 99
        }
        self.categoryTypes = {
            'movies': 207,
            'porn_movies': 505,
        }

        # Req params
        self.query = query
        self.page = page
        self.sort = sort
        self.category = category
        self.total_pages = 0
        self.headers = {'User-Agent': 'Mozilla/5.0'}

    def build_URL_request(self):
        """Return a ``Request`` for ``<url>/<query>/<page>/<sort>/<category>``.

        The query is percent-encoded; the other parts are passed through
        ``str`` unchanged (so an unset value appears as ``None`` in the URL).
        """
        url = '/'.join([self.url, parse.quote(self.query), str(self.page), str(self.sort), str(self.category)])
        return request.Request(url, headers=self.headers)

    def next_page(self):
        """Advance to the next page, fetch it and return its parsed torrents.

        Returns whatever ``parse_raw_page_for_torrents`` returns, i.e. a list
        or ``None`` when the page holds no results. No upper bound is
        enforced against ``total_pages``.
        """
        self.page += 1
        print(self.page)
        raw_page = self.callPirateBaT()
        return self.parse_raw_page_for_torrents(raw_page)

    def set_total_pages(self, raw_page):
        """Set ``total_pages`` to the number of links in the pager element.

        The pager is looked up as the first ``align="center"`` element after
        the element with id ``SearchResults``. When any of these elements is
        missing (for instance on a page without results) ``total_pages`` is
        set to 0 instead of raising ``AttributeError``.
        """
        soup = BeautifulSoup(raw_page, 'html.parser')
        body = soup.body
        content_searchResult = body.find(id='SearchResults') if body is not None else None
        page_div = None
        if content_searchResult is not None:
            page_div = content_searchResult.find_next(attrs={"align": "center"})

        self.total_pages = len(page_div.find_all('a')) if page_div is not None else 0

    def callPirateBaT(self):
        """Fetch the current search page and return its raw content.

        Raises:
            ValueError: if the request failed (``fetchURL`` logged the reason
                and returned ``None``).
        """
        req = self.build_URL_request()

        response = self.fetchURL(req)
        # fetchURL returns None on failure; check before touching the
        # response so the caller gets the intended ValueError.
        if response is None:
            raise ValueError('Search result returned no content. Please check log for error reason.')

        # Close the connection once the body has been read.
        with response:
            raw_page = response.read()
        logging.info('Finished searching piratebay for query | %s', stringTime())

        if self.total_pages == 0:
            self.set_total_pages(raw_page)

        return raw_page

    # Sets the search
    def search(self, query, multiple_pages=1, page=0, sort=None, category=None):
        """Run a search and return the torrents from one or more pages.

        Args:
            query: search term.
            multiple_pages: passed to ``pagesToCount`` together with
                ``total_pages`` to decide how many pages are fetched.
            page: page to start at; ignored unless it is a non-negative int.
            sort: key of ``sortTypes``; required.
            category: key of ``categoryTypes``; an unknown value is ignored
                and the previously stored category is kept.

        Returns:
            list: the torrents found; empty when no page had results.

        Raises:
            ValueError: if ``sort`` is missing or unknown, or a page could
                not be fetched.
        """
        # This should not be logged here, but in loop. Something else here maybe?
        logging.info('Searching piratebay with query: %r, sort: %s and category: %s | %s',
                     query, sort, category, stringTime())

        if sort is None or sort not in self.sortTypes:
            raise ValueError('Invalid sort category for piratebay search')
        self.sort = self.sortTypes[sort]

        # Verify input? and reset total_pages
        self.query = query
        self.total_pages = 0

        # bool is a subclass of int but is not a meaningful page number.
        if isinstance(page, int) and not isinstance(page, bool) and page >= 0:
            self.page = page

        # TODO add category list
        if category is not None and category in self.categoryTypes:
            self.category = self.categoryTypes[category]

        # TODO Pull most of this logic out bc it needs to also be done in next_page

        raw_page = self.callPirateBaT()
        # The parser returns None for a page without results; normalise to a
        # list so the extend() calls below cannot fail.
        torrents_found = self.parse_raw_page_for_torrents(raw_page) or []
        print(self.page)

        # Fetch in parallel
        n = pagesToCount(multiple_pages, self.total_pages)
        while n > 1:
            torrents_found.extend(self.next_page() or [])
            n -= 1

        return torrents_found

    def removeHeader(self, bs4_element):
        """Return the next ``tr`` if the given row is the header row.

        A row without a ``class`` attribute is treated as a normal row.
        """
        if 'header' in bs4_element.get('class', []):
            return bs4_element.find_next('tr')

        return bs4_element

    def has_magnet(self, href):
        """Attribute filter: truthy when ``href`` contains ``magnet``."""
        return href and re.search('magnet', href)

    def parse_raw_page_for_torrents(self, content):
        """Extract the torrents listed in a raw search result page.

        Returns:
            list of ``Torrent`` objects, or ``None`` when the page has no
            result table or no result rows.
        """
        soup = BeautifulSoup(content, 'html.parser')
        body = soup.body
        content_searchResult = body.find(id='searchResult') if body is not None else None

        first_row = content_searchResult.tr if content_searchResult is not None else None
        torrentWrapper = self.removeHeader(first_row) if first_row is not None else None

        if torrentWrapper is None:
            logging.info('No torrents found for the search criteria.')
            return None

        torrents_found = []
        for torrentElement in torrentWrapper.find_all_next('td'):
            # Only the cell holding the name/description block is of interest.
            if not torrentElement.find_all("div", class_='detName'):
                continue

            magnet = torrentElement.find(href=self.has_magnet)
            # COULD NOT FIND HREF!
            if magnet is None:
                continue

            link = torrentElement.find('a', class_='detLink')
            name = link.get_text()
            url = link['href']

            uploader = torrentElement.find('a', class_='detDesc')
            if uploader is None:
                # Fall back to the <i> element when there is no uploader link.
                uploader = torrentElement.find('i')
            uploader = uploader.get_text()

            info_text = torrentElement.find('font', class_='detDesc').get_text()

            date = return_re_match(info_text, r"(\d+\-\d+\s\d+)|(Y\-day\s\d{2}\:\d{2})")
            size = return_re_match(info_text, r"(\d+(\.\d+)?\s[a-zA-Z]+)")

            # Only the first two right-aligned cells are used (seeders,
            # leechers), so do not scan the rest of the document.
            seed_and_leech = torrentElement.find_all_next(attrs={"align": "right"}, limit=2)
            if len(seed_and_leech) < 2:
                continue
            seed = seed_and_leech[0].get_text()
            leech = seed_and_leech[1].get_text()

            torrent = Torrent(name, magnet['href'], size, uploader, date, seed, leech, url)
            torrents_found.append(torrent)

        logging.info('Found %s torrents for given search criteria.', len(torrents_found))
        return torrents_found

    def fetchURL(self, req):
        """Open ``req`` and return the response, or ``None`` on failure.

        Failures are logged rather than raised. ``code`` is tested before
        ``reason`` because HTTP errors carry both attributes; testing
        ``reason`` first reported every HTTP error as an unreachable server.
        """
        try:
            response = request.urlopen(req)
        except URLError as e:
            if hasattr(e, 'code'):
                logging.error('The server couldn\'t fulfill the request.')
                logging.error('Error code: %s', e.code)
            elif hasattr(e, 'reason'):
                logging.error('We failed to reach a server with request: %s', req.full_url)
                logging.error('Reason: %s', e.reason)
            return None

        return response
|
||||
81
torrentSearch/search.py
Executable file
81
torrentSearch/search.py
Executable file
@@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env python3.6
|
||||
import configparser
|
||||
|
||||
def create_jackett_client():
    """Read the Jackett connection settings from ``config.example.ini``.

    The file is looked up relative to the current working directory.
    ``ConfigParser.read`` silently ignores a missing file, so a missing file
    surfaces as ``KeyError`` on the ``JACKETT`` section.

    Returns:
        list: ``[host, port, use_ssl]`` where ``host`` and ``port`` are
        strings and ``use_ssl`` is a bool.

    Raises:
        KeyError: if the section or one of the options is missing.
        ValueError: if ``JACKETT_SSL`` is not a recognised boolean value.
    """
    config = configparser.ConfigParser()
    config.read('config.example.ini')
    section = config['JACKETT']

    # Subscript access keeps the KeyError for missing options.
    raw_host = section['JACKETT_HOST']
    raw_port = section['JACKETT_PORT']
    section['JACKETT_SSL']

    # configparser keeps quote characters verbatim, so a value written as
    # '10.0.0.41' would otherwise include the quotes.
    jackett_host = raw_host.strip('\'"')
    jackett_port = raw_port.strip('\'"')
    # The raw value is a string and the string 'False' is truthy; convert it
    # to a real boolean.
    jackett_use_ssl = section.getboolean('JACKETT_SSL')

    return [jackett_host, jackett_port, jackett_use_ssl]
|
||||
|
||||
def search(term='', user=None, sort='date', order='desc', category='0_0',
           quality_filter='0', page='1', per_page=75):
    """Forward the search parameters to ``search_jackett`` and return its result.

    ``quality_filter`` is accepted but not forwarded. The maximum number of
    results is read from ``app.config`` under ``MAX_SEARCH_RESULT`` and
    defaults to 1000.

    NOTE(review): the example configuration names this option
    ``JACKETT_MAX_SEARCH_RESULT`` -- confirm which key ``app.config`` holds.
    """
    result_cap = app.config.get('MAX_SEARCH_RESULT', 1000)
    return search_jackett(
        term=term,
        user=user,
        sort=sort,
        order=order,
        category=category,
        page=page,
        per_page=per_page,
        max_search_results=result_cap,
    )
|
||||
|
||||
# This should be done front_end!
|
||||
# I.E. filtering like this should be done in another script
|
||||
# and should be done with the shared standard for types.
|
||||
# PS: Is it the right move to use a shared standard? What
|
||||
# happens if it is no longer public?
|
||||
def chooseCandidate(torrent_list):
    """Return the torrents that look like large, seeded Blu-ray releases.

    A torrent is kept when all of the following hold:
      * one of its release types (``torrent.find_release_type()``) is in the
        Blu-ray related set below,
      * it has at least one seeder,
      * its size is given in ``GiB`` and is larger than 4.

    Every kept torrent is also printed. Torrents whose size is missing or
    cannot be parsed are skipped instead of raising.

    Args:
        torrent_list: iterable of objects exposing ``find_release_type()``,
            ``size`` (e.g. ``"8.5 GiB"``), ``seed_count``, ``name`` and
            ``magnet``.

    Returns:
        list: the matching torrents in their original order.
    """
    # Built once instead of on every iteration.
    match_release_type = {'bdremux', 'brremux', 'remux', 'bdrip', 'brrip',
                          'blu-ray', 'bluray', 'bdmv', 'bdr', 'bd5'}
    interesting_torrents = []

    for torrent in torrent_list:
        if not match_release_type.intersection(torrent.find_release_type()):
            continue
        if int(torrent.seed_count) <= 0:
            continue

        # split() without arguments separates on any whitespace, including a
        # non-breaking space between number and unit.
        size_parts = (torrent.size or '').split()
        if len(size_parts) != 2 or size_parts[1] != 'GiB':
            continue
        try:
            size = float(size_parts[0])
        except ValueError:
            continue
        if size <= 4:
            continue

        print('{} : {} : {} {}'.format(torrent.name, torrent.size, torrent.seed_count, torrent.magnet))
        interesting_torrents.append(torrent)

    return interesting_torrents
|
||||
|
||||
|
||||
def searchTorrentSite(query, site='piratebay'):
    """Search ``site`` for ``query``, print the results and exit the process.

    All torrents found and the candidates selected by ``chooseCandidate``
    are pretty-printed, then the interpreter exits with status 0. The
    statements that used to follow the exit call could never run and were
    removed.

    Args:
        query: search term.
        site: ``'piratebay'`` (default) or ``'jackett'``.

    Raises:
        NotImplementedError: for ``site='jackett'``; that branch referenced
            names that were never bound and could not work.
        ValueError: for any other unknown site.
        SystemExit: always, after a successful search.
    """
    import sys

    # Strings are compared by value; ``is`` only worked by accident of
    # interning.
    if site == 'piratebay':
        pirate = piratebay()
        torrents_found = pirate.search(query, page=0, multiple_pages=5, sort='size')
    elif site == 'jackett':
        raise NotImplementedError('Searching through jackett is not implemented yet')
    else:
        raise ValueError('Unknown torrent site: %r' % (site,))

    pprint(torrents_found)
    candidates = chooseCandidate(torrents_found)
    pprint(candidates)
    sys.exit(0)
|
||||
# torrents_found = pirate.next_page()
|
||||
# pprint(torrents_found)
|
||||
# candidates = chooseCandidate(torrents_found)
|
||||
|
||||
# Can autocall to next_page in a looped way to get more if nothing is found
|
||||
# and there is more pages to be looked at
|
||||
|
||||
|
||||
def main():
    """Search for the term given as the first command-line argument."""
    searchTorrentSite(sys.argv[1])


if __name__ == '__main__':
    main()
|
||||
0
torrentSearch/utils.py
Normal file
0
torrentSearch/utils.py
Normal file
Reference in New Issue
Block a user