Added our scripts needed for pirate search, these are files that need to be imported from the currently in-dev project seasonedParser

2017-10-21 12:39:20 +02:00
parent 3c039447f5
commit 63ba10bc5a
5 changed files with 973 additions and 0 deletions
--- a/app/core.py
+++ b/app/core.py
@@ -0,0 +1,279 @@
+#!/usr/bin/env python3.6
+# -*- coding: utf-8 -*-
+# @Author: KevinMidboe
+# @Date:   2017-08-25 23:22:27
+# @Last Modified by:   KevinMidboe
+# @Last Modified time: 2017-10-12 22:44:27
+
+from guessit import guessit
+import os, errno
+import logging
+import tvdb_api
+from pprint import pprint
+
+import env_variables as env
+
+from video import VIDEO_EXTENSIONS, Episode, Movie, Video
+from subtitle import SUBTITLE_EXTENSIONS, Subtitle, get_subtitle_path
+from utils import sanitize
+
+logging.basicConfig(filename=env.logfile, level=logging.INFO)
+
+from datetime import datetime
+
+#: Supported archive extensions
+ARCHIVE_EXTENSIONS = ('.rar',)
+
+def scan_video(path):
+    """Scan a video from a `path`.
+
+    Only the file name is used to build the result; guessit-based detection
+    (``Video.fromguess``) and the video-extension check are currently disabled.
+
+    :param str path: existing path to the video.
+    :return: the scanned video, named after the file name of `path`.
+    :rtype: :class:`~video.Video`
+    :raise: :class:`ValueError` if `path` does not exist.
+
+    """
+    # check for non-existing path
+    if not os.path.exists(path):
+        raise ValueError('Path does not exist')
+
+    dirpath, filename = os.path.split(path)
+    logging.info('Scanning video %r in %r', filename, dirpath)
+
+    # The former ``path.strip(filename)`` was dropped: str.strip() removes a
+    # *set of characters* from both ends (not a suffix) and its result was
+    # never used. `dirpath` above is the parent directory if it is needed.
+    return Video(filename)
+
+
+def scan_subtitle(path):
+    """Scan a subtitle from a `path`.
+
+    :param str path: existing path to the subtitle file.
+    :return: the subtitle built from the guessit guess of `path`.
+    :rtype: :class:`~subtitle.Subtitle`
+    :raise: :class:`ValueError` if `path` does not exist.
+
+    """
+    if not os.path.exists(path):
+        raise ValueError('Path does not exist')
+
+    dirpath, filename = os.path.split(path)
+    logging.info('Scanning subtitle %r in %r', filename, dirpath)
+
+    # Use the directory returned by os.path.split as the parent path.
+    # ``path.strip(filename)`` strips any of the *characters* of `filename`
+    # from both ends of `path`, which mangles e.g. relative paths such as
+    # 'shows/x.srt'. Note that `dirpath` carries no trailing separator.
+    return Subtitle.fromguess(filename, dirpath, guessit(path))
+
+
+def scan_files(path, age=None, archives=True):
+    """Scan `path` recursively for videos and subtitles.
+
+    Hidden directories, hidden files, symbolic links and (when `age` is
+    given) files modified longer ago than `age` are skipped. Archives pass
+    the extension filter but are not scanned yet.
+
+    :param str path: existing directory path to scan.
+    :param datetime.timedelta age: maximum age of a file.
+    :param bool archives: whether archive files pass the extension filter.
+    :return: the scanned videos and subtitles, in walk order.
+    :rtype: list
+    :raise: :class:`ValueError` if `path` does not exist or is not a directory.
+
+    """
+    # check for non-existing path
+    if not os.path.exists(path):
+        raise ValueError('Path does not exist')
+
+    # check for non-directory path
+    if not os.path.isdir(path):
+        raise ValueError('Path is not a directory')
+
+    # per-series tally, pretty-printed before returning (debug aid)
+    name_dict = {}
+
+    # walk the path
+    mediafiles = []
+    for dirpath, dirnames, filenames in os.walk(path):
+        logging.debug('Walking directory %r', dirpath)
+
+        # prune hidden dirnames in place so os.walk does not descend into them
+        for dirname in list(dirnames):
+            if dirname.startswith('.'):
+                logging.debug('Skipping hidden dirname %r in %r', dirname, dirpath)
+                dirnames.remove(dirname)
+
+        for filename in filenames:
+            # classify once; files matching none of the kinds are ignored
+            is_video = filename.endswith(VIDEO_EXTENSIONS)
+            is_archive = archives and filename.endswith(ARCHIVE_EXTENSIONS)
+            is_subtitle = filename.endswith(SUBTITLE_EXTENSIONS)
+            if not (is_video or is_subtitle or is_archive):
+                continue
+
+            # skip hidden files
+            if filename.startswith('.'):
+                logging.debug('Skipping hidden filename %r in %r', filename, dirpath)
+                continue
+
+            # reconstruct the file path
+            filepath = os.path.join(dirpath, filename)
+
+            # skip links
+            if os.path.islink(filepath):
+                logging.debug('Skipping link %r in %r', filename, dirpath)
+                continue
+
+            # skip old files
+            if age and datetime.utcnow() - datetime.utcfromtimestamp(os.path.getmtime(filepath)) > age:
+                logging.debug('Skipping old file %r in %r', filename, dirpath)
+                continue
+
+            # scan (same precedence as before: video, then archive, then subtitle)
+            if is_video:
+                try:
+                    video = scan_video(filepath)
+                except ValueError:  # pragma: no cover
+                    logging.exception('Error scanning video')
+                    continue
+
+                # scan_video may return an object without a `series`
+                # attribute; reading it unguarded raised AttributeError and
+                # aborted the whole scan.
+                series = getattr(video, 'series', None)
+                if series is not None:
+                    # tally keeps its historical meaning: occurrences - 1
+                    name_dict[series] = name_dict.get(series, -1) + 1
+                mediafiles.append(video)
+            elif is_archive:
+                # archive scanning is not implemented yet
+                logging.debug('Skipping archive %r in %r', filename, dirpath)
+            else:  # subtitle
+                try:
+                    subtitle = scan_subtitle(filepath)
+                except ValueError:
+                    logging.exception('Error scanning subtitle')
+                    continue
+                mediafiles.append(subtitle)
+
+    pprint(name_dict)
+    return mediafiles
+
+
+def organize_files(path):
+    """Group the media files found under `path` by their ``__hash__()`` value.
+
+    :param str path: directory handed to :func:`scan_files`.
+    :return: mapping of hash value to the list of files sharing that hash.
+    :rtype: dict
+
+    """
+    grouped = {}
+    for media in scan_files(path):
+        # __hash__ is called directly (not via hash()) on purpose: the media
+        # classes in this project return a hex digest string from it.
+        key = media.__hash__()
+        if key not in grouped:
+            grouped[key] = []
+        grouped[key].append(media)
+
+    return grouped
+
+
+def save_subtitles(files, single=False, directory=None, encoding=None):
+    """Rename media files to ``<series> S<ss>E<ee> <title>.<ext>``.
+
+    Each file is moved into a ``<series> S<ss>E<ee>`` folder below its
+    current parent directory, unless the parent path already contains that
+    folder name. The episode title is looked up on TVDB; when the lookup
+    fails the title is left empty.
+
+    :param files: a scanned media object or a list of them. Each object must
+        expose `name`, `parent_path`, `series`, `season`, `episode` and
+        `container`; subtitles additionally `sdh` and ``getLanguage()``.
+    :param bool single: currently unused.
+    :param directory: currently unused.
+    :param encoding: currently unused.
+
+    """
+    t = tvdb_api.Tvdb()
+
+    if not isinstance(files, list):
+        files = [files]
+
+    for media in files:
+        # shared "<series> SxxEyy" prefix of both folder and file name
+        episode_tag = "%s S%02dE%02d" % (media.series, media.season, media.episode)
+
+        createParentfolder = episode_tag not in media.parent_path
+        if createParentfolder:
+            target_dir = os.path.join(media.parent_path, episode_tag)
+            # exist_ok replaces the manual errno.EEXIST check
+            os.makedirs(target_dir, exist_ok=True)
+            # report only after the directory really exists
+            print('Created: %s' % target_dir)
+        else:
+            target_dir = media.parent_path
+
+        # Best effort: a failed lookup must not stop the rename. Catch
+        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
+        try:
+            tvdb_episode = t[media.series][media.season][media.episode]
+            episode_title = tvdb_episode['episodename']
+        except Exception:
+            logging.warning('Could not look up the episode title for %s',
+                            episode_tag, exc_info=True)
+            episode_title = ''
+
+        old = os.path.join(media.parent_path, media.name)
+
+        if media.name.endswith(SUBTITLE_EXTENSIONS):
+            lang = media.getLanguage()
+            sdh = '.sdh' if media.sdh else ''
+            filename = "%s %s%s.%s.%s" % (episode_tag, episode_title, sdh, lang, media.container)
+        else:
+            filename = "%s %s.%s" % (episode_tag, episode_title, media.container)
+
+        newname = os.path.join(target_dir, filename)
+
+        print('Moved: %s ---> %s' % (old, newname))
+        os.rename(old, newname)
+
+        print()
+
+
+    # for hash in files:
+    #   hashIndex = [files[hash]]
+    #   for hashItems in hashIndex:
+    #      for file in hashItems:
+    #         print(file.series)
+
+    # saved_subtitles = []
+    # for subtitle in files:
+    #     # check content
+    #     if subtitle.name is None:
+    #         logging.error('Skipping subtitle %r: no content', subtitle)
+    #         continue
+
+    #     # check language
+    #     if subtitle.language in set(s.language for s in saved_subtitles):
+    #         logging.debug('Skipping subtitle %r: language already saved', subtitle)
+    #         continue
+
+    #     # create subtitle path
+    #     subtitle_path = get_subtitle_path(video.name, None if single else subtitle.language)
+    #     if directory is not None:
+    #         subtitle_path = os.path.join(directory, os.path.split(subtitle_path)[1])
+
+    #     # save content as is or in the specified encoding
+    #     logging.info('Saving %r to %r', subtitle, subtitle_path)
+    #     if encoding is None:
+    #         with io.open(subtitle_path, 'wb') as f:
+    #             f.write(subtitle.content)
+    #     else:
+    #         with io.open(subtitle_path, 'w', encoding=encoding) as f:
+    #             f.write(subtitle.text)
+    #     saved_subtitles.append(subtitle)
+
+    #     # check single
+    #     if single:
+    #         break
+
+    # return saved_subtitles
+
+def stringTime():
+    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS:ffffff``."""
+    now = datetime.now()
+    stamp = now.strftime("%Y-%m-%d %H:%M:%S:%f")
+    return str(stamp)
+
+
+def main():
+    """Group the media files of one directory by hash and pretty-print them.
+
+    The directory is taken from the first command line argument when one is
+    given; otherwise the original development path is used.
+    """
+    import sys  # local import: only the script entry point needs argv
+
+    # episodePath = '/Volumes/media/tv/Black Mirror/Black Mirror Season 01/'
+    default_path = '/Volumes/mainframe/shows/Black Mirror/Black Mirror Season 01/'
+    episodePath = sys.argv[1] if len(sys.argv) > 1 else default_path
+
+    # The unused tvdb_api.Tvdb() client that used to be created here was removed.
+    hashList = organize_files(episodePath)
+    pprint(hashList)
+
+
+
+if __name__ == '__main__':
+    main()
--- a/app/pirateSearch.py
+++ b/app/pirateSearch.py
@@ -0,0 +1,312 @@
+#!/usr/bin/env python3.6
+# -*- coding: utf-8 -*-
+# @Author: KevinMidboe
+# @Date:   2017-10-12 11:55:03
+# @Last Modified by:   KevinMidboe
+# @Last Modified time: 2017-10-21 12:25:09
+
+import sys, logging, re, json
+from urllib import parse, request
+from urllib.error import URLError
+from bs4 import BeautifulSoup
+
+import datetime
+from pprint import pprint
+
+from core import stringTime
+import env_variables as env
+logging.basicConfig(filename=env.logfile, level=logging.INFO)
+
+#: Release-type markers; Torrent.find_release_type() reports every entry that
+#: occurs as a substring of the casefolded torrent name.
+# NOTE(review): 'hdts' is listed twice below, so it is reported twice for a
+# matching name - possibly one of them was meant to be another spelling.
+RELEASE_TYPES = ('bdremux', 'brremux', 'remux',
+	'bdrip', 'brrip', 'blu-ray', 'bluray', 'bdmv', 'bdr', 'bd5',
+	'web-cap', 'webcap', 'web cap',
+	'webrip', 'web rip', 'web-rip', 'web',
+	'webdl', 'web dl', 'web-dl', 'hdrip',
+	'dsr', 'dsrip', 'satrip', 'dthrip', 'dvbrip', 'hdtv', 'pdtv', 'tvrip', 'hdtvrip',
+	'dvdr', 'dvd-full', 'full-rip', 'iso',
+	'ts', 'hdts', 'hdts', 'telesync', 'pdvd', 'predvdrip',
+	'camrip', 'cam')
+
+
+def sanitize(string, ignore_characters=None, replace_characters=None):
+	"""Replace every character of `ignore_characters` found in `string`.
+
+	:param str string: the string to sanitize.
+	:param ignore_characters: characters to replace (despite the name); when
+		empty or None the string is returned unchanged.
+	:param str replace_characters: replacement text, defaults to ''.
+	:return: the sanitized string, or None when `string` is None.
+	:rtype: str
+
+	"""
+	if string is None:
+		return None
+
+	replacement = replace_characters if replace_characters else ''
+	targets = ignore_characters if ignore_characters else set()
+	if not targets:
+		return string
+
+	# one character class matching any of the target characters
+	pattern = r'[%s]' % re.escape(''.join(targets))
+	return re.sub(pattern, replacement, string)
+
+def return_re_match(string, re_statement):
+	"""Return the first match of `re_statement` in `string`, cleaned up.
+
+	:param str string: text to search; None yields None.
+	:param str re_statement: regular expression handed to ``re.search``.
+	:return: the matched text with non-breaking spaces replaced by plain
+		spaces; yesterday's date as ``'%m-%d %Y'`` when the match contains
+		'Y-day'; None when `string` is None or nothing matches.
+	:rtype: str
+
+	"""
+	if string is None:
+		return None
+
+	m = re.search(re_statement, string)
+	# no match (e.g. an unexpected date format) used to raise AttributeError
+	if m is None:
+		return None
+
+	matched = m.group()
+	if 'Y-day' in matched:
+		# 'Y-day' marks an upload from yesterday, so report yesterday's date
+		yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
+		return yesterday.strftime('%m-%d %Y')
+	return sanitize(matched, '\xa0', ' ')
+
+
+# Can maybe be moved away from this class
+# returns a number that is either the value of multiple_pages
+# or if it exceeds total_pages, return total_pages.
+def pagesToCount(multiple, total):
+	"""Return `multiple`, capped at `total` when it exceeds it."""
+	return total if multiple > total else multiple
+
+# Should maybe not be able to set values without checking if they are valid?
+class piratebay(object):
+	"""Search client that scrapes piratebay result pages.
+
+	:param str query: search text.
+	:param int page: zero-based result page.
+	:param sort: numeric sort code placed in the URL (see `sortTypes`).
+	:param category: numeric category code placed in the URL (see `categoryTypes`).
+	"""
+	#: Matches hrefs that contain 'magnet'
+	_MAGNET_RE = re.compile('magnet')
+
+	def __init__(self, query=None, page=0, sort=None, category=None):
+		# This should be moved to a config file
+		self.url = 'https://thepiratebay.org/search'
+		self.sortTypes = {
+			'size': 5,
+			'seed_count': 99
+		}
+		self.categoryTypes = {
+			'movies': 207,
+			'porn_movies': 505,
+		}
+		# - - -
+
+		# Req params
+		self.query = query
+		self.page = page
+		self.sort = sort
+		self.category = category
+		# 0 means "not determined yet"; filled in by the first fetched page
+		self.total_pages = 0
+		self.headers = {'User-Agent': 'Mozilla/5.0'}
+
+	def build_URL_request(self):
+		"""Return the ``urllib`` request for the current query, page, sort and category."""
+		# Never interpolate the literal text 'None' into the URL.
+		# 99 is this class' own 'seed_count' sort code; 0 is assumed to mean
+		# "all categories" on the site - TODO confirm.
+		sort = 99 if self.sort is None else self.sort
+		category = 0 if self.category is None else self.category
+		url = '/'.join([self.url, parse.quote(self.query), str(self.page), str(sort), str(category)])
+		return request.Request(url, headers=self.headers)
+
+	def next_page(self):
+		"""Advance to the next page and return its torrents.
+
+		No check against `total_pages` is made; callers decide how far to go.
+		"""
+		self.page += 1
+		raw_page = self.callPirateBaT()
+		return self.parse_raw_page_for_torrents(raw_page)
+
+	def set_total_pages(self, raw_page):
+		"""Set `total_pages` to the number of links in the pagination block.
+
+		Leaves `total_pages` at 0 when the pagination block cannot be found.
+		"""
+		# body-id:searchResults-id:content-align:center
+		soup = BeautifulSoup(raw_page, 'html.parser')
+		content_searchResult = None
+		if soup.body is not None:
+			content_searchResult = soup.body.find(id='SearchResults')
+
+		page_div = None
+		if content_searchResult is not None:
+			page_div = content_searchResult.find_next(attrs={"align": "center"})
+
+		if page_div is None:
+			self.total_pages = 0
+			return
+
+		self.total_pages = len(page_div.find_all('a'))
+
+	def callPirateBaT(self):
+		"""Fetch the current result page and return its raw content.
+
+		:raise: :class:`ValueError` when the page could not be fetched.
+		"""
+		req = self.build_URL_request()
+
+		# fetchURL returns None on failure; check before reading
+		response = self.fetchURL(req)
+		if response is None:
+			raise ValueError('Search result returned no content. Please check log for error reason.')
+
+		try:
+			raw_page = response.read()
+		finally:
+			response.close()
+		logging.info('Finished searching piratebay for query | %s', stringTime())
+
+		if self.total_pages == 0:
+			self.set_total_pages(raw_page)
+
+		return raw_page
+
+	# Sets the search
+	def search(self, query, multiple_pages=1, page=0, sort=None, category=None):
+		"""Search for `query` and return the torrents of up to `multiple_pages` pages.
+
+		:param str query: search text.
+		:param int multiple_pages: maximum number of pages to fetch.
+		:param int page: zero-based page to start from; ignored unless a non-negative int.
+		:param str sort: key of `sortTypes`; required despite the None default.
+		:param str category: key of `categoryTypes`; unknown values are ignored.
+		:return: list of :class:`Torrent`.
+		:raise: :class:`ValueError` for a missing or unknown `sort`.
+		"""
+		# This should not be logged here, but in loop. Something else here maybe?
+		logging.info('Searching piratebay with query: %r, sort: %s and category: %s | %s',
+			query, sort, category, stringTime())
+
+		if sort is None or sort not in self.sortTypes:
+			raise ValueError('Invalid sort category for piratebay search')
+		self.sort = self.sortTypes[sort]
+
+		# Verify input? and reset total_pages
+		self.query = query
+		self.total_pages = 0
+
+		if isinstance(page, int) and not isinstance(page, bool) and page >= 0:
+			self.page = page
+
+		# TODO add category list
+		if category is not None and category in self.categoryTypes:
+			self.category = self.categoryTypes[category]
+
+		# TODO Pull most of this logic out bc it needs to also be done in next_page
+		raw_page = self.callPirateBaT()
+		torrents_found = self.parse_raw_page_for_torrents(raw_page)
+
+		# the first page is already fetched; get the remaining ones in order
+		n = pagesToCount(multiple_pages, self.total_pages)
+		for _ in range(n - 1):
+			torrents_found.extend(self.next_page())
+
+		return torrents_found
+
+	def removeHeader(self, bs4_element):
+		"""Return the row after `bs4_element` when it is the header row, else the element itself."""
+		# rows without a class attribute are not headers
+		if 'header' in (bs4_element.get('class') or []):
+			return bs4_element.find_next('tr')
+
+		return bs4_element
+
+	def has_magnet(self, href):
+		"""Attribute filter: truthy when `href` is set and contains 'magnet'."""
+		return bool(href) and self._MAGNET_RE.search(href) is not None
+
+	def parse_raw_page_for_torrents(self, content):
+		"""Extract the torrents listed on a raw result page.
+
+		:param content: raw HTML of a result page.
+		:return: list of :class:`Torrent`; empty when the page has no result
+			table (an empty list instead of None keeps ``extend`` and
+			iteration in the callers working).
+		"""
+		soup = BeautifulSoup(content, 'html.parser')
+		content_searchResult = None
+		if soup.body is not None:
+			content_searchResult = soup.body.find(id='searchResult')
+
+		if content_searchResult is None or content_searchResult.tr is None:
+			logging.info('No torrents found for the search criteria.')
+			return []
+
+		torrentWrapper = self.removeHeader(content_searchResult.tr)
+		if torrentWrapper is None:
+			logging.info('No torrents found for the search criteria.')
+			return []
+
+		torrents_found = []
+		for torrentElement in torrentWrapper.find_all_next('td'):
+			# only cells holding a torrent name describe a torrent
+			if not torrentElement.find_all("div", class_='detName'):
+				continue
+
+			# entries without a magnet link are useless; skip them early
+			magnet = torrentElement.find(href=self.has_magnet)
+			if magnet is None:
+				continue
+
+			link = torrentElement.find('a', class_='detLink')
+			name = link.get_text()
+			url = link['href']
+
+			# the uploader is rendered as a link, or as <i> when there is none
+			uploader = torrentElement.find('a', class_='detDesc')
+			if uploader is None:
+				uploader = torrentElement.find('i')
+			uploader = uploader.get_text() if uploader is not None else None
+
+			info_text = torrentElement.find('font', class_='detDesc').get_text()
+
+			date = return_re_match(info_text, r"(\d+\-\d+\s\d+)|(Y\-day\s\d{2}\:\d{2})")
+			size = return_re_match(info_text, r"(\d+(\.\d+)?\s[a-zA-Z]+)")
+
+			# the two right-aligned elements after the cell hold seed and leech counts
+			seed_and_leech = torrentElement.find_all_next(attrs={"align": "right"})
+			if len(seed_and_leech) < 2:
+				continue
+			seed = seed_and_leech[0].get_text()
+			leech = seed_and_leech[1].get_text()
+
+			torrents_found.append(Torrent(name, magnet['href'], size, uploader, date, seed, leech, url))
+
+		logging.info('Found %s torrents for given search criteria.', len(torrents_found))
+		return torrents_found
+
+	def fetchURL(self, req):
+		"""Open `req` and return the response, or None (after logging) on failure."""
+		try:
+			return request.urlopen(req)
+		except URLError as e:
+			# HTTPError carries both `code` and `reason`; test `code` first so
+			# HTTP status failures are reported as such.
+			if hasattr(e, 'code'):
+				logging.error('The server couldn\'t fulfill the request.')
+				logging.error('Error code: %s', e.code)
+			else:
+				logging.error('We failed to reach a server with request: %s', req.full_url)
+				logging.error('Reason: %s', e.reason)
+			return None
+
+
+class Torrent(object):
+	"""One search result: name, magnet link and the listing metadata."""
+
+	def __init__(self, name, magnet=None, size=None, uploader=None, date=None,
+		seed_count=None, leech_count=None, url=None):
+		self.name, self.magnet = name, magnet
+		self.size, self.uploader, self.date = size, uploader, date
+		self.seed_count, self.leech_count = seed_count, leech_count
+		self.url = url
+
+	def find_release_type(self):
+		"""Return every RELEASE_TYPES entry contained in the casefolded name."""
+		lowered = self.name.casefold()
+		found = []
+		for r_type in RELEASE_TYPES:
+			if r_type in lowered:
+				found.append(r_type)
+		return found
+
+	def get_all_attr(self):
+		"""Return the torrent as a plain dict (note the shortened seed/leech keys)."""
+		return {
+			'name': self.name,
+			'magnet': self.magnet,
+			'uploader': self.uploader,
+			'size': self.size,
+			'date': self.date,
+			'seed': self.seed_count,
+			'leech': self.leech_count,
+			'url': self.url,
+		}
+
+	def __repr__(self):
+		return '<%s [%r]>' % (type(self).__name__, self.name)
+
+
+# This should be done front_end!
+# I.E. filtering like this should be done in another script
+# and should be done with the shared standard for types. 
+# PS: Is it the right move to use a shared standard? What
+# happens if it is no longer public?
+def chooseCandidate(torrent_list):
+	"""Pick the torrents worth downloading from `torrent_list`.
+
+	A torrent qualifies when its name contains one of the blu-ray/remux
+	release types, it has at least one seeder and its size is above 4 GiB
+	(sizes in any other unit are rejected).
+
+	:param torrent_list: iterable of :class:`Torrent`; None or empty yields [].
+	:return: list of ``Torrent.get_all_attr()`` dicts of the qualifying torrents.
+	:rtype: list
+	"""
+	# a search without results may hand over None
+	if not torrent_list:
+		return []
+
+	# built once instead of once per torrent
+	match_release_type = {'bdremux', 'brremux', 'remux', 'bdrip', 'brrip', 'blu-ray', 'bluray', 'bdmv', 'bdr', 'bd5'}
+
+	interesting_torrents = []
+	for torrent in torrent_list:
+		if not match_release_type.intersection(torrent.find_release_type()):
+			continue
+
+		if int(torrent.seed_count) <= 0:
+			continue
+
+		# size may be missing when it could not be parsed from the listing
+		size, _, size_id = (torrent.size or '').partition(' ')
+		# check the unit first so float() only sees a value that had a unit
+		if size_id != 'GiB' or float(size) <= 4:
+			continue
+
+		interesting_torrents.append(torrent.get_all_attr())
+
+	return interesting_torrents
+
+
+def searchTorrentSite(query, site='piratebay'):
+	"""Search piratebay for `query` and print the chosen candidates as JSON.
+
+	The first three result pages, sorted by size, are considered.
+	`site` is accepted but not used: piratebay is always searched.
+	"""
+	client = piratebay()
+	found = client.search(query, page=0, multiple_pages=3, sort='size')
+	print(json.dumps(chooseCandidate(found)))
+
+	# torrents_found = pirate.next_page()
+	# pprint(torrents_found)
+	# candidates = chooseCandidate(torrents_found)
+
+	# Can autocall to next_page in a looped way to get more if nothing is found
+	# and there is more pages to be looked at
+	
+
+def main():
+	"""Command line entry point: search for the query given as first argument."""
+	# exit with a usage message instead of an IndexError traceback
+	if len(sys.argv) < 2:
+		sys.exit('Usage: %s <query>' % sys.argv[0])
+
+	searchTorrentSite(sys.argv[1])
+
+if __name__ == '__main__':
+	main()
--- a/app/subtitle.py
+++ b/app/subtitle.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+import codecs
+import logging
+import os
+
+import chardet
+import hashlib
+
+from video import Episode, Movie
+from utils import sanitize
+
+from langdetect import detect
+
+
+logger = logging.getLogger(__name__)
+
+#: Subtitle extensions
+SUBTITLE_EXTENSIONS = ('.srt', '.sub')
+
+
+class Subtitle(object):
+    """A subtitle file belonging to an episode or a movie.
+
+    :param str name: file name of the subtitle.
+    :param str parent_path: directory containing the subtitle.
+    :param str series: series (or movie) title.
+    :param season: season number, None for movies.
+    :param episode: episode number, None for movies.
+    :param language: language of the subtitle.
+    :param hash: optional precomputed hash.
+    :param str container: container guessed from the file name.
+    :param str format: release format guessed from the file name.
+    :param bool sdh: whether the file name marks the subtitle as SDH.
+
+    """
+    #: Name of the provider that returns that class of subtitle
+    provider_name = ''
+
+    def __init__(self, name, parent_path, series, season, episode, language=None, hash=None, container=None, format=None, sdh=False):
+        self.name = name
+        self.parent_path = parent_path
+        self.series = series
+        self.season = season
+        self.episode = episode
+        self.language = language
+        self.hash = hash
+        self.container = container
+        self.format = format
+        self.sdh = sdh
+
+    @classmethod
+    def fromguess(cls, name, parent_path, guess):
+        """Create a subtitle from a guessit `guess`.
+
+        :param str name: file name of the subtitle.
+        :param str parent_path: directory containing the subtitle.
+        :param dict guess: guessed data; must have a 'type' of 'episode' or
+            'movie' and a 'title' (episodes also need 'episode').
+        :raise: :class:`ValueError` for an unsupported type or missing data.
+        """
+        guess_type = guess['type']
+        if guess_type not in ('movie', 'episode'):
+            raise ValueError('The guess must be an episode or a movie guess')
+
+        if 'title' not in guess:
+            raise ValueError('Insufficient data to process the guess')
+
+        # compare by value: ``is`` on strings only works by accident of
+        # interning and otherwise made this method silently return None
+        if guess_type == 'episode':
+            if 'episode' not in guess:
+                raise ValueError('Insufficient data to process the guess')
+            season, episode = guess.get('season'), guess['episode']
+        else:
+            # movies carry no season/episode; the constructor still requires both
+            season = episode = None
+
+        return cls(name, parent_path, guess['title'], season, episode,
+                   container=guess.get('container'), format=guess.get('format'),
+                   sdh='sdh' in name.lower())
+
+    def getLanguage(self):
+        """Detect the language of the subtitle text with ``langdetect``."""
+        path = os.path.join(self.parent_path, self.name)
+        # the context manager closes the file even when detection fails
+        with open(path, 'r', encoding='ISO-8859-15') as f:
+            return detect(f.read())
+
+    def __hash__(self):
+        # NOTE: returns an md5 hex digest *string* of series+season+episode.
+        # Callers in this project invoke __hash__() directly; the builtin
+        # hash() requires an int and would raise TypeError.
+        return hashlib.md5("b'{}'".format(str(self.series) + str(self.season) + str(self.episode)).encode()).hexdigest()
+
+    def __repr__(self):
+        return '<%s %s [%sx%s]>' % (self.__class__.__name__, self.series, self.season, str(self.episode))
+
+
+
+def get_subtitle_path(subtitles_path, language=None, extension='.srt'):
+    """Build a subtitle path from `subtitles_path`, `language` and `extension`.
+
+    The extension of `subtitles_path` is dropped, ``.<language>`` is appended
+    when a language is given, and `extension` is appended last.
+
+    :param str subtitles_path: path to the subtitle (or its video).
+    :param language: language of the subtitle to put in the path.
+    :param str extension: extension of the subtitle.
+    :return: path of the subtitle.
+    :rtype: str
+
+    """
+    root, _ = os.path.splitext(subtitles_path)
+    language_suffix = '.' + str(language) if language else ''
+    return root + language_suffix + extension
+
+
+
--- a/app/utils.py
+++ b/app/utils.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+from datetime import datetime
+import hashlib
+import os
+import re
+import struct
+
+def sanitize(string, ignore_characters=None):
+    """Normalize a string: drop apostrophes, squeeze whitespace, lower-case.
+
+    :param str string: the string to sanitize.
+    :param set ignore_characters: characters that must not be removed.
+    :return: the sanitized string, or None when `string` is None.
+    :rtype: str
+
+    """
+    if string is None:
+        return None
+
+    # characters to delete, minus the ones the caller wants to keep
+    removable = {'\''} - (ignore_characters or set())
+    if removable:
+        pattern = r'[%s]' % re.escape(''.join(removable))
+        string = re.sub(pattern, '', string)
+
+    # collapse whitespace runs, then trim and lower-case
+    collapsed = re.sub(r'\s+', ' ', string)
+    return collapsed.strip().lower()
+
--- a/app/video.py
+++ b/app/video.py
@@ -0,0 +1,233 @@
+#!/usr/bin/env python3.6
+# -*- coding: utf-8 -*-
+# @Author: KevinMidboe
+# @Date:   2017-08-26 08:23:18
+# @Last Modified by:   KevinMidboe
+# @Last Modified time: 2017-09-29 13:56:21
+
+from guessit import guessit
+import os
+import hashlib, tvdb_api
+
+#: Video extensions
+# A missing comma used to fuse '.ogm' and '.ogv' into the single bogus
+# entry '.ogm.ogv', so neither extension was recognised.
+VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.asx', '.avchd', '.avi', '.bik',
+                    '.bix', '.box', '.cam', '.dat', '.divx', '.dmf', '.dv', '.dvr-ms', '.evo', '.flc', '.fli',
+                    '.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e',
+                    '.m4v', '.mjp', '.mjpeg', '.mjpg', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx', '.mp4',
+                    '.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm', '.ogv', '.omf',
+                    '.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv', '.vivo',
+                    '.vob', '.vro', '.wm', '.wmv', '.wmx', '.wrap', '.wvx', '.wx', '.x264', '.xvid')
+
+class Video(object):
+    """Base class for videos.
+    Represent a video, existing or not.
+    :param str name: name or path of the video.
+    :param str format: format of the video (HDTV, WEB-DL, BluRay, ...).
+    :param str release_group: release group of the video.
+    :param str resolution: resolution of the video stream (480p, 720p, 1080p or 1080i).
+    :param str video_codec: codec of the video stream.
+    :param str audio_codec: codec of the main audio stream.
+    :param str imdb_id: IMDb id of the video.
+    :param dict hashes: hashes of the video file by provider names.
+    :param int size: size of the video file in bytes.
+    :param set subtitle_languages: existing subtitle languages.
+    """
+    def __init__(self, name, format=None, release_group=None, resolution=None, video_codec=None, audio_codec=None,
+                 imdb_id=None, hashes=None, size=None, subtitle_languages=None):
+        #: Name or path of the video
+        self.name = name
+
+        #: Format of the video (HDTV, WEB-DL, BluRay, ...)
+        self.format = format
+
+        #: Release group of the video
+        self.release_group = release_group
+
+        #: Resolution of the video stream (480p, 720p, 1080p or 1080i)
+        self.resolution = resolution
+
+        #: Codec of the video stream
+        self.video_codec = video_codec
+
+        #: Codec of the main audio stream
+        self.audio_codec = audio_codec
+
+        #: IMDb id of the video
+        self.imdb_id = imdb_id
+
+        #: Hashes of the video file by provider names
+        self.hashes = hashes or {}
+
+        #: Size of the video file in bytes
+        self.size = size
+
+        #: Existing subtitle languages
+        self.subtitle_languages = subtitle_languages or set()
+
+    @property
+    def exists(self):
+        """Test whether the video exists"""
+        return os.path.exists(self.name)
+
+    @property
+    def age(self):
+        """Age of the video: time since its last modification, or a zero
+        timedelta when the file does not exist."""
+        # imported locally: this module does not import datetime at file level,
+        # so the property used to fail with NameError
+        from datetime import datetime, timedelta
+
+        if self.exists:
+            return datetime.utcnow() - datetime.utcfromtimestamp(os.path.getmtime(self.name))
+
+        return timedelta()
+
+    @classmethod
+    def fromguess(cls, name, parent_path, guess):
+        """Create an :class:`Episode` or a :class:`Movie` with the given `name` based on the `guess`.
+        :param str name: name of the video.
+        :param str parent_path: directory containing the video (episodes only).
+        :param dict guess: guessed data.
+        :raise: :class:`ValueError` if the `type` of the `guess` is invalid
+        """
+        if guess['type'] == 'episode':
+            return Episode.fromguess(name, parent_path, guess)
+
+        if guess['type'] == 'movie':
+            return Movie.fromguess(name, guess)
+
+        raise ValueError('The guess must be an episode or a movie guess')
+
+    @classmethod
+    def fromname(cls, name):
+        """Shortcut for :meth:`fromguess` with a `guess` guessed from the `name`.
+        :param str name: name or path of the video.
+        """
+        # fromguess takes (name, parent_path, guess); the parent path was
+        # missing here. It is derived from `name` - empty for a bare file name.
+        return cls.fromguess(name, os.path.dirname(name), guessit(name))
+
+    def __repr__(self):
+        return '<%s [%r]>' % (self.__class__.__name__, self.name)
+
+    def __hash__(self):
+        return hash(self.name)
+
+
+class Episode():
+    """Episode of a series.
+
+    NOTE(review): documented as a :class:`Video` but it does not inherit from
+    it; extra keyword arguments other than `audio_codec` are ignored.
+
+    :param str name: file name of the episode.
+    :param str parent_path: directory containing the episode.
+    :param str series: series of the episode.
+    :param int season: season number of the episode.
+    :param int episode: episode number of the episode.
+    :param int year: year of the series.
+    :param bool original_series: whether the series is the first with this name.
+    :param int tvdb_id: TVDB id of the episode.
+    :param \*\*kwargs: additional parameters; only `audio_codec` is stored.
+    """
+    def __init__(self, name, parent_path, series, season, episode, year=None, original_series=True, tvdb_id=None,
+                 series_tvdb_id=None, series_imdb_id=None, release_group=None, video_codec=None, container=None,
+                 format=None, screen_size=None, **kwargs):
+        super(Episode, self).__init__()
+
+        self.name = name
+
+        self.parent_path = parent_path
+
+        #: Series of the episode
+        self.series = series
+
+        #: Season number of the episode
+        self.season = season
+
+        #: Episode number of the episode
+        self.episode = episode
+
+        #: Year of series
+        self.year = year
+
+        #: The series is the first with this name
+        self.original_series = original_series
+
+        #: TVDB id of the episode
+        self.tvdb_id = tvdb_id
+
+        #: TVDB id of the series
+        self.series_tvdb_id = series_tvdb_id
+
+        #: IMDb id of the series
+        self.series_imdb_id = series_imdb_id
+
+        # The release group of the episode
+        self.release_group = release_group
+
+        # The video codec of the series
+        self.video_codec = video_codec
+
+        # The audio codec of the episode; fromguess() passes it as a keyword
+        # and it used to be dropped silently
+        self.audio_codec = kwargs.get('audio_codec')
+
+        # The Video container of the episode
+        self.container = container
+
+        # The Video format of the episode
+        self.format = format
+
+        # The Video screen_size of the episode
+        self.screen_size = screen_size
+
+    @classmethod
+    def fromguess(cls, name, parent_path, guess):
+        """Create an episode from a guessit `guess`.
+
+        :raise: :class:`ValueError` if the guess is not an episode guess or
+            lacks 'title' or 'episode'. A missing season defaults to 1.
+        """
+        if guess['type'] != 'episode':
+            raise ValueError('The guess must be an episode guess')
+
+        if 'title' not in guess or 'episode' not in guess:
+            raise ValueError('Insufficient data to process the guess')
+
+        return cls(name, parent_path, guess['title'], guess.get('season', 1), guess['episode'],
+                   year=guess.get('year'), original_series='year' not in guess, release_group=guess.get('release_group'),
+                   video_codec=guess.get('video_codec'), audio_codec=guess.get('audio_codec'), container=guess.get('container'),
+                   format=guess.get('format'), screen_size=guess.get('screen_size'))
+
+    @classmethod
+    def fromname(cls, name):
+        """Shortcut for :meth:`fromguess` with an episode guess made from `name`."""
+        # fromguess takes (name, parent_path, guess); the parent path was
+        # missing here. It is derived from `name` - empty for a bare file name.
+        return cls.fromguess(name, os.path.dirname(name), guessit(name, {'type': 'episode'}))
+
+    def __hash__(self):
+        # NOTE: returns an md5 hex digest *string* of series+season+episode.
+        # Callers in this project invoke __hash__() directly; the builtin
+        # hash() requires an int and would raise TypeError.
+        return hashlib.md5("b'{}'".format(str(self.series) + str(self.season) + str(self.episode)).encode()).hexdigest()
+
+    # THE EP NUMBER IS CONVERTED TO STRING AS A QUICK FIX FOR MULTIPLE NUMBERS IN ONE
+    def __repr__(self):
+        if self.year is None:
+            return '<%s [%r, %sx%s]>' % (self.__class__.__name__, self.series, self.season, str(self.episode))
+
+        return '<%s [%r, %d, %sx%s]>' % (self.__class__.__name__, self.series, self.year, self.season, str(self.episode))
+
+		
+
+class Movie():
+    """Movie.
+
+    NOTE(review): documented as a :class:`Video` but it does not inherit from
+    it; extra keyword arguments are accepted and ignored.
+
+    :param str name: file name of the movie.
+    :param str title: title of the movie.
+    :param int year: year of the movie.
+    :param str format: release format of the movie.
+    :param \*\*kwargs: additional parameters, currently ignored.
+    """
+    def __init__(self, name, title, year=None, format=None, **kwargs):
+        super(Movie, self).__init__()
+
+        #: Name of the movie file (was accepted but never stored)
+        self.name = name
+
+        #: Title of the movie
+        self.title = title
+
+        #: Year of the movie
+        self.year = year
+
+        #: Release format of the movie
+        self.format = format
+
+    @classmethod
+    def fromguess(cls, name, guess):
+        """Create a movie from a guessit `guess`.
+
+        :raise: :class:`ValueError` if the guess is not a movie guess or has no title.
+        """
+        if guess['type'] != 'movie':
+            raise ValueError('The guess must be a movie guess')
+
+        if 'title' not in guess:
+            raise ValueError('Insufficient data to process the guess')
+
+        return cls(name, guess['title'], format=guess.get('format'), release_group=guess.get('release_group'),
+                   resolution=guess.get('screen_size'), video_codec=guess.get('video_codec'),
+                   audio_codec=guess.get('audio_codec'), year=guess.get('year'))
+
+    @classmethod
+    def fromname(cls, name):
+        """Shortcut for :meth:`fromguess` with a movie guess made from `name`."""
+        return cls.fromguess(name, guessit(name, {'type': 'movie'}))
+
+    def __repr__(self):
+        if self.year is None:
+            return '<%s [%r]>' % (self.__class__.__name__, self.title)
+
+        return '<%s [%r, %d]>' % (self.__class__.__name__, self.title, self.year)