Restructured project to better run as a package.

This should have been set up correctly from the start; the project now better
follows the structure expected of a Python package, which this is supposed to be.
- Renamed folder from src -> seasonedParser
- Moved test/ into seasonedParser/
- test has __init__.py script which sets location to the project folder
(seasonedParser/).
- Removed cli.py and moved contents to __main__.py
- Updated drone to run pytest without test folder parameter
This commit is contained in:
KevinMidboe
2020-02-18 23:44:16 +01:00
parent 90eff227a0
commit eb13e34e66
23 changed files with 58 additions and 23 deletions

View File

@@ -0,0 +1,7 @@
# Package initialiser: prints a marker line on import and re-exports __version__.
# NOTE(review): the print below looks like a leftover debug statement — confirm
# whether it should ship, since it runs on every import of the package.
print('hello from init')
import sys,os
# sys.path.append(os.path.join(os.path.dirname(__file__),os.pardir))
# from
from .__version__ import __version__

View File

@@ -0,0 +1,90 @@
#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-
"""
Entry point module
"""
import click
from guessit import guessit
import logging
from core import scan_folder
from video import Video
from exceptions import InsufficientNameError
import env_variables as env
# Logging configuration for the command line entry point.
# basicConfig() attaches a file handler for env.logfile to the *root* logger.
# NOTE(review): the 'seasonedParser' logger below gets its own FileHandler for
# the same file and still propagates to the root logger, so each record is
# presumably written to the log file twice (in two formats) — confirm.
# NOTE(review): the same setup is repeated in logger.py.
logging.basicConfig(filename=env.logfile, level=logging.INFO)
logger = logging.getLogger('seasonedParser')
# File handler: INFO and above, with timestamp and logger name.
fh = logging.FileHandler(env.logfile)
fh.setLevel(logging.INFO)
# Stream handler: only WARNING and above reach the terminal.
sh = logging.StreamHandler()
sh.setLevel(logging.WARNING)
fh_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
sh_formatter = logging.Formatter('%(levelname)s: %(message)s')
fh.setFormatter(fh_formatter)
sh.setFormatter(sh_formatter)
logger.addHandler(fh)
logger.addHandler(sh)
def tweet(video):
    """Placeholder hook; currently does nothing with `video`."""
    pass
def prompt(name):
    """Ask the user to type a usable name for the file `name`.

    :param str name: the file name that could not be parsed.
    :return: the text entered by the user, or ``None`` when the user
        answers ``s`` (skip this file).
    :raises KeyboardInterrupt: when the user answers ``q`` (quit).
    """
    answer = input("Insufficient name: '{}'\nInput name manually: ".format(name))

    # 'q' is reported the same way as Ctrl-C so the caller has one exit path
    if answer == 'q':
        raise KeyboardInterrupt

    return None if answer == 's' else answer
def _moveHome(file):
    """Dry-run stand-in for ``moveHome``: report the move without doing it.

    Prints and logs the match together with the value returned by
    ``file.wantedFilePath()``; no file is touched here.
    """
    template = '- - -\nMatch: \t\t {}. \nDestination:\t {}'
    print(template.format(file, file.wantedFilePath()))
    logger.info(template.format(file, file.wantedFilePath()))
@click.command()
@click.argument('path')
@click.option('--daemon', '-d', is_flag=True)
@click.option('--dry', is_flag=True)
def main(path, daemon, dry):
    """Scan PATH for videos and move them to their wanted location.

    With --dry the moves are only printed and logged. With --daemon the
    program never prompts: files with an insufficient name are logged and
    the program exits.
    """
    # sys.exit() is always available; the bare exit() helper is only
    # injected by the `site` module.
    import sys

    if dry:
        moveHome = _moveHome
    else:
        from core import moveHome

    videos, insufficient_name = scan_folder(path)

    for video in videos:
        moveHome(video)

    if insufficient_name and daemon:
        logger.warning('Daemon flag set. Insufficient name for: %r', insufficient_name)
        sys.exit(0)

    # Work through the queue front to back. The previous version deleted
    # entries from the list while enumerating it, which skipped every other
    # file on each pass.
    while insufficient_name:
        file = insufficient_name[0]
        try:
            manual_name = prompt(file)
            if manual_name is not None:
                video = Video.fromguess(file, guessit(manual_name))
                moveHome(video)
            # handled (moved or skipped by the user): drop it from the queue
            del insufficient_name[0]
        except KeyboardInterrupt:
            # Logger: Received interrupt, exiting parser.
            # should the class objects be deleted ?
            print('Interrupt detected. Exiting')
            sys.exit(0)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Version module
"""
__version__ = '0.2.0'

330
seasonedParser/core.py Executable file
View File

@@ -0,0 +1,330 @@
#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-
# @Author: KevinMidboe
# @Date: 2017-08-25 23:22:27
# @Last Modified by: KevinMidboe
# @Last Modified time: 2019-02-02 01:04:25
from guessit import guessit
from babelfish import Language, LanguageReverseError
import hashlib
import os, errno
import shutil
import re
import tvdb_api
import click
from pprint import pprint
from titlecase import titlecase
import langdetect
import env_variables as env
from exceptions import InsufficientNameError
import logging
logger = logging.getLogger('seasonedParser')
from video import VIDEO_EXTENSIONS, Episode, Movie, Video
from subtitle import SUBTITLE_EXTENSIONS, Subtitle, get_subtitle_path
from utils import sanitize, refine
def search_external_subtitles(path, directory=None):
    """Find subtitle files that sit next to the video at `path`.

    :param str path: path to the video.
    :param str directory: directory to list instead of the video's own
        directory (optional).
    :return: mapping of subtitle path to its language. The language is a
        babelfish ``Language`` when it is parsed from the file name, or the
        value returned by ``langdetect.detect`` when the code in the file
        name cannot be parsed.
    :rtype: dict
    """
    dirpath, filename = os.path.split(path)
    dirpath = dirpath or '.'
    fileroot, fileext = os.path.splitext(filename)
    # The directory that is listed is also the one the files are read from;
    # previously files were always joined with `dirpath`, which produced
    # wrong paths whenever `directory` was given.
    search_dir = directory or dirpath
    # Raw string: same pattern as before, without the invalid '\,' escape.
    pattern = re.compile(r'[0-9:\,-<>]+')

    subtitles = {}
    for p in os.listdir(search_dir):
        if not p.endswith(SUBTITLE_EXTENSIONS):
            continue

        subtitle_file = os.path.join(search_dir, p)
        language = Language('und')
        # what is left between the video's root name and the subtitle extension
        language_code = p[len(fileroot):-len(os.path.splitext(p)[1])].replace(fileext, '').replace('_', '-')[1:]
        if language_code:
            try:
                language = Language.fromietf(language_code)
            except (ValueError, LanguageReverseError):
                logger.error('Cannot parse language code %r', language_code)
                # NOTE(review): the nesting of this fallback was reconstructed
                # from unindented source — confirm it belongs in the handler.
                # Fall back to detecting the language from the first 1000
                # characters left after stripping digits and timing symbols.
                with open(subtitle_file, 'r', encoding='ISO-8859-15') as f:
                    filecontent = pattern.sub('', f.read())
                language = langdetect.detect(filecontent[0:1000])

        subtitles[subtitle_file] = language

    logger.debug('Found subtitles %r', subtitles)
    return subtitles
def find_file_size(video):
    """Return the size in bytes of the file at ``video.name``."""
    file_stat = os.stat(video.name)
    return file_stat.st_size
def scan_video(path):
    """Build a video object for the file at `path`.

    The file name is parsed with guessit, subtitles found by
    :func:`search_external_subtitles` are attached and the result is passed
    through :func:`refine`.

    :param str path: existing path to the video.
    :return: the scanned video.
    :raises ValueError: if `path` does not exist or does not end with one of
        ``VIDEO_EXTENSIONS``.
    """
    if not os.path.exists(path):
        raise ValueError('Path does not exist')

    if not path.endswith(VIDEO_EXTENSIONS):
        extension = os.path.splitext(path)[1]
        raise ValueError('%r is not a valid video extension' % extension)

    parent_dir, basename = os.path.split(path)
    logger.info('Scanning video %r in %r', basename, parent_dir)

    # only the file name, not the full path, is handed to guessit
    scanned = Video.fromguess(path, guessit(basename))
    external_subtitles = search_external_subtitles(scanned.name)
    scanned.subtitles |= set(external_subtitles)
    refine(scanned)

    return scanned
def scan_subtitle(path):
    """Build a subtitle object for the file at `path`.

    :param str path: existing path to the subtitle.
    :return: the value returned by ``Subtitle.fromguess``.
    :raises ValueError: if `path` does not exist.
    """
    if not os.path.exists(path):
        raise ValueError('Path does not exist')

    dirpath, filename = os.path.split(path)
    logger.info('Scanning subtitle %r in %r', filename, dirpath)

    # guess
    # str.strip(filename) removes any of the *characters* of `filename` from
    # both ends of the path, which can also eat into directory names. Cut the
    # file name off the end instead, keeping the trailing separator.
    parent_path = path[:len(path) - len(filename)]
    subtitle = Subtitle.fromguess(parent_path, guessit(path))
    return subtitle
def subtitle_path(sibling, subtitle):
    """Return the path for `subtitle` placed in the directory of `sibling`.

    :param str sibling: path of a file whose directory is the destination.
    :param str subtitle: path of the subtitle; only its base name is kept.
    """
    subtitle_name = os.path.basename(subtitle)
    target_dir = os.path.dirname(sibling)
    return os.path.join(target_dir, subtitle_name)
def scan_videos(path):
    """Scan `path` recursively for videos and their subtitles.

    Hidden directories, hidden files, symbolic links and files that do not
    end with one of ``VIDEO_EXTENSIONS`` are skipped.

    :param str path: existing directory path to scan.
    :return: ``(videos, insufficient_name, errors_path)`` — the scanned
        videos, the paths for which :func:`scan_video` raised
        ``InsufficientNameError`` and the paths for which it raised
        ``ValueError``.
    :rtype: tuple of three lists
    :raises ValueError: if `path` does not exist or is not a directory.
    """
    # check for non-existing path
    if not os.path.exists(path):
        raise ValueError('Path does not exist')

    # check for non-directory path
    if not os.path.isdir(path):
        raise ValueError('Path is not a directory')

    # setup progress bar: one step per directory visited by os.walk
    path_children = sum(1 for _ in os.walk(path))

    videos = []
    insufficient_name = []
    errors_path = []
    with click.progressbar(length=path_children, show_pos=True, label='Collecting videos') as bar:
        # walk the path
        for dirpath, dirnames, filenames in os.walk(path):
            logger.debug('Walking directory %r', dirpath)

            # remove hidden dirnames in place so os.walk does not descend into them
            for dirname in list(dirnames):
                if dirname.startswith('.'):
                    logger.debug('Skipping hidden dirname %r in %r', dirname, dirpath)
                    dirnames.remove(dirname)

            # scan for videos
            for filename in filenames:
                if not filename.endswith(VIDEO_EXTENSIONS):
                    logger.debug('Skipping non-video file %s', filename)
                    continue

                # skip hidden files
                if filename.startswith('.'):
                    logger.debug('Skipping hidden filename %r in %r', filename, dirpath)
                    continue

                # reconstruct the file path
                filepath = os.path.join(dirpath, filename)
                if os.path.islink(filepath):
                    logger.debug('Skipping link %r in %r', filename, dirpath)
                    continue

                # scan; the extension was already checked above, so the old
                # "unsupported file" branch could never run and was removed
                try:
                    video = scan_video(filepath)
                except InsufficientNameError as e:
                    logger.info(e)
                    insufficient_name.append(filepath)
                    continue
                except ValueError:  # pragma: no cover
                    logger.exception('Error scanning video')
                    errors_path.append(filepath)
                    continue

                videos.append(video)

            bar.update(1)

    return videos, insufficient_name, errors_path
def organize_files(path):
    """Group the videos found under `path` by their hash.

    :param str path: existing directory path to scan.
    :return: mapping of ``hash(video)`` to the list of videos sharing it.
    :rtype: dict
    """
    hashList = {}
    # The previous version called scan_files(), which is not defined in this
    # module, so the function always raised NameError.
    # NOTE(review): scan_videos() is the closest existing scanner; it returns
    # videos only — confirm subtitles are not expected here as well.
    mediafiles, _insufficient_name, _errors_path = scan_videos(path)

    for file in mediafiles:
        hashList.setdefault(hash(file), []).append(file)

    return hashList
def save_subtitles(files, single=False, directory=None, encoding=None):
    """Rename episode files (videos or subtitles) into "<series> SxxEyy" form.

    Each file is renamed to ``<series> SxxEyy <episode title>.<container>``
    (subtitles also get an optional ``.sdh`` marker and the language) and is
    moved into a ``<series> SxxEyy`` folder unless its parent path already
    contains that folder name. The episode title is looked up through
    tvdb_api and left empty when the lookup fails.

    :param files: a single file object or a list of them.
    :param single: unused.
    :param directory: unused.
    :param encoding: unused.
    """
    t = tvdb_api.Tvdb()

    if not isinstance(files, list):
        files = [files]

    for file in files:
        # TODO this should not be done in the loop
        episode_tag = "S%sE%s" % ("%02d" % (file.season), "%02d" % (file.episode))
        dirname = "%s %s" % (file.series, episode_tag)
        createParentfolder = dirname not in file.parent_path
        if createParentfolder:
            dirname = os.path.join(file.parent_path, dirname)
            print('Created: %s' % dirname)
            # exist_ok replaces the hand-written errno.EEXIST check
            os.makedirs(dirname, exist_ok=True)

        # TODO Clean this !
        try:
            tvdb_episode = t[file.series][file.season][file.episode]
            episode_title = tvdb_episode['episodename']
        except Exception:
            # best effort: a failed lookup only costs the title, but no longer
            # swallows KeyboardInterrupt/SystemExit as the bare except did
            logger.debug('No episode title found for %s %s', file.series, episode_tag, exc_info=True)
            episode_title = ''

        old = os.path.join(file.parent_path, file.name)

        if file.name.endswith(SUBTITLE_EXTENSIONS):
            lang = file.getLanguage()
            sdh = '.sdh' if file.sdh else ''
            filename = "%s %s %s%s.%s.%s" % (file.series, episode_tag, episode_title, sdh, lang, file.container)
        else:
            filename = "%s %s %s.%s" % (file.series, episode_tag, episode_title, file.container)

        if createParentfolder:
            newname = os.path.join(dirname, filename)
        else:
            newname = os.path.join(file.parent_path, filename)

        print('Moved: %s ---> %s' % (old, newname))
        os.rename(old, newname)
def scan_folder(path):
    """Collect videos from `path`, which may be a single file or a directory.

    A summary line (videos / insufficient names / errors) is echoed to the
    terminal once scanning is done.

    :param str path: file or directory to scan.
    :return: ``(videos, insufficient_name)`` — the scanned videos and the
        paths whose name did not carry enough information.
    :rtype: tuple of two lists
    """
    videos = []
    insufficient_name = []
    errored_paths = []
    logger.debug('Collecting path %s', path)

    if not os.path.exists(path):
        # non-existing
        errored_paths.append(path)
        # logger.error, not logger.exception: there is no active exception here
        logger.error("The path '{}' does not exist".format(path))

    elif os.path.isfile(path):
        # file
        logger.info('Path is a file')
        try:
            video = scan_video(path)
            videos.append(video)
        except InsufficientNameError as e:
            logger.info(e)
            insufficient_name.append(path)
        except ValueError:
            # raised by scan_video, e.g. for a non-video extension; recorded
            # the same way scan_videos does instead of crashing
            logger.exception('Error scanning video')
            errored_paths.append(path)

    elif os.path.isdir(path):
        # directories
        logger.info('Path is a directory')
        try:
            videos, insufficient_name, errored_paths = scan_videos(path)
        except Exception:
            logger.exception('Unexpected error while collecting directory path %s', path)
            errored_paths.append(path)

    click.echo('%s video%s collected / %s file%s with insufficient name / %s error%s' % (
        click.style(str(len(videos)), bold=True, fg='green' if videos else None),
        's' if len(videos) > 1 else '',
        click.style(str(len(insufficient_name)), bold=True, fg='yellow' if insufficient_name else None),
        's' if len(insufficient_name) > 1 else '',
        click.style(str(len(errored_paths)), bold=True, fg='red' if errored_paths else None),
        's' if len(errored_paths) > 1 else '',
    ))

    return videos, insufficient_name
def pickforgirlscouts(video):
    """Call ``video.moveLocation()`` when the video reports sufficient info.

    :return: ``True`` if ``video.sufficientInfo()`` was truthy and
        ``moveLocation()`` was called, ``False`` otherwise.
    :rtype: bool
    """
    if not video.sufficientInfo():
        return False

    video.moveLocation()
    return True
def moveHome(video):
    """Move a video and its subtitles to ``video.wantedFilePath()``.

    The destination directory is created when missing. Entries of
    ``video.subtitles`` that are not existing files are skipped; the others
    are moved next to the video, keeping their base name.
    """
    wantedFilePath = video.wantedFilePath()
    # named target_dir so the builtin dir() is not shadowed
    target_dir = os.path.dirname(wantedFilePath)

    # an empty dirname means "current directory": nothing to create, and
    # os.makedirs('') would raise
    if target_dir and not os.path.exists(target_dir):
        logger.info('Creating directory {}'.format(target_dir))
        # exist_ok covers the directory appearing between check and creation
        os.makedirs(target_dir, exist_ok=True)

    logger.info("Moving video file from: '{}' to: '{}'".format(video.name, wantedFilePath))
    shutil.move(video.name, wantedFilePath)

    for sub in video.subtitles:
        if not os.path.isfile(sub):
            continue
        oldpath = sub
        newpath = subtitle_path(wantedFilePath, sub)
        logger.info("Moving subtitle file from: '{}' to: '{}'".format(oldpath, newpath))
        shutil.move(oldpath, newpath)
# Give feedback before delete ?
def empthDirectory(paths):
    """Placeholder; currently does nothing with `paths`."""
    pass

View File

@@ -0,0 +1,3 @@
# Path of the log file handed to logging.basicConfig / logging.FileHandler.
# It is a relative path, so it is resolved against the current working directory.
logfile = 'conf/output.log'
# NOTE(review): presumably the library roots that movies and shows are moved
# into — not referenced in the modules visible here; confirm against video.py.
MOVIEBASE = '/mnt/mainframe/movies'
SHOWBASE = '/mnt/mainframe/shows'

View File

@@ -0,0 +1,5 @@
#!/usr/bin/env python3.6
class InsufficientNameError(Exception):
    """Raised while scanning a video; callers collect the path for manual naming.

    NOTE(review): presumably raised when a file name does not hold enough
    information to build a destination — the raising code is not visible here.
    """
    pass

17
seasonedParser/logger.py Normal file
View File

@@ -0,0 +1,17 @@
import logging
import env_variables as env
# Shared logging configuration for the 'seasonedParser' logger.
# basicConfig() attaches a file handler for env.logfile to the *root* logger.
# NOTE(review): the named logger below adds a second FileHandler for the same
# file and still propagates to the root logger, so records are presumably
# written to the log file twice — confirm.
logging.basicConfig(filename=env.logfile, level=logging.INFO)
logger = logging.getLogger('seasonedParser')
# File handler: INFO and above, with timestamp and logger name.
fh = logging.FileHandler(env.logfile)
fh.setLevel(logging.INFO)
# Stream handler: only WARNING and above reach the terminal.
sh = logging.StreamHandler()
sh.setLevel(logging.WARNING)
fh_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
sh_formatter = logging.Formatter('%(levelname)s: %(message)s')
fh.setFormatter(fh_formatter)
sh.setFormatter(sh_formatter)
logger.addHandler(fh)
logger.addHandler(sh)

279
seasonedParser/pirateSearch.py Executable file
View File

@@ -0,0 +1,279 @@
#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-
# @Author: KevinMidboe
# @Date: 2017-10-12 11:55:03
# @Last Modified by: KevinMidboe
# @Last Modified time: 2017-11-01 16:11:30
import sys, logging, re
from urllib import parse, request
from urllib.error import URLError
from bs4 import BeautifulSoup
import datetime
from pprint import pprint
from core import stringTime
import env_variables as env
logging.basicConfig(filename=env.logfile, level=logging.INFO)
# Release-type markers looked for (as substrings) in casefolded torrent names.
# The duplicated 'hdts' entry was removed so a match is reported only once.
RELEASE_TYPES = ('bdremux', 'brremux', 'remux',
                 'bdrip', 'brrip', 'blu-ray', 'bluray', 'bdmv', 'bdr', 'bd5',
                 'web-cap', 'webcap', 'web cap',
                 'webrip', 'web rip', 'web-rip', 'web',
                 'webdl', 'web dl', 'web-dl', 'hdrip',
                 'dsr', 'dsrip', 'satrip', 'dthrip', 'dvbrip', 'hdtv', 'pdtv', 'tvrip', 'hdtvrip',
                 'dvdr', 'dvd-full', 'full-rip', 'iso',
                 'ts', 'hdts', 'telesync', 'pdvd', 'predvdrip',
                 'camrip', 'cam')
# Should maybe not be able to set values without checking if they are valid?
class piratebay(object):
    """Search client that scrapes result pages into :class:`Torrent` objects.

    NOTE(review): ``stringTime`` (imported from ``core``), ``pagesToCount``
    and ``return_re_match`` are used below but are not defined in the code
    visible in this review — confirm where they are supposed to come from.
    """

    def __init__(self, query=None, page=0, sort=None, category=None):
        # This should be moved to a config file
        self.url = 'https://thepiratebay.org/search'
        self.sortTypes = {
            'size': 5,
            'seed_count': 99
        }
        self.categoryTypes = {
            'movies': 207,
            'porn_movies': 505,
        }
        # - - -

        # Req params
        self.query = query
        self.page = page
        self.sort = sort
        self.category = category
        # 0 means "not determined yet for the current query"
        self.total_pages = 0
        self.headers = {'User-Agent': 'Mozilla/5.0'}
        # self.headers = {}

    def build_URL_request(self):
        """Return a ``urllib`` Request for <url>/<query>/<page>/<sort>/<category>."""
        url = '/'.join([self.url, parse.quote(self.query), str(self.page), str(self.sort), str(self.category)])
        return request.Request(url, headers=self.headers)

    def next_page(self):
        """Fetch and parse the page after the current one.

        :return: list of torrents, or ``None`` when the page has no results.
        """
        # If page exceeds the max_page, return None
        # Can either save the last query/url in the object or have it passed
        # again on call to next_page
        # Throw a error if it is not possible (overflow)
        self.page += 1
        print(self.page)
        raw_page = self.callPirateBaT()
        return self.parse_raw_page_for_torrents(raw_page)

    def set_total_pages(self, raw_page):
        """Store the number of links found in the pagination block.

        ``total_pages`` is left untouched when the expected elements are not
        present, instead of raising AttributeError on ``None``.
        """
        # body-id:searchResults-id:content-align:center
        soup = BeautifulSoup(raw_page, 'html.parser')
        # NOTE(review): this looks up id 'SearchResults' while
        # parse_raw_page_for_torrents uses 'searchResult' — confirm which
        # ids the page really uses.
        content_searchResult = soup.body.find(id='SearchResults') if soup.body else None
        if content_searchResult is None:
            return
        page_div = content_searchResult.find_next(attrs={"align": "center"})
        if page_div is None:
            return

        self.total_pages = len(page_div.find_all('a'))

    def callPirateBaT(self):
        """Fetch the page for the current request parameters.

        :return: the raw response body.
        :raises ValueError: when the request failed (see log for the reason).
        """
        req = self.build_URL_request()

        # fetchURL returns None on failure; calling .read() on it directly
        # used to raise AttributeError before the check below could run
        response = self.fetchURL(req)
        if response is None:
            raise ValueError('Search result returned no content. Please check log for error reason.')

        raw_page = response.read()
        logging.info('Finished searching piratebay for query | %s' % stringTime())

        # '==', not 'is': identity of ints is an implementation detail
        if self.total_pages == 0:
            self.set_total_pages(raw_page)

        return raw_page

    # Sets the search
    def search(self, query, multiple_pages=1, page=0, sort=None, category=None):
        """Search for `query` and return the torrents found.

        :param str query: search text.
        :param int multiple_pages: number of pages requested (passed to
            ``pagesToCount`` together with ``total_pages``).
        :param int page: page to start from; ignored unless a non-negative int.
        :param str sort: a key of ``sortTypes``. Note that the default
            ``None`` is rejected as well.
        :param str category: a key of ``categoryTypes`` (optional).
        :return: list of :class:`Torrent`, empty when nothing was found.
        :raises ValueError: when `sort` is not a key of ``sortTypes``.
        """
        # This should not be logged here, but in loop. Something else here maybe?
        logging.info('Searching piratebay with query: %r, sort: %s and category: %s | %s' %
                     (query, sort, category, stringTime()))

        if sort is not None and sort in self.sortTypes:
            self.sort = self.sortTypes[sort]
        else:
            raise ValueError('Invalid sort category for piratebay search')

        # Verify input? and reset total_pages
        self.query = query
        self.total_pages = 0

        # type() check on purpose: bool is an int subclass and is not a page number
        if type(page) is int and page >= 0:
            self.page = page

        # TODO add category list
        if category is not None and category in self.categoryTypes:
            self.category = self.categoryTypes[category]

        # TODO Pull most of this logic out bc it needs to also be done in next_page
        raw_page = self.callPirateBaT()
        # the parser returns None for a page without results; normalise to a
        # list so the extend() calls below cannot fail
        torrents_found = self.parse_raw_page_for_torrents(raw_page) or []
        print(self.page)

        # Fetch in parallel
        n = pagesToCount(multiple_pages, self.total_pages)
        while n > 1:
            torrents_found.extend(self.next_page() or [])
            n -= 1

        return torrents_found

    def removeHeader(self, bs4_element):
        """Return the row after `bs4_element` when it is the header row."""
        # .get() instead of ['class']: a row without a class attribute is
        # simply not a header
        if 'header' in (bs4_element.get('class') or []):
            return bs4_element.find_next('tr')

        return bs4_element

    def has_magnet(self, href):
        """Filter for ``find(href=...)``: truthy when `href` contains 'magnet'."""
        return href and re.compile('magnet').search(href)

    def parse_raw_page_for_torrents(self, content):
        """Extract torrents from a raw result page.

        :return: list of :class:`Torrent`, or ``None`` when the page has no
            element with id ``searchResult``.
        """
        soup = BeautifulSoup(content, 'html.parser')
        content_searchResult = soup.body.find(id='searchResult')

        if content_searchResult is None:
            logging.info('No torrents found for the search criteria.')
            return None

        listElements = content_searchResult.tr

        torrentWrapper = self.removeHeader(listElements)

        torrents_found = []
        for torrentElement in torrentWrapper.find_all_next('td'):
            # only cells holding a 'detName' div describe a torrent
            if not torrentElement.find_all("div", class_='detName'):
                continue

            name = torrentElement.find('a', class_='detLink').get_text()
            url = torrentElement.find('a', class_='detLink')['href']
            magnet = torrentElement.find(href=self.has_magnet)

            uploader = torrentElement.find('a', class_='detDesc')

            if uploader is None:
                uploader = torrentElement.find('i')

            uploader = uploader.get_text()

            info_text = torrentElement.find('font', class_='detDesc').get_text()

            date = return_re_match(info_text, r"(\d+\-\d+\s\d+)|(Y\-day\s\d{2}\:\d{2})")
            size = return_re_match(info_text, r"(\d+(\.\d+)?\s[a-zA-Z]+)")

            # COULD NOT FIND HREF!
            if magnet is None:
                continue

            seed_and_leech = torrentElement.find_all_next(attrs={"align": "right"})
            seed = seed_and_leech[0].get_text()
            leech = seed_and_leech[1].get_text()

            torrent = Torrent(name, magnet['href'], size, uploader, date, seed, leech, url)

            torrents_found.append(torrent)

        logging.info('Found %s torrents for given search criteria.' % len(torrents_found))
        return torrents_found

    def fetchURL(self, req):
        """Open `req` and return the response, or ``None`` when it fails.

        Failures are logged instead of raised.
        """
        try:
            response = request.urlopen(req)
        except URLError as e:
            # HTTPError is a URLError subclass that has both `code` and
            # `reason`, so `code` must be tested first; testing `reason`
            # first made the error-code branch unreachable.
            if hasattr(e, 'code'):
                logging.error('The server couldn\'t fulfill the request.')
                # the old call passed e.code without a %s placeholder
                logging.error('Error code: %s', e.code)
            else:
                logging.error('We failed to reach a server with request: %s' % req.full_url)
                logging.error('Reason: %s' % e.reason)
            return None

        return response
class Torrent(object):
    """Plain record describing one search result."""

    def __init__(self, name, magnet=None, size=None, uploader=None, date=None,
                 seed_count=None, leech_count=None, url=None):
        self.name, self.magnet, self.url = name, magnet, url
        self.size, self.uploader, self.date = size, uploader, date
        self.seed_count, self.leech_count = seed_count, leech_count

    def find_release_type(self):
        """Return every entry of ``RELEASE_TYPES`` found in the casefolded name."""
        folded_name = self.name.casefold()
        matches = []
        for release_type in RELEASE_TYPES:
            if release_type in folded_name:
                matches.append(release_type)
        return matches

    def get_all_attr(self):
        """Return the attributes as a dict (counts under 'seed' and 'leech')."""
        attributes = {}
        attributes['name'] = self.name
        attributes['magnet'] = self.magnet
        attributes['uploader'] = self.uploader
        attributes['size'] = self.size
        attributes['date'] = self.date
        attributes['seed'] = self.seed_count
        attributes['leech'] = self.leech_count
        attributes['url'] = self.url
        return attributes

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<%s [%r]>' % (class_name, self.name)
# This should be done front_end!
# I.E. filtering like this should be done in another script
# and should be done with the shared standard for types.
# PS: Is it the right move to use a shared standard? What
# happens if it is no longer public?
def chooseCandidate(torrent_list):
    """Keep the torrents that look like large, seeded blu-ray style releases.

    A torrent is kept (and printed) when it reports one of the wanted release
    types, has at least one seeder and its size is above 4 with unit 'GiB'.

    :param torrent_list: iterable of torrents.
    :return: the accepted torrents, in input order.
    :rtype: list
    """
    wanted_types = {'bdremux', 'brremux', 'remux', 'bdrip', 'brrip', 'blu-ray', 'bluray', 'bdmv', 'bdr', 'bd5'}
    accepted = []

    for candidate in torrent_list:
        common_types = wanted_types & set(candidate.find_release_type())
        amount, _, unit = candidate.size.partition(' ')

        # checks stay in this order so the numeric conversions only run for
        # torrents with a wanted release type
        if not common_types:
            continue
        if not int(candidate.seed_count) > 0:
            continue
        if not float(amount) > 4:
            continue
        if unit != 'GiB':
            continue

        print('{} : {} : {} {}'.format(candidate.name, candidate.size, candidate.seed_count, candidate.magnet))
        accepted.append(candidate)

    return accepted
def searchTorrentSite(query, site='piratebay'):
    """Search for `query`, print all results and the filtered candidates, then exit.

    :param str query: search text.
    :param str site: unused; a ``piratebay`` client is always created.
    """
    pirate = piratebay()
    torrents_found = pirate.search(query, page=0, multiple_pages=5, sort='size')
    pprint(torrents_found)
    candidates = chooseCandidate(torrents_found)
    pprint(candidates)
    # NOTE(review): this exit(0) ends the process, so everything below is
    # unreachable — looks like a debugging leftover; confirm the intent.
    exit(0)
    torrents_found = pirate.search(query, page=0, multiple_pages=0, sort='size', category='movies')
    movie_candidates = chooseCandidate(torrents_found)
    print('Length full: {}'.format(len(candidates)))
    print('Length movies: {}'.format(len(movie_candidates)))
    # torrents_found = pirate.next_page()
    # pprint(torrents_found)
    # candidates = chooseCandidate(torrents_found)
    # Can autocall to next_page in a looped way to get more if nothing is found
    # and there is more pages to be looked at
def main():
    """Command line entry point: search for the query given as first argument."""
    # Without an argument sys.argv[1] raised a bare IndexError; exit with a
    # usage message (written to stderr, exit status 1) instead.
    if len(sys.argv) < 2:
        sys.exit('Usage: pirateSearch.py <query>')

    query = sys.argv[1]
    searchTorrentSite(query)


if __name__ == '__main__':
    main()

175
seasonedParser/scandir.py Executable file
View File

@@ -0,0 +1,175 @@
#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-
# @Author: KevinMidboe
# @Date: 2017-10-02 16:29:25
# @Last Modified by: KevinMidboe
# @Last Modified time: 2018-01-15 17:18:36
try:
from os import scandir
except ImportError:
from scandir import scandir # use scandir PyPI module on Python < 3.5
import env_variables as env
import multiprocessing as mp
import logging, re, datetime
from guessit import guessit
from video import VIDEO_EXTENSIONS, Episode, Movie, Video
from subtitle import SUBTITLE_EXTENSIONS, Subtitle, get_subtitle_path
logging.basicConfig(filename=env.logfile, level=logging.INFO)
""" Move to utils file """
def removeLeadingZero(number):
    """Return `number` as an int, dropping one leading '0' from its text form.

    :param number: value whose ``str()`` form is inspected.
    :rtype: int
    """
    text = str(number)
    has_leading_zero = len(text) > 1 and text.startswith('0')
    return int(text[1:]) if has_leading_zero else int(number)
class movie(object):
    """Plain record for a movie file: its path plus optional title and year."""

    def __init__(self, path, title=None, year=None):
        self.path, self.title, self.year = path, title, year
class Episode(object):
    """Episode whose title, season and episode number are read from its file name.

    NOTE(review): this definition shadows the ``Episode`` imported from
    ``video`` at the top of the module — confirm that is intended.
    """

    def __init__(self, path, name, title=None, season=None, episode=None):
        super(Episode, self).__init__()
        self.path = path
        self.name = name
        self.title = title
        self.season = season
        self.episode = episode

    @classmethod
    def fromname(cls, path, name):
        """Build an Episode by parsing `name` with the find* helpers below."""
        # the helpers are plain methods that never use `self`; the class is
        # passed in its place, as before
        title = cls.findTitle(cls, name)
        season = cls.findSeasonNumber(cls, name)
        episode = cls.findEpisodeNumber(cls, name)
        return cls(path, name, title, season, episode)

    # The patterns below are raw strings: the previous plain literals relied
    # on invalid escape sequences such as '\.' and '\ ', which newer Python
    # versions warn about. The regular expressions themselves are unchanged.

    def findTitle(self, name):
        """Return the text before the season marker (e.g. ``S01``), or None."""
        m = re.search(r"([a-zA-Z0-9'\.\-\ ])+([sS][0-9]{1,3})", name)
        if m:
            return re.sub(r'[\ \.]*[sS][0-9]{1,2}', '', m.group(0))
        return None

    def findSeasonNumber(self, name):
        """Return the number following the first ``s``/``S`` marker, or None."""
        m = re.search(r'[sS][0-9]{1,2}', name)
        if m:
            seasonNumber = re.sub(r'[sS]', '', m.group(0))
            # int() already ignores leading zeros in a digit string
            return int(seasonNumber)
        return None

    def findEpisodeNumber(self, name):
        """Return the number following the first ``e``/``E`` marker, or None."""
        m = re.search(r'[eE][0-9]{1,3}', name)
        if m:
            episodeNumber = re.sub(r'[eE]', '', m.group(0))
            return int(episodeNumber)
        return None
def get_tree_size(path):
    """Return the summed size in bytes of the files below `path`.

    Entries whose path contains '.DS_Store' or 'lost+found' are ignored;
    directories are descended into without following symlinks.
    """
    size_sum = 0
    for item in scandir(path):
        if '.DS_Store' in item.path or 'lost+found' in item.path:
            continue

        if item.is_dir(follow_symlinks=False):
            size_sum += get_tree_size(item.path)
        else:
            size_sum += item.stat(follow_symlinks=False).st_size

    return int(size_sum)
def scantree(path):
    """Yield a DirEntry for every non-directory entry below `path`, recursively.

    Entries whose path contains '.DS_Store' or 'lost+found' are skipped.
    """
    # TODO have a blacklist here
    for item in scandir(path):
        blacklisted = '.DS_Store' in item.path or 'lost+found' in item.path
        if blacklisted:
            continue

        if not item.is_dir(follow_symlinks=False):
            yield item
        else:
            yield from scantree(item.path)
# Find all the mediaobjects for a given path
# TODO handle list of path's
def get_objects_for_path(path, archives=None, match=False):
    """Return an Episode for every video below `path` whose title can be parsed.

    :param str path: directory to scan recursively.
    :param archives: when truthy, archive files also pass the extension filter.
    :param match: unused.
    :return: the episodes built with ``Episode.fromname``.
    :rtype: list
    """
    # Media objects found in the given path. The unused `hashList` and the
    # unused reformatted `title` local of the previous version were dropped.
    mediaFiles = []

    # All entries given from scantree function
    for entry in scantree(path):
        logging.debug('Looking at file %s', str(entry.name))
        name = entry.name

        # Skip if not correct media extension
        # NOTE(review): ARCHIVE_EXTENSIONS is not defined or imported in the
        # code visible here, so a truthy `archives` presumably raises
        # NameError for any file that is neither video nor subtitle — confirm.
        if not (name.endswith(VIDEO_EXTENSIONS) or name.endswith(SUBTITLE_EXTENSIONS) or archives and name.endswith(ARCHIVE_EXTENSIONS)):
            continue

        # Skip if the file is a dotfile
        if name.startswith('.'):
            logging.debug('Skipping hidden file %s' % str(name))
            continue

        # If we have a video, create a class and append to mediaFiles
        if name.endswith(VIDEO_EXTENSIONS):  # video
            episode = Episode.fromname(entry.path, entry.name)
            if episode.title is None:
                logging.debug('None found for %s' % name)
                continue

            mediaFiles.append(episode)

    return mediaFiles
# Script mode: list the parsed titles below a path and, with '-m' on the
# command line, compare the regex-based title against guessit's.
# NOTE(review): statement nesting below was reconstructed from unindented
# source — verify against the real file.
if __name__ == '__main__':
    logging.info('Started: %s' % str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S:%f")))
    import sys
    from pprint import pprint
    total = 0
    missed = 0
    # print(get_tree_size(sys.argv[1] if len(sys.argv) > 1 else '.'))
    # print(str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S:%f")))
    # First pass: print the title of every episode that could be parsed.
    path = sys.argv[1] if len(sys.argv) > 1 else '.'
    mediaFiles = get_objects_for_path(path)
    getTitle = lambda ep: ep.title
    for ep in mediaFiles:
        print(getTitle(ep))
    # Second pass: walk the same tree again, entry by entry.
    mediaList = []
    for entry in scantree(sys.argv[1] if len(sys.argv) > 1 else '.'):
        name = entry.name
        manual = Episode.fromname(entry.path, entry.name)
        # size in GiB; only used by the commented-out print below
        size = int(entry.stat(follow_symlinks=False).st_size) / 1024 / 1024 / 1024
        # print(name + ' : ' + str(round(size, 2)) + 'GB')
        title = manual.title
        if title is None:
            logging.debug('None found for %s' % (name))
            continue
        # dots in the parsed title are replaced by spaces before comparing
        title = re.sub('[\.]', ' ', manual.title)
        # try:
        # print(name + ' : ' + "%s S%iE%i" % (str(title), manual.season, manual.episode))
        # except TypeError:
        # logging.error('Unexpected error: ' + name)
        mediaList.append(manual)
        if ('-m' in sys.argv):
            guess = guessit(name)
            logging.info('Manual is: {} and guess is {}'.format(title, guess['title']))
            # # if not (guess['season'] == manual.season and guess['episode'] == manual.episode):
            if (guess['title'].lower() != title.lower()):
                logging.info('Missmatch: %s by manual guess: %s : %s' % (name, guess['title'], title))
                missed += 1
        total += 1
    print('Total: %i, missed was: %i' % (total, missed))
    logging.info('Ended: %s' % str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S:%f")))
    logging.info(' - - - - - - - - - ')

29
seasonedParser/seasonGuesser.py Executable file
View File

@@ -0,0 +1,29 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author: KevinMidboe
# @Date: 2017-08-25 23:22:27
# @Last Modified by: KevinMidboe
# @Last Modified time: 2017-09-09 14:59:48
from core import organize_files, save_subtitles
from pprint import pprint
import os
def main():
    """Group the files below a hard-coded season folder and print each group."""
    # Other folders used while developing:
    #   '/Volumes/media/tv/Black Mirror/Black Mirror Season 01/'
    #   '/Volumes/media/tv/Fargo/Fargo Season 03/'
    #   '/Volumes/media/tv/Rick and Morty/Rick and Morty Season 03/'
    #   '/Volumes/media/movies/Interstellar.2014.1080p.BluRay.REMUX.AVC.DTS-HD.MA.5.1'
    path = '/Volumes/media-1/tv/Community.720p.1080p.WEB-DL.DD5.1.H.264/S04/'

    # the listing itself is discarded; the call fails early when the folder
    # cannot be read
    os.listdir(path)

    grouped = organize_files(path)
    for group in grouped.values():
        # save_subtitles(group, directory=path)
        print(group, path)


if __name__ == '__main__':
    main()

87
seasonedParser/seasonMover.py Executable file
View File

@@ -0,0 +1,87 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author: KevinMidboe
# @Date: 2017-07-11 19:16:23
# @Last Modified by: KevinMidboe
# @Last Modified time: 2017-08-13 12:12:05
import fire, re, os
from pprint import pprint
class seasonMover(object):
    ''' Moving multiple files to multiple folders with
    identifer '''
    # directory the tool operates on: the working directory at import time
    workingDir = os.getcwd()
    mediaExt = ['mkv', 'mp4']
    subExt = ['srt']

    def create(self, name, interval):
        """Placeholder; not implemented."""
        pass

    def move(self, fileSyntax, folderName):
        """Plan the moves for the episode range declared in `fileSyntax`.

        :param str fileSyntax: file name template containing an identifier
            such as ``(1..3)``.
        :param str folderName: prefix of the per-episode folder names.
        """
        episodeRange = self.findInterval(fileSyntax)
        self.motherMover(fileSyntax, folderName, episodeRange)

    def getFiles(self):
        """Return the sorted content of the working directory."""
        # was os.listDir, which does not exist (AttributeError)
        return sorted(os.listdir(self.workingDir))

    def getEpisodeNumber(self):
        """Return the digits after the first e/E marker in ``self.parent``.

        NOTE(review): ``self.parent`` is not set anywhere in this class —
        confirm where it is supposed to come from.
        """
        m = re.search(r'[eE][0-9]{1,2}', self.parent)
        if m:
            return re.sub(r'[eE]', '', m.group(0))
        return None

    def findInterval(self, item):
        """Return the episode numbers declared by an identifier like ``(1..3)``.

        Restored from the commented-out implementation because ``move``
        depends on it.

        :raises ValueError: when `item` holds no parenthesised identifier.
        """
        if re.search(r'\((.*)\)', item) is None:
            raise ValueError('Need to declare an identifier e.g. (1..3) in: \n\t' + item)

        start = int(re.search(r'\((\d+)\.\.', item).group(1))
        end = int(re.search(r'\.\.(\d+)\)', item).group(1))

        return list(range(start, end + 1))

    def removeUploadSign(self, file):
        """Return `file` without a trailing ``-UPLOADER`` tag before the extension."""
        match = re.search(r'-[a-zA-Z\[\]\-]*.[a-z]{3}', file)
        if match:
            # drop the last four characters (dot + three letter extension)
            uploader = match.group(0)[:-4]
            # plain text replacement: the tag may contain '[' and ']', which
            # re.sub interpreted as a character class
            return file.replace(uploader, '')

        return file

    def motherMover(self, fileSyntax, folderName, episodeRange):
        """Print the planned move of every media file in the working directory.

        Nothing is moved yet; the makedirs/rename calls are still disabled.

        :raises ValueError: when the number of media files differs from the
            length of `episodeRange`.
        """
        # Call for sub of fileList
        # TODO check if range is same as folderContent
        # print(fileSyntax, folderName, episodeRange)
        mediaFiles = []
        subtitleFiles = []
        seasonDirectory = sorted(os.listdir(self.workingDir))
        for file in seasonDirectory:
            if file[-3:] in self.mediaExt:
                mediaFiles.append(file)
            elif file[-3:] in self.subExt:
                subtitleFiles.append(file)

        # compare by value; `is not` on ints only works by accident for
        # small numbers
        if len(mediaFiles) != len(episodeRange):
            raise ValueError('Range defined does not match directory file content')

        for epMedia, epNum in zip(mediaFiles, episodeRange):
            leadingZeroNumber = "%02d" % epNum
            fileName = re.sub(r'\((.*)\)', leadingZeroNumber, fileSyntax)
            oldPath = os.path.join(self.workingDir, epMedia)
            newFolder = os.path.join(self.workingDir, folderName + leadingZeroNumber)
            newPath = os.path.join(newFolder, self.removeUploadSign(fileName))
            # os.makedirs(newFolder)
            # os.rename(oldPath, newPath)
            print(oldPath + ' --> ' + newPath)
if __name__ == '__main__':
fire.Fire(seasonMover)

296
seasonedParser/subtitle.py Normal file
View File

@@ -0,0 +1,296 @@
# -*- coding: utf-8 -*-
import codecs
import logging
import os
import chardet
import hashlib
from video import Episode, Movie
from utils import sanitize
# from langdetect import detect
logger = logging.getLogger(__name__)
#: Subtitle extensions. The trailing comma makes this a real one-element
#: tuple — ('.srt') is just the string '.srt' — and str.endswith accepts both.
SUBTITLE_EXTENSIONS = ('.srt',)
class Subtitle(object):
    """Base class for subtitle.

    Subclasses are expected to provide :attr:`id` and :meth:`get_matches`,
    which raise ``NotImplementedError`` here.

    :param language: language of the subtitle.
    :type language: :class:`~babelfish.language.Language`
    :param bool hearing_impaired: whether or not the subtitle is hearing impaired.
    :param page_link: URL of the web page from which the subtitle can be downloaded.
    :type page_link: str
    :param encoding: Text encoding of the subtitle.
    :type encoding: str
    """
    #: Name of the provider that returns that class of subtitle
    provider_name = ''

    def __init__(self, language, hearing_impaired=False, page_link=None, encoding=None):
        #: Language of the subtitle
        self.language = language
        #: Whether or not the subtitle is hearing impaired
        self.hearing_impaired = hearing_impaired
        #: URL of the web page from which the subtitle can be downloaded
        self.page_link = page_link
        #: Content as bytes
        self.content = None
        #: Encoding to decode with when accessing :attr:`text`
        self.encoding = None
        # validate the encoding: keep the canonical codec name, or leave
        # self.encoding as None when the codec is unknown
        if encoding:
            try:
                self.encoding = codecs.lookup(encoding).name
            except (TypeError, LookupError):
                logger.debug('Unsupported encoding %s', encoding)

    @property
    def id(self):
        """Unique identifier of the subtitle"""
        raise NotImplementedError

    @property
    def text(self):
        """Content as string

        If :attr:`encoding` is None, the encoding is guessed with :meth:`guess_encoding`

        Returns ``None`` when there is no content. Undecodable bytes are
        replaced rather than raising.
        """
        if not self.content:
            return
        if self.encoding:
            return self.content.decode(self.encoding, errors='replace')
        return self.content.decode(self.guess_encoding(), errors='replace')

    def is_valid(self):
        """Check if a :attr:`text` is a valid SubRip format.

        :return: whether or not the subtitle is valid.
        :rtype: bool
        """
        if not self.text:
            return False
        # NOTE(review): `pysrt` is not imported in the part of this module
        # visible here, so this call presumably raises NameError — confirm
        # and add the import if it is missing.
        try:
            pysrt.from_string(self.text, error_handling=pysrt.ERROR_RAISE)
        except pysrt.Error as e:
            # an error whose first argument is below 80 marks the subtitle
            # invalid; presumably that value is a line number — confirm
            if e.args[0] < 80:
                return False
        return True

    def guess_encoding(self):
        """Guess encoding using the language, falling back on chardet.

        :return: the guessed encoding.
        :rtype: str
        """
        logger.info('Guessing encoding for language %s', self.language)
        # always try utf-8 first
        encodings = ['utf-8']
        # add language-specific encodings
        if self.language.alpha3 == 'zho':
            encodings.extend(['gb18030', 'big5'])
        elif self.language.alpha3 == 'jpn':
            encodings.append('shift-jis')
        elif self.language.alpha3 == 'ara':
            encodings.append('windows-1256')
        elif self.language.alpha3 == 'heb':
            encodings.append('windows-1255')
        elif self.language.alpha3 == 'tur':
            encodings.extend(['iso-8859-9', 'windows-1254'])
        elif self.language.alpha3 == 'pol':
            # Eastern European Group 1
            encodings.extend(['windows-1250'])
        elif self.language.alpha3 == 'bul':
            # Eastern European Group 2
            encodings.extend(['windows-1251'])
        else:
            # Western European (windows-1252)
            encodings.append('latin-1')
        # try to decode: the first candidate that decodes without error wins
        logger.debug('Trying encodings %r', encodings)
        for encoding in encodings:
            try:
                self.content.decode(encoding)
            except UnicodeDecodeError:
                pass
            else:
                logger.info('Guessed encoding %s', encoding)
                return encoding
        logger.warning('Could not guess encoding from language')
        # fallback on chardet
        encoding = chardet.detect(self.content)['encoding']
        logger.info('Chardet found encoding %s', encoding)
        return encoding

    def get_matches(self, video):
        """Get the matches against the `video`.

        :param video: the video to get the matches with.
        :type video: :class:`~subliminal.video.Video`
        :return: matches of the subtitle.
        :rtype: set
        """
        raise NotImplementedError

    def __hash__(self):
        # hash over provider name and id (the id comes from the subclass)
        return hash(self.provider_name + '-' + self.id)

    def __repr__(self):
        return '<%s %r [%s]>' % (self.__class__.__name__, self.id, self.language)
def get_subtitle_path(video_path, language=None, extension='.srt'):
    """Build the subtitle path that belongs to `video_path`.

    The extension of the video is dropped, the `language` (when given) is appended as an extra
    dotted component and `extension` is added last.

    :param str video_path: path to the video.
    :param language: language of the subtitle to put in the path.
    :type language: :class:`~babelfish.language.Language`
    :param str extension: extension of the subtitle.
    :return: path of the subtitle.
    :rtype: str

    """
    root = os.path.splitext(video_path)[0]
    if not language:
        return root + extension

    return root + '.' + str(language) + extension
def guess_matches(video, guess, partial=False):
    """Collect the properties on which a `video` and a `guess` agree.

    A property counts when it is set on the `video`, present in the `guess` and both values agree.
    Unless the guess is `partial`, a guess without a year also counts as a ``year`` match for an
    episode flagged as ``original_series``.

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param guess: the guess.
    :type guess: dict
    :param bool partial: whether or not the guess is partial.
    :return: matches between the `video` and the `guess`.
    :rtype: set

    """
    found = set()

    def agrees(key, wanted):
        # plain equality between the guessed value and the video attribute
        return wanted and key in guess and guess[key] == wanted

    def agrees_sanitized(key, wanted):
        # same check, with both sides passed through sanitize() first
        return wanted and key in guess and sanitize(guess[key]) == sanitize(wanted)

    if isinstance(video, Episode):
        # the guessed 'title' is the series name, 'episode_title' the episode's own title
        if agrees_sanitized('title', video.series):
            found.add('series')
        if agrees_sanitized('episode_title', video.title):
            found.add('title')
        if agrees('season', video.season):
            found.add('season')
        if agrees('episode', video.episode):
            found.add('episode')
        if agrees('year', video.year):
            found.add('year')
        # count "no year" as an information
        if not partial and video.original_series and 'year' not in guess:
            found.add('year')
    elif isinstance(video, Movie):
        if agrees('year', video.year):
            found.add('year')
        if agrees_sanitized('title', video.title):
            found.add('title')

    # release group, compared through the set of equivalent groups
    # NOTE(review): sanitize_release_group and get_equivalent_release_groups are not among the
    # imports visible next to this function -- confirm they are in scope
    if video.release_group and 'release_group' in guess:
        guessed_group = sanitize_release_group(guess['release_group'])
        equivalents = get_equivalent_release_groups(sanitize_release_group(video.release_group))
        if guessed_group in equivalents:
            found.add('release_group')

    # resolution is guessed under the 'screen_size' key
    if agrees('screen_size', video.resolution):
        found.add('resolution')

    # format is compared case-insensitively
    if video.format and 'format' in guess and guess['format'].lower() == video.format.lower():
        found.add('format')

    if agrees('video_codec', video.video_codec):
        found.add('video_codec')
    if agrees('audio_codec', video.audio_codec):
        found.add('audio_codec')

    return found
def fix_line_ending(content):
    """Normalize every line ending of `content` to a single line feed.

    :param bytes content: content of the subtitle.
    :return: the content with fixed line endings.
    :rtype: bytes

    """
    # CRLF first, so the CR pass below does not turn one ending into two
    without_crlf = content.replace(b'\r\n', b'\n')
    normalized = without_crlf.replace(b'\r', b'\n')
    return normalized
'''
@classmethod
def fromguess(cls, name, parent_path, guess):
if not (guess['type'] == 'movie' or guess['type'] == 'episode'):
raise ValueError('The guess must be an episode guess')
if 'title' not in guess:
raise ValueError('Insufficient data to process the guess')
sdh = 'sdh' in name.lower()
if guess['type'] is 'episode':
return cls(name, parent_path, guess.get('title', 1), guess.get('season'), guess['episode'],
container=guess.get('container'), format=guess.get('format'), sdh=sdh)
elif guess['type'] is 'movie':
return cls(name, parent_path, guess.get('title', 1), container=guess.get('container'),
format=guess.get('format'), sdh=sdh)
def getLanguage(self):
f = open(os.path.join(self.parent_path, self.name), 'r', encoding='ISO-8859-15')
language = detect(f.read())
f.close()
return language
def __hash__(self):
return hashlib.md5("b'{}'".format(str(self.series) + str(self.season) + str(self.episode)).encode()).hexdigest()
def __repr__(self):
return '<%s %s [%sx%s]>' % (self.__class__.__name__, self.series, self.season, str(self.episode))
def get_subtitle_path(subtitles_path, language=None, extension='.srt'):
"""Get the subtitle path using the `subtitles_path` and `language`.
:param str subtitles_path: path to the subtitle.
:param language: language of the subtitle to put in the path.
:type language: :class:`~babelfish.language.Language`
:param str extension: extension of the subtitle.
:return: path of the subtitle.
:rtype: str
"""
subtitle_root = os.path.splitext(subtitles_path)[0]
if language:
subtitle_root += '.' + str(language)
return subtitle_root + extension
'''

View File

@@ -0,0 +1,2 @@
import sys, os
# Make the directory above this package importable (e.g. for ``import env_variables``).
_parent_dir = os.path.realpath(os.path.dirname(__file__) + "/..")
sys.path.append(_parent_dir)

View File

@@ -0,0 +1,5 @@
import sys, os
def test_import_env_variables():
    """The env_variables module must be importable and expose the expected log file path."""
    import env_variables as env

    expected_logfile = 'conf/output.log'
    assert env.logfile == expected_logfile

View File

@@ -0,0 +1,9 @@
def square(x):
    """Return `x` multiplied by itself."""
    product = x * x
    return product
def test_square():
    """Squaring 2 gives 4."""
    result = square(2)
    assert result == 4
def test_square_negative():
    """Squaring -2 gives 4 as well."""
    result = square(-2)
    assert result == 4

15
seasonedParser/tvdb.py Normal file
View File

@@ -0,0 +1,15 @@
import tvdb_api, hashlib
def main():
    """Print a SHA-1 digest for episodes 1-9 of season 3 of 'rick and morty'.

    Every episode is looked up through ``tvdb_api`` first; the digest is computed over the
    string ``<show>.<season>.<episode>``.
    """
    tvdb = tvdb_api.Tvdb()
    show = 'Rick and morty'.lower()
    season = 3

    for number in range(1, 10):
        # the record itself is not used afterwards; the lookup is still performed
        _ = tvdb[show][season][number]

        label = '.'.join((show, str(season), str(number)))
        digest = hashlib.sha1(label.encode()).hexdigest()
        print('%s : %s' % (digest, label))


if __name__ == '__main__':
    main()

135
seasonedParser/utils.py Normal file
View File

@@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
from datetime import datetime
import hashlib
import os
import logging
import re
import struct
from babelfish import Error as BabelfishError, Language
from enzyme import MalformedMKVError, MKV
def sanitize(string, ignore_characters=None):
    """Normalize a string for comparison.

    Apostrophes are removed (unless listed in `ignore_characters`), runs of whitespace collapse to
    one space, and the result is stripped and lower-cased. ``None`` is passed through as ``None``.

    :param str string: the string to sanitize.
    :param set ignore_characters: characters to ignore.
    :return: the sanitized string.
    :rtype: str

    """
    # only deal with strings
    if string is None:
        return None

    skipped = ignore_characters if ignore_characters else set()

    # NOTE: replacing '-', ':', '(', ')' and '.' with a space is currently disabled

    # characters that get dropped entirely
    removable = {'\''} - skipped
    if removable:
        pattern = r'[%s]' % re.escape(''.join(removable))
        string = re.sub(pattern, '', string)

    # squeeze whitespace runs, then trim and lower-case
    collapsed = re.sub(r'\s+', ' ', string)
    return collapsed.strip().lower()
def refine(video, embedded_subtitles=True, **kwargs):
    """Fill in attributes of `video` from the metadata of its file.

    Only existing ``.mkv`` files are inspected; anything else is left untouched.
    Several :class:`~subliminal.video.Video` attributes can be found:

      * :attr:`~subliminal.video.Video.resolution`
      * :attr:`~subliminal.video.Video.video_codec`
      * :attr:`~subliminal.video.Video.audio_codec`
      * :attr:`~subliminal.video.Video.embeded_subtitles`

    :param video: the video to refine, updated in place.
    :param bool embedded_subtitles: search for embedded subtitles.
    :param kwargs: accepted but not used.

    """
    # skip non existing videos
    if not video.exists:
        return

    # only matroska containers are supported
    extension = os.path.splitext(video.name)[1]
    if extension != '.mkv':
        logging.debug('Unsupported video extension %s', extension)
        return

    # container codec ids and the codec names stored on the video
    video_codecs = {
        'V_MPEG4/ISO/AVC': 'h264',
        'V_MPEG4/ISO/SP': 'DivX',
        'V_MPEG4/ISO/ASP': 'XviD',
    }
    audio_codecs = {
        'A_AC3': 'AC3',
        'A_DTS': 'DTS',
        'A_AAC': 'AAC',
    }

    with open(video.name, 'rb') as stream:
        try:
            mkv = MKV(stream)
        except MalformedMKVError:
            logging.error('Failed to parse mkv, malformed file')
            return
        except KeyError:
            logging.error('Key error while opening file, uncompatible mkv container')
            return

        # main video track
        if not mkv.video_tracks:
            logging.warning('MKV has no video track')
        else:
            main_video = mkv.video_tracks[0]

            # resolution, only for the well-known heights
            if main_video.height in (480, 720, 1080, 2160):
                template = '%di' if main_video.interlaced else '%dp'
                video.resolution = template % main_video.height
                logging.debug('Found resolution %s', video.resolution)

            # video codec
            codec_name = video_codecs.get(main_video.codec_id)
            if codec_name is not None:
                video.video_codec = codec_name
                logging.debug('Found video_codec %s', video.video_codec)

        # main audio track
        if not mkv.audio_tracks:
            logging.warning('MKV has no audio track')
        else:
            main_audio = mkv.audio_tracks[0]

            # audio codec
            codec_name = audio_codecs.get(main_audio.codec_id)
            if codec_name is not None:
                video.audio_codec = codec_name
                logging.debug('Found audio_codec %s', video.audio_codec)

        # subtitle tracks
        if not mkv.subtitle_tracks:
            logging.debug('MKV has no subtitle track')
        elif embedded_subtitles:
            languages = set()
            for track in mkv.subtitle_tracks:
                if track.language:
                    # prefer the declared language code
                    try:
                        languages.add(Language.fromalpha3b(track.language))
                    except BabelfishError:
                        logging.error('Embedded subtitle track language %r is not a valid language', track.language)
                        languages.add(Language('und'))
                elif track.name:
                    # otherwise try to read a language out of the track name
                    try:
                        languages.add(Language.fromname(track.name))
                    except BabelfishError:
                        logging.debug('Embedded subtitle track name %r is not a valid language', track.name)
                        languages.add(Language('und'))
                else:
                    # nothing to go on: undetermined
                    languages.add(Language('und'))

            logging.debug('Found embedded subtitle %r', languages)
            video.embeded_subtitles |= languages

238
seasonedParser/video.py Normal file
View File

@@ -0,0 +1,238 @@
#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-
# @Author: KevinMidboe
# @Date: 2017-08-26 08:23:18
# @Last Modified by: KevinMidboe
# @Last Modified time: 2018-05-13 20:50:00
from guessit import guessit
import os
import logging
from titlecase import titlecase
import hashlib, tvdb_api
import env_variables as env
from exceptions import InsufficientNameError
logger = logging.getLogger('seasonedParser')
#: Video extensions
# NOTE: every entry needs its own comma -- adjacent string literals are concatenated silently
# (a missing comma used to merge '.ogm' and '.ogv' into the bogus entry '.ogm.ogv').
VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.asx', '.avchd', '.avi', '.bik',
                    '.bix', '.box', '.cam', '.dat', '.divx', '.dmf', '.dv', '.dvr-ms', '.evo', '.flc', '.fli',
                    '.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2v', '.m4e',
                    '.m4v', '.mjp', '.mjpeg', '.mjpg', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx', '.mp4',
                    '.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm', '.ogv', '.omf',
                    '.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv', '.vivo',
                    '.vob', '.vro', '.wm', '.wmv', '.wmx', '.wrap', '.wvx', '.wx', '.x264', '.xvid')
class Video(object):
    """Base class for videos.

    Represent a video, existing or not.

    :param str name: name or path of the video.
    :param hash: hashes of the video file by provider names.
    :param int size: size of the video file in bytes.
    :param str format: format of the video (HDTV, WEB-DL, BluRay, ...).
    :param str release_group: release group of the video.
    :param str resolution: resolution of the video stream (480p, 720p, 1080p or 1080i, 4K).
    :param str video_codec: codec of the video stream.
    :param str audio_codec: codec of the main audio stream.
    :param set subtitles: existing subtitle languages.
    :param set embeded_subtitles: embedded subtitle languages.

    """
    def __init__(self, name, hash=None, size=None, format=None, release_group=None, resolution=None, video_codec=None, audio_codec=None,
                 subtitles=None, embeded_subtitles=None):
        #: Name or path of the video
        self.name = name

        #: Hashes of the video file by provider names
        self.hash = hash

        #: Size of the video file in bytes
        self.size = size

        #: Format of the video (HDTV, WEB-DL, BluRay, ...)
        self.format = format

        #: Release group of the video
        self.release_group = release_group

        #: Resolution of the video stream (480p, 720p, 1080p or 1080i)
        self.resolution = resolution

        #: Codec of the video stream
        self.video_codec = video_codec

        #: Codec of the main audio stream
        self.audio_codec = audio_codec

        #: Existing subtitle languages
        self.subtitles = subtitles or set()

        #: Embeded subtitle languages
        self.embeded_subtitles = embeded_subtitles or set()

    @property
    def exists(self):
        """Test whether the video exists"""
        return os.path.exists(self.name)

    @property
    def age(self):
        """Age of the video.

        :return: time elapsed since the file was last modified, or an empty
            :class:`~datetime.timedelta` when the file does not exist.
        :rtype: datetime.timedelta

        """
        # imported locally: the module-level imports do not provide these names,
        # which made this property fail with a NameError
        from datetime import datetime, timedelta, timezone

        if self.exists:
            # both instants are taken in UTC, so the difference is unaffected by the local timezone
            modified = datetime.fromtimestamp(os.path.getmtime(self.name), timezone.utc)
            return datetime.now(timezone.utc) - modified

        return timedelta()

    @classmethod
    def fromguess(cls, name, guess):
        """Create an :class:`Episode` or a :class:`Movie` with the given `name` based on the `guess`.

        :param str name: name of the video.
        :param dict guess: guessed data.
        :raise: :class:`ValueError` if the `type` of the `guess` is invalid

        """
        if guess['type'] == 'episode':
            return Episode.fromguess(name, guess)

        if guess['type'] == 'movie':
            return Movie.fromguess(name, guess)

        raise ValueError('The guess must be an episode or a movie guess')

    @classmethod
    def fromname(cls, name):
        """Shortcut for :meth:`fromguess` with a `guess` guessed from the `name`.

        :param str name: name of the video.

        """
        return cls.fromguess(name, guessit(name))

    def __repr__(self):
        return '<%s [%r]>' % (self.__class__.__name__, self.name)

    def __hash__(self):
        return hash(self.name)
class Episode(Video):
    """A :class:`Video` that is one episode of a series.

    :param str series: series of the episode.
    :param int season: season number of the episode.
    :param int episode: episode number of the episode.
    :param str title: title of the episode.
    :param int year: year of the series.
    :param bool original_series: whether the series is the first with this name.
    :param int tvdb_id: TVDB id of the episode.
    :param int series_tvdb_id: TVDB id of the series.
    :param kwargs: additional parameters for the :class:`Video` constructor.

    """
    def __init__(self, name, series, season, episode, title=None, year=None, original_series=True, tvdb_id=None,
                 series_tvdb_id=None, **kwargs):
        super(Episode, self).__init__(name, **kwargs)

        #: Series of the episode
        self.series = series

        #: Season number of the episode
        self.season = season

        #: Episode number of the episode
        self.episode = episode

        #: Title of the episode
        self.title = title

        #: Year of series
        self.year = year

        #: The series is the first with this name
        self.original_series = original_series

        #: TVDB id of the episode
        self.tvdb_id = tvdb_id

        #: TVDB id of the series
        self.series_tvdb_id = series_tvdb_id

    @classmethod
    def fromguess(cls, name, guess):
        """Build an :class:`Episode` named `name` out of an episode `guess`.

        :param str name: name of the video.
        :param dict guess: guessed data.
        :raise: :class:`ValueError` if the guess is not an episode guess.
        :raise: :class:`InsufficientNameError` if title, season or episode is missing from the guess
            or holds a list.

        """
        logger.info('Guess: {}'.format(guess))

        if guess['type'] != 'episode':
            raise ValueError('The guess must be an episode guess')

        # all three must be present and must be single values
        required = ('title', 'season', 'episode')
        if not all(key in guess for key in required):
            raise InsufficientNameError('Guess failed to have sufficient data from query: {}'.format(name))
        if any(isinstance(guess[key], list) for key in required):
            raise InsufficientNameError('Guess could not be parsed, list values found.')

        details = dict(
            title=guess.get('episode_title'),
            year=guess.get('year'),
            format=guess.get('format'),
            # a guess without a year is taken to be the original series
            original_series='year' not in guess,
            release_group=guess.get('release_group'),
            resolution=guess.get('screen_size'),
            video_codec=guess.get('video_codec'),
            audio_codec=guess.get('audio_codec'),
        )
        return cls(name, guess['title'], guess.get('season', 1), guess['episode'], **details)

    @classmethod
    def fromname(cls, name):
        """Guess `name` as an episode with single values and build the :class:`Episode` from it."""
        options = {'type': 'episode', 'single_value': True}
        return cls.fromguess(name, guessit(name, options))

    def wantedFilePath(self):
        """Return the destination path of this episode below ``env.SHOWBASE``."""
        series = titlecase(self.series)
        season_dir = '{}/{} Season {:02d}'.format(series, series, self.season)
        episode_dir = '{} S{:02d}E{:02d}'.format(series, self.season, self.episode)
        filename = os.path.basename(self.name)
        return os.path.join(env.SHOWBASE, season_dir, episode_dir, filename)

    def __repr__(self):
        cls_name = self.__class__.__name__
        has_subtitles = self.subtitles is not None and len(self.subtitles) > 0
        if has_subtitles:
            return '<%s [%r, %dx%s] %s>' % (cls_name, self.series, self.season, self.episode, self.subtitles)
        return '<%s [%r, %dx%d]>' % (cls_name, self.series, self.season, self.episode)
class Movie(Video):
    """Movie :class:`Video`.

    :param str title: title of the movie.
    :param int year: year of the movie.
    :param kwargs: additional parameters for the :class:`Video` constructor.

    """
    def __init__(self, name, title, year=None, **kwargs):
        super(Movie, self).__init__(name, **kwargs)

        #: Title of the movie
        self.title = title

        #: Year of the movie
        self.year = year

    @classmethod
    def fromguess(cls, name, guess):
        """Build a :class:`Movie` named `name` out of a movie `guess`.

        :param str name: name of the video.
        :param dict guess: guessed data.
        :raise: :class:`ValueError` if the guess is not a movie guess.
        :raise: :class:`InsufficientNameError` if the guess lacks a title or a year.

        """
        if guess['type'] != 'movie':
            raise ValueError('The guess must be a movie guess')

        if 'title' not in guess or 'year' not in guess:
            raise InsufficientNameError('Guess failed to have sufficient data from query: {}'.format(name))

        return cls(name, guess['title'], format=guess.get('format'), release_group=guess.get('release_group'),
                   resolution=guess.get('screen_size'), video_codec=guess.get('video_codec'),
                   audio_codec=guess.get('audio_codec'), year=guess.get('year'))

    @classmethod
    def fromname(cls, name, year=None):
        """Guess `name` as a movie and build the :class:`Movie` from it.

        :param str name: name of the video.
        :param year: not used; kept (now optional) so existing two-argument calls keep working and
            the one-argument form matches :meth:`Video.fromname`.

        """
        return cls.fromguess(name, guessit(name, {'type': 'movie'}))

    def sufficientInfo(self):
        """Tell whether title and year are both set to single values.

        :return: False (after logging an error) when either one is missing, None or a list.
        :rtype: bool

        """
        # getattr with a default: a missing attribute is reported like a None value
        # instead of raising while the error message is being built
        title = getattr(self, 'title', None)
        year = getattr(self, 'year', None)

        if title is None or year is None:
            logger.error('{} or {} found to have none value, manual correction required'.format(title, year))
            return False

        if isinstance(title, list) or isinstance(year, list):
            logger.error('{} or {} found to have list value, manual correction required'.format(title, year))
            return False

        return True

    def wantedFilePath(self):
        """Return the destination path of this movie below ``env.MOVIEBASE``."""
        title = titlecase(self.title)
        parent = '{} ({})'.format(title, self.year)
        return os.path.join(env.MOVIEBASE, parent, os.path.basename(self.name))

    def __repr__(self):
        if self.year is None:
            return '<%s [%r]>' % (self.__class__.__name__, self.title)

        return '<%s [%r, %d]>' % (self.__class__.__name__, self.title, self.year)

25
seasonedParser/walk.py Executable file
View File

@@ -0,0 +1,25 @@
#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-
# @Author: KevinMidboe
# @Date: 2017-10-02 16:29:25
# @Last Modified by: KevinMidboe
# @Last Modified time: 2017-10-02 18:07:26
import itertools, os
import multiprocessing
def worker(filename):
    """Print `filename`; this is the function mapped over every path by the pool in main()."""
    print(filename)
def main():
    """Walk ``/Volumes/mainframe/shows/`` and hand every file path to :func:`worker` in parallel."""
    with multiprocessing.Pool(48) as pool:  # pool of 48 processes
        tree = os.walk("/Volumes/mainframe/shows/")
        # lazily flatten the walk into one stream of full file paths
        paths = (os.path.join(folder, name)
                 for folder, _subdirs, names in tree
                 for name in names)
        pool.map(worker, paths)  # this does the parallel processing


if __name__ == '__main__':
    main()

23
seasonedParser/watcher.py Normal file
View File

@@ -0,0 +1,23 @@
import sys
import time
import logging
from watchdog.observers import Observer
from watchdog.events import LoggingEventHandler
if __name__ == "__main__":
    # every log record goes to output.log, prefixed with a timestamp
    logging.basicConfig(filename='output.log',
                        level=logging.DEBUG,
                        format='%(asctime)s - %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')

    # watch the directory given as first argument, the current one by default
    if len(sys.argv) > 1:
        watch_path = sys.argv[1]
    else:
        watch_path = '.'
    print(watch_path)

    handler = LoggingEventHandler()
    watcher = Observer()
    watcher.schedule(handler, watch_path, recursive=True)
    watcher.start()

    # idle until interrupted from the keyboard, then stop the observer
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        watcher.stop()
    watcher.join()