diff --git a/.drone.yml b/.drone.yml new file mode 100644 index 0000000..e61c330 --- /dev/null +++ b/.drone.yml @@ -0,0 +1,44 @@ +--- +kind: pipeline +type: docker +name: seasonedParser + +platform: + os: linux + arch: amd64 + +steps: +- name: test-python3.6 + image: python:3.6-alpine + commands: + - python --version + - pip install -r requirements.txt + - py.test + +- name: test-python3.8 + image: python:3.8-alpine + commands: + - python --version + - pip install -r requirements.txt + - py.test + +- name: codecov + image: python:3.6-alpine + environment: + CODECOV_TOKEN: + from_secret: CODECOV_TOKEN + commands: + - pip install -r requirements.txt + - py.test --cov-report=xml --cov=seasonedParser + - apk add git + - apk add bash + - apk add curl + - bash -c "$(curl -s https://codecov.io/bash)" +trigger: + branch: + - master + event: + include: + - pull_request + - push + diff --git a/README.md b/README.md index e7cc04f..134417b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ # seaonedParser seasoned Parser is python based parser that indexes a given directory for media files and identifies if it is a movie or show file and renames + moves it to a the correct place in library. +[![codecov](https://codecov.io/gh/KevinMidboe/seasonedParser/branch/master/graph/badge.svg)](https://codecov.io/gh/KevinMidboe/seasonedParser) +[![Build Status](https://drone.kevinmidboe.com/api/badges/KevinMidboe/seasonedParser/status.svg)](https://drone.kevinmidboe.com/KevinMidboe/seasonedParser) + ## Table of Conents - [Config](#config) - [Setup for automation](#setup-for-automation) @@ -16,4 +19,29 @@ There are some settings that need to be set for seasonedParser to be able to fin ### Download directory In your download client set a incomplete folder and a complete directory. This will allow seasonedParser to only parse items that have been completely downloaded. -*TODO:* Monitor multiple folders at the same time. 
\ No newline at end of file +*TODO:* Monitor multiple folders at the same time. + +## Run +There are many run commands for this, but here is a list of the current working run commands for this project. + +```bash + user@host:$ ~/seasonedParser/./seasonedMover.py move 'The.Big.Bang.Theory.S11E(7..14).720p.x264.mkv' '/mnt/mainframe/shows/The Big Bang Theory/The Big Bang Theory S11E' +``` + +Here the first parameter is our move command, which in turn calls motherMover. The second parameter is what we want the filenames to be called. Notice the (num1..num2); this creates a range covering all the episodes we want to move. The last parameter is the path we want to move our content to. + > This will be done automatically by the parser based on the info in the media item's name, but it is nice to have a manual command. + + +## Cli + +Arguments +* Dry run with --dry +* Path variable +* daemon with -d option + * Still need the path variable + * Daemon sends confirmation and, when info is missing, asks via tweet for correction + +Functions +* Should ask for input when missing info, always when cli + + diff --git a/knowledgeBase.md b/knowledgeBase.md index 0f95342..0d20f9a 100644 --- a/knowledgeBase.md +++ b/knowledgeBase.md @@ -564,4 +564,51 @@ Total: 5926, missed was: 29 real 2m0.766s user 1m41.482s sys 0m0.851s -``` \ No newline at end of file +``` + + +Keep nfo files? + + +# Program flow + +videos -> scan_folder +scan_folder + videos + ├ scan_video + ├ search_external_subtitles + └ refine + ignored_videos -> None + error_paths -> not exists or directory but error while scan_videos + +scan_folder-videos-scan_video = + raise error -> not exists or not VIDEO_EXT + video -> Video.fromguess + +Video + -> fromguess + * Raise ValueError not episode or movie + +Can I raise an error for everything that is not sufficient to move? Then return the errors with the videos. This catches the same number as video.sufficient, pickforgirlscouts. 
+ + +Tweet argument for correction + +How are the insufficient supposed to be handled? +* Return with videos to main and + + + +Should not create dependencies in the code by having an exception do something very specific to install and external data by checking if a guess can be resolved from checking earlier matches. It would be a more nimble and modular approach to have our errors sent back and returned and have a separate execution path which has the database element for checking earlier matches. Now the dependencies are segmented more in two different files, increasing upgradability. +Another note would be to not have rare and error-prone calls happen deep in an execution path but have it separated so handling the errors for it can be high level and not have other functions' error handling take over or misrepresent the original error. + +| +|\ Could look at the SHOW_PATH and search for nodes with same +| | structure as the guess. A pretty high number or match the +| o parent dirs and then compare the file's guess with itself. + + +* Daemon argument - Throw exception if notification agent is not defined. +* should the path be saved in the video object? + +NB! New path is not defined. 
diff --git a/requirements.txt b/requirements.txt index 0ed1f9c..1450541 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,9 @@ -guessit==2.1.4 +guessit==3.0.0 tvdb_api==2.0 +hashids==1.2.0 +enzyme>=0.4.1 +click>=6.7 +langdetect>=1.0.7 +titlecase>=0.12.0 +pytest>=5.3.5 +pytest-cov>=2.8.1 diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..a08b226 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,7 @@ +print('hello from init') +import sys,os +# sys.path.append(os.path.join(os.path.dirname(__file__),os.pardir)) + +# from + +from .__version__ import __version__ diff --git a/src/__main__.py b/src/__main__.py new file mode 100644 index 0000000..c29c2c4 --- /dev/null +++ b/src/__main__.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3.6 +# -*- coding: utf-8 -*- +""" +Entry point module +""" + +import click +from guessit import guessit +import logging + +from core import scan_folder +from video import Video +from exceptions import InsufficientNameError + +import env_variables as env + +logging.basicConfig(filename=env.logfile, level=logging.INFO) +logger = logging.getLogger('seasonedParser') +fh = logging.FileHandler(env.logfile) +fh.setLevel(logging.INFO) +sh = logging.StreamHandler() +sh.setLevel(logging.WARNING) + +fh_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +sh_formatter = logging.Formatter('%(levelname)s: %(message)s') +fh.setFormatter(fh_formatter) +sh.setFormatter(sh_formatter) + +logger.addHandler(fh) +logger.addHandler(sh) + +def tweet(video): + pass + +def prompt(name): + manual_name = input("Insufficient name: '{}'\nInput name manually: ".format(name)) + + if manual_name == 'q': + raise KeyboardInterrupt + if manual_name == 's': + return None + + + return manual_name + +def _moveHome(file): + print('- - -\nMatch: \t\t {}. \nDestination:\t {}'.format(file, file.wantedFilePath())) + logger.info('- - -\nMatch: \t\t {}. 
\nDestination:\t {}'.format(file, file.wantedFilePath())) + +@click.command() +@click.argument('path') +@click.option('--daemon', '-d', is_flag=True) +@click.option('--dry', is_flag=True) +def main(path, daemon, dry): + if dry: + def moveHome(file): _moveHome(file) + else: + from core import moveHome + + + videos, insufficient_name = scan_folder(path) + + for video in videos: + moveHome(video) + + if len(insufficient_name) and daemon: + logger.warning('Daemon flag set. Insufficient name for: %r', insufficient_name) + exit(0) + + while len(insufficient_name) >= 1: + for i, file in enumerate(insufficient_name): + try: + manual_name = prompt(file) + + if manual_name is None: + del insufficient_name[i] + continue + + video = Video.fromguess(file, guessit(manual_name)) + moveHome(video) + del insufficient_name[i] + + except KeyboardInterrupt: + # Logger: Received interrupt, exiting parser. + # should the class objects be deleted ? + print('Interrupt detected. Exiting') + exit(0) + +if __name__ == '__main__': + main() diff --git a/src/__version__.py b/src/__version__.py new file mode 100644 index 0000000..d19b4e5 --- /dev/null +++ b/src/__version__.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Version module +""" +__version__ = '0.2.0' diff --git a/src/core.py b/src/core.py index dab6ba6..a18a4b2 100755 --- a/src/core.py +++ b/src/core.py @@ -3,25 +3,63 @@ # @Author: KevinMidboe # @Date: 2017-08-25 23:22:27 # @Last Modified by: KevinMidboe -# @Last Modified time: 2017-09-29 12:35:24 +# @Last Modified time: 2019-02-02 01:04:25 from guessit import guessit +from babelfish import Language, LanguageReverseError +import hashlib import os, errno -import logging +import shutil +import re import tvdb_api +import click from pprint import pprint +from titlecase import titlecase +import langdetect import env_variables as env +from exceptions import InsufficientNameError +import logging +logger = logging.getLogger('seasonedParser') from video import 
VIDEO_EXTENSIONS, Episode, Movie, Video from subtitle import SUBTITLE_EXTENSIONS, Subtitle, get_subtitle_path -from utils import sanitize +from utils import sanitize, refine -logging.basicConfig(filename=env.logfile, level=logging.INFO) +def search_external_subtitles(path, directory=None): + dirpath, filename = os.path.split(path) + dirpath = dirpath or '.' + fileroot, fileext = os.path.splitext(filename) + subtitles = {} + for p in os.listdir(directory or dirpath): + if not p.endswith(SUBTITLE_EXTENSIONS): + continue -#: Supported archive extensions -ARCHIVE_EXTENSIONS = ('.rar',) + language = Language('und') + language_code = p[len(fileroot):-len(os.path.splitext(p)[1])].replace(fileext, '').replace('_','-')[1:] + if language_code: + try: + language = Language.fromietf(language_code) + except (ValueError, LanguageReverseError): + logger.error('Cannot parse language code %r', language_code) + + f = open(os.path.join(dirpath, p), 'r', encoding='ISO-8859-15') + + pattern = re.compile('[0-9:\,-<>]+') + # head = list(islice(f.read(), 10)) + filecontent = pattern.sub('', f.read()) + filecontent = filecontent[0:1000] + language = langdetect.detect(filecontent) + f.close() + + subtitles[os.path.join(dirpath, p)] = language + logger.debug('Found subtitles %r', subtitles) + + return subtitles + +def find_file_size(video): + return os.path.getsize(video.name) def scan_video(path): """Scan a video from a `path`. 
@@ -36,17 +74,30 @@ def scan_video(path): raise ValueError('Path does not exist') # check video extension - # if not path.endswith(VIDEO_EXTENSIONS): - # raise ValueError('%r is not a valid video extension' % os.path.splitext(path)[1]) + if not path.endswith(VIDEO_EXTENSIONS): + raise ValueError('%r is not a valid video extension' % os.path.splitext(path)[1]) dirpath, filename = os.path.split(path) - logging.info('Scanning video %r in %r', filename, dirpath) + logger.info('Scanning video %r in %r', filename, dirpath) # guess - parent_path = path.strip(filename) - video = Video.fromguess(filename, parent_path, guessit(path)) - # video = Video('test') - # guessit(path) + video = Video.fromguess(path, guessit(filename)) + + video.subtitles |= set(search_external_subtitles(video.name)) + refine(video) + + # hash of name + # if isinstance(video, Movie): + # if type(video.title) is str and type(video.year) is int: + # home_path = '{} ({})'.format(video.title, video.year) + # hash_str = ''.join([video.title, str(video.year) or '']) + # elif isinstance(video, Episode): + # if type(video.series) is str and type(video.season) is int and type(video.episode) is int: + # home_path = '{} ({})'.format(video.title, video.year) + # hash_str = ''.join([video.series, str(video.season), str(video.episode)]) + # video.hash = hashlib.md5(hash_str.encode()).hexdigest() + # except: + # print(video) return video @@ -56,24 +107,25 @@ def scan_subtitle(path): raise ValueError('Path does not exist') dirpath, filename = os.path.split(path) - logging.info('Scanning subtitle %r in %r', filename, dirpath) + logger.info('Scanning subtitle %r in %r', filename, dirpath) # guess parent_path = path.strip(filename) - subtitle = Subtitle.fromguess(filename, parent_path, guessit(path)) + subtitle = Subtitle.fromguess(parent_path, guessit(path)) return subtitle +def subtitle_path(sibling, subtitle): + parent_path = os.path.dirname(sibling) + return os.path.join(parent_path, os.path.basename(subtitle)) 
-def scan_files(path, age=None, archives=True): +def scan_videos(path): """Scan `path` for videos and their subtitles. See :func:`refine` to find additional information for the video. :param str path: existing directory path to scan. - :param datetime.timedelta age: maximum age of the video or archive. - :param bool archives: scan videos in archives. :return: the scanned videos. :rtype: list of :class:`~subliminal.video.Video` @@ -86,70 +138,62 @@ def scan_files(path, age=None, archives=True): if not os.path.isdir(path): raise ValueError('Path is not a directory') - # walk the path - mediafiles = [] - for dirpath, dirnames, filenames in os.walk(path): - logging.debug('Walking directory %r', dirpath) + # setup progress bar + path_children = 0 + for _ in os.walk(path): path_children += 1 + with click.progressbar(length=path_children, show_pos=True, label='Collecting videos') as bar: - # remove badly encoded and hidden dirnames - for dirname in list(dirnames): - if dirname.startswith('.'): - logging.debug('Skipping hidden dirname %r in %r', dirname, dirpath) - dirnames.remove(dirname) + # walk the path + videos = [] + insufficient_name = [] + errors_path = [] + for dirpath, dirnames, filenames in os.walk(path): + logger.debug('Walking directory %r', dirpath) - # scan for videos - for filename in filenames: - # filter on videos and archives - if not (filename.endswith(VIDEO_EXTENSIONS) or filename.endswith(SUBTITLE_EXTENSIONS) or archives and filename.endswith(ARCHIVE_EXTENSIONS)): - continue + # remove badly encoded and hidden dirnames + for dirname in list(dirnames): + if dirname.startswith('.'): + logger.debug('Skipping hidden dirname %r in %r', dirname, dirpath) + dirnames.remove(dirname) - # skip hidden files - if filename.startswith('.'): - logging.debug('Skipping hidden filename %r in %r', filename, dirpath) - continue - - # reconstruct the file path - filepath = os.path.join(dirpath, filename) - - # skip links - if os.path.islink(filepath): - 
logging.debug('Skipping link %r in %r', filename, dirpath) - continue - - # skip old files - if age and datetime.utcnow() - datetime.utcfromtimestamp(os.path.getmtime(filepath)) > age: - logging.debug('Skipping old file %r in %r', filename, dirpath) - continue - - # scan - if filename.endswith(VIDEO_EXTENSIONS): # video - try: - video = scan_video(filepath) - mediafiles.append(video) - except ValueError: # pragma: no cover - logging.exception('Error scanning video') + # scan for videos + for filename in filenames: + if not (filename.endswith(VIDEO_EXTENSIONS)): + logger.debug('Skipping non-video file %s', filename) continue - elif archives and filename.endswith(ARCHIVE_EXTENSIONS): # archive - print('archive') - pass - # try: - # video = scan_archive(filepath) - # mediafiles.append(video) - # except (NotRarFile, RarCannotExec, ValueError): # pragma: no cover - # logging.exception('Error scanning archive') - # continue - elif filename.endswith(SUBTITLE_EXTENSIONS): # subtitle - try: - subtitle = scan_subtitle(filepath) - mediafiles.append(subtitle) - except ValueError: - logging.exception('Error scanning subtitle') - continue - else: # pragma: no cover - raise ValueError('Unsupported file %r' % filename) + # skip hidden files + if filename.startswith('.'): + logger.debug('Skipping hidden filename %r in %r', filename, dirpath) + continue - return mediafiles + # reconstruct the file path + filepath = os.path.join(dirpath, filename) + + if os.path.islink(filepath): + logger.debug('Skipping link %r in %r', filename, dirpath) + continue + + # scan + if filename.endswith(VIDEO_EXTENSIONS): # video + try: + video = scan_video(filepath) + except InsufficientNameError as e: + logger.info(e) + insufficient_name.append(filepath) + continue + except ValueError: # pragma: no cover + logger.exception('Error scanning video') + errors_path.append(filepath) + continue + else: # pragma: no cover + raise ValueError('Unsupported file %r' % filename) + + videos.append(video) + + 
bar.update(1) + + return videos, insufficient_name, errors_path def organize_files(path): @@ -209,59 +253,78 @@ def save_subtitles(files, single=False, directory=None, encoding=None): print('Moved: %s ---> %s' % (old, newname)) os.rename(old, newname) - print() +def scan_folder(path): + videos = [] + insufficient_name = [] + errored_paths = [] + logger.debug('Collecting path %s', path) - # for hash in files: - # hashIndex = [files[hash]] - # for hashItems in hashIndex: - # for file in hashItems: - # print(file.series) + # non-existing + if not os.path.exists(path): + errored_paths.append(path) + logger.exception("The path '{}' does not exist".format(path)) - # saved_subtitles = [] - # for subtitle in files: - # # check content - # if subtitle.name is None: - # logging.error('Skipping subtitle %r: no content', subtitle) - # continue + # file + # if path is a file + if os.path.isfile(path): + logger.info('Path is a file') - # # check language - # if subtitle.language in set(s.language for s in saved_subtitles): - # logging.debug('Skipping subtitle %r: language already saved', subtitle) - # continue + try: + video = scan_video(path) + videos.append(video) - # # create subtitle path - # subtitle_path = get_subtitle_path(video.name, None if single else subtitle.language) - # if directory is not None: - # subtitle_path = os.path.join(directory, os.path.split(subtitle_path)[1]) + except InsufficientNameError as e: + logger.info(e) + insufficient_name.append(path) - # # save content as is or in the specified encoding - # logging.info('Saving %r to %r', subtitle, subtitle_path) - # if encoding is None: - # with io.open(subtitle_path, 'wb') as f: - # f.write(subtitle.content) - # else: - # with io.open(subtitle_path, 'w', encoding=encoding) as f: - # f.write(subtitle.text) - # saved_subtitles.append(subtitle) + # directories + if os.path.isdir(path): + logger.info('Path is a directory') - # # check single - # if single: - # break + scanned_videos = [] + try: + videos, 
insufficient_name, errored_paths = scan_videos(path) + except: + logger.exception('Unexpected error while collecting directory path %s', path) + errored_paths.append(path) - # return saved_subtitles + click.echo('%s video%s collected / %s file%s with insufficient name / %s error%s' % ( + click.style(str(len(videos)), bold=True, fg='green' if videos else None), + 's' if len(videos) > 1 else '', + click.style(str(len(insufficient_name)), bold=True, fg='yellow' if insufficient_name else None), + 's' if len(insufficient_name) > 1 else '', + click.style(str(len(errored_paths)), bold=True, fg='red' if errored_paths else None), + 's' if len(errored_paths) > 1 else '', + )) + return videos, insufficient_name -def main(): - # episodePath = '/Volumes/media/tv/Black Mirror/Black Mirror Season 01/' - episodePath = '/media/hdd1/tv/' +def pickforgirlscouts(video): + if video.sufficientInfo(): + video.moveLocation() + return True - t = tvdb_api.Tvdb() + return False - hashList = organize_files(episodePath) - pprint(hashList) +def moveHome(video): + wantedFilePath = video.wantedFilePath() + dir = os.path.dirname(wantedFilePath) + if not os.path.exists(dir): + logger.info('Creating directory {}'.format(dir)) + os.makedirs(dir) + logger.info("Moving video file from: '{}' to: '{}'".format(video.name, wantedFilePath)) + shutil.move(video.name, wantedFilePath) + for sub in video.subtitles: + if not os.path.isfile(sub): + continue + oldpath = sub + newpath = subtitle_path(wantedFilePath, sub) + logger.info("Moving subtitle file from: '{}' to: '{}'".format(oldpath, newpath)) + shutil.move(oldpath, newpath) -if __name__ == '__main__': - main() +# Give feedback before delete ? 
+def empthDirectory(paths): + pass diff --git a/src/env_variables.py b/src/env_variables.py index 4fd250a..0bb275a 100644 --- a/src/env_variables.py +++ b/src/env_variables.py @@ -1 +1,3 @@ logfile = 'conf/output.log' +MOVIEBASE = '/mnt/mainframe/movies' +SHOWBASE = '/mnt/mainframe/shows' \ No newline at end of file diff --git a/src/exceptions.py b/src/exceptions.py new file mode 100644 index 0000000..692e40a --- /dev/null +++ b/src/exceptions.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3.6 + +class InsufficientNameError(Exception): + pass + diff --git a/src/logger.py b/src/logger.py new file mode 100644 index 0000000..aaa42d3 --- /dev/null +++ b/src/logger.py @@ -0,0 +1,17 @@ +import logging +import env_variables as env + +logging.basicConfig(filename=env.logfile, level=logging.INFO) +logger = logging.getLogger('seasonedParser') +fh = logging.FileHandler(env.logfile) +fh.setLevel(logging.INFO) +sh = logging.StreamHandler() +sh.setLevel(logging.WARNING) + +fh_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +sh_formatter = logging.Formatter('%(levelname)s: %(message)s') +fh.setFormatter(fh_formatter) +sh.setFormatter(sh_formatter) + +logger.addHandler(fh) +logger.addHandler(sh) \ No newline at end of file diff --git a/src/pirateSearch.py b/src/pirateSearch.py index 02b2b18..397aca4 100755 --- a/src/pirateSearch.py +++ b/src/pirateSearch.py @@ -3,7 +3,7 @@ # @Author: KevinMidboe # @Date: 2017-10-12 11:55:03 # @Last Modified by: KevinMidboe -# @Last Modified time: 2017-10-17 00:58:24 +# @Last Modified time: 2017-11-01 16:11:30 import sys, logging, re from urllib import parse, request @@ -28,39 +28,6 @@ RELEASE_TYPES = ('bdremux', 'brremux', 'remux', 'camrip', 'cam') -def sanitize(string, ignore_characters=None, replace_characters=None): - """Sanitize a string to strip special characters. - - :param str string: the string to sanitize. - :param set ignore_characters: characters to ignore. - :return: the sanitized string. 
- :rtype: str - - """ - # only deal with strings - if string is None: - return - - replace_characters = replace_characters or '' - - ignore_characters = ignore_characters or set() - - characters = ignore_characters - if characters: - string = re.sub(r'[%s]' % re.escape(''.join(characters)), replace_characters, string) - - return string - -def return_re_match(string, re_statement): - if string is None: - return - - m = re.search(re_statement, string) - if 'Y-day' in m.group(): - return datetime.datetime.now().strftime('%m-%d %Y') - return sanitize(m.group(), '\xa0', ' ') - - # Should maybe not be able to set values without checking if they are valid? class piratebay(object): def __init__(self, query=None, page=0, sort=None, category=None): @@ -157,7 +124,7 @@ class piratebay(object): print(self.page) # Fetch in parallel - n = self.total_pages + n = pagesToCount(multiple_pages, self.total_pages) while n > 1: torrents_found.extend(self.next_page()) n -= 1 @@ -276,7 +243,7 @@ def chooseCandidate(torrent_list): size, _, size_id = torrent.size.partition(' ') if intersecting_release_types and int(torrent.seed_count) > 0 and float(size) > 4 and size_id == 'GiB': - print('{} : {} : {}'.format(torrent.name, torrent.size, torrent.seed_count)) + print('{} : {} : {} {}'.format(torrent.name, torrent.size, torrent.seed_count, torrent.magnet)) interesting_torrents.append(torrent) # else: # print('Denied match! 
%s : %s : %s' % (torrent.name, torrent.size, torrent.seed_count)) @@ -286,10 +253,11 @@ def chooseCandidate(torrent_list): def searchTorrentSite(query, site='piratebay'): pirate = piratebay() - torrents_found = pirate.search(query, page=0, multiple_pages=0, sort='size') - # pprint(torrents_found) + torrents_found = pirate.search(query, page=0, multiple_pages=5, sort='size') + pprint(torrents_found) candidates = chooseCandidate(torrents_found) - + pprint(candidates) + exit(0) torrents_found = pirate.search(query, page=0, multiple_pages=0, sort='size', category='movies') movie_candidates = chooseCandidate(torrents_found) @@ -308,4 +276,4 @@ def main(): searchTorrentSite(query) if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/src/scandir.py b/src/scandir.py new file mode 100755 index 0000000..9c64eb7 --- /dev/null +++ b/src/scandir.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3.6 +# -*- coding: utf-8 -*- +# @Author: KevinMidboe +# @Date: 2017-10-02 16:29:25 +# @Last Modified by: KevinMidboe +# @Last Modified time: 2018-01-15 17:18:36 + +try: + from os import scandir +except ImportError: + from scandir import scandir # use scandir PyPI module on Python < 3.5 + +import env_variables as env +import multiprocessing as mp +import logging, re, datetime +from guessit import guessit + +from video import VIDEO_EXTENSIONS, Episode, Movie, Video +from subtitle import SUBTITLE_EXTENSIONS, Subtitle, get_subtitle_path + +logging.basicConfig(filename=env.logfile, level=logging.INFO) + +""" Move to utils file """ +def removeLeadingZero(number): + stringedNumber = str(number) + if (len(stringedNumber) > 1 and stringedNumber[0] == '0'): + return int(stringedNumber[1:]) + return int(number) + +class movie(object): + def __init__(self, path, title=None, year=None): + self.path = path + self.title = title + self.year = year + +class Episode(object): + def __init__(self, path, name, title=None, season=None, episode=None): + super(Episode, self).__init__() 
+ self.path = path + self.name = name + self.title = title + self.season = season + self.episode = episode + + @classmethod + def fromname(cls, path, name): + title = cls.findTitle(cls, name) + season = cls.findSeasonNumber(cls, name) + episode = cls.findEpisodeNumber(cls, name) + + return cls(path, name, title, season, episode) + + def findTitle(self, name): + m = re.search("([a-zA-Z0-9\'\.\-\ ])+([sS][0-9]{1,3})", name) + if m: + return re.sub('[\ \.]*[sS][0-9]{1,2}', '', m.group(0)) + + def findSeasonNumber(self, name): + m = re.search('[sS][0-9]{1,2}', name) + if m: + seasonNumber = re.sub('[sS]', '', m.group(0)) + return removeLeadingZero(seasonNumber) + + def findEpisodeNumber(self, name): + m = re.search('[eE][0-9]{1,3}', name) + if m: + episodeNumber = re.sub('[eE]', '', m.group(0)) + return removeLeadingZero(episodeNumber) + +def get_tree_size(path): + """Return total size of files in given path and subdirs.""" + total = 0 + for entry in scandir(path): + if not ('.DS_Store' in entry.path or 'lost+found' in entry.path): + if entry.is_dir(follow_symlinks=False): + total += get_tree_size(entry.path) + else: + total += entry.stat(follow_symlinks=False).st_size + return int(total) + +def scantree(path): + """Recursively yield DirEntry objects for given directory.""" + for entry in scandir(path): + # Skip .DS_Store and lost+found + # TODO have a blacklist here + if not ('.DS_Store' in entry.path or 'lost+found' in entry.path): + if entry.is_dir(follow_symlinks=False): + yield from scantree(entry.path) + else: + yield entry + +# Find all the mediaobjects for a given path +# TODO handle list of path's +def get_objects_for_path(path, archives=None, match=False): + # Declare list to save the media objects found in the given path + hashList = {} + mediaFiles = [] + # All entries given from scantree functoin + for entry in scantree(path): + logging.debug('Looking at file %s', str(entry.name)) + name = entry.name # Pull out name for faster index + + # Skip if not 
corrent media extension + if not (name.endswith(VIDEO_EXTENSIONS) or name.endswith(SUBTITLE_EXTENSIONS) or archives and name.endswith(ARCHIVE_EXTENSIONS)): + continue + + # Skip if the file is a dotfile + if name.startswith('.'): + logging.debug('Skipping hidden file %s' % str(name)) + continue + + # If we have a video, create a class and append to mediaFiles + if name.endswith(VIDEO_EXTENSIONS): # video + episode = Episode.fromname(entry.path, entry.name) + if (episode.title is None): + logging.debug('None found for %s' % name) + continue + + title = re.sub('[\.]', ' ', episode.title) + mediaFiles.append(episode) + + return mediaFiles + +if __name__ == '__main__': + logging.info('Started: %s' % str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S:%f"))) + import sys + from pprint import pprint + total = 0 + missed = 0 + + # print(get_tree_size(sys.argv[1] if len(sys.argv) > 1 else '.')) + # print(str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S:%f"))) + path = sys.argv[1] if len(sys.argv) > 1 else '.' 
+ mediaFiles = get_objects_for_path(path) + getTitle = lambda ep: ep.title + + for ep in mediaFiles: + print(getTitle(ep)) + + + mediaList = [] + for entry in scantree(sys.argv[1] if len(sys.argv) > 1 else '.'): + name = entry.name + manual = Episode.fromname(entry.path, entry.name) + size = int(entry.stat(follow_symlinks=False).st_size) / 1024 / 1024 / 1024 + # print(name + ' : ' + str(round(size, 2)) + 'GB') + + title = manual.title + if title is None: + logging.debug('None found for %s' % (name)) + continue + + title = re.sub('[\.]', ' ', manual.title) + + # try: + # print(name + ' : ' + "%s S%iE%i" % (str(title), manual.season, manual.episode)) + # except TypeError: + # logging.error('Unexpected error: ' + name) + + mediaList.append(manual) + if ('-m' in sys.argv): + guess = guessit(name) + + logging.info('Manual is: {} and guess is {}'.format(title, guess['title'])) + # # if not (guess['season'] == manual.season and guess['episode'] == manual.episode): + if (guess['title'].lower() != title.lower()): + logging.info('Missmatch: %s by manual guess: %s : %s' % (name, guess['title'], title)) + missed += 1 + + total += 1 + + + print('Total: %i, missed was: %i' % (total, missed)) + logging.info('Ended: %s' % str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S:%f"))) + logging.info(' - - - - - - - - - ') diff --git a/src/subtitle.py b/src/subtitle.py index ee36ff1..f377ebb 100644 --- a/src/subtitle.py +++ b/src/subtitle.py @@ -9,13 +9,13 @@ import hashlib from video import Episode, Movie from utils import sanitize -from langdetect import detect +# from langdetect import detect logger = logging.getLogger(__name__) #: Subtitle extensions -SUBTITLE_EXTENSIONS = ('.srt', '.sub') +SUBTITLE_EXTENSIONS = ('.srt') class Subtitle(object): @@ -33,28 +33,214 @@ class Subtitle(object): #: Name of the provider that returns that class of subtitle provider_name = '' - def __init__(self, name, parent_path, series, season, episode, language=None, hash=None, container=None, 
format=None, sdh=False): - #: Language of the subtitle - - self.name = name + def __init__(self, language, hearing_impaired=False, page_link=None, encoding=None): + #: Language of the subtitle + self.language = language - self.parent_path = parent_path - - self.series = series - - self.season = season + #: Whether or not the subtitle is hearing impaired + self.hearing_impaired = hearing_impaired - self.episode = episode + #: URL of the web page from which the subtitle can be downloaded + self.page_link = page_link - self.language=language + #: Content as bytes + self.content = None - self.hash = hash + #: Encoding to decode with when accessing :attr:`text` + self.encoding = None - self.container = container - - self.format = format + # validate the encoding + if encoding: + try: + self.encoding = codecs.lookup(encoding).name + except (TypeError, LookupError): + logger.debug('Unsupported encoding %s', encoding) - self.sdh = sdh + @property + def id(self): + """Unique identifier of the subtitle""" + raise NotImplementedError + + @property + def text(self): + """Content as string + If :attr:`encoding` is None, the encoding is guessed with :meth:`guess_encoding` + """ + if not self.content: + return + + if self.encoding: + return self.content.decode(self.encoding, errors='replace') + + return self.content.decode(self.guess_encoding(), errors='replace') + + def is_valid(self): + """Check if a :attr:`text` is a valid SubRip format. + :return: whether or not the subtitle is valid. + :rtype: bool + """ + if not self.text: + return False + + try: + pysrt.from_string(self.text, error_handling=pysrt.ERROR_RAISE) + except pysrt.Error as e: + if e.args[0] < 80: + return False + + return True + + def guess_encoding(self): + """Guess encoding using the language, falling back on chardet. + :return: the guessed encoding. 
+ :rtype: str + """ + logger.info('Guessing encoding for language %s', self.language) + + # always try utf-8 first + encodings = ['utf-8'] + + # add language-specific encodings + if self.language.alpha3 == 'zho': + encodings.extend(['gb18030', 'big5']) + elif self.language.alpha3 == 'jpn': + encodings.append('shift-jis') + elif self.language.alpha3 == 'ara': + encodings.append('windows-1256') + elif self.language.alpha3 == 'heb': + encodings.append('windows-1255') + elif self.language.alpha3 == 'tur': + encodings.extend(['iso-8859-9', 'windows-1254']) + elif self.language.alpha3 == 'pol': + # Eastern European Group 1 + encodings.extend(['windows-1250']) + elif self.language.alpha3 == 'bul': + # Eastern European Group 2 + encodings.extend(['windows-1251']) + else: + # Western European (windows-1252) + encodings.append('latin-1') + + # try to decode + logger.debug('Trying encodings %r', encodings) + for encoding in encodings: + try: + self.content.decode(encoding) + except UnicodeDecodeError: + pass + else: + logger.info('Guessed encoding %s', encoding) + return encoding + + logger.warning('Could not guess encoding from language') + + # fallback on chardet + encoding = chardet.detect(self.content)['encoding'] + logger.info('Chardet found encoding %s', encoding) + + return encoding + + def get_matches(self, video): + """Get the matches against the `video`. + :param video: the video to get the matches with. + :type video: :class:`~subliminal.video.Video` + :return: matches of the subtitle. + :rtype: set + """ + raise NotImplementedError + + def __hash__(self): + return hash(self.provider_name + '-' + self.id) + + def __repr__(self): + return '<%s %r [%s]>' % (self.__class__.__name__, self.id, self.language) + + +def get_subtitle_path(video_path, language=None, extension='.srt'): + """Get the subtitle path using the `video_path` and `language`. + :param str video_path: path to the video. + :param language: language of the subtitle to put in the path. 
+ :type language: :class:`~babelfish.language.Language` + :param str extension: extension of the subtitle. + :return: path of the subtitle. + :rtype: str + """ + subtitle_root = os.path.splitext(video_path)[0] + + if language: + subtitle_root += '.' + str(language) + + return subtitle_root + extension + + +def guess_matches(video, guess, partial=False): + """Get matches between a `video` and a `guess`. + If a guess is `partial`, the absence information won't be counted as a match. + :param video: the video. + :type video: :class:`~subliminal.video.Video` + :param guess: the guess. + :type guess: dict + :param bool partial: whether or not the guess is partial. + :return: matches between the `video` and the `guess`. + :rtype: set + """ + matches = set() + if isinstance(video, Episode): + # series + if video.series and 'title' in guess and sanitize(guess['title']) == sanitize(video.series): + matches.add('series') + # title + if video.title and 'episode_title' in guess and sanitize(guess['episode_title']) == sanitize(video.title): + matches.add('title') + # season + if video.season and 'season' in guess and guess['season'] == video.season: + matches.add('season') + # episode + if video.episode and 'episode' in guess and guess['episode'] == video.episode: + matches.add('episode') + # year + if video.year and 'year' in guess and guess['year'] == video.year: + matches.add('year') + # count "no year" as an information + if not partial and video.original_series and 'year' not in guess: + matches.add('year') + elif isinstance(video, Movie): + # year + if video.year and 'year' in guess and guess['year'] == video.year: + matches.add('year') + # title + if video.title and 'title' in guess and sanitize(guess['title']) == sanitize(video.title): + matches.add('title') + # release_group + if (video.release_group and 'release_group' in guess and + sanitize_release_group(guess['release_group']) in + get_equivalent_release_groups(sanitize_release_group(video.release_group))): + 
matches.add('release_group') + # resolution + if video.resolution and 'screen_size' in guess and guess['screen_size'] == video.resolution: + matches.add('resolution') + # format + if video.format and 'format' in guess and guess['format'].lower() == video.format.lower(): + matches.add('format') + # video_codec + if video.video_codec and 'video_codec' in guess and guess['video_codec'] == video.video_codec: + matches.add('video_codec') + # audio_codec + if video.audio_codec and 'audio_codec' in guess and guess['audio_codec'] == video.audio_codec: + matches.add('audio_codec') + + return matches + + +def fix_line_ending(content): + """Fix line ending of `content` by changing it to \n. + :param bytes content: content of the subtitle. + :return: the content with fixed line endings. + :rtype: bytes + """ + return content.replace(b'\r\n', b'\n').replace(b'\r', b'\n') + +''' @classmethod def fromguess(cls, name, parent_path, guess): @@ -107,5 +293,4 @@ def get_subtitle_path(subtitles_path, language=None, extension='.srt'): return subtitle_root + extension - - +''' diff --git a/src/test/__init__.py b/src/test/__init__.py new file mode 100644 index 0000000..736a215 --- /dev/null +++ b/src/test/__init__.py @@ -0,0 +1,2 @@ +import sys, os +sys.path.append(os.path.realpath(os.path.dirname(__file__)+"/..")) diff --git a/src/test/test_import.py b/src/test/test_import.py new file mode 100644 index 0000000..476b95a --- /dev/null +++ b/src/test/test_import.py @@ -0,0 +1,5 @@ +import sys, os + +def test_import_env_variables(): + import env_variables as env + assert env.logfile == 'conf/output.log' diff --git a/src/test/test_square.py b/src/test/test_square.py new file mode 100644 index 0000000..419c80b --- /dev/null +++ b/src/test/test_square.py @@ -0,0 +1,9 @@ + +def square(x): + return x * x + +def test_square(): + assert square(2) == 4 + +def test_square_negative(): + assert square(-2) == 4 diff --git a/src/utils.py b/src/utils.py index 2cd6ea7..b9534fe 100644 --- a/src/utils.py +++ 
b/src/utils.py @@ -2,9 +2,13 @@ from datetime import datetime import hashlib import os +import logging import re import struct +from babelfish import Error as BabelfishError, Language +from enzyme import MalformedMKVError, MKV + def sanitize(string, ignore_characters=None): """Sanitize a string to strip special characters. @@ -36,3 +40,96 @@ def sanitize(string, ignore_characters=None): # strip and lower case return string.strip().lower() + +def refine(video, embedded_subtitles=True, **kwargs): + """Refine a video by searching its metadata. + Several :class:`~subliminal.video.Video` attributes can be found: + * :attr:`~subliminal.video.Video.resolution` + * :attr:`~subliminal.video.Video.video_codec` + * :attr:`~subliminal.video.Video.audio_codec` + * :attr:`~subliminal.video.Video.embeded_subtitles` + :param bool embedded_subtitles: search for embedded subtitles. + """ + # skip non existing videos + if not video.exists: + return + + # check extensions + extension = os.path.splitext(video.name)[1] + if extension == '.mkv': + with open(video.name, 'rb') as f: + try: + mkv = MKV(f) + except MalformedMKVError: + logging.error('Failed to parse mkv, malformed file') + return + except KeyError: + logging.error('Key error while opening file, uncompatible mkv container') + return + + # main video track + if mkv.video_tracks: + video_track = mkv.video_tracks[0] + + # resolution + if video_track.height in (480, 720, 1080, 2160): + if video_track.interlaced: + video.resolution = '%di' % video_track.height + else: + video.resolution = '%dp' % video_track.height + logging.debug('Found resolution %s', video.resolution) + + # video codec + if video_track.codec_id == 'V_MPEG4/ISO/AVC': + video.video_codec = 'h264' + logging.debug('Found video_codec %s', video.video_codec) + elif video_track.codec_id == 'V_MPEG4/ISO/SP': + video.video_codec = 'DivX' + logging.debug('Found video_codec %s', video.video_codec) + elif video_track.codec_id == 'V_MPEG4/ISO/ASP': + video.video_codec = 
'XviD' + logging.debug('Found video_codec %s', video.video_codec) + else: + logging.warning('MKV has no video track') + + # main audio track + if mkv.audio_tracks: + audio_track = mkv.audio_tracks[0] + # audio codec + if audio_track.codec_id == 'A_AC3': + video.audio_codec = 'AC3' + logging.debug('Found audio_codec %s', video.audio_codec) + elif audio_track.codec_id == 'A_DTS': + video.audio_codec = 'DTS' + logging.debug('Found audio_codec %s', video.audio_codec) + elif audio_track.codec_id == 'A_AAC': + video.audio_codec = 'AAC' + logging.debug('Found audio_codec %s', video.audio_codec) + else: + logging.warning('MKV has no audio track') + + # subtitle tracks + if mkv.subtitle_tracks: + if embedded_subtitles: + embeded_subtitles = set() + for st in mkv.subtitle_tracks: + if st.language: + try: + embeded_subtitles.add(Language.fromalpha3b(st.language)) + except BabelfishError: + logging.error('Embedded subtitle track language %r is not a valid language', st.language) + embeded_subtitles.add(Language('und')) + elif st.name: + try: + embeded_subtitles.add(Language.fromname(st.name)) + except BabelfishError: + logging.debug('Embedded subtitle track name %r is not a valid language', st.name) + embeded_subtitles.add(Language('und')) + else: + embeded_subtitles.add(Language('und')) + logging.debug('Found embedded subtitle %r', embeded_subtitles) + video.embeded_subtitles |= embeded_subtitles + else: + logging.debug('MKV has no subtitle track') + else: + logging.debug('Unsupported video extension %s', extension) diff --git a/src/video.py b/src/video.py index 68be105..77730e3 100644 --- a/src/video.py +++ b/src/video.py @@ -3,16 +3,23 @@ # @Author: KevinMidboe # @Date: 2017-08-26 08:23:18 # @Last Modified by: KevinMidboe -# @Last Modified time: 2017-09-29 13:56:21 +# @Last Modified time: 2018-05-13 20:50:00 from guessit import guessit import os +import logging +from titlecase import titlecase import hashlib, tvdb_api +import env_variables as env +from exceptions import 
InsufficientNameError + +logger = logging.getLogger('seasonedParser') + #: Video extensions VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.asx', '.avchd', '.avi', '.bik', '.bix', '.box', '.cam', '.dat', '.divx', '.dmf', '.dv', '.dvr-ms', '.evo', '.flc', '.fli', - '.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e', + '.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2v', '.m4e', '.m4v', '.mjp', '.mjpeg', '.mjpg', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx', '.mp4', '.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm' '.ogv', '.omf', '.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv', '.vivo', @@ -24,19 +31,23 @@ class Video(object): :param str name: name or path of the video. :param str format: format of the video (HDTV, WEB-DL, BluRay, ...). :param str release_group: release group of the video. - :param str resolution: resolution of the video stream (480p, 720p, 1080p or 1080i). + :param str resolution: resolution of the video stream (480p, 720p, 1080p or 1080i, 4K). :param str video_codec: codec of the video stream. :param str audio_codec: codec of the main audio stream. - :param str imdb_id: IMDb id of the video. - :param dict hashes: hashes of the video file by provider names. :param int size: size of the video file in bytes. - :param set subtitle_languages: existing subtitle languages. + :param set subtitles: existing subtitle languages. 
""" - def __init__(self, name, format=None, release_group=None, resolution=None, video_codec=None, audio_codec=None, - imdb_id=None, hashes=None, size=None, subtitle_languages=None): + def __init__(self, name, hash=None, size=None, format=None, release_group=None, resolution=None, video_codec=None, audio_codec=None, + subtitles=None, embeded_subtitles=None): #: Name or path of the video self.name = name + #: Hashes of the video file by provider names + self.hash = hash + + #: Size of the video file in bytes + self.size = size + #: Format of the video (HDTV, WEB-DL, BluRay, ...) self.format = format @@ -52,17 +63,11 @@ class Video(object): #: Codec of the main audio stream self.audio_codec = audio_codec - #: IMDb id of the video - self.imdb_id = imdb_id - - #: Hashes of the video file by provider names - self.hashes = hashes or {} - - #: Size of the video file in bytes - self.size = size - #: Existing subtitle languages - self.subtitle_languages = subtitle_languages or set() + self.subtitles = subtitles or set() + + #: Embeded subtitle languages + self.embeded_subtitles = embeded_subtitles or set() @property def exists(self): @@ -78,14 +83,14 @@ class Video(object): return timedelta() @classmethod - def fromguess(cls, name, parent_path, guess): + def fromguess(cls, name, guess): """Create an :class:`Episode` or a :class:`Movie` with the given `name` based on the `guess`. :param str name: name of the video. :param dict guess: guessed data. :raise: :class:`ValueError` if the `type` of the `guess` is invalid """ if guess['type'] == 'episode': - return Episode.fromguess(name, parent_path, guess) + return Episode.fromguess(name, guess) if guess['type'] == 'movie': return Movie.fromguess(name, guess) @@ -106,7 +111,7 @@ class Video(object): return hash(self.name) -class Episode(): +class Episode(Video): """Episode :class:`Video`. :param str series: series of the episode. :param int season: season number of the episode. 
@@ -117,14 +122,9 @@ class Episode(): :param int tvdb_id: TVDB id of the episode. :param \*\*kwargs: additional parameters for the :class:`Video` constructor. """ - def __init__(self, name, parent_path, series, season, episode, year=None, original_series=True, tvdb_id=None, - series_tvdb_id=None, series_imdb_id=None, release_group=None, video_codec=None, container=None, - format=None, screen_size=None, **kwargs): - super(Episode, self).__init__() - - self.name = name - - self.parent_path = parent_path + def __init__(self, name, series, season, episode, title=None, year=None, original_series=True, tvdb_id=None, + series_tvdb_id=None, **kwargs): + super(Episode, self).__init__(name, **kwargs) #: Series of the episode self.series = series @@ -135,6 +135,9 @@ class Episode(): #: Episode number of the episode self.episode = episode + #: Title of the episode + self.title = title + #: Year of series self.year = year @@ -147,85 +150,87 @@ class Episode(): #: TVDB id of the series self.series_tvdb_id = series_tvdb_id - #: IMDb id of the series - self.series_imdb_id = series_imdb_id - - # The release group of the episode - self.release_group = release_group - - # The video vodec of the series - self.video_codec = video_codec - - # The Video container of the episode - self.container = container - - # The Video format of the episode - self.format = format - - # The Video screen_size of the episode - self.screen_size = screen_size - @classmethod - def fromguess(cls, name, parent_path, guess): + def fromguess(cls, name, guess): + logger.info('Guess: {}'.format(guess)) if guess['type'] != 'episode': raise ValueError('The guess must be an episode guess') - if 'title' not in guess or 'episode' not in guess: - raise ValueError('Insufficient data to process the guess') + if 'title' not in guess or 'season' not in guess or 'episode' not in guess: + raise InsufficientNameError('Guess failed to have sufficient data from query: {}'.format(name)) - return cls(name, parent_path, 
guess['title'], guess.get('season', 1), guess['episode'], - year=guess.get('year'), original_series='year' not in guess, release_group=guess.get('release_group'), - video_codec=guess.get('video_codec'), audio_codec=guess.get('audio_codec'), container=guess.get('container'), - format=guess.get('format'), screen_size=guess.get('screen_size')) + if any([isinstance(x, list) for x in [guess['title'], guess['season'], guess['episode']]]): + raise InsufficientNameError('Guess could not be parsed, list values found.') + + return cls(name, guess['title'], guess.get('season', 1), guess['episode'], title=guess.get('episode_title'), + year=guess.get('year'), format=guess.get('format'), original_series='year' not in guess, + release_group=guess.get('release_group'), resolution=guess.get('screen_size'), + video_codec=guess.get('video_codec'), audio_codec=guess.get('audio_codec')) @classmethod def fromname(cls, name): - return cls.fromguess(name, guessit(name, {'type': 'episode'})) + return cls.fromguess(name, guessit(name, {'type': 'episode', 'single_value': True })) - def __hash__(self): - return hashlib.md5("b'{}'".format(str(self.series) + str(self.season) + str(self.episode)).encode()).hexdigest() + def wantedFilePath(self): + series = titlecase(self.series) + grandParent = '{}/{} Season {:02d}'.format(series, series, self.season) + parent = '{} S{:02d}E{:02d}'.format(series, self.season, self.episode) + return os.path.join(env.SHOWBASE, grandParent, parent, os.path.basename(self.name)) - # THE EP NUMBER IS CONVERTED TO STRING AS A QUICK FIX FOR MULTIPLE NUMBERS IN ONE def __repr__(self): - if self.year is None: - return '<%s [%r, %sx%s]>' % (self.__class__.__name__, self.series, self.season, str(self.episode)) + if self.subtitles is not None and len(self.subtitles) > 0: + return '<%s [%r, %dx%s] %s>' % (self.__class__.__name__, self.series, self.season, self.episode, self.subtitles) + return '<%s [%r, %dx%d]>' % (self.__class__.__name__, self.series, self.season, 
self.episode) - return '<%s [%r, %d, %sx%s]>' % (self.__class__.__name__, self.series, self.year, self.season, str(self.episode)) - - - -class Movie(): +class Movie(Video): """Movie :class:`Video`. :param str title: title of the movie. :param int year: year of the movie. :param \*\*kwargs: additional parameters for the :class:`Video` constructor. """ - def __init__(self, name, title, year=None, format=None, **kwargs): - super(Movie, self).__init__() + def __init__(self, name, title, year=None, **kwargs): + super(Movie, self).__init__(name, **kwargs) #: Title of the movie self.title = title #: Year of the movie self.year = year - self.format = format @classmethod def fromguess(cls, name, guess): if guess['type'] != 'movie': raise ValueError('The guess must be a movie guess') - if 'title' not in guess: - raise ValueError('Insufficient data to process the guess') + if 'title' not in guess or 'year' not in guess: + raise InsufficientNameError('Guess failed to have sufficient data from query: {}'.format(name)) return cls(name, guess['title'], format=guess.get('format'), release_group=guess.get('release_group'), resolution=guess.get('screen_size'), video_codec=guess.get('video_codec'), audio_codec=guess.get('audio_codec'), year=guess.get('year')) @classmethod - def fromname(cls, name): + def fromname(cls, name, year): return cls.fromguess(name, guessit(name, {'type': 'movie'})) + def sufficientInfo(self): + t = hasattr(self, "title") + y = hasattr(self, "year") + + if False in [t, y] or None in [self.title, self.year]: + logger.error('{} or {} found to have none value, manual correction required'.format(self.title, self.year)) + return False + if list in [type(self.title), type(self.year)]: + logger.error('{} or {} found to have list value, manual correction required'.format(self.title, self.year)) + return False + + return True + + def wantedFilePath(self): + title = titlecase(self.title) + parent = '{} ({})'.format(title, self.year) + return os.path.join(env.MOVIEBASE, 
parent, os.path.basename(self.name)) + def __repr__(self): if self.year is None: return '<%s [%r]>' % (self.__class__.__name__, self.title) diff --git a/src/walk.py b/src/walk.py new file mode 100755 index 0000000..b922802 --- /dev/null +++ b/src/walk.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3.6 +# -*- coding: utf-8 -*- +# @Author: KevinMidboe +# @Date: 2017-10-02 16:29:25 +# @Last Modified by: KevinMidboe +# @Last Modified time: 2017-10-02 18:07:26 + +import itertools, os +import multiprocessing + +def worker(filename): + print(filename) + +def main(): + with multiprocessing.Pool(48) as Pool: # pool of 48 processes + + walk = os.walk("/Volumes/mainframe/shows/") + fn_gen = itertools.chain.from_iterable((os.path.join(root, file) + for file in files) + for root, dirs, files in walk) + + results_of_work = Pool.map(worker, fn_gen) # this does the parallel processing + +if __name__ == '__main__': + main() \ No newline at end of file