Renamed seasonMover to seasonedParser

This commit is contained in:
2017-09-09 16:14:31 +02:00
parent cf7fa70a97
commit e90d9f8073
6 changed files with 0 additions and 0 deletions

263
seasonedParser/core.py Executable file
View File

@@ -0,0 +1,263 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author: KevinMidboe
# @Date: 2017-08-25 23:22:27
# @Last Modified by: KevinMidboe
# @Last Modified time: 2017-09-09 14:57:54
from guessit import guessit
import os, errno
import logging
import tvdb_api
from video import VIDEO_EXTENSIONS, Episode, Movie, Video
from subtitle import SUBTITLE_EXTENSIONS, Subtitle, get_subtitle_path
logger = logging.getLogger(__name__)
#: Supported archive extensions
ARCHIVE_EXTENSIONS = ('.rar',)
@profile
def scan_video(path):
"""Scan a video from a `path`.
:param str path: existing path to the video.
:return: the scanned video.
:rtype: :class:`~subliminal.video.Video`
"""
# check for non-existing path
if not os.path.exists(path):
raise ValueError('Path does not exist')
# check video extension
# if not path.endswith(VIDEO_EXTENSIONS):
# raise ValueError('%r is not a valid video extension' % os.path.splitext(path)[1])
dirpath, filename = os.path.split(path)
logger.info('Scanning video %r in %r', filename, dirpath)
# guess
parent_path = path.strip(filename)
# video = Video.fromguess(filename, parent_path, guessit(path))
video = Video('test')
# guessit(path)
return video
def scan_subtitle(path):
if not os.path.exists(path):
raise ValueError('Path does not exist')
dirpath, filename = os.path.split(path)
logger.info('Scanning video %r in %r', filename, dirpath)
# guess
parent_path = path.strip(filename)
subtitle = Subtitle.fromguess(filename, parent_path, guessit(path))
return subtitle
@profile
def scan_files(path, age=None, archives=True):
"""Scan `path` for videos and their subtitles.
See :func:`refine` to find additional information for the video.
:param str path: existing directory path to scan.
:param datetime.timedelta age: maximum age of the video or archive.
:param bool archives: scan videos in archives.
:return: the scanned videos.
:rtype: list of :class:`~subliminal.video.Video`
"""
# check for non-existing path
if not os.path.exists(path):
raise ValueError('Path does not exist')
# check for non-directory path
if not os.path.isdir(path):
raise ValueError('Path is not a directory')
# walk the path
mediafiles = []
for dirpath, dirnames, filenames in os.walk(path):
logger.debug('Walking directory %r', dirpath)
# remove badly encoded and hidden dirnames
for dirname in list(dirnames):
if dirname.startswith('.'):
logger.debug('Skipping hidden dirname %r in %r', dirname, dirpath)
dirnames.remove(dirname)
# scan for videos
for filename in filenames:
# filter on videos and archives
if not (filename.endswith(VIDEO_EXTENSIONS) or filename.endswith(SUBTITLE_EXTENSIONS) or archives and filename.endswith(ARCHIVE_EXTENSIONS)):
continue
# skip hidden files
if filename.startswith('.'):
logger.debug('Skipping hidden filename %r in %r', filename, dirpath)
continue
# reconstruct the file path
filepath = os.path.join(dirpath, filename)
# skip links
if os.path.islink(filepath):
logger.debug('Skipping link %r in %r', filename, dirpath)
continue
# skip old files
if age and datetime.utcnow() - datetime.utcfromtimestamp(os.path.getmtime(filepath)) > age:
logger.debug('Skipping old file %r in %r', filename, dirpath)
continue
# scan
if filename.endswith(VIDEO_EXTENSIONS): # video
try:
video = scan_video(filepath)
mediafiles.append(video)
except ValueError: # pragma: no cover
logger.exception('Error scanning video')
continue
elif archives and filename.endswith(ARCHIVE_EXTENSIONS): # archive
try:
video = scan_archive(filepath)
mediafiles.append(video)
except (NotRarFile, RarCannotExec, ValueError): # pragma: no cover
logger.exception('Error scanning archive')
continue
elif filename.endswith(SUBTITLE_EXTENSIONS): # subtitle
try:
subtitle = scan_subtitle(filepath)
mediafiles.append(subtitle)
except ValueError:
logger.exception('Error scanning subtitle')
continue
else: # pragma: no cover
raise ValueError('Unsupported file %r' % filename)
return mediafiles
@profile
def organize_files(path):
hashList = {}
mediafiles = scan_files(path)
print(mediafiles)
for file in mediafiles:
hashList.setdefault(file.__hash__(),[]).append(file)
# hashList[file.__hash__()] = file
return hashList
def save_subtitles(files, single=False, directory=None, encoding=None):
t = tvdb_api.Tvdb()
if not isinstance(files, list):
files = [files]
for file in files:
# TODO this should not be done in the loop
dirname = "%s S%sE%s" % (file.series, "%02d" % (file.season), "%02d" % (file.episode))
createParentfolder = not dirname in file.parent_path
if createParentfolder:
dirname = os.path.join(file.parent_path, dirname)
print('Created: %s' % dirname)
try:
os.makedirs(dirname)
except OSError as e:
if e.errno != errno.EEXIST:
raise
# TODO Clean this !
try:
tvdb_episode = t[file.series][file.season][file.episode]
episode_title = tvdb_episode['episodename']
except:
episode_title = ''
old = os.path.join(file.parent_path, file.name)
if file.name.endswith(SUBTITLE_EXTENSIONS):
lang = file.getLanguage()
sdh = '.sdh' if file.sdh else ''
filename = "%s S%sE%s %s%s.%s.%s" % (file.series, "%02d" % (file.season), "%02d" % (file.episode), episode_title, sdh, lang, file.container)
else:
filename = "%s S%sE%s %s.%s" % (file.series, "%02d" % (file.season), "%02d" % (file.episode), episode_title, file.container)
if createParentfolder:
newname = os.path.join(dirname, filename)
else:
newname = os.path.join(file.parent_path, filename)
print('Moved: %s ---> %s' % (old, newname))
os.rename(old, newname)
print()
# for hash in files:
# hashIndex = [files[hash]]
# for hashItems in hashIndex:
# for file in hashItems:
# print(file.series)
# saved_subtitles = []
# for subtitle in files:
# # check content
# if subtitle.name is None:
# logger.error('Skipping subtitle %r: no content', subtitle)
# continue
# # check language
# if subtitle.language in set(s.language for s in saved_subtitles):
# logger.debug('Skipping subtitle %r: language already saved', subtitle)
# continue
# # create subtitle path
# subtitle_path = get_subtitle_path(video.name, None if single else subtitle.language)
# if directory is not None:
# subtitle_path = os.path.join(directory, os.path.split(subtitle_path)[1])
# # save content as is or in the specified encoding
# logger.info('Saving %r to %r', subtitle, subtitle_path)
# if encoding is None:
# with io.open(subtitle_path, 'wb') as f:
# f.write(subtitle.content)
# else:
# with io.open(subtitle_path, 'w', encoding=encoding) as f:
# f.write(subtitle.text)
# saved_subtitles.append(subtitle)
# # check single
# if single:
# break
# return saved_subtitles
def main():
episodePath = '/Volumes/media/tv/Black Mirror/Black Mirror Season 01/'
t = tvdb_api.Tvdb()
hashList = organize_files(episodePath)
pprint(hashList)
if __name__ == '__main__':
main()

29
seasonedParser/seasonGuesser.py Executable file
View File

@@ -0,0 +1,29 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author: KevinMidboe
# @Date: 2017-08-25 23:22:27
# @Last Modified by: KevinMidboe
# @Last Modified time: 2017-09-09 14:59:48
from core import organize_files, save_subtitles
from pprint import pprint
import os
def main():
# path = '/Volumes/media/tv/Black Mirror/Black Mirror Season 01/'
path = '/Volumes/media-1/tv/Community.720p.1080p.WEB-DL.DD5.1.H.264/S04/'
# path = '/Volumes/media/tv/Fargo/Fargo Season 03/'
# path = '/Volumes/media/tv/Rick and Morty/Rick and Morty Season 03/'
# path = '/Volumes/media/movies/Interstellar.2014.1080p.BluRay.REMUX.AVC.DTS-HD.MA.5.1'
os.listdir(path)
files = organize_files(path)
for hashkey in files:
# save_subtitles(files[hashkey], directory=path)
print(files[hashkey], path)
if __name__ == '__main__':
main()

87
seasonedParser/seasonMover.py Executable file
View File

@@ -0,0 +1,87 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author: KevinMidboe
# @Date: 2017-07-11 19:16:23
# @Last Modified by: KevinMidboe
# @Last Modified time: 2017-08-13 12:12:05
import fire, re, os
from pprint import pprint
class seasonMover(object):
''' Moving multiple files to multiple folders with
identifer '''
workingDir = os.getcwd()
mediaExt = ['mkv', 'mp4']
subExt = ['srt']
def create(self, name, interval):
pass
def move(self, fileSyntax, folderName):
episodeRange = self.findInterval(fileSyntax)
episodeList = self.getFiles()
self.motherMover(fileSyntax, folderName, episodeRange)
def getFiles(self):
epDir = os.path.join(self.workingDir)
dirContent = os.listDir(epDir)
fileList = sorted(dirContent)
return fileList
def getEpisodeNumber(self):
m = re.search('[eE][0-9]{1,2}', self.parent)
if m:
return re.sub('[eE]', '', m.group(0))
# def findInterval(self, item):
# if (re.search(r'\((.*)\)', item) is None):
# raise ValueError('Need to declare an identifier e.g. (1..3) in: \n\t' + item)
# start = int(re.search('\((\d+)\.\.', item).group(1))
# end = int(re.search('\.\.(\d+)\)', item).group(1))
# return list(range(start, end+1))
def removeUploadSign(self, file):
match = re.search('-[a-zA-Z\[\]\-]*.[a-z]{3}', file)
if match:
uploader = match.group(0)[:-4]
return re.sub(uploader, '', file)
return file
def motherMover(self, fileSyntax, folderName, episodeRange):
# Call for sub of fileList
# TODO check if range is same as folderContent
# print(fileSyntax, folderName, episodeRange)
mediaFiles = []
subtitleFiles = []
seasonDirectory = sorted(os.listdir(os.path.join(self.workingDir)))
for file in seasonDirectory:
if file[-3:] in self.mediaExt:
mediaFiles.append(file)
elif file[-3:] in self.subExt:
subtitleFiles.append(file)
if (len(mediaFiles) is not len(episodeRange)):
raise ValueError('Range defined does not match directory file content')
for epMedia, epNum in zip(mediaFiles, episodeRange):
leadingZeroNumber = "%02d" % epNum
fileName = re.sub(r'\((.*)\)', leadingZeroNumber, fileSyntax)
oldPath = os.path.join(self.workingDir, epMedia)
newFolder = os.path.join(self.workingDir, folderName + leadingZeroNumber)
newPath = os.path.join(newFolder, self.removeUploadSign(fileName))
# os.makedirs(newFolder)
# os.rename(oldPath, newPath)
print(oldPath + ' --> ' + newPath)
if __name__ == '__main__':
fire.Fire(seasonMover)

112
seasonedParser/subtitle.py Normal file
View File

@@ -0,0 +1,112 @@
# -*- coding: utf-8 -*-
import codecs
import logging
import os
import chardet
import pysrt
import hashlib
from video import Episode, Movie
from utils import sanitize
from langdetect import detect
logger = logging.getLogger(__name__)
#: Subtitle extensions
SUBTITLE_EXTENSIONS = ('.srt', '.sub', '.smi', '.txt', '.ssa', '.ass', '.mpl')
class Subtitle(object):
"""Base class for subtitle.
:param language: language of the subtitle.
:type language: :class:`~babelfish.language.Language`
:param bool hearing_impaired: whether or not the subtitle is hearing impaired.
:param page_link: URL of the web page from which the subtitle can be downloaded.
:type page_link: str
:param encoding: Text encoding of the subtitle.
:type encoding: str
"""
#: Name of the provider that returns that class of subtitle
provider_name = ''
def __init__(self, name, parent_path, series, season, episode, language=None, hash=None, container=None, format=None, sdh=False):
#: Language of the subtitle
self.name = name
self.parent_path = parent_path
self.series = series
self.season = season
self.episode = episode
self.language=language
self.hash = hash
self.container = container
self.format = format
self.sdh = sdh
@classmethod
def fromguess(cls, name, parent_path, guess):
if not guess['type'] == 'movie' or guess['type'] == 'episode':
raise ValueError('The guess must be an episode guess')
if 'title' not in guess:
raise ValueError('Insufficient data to process the guess')
sdh = 'sdh' in name.lower()
if guess['type'] is 'episode':
return cls(name, parent_path, guess.get('title', 1), guess.get('season'), guess['episode'],
container=guess.get('container'), format=guess.get('format'), sdh=sdh)
elif guess['type'] is 'movie':
return cls(name, parent_path, guess.get('title', 1), container=guess.get('container'),
format=guess.get('format'), sdh=sdh)
def getLanguage(self):
f = open(os.path.join(self.parent_path, self.name), 'r', encoding='ISO-8859-15')
language = detect(f.read())
f.close()
return language
def __hash__(self):
return hashlib.md5("b'{}'".format(self.series + str(self.season) + str(self.episode)).encode()).hexdigest()
def __repr__(self):
return '<%s %s [%ix%i]>' % (self.__class__.__name__, self.series, self.season, self.episode)
def get_subtitle_path(subtitles_path, language=None, extension='.srt'):
"""Get the subtitle path using the `subtitles_path` and `language`.
:param str subtitles_path: path to the subtitle.
:param language: language of the subtitle to put in the path.
:type language: :class:`~babelfish.language.Language`
:param str extension: extension of the subtitle.
:return: path of the subtitle.
:rtype: str
"""
subtitle_root = os.path.splitext(subtitles_path)[0]
if language:
subtitle_root += '.' + str(language)
return subtitle_root + extension

38
seasonedParser/utils.py Normal file
View File

@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
from datetime import datetime
import hashlib
import os
import re
import struct
def sanitize(string, ignore_characters=None):
"""Sanitize a string to strip special characters.
:param str string: the string to sanitize.
:param set ignore_characters: characters to ignore.
:return: the sanitized string.
:rtype: str
"""
# only deal with strings
if string is None:
return
ignore_characters = ignore_characters or set()
# replace some characters with one space
characters = {'-', ':', '(', ')', '.'} - ignore_characters
if characters:
string = re.sub(r'[%s]' % re.escape(''.join(characters)), ' ', string)
# remove some characters
characters = {'\''} - ignore_characters
if characters:
string = re.sub(r'[%s]' % re.escape(''.join(characters)), '', string)
# replace multiple spaces with one
string = re.sub(r'\s+', ' ', string)
# strip and lower case
return string.strip().lower()

232
seasonedParser/video.py Normal file
View File

@@ -0,0 +1,232 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author: KevinMidboe
# @Date: 2017-08-26 08:23:18
# @Last Modified by: KevinMidboe
# @Last Modified time: 2017-09-09 14:46:13
from guessit import guessit
import os
import hashlib, tvdb_api
#: Video extensions
VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.asx', '.avchd', '.avi', '.bik',
'.bix', '.box', '.cam', '.dat', '.divx', '.dmf', '.dv', '.dvr-ms', '.evo', '.flc', '.fli',
'.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e',
'.m4v', '.mjp', '.mjpeg', '.mjpg', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx', '.mp4',
'.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm' '.ogv', '.omf',
'.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv', '.vivo',
'.vob', '.vro', '.wm', '.wmv', '.wmx', '.wrap', '.wvx', '.wx', '.x264', '.xvid')
class Video(object):
"""Base class for videos.
Represent a video, existing or not.
:param str name: name or path of the video.
:param str format: format of the video (HDTV, WEB-DL, BluRay, ...).
:param str release_group: release group of the video.
:param str resolution: resolution of the video stream (480p, 720p, 1080p or 1080i).
:param str video_codec: codec of the video stream.
:param str audio_codec: codec of the main audio stream.
:param str imdb_id: IMDb id of the video.
:param dict hashes: hashes of the video file by provider names.
:param int size: size of the video file in bytes.
:param set subtitle_languages: existing subtitle languages.
"""
def __init__(self, name, format=None, release_group=None, resolution=None, video_codec=None, audio_codec=None,
imdb_id=None, hashes=None, size=None, subtitle_languages=None):
#: Name or path of the video
self.name = name
#: Format of the video (HDTV, WEB-DL, BluRay, ...)
self.format = format
#: Release group of the video
self.release_group = release_group
#: Resolution of the video stream (480p, 720p, 1080p or 1080i)
self.resolution = resolution
#: Codec of the video stream
self.video_codec = video_codec
#: Codec of the main audio stream
self.audio_codec = audio_codec
#: IMDb id of the video
self.imdb_id = imdb_id
#: Hashes of the video file by provider names
self.hashes = hashes or {}
#: Size of the video file in bytes
self.size = size
#: Existing subtitle languages
self.subtitle_languages = subtitle_languages or set()
@property
def exists(self):
"""Test whether the video exists"""
return os.path.exists(self.name)
@property
def age(self):
"""Age of the video"""
if self.exists:
return datetime.utcnow() - datetime.utcfromtimestamp(os.path.getmtime(self.name))
return timedelta()
@classmethod
def fromguess(cls, name, parent_path, guess):
"""Create an :class:`Episode` or a :class:`Movie` with the given `name` based on the `guess`.
:param str name: name of the video.
:param dict guess: guessed data.
:raise: :class:`ValueError` if the `type` of the `guess` is invalid
"""
if guess['type'] == 'episode':
return Episode.fromguess(name, parent_path, guess)
if guess['type'] == 'movie':
return Movie.fromguess(name, guess)
raise ValueError('The guess must be an episode or a movie guess')
@classmethod
def fromname(cls, name):
"""Shortcut for :meth:`fromguess` with a `guess` guessed from the `name`.
:param str name: name of the video.
"""
return cls.fromguess(name, guessit(name))
def __repr__(self):
return '<%s [%r]>' % (self.__class__.__name__, self.name)
def __hash__(self):
return hash(self.name)
class Episode():
"""Episode :class:`Video`.
:param str series: series of the episode.
:param int season: season number of the episode.
:param int episode: episode number of the episode.
:param str title: title of the episode.
:param int year: year of the series.
:param bool original_series: whether the series is the first with this name.
:param int tvdb_id: TVDB id of the episode.
:param \*\*kwargs: additional parameters for the :class:`Video` constructor.
"""
def __init__(self, name, parent_path, series, season, episode, year=None, original_series=True, tvdb_id=None,
series_tvdb_id=None, series_imdb_id=None, release_group=None, video_codec=None, container=None,
format=None, screen_size=None, **kwargs):
super(Episode, self).__init__()
self.name = name
self.parent_path = parent_path
#: Series of the episode
self.series = series
#: Season number of the episode
self.season = season
#: Episode number of the episode
self.episode = episode
#: Year of series
self.year = year
#: The series is the first with this name
self.original_series = original_series
#: TVDB id of the episode
self.tvdb_id = tvdb_id
#: TVDB id of the series
self.series_tvdb_id = series_tvdb_id
#: IMDb id of the series
self.series_imdb_id = series_imdb_id
# The release group of the episode
self.release_group = release_group
# The video vodec of the series
self.video_codec = video_codec
# The Video container of the episode
self.container = container
# The Video format of the episode
self.format = format
# The Video screen_size of the episode
self.screen_size = screen_size
@classmethod
def fromguess(cls, name, parent_path, guess):
if guess['type'] != 'episode':
raise ValueError('The guess must be an episode guess')
if 'title' not in guess or 'episode' not in guess:
raise ValueError('Insufficient data to process the guess')
return cls(name, parent_path, guess['title'], guess.get('season', 1), guess['episode'],
year=guess.get('year'), original_series='year' not in guess, release_group=guess.get('release_group'),
video_codec=guess.get('video_codec'), audio_codec=guess.get('audio_codec'), container=guess.get('container'),
format=guess.get('format'), screen_size=guess.get('screen_size'))
@classmethod
def fromname(cls, name):
return cls.fromguess(name, guessit(name, {'type': 'episode'}))
def __hash__(self):
return hashlib.md5("b'{}'".format(self.series + str(self.season) + str(self.episode)).encode()).hexdigest()
def __repr__(self):
if self.year is None:
return '<%s [%r, %dx%d]>' % (self.__class__.__name__, self.series, self.season, self.episode)
return '<%s [%r, %d, %dx%d]>' % (self.__class__.__name__, self.series, self.year, self.season, self.episode)
class Movie():
"""Movie :class:`Video`.
:param str title: title of the movie.
:param int year: year of the movie.
:param \*\*kwargs: additional parameters for the :class:`Video` constructor.
"""
def __init__(self, name, title, year=None, format=None, **kwargs):
super(Movie, self).__init__()
#: Title of the movie
self.title = title
#: Year of the movie
self.year = year
self.format = format
@classmethod
def fromguess(cls, name, guess):
if guess['type'] != 'movie':
raise ValueError('The guess must be a movie guess')
if 'title' not in guess:
raise ValueError('Insufficient data to process the guess')
return cls(name, guess['title'], format=guess.get('format'), release_group=guess.get('release_group'),
resolution=guess.get('screen_size'), video_codec=guess.get('video_codec'),
audio_codec=guess.get('audio_codec'), year=guess.get('year'))
@classmethod
def fromname(cls, name):
return cls.fromguess(name, guessit(name, {'type': 'movie'}))
def __repr__(self):
if self.year is None:
return '<%s [%r]>' % (self.__class__.__name__, self.title)
return '<%s [%r, %d]>' % (self.__class__.__name__, self.title, self.year)