Snapshot before refactor.
@@ -3,10 +3,10 @@
# @Author: KevinMidboe
# @Date: 2017-08-25 23:22:27
# @Last Modified by: KevinMidboe
# @Last Modified time: 2017-09-29 12:35:24
# @Last Modified time: 2018-05-13 20:54:17

from guessit import guessit
import os, errno
import os, errno,sys
import logging
import tvdb_api
from pprint import pprint
@@ -19,6 +19,7 @@ from utils import sanitize

logging.basicConfig(filename=env.logfile, level=logging.INFO)

from datetime import datetime

#: Supported archive extensions
ARCHIVE_EXTENSIONS = ('.rar',)
@@ -45,7 +46,7 @@ def scan_video(path):
    # guess
    parent_path = path.strip(filename)
    video = Video.fromguess(filename, parent_path, guessit(path))
    # video = Video('test')
    # video = Video(filename)
    # guessit(path)

    return video
@@ -86,6 +87,8 @@ def scan_files(path, age=None, archives=True):
    if not os.path.isdir(path):
        raise ValueError('Path is not a directory')

    name_dict = {}

    # walk the path
    mediafiles = []
    for dirpath, dirnames, filenames in os.walk(path):
@@ -99,11 +102,9 @@ def scan_files(path, age=None, archives=True):

        # scan for videos
        for filename in filenames:
            # filter on videos and archives
            if not (filename.endswith(VIDEO_EXTENSIONS) or filename.endswith(SUBTITLE_EXTENSIONS) or archives and filename.endswith(ARCHIVE_EXTENSIONS)):
                continue

            # skip hidden files
            if filename.startswith('.'):
                logging.debug('Skipping hidden filename %r in %r', filename, dirpath)
                continue
@@ -116,16 +117,19 @@ def scan_files(path, age=None, archives=True):
                logging.debug('Skipping link %r in %r', filename, dirpath)
                continue

            # skip old files
            if age and datetime.utcnow() - datetime.utcfromtimestamp(os.path.getmtime(filepath)) > age:
                logging.debug('Skipping old file %r in %r', filename, dirpath)
                continue

            # scan
            if filename.endswith(VIDEO_EXTENSIONS): # video
                try:
                    video = scan_video(filepath)
                    # try:
                    # name_dict[video.series] += 1
                    # except KeyError:
                    # name_dict[video.series] = 0
                    # except:
                    # print('video did not have attrib series')
                    # pass
                    mediafiles.append(video)

                except ValueError: # pragma: no cover
                    logging.exception('Error scanning video')
                    continue
@@ -138,24 +142,26 @@ def scan_files(path, age=None, archives=True):
            # except (NotRarFile, RarCannotExec, ValueError): # pragma: no cover
            # logging.exception('Error scanning archive')
            # continue
            elif filename.endswith(SUBTITLE_EXTENSIONS): # subtitle
                try:
                    subtitle = scan_subtitle(filepath)
                    mediafiles.append(subtitle)
                except ValueError:
                    logging.exception('Error scanning subtitle')
                    continue
            # elif filename.endswith(SUBTITLE_EXTENSIONS): # subtitle
            # try:
            # subtitle = scan_subtitle(filepath)
            # mediafiles.append(subtitle)
            # except ValueError:
            # logging.exception('Error scanning subtitle')
            # continue
            else: # pragma: no cover
                raise ValueError('Unsupported file %r' % filename)
                print('Skipping unsupported file {}'.format(filename))
                # raise ValueError('Unsupported file %r' % filename)


    pprint(name_dict)
    return mediafiles


def organize_files(path):
    hashList = {}
    mediafiles = scan_files(path)
    # print(mediafiles)
    print(mediafiles)

    for file in mediafiles:
        hashList.setdefault(file.__hash__(),[]).append(file)
@@ -251,10 +257,15 @@ def save_subtitles(files, single=False, directory=None, encoding=None):

    # return saved_subtitles

def stringTime():
    return str(datetime.now().strftime("%Y-%m-%d %H:%M:%S:%f"))


def main():
    # episodePath = '/Volumes/media/tv/Black Mirror/Black Mirror Season 01/'
    episodePath = '/media/hdd1/tv/'
    episodePath = '/Volumes/mainframe/shows/Black Mirror/Black Mirror Season 01/'
    episodePath = '/Volumes/mainframe/shows/The.Voice.S14E24.720p.WEB.x264-TBS[rarbg]'
    episodePath = '/Volumes/mainframe/incomplete'

    t = tvdb_api.Tvdb()
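Note on the organize_files() hunk above: hashList.setdefault(file.__hash__(),[]).append(file) buckets every scanned media object by its hash, presumably so a video and its matching subtitle end up in the same group. A minimal standalone sketch of that idiom; the MediaFile class and its name-stem hash are illustrative stand-ins, not the project's real Video/Subtitle classes:

# Sketch of the setdefault() grouping used in organize_files().
# MediaFile and its stem-based __hash__ are illustrative only.
class MediaFile:
    def __init__(self, path):
        self.path = path
        self.stem = path.rsplit('.', 1)[0]

    def __hash__(self):
        # Hash on the filename stem so 'ep1.mkv' and 'ep1.srt' collide on purpose.
        return hash(self.stem)

files = [MediaFile('ep1.mkv'), MediaFile('ep1.srt'), MediaFile('ep2.mkv')]

buckets = {}
for f in files:
    buckets.setdefault(f.__hash__(), []).append(f)

for group in buckets.values():
    print([g.path for g in group])
# -> ['ep1.mkv', 'ep1.srt'] and ['ep2.mkv']

setdefault() creates the empty list only on the first miss for a key, so the grouping stays a single pass over the scanned files.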
@@ -3,7 +3,7 @@
# @Author: KevinMidboe
# @Date: 2017-10-12 11:55:03
# @Last Modified by: KevinMidboe
# @Last Modified time: 2017-10-17 00:58:24
# @Last Modified time: 2017-11-01 16:11:30

import sys, logging, re
from urllib import parse, request
@@ -28,39 +28,6 @@ RELEASE_TYPES = ('bdremux', 'brremux', 'remux',
                 'camrip', 'cam')


def sanitize(string, ignore_characters=None, replace_characters=None):
    """Sanitize a string to strip special characters.

    :param str string: the string to sanitize.
    :param set ignore_characters: characters to ignore.
    :return: the sanitized string.
    :rtype: str

    """
    # only deal with strings
    if string is None:
        return

    replace_characters = replace_characters or ''

    ignore_characters = ignore_characters or set()

    characters = ignore_characters
    if characters:
        string = re.sub(r'[%s]' % re.escape(''.join(characters)), replace_characters, string)

    return string

def return_re_match(string, re_statement):
    if string is None:
        return

    m = re.search(re_statement, string)
    if 'Y-day' in m.group():
        return datetime.datetime.now().strftime('%m-%d %Y')
    return sanitize(m.group(), '\xa0', ' ')


# Should maybe not be able to set values without checking if they are valid?
class piratebay(object):
    def __init__(self, query=None, page=0, sort=None, category=None):
@@ -157,7 +124,7 @@ class piratebay(object):
        print(self.page)

        # Fetch in parallel
        n = self.total_pages
        n = pagesToCount(multiple_pages, self.total_pages)
        while n > 1:
            torrents_found.extend(self.next_page())
            n -= 1
@@ -276,7 +243,7 @@ def chooseCandidate(torrent_list):

        size, _, size_id = torrent.size.partition(' ')
        if intersecting_release_types and int(torrent.seed_count) > 0 and float(size) > 4 and size_id == 'GiB':
            print('{} : {} : {}'.format(torrent.name, torrent.size, torrent.seed_count))
            print('{} : {} : {} {}'.format(torrent.name, torrent.size, torrent.seed_count, torrent.magnet))
            interesting_torrents.append(torrent)
        # else:
        # print('Denied match! %s : %s : %s' % (torrent.name, torrent.size, torrent.seed_count))
@@ -286,10 +253,11 @@ def chooseCandidate(torrent_list):

def searchTorrentSite(query, site='piratebay'):
    pirate = piratebay()
    torrents_found = pirate.search(query, page=0, multiple_pages=0, sort='size')
    # pprint(torrents_found)
    torrents_found = pirate.search(query, page=0, multiple_pages=5, sort='size')
    pprint(torrents_found)
    candidates = chooseCandidate(torrents_found)

    pprint(candidates)
    exit(0)
    torrents_found = pirate.search(query, page=0, multiple_pages=0, sort='size', category='movies')
    movie_candidates = chooseCandidate(torrents_found)

@@ -308,4 +276,4 @@ def main():
    searchTorrentSite(query)

if __name__ == '__main__':
    main()
    main()
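For the chooseCandidate() hunk above: the size filter leans on str.partition to split a human-readable size such as '12.4 GiB' into a number and a unit before comparing. A self-contained sketch of just that step, with made-up torrent data and the release-type check left out:

# Sketch of the size/seed filter from chooseCandidate(); the data is illustrative.
from collections import namedtuple

Torrent = namedtuple('Torrent', 'name size seed_count')

torrents = [
    Torrent('Show.S01E01.2160p.REMUX', '12.4 GiB', '35'),
    Torrent('Show.S01E01.720p.WEB', '1.1 GiB', '210'),
    Torrent('Show.S01E01.1080p.BluRay', '8.9 GiB', '0'),
]

interesting = []
for torrent in torrents:
    size, _, size_id = torrent.size.partition(' ')  # '12.4 GiB' -> ('12.4', ' ', 'GiB')
    if int(torrent.seed_count) > 0 and float(size) > 4 and size_id == 'GiB':
        interesting.append(torrent)

print([t.name for t in interesting])  # only the seeded release above 4 GiB survives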
seasonedParser/scandir.py (new executable file, 175 lines added)
@@ -0,0 +1,175 @@
#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-
# @Author: KevinMidboe
# @Date: 2017-10-02 16:29:25
# @Last Modified by: KevinMidboe
# @Last Modified time: 2018-01-15 17:18:36

try:
    from os import scandir
except ImportError:
    from scandir import scandir # use scandir PyPI module on Python < 3.5

import env_variables as env
import multiprocessing as mp
import logging, re, datetime
from guessit import guessit

from video import VIDEO_EXTENSIONS, Episode, Movie, Video
from subtitle import SUBTITLE_EXTENSIONS, Subtitle, get_subtitle_path

logging.basicConfig(filename=env.logfile, level=logging.INFO)

""" Move to utils file """
def removeLeadingZero(number):
    stringedNumber = str(number)
    if (len(stringedNumber) > 1 and stringedNumber[0] == '0'):
        return int(stringedNumber[1:])
    return int(number)

class movie(object):
    def __init__(self, path, title=None, year=None):
        self.path = path
        self.title = title
        self.year = year

class Episode(object):
    def __init__(self, path, name, title=None, season=None, episode=None):
        super(Episode, self).__init__()
        self.path = path
        self.name = name
        self.title = title
        self.season = season
        self.episode = episode

    @classmethod
    def fromname(cls, path, name):
        title = cls.findTitle(cls, name)
        season = cls.findSeasonNumber(cls, name)
        episode = cls.findEpisodeNumber(cls, name)

        return cls(path, name, title, season, episode)

    def findTitle(self, name):
        m = re.search("([a-zA-Z0-9\'\.\-\ ])+([sS][0-9]{1,3})", name)
        if m:
            return re.sub('[\ \.]*[sS][0-9]{1,2}', '', m.group(0))

    def findSeasonNumber(self, name):
        m = re.search('[sS][0-9]{1,2}', name)
        if m:
            seasonNumber = re.sub('[sS]', '', m.group(0))
            return removeLeadingZero(seasonNumber)

    def findEpisodeNumber(self, name):
        m = re.search('[eE][0-9]{1,3}', name)
        if m:
            episodeNumber = re.sub('[eE]', '', m.group(0))
            return removeLeadingZero(episodeNumber)

def get_tree_size(path):
    """Return total size of files in given path and subdirs."""
    total = 0
    for entry in scandir(path):
        if not ('.DS_Store' in entry.path or 'lost+found' in entry.path):
            if entry.is_dir(follow_symlinks=False):
                total += get_tree_size(entry.path)
            else:
                total += entry.stat(follow_symlinks=False).st_size
    return int(total)

def scantree(path):
    """Recursively yield DirEntry objects for given directory."""
    for entry in scandir(path):
        # Skip .DS_Store and lost+found
        # TODO have a blacklist here
        if not ('.DS_Store' in entry.path or 'lost+found' in entry.path):
            if entry.is_dir(follow_symlinks=False):
                yield from scantree(entry.path)
            else:
                yield entry

# Find all the media objects for a given path
# TODO handle a list of paths
def get_objects_for_path(path, archives=None, match=False):
    # Declare list to save the media objects found in the given path
    hashList = {}
    mediaFiles = []
    # All entries given from the scantree function
    for entry in scantree(path):
        logging.debug('Looking at file %s', str(entry.name))
        name = entry.name # Pull out name for faster index

        # Skip if not a correct media extension
        if not (name.endswith(VIDEO_EXTENSIONS) or name.endswith(SUBTITLE_EXTENSIONS) or archives and name.endswith(ARCHIVE_EXTENSIONS)):
            continue

        # Skip if the file is a dotfile
        if name.startswith('.'):
            logging.debug('Skipping hidden file %s' % str(name))
            continue

        # If we have a video, create a class and append to mediaFiles
        if name.endswith(VIDEO_EXTENSIONS): # video
            episode = Episode.fromname(entry.path, entry.name)
            if (episode.title is None):
                logging.debug('None found for %s' % name)
                continue

            title = re.sub('[\.]', ' ', episode.title)
            mediaFiles.append(episode)

    return mediaFiles

if __name__ == '__main__':
    logging.info('Started: %s' % str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S:%f")))
    import sys
    from pprint import pprint
    total = 0
    missed = 0

    # print(get_tree_size(sys.argv[1] if len(sys.argv) > 1 else '.'))
    # print(str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S:%f")))
    path = sys.argv[1] if len(sys.argv) > 1 else '.'
    mediaFiles = get_objects_for_path(path)
    getTitle = lambda ep: ep.title

    for ep in mediaFiles:
        print(getTitle(ep))


    mediaList = []
    for entry in scantree(sys.argv[1] if len(sys.argv) > 1 else '.'):
        name = entry.name
        manual = Episode.fromname(entry.path, entry.name)
        size = int(entry.stat(follow_symlinks=False).st_size) / 1024 / 1024 / 1024
        # print(name + ' : ' + str(round(size, 2)) + 'GB')

        title = manual.title
        if title is None:
            logging.debug('None found for %s' % (name))
            continue

        title = re.sub('[\.]', ' ', manual.title)

        # try:
        # print(name + ' : ' + "%s S%iE%i" % (str(title), manual.season, manual.episode))
        # except TypeError:
        # logging.error('Unexpected error: ' + name)

        mediaList.append(manual)
        if ('-m' in sys.argv):
            guess = guessit(name)

            logging.info('Manual is: {} and guess is {}'.format(title, guess['title']))
            # # if not (guess['season'] == manual.season and guess['episode'] == manual.episode):
            if (guess['title'].lower() != title.lower()):
                logging.info('Mismatch: %s by manual guess: %s : %s' % (name, guess['title'], title))
                missed += 1

        total += 1


    print('Total: %i, missed was: %i' % (total, missed))
    logging.info('Ended: %s' % str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S:%f")))
    logging.info(' - - - - - - - - - ')
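To make Episode.fromname() in scandir.py above concrete, here is a standalone walkthrough of the same three regular expressions against a release name that appears earlier in this commit; the expressions are copied from the class rather than imported, so this runs on its own:

# Standalone illustration of the regexes used by Episode.fromname() above.
import re

name = 'The.Voice.S14E24.720p.WEB.x264-TBS[rarbg]'

title_match = re.search("([a-zA-Z0-9\'\.\-\ ])+([sS][0-9]{1,3})", name)
title = re.sub('[\ \.]*[sS][0-9]{1,2}', '', title_match.group(0))          # 'The.Voice'

season = re.sub('[sS]', '', re.search('[sS][0-9]{1,2}', name).group(0))    # '14'
episode = re.sub('[eE]', '', re.search('[eE][0-9]{1,3}', name).group(0))   # '24'

print(re.sub('[\.]', ' ', title), int(season), int(episode))  # The Voice 14 24

The dot-to-space substitution at the end mirrors what get_objects_for_path() does with episode.title before appending the object.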
@@ -3,7 +3,7 @@
# @Author: KevinMidboe
# @Date: 2017-08-26 08:23:18
# @Last Modified by: KevinMidboe
# @Last Modified time: 2017-09-29 13:56:21
# @Last Modified time: 2018-05-13 20:50:00

from guessit import guessit
import os
@@ -12,7 +12,7 @@ import hashlib, tvdb_api
#: Video extensions
VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.asx', '.avchd', '.avi', '.bik',
                    '.bix', '.box', '.cam', '.dat', '.divx', '.dmf', '.dv', '.dvr-ms', '.evo', '.flc', '.fli',
                    '.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e',
                    '.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2v', '.m4e',
                    '.m4v', '.mjp', '.mjpeg', '.mjpg', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx', '.mp4',
                    '.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm' '.ogv', '.omf',
                    '.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv', '.vivo',
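Elsewhere in this commit these tuples are consumed through calls like filename.endswith(VIDEO_EXTENSIONS); str.endswith accepts a tuple of suffixes, so one call covers every listed extension. Worth noting when reading the tuple: adjacent string literals such as '.ogm' '.ogv' are concatenated by Python into a single '.ogm.ogv' entry, so a missing comma silently changes which suffixes match. A trimmed illustration:

# str.endswith() accepts a tuple of suffixes, which is how VIDEO_EXTENSIONS
# and SUBTITLE_EXTENSIONS are used throughout this commit. Trimmed example:
VIDEO_EXTENSIONS = ('.mkv', '.mp4', '.avi')

for filename in ('episode.mkv', 'episode.srt', 'notes.txt'):
    print(filename, filename.endswith(VIDEO_EXTENSIONS))
# episode.mkv True / episode.srt False / notes.txt False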
seasonedParser/walk.py (new executable file, 25 lines added)
@@ -0,0 +1,25 @@
#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-
# @Author: KevinMidboe
# @Date: 2017-10-02 16:29:25
# @Last Modified by: KevinMidboe
# @Last Modified time: 2017-10-02 18:07:26

import itertools, os
import multiprocessing

def worker(filename):
    print(filename)

def main():
    with multiprocessing.Pool(48) as Pool: # pool of 48 processes

        walk = os.walk("/Volumes/mainframe/shows/")
        fn_gen = itertools.chain.from_iterable((os.path.join(root, file)
                                                for file in files)
                                               for root, dirs, files in walk)

        results_of_work = Pool.map(worker, fn_gen) # this does the parallel processing

if __name__ == '__main__':
    main()
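walk.py above fans every path produced by os.walk out to a 48-process pool whose workers only print the filename. A minimal variant of the same pattern, sketched with the pool sized from the CPU count and the root taken from argv; the placeholder worker and the chunksize are illustrative assumptions, not part of this commit:

# Variant of the walk.py pattern above: same chain-of-walk generator,
# but the pool size follows the CPU count and the root comes from argv.
import itertools, multiprocessing, os, sys

def worker(filename):
    return len(filename)  # placeholder work; walk.py just prints the name

def main(root):
    walk = os.walk(root)
    paths = itertools.chain.from_iterable(
        (os.path.join(dirpath, name) for name in files)
        for dirpath, dirs, files in walk)

    with multiprocessing.Pool(os.cpu_count()) as pool:
        for result in pool.imap_unordered(worker, paths, chunksize=64):
            pass  # consume results as they arrive

if __name__ == '__main__':
    main(sys.argv[1] if len(sys.argv) > 1 else '.')

imap_unordered streams results as workers finish instead of materializing one big list the way Pool.map does, which matters when the tree is large.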