Snapshot before refactor.

2018-09-14 18:51:40 +02:00
parent 9c82ece666
commit bfd8a2a1f5
5 changed files with 241 additions and 62 deletions
--- a/seasonedParser/core.py
+++ b/seasonedParser/core.py
@@ -3,10 +3,10 @@
 # @Author: KevinMidboe
 # @Date:   2017-08-25 23:22:27
 # @Last Modified by:   KevinMidboe
-# @Last Modified time: 2017-09-29 12:35:24
+# @Last Modified time: 2018-05-13 20:54:17

 from guessit import guessit
-import os, errno
+import os, errno,sys
 import logging
 import tvdb_api
 from pprint import pprint
@@ -19,6 +19,7 @@ from utils import sanitize

 logging.basicConfig(filename=env.logfile, level=logging.INFO)

+from datetime import datetime

 #: Supported archive extensions
 ARCHIVE_EXTENSIONS = ('.rar',)
@@ -45,7 +46,7 @@ def scan_video(path):
    # guess
    parent_path = path.strip(filename)
    video = Video.fromguess(filename, parent_path, guessit(path))
-    # video = Video('test')
+    # video = Video(filename)
    # guessit(path)

    return video
@@ -86,6 +87,8 @@ def scan_files(path, age=None, archives=True):
    if not os.path.isdir(path):
        raise ValueError('Path is not a directory')

+    name_dict = {}
+
    # walk the path
    mediafiles = []
    for dirpath, dirnames, filenames in os.walk(path):
@@ -99,11 +102,9 @@ def scan_files(path, age=None, archives=True):

        # scan for videos
        for filename in filenames:
-            # filter on videos and archives
            if not (filename.endswith(VIDEO_EXTENSIONS) or filename.endswith(SUBTITLE_EXTENSIONS) or archives and filename.endswith(ARCHIVE_EXTENSIONS)):
                continue

-            # skip hidden files
            if filename.startswith('.'):
                logging.debug('Skipping hidden filename %r in %r', filename, dirpath)
                continue
@@ -116,16 +117,19 @@ def scan_files(path, age=None, archives=True):
                logging.debug('Skipping link %r in %r', filename, dirpath)
                continue

-            # skip old files
-            if age and datetime.utcnow() - datetime.utcfromtimestamp(os.path.getmtime(filepath)) > age:
-                logging.debug('Skipping old file %r in %r', filename, dirpath)
-                continue
-
            # scan
            if filename.endswith(VIDEO_EXTENSIONS):  # video
                try:
                    video = scan_video(filepath)
+                    # try:
+                    #     name_dict[video.series] += 1
+                    # except KeyError:
+                    #     name_dict[video.series] = 0
+                    # except:
+                    #     print('video did not have attrib series')
+                    #     pass
                    mediafiles.append(video)
+
                except ValueError:  # pragma: no cover
                    logging.exception('Error scanning video')
                    continue
@@ -138,24 +142,26 @@ def scan_files(path, age=None, archives=True):
            #    except (NotRarFile, RarCannotExec, ValueError):  # pragma: no cover
            #        logging.exception('Error scanning archive')
            #        continue
-            elif filename.endswith(SUBTITLE_EXTENSIONS): # subtitle
-               try:
-                  subtitle = scan_subtitle(filepath)
-                  mediafiles.append(subtitle)
-               except ValueError: 
-                  logging.exception('Error scanning subtitle')
-                  continue
+            # elif filename.endswith(SUBTITLE_EXTENSIONS): # subtitle
+            #     try:
+            #         subtitle = scan_subtitle(filepath)
+            #         mediafiles.append(subtitle)
+            #     except ValueError: 
+            #         logging.exception('Error scanning subtitle')
+            #         continue
            else:  # pragma: no cover
-                raise ValueError('Unsupported file %r' % filename)
+                print('Skipping unsupported file {}'.format(filename))
+                # raise ValueError('Unsupported file %r' % filename)


+    pprint(name_dict)
    return mediafiles


 def organize_files(path):
   hashList = {}
   mediafiles = scan_files(path)
-   # print(mediafiles)
+   print(mediafiles)

   for file in mediafiles:
        hashList.setdefault(file.__hash__(),[]).append(file)
@@ -251,10 +257,15 @@ def save_subtitles(files, single=False, directory=None, encoding=None):

    # return saved_subtitles

+def stringTime():  # current local time as 'YYYY-MM-DD HH:MM:SS:ffffff'
+    return datetime.now().strftime("%Y-%m-%d %H:%M:%S:%f")
+

 def main():
    # episodePath = '/Volumes/media/tv/Black Mirror/Black Mirror Season 01/'
-    episodePath = '/media/hdd1/tv/'
+    episodePath = '/Volumes/mainframe/shows/Black Mirror/Black Mirror Season 01/'
+    episodePath = '/Volumes/mainframe/shows/The.Voice.S14E24.720p.WEB.x264-TBS[rarbg]'
+    episodePath = '/Volumes/mainframe/incomplete'

    t = tvdb_api.Tvdb()

--- a/seasonedParser/pirateSearch.py
+++ b/seasonedParser/pirateSearch.py
@@ -3,7 +3,7 @@
 # @Author: KevinMidboe
 # @Date:   2017-10-12 11:55:03
 # @Last Modified by:   KevinMidboe
-# @Last Modified time: 2017-10-17 00:58:24
+# @Last Modified time: 2017-11-01 16:11:30

 import sys, logging, re
 from urllib import parse, request
@@ -28,39 +28,6 @@ RELEASE_TYPES = ('bdremux', 'brremux', 'remux',
 	'camrip', 'cam')


-def sanitize(string, ignore_characters=None, replace_characters=None):
-	"""Sanitize a string to strip special characters.
-
-	:param str string: the string to sanitize.
-	:param set ignore_characters: characters to ignore.
-	:return: the sanitized string.
-	:rtype: str
-
-	"""
-	# only deal with strings
-	if string is None:
-		return
-	
-	replace_characters = replace_characters or ''
-
-	ignore_characters = ignore_characters or set()
-
-	characters = ignore_characters
-	if characters:
-		string = re.sub(r'[%s]' % re.escape(''.join(characters)), replace_characters, string)
-
-	return string
-
-def return_re_match(string, re_statement):
-	if string is None:
-		return
-
-	m = re.search(re_statement, string)
-	if 'Y-day' in m.group():
-		return datetime.datetime.now().strftime('%m-%d %Y')
-	return sanitize(m.group(), '\xa0', ' ')
-
-
 # Should maybe not be able to set values without checking if they are valid?
 class piratebay(object):
 	def __init__(self, query=None, page=0, sort=None, category=None):
@@ -157,7 +124,7 @@ class piratebay(object):
 		print(self.page)
 		
 		# Fetch in parallel
-		n = self.total_pages
+		n = pagesToCount(multiple_pages, self.total_pages)
 		while n > 1:
 			torrents_found.extend(self.next_page())
 			n -= 1
@@ -276,7 +243,7 @@ def chooseCandidate(torrent_list):

 		size, _, size_id = torrent.size.partition(' ')
 		if intersecting_release_types and int(torrent.seed_count) > 0 and float(size) > 4 and size_id == 'GiB':
-			print('{} : {} : {}'.format(torrent.name, torrent.size, torrent.seed_count))
+			print('{} : {} : {} {}'.format(torrent.name, torrent.size, torrent.seed_count, torrent.magnet))
 			interesting_torrents.append(torrent)
 		# else:
 		# 	print('Denied match! %s : %s : %s' % (torrent.name, torrent.size, torrent.seed_count))
@@ -286,10 +253,11 @@ def chooseCandidate(torrent_list):

 def searchTorrentSite(query, site='piratebay'):
 	pirate = piratebay()
-	torrents_found = pirate.search(query, page=0, multiple_pages=0, sort='size')
-	# pprint(torrents_found)
+	torrents_found = pirate.search(query, page=0, multiple_pages=5, sort='size')
+	pprint(torrents_found)
 	candidates = chooseCandidate(torrents_found)
-
+	pprint(candidates)
+	exit(0)
 	torrents_found = pirate.search(query, page=0, multiple_pages=0, sort='size', category='movies')
 	movie_candidates = chooseCandidate(torrents_found)

@@ -308,4 +276,4 @@ def main():
 	searchTorrentSite(query)

 if __name__ == '__main__':
-	main()
+	main()
--- a/seasonedParser/scandir.py
+++ b/seasonedParser/scandir.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3.6
+# -*- coding: utf-8 -*-
+# @Author: KevinMidboe
+# @Date:   2017-10-02 16:29:25
+# @Last Modified by:   KevinMidboe
+# @Last Modified time: 2018-01-15 17:18:36
+
+try:
+    from os import scandir
+except ImportError:
+    from scandir import scandir  # use scandir PyPI module on Python < 3.5
+
+import env_variables as env
+import multiprocessing as mp
+import logging, re, datetime
+from guessit import guessit
+
+from video import VIDEO_EXTENSIONS, Episode, Movie, Video
+from subtitle import SUBTITLE_EXTENSIONS, Subtitle, get_subtitle_path
+
+logging.basicConfig(filename=env.logfile, level=logging.INFO)
+
+""" Move to utils file """
+def removeLeadingZero(number):
+    """Return `number` as an int; leading zeros are dropped ('05' -> 5)."""
+    # int() already ignores leading zeros, so the manual slice (which raised
+    # ValueError for values such as 0.5 -> '.5') is unnecessary.
+    return int(number)
+
+class movie(object):
+    """Minimal record describing a movie file: path plus optional title/year."""
+
+    def __init__(self, path, title=None, year=None):
+        self.path, self.title, self.year = path, title, year
+
+class Episode(object):
+    """An episode file whose title/season/episode are parsed from its name."""
+
+    def __init__(self, path, name, title=None, season=None, episode=None):
+        super(Episode, self).__init__()
+        self.path, self.name = path, name
+        self.title, self.season, self.episode = title, season, episode
+
+    @classmethod
+    def fromname(cls, path, name):
+        """Build an Episode from `name`; parts that fail to parse stay None."""
+        # find* never read instance state, so the class is passed as `self`.
+        title, season = cls.findTitle(cls, name), cls.findSeasonNumber(cls, name)
+        return cls(path, name, title, season, cls.findEpisodeNumber(cls, name))
+
+    def findTitle(self, name):
+        """Return the text preceding the 'S<digits>' season tag, or None."""
+        m = re.search(r"([a-zA-Z0-9'.\- ])+([sS][0-9]{1,3})", name)
+        if m:
+            # Same 1-3 digit width as the search, else 'S100' left a stray '0'.
+            return re.sub(r'[ .]*[sS][0-9]{1,3}', '', m.group(0))
+
+    def findSeasonNumber(self, name):
+        """Return the number of the first 'S<digits>' tag in `name`, or None."""
+        m = re.search(r'[sS][0-9]{1,2}', name)
+        if m:
+            return removeLeadingZero(m.group(0)[1:])
+
+    def findEpisodeNumber(self, name):
+        """Return the number of the first 'E<digits>' tag in `name`, or None."""
+        m = re.search(r'[eE][0-9]{1,3}', name)
+        if m:
+            return removeLeadingZero(m.group(0)[1:])
+
+def get_tree_size(path):
+    """Sum the byte sizes of all files below `path`, recursing into subdirs.
+
+    Entries whose path contains '.DS_Store' or 'lost+found' are ignored.
+    """
+    kept = (e for e in scandir(path)
+            if '.DS_Store' not in e.path and 'lost+found' not in e.path)
+    sizes = (get_tree_size(e.path) if e.is_dir(follow_symlinks=False)
+             else e.stat(follow_symlinks=False).st_size for e in kept)
+    return int(sum(sizes))
+
+def scantree(path):
+    """Recursively yield a DirEntry for every non-directory entry below `path`."""
+    for entry in scandir(path):
+        # Skip .DS_Store and lost+found. TODO: move these into a blacklist.
+        if '.DS_Store' in entry.path or 'lost+found' in entry.path:
+            continue
+        if entry.is_dir(follow_symlinks=False):
+            yield from scantree(entry.path)
+        else:
+            yield entry
+
+# Find all the media objects for a given path.
+# TODO: handle a list of paths.
+def get_objects_for_path(path, archives=None, match=False):
+    """Return an Episode for each video file below `path` whose title parses.
+
+    `archives` lets archive files past the extension filter; `match` is unused.
+    """
+    # ARCHIVE_EXTENSIONS is not defined in this module (the bare name raised
+    # NameError once `archives` was set); mirror the tuple from core.py.
+    archive_extensions = ('.rar',)
+    mediaFiles = []
+    for entry in scantree(path):
+        name = entry.name
+        logging.debug('Looking at file %s', name)
+        # Skip anything without a known media extension.
+        if not (name.endswith(VIDEO_EXTENSIONS) or name.endswith(SUBTITLE_EXTENSIONS)
+                or archives and name.endswith(archive_extensions)):
+            continue
+        if name.startswith('.'):
+            logging.debug('Skipping hidden file %s', name)
+            continue
+
+        # Only videos become media objects; subtitles and archives are dropped.
+        if name.endswith(VIDEO_EXTENSIONS):
+            episode = Episode.fromname(entry.path, name)
+            if episode.title is None:
+                logging.debug('None found for %s', name)
+                continue
+            mediaFiles.append(episode)
+
+    return mediaFiles
+
+if __name__ == '__main__':
+    # Ad-hoc comparison of the regex-based Episode parser against guessit.
+    #
+    # Usage: scandir.py [PATH] [-m]
+    #   PATH  directory to scan; sys.argv[1] when present, otherwise '.'.
+    #   -m    when present anywhere in argv, also run guessit on every file
+    #         name and count the titles that differ from the regex result.
+    import sys
+
+    def _timestamp():
+        """Return the current local time formatted for the log lines below."""
+        return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S:%f")
+
+    logging.info('Started: %s', _timestamp())
+    # Counters for the optional guessit comparison.
+    total = 0
+    missed = 0
+    path = sys.argv[1] if len(sys.argv) > 1 else '.'
+
+    # First pass: print the parsed title of every video file found.
+    for ep in get_objects_for_path(path):
+        print(ep.title)
+
+
+    # Second pass: parse every file (any extension) and optionally compare.
+    mediaList = []
+    for entry in scantree(path):
+        name = entry.name
+        manual = Episode.fromname(entry.path, name)
+        if manual.title is None:
+            logging.debug('None found for %s', name)
+            continue
+
+        # Replace dots with spaces before comparing against guessit's title.
+        title = manual.title.replace('.', ' ')
+        mediaList.append(manual)
+
+        if '-m' in sys.argv:
+            # guessit may not detect a title at all; count that as a mismatch
+            # instead of crashing with KeyError on guess['title'].
+            guessed = guessit(name).get('title')
+            logging.info('Manual is: {} and guess is {}'.format(title, guessed))
+            if guessed is None or guessed.lower() != title.lower():
+                logging.info('Missmatch: %s by manual guess: %s : %s',
+                             name, guessed, title)
+                missed += 1
+            total += 1
+
+    # Both totals stay 0 unless -m was given.
+    print('Total: %i, missed was: %i' % (total, missed))
+    logging.info('Ended: %s', _timestamp())
+    logging.info(' - - - - - - - - - ')
--- a/seasonedParser/video.py
+++ b/seasonedParser/video.py
@@ -3,7 +3,7 @@
 # @Author: KevinMidboe
 # @Date:   2017-08-26 08:23:18
 # @Last Modified by:   KevinMidboe
-# @Last Modified time: 2017-09-29 13:56:21
+# @Last Modified time: 2018-05-13 20:50:00

 from guessit import guessit
 import os
@@ -12,7 +12,7 @@ import hashlib, tvdb_api
 #: Video extensions
 VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.asx', '.avchd', '.avi', '.bik',
                    '.bix', '.box', '.cam', '.dat', '.divx', '.dmf', '.dv', '.dvr-ms', '.evo', '.flc', '.fli',
-                    '.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e',
+                    '.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2v', '.m4e',
                    '.m4v', '.mjp', '.mjpeg', '.mjpg', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx', '.mp4',
                    '.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm' '.ogv', '.omf',
                    '.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv', '.vivo',
--- a/seasonedParser/walk.py
+++ b/seasonedParser/walk.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3.6
+# -*- coding: utf-8 -*-
+# @Author: KevinMidboe
+# @Date:   2017-10-02 16:29:25
+# @Last Modified by:   KevinMidboe
+# @Last Modified time: 2017-10-02 18:07:26
+
+import itertools, os
+import multiprocessing
+
+def worker(filename):  # task run in a pool process: print the path it receives
+    print(filename)
+
+def main(root="/Volumes/mainframe/shows/", processes=48):
+    """Print every file path below `root` using a pool of worker processes."""
+    # Lazily yields the full path of each file found by os.walk.
+    paths = (os.path.join(dirpath, filename)
+             for dirpath, _dirs, filenames in os.walk(root)
+             for filename in filenames)
+    with multiprocessing.Pool(processes) as pool:
+        # map() blocks until every path has been handed to worker().
+        pool.map(worker, paths)
+
+if __name__ == '__main__':
+    main()