Files
seasonedShows/findStray.py

164 lines
4.2 KiB
Python
Executable File

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author: KevinMidboe
# @Date: 2017-03-04 16:50:09
# @Last Modified by: KevinMidboe
# @Last Modified time: 2017-03-05 17:20:04
import os, sqlite3, re, json
from fuzzywuzzy import process
from langdetect import detect
from pprint import pprint
# Root directory scanned for show folders. Paths are built by plain string
# concatenation (showDir + folder), so the trailing '/' is required.
showDir = '/media/hdd1/tv/'
# SQLite database file passed to sqlite3.connect(); being a relative path it
# is resolved against the current working directory.
dbPath = 'shows.db'
# Three-character name suffixes (without the dot) that mark a media file.
mediaExtensions = ['mkv', 'mp4', 'avi']
# File names (minus their last four characters) that are never treated as media.
mediaExcluders = ['sample', 'RARBG']
# Three-character name suffixes (without the dot) that mark a subtitle file.
subExtensions = ['srt']
def getFuzzyName(query):
    """Return the best fuzzy match for ``query`` among the known show names.

    The value is handed back unchanged from ``process.extractOne``; the
    callers in this file unpack it as ``(showName, score)``.
    """
    knownShows = getShowNames().keys()
    return process.extractOne(query, knownShows)
def removeUploader(mediaItem):
    """Extract the release-group ("uploader") tag from a media file name.

    The tag is the trailing ``-GROUP`` part that sits directly in front of
    the three-character file extension, e.g. ``-GROUP`` in
    ``Show.S01E01.720p-GROUP.mkv``.

    Args:
        mediaItem: File name to inspect.

    Returns:
        The tag including its leading dash, or ``''`` when the name carries
        no such tag.
    """
    # Raw string with an escaped dot: the previous pattern used a bare ``.``
    # (any character), so it could match text without a real extension and
    # then crash on the two-way unpack of ``split('.')``.
    # ``[a-z0-9]`` admits extensions such as ``mp4``; ``$`` pins the match to
    # the actual extension at the end of the name instead of the first dash.
    match = re.search(r'(-[a-zA-Z\[\]\-]*)\.[a-z0-9]{3}$', mediaItem)
    if match:
        return match.group(1)
    return ''
def getLanguage(path, subtitles):
    """Detect the language of a subtitle file.

    Args:
        path: Directory holding the file. It is concatenated as-is with
            ``subtitles``, so it must already end with a path separator.
        subtitles: Name of the subtitle file inside ``path``.

    Returns:
        Whatever ``detect`` (from langdetect) returns for the file's text.
    """
    # The context manager closes the handle even if reading fails; the
    # previous open()/close() pair leaked it on any exception.
    with open(path + subtitles, 'r', encoding='ISO-8859-15') as subtitleFile:
        text = subtitleFile.read()
    return detect(text)
def getShowNames():
    """Load every known show from the ``shows`` table of the database.

    Returns:
        dict mapping each ``show_names`` value to the list
        ``[date_added, date_modified]`` of that row.
    """
    conn = sqlite3.connect(dbPath)
    try:
        rows = conn.execute(
            'SELECT show_names, date_added, date_modified FROM shows'
        ).fetchall()
    finally:
        # Release the connection even when the query itself fails.
        conn.close()
    return {name: [added, modified] for name, added, modified in rows}
def XOR(list1, list2):
    """Return the set of elements found in exactly one of the two iterables."""
    return set(list1).symmetric_difference(list2)
def getByIdentifier(folderItem, identifier):
    """Return the one- or two-digit number that directly follows ``identifier``.

    Example: ``getByIdentifier('Show.S01E02', 'S')`` gives ``'01'``.

    Args:
        folderItem: Folder name to search.
        identifier: Literal marker in front of the number (this file uses
            ``'S'`` for seasons and ``'E'`` for episodes).

    Returns:
        The digits of the first occurrence, as a string.

    Raises:
        IndexError: If ``identifier`` is never followed by a digit.
    """
    # Escape the marker so it is always matched literally, and capture the
    # digits directly instead of stripping the marker off afterwards.
    # TODO Should be more precise than the first occurrence
    match = re.search(re.escape(identifier) + r'([0-9]{1,2})', folderItem)
    if match is None:
        # Same exception type the old ``itemMatch[0]`` lookup produced, but
        # with a message that names the offending input.
        raise IndexError(
            'no number following %r found in %r' % (identifier, folderItem)
        )
    return match.group(1)
def getItemChildren(folder):
    """Sort the entries of ``showDir + folder`` into media, subtitles and trash.

    Returns:
        A ``(media_items, subtitles, trash)`` tuple of lists:
        media entries are ``[name, uploader]``, subtitle entries are
        ``[name, uploader, language]`` and trash entries are bare names.
    """
    folderPath = showDir + folder
    media_items, subtitles, trash = [], [], []

    for entry in os.listdir(folderPath):
        suffix = entry[-3:]
        stem = entry[:-4]

        isMedia = suffix in mediaExtensions and stem not in mediaExcluders
        if isMedia:
            media_items.append([entry, removeUploader(entry)])
            continue

        if suffix in subExtensions:
            uploader = removeUploader(entry)
            language = getLanguage(folderPath + '/', entry)
            subtitles.append([entry, uploader, language])
            continue

        # Everything else, including excluded media files, is trash.
        trash.append(entry)

    return media_items, subtitles, trash
def addToDB(episodeInfo):
    """Insert one stray episode as a row of the ``stray_episodes`` table.

    Args:
        episodeInfo: dict with the keys ``original``, ``full_path``,
            ``name``, ``season``, ``episode``, ``media_items``,
            ``subtitles``, ``trash``, ``tweet_id``, ``reponse_id``,
            ``verified`` and ``moved``. The three list values are stored
            as JSON text.

    Raises:
        KeyError: If ``episodeInfo`` lacks one of the keys above.

    A row that violates a table constraint is reported on stdout and
    otherwise ignored.
    """
    # Build the row before touching the database so that a missing key can
    # no longer leave an open connection behind.
    media_items = json.dumps(episodeInfo['media_items'])
    row = [
        episodeInfo['original'],
        episodeInfo['full_path'],
        episodeInfo['name'],
        episodeInfo['season'],
        episodeInfo['episode'],
        media_items,
        json.dumps(episodeInfo['subtitles']),
        json.dumps(episodeInfo['trash']),
        episodeInfo['tweet_id'],
        episodeInfo['reponse_id'],
        episodeInfo['verified'],
        episodeInfo['moved'],
    ]

    conn = sqlite3.connect(dbPath)
    try:
        print((media_items))
        try:
            conn.execute(
                'INSERT INTO stray_episodes VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                row,
            )
        except sqlite3.IntegrityError:
            print('Episode already registered')
        conn.commit()
    finally:
        # Always release the connection, whatever the insert did.
        conn.close()
def getNewFolderContents():
    """Return the names that are in ``showDir`` or in the database, but not both.

    Entries of ``showDir`` whose name starts with a dot are ignored.
    """
    # TODO Should not do on keys, if empty.
    showNames = getShowNames().keys()
    # TODO Better way to filter non dotfiles, dirread in filter?
    # Should maybe all dirs be checked at start?
    visibleEntries = [
        entry for entry in os.listdir(showDir) if not entry.startswith('.')
    ]
    return XOR(visibleEntries, showNames)
def checkForSingleEpisodes(folderItem):
    """Tell whether a folder name looks like a single episode of a known show.

    The folder name is fuzzy-matched against the known show names. It counts
    as an episode when that show name (each space standing for any single
    character) is directly followed by ``.SxxEyy.`` inside the folder name.

    Args:
        folderItem: Folder name to test.

    Returns:
        True when the episode pattern is found, otherwise False.
    """
    bestMatch = getFuzzyName(folderItem)
    if not bestMatch:
        # Nothing to compare against (presumably no shows registered yet);
        # unpacking the result would have raised here before.
        return False
    showName = bestMatch[0]

    # Escape every word so characters such as '+', '?' or '(' in a show name
    # are matched literally; spaces still map to the '.' wildcard as before.
    namePattern = '.'.join(re.escape(word) for word in showName.split(' '))
    episodePattern = namePattern + r'\.S[0-9]{1,2}E[0-9]{1,2}\.'
    return re.search(episodePattern, folderItem) is not None
def getEpisodeInfo(folderItem):
    """Collect everything known about a stray episode folder and store it.

    The show name comes from the fuzzy match, season and episode from the
    ``S``/``E`` markers in the folder name, and the file lists from the
    folder's contents. The record is written with ``addToDB`` before it is
    returned.

    Returns:
        The dict that was passed to ``addToDB``.
    """
    showName, _score = getFuzzyName(folderItem)
    seasonNumber = getByIdentifier(folderItem, 'S')
    episodeNumber = getByIdentifier(folderItem, 'E')
    mediaFiles, subtitleFiles, leftovers = getItemChildren(folderItem)

    episodeInfo = {
        'original': folderItem,
        'full_path': showDir + folderItem,
        'name': showName,
        'season': seasonNumber,
        'episode': episodeNumber,
        'media_items': mediaFiles,
        'subtitles': subtitleFiles,
        'trash': leftovers,
        'tweet_id': None,
        'reponse_id': None,
        'verified': '0',
        'moved': '0',
    }

    addToDB(episodeInfo)
    return episodeInfo
def findStray():
    """Register and pretty-print every stray single-episode folder."""
    # TODO What if null or tries to pass down error
    for candidate in getNewFolderContents():
        if not checkForSingleEpisodes(candidate):
            continue
        pprint(getEpisodeInfo(candidate))
# Run the scan only when executed as a script, not when imported.
if __name__ == '__main__':
    findStray()