Added langdetect as a last resort when trying to find the subtitle language. It strips digits and timestamp punctuation from the file's contents, takes the first 1000 characters of the remaining text, and guesses the language of that text.

This commit is contained in:
2018-09-23 20:37:26 +02:00
parent 11f5c16336
commit 55f435109f
2 changed files with 12 additions and 0 deletions

View File

@@ -3,3 +3,4 @@ tvdb_api==2.0
hashids==1.2.0
enzyme>=0.4.1
click>=6.7
langdetect>=1.0.7

View File

@@ -14,6 +14,7 @@ import tvdb_api
import click
from pprint import pprint
from titlecase import titlecase
import langdetect
import env_variables as env
@@ -49,6 +50,16 @@ def search_external_subtitles(path, directory=None):
except (ValueError, LanguageReverseError):
logger.error('Cannot parse language code %r', language_code)
f = open(p, 'r', encoding='ISO-8859-15')
pattern = re.compile('[0-9:\,-<>]+')
# head = list(islice(f.read(), 10))
filecontent = pattern.sub('', f.read())
filecontent = filecontent[0:1000]
language = langdetect.detect(filecontent)
print(language)
f.close()
subtitles[p] = language
logger.debug('Found subtitles %r', subtitles)