Added langdetect as a last resort when trying to find the subtitle language. It reads the file, strips digits and timestamp punctuation, and guesses the language of the text from the first 1000 characters that remain.
This commit is contained in:
@@ -3,3 +3,4 @@ tvdb_api==2.0
|
||||
hashids==1.2.0
|
||||
enzyme>=0.4.1
|
||||
click>=6.7
|
||||
langdetect>=1.0.7
|
||||
|
||||
11
src/core.py
11
src/core.py
@@ -14,6 +14,7 @@ import tvdb_api
|
||||
import click
|
||||
from pprint import pprint
|
||||
from titlecase import titlecase
|
||||
import langdetect
|
||||
|
||||
import env_variables as env
|
||||
|
||||
@@ -49,6 +50,16 @@ def search_external_subtitles(path, directory=None):
|
||||
except (ValueError, LanguageReverseError):
|
||||
logger.error('Cannot parse language code %r', language_code)
|
||||
|
||||
f = open(p, 'r', encoding='ISO-8859-15')
|
||||
|
||||
pattern = re.compile('[0-9:\,-<>]+')
|
||||
# head = list(islice(f.read(), 10))
|
||||
filecontent = pattern.sub('', f.read())
|
||||
filecontent = filecontent[0:1000]
|
||||
language = langdetect.detect(filecontent)
|
||||
print(language)
|
||||
f.close()
|
||||
|
||||
subtitles[p] = language
|
||||
logger.debug('Found subtitles %r', subtitles)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user