Added erome support

Ali Parlakçı
2018-07-23 23:16:56 +03:00
committed by GitHub
parent 2d334d56bf
commit 7314e17125
3 changed files with 158 additions and 19 deletions

View File

@@ -13,7 +13,7 @@ import time
 from io import StringIO
 from pathlib import Path, PurePath
-from src.downloader import Direct, Gfycat, Imgur, Self
+from src.downloader import Direct, Gfycat, Imgur, Self, Erome
 from src.errors import *
 from src.parser import LinkDesigner
 from src.searcher import getPosts
@@ -322,7 +322,6 @@ class PromptUser:
             GLOBAL.arguments.log = input("\nlog file directory:")
             if Path(GLOBAL.arguments.log ).is_file():
                 break
-
         while True:
             try:
                 GLOBAL.arguments.limit = int(input("\nlimit (0 for none): "))
@@ -447,7 +446,9 @@ def downloadPost(SUBMISSION):
     global lastRequestTime
-    downloaders = {"imgur":Imgur,"gfycat":Gfycat,"direct":Direct,"self":Self}
+    downloaders = {
+        "imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
+    }
     if SUBMISSION['postType'] in downloaders:
@@ -572,8 +573,6 @@ def download(submissions):
     else:
         print(" Total of {} links downloaded!".format(downloadedCount))
-    return None
 
 def main():
     GLOBAL.arguments = parseArguments()
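
A minimal sketch of how the new "erome" entry is meant to be reached, assuming downloadPost instantiates the selected class as Downloader(directory, post); the actual call site is outside this hunk, and FakeErome below is a hypothetical stand-in for src.downloader.Erome, used only to show the lookup:

from pathlib import Path

class FakeErome:
    """Hypothetical stand-in for src.downloader.Erome."""
    def __init__(self, directory, post):
        print("would download", post["postURL"], "into", directory)

downloaders = {"erome": FakeErome}  # trimmed to the new entry

post = {"postType": "erome",
        "postURL": "https://www.erome.com/a/example"}  # placeholder URL
if post["postType"] in downloaders:
    downloaders[post["postType"]](Path("downloads"), post)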

View File

@@ -2,6 +2,7 @@ import io
 import os
 import sys
 import urllib.request
+from html.parser import HTMLParser
 from pathlib import Path
 
 import imgurpython
@@ -69,6 +70,134 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
     else:
         raise FileAlreadyExistsError
 
+class Erome:
+    def __init__(self,directory,post):
+        # try:
+        #     IMAGES = self.getLinks(post['postURL'])
+        # except IndexError:
+        #     # raise NotADownloadableLinkError("Could not read the page source")
+        #     pass
+        # except Exception as exception:
+        #     pass
+        #     # raise NotADownloadableLinkError("Could not read the page source")
+        IMAGES = self.getLinks(post['postURL'])
+
+        imagesLenght = len(IMAGES)
+        howManyDownloaded = imagesLenght
+        duplicates = 0
+
+        if imagesLenght == 1:
+            # Single file: name it after the post title and id
+            extension = getExtension(IMAGES[0])
+
+            title = nameCorrector(post['postTitle'])
+            print(title+"_" +post['postId']+extension)
+
+            fileDir = title + "_" + post['postId'] + extension
+            fileDir = directory / fileDir
+
+            tempDir = title + "_" + post['postId'] + '.tmp'
+            tempDir = directory / tempDir
+
+            imageURL = "https:" + IMAGES[0]
+
+            try:
+                getFile(fileDir,tempDir,imageURL)
+            except FileNameTooLong:
+                fileDir = directory / (post['postId'] + extension)
+                tempDir = directory / (post['postId'] + '.tmp')
+                getFile(fileDir,tempDir,imageURL)
+
+        else:
+            # Album: create a folder named after the post and number the files
+            title = nameCorrector(post['postTitle'])
+            print(title+"_"+post['postId'],end="\n\n")
+
+            folderDir = directory / (title+"_"+post['postId'])
+
+            try:
+                if not os.path.exists(folderDir):
+                    os.makedirs(folderDir)
+            except FileNotFoundError:
+                folderDir = directory / post['postId']
+                os.makedirs(folderDir)
+
+            for i in range(imagesLenght):
+                extension = getExtension(IMAGES[i])
+
+                fileName = str(i+1)
+                imageURL = "https:" + IMAGES[i]
+
+                fileDir = folderDir / (fileName + extension)
+                tempDir = folderDir / (fileName + ".tmp")
+
+                print(" ({}/{})".format(i+1,imagesLenght))
+                print(" {}".format(fileName+extension))
+
+                try:
+                    getFile(fileDir,tempDir,imageURL,indent=2)
+                    print()
+                except FileAlreadyExistsError:
+                    print(" The file already exists" + " "*10,end="\n\n")
+                    duplicates += 1
+                    howManyDownloaded -= 1
+                except Exception as exception:
+                    # raise exception
+                    print("\n Could not get the file")
+                    print(" " + str(exception) + "\n")
+                    exceptionType = exception
+                    howManyDownloaded -= 1
+
+            if duplicates == imagesLenght:
+                raise FileAlreadyExistsError
+            elif howManyDownloaded + duplicates < imagesLenght:
+                raise AlbumNotDownloadedCompletely(
+                    "Album Not Downloaded Completely"
+                )
+
+    def getLinks(self,url):
+        content = []
+        lineNumber = None
+
+        class EromeParser(HTMLParser):
+            tag = None
+            def handle_starttag(self, tag, attrs):
+                self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}
+
+        pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
+
+        # Find where the album starts so unrelated links are not picked up
+        for i in range(len(pageSource)):
+            obj = EromeParser()
+            obj.feed(pageSource[i])
+            tag = obj.tag
+
+            if tag is not None:
+                if "div" in tag:
+                    if "id" in tag["div"]:
+                        if tag["div"]["id"] == "album":
+                            lineNumber = i
+                            break
+
+        for line in pageSource[lineNumber:]:
+            obj = EromeParser()
+            obj.feed(line)
+            tag = obj.tag
+            if tag is not None:
+                if "img" in tag:
+                    if "class" in tag["img"]:
+                        if tag["img"]["class"]=="img-front":
+                            content.append(tag["img"]["src"])
+                elif "source" in tag:
+                    content.append(tag["source"]["src"])
+
+        return [
+            link for link in content
+            if link.endswith("_480p.mp4") or not link.endswith(".mp4")
+        ]
+
 class Imgur:
     def __init__(self,directory,post):
         self.imgurClient = self.initImgur()
@@ -171,7 +300,7 @@ class Imgur:
             if duplicates == imagesLenght:
                 raise FileAlreadyExistsError
-            elif howManyDownloaded < imagesLenght:
+            elif howManyDownloaded + duplicates < imagesLenght:
                 raise AlbumNotDownloadedCompletely(
                     "Album Not Downloaded Completely"
                 )
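
The getLinks method above scans the page line by line with html.parser, keeps the img tags with class "img-front" and the source tags found after the album div, and then filters video links down to the _480p.mp4 rendition. A self-contained sketch of that extraction, run against an invented HTML snippet rather than a live erome.com page (the real markup may differ):

from html.parser import HTMLParser

SAMPLE = """
<div id="album">
  <img class="img-front" src="//s1.example.com/photo1.jpg">
  <source src="//s1.example.com/clip_480p.mp4">
  <source src="//s1.example.com/clip.mp4">
</div>
"""

class TagGrabber(HTMLParser):
    """Collects the same src attributes EromeParser is used to find."""
    def __init__(self):
        super().__init__()
        self.links = []

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if tag == "img" and attrs.get("class") == "img-front":
            self.links.append(attrs["src"])
        elif tag == "source":
            self.links.append(attrs["src"])

parser = TagGrabber()
parser.feed(SAMPLE)

# Keep images and the 480p video rendition, drop other .mp4 variants,
# mirroring the list comprehension at the end of getLinks.
links = [link for link in parser.links
         if link.endswith("_480p.mp4") or not link.endswith(".mp4")]
print(links)  # ['//s1.example.com/photo1.jpg', '//s1.example.com/clip_480p.mp4']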

View File

@@ -299,6 +299,8 @@ def redditSearcher(posts,SINGLE_POST=False):
     gfycatCount = 0
     global imgurCount
     imgurCount = 0
+    global eromeCount
+    eromeCount = 0
     global directCount
     directCount = 0
     global selfCount
@@ -360,8 +362,15 @@ def redditSearcher(posts,SINGLE_POST=False):
     if not len(subList) == 0:
         print(
             "\nTotal of {} submissions found!\n"\
-            "{} GFYCATs, {} IMGURs, {} DIRECTs and {} SELF POSTS\n"
-            .format(len(subList),gfycatCount,imgurCount,directCount,selfCount)
+            "{} GFYCATs, {} IMGURs, {} EROMEs, {} DIRECTs and {} SELF POSTS\n"
+            .format(
+                len(subList),
+                gfycatCount,
+                imgurCount,
+                eromeCount,
+                directCount,
+                selfCount
+            )
         )
         return subList
     else:
@@ -370,6 +379,7 @@ def redditSearcher(posts,SINGLE_POST=False):
 def checkIfMatching(submission):
     global gfycatCount
     global imgurCount
+    global eromeCount
     global directCount
     global selfCount
@@ -383,19 +393,20 @@ def checkIfMatching(submission):
     except AttributeError:
         return None
 
-    if ('gfycat' in submission.domain) or \
-       ('imgur' in submission.domain):
-
-        if 'gfycat' in submission.domain:
-            details['postType'] = 'gfycat'
-            gfycatCount += 1
-            return details
-
-        elif 'imgur' in submission.domain:
-            details['postType'] = 'imgur'
-            imgurCount += 1
-            return details
+    if 'gfycat' in submission.domain:
+        details['postType'] = 'gfycat'
+        gfycatCount += 1
+        return details
+
+    elif 'imgur' in submission.domain:
+        details['postType'] = 'imgur'
+        imgurCount += 1
+        return details
+
+    elif 'erome' in submission.domain:
+        details['postType'] = 'erome'
+        eromeCount += 1
+        return details
 
     elif isDirectLink(submission.url) is not False:
         details['postType'] = 'direct'
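
The domain checks above decide which downloader a submission is routed to. The same mapping, sketched as a loop over (substring, type) pairs instead of the if/elif chain; detectPostType is not a function in the project and a plain string stands in for the PRAW submission.domain attribute:

def detectPostType(domain):
    """Return the postType keyword for a submission domain, or None."""
    for needle, postType in (("gfycat", "gfycat"),
                             ("imgur", "imgur"),
                             ("erome", "erome")):
        if needle in domain:
            return postType
    return None

print(detectPostType("www.erome.com"))  # erome
print(detectPostType("i.imgur.com"))    # imgur
print(detectPostType("example.com"))    # None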