11 Commits

Author        SHA1        Message                          Date
Ali Parlakci  a6997898ce  Update version                   2018-07-23 23:37:27 +03:00
Ali Parlakci  61632c7143  Improve error handling           2018-07-23 23:33:11 +03:00
Ali Parlakçı  9bff3399a8  Typo fix                         2018-07-23 23:19:29 +03:00
Ali Parlakçı  b00d185f67  Update changelog                 2018-07-23 23:18:10 +03:00
Ali Parlakçı  7314e17125  Added erome support              2018-07-23 23:16:56 +03:00
Ali Parlakci  2d334d56bf  remove exclude mode              2018-07-23 22:57:54 +03:00
Ali Parlakçı  974517928f  Update README.md                 2018-07-23 22:07:28 +03:00
Ali Parlakçı  bcae177b1e  Split download function          2018-07-23 22:06:33 +03:00
Ali Parlakci  229def6578  Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit  2018-07-23 18:49:39 +03:00
Ali Parlakci  59b0376d6e  Added directions for frontpage   2018-07-23 18:49:28 +03:00
Ali Parlakçı  cf1dc7d08c  Rename changelog section         2018-07-22 18:16:11 +03:00
6 changed files with 261 additions and 174 deletions


@@ -52,7 +52,13 @@ It should redirect to a page which shows your **imgur_client_id** and **imgur_cl
 - All of the user data is held in **config.json** file which is in a folder named "Bulk Downloader for Reddit" in your **Home** directory. You can edit
 them, there.
 
-## Changelog
+## Changes on *master*
+### [23/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7314e17125aa78fd4e6b28e26fda7ec7db7e0147)
+- Split download() function
+- Added erome support
+- Remove exclude feature
+- Bug fix
+
 ### [22/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a67da461d2fcd70672effcb20c8179e3224091bb)
 - Put log files in a folder named "LOG_FILES"
 - Fixed the bug that makes multireddit mode unusable


@@ -40,8 +40,6 @@ optional arguments:
                         all
   --NoDownload          Just gets the posts and store them in a file for
                         downloading later
-  --exclude {imgur,gfycat,direct,self} [{imgur,gfycat,direct,self} ...]
-                        Do not download specified links
 ```
 
 # Examples

script.py

@@ -13,7 +13,7 @@ import time
 from io import StringIO
 from pathlib import Path, PurePath
 
-from src.downloader import Direct, Gfycat, Imgur, Self
+from src.downloader import Direct, Gfycat, Imgur, Self, Erome
 from src.errors import *
 from src.parser import LinkDesigner
 from src.searcher import getPosts
@@ -22,7 +22,7 @@ from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
 __author__ = "Ali Parlakci"
 __license__ = "GPL"
-__version__ = "1.3.1"
+__version__ = "1.4.0"
 __maintainer__ = "Ali Parlakci"
 __email__ = "parlakciali@gmail.com"
@@ -144,11 +144,6 @@ def parseArguments(arguments=[]):
                         action="store_true",
                         default=False)
 
-    parser.add_argument("--exclude",
-                        nargs="+",
-                        help="Do not download specified links",
-                        choices=["imgur","gfycat","direct","self"],
-                        type=str)
-
     if arguments == []:
         return parser.parse_args()
@@ -242,10 +237,10 @@ class PromptUser:
         if programMode == "subreddit":
-            subredditInput = input("subreddit: ")
+            subredditInput = input("subreddit (enter frontpage for frontpage): ")
             GLOBAL.arguments.subreddit = subredditInput
 
-            while not subredditInput == "":
+            while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
                 subredditInput = input("subreddit: ")
                 GLOBAL.arguments.subreddit += "+" + subredditInput
@@ -253,7 +248,8 @@ class PromptUser:
             GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit.split())
             # DELETE THE PLUS (+) AT THE END
-            GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]
+            if not subredditInput.lower() == "frontpage":
+                GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]
 
             print("\nselect sort type:")
             sortTypes = [
@@ -326,33 +322,6 @@ class PromptUser:
                 GLOBAL.arguments.log = input("\nlog file directory:")
                 if Path(GLOBAL.arguments.log ).is_file():
                     break
 
-        GLOBAL.arguments.exclude = []
-        sites = ["imgur","gfycat","direct","self"]
-
-        excludeInput = input("exclude: ").lower()
-        if excludeInput in sites and excludeInput != "":
-            GLOBAL.arguments.exclude = [excludeInput]
-
-        while not excludeInput == "":
-            while True:
-                excludeInput = input("exclude: ").lower()
-                if not excludeInput in sites or excludeInput in GLOBAL.arguments.exclude:
-                    break
-                elif excludeInput == "":
-                    break
-                else:
-                    GLOBAL.arguments.exclude.append(excludeInput)
-
-        for i in range(len(GLOBAL.arguments.exclude)):
-            if " " in GLOBAL.arguments.exclude[i]:
-                inputWithWhitespace = GLOBAL.arguments.exclude[i]
-                del GLOBAL.arguments.exclude[i]
-                for siteInput in inputWithWhitespace.split():
-                    if siteInput in sites and siteInput not in GLOBAL.arguments.exclude:
-                        GLOBAL.arguments.exclude.append(siteInput)
-
         while True:
             try:
                 GLOBAL.arguments.limit = int(input("\nlimit (0 for none): "))
@@ -472,20 +441,76 @@ def postExists(POST):
     else:
         return False
 
+def downloadPost(SUBMISSION):
+    directory = GLOBAL.directory / SUBMISSION['postSubreddit']
+    global lastRequestTime
+
+    downloaders = {
+        "imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
+    }
+
+    if SUBMISSION['postType'] in downloaders:
+        print(SUBMISSION['postType'].upper())
+
+        if SUBMISSION['postType'] == "imgur":
+
+            while int(time.time() - lastRequestTime) <= 2:
+                pass
+
+            credit = Imgur.get_credits()
+
+            IMGUR_RESET_TIME = credit['UserReset']-time.time()
+            USER_RESET = ("after " \
+                          + str(int(IMGUR_RESET_TIME/60)) \
+                          + " Minutes " \
+                          + str(int(IMGUR_RESET_TIME%60)) \
+                          + " Seconds")
+            print(
+                "Client: {} - User: {} - Reset {}".format(
+                    credit['ClientRemaining'],
+                    credit['UserRemaining'],
+                    USER_RESET
+                )
+            )
+
+            if not (credit['UserRemaining'] == 0 or \
+                    credit['ClientRemaining'] == 0):
+
+                """This block of code is needed
+                """
+                while int(time.time() - lastRequestTime) <= 2:
+                    pass
+
+                lastRequestTime = time.time()
+
+            else:
+                if credit['UserRemaining'] == 0:
+                    KEYWORD = "user"
+                elif credit['ClientRemaining'] == 0:
+                    KEYWORD = "client"
+
+                raise ImgurLimitError('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
+
+        downloaders[SUBMISSION['postType']] (directory,SUBMISSION)
+
+    else:
+        raise NoSuitablePost
+
+    return None
+
 def download(submissions):
     """Analyze list of submissions and call the right function
     to download each one, catch errors, update the log files
     """
 
     subsLenght = len(submissions)
+    global lastRequestTime
     lastRequestTime = 0
     downloadedCount = subsLenght
     duplicates = 0
 
-    BACKUP = {}
-
-    if GLOBAL.arguments.exclude is not None:
-        ToBeDownloaded = GLOBAL.arguments.exclude
-    else:
-        ToBeDownloaded = []
-
     FAILED_FILE = createLogFile("FAILED")
@@ -499,132 +524,46 @@ def download(submissions):
             )
 
         if postExists(submissions[i]):
-            result = False
             print(submissions[i]['postType'].upper())
             print("It already exists")
             duplicates += 1
             downloadedCount -= 1
             continue
 
-        directory = GLOBAL.directory / submissions[i]['postSubreddit']
-
-        if submissions[i]['postType'] == 'imgur' and not 'imgur' in ToBeDownloaded:
-            print("IMGUR",end="")
-
-            while int(time.time() - lastRequestTime) <= 2:
-                pass
-
-            credit = Imgur.get_credits()
-
-            IMGUR_RESET_TIME = credit['UserReset']-time.time()
-            USER_RESET = ("after " \
-                          + str(int(IMGUR_RESET_TIME/60)) \
-                          + " Minutes " \
-                          + str(int(IMGUR_RESET_TIME%60)) \
-                          + " Seconds")
-            print(
-                " => Client: {} - User: {} - Reset {}".format(
-                    credit['ClientRemaining'],
-                    credit['UserRemaining'],
-                    USER_RESET
-                )
-            )
-
-            if not (credit['UserRemaining'] == 0 or \
-                    credit['ClientRemaining'] == 0):
-
-                """This block of code is needed
-                """
-                while int(time.time() - lastRequestTime) <= 2:
-                    pass
-
-                lastRequestTime = time.time()
-
-                try:
-                    Imgur(directory,submissions[i])
-
-                except FileAlreadyExistsError:
-                    print("It already exists")
-                    duplicates += 1
-                    downloadedCount -= 1
-
-                except ImgurLoginError:
-                    print(
-                        "Imgur login failed. Quitting the program "\
-                        "as unexpected errors might occur."
-                    )
-                    sys.exit()
-
-                except Exception as exception:
-                    print(exception)
-                    FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                    downloadedCount -= 1
-
-            else:
-                if credit['UserRemaining'] == 0:
-                    KEYWORD = "user"
-                elif credit['ClientRemaining'] == 0:
-                    KEYWORD = "client"
-
-                print('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
-                FAILED_FILE.add(
-                    {int(i+1):['{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()),
-                               submissions[i]]}
-                )
-                downloadedCount -= 1
-
-        elif submissions[i]['postType'] == 'gfycat' and not 'gfycat' in ToBeDownloaded:
-            print("GFYCAT")
-            try:
-                Gfycat(directory,submissions[i])
-
-            except FileAlreadyExistsError:
-                print("It already exists")
-                duplicates += 1
-                downloadedCount -= 1
-
-            except NotADownloadableLinkError as exception:
-                print(exception)
-                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                downloadedCount -= 1
-
-            except Exception as exception:
-                print(exception)
-                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                downloadedCount -= 1
-
-        elif submissions[i]['postType'] == 'direct' and not 'direct' in ToBeDownloaded:
-            print("DIRECT")
-            try:
-                Direct(directory,submissions[i])
-
-            except FileAlreadyExistsError:
-                print("It already exists")
-                downloadedCount -= 1
-                duplicates += 1
-
-            except Exception as exception:
-                print(exception)
-                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                downloadedCount -= 1
-
-        elif submissions[i]['postType'] == 'self' and not 'self' in ToBeDownloaded:
-            print("SELF")
-            try:
-                Self(directory,submissions[i])
-
-            except FileAlreadyExistsError:
-                print("It already exists")
-                downloadedCount -= 1
-                duplicates += 1
-
-            except Exception as exception:
-                print(exception)
-                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                downloadedCount -= 1
-
-        else:
-            print("No match found, skipping...")
-            downloadedCount -= 1
+        try:
+            downloadPost(submissions[i])
+
+        except FileAlreadyExistsError:
+            print("It already exists")
+            duplicates += 1
+            downloadedCount -= 1
+
+        except ImgurLoginError:
+            print(
+                "Imgur login failed. \nQuitting the program "\
+                "as unexpected errors might occur."
+            )
+            sys.exit()
+
+        except ImgurLimitError as exception:
+            FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
+            downloadedCount -= 1
+
+        except NotADownloadableLinkError as exception:
+            print(exception)
+            FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
+            downloadedCount -= 1
+
+        except NoSuitablePost:
+            print("No match found, skipping...")
+            downloadedCount -= 1
+
+        except Exception as exception:
+            # raise exception
+            print(exception)
+            FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
+            downloadedCount -= 1
 
     if duplicates:
         print("\n There was {} duplicates".format(duplicates))
@@ -705,10 +644,12 @@ if __name__ == "__main__":
         print = printToFile
         GLOBAL.RUN_TIME = time.time()
         main()
+
     except KeyboardInterrupt:
         if GLOBAL.directory is None:
             GLOBAL.directory = Path(".\\")
+
         print("\nQUITTING...")
 
     except Exception as exception:
         if GLOBAL.directory is None:
             GLOBAL.directory = Path(".\\")
 
@@ -716,4 +657,4 @@ if __name__ == "__main__":
                   exc_info=full_exc_info(sys.exc_info()))
         print(log_stream.getvalue())
 
-        input("Press enter to quit\n")
+        input("\nPress enter to quit\n")

src/downloader.py

@@ -2,7 +2,9 @@ import io
 import os
 import sys
 import urllib.request
+from html.parser import HTMLParser
 from pathlib import Path
+from urllib.error import HTTPError
 
 import imgurpython
 from multiprocessing import Queue
@@ -69,6 +71,129 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
     else:
         raise FileAlreadyExistsError
 
+class Erome:
+    def __init__(self,directory,post):
+        try:
+            IMAGES = self.getLinks(post['postURL'])
+        except urllib.error.HTTPError:
+            raise NotADownloadableLinkError("Not a downloadable link")
+
+        imagesLenght = len(IMAGES)
+        howManyDownloaded = imagesLenght
+        duplicates = 0
+
+        if imagesLenght == 1:
+            extension = getExtension(IMAGES[0])
+
+            title = nameCorrector(post['postTitle'])
+            print(title+"_" +post['postId']+extension)
+
+            fileDir = title + "_" + post['postId'] + extension
+            fileDir = directory / fileDir
+
+            tempDir = title + "_" + post['postId'] + '.tmp'
+            tempDir = directory / tempDir
+
+            imageURL = "https:" + IMAGES[0]
+
+            try:
+                getFile(fileDir,tempDir,imageURL)
+            except FileNameTooLong:
+                fileDir = directory / (post['postId'] + extension)
+                tempDir = directory / (post['postId'] + '.tmp')
+                getFile(fileDir,tempDir,imageURL)
+
+        else:
+            title = nameCorrector(post['postTitle'])
+            print(title+"_"+post['postId'],end="\n\n")
+
+            folderDir = directory / (title+"_"+post['postId'])
+
+            try:
+                if not os.path.exists(folderDir):
+                    os.makedirs(folderDir)
+            except FileNotFoundError:
+                folderDir = directory / post['postId']
+                os.makedirs(folderDir)
+
+            for i in range(imagesLenght):
+                extension = getExtension(IMAGES[i])
+
+                fileName = str(i+1)
+                imageURL = "https:" + IMAGES[i]
+
+                fileDir = folderDir / (fileName + extension)
+                tempDir = folderDir / (fileName + ".tmp")
+
+                print(" ({}/{})".format(i+1,imagesLenght))
+                print(" {}".format(fileName+extension))
+
+                try:
+                    getFile(fileDir,tempDir,imageURL,indent=2)
+                    print()
+                except FileAlreadyExistsError:
+                    print(" The file already exists" + " "*10,end="\n\n")
+                    duplicates += 1
+                    howManyDownloaded -= 1
+
+                except Exception as exception:
+                    # raise exception
+                    print("\n Could not get the file")
+                    print(" " + str(exception) + "\n")
+                    exceptionType = exception
+                    howManyDownloaded -= 1
+
+        if duplicates == imagesLenght:
+            raise FileAlreadyExistsError
+        elif howManyDownloaded + duplicates < imagesLenght:
+            raise AlbumNotDownloadedCompletely(
+                "Album Not Downloaded Completely"
+            )
+
+    def getLinks(self,url,lineNumber=129):
+
+        content = []
+        lineNumber = None
+
+        class EromeParser(HTMLParser):
+            tag = None
+            def handle_starttag(self, tag, attrs):
+                self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}
+
+        pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
+
+        """ FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS"""
+        for i in range(len(pageSource)):
+            obj = EromeParser()
+            obj.feed(pageSource[i])
+            tag = obj.tag
+
+            if tag is not None:
+                if "div" in tag:
+                    if "id" in tag["div"]:
+                        if tag["div"]["id"] == "album":
+                            lineNumber = i
+                            break
+
+        for line in pageSource[lineNumber:]:
+            obj = EromeParser()
+            obj.feed(line)
+            tag = obj.tag
+            if tag is not None:
+                if "img" in tag:
+                    if "class" in tag["img"]:
+                        if tag["img"]["class"]=="img-front":
+                            content.append(tag["img"]["src"])
+                elif "source" in tag:
+                    content.append(tag["source"]["src"])
+
+        return [
+            link for link in content \
+            if link.endswith("_480p.mp4") or not link.endswith(".mp4")
+        ]
+
 class Imgur:
     def __init__(self,directory,post):
         self.imgurClient = self.initImgur()
 
@@ -171,7 +296,7 @@ class Imgur:
 
         if duplicates == imagesLenght:
             raise FileAlreadyExistsError
-        elif howManyDownloaded < imagesLenght:
+        elif howManyDownloaded + duplicates < imagesLenght:
             raise AlbumNotDownloadedCompletely(
                 "Album Not Downloaded Completely"
             )
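`Erome.getLinks()` feeds the page source to Python's `html.parser` one line at a time: every start tag overwrites the parser's `tag` attribute with a `{tag: {attribute: value}}` dict, which the caller inspects to find the album `div` and collect `img`/`source` URLs. A self-contained sketch of that technique follows; `TagCollector` and the markup string are invented for illustration.

```python
# Sketch of the HTMLParser technique used by Erome.getLinks() (invented markup).
from html.parser import HTMLParser

class TagCollector(HTMLParser):
    tag = None  # holds the last start tag seen, as {tag: {attribute: value}}
    def handle_starttag(self, tag, attrs):
        self.tag = {tag: {name: value for name, value in attrs}}

parser = TagCollector()
parser.feed('<img class="img-front" src="//example.com/a.jpg">')
if parser.tag is not None and "img" in parser.tag:
    print(parser.tag["img"]["src"])  # prints: //example.com/a.jpg
```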

src/errors.py

@@ -81,3 +81,9 @@ class InvalidSortingType(Exception):
 class FileNotFoundError(Exception):
     pass
 
+class NoSuitablePost(Exception):
+    pass
+
+class ImgurLimitError(Exception):
+    pass
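The two new exception types let the rewritten `download()` loop route failures by type rather than by flags: `downloadPost()` raises, and a matching `except` clause decides whether to log, skip, or quit. A minimal sketch of the pattern, assuming a simplified `process()` with invented trigger conditions:

```python
# Sketch of exception-based routing (trigger conditions are invented).
class NoSuitablePost(Exception):
    pass

class ImgurLimitError(Exception):
    pass

def process(post_type):
    if post_type == "imgur-over-limit":  # hypothetical condition
        raise ImgurLimitError("USER LIMIT EXCEEDED")
    if post_type not in ("imgur", "gfycat", "erome", "direct", "self"):
        raise NoSuitablePost

for post_type in ("imgur", "unknown", "imgur-over-limit"):
    try:
        process(post_type)
        print(post_type, "-> downloaded")
    except NoSuitablePost:
        print(post_type, "-> no match found, skipping")
    except ImgurLimitError as exception:
        print(post_type, "->", exception)
```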

src/searcher.py

@@ -299,6 +299,8 @@ def redditSearcher(posts,SINGLE_POST=False):
     gfycatCount = 0
     global imgurCount
     imgurCount = 0
+    global eromeCount
+    eromeCount = 0
     global directCount
     directCount = 0
     global selfCount
@@ -360,8 +362,15 @@ def redditSearcher(posts,SINGLE_POST=False):
     if not len(subList) == 0:
         print(
             "\nTotal of {} submissions found!\n"\
-            "{} GFYCATs, {} IMGURs, {} DIRECTs and {} SELF POSTS\n"
-            .format(len(subList),gfycatCount,imgurCount,directCount,selfCount)
+            "{} GFYCATs, {} IMGURs, {} EROMEs, {} DIRECTs and {} SELF POSTS\n"
+            .format(
+                len(subList),
+                gfycatCount,
+                imgurCount,
+                eromeCount,
+                directCount,
+                selfCount
+            )
         )
         return subList
     else:
@@ -370,6 +379,7 @@
 def checkIfMatching(submission):
     global gfycatCount
     global imgurCount
+    global eromeCount
     global directCount
     global selfCount
@@ -383,19 +393,20 @@ def checkIfMatching(submission):
     except AttributeError:
         return None
 
-    if ('gfycat' in submission.domain) or \
-       ('imgur' in submission.domain):
-
-        if 'gfycat' in submission.domain:
-            details['postType'] = 'gfycat'
-            gfycatCount += 1
-            return details
-
-        elif 'imgur' in submission.domain:
-            details['postType'] = 'imgur'
-            imgurCount += 1
-            return details
+    if 'gfycat' in submission.domain:
+        details['postType'] = 'gfycat'
+        gfycatCount += 1
+        return details
+
+    elif 'imgur' in submission.domain:
+        details['postType'] = 'imgur'
+        imgurCount += 1
+        return details
+
+    elif 'erome' in submission.domain:
+        details['postType'] = 'erome'
+        eromeCount += 1
+        return details
 
     elif isDirectLink(submission.url) is not False:
         details['postType'] = 'direct'