Mirror of https://github.com/KevinMidboe/bulk-downloader-for-reddit.git, synced 2026-01-19 07:36:01 +00:00

Compare commits

23 Commits
| Author | SHA1 | Date |
|---|---|---|
|  | a6997898ce |  |
|  | 61632c7143 |  |
|  | 9bff3399a8 |  |
|  | b00d185f67 |  |
|  | 7314e17125 |  |
|  | 2d334d56bf |  |
|  | 974517928f |  |
|  | bcae177b1e |  |
|  | 229def6578 |  |
|  | 59b0376d6e |  |
|  | cf1dc7d08c |  |
|  | 27532408c1 |  |
|  | 32647beee9 |  |
|  | a67da461d2 |  |
|  | 8c6f593496 |  |
|  | b60ce8a71e |  |
|  | 49920cc457 |  |
|  | c70e7c2ebb |  |
|  | 3931dfff54 |  |
|  | 4a8c2377f9 |  |
|  | 8a18a42a9a |  |
|  | 6c2d748fbc |  |
|  | 8c966df105 |  |
README.md (15 lines changed)

@@ -52,7 +52,20 @@ It should redirect to a page which shows your **imgur_client_id** and **imgur_cl
 - All of the user data is held in **config.json** file which is in a folder named "Bulk Downloader for Reddit" in your **Home** directory. You can edit
 them, there.
 
-## Changelog
+## Changes on *master*
+### [23/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7314e17125aa78fd4e6b28e26fda7ec7db7e0147)
+- Split download() function
+- Added erome support
+- Remove exclude feature
+- Bug fix
+
+### [22/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a67da461d2fcd70672effcb20c8179e3224091bb)
+- Put log files in a folder named "LOG_FILES"
+- Fixed the bug that makes multireddit mode unusable
+
+### [21/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/4a8c2377f9fb4d60ed7eeb8d50aaf9a26492462a)
+- Added exclude mode
+
 ### [20/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/commit/7548a010198fb693841ca03654d2c9bdf5742139)
 - "0" input for no limit
 - Fixed the bug that recognizes none image direct links as image links
@@ -23,7 +23,8 @@ optional arguments:
   --saved               Triggers saved mode
   --submitted           Gets posts of --user
   --upvoted             Gets upvoted posts of --user
-  --log LOG FILE        Triggers log read mode and takes a log file
+  --log LOG FILE        Takes a log file which created by itself (json files),
+                        reads posts and tries downloading them again.
   --subreddit SUBREDDIT [SUBREDDIT ...]
                         Triggers subreddit mode and takes subreddit's name
                         without r/. use "frontpage" for frontpage
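The reworked `--log` help above describes feeding one of the program's own JSON log files back in so failed posts can be retried. As a rough illustration only, a sketch of reading such a file follows; it assumes entries map a submission index to an `[error, submission]` pair, which is the shape the `FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})` calls in the script.py diff further down suggest, and the helper name here is hypothetical.

```python
import json

def postsFromFailedLog(path):
    """Hypothetical sketch: pull submission dicts back out of a FAILED.json-style log."""
    with open(path, "r") as handle:
        log = json.load(handle)

    submissions = []
    for key, value in log.items():
        if key == "HEADER":               # createLogFile() keeps its arguments under this key
            continue
        error_message, submission = value  # assumed [error, submission] pair per entry
        submissions.append(submission)
    return submissions

# The real program would then hand these back to its download loop, e.g.:
# download(postsFromFailedLog("FAILED.json"))
```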
script.py (236 lines changed)
@@ -13,7 +13,7 @@ import time
 from io import StringIO
 from pathlib import Path, PurePath
 
-from src.downloader import Direct, Gfycat, Imgur, Self
+from src.downloader import Direct, Gfycat, Imgur, Self, Erome
 from src.errors import *
 from src.parser import LinkDesigner
 from src.searcher import getPosts
@@ -22,7 +22,7 @@ from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
 
 __author__ = "Ali Parlakci"
 __license__ = "GPL"
-__version__ = "1.2.1"
+__version__ = "1.4.0"
 __maintainer__ = "Ali Parlakci"
 __email__ = "parlakciali@gmail.com"
 
@@ -143,6 +143,7 @@ def parseArguments(arguments=[]):
                              " for downloading later",
                         action="store_true",
                         default=False)
 
+
     if arguments == []:
         return parser.parse_args()
@@ -159,7 +160,10 @@ def checkConflicts():
     else:
         user = 1
 
-    modes = ["saved","subreddit","submitted","search","log","link","upvoted"]
+    modes = [
+        "saved","subreddit","submitted","search","log","link","upvoted",
+        "multireddit"
+    ]
 
     values = {
         x: 0 if getattr(GLOBAL.arguments,x) is None or \
@@ -233,10 +237,10 @@ class PromptUser:
 
         if programMode == "subreddit":
 
-            subredditInput = input("subreddit: ")
+            subredditInput = input("subreddit (enter frontpage for frontpage): ")
             GLOBAL.arguments.subreddit = subredditInput
 
-            while not subredditInput == "":
+            while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
                 subredditInput = input("subreddit: ")
                 GLOBAL.arguments.subreddit += "+" + subredditInput
 
@@ -244,7 +248,8 @@ class PromptUser:
             GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit.split())
 
             # DELETE THE PLUS (+) AT THE END
-            GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]
+            if not subredditInput.lower() == "frontpage":
+                GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]
 
             print("\nselect sort type:")
             sortTypes = [
@@ -265,7 +270,7 @@ class PromptUser:
 
         elif programMode == "multireddit":
             GLOBAL.arguments.user = input("\nredditor: ")
-            GLOBAL.arguments.subreddit = input("\nmultireddit: ")
+            GLOBAL.arguments.multireddit = input("\nmultireddit: ")
 
             print("\nselect sort type:")
             sortTypes = [
@@ -317,7 +322,6 @@ class PromptUser:
             GLOBAL.arguments.log = input("\nlog file directory:")
             if Path(GLOBAL.arguments.log ).is_file():
                 break
-
         while True:
             try:
                 GLOBAL.arguments.limit = int(input("\nlimit (0 for none): "))
@@ -377,6 +381,9 @@ def prepareAttributes():
 
         ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit
 
+    elif GLOBAL.arguments.multireddit is not None:
+        ATTRIBUTES["multireddit"] = GLOBAL.arguments.multireddit
+
     elif GLOBAL.arguments.saved is True:
         ATTRIBUTES["saved"] = True
 
@@ -434,16 +441,76 @@ def postExists(POST):
     else:
         return False
 
+def downloadPost(SUBMISSION):
+    directory = GLOBAL.directory / SUBMISSION['postSubreddit']
+
+    global lastRequestTime
+
+    downloaders = {
+        "imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
+    }
+
+    if SUBMISSION['postType'] in downloaders:
+
+        print(SUBMISSION['postType'].upper())
+
+        if SUBMISSION['postType'] == "imgur":
+
+            if int(time.time() - lastRequestTime) <= 2:
+                pass
+
+            credit = Imgur.get_credits()
+
+            IMGUR_RESET_TIME = credit['UserReset']-time.time()
+            USER_RESET = ("after " \
+                          + str(int(IMGUR_RESET_TIME/60)) \
+                          + " Minutes " \
+                          + str(int(IMGUR_RESET_TIME%60)) \
+                          + " Seconds")
+
+            print(
+                "Client: {} - User: {} - Reset {}".format(
+                    credit['ClientRemaining'],
+                    credit['UserRemaining'],
+                    USER_RESET
+                )
+            )
+
+            if not (credit['UserRemaining'] == 0 or \
+                    credit['ClientRemaining'] == 0):
+
+                """This block of code is needed
+                """
+                if int(time.time() - lastRequestTime) <= 2:
+                    pass
+
+                lastRequestTime = time.time()
+
+            else:
+                if credit['UserRemaining'] == 0:
+                    KEYWORD = "user"
+                elif credit['ClientRemaining'] == 0:
+                    KEYWORD = "client"
+
+                raise ImgurLimitError('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
+
+        downloaders[SUBMISSION['postType']] (directory,SUBMISSION)
+
+    else:
+        raise NoSuitablePost
+
+    return None
+
 def download(submissions):
     """Analyze list of submissions and call the right function
     to download each one, catch errors, update the log files
     """
 
     subsLenght = len(submissions)
+    global lastRequestTime
     lastRequestTime = 0
     downloadedCount = subsLenght
     duplicates = 0
-    BACKUP = {}
 
     FAILED_FILE = createLogFile("FAILED")
 
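The new `downloadPost()` above replaces the old chain of `postType` branches with a dictionary that maps a post type string to a downloader class and then calls whichever class matches. A standalone sketch of that dispatch pattern, with stand-in classes instead of the repository's real `Imgur`/`Gfycat`/`Erome`/`Direct`/`Self` downloaders:

```python
class FakeImgur:
    """Stand-in downloader: constructing it performs the 'download'."""
    def __init__(self, directory, submission):
        print("imgur  ->", directory, submission["postURL"])

class FakeDirect:
    def __init__(self, directory, submission):
        print("direct ->", directory, submission["postURL"])

DOWNLOADERS = {"imgur": FakeImgur, "direct": FakeDirect}

def dispatch(directory, submission):
    try:
        downloader = DOWNLOADERS[submission["postType"]]
    except KeyError:
        # script.py raises NoSuitablePost at this point
        raise ValueError("no suitable downloader")
    downloader(directory, submission)

dispatch("downloads/pics", {"postType": "imgur", "postURL": "https://imgur.com/abc"})
```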
@@ -457,131 +524,45 @@ def download(submissions):
             )
 
         if postExists(submissions[i]):
-            result = False
             print(submissions[i]['postType'].upper())
             print("It already exists")
             duplicates += 1
             downloadedCount -= 1
             continue
 
-        directory = GLOBAL.directory / submissions[i]['postSubreddit']
-
-        if submissions[i]['postType'] == 'imgur':
-            print("IMGUR",end="")
-
-            while int(time.time() - lastRequestTime) <= 2:
-                pass
-            credit = Imgur.get_credits()
-
-            IMGUR_RESET_TIME = credit['UserReset']-time.time()
-            USER_RESET = ("after " \
-                          + str(int(IMGUR_RESET_TIME/60)) \
-                          + " Minutes " \
-                          + str(int(IMGUR_RESET_TIME%60)) \
-                          + " Seconds")
-            print(
-                " => Client: {} - User: {} - Reset {}".format(
-                    credit['ClientRemaining'],
-                    credit['UserRemaining'],
-                    USER_RESET
-                )
-            )
-
-            if not (credit['UserRemaining'] == 0 or \
-                    credit['ClientRemaining'] == 0):
-
-                """This block of code is needed
-                """
-                while int(time.time() - lastRequestTime) <= 2:
-                    pass
-                lastRequestTime = time.time()
-
-                try:
-                    Imgur(directory,submissions[i])
-
-                except FileAlreadyExistsError:
-                    print("It already exists")
-                    duplicates += 1
-                    downloadedCount -= 1
-
-                except ImgurLoginError:
-                    print(
-                        "Imgur login failed. Quitting the program "\
-                        "as unexpected errors might occur."
-                    )
-                    sys.exit()
-
-                except Exception as exception:
-                    print(exception)
-                    FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                    downloadedCount -= 1
-
-            else:
-                if credit['UserRemaining'] == 0:
-                    KEYWORD = "user"
-                elif credit['ClientRemaining'] == 0:
-                    KEYWORD = "client"
-
-                print('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
-                FAILED_FILE.add(
-                    {int(i+1):['{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()),
-                               submissions[i]]}
-                )
-                downloadedCount -= 1
-
-        elif submissions[i]['postType'] == 'gfycat':
-            print("GFYCAT")
-            try:
-                Gfycat(directory,submissions[i])
-
-            except FileAlreadyExistsError:
-                print("It already exists")
-                duplicates += 1
-                downloadedCount -= 1
-
-            except NotADownloadableLinkError as exception:
-                print(exception)
-                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                downloadedCount -= 1
-
-            except Exception as exception:
-                print(exception)
-                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                downloadedCount -= 1
-
-        elif submissions[i]['postType'] == 'direct':
-            print("DIRECT")
-            try:
-                Direct(directory,submissions[i])
-
-            except FileAlreadyExistsError:
-                print("It already exists")
-                downloadedCount -= 1
-                duplicates += 1
-
-            except Exception as exception:
-                print(exception)
-                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                downloadedCount -= 1
-
-        elif submissions[i]['postType'] == 'self':
-            print("SELF")
-            try:
-                Self(directory,submissions[i])
-
-            except FileAlreadyExistsError:
-                print("It already exists")
-                downloadedCount -= 1
-                duplicates += 1
-
-            except Exception as exception:
-                print(exception)
-                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                downloadedCount -= 1
-
-        else:
+        try:
+            downloadPost(submissions[i])
+
+        except FileAlreadyExistsError:
+            print("It already exists")
+            duplicates += 1
+            downloadedCount -= 1
+
+        except ImgurLoginError:
+            print(
+                "Imgur login failed. \nQuitting the program "\
+                "as unexpected errors might occur."
+            )
+            sys.exit()
+
+        except ImgurLimitError as exception:
+            FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
+            downloadedCount -= 1
+
+        except NotADownloadableLinkError as exception:
+            print(exception)
+            FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
+            downloadedCount -= 1
+
+        except NoSuitablePost:
             print("No match found, skipping...")
             downloadedCount -= 1
 
+        except Exception as exception:
+            # raise exception
+            print(exception)
+            FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
+            downloadedCount -= 1
+
     if duplicates:
         print("\n There was {} duplicates".format(duplicates))
@@ -618,7 +599,7 @@ def main():
         logDir = Path(GLOBAL.arguments.log)
         download(postFromLog(logDir))
         sys.exit()
 
     try:
         POSTS = getPosts(prepareAttributes())
     except InsufficientPermission:
@@ -663,12 +644,17 @@ if __name__ == "__main__":
         print = printToFile
         GLOBAL.RUN_TIME = time.time()
         main()
 
     except KeyboardInterrupt:
         if GLOBAL.directory is None:
             GLOBAL.directory = Path(".\\")
         print("\nQUITTING...")
 
     except Exception as exception:
-        logging.error("Runtime error!", exc_info=full_exc_info(sys.exc_info()))
+        if GLOBAL.directory is None:
+            GLOBAL.directory = Path(".\\")
+        logging.error(sys.exc_info()[0].__name__,
+                      exc_info=full_exc_info(sys.exc_info()))
         print(log_stream.getvalue())
 
-        input("Press enter to quit\n")
+        input("\nPress enter to quit\n")
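In the reworked error path above, the script logs the exception name together with the full traceback and then prints whatever accumulated in the in-memory `log_stream`. A minimal self-contained version of that idea using only the standard library; `full_exc_info` is the repository's own helper and is not reproduced here:

```python
import logging
import sys
from io import StringIO

# Send logging output to an in-memory buffer instead of a file or stderr.
log_stream = StringIO()
logging.basicConfig(stream=log_stream, level=logging.ERROR)

try:
    1 / 0
except Exception:
    # Log the exception class name plus its traceback, as the diff above does;
    # exc_info=True is the standard-library stand-in for full_exc_info(...).
    logging.error(sys.exc_info()[0].__name__, exc_info=True)

print(log_stream.getvalue())   # captured log text, traceback included
```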
@@ -2,7 +2,9 @@ import io
 import os
 import sys
 import urllib.request
+from html.parser import HTMLParser
 from pathlib import Path
+from urllib.error import HTTPError
 
 import imgurpython
 from multiprocessing import Queue
@@ -69,6 +71,129 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
     else:
         raise FileAlreadyExistsError
 
+class Erome:
+    def __init__(self,directory,post):
+        try:
+            IMAGES = self.getLinks(post['postURL'])
+        except urllib.error.HTTPError:
+            raise NotADownloadableLinkError("Not a downloadable link")
+
+        imagesLenght = len(IMAGES)
+        howManyDownloaded = imagesLenght
+        duplicates = 0
+
+        if imagesLenght == 1:
+
+            extension = getExtension(IMAGES[0])
+
+            title = nameCorrector(post['postTitle'])
+            print(title+"_" +post['postId']+extension)
+
+            fileDir = title + "_" + post['postId'] + extension
+            fileDir = directory / fileDir
+
+            tempDir = title + "_" + post['postId'] + '.tmp'
+            tempDir = directory / tempDir
+
+            imageURL = "https:" + IMAGES[0]
+
+            try:
+                getFile(fileDir,tempDir,imageURL)
+            except FileNameTooLong:
+                fileDir = directory / (post['postId'] + extension)
+                tempDir = directory / (post['postId'] + '.tmp')
+                getFile(fileDir,tempDir,imageURL)
+
+        else:
+            title = nameCorrector(post['postTitle'])
+            print(title+"_"+post['postId'],end="\n\n")
+
+            folderDir = directory / (title+"_"+post['postId'])
+
+            try:
+                if not os.path.exists(folderDir):
+                    os.makedirs(folderDir)
+            except FileNotFoundError:
+                folderDir = directory / post['postId']
+                os.makedirs(folderDir)
+
+            for i in range(imagesLenght):
+
+                extension = getExtension(IMAGES[i])
+
+                fileName = str(i+1)
+                imageURL = "https:" + IMAGES[i]
+
+                fileDir = folderDir / (fileName + extension)
+                tempDir = folderDir / (fileName + ".tmp")
+
+                print(" ({}/{})".format(i+1,imagesLenght))
+                print(" {}".format(fileName+extension))
+
+                try:
+                    getFile(fileDir,tempDir,imageURL,indent=2)
+                    print()
+                except FileAlreadyExistsError:
+                    print(" The file already exists" + " "*10,end="\n\n")
+                    duplicates += 1
+                    howManyDownloaded -= 1
+
+                except Exception as exception:
+                    raise exception
+                    print("\n Could not get the file")
+                    print(" " + str(exception) + "\n")
+                    exceptionType = exception
+                    howManyDownloaded -= 1
+
+        if duplicates == imagesLenght:
+            raise FileAlreadyExistsError
+        elif howManyDownloaded + duplicates < imagesLenght:
+            raise AlbumNotDownloadedCompletely(
+                "Album Not Downloaded Completely"
+            )
+
+    def getLinks(self,url,lineNumber=129):
+
+        content = []
+        lineNumber = None
+
+        class EromeParser(HTMLParser):
+            tag = None
+            def handle_starttag(self, tag, attrs):
+                self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}
+
+        pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
+
+        """ FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS"""
+        for i in range(len(pageSource)):
+            obj = EromeParser()
+            obj.feed(pageSource[i])
+            tag = obj.tag
+
+            if tag is not None:
+                if "div" in tag:
+                    if "id" in tag["div"]:
+                        if tag["div"]["id"] == "album":
+                            lineNumber = i
+                            break
+
+        for line in pageSource[lineNumber:]:
+            obj = EromeParser()
+            obj.feed(line)
+            tag = obj.tag
+            if tag is not None:
+                if "img" in tag:
+                    if "class" in tag["img"]:
+                        if tag["img"]["class"]=="img-front":
+                            content.append(tag["img"]["src"])
+                elif "source" in tag:
+                    content.append(tag["source"]["src"])
+
+        return [
+            link for link in content \
+            if link.endswith("_480p.mp4") or not link.endswith(".mp4")
+        ]
+
 class Imgur:
     def __init__(self,directory,post):
         self.imgurClient = self.initImgur()
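`Erome.getLinks()` above feeds the page source line by line into a tiny `HTMLParser` subclass and keeps the `src` attribute of `img` tags with class `img-front` and of `source` tags. A self-contained sketch of that extraction on a hard-coded snippet (the HTML below is invented for illustration, not fetched from erome):

```python
from html.parser import HTMLParser

class LinkCollector(HTMLParser):
    """Collect src attributes the same way Erome.getLinks() filters tags."""
    def __init__(self):
        super().__init__()
        self.links = []

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if tag == "img" and attrs.get("class") == "img-front":
            self.links.append(attrs.get("src"))
        elif tag == "source":
            self.links.append(attrs.get("src"))

sample = (
    '<div id="album">'
    '<img class="img-front" src="//example.invalid/a.jpg">'
    '<source src="//example.invalid/b_480p.mp4">'
    '</div>'
)

parser = LinkCollector()
parser.feed(sample)
print(parser.links)   # ['//example.invalid/a.jpg', '//example.invalid/b_480p.mp4']
```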
@@ -171,7 +296,7 @@ class Imgur:
 
         if duplicates == imagesLenght:
             raise FileAlreadyExistsError
-        elif howManyDownloaded < imagesLenght:
+        elif howManyDownloaded + duplicates < imagesLenght:
             raise AlbumNotDownloadedCompletely(
                 "Album Not Downloaded Completely"
             )
@@ -80,4 +80,10 @@ class InvalidSortingType(Exception):
     pass
 
 class FileNotFoundError(Exception):
+    pass
+
+class NoSuitablePost(Exception):
+    pass
+
+class ImgurLimitError(Exception):
     pass
@@ -299,6 +299,8 @@ def redditSearcher(posts,SINGLE_POST=False):
     gfycatCount = 0
     global imgurCount
     imgurCount = 0
+    global eromeCount
+    eromeCount = 0
     global directCount
     directCount = 0
     global selfCount
@@ -360,8 +362,15 @@ def redditSearcher(posts,SINGLE_POST=False):
     if not len(subList) == 0:
         print(
             "\nTotal of {} submissions found!\n"\
-            "{} GFYCATs, {} IMGURs, {} DIRECTs and {} SELF POSTS\n"
-            .format(len(subList),gfycatCount,imgurCount,directCount,selfCount)
+            "{} GFYCATs, {} IMGURs, {} EROMEs, {} DIRECTs and {} SELF POSTS\n"
+            .format(
+                len(subList),
+                gfycatCount,
+                imgurCount,
+                eromeCount,
+                directCount,
+                selfCount
+            )
         )
         return subList
     else:
@@ -370,6 +379,7 @@ def redditSearcher(posts,SINGLE_POST=False):
 def checkIfMatching(submission):
     global gfycatCount
     global imgurCount
+    global eromeCount
     global directCount
     global selfCount
 
@@ -383,19 +393,20 @@ def checkIfMatching(submission):
     except AttributeError:
         return None
 
-    if ('gfycat' in submission.domain) or \
-       ('imgur' in submission.domain):
-
-        if 'gfycat' in submission.domain:
-            details['postType'] = 'gfycat'
-            gfycatCount += 1
-            return details
-
-        elif 'imgur' in submission.domain:
-            details['postType'] = 'imgur'
-            imgurCount += 1
-            return details
-
+    if 'gfycat' in submission.domain:
+        details['postType'] = 'gfycat'
+        gfycatCount += 1
+        return details
+
+    elif 'imgur' in submission.domain:
+        details['postType'] = 'imgur'
+        imgurCount += 1
+        return details
+
+    elif 'erome' in submission.domain:
+        details['postType'] = 'erome'
+        eromeCount += 1
+        return details
 
     elif isDirectLink(submission.url) is not False:
         details['postType'] = 'direct'
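The flattened `if/elif` chain above decides the `postType` purely from the submission's domain. The same decision, reduced to a plain function over a domain string (the example domains are illustrative, not an exhaustive list of what the program accepts):

```python
def postTypeForDomain(domain):
    """Map a submission domain to the postType label used by the downloaders."""
    if "gfycat" in domain:
        return "gfycat"
    elif "imgur" in domain:
        return "imgur"
    elif "erome" in domain:
        return "erome"
    return None   # direct links and self posts are detected separately

print(postTypeForDomain("i.imgur.com"))    # imgur
print(postTypeForDomain("www.erome.com"))  # erome
print(postTypeForDomain("v.redd.it"))      # None
```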
@@ -75,8 +75,10 @@ def createLogFile(TITLE):
     put given arguments inside \"HEADER\" key
     """
 
-    folderDirectory = GLOBAL.directory / str(time.strftime("%d-%m-%Y_%H-%M-%S",
-                                              time.localtime(GLOBAL.RUN_TIME)))
+    folderDirectory = GLOBAL.directory / "LOG_FILES" / \
+                      str(time.strftime(
+                          "%d-%m-%Y_%H-%M-%S",time.localtime(GLOBAL.RUN_TIME)
+                      ))
     logFilename = TITLE.upper()+'.json'
 
     if not path.exists(folderDirectory):
@@ -95,7 +97,7 @@ def printToFile(*args, **kwargs):
 
     TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S",
                              time.localtime(GLOBAL.RUN_TIME)))
-    folderDirectory = GLOBAL.directory / TIME
+    folderDirectory = GLOBAL.directory / "LOG_FILES" / TIME
     print(*args,**kwargs)
 
     if not path.exists(folderDirectory):