mirror of
https://github.com/KevinMidboe/bulk-downloader-for-reddit.git
synced 2026-01-19 23:56:28 +00:00
Compare commits
19 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
91d71565cc | ||
|
|
c7b7361ded | ||
|
|
cd81a6c38b | ||
|
|
1623722138 | ||
|
|
dad5669441 | ||
|
|
35d54d1eb1 | ||
|
|
394b864d86 | ||
|
|
837281c3c6 | ||
|
|
e6b648d8b3 | ||
|
|
cfaf2de7db | ||
|
|
80546d7094 | ||
|
|
139a81a0e7 | ||
|
|
9bb0a5da7f | ||
|
|
6f2273f182 | ||
|
|
b5d6165802 | ||
|
|
b98815376f | ||
|
|
d9586f99b8 | ||
|
|
76711892a2 | ||
|
|
bfea548eab |
17
README.md
17
README.md
@@ -41,6 +41,19 @@ It should redirect to a page which shows your **imgur_client_id** and **imgur_cl
|
|||||||
\* Select **OAuth 2 authorization without a callback URL** first then select **Anonymous usage without user authorization** if it says *Authorization callback URL: required*
|
\* Select **OAuth 2 authorization without a callback URL** first then select **Anonymous usage without user authorization** if it says *Authorization callback URL: required*
|
||||||
|
|
||||||
## FAQ
|
## FAQ
|
||||||
|
### What do the dots represent when getting posts?
|
||||||
|
- Each dot means that another 100 posts have been scanned.
|
||||||
|
|
||||||
|
### Getting posts is taking too long.
|
||||||
|
- You can press Ctrl+C to interrupt it and start downloading.
|
||||||
|
|
||||||
|
### How are downloaded files' names formatted?
|
||||||
|
- Images that do not belong to an album, as well as self posts, are named **`[SUBMITTER NAME]_[POST TITLE]_[REDDIT ID]`**.
|
||||||
|
You can use the *reddit id* to open the post's reddit page at **reddit.com/[REDDIT ID]**.
|
||||||
|
|
||||||
|
- An image in an imgur album is formatted as **`[ITEM NUMBER]_[IMAGE TITLE]_[IMGUR ID]`**
|
||||||
|
Similarly, you can use the *imgur id* to open the image's imgur page at **imgur.com/[IMGUR ID]**.
|
||||||
|
|
||||||
### How do I open self post files?
|
### How do I open self post files?
|
||||||
- Self posts are held at reddit as styled with markdown. So, the script downloads them as they are in order not to lose their stylings.
|
- Self posts are held at reddit as styled with markdown. So, the script downloads them as they are in order not to lose their stylings.
|
||||||
However, there is a [great Chrome extension](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with its styling. Install it and open the files with [Chrome](https://www.google.com/intl/tr/chrome/).
|
However, there is a [great Chrome extension](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with its styling. Install it and open the files with [Chrome](https://www.google.com/intl/tr/chrome/).
|
||||||
@@ -52,6 +65,10 @@ It should redirect to a page which shows your **imgur_client_id** and **imgur_cl
|
|||||||
them, there.
|
them, there.
|
||||||
|
|
||||||
## Changes on *master*
|
## Changes on *master*
|
||||||
|
### [25/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/1623722138bad80ae39ffcd5fb38baf80680deac)
|
||||||
|
- Added verbose mode
|
||||||
|
- Stylize the console output
|
||||||
|
|
||||||
### [24/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7a68ff3efac9939f9574c2cef6184b92edb135f4)
|
### [24/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7a68ff3efac9939f9574c2cef6184b92edb135f4)
|
||||||
- Added OP's name to file names (backwards compatible)
|
- Added OP's name to file names (backwards compatible)
|
||||||
- Deleted # char from file names (backwards compatible)
|
- Deleted # char from file names (backwards compatible)
|
||||||
|
|||||||
@@ -10,13 +10,13 @@ usage: script.py [-h] [--directory DIRECTORY] [--link link] [--saved]
|
|||||||
[--subreddit SUBREDDIT [SUBREDDIT ...]]
|
[--subreddit SUBREDDIT [SUBREDDIT ...]]
|
||||||
[--multireddit MULTIREDDIT] [--user redditor]
|
[--multireddit MULTIREDDIT] [--user redditor]
|
||||||
[--search query] [--sort SORT TYPE] [--limit Limit]
|
[--search query] [--sort SORT TYPE] [--limit Limit]
|
||||||
[--time TIME_LIMIT] [--NoDownload]
|
[--time TIME_LIMIT] [--NoDownload] [--verbose]
|
||||||
|
|
||||||
This program downloads media from reddit posts
|
This program downloads media from reddit posts
|
||||||
|
|
||||||
optional arguments:
|
optional arguments:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
--directory DIRECTORY
|
--directory DIRECTORY, -d DIRECTORY
|
||||||
Specifies the directory where posts will be downloaded
|
Specifies the directory where posts will be downloaded
|
||||||
to
|
to
|
||||||
--link link, -l link Get posts from link
|
--link link, -l link Get posts from link
|
||||||
@@ -40,6 +40,7 @@ optional arguments:
|
|||||||
all
|
all
|
||||||
--NoDownload Just gets the posts and store them in a file for
|
--NoDownload Just gets the posts and store them in a file for
|
||||||
downloading later
|
downloading later
|
||||||
|
--verbose, -v Verbose Mode
|
||||||
```
|
```
|
||||||
|
|
||||||
# Examples
|
# Examples
|
||||||
|
|||||||
40
script.py
40
script.py
@@ -22,7 +22,7 @@ from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
|
|||||||
|
|
||||||
__author__ = "Ali Parlakci"
|
__author__ = "Ali Parlakci"
|
||||||
__license__ = "GPL"
|
__license__ = "GPL"
|
||||||
__version__ = "1.5.0"
|
__version__ = "1.5.2"
|
||||||
__maintainer__ = "Ali Parlakci"
|
__maintainer__ = "Ali Parlakci"
|
||||||
__email__ = "parlakciali@gmail.com"
|
__email__ = "parlakciali@gmail.com"
|
||||||
|
|
||||||
@@ -62,7 +62,7 @@ def parseArguments(arguments=[]):
|
|||||||
description="This program downloads " \
|
description="This program downloads " \
|
||||||
"media from reddit " \
|
"media from reddit " \
|
||||||
"posts")
|
"posts")
|
||||||
parser.add_argument("--directory",
|
parser.add_argument("--directory","-d",
|
||||||
help="Specifies the directory where posts will be " \
|
help="Specifies the directory where posts will be " \
|
||||||
"downloaded to",
|
"downloaded to",
|
||||||
metavar="DIRECTORY")
|
metavar="DIRECTORY")
|
||||||
@@ -144,6 +144,11 @@ def parseArguments(arguments=[]):
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
default=False)
|
default=False)
|
||||||
|
|
||||||
|
parser.add_argument("--verbose","-v",
|
||||||
|
help="Verbose Mode",
|
||||||
|
action="store_true",
|
||||||
|
default=False)
|
||||||
|
|
||||||
|
|
||||||
if arguments == []:
|
if arguments == []:
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
@@ -465,13 +470,12 @@ def downloadPost(SUBMISSION):
|
|||||||
"imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
|
"imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
print()
|
||||||
if SUBMISSION['postType'] in downloaders:
|
if SUBMISSION['postType'] in downloaders:
|
||||||
|
|
||||||
print(SUBMISSION['postType'].upper())
|
|
||||||
|
|
||||||
if SUBMISSION['postType'] == "imgur":
|
if SUBMISSION['postType'] == "imgur":
|
||||||
|
|
||||||
if int(time.time() - lastRequestTime) <= 2:
|
while int(time.time() - lastRequestTime) <= 2:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
credit = Imgur.get_credits()
|
credit = Imgur.get_credits()
|
||||||
@@ -483,20 +487,21 @@ def downloadPost(SUBMISSION):
|
|||||||
+ str(int(IMGUR_RESET_TIME%60)) \
|
+ str(int(IMGUR_RESET_TIME%60)) \
|
||||||
+ " Seconds")
|
+ " Seconds")
|
||||||
|
|
||||||
print(
|
if credit['ClientRemaining'] < 25 or credit['UserRemaining'] < 25:
|
||||||
"Client: {} - User: {} - Reset {}".format(
|
print(
|
||||||
credit['ClientRemaining'],
|
"==> Client: {} - User: {} - Reset {}".format(
|
||||||
credit['UserRemaining'],
|
credit['ClientRemaining'],
|
||||||
USER_RESET
|
credit['UserRemaining'],
|
||||||
|
USER_RESET
|
||||||
|
),end=""
|
||||||
)
|
)
|
||||||
)
|
|
||||||
|
|
||||||
if not (credit['UserRemaining'] == 0 or \
|
if not (credit['UserRemaining'] == 0 or \
|
||||||
credit['ClientRemaining'] == 0):
|
credit['ClientRemaining'] == 0):
|
||||||
|
|
||||||
"""This block of code is needed
|
"""This block of code is needed
|
||||||
"""
|
"""
|
||||||
if int(time.time() - lastRequestTime) <= 2:
|
while int(time.time() - lastRequestTime) <= 2:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
lastRequestTime = time.time()
|
lastRequestTime = time.time()
|
||||||
@@ -530,17 +535,13 @@ def download(submissions):
|
|||||||
FAILED_FILE = createLogFile("FAILED")
|
FAILED_FILE = createLogFile("FAILED")
|
||||||
|
|
||||||
for i in range(subsLenght):
|
for i in range(subsLenght):
|
||||||
print("\n({}/{})".format(i+1,subsLenght))
|
|
||||||
print(
|
print(
|
||||||
"https://reddit.com/r/{subreddit}/comments/{id}".format(
|
f"\n({i+1}/{subsLenght}) ({submissions[i]['postType'].upper()}) " \
|
||||||
subreddit=submissions[i]['postSubreddit'],
|
f"(r/{submissions[i]['postSubreddit']})",end=""
|
||||||
id=submissions[i]['postId']
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if isPostExists(submissions[i]):
|
if isPostExists(submissions[i]):
|
||||||
print(submissions[i]['postType'].upper())
|
print("\nIt already exists")
|
||||||
print("It already exists")
|
|
||||||
duplicates += 1
|
duplicates += 1
|
||||||
downloadedCount -= 1
|
downloadedCount -= 1
|
||||||
continue
|
continue
|
||||||
@@ -683,7 +684,6 @@ if __name__ == "__main__":
|
|||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
if GLOBAL.directory is None:
|
if GLOBAL.directory is None:
|
||||||
GLOBAL.directory = Path(".\\")
|
GLOBAL.directory = Path(".\\")
|
||||||
print("\nQUITTING...")
|
|
||||||
|
|
||||||
except Exception as exception:
|
except Exception as exception:
|
||||||
if GLOBAL.directory is None:
|
if GLOBAL.directory is None:
|
||||||
|
|||||||
@@ -61,13 +61,14 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
|
|||||||
tempDir,
|
tempDir,
|
||||||
reporthook=dlProgress)
|
reporthook=dlProgress)
|
||||||
os.rename(tempDir,fileDir)
|
os.rename(tempDir,fileDir)
|
||||||
print(" "*indent+"Downloaded"+" "*10)
|
|
||||||
break
|
|
||||||
except ConnectionResetError as exception:
|
except ConnectionResetError as exception:
|
||||||
print(" "*indent + str(exception))
|
print(" "*indent + str(exception))
|
||||||
print(" "*indent + "Trying again\n")
|
print(" "*indent + "Trying again\n")
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
raise FileNameTooLong
|
raise FileNameTooLong
|
||||||
|
else:
|
||||||
|
print(" "*indent+"Downloaded"+" "*10)
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
raise FileAlreadyExistsError
|
raise FileAlreadyExistsError
|
||||||
|
|
||||||
@@ -90,10 +91,10 @@ class Erome:
|
|||||||
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+extension)
|
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+extension)
|
||||||
|
|
||||||
fileDir = directory / (
|
fileDir = directory / (
|
||||||
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+extension
|
post["postSubmitter"]+"_"+title+"_"+post['postId']+extension
|
||||||
)
|
)
|
||||||
tempDir = directory / (
|
tempDir = directory / (
|
||||||
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp"
|
post["postSubmitter"]+"_"+title+"_"+post['postId']+".tmp"
|
||||||
)
|
)
|
||||||
|
|
||||||
imageURL = "https:" + IMAGES[0]
|
imageURL = "https:" + IMAGES[0]
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import random
|
import random
|
||||||
import socket
|
import socket
|
||||||
import webbrowser
|
import webbrowser
|
||||||
@@ -125,7 +126,7 @@ def getPosts(args):
|
|||||||
if args["user"] == "me":
|
if args["user"] == "me":
|
||||||
args["user"] = str(reddit.user.me())
|
args["user"] = str(reddit.user.me())
|
||||||
|
|
||||||
print("\nGETTING POSTS\n.\n.\n.\n")
|
# print("\nGETTING POSTS\n.\n.\n.\n")
|
||||||
|
|
||||||
if not "search" in args:
|
if not "search" in args:
|
||||||
if args["sort"] == "top" or args["sort"] == "controversial":
|
if args["sort"] == "top" or args["sort"] == "controversial":
|
||||||
@@ -306,6 +307,7 @@ def redditSearcher(posts,SINGLE_POST=False):
|
|||||||
|
|
||||||
allPosts = {}
|
allPosts = {}
|
||||||
|
|
||||||
|
print("GETTING POSTS")
|
||||||
postsFile = createLogFile("POSTS")
|
postsFile = createLogFile("POSTS")
|
||||||
|
|
||||||
if SINGLE_POST:
|
if SINGLE_POST:
|
||||||
@@ -326,49 +328,56 @@ def redditSearcher(posts,SINGLE_POST=False):
|
|||||||
if result is not None:
|
if result is not None:
|
||||||
details = result
|
details = result
|
||||||
orderCount += 1
|
orderCount += 1
|
||||||
printSubmission(submission,subCount,orderCount)
|
if GLOBAL.arguments.verbose:
|
||||||
|
printSubmission(submission,subCount,orderCount)
|
||||||
subList.append(details)
|
subList.append(details)
|
||||||
|
|
||||||
postsFile.add({subCount:[details]})
|
postsFile.add({subCount:[details]})
|
||||||
|
|
||||||
else:
|
else:
|
||||||
for submission in posts:
|
try:
|
||||||
subCount += 1
|
for submission in posts:
|
||||||
|
subCount += 1
|
||||||
|
|
||||||
try:
|
if subCount % 100 == 0 and not GLOBAL.arguments.verbose:
|
||||||
details = {'postId':submission.id,
|
sys.stdout.write("• ")
|
||||||
'postTitle':submission.title,
|
sys.stdout.flush()
|
||||||
'postSubmitter':str(submission.author),
|
|
||||||
'postType':None,
|
|
||||||
'postURL':submission.url,
|
|
||||||
'postSubreddit':submission.subreddit.display_name}
|
|
||||||
except AttributeError:
|
|
||||||
continue
|
|
||||||
|
|
||||||
result = checkIfMatching(submission)
|
if subCount % 1000 == 0:
|
||||||
|
sys.stdout.write("\n")
|
||||||
|
sys.stdout.flush()
|
||||||
|
|
||||||
if result is not None:
|
try:
|
||||||
details = result
|
details = {'postId':submission.id,
|
||||||
orderCount += 1
|
'postTitle':submission.title,
|
||||||
printSubmission(submission,subCount,orderCount)
|
'postSubmitter':str(submission.author),
|
||||||
subList.append(details)
|
'postType':None,
|
||||||
|
'postURL':submission.url,
|
||||||
|
'postSubreddit':submission.subreddit.display_name}
|
||||||
|
except AttributeError:
|
||||||
|
continue
|
||||||
|
|
||||||
allPosts[subCount] = [details]
|
result = checkIfMatching(submission)
|
||||||
|
|
||||||
|
if result is not None:
|
||||||
|
details = result
|
||||||
|
orderCount += 1
|
||||||
|
if GLOBAL.arguments.verbose:
|
||||||
|
printSubmission(submission,subCount,orderCount)
|
||||||
|
subList.append(details)
|
||||||
|
|
||||||
|
allPosts[subCount] = [details]
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
print("\nKeyboardInterrupt",end="")
|
||||||
|
|
||||||
postsFile.add(allPosts)
|
postsFile.add(allPosts)
|
||||||
|
|
||||||
if not len(subList) == 0:
|
if not len(subList) == 0:
|
||||||
print(
|
print(
|
||||||
"\nTotal of {} submissions found!\n"\
|
f"\n\nTotal of {len(subList)} submissions found!\n"\
|
||||||
"{} GFYCATs, {} IMGURs, {} EROMEs, {} DIRECTs and {} SELF POSTS\n"
|
f"{gfycatCount} GFYCATs, {imgurCount} IMGURs, " \
|
||||||
.format(
|
f"{eromeCount} EROMEs, {directCount} DIRECTs " \
|
||||||
len(subList),
|
f"and {selfCount} SELF POSTS"
|
||||||
gfycatCount,
|
|
||||||
imgurCount,
|
|
||||||
eromeCount,
|
|
||||||
directCount,
|
|
||||||
selfCount
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
return subList
|
return subList
|
||||||
else:
|
else:
|
||||||
|
|||||||
11
src/tools.py
11
src/tools.py
@@ -102,11 +102,12 @@ def printToFile(*args, **kwargs):
|
|||||||
|
|
||||||
if not path.exists(folderDirectory):
|
if not path.exists(folderDirectory):
|
||||||
makedirs(folderDirectory)
|
makedirs(folderDirectory)
|
||||||
|
|
||||||
with io.open(
|
if not "file" in kwargs:
|
||||||
folderDirectory / "CONSOLE_LOG.txt","a",encoding="utf-8"
|
with io.open(
|
||||||
) as FILE:
|
folderDirectory / "CONSOLE_LOG.txt","a",encoding="utf-8"
|
||||||
print(*args, file=FILE, **kwargs)
|
) as FILE:
|
||||||
|
print(*args, file=FILE, **kwargs)
|
||||||
|
|
||||||
def nameCorrector(string):
|
def nameCorrector(string):
|
||||||
"""Swap strange characters from given string
|
"""Swap strange characters from given string
|
||||||
|
|||||||
Reference in New Issue
Block a user