14 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Ali Parlakçı | 91d71565cc | Update script.py | 2018-07-25 18:30:48 +03:00 |
| Ali Parlakci | c7b7361ded | Update version | 2018-07-25 18:27:58 +03:00 |
| Ali Parlakci | cd81a6c38b | Update changelog | 2018-07-25 13:53:33 +03:00 |
| Ali Parlakçı | 1623722138 | Merge pull request #42 from aliparlakci/verboseMode: Added Verbose mode | 2018-07-25 13:52:34 +03:00 |
| Ali Parlakci | dad5669441 | Typo fix | 2018-07-25 13:50:11 +03:00 |
| Ali Parlakci | 35d54d1eb1 | Stylize | 2018-07-25 13:48:30 +03:00 |
| Ali Parlakci | 394b864d86 | Updated FAQ | 2018-07-25 13:48:02 +03:00 |
| Ali Parlakci | 837281c3c6 | Added verbose mode | 2018-07-25 13:40:06 +03:00 |
| Ali Parlakci | e6b648d8b3 | Update changelog | 2018-07-25 12:25:39 +03:00 |
| Ali Parlakci | cfaf2de7db | Stylize the console output | 2018-07-25 12:24:50 +03:00 |
| Ali Parlakci | 80546d7094 | Update version | 2018-07-25 11:36:58 +03:00 |
| Ali Parlakci | 139a81a0e7 | Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit | 2018-07-25 09:29:48 +03:00 |
| Ali Parlakci | 9bb0a5da7f | Added delays for imgur rate limit | 2018-07-25 09:27:41 +03:00 |
| Ali Parlakçı | 6f2273f182 | Add file name formatting doc | 2018-07-24 23:41:02 +03:00 |
5 changed files with 86 additions and 59 deletions

@@ -41,6 +41,19 @@ It should redirect to a page which shows your **imgur_client_id** and **imgur_cl
 \* Select **OAuth 2 authorization without a callback URL** first then select **Anonymous usage without user authorization** if it says *Authorization callback URL: required*
 ## FAQ
+### What do the dots represent when getting posts?
+- Each dot means that 100 posts have been scanned.
+### Getting posts is taking too long.
+- You can press Ctrl+C to interrupt it and start downloading.
+### How are downloaded files' names formatted?
+- Images that do not belong to an album, and self posts, are formatted as **`[SUBMITTER NAME]_[POST TITLE]_[REDDIT ID]`**.
+You can use the *reddit id* to open the post's reddit page at **reddit.com/[REDDIT ID]**.
+- An image in an imgur album is formatted as **`[ITEM NUMBER]_[IMAGE TITLE]_[IMGUR ID]`**.
+Similarly, you can use the *imgur id* to open the image's imgur page at **imgur.com/[IMGUR ID]**.
 ### How do I open self post files?
 - Self posts are stored on reddit as markdown, so the script downloads them as they are in order not to lose their styling.
 However, there is a [great Chrome extension](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with their styling. Install it and open the files with [Chrome](https://www.google.com/intl/tr/chrome/).
@@ -52,6 +65,10 @@ It should redirect to a page which shows your **imgur_client_id** and **imgur_cl
 them, there.
 ## Changes on *master*
+### [25/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/1623722138bad80ae39ffcd5fb38baf80680deac)
+- Added verbose mode
+- Stylized the console output
 ### [24/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7a68ff3efac9939f9574c2cef6184b92edb135f4)
 - Added OP's name to file names (backwards compatible)
 - Deleted # char from file names (backwards compatible)
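
The FAQ entry above describes the naming scheme in prose; here is a minimal sketch of the same scheme. The helper names are hypothetical and not part of the script, which builds these names inside its own downloaders.

```python
# Hypothetical helpers illustrating the naming rules from the FAQ above.
def single_post_name(submitter, title, reddit_id):
    """Non-album images and self posts: [SUBMITTER NAME]_[POST TITLE]_[REDDIT ID]."""
    return f"{submitter}_{title}_{reddit_id}"

def album_item_name(item_number, image_title, imgur_id):
    """Images inside an imgur album: [ITEM NUMBER]_[IMAGE TITLE]_[IMGUR ID]."""
    return f"{item_number}_{image_title}_{imgur_id}"

# The trailing reddit id can be appended to reddit.com/ to reach the post.
print(single_post_name("some_redditor", "Example title", "92abcd"))
```
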

@@ -10,13 +10,13 @@ usage: script.py [-h] [--directory DIRECTORY] [--link link] [--saved]
 [--subreddit SUBREDDIT [SUBREDDIT ...]]
 [--multireddit MULTIREDDIT] [--user redditor]
 [--search query] [--sort SORT TYPE] [--limit Limit]
-[--time TIME_LIMIT] [--NoDownload]
+[--time TIME_LIMIT] [--NoDownload] [--verbose]
 This program downloads media from reddit posts
 optional arguments:
 -h, --help show this help message and exit
---directory DIRECTORY
+--directory DIRECTORY, -d DIRECTORY
 Specifies the directory where posts will be downloaded
 to
 --link link, -l link Get posts from link
@@ -40,6 +40,7 @@ optional arguments:
 all
 --NoDownload Just gets the posts and store them in a file for
 downloading later
+--verbose, -v Verbose Mode
 ```
 # Examples

@@ -22,7 +22,7 @@ from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
 __author__ = "Ali Parlakci"
 __license__ = "GPL"
-__version__ = "1.5.1"
+__version__ = "1.5.2"
 __maintainer__ = "Ali Parlakci"
 __email__ = "parlakciali@gmail.com"
@@ -62,7 +62,7 @@ def parseArguments(arguments=[]):
 description="This program downloads " \
 "media from reddit " \
 "posts")
-parser.add_argument("--directory",
+parser.add_argument("--directory","-d",
 help="Specifies the directory where posts will be " \
 "downloaded to",
 metavar="DIRECTORY")
@@ -144,6 +144,11 @@ def parseArguments(arguments=[]):
 action="store_true",
 default=False)
+parser.add_argument("--verbose","-v",
+help="Verbose Mode",
+action="store_true",
+default=False)
 if arguments == []:
 return parser.parse_args()
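
For reference, the flag added in this hunk is a plain argparse store_true switch. The standalone sketch below is not the project's actual parseArguments(), which defines many more options; it only shows how such a flag is parsed and then used to gate extra output.

```python
import argparse

# Standalone sketch of the --verbose/-v switch added above.
parser = argparse.ArgumentParser(
    description="This program downloads media from reddit posts"
)
parser.add_argument("--verbose", "-v",
                    help="Verbose Mode",
                    action="store_true",
                    default=False)

args = parser.parse_args(["--verbose"])
if args.verbose:
    # In the script, this flag decides whether each submission is printed
    # in full or only a compact progress indicator is shown.
    print("verbose output enabled")
```
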
@@ -465,13 +470,12 @@ def downloadPost(SUBMISSION):
 "imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
 }
-print()
 if SUBMISSION['postType'] in downloaders:
-print(SUBMISSION['postType'].upper(),end=" ")
 if SUBMISSION['postType'] == "imgur":
-if int(time.time() - lastRequestTime) <= 2:
+while int(time.time() - lastRequestTime) <= 2:
 pass
 credit = Imgur.get_credits()
@@ -483,20 +487,21 @@ def downloadPost(SUBMISSION):
 + str(int(IMGUR_RESET_TIME%60)) \
 + " Seconds")
-print(
-"==> Client: {} - User: {} - Reset {}".format(
-credit['ClientRemaining'],
-credit['UserRemaining'],
-USER_RESET
-),end=""
-)
+if credit['ClientRemaining'] < 25 or credit['UserRemaining'] < 25:
+print(
+"==> Client: {} - User: {} - Reset {}".format(
+credit['ClientRemaining'],
+credit['UserRemaining'],
+USER_RESET
+),end=""
+)
 if not (credit['UserRemaining'] == 0 or \
 credit['ClientRemaining'] == 0):
 """This block of code is needed
 """
-if int(time.time() - lastRequestTime) <= 2:
+while int(time.time() - lastRequestTime) <= 2:
 pass
 lastRequestTime = time.time()
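
The if-to-while change above makes the script spin until at least two seconds have passed since the last imgur request. A sleep-based helper, shown here only as an alternative sketch rather than what the script does, achieves the same spacing without a busy loop.

```python
import time

# Alternative sketch only: the script itself busy-waits with
# "while int(time.time() - lastRequestTime) <= 2: pass". Sleeping for the
# remaining part of the interval gives the same two-second spacing between
# imgur requests without occupying the CPU.
_last_request_time = 0.0

def wait_for_imgur_slot(min_interval=2.0):
    global _last_request_time
    elapsed = time.time() - _last_request_time
    if elapsed < min_interval:
        time.sleep(min_interval - elapsed)
    _last_request_time = time.time()
```
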
@@ -509,7 +514,6 @@ def downloadPost(SUBMISSION):
 raise ImgurLimitError('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
-print()
 downloaders[SUBMISSION['postType']] (directory,SUBMISSION)
 else:
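
The call above relies on a dispatch table: the post type selects a downloader class from a dict instead of a chain of ifs. A self-contained sketch of that pattern follows; the class and data here are stand-ins, not the real Imgur/Gfycat/Erome/Direct/Self downloaders.

```python
# Illustrative dispatch-table pattern, with a stand-in downloader class.
class ExampleDirect:
    def __init__(self, directory, submission):
        print("downloading", submission["postURL"], "into", directory)

downloaders = {"direct": ExampleDirect}

submission = {"postType": "direct", "postURL": "https://example.com/a.jpg"}
if submission["postType"] in downloaders:
    downloaders[submission["postType"]]("downloads", submission)
else:
    print("no downloader for", submission["postType"])
```
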
@@ -531,17 +535,13 @@ def download(submissions):
 FAILED_FILE = createLogFile("FAILED")
 for i in range(subsLenght):
-print("\n({}/{})".format(i+1,subsLenght))
-print(
-"https://reddit.com/r/{subreddit}/comments/{id}".format(
-subreddit=submissions[i]['postSubreddit'],
-id=submissions[i]['postId']
-)
-)
+print(
+f"\n({i+1}/{subsLenght}) ({submissions[i]['postType'].upper()}) " \
+f"(r/{submissions[i]['postSubreddit']})",end=""
+)
 if isPostExists(submissions[i]):
-print(submissions[i]['postType'].upper())
-print("It already exists")
+print("\nIt already exists")
 duplicates += 1
 downloadedCount -= 1
 continue
@@ -684,7 +684,6 @@ if __name__ == "__main__":
 except KeyboardInterrupt:
 if GLOBAL.directory is None:
 GLOBAL.directory = Path(".\\")
-print("\nQUITTING...")
 except Exception as exception:
 if GLOBAL.directory is None:

@@ -1,4 +1,5 @@
 import os
+import sys
 import random
 import socket
 import webbrowser
@@ -125,7 +126,7 @@ def getPosts(args):
 if args["user"] == "me":
 args["user"] = str(reddit.user.me())
-print("\nGETTING POSTS\n.\n.\n.\n")
+# print("\nGETTING POSTS\n.\n.\n.\n")
 if not "search" in args:
 if args["sort"] == "top" or args["sort"] == "controversial":
@@ -306,6 +307,7 @@ def redditSearcher(posts,SINGLE_POST=False):
 allPosts = {}
+print("GETTING POSTS")
 postsFile = createLogFile("POSTS")
 if SINGLE_POST:
@@ -326,49 +328,56 @@ def redditSearcher(posts,SINGLE_POST=False):
 if result is not None:
 details = result
 orderCount += 1
-printSubmission(submission,subCount,orderCount)
+if GLOBAL.arguments.verbose:
+printSubmission(submission,subCount,orderCount)
 subList.append(details)
 postsFile.add({subCount:[details]})
 else:
-for submission in posts:
-subCount += 1
-try:
-details = {'postId':submission.id,
-'postTitle':submission.title,
-'postSubmitter':str(submission.author),
-'postType':None,
-'postURL':submission.url,
-'postSubreddit':submission.subreddit.display_name}
-except AttributeError:
-continue
-result = checkIfMatching(submission)
-if result is not None:
-details = result
-orderCount += 1
-printSubmission(submission,subCount,orderCount)
-subList.append(details)
-allPosts[subCount] = [details]
+try:
+for submission in posts:
+subCount += 1
+if subCount % 100 == 0 and not GLOBAL.arguments.verbose:
+sys.stdout.write(".")
+sys.stdout.flush()
+if subCount % 1000 == 0:
+sys.stdout.write("\n")
+sys.stdout.flush()
+try:
+details = {'postId':submission.id,
+'postTitle':submission.title,
+'postSubmitter':str(submission.author),
+'postType':None,
+'postURL':submission.url,
+'postSubreddit':submission.subreddit.display_name}
+except AttributeError:
+continue
+result = checkIfMatching(submission)
+if result is not None:
+details = result
+orderCount += 1
+if GLOBAL.arguments.verbose:
+printSubmission(submission,subCount,orderCount)
+subList.append(details)
+allPosts[subCount] = [details]
+except KeyboardInterrupt:
+print("\nKeyboardInterrupt",end="")
 postsFile.add(allPosts)
 if not len(subList) == 0:
-print(
-"\nTotal of {} submissions found!\n"\
-"{} GFYCATs, {} IMGURs, {} EROMEs, {} DIRECTs and {} SELF POSTS\n"
-.format(
-len(subList),
-gfycatCount,
-imgurCount,
-eromeCount,
-directCount,
-selfCount
-)
-)
+print(
+f"\n\nTotal of {len(subList)} submissions found!\n"\
+f"{gfycatCount} GFYCATs, {imgurCount} IMGURs, " \
+f"{eromeCount} EROMEs, {directCount} DIRECTs " \
+f"and {selfCount} SELF POSTS"
+)
 return subList
 else:
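
The non-verbose branch above reports progress with a mark every 100 scanned posts and a line break every 1000, matching the FAQ entry about dots. Below is a self-contained sketch of that pattern; the function name is illustrative rather than the module's own code.

```python
import sys

# Sketch of the progress pattern above: one dot per 100 scanned posts, a
# line break every 1000, and nothing at all in verbose mode (where every
# submission is printed individually instead).
def report_progress(scanned_count, verbose=False):
    if verbose:
        return
    if scanned_count % 100 == 0:
        sys.stdout.write(".")
        sys.stdout.flush()
    if scanned_count % 1000 == 0:
        sys.stdout.write("\n")
        sys.stdout.flush()

for count in range(1, 2501):
    report_progress(count)  # prints 25 dots spread over three lines
```
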

@@ -103,10 +103,11 @@ def printToFile(*args, **kwargs):
 if not path.exists(folderDirectory):
 makedirs(folderDirectory)
-with io.open(
-folderDirectory / "CONSOLE_LOG.txt","a",encoding="utf-8"
-) as FILE:
-print(*args, file=FILE, **kwargs)
+if not "file" in kwargs:
+with io.open(
+folderDirectory / "CONSOLE_LOG.txt","a",encoding="utf-8"
+) as FILE:
+print(*args, file=FILE, **kwargs)
 def nameCorrector(string):
 """Swap strange characters from given string