mirror of
https://github.com/KevinMidboe/bulk-downloader-for-reddit.git
synced 2025-10-29 17:40:15 +00:00
@@ -51,7 +51,6 @@ It should redirect to a page which shows your **imgur_client_id** and **imgur_cl
|
|||||||
\*Select **OAuth 2 authorization without a callback URL** first then select **Anonymous usage without user authorization** if it says *Authorization callback URL: required*
|
\*Select **OAuth 2 authorization without a callback URL** first then select **Anonymous usage without user authorization** if it says *Authorization callback URL: required*
|
||||||
|
|
||||||
## Program Modes
|
## Program Modes
|
||||||
All the program modes are activated with command-line arguments as shown [here](#using-the-command-line-arguments)
|
|
||||||
- **saved mode**
|
- **saved mode**
|
||||||
- Gets posts from given user's saved posts.
|
- Gets posts from given user's saved posts.
|
||||||
- **submitted mode**
|
- **submitted mode**
|
||||||
@@ -60,19 +59,18 @@ All the program modes are activated with command-line arguments as shown [here](
|
|||||||
- Gets posts from given user's upvoted posts.
|
- Gets posts from given user's upvoted posts.
|
||||||
- **subreddit mode**
|
- **subreddit mode**
|
||||||
- Gets posts from the given subreddit or subreddits, sorted by the given type and limited to the given number.
|
- Gets posts from the given subreddit or subreddits, sorted by the given type and limited to the given number.
|
||||||
- You may also use search in this mode. See [`python script.py --help`](#using-the-command-line-arguments).
|
|
||||||
- **multireddit mode**
|
- **multireddit mode**
|
||||||
- Gets posts from the given multireddit of the given user, sorted by the given type and limited to the given number.
|
- Gets posts from the given multireddit of the given user, sorted by the given type and limited to the given number.
|
||||||
- **link mode**
|
- **link mode**
|
||||||
- Gets posts from given reddit link.
|
- Gets posts from given reddit link.
|
||||||
- You may customize the behaviour with `--sort`, `--time`, `--limit`.
|
- You may customize the behaviour with `--sort`, `--time`, `--limit`.
|
||||||
- You may also use search in this mode. See [`python script.py --help`](#using-the-command-line-arguments).
|
- This mode is only accessible via command-line arguments. See **[`python script.py --help`](#using-the-command-line-arguments)**
|
||||||
- **log read mode**
|
- **log read mode**
|
||||||
- Takes a log file created by the program itself (a JSON file), reads the posts in it and tries downloading them again.
|
- Takes a log file created by the program itself (a JSON file), reads the posts in it and tries downloading them again.
|
||||||
- Running log read mode for FAILED.json file once after the download is complete is **HIGHLY** recommended as unexpected problems may occur.
|
- Running log read mode for FAILED.json file once after the download is complete is **HIGHLY** recommended as unexpected problems may occur.
|
||||||
|
|
||||||
## Running the script
|
## Running the script
|
||||||
**DO NOT** let more than one instance of the script run as it interferes with IMGUR Request Rate.
|
Running more than one instance of the script at the same time is **not** recommended, as it interferes with the IMGUR request rate.
|
||||||
|
|
||||||
### Using the command line arguments
|
### Using the command line arguments
|
||||||
If no arguments are passed, the program will prompt you for the arguments below, which means you can start the script by double-clicking it (at least on Windows).
|
If no arguments are passed, the program will prompt you for the arguments below, which means you can start the script by double-clicking it (at least on Windows).
|
||||||
@@ -170,6 +168,8 @@ Python have real issues about naming their program
|
|||||||
- Reddit stores self posts as Markdown, so the script downloads them as they are in order to preserve their styling. There is a great Chrome extension [here](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with their styling. Install it and open the files with Chrome.
|
- Reddit stores self posts as Markdown, so the script downloads them as they are in order to preserve their styling. There is a great Chrome extension [here](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with their styling. Install it and open the files with Chrome.
|
||||||
|
|
||||||
## Changelog
|
## Changelog
|
||||||
|
### [11/07/2018]()
|
||||||
|
- Improve UX and UI
|
||||||
### [10/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/ffe3839aee6dc1a552d95154d817aefc2b66af81)
|
### [10/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/ffe3839aee6dc1a552d95154d817aefc2b66af81)
|
||||||
- Added support for *self* post
|
- Added support for *self* post
|
||||||
- Now getting posts is quicker
|
- Now getting posts is quicker
|
||||||
|
|||||||
262
script.py
262
script.py
@@ -24,12 +24,6 @@ __version__ = "1.0.1"
|
|||||||
__maintainer__ = "Ali Parlakci"
|
__maintainer__ = "Ali Parlakci"
|
||||||
__email__ = "parlakciali@gmail.com"
|
__email__ = "parlakciali@gmail.com"
|
||||||
|
|
||||||
def debug(*post):
|
|
||||||
GLOBAL.config = getConfig('config.json')
|
|
||||||
GLOBAL.directory = Path(".\\debug\\")
|
|
||||||
download([*post])
|
|
||||||
quit()
|
|
||||||
|
|
||||||
def getConfig(configFileName):
|
def getConfig(configFileName):
|
||||||
"""Read credentials from config.json file"""
|
"""Read credentials from config.json file"""
|
||||||
|
|
||||||
@@ -66,7 +60,7 @@ def parseArguments(arguments=[]):
|
|||||||
description="This program downloads " \
|
description="This program downloads " \
|
||||||
"media from reddit " \
|
"media from reddit " \
|
||||||
"posts")
|
"posts")
|
||||||
parser.add_argument("directory",
|
parser.add_argument("--directory",
|
||||||
help="Specifies the directory where posts will be " \
|
help="Specifies the directory where posts will be " \
|
||||||
"downloaded to",
|
"downloaded to",
|
||||||
metavar="DIRECTORY")
|
metavar="DIRECTORY")
|
||||||
@@ -131,7 +125,6 @@ def parseArguments(arguments=[]):
|
|||||||
parser.add_argument("--limit",
|
parser.add_argument("--limit",
|
||||||
help="default: unlimited",
|
help="default: unlimited",
|
||||||
metavar="Limit",
|
metavar="Limit",
|
||||||
default=None,
|
|
||||||
type=int)
|
type=int)
|
||||||
|
|
||||||
parser.add_argument("--time",
|
parser.add_argument("--time",
|
||||||
@@ -157,89 +150,167 @@ def checkConflicts():
|
|||||||
if not, raise errors
|
if not, raise errors
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if GLOBAL.arguments.saved is False:
|
|
||||||
saved = 0
|
|
||||||
else:
|
|
||||||
saved = 1
|
|
||||||
|
|
||||||
if GLOBAL.arguments.subreddit is None:
|
|
||||||
subreddit = 0
|
|
||||||
else:
|
|
||||||
subreddit = 1
|
|
||||||
|
|
||||||
if GLOBAL.arguments.submitted is False:
|
|
||||||
submitted = 0
|
|
||||||
else:
|
|
||||||
submitted = 1
|
|
||||||
|
|
||||||
if GLOBAL.arguments.search is None:
|
|
||||||
search = 0
|
|
||||||
else:
|
|
||||||
search = 1
|
|
||||||
|
|
||||||
if GLOBAL.arguments.log is None:
|
|
||||||
log = 0
|
|
||||||
else:
|
|
||||||
log = 1
|
|
||||||
|
|
||||||
if GLOBAL.arguments.link is None:
|
|
||||||
link = 0
|
|
||||||
else:
|
|
||||||
link = 1
|
|
||||||
|
|
||||||
if GLOBAL.arguments.user is None:
|
if GLOBAL.arguments.user is None:
|
||||||
user = 0
|
user = 0
|
||||||
else:
|
else:
|
||||||
user = 1
|
user = 1
|
||||||
|
|
||||||
if GLOBAL.arguments.upvoted is False:
|
modes = ["saved","subreddit","submitted","search","log","link","upvoted"]
|
||||||
upvoted = 0
|
|
||||||
|
values = {
|
||||||
|
x: 0 if getattr(GLOBAL.arguments,x) is None or \
|
||||||
|
getattr(GLOBAL.arguments,x) is False \
|
||||||
|
else 1 \
|
||||||
|
for x in modes
|
||||||
|
}
|
||||||
|
|
||||||
|
if not sum(values[x] for x in values) == 1:
|
||||||
|
raise ProgramModeError("Invalid program mode")
|
||||||
|
|
||||||
|
if values["search"]+values["saved"] == 2:
|
||||||
|
raise SearchModeError("You cannot search in your saved posts")
|
||||||
|
|
||||||
|
if values["search"]+values["submitted"] == 2:
|
||||||
|
raise SearchModeError("You cannot search in submitted posts")
|
||||||
|
|
||||||
|
if values["search"]+values["upvoted"] == 2:
|
||||||
|
raise SearchModeError("You cannot search in upvoted posts")
|
||||||
|
|
||||||
|
if values["upvoted"]+values["submitted"] == 1 and user == 0:
|
||||||
|
raise RedditorNameError("No redditor name given")
|
||||||
|
|
||||||
|
class PromptUser:
|
||||||
|
@staticmethod
|
||||||
|
def chooseFrom(choices):
|
||||||
|
print()
|
||||||
|
choicesByIndex = list(str(x) for x in range(len(choices)+1))
|
||||||
|
for i in range(len(choices)):
|
||||||
|
print("{indent}[{order}] {mode}".format(
|
||||||
|
indent=" "*4,order=i+1,mode=choices[i]
|
||||||
|
))
|
||||||
|
print(" "*4+"[0] exit\n")
|
||||||
|
choice = input("> ")
|
||||||
|
while not choice.lower() in choices+choicesByIndex:
|
||||||
|
print("Invalid input\n")
|
||||||
|
programModeIndex = input("> ")
|
||||||
|
|
||||||
|
if choice == "0":
|
||||||
|
quit()
|
||||||
|
elif choice in choicesByIndex:
|
||||||
|
return choices[int(choice)-1]
|
||||||
else:
|
else:
|
||||||
upvoted = 1
|
return choice
|
||||||
|
|
||||||
if not saved+subreddit+log+link+submitted+upvoted == 1:
|
def __init__(self):
|
||||||
print("Program mode is invalid")
|
print("select program mode:")
|
||||||
quit()
|
programModes = [
|
||||||
|
"search","subreddit","multireddit",
|
||||||
|
"submitted","upvoted","saved","log"
|
||||||
|
]
|
||||||
|
programMode = self.chooseFrom(programModes)
|
||||||
|
|
||||||
if search+subreddit == 2:
|
if programMode == "search":
|
||||||
print("You cannot search in your saved posts")
|
GLOBAL.arguments.search = input("\nquery: ")
|
||||||
quit()
|
GLOBAL.arguments.subreddit = input("\nsubreddit: ")
|
||||||
|
|
||||||
if search+submitted == 2:
|
print("\nselect sort type:")
|
||||||
print("You cannot search in submitted posts")
|
sortTypes = [
|
||||||
quit()
|
"relevance","top","new"
|
||||||
|
]
|
||||||
|
sortType = self.chooseFrom(sortTypes)
|
||||||
|
GLOBAL.arguments.sort = sortType
|
||||||
|
|
||||||
if search+upvoted == 2:
|
print("\nselect time filter:")
|
||||||
print("You cannot search in upvoted posts")
|
timeFilters = [
|
||||||
quit()
|
"hour","day","week","month","year","all"
|
||||||
|
]
|
||||||
|
timeFilter = self.chooseFrom(timeFilters)
|
||||||
|
GLOBAL.arguments.time = timeFilter
|
||||||
|
|
||||||
if upvoted+submitted == 1 and user == 0:
|
if programMode == "subreddit":
|
||||||
print("No redditor name given")
|
GLOBAL.arguments.subreddit = input("\nsubreddit: ")
|
||||||
quit()
|
if " " in GLOBAL.arguments.subreddit:
|
||||||
|
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit.split())
|
||||||
|
|
||||||
def postFromLog(fileName):
|
print("\nselect sort type:")
|
||||||
"""Analyze a log file and return a list of dictionaries containing
|
sortTypes = [
|
||||||
submissions
|
"hot","top","new","rising","controversial"
|
||||||
"""
|
]
|
||||||
if Path.is_file(Path(fileName)):
|
sortType = self.chooseFrom(sortTypes)
|
||||||
content = jsonFile(fileName).read()
|
GLOBAL.arguments.sort = sortType
|
||||||
|
|
||||||
|
if sortType in ["top","controversial"]:
|
||||||
|
print("\nselect time filter:")
|
||||||
|
timeFilters = [
|
||||||
|
"hour","day","week","month","year","all"
|
||||||
|
]
|
||||||
|
timeFilter = self.chooseFrom(timeFilters)
|
||||||
|
GLOBAL.arguments.time = timeFilter
|
||||||
else:
|
else:
|
||||||
print("File not found")
|
GLOBAL.arguments.time = "all"
|
||||||
quit()
|
|
||||||
|
|
||||||
|
elif programMode == "multireddit":
|
||||||
|
GLOBAL.arguments.user = input("\nredditor: ")
|
||||||
|
GLOBAL.arguments.subreddit = input("\nmultireddit: ")
|
||||||
|
|
||||||
|
print("\nselect sort type:")
|
||||||
|
sortTypes = [
|
||||||
|
"hot","top","new","rising","controversial"
|
||||||
|
]
|
||||||
|
sortType = self.chooseFrom(sortTypes)
|
||||||
|
GLOBAL.arguments.sort = sortType
|
||||||
|
|
||||||
|
if sortType in ["top","controversial"]:
|
||||||
|
print("\nselect time filter:")
|
||||||
|
timeFilters = [
|
||||||
|
"hour","day","week","month","year","all"
|
||||||
|
]
|
||||||
|
timeFilter = self.chooseFrom(timeFilters)
|
||||||
|
GLOBAL.arguments.time = timeFilter
|
||||||
|
else:
|
||||||
|
GLOBAL.arguments.time = "all"
|
||||||
|
|
||||||
|
elif programMode == "submitted":
|
||||||
|
GLOBAL.arguments.submitted = True
|
||||||
|
GLOBAL.arguments.user = input("\nredditor: ")
|
||||||
|
|
||||||
|
print("\nselect sort type:")
|
||||||
|
sortTypes = [
|
||||||
|
"hot","top","new","controversial"
|
||||||
|
]
|
||||||
|
sortType = self.chooseFrom(sortTypes)
|
||||||
|
GLOBAL.arguments.sort = sortType
|
||||||
|
|
||||||
|
if sortType == "top":
|
||||||
|
print("\nselect time filter:")
|
||||||
|
timeFilters = [
|
||||||
|
"hour","day","week","month","year","all"
|
||||||
|
]
|
||||||
|
timeFilter = self.chooseFrom(timeFilters)
|
||||||
|
GLOBAL.arguments.time = timeFilter
|
||||||
|
else:
|
||||||
|
GLOBAL.arguments.time = "all"
|
||||||
|
|
||||||
|
elif programMode == "upvoted":
|
||||||
|
GLOBAL.arguments.upvoted = True
|
||||||
|
GLOBAL.arguments.user = input("\nredditor: ")
|
||||||
|
|
||||||
|
elif programMode == "saved":
|
||||||
|
GLOBAL.arguments.saved = True
|
||||||
|
|
||||||
|
elif programMode == "log":
|
||||||
|
while True:
|
||||||
|
GLOBAL.arguments.log = input("\nlog file directory:")
|
||||||
|
if Path(GLOBAL.arguments.log ).is_file():
|
||||||
|
break
|
||||||
|
|
||||||
|
while True:
|
||||||
try:
|
try:
|
||||||
del content["HEADER"]
|
GLOBAL.arguments.limit = int(input("\nlimit: "))
|
||||||
except KeyError:
|
break
|
||||||
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
posts = []
|
|
||||||
|
|
||||||
for post in content:
|
|
||||||
if not content[post][-1]['postType'] == None:
|
|
||||||
posts.append(content[post][-1])
|
|
||||||
|
|
||||||
return posts
|
|
||||||
|
|
||||||
def prepareAttributes():
|
def prepareAttributes():
|
||||||
ATTRIBUTES = {}
|
ATTRIBUTES = {}
|
||||||
|
|
||||||
@@ -285,6 +356,7 @@ def prepareAttributes():
|
|||||||
ATTRIBUTES["time"] = GLOBAL.arguments.time
|
ATTRIBUTES["time"] = GLOBAL.arguments.time
|
||||||
|
|
||||||
elif GLOBAL.arguments.subreddit is not None:
|
elif GLOBAL.arguments.subreddit is not None:
|
||||||
|
if type(GLOBAL.arguments.subreddit) == list:
|
||||||
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit)
|
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit)
|
||||||
|
|
||||||
ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit
|
ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit
|
||||||
@@ -305,6 +377,29 @@ def prepareAttributes():
|
|||||||
|
|
||||||
return ATTRIBUTES
|
return ATTRIBUTES
|
||||||
|
|
||||||
|
def postFromLog(fileName):
|
||||||
|
"""Analyze a log file and return a list of dictionaries containing
|
||||||
|
submissions
|
||||||
|
"""
|
||||||
|
if Path.is_file(Path(fileName)):
|
||||||
|
content = jsonFile(fileName).read()
|
||||||
|
else:
|
||||||
|
print("File not found")
|
||||||
|
quit()
|
||||||
|
|
||||||
|
try:
|
||||||
|
del content["HEADER"]
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
posts = []
|
||||||
|
|
||||||
|
for post in content:
|
||||||
|
if not content[post][-1]['postType'] == None:
|
||||||
|
posts.append(content[post][-1])
|
||||||
|
|
||||||
|
return posts
|
||||||
|
|
||||||
def postExists(POST):
|
def postExists(POST):
|
||||||
"""Figure out a file's name and checks if the file already exists"""
|
"""Figure out a file's name and checks if the file already exists"""
|
||||||
|
|
||||||
@@ -482,20 +577,25 @@ def download(submissions):
|
|||||||
print(" Total of {} links downloaded!".format(downloadedCount))
|
print(" Total of {} links downloaded!".format(downloadedCount))
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if sys.argv[-1].endswith(__file__):
|
|
||||||
GLOBAL.arguments = parseArguments(input("> ").split())
|
|
||||||
else:
|
|
||||||
GLOBAL.arguments = parseArguments()
|
GLOBAL.arguments = parseArguments()
|
||||||
|
|
||||||
if GLOBAL.arguments.directory is not None:
|
if GLOBAL.arguments.directory is not None:
|
||||||
GLOBAL.directory = Path(GLOBAL.arguments.directory)
|
GLOBAL.directory = Path(GLOBAL.arguments.directory)
|
||||||
else:
|
else:
|
||||||
print("Invalid directory")
|
GLOBAL.directory = Path(input("download directory: "))
|
||||||
quit()
|
|
||||||
GLOBAL.config = getConfig(Path(PurePath(__file__).parent / 'config.json'))
|
|
||||||
|
|
||||||
|
print("\n"," ".join(sys.argv),"\n")
|
||||||
|
|
||||||
|
try:
|
||||||
checkConflicts()
|
checkConflicts()
|
||||||
|
except ProgramModeError as err:
|
||||||
|
PromptUser()
|
||||||
|
except Exception as err:
|
||||||
|
print(err)
|
||||||
|
quit()
|
||||||
|
|
||||||
|
GLOBAL.config = getConfig("config.json")
|
||||||
|
|
||||||
print(sys.argv)
|
|
||||||
|
|
||||||
if GLOBAL.arguments.log is not None:
|
if GLOBAL.arguments.log is not None:
|
||||||
logDir = Path(GLOBAL.arguments.log)
|
logDir = Path(GLOBAL.arguments.log)
|
||||||
|
|||||||
@@ -19,6 +19,15 @@ class FileNameTooLong(Exception):
|
|||||||
class InvalidRedditLink(Exception):
|
class InvalidRedditLink(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class ProgramModeError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class SearchModeError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class RedditorNameError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
class NoMatchingSubmissionFound(Exception):
|
class NoMatchingSubmissionFound(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user