104 Commits

Author SHA1 Message Date
Ali Parlakci
27532408c1 Update version 2018-07-22 18:06:46 +03:00
Ali Parlakci
32647beee9 Update changelog 2018-07-22 18:06:24 +03:00
Ali Parlakci
a67da461d2 Fixed the bug that makes multireddit mode unusable 2018-07-22 18:05:20 +03:00
Ali Parlakci
8c6f593496 Update changelog 2018-07-22 17:39:30 +03:00
Ali Parlakci
b60ce8a71e Put log files in a folder 2018-07-22 17:38:35 +03:00
Ali Parlakci
49920cc457 Bug fix 2018-07-22 17:25:30 +03:00
Ali Parlakci
c70e7c2ebb Update version 2018-07-22 14:39:09 +03:00
Ali Parlakci
3931dfff54 Update links 2018-07-21 22:03:16 +03:00
Ali Parlakci
4a8c2377f9 Updated --help page 2018-07-21 21:55:01 +03:00
Ali Parlakci
8a18a42a9a Updated changelog 2018-07-21 21:54:23 +03:00
Ali Parlakçı
6c2d748fbc Exclude post types (#38)
* Added argument for excluded links

* Added exclude in PromptUser()

* Added functionality for exclude and bug fix
2018-07-21 21:52:28 +03:00
Ali Parlakci
8c966df105 Improved traceback 2018-07-21 21:50:54 +03:00
Ali Parlakci
2adf2c0451 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-20 13:34:51 +03:00
Ali Parlakci
3e3a2df4d1 Bug fix at direct links 2018-07-20 13:34:23 +03:00
Ali Parlakci
7548a01019 Bug fix at direct links 2018-07-20 13:33:50 +03:00
Ali Parlakci
2ab16608d5 Update links 2018-07-20 13:06:01 +03:00
Ali Parlakci
e15f33b97a Fix README 2018-07-20 13:04:47 +03:00
Ali Parlakci
27211f993c 0 input for no limit 2018-07-20 13:03:50 +03:00
Ali Parlakci
87d3b294f7 0 input for no limit 2018-07-20 13:01:39 +03:00
Ali Parlakci
8128378dcd 0 input for no limit 2018-07-20 13:01:21 +03:00
Ali Parlakçı
cc93aa3012 Update commit link 2018-07-19 15:47:28 +03:00
Ali Parlakci
50c4a8d6d7 Update version 2018-07-19 15:38:49 +03:00
Ali Parlakci
5737904a54 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-19 15:35:46 +03:00
Ali Parlakci
f6eba6c5b0 Added more gfycat links 2018-07-19 15:34:58 +03:00
Ali Parlakci
41cbb58db3 Added more gfycat links 2018-07-19 15:22:12 +03:00
Ali Parlakçı
c569124406 Update Changelog 2018-07-19 14:58:17 +03:00
Ali Parlakçı
1a3836a8e1 Added v.redd.it support (#36) 2018-07-19 14:57:16 +03:00
Ali Parlakci
fde6a1fac4 Added custom exception descriptions to FAILED.json file 2018-07-19 14:56:00 +03:00
Ali Parlakci
6bba2c4dbb Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-18 09:17:48 +03:00
Ali Parlakci
a078d44236 Edited FAQ 2018-07-18 09:17:36 +03:00
Ali Parlakçı
deae0be769 Delete _config.yml 2018-07-15 10:54:40 +03:00
Ali Parlakci
3cf0203e6b Typo fix 2018-07-13 14:39:01 +03:00
Ali Parlakci
0b31db0e2e Update FAQ 2018-07-13 14:37:35 +03:00
Ali Parlakci
d3f2b1b08e Update executables' names 2018-07-13 14:34:20 +03:00
Ali Parlakci
0ec4bb3008 Update version 2018-07-13 14:18:18 +03:00
Ali Parlakci
0dbe2ed917 Update changelog 2018-07-13 14:13:39 +03:00
Ali Parlakci
9f831e1b78 Added .exe to executable's extension 2018-07-13 14:12:17 +03:00
Ali Parlakci
59012077e1 Excludes build folders 2018-07-13 14:11:41 +03:00
Ali Parlakci
5e3c79160b Changed config.json path 2018-07-13 14:10:21 +03:00
Ali Parlakci
1e8eaa1a8d Update changelog 2018-07-12 23:05:13 +03:00
Ali Parlakci
7dbc83fdce Initial commit 2018-07-12 23:03:00 +03:00
Ali Parlakci
50a77f6ba5 Update version 2018-07-12 22:19:46 +03:00
Ali Parlakci
4f7e406cd6 Take multiple subreddits 2018-07-12 22:00:43 +03:00
Ali Parlakci
ded3cece8c Update changelog 2018-07-12 21:16:20 +03:00
Ali Parlakci
dd671fd738 Accept exit to exit the program when taking arguments 2018-07-12 21:09:31 +03:00
Ali Parlakci
b357dff52c Added more examples 2018-07-12 14:31:39 +03:00
Ali Parlakci
32ffd3b861 Added dependency installation 2018-07-12 14:27:16 +03:00
Ali Parlakci
02673c3950 Update changelog 2018-07-12 14:15:26 +03:00
Ali Parlakci
8448e47080 Wait on KeyboardInterrupt 2018-07-12 14:14:54 +03:00
Ali Parlakci
39f2c73f4c Another option to open terminal added 2018-07-12 13:35:45 +03:00
Ali Parlakci
fe942b4734 Added linux executable guide 2018-07-12 13:32:59 +03:00
Ali Parlakci
205617e051 Changed quit() to sys.exit() 2018-07-12 13:00:02 +03:00
Ali Parlakci
b93b206a96 Typo fix 2018-07-12 12:31:32 +03:00
Ali Parlakci
b84684f786 Check if installed 2018-07-12 12:30:11 +03:00
Ali Parlakci
68558950ca Broken links fixed 2018-07-12 12:26:46 +03:00
aliparlakci
795965f754 Readme refactor (#35)
* Shorten the README.md file

* Added more information and guides

* Typo fix

* Rename sections
2018-07-12 12:25:09 +03:00
Ali Parlakci
d2ed8327df Update links 2018-07-12 02:07:02 +03:00
Ali Parlakci
a28a7776ab Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-12 02:06:25 +03:00
Ali Parlakci
1a50bcecb0 Bug fix 2018-07-12 02:06:16 +03:00
aliparlakci
d8a2204024 Update README.md 2018-07-12 02:01:29 +03:00
aliparlakci
0577c332b5 Update README.md 2018-07-12 01:59:35 +03:00
Ali Parlakci
e09da8db73 Updated the links 2018-07-12 01:43:44 +03:00
Ali Parlakci
fd507870e1 Added more information 2018-07-12 01:39:58 +03:00
aliparlakci
c434d346a7 Update help_page.md 2018-07-12 01:28:17 +03:00
aliparlakci
fcc3e53da7 Delete setup.py 2018-07-12 01:27:13 +03:00
Ali Parlakci
7452af8bd0 Author added 2018-07-12 01:16:28 +03:00
Ali Parlakci
1fd6951420 Typo fix 2018-07-12 01:16:11 +03:00
Ali Parlakci
bddac14804 Bug fix 2018-07-12 00:09:20 +03:00
Ali Parlakci
6e2c69d053 Initial commit 2018-07-12 00:01:02 +03:00
Ali Parlakci
bedd481195 Update version 2018-07-12 00:00:39 +03:00
Ali Parlakci
2994adea38 Removed installing packages in the runtime 2018-07-11 23:59:14 +03:00
Ali Parlakci
f1a9f1d3e4 Added requirements.txt 2018-07-11 23:55:03 +03:00
aliparlakci
9849c0439b Merge pull request #34 from aliparlakci/MinimalizeREADME
Minimalize README
2018-07-11 23:29:58 +03:00
aliparlakci
ef9f0c543f Merge pull request #33 from aliparlakci/ErrorHandling
Error handling
2018-07-11 23:29:41 +03:00
aliparlakci
26531fd6c0 Merge pull request #32 from aliparlakci/PromptForArguments
Improve UX
2018-07-11 23:29:11 +03:00
Ali Parlakci
cae044f7b6 Stylize print outs 2018-07-11 23:27:16 +03:00
Ali Parlakci
4daa9cd66a Initial commit 2018-07-11 23:24:36 +03:00
Ali Parlakci
e83d2626d3 Added clarifcation to log mode 2018-07-11 23:24:15 +03:00
Ali Parlakci
f67c5f62c6 Strip down 2018-07-11 23:23:55 +03:00
Ali Parlakci
fef559cd40 Update changelog 2018-07-11 22:33:10 +03:00
Ali Parlakci
42672dc15d Adding error tracebacks to CONSOLE_LOG files 2018-07-11 22:27:48 +03:00
Ali Parlakci
3b15141b49 Bug fix 2018-07-11 22:18:54 +03:00
Ali Parlakci
d080ca17bc Changes for latest update on the code 2018-07-11 21:58:26 +03:00
Ali Parlakci
7f97bd212a Bug fix 2018-07-11 21:25:45 +03:00
Ali Parlakci
65592c5d3a Improved checkConflicts() 2018-07-11 19:57:38 +03:00
Ali Parlakci
57a5f0c85c Print arguments before anything 2018-07-11 19:31:32 +03:00
Ali Parlakci
f6240f402e Refactor checkConflicts() 2018-07-11 19:25:24 +03:00
Ali Parlakci
fd179c0e4b Typo fix 2018-07-11 18:59:17 +03:00
Ali Parlakci
d9dc3132f6 Rearranged functions 2018-07-11 18:56:39 +03:00
Ali Parlakci
45191e2c60 Prompt user if no arguments passed 2018-07-11 18:40:40 +03:00
aliparlakci
319a6c82a3 Updated the Python version details 2018-07-11 02:40:36 +03:00
Ali Parlakci
2c62337fac Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-10 18:07:31 +03:00
Ali Parlakci
7c9c84c411 Print out post type before checking existence 2018-07-10 18:07:09 +03:00
aliparlakci
8922c27f9b Typo fix 2018-07-10 14:03:40 +03:00
aliparlakci
cc59710f33 Merge pull request #31 from aliparlakci/ChangePythonKeywords
Replaced py -3 with python
2018-07-10 13:34:19 +03:00
Ali Parlakci
49129bea24 Replaced py -3 with python 2018-07-10 13:33:26 +03:00
Ali Parlakci
a701444d5b Updated the version info 2018-07-10 03:14:43 +03:00
Ali Parlakci
5a030a156c Updated the links 2018-07-10 03:13:39 +03:00
Ali Parlakci
ffe3839aee Added .md as a possible extension 2018-07-10 03:12:24 +03:00
Ali Parlakci
a796038b71 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-10 03:11:49 +03:00
Ali Parlakci
ddccebbc70 Not creating POSTS.json file properly bug fix 2018-07-10 03:11:38 +03:00
aliparlakci
5cfa443f83 Edited FAQ about markdowns 2018-07-10 02:57:44 +03:00
aliparlakci
2103c62218 Fixed the broken link 2018-07-10 02:55:45 +03:00
aliparlakci
10128b63ac Added link to Changelog 2018-07-10 02:54:28 +03:00
12 changed files with 614 additions and 298 deletions

7
.gitignore vendored
View File

@@ -1,4 +1,5 @@
build/
dist/
MANIFEST
__pycache__/ __pycache__/
src/__pycache__/ src/__pycache__/
logs/
*.json

199
README.md
View File

@@ -1,41 +1,30 @@
# Bulk Downloader for Reddit # Bulk Downloader for Reddit
This program downloads imgur, gfycat and direct image and video links of saved posts from a reddit account. It is written in Python 3. This program downloads imgur, gfycat and direct image and video links of saved posts from a reddit account. It is written in Python 3.
**PLEASE** post any issue you had with the script to [Issues](https://github.com/aliparlakci/bulk-downloader-for-reddit/issues) tab. Since I don't have any testers or contributers I need your feedback. **PLEASE** post any issue you have with the script to [Issues](https://github.com/aliparlakci/bulk-downloader-for-reddit/issues) tab. Since I don't have any testers or contributers I need your feedback.
## Table of Contents ## What it can do
- Can get posts from: frontpage, subreddits, multireddits, redditor's submissions, upvoted and saved posts; search results or just plain reddit links
- Sorts posts by hot, top, new and so on
- Downloads imgur albums, gfycat links, [self posts](#how-do-i-open-self-post-files) and any link to a direct image
- Skips the existing ones
- Puts post titles to file's name
- Puts every post to its subreddit's folder
- Saves a reusable copy of posts' details that are found so that they can be re-downloaded again
- Logs failed ones in a file so that you can try to download them later
- Can run with double-clicking on Windows
- [What it can do?](#what-it-can-do) ## [Download the latest release](https://github.com/aliparlakci/bulk-downloader-for-reddit/releases/latest)
- [Requirements](#requirements)
- [Setting up the script](#setting-up-the-script)
- [Creating an imgur app](#creating-an-imgur-app)
- [Program Modes](#program-modes)
- [Running the script](#running-the-script)
- [Using the command line arguments](#using-the-command-line-arguments)
- [Examples](#examples)
- [FAQ](#faq)
- [Changelog](#changelog)
## What it can do? ## How it works
### It...
- can get posts from: frontpage, subreddits, multireddits, redditor's submissions, upvoted and saved posts; search results or just plain reddit links
- sorts post by hot, top, new and so on
- downloads imgur albums, gfycat links, [self posts](#i-can-t-open-the-self-posts-) and any link to a direct image
- skips the existing ones
- puts post titles to file's name
- puts every post to its subreddit's folder
- saves reusable a copy of posts' details that are found so that they can be re-downloaded again
- logs failed ones in a file to so that you can try to download them later
- can be run with double-clicking on Windows (but I don't recommend it)
## Requirements
- Python 3.x*
You can install Python 3 here: [https://www.python.org/downloads/](https://www.python.org/downloads/)
You have to check "**Add Python 3 to PATH**" option when installing in order it to run correctly. - For **Windows** and **Linux** users, there are executable files to run easily without installing a third party program. But if you are a paranoid like me, you can **[compile it from source code](docs/COMPILE_FROM_SOURCE.md)**.
- In Windows, double click on bulk-downloader-for-reddit file
- In Linux, extract files to a folder and open terminal inside it. Type **`./bulk-downloader-for-reddit`**
- **MacOS** users have to **[compile it from source code](docs/COMPILE_FROM_SOURCE.md)**.
*\*Although the latest version of python is suggested, you can use 3.6.5 since it runs perfectly on that version* Script also accepts **command-line arguments**, get further information from **[`--help`](docs/COMMAND_LINE_ARGUMENTS.md)**
## Setting up the script ## Setting up the script
Because this is not a commercial app, you need to create an imgur developer app in order API to work. Because this is not a commercial app, you need to create an imgur developer app in order API to work.
@@ -50,120 +39,50 @@ Because this is not a commercial app, you need to create an imgur developer app
It should redirect to a page which shows your **imgur_client_id** and **imgur_client_secret** It should redirect to a page which shows your **imgur_client_id** and **imgur_client_secret**
\*Select **OAuth 2 authorization without a callback URL** first then select **Anonymous usage without user authorization** if it says *Authorization callback URL: required* \* Select **OAuth 2 authorization without a callback URL** first then select **Anonymous usage without user authorization** if it says *Authorization callback URL: required*
## Program Modes
All the program modes are activated with command-line arguments as shown [here](#using-the-command-line-arguments)
- **saved mode**
- Gets posts from given user's saved posts.
- **submitted mode**
- Gets posts from given user's submitted posts.
- **upvoted mode**
- Gets posts from given user's upvoted posts.
- **subreddit mode**
- Gets posts from given subreddit or subreddits that is sorted by given type and limited by given number.
- You may also use search in this mode. See [`py -3 script.py --help`](#using-the-command-line-arguments).
- **multireddit mode**
- Gets posts from given user's given multireddit that is sorted by given type and limited by given number.
- **link mode**
- Gets posts from given reddit link.
- You may customize the behaviour with `--sort`, `--time`, `--limit`.
- You may also use search in this mode. See [`py -3 script.py --help`](#using-the-command-line-arguments).
- **log read mode**
- Takes a log file which created by itself (json files), reads posts and tries downloading them again.
- Running log read mode for FAILED.json file once after the download is complete is **HIGHLY** recommended as unexpected problems may occur.
## Running the script
**DO NOT** let more than one instance of the script run as it interferes with IMGUR Request Rate.
### Using the command line arguments
If no arguments are passed program will prompt you for arguments below which means you may start up the script with double-clicking on it (at least on Windows for sure).
Open up the [terminal](https://www.reddit.com/r/NSFW411/comments/8vtnl8/meta_i_made_reddit_downloader_that_can_download/e1rnbnl) and navigate to where script.py is. If you are unfamiliar with changing directories in terminal see Change Directories in [this article](https://lifehacker.com/5633909/who-needs-a-mouse-learn-to-use-the-command-line-for-almost-anything).
Run the script.py file from terminal with command-line arguments. Here is the help page:
Use `.\` for current directory and `..\` for upper directory when using short directories, otherwise it might act weird.
```console
$ py -3 script.py --help
usage: script.py [-h] [--link link] [--saved] [--submitted] [--upvoted]
[--log LOG FILE] [--subreddit SUBREDDIT [SUBREDDIT ...]]
[--multireddit MULTIREDDIT] [--user redditor]
[--search query] [--sort SORT TYPE] [--limit Limit]
[--time TIME_LIMIT] [--NoDownload]
DIRECTORY
This program downloads media from reddit posts
positional arguments:
DIRECTORY Specifies the directory where posts will be downloaded
to
optional arguments:
-h, --help show this help message and exit
--link link, -l link Get posts from link
--saved Triggers saved mode
--submitted Gets posts of --user
--upvoted Gets upvoted posts of --user
--log LOG FILE Triggers log read mode and takes a log file
--subreddit SUBREDDIT [SUBREDDIT ...]
Triggers subreddit mode and takes subreddit's name
without r/. use "frontpage" for frontpage
--multireddit MULTIREDDIT
Triggers multireddit mode and takes multireddit's name
without m/
--user redditor reddit username if needed. use "me" for current user
--search query Searches for given query in given subreddits
--sort SORT TYPE Either hot, top, new, controversial, rising or
relevance default: hot
--limit Limit default: unlimited
--time TIME_LIMIT Either hour, day, week, month, year or all. default:
all
--NoDownload Just gets the posts and store them in a file for
downloading later
```
### Examples
#### Don't include `py -3 script.py` part if you start the script by double-clicking
```console
py -3 script.py .\\NEW_FOLDER --sort new --time all --limit 10 --link "https://www.reddit.com/r/gifs/search?q=dogs&restrict_sr=on&type=link&sort=new&t=month"
```
```console
py -3 script.py .\\NEW_FOLDER --link "https://www.reddit.com/r/learnprogramming/comments/7mjw12/"
```
```console
py -3 script.py .\\NEW_FOLDER --search cats --sort new --time all --subreddit gifs pics --NoDownload
```
```console
py -3 script.py .\\NEW_FOLDER --user [USER_NAME] --submitted --limit 10
```
```console
py -3 script.py .\\NEW_FOLDER --multireddit good_subs --user [USER_NAME] --sort top --time week --limit 250
```
```console
py -3 script.py .\\NEW_FOLDER\\ANOTHER_FOLDER --saved --limit 1000
```
```console
py -3 script.py C:\\NEW_FOLDER\\ANOTHER_FOLDER --log UNNAMED_FOLDER\\FAILED.json
```
## FAQ ## FAQ
### I can't startup the script no matter what. ### How do I open self post files?
- Try `python3` or `python` or `py -3` as python have real issues about naming their program - Self posts are held at reddit as styled with markdown. So, the script downloads them as they are in order not to lose their stylings.
However, there is a [great Chrome extension](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with its styling. Install it and open the files with [Chrome](https://www.google.com/intl/tr/chrome/).
### I can't open the self posts. However, they are basically text files. You can also view them with any text editor such as Notepad on Windows, gedit on Linux or Text Editor on MacOS
- Self posts are held at subreddit as Markdown. So, the script downloads them as Markdown in order not to lose their stylings. However, there is a great Chrome extension [here](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with its styling. Install it and open the files with Chrome.
### How can I change my credentials?
- All of the user data is held in the **config.json** file, which is in a folder named "Bulk Downloader for Reddit" in your **Home** directory. You can edit your credentials there.
## Changelog ## Changelog
### 10/07/2018 ### [22/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a67da461d2fcd70672effcb20c8179e3224091bb)
- Put log files in a folder named "LOG_FILES"
- Fixed the bug that makes multireddit mode unusable
### [21/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/4a8c2377f9fb4d60ed7eeb8d50aaf9a26492462a)
- Added exclude mode
### [20/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/commit/7548a010198fb693841ca03654d2c9bdf5742139)
- "0" input for no limit
- Fixed the bug that recognized non-image direct links as image links
### [19/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/41cbb58db34f500a8a5ecc3ac4375bf6c3b275bb)
- Added v.redd.it support
- Added custom exception descriptions to FAILED.json file
- Fixed the bug that prevents downloading some gfycat URLs
### [13/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/9f831e1b784a770c82252e909462871401a05c11)
- Change config.json file's path to home directory
### [12/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/50a77f6ba54c24f5647d5ea4e177400b71ff04a7)
- Added binaries for Windows and Linux
- Wait on KeyboardInterrupt
- Accept multiple subreddit input
- Fixed the bug that prevents choosing "[0] exit" with typing "exit"
### [11/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a28a7776ab826dea2a8d93873a94cd46db3a339b)
- Improvements on UX and UI
- Added logging errors to CONSOLE_LOG.txt
- Using current directory if directory has not been given yet.
### [10/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/ffe3839aee6dc1a552d95154d817aefc2b66af81)
- Added support for *self* post - Added support for *self* post
- Now getting posts is quicker - Now getting posts is quicker

View File

@@ -1,5 +0,0 @@
theme: jekyll-theme-minimal
show_downloads: false
#title: Bulk Downloader for Reddit
description: Code written by Ali PARLAKCI
google_analytics: UA-80780721-3

View File

@@ -0,0 +1,101 @@
# Using command-line arguments
See **[compiling from source](COMPILE_FROM_SOURCE.md)** page first unless you are using an executable file. If you are using an executable file, see [using terminal](COMPILE_FROM_SOURCE.md#using-terminal) and come back.
***Use*** `.\bulk-downloader-for-reddit.exe` ***or*** `./bulk-downloader-for-reddit` ***if you are using the executable***.
```console
$ python script.py --help
usage: script.py [-h] [--directory DIRECTORY] [--link link] [--saved]
[--submitted] [--upvoted] [--log LOG FILE]
[--subreddit SUBREDDIT [SUBREDDIT ...]]
[--multireddit MULTIREDDIT] [--user redditor]
[--search query] [--sort SORT TYPE] [--limit Limit]
[--time TIME_LIMIT] [--NoDownload]
This program downloads media from reddit posts
optional arguments:
-h, --help show this help message and exit
--directory DIRECTORY
Specifies the directory where posts will be downloaded
to
--link link, -l link Get posts from link
--saved Triggers saved mode
--submitted Gets posts of --user
--upvoted Gets upvoted posts of --user
--log LOG FILE Takes a log file which created by itself (json files),
reads posts and tries downloading them again.
--subreddit SUBREDDIT [SUBREDDIT ...]
Triggers subreddit mode and takes subreddit's name
without r/. use "frontpage" for frontpage
--multireddit MULTIREDDIT
Triggers multireddit mode and takes multireddit's name
without m/
--user redditor reddit username if needed. use "me" for current user
--search query Searches for given query in given subreddits
--sort SORT TYPE Either hot, top, new, controversial, rising or
relevance default: hot
--limit Limit default: unlimited
--time TIME_LIMIT Either hour, day, week, month, year or all. default:
all
--NoDownload Just gets the posts and store them in a file for
downloading later
--exclude {imgur,gfycat,direct,self} [{imgur,gfycat,direct,self} ...]
Do not download specified links
```
# Examples
- **Use `python3` instead of `python` if you are using *MacOS* or *Linux***
```console
python script.py
```
```console
.\bulk-downloader-for-reddit.exe
```
```console
python script.py
```
```console
.\bulk-downloader-for-reddit.exe --directory .\\NEW_FOLDER --search cats --sort new --time all --subreddit gifs pics --NoDownload
```
```console
./bulk-downloader-for-reddit --directory .\\NEW_FOLDER\\ANOTHER_FOLDER --saved --limit 1000
```
```console
python script.py --directory .\\NEW_FOLDER --sort new --time all --limit 10 --link "https://www.reddit.com/r/gifs/search?q=dogs&restrict_sr=on&type=link&sort=new&t=month"
```
```console
python script.py --directory .\\NEW_FOLDER --link "https://www.reddit.com/r/learnprogramming/comments/7mjw12/"
```
```console
python script.py --directory .\\NEW_FOLDER --search cats --sort new --time all --subreddit gifs pics --NoDownload
```
```console
python script.py --directory .\\NEW_FOLDER --user [USER_NAME] --submitted --limit 10
```
```console
python script.py --directory .\\NEW_FOLDER --multireddit good_subs --user [USER_NAME] --sort top --time week --limit 250
```
```console
python script.py --directory .\\NEW_FOLDER\\ANOTHER_FOLDER --saved --limit 1000
```
```console
python script.py --directory C:\\NEW_FOLDER\\ANOTHER_FOLDER --log UNNAMED_FOLDER\\FAILED.json
```
# FAQ
## I can't startup the script no matter what.
See **[finding the correct keyword for Python](COMPILE_FROM_SOURCE.md#finding-the-correct-keyword-for-python)**

View File

@@ -0,0 +1,42 @@
# Compiling from source code
## Requirements
### Python 3 Interpreter
The latest* version of **Python 3** is needed. Check whether it is already installed [here](#finding-the-correct-keyword-for-python). If not, download the matching release for your platform [here](https://www.python.org/downloads/) and install it. If you are a *Windows* user, selecting the **Add Python 3 to PATH** option is mandatory.
\* *Use Python 3.6.5 if you encounter an issue*
## Using terminal
### To open it...
- **On Windows 8/8.1/10**: Press the File tab on **Windows Explorer**, click on **Open Windows PowerShell** or **Open Windows Command Prompt** or look for *Command Prompt* or *PowerShell* in *Start Menu*.
- **On Windows 7**: Press **WindowsKey+R**, type **cmd** and hit Enter or look for *Command Prompt* or *PowerShell* in *Start Menu*.
- **On Linux**: Right-click in a folder and select **Open Terminal** or press **Ctrl+Alt+T** or look for **Terminal** in the programs.
- **On MacOS**: Look for an app called **Terminal**.
### Navigating to the directory where script is downloaded
Go inside the folder where script.py is located. If you are not familiar with changing directories in the command prompt or terminal, read *Changing Directories* in [this article](https://lifehacker.com/5633909/who-needs-a-mouse-learn-to-use-the-command-line-for-almost-anything).
## Finding the correct keyword for Python
Enter these lines into the terminal window, one at a time, until one of them prints out the version you have downloaded and installed:
- `python --version`
- `python3 --version`
- `python3.7 --version`
- `python3.6 --version`
- `py --version`
- `py -3 --version`
- `py -3.6 --version`
- `py -3.7 --version`
Once one does, your keyword is that command without the `--version` part.
## Installing dependencies
Enter the line below into the terminal window while you are in the directory where script.py is, using your keyword for Python in place of `python`:
```console
python -m pip install -r requirements.txt
```
---
Now, you can go to [Using command-line arguments](COMMAND_LINE_ARGUMENTS.md)

3
requirements.txt Normal file
View File

@@ -0,0 +1,3 @@
requests
praw
imgurpython

385
script.py
View File

@@ -6,30 +6,26 @@ saved posts from a reddit account. It is written in Python 3.
""" """
import argparse import argparse
import logging
import os import os
import sys import sys
import time import time
from io import StringIO
from pathlib import Path, PurePath from pathlib import Path, PurePath
from src.downloader import Direct, Gfycat, Imgur, Self from src.downloader import Direct, Gfycat, Imgur, Self
from src.errors import *
from src.parser import LinkDesigner from src.parser import LinkDesigner
from src.searcher import getPosts from src.searcher import getPosts
from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector, from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
printToFile) printToFile)
from src.errors import *
__author__ = "Ali Parlakci" __author__ = "Ali Parlakci"
__license__ = "GPL" __license__ = "GPL"
__version__ = "1.0.0" __version__ = "1.3.1"
__maintainer__ = "Ali Parlakci" __maintainer__ = "Ali Parlakci"
__email__ = "parlakciali@gmail.com" __email__ = "parlakciali@gmail.com"
def debug(*post):
GLOBAL.config = getConfig('config.json')
GLOBAL.directory = Path(".\\debug\\")
download([*post])
quit()
def getConfig(configFileName): def getConfig(configFileName):
"""Read credentials from config.json file""" """Read credentials from config.json file"""
@@ -66,7 +62,7 @@ def parseArguments(arguments=[]):
description="This program downloads " \ description="This program downloads " \
"media from reddit " \ "media from reddit " \
"posts") "posts")
parser.add_argument("directory", parser.add_argument("--directory",
help="Specifies the directory where posts will be " \ help="Specifies the directory where posts will be " \
"downloaded to", "downloaded to",
metavar="DIRECTORY") metavar="DIRECTORY")
@@ -88,7 +84,9 @@ def parseArguments(arguments=[]):
help="Gets upvoted posts of --user") help="Gets upvoted posts of --user")
parser.add_argument("--log", parser.add_argument("--log",
help="Triggers log read mode and takes a log file", help="Takes a log file which created by itself " \
"(json files), reads posts and tries downloadin" \
"g them again.",
# type=argparse.FileType('r'), # type=argparse.FileType('r'),
metavar="LOG FILE") metavar="LOG FILE")
@@ -131,7 +129,6 @@ def parseArguments(arguments=[]):
parser.add_argument("--limit", parser.add_argument("--limit",
help="default: unlimited", help="default: unlimited",
metavar="Limit", metavar="Limit",
default=None,
type=int) type=int)
parser.add_argument("--time", parser.add_argument("--time",
@@ -146,6 +143,12 @@ def parseArguments(arguments=[]):
" for downloading later", " for downloading later",
action="store_true", action="store_true",
default=False) default=False)
parser.add_argument("--exclude",
nargs="+",
help="Do not download specified links",
choices=["imgur","gfycat","direct","self"],
type=str)
if arguments == []: if arguments == []:
return parser.parse_args() return parser.parse_args()
@@ -157,88 +160,207 @@ def checkConflicts():
if not, raise errors if not, raise errors
""" """
if GLOBAL.arguments.saved is False:
saved = 0
else:
saved = 1
if GLOBAL.arguments.subreddit is None:
subreddit = 0
else:
subreddit = 1
if GLOBAL.arguments.submitted is False:
submitted = 0
else:
submitted = 1
if GLOBAL.arguments.search is None:
search = 0
else:
search = 1
if GLOBAL.arguments.log is None:
log = 0
else:
log = 1
if GLOBAL.arguments.link is None:
link = 0
else:
link = 1
if GLOBAL.arguments.user is None: if GLOBAL.arguments.user is None:
user = 0 user = 0
else: else:
user = 1 user = 1
if GLOBAL.arguments.upvoted is False: modes = [
upvoted = 0 "saved","subreddit","submitted","search","log","link","upvoted",
else: "multireddit"
upvoted = 1 ]
if not saved+subreddit+log+link+submitted+upvoted == 1: values = {
print("Program mode is invalid") x: 0 if getattr(GLOBAL.arguments,x) is None or \
quit() getattr(GLOBAL.arguments,x) is False \
else 1 \
for x in modes
}
if not sum(values[x] for x in values) == 1:
raise ProgramModeError("Invalid program mode")
if search+subreddit == 2: if values["search"]+values["saved"] == 2:
print("You cannot search in your saved posts") raise SearchModeError("You cannot search in your saved posts")
quit()
if search+submitted == 2: if values["search"]+values["submitted"] == 2:
print("You cannot search in submitted posts") raise SearchModeError("You cannot search in submitted posts")
quit()
if search+upvoted == 2: if values["search"]+values["upvoted"] == 2:
print("You cannot search in upvoted posts") raise SearchModeError("You cannot search in upvoted posts")
quit()
if upvoted+submitted == 1 and user == 0: if values["upvoted"]+values["submitted"] == 1 and user == 0:
print("No redditor name given") raise RedditorNameError("No redditor name given")
quit()
def postFromLog(fileName): class PromptUser:
"""Analyze a log file and return a list of dictionaries containing @staticmethod
submissions def chooseFrom(choices):
""" print()
if Path.is_file(Path(fileName)): choicesByIndex = list(str(x) for x in range(len(choices)+1))
content = jsonFile(fileName).read() for i in range(len(choices)):
else: print("{indent}[{order}] {mode}".format(
print("File not found") indent=" "*4,order=i+1,mode=choices[i]
quit() ))
print(" "*4+"[0] exit\n")
choice = input("> ")
while not choice.lower() in choices+choicesByIndex+["exit"]:
print("Invalid input\n")
programModeIndex = input("> ")
try: if choice == "0" or choice == "exit":
del content["HEADER"] sys.exit()
except KeyError: elif choice in choicesByIndex:
pass return choices[int(choice)-1]
else:
return choice
def __init__(self):
print("select program mode:")
programModes = [
"search","subreddit","multireddit",
"submitted","upvoted","saved","log"
]
programMode = self.chooseFrom(programModes)
posts = [] if programMode == "search":
GLOBAL.arguments.search = input("\nquery: ")
GLOBAL.arguments.subreddit = input("\nsubreddit: ")
for post in content: print("\nselect sort type:")
if not content[post][-1]['postType'] == None: sortTypes = [
posts.append(content[post][-1]) "relevance","top","new"
]
sortType = self.chooseFrom(sortTypes)
GLOBAL.arguments.sort = sortType
return posts print("\nselect time filter:")
timeFilters = [
"hour","day","week","month","year","all"
]
timeFilter = self.chooseFrom(timeFilters)
GLOBAL.arguments.time = timeFilter
if programMode == "subreddit":
subredditInput = input("subreddit: ")
GLOBAL.arguments.subreddit = subredditInput
while not subredditInput == "":
subredditInput = input("subreddit: ")
GLOBAL.arguments.subreddit += "+" + subredditInput
if " " in GLOBAL.arguments.subreddit:
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit.split())
# DELETE THE PLUS (+) AT THE END
GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]
print("\nselect sort type:")
sortTypes = [
"hot","top","new","rising","controversial"
]
sortType = self.chooseFrom(sortTypes)
GLOBAL.arguments.sort = sortType
if sortType in ["top","controversial"]:
print("\nselect time filter:")
timeFilters = [
"hour","day","week","month","year","all"
]
timeFilter = self.chooseFrom(timeFilters)
GLOBAL.arguments.time = timeFilter
else:
GLOBAL.arguments.time = "all"
elif programMode == "multireddit":
GLOBAL.arguments.user = input("\nredditor: ")
GLOBAL.arguments.multireddit = input("\nmultireddit: ")
print("\nselect sort type:")
sortTypes = [
"hot","top","new","rising","controversial"
]
sortType = self.chooseFrom(sortTypes)
GLOBAL.arguments.sort = sortType
if sortType in ["top","controversial"]:
print("\nselect time filter:")
timeFilters = [
"hour","day","week","month","year","all"
]
timeFilter = self.chooseFrom(timeFilters)
GLOBAL.arguments.time = timeFilter
else:
GLOBAL.arguments.time = "all"
elif programMode == "submitted":
GLOBAL.arguments.submitted = True
GLOBAL.arguments.user = input("\nredditor: ")
print("\nselect sort type:")
sortTypes = [
"hot","top","new","controversial"
]
sortType = self.chooseFrom(sortTypes)
GLOBAL.arguments.sort = sortType
if sortType == "top":
print("\nselect time filter:")
timeFilters = [
"hour","day","week","month","year","all"
]
timeFilter = self.chooseFrom(timeFilters)
GLOBAL.arguments.time = timeFilter
else:
GLOBAL.arguments.time = "all"
elif programMode == "upvoted":
GLOBAL.arguments.upvoted = True
GLOBAL.arguments.user = input("\nredditor: ")
elif programMode == "saved":
GLOBAL.arguments.saved = True
elif programMode == "log":
while True:
GLOBAL.arguments.log = input("\nlog file directory:")
if Path(GLOBAL.arguments.log ).is_file():
break
GLOBAL.arguments.exclude = []
sites = ["imgur","gfycat","direct","self"]
excludeInput = input("exclude: ").lower()
if excludeInput in sites and excludeInput != "":
GLOBAL.arguments.exclude = [excludeInput]
while not excludeInput == "":
while True:
excludeInput = input("exclude: ").lower()
if not excludeInput in sites or excludeInput in GLOBAL.arguments.exclude:
break
elif excludeInput == "":
break
else:
GLOBAL.arguments.exclude.append(excludeInput)
for i in range(len(GLOBAL.arguments.exclude)):
if " " in GLOBAL.arguments.exclude[i]:
inputWithWhitespace = GLOBAL.arguments.exclude[i]
del GLOBAL.arguments.exclude[i]
for siteInput in inputWithWhitespace.split():
if siteInput in sites and siteInput not in GLOBAL.arguments.exclude:
GLOBAL.arguments.exclude.append(siteInput)
while True:
try:
GLOBAL.arguments.limit = int(input("\nlimit (0 for none): "))
if GLOBAL.arguments.limit == 0:
GLOBAL.arguments.limit = None
break
except ValueError:
pass
def prepareAttributes(): def prepareAttributes():
ATTRIBUTES = {} ATTRIBUTES = {}
@@ -285,10 +407,14 @@ def prepareAttributes():
ATTRIBUTES["time"] = GLOBAL.arguments.time ATTRIBUTES["time"] = GLOBAL.arguments.time
elif GLOBAL.arguments.subreddit is not None: elif GLOBAL.arguments.subreddit is not None:
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit) if type(GLOBAL.arguments.subreddit) == list:
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit)
ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit
elif GLOBAL.arguments.multireddit is not None:
ATTRIBUTES["multireddit"] = GLOBAL.arguments.multireddit
elif GLOBAL.arguments.saved is True: elif GLOBAL.arguments.saved is True:
ATTRIBUTES["saved"] = True ATTRIBUTES["saved"] = True
@@ -305,13 +431,36 @@ def prepareAttributes():
return ATTRIBUTES return ATTRIBUTES
def postFromLog(fileName):
    """Analyze a log file and return a list of dictionaries containing
    submissions

    fileName -- path of a JSON log file written by this program.

    Every entry other than "HEADER" is expected to be a list whose last
    element is the submission dictionary (the log writers in this project
    store either ``[details]`` or ``[error message, submission]``).
    Submissions whose 'postType' is None are left out.

    Prints a message and exits the program if the file does not exist.
    """

    if not Path(fileName).is_file():
        print("File not found")
        sys.exit()

    content = jsonFile(fileName).read()

    # "HEADER" holds the arguments of the run that produced the log, not a
    # submission; it is optional, so a missing key is not an error.
    # NOTE(review): assumes read() returns a plain dict -- confirm.
    content.pop("HEADER", None)

    return [
        entry[-1] for entry in content.values()
        if entry[-1]['postType'] is not None
    ]
def postExists(POST): def postExists(POST):
"""Figure out a file's name and checks if the file already exists""" """Figure out a file's name and checks if the file already exists"""
title = nameCorrector(POST['postTitle']) title = nameCorrector(POST['postTitle'])
FILENAME = title + "_" + POST['postId'] FILENAME = title + "_" + POST['postId']
PATH = GLOBAL.directory / POST["postSubreddit"] PATH = GLOBAL.directory / POST["postSubreddit"]
possibleExtensions = [".jpg",".png",".mp4",".gif",".webm"] possibleExtensions = [".jpg",".png",".mp4",".gif",".webm",".md"]
for i in range(2): for i in range(2):
for extension in possibleExtensions: for extension in possibleExtensions:
@@ -333,6 +482,10 @@ def download(submissions):
downloadedCount = subsLenght downloadedCount = subsLenght
duplicates = 0 duplicates = 0
BACKUP = {} BACKUP = {}
if GLOBAL.arguments.exclude is not None:
ToBeDownloaded = GLOBAL.arguments.exclude
else:
ToBeDownloaded = []
FAILED_FILE = createLogFile("FAILED") FAILED_FILE = createLogFile("FAILED")
@@ -347,6 +500,7 @@ def download(submissions):
if postExists(submissions[i]): if postExists(submissions[i]):
result = False result = False
print(submissions[i]['postType'].upper())
print("It already exists") print("It already exists")
duplicates += 1 duplicates += 1
downloadedCount -= 1 downloadedCount -= 1
@@ -354,7 +508,7 @@ def download(submissions):
directory = GLOBAL.directory / submissions[i]['postSubreddit'] directory = GLOBAL.directory / submissions[i]['postSubreddit']
if submissions[i]['postType'] == 'imgur': if submissions[i]['postType'] == 'imgur' and not 'imgur' in ToBeDownloaded:
print("IMGUR",end="") print("IMGUR",end="")
while int(time.time() - lastRequestTime) <= 2: while int(time.time() - lastRequestTime) <= 2:
@@ -397,7 +551,7 @@ def download(submissions):
"Imgur login failed. Quitting the program "\ "Imgur login failed. Quitting the program "\
"as unexpected errors might occur." "as unexpected errors might occur."
) )
quit() sys.exit()
except Exception as exception: except Exception as exception:
print(exception) print(exception)
@@ -417,7 +571,7 @@ def download(submissions):
) )
downloadedCount -= 1 downloadedCount -= 1
elif submissions[i]['postType'] == 'gfycat': elif submissions[i]['postType'] == 'gfycat' and not 'gfycat' in ToBeDownloaded:
print("GFYCAT") print("GFYCAT")
try: try:
Gfycat(directory,submissions[i]) Gfycat(directory,submissions[i])
@@ -428,7 +582,7 @@ def download(submissions):
downloadedCount -= 1 downloadedCount -= 1
except NotADownloadableLinkError as exception: except NotADownloadableLinkError as exception:
print("Could not read the page source") print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]}) FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1 downloadedCount -= 1
@@ -437,7 +591,7 @@ def download(submissions):
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]}) FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1 downloadedCount -= 1
elif submissions[i]['postType'] == 'direct': elif submissions[i]['postType'] == 'direct' and not 'direct' in ToBeDownloaded:
print("DIRECT") print("DIRECT")
try: try:
Direct(directory,submissions[i]) Direct(directory,submissions[i])
@@ -452,7 +606,7 @@ def download(submissions):
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]}) FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1 downloadedCount -= 1
elif submissions[i]['postType'] == 'self': elif submissions[i]['postType'] == 'self' and not 'self' in ToBeDownloaded:
print("SELF") print("SELF")
try: try:
Self(directory,submissions[i]) Self(directory,submissions[i])
@@ -481,66 +635,85 @@ def download(submissions):
print(" Total of {} links downloaded!".format(downloadedCount)) print(" Total of {} links downloaded!".format(downloadedCount))
def main(): def main():
if sys.argv[-1].endswith(__file__): GLOBAL.arguments = parseArguments()
GLOBAL.arguments = parseArguments(input("> ").split())
else:
GLOBAL.arguments = parseArguments()
if GLOBAL.arguments.directory is not None: if GLOBAL.arguments.directory is not None:
GLOBAL.directory = Path(GLOBAL.arguments.directory) GLOBAL.directory = Path(GLOBAL.arguments.directory)
else: else:
print("Invalid directory") GLOBAL.directory = Path(input("download directory: "))
quit()
GLOBAL.config = getConfig(Path(PurePath(__file__).parent / 'config.json'))
checkConflicts() print("\n"," ".join(sys.argv),"\n")
print(sys.argv) try:
checkConflicts()
except ProgramModeError as err:
PromptUser()
except Exception as err:
print(err)
sys.exit()
if not Path(GLOBAL.configDirectory).is_dir():
os.makedirs(GLOBAL.configDirectory)
GLOBAL.config = getConfig(GLOBAL.configDirectory / "config.json")
if GLOBAL.arguments.log is not None: if GLOBAL.arguments.log is not None:
logDir = Path(GLOBAL.arguments.log) logDir = Path(GLOBAL.arguments.log)
download(postFromLog(logDir)) download(postFromLog(logDir))
quit() sys.exit()
try: try:
POSTS = getPosts(prepareAttributes()) POSTS = getPosts(prepareAttributes())
except InsufficientPermission: except InsufficientPermission:
print("You do not have permission to do that") print("You do not have permission to do that")
quit() sys.exit()
except NoMatchingSubmissionFound: except NoMatchingSubmissionFound:
print("No matching submission was found") print("No matching submission was found")
quit() sys.exit()
except NoRedditSupoort: except NoRedditSupoort:
print("Reddit does not support that") print("Reddit does not support that")
quit() sys.exit()
except NoPrawSupport: except NoPrawSupport:
print("PRAW does not support that") print("PRAW does not support that")
quit() sys.exit()
except MultiredditNotFound: except MultiredditNotFound:
print("Multireddit not found") print("Multireddit not found")
quit() sys.exit()
except InvalidSortingType: except InvalidSortingType:
print("Invalid sorting type has given") print("Invalid sorting type has given")
quit() sys.exit()
except InvalidRedditLink: except InvalidRedditLink:
print("Invalid reddit link") print("Invalid reddit link")
quit() sys.exit()
if POSTS is None: if POSTS is None:
print("I could not find any posts in that URL") print("I could not find any posts in that URL")
quit() sys.exit()
if GLOBAL.arguments.NoDownload: if GLOBAL.arguments.NoDownload:
quit() sys.exit()
else: else:
download(POSTS) download(POSTS)
if __name__ == "__main__": if __name__ == "__main__":
log_stream = StringIO()
logging.basicConfig(stream=log_stream, level=logging.INFO)
try: try:
VanillaPrint = print VanillaPrint = print
print = printToFile print = printToFile
GLOBAL.RUN_TIME = time.time() GLOBAL.RUN_TIME = time.time()
main() main()
except KeyboardInterrupt: except KeyboardInterrupt:
if GLOBAL.directory is None:
GLOBAL.directory = Path(".\\")
print("\nQUITTING...") print("\nQUITTING...")
quit() except Exception as exception:
if GLOBAL.directory is None:
GLOBAL.directory = Path(".\\")
logging.error(sys.exc_info()[0].__name__,
exc_info=full_exc_info(sys.exc_info()))
print(log_stream.getvalue())
input("Press enter to quit\n")

50
setup.py Normal file
View File

@@ -0,0 +1,50 @@
#!C:\Users\Ali\AppData\Local\Programs\Python\Python36\python.exe
## python setup.py build
import sys
from cx_Freeze import setup, Executable
from script import __version__
# Packages handed to cx_Freeze's build_exe command for explicit inclusion.
options = {
    "build_exe": {
        "packages": [
            "idna", "imgurpython", "praw", "requests",
        ],
    },
}

# Only the Windows build gets an ".exe" suffix. Every other platform shares
# the plain name, so `executables` is always defined (previously platforms
# other than win32/linux failed with a NameError).
target_name = "bulk-downloader-for-reddit"
if sys.platform == "win32":
    target_name += ".exe"

executables = [
    Executable(
        "script.py",
        targetName=target_name,
        shortcutName="Bulk Downloader for Reddit",
        shortcutDir="DesktopFolder",
    ),
]

setup(
    name="Bulk Downloader for Reddit",
    version=__version__,
    description="Bulk Downloader for Reddit",
    author="Ali Parlakci",
    author_email="parlakciali@gmail.com",
    url="https://github.com/aliparlakci/bulk-downloader-for-reddit",
    # One classifier per element: a missing comma used to glue the licence
    # and the "Natural Language" entries into a single invalid classifier.
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
        "Natural Language :: English",
        "Environment :: Console",
        "Operating System :: OS Independent",
    ],
    executables=executables,
    options=options,
)

View File

@@ -4,19 +4,14 @@ import sys
import urllib.request import urllib.request
from pathlib import Path from pathlib import Path
import imgurpython
from multiprocessing import Queue
from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError, from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError,
FileNameTooLong, ImgurLoginError, FileNameTooLong, ImgurLoginError,
NotADownloadableLinkError) NotADownloadableLinkError)
from src.tools import GLOBAL, nameCorrector, printToFile from src.tools import GLOBAL, nameCorrector, printToFile
try:
from imgurpython import *
except ModuleNotFoundError:
print("\nimgurpython not found on your computer, installing...\n")
from src.tools import install
install("imgurpython")
from imgurpython import *
VanillaPrint = print VanillaPrint = print
print = printToFile print = printToFile
@@ -41,7 +36,10 @@ def getExtension(link):
if TYPE in parsed: if TYPE in parsed:
return "."+parsed[-1] return "."+parsed[-1]
else: else:
return '.jpg' if not "v.redd.it" in link:
return '.jpg'
else:
return '.mp4'
def getFile(fileDir,tempDir,imageURL,indent=0): def getFile(fileDir,tempDir,imageURL,indent=0):
"""Downloads given file to given directory. """Downloads given file to given directory.
@@ -174,14 +172,16 @@ class Imgur:
if duplicates == imagesLenght: if duplicates == imagesLenght:
raise FileAlreadyExistsError raise FileAlreadyExistsError
elif howManyDownloaded < imagesLenght: elif howManyDownloaded < imagesLenght:
raise AlbumNotDownloadedCompletely raise AlbumNotDownloadedCompletely(
"Album Not Downloaded Completely"
)
@staticmethod @staticmethod
def initImgur(): def initImgur():
"""Initialize imgur api""" """Initialize imgur api"""
config = GLOBAL.config config = GLOBAL.config
return ImgurClient( return imgurpython.ImgurClient(
config['imgur_client_id'], config['imgur_client_id'],
config['imgur_client_secret'] config['imgur_client_secret']
) )
@@ -213,7 +213,7 @@ class Imgur:
elif identity['type'] == 'album': elif identity['type'] == 'album':
return {'object':self.imgurClient.get_album(identity['id']), return {'object':self.imgurClient.get_album(identity['id']),
'type':'album'} 'type':'album'}
@staticmethod
def get_credits(): def get_credits():
return Imgur.initImgur().get_credits() return Imgur.initImgur().get_credits()
@@ -222,9 +222,9 @@ class Gfycat:
try: try:
POST['mediaURL'] = self.getLink(POST['postURL']) POST['mediaURL'] = self.getLink(POST['postURL'])
except IndexError: except IndexError:
raise NotADownloadableLinkError raise NotADownloadableLinkError("Could not read the page source")
except Exception as exception: except Exception as exception:
raise NotADownloadableLinkError raise NotADownloadableLinkError("Could not read the page source")
POST['postExt'] = getExtension(POST['mediaURL']) POST['postExt'] = getExtension(POST['mediaURL'])
@@ -253,8 +253,7 @@ class Gfycat:
if url[-1:] == '/': if url[-1:] == '/':
url = url[:-1] url = url[:-1]
if 'gifs' in url: url = "https://gfycat.com/" + url.split('/')[-1]
url = "https://gfycat.com/" + url.split('/')[-1]
pageSource = (urllib.request.urlopen(url).read().decode().split('\n')) pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
@@ -271,7 +270,7 @@ class Gfycat:
break break
if "".join(link) == "": if "".join(link) == "":
raise NotADownloadableLinkError raise NotADownloadableLinkError("Could not read the page source")
return "".join(link) return "".join(link)

View File

@@ -1,3 +1,36 @@
import sys
class FauxTb(object):
    """Minimal stand-in for a traceback object.

    Carries the attributes that code walking a traceback chain reads, so
    that instances can be linked in front of a real traceback.

    tb_frame  -- frame object this entry refers to
    tb_lineno -- line number to report for that frame
    tb_next   -- next entry in the chain (a traceback, a FauxTb or None)
    """

    def __init__(self, tb_frame, tb_lineno, tb_next):
        self.tb_frame = tb_frame
        self.tb_lineno = tb_lineno
        self.tb_next = tb_next
        # The standard traceback module on Python 3.11+ also reads tb_lasti
        # while walking a chain; without it formatting raises
        # AttributeError. A negative index carries no position data, so
        # the formatter reports tb_lineno as given.
        self.tb_lasti = -1
def current_stack(skip=0):
    """Return the frames of the current call stack, innermost first.

    skip -- number of additional frames to leave out. This function's own
            frame and its caller's frame are always left out when skip is
            0 or more.

    Returns a list of (frame, line number) tuples, walking outwards via
    f_back. The list is empty when skipping runs past the top of the
    stack.
    """
    # Raising and catching a throwaway exception yields a traceback whose
    # tb_frame is this function's own frame.
    try:
        1/0
    except ZeroDivisionError:
        f = sys.exc_info()[2].tb_frame

    # Step past this frame, the caller's frame and `skip` more frames, and
    # stop quietly once the outermost frame has been passed.
    for _ in range(skip + 2):
        if f is None:
            break
        f = f.f_back

    lst = []
    while f is not None:
        lst.append((f, f.f_lineno))
        f = f.f_back
    return lst
def extend_traceback(tb, stack):
    """Prepend the frames in *stack* to the traceback *tb*.

    tb    -- traceback (or None) that ends up at the tail of the chain
    stack -- iterable of (frame, line number) pairs; each pair is wrapped
             in a FauxTb that points at the chain built so far, so the
             last pair becomes the head of the result

    Returns the new head, or *tb* itself when *stack* is empty.
    """
    extended = tb
    for frame, lineno in stack:
        extended = FauxTb(frame, lineno, extended)
    return extended
def full_exc_info(exc_info):
    """Return *exc_info* with its traceback extended by the outer stack.

    exc_info -- (type, value, traceback) triple, as given by sys.exc_info()

    Returns a (type, value, traceback) triple whose traceback has the
    frames reported by current_stack(1) linked in front of the original.
    """
    exc_type, exc_value, exc_tb = exc_info
    outer_frames = current_stack(1)
    return exc_type, exc_value, extend_traceback(exc_tb, outer_frames)
class RedditLoginFailed(Exception): class RedditLoginFailed(Exception):
pass pass
@@ -19,6 +52,15 @@ class FileNameTooLong(Exception):
class InvalidRedditLink(Exception): class InvalidRedditLink(Exception):
pass pass
class ProgramModeError(Exception):
pass
class SearchModeError(Exception):
pass
class RedditorNameError(Exception):
pass
class NoMatchingSubmissionFound(Exception): class NoMatchingSubmissionFound(Exception):
pass pass

View File

@@ -3,14 +3,7 @@ import random
import socket import socket
import webbrowser import webbrowser
try: import praw
import praw
except ModuleNotFoundError:
print("\nPRAW not found on your computer, installing...\n")
from src.tools import install
install("praw")
import praw
from prawcore.exceptions import NotFound, ResponseException, Forbidden from prawcore.exceptions import NotFound, ResponseException, Forbidden
from src.tools import GLOBAL, createLogFile, jsonFile, printToFile from src.tools import GLOBAL, createLogFile, jsonFile, printToFile
@@ -96,7 +89,7 @@ def beginPraw(config,user_agent = str(socket.gethostname())):
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes) authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
reddit = authorizedInstance[0] reddit = authorizedInstance[0]
refresh_token = authorizedInstance[1] refresh_token = authorizedInstance[1]
jsonFile("config.json").add({ jsonFile(GLOBAL.configDirectory / "config.json").add({
"reddit_refresh_token":refresh_token "reddit_refresh_token":refresh_token
}) })
else: else:
@@ -105,7 +98,7 @@ def beginPraw(config,user_agent = str(socket.gethostname())):
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes) authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
reddit = authorizedInstance[0] reddit = authorizedInstance[0]
refresh_token = authorizedInstance[1] refresh_token = authorizedInstance[1]
jsonFile("config.json").add({ jsonFile(GLOBAL.configDirectory / "config.json").add({
"reddit_refresh_token":refresh_token "reddit_refresh_token":refresh_token
}) })
return reddit return reddit
@@ -360,7 +353,7 @@ def redditSearcher(posts,SINGLE_POST=False):
printSubmission(submission,subCount,orderCount) printSubmission(submission,subCount,orderCount)
subList.append(details) subList.append(details)
allPosts = {**allPosts,**details} allPosts[subCount] = [details]
postsFile.add(allPosts) postsFile.add(allPosts)
@@ -404,8 +397,9 @@ def checkIfMatching(submission):
imgurCount += 1 imgurCount += 1
return details return details
elif isDirectLink(submission.url): elif isDirectLink(submission.url) is not False:
details['postType'] = 'direct' details['postType'] = 'direct'
details['postURL'] = isDirectLink(submission.url)
directCount += 1 directCount += 1
return details return details
@@ -442,7 +436,7 @@ def printSubmission(SUB,validNumber,totalNumber):
def isDirectLink(URL): def isDirectLink(URL):
"""Check if link is a direct image link. """Check if link is a direct image link.
If so, return True, If so, return URL,
if not, return False if not, return False
""" """
@@ -451,10 +445,13 @@ def isDirectLink(URL):
URL = URL[:-1] URL = URL[:-1]
if "i.reddituploads.com" in URL: if "i.reddituploads.com" in URL:
return True return URL
elif "v.redd.it" in URL:
return URL+"/DASH_600_K"
for extension in imageTypes: for extension in imageTypes:
if extension in URL: if extension in URL:
return True return URL
else: else:
return False return False

View File

@@ -2,20 +2,11 @@ import io
import json import json
import sys import sys
import time import time
try:
from pip import main as pipmain
except:
from pip._internal import main as pipmain
from os import makedirs, path, remove from os import makedirs, path, remove
from pathlib import Path from pathlib import Path
from src.errors import FileNotFoundError from src.errors import FileNotFoundError
def install(package):
pipmain(['install', package])
class GLOBAL: class GLOBAL:
"""Declare global variables""" """Declare global variables"""
@@ -23,6 +14,7 @@ class GLOBAL:
config = None config = None
arguments = None arguments = None
directory = None directory = None
configDirectory = Path.home() / "Bulk Downloader for Reddit"
reddit_client_id = "BSyphDdxYZAgVQ" reddit_client_id = "BSyphDdxYZAgVQ"
reddit_client_secret = "bfqNJaRh8NMh-9eAr-t4TRz-Blk" reddit_client_secret = "bfqNJaRh8NMh-9eAr-t4TRz-Blk"
printVanilla = print printVanilla = print
@@ -83,8 +75,10 @@ def createLogFile(TITLE):
put given arguments inside \"HEADER\" key put given arguments inside \"HEADER\" key
""" """
folderDirectory = GLOBAL.directory / str(time.strftime("%d-%m-%Y_%H-%M-%S", folderDirectory = GLOBAL.directory / "LOG_FILES" / \
time.localtime(GLOBAL.RUN_TIME))) str(time.strftime(
"%d-%m-%Y_%H-%M-%S",time.localtime(GLOBAL.RUN_TIME)
))
logFilename = TITLE.upper()+'.json' logFilename = TITLE.upper()+'.json'
if not path.exists(folderDirectory): if not path.exists(folderDirectory):
@@ -103,7 +97,7 @@ def printToFile(*args, **kwargs):
TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S", TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S",
time.localtime(GLOBAL.RUN_TIME))) time.localtime(GLOBAL.RUN_TIME)))
folderDirectory = GLOBAL.directory / TIME folderDirectory = GLOBAL.directory / "LOG_FILES" / TIME
print(*args,**kwargs) print(*args,**kwargs)
if not path.exists(folderDirectory): if not path.exists(folderDirectory):