160 Commits

Author SHA1 Message Date
Ali Parlakçı
d6194c57d9 Merge pull request #67 from dbanon87/dbanon87/erome-downloads
fix erome download URLs
2019-10-17 09:51:02 +03:00
Ali Parlakçı
cd87a4a120 Merge pull request #68 from dbanon87/dbanon87/gitignore-env
add env/ to gitignore
2019-10-17 09:49:36 +03:00
dbanon87
08cddf4c83 add env/ to gitignore
This allows working in a virtualenv in the project directory.
2019-10-08 10:52:56 -04:00
dbanon87
88fa9e742d fix erome download URLs 2019-10-08 10:51:56 -04:00
Ali Parlakçı
1c17f174a8 typo 2019-04-23 17:00:42 +03:00
Ali Parlakçı
9b36336ac3 typo 2019-04-23 16:51:05 +03:00
Ali Parlakçı
35e551f20c Update README.md 2019-04-23 14:04:15 +03:00
Ali Parlakçı
0f2bda9c34 Merge pull request #63 from aliparlakci/moreUsefulReadme
A more useful readme (credits to *stared*)
2019-04-23 14:00:53 +03:00
Ali Parlakçı
8ab694bcc1 Fixed typo 2019-04-23 13:59:01 +03:00
Ali
898f59d035 Added an FAQ entry 2019-04-23 13:51:21 +03:00
Ali
6b6db37185 Minor corrections 2019-04-23 13:29:58 +03:00
Piotr Migdał
d4a5100128 a clearer description how to run it (#62) 2019-04-23 13:17:15 +03:00
Ali
22047338e2 Update version number 2019-04-09 20:45:22 +03:00
Ali
b16cdd3cbb Hopefully, fixed the config.json bug 2019-04-09 20:31:42 +03:00
Ali
2a8394a48c Fixed the bug concerning config.json 2019-04-08 22:09:52 +03:00
Ali Parlakçı
eac4404bbf Update README.md 2019-03-31 11:59:49 +03:00
Ali Parlakci
fae49d50da Update version 2019-03-31 11:46:03 +03:00
Ali Parlakci
7130525ece Update version 2019-03-31 11:35:27 +03:00
Ali Parlakci
2bf1e03ee1 Update version 2019-03-31 11:33:29 +03:00
Ali
15a91e5784 Fixed saving auth info problem 2019-02-24 12:28:40 +03:00
Ali
344201a70d Fixed v.redd.it links 2019-02-23 00:01:39 +03:00
Ali
92e47adb43 Update version 2019-02-22 23:59:57 +03:00
Ali
4d385fda60 Fixed v.redd.it links 2019-02-22 23:59:03 +03:00
Ali Parlakci
82dcd2f63d Bug fix 2019-01-27 17:05:31 +03:00
Ali Parlakci
08de21a364 Updated Python3 version 2019-01-27 16:32:43 +03:00
Ali Parlakci
af7d3d9151 Moved FAQ 2019-01-27 16:32:00 +03:00
Ali Parlakci
280147282b 27 jan update 2019-01-27 16:06:31 +03:00
Ali Parlakci
b7baf07fb5 Added instructions 2019-01-27 15:59:24 +03:00
Ali Parlakci
aece2273fb Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-08-28 16:28:29 +03:00
Ali Parlakci
f807efe4d5 Ignore space at the end of directory 2018-08-28 16:27:29 +03:00
Ali Parlakci
743d887927 Ignore space at the end of directory 2018-08-28 16:24:14 +03:00
Ali Parlakci
da5492858c Add bs4 2018-08-28 16:15:22 +03:00
Ali Parlakci
cebfc713d2 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-08-28 16:12:01 +03:00
Ali Parlakci
f522154214 Update version 2018-08-28 16:11:48 +03:00
Ali Parlakçı
27cd3ee991 Changed getting gfycat links' algorithm 2018-08-28 16:10:15 +03:00
Ali Parlakci
29873331e6 Typo fix 2018-08-23 16:41:07 +03:00
Ali Parlakci
8a3dcd68a3 Update version 2018-08-23 12:16:31 +03:00
Ali Parlakci
ac323f2abe Bug fix 2018-08-23 12:09:56 +03:00
Ali Parlakci
32d26fa956 Print out github link at start 2018-08-20 15:13:42 +03:00
Ali Parlakci
137481cf3e Print out program info 2018-08-18 14:51:20 +03:00
Ali Parlakci
9b63c55d3e Print out version info before starting 2018-08-17 21:25:01 +03:00
Ali Parlakci
3a6954c7d3 Update version 2018-08-16 19:55:45 +03:00
Ali Parlakci
9a59da0c5f Update changelog 2018-08-16 19:53:33 +03:00
Ali Parlakci
d56efed1c6 Fix imgur download malfunction caused by headers 2018-08-16 19:51:56 +03:00
Ali Parlakci
8f64e62293 Update version 2018-08-15 21:52:54 +03:00
Ali Parlakci
bdc43eb0d8 Update changelog 2018-08-15 21:48:05 +03:00
Ali Parlakci
adccd8f3ba Prints the file that already exists 2018-08-15 21:46:27 +03:00
Ali Parlakci
47a07be1c8 Deleted commented line 2018-08-13 16:00:21 +03:00
Ali Parlakci
1a41dc6061 Update changelog 2018-08-13 15:56:37 +03:00
Ali Parlakci
50cb7c15b9 Fixed console prints for Linux 2018-08-13 15:55:37 +03:00
Ali Parlakci
a1f1915d57 Update changelog 2018-08-13 14:52:43 +03:00
Ali Parlakci
3448ba15a9 Added config file location as current directory 2018-08-13 14:50:45 +03:00
Ali Parlakci
ff68b5f70f Improved error handling 2018-08-10 19:50:52 +03:00
Ali Parlakci
588a3c3ea6 Update changelog 2018-08-10 13:09:28 +03:00
Ali Parlakci
8f1ff10a5e Added reddit username to config file 2018-08-10 13:08:24 +03:00
Ali Parlakci
9338961b2b Improved checkConflicts() 2018-08-09 09:26:01 +03:00
Ali Parlakci
94bc1c115f Minor edit in exception handling 2018-08-09 09:04:12 +03:00
Ali Parlakci
c19d8ad71b Refactored error handling 2018-08-09 00:17:04 +03:00
Ali Parlakci
4c8de50880 Fixed request headers 2018-08-08 00:47:34 +03:00
Ali Parlakci
3e6dfccdd2 Update request headers 2018-08-06 09:33:07 +03:00
Ali Parlakci
20b9747330 Added docstrings for the ease of modification 2018-08-06 08:13:07 +03:00
Ali Parlakci
be7508540d Refactored README page 2018-08-06 07:54:33 +03:00
Ali Parlakci
ccd9078b0a Refactored README page 2018-08-06 07:53:55 +03:00
Ali Parlakçı
43cf0a4d42 Typo fix 2018-08-06 07:42:52 +03:00
Ali Parlakci
3693cf46f8 Updated version 2018-08-06 07:40:23 +03:00
Ali Parlakci
04152e8554 Updated changelog 2018-08-06 07:37:35 +03:00
Ali Parlakci
210238d086 Sending header when requesting a file 2018-08-06 07:35:43 +03:00
Ali Parlakci
90e071354f Update changelog 2018-08-04 10:25:29 +03:00
Ali Parlakci
426089d0f3 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-08-04 10:23:22 +03:00
Ali Parlakci
7ae6c6385d Do not print post type to console 2018-08-04 10:22:41 +03:00
Ali Parlakçı
97d15f9974 Update README.md 2018-08-01 13:08:09 +03:00
Ali Parlakci
172cd72dc1 Update changelog 2018-07-30 13:37:29 +03:00
Ali Parlakci
af29492951 Hide KeyboardInterrupt 2018-07-30 13:36:48 +03:00
Ali Parlakci
5633b301f3 Bug fix 2018-07-30 13:36:27 +03:00
Ali Parlakci
5ed855af28 Update changelog 2018-07-30 13:23:34 +03:00
Ali Parlakci
7ccf2fb7f9 Open web browser as prompting for imgur credentials 2018-07-30 13:22:30 +03:00
Ali Parlakçı
2297e9ed86 Update README.md 2018-07-26 20:04:07 +03:00
Ali Parlakçı
401e014059 Update README.md 2018-07-26 20:03:25 +03:00
Ali Parlakçı
eb31d38c44 Update README.md 2018-07-26 19:39:51 +03:00
Ali Parlakçı
747fefea14 Update README.md 2018-07-26 19:36:50 +03:00
Ali Parlakçı
80cc4fade3 Update README.md 2018-07-26 19:33:47 +03:00
Ali Parlakçı
c26843c7fc Set theme jekyll-theme-cayman 2018-07-26 19:21:55 +03:00
Ali Parlakçı
a14edc9f5a Update README.md 2018-07-26 15:08:23 +03:00
Ali Parlakci
d685860c22 Update version 2018-07-26 12:25:54 +03:00
Ali Parlakci
dcf9f35273 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-26 12:25:50 +03:00
Ali Parlakci
7fdf03aa24 Added new line after 'GETTING POSTS' 2018-07-26 12:25:23 +03:00
Ali Parlakçı
25d61a4c78 Update README.md 2018-07-26 12:23:08 +03:00
Ali Parlakci
558eb107f4 Update changelog 2018-07-26 12:01:00 +03:00
Ali Parlakci
6e74630050 Typo fix 2018-07-26 11:59:29 +03:00
Ali Parlakci
2fd9248715 Added quit after finish option 2018-07-26 11:15:13 +03:00
Ali Parlakci
457b8cd21c Added remaining credits to log file 2018-07-26 11:08:37 +03:00
Ali Parlakci
e953456ead Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-26 10:10:48 +03:00
Ali Parlakci
ed0564fba0 Improve verbose mode 2018-07-26 10:08:57 +03:00
Ali Parlakçı
5378555f74 Update COMPILE_FROM_SOURCE.md 2018-07-26 09:24:50 +03:00
Ali Parlakçı
95ef308915 Update COMPILE_FROM_SOURCE.md 2018-07-26 09:22:14 +03:00
Ali Parlakçı
436f867f2e Update COMPILE_FROM_SOURCE.md 2018-07-26 09:22:03 +03:00
Ali Parlakçı
91d71565cc Update script.py 2018-07-25 18:30:48 +03:00
Ali Parlakci
c7b7361ded Update version 2018-07-25 18:27:58 +03:00
Ali Parlakci
cd81a6c38b Update changelog 2018-07-25 13:53:33 +03:00
Ali Parlakçı
1623722138 Merge pull request #42 from aliparlakci/verboseMode
Added Verbose mode
2018-07-25 13:52:34 +03:00
Ali Parlakci
dad5669441 Typo fix 2018-07-25 13:50:11 +03:00
Ali Parlakci
35d54d1eb1 Stylize 2018-07-25 13:48:30 +03:00
Ali Parlakci
394b864d86 Updated FAQ 2018-07-25 13:48:02 +03:00
Ali Parlakci
837281c3c6 Added verbose mode 2018-07-25 13:40:06 +03:00
Ali Parlakci
e6b648d8b3 Update changelog 2018-07-25 12:25:39 +03:00
Ali Parlakci
cfaf2de7db Stylize the console output 2018-07-25 12:24:50 +03:00
Ali Parlakci
80546d7094 Update version 2018-07-25 11:36:58 +03:00
Ali Parlakci
139a81a0e7 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-25 09:29:48 +03:00
Ali Parlakci
9bb0a5da7f Added delays for imgur rate limit 2018-07-25 09:27:41 +03:00
Ali Parlakçı
6f2273f182 Add file name formatting doc 2018-07-24 23:41:02 +03:00
Ali Parlakci
b5d6165802 Update version 2018-07-24 22:13:38 +03:00
Ali Parlakci
b98815376f Bug fix 2018-07-24 22:13:11 +03:00
Ali Parlakci
d9586f99b8 Use else in try blocks 2018-07-24 22:11:12 +03:00
Ali Parlakci
76711892a2 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-24 22:10:28 +03:00
Ali Parlakci
bfea548eab Print credits in the same line 2018-07-24 22:10:19 +03:00
Ali Parlakçı
2e852db4c3 Typo fix 2018-07-24 19:45:37 +03:00
Ali Parlakci
8ac02e7aff Update version 2018-07-24 19:42:38 +03:00
Ali Parlakci
5eccf4dd3d Update changelog 2018-07-24 19:35:49 +03:00
Ali Parlakçı
7a68ff3efa Merge pull request #41 from aliparlakci/changePostNames
Add submitter to file names
2018-07-24 19:34:02 +03:00
Ali Parlakçı
3ea2e16b62 Merge branch 'master' into changePostNames 2018-07-24 19:33:38 +03:00
Ali Parlakci
fc6787aa28 Update changelog 2018-07-24 19:28:48 +03:00
Ali Parlakci
21533bb78c Improved exception handling 2018-07-24 19:27:52 +03:00
Ali Parlakci
1781ab8ffe Update changelog 2018-07-24 19:10:34 +03:00
Ali Parlakci
821383c465 Deleted # char from file names 2018-07-24 19:09:45 +03:00
Ali Parlakci
9d0fdc7521 Add OP's name first 2018-07-24 18:55:33 +03:00
Ali Parlakci
0387dd5243 Tweaked 'What it can do' 2018-07-24 14:16:46 +03:00
Ali Parlakci
93732b0367 Little refactoring 2018-07-24 13:17:37 +03:00
Ali Parlakci
400ce01918 Added older version support 2018-07-24 13:17:14 +03:00
Ali Parlakci
ccedac4bdc Add submitter to file name 2018-07-24 12:44:53 +03:00
Ali Parlakci
a6997898ce Update version 2018-07-23 23:37:27 +03:00
Ali Parlakci
61632c7143 Improve error handling 2018-07-23 23:33:11 +03:00
Ali Parlakçı
9bff3399a8 Typo fix 2018-07-23 23:19:29 +03:00
Ali Parlakçı
b00d185f67 Update changelog 2018-07-23 23:18:10 +03:00
Ali Parlakçı
7314e17125 Added erome support 2018-07-23 23:16:56 +03:00
Ali Parlakci
2d334d56bf remove exclude mode 2018-07-23 22:57:54 +03:00
Ali Parlakçı
974517928f Update README.md 2018-07-23 22:07:28 +03:00
Ali Parlakçı
bcae177b1e Split download function 2018-07-23 22:06:33 +03:00
Ali Parlakci
229def6578 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-23 18:49:39 +03:00
Ali Parlakci
59b0376d6e Added directions for frontpage 2018-07-23 18:49:28 +03:00
Ali Parlakçı
cf1dc7d08c Rename changelog section 2018-07-22 18:16:11 +03:00
Ali Parlakci
27532408c1 Update version 2018-07-22 18:06:46 +03:00
Ali Parlakci
32647beee9 Update changelog 2018-07-22 18:06:24 +03:00
Ali Parlakci
a67da461d2 Fixed the bug that makes multireddit mode unusable 2018-07-22 18:05:20 +03:00
Ali Parlakci
8c6f593496 Update changelog 2018-07-22 17:39:30 +03:00
Ali Parlakci
b60ce8a71e Put log files in a folder 2018-07-22 17:38:35 +03:00
Ali Parlakci
49920cc457 Bug fix 2018-07-22 17:25:30 +03:00
Ali Parlakci
c70e7c2ebb Update version 2018-07-22 14:39:09 +03:00
Ali Parlakci
3931dfff54 Update links 2018-07-21 22:03:16 +03:00
Ali Parlakci
4a8c2377f9 Updated --help page 2018-07-21 21:55:01 +03:00
Ali Parlakci
8a18a42a9a Updated changelog 2018-07-21 21:54:23 +03:00
Ali Parlakçı
6c2d748fbc Exclude post types (#38)
* Added argument for excluded links

* Added exclude in PromptUser()

* Added functionality for exclude and bug fix
2018-07-21 21:52:28 +03:00
Ali Parlakci
8c966df105 Improved traceback 2018-07-21 21:50:54 +03:00
Ali Parlakci
2adf2c0451 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-20 13:34:51 +03:00
Ali Parlakci
3e3a2df4d1 Bug fix at direct links 2018-07-20 13:34:23 +03:00
Ali Parlakci
7548a01019 Bug fix at direct links 2018-07-20 13:33:50 +03:00
Ali Parlakci
2ab16608d5 Update links 2018-07-20 13:06:01 +03:00
Ali Parlakci
e15f33b97a Fix README 2018-07-20 13:04:47 +03:00
Ali Parlakci
27211f993c 0 input for no limit 2018-07-20 13:03:50 +03:00
Ali Parlakci
87d3b294f7 0 input for no limit 2018-07-20 13:01:39 +03:00
Ali Parlakci
8128378dcd 0 input for no limit 2018-07-20 13:01:21 +03:00
13 changed files with 908 additions and 430 deletions

2
.gitignore vendored
View File

@@ -3,3 +3,5 @@ dist/
MANIFEST MANIFEST
__pycache__/ __pycache__/
src/__pycache__/ src/__pycache__/
config.json
env/

167
README.md
View File

@@ -1,77 +1,148 @@
# Bulk Downloader for Reddit # Bulk Downloader for Reddit
This program downloads imgur, gfycat and direct image and video links of saved posts from a reddit account. It is written in Python 3.
**PLEASE** post any issue you have with the script to [Issues](https://github.com/aliparlakci/bulk-downloader-for-reddit/issues) tab. Since I don't have any testers or contributors I need your feedback. Downloads media from reddit posts. Made by [u/aliparlakci](https://reddit.com/u/aliparlakci)
## [Download the latest release here](https://github.com/aliparlakci/bulk-downloader-for-reddit/releases/latest)
## What it can do ## What it can do
- Can get posts from: frontpage, subreddits, multireddits, redditor's submissions, upvoted and saved posts; search results or just plain reddit links - Can get posts from: frontpage, subreddits, multireddits, redditor's submissions, upvoted and saved posts; search results or just plain reddit links
- Sorts posts by hot, top, new and so on - Sorts posts by hot, top, new and so on
- Downloads imgur albums, gfycat links, [self posts](#how-do-i-open-self-post-files) and any link to a direct image - Downloads **REDDIT** images and videos, **IMGUR** images and albums, **GFYCAT** links, **EROME** images and albums, **SELF POSTS** and any link to a **DIRECT IMAGE**
- Skips the existing ones - Skips the existing ones
- Puts post titles to file's name - Puts post title and OP's name in file's name
- Puts every post to its subreddit's folder - Puts every post to its subreddit's folder
- Saves a reusable copy of posts' details that are found so that they can be re-downloaded again - Saves a reusable copy of posts' details that are found so that they can be re-downloaded again
- Logs failed ones in a file so that you can try to download them later - Logs failed ones in a file so that you can try to download them later
- Can run with double-clicking on Windows
## [Download the latest release](https://github.com/aliparlakci/bulk-downloader-for-reddit/releases/latest) ## Installation
## How it works You can use it either as a `bulk-downloader-for-reddit.exe` executable file for Windows, as a Linux binary or as a *[Python script](#python-script)*. There is no MacOS executable, MacOS users must use the Python script option.
- For **Windows** and **Linux** users, there are executable files to run easily without installing a third party program. But if you are a paranoid like me, you can **[compile it from source code](docs/COMPILE_FROM_SOURCE.md)**. ### Executables
- In Windows, double click on bulk-downloader-for-reddit file
- In Linux, extract files to a folder and open terminal inside it. Type **`./bulk-downloader-for-reddit`**
- **MacOS** users have to **[compile it from source code](docs/COMPILE_FROM_SOURCE.md)**. For Windows and Linux, [download the latest executables, here](https://github.com/aliparlakci/bulk-downloader-for-reddit/releases/latest).
Script also accepts **command-line arguments**, get further information from **[`--help`](docs/COMMAND_LINE_ARGUMENTS.md)** ### Python script
## Setting up the script * Download this repository ([latest zip](https://github.com/aliparlakci/bulk-downloader-for-reddit/archive/master.zip) or `git clone git@github.com:aliparlakci/bulk-downloader-for-reddit.git`).
Because this is not a commercial app, you need to create an imgur developer app in order for the API to work. * Enter its folder.
* Run `python ./script.py` from the command-line (Windows, MacOSX or Linux command line; it may work with Anaconda prompt) See [here](docs/INTERPRET_FROM_SOURCE.md#finding-the-correct-keyword-for-python) if you have any trouble with this step.
### Creating an imgur app It uses Python 3.6 and above. It won't work with Python 3.5 or any Python 2.x. If you have trouble setting it up, see [here](docs/INTERPRET_FROM_SOURCE.md).
* Go to https://api.imgur.com/oauth2/addclient
* Enter a name into the **Application Name** field.
* Pick **Anonymous usage without user authorization** as an **Authorization type**\*
* Enter your email into the Email field.
* Complete the CAPTCHA
* Click **submit** button
It should redirect to a page which shows your **imgur_client_id** and **imgur_client_secret**
\* Select **OAuth 2 authorization without a callback URL** first then select **Anonymous usage without user authorization** if it says *Authorization callback URL: required* ### Setting up the script
You need to create an imgur developer app in order for the API to work. Go to https://api.imgur.com/oauth2/addclient and fill the form (It does not really matter how you fill it).
It should redirect you to a page where it shows your **imgur_client_id** and **imgur_client_secret**.
When you run it for the first time, it will automatically create `config.json` file containing `imgur_client_id`, `imgur_client_secret`, `reddit_username` and `reddit_refresh_token`.
## Running
You can run it in an interactive mode, or using [command-line arguments](docs/COMMAND_LINE_ARGUMENTS.md) (also available via `python ./script.py --help` or `bulk-downloader-for-reddit.exe --help`).
To run the interactive mode, simply use `python ./script.py` or double click on `bulk-downloader-for-reddit.exe` without any extra commands.
### [Example for command line arguments](docs/COMMAND_LINE_ARGUMENTS.md#examples)
### Example for an interactive script
```
(py37) bulk-downloader-for-reddit user$ python ./script.py
Bulk Downloader for Reddit v1.6.5
Written by Ali PARLAKCI parlakciali@gmail.com
https://github.com/aliparlakci/bulk-downloader-for-reddit/
download directory: downloads/dataisbeautiful_last_few
select program mode:
[1] search
[2] subreddit
[3] multireddit
[4] submitted
[5] upvoted
[6] saved
[7] log
[0] exit
> 2
(type frontpage for all subscribed subreddits,
use plus to separate multi subreddits: pics+funny+me_irl etc.)
subreddit: dataisbeautiful
select sort type:
[1] hot
[2] top
[3] new
[4] rising
[5] controversial
[0] exit
> 1
limit (0 for none): 50
GETTING POSTS
(1/24) r/dataisbeautiful
AutoModerator_[Battle]_DataViz_Battle_for_the_month_of_April_2019__Visualize_the_April_Fool's_Prank_for_2019-04-01_on__r_DataIsBeautiful_b8ws37.md
Downloaded
(2/24) r/dataisbeautiful
AutoModerator_[Topic][Open]_Open_Discussion_Monday_—_Anybody_can_post_a_general_visualization_question_or_start_a_fresh_discussion!_bg1wej.md
Downloaded
...
Total of 24 links downloaded!
Press enter to quit
```
## FAQ ## FAQ
### I am running the script on a headless machine or on a remote server. How can I authenticate my reddit account?
- Download the script on your everyday computer and run it once.
- Authenticate the program on both reddit and imgur.
- Go to your Home folder (for Windows users it is `C:\Users\[USERNAME]\`, for Linux users it is `/home/[USERNAME]`)
- Copy the *config.json* file inside the Bulk Downloader for Reddit folder and paste it **next to** the file that you run the program.
### How can I change my credentials?
- All of the user data is held in **config.json** file which is in a folder named "Bulk Downloader for Reddit" in your **Home** directory. You can edit them, there.
Also if you already have a config.json file, you can paste it **next to** the script and override the one on your Home directory.
### What do the dots resemble when getting posts?
- Each dot means that 100 posts are scanned.
### Getting posts takes too long.
- You can press *Ctrl+C* to interrupt it and start downloading.
### How are the filenames formatted?
- **Self posts** and **images** that do not belong to an album and **album folders** are formatted as:
`[SUBMITTER NAME]_[POST TITLE]_[REDDIT ID]`
You can use *reddit id* to go to post's reddit page by going to link reddit.com/[REDDIT ID]
- An **image in an album** is formatted as:
`[ITEM NUMBER]_[IMAGE TITLE]_[IMGUR ID]`
Similarly, you can use *imgur id* to go to image's imgur page by going to link imgur.com/[IMGUR ID].
### How do I open self post files? ### How do I open self post files?
- Self posts are held at reddit as styled with markdown. So, the script downloads them as they are in order not to lose their stylings. - Self posts are held at reddit as styled with markdown. So, the script downloads them as they are in order not to lose their stylings.
However, there is a [great Chrome extension](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with its styling. Install it and open the files with [Chrome](https://www.google.com/intl/tr/chrome/). However, there is a [great Chrome extension](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with its styling. Install it and open the files with [Chrome](https://www.google.com/intl/tr/chrome/).
However, they are basically text files. You can also view them with any text editor such as Notepad on Windows, gedit on Linux or Text Editor on MacOS However, they are basically text files. You can also view them with any text editor such as Notepad on Windows, gedit on Linux or Text Editor on MacOS
### How can I change my credentials?
- All of the user data is held in **config.json** file which is in a folder named "Bulk Downloader for Reddit" in your **Home** directory. You can edit
them, there.
## Changelog ## Changelog
### [19/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/50c4a8d6d7e21d9b44a6d6d00c1811cfe9c655b1)
- Added v.redd.it support
- Added custom exception descriptions to FAILED.json file
- Fixed the bug that prevents downloading some gfycat URLs
### [13/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/9f831e1b784a770c82252e909462871401a05c11) * [See the changes on *master* here](docs/CHANGELOG.md)
- Change config.json file's path to home directory
### [12/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/50a77f6ba54c24f5647d5ea4e177400b71ff04a7)
- Added binaries for Windows and Linux
- Wait on KeyboardInterrupt
- Accept multiple subreddit input
- Fixed the bug that prevents choosing "[0] exit" with typing "exit"
### [11/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a28a7776ab826dea2a8d93873a94cd46db3a339b)
- Improvements on UX and UI
- Added logging errors to CONSOLE_LOG.txt
- Using current directory if directory has not been given yet.
### [10/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/ffe3839aee6dc1a552d95154d817aefc2b66af81)
- Added support for *self* post
- Now getting posts is quicker

1
_config.yml Normal file
View File

@@ -0,0 +1 @@
theme: jekyll-theme-cayman

86
docs/CHANGELOG.md Normal file
View File

@@ -0,0 +1,86 @@
# Changes on *master*
## [23/02/2019](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/4d385fda60028343be816eb7c4f7bc613a9d555d)
- Fixed v.redd.it links
## [27/01/2019](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/b7baf07fb5998368d87e3c4c36aed40daf820609)
- Clarified the instructions
## [28/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/d56efed1c6833a66322d9158523b89d0ce57f5de)
- Adjusted the algorithm used for extracting gfycat links because of gfycat's design change
- Ignore space at the end of the given directory
## [16/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/d56efed1c6833a66322d9158523b89d0ce57f5de)
- Fix the bug that prevents downloading imgur videos
## [15/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/adccd8f3ba03ad124d58643d78dab287a4123a6f)
- Prints out the title of posts' that are already downloaded
## [13/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/50cb7c15b9cb4befce0cfa2c23ab5de4af9176c6)
- Added alternative location of current directory for config file
- Fixed console prints on Linux
## [10/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/8f1ff10a5e11464575284210dbba4a0d387bc1c3)
- Added reddit username to config file
## [06/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/210238d0865febcb57fbd9f0b0a7d3da9dbff384)
- Sending headers when requesting a file in order not to be rejected by server
## [04/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/426089d0f35212148caff0082708a87017757bde)
- Disabled printing post types to console
## [30/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/af294929510f884d92b25eaa855c29fc4fb6dcaa)
- Now opens web browser and goes to Imgur when prompts for Imgur credentials
## [26/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/1623722138bad80ae39ffcd5fb38baf80680deac)
- Improved verbose mode
- Minimalized the console output
- Added quit option for auto quitting the program after process finishes
## [25/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/1623722138bad80ae39ffcd5fb38baf80680deac)
- Added verbose mode
- Stylized the console output
## [24/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7a68ff3efac9939f9574c2cef6184b92edb135f4)
- Added OP's name to file names (backwards compatible)
- Deleted # char from file names (backwards compatible)
- Improved exception handling
## [23/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7314e17125aa78fd4e6b28e26fda7ec7db7e0147)
- Split the download() function
- Added erome support
- Removed exclude feature
- Bug fixes
## [22/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/6e7463005051026ad64006a8580b0b5dc9536b8c)
- Put log files in a folder named "LOG_FILES"
- Fixed the bug that makes multireddit mode unusable
## [21/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/4a8c2377f9fb4d60ed7eeb8d50aaf9a26492462a)
- Added exclude mode
## [20/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7548a010198fb693841ca03654d2c9bdf5742139)
- "0" input for no limit
- Fixed the bug that recognizes non-image direct links as image links
## [19/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/41cbb58db34f500a8a5ecc3ac4375bf6c3b275bb)
- Added v.redd.it support
- Added custom exception descriptions to FAILED.json file
- Fixed the bug that prevents downloading some gfycat URLs
## [13/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/9f831e1b784a770c82252e909462871401a05c11)
- Changed config.json file's path to home directory
## [12/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/50a77f6ba54c24f5647d5ea4e177400b71ff04a7)
- Added binaries for Windows and Linux
- Wait on KeyboardInterrupt
- Accept multiple subreddit input
- Fixed the bug that prevents choosing "[0] exit" with typing "exit"
## [11/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a28a7776ab826dea2a8d93873a94cd46db3a339b)
- Improvements on UX and UI
- Added logging errors to CONSOLE_LOG.txt
- Using current directory if directory has not been given yet.
## [10/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/ffe3839aee6dc1a552d95154d817aefc2b66af81)
- Added support for *self* post
- Now getting posts is quicker

View File

@@ -1,29 +1,34 @@
# Using command-line arguments # Using command-line arguments
See **[compiling from source](COMPILE_FROM_SOURCE.md)** page first unless you are using an executable file. If you are using an executable file, see [using terminal](COMPILE_FROM_SOURCE.md#using-terminal) and come back. See **[compiling from source](INTERPRET_FROM_SOURCE.md)** page first unless you are using an executable file. If you are using an executable file, see [using terminal](INTERPRET_FROM_SOURCE.md#using-terminal) and come back.
***Use*** `.\bulk-downloader-for-reddit.exe` ***or*** `./bulk-downloader-for-reddit` ***if you are using the executable***. ***Use*** `.\bulk-downloader-for-reddit.exe` ***or*** `./bulk-downloader-for-reddit` ***if you are using the executable***.
```console ```console
$ python script.py --help $ python script.py --help
usage: script.py [-h] [--directory DIRECTORY] [--link link] [--saved] usage: script.py [-h] [--directory DIRECTORY] [--NoDownload] [--verbose]
[--submitted] [--upvoted] [--log LOG FILE] [--quit] [--link link] [--saved] [--submitted] [--upvoted]
[--subreddit SUBREDDIT [SUBREDDIT ...]] [--log LOG FILE] [--subreddit SUBREDDIT [SUBREDDIT ...]]
[--multireddit MULTIREDDIT] [--user redditor] [--multireddit MULTIREDDIT] [--user redditor]
[--search query] [--sort SORT TYPE] [--limit Limit] [--search query] [--sort SORT TYPE] [--limit Limit]
[--time TIME_LIMIT] [--NoDownload] [--time TIME_LIMIT]
This program downloads media from reddit posts This program downloads media from reddit posts
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
--directory DIRECTORY --directory DIRECTORY, -d DIRECTORY
Specifies the directory where posts will be downloaded Specifies the directory where posts will be downloaded
to to
--NoDownload Just gets the posts and stores them in a file for
downloading later
--verbose, -v Verbose Mode
--quit, -q Auto quit after the process finishes
--link link, -l link Get posts from link --link link, -l link Get posts from link
--saved Triggers saved mode --saved Triggers saved mode
--submitted Gets posts of --user --submitted Gets posts of --user
--upvoted Gets upvoted posts of --user --upvoted Gets upvoted posts of --user
--log LOG FILE Triggers log read mode and takes a log file --log LOG FILE Takes a log file which created by itself (json files),
reads posts and tries downloading them again.
--subreddit SUBREDDIT [SUBREDDIT ...] --subreddit SUBREDDIT [SUBREDDIT ...]
Triggers subreddit mode and takes subreddit's name Triggers subreddit mode and takes subreddit's name
without r/. use "frontpage" for frontpage without r/. use "frontpage" for frontpage
@@ -37,8 +42,6 @@ optional arguments:
--limit Limit default: unlimited --limit Limit default: unlimited
--time TIME_LIMIT Either hour, day, week, month, year or all. default: --time TIME_LIMIT Either hour, day, week, month, year or all. default:
all all
--NoDownload Just gets the posts and store them in a file for
downloading later
``` ```
# Examples # Examples
@@ -95,4 +98,4 @@ python script.py --directory C:\\NEW_FOLDER\\ANOTHER_FOLDER --log UNNAMED_FOLDER
# FAQ # FAQ
## I can't startup the script no matter what. ## I can't startup the script no matter what.
See **[finding the correct keyword for Python](COMPILE_FROM_SOURCE.md#finding-the-correct-keyword-for-python)** See **[finding the correct keyword for Python](INTERPRET_FROM_SOURCE.md#finding-the-correct-keyword-for-python)**

View File

@@ -1,18 +1,16 @@
# Compiling from source code # Interpret from source code
## Requirements ## Requirements
### Python 3 Interpreter ### Python 3 Interpreter
Latest* version of **Python 3** is needed. See if it is already installed [here](#finding-the-correct-keyword-for-python). If not, download the matching release for your platform [here](https://www.python.org/downloads/) and install it. If you are a *Windows* user, selecting **Add Python 3 to PATH** option is mandatory. - This program is designed to work best on **Python 3.6.5** and this version of Python 3 is suggested. See if it is already installed, [here](#finding-the-correct-keyword-for-python).
- If not, download the matching release for your platform [here](https://www.python.org/downloads/) and install it. If you are a *Windows* user, selecting **Add Python 3 to PATH** option when installing the software is mandatory.
\* *Use Python 3.6.5 if you encounter an issue*
## Using terminal ## Using terminal
### To open it... ### To open it...
- **On Windows 8/8.1/10**: Press the File tab on **Windows Explorer**, click on **Open Windows PowerShell** or **Open Windows Command Prompt** or look for *Command Prompt* or *PowerShell* in *Start Menu*. - **on Windows**: Press **Shift+Right Click**, select **Open Powershell window here** or **Open Command Prompt window here**
- **On Windows 7**: Press **WindowsKey+R**, type **cmd** and hit Enter or look for *Command Prompt* or *PowerShell* in *Start Menu*. - **on Linux**: Right-click in a folder and select **Open Terminal** or press **Ctrl+Alt+T**.
- **On Linux**: Right-click in a folder and select **Open Terminal** or press **Ctrl+Alt+T** or look for **Terminal** in the programs. - **on MacOS**: Look for an app called **Terminal**.
- **On MacOS**: Look for an app called **Terminal**.
### Navigating to the directory where script is downloaded ### Navigating to the directory where script is downloaded
Go inside the folder where script.py is located. If you are not familiar with changing directories on command-prompt and terminal read *Changing Directories* in [this article](https://lifehacker.com/5633909/who-needs-a-mouse-learn-to-use-the-command-line-for-almost-anything) Go inside the folder where script.py is located. If you are not familiar with changing directories on command-prompt and terminal read *Changing Directories* in [this article](https://lifehacker.com/5633909/who-needs-a-mouse-learn-to-use-the-command-line-for-almost-anything)

View File

@@ -1,3 +1,4 @@
bs4
requests requests
praw praw
imgurpython imgurpython

429
script.py
View File

@@ -10,10 +10,11 @@ import logging
import os import os
import sys import sys
import time import time
import webbrowser
from io import StringIO from io import StringIO
from pathlib import Path, PurePath from pathlib import Path, PurePath
from src.downloader import Direct, Gfycat, Imgur, Self from src.downloader import Direct, Erome, Gfycat, Imgur, Self
from src.errors import * from src.errors import *
from src.parser import LinkDesigner from src.parser import LinkDesigner
from src.searcher import getPosts from src.searcher import getPosts
@@ -22,7 +23,7 @@ from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
__author__ = "Ali Parlakci" __author__ = "Ali Parlakci"
__license__ = "GPL" __license__ = "GPL"
__version__ = "1.2.0" __version__ = "1.6.5"
__maintainer__ = "Ali Parlakci" __maintainer__ = "Ali Parlakci"
__email__ = "parlakciali@gmail.com" __email__ = "parlakciali@gmail.com"
@@ -38,20 +39,34 @@ def getConfig(configFileName):
if "reddit_refresh_token" in content: if "reddit_refresh_token" in content:
if content["reddit_refresh_token"] == "": if content["reddit_refresh_token"] == "":
FILE.delete("reddit_refresh_token") FILE.delete("reddit_refresh_token")
if not all(False if content.get(key,"") == "" else True for key in keys):
print(
"Go to this URL and fill the form: " \
"https://api.imgur.com/oauth2/addclient\n" \
"Enter the client id and client secret here:"
)
webbrowser.open("https://api.imgur.com/oauth2/addclient",new=2)
for key in keys: for key in keys:
try: try:
if content[key] == "": if content[key] == "":
raise KeyError raise KeyError
except KeyError: except KeyError:
print(key,": ") FILE.add({key:input(" "+key+": ")})
FILE.add({key:input()})
return jsonFile(configFileName).read() return jsonFile(configFileName).read()
else: else:
FILE = jsonFile(configFileName) FILE = jsonFile(configFileName)
configDictionary = {} configDictionary = {}
print(
"Go to this URL and fill the form: " \
"https://api.imgur.com/oauth2/addclient\n" \
"Enter the client id and client secret here:"
)
webbrowser.open("https://api.imgur.com/oauth2/addclient",new=2)
for key in keys: for key in keys:
configDictionary[key] = input(key + ": ") configDictionary[key] = input(" "+key+": ")
FILE.add(configDictionary) FILE.add(configDictionary)
return FILE.read() return FILE.read()
@@ -62,11 +77,27 @@ def parseArguments(arguments=[]):
description="This program downloads " \ description="This program downloads " \
"media from reddit " \ "media from reddit " \
"posts") "posts")
parser.add_argument("--directory", parser.add_argument("--directory","-d",
help="Specifies the directory where posts will be " \ help="Specifies the directory where posts will be " \
"downloaded to", "downloaded to",
metavar="DIRECTORY") metavar="DIRECTORY")
parser.add_argument("--NoDownload",
help="Just gets the posts and stores them in a file" \
" for downloading later",
action="store_true",
default=False)
parser.add_argument("--verbose","-v",
help="Verbose Mode",
action="store_true",
default=False)
parser.add_argument("--quit","-q",
help="Auto quit afer the process finishes",
action="store_true",
default=False)
parser.add_argument("--link","-l", parser.add_argument("--link","-l",
help="Get posts from link", help="Get posts from link",
metavar="link") metavar="link")
@@ -138,12 +169,6 @@ def parseArguments(arguments=[]):
metavar="TIME_LIMIT", metavar="TIME_LIMIT",
type=str) type=str)
parser.add_argument("--NoDownload",
help="Just gets the posts and store them in a file" \
" for downloading later",
action="store_true",
default=False)
if arguments == []: if arguments == []:
return parser.parse_args() return parser.parse_args()
else: else:
@@ -159,7 +184,11 @@ def checkConflicts():
else: else:
user = 1 user = 1
modes = ["saved","subreddit","submitted","search","log","link","upvoted"] search = 1 if GLOBAL.arguments.search else 0
modes = [
"saved","subreddit","submitted","log","link","upvoted","multireddit"
]
values = { values = {
x: 0 if getattr(GLOBAL.arguments,x) is None or \ x: 0 if getattr(GLOBAL.arguments,x) is None or \
@@ -171,15 +200,18 @@ def checkConflicts():
if not sum(values[x] for x in values) == 1: if not sum(values[x] for x in values) == 1:
raise ProgramModeError("Invalid program mode") raise ProgramModeError("Invalid program mode")
if values["search"]+values["saved"] == 2: if search+values["saved"] == 2:
raise SearchModeError("You cannot search in your saved posts") raise SearchModeError("You cannot search in your saved posts")
if values["search"]+values["submitted"] == 2: if search+values["submitted"] == 2:
raise SearchModeError("You cannot search in submitted posts") raise SearchModeError("You cannot search in submitted posts")
if values["search"]+values["upvoted"] == 2: if search+values["upvoted"] == 2:
raise SearchModeError("You cannot search in upvoted posts") raise SearchModeError("You cannot search in upvoted posts")
if search+values["log"] == 2:
raise SearchModeError("You cannot search in log files")
if values["upvoted"]+values["submitted"] == 1 and user == 0: if values["upvoted"]+values["submitted"] == 1 and user == 0:
raise RedditorNameError("No redditor name given") raise RedditorNameError("No redditor name given")
@@ -233,18 +265,23 @@ class PromptUser:
if programMode == "subreddit": if programMode == "subreddit":
subredditInput = input("subreddit: ") subredditInput = input("(type frontpage for all subscribed subreddits,\n" \
" use plus to seperate multi subreddits:" \
" pics+funny+me_irl etc.)\n\n" \
"subreddit: ")
GLOBAL.arguments.subreddit = subredditInput GLOBAL.arguments.subreddit = subredditInput
while not subredditInput == "": # while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
subredditInput = input("subreddit: ") # subredditInput = input("subreddit: ")
GLOBAL.arguments.subreddit += "+" + subredditInput # GLOBAL.arguments.subreddit += "+" + subredditInput
if " " in GLOBAL.arguments.subreddit: if " " in GLOBAL.arguments.subreddit:
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit.split()) GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit.split())
# DELETE THE PLUS (+) AT THE END # DELETE THE PLUS (+) AT THE END
GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1] if not subredditInput.lower() == "frontpage" \
and GLOBAL.arguments.subreddit[-1] == "+":
GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]
print("\nselect sort type:") print("\nselect sort type:")
sortTypes = [ sortTypes = [
@@ -264,8 +301,8 @@ class PromptUser:
GLOBAL.arguments.time = "all" GLOBAL.arguments.time = "all"
elif programMode == "multireddit": elif programMode == "multireddit":
GLOBAL.arguments.user = input("\nredditor: ") GLOBAL.arguments.user = input("\nmultireddit owner: ")
GLOBAL.arguments.subreddit = input("\nmultireddit: ") GLOBAL.arguments.multireddit = input("\nmultireddit: ")
print("\nselect sort type:") print("\nselect sort type:")
sortTypes = [ sortTypes = [
@@ -317,10 +354,11 @@ class PromptUser:
GLOBAL.arguments.log = input("\nlog file directory:") GLOBAL.arguments.log = input("\nlog file directory:")
if Path(GLOBAL.arguments.log ).is_file(): if Path(GLOBAL.arguments.log ).is_file():
break break
while True: while True:
try: try:
GLOBAL.arguments.limit = int(input("\nlimit: ")) GLOBAL.arguments.limit = int(input("\nlimit (0 for none): "))
if GLOBAL.arguments.limit == 0:
GLOBAL.arguments.limit = None
break break
except ValueError: except ValueError:
pass pass
@@ -355,10 +393,7 @@ def prepareAttributes():
GLOBAL.arguments.link = GLOBAL.arguments.link.strip("\"") GLOBAL.arguments.link = GLOBAL.arguments.link.strip("\"")
try: ATTRIBUTES = LinkDesigner(GLOBAL.arguments.link)
ATTRIBUTES = LinkDesigner(GLOBAL.arguments.link)
except InvalidRedditLink:
raise InvalidRedditLink
if GLOBAL.arguments.search is not None: if GLOBAL.arguments.search is not None:
ATTRIBUTES["search"] = GLOBAL.arguments.search ATTRIBUTES["search"] = GLOBAL.arguments.search
@@ -375,6 +410,9 @@ def prepareAttributes():
ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit
elif GLOBAL.arguments.multireddit is not None:
ATTRIBUTES["multireddit"] = GLOBAL.arguments.multireddit
elif GLOBAL.arguments.saved is True: elif GLOBAL.arguments.saved is True:
ATTRIBUTES["saved"] = True ATTRIBUTES["saved"] = True
@@ -385,7 +423,7 @@ def prepareAttributes():
ATTRIBUTES["submitted"] = True ATTRIBUTES["submitted"] = True
if GLOBAL.arguments.sort == "rising": if GLOBAL.arguments.sort == "rising":
raise InvalidSortingType raise InvalidSortingType("Invalid sorting type has given")
ATTRIBUTES["limit"] = GLOBAL.arguments.limit ATTRIBUTES["limit"] = GLOBAL.arguments.limit
@@ -414,75 +452,81 @@ def postFromLog(fileName):
return posts return posts
def postExists(POST): def isPostExists(POST):
"""Figure out a file's name and checks if the file already exists""" """Figure out a file's name and checks if the file already exists"""
title = nameCorrector(POST['postTitle']) title = nameCorrector(POST['postTitle'])
FILENAME = title + "_" + POST['postId']
PATH = GLOBAL.directory / POST["postSubreddit"] PATH = GLOBAL.directory / POST["postSubreddit"]
possibleExtensions = [".jpg",".png",".mp4",".gif",".webm",".md"] possibleExtensions = [".jpg",".png",".mp4",".gif",".webm",".md"]
for i in range(2): """If you change the filenames, don't forget to add them here.
for extension in possibleExtensions: Please don't remove existing ones
FILE_PATH = PATH / (FILENAME+extension) """
if FILE_PATH.exists(): for extension in possibleExtensions:
return True
else: OLD_FILE_PATH = PATH / (
FILENAME = POST['postId'] title
+ "_" + POST['postId']
+ extension
)
FILE_PATH = PATH / (
POST["postSubmitter"]
+ "_" + title
+ "_" + POST['postId']
+ extension
)
SHORT_FILE_PATH = PATH / (POST['postId']+extension)
if OLD_FILE_PATH.exists() or \
FILE_PATH.exists() or \
SHORT_FILE_PATH.exists():
return True
else: else:
return False return False
def download(submissions): def downloadPost(SUBMISSION):
"""Analyze list of submissions and call the right function
to download each one, catch errors, update the log files
"""
subsLenght = len(submissions) """Download directory is declared here for each file"""
lastRequestTime = 0 directory = GLOBAL.directory / SUBMISSION['postSubreddit']
downloadedCount = subsLenght
duplicates = 0
BACKUP = {}
FAILED_FILE = createLogFile("FAILED") global lastRequestTime
for i in range(subsLenght): downloaders = {
print("\n({}/{})".format(i+1,subsLenght)) "imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
print( }
"https://reddit.com/r/{subreddit}/comments/{id}".format(
subreddit=submissions[i]['postSubreddit'],
id=submissions[i]['postId']
)
)
if postExists(submissions[i]): print()
result = False if SUBMISSION['postType'] in downloaders:
print(submissions[i]['postType'].upper())
print("It already exists")
duplicates += 1
downloadedCount -= 1
continue
directory = GLOBAL.directory / submissions[i]['postSubreddit'] if SUBMISSION['postType'] == "imgur":
if submissions[i]['postType'] == 'imgur':
print("IMGUR",end="")
while int(time.time() - lastRequestTime) <= 2: while int(time.time() - lastRequestTime) <= 2:
pass pass
credit = Imgur.get_credits() credit = Imgur.get_credits()
IMGUR_RESET_TIME = credit['UserReset']-time.time() IMGUR_RESET_TIME = credit['UserReset']-time.time()
USER_RESET = ("after " \ USER_RESET = ("after " \
+ str(int(IMGUR_RESET_TIME/60)) \ + str(int(IMGUR_RESET_TIME/60)) \
+ " Minutes " \ + " Minutes " \
+ str(int(IMGUR_RESET_TIME%60)) \ + str(int(IMGUR_RESET_TIME%60)) \
+ " Seconds") + " Seconds")
if credit['ClientRemaining'] < 25 or credit['UserRemaining'] < 25:
printCredit = {"noPrint":False}
else:
printCredit = {"noPrint":True}
print( print(
" => Client: {} - User: {} - Reset {}".format( "==> Client: {} - User: {} - Reset {}\n".format(
credit['ClientRemaining'], credit['ClientRemaining'],
credit['UserRemaining'], credit['UserRemaining'],
USER_RESET USER_RESET
) ),end="",**printCredit
) )
if not (credit['UserRemaining'] == 0 or \ if not (credit['UserRemaining'] == 0 or \
@@ -492,125 +536,151 @@ def download(submissions):
""" """
while int(time.time() - lastRequestTime) <= 2: while int(time.time() - lastRequestTime) <= 2:
pass pass
lastRequestTime = time.time() lastRequestTime = time.time()
try:
Imgur(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
duplicates += 1
downloadedCount -= 1
except ImgurLoginError:
print(
"Imgur login failed. Quitting the program "\
"as unexpected errors might occur."
)
sys.exit()
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
else: else:
if credit['UserRemaining'] == 0: if credit['UserRemaining'] == 0:
KEYWORD = "user" KEYWORD = "user"
elif credit['ClientRemaining'] == 0: elif credit['ClientRemaining'] == 0:
KEYWORD = "client" KEYWORD = "client"
print('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper())) raise ImgurLimitError('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
FAILED_FILE.add(
{int(i+1):['{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()), downloaders[SUBMISSION['postType']] (directory,SUBMISSION)
submissions[i]]}
else:
raise NoSuitablePost
return None
def download(submissions):
"""Analyze list of submissions and call the right function
to download each one, catch errors, update the log files
"""
subsLenght = len(submissions)
global lastRequestTime
lastRequestTime = 0
downloadedCount = subsLenght
duplicates = 0
FAILED_FILE = createLogFile("FAILED")
for i in range(subsLenght):
print(f"\n({i+1}/{subsLenght}) r/{submissions[i]['postSubreddit']}",
end="")
print(f" {submissions[i]['postType'].upper()}",end="",noPrint=True)
if isPostExists(submissions[i]):
print(f"\n" \
f"{submissions[i]['postSubmitter']}_"
f"{nameCorrector(submissions[i]['postTitle'])}")
print("It already exists")
duplicates += 1
downloadedCount -= 1
continue
try:
downloadPost(submissions[i])
except FileAlreadyExistsError:
print("It already exists")
duplicates += 1
downloadedCount -= 1
except ImgurLoginError:
print(
"Imgur login failed. \nQuitting the program "\
"as unexpected errors might occur."
)
sys.exit()
except ImgurLimitError as exception:
FAILED_FILE.add({int(i+1):[
"{class_name}: {info}".format(
class_name=exception.__class__.__name__,info=str(exception)
),
submissions[i]
]})
downloadedCount -= 1
except NotADownloadableLinkError as exception:
print(
"{class_name}: {info}".format(
class_name=exception.__class__.__name__,info=str(exception)
) )
downloadedCount -= 1 )
FAILED_FILE.add({int(i+1):[
"{class_name}: {info}".format(
class_name=exception.__class__.__name__,info=str(exception)
),
submissions[i]
]})
downloadedCount -= 1
elif submissions[i]['postType'] == 'gfycat': except NoSuitablePost:
print("GFYCAT")
try:
Gfycat(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
duplicates += 1
downloadedCount -= 1
except NotADownloadableLinkError as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
elif submissions[i]['postType'] == 'direct':
print("DIRECT")
try:
Direct(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
downloadedCount -= 1
duplicates += 1
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
elif submissions[i]['postType'] == 'self':
print("SELF")
try:
Self(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
downloadedCount -= 1
duplicates += 1
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
else:
print("No match found, skipping...") print("No match found, skipping...")
downloadedCount -= 1 downloadedCount -= 1
except Exception as exception:
# raise exception
print(
"{class_name}: {info}".format(
class_name=exception.__class__.__name__,info=str(exception)
)
)
FAILED_FILE.add({int(i+1):[
"{class_name}: {info}".format(
class_name=exception.__class__.__name__,info=str(exception)
),
submissions[i]
]})
downloadedCount -= 1
if duplicates: if duplicates:
print("\n There was {} duplicates".format(duplicates)) print(f"\nThere {'were' if duplicates > 1 else 'was'} " \
f"{duplicates} duplicate{'s' if duplicates > 1 else ''}")
if downloadedCount == 0: if downloadedCount == 0:
print(" Nothing downloaded :(") print("Nothing downloaded :(")
else: else:
print(" Total of {} links downloaded!".format(downloadedCount)) print(f"Total of {downloadedCount} " \
f"link{'s' if downloadedCount > 1 else ''} downloaded!")
def main(): def main():
VanillaPrint(
f"\nBulk Downloader for Reddit v{__version__}\n" \
f"Written by Ali PARLAKCI parlakciali@gmail.com\n\n" \
f"https://github.com/aliparlakci/bulk-downloader-for-reddit/"
)
GLOBAL.arguments = parseArguments() GLOBAL.arguments = parseArguments()
if GLOBAL.arguments.directory is not None: if GLOBAL.arguments.directory is not None:
GLOBAL.directory = Path(GLOBAL.arguments.directory) GLOBAL.directory = Path(GLOBAL.arguments.directory.strip())
else: else:
GLOBAL.directory = Path(input("download directory: ")) GLOBAL.directory = Path(input("\ndownload directory: ").strip())
print("\n"," ".join(sys.argv),"\n") print("\n"," ".join(sys.argv),"\n",noPrint=True)
print(f"Bulk Downloader for Reddit v{__version__}\n",noPrint=True
)
try: try:
checkConflicts() checkConflicts()
except ProgramModeError as err: except ProgramModeError as err:
PromptUser() PromptUser()
except Exception as err:
print(err)
sys.exit()
if not Path(GLOBAL.configDirectory).is_dir(): if not Path(GLOBAL.defaultConfigDirectory).is_dir():
os.makedirs(GLOBAL.configDirectory) os.makedirs(GLOBAL.defaultConfigDirectory)
GLOBAL.config = getConfig(GLOBAL.configDirectory / "config.json")
if Path("config.json").exists():
GLOBAL.configDirectory = Path("config.json")
else:
GLOBAL.configDirectory = GLOBAL.defaultConfigDirectory / "config.json"
GLOBAL.config = getConfig(GLOBAL.configDirectory)
if GLOBAL.arguments.log is not None: if GLOBAL.arguments.log is not None:
logDir = Path(GLOBAL.arguments.log) logDir = Path(GLOBAL.arguments.log)
@@ -619,26 +689,11 @@ def main():
try: try:
POSTS = getPosts(prepareAttributes()) POSTS = getPosts(prepareAttributes())
except InsufficientPermission: except Exception as exc:
print("You do not have permission to do that") logging.error(sys.exc_info()[0].__name__,
sys.exit() exc_info=full_exc_info(sys.exc_info()))
except NoMatchingSubmissionFound: print(log_stream.getvalue(),noPrint=True)
print("No matching submission was found") print(exc)
sys.exit()
except NoRedditSupoort:
print("Reddit does not support that")
sys.exit()
except NoPrawSupport:
print("PRAW does not support that")
sys.exit()
except MultiredditNotFound:
print("Multireddit not found")
sys.exit()
except InvalidSortingType:
print("Invalid sorting type has given")
sys.exit()
except InvalidRedditLink:
print("Invalid reddit link")
sys.exit() sys.exit()
if POSTS is None: if POSTS is None:
@@ -661,12 +716,16 @@ if __name__ == "__main__":
print = printToFile print = printToFile
GLOBAL.RUN_TIME = time.time() GLOBAL.RUN_TIME = time.time()
main() main()
except KeyboardInterrupt: except KeyboardInterrupt:
if GLOBAL.directory is None: if GLOBAL.directory is None:
GLOBAL.directory = Path(".\\") GLOBAL.directory = Path(".\\")
print("\nQUITTING...")
except Exception as exception: except Exception as exception:
logging.error("Runtime error!", exc_info=full_exc_info(sys.exc_info())) if GLOBAL.directory is None:
GLOBAL.directory = Path(".\\")
logging.error(sys.exc_info()[0].__name__,
exc_info=full_exc_info(sys.exc_info()))
print(log_stream.getvalue()) print(log_stream.getvalue())
input("Press enter to quit\n") if not GLOBAL.arguments.quit: input("\nPress enter to quit\n")

View File

@@ -1,11 +1,15 @@
import io import io
import json
import os import os
import sys import sys
import urllib.request import urllib.request
from html.parser import HTMLParser
from multiprocessing import Queue
from pathlib import Path from pathlib import Path
from urllib.error import HTTPError
import imgurpython import imgurpython
from multiprocessing import Queue from bs4 import BeautifulSoup
from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError, from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError,
FileNameTooLong, ImgurLoginError, FileNameTooLong, ImgurLoginError,
@@ -21,8 +25,7 @@ def dlProgress(count, blockSize, totalSize):
downloadedMbs = int(count*blockSize*(10**(-6))) downloadedMbs = int(count*blockSize*(10**(-6)))
fileSize = int(totalSize*(10**(-6))) fileSize = int(totalSize*(10**(-6)))
sys.stdout.write("\r{}Mb/{}Mb".format(downloadedMbs,fileSize)) sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs,fileSize))
sys.stdout.write("\b"*len("\r{}Mb/{}Mb".format(downloadedMbs,fileSize)))
sys.stdout.flush() sys.stdout.flush()
def getExtension(link): def getExtension(link):
@@ -52,6 +55,23 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
As too long file names seem not working. As too long file names seem not working.
""" """
headers = [
("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "\
"Safari/537.36 OPR/54.0.2952.64"),
("Accept", "text/html,application/xhtml+xml,application/xml;" \
"q=0.9,image/webp,image/apng,*/*;q=0.8"),
("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
("Accept-Encoding", "none"),
("Accept-Language", "en-US,en;q=0.8"),
("Connection", "keep-alive")
]
opener = urllib.request.build_opener()
if not "imgur" in imageURL:
opener.addheaders = headers
urllib.request.install_opener(opener)
if not (os.path.isfile(fileDir)): if not (os.path.isfile(fileDir)):
for i in range(3): for i in range(3):
try: try:
@@ -59,16 +79,156 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
tempDir, tempDir,
reporthook=dlProgress) reporthook=dlProgress)
os.rename(tempDir,fileDir) os.rename(tempDir,fileDir)
print(" "*indent+"Downloaded"+" "*10)
break
except ConnectionResetError as exception: except ConnectionResetError as exception:
print(" "*indent + str(exception)) print(" "*indent + str(exception))
print(" "*indent + "Trying again\n") print(" "*indent + "Trying again\n")
except FileNotFoundError: except FileNotFoundError:
raise FileNameTooLong raise FileNameTooLong
else:
print(" "*indent+"Downloaded"+" "*10)
break
else: else:
raise FileAlreadyExistsError raise FileAlreadyExistsError
class Erome:
def __init__(self,directory,post):
try:
IMAGES = self.getLinks(post['postURL'])
except urllib.error.HTTPError:
raise NotADownloadableLinkError("Not a downloadable link")
imagesLenght = len(IMAGES)
howManyDownloaded = imagesLenght
duplicates = 0
if imagesLenght == 1:
extension = getExtension(IMAGES[0])
"""Filenames are declared here"""
title = nameCorrector(post['postTitle'])
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+extension)
fileDir = directory / (
post["postSubmitter"]+"_"+title+"_"+post['postId']+extension
)
tempDir = directory / (
post["postSubmitter"]+"_"+title+"_"+post['postId']+".tmp"
)
imageURL = IMAGES[0]
if 'https://' not in imageURL and 'http://' not in imageURL:
imageURL = "https://" + imageURL
try:
getFile(fileDir,tempDir,imageURL)
except FileNameTooLong:
fileDir = directory / (post['postId'] + extension)
tempDir = directory / (post['postId'] + '.tmp')
getFile(fileDir,tempDir,imageURL)
else:
title = nameCorrector(post['postTitle'])
print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n")
folderDir = directory / (
post["postSubmitter"] + "_" + title + "_" + post['postId']
)
try:
if not os.path.exists(folderDir):
os.makedirs(folderDir)
except FileNotFoundError:
folderDir = directory / post['postId']
os.makedirs(folderDir)
for i in range(imagesLenght):
extension = getExtension(IMAGES[i])
fileName = str(i+1)
imageURL = IMAGES[i]
if 'https://' not in imageURL and 'http://' not in imageURL:
imageURL = "https://" + imageURL
fileDir = folderDir / (fileName + extension)
tempDir = folderDir / (fileName + ".tmp")
print(" ({}/{})".format(i+1,imagesLenght))
print(" {}".format(fileName+extension))
try:
getFile(fileDir,tempDir,imageURL,indent=2)
print()
except FileAlreadyExistsError:
print(" The file already exists" + " "*10,end="\n\n")
duplicates += 1
howManyDownloaded -= 1
except Exception as exception:
# raise exception
print("\n Could not get the file")
print(
" "
+ "{class_name}: {info}".format(
class_name=exception.__class__.__name__,
info=str(exception)
)
+ "\n"
)
exceptionType = exception
howManyDownloaded -= 1
if duplicates == imagesLenght:
raise FileAlreadyExistsError
elif howManyDownloaded + duplicates < imagesLenght:
raise AlbumNotDownloadedCompletely(
"Album Not Downloaded Completely"
)
def getLinks(self,url,lineNumber=129):
content = []
lineNumber = None
class EromeParser(HTMLParser):
tag = None
def handle_starttag(self, tag, attrs):
self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}
pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
""" FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS"""
for i in range(len(pageSource)):
obj = EromeParser()
obj.feed(pageSource[i])
tag = obj.tag
if tag is not None:
if "div" in tag:
if "id" in tag["div"]:
if tag["div"]["id"] == "album":
lineNumber = i
break
for line in pageSource[lineNumber:]:
obj = EromeParser()
obj.feed(line)
tag = obj.tag
if tag is not None:
if "img" in tag:
if "class" in tag["img"]:
if tag["img"]["class"]=="img-front":
content.append(tag["img"]["src"])
elif "source" in tag:
content.append(tag["source"]["src"])
return [
link for link in content \
if link.endswith("_480p.mp4") or not link.endswith(".mp4")
]
class Imgur: class Imgur:
def __init__(self,directory,post): def __init__(self,directory,post):
self.imgurClient = self.initImgur() self.imgurClient = self.initImgur()
@@ -88,13 +248,25 @@ class Imgur:
post['postExt'] = getExtension(post['mediaURL']) post['postExt'] = getExtension(post['mediaURL'])
title = nameCorrector(post['postTitle']) title = nameCorrector(post['postTitle'])
print(title+"_" +post['postId']+post['postExt'])
fileDir = title + "_" + post['postId'] + post['postExt'] """Filenames are declared here"""
fileDir = directory / fileDir
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+post['postExt'])
fileDir = directory / (
post["postSubmitter"]
+ "_" + title
+ "_" + post['postId']
+ post['postExt']
)
tempDir = directory / (
post["postSubmitter"]
+ "_" + title
+ "_" + post['postId']
+ ".tmp"
)
tempDir = title + "_" + post['postId'] + '.tmp'
tempDir = directory / tempDir
try: try:
getFile(fileDir,tempDir,post['mediaURL']) getFile(fileDir,tempDir,post['mediaURL'])
except FileNameTooLong: except FileNameTooLong:
@@ -110,9 +282,11 @@ class Imgur:
duplicates = 0 duplicates = 0
title = nameCorrector(post['postTitle']) title = nameCorrector(post['postTitle'])
print(title+"_"+post['postId'],end="\n\n") print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n")
folderDir = directory / (title+"_"+post['postId']) folderDir = directory / (
post["postSubmitter"] + "_" + title + "_" + post['postId']
)
try: try:
if not os.path.exists(folderDir): if not os.path.exists(folderDir):
@@ -135,6 +309,8 @@ class Imgur:
+ "_" + "_"
+ images[i]['id']) + images[i]['id'])
"""Filenames are declared here"""
fileDir = folderDir / (fileName + images[i]['Ext']) fileDir = folderDir / (fileName + images[i]['Ext'])
tempDir = folderDir / (fileName + ".tmp") tempDir = folderDir / (fileName + ".tmp")
@@ -165,13 +341,20 @@ class Imgur:
except Exception as exception: except Exception as exception:
print("\n Could not get the file") print("\n Could not get the file")
print(" " + str(exception) + "\n") print(
" "
+ "{class_name}: {info}".format(
class_name=exception.__class__.__name__,
info=str(exception)
)
+ "\n"
)
exceptionType = exception exceptionType = exception
howManyDownloaded -= 1 howManyDownloaded -= 1
if duplicates == imagesLenght: if duplicates == imagesLenght:
raise FileAlreadyExistsError raise FileAlreadyExistsError
elif howManyDownloaded < imagesLenght: elif howManyDownloaded + duplicates < imagesLenght:
raise AlbumNotDownloadedCompletely( raise AlbumNotDownloadedCompletely(
"Album Not Downloaded Completely" "Album Not Downloaded Completely"
) )
@@ -224,16 +407,26 @@ class Gfycat:
except IndexError: except IndexError:
raise NotADownloadableLinkError("Could not read the page source") raise NotADownloadableLinkError("Could not read the page source")
except Exception as exception: except Exception as exception:
#debug
raise exception
raise NotADownloadableLinkError("Could not read the page source") raise NotADownloadableLinkError("Could not read the page source")
POST['postExt'] = getExtension(POST['mediaURL']) POST['postExt'] = getExtension(POST['mediaURL'])
if not os.path.exists(directory): os.makedirs(directory) if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(POST['postTitle']) title = nameCorrector(POST['postTitle'])
print(title+"_"+POST['postId']+POST['postExt'])
fileDir = directory / (title+"_"+POST['postId']+POST['postExt']) """Filenames are declared here"""
tempDir = directory / (title+"_"+POST['postId']+".tmp")
print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'])
fileDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt']
)
tempDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp"
)
try: try:
getFile(fileDir,tempDir,POST['mediaURL']) getFile(fileDir,tempDir,POST['mediaURL'])
except FileNameTooLong: except FileNameTooLong:
@@ -255,37 +448,33 @@ class Gfycat:
url = "https://gfycat.com/" + url.split('/')[-1] url = "https://gfycat.com/" + url.split('/')[-1]
pageSource = (urllib.request.urlopen(url).read().decode().split('\n')) pageSource = (urllib.request.urlopen(url).read().decode())
theLine = pageSource[lineNumber] soup = BeautifulSoup(pageSource, "html.parser")
lenght = len(query) attributes = {"data-react-helmet":"true","type":"application/ld+json"}
link = [] content = soup.find("script",attrs=attributes)
for i in range(len(theLine)): if content is None:
if theLine[i:i+lenght] == query:
cursor = (i+lenght)+1
while not theLine[cursor] == '"':
link.append(theLine[cursor])
cursor += 1
break
if "".join(link) == "":
raise NotADownloadableLinkError("Could not read the page source") raise NotADownloadableLinkError("Could not read the page source")
return "".join(link) return json.loads(content.text)["video"]["contentUrl"]
class Direct: class Direct:
def __init__(self,directory,POST): def __init__(self,directory,POST):
POST['postExt'] = getExtension(POST['postURL']) POST['postExt'] = getExtension(POST['postURL'])
if not os.path.exists(directory): os.makedirs(directory) if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(POST['postTitle']) title = nameCorrector(POST['postTitle'])
print(title+"_"+POST['postId']+POST['postExt'])
fileDir = title+"_"+POST['postId']+POST['postExt'] """Filenames are declared here"""
fileDir = directory / fileDir
tempDir = title+"_"+POST['postId']+".tmp" print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'])
tempDir = directory / tempDir
fileDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt']
)
tempDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp"
)
try: try:
getFile(fileDir,tempDir,POST['postURL']) getFile(fileDir,tempDir,POST['postURL'])
@@ -300,10 +489,14 @@ class Self:
if not os.path.exists(directory): os.makedirs(directory) if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(post['postTitle']) title = nameCorrector(post['postTitle'])
print(title+"_"+post['postId']+".md")
fileDir = title+"_"+post['postId']+".md" """Filenames are declared here"""
fileDir = directory / fileDir
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+".md")
fileDir = directory / (
post["postSubmitter"]+"_"+title+"_"+post['postId']+".md"
)
if Path.is_file(fileDir): if Path.is_file(fileDir):
raise FileAlreadyExistsError raise FileAlreadyExistsError
@@ -319,6 +512,7 @@ class Self:
@staticmethod @staticmethod
def writeToFile(directory,post): def writeToFile(directory,post):
"""Self posts are formatted here"""
content = ("## [" content = ("## ["
+ post["postTitle"] + post["postTitle"]
+ "](" + "]("
@@ -326,7 +520,11 @@ class Self:
+ ")\n" + ")\n"
+ post["postContent"] + post["postContent"]
+ "\n\n---\n\n" + "\n\n---\n\n"
+ "submitted by [u/" + "submitted to [r/"
+ post["postSubreddit"]
+ "](https://www.reddit.com/r/"
+ post["postSubreddit"]
+ ") by [u/"
+ post["postSubmitter"] + post["postSubmitter"]
+ "](https://www.reddit.com/user/" + "](https://www.reddit.com/user/"
+ post["postSubmitter"] + post["postSubmitter"]

View File

@@ -67,7 +67,7 @@ class NoMatchingSubmissionFound(Exception):
class NoPrawSupport(Exception): class NoPrawSupport(Exception):
pass pass
class NoRedditSupoort(Exception): class NoRedditSupport(Exception):
pass pass
class MultiredditNotFound(Exception): class MultiredditNotFound(Exception):
@@ -81,3 +81,9 @@ class InvalidSortingType(Exception):
class FileNotFoundError(Exception): class FileNotFoundError(Exception):
pass pass
class NoSuitablePost(Exception):
pass
class ImgurLimitError(Exception):
pass

View File

@@ -29,7 +29,7 @@ def LinkParser(LINK):
ShortLink = False ShortLink = False
if not "reddit.com" in LINK: if not "reddit.com" in LINK:
raise InvalidRedditLink raise InvalidRedditLink("Invalid reddit link")
SplittedLink = LINK.split("/") SplittedLink = LINK.split("/")

View File

@@ -1,73 +1,79 @@
import os import os
import sys
import random import random
import socket import socket
import webbrowser import webbrowser
import urllib.request
from urllib.error import HTTPError
import praw import praw
from prawcore.exceptions import NotFound, ResponseException, Forbidden from prawcore.exceptions import NotFound, ResponseException, Forbidden
from src.tools import GLOBAL, createLogFile, jsonFile, printToFile from src.tools import GLOBAL, createLogFile, jsonFile, printToFile
from src.errors import (NoMatchingSubmissionFound, NoPrawSupport, from src.errors import (NoMatchingSubmissionFound, NoPrawSupport,
NoRedditSupoort, MultiredditNotFound, NoRedditSupport, MultiredditNotFound,
InvalidSortingType, RedditLoginFailed, InvalidSortingType, RedditLoginFailed,
InsufficientPermission) InsufficientPermission)
print = printToFile print = printToFile
class GetAuth:
def __init__(self,redditInstance,port):
self.redditInstance = redditInstance
self.PORT = int(port)
def recieve_connection(self):
"""Wait for and then return a connected socket..
Opens a TCP connection on port 8080, and waits for a single client.
"""
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(('localhost', self.PORT))
server.listen(1)
client = server.accept()[0]
server.close()
return client
def send_message(self, message):
"""Send message to client and close the connection."""
self.client.send('HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8'))
self.client.close()
def getRefreshToken(self,*scopes):
state = str(random.randint(0, 65000))
url = self.redditInstance.auth.url(scopes, state, 'permanent')
print("Go to this URL and login to reddit:\n\n",url)
webbrowser.open(url,new=2)
self.client = self.recieve_connection()
data = self.client.recv(1024).decode('utf-8')
param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
params = {
key: value for (key, value) in [token.split('=') \
for token in param_tokens]
}
if state != params['state']:
self.send_message(
client, 'State mismatch. Expected: {} Received: {}'
.format(state, params['state'])
)
raise RedditLoginFailed
elif 'error' in params:
self.send_message(client, params['error'])
raise RedditLoginFailed
refresh_token = self.redditInstance.auth.authorize(params['code'])
self.send_message(
"<script>" \
"alert(\"You can go back to terminal window now.\");" \
"</script>"
)
return (self.redditInstance,refresh_token)
def beginPraw(config,user_agent = str(socket.gethostname())): def beginPraw(config,user_agent = str(socket.gethostname())):
class GetAuth:
def __init__(self,redditInstance,port):
self.redditInstance = redditInstance
self.PORT = int(port)
def recieve_connection(self):
"""Wait for and then return a connected socket..
Opens a TCP connection on port 8080, and waits for a single client.
"""
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(('localhost', self.PORT))
server.listen(1)
client = server.accept()[0]
server.close()
return client
def send_message(self, message):
"""Send message to client and close the connection."""
self.client.send(
'HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8')
)
self.client.close()
def getRefreshToken(self,*scopes):
state = str(random.randint(0, 65000))
url = self.redditInstance.auth.url(scopes, state, 'permanent')
print("Go to this URL and login to reddit:\n\n",url)
webbrowser.open(url,new=2)
self.client = self.recieve_connection()
data = self.client.recv(1024).decode('utf-8')
str(data)
param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
params = {
key: value for (key, value) in [token.split('=') \
for token in param_tokens]
}
if state != params['state']:
self.send_message(
client, 'State mismatch. Expected: {} Received: {}'
.format(state, params['state'])
)
raise RedditLoginFailed
elif 'error' in params:
self.send_message(client, params['error'])
raise RedditLoginFailed
refresh_token = self.redditInstance.auth.authorize(params['code'])
self.send_message(
"<script>" \
"alert(\"You can go back to terminal window now.\");" \
"</script>"
)
return (self.redditInstance,refresh_token)
"""Start reddit instance""" """Start reddit instance"""
scopes = ['identity','history','read'] scopes = ['identity','history','read']
@@ -89,7 +95,8 @@ def beginPraw(config,user_agent = str(socket.gethostname())):
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes) authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
reddit = authorizedInstance[0] reddit = authorizedInstance[0]
refresh_token = authorizedInstance[1] refresh_token = authorizedInstance[1]
jsonFile(GLOBAL.configDirectory / "config.json").add({ jsonFile(GLOBAL.configDirectory).add({
"reddit_username":str(reddit.user.me()),
"reddit_refresh_token":refresh_token "reddit_refresh_token":refresh_token
}) })
else: else:
@@ -98,7 +105,8 @@ def beginPraw(config,user_agent = str(socket.gethostname())):
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes) authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
reddit = authorizedInstance[0] reddit = authorizedInstance[0]
refresh_token = authorizedInstance[1] refresh_token = authorizedInstance[1]
jsonFile(GLOBAL.configDirectory / "config.json").add({ jsonFile(GLOBAL.configDirectory).add({
"reddit_username":str(reddit.user.me()),
"reddit_refresh_token":refresh_token "reddit_refresh_token":refresh_token
}) })
return reddit return reddit
@@ -112,7 +120,7 @@ def getPosts(args):
reddit = beginPraw(config) reddit = beginPraw(config)
if args["sort"] == "best": if args["sort"] == "best":
raise NoPrawSupport raise NoPrawSupport("PRAW does not support that")
if "subreddit" in args: if "subreddit" in args:
if "search" in args: if "search" in args:
@@ -123,8 +131,6 @@ def getPosts(args):
if args["user"] == "me": if args["user"] == "me":
args["user"] = str(reddit.user.me()) args["user"] = str(reddit.user.me())
print("\nGETTING POSTS\n.\n.\n.\n")
if not "search" in args: if not "search" in args:
if args["sort"] == "top" or args["sort"] == "controversial": if args["sort"] == "top" or args["sort"] == "controversial":
keyword_params = { keyword_params = {
@@ -143,8 +149,8 @@ def getPosts(args):
} }
if "search" in args: if "search" in args:
if args["sort"] in ["hot","rising","controversial"]: if GLOBAL.arguments.sort in ["hot","rising","controversial"]:
raise InvalidSortingType raise InvalidSortingType("Invalid sorting type has given")
if "subreddit" in args: if "subreddit" in args:
print ( print (
@@ -156,7 +162,7 @@ def getPosts(args):
sort=args["sort"], sort=args["sort"],
subreddit=args["subreddit"], subreddit=args["subreddit"],
time=args["time"] time=args["time"]
).upper() ).upper(),noPrint=True
) )
return redditSearcher( return redditSearcher(
reddit.subreddit(args["subreddit"]).search( reddit.subreddit(args["subreddit"]).search(
@@ -168,23 +174,23 @@ def getPosts(args):
) )
elif "multireddit" in args: elif "multireddit" in args:
raise NoPrawSupport raise NoPrawSupport("PRAW does not support that")
elif "user" in args: elif "user" in args:
raise NoPrawSupport raise NoPrawSupport("PRAW does not support that")
elif "saved" in args: elif "saved" in args:
raise NoRedditSupoort raise ("Reddit does not support that")
if args["sort"] == "relevance": if args["sort"] == "relevance":
raise InvalidSortingType raise InvalidSortingType("Invalid sorting type has given")
if "saved" in args: if "saved" in args:
print( print(
"saved posts\nuser:{username}\nlimit={limit}\n".format( "saved posts\nuser:{username}\nlimit={limit}\n".format(
username=reddit.user.me(), username=reddit.user.me(),
limit=args["limit"] limit=args["limit"]
).upper() ).upper(),noPrint=True
) )
return redditSearcher(reddit.user.me().saved(limit=args["limit"])) return redditSearcher(reddit.user.me().saved(limit=args["limit"]))
@@ -199,7 +205,7 @@ def getPosts(args):
sort=args["sort"], sort=args["sort"],
subreddit=args["subreddit"], subreddit=args["subreddit"],
time=args["time"] time=args["time"]
).upper() ).upper(),noPrint=True
) )
return redditSearcher( return redditSearcher(
getattr(reddit.front,args["sort"]) (**keyword_params) getattr(reddit.front,args["sort"]) (**keyword_params)
@@ -213,7 +219,7 @@ def getPosts(args):
sort=args["sort"], sort=args["sort"],
subreddit=args["subreddit"], subreddit=args["subreddit"],
time=args["time"] time=args["time"]
).upper() ).upper(),noPrint=True
) )
return redditSearcher( return redditSearcher(
getattr( getattr(
@@ -231,7 +237,7 @@ def getPosts(args):
sort=args["sort"], sort=args["sort"],
multireddit=args["multireddit"], multireddit=args["multireddit"],
time=args["time"] time=args["time"]
).upper() ).upper(),noPrint=True
) )
try: try:
return redditSearcher( return redditSearcher(
@@ -242,11 +248,9 @@ def getPosts(args):
) (**keyword_params) ) (**keyword_params)
) )
except NotFound: except NotFound:
raise MultiredditNotFound raise MultiredditNotFound("Multireddit not found")
elif "submitted" in args: elif "submitted" in args:
# TODO
# USE REDDIT.USER.ME() INSTEAD WHEN "ME" PASSED AS A --USER
print ( print (
"submitted posts of {user}\nsort: {sort}\n" \ "submitted posts of {user}\nsort: {sort}\n" \
"time: {time}\nlimit: {limit}\n".format( "time: {time}\nlimit: {limit}\n".format(
@@ -254,7 +258,7 @@ def getPosts(args):
sort=args["sort"], sort=args["sort"],
user=args["user"], user=args["user"],
time=args["time"] time=args["time"]
).upper() ).upper(),noPrint=True
) )
return redditSearcher( return redditSearcher(
getattr( getattr(
@@ -263,23 +267,21 @@ def getPosts(args):
) )
elif "upvoted" in args: elif "upvoted" in args:
# TODO
# USE REDDIT.USER.ME() INSTEAD WHEN "ME" PASSED AS A --USER
print ( print (
"upvoted posts of {user}\nlimit: {limit}\n".format( "upvoted posts of {user}\nlimit: {limit}\n".format(
user=args["user"], user=args["user"],
limit=args["limit"] limit=args["limit"]
).upper() ).upper(),noPrint=True
) )
try: try:
return redditSearcher( return redditSearcher(
reddit.redditor(args["user"]).upvoted(limit=args["limit"]) reddit.redditor(args["user"]).upvoted(limit=args["limit"])
) )
except Forbidden: except Forbidden:
raise InsufficientPermission raise InsufficientPermission("You do not have permission to do that")
elif "post" in args: elif "post" in args:
print("post: {post}\n".format(post=args["post"]).upper()) print("post: {post}\n".format(post=args["post"]).upper(),noPrint=True)
return redditSearcher( return redditSearcher(
reddit.submission(url=args["post"]),SINGLE_POST=True reddit.submission(url=args["post"]),SINGLE_POST=True
) )
@@ -299,6 +301,8 @@ def redditSearcher(posts,SINGLE_POST=False):
gfycatCount = 0 gfycatCount = 0
global imgurCount global imgurCount
imgurCount = 0 imgurCount = 0
global eromeCount
eromeCount = 0
global directCount global directCount
directCount = 0 directCount = 0
global selfCount global selfCount
@@ -306,6 +310,8 @@ def redditSearcher(posts,SINGLE_POST=False):
allPosts = {} allPosts = {}
print("\nGETTING POSTS")
if GLOBAL.arguments.verbose: print("\n")
postsFile = createLogFile("POSTS") postsFile = createLogFile("POSTS")
if SINGLE_POST: if SINGLE_POST:
@@ -326,50 +332,70 @@ def redditSearcher(posts,SINGLE_POST=False):
if result is not None: if result is not None:
details = result details = result
orderCount += 1 orderCount += 1
printSubmission(submission,subCount,orderCount) if GLOBAL.arguments.verbose:
printSubmission(submission,subCount,orderCount)
subList.append(details) subList.append(details)
postsFile.add({subCount:[details]}) postsFile.add({subCount:[details]})
else: else:
for submission in posts: try:
subCount += 1 for submission in posts:
subCount += 1
try: if subCount % 100 == 0 and not GLOBAL.arguments.verbose:
details = {'postId':submission.id, sys.stdout.write("")
'postTitle':submission.title, sys.stdout.flush()
'postSubmitter':str(submission.author),
'postType':None,
'postURL':submission.url,
'postSubreddit':submission.subreddit.display_name}
except AttributeError:
continue
result = checkIfMatching(submission) if subCount % 1000 == 0:
sys.stdout.write("\n"+" "*14)
sys.stdout.flush()
if result is not None: try:
details = result details = {'postId':submission.id,
orderCount += 1 'postTitle':submission.title,
printSubmission(submission,subCount,orderCount) 'postSubmitter':str(submission.author),
subList.append(details) 'postType':None,
'postURL':submission.url,
'postSubreddit':submission.subreddit.display_name}
except AttributeError:
continue
allPosts[subCount] = [details] result = checkIfMatching(submission)
if result is not None:
details = result
orderCount += 1
if GLOBAL.arguments.verbose:
printSubmission(submission,subCount,orderCount)
subList.append(details)
allPosts[subCount] = [details]
except KeyboardInterrupt:
print("\nKeyboardInterrupt",noPrint=True)
postsFile.add(allPosts) postsFile.add(allPosts)
if not len(subList) == 0: if not len(subList) == 0:
print( if GLOBAL.arguments.NoDownload or GLOBAL.arguments.verbose:
"\nTotal of {} submissions found!\n"\ print(
"{} GFYCATs, {} IMGURs, {} DIRECTs and {} SELF POSTS\n" f"\n\nTotal of {len(subList)} submissions found!"
.format(len(subList),gfycatCount,imgurCount,directCount,selfCount) )
) print(
f"{gfycatCount} GFYCATs, {imgurCount} IMGURs, " \
f"{eromeCount} EROMEs, {directCount} DIRECTs " \
f"and {selfCount} SELF POSTS",noPrint=True
)
else:
print()
return subList return subList
else: else:
raise NoMatchingSubmissionFound raise NoMatchingSubmissionFound("No matching submission was found")
def checkIfMatching(submission): def checkIfMatching(submission):
global gfycatCount global gfycatCount
global imgurCount global imgurCount
global eromeCount
global directCount global directCount
global selfCount global selfCount
@@ -383,24 +409,19 @@ def checkIfMatching(submission):
except AttributeError: except AttributeError:
return None return None
if ('gfycat' in submission.domain) or \ if 'gfycat' in submission.domain:
('imgur' in submission.domain): details['postType'] = 'gfycat'
gfycatCount += 1
return details
if 'gfycat' in submission.domain: elif 'imgur' in submission.domain:
details['postType'] = 'gfycat' details['postType'] = 'imgur'
gfycatCount += 1 imgurCount += 1
return details return details
elif 'imgur' in submission.domain: elif 'erome' in submission.domain:
details['postType'] = 'imgur' details['postType'] = 'erome'
eromeCount += 1
imgurCount += 1
return details
elif isDirectLink(submission.url) is not None:
details['postType'] = 'direct'
details['postURL'] = isDirectLink(submission.url)
directCount += 1
return details return details
elif submission.is_self: elif submission.is_self:
@@ -409,6 +430,14 @@ def checkIfMatching(submission):
selfCount += 1 selfCount += 1
return details return details
directLink = isDirectLink(submission.url)
if directLink is not False:
details['postType'] = 'direct'
details['postURL'] = directLink
directCount += 1
return details
def printSubmission(SUB,validNumber,totalNumber): def printSubmission(SUB,validNumber,totalNumber):
"""Print post's link, title and media link to screen""" """Print post's link, title and media link to screen"""
@@ -448,7 +477,22 @@ def isDirectLink(URL):
return URL return URL
elif "v.redd.it" in URL: elif "v.redd.it" in URL:
return URL+"/DASH_600_K" bitrates = ["DASH_1080","DASH_720","DASH_600", \
"DASH_480","DASH_360","DASH_240"]
for bitrate in bitrates:
videoURL = URL+"/"+bitrate
try:
responseCode = urllib.request.urlopen(videoURL).getcode()
except urllib.error.HTTPError:
responseCode = 0
if responseCode == 200:
return videoURL
else:
return False
for extension in imageTypes: for extension in imageTypes:
if extension in URL: if extension in URL:

View File

@@ -14,7 +14,8 @@ class GLOBAL:
config = None config = None
arguments = None arguments = None
directory = None directory = None
configDirectory = Path.home() / "Bulk Downloader for Reddit" defaultConfigDirectory = Path.home() / "Bulk Downloader for Reddit"
configDirectory = ""
reddit_client_id = "BSyphDdxYZAgVQ" reddit_client_id = "BSyphDdxYZAgVQ"
reddit_client_secret = "bfqNJaRh8NMh-9eAr-t4TRz-Blk" reddit_client_secret = "bfqNJaRh8NMh-9eAr-t4TRz-Blk"
printVanilla = print printVanilla = print
@@ -75,8 +76,10 @@ def createLogFile(TITLE):
put given arguments inside \"HEADER\" key put given arguments inside \"HEADER\" key
""" """
folderDirectory = GLOBAL.directory / str(time.strftime("%d-%m-%Y_%H-%M-%S", folderDirectory = GLOBAL.directory / "LOG_FILES" / \
time.localtime(GLOBAL.RUN_TIME))) str(time.strftime(
"%d-%m-%Y_%H-%M-%S",time.localtime(GLOBAL.RUN_TIME)
))
logFilename = TITLE.upper()+'.json' logFilename = TITLE.upper()+'.json'
if not path.exists(folderDirectory): if not path.exists(folderDirectory):
@@ -88,23 +91,29 @@ def createLogFile(TITLE):
return FILE return FILE
def printToFile(*args, **kwargs): def printToFile(*args, noPrint=False,**kwargs):
"""Print to both CONSOLE and """Print to both CONSOLE and
CONSOLE LOG file in a folder time stampt in the name CONSOLE LOG file in a folder time stampt in the name
""" """
TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S", TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S",
time.localtime(GLOBAL.RUN_TIME))) time.localtime(GLOBAL.RUN_TIME)))
folderDirectory = GLOBAL.directory / TIME folderDirectory = GLOBAL.directory / "LOG_FILES" / TIME
print(*args,**kwargs)
if not noPrint or \
GLOBAL.arguments.verbose or \
"file" in kwargs:
print(*args,**kwargs)
if not path.exists(folderDirectory): if not path.exists(folderDirectory):
makedirs(folderDirectory) makedirs(folderDirectory)
with io.open( if not "file" in kwargs:
folderDirectory / "CONSOLE_LOG.txt","a",encoding="utf-8" with io.open(
) as FILE: folderDirectory / "CONSOLE_LOG.txt","a",encoding="utf-8"
print(*args, file=FILE, **kwargs) ) as FILE:
print(*args, file=FILE, **kwargs)
def nameCorrector(string): def nameCorrector(string):
"""Swap strange characters from given string """Swap strange characters from given string
@@ -130,7 +139,7 @@ def nameCorrector(string):
if len(string.split('\n')) > 1: if len(string.split('\n')) > 1:
string = "".join(string.split('\n')) string = "".join(string.split('\n'))
BAD_CHARS = ['\\','/',':','*','?','"','<','>','|','.',] BAD_CHARS = ['\\','/',':','*','?','"','<','>','|','.','#']
if any(x in string for x in BAD_CHARS): if any(x in string for x in BAD_CHARS):
for char in string: for char in string: