mirror of
https://github.com/KevinMidboe/bulk-downloader-for-reddit.git
synced 2026-01-07 09:45:37 +00:00
Compare commits
244 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d6194c57d9 | ||
|
|
cd87a4a120 | ||
|
|
08cddf4c83 | ||
|
|
88fa9e742d | ||
|
|
1c17f174a8 | ||
|
|
9b36336ac3 | ||
|
|
35e551f20c | ||
|
|
0f2bda9c34 | ||
|
|
8ab694bcc1 | ||
|
|
898f59d035 | ||
|
|
6b6db37185 | ||
|
|
d4a5100128 | ||
|
|
22047338e2 | ||
|
|
b16cdd3cbb | ||
|
|
2a8394a48c | ||
|
|
eac4404bbf | ||
|
|
fae49d50da | ||
|
|
7130525ece | ||
|
|
2bf1e03ee1 | ||
|
|
15a91e5784 | ||
|
|
344201a70d | ||
|
|
92e47adb43 | ||
|
|
4d385fda60 | ||
|
|
82dcd2f63d | ||
|
|
08de21a364 | ||
|
|
af7d3d9151 | ||
|
|
280147282b | ||
|
|
b7baf07fb5 | ||
|
|
aece2273fb | ||
|
|
f807efe4d5 | ||
|
|
743d887927 | ||
|
|
da5492858c | ||
|
|
cebfc713d2 | ||
|
|
f522154214 | ||
|
|
27cd3ee991 | ||
|
|
29873331e6 | ||
|
|
8a3dcd68a3 | ||
|
|
ac323f2abe | ||
|
|
32d26fa956 | ||
|
|
137481cf3e | ||
|
|
9b63c55d3e | ||
|
|
3a6954c7d3 | ||
|
|
9a59da0c5f | ||
|
|
d56efed1c6 | ||
|
|
8f64e62293 | ||
|
|
bdc43eb0d8 | ||
|
|
adccd8f3ba | ||
|
|
47a07be1c8 | ||
|
|
1a41dc6061 | ||
|
|
50cb7c15b9 | ||
|
|
a1f1915d57 | ||
|
|
3448ba15a9 | ||
|
|
ff68b5f70f | ||
|
|
588a3c3ea6 | ||
|
|
8f1ff10a5e | ||
|
|
9338961b2b | ||
|
|
94bc1c115f | ||
|
|
c19d8ad71b | ||
|
|
4c8de50880 | ||
|
|
3e6dfccdd2 | ||
|
|
20b9747330 | ||
|
|
be7508540d | ||
|
|
ccd9078b0a | ||
|
|
43cf0a4d42 | ||
|
|
3693cf46f8 | ||
|
|
04152e8554 | ||
|
|
210238d086 | ||
|
|
90e071354f | ||
|
|
426089d0f3 | ||
|
|
7ae6c6385d | ||
|
|
97d15f9974 | ||
|
|
172cd72dc1 | ||
|
|
af29492951 | ||
|
|
5633b301f3 | ||
|
|
5ed855af28 | ||
|
|
7ccf2fb7f9 | ||
|
|
2297e9ed86 | ||
|
|
401e014059 | ||
|
|
eb31d38c44 | ||
|
|
747fefea14 | ||
|
|
80cc4fade3 | ||
|
|
c26843c7fc | ||
|
|
a14edc9f5a | ||
|
|
d685860c22 | ||
|
|
dcf9f35273 | ||
|
|
7fdf03aa24 | ||
|
|
25d61a4c78 | ||
|
|
558eb107f4 | ||
|
|
6e74630050 | ||
|
|
2fd9248715 | ||
|
|
457b8cd21c | ||
|
|
e953456ead | ||
|
|
ed0564fba0 | ||
|
|
5378555f74 | ||
|
|
95ef308915 | ||
|
|
436f867f2e | ||
|
|
91d71565cc | ||
|
|
c7b7361ded | ||
|
|
cd81a6c38b | ||
|
|
1623722138 | ||
|
|
dad5669441 | ||
|
|
35d54d1eb1 | ||
|
|
394b864d86 | ||
|
|
837281c3c6 | ||
|
|
e6b648d8b3 | ||
|
|
cfaf2de7db | ||
|
|
80546d7094 | ||
|
|
139a81a0e7 | ||
|
|
9bb0a5da7f | ||
|
|
6f2273f182 | ||
|
|
b5d6165802 | ||
|
|
b98815376f | ||
|
|
d9586f99b8 | ||
|
|
76711892a2 | ||
|
|
bfea548eab | ||
|
|
2e852db4c3 | ||
|
|
8ac02e7aff | ||
|
|
5eccf4dd3d | ||
|
|
7a68ff3efa | ||
|
|
3ea2e16b62 | ||
|
|
fc6787aa28 | ||
|
|
21533bb78c | ||
|
|
1781ab8ffe | ||
|
|
821383c465 | ||
|
|
9d0fdc7521 | ||
|
|
0387dd5243 | ||
|
|
93732b0367 | ||
|
|
400ce01918 | ||
|
|
ccedac4bdc | ||
|
|
a6997898ce | ||
|
|
61632c7143 | ||
|
|
9bff3399a8 | ||
|
|
b00d185f67 | ||
|
|
7314e17125 | ||
|
|
2d334d56bf | ||
|
|
974517928f | ||
|
|
bcae177b1e | ||
|
|
229def6578 | ||
|
|
59b0376d6e | ||
|
|
cf1dc7d08c | ||
|
|
27532408c1 | ||
|
|
32647beee9 | ||
|
|
a67da461d2 | ||
|
|
8c6f593496 | ||
|
|
b60ce8a71e | ||
|
|
49920cc457 | ||
|
|
c70e7c2ebb | ||
|
|
3931dfff54 | ||
|
|
4a8c2377f9 | ||
|
|
8a18a42a9a | ||
|
|
6c2d748fbc | ||
|
|
8c966df105 | ||
|
|
2adf2c0451 | ||
|
|
3e3a2df4d1 | ||
|
|
7548a01019 | ||
|
|
2ab16608d5 | ||
|
|
e15f33b97a | ||
|
|
27211f993c | ||
|
|
87d3b294f7 | ||
|
|
8128378dcd | ||
|
|
cc93aa3012 | ||
|
|
50c4a8d6d7 | ||
|
|
5737904a54 | ||
|
|
f6eba6c5b0 | ||
|
|
41cbb58db3 | ||
|
|
c569124406 | ||
|
|
1a3836a8e1 | ||
|
|
fde6a1fac4 | ||
|
|
6bba2c4dbb | ||
|
|
a078d44236 | ||
|
|
deae0be769 | ||
|
|
3cf0203e6b | ||
|
|
0b31db0e2e | ||
|
|
d3f2b1b08e | ||
|
|
0ec4bb3008 | ||
|
|
0dbe2ed917 | ||
|
|
9f831e1b78 | ||
|
|
59012077e1 | ||
|
|
5e3c79160b | ||
|
|
1e8eaa1a8d | ||
|
|
7dbc83fdce | ||
|
|
50a77f6ba5 | ||
|
|
4f7e406cd6 | ||
|
|
ded3cece8c | ||
|
|
dd671fd738 | ||
|
|
b357dff52c | ||
|
|
32ffd3b861 | ||
|
|
02673c3950 | ||
|
|
8448e47080 | ||
|
|
39f2c73f4c | ||
|
|
fe942b4734 | ||
|
|
205617e051 | ||
|
|
b93b206a96 | ||
|
|
b84684f786 | ||
|
|
68558950ca | ||
|
|
795965f754 | ||
|
|
d2ed8327df | ||
|
|
a28a7776ab | ||
|
|
1a50bcecb0 | ||
|
|
d8a2204024 | ||
|
|
0577c332b5 | ||
|
|
e09da8db73 | ||
|
|
fd507870e1 | ||
|
|
c434d346a7 | ||
|
|
fcc3e53da7 | ||
|
|
7452af8bd0 | ||
|
|
1fd6951420 | ||
|
|
bddac14804 | ||
|
|
6e2c69d053 | ||
|
|
bedd481195 | ||
|
|
2994adea38 | ||
|
|
f1a9f1d3e4 | ||
|
|
9849c0439b | ||
|
|
ef9f0c543f | ||
|
|
26531fd6c0 | ||
|
|
cae044f7b6 | ||
|
|
4daa9cd66a | ||
|
|
e83d2626d3 | ||
|
|
f67c5f62c6 | ||
|
|
fef559cd40 | ||
|
|
42672dc15d | ||
|
|
3b15141b49 | ||
|
|
d080ca17bc | ||
|
|
7f97bd212a | ||
|
|
65592c5d3a | ||
|
|
57a5f0c85c | ||
|
|
f6240f402e | ||
|
|
fd179c0e4b | ||
|
|
d9dc3132f6 | ||
|
|
45191e2c60 | ||
|
|
319a6c82a3 | ||
|
|
2c62337fac | ||
|
|
7c9c84c411 | ||
|
|
8922c27f9b | ||
|
|
cc59710f33 | ||
|
|
49129bea24 | ||
|
|
a701444d5b | ||
|
|
5a030a156c | ||
|
|
ffe3839aee | ||
|
|
a796038b71 | ||
|
|
ddccebbc70 | ||
|
|
5cfa443f83 | ||
|
|
2103c62218 | ||
|
|
10128b63ac |
7
.gitignore
vendored
7
.gitignore
vendored
@@ -1,4 +1,7 @@
|
||||
build/
|
||||
dist/
|
||||
MANIFEST
|
||||
__pycache__/
|
||||
src/__pycache__/
|
||||
logs/
|
||||
*.json
|
||||
config.json
|
||||
env/
|
||||
|
||||
267
README.md
267
README.md
@@ -1,169 +1,148 @@
|
||||
# Bulk Downloader for Reddit
|
||||
This program downloads imgur, gfycat and direct image and video links of saved posts from a reddit account. It is written in Python 3.
|
||||
|
||||
**PLEASE** post any issue you had with the script to [Issues](https://github.com/aliparlakci/bulk-downloader-for-reddit/issues) tab. Since I don't have any testers or contributors I need your feedback.
|
||||
|
||||
## Table of Contents
|
||||
Downloads media from reddit posts. Made by [u/aliparlakci](https://reddit.com/u/aliparlakci)
|
||||
|
||||
- [What it can do?](#what-it-can-do)
|
||||
- [Requirements](#requirements)
|
||||
- [Setting up the script](#setting-up-the-script)
|
||||
- [Creating an imgur app](#creating-an-imgur-app)
|
||||
- [Program Modes](#program-modes)
|
||||
- [Running the script](#running-the-script)
|
||||
- [Using the command line arguments](#using-the-command-line-arguments)
|
||||
- [Examples](#examples)
|
||||
- [FAQ](#faq)
|
||||
- [Changelog](#changelog)
|
||||
## [Download the latest release here](https://github.com/aliparlakci/bulk-downloader-for-reddit/releases/latest)
|
||||
|
||||
## What it can do?
|
||||
### It...
|
||||
- can get posts from: frontpage, subreddits, multireddits, redditor's submissions, upvoted and saved posts; search results or just plain reddit links
|
||||
- sorts post by hot, top, new and so on
|
||||
- downloads imgur albums, gfycat links, [self posts](#i-can-t-open-the-self-posts-) and any link to a direct image
|
||||
- skips the existing ones
|
||||
- puts post titles to file's name
|
||||
- puts every post to its subreddit's folder
|
||||
- saves a reusable copy of posts' details that are found so that they can be re-downloaded again
|
||||
- logs failed ones in a file so that you can try to download them later
|
||||
- can be run with double-clicking on Windows (but I don't recommend it)
|
||||
## What it can do
|
||||
|
||||
## Requirements
|
||||
- Python 3.x*
|
||||
- Can get posts from: frontpage, subreddits, multireddits, redditor's submissions, upvoted and saved posts; search results or just plain reddit links
|
||||
- Sorts posts by hot, top, new and so on
|
||||
- Downloads **REDDIT** images and videos, **IMGUR** images and albums, **GFYCAT** links, **EROME** images and albums, **SELF POSTS** and any link to a **DIRECT IMAGE**
|
||||
- Skips the existing ones
|
||||
- Puts post title and OP's name in file's name
|
||||
- Puts every post to its subreddit's folder
|
||||
- Saves a reusable copy of posts' details that are found so that they can be re-downloaded again
|
||||
- Logs failed ones in a file so that you can try to download them later
|
||||
|
||||
You can install Python 3 here: [https://www.python.org/downloads/](https://www.python.org/downloads/)
|
||||
|
||||
You have to check the "**Add Python 3 to PATH**" option when installing in order for it to run correctly.
|
||||
## Installation
|
||||
|
||||
*\*Although the latest version of python is suggested, you can use 3.6.5 since it runs perfectly on that version*
|
||||
You can use it either as a `bulk-downloader-for-reddit.exe` executable file for Windows, as a Linux binary or as a *[Python script](#python-script)*. There is no MacOS executable, MacOS users must use the Python script option.
|
||||
|
||||
## Setting up the script
|
||||
Because this is not a commercial app, you need to create an imgur developer app in order for the API to work.
|
||||
### Executables
|
||||
|
||||
### Creating an imgur app
|
||||
* Go to https://api.imgur.com/oauth2/addclient
|
||||
* Enter a name into the **Application Name** field.
|
||||
* Pick **Anonymous usage without user authorization** as an **Authorization type**\*
|
||||
* Enter your email into the Email field.
|
||||
* Complete the CAPTCHA
|
||||
* Click **submit** button
|
||||
|
||||
It should redirect to a page which shows your **imgur_client_id** and **imgur_client_secret**
|
||||
|
||||
\*Select **OAuth 2 authorization without a callback URL** first then select **Anonymous usage without user authorization** if it says *Authorization callback URL: required*
|
||||
For Windows and Linux, [download the latest executables, here](https://github.com/aliparlakci/bulk-downloader-for-reddit/releases/latest).
|
||||
|
||||
## Program Modes
|
||||
All the program modes are activated with command-line arguments as shown [here](#using-the-command-line-arguments)
|
||||
- **saved mode**
|
||||
- Gets posts from given user's saved posts.
|
||||
- **submitted mode**
|
||||
- Gets posts from given user's submitted posts.
|
||||
- **upvoted mode**
|
||||
- Gets posts from given user's upvoted posts.
|
||||
- **subreddit mode**
|
||||
- Gets posts from given subreddit or subreddits that is sorted by given type and limited by given number.
|
||||
- You may also use search in this mode. See [`py -3 script.py --help`](#using-the-command-line-arguments).
|
||||
- **multireddit mode**
|
||||
- Gets posts from given user's given multireddit that is sorted by given type and limited by given number.
|
||||
- **link mode**
|
||||
- Gets posts from given reddit link.
|
||||
- You may customize the behaviour with `--sort`, `--time`, `--limit`.
|
||||
- You may also use search in this mode. See [`py -3 script.py --help`](#using-the-command-line-arguments).
|
||||
- **log read mode**
|
||||
- Takes a log file which was created by the script itself (json files), reads posts and tries downloading them again.
|
||||
- Running log read mode for FAILED.json file once after the download is complete is **HIGHLY** recommended as unexpected problems may occur.
|
||||
### Python script
|
||||
|
||||
## Running the script
|
||||
**DO NOT** let more than one instance of the script run as it interferes with IMGUR Request Rate.
|
||||
|
||||
### Using the command line arguments
|
||||
If no arguments are passed program will prompt you for arguments below which means you may start up the script with double-clicking on it (at least on Windows for sure).
|
||||
|
||||
Open up the [terminal](https://www.reddit.com/r/NSFW411/comments/8vtnl8/meta_i_made_reddit_downloader_that_can_download/e1rnbnl) and navigate to where script.py is. If you are unfamiliar with changing directories in terminal see Change Directories in [this article](https://lifehacker.com/5633909/who-needs-a-mouse-learn-to-use-the-command-line-for-almost-anything).
|
||||
|
||||
Run the script.py file from terminal with command-line arguments. Here is the help page:
|
||||
|
||||
Use `.\` for current directory and `..\` for upper directory when using short directories, otherwise it might act weird.
|
||||
* Download this repository ([latest zip](https://github.com/aliparlakci/bulk-downloader-for-reddit/archive/master.zip) or `git clone git@github.com:aliparlakci/bulk-downloader-for-reddit.git`).
|
||||
* Enter its folder.
|
||||
* Run `python ./script.py` from the command-line (Windows, MacOSX or Linux command line; it may work with Anaconda prompt) See [here](docs/INTERPRET_FROM_SOURCE.md#finding-the-correct-keyword-for-python) if you have any trouble with this step.
|
||||
|
||||
```console
|
||||
$ py -3 script.py --help
|
||||
usage: script.py [-h] [--link link] [--saved] [--submitted] [--upvoted]
|
||||
[--log LOG FILE] [--subreddit SUBREDDIT [SUBREDDIT ...]]
|
||||
[--multireddit MULTIREDDIT] [--user redditor]
|
||||
[--search query] [--sort SORT TYPE] [--limit Limit]
|
||||
[--time TIME_LIMIT] [--NoDownload]
|
||||
DIRECTORY
|
||||
It uses Python 3.6 and above. It won't work with Python 3.5 or any Python 2.x. If you have a trouble setting it up, see [here](docs/INTERPRET_FROM_SOURCE.md).
|
||||
|
||||
This program downloads media from reddit posts
|
||||
|
||||
positional arguments:
|
||||
DIRECTORY Specifies the directory where posts will be downloaded
|
||||
to
|
||||
### Setting up the script
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
--link link, -l link Get posts from link
|
||||
--saved Triggers saved mode
|
||||
--submitted Gets posts of --user
|
||||
--upvoted Gets upvoted posts of --user
|
||||
--log LOG FILE Triggers log read mode and takes a log file
|
||||
--subreddit SUBREDDIT [SUBREDDIT ...]
|
||||
Triggers subreddit mode and takes subreddit's name
|
||||
without r/. use "frontpage" for frontpage
|
||||
--multireddit MULTIREDDIT
|
||||
Triggers multireddit mode and takes multireddit's name
|
||||
without m/
|
||||
--user redditor reddit username if needed. use "me" for current user
|
||||
--search query Searches for given query in given subreddits
|
||||
--sort SORT TYPE Either hot, top, new, controversial, rising or
|
||||
relevance default: hot
|
||||
--limit Limit default: unlimited
|
||||
--time TIME_LIMIT Either hour, day, week, month, year or all. default:
|
||||
all
|
||||
--NoDownload Just gets the posts and store them in a file for
|
||||
downloading later
|
||||
```
|
||||
You need to create an imgur developer app in order for the API to work. Go to https://api.imgur.com/oauth2/addclient and fill the form (It does not really matter how you fill it).
|
||||
|
||||
### Examples
|
||||
It should redirect you to a page where it shows your **imgur_client_id** and **imgur_client_secret**.
|
||||
|
||||
#### Don't include `py -3 script.py` part if you start the script by double-clicking
|
||||
When you run it for the first time, it will automatically create `config.json` file containing `imgur_client_id`, `imgur_client_secret`, `reddit_username` and `reddit_refresh_token`.
|
||||
|
||||
```console
|
||||
py -3 script.py .\\NEW_FOLDER --sort new --time all --limit 10 --link "https://www.reddit.com/r/gifs/search?q=dogs&restrict_sr=on&type=link&sort=new&t=month"
|
||||
|
||||
## Running
|
||||
|
||||
You can run it it an interactive mode, or using [command-line arguments](docs/COMMAND_LINE_ARGUMENTS.md) (also available via `python ./script.py --help` or `bulk-downloader-for-reddit.exe --help`).
|
||||
|
||||
To run the interactive mode, simply use `python ./script.py` or double click on `bulk-downloader-for-reddit.exe` without any extra commands.
|
||||
|
||||
### [Example for command line arguments](docs/COMMAND_LINE_ARGUMENTS.md#examples)
|
||||
|
||||
### Example for an interactive script
|
||||
|
||||
```
|
||||
(py37) bulk-downloader-for-reddit user$ python ./script.py
|
||||
|
||||
Bulk Downloader for Reddit v1.6.5
|
||||
Written by Ali PARLAKCI – parlakciali@gmail.com
|
||||
|
||||
https://github.com/aliparlakci/bulk-downloader-for-reddit/
|
||||
|
||||
download directory: downloads/dataisbeautiful_last_few
|
||||
select program mode:
|
||||
|
||||
[1] search
|
||||
[2] subreddit
|
||||
[3] multireddit
|
||||
[4] submitted
|
||||
[5] upvoted
|
||||
[6] saved
|
||||
[7] log
|
||||
[0] exit
|
||||
|
||||
> 2
|
||||
(type frontpage for all subscribed subreddits,
|
||||
use plus to seperate multi subreddits: pics+funny+me_irl etc.)
|
||||
|
||||
subreddit: dataisbeautiful
|
||||
|
||||
select sort type:
|
||||
|
||||
[1] hot
|
||||
[2] top
|
||||
[3] new
|
||||
[4] rising
|
||||
[5] controversial
|
||||
[0] exit
|
||||
|
||||
> 1
|
||||
|
||||
limit (0 for none): 50
|
||||
|
||||
GETTING POSTS
|
||||
|
||||
|
||||
(1/24) – r/dataisbeautiful
|
||||
AutoModerator_[Battle]_DataViz_Battle_for_the_month_of_April_2019__Visualize_the_April_Fool's_Prank_for_2019-04-01_on__r_DataIsBeautiful_b8ws37.md
|
||||
Downloaded
|
||||
|
||||
(2/24) – r/dataisbeautiful
|
||||
AutoModerator_[Topic][Open]_Open_Discussion_Monday_—_Anybody_can_post_a_general_visualization_question_or_start_a_fresh_discussion!_bg1wej.md
|
||||
Downloaded
|
||||
|
||||
...
|
||||
|
||||
Total of 24 links downloaded!
|
||||
|
||||
Press enter to quit
|
||||
```
|
||||
|
||||
```console
|
||||
py -3 script.py .\\NEW_FOLDER --link "https://www.reddit.com/r/learnprogramming/comments/7mjw12/"
|
||||
```
|
||||
|
||||
```console
|
||||
py -3 script.py .\\NEW_FOLDER --search cats --sort new --time all --subreddit gifs pics --NoDownload
|
||||
```
|
||||
|
||||
```console
|
||||
py -3 script.py .\\NEW_FOLDER --user [USER_NAME] --submitted --limit 10
|
||||
```
|
||||
|
||||
```console
|
||||
py -3 script.py .\\NEW_FOLDER --multireddit good_subs --user [USER_NAME] --sort top --time week --limit 250
|
||||
```
|
||||
|
||||
```console
|
||||
py -3 script.py .\\NEW_FOLDER\\ANOTHER_FOLDER --saved --limit 1000
|
||||
```
|
||||
|
||||
```console
|
||||
py -3 script.py C:\\NEW_FOLDER\\ANOTHER_FOLDER --log UNNAMED_FOLDER\\FAILED.json
|
||||
```
|
||||
|
||||
## FAQ
|
||||
### I can't startup the script no matter what.
|
||||
- Try `python3` or `python` or `py -3` as python have real issues about naming their program
|
||||
|
||||
### I can't open the self posts.
|
||||
- Self posts are held at subreddit as Markdown. So, the script downloads them as Markdown in order not to lose their stylings. However, there is a great Chrome extension [here](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with its styling. Install it and open the files with Chrome.
|
||||
### I am running the script on a headless machine or on a remote server. How can I authenticate my reddit account?
|
||||
- Download the script on your everyday computer and run it once.
|
||||
- Authenticate the program on both reddit and imgur.
|
||||
- Go to your Home folder (for Windows users it is `C:\Users\[USERNAME]\`, for Linux users it is `/home/[USERNAME]`)
|
||||
- Copy the *config.json* file inside the Bulk Downloader for Reddit folder and paste it **next to** the file that you run the program.
|
||||
|
||||
### How can I change my credentials?
|
||||
- All of the user data is held in **config.json** file which is in a folder named "Bulk Downloader for Reddit" in your **Home** directory. You can edit them, there.
|
||||
|
||||
Also if you already have a config.json file, you can paste it **next to** the script and override the one on your Home directory.
|
||||
|
||||
### What do the dots resemble when getting posts?
|
||||
- Each dot means that 100 posts are scanned.
|
||||
|
||||
### Getting posts takes too long.
|
||||
- You can press *Ctrl+C* to interrupt it and start downloading.
|
||||
|
||||
### How are the filenames formatted?
|
||||
- **Self posts** and **images** that do not belong to an album and **album folders** are formatted as:
|
||||
`[SUBMITTER NAME]_[POST TITLE]_[REDDIT ID]`
|
||||
You can use *reddit id* to go to post's reddit page by going to link reddit.com/[REDDIT ID]
|
||||
|
||||
- An **image in an album** is formatted as:
|
||||
`[ITEM NUMBER]_[IMAGE TITLE]_[IMGUR ID]`
|
||||
Similarly, you can use *imgur id* to go to image's imgur page by going to link imgur.com/[IMGUR ID].
|
||||
|
||||
### How do I open self post files?
|
||||
- Self posts are held at reddit as styled with markdown. So, the script downloads them as they are in order not to lose their stylings.
|
||||
However, there is a [great Chrome extension](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with its styling. Install it and open the files with [Chrome](https://www.google.com/intl/tr/chrome/).
|
||||
|
||||
However, they are basically text files. You can also view them with any text editor such as Notepad on Windows, gedit on Linux or Text Editor on MacOS
|
||||
|
||||
## Changelog
|
||||
### 10/07/2018
|
||||
- Added support for *self* post
|
||||
- Now getting posts is quicker
|
||||
|
||||
* [See the changes on *master* here](docs/CHANGELOG.md)
|
||||
|
||||
@@ -1,5 +1 @@
|
||||
theme: jekyll-theme-minimal
|
||||
show_downloads: false
|
||||
#title: Bulk Downloader for Reddit
|
||||
description: Code written by Ali PARLAKCI
|
||||
google_analytics: UA-80780721-3
|
||||
theme: jekyll-theme-cayman
|
||||
86
docs/CHANGELOG.md
Normal file
86
docs/CHANGELOG.md
Normal file
@@ -0,0 +1,86 @@
|
||||
# Changes on *master*
|
||||
## [23/02/2019](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/4d385fda60028343be816eb7c4f7bc613a9d555d)
|
||||
- Fixed v.redd.it links
|
||||
|
||||
## [27/01/2019](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/b7baf07fb5998368d87e3c4c36aed40daf820609)
|
||||
- Clarified the instructions
|
||||
|
||||
## [28/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/d56efed1c6833a66322d9158523b89d0ce57f5de)
|
||||
- Adjusted the algorithm used for extracting gfycat links because of gfycat's design change
|
||||
- Ignore space at the end of the given directory
|
||||
|
||||
## [16/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/d56efed1c6833a66322d9158523b89d0ce57f5de)
|
||||
- Fix the bug that prevents downloading imgur videos
|
||||
|
||||
## [15/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/adccd8f3ba03ad124d58643d78dab287a4123a6f)
|
||||
- Prints out the title of posts' that are already downloaded
|
||||
|
||||
## [13/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/50cb7c15b9cb4befce0cfa2c23ab5de4af9176c6)
|
||||
- Added alternative location of current directory for config file
|
||||
- Fixed console prints on Linux
|
||||
|
||||
## [10/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/8f1ff10a5e11464575284210dbba4a0d387bc1c3)
|
||||
- Added reddit username to config file
|
||||
|
||||
## [06/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/210238d0865febcb57fbd9f0b0a7d3da9dbff384)
|
||||
- Sending headers when requesting a file in order not to be rejected by server
|
||||
|
||||
## [04/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/426089d0f35212148caff0082708a87017757bde)
|
||||
- Disabled printing post types to console
|
||||
|
||||
## [30/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/af294929510f884d92b25eaa855c29fc4fb6dcaa)
|
||||
- Now opens web browser and goes to Imgur when prompts for Imgur credentials
|
||||
|
||||
## [26/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/1623722138bad80ae39ffcd5fb38baf80680deac)
|
||||
- Improved verbose mode
|
||||
- Minimalized the console output
|
||||
- Added quit option for auto quitting the program after process finishes
|
||||
|
||||
## [25/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/1623722138bad80ae39ffcd5fb38baf80680deac)
|
||||
- Added verbose mode
|
||||
- Stylized the console output
|
||||
|
||||
## [24/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7a68ff3efac9939f9574c2cef6184b92edb135f4)
|
||||
- Added OP's name to file names (backwards compatible)
|
||||
- Deleted # char from file names (backwards compatible)
|
||||
- Improved exception handling
|
||||
|
||||
## [23/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7314e17125aa78fd4e6b28e26fda7ec7db7e0147)
|
||||
- Split download() function
|
||||
- Added erome support
|
||||
- Removed exclude feature
|
||||
- Bug fixes
|
||||
|
||||
## [22/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/6e7463005051026ad64006a8580b0b5dc9536b8c)
|
||||
- Put log files in a folder named "LOG_FILES"
|
||||
- Fixed the bug that makes multireddit mode unusable
|
||||
|
||||
## [21/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/4a8c2377f9fb4d60ed7eeb8d50aaf9a26492462a)
|
||||
- Added exclude mode
|
||||
|
||||
## [20/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7548a010198fb693841ca03654d2c9bdf5742139)
|
||||
- "0" input for no limit
|
||||
- Fixed the bug that recognizes non-image direct links as image links
|
||||
|
||||
## [19/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/41cbb58db34f500a8a5ecc3ac4375bf6c3b275bb)
|
||||
- Added v.redd.it support
|
||||
- Added custom exception descriptions to FAILED.json file
|
||||
- Fixed the bug that prevents downloading some gfycat URLs
|
||||
|
||||
## [13/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/9f831e1b784a770c82252e909462871401a05c11)
|
||||
- Changed config.json file's path to home directory
|
||||
|
||||
## [12/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/50a77f6ba54c24f5647d5ea4e177400b71ff04a7)
|
||||
- Added binaries for Windows and Linux
|
||||
- Wait on KeyboardInterrupt
|
||||
- Accept multiple subreddit input
|
||||
- Fixed the bug that prevents choosing "[0] exit" with typing "exit"
|
||||
|
||||
## [11/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a28a7776ab826dea2a8d93873a94cd46db3a339b)
|
||||
- Improvements on UX and UI
|
||||
- Added logging errors to CONSOLE_LOG.txt
|
||||
- Using current directory if directory has not been given yet.
|
||||
|
||||
## [10/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/ffe3839aee6dc1a552d95154d817aefc2b66af81)
|
||||
- Added support for *self* post
|
||||
- Now getting posts is quicker
|
||||
101
docs/COMMAND_LINE_ARGUMENTS.md
Normal file
101
docs/COMMAND_LINE_ARGUMENTS.md
Normal file
@@ -0,0 +1,101 @@
|
||||
# Using command-line arguments
|
||||
|
||||
See **[compiling from source](INTERPRET_FROM_SOURCE.md)** page first unless you are using an executable file. If you are using an executable file, see [using terminal](INTERPRET_FROM_SOURCE.md#using-terminal) and come back.
|
||||
|
||||
***Use*** `.\bulk-downloader-for-reddit.exe` ***or*** `./bulk-downloader-for-reddit` ***if you are using the executable***.
|
||||
```console
|
||||
$ python script.py --help
|
||||
usage: script.py [-h] [--directory DIRECTORY] [--NoDownload] [--verbose]
|
||||
[--quit] [--link link] [--saved] [--submitted] [--upvoted]
|
||||
[--log LOG FILE] [--subreddit SUBREDDIT [SUBREDDIT ...]]
|
||||
[--multireddit MULTIREDDIT] [--user redditor]
|
||||
[--search query] [--sort SORT TYPE] [--limit Limit]
|
||||
[--time TIME_LIMIT]
|
||||
|
||||
This program downloads media from reddit posts
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
--directory DIRECTORY, -d DIRECTORY
|
||||
Specifies the directory where posts will be downloaded
|
||||
to
|
||||
--NoDownload Just gets the posts and stores them in a file for
|
||||
downloading later
|
||||
--verbose, -v Verbose Mode
|
||||
--quit, -q Auto quit afer the process finishes
|
||||
--link link, -l link Get posts from link
|
||||
--saved Triggers saved mode
|
||||
--submitted Gets posts of --user
|
||||
--upvoted Gets upvoted posts of --user
|
||||
--log LOG FILE Takes a log file which created by itself (json files),
|
||||
reads posts and tries downloading them again.
|
||||
--subreddit SUBREDDIT [SUBREDDIT ...]
|
||||
Triggers subreddit mode and takes subreddit's name
|
||||
without r/. use "frontpage" for frontpage
|
||||
--multireddit MULTIREDDIT
|
||||
Triggers multireddit mode and takes multireddit's name
|
||||
without m/
|
||||
--user redditor reddit username if needed. use "me" for current user
|
||||
--search query Searches for given query in given subreddits
|
||||
--sort SORT TYPE Either hot, top, new, controversial, rising or
|
||||
relevance default: hot
|
||||
--limit Limit default: unlimited
|
||||
--time TIME_LIMIT Either hour, day, week, month, year or all. default:
|
||||
all
|
||||
```
|
||||
|
||||
# Examples
|
||||
|
||||
- **Use `python3` instead of `python` if you are using *MacOS* or *Linux***
|
||||
|
||||
```console
|
||||
python script.py
|
||||
```
|
||||
|
||||
```console
|
||||
.\bulk-downloader-for-reddit.exe
|
||||
```
|
||||
|
||||
```console
|
||||
python script.py
|
||||
```
|
||||
|
||||
```console
|
||||
.\bulk-downloader-for-reddit.exe --directory .\\NEW_FOLDER --search cats --sort new --time all --subreddit gifs pics --NoDownload
|
||||
```
|
||||
|
||||
```console
|
||||
./bulk-downloader-for-reddit --directory .\\NEW_FOLDER\\ANOTHER_FOLDER --saved --limit 1000
|
||||
```
|
||||
|
||||
```console
|
||||
python script.py --directory .\\NEW_FOLDER --sort new --time all --limit 10 --link "https://www.reddit.com/r/gifs/search?q=dogs&restrict_sr=on&type=link&sort=new&t=month"
|
||||
```
|
||||
|
||||
```console
|
||||
python script.py --directory .\\NEW_FOLDER --link "https://www.reddit.com/r/learnprogramming/comments/7mjw12/"
|
||||
```
|
||||
|
||||
```console
|
||||
python script.py --directory .\\NEW_FOLDER --search cats --sort new --time all --subreddit gifs pics --NoDownload
|
||||
```
|
||||
|
||||
```console
|
||||
python script.py --directory .\\NEW_FOLDER --user [USER_NAME] --submitted --limit 10
|
||||
```
|
||||
|
||||
```console
|
||||
python script.py --directory .\\NEW_FOLDER --multireddit good_subs --user [USER_NAME] --sort top --time week --limit 250
|
||||
```
|
||||
|
||||
```console
|
||||
python script.py --directory .\\NEW_FOLDER\\ANOTHER_FOLDER --saved --limit 1000
|
||||
```
|
||||
|
||||
```console
|
||||
python script.py --directory C:\\NEW_FOLDER\\ANOTHER_FOLDER --log UNNAMED_FOLDER\\FAILED.json
|
||||
```
|
||||
|
||||
# FAQ
|
||||
## I can't startup the script no matter what.
|
||||
See **[finding the correct keyword for Python](INTERPRET_FROM_SOURCE.md#finding-the-correct-keyword-for-python)**
|
||||
40
docs/INTERPRET_FROM_SOURCE.md
Normal file
40
docs/INTERPRET_FROM_SOURCE.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# Interpret from source code
|
||||
## Requirements
|
||||
### Python 3 Interpreter
|
||||
- This program is designed to work best on **Python 3.6.5** and this version of Python 3 is suggested. See if it is already installed, [here](#finding-the-correct-keyword-for-python).
|
||||
- If not, download the matching release for your platform [here](https://www.python.org/downloads/) and install it. If you are a *Windows* user, selecting **Add Python 3 to PATH** option when installing the software is mandatory.
|
||||
|
||||
## Using terminal
|
||||
### To open it...
|
||||
- **on Windows**: Press **Shift+Right Click**, select **Open Powershell window here** or **Open Command Prompt window here**
|
||||
|
||||
- **on Linux**: Right-click in a folder and select **Open Terminal** or press **Ctrl+Alt+T**.
|
||||
|
||||
- **on MacOS**: Look for an app called **Terminal**.
|
||||
|
||||
### Navigating to the directory where script is downloaded
|
||||
Go inside the folder where script.py is located. If you are not familiar with changing directories on command-prompt and terminal read *Changing Directories* in [this article](https://lifehacker.com/5633909/who-needs-a-mouse-learn-to-use-the-command-line-for-almost-anything)
|
||||
|
||||
## Finding the correct keyword for Python
|
||||
Enter these lines to terminal window until it prints out the version you have downloaded and installed:
|
||||
|
||||
- `python --version`
|
||||
- `python3 --version`
|
||||
- `python3.7 --version`
|
||||
- `python3.6 --version`
|
||||
- `py --version`
|
||||
- `py -3 --version`
|
||||
- `py -3.6 --version`
|
||||
- `py -3.7 --version`
|
||||
|
||||
Once it does, your keyword is without the `--version` part.
|
||||
|
||||
## Installing dependencies
|
||||
Enter the line below to terminal window when you are in the directory where script.py is, use your keyword for Python:
|
||||
```console
|
||||
python -m pip install -r requirements.txt
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
Now, you can go to [Using command-line arguments](COMMAND_LINE_ARGUMENTS.md)
|
||||
4
requirements.txt
Normal file
4
requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
bs4
|
||||
requests
|
||||
praw
|
||||
imgurpython
|
||||
675
script.py
675
script.py
@@ -6,30 +6,27 @@ saved posts from a reddit account. It is written in Python 3.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import webbrowser
|
||||
from io import StringIO
|
||||
from pathlib import Path, PurePath
|
||||
|
||||
from src.downloader import Direct, Gfycat, Imgur, Self
|
||||
from src.downloader import Direct, Erome, Gfycat, Imgur, Self
|
||||
from src.errors import *
|
||||
from src.parser import LinkDesigner
|
||||
from src.searcher import getPosts
|
||||
from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
|
||||
printToFile)
|
||||
from src.errors import *
|
||||
|
||||
__author__ = "Ali Parlakci"
|
||||
__license__ = "GPL"
|
||||
__version__ = "1.0.0"
|
||||
__version__ = "1.6.5"
|
||||
__maintainer__ = "Ali Parlakci"
|
||||
__email__ = "parlakciali@gmail.com"
|
||||
|
||||
def debug(*post):
|
||||
GLOBAL.config = getConfig('config.json')
|
||||
GLOBAL.directory = Path(".\\debug\\")
|
||||
download([*post])
|
||||
quit()
|
||||
|
||||
def getConfig(configFileName):
|
||||
"""Read credentials from config.json file"""
|
||||
|
||||
@@ -42,20 +39,34 @@ def getConfig(configFileName):
|
||||
if "reddit_refresh_token" in content:
|
||||
if content["reddit_refresh_token"] == "":
|
||||
FILE.delete("reddit_refresh_token")
|
||||
|
||||
if not all(False if content.get(key,"") == "" else True for key in keys):
|
||||
print(
|
||||
"Go to this URL and fill the form: " \
|
||||
"https://api.imgur.com/oauth2/addclient\n" \
|
||||
"Enter the client id and client secret here:"
|
||||
)
|
||||
webbrowser.open("https://api.imgur.com/oauth2/addclient",new=2)
|
||||
|
||||
for key in keys:
|
||||
try:
|
||||
if content[key] == "":
|
||||
raise KeyError
|
||||
except KeyError:
|
||||
print(key,": ")
|
||||
FILE.add({key:input()})
|
||||
FILE.add({key:input(" "+key+": ")})
|
||||
return jsonFile(configFileName).read()
|
||||
|
||||
else:
|
||||
FILE = jsonFile(configFileName)
|
||||
configDictionary = {}
|
||||
print(
|
||||
"Go to this URL and fill the form: " \
|
||||
"https://api.imgur.com/oauth2/addclient\n" \
|
||||
"Enter the client id and client secret here:"
|
||||
)
|
||||
webbrowser.open("https://api.imgur.com/oauth2/addclient",new=2)
|
||||
for key in keys:
|
||||
configDictionary[key] = input(key + ": ")
|
||||
configDictionary[key] = input(" "+key+": ")
|
||||
FILE.add(configDictionary)
|
||||
return FILE.read()
|
||||
|
||||
@@ -66,10 +77,26 @@ def parseArguments(arguments=[]):
|
||||
description="This program downloads " \
|
||||
"media from reddit " \
|
||||
"posts")
|
||||
parser.add_argument("directory",
|
||||
parser.add_argument("--directory","-d",
|
||||
help="Specifies the directory where posts will be " \
|
||||
"downloaded to",
|
||||
metavar="DIRECTORY")
|
||||
|
||||
parser.add_argument("--NoDownload",
|
||||
help="Just gets the posts and stores them in a file" \
|
||||
" for downloading later",
|
||||
action="store_true",
|
||||
default=False)
|
||||
|
||||
parser.add_argument("--verbose","-v",
|
||||
help="Verbose Mode",
|
||||
action="store_true",
|
||||
default=False)
|
||||
|
||||
parser.add_argument("--quit","-q",
|
||||
help="Auto quit afer the process finishes",
|
||||
action="store_true",
|
||||
default=False)
|
||||
|
||||
parser.add_argument("--link","-l",
|
||||
help="Get posts from link",
|
||||
@@ -88,7 +115,9 @@ def parseArguments(arguments=[]):
|
||||
help="Gets upvoted posts of --user")
|
||||
|
||||
parser.add_argument("--log",
|
||||
help="Triggers log read mode and takes a log file",
|
||||
help="Takes a log file which created by itself " \
|
||||
"(json files), reads posts and tries downloadin" \
|
||||
"g them again.",
|
||||
# type=argparse.FileType('r'),
|
||||
metavar="LOG FILE")
|
||||
|
||||
@@ -131,7 +160,6 @@ def parseArguments(arguments=[]):
|
||||
parser.add_argument("--limit",
|
||||
help="default: unlimited",
|
||||
metavar="Limit",
|
||||
default=None,
|
||||
type=int)
|
||||
|
||||
parser.add_argument("--time",
|
||||
@@ -140,12 +168,6 @@ def parseArguments(arguments=[]):
|
||||
choices=["all","hour","day","week","month","year"],
|
||||
metavar="TIME_LIMIT",
|
||||
type=str)
|
||||
|
||||
parser.add_argument("--NoDownload",
|
||||
help="Just gets the posts and store them in a file" \
|
||||
" for downloading later",
|
||||
action="store_true",
|
||||
default=False)
|
||||
|
||||
if arguments == []:
|
||||
return parser.parse_args()
|
||||
@@ -157,88 +179,189 @@ def checkConflicts():
|
||||
if not, raise errors
|
||||
"""
|
||||
|
||||
if GLOBAL.arguments.saved is False:
|
||||
saved = 0
|
||||
else:
|
||||
saved = 1
|
||||
|
||||
if GLOBAL.arguments.subreddit is None:
|
||||
subreddit = 0
|
||||
else:
|
||||
subreddit = 1
|
||||
|
||||
if GLOBAL.arguments.submitted is False:
|
||||
submitted = 0
|
||||
else:
|
||||
submitted = 1
|
||||
|
||||
if GLOBAL.arguments.search is None:
|
||||
search = 0
|
||||
else:
|
||||
search = 1
|
||||
|
||||
if GLOBAL.arguments.log is None:
|
||||
log = 0
|
||||
else:
|
||||
log = 1
|
||||
|
||||
if GLOBAL.arguments.link is None:
|
||||
link = 0
|
||||
else:
|
||||
link = 1
|
||||
|
||||
if GLOBAL.arguments.user is None:
|
||||
user = 0
|
||||
else:
|
||||
user = 1
|
||||
|
||||
if GLOBAL.arguments.upvoted is False:
|
||||
upvoted = 0
|
||||
else:
|
||||
upvoted = 1
|
||||
search = 1 if GLOBAL.arguments.search else 0
|
||||
|
||||
if not saved+subreddit+log+link+submitted+upvoted == 1:
|
||||
print("Program mode is invalid")
|
||||
quit()
|
||||
modes = [
|
||||
"saved","subreddit","submitted","log","link","upvoted","multireddit"
|
||||
]
|
||||
|
||||
values = {
|
||||
x: 0 if getattr(GLOBAL.arguments,x) is None or \
|
||||
getattr(GLOBAL.arguments,x) is False \
|
||||
else 1 \
|
||||
for x in modes
|
||||
}
|
||||
|
||||
if not sum(values[x] for x in values) == 1:
|
||||
raise ProgramModeError("Invalid program mode")
|
||||
|
||||
if search+subreddit == 2:
|
||||
print("You cannot search in your saved posts")
|
||||
quit()
|
||||
if search+values["saved"] == 2:
|
||||
raise SearchModeError("You cannot search in your saved posts")
|
||||
|
||||
if search+submitted == 2:
|
||||
print("You cannot search in submitted posts")
|
||||
quit()
|
||||
if search+values["submitted"] == 2:
|
||||
raise SearchModeError("You cannot search in submitted posts")
|
||||
|
||||
if search+upvoted == 2:
|
||||
print("You cannot search in upvoted posts")
|
||||
quit()
|
||||
if search+values["upvoted"] == 2:
|
||||
raise SearchModeError("You cannot search in upvoted posts")
|
||||
|
||||
if upvoted+submitted == 1 and user == 0:
|
||||
print("No redditor name given")
|
||||
quit()
|
||||
if search+values["log"] == 2:
|
||||
raise SearchModeError("You cannot search in log files")
|
||||
|
||||
def postFromLog(fileName):
|
||||
"""Analyze a log file and return a list of dictionaries containing
|
||||
submissions
|
||||
"""
|
||||
if Path.is_file(Path(fileName)):
|
||||
content = jsonFile(fileName).read()
|
||||
else:
|
||||
print("File not found")
|
||||
quit()
|
||||
if values["upvoted"]+values["submitted"] == 1 and user == 0:
|
||||
raise RedditorNameError("No redditor name given")
|
||||
|
||||
try:
|
||||
del content["HEADER"]
|
||||
except KeyError:
|
||||
pass
|
||||
class PromptUser:
|
||||
@staticmethod
|
||||
def chooseFrom(choices):
|
||||
print()
|
||||
choicesByIndex = list(str(x) for x in range(len(choices)+1))
|
||||
for i in range(len(choices)):
|
||||
print("{indent}[{order}] {mode}".format(
|
||||
indent=" "*4,order=i+1,mode=choices[i]
|
||||
))
|
||||
print(" "*4+"[0] exit\n")
|
||||
choice = input("> ")
|
||||
while not choice.lower() in choices+choicesByIndex+["exit"]:
|
||||
print("Invalid input\n")
|
||||
programModeIndex = input("> ")
|
||||
|
||||
posts = []
|
||||
if choice == "0" or choice == "exit":
|
||||
sys.exit()
|
||||
elif choice in choicesByIndex:
|
||||
return choices[int(choice)-1]
|
||||
else:
|
||||
return choice
|
||||
|
||||
def __init__(self):
|
||||
print("select program mode:")
|
||||
programModes = [
|
||||
"search","subreddit","multireddit",
|
||||
"submitted","upvoted","saved","log"
|
||||
]
|
||||
programMode = self.chooseFrom(programModes)
|
||||
|
||||
for post in content:
|
||||
if not content[post][-1]['postType'] == None:
|
||||
posts.append(content[post][-1])
|
||||
if programMode == "search":
|
||||
GLOBAL.arguments.search = input("\nquery: ")
|
||||
GLOBAL.arguments.subreddit = input("\nsubreddit: ")
|
||||
|
||||
return posts
|
||||
print("\nselect sort type:")
|
||||
sortTypes = [
|
||||
"relevance","top","new"
|
||||
]
|
||||
sortType = self.chooseFrom(sortTypes)
|
||||
GLOBAL.arguments.sort = sortType
|
||||
|
||||
print("\nselect time filter:")
|
||||
timeFilters = [
|
||||
"hour","day","week","month","year","all"
|
||||
]
|
||||
timeFilter = self.chooseFrom(timeFilters)
|
||||
GLOBAL.arguments.time = timeFilter
|
||||
|
||||
if programMode == "subreddit":
|
||||
|
||||
subredditInput = input("(type frontpage for all subscribed subreddits,\n" \
|
||||
" use plus to seperate multi subreddits:" \
|
||||
" pics+funny+me_irl etc.)\n\n" \
|
||||
"subreddit: ")
|
||||
GLOBAL.arguments.subreddit = subredditInput
|
||||
|
||||
# while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
|
||||
# subredditInput = input("subreddit: ")
|
||||
# GLOBAL.arguments.subreddit += "+" + subredditInput
|
||||
|
||||
if " " in GLOBAL.arguments.subreddit:
|
||||
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit.split())
|
||||
|
||||
# DELETE THE PLUS (+) AT THE END
|
||||
if not subredditInput.lower() == "frontpage" \
|
||||
and GLOBAL.arguments.subreddit[-1] == "+":
|
||||
GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]
|
||||
|
||||
print("\nselect sort type:")
|
||||
sortTypes = [
|
||||
"hot","top","new","rising","controversial"
|
||||
]
|
||||
sortType = self.chooseFrom(sortTypes)
|
||||
GLOBAL.arguments.sort = sortType
|
||||
|
||||
if sortType in ["top","controversial"]:
|
||||
print("\nselect time filter:")
|
||||
timeFilters = [
|
||||
"hour","day","week","month","year","all"
|
||||
]
|
||||
timeFilter = self.chooseFrom(timeFilters)
|
||||
GLOBAL.arguments.time = timeFilter
|
||||
else:
|
||||
GLOBAL.arguments.time = "all"
|
||||
|
||||
elif programMode == "multireddit":
|
||||
GLOBAL.arguments.user = input("\nmultireddit owner: ")
|
||||
GLOBAL.arguments.multireddit = input("\nmultireddit: ")
|
||||
|
||||
print("\nselect sort type:")
|
||||
sortTypes = [
|
||||
"hot","top","new","rising","controversial"
|
||||
]
|
||||
sortType = self.chooseFrom(sortTypes)
|
||||
GLOBAL.arguments.sort = sortType
|
||||
|
||||
if sortType in ["top","controversial"]:
|
||||
print("\nselect time filter:")
|
||||
timeFilters = [
|
||||
"hour","day","week","month","year","all"
|
||||
]
|
||||
timeFilter = self.chooseFrom(timeFilters)
|
||||
GLOBAL.arguments.time = timeFilter
|
||||
else:
|
||||
GLOBAL.arguments.time = "all"
|
||||
|
||||
elif programMode == "submitted":
|
||||
GLOBAL.arguments.submitted = True
|
||||
GLOBAL.arguments.user = input("\nredditor: ")
|
||||
|
||||
print("\nselect sort type:")
|
||||
sortTypes = [
|
||||
"hot","top","new","controversial"
|
||||
]
|
||||
sortType = self.chooseFrom(sortTypes)
|
||||
GLOBAL.arguments.sort = sortType
|
||||
|
||||
if sortType == "top":
|
||||
print("\nselect time filter:")
|
||||
timeFilters = [
|
||||
"hour","day","week","month","year","all"
|
||||
]
|
||||
timeFilter = self.chooseFrom(timeFilters)
|
||||
GLOBAL.arguments.time = timeFilter
|
||||
else:
|
||||
GLOBAL.arguments.time = "all"
|
||||
|
||||
elif programMode == "upvoted":
|
||||
GLOBAL.arguments.upvoted = True
|
||||
GLOBAL.arguments.user = input("\nredditor: ")
|
||||
|
||||
elif programMode == "saved":
|
||||
GLOBAL.arguments.saved = True
|
||||
|
||||
elif programMode == "log":
|
||||
while True:
|
||||
GLOBAL.arguments.log = input("\nlog file directory:")
|
||||
if Path(GLOBAL.arguments.log ).is_file():
|
||||
break
|
||||
while True:
|
||||
try:
|
||||
GLOBAL.arguments.limit = int(input("\nlimit (0 for none): "))
|
||||
if GLOBAL.arguments.limit == 0:
|
||||
GLOBAL.arguments.limit = None
|
||||
break
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
def prepareAttributes():
|
||||
ATTRIBUTES = {}
|
||||
@@ -270,10 +393,7 @@ def prepareAttributes():
|
||||
|
||||
GLOBAL.arguments.link = GLOBAL.arguments.link.strip("\"")
|
||||
|
||||
try:
|
||||
ATTRIBUTES = LinkDesigner(GLOBAL.arguments.link)
|
||||
except InvalidRedditLink:
|
||||
raise InvalidRedditLink
|
||||
ATTRIBUTES = LinkDesigner(GLOBAL.arguments.link)
|
||||
|
||||
if GLOBAL.arguments.search is not None:
|
||||
ATTRIBUTES["search"] = GLOBAL.arguments.search
|
||||
@@ -285,10 +405,14 @@ def prepareAttributes():
|
||||
ATTRIBUTES["time"] = GLOBAL.arguments.time
|
||||
|
||||
elif GLOBAL.arguments.subreddit is not None:
|
||||
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit)
|
||||
if type(GLOBAL.arguments.subreddit) == list:
|
||||
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit)
|
||||
|
||||
ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit
|
||||
|
||||
elif GLOBAL.arguments.multireddit is not None:
|
||||
ATTRIBUTES["multireddit"] = GLOBAL.arguments.multireddit
|
||||
|
||||
elif GLOBAL.arguments.saved is True:
|
||||
ATTRIBUTES["saved"] = True
|
||||
|
||||
@@ -299,80 +423,110 @@ def prepareAttributes():
|
||||
ATTRIBUTES["submitted"] = True
|
||||
|
||||
if GLOBAL.arguments.sort == "rising":
|
||||
raise InvalidSortingType
|
||||
raise InvalidSortingType("Invalid sorting type has given")
|
||||
|
||||
ATTRIBUTES["limit"] = GLOBAL.arguments.limit
|
||||
|
||||
return ATTRIBUTES
|
||||
|
||||
def postExists(POST):
|
||||
def postFromLog(fileName):
|
||||
"""Analyze a log file and return a list of dictionaries containing
|
||||
submissions
|
||||
"""
|
||||
if Path.is_file(Path(fileName)):
|
||||
content = jsonFile(fileName).read()
|
||||
else:
|
||||
print("File not found")
|
||||
sys.exit()
|
||||
|
||||
try:
|
||||
del content["HEADER"]
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
posts = []
|
||||
|
||||
for post in content:
|
||||
if not content[post][-1]['postType'] == None:
|
||||
posts.append(content[post][-1])
|
||||
|
||||
return posts
|
||||
|
||||
def isPostExists(POST):
|
||||
"""Figure out a file's name and checks if the file already exists"""
|
||||
|
||||
title = nameCorrector(POST['postTitle'])
|
||||
FILENAME = title + "_" + POST['postId']
|
||||
PATH = GLOBAL.directory / POST["postSubreddit"]
|
||||
possibleExtensions = [".jpg",".png",".mp4",".gif",".webm"]
|
||||
|
||||
for i in range(2):
|
||||
for extension in possibleExtensions:
|
||||
FILE_PATH = PATH / (FILENAME+extension)
|
||||
if FILE_PATH.exists():
|
||||
return True
|
||||
else:
|
||||
FILENAME = POST['postId']
|
||||
possibleExtensions = [".jpg",".png",".mp4",".gif",".webm",".md"]
|
||||
|
||||
"""If you change the filenames, don't forget to add them here.
|
||||
Please don't remove existing ones
|
||||
"""
|
||||
for extension in possibleExtensions:
|
||||
|
||||
OLD_FILE_PATH = PATH / (
|
||||
title
|
||||
+ "_" + POST['postId']
|
||||
+ extension
|
||||
)
|
||||
FILE_PATH = PATH / (
|
||||
POST["postSubmitter"]
|
||||
+ "_" + title
|
||||
+ "_" + POST['postId']
|
||||
+ extension
|
||||
)
|
||||
|
||||
SHORT_FILE_PATH = PATH / (POST['postId']+extension)
|
||||
|
||||
if OLD_FILE_PATH.exists() or \
|
||||
FILE_PATH.exists() or \
|
||||
SHORT_FILE_PATH.exists():
|
||||
|
||||
return True
|
||||
|
||||
else:
|
||||
return False
|
||||
|
||||
def download(submissions):
|
||||
"""Analyze list of submissions and call the right function
|
||||
to download each one, catch errors, update the log files
|
||||
"""
|
||||
def downloadPost(SUBMISSION):
|
||||
|
||||
subsLenght = len(submissions)
|
||||
lastRequestTime = 0
|
||||
downloadedCount = subsLenght
|
||||
duplicates = 0
|
||||
BACKUP = {}
|
||||
"""Download directory is declared here for each file"""
|
||||
directory = GLOBAL.directory / SUBMISSION['postSubreddit']
|
||||
|
||||
FAILED_FILE = createLogFile("FAILED")
|
||||
global lastRequestTime
|
||||
|
||||
for i in range(subsLenght):
|
||||
print("\n({}/{})".format(i+1,subsLenght))
|
||||
print(
|
||||
"https://reddit.com/r/{subreddit}/comments/{id}".format(
|
||||
subreddit=submissions[i]['postSubreddit'],
|
||||
id=submissions[i]['postId']
|
||||
)
|
||||
)
|
||||
downloaders = {
|
||||
"imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
|
||||
}
|
||||
|
||||
if postExists(submissions[i]):
|
||||
result = False
|
||||
print("It already exists")
|
||||
duplicates += 1
|
||||
downloadedCount -= 1
|
||||
continue
|
||||
print()
|
||||
if SUBMISSION['postType'] in downloaders:
|
||||
|
||||
directory = GLOBAL.directory / submissions[i]['postSubreddit']
|
||||
|
||||
if submissions[i]['postType'] == 'imgur':
|
||||
print("IMGUR",end="")
|
||||
if SUBMISSION['postType'] == "imgur":
|
||||
|
||||
while int(time.time() - lastRequestTime) <= 2:
|
||||
pass
|
||||
|
||||
credit = Imgur.get_credits()
|
||||
|
||||
IMGUR_RESET_TIME = credit['UserReset']-time.time()
|
||||
USER_RESET = ("after " \
|
||||
+ str(int(IMGUR_RESET_TIME/60)) \
|
||||
+ " Minutes " \
|
||||
+ str(int(IMGUR_RESET_TIME%60)) \
|
||||
+ " Seconds")
|
||||
+ str(int(IMGUR_RESET_TIME/60)) \
|
||||
+ " Minutes " \
|
||||
+ str(int(IMGUR_RESET_TIME%60)) \
|
||||
+ " Seconds")
|
||||
|
||||
if credit['ClientRemaining'] < 25 or credit['UserRemaining'] < 25:
|
||||
printCredit = {"noPrint":False}
|
||||
else:
|
||||
printCredit = {"noPrint":True}
|
||||
|
||||
print(
|
||||
" => Client: {} - User: {} - Reset {}".format(
|
||||
"==> Client: {} - User: {} - Reset {}\n".format(
|
||||
credit['ClientRemaining'],
|
||||
credit['UserRemaining'],
|
||||
USER_RESET
|
||||
)
|
||||
),end="",**printCredit
|
||||
)
|
||||
|
||||
if not (credit['UserRemaining'] == 0 or \
|
||||
@@ -382,165 +536,196 @@ def download(submissions):
|
||||
"""
|
||||
while int(time.time() - lastRequestTime) <= 2:
|
||||
pass
|
||||
|
||||
lastRequestTime = time.time()
|
||||
|
||||
try:
|
||||
Imgur(directory,submissions[i])
|
||||
|
||||
except FileAlreadyExistsError:
|
||||
print("It already exists")
|
||||
duplicates += 1
|
||||
downloadedCount -= 1
|
||||
|
||||
except ImgurLoginError:
|
||||
print(
|
||||
"Imgur login failed. Quitting the program "\
|
||||
"as unexpected errors might occur."
|
||||
)
|
||||
quit()
|
||||
|
||||
except Exception as exception:
|
||||
print(exception)
|
||||
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
|
||||
downloadedCount -= 1
|
||||
|
||||
else:
|
||||
if credit['UserRemaining'] == 0:
|
||||
KEYWORD = "user"
|
||||
elif credit['ClientRemaining'] == 0:
|
||||
KEYWORD = "client"
|
||||
|
||||
print('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
|
||||
FAILED_FILE.add(
|
||||
{int(i+1):['{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()),
|
||||
submissions[i]]}
|
||||
)
|
||||
downloadedCount -= 1
|
||||
raise ImgurLimitError('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
|
||||
|
||||
elif submissions[i]['postType'] == 'gfycat':
|
||||
print("GFYCAT")
|
||||
try:
|
||||
Gfycat(directory,submissions[i])
|
||||
downloaders[SUBMISSION['postType']] (directory,SUBMISSION)
|
||||
|
||||
except FileAlreadyExistsError:
|
||||
print("It already exists")
|
||||
duplicates += 1
|
||||
downloadedCount -= 1
|
||||
|
||||
except NotADownloadableLinkError as exception:
|
||||
print("Could not read the page source")
|
||||
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
|
||||
downloadedCount -= 1
|
||||
else:
|
||||
raise NoSuitablePost
|
||||
|
||||
except Exception as exception:
|
||||
print(exception)
|
||||
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
|
||||
downloadedCount -= 1
|
||||
return None
|
||||
|
||||
elif submissions[i]['postType'] == 'direct':
|
||||
print("DIRECT")
|
||||
try:
|
||||
Direct(directory,submissions[i])
|
||||
def download(submissions):
|
||||
"""Analyze list of submissions and call the right function
|
||||
to download each one, catch errors, update the log files
|
||||
"""
|
||||
|
||||
except FileAlreadyExistsError:
|
||||
print("It already exists")
|
||||
downloadedCount -= 1
|
||||
duplicates += 1
|
||||
subsLenght = len(submissions)
|
||||
global lastRequestTime
|
||||
lastRequestTime = 0
|
||||
downloadedCount = subsLenght
|
||||
duplicates = 0
|
||||
|
||||
except Exception as exception:
|
||||
print(exception)
|
||||
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
|
||||
downloadedCount -= 1
|
||||
FAILED_FILE = createLogFile("FAILED")
|
||||
|
||||
for i in range(subsLenght):
|
||||
print(f"\n({i+1}/{subsLenght}) – r/{submissions[i]['postSubreddit']}",
|
||||
end="")
|
||||
print(f" – {submissions[i]['postType'].upper()}",end="",noPrint=True)
|
||||
|
||||
if isPostExists(submissions[i]):
|
||||
print(f"\n" \
|
||||
f"{submissions[i]['postSubmitter']}_"
|
||||
f"{nameCorrector(submissions[i]['postTitle'])}")
|
||||
print("It already exists")
|
||||
duplicates += 1
|
||||
downloadedCount -= 1
|
||||
continue
|
||||
|
||||
try:
|
||||
downloadPost(submissions[i])
|
||||
|
||||
elif submissions[i]['postType'] == 'self':
|
||||
print("SELF")
|
||||
try:
|
||||
Self(directory,submissions[i])
|
||||
except FileAlreadyExistsError:
|
||||
print("It already exists")
|
||||
duplicates += 1
|
||||
downloadedCount -= 1
|
||||
|
||||
except FileAlreadyExistsError:
|
||||
print("It already exists")
|
||||
downloadedCount -= 1
|
||||
duplicates += 1
|
||||
except ImgurLoginError:
|
||||
print(
|
||||
"Imgur login failed. \nQuitting the program "\
|
||||
"as unexpected errors might occur."
|
||||
)
|
||||
sys.exit()
|
||||
|
||||
except Exception as exception:
|
||||
print(exception)
|
||||
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
|
||||
downloadedCount -= 1
|
||||
except ImgurLimitError as exception:
|
||||
FAILED_FILE.add({int(i+1):[
|
||||
"{class_name}: {info}".format(
|
||||
class_name=exception.__class__.__name__,info=str(exception)
|
||||
),
|
||||
submissions[i]
|
||||
]})
|
||||
downloadedCount -= 1
|
||||
|
||||
else:
|
||||
except NotADownloadableLinkError as exception:
|
||||
print(
|
||||
"{class_name}: {info}".format(
|
||||
class_name=exception.__class__.__name__,info=str(exception)
|
||||
)
|
||||
)
|
||||
FAILED_FILE.add({int(i+1):[
|
||||
"{class_name}: {info}".format(
|
||||
class_name=exception.__class__.__name__,info=str(exception)
|
||||
),
|
||||
submissions[i]
|
||||
]})
|
||||
downloadedCount -= 1
|
||||
|
||||
except NoSuitablePost:
|
||||
print("No match found, skipping...")
|
||||
downloadedCount -= 1
|
||||
|
||||
except Exception as exception:
|
||||
# raise exception
|
||||
print(
|
||||
"{class_name}: {info}".format(
|
||||
class_name=exception.__class__.__name__,info=str(exception)
|
||||
)
|
||||
)
|
||||
FAILED_FILE.add({int(i+1):[
|
||||
"{class_name}: {info}".format(
|
||||
class_name=exception.__class__.__name__,info=str(exception)
|
||||
),
|
||||
submissions[i]
|
||||
]})
|
||||
downloadedCount -= 1
|
||||
|
||||
if duplicates:
|
||||
print("\n There was {} duplicates".format(duplicates))
|
||||
print(f"\nThere {'were' if duplicates > 1 else 'was'} " \
|
||||
f"{duplicates} duplicate{'s' if duplicates > 1 else ''}")
|
||||
|
||||
if downloadedCount == 0:
|
||||
print(" Nothing downloaded :(")
|
||||
print("Nothing downloaded :(")
|
||||
|
||||
else:
|
||||
print(" Total of {} links downloaded!".format(downloadedCount))
|
||||
print(f"Total of {downloadedCount} " \
|
||||
f"link{'s' if downloadedCount > 1 else ''} downloaded!")
|
||||
|
||||
def main():
|
||||
if sys.argv[-1].endswith(__file__):
|
||||
GLOBAL.arguments = parseArguments(input("> ").split())
|
||||
else:
|
||||
GLOBAL.arguments = parseArguments()
|
||||
|
||||
VanillaPrint(
|
||||
f"\nBulk Downloader for Reddit v{__version__}\n" \
|
||||
f"Written by Ali PARLAKCI – parlakciali@gmail.com\n\n" \
|
||||
f"https://github.com/aliparlakci/bulk-downloader-for-reddit/"
|
||||
)
|
||||
GLOBAL.arguments = parseArguments()
|
||||
|
||||
if GLOBAL.arguments.directory is not None:
|
||||
GLOBAL.directory = Path(GLOBAL.arguments.directory)
|
||||
GLOBAL.directory = Path(GLOBAL.arguments.directory.strip())
|
||||
else:
|
||||
print("Invalid directory")
|
||||
quit()
|
||||
GLOBAL.config = getConfig(Path(PurePath(__file__).parent / 'config.json'))
|
||||
GLOBAL.directory = Path(input("\ndownload directory: ").strip())
|
||||
|
||||
checkConflicts()
|
||||
print("\n"," ".join(sys.argv),"\n",noPrint=True)
|
||||
print(f"Bulk Downloader for Reddit v{__version__}\n",noPrint=True
|
||||
)
|
||||
|
||||
print(sys.argv)
|
||||
try:
|
||||
checkConflicts()
|
||||
except ProgramModeError as err:
|
||||
PromptUser()
|
||||
|
||||
if not Path(GLOBAL.defaultConfigDirectory).is_dir():
|
||||
os.makedirs(GLOBAL.defaultConfigDirectory)
|
||||
|
||||
if Path("config.json").exists():
|
||||
GLOBAL.configDirectory = Path("config.json")
|
||||
else:
|
||||
GLOBAL.configDirectory = GLOBAL.defaultConfigDirectory / "config.json"
|
||||
|
||||
GLOBAL.config = getConfig(GLOBAL.configDirectory)
|
||||
|
||||
if GLOBAL.arguments.log is not None:
|
||||
logDir = Path(GLOBAL.arguments.log)
|
||||
download(postFromLog(logDir))
|
||||
quit()
|
||||
sys.exit()
|
||||
|
||||
try:
|
||||
POSTS = getPosts(prepareAttributes())
|
||||
except InsufficientPermission:
|
||||
print("You do not have permission to do that")
|
||||
quit()
|
||||
except NoMatchingSubmissionFound:
|
||||
print("No matching submission was found")
|
||||
quit()
|
||||
except NoRedditSupoort:
|
||||
print("Reddit does not support that")
|
||||
quit()
|
||||
except NoPrawSupport:
|
||||
print("PRAW does not support that")
|
||||
quit()
|
||||
except MultiredditNotFound:
|
||||
print("Multireddit not found")
|
||||
quit()
|
||||
except InvalidSortingType:
|
||||
print("Invalid sorting type has given")
|
||||
quit()
|
||||
except InvalidRedditLink:
|
||||
print("Invalid reddit link")
|
||||
quit()
|
||||
except Exception as exc:
|
||||
logging.error(sys.exc_info()[0].__name__,
|
||||
exc_info=full_exc_info(sys.exc_info()))
|
||||
print(log_stream.getvalue(),noPrint=True)
|
||||
print(exc)
|
||||
sys.exit()
|
||||
|
||||
if POSTS is None:
|
||||
print("I could not find any posts in that URL")
|
||||
quit()
|
||||
sys.exit()
|
||||
|
||||
if GLOBAL.arguments.NoDownload:
|
||||
quit()
|
||||
sys.exit()
|
||||
|
||||
else:
|
||||
download(POSTS)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
log_stream = StringIO()
|
||||
logging.basicConfig(stream=log_stream, level=logging.INFO)
|
||||
|
||||
try:
|
||||
VanillaPrint = print
|
||||
print = printToFile
|
||||
GLOBAL.RUN_TIME = time.time()
|
||||
main()
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("\nQUITTING...")
|
||||
quit()
|
||||
if GLOBAL.directory is None:
|
||||
GLOBAL.directory = Path(".\\")
|
||||
|
||||
except Exception as exception:
|
||||
if GLOBAL.directory is None:
|
||||
GLOBAL.directory = Path(".\\")
|
||||
logging.error(sys.exc_info()[0].__name__,
|
||||
exc_info=full_exc_info(sys.exc_info()))
|
||||
print(log_stream.getvalue())
|
||||
|
||||
if not GLOBAL.arguments.quit: input("\nPress enter to quit\n")
|
||||
|
||||
50
setup.py
Normal file
50
setup.py
Normal file
@@ -0,0 +1,50 @@
|
||||
#!C:\Users\Ali\AppData\Local\Programs\Python\Python36\python.exe
|
||||
|
||||
## python setup.py build
|
||||
import sys
|
||||
from cx_Freeze import setup, Executable
|
||||
from script import __version__
|
||||
|
||||
options = {
|
||||
"build_exe": {
|
||||
"packages":[
|
||||
"idna","imgurpython", "praw", "requests"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
if sys.platform == "win32":
|
||||
executables = [Executable(
|
||||
"script.py",
|
||||
targetName="bulk-downloader-for-reddit.exe",
|
||||
shortcutName="Bulk Downloader for Reddit",
|
||||
shortcutDir="DesktopFolder"
|
||||
)]
|
||||
|
||||
elif sys.platform == "linux":
|
||||
executables = [Executable(
|
||||
"script.py",
|
||||
targetName="bulk-downloader-for-reddit",
|
||||
shortcutName="Bulk Downloader for Reddit",
|
||||
shortcutDir="DesktopFolder"
|
||||
)]
|
||||
|
||||
setup(
|
||||
name = "Bulk Downloader for Reddit",
|
||||
version = __version__,
|
||||
description = "Bulk Downloader for Reddit",
|
||||
author = "Ali Parlakci",
|
||||
author_email="parlakciali@gmail.com",
|
||||
url="https://github.com/aliparlakci/bulk-downloader-for-reddit",
|
||||
classifiers=(
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)"
|
||||
"Natural Language :: English",
|
||||
"Environment :: Console",
|
||||
"Operating System :: OS Independent",
|
||||
),
|
||||
executables = executables,
|
||||
options = options
|
||||
)
|
||||
|
||||
|
||||
@@ -1,22 +1,21 @@
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
from html.parser import HTMLParser
|
||||
from multiprocessing import Queue
|
||||
from pathlib import Path
|
||||
from urllib.error import HTTPError
|
||||
|
||||
import imgurpython
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError,
|
||||
FileNameTooLong, ImgurLoginError,
|
||||
NotADownloadableLinkError)
|
||||
from src.tools import GLOBAL, nameCorrector, printToFile
|
||||
|
||||
try:
|
||||
from imgurpython import *
|
||||
except ModuleNotFoundError:
|
||||
print("\nimgurpython not found on your computer, installing...\n")
|
||||
from src.tools import install
|
||||
install("imgurpython")
|
||||
from imgurpython import *
|
||||
|
||||
VanillaPrint = print
|
||||
print = printToFile
|
||||
|
||||
@@ -26,8 +25,7 @@ def dlProgress(count, blockSize, totalSize):
|
||||
|
||||
downloadedMbs = int(count*blockSize*(10**(-6)))
|
||||
fileSize = int(totalSize*(10**(-6)))
|
||||
sys.stdout.write("\r{}Mb/{}Mb".format(downloadedMbs,fileSize))
|
||||
sys.stdout.write("\b"*len("\r{}Mb/{}Mb".format(downloadedMbs,fileSize)))
|
||||
sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs,fileSize))
|
||||
sys.stdout.flush()
|
||||
|
||||
def getExtension(link):
|
||||
@@ -41,7 +39,10 @@ def getExtension(link):
|
||||
if TYPE in parsed:
|
||||
return "."+parsed[-1]
|
||||
else:
|
||||
return '.jpg'
|
||||
if not "v.redd.it" in link:
|
||||
return '.jpg'
|
||||
else:
|
||||
return '.mp4'
|
||||
|
||||
def getFile(fileDir,tempDir,imageURL,indent=0):
|
||||
"""Downloads given file to given directory.
|
||||
@@ -54,6 +55,23 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
|
||||
As too long file names seem not working.
|
||||
"""
|
||||
|
||||
headers = [
|
||||
("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "\
|
||||
"Safari/537.36 OPR/54.0.2952.64"),
|
||||
("Accept", "text/html,application/xhtml+xml,application/xml;" \
|
||||
"q=0.9,image/webp,image/apng,*/*;q=0.8"),
|
||||
("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
|
||||
("Accept-Encoding", "none"),
|
||||
("Accept-Language", "en-US,en;q=0.8"),
|
||||
("Connection", "keep-alive")
|
||||
]
|
||||
|
||||
opener = urllib.request.build_opener()
|
||||
if not "imgur" in imageURL:
|
||||
opener.addheaders = headers
|
||||
urllib.request.install_opener(opener)
|
||||
|
||||
if not (os.path.isfile(fileDir)):
|
||||
for i in range(3):
|
||||
try:
|
||||
@@ -61,16 +79,156 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
|
||||
tempDir,
|
||||
reporthook=dlProgress)
|
||||
os.rename(tempDir,fileDir)
|
||||
print(" "*indent+"Downloaded"+" "*10)
|
||||
break
|
||||
except ConnectionResetError as exception:
|
||||
print(" "*indent + str(exception))
|
||||
print(" "*indent + "Trying again\n")
|
||||
except FileNotFoundError:
|
||||
raise FileNameTooLong
|
||||
else:
|
||||
print(" "*indent+"Downloaded"+" "*10)
|
||||
break
|
||||
else:
|
||||
raise FileAlreadyExistsError
|
||||
|
||||
class Erome:
|
||||
def __init__(self,directory,post):
|
||||
try:
|
||||
IMAGES = self.getLinks(post['postURL'])
|
||||
except urllib.error.HTTPError:
|
||||
raise NotADownloadableLinkError("Not a downloadable link")
|
||||
|
||||
imagesLenght = len(IMAGES)
|
||||
howManyDownloaded = imagesLenght
|
||||
duplicates = 0
|
||||
|
||||
if imagesLenght == 1:
|
||||
|
||||
extension = getExtension(IMAGES[0])
|
||||
|
||||
"""Filenames are declared here"""
|
||||
|
||||
title = nameCorrector(post['postTitle'])
|
||||
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+extension)
|
||||
|
||||
fileDir = directory / (
|
||||
post["postSubmitter"]+"_"+title+"_"+post['postId']+extension
|
||||
)
|
||||
tempDir = directory / (
|
||||
post["postSubmitter"]+"_"+title+"_"+post['postId']+".tmp"
|
||||
)
|
||||
|
||||
imageURL = IMAGES[0]
|
||||
if 'https://' not in imageURL and 'http://' not in imageURL:
|
||||
imageURL = "https://" + imageURL
|
||||
|
||||
try:
|
||||
getFile(fileDir,tempDir,imageURL)
|
||||
except FileNameTooLong:
|
||||
fileDir = directory / (post['postId'] + extension)
|
||||
tempDir = directory / (post['postId'] + '.tmp')
|
||||
getFile(fileDir,tempDir,imageURL)
|
||||
|
||||
else:
|
||||
title = nameCorrector(post['postTitle'])
|
||||
print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n")
|
||||
|
||||
folderDir = directory / (
|
||||
post["postSubmitter"] + "_" + title + "_" + post['postId']
|
||||
)
|
||||
|
||||
try:
|
||||
if not os.path.exists(folderDir):
|
||||
os.makedirs(folderDir)
|
||||
except FileNotFoundError:
|
||||
folderDir = directory / post['postId']
|
||||
os.makedirs(folderDir)
|
||||
|
||||
for i in range(imagesLenght):
|
||||
|
||||
extension = getExtension(IMAGES[i])
|
||||
|
||||
fileName = str(i+1)
|
||||
imageURL = IMAGES[i]
|
||||
if 'https://' not in imageURL and 'http://' not in imageURL:
|
||||
imageURL = "https://" + imageURL
|
||||
|
||||
fileDir = folderDir / (fileName + extension)
|
||||
tempDir = folderDir / (fileName + ".tmp")
|
||||
|
||||
print(" ({}/{})".format(i+1,imagesLenght))
|
||||
print(" {}".format(fileName+extension))
|
||||
|
||||
try:
|
||||
getFile(fileDir,tempDir,imageURL,indent=2)
|
||||
print()
|
||||
except FileAlreadyExistsError:
|
||||
print(" The file already exists" + " "*10,end="\n\n")
|
||||
duplicates += 1
|
||||
howManyDownloaded -= 1
|
||||
|
||||
except Exception as exception:
|
||||
# raise exception
|
||||
print("\n Could not get the file")
|
||||
print(
|
||||
" "
|
||||
+ "{class_name}: {info}".format(
|
||||
class_name=exception.__class__.__name__,
|
||||
info=str(exception)
|
||||
)
|
||||
+ "\n"
|
||||
)
|
||||
exceptionType = exception
|
||||
howManyDownloaded -= 1
|
||||
|
||||
if duplicates == imagesLenght:
|
||||
raise FileAlreadyExistsError
|
||||
elif howManyDownloaded + duplicates < imagesLenght:
|
||||
raise AlbumNotDownloadedCompletely(
|
||||
"Album Not Downloaded Completely"
|
||||
)
|
||||
|
||||
def getLinks(self,url,lineNumber=129):
|
||||
|
||||
content = []
|
||||
lineNumber = None
|
||||
|
||||
class EromeParser(HTMLParser):
|
||||
tag = None
|
||||
def handle_starttag(self, tag, attrs):
|
||||
self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}
|
||||
|
||||
pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
|
||||
|
||||
""" FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS"""
|
||||
for i in range(len(pageSource)):
|
||||
obj = EromeParser()
|
||||
obj.feed(pageSource[i])
|
||||
tag = obj.tag
|
||||
|
||||
if tag is not None:
|
||||
if "div" in tag:
|
||||
if "id" in tag["div"]:
|
||||
if tag["div"]["id"] == "album":
|
||||
lineNumber = i
|
||||
break
|
||||
|
||||
for line in pageSource[lineNumber:]:
|
||||
obj = EromeParser()
|
||||
obj.feed(line)
|
||||
tag = obj.tag
|
||||
if tag is not None:
|
||||
if "img" in tag:
|
||||
if "class" in tag["img"]:
|
||||
if tag["img"]["class"]=="img-front":
|
||||
content.append(tag["img"]["src"])
|
||||
elif "source" in tag:
|
||||
content.append(tag["source"]["src"])
|
||||
|
||||
return [
|
||||
link for link in content \
|
||||
if link.endswith("_480p.mp4") or not link.endswith(".mp4")
|
||||
]
|
||||
|
||||
class Imgur:
|
||||
def __init__(self,directory,post):
|
||||
self.imgurClient = self.initImgur()
|
||||
@@ -88,15 +246,27 @@ class Imgur:
|
||||
post['mediaURL'] = content['object'].link
|
||||
|
||||
post['postExt'] = getExtension(post['mediaURL'])
|
||||
|
||||
|
||||
title = nameCorrector(post['postTitle'])
|
||||
print(title+"_" +post['postId']+post['postExt'])
|
||||
|
||||
fileDir = title + "_" + post['postId'] + post['postExt']
|
||||
fileDir = directory / fileDir
|
||||
"""Filenames are declared here"""
|
||||
|
||||
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+post['postExt'])
|
||||
|
||||
fileDir = directory / (
|
||||
post["postSubmitter"]
|
||||
+ "_" + title
|
||||
+ "_" + post['postId']
|
||||
+ post['postExt']
|
||||
)
|
||||
|
||||
tempDir = directory / (
|
||||
post["postSubmitter"]
|
||||
+ "_" + title
|
||||
+ "_" + post['postId']
|
||||
+ ".tmp"
|
||||
)
|
||||
|
||||
tempDir = title + "_" + post['postId'] + '.tmp'
|
||||
tempDir = directory / tempDir
|
||||
try:
|
||||
getFile(fileDir,tempDir,post['mediaURL'])
|
||||
except FileNameTooLong:
|
||||
@@ -112,9 +282,11 @@ class Imgur:
|
||||
duplicates = 0
|
||||
|
||||
title = nameCorrector(post['postTitle'])
|
||||
print(title+"_"+post['postId'],end="\n\n")
|
||||
print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n")
|
||||
|
||||
folderDir = directory / (title+"_"+post['postId'])
|
||||
folderDir = directory / (
|
||||
post["postSubmitter"] + "_" + title + "_" + post['postId']
|
||||
)
|
||||
|
||||
try:
|
||||
if not os.path.exists(folderDir):
|
||||
@@ -137,6 +309,8 @@ class Imgur:
|
||||
+ "_"
|
||||
+ images[i]['id'])
|
||||
|
||||
"""Filenames are declared here"""
|
||||
|
||||
fileDir = folderDir / (fileName + images[i]['Ext'])
|
||||
tempDir = folderDir / (fileName + ".tmp")
|
||||
|
||||
@@ -167,21 +341,30 @@ class Imgur:
|
||||
|
||||
except Exception as exception:
|
||||
print("\n Could not get the file")
|
||||
print(" " + str(exception) + "\n")
|
||||
print(
|
||||
" "
|
||||
+ "{class_name}: {info}".format(
|
||||
class_name=exception.__class__.__name__,
|
||||
info=str(exception)
|
||||
)
|
||||
+ "\n"
|
||||
)
|
||||
exceptionType = exception
|
||||
howManyDownloaded -= 1
|
||||
|
||||
if duplicates == imagesLenght:
|
||||
raise FileAlreadyExistsError
|
||||
elif howManyDownloaded < imagesLenght:
|
||||
raise AlbumNotDownloadedCompletely
|
||||
elif howManyDownloaded + duplicates < imagesLenght:
|
||||
raise AlbumNotDownloadedCompletely(
|
||||
"Album Not Downloaded Completely"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def initImgur():
|
||||
"""Initialize imgur api"""
|
||||
|
||||
config = GLOBAL.config
|
||||
return ImgurClient(
|
||||
return imgurpython.ImgurClient(
|
||||
config['imgur_client_id'],
|
||||
config['imgur_client_secret']
|
||||
)
|
||||
@@ -213,7 +396,7 @@ class Imgur:
|
||||
elif identity['type'] == 'album':
|
||||
return {'object':self.imgurClient.get_album(identity['id']),
|
||||
'type':'album'}
|
||||
|
||||
@staticmethod
|
||||
def get_credits():
|
||||
return Imgur.initImgur().get_credits()
|
||||
|
||||
@@ -222,18 +405,28 @@ class Gfycat:
|
||||
try:
|
||||
POST['mediaURL'] = self.getLink(POST['postURL'])
|
||||
except IndexError:
|
||||
raise NotADownloadableLinkError
|
||||
raise NotADownloadableLinkError("Could not read the page source")
|
||||
except Exception as exception:
|
||||
raise NotADownloadableLinkError
|
||||
#debug
|
||||
raise exception
|
||||
raise NotADownloadableLinkError("Could not read the page source")
|
||||
|
||||
POST['postExt'] = getExtension(POST['mediaURL'])
|
||||
|
||||
|
||||
if not os.path.exists(directory): os.makedirs(directory)
|
||||
title = nameCorrector(POST['postTitle'])
|
||||
print(title+"_"+POST['postId']+POST['postExt'])
|
||||
|
||||
fileDir = directory / (title+"_"+POST['postId']+POST['postExt'])
|
||||
tempDir = directory / (title+"_"+POST['postId']+".tmp")
|
||||
"""Filenames are declared here"""
|
||||
|
||||
print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'])
|
||||
|
||||
fileDir = directory / (
|
||||
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt']
|
||||
)
|
||||
tempDir = directory / (
|
||||
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp"
|
||||
)
|
||||
|
||||
try:
|
||||
getFile(fileDir,tempDir,POST['mediaURL'])
|
||||
except FileNameTooLong:
|
||||
@@ -253,40 +446,35 @@ class Gfycat:
|
||||
if url[-1:] == '/':
|
||||
url = url[:-1]
|
||||
|
||||
if 'gifs' in url:
|
||||
url = "https://gfycat.com/" + url.split('/')[-1]
|
||||
url = "https://gfycat.com/" + url.split('/')[-1]
|
||||
|
||||
pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
|
||||
pageSource = (urllib.request.urlopen(url).read().decode())
|
||||
|
||||
theLine = pageSource[lineNumber]
|
||||
lenght = len(query)
|
||||
link = []
|
||||
soup = BeautifulSoup(pageSource, "html.parser")
|
||||
attributes = {"data-react-helmet":"true","type":"application/ld+json"}
|
||||
content = soup.find("script",attrs=attributes)
|
||||
|
||||
for i in range(len(theLine)):
|
||||
if theLine[i:i+lenght] == query:
|
||||
cursor = (i+lenght)+1
|
||||
while not theLine[cursor] == '"':
|
||||
link.append(theLine[cursor])
|
||||
cursor += 1
|
||||
break
|
||||
if content is None:
|
||||
raise NotADownloadableLinkError("Could not read the page source")
|
||||
|
||||
if "".join(link) == "":
|
||||
raise NotADownloadableLinkError
|
||||
|
||||
return "".join(link)
|
||||
return json.loads(content.text)["video"]["contentUrl"]
|
||||
|
||||
class Direct:
|
||||
def __init__(self,directory,POST):
|
||||
POST['postExt'] = getExtension(POST['postURL'])
|
||||
if not os.path.exists(directory): os.makedirs(directory)
|
||||
title = nameCorrector(POST['postTitle'])
|
||||
print(title+"_"+POST['postId']+POST['postExt'])
|
||||
|
||||
fileDir = title+"_"+POST['postId']+POST['postExt']
|
||||
fileDir = directory / fileDir
|
||||
"""Filenames are declared here"""
|
||||
|
||||
tempDir = title+"_"+POST['postId']+".tmp"
|
||||
tempDir = directory / tempDir
|
||||
print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'])
|
||||
|
||||
fileDir = directory / (
|
||||
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt']
|
||||
)
|
||||
tempDir = directory / (
|
||||
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp"
|
||||
)
|
||||
|
||||
try:
|
||||
getFile(fileDir,tempDir,POST['postURL'])
|
||||
@@ -301,10 +489,14 @@ class Self:
|
||||
if not os.path.exists(directory): os.makedirs(directory)
|
||||
|
||||
title = nameCorrector(post['postTitle'])
|
||||
print(title+"_"+post['postId']+".md")
|
||||
|
||||
fileDir = title+"_"+post['postId']+".md"
|
||||
fileDir = directory / fileDir
|
||||
"""Filenames are declared here"""
|
||||
|
||||
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+".md")
|
||||
|
||||
fileDir = directory / (
|
||||
post["postSubmitter"]+"_"+title+"_"+post['postId']+".md"
|
||||
)
|
||||
|
||||
if Path.is_file(fileDir):
|
||||
raise FileAlreadyExistsError
|
||||
@@ -319,7 +511,8 @@ class Self:
|
||||
|
||||
@staticmethod
|
||||
def writeToFile(directory,post):
|
||||
|
||||
|
||||
"""Self posts are formatted here"""
|
||||
content = ("## ["
|
||||
+ post["postTitle"]
|
||||
+ "]("
|
||||
@@ -327,7 +520,11 @@ class Self:
|
||||
+ ")\n"
|
||||
+ post["postContent"]
|
||||
+ "\n\n---\n\n"
|
||||
+ "submitted by [u/"
|
||||
+ "submitted to [r/"
|
||||
+ post["postSubreddit"]
|
||||
+ "](https://www.reddit.com/r/"
|
||||
+ post["postSubreddit"]
|
||||
+ ") by [u/"
|
||||
+ post["postSubmitter"]
|
||||
+ "](https://www.reddit.com/user/"
|
||||
+ post["postSubmitter"]
|
||||
|
||||
@@ -1,3 +1,36 @@
|
||||
import sys
|
||||
|
||||
class FauxTb(object):
|
||||
def __init__(self, tb_frame, tb_lineno, tb_next):
|
||||
self.tb_frame = tb_frame
|
||||
self.tb_lineno = tb_lineno
|
||||
self.tb_next = tb_next
|
||||
|
||||
def current_stack(skip=0):
|
||||
try: 1/0
|
||||
except ZeroDivisionError:
|
||||
f = sys.exc_info()[2].tb_frame
|
||||
for i in range(skip + 2):
|
||||
f = f.f_back
|
||||
lst = []
|
||||
while f is not None:
|
||||
lst.append((f, f.f_lineno))
|
||||
f = f.f_back
|
||||
return lst
|
||||
|
||||
def extend_traceback(tb, stack):
|
||||
"""Extend traceback with stack info."""
|
||||
head = tb
|
||||
for tb_frame, tb_lineno in stack:
|
||||
head = FauxTb(tb_frame, tb_lineno, head)
|
||||
return head
|
||||
|
||||
def full_exc_info(exc_info):
|
||||
"""Like sys.exc_info, but includes the full traceback."""
|
||||
t, v, tb = exc_info
|
||||
full_tb = extend_traceback(tb, current_stack(1))
|
||||
return t, v, full_tb
|
||||
|
||||
class RedditLoginFailed(Exception):
|
||||
pass
|
||||
|
||||
@@ -19,13 +52,22 @@ class FileNameTooLong(Exception):
|
||||
class InvalidRedditLink(Exception):
|
||||
pass
|
||||
|
||||
class ProgramModeError(Exception):
|
||||
pass
|
||||
|
||||
class SearchModeError(Exception):
|
||||
pass
|
||||
|
||||
class RedditorNameError(Exception):
|
||||
pass
|
||||
|
||||
class NoMatchingSubmissionFound(Exception):
|
||||
pass
|
||||
|
||||
class NoPrawSupport(Exception):
|
||||
pass
|
||||
|
||||
class NoRedditSupoort(Exception):
|
||||
class NoRedditSupport(Exception):
|
||||
pass
|
||||
|
||||
class MultiredditNotFound(Exception):
|
||||
@@ -38,4 +80,10 @@ class InvalidSortingType(Exception):
|
||||
pass
|
||||
|
||||
class FileNotFoundError(Exception):
|
||||
pass
|
||||
|
||||
class NoSuitablePost(Exception):
|
||||
pass
|
||||
|
||||
class ImgurLimitError(Exception):
|
||||
pass
|
||||
@@ -29,7 +29,7 @@ def LinkParser(LINK):
|
||||
ShortLink = False
|
||||
|
||||
if not "reddit.com" in LINK:
|
||||
raise InvalidRedditLink
|
||||
raise InvalidRedditLink("Invalid reddit link")
|
||||
|
||||
SplittedLink = LINK.split("/")
|
||||
|
||||
|
||||
303
src/searcher.py
303
src/searcher.py
@@ -1,80 +1,79 @@
|
||||
import os
|
||||
import sys
|
||||
import random
|
||||
import socket
|
||||
import webbrowser
|
||||
import urllib.request
|
||||
from urllib.error import HTTPError
|
||||
|
||||
try:
|
||||
import praw
|
||||
except ModuleNotFoundError:
|
||||
print("\nPRAW not found on your computer, installing...\n")
|
||||
from src.tools import install
|
||||
install("praw")
|
||||
import praw
|
||||
|
||||
import praw
|
||||
from prawcore.exceptions import NotFound, ResponseException, Forbidden
|
||||
|
||||
from src.tools import GLOBAL, createLogFile, jsonFile, printToFile
|
||||
from src.errors import (NoMatchingSubmissionFound, NoPrawSupport,
|
||||
NoRedditSupoort, MultiredditNotFound,
|
||||
NoRedditSupport, MultiredditNotFound,
|
||||
InvalidSortingType, RedditLoginFailed,
|
||||
InsufficientPermission)
|
||||
|
||||
print = printToFile
|
||||
|
||||
class GetAuth:
|
||||
def __init__(self,redditInstance,port):
|
||||
self.redditInstance = redditInstance
|
||||
self.PORT = int(port)
|
||||
|
||||
def recieve_connection(self):
|
||||
"""Wait for and then return a connected socket..
|
||||
Opens a TCP connection on port 8080, and waits for a single client.
|
||||
"""
|
||||
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||
server.bind(('localhost', self.PORT))
|
||||
server.listen(1)
|
||||
client = server.accept()[0]
|
||||
server.close()
|
||||
return client
|
||||
|
||||
def send_message(self, message):
|
||||
"""Send message to client and close the connection."""
|
||||
self.client.send('HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8'))
|
||||
self.client.close()
|
||||
|
||||
def getRefreshToken(self,*scopes):
|
||||
state = str(random.randint(0, 65000))
|
||||
url = self.redditInstance.auth.url(scopes, state, 'permanent')
|
||||
print("Go to this URL and login to reddit:\n\n",url)
|
||||
webbrowser.open(url,new=2)
|
||||
|
||||
self.client = self.recieve_connection()
|
||||
data = self.client.recv(1024).decode('utf-8')
|
||||
param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
|
||||
params = {
|
||||
key: value for (key, value) in [token.split('=') \
|
||||
for token in param_tokens]
|
||||
}
|
||||
if state != params['state']:
|
||||
self.send_message(
|
||||
client, 'State mismatch. Expected: {} Received: {}'
|
||||
.format(state, params['state'])
|
||||
)
|
||||
raise RedditLoginFailed
|
||||
elif 'error' in params:
|
||||
self.send_message(client, params['error'])
|
||||
raise RedditLoginFailed
|
||||
|
||||
refresh_token = self.redditInstance.auth.authorize(params['code'])
|
||||
self.send_message(
|
||||
"<script>" \
|
||||
"alert(\"You can go back to terminal window now.\");" \
|
||||
"</script>"
|
||||
)
|
||||
return (self.redditInstance,refresh_token)
|
||||
|
||||
def beginPraw(config,user_agent = str(socket.gethostname())):
|
||||
class GetAuth:
|
||||
def __init__(self,redditInstance,port):
|
||||
self.redditInstance = redditInstance
|
||||
self.PORT = int(port)
|
||||
|
||||
def recieve_connection(self):
|
||||
"""Wait for and then return a connected socket..
|
||||
Opens a TCP connection on port 8080, and waits for a single client.
|
||||
"""
|
||||
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||
server.bind(('localhost', self.PORT))
|
||||
server.listen(1)
|
||||
client = server.accept()[0]
|
||||
server.close()
|
||||
return client
|
||||
|
||||
def send_message(self, message):
|
||||
"""Send message to client and close the connection."""
|
||||
self.client.send(
|
||||
'HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8')
|
||||
)
|
||||
self.client.close()
|
||||
|
||||
def getRefreshToken(self,*scopes):
|
||||
state = str(random.randint(0, 65000))
|
||||
url = self.redditInstance.auth.url(scopes, state, 'permanent')
|
||||
print("Go to this URL and login to reddit:\n\n",url)
|
||||
webbrowser.open(url,new=2)
|
||||
|
||||
self.client = self.recieve_connection()
|
||||
data = self.client.recv(1024).decode('utf-8')
|
||||
str(data)
|
||||
param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
|
||||
params = {
|
||||
key: value for (key, value) in [token.split('=') \
|
||||
for token in param_tokens]
|
||||
}
|
||||
if state != params['state']:
|
||||
self.send_message(
|
||||
client, 'State mismatch. Expected: {} Received: {}'
|
||||
.format(state, params['state'])
|
||||
)
|
||||
raise RedditLoginFailed
|
||||
elif 'error' in params:
|
||||
self.send_message(client, params['error'])
|
||||
raise RedditLoginFailed
|
||||
|
||||
refresh_token = self.redditInstance.auth.authorize(params['code'])
|
||||
self.send_message(
|
||||
"<script>" \
|
||||
"alert(\"You can go back to terminal window now.\");" \
|
||||
"</script>"
|
||||
)
|
||||
return (self.redditInstance,refresh_token)
|
||||
|
||||
"""Start reddit instance"""
|
||||
|
||||
scopes = ['identity','history','read']
|
||||
@@ -96,7 +95,8 @@ def beginPraw(config,user_agent = str(socket.gethostname())):
|
||||
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
|
||||
reddit = authorizedInstance[0]
|
||||
refresh_token = authorizedInstance[1]
|
||||
jsonFile("config.json").add({
|
||||
jsonFile(GLOBAL.configDirectory).add({
|
||||
"reddit_username":str(reddit.user.me()),
|
||||
"reddit_refresh_token":refresh_token
|
||||
})
|
||||
else:
|
||||
@@ -105,7 +105,8 @@ def beginPraw(config,user_agent = str(socket.gethostname())):
|
||||
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
|
||||
reddit = authorizedInstance[0]
|
||||
refresh_token = authorizedInstance[1]
|
||||
jsonFile("config.json").add({
|
||||
jsonFile(GLOBAL.configDirectory).add({
|
||||
"reddit_username":str(reddit.user.me()),
|
||||
"reddit_refresh_token":refresh_token
|
||||
})
|
||||
return reddit
|
||||
@@ -119,7 +120,7 @@ def getPosts(args):
|
||||
reddit = beginPraw(config)
|
||||
|
||||
if args["sort"] == "best":
|
||||
raise NoPrawSupport
|
||||
raise NoPrawSupport("PRAW does not support that")
|
||||
|
||||
if "subreddit" in args:
|
||||
if "search" in args:
|
||||
@@ -130,8 +131,6 @@ def getPosts(args):
|
||||
if args["user"] == "me":
|
||||
args["user"] = str(reddit.user.me())
|
||||
|
||||
print("\nGETTING POSTS\n.\n.\n.\n")
|
||||
|
||||
if not "search" in args:
|
||||
if args["sort"] == "top" or args["sort"] == "controversial":
|
||||
keyword_params = {
|
||||
@@ -150,8 +149,8 @@ def getPosts(args):
|
||||
}
|
||||
|
||||
if "search" in args:
|
||||
if args["sort"] in ["hot","rising","controversial"]:
|
||||
raise InvalidSortingType
|
||||
if GLOBAL.arguments.sort in ["hot","rising","controversial"]:
|
||||
raise InvalidSortingType("Invalid sorting type has given")
|
||||
|
||||
if "subreddit" in args:
|
||||
print (
|
||||
@@ -163,7 +162,7 @@ def getPosts(args):
|
||||
sort=args["sort"],
|
||||
subreddit=args["subreddit"],
|
||||
time=args["time"]
|
||||
).upper()
|
||||
).upper(),noPrint=True
|
||||
)
|
||||
return redditSearcher(
|
||||
reddit.subreddit(args["subreddit"]).search(
|
||||
@@ -175,23 +174,23 @@ def getPosts(args):
|
||||
)
|
||||
|
||||
elif "multireddit" in args:
|
||||
raise NoPrawSupport
|
||||
raise NoPrawSupport("PRAW does not support that")
|
||||
|
||||
elif "user" in args:
|
||||
raise NoPrawSupport
|
||||
raise NoPrawSupport("PRAW does not support that")
|
||||
|
||||
elif "saved" in args:
|
||||
raise NoRedditSupoort
|
||||
raise ("Reddit does not support that")
|
||||
|
||||
if args["sort"] == "relevance":
|
||||
raise InvalidSortingType
|
||||
raise InvalidSortingType("Invalid sorting type has given")
|
||||
|
||||
if "saved" in args:
|
||||
print(
|
||||
"saved posts\nuser:{username}\nlimit={limit}\n".format(
|
||||
username=reddit.user.me(),
|
||||
limit=args["limit"]
|
||||
).upper()
|
||||
).upper(),noPrint=True
|
||||
)
|
||||
return redditSearcher(reddit.user.me().saved(limit=args["limit"]))
|
||||
|
||||
@@ -206,7 +205,7 @@ def getPosts(args):
|
||||
sort=args["sort"],
|
||||
subreddit=args["subreddit"],
|
||||
time=args["time"]
|
||||
).upper()
|
||||
).upper(),noPrint=True
|
||||
)
|
||||
return redditSearcher(
|
||||
getattr(reddit.front,args["sort"]) (**keyword_params)
|
||||
@@ -220,7 +219,7 @@ def getPosts(args):
|
||||
sort=args["sort"],
|
||||
subreddit=args["subreddit"],
|
||||
time=args["time"]
|
||||
).upper()
|
||||
).upper(),noPrint=True
|
||||
)
|
||||
return redditSearcher(
|
||||
getattr(
|
||||
@@ -238,7 +237,7 @@ def getPosts(args):
|
||||
sort=args["sort"],
|
||||
multireddit=args["multireddit"],
|
||||
time=args["time"]
|
||||
).upper()
|
||||
).upper(),noPrint=True
|
||||
)
|
||||
try:
|
||||
return redditSearcher(
|
||||
@@ -249,11 +248,9 @@ def getPosts(args):
|
||||
) (**keyword_params)
|
||||
)
|
||||
except NotFound:
|
||||
raise MultiredditNotFound
|
||||
raise MultiredditNotFound("Multireddit not found")
|
||||
|
||||
elif "submitted" in args:
|
||||
# TODO
|
||||
# USE REDDIT.USER.ME() INSTEAD WHEN "ME" PASSED AS A --USER
|
||||
print (
|
||||
"submitted posts of {user}\nsort: {sort}\n" \
|
||||
"time: {time}\nlimit: {limit}\n".format(
|
||||
@@ -261,7 +258,7 @@ def getPosts(args):
|
||||
sort=args["sort"],
|
||||
user=args["user"],
|
||||
time=args["time"]
|
||||
).upper()
|
||||
).upper(),noPrint=True
|
||||
)
|
||||
return redditSearcher(
|
||||
getattr(
|
||||
@@ -270,23 +267,21 @@ def getPosts(args):
|
||||
)
|
||||
|
||||
elif "upvoted" in args:
|
||||
# TODO
|
||||
# USE REDDIT.USER.ME() INSTEAD WHEN "ME" PASSED AS A --USER
|
||||
print (
|
||||
"upvoted posts of {user}\nlimit: {limit}\n".format(
|
||||
user=args["user"],
|
||||
limit=args["limit"]
|
||||
).upper()
|
||||
).upper(),noPrint=True
|
||||
)
|
||||
try:
|
||||
return redditSearcher(
|
||||
reddit.redditor(args["user"]).upvoted(limit=args["limit"])
|
||||
)
|
||||
except Forbidden:
|
||||
raise InsufficientPermission
|
||||
raise InsufficientPermission("You do not have permission to do that")
|
||||
|
||||
elif "post" in args:
|
||||
print("post: {post}\n".format(post=args["post"]).upper())
|
||||
print("post: {post}\n".format(post=args["post"]).upper(),noPrint=True)
|
||||
return redditSearcher(
|
||||
reddit.submission(url=args["post"]),SINGLE_POST=True
|
||||
)
|
||||
@@ -306,6 +301,8 @@ def redditSearcher(posts,SINGLE_POST=False):
|
||||
gfycatCount = 0
|
||||
global imgurCount
|
||||
imgurCount = 0
|
||||
global eromeCount
|
||||
eromeCount = 0
|
||||
global directCount
|
||||
directCount = 0
|
||||
global selfCount
|
||||
@@ -313,6 +310,8 @@ def redditSearcher(posts,SINGLE_POST=False):
|
||||
|
||||
allPosts = {}
|
||||
|
||||
print("\nGETTING POSTS")
|
||||
if GLOBAL.arguments.verbose: print("\n")
|
||||
postsFile = createLogFile("POSTS")
|
||||
|
||||
if SINGLE_POST:
|
||||
@@ -333,50 +332,70 @@ def redditSearcher(posts,SINGLE_POST=False):
|
||||
if result is not None:
|
||||
details = result
|
||||
orderCount += 1
|
||||
printSubmission(submission,subCount,orderCount)
|
||||
if GLOBAL.arguments.verbose:
|
||||
printSubmission(submission,subCount,orderCount)
|
||||
subList.append(details)
|
||||
|
||||
postsFile.add({subCount:[details]})
|
||||
|
||||
else:
|
||||
for submission in posts:
|
||||
subCount += 1
|
||||
try:
|
||||
for submission in posts:
|
||||
subCount += 1
|
||||
|
||||
try:
|
||||
details = {'postId':submission.id,
|
||||
'postTitle':submission.title,
|
||||
'postSubmitter':str(submission.author),
|
||||
'postType':None,
|
||||
'postURL':submission.url,
|
||||
'postSubreddit':submission.subreddit.display_name}
|
||||
except AttributeError:
|
||||
continue
|
||||
if subCount % 100 == 0 and not GLOBAL.arguments.verbose:
|
||||
sys.stdout.write("• ")
|
||||
sys.stdout.flush()
|
||||
|
||||
result = checkIfMatching(submission)
|
||||
if subCount % 1000 == 0:
|
||||
sys.stdout.write("\n"+" "*14)
|
||||
sys.stdout.flush()
|
||||
|
||||
if result is not None:
|
||||
details = result
|
||||
orderCount += 1
|
||||
printSubmission(submission,subCount,orderCount)
|
||||
subList.append(details)
|
||||
try:
|
||||
details = {'postId':submission.id,
|
||||
'postTitle':submission.title,
|
||||
'postSubmitter':str(submission.author),
|
||||
'postType':None,
|
||||
'postURL':submission.url,
|
||||
'postSubreddit':submission.subreddit.display_name}
|
||||
except AttributeError:
|
||||
continue
|
||||
|
||||
allPosts = {**allPosts,**details}
|
||||
result = checkIfMatching(submission)
|
||||
|
||||
if result is not None:
|
||||
details = result
|
||||
orderCount += 1
|
||||
if GLOBAL.arguments.verbose:
|
||||
printSubmission(submission,subCount,orderCount)
|
||||
subList.append(details)
|
||||
|
||||
allPosts[subCount] = [details]
|
||||
except KeyboardInterrupt:
|
||||
print("\nKeyboardInterrupt",noPrint=True)
|
||||
|
||||
postsFile.add(allPosts)
|
||||
|
||||
if not len(subList) == 0:
|
||||
print(
|
||||
"\nTotal of {} submissions found!\n"\
|
||||
"{} GFYCATs, {} IMGURs, {} DIRECTs and {} SELF POSTS\n"
|
||||
.format(len(subList),gfycatCount,imgurCount,directCount,selfCount)
|
||||
)
|
||||
if not len(subList) == 0:
|
||||
if GLOBAL.arguments.NoDownload or GLOBAL.arguments.verbose:
|
||||
print(
|
||||
f"\n\nTotal of {len(subList)} submissions found!"
|
||||
)
|
||||
print(
|
||||
f"{gfycatCount} GFYCATs, {imgurCount} IMGURs, " \
|
||||
f"{eromeCount} EROMEs, {directCount} DIRECTs " \
|
||||
f"and {selfCount} SELF POSTS",noPrint=True
|
||||
)
|
||||
else:
|
||||
print()
|
||||
return subList
|
||||
else:
|
||||
raise NoMatchingSubmissionFound
|
||||
raise NoMatchingSubmissionFound("No matching submission was found")
|
||||
|
||||
def checkIfMatching(submission):
|
||||
global gfycatCount
|
||||
global imgurCount
|
||||
global eromeCount
|
||||
global directCount
|
||||
global selfCount
|
||||
|
||||
@@ -390,23 +409,19 @@ def checkIfMatching(submission):
|
||||
except AttributeError:
|
||||
return None
|
||||
|
||||
if ('gfycat' in submission.domain) or \
|
||||
('imgur' in submission.domain):
|
||||
if 'gfycat' in submission.domain:
|
||||
details['postType'] = 'gfycat'
|
||||
gfycatCount += 1
|
||||
return details
|
||||
|
||||
if 'gfycat' in submission.domain:
|
||||
details['postType'] = 'gfycat'
|
||||
gfycatCount += 1
|
||||
return details
|
||||
elif 'imgur' in submission.domain:
|
||||
details['postType'] = 'imgur'
|
||||
imgurCount += 1
|
||||
return details
|
||||
|
||||
elif 'imgur' in submission.domain:
|
||||
details['postType'] = 'imgur'
|
||||
|
||||
imgurCount += 1
|
||||
return details
|
||||
|
||||
elif isDirectLink(submission.url):
|
||||
details['postType'] = 'direct'
|
||||
directCount += 1
|
||||
elif 'erome' in submission.domain:
|
||||
details['postType'] = 'erome'
|
||||
eromeCount += 1
|
||||
return details
|
||||
|
||||
elif submission.is_self:
|
||||
@@ -415,6 +430,14 @@ def checkIfMatching(submission):
|
||||
selfCount += 1
|
||||
return details
|
||||
|
||||
directLink = isDirectLink(submission.url)
|
||||
|
||||
if directLink is not False:
|
||||
details['postType'] = 'direct'
|
||||
details['postURL'] = directLink
|
||||
directCount += 1
|
||||
return details
|
||||
|
||||
def printSubmission(SUB,validNumber,totalNumber):
|
||||
"""Print post's link, title and media link to screen"""
|
||||
|
||||
@@ -442,7 +465,7 @@ def printSubmission(SUB,validNumber,totalNumber):
|
||||
|
||||
def isDirectLink(URL):
|
||||
"""Check if link is a direct image link.
|
||||
If so, return True,
|
||||
If so, return URL,
|
||||
if not, return False
|
||||
"""
|
||||
|
||||
@@ -451,10 +474,28 @@ def isDirectLink(URL):
|
||||
URL = URL[:-1]
|
||||
|
||||
if "i.reddituploads.com" in URL:
|
||||
return True
|
||||
return URL
|
||||
|
||||
elif "v.redd.it" in URL:
|
||||
bitrates = ["DASH_1080","DASH_720","DASH_600", \
|
||||
"DASH_480","DASH_360","DASH_240"]
|
||||
|
||||
for bitrate in bitrates:
|
||||
videoURL = URL+"/"+bitrate
|
||||
|
||||
try:
|
||||
responseCode = urllib.request.urlopen(videoURL).getcode()
|
||||
except urllib.error.HTTPError:
|
||||
responseCode = 0
|
||||
|
||||
if responseCode == 200:
|
||||
return videoURL
|
||||
|
||||
else:
|
||||
return False
|
||||
|
||||
for extension in imageTypes:
|
||||
if extension in URL:
|
||||
return True
|
||||
return URL
|
||||
else:
|
||||
return False
|
||||
|
||||
41
src/tools.py
41
src/tools.py
@@ -2,20 +2,11 @@ import io
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
|
||||
try:
|
||||
from pip import main as pipmain
|
||||
except:
|
||||
from pip._internal import main as pipmain
|
||||
|
||||
from os import makedirs, path, remove
|
||||
from pathlib import Path
|
||||
|
||||
from src.errors import FileNotFoundError
|
||||
|
||||
def install(package):
|
||||
pipmain(['install', package])
|
||||
|
||||
class GLOBAL:
|
||||
"""Declare global variables"""
|
||||
|
||||
@@ -23,6 +14,8 @@ class GLOBAL:
|
||||
config = None
|
||||
arguments = None
|
||||
directory = None
|
||||
defaultConfigDirectory = Path.home() / "Bulk Downloader for Reddit"
|
||||
configDirectory = ""
|
||||
reddit_client_id = "BSyphDdxYZAgVQ"
|
||||
reddit_client_secret = "bfqNJaRh8NMh-9eAr-t4TRz-Blk"
|
||||
printVanilla = print
|
||||
@@ -83,8 +76,10 @@ def createLogFile(TITLE):
|
||||
put given arguments inside \"HEADER\" key
|
||||
"""
|
||||
|
||||
folderDirectory = GLOBAL.directory / str(time.strftime("%d-%m-%Y_%H-%M-%S",
|
||||
time.localtime(GLOBAL.RUN_TIME)))
|
||||
folderDirectory = GLOBAL.directory / "LOG_FILES" / \
|
||||
str(time.strftime(
|
||||
"%d-%m-%Y_%H-%M-%S",time.localtime(GLOBAL.RUN_TIME)
|
||||
))
|
||||
logFilename = TITLE.upper()+'.json'
|
||||
|
||||
if not path.exists(folderDirectory):
|
||||
@@ -96,23 +91,29 @@ def createLogFile(TITLE):
|
||||
|
||||
return FILE
|
||||
|
||||
def printToFile(*args, **kwargs):
|
||||
def printToFile(*args, noPrint=False,**kwargs):
|
||||
"""Print to both CONSOLE and
|
||||
CONSOLE LOG file in a folder time stampt in the name
|
||||
"""
|
||||
|
||||
TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S",
|
||||
time.localtime(GLOBAL.RUN_TIME)))
|
||||
folderDirectory = GLOBAL.directory / TIME
|
||||
print(*args,**kwargs)
|
||||
folderDirectory = GLOBAL.directory / "LOG_FILES" / TIME
|
||||
|
||||
if not noPrint or \
|
||||
GLOBAL.arguments.verbose or \
|
||||
"file" in kwargs:
|
||||
|
||||
print(*args,**kwargs)
|
||||
|
||||
if not path.exists(folderDirectory):
|
||||
makedirs(folderDirectory)
|
||||
|
||||
with io.open(
|
||||
folderDirectory / "CONSOLE_LOG.txt","a",encoding="utf-8"
|
||||
) as FILE:
|
||||
print(*args, file=FILE, **kwargs)
|
||||
|
||||
if not "file" in kwargs:
|
||||
with io.open(
|
||||
folderDirectory / "CONSOLE_LOG.txt","a",encoding="utf-8"
|
||||
) as FILE:
|
||||
print(*args, file=FILE, **kwargs)
|
||||
|
||||
def nameCorrector(string):
|
||||
"""Swap strange characters from given string
|
||||
@@ -138,7 +139,7 @@ def nameCorrector(string):
|
||||
if len(string.split('\n')) > 1:
|
||||
string = "".join(string.split('\n'))
|
||||
|
||||
BAD_CHARS = ['\\','/',':','*','?','"','<','>','|','.',]
|
||||
BAD_CHARS = ['\\','/',':','*','?','"','<','>','|','.','#']
|
||||
|
||||
if any(x in string for x in BAD_CHARS):
|
||||
for char in string:
|
||||
|
||||
Reference in New Issue
Block a user