mirror of
https://github.com/KevinMidboe/bulk-downloader-for-reddit.git
synced 2026-01-20 16:15:50 +00:00
Compare commits
100 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
91d71565cc | ||
|
|
c7b7361ded | ||
|
|
cd81a6c38b | ||
|
|
1623722138 | ||
|
|
dad5669441 | ||
|
|
35d54d1eb1 | ||
|
|
394b864d86 | ||
|
|
837281c3c6 | ||
|
|
e6b648d8b3 | ||
|
|
cfaf2de7db | ||
|
|
80546d7094 | ||
|
|
139a81a0e7 | ||
|
|
9bb0a5da7f | ||
|
|
6f2273f182 | ||
|
|
b5d6165802 | ||
|
|
b98815376f | ||
|
|
d9586f99b8 | ||
|
|
76711892a2 | ||
|
|
bfea548eab | ||
|
|
2e852db4c3 | ||
|
|
8ac02e7aff | ||
|
|
5eccf4dd3d | ||
|
|
7a68ff3efa | ||
|
|
3ea2e16b62 | ||
|
|
fc6787aa28 | ||
|
|
21533bb78c | ||
|
|
1781ab8ffe | ||
|
|
821383c465 | ||
|
|
9d0fdc7521 | ||
|
|
0387dd5243 | ||
|
|
93732b0367 | ||
|
|
400ce01918 | ||
|
|
ccedac4bdc | ||
|
|
a6997898ce | ||
|
|
61632c7143 | ||
|
|
9bff3399a8 | ||
|
|
b00d185f67 | ||
|
|
7314e17125 | ||
|
|
2d334d56bf | ||
|
|
974517928f | ||
|
|
bcae177b1e | ||
|
|
229def6578 | ||
|
|
59b0376d6e | ||
|
|
cf1dc7d08c | ||
|
|
27532408c1 | ||
|
|
32647beee9 | ||
|
|
a67da461d2 | ||
|
|
8c6f593496 | ||
|
|
b60ce8a71e | ||
|
|
49920cc457 | ||
|
|
c70e7c2ebb | ||
|
|
3931dfff54 | ||
|
|
4a8c2377f9 | ||
|
|
8a18a42a9a | ||
|
|
6c2d748fbc | ||
|
|
8c966df105 | ||
|
|
2adf2c0451 | ||
|
|
3e3a2df4d1 | ||
|
|
7548a01019 | ||
|
|
2ab16608d5 | ||
|
|
e15f33b97a | ||
|
|
27211f993c | ||
|
|
87d3b294f7 | ||
|
|
8128378dcd | ||
|
|
cc93aa3012 | ||
|
|
50c4a8d6d7 | ||
|
|
5737904a54 | ||
|
|
f6eba6c5b0 | ||
|
|
41cbb58db3 | ||
|
|
c569124406 | ||
|
|
1a3836a8e1 | ||
|
|
fde6a1fac4 | ||
|
|
6bba2c4dbb | ||
|
|
a078d44236 | ||
|
|
deae0be769 | ||
|
|
3cf0203e6b | ||
|
|
0b31db0e2e | ||
|
|
d3f2b1b08e | ||
|
|
0ec4bb3008 | ||
|
|
0dbe2ed917 | ||
|
|
9f831e1b78 | ||
|
|
59012077e1 | ||
|
|
5e3c79160b | ||
|
|
1e8eaa1a8d | ||
|
|
7dbc83fdce | ||
|
|
50a77f6ba5 | ||
|
|
4f7e406cd6 | ||
|
|
ded3cece8c | ||
|
|
dd671fd738 | ||
|
|
b357dff52c | ||
|
|
32ffd3b861 | ||
|
|
02673c3950 | ||
|
|
8448e47080 | ||
|
|
39f2c73f4c | ||
|
|
fe942b4734 | ||
|
|
205617e051 | ||
|
|
b93b206a96 | ||
|
|
b84684f786 | ||
|
|
68558950ca | ||
|
|
795965f754 |
7
.gitignore
vendored
7
.gitignore
vendored
@@ -1,4 +1,5 @@
|
|||||||
|
build/
|
||||||
|
dist/
|
||||||
|
MANIFEST
|
||||||
__pycache__/
|
__pycache__/
|
||||||
src/__pycache__/
|
src/__pycache__/
|
||||||
logs/
|
|
||||||
*.json
|
|
||||||
105
README.md
105
README.md
@@ -3,20 +3,28 @@ This program downloads imgur, gfycat and direct image and video links of saved p
|
|||||||
|
|
||||||
**PLEASE** post any issue you have with the script to the [Issues](https://github.com/aliparlakci/bulk-downloader-for-reddit/issues) tab. Since I don't have any testers or contributors, I need your feedback.
|
**PLEASE** post any issue you have with the script to the [Issues](https://github.com/aliparlakci/bulk-downloader-for-reddit/issues) tab. Since I don't have any testers or contributors, I need your feedback.
|
||||||
|
|
||||||
## What can it do?
|
## What it can do
|
||||||
### It...
|
- Can get posts from: frontpage, subreddits, multireddits, redditor's submissions, upvoted and saved posts; search results or just plain reddit links
|
||||||
- can get posts from: frontpage, subreddits, multireddits, redditor's submissions, upvoted and saved posts; search results or just plain reddit links
|
- Sorts posts by hot, top, new and so on
|
||||||
- sorts posts by hot, top, new and so on
|
- Downloads **REDDIT** images and videos, **IMGUR** images and albums, **GFYCAT** links, **EROME** images and albums, **SELF POSTS** and any link to a **DIRECT IMAGE**
|
||||||
- downloads imgur albums, gfycat links, [self posts](#i-cant-open-the-self-posts) and any link to a direct image
|
- Skips the existing ones
|
||||||
- skips the existing ones
|
- Puts post title and OP's name in file's name
|
||||||
- puts post titles to file's name
|
- Puts every post to its subreddit's folder
|
||||||
- puts every post to its subreddit's folder
|
- Saves a reusable copy of posts' details that are found so that they can be re-downloaded again
|
||||||
- saves a reusable copy of posts' details that are found so that they can be re-downloaded again
|
- Logs failed ones in a file so that you can try to download them later
|
||||||
- logs failed ones in a file to so that you can try to download them later
|
|
||||||
- can be run with double-clicking on Windows (but I don't recommend it)
|
|
||||||
|
|
||||||
## [Download the latest release](https://github.com/aliparlakci/bulk-downloader-for-reddit/releases/latest)
|
## [Download the latest release](https://github.com/aliparlakci/bulk-downloader-for-reddit/releases/latest)
|
||||||
|
|
||||||
|
## How it works
|
||||||
|
|
||||||
|
- For **Windows** and **Linux** users, there are executable files to run easily without installing a third-party program. But if you are paranoid like me, you can **[compile it from source code](docs/COMPILE_FROM_SOURCE.md)**.
|
||||||
|
- In Windows, double click on bulk-downloader-for-reddit file
|
||||||
|
- In Linux, extract files to a folder and open terminal inside it. Type **`./bulk-downloader-for-reddit`**
|
||||||
|
|
||||||
|
- **MacOS** users have to **[compile it from source code](docs/COMPILE_FROM_SOURCE.md)**.
|
||||||
|
|
||||||
|
Script also accepts **command-line arguments**, get further information from **[`--help`](docs/COMMAND_LINE_ARGUMENTS.md)**
|
||||||
|
|
||||||
## Setting up the script
|
## Setting up the script
|
||||||
Because this is not a commercial app, you need to create an imgur developer app in order for the API to work.
|
Because this is not a commercial app, you need to create an imgur developer app in order for the API to work.
|
||||||
|
|
||||||
@@ -32,20 +40,71 @@ It should redirect to a page which shows your **imgur_client_id** and **imgur_cl
|
|||||||
|
|
||||||
\* Select **OAuth 2 authorization without a callback URL** first then select **Anonymous usage without user authorization** if it says *Authorization callback URL: required*
|
\* Select **OAuth 2 authorization without a callback URL** first then select **Anonymous usage without user authorization** if it says *Authorization callback URL: required*
|
||||||
|
|
||||||
## Running the script
|
|
||||||
|
|
||||||
For **Windows** users, there is an *EXE* file to run easily.
|
|
||||||
|
|
||||||
**Linux** and **MacOS** users have to install Python 3 and run it from the *source code* through terminal.
|
|
||||||
|
|
||||||
To get further information about that and **using command-line arguments to run the script**, see **[`python script.py --help`](docs/help_page.md)**
|
|
||||||
|
|
||||||
## FAQ
|
## FAQ
|
||||||
### I can't open the self post files.
|
### What do the dots represent when getting posts?
|
||||||
- Self posts are held at reddit as styled with markdown. So, the script downloads them as they are in order not to lose their stylings.
|
- Each dot means that 100 posts are scanned.
|
||||||
However, there is a great Chrome extension [here](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with its styling. Install it and open the files with Chrome.
|
|
||||||
|
### Getting posts is taking too long.
|
||||||
|
- You can press Ctrl+C to interrupt it and start downloading.
|
||||||
|
|
||||||
|
### How are downloaded files' names formatted?
|
||||||
|
- Self posts and images that do not belong to an album are formatted as **`[SUBMITTER NAME]_[POST TITLE]_[REDDIT ID]`**.
|
||||||
|
You can use *reddit id* to go to post's reddit page by going to link **reddit.com/[REDDIT ID]**
|
||||||
|
|
||||||
|
- An image in an imgur album is formatted as **`[ITEM NUMBER]_[IMAGE TITLE]_[IMGUR ID]`**
|
||||||
|
Similarly, you can use *imgur id* to go to image's imgur page by going to link **imgur.com/[IMGUR ID]**.
|
||||||
|
|
||||||
|
### How do I open self post files?
|
||||||
|
- Self posts are held at reddit as styled with markdown. So, the script downloads them as they are in order not to lose their stylings.
|
||||||
|
However, there is a [great Chrome extension](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with its styling. Install it and open the files with [Chrome](https://www.google.com/intl/tr/chrome/).
|
||||||
|
|
||||||
|
However, they are basically text files. You can also view them with any text editor such as Notepad on Windows, gedit on Linux or Text Editor on MacOS
|
||||||
|
|
||||||
|
### How can I change my credentials?
|
||||||
|
- All of the user data is held in **config.json** file which is in a folder named "Bulk Downloader for Reddit" in your **Home** directory. You can edit
|
||||||
|
them, there.
|
||||||
|
|
||||||
|
## Changes on *master*
|
||||||
|
### [25/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/1623722138bad80ae39ffcd5fb38baf80680deac)
|
||||||
|
- Added verbose mode
|
||||||
|
- Stylize the console output
|
||||||
|
|
||||||
|
### [24/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7a68ff3efac9939f9574c2cef6184b92edb135f4)
|
||||||
|
- Added OP's name to file names (backwards compatible)
|
||||||
|
- Deleted # char from file names (backwards compatible)
|
||||||
|
- Improved exception handling
|
||||||
|
|
||||||
|
### [23/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7314e17125aa78fd4e6b28e26fda7ec7db7e0147)
|
||||||
|
- Split download() function
|
||||||
|
- Added erome support
|
||||||
|
- Remove exclude feature
|
||||||
|
- Bug fix
|
||||||
|
|
||||||
|
### [22/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a67da461d2fcd70672effcb20c8179e3224091bb)
|
||||||
|
- Put log files in a folder named "LOG_FILES"
|
||||||
|
- Fixed the bug that makes multireddit mode unusable
|
||||||
|
|
||||||
|
### [21/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/4a8c2377f9fb4d60ed7eeb8d50aaf9a26492462a)
|
||||||
|
- Added exclude mode
|
||||||
|
|
||||||
|
### [20/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/commit/7548a010198fb693841ca03654d2c9bdf5742139)
|
||||||
|
- "0" input for no limit
|
||||||
|
- Fixed the bug that recognizes non-image direct links as image links
|
||||||
|
|
||||||
|
### [19/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/41cbb58db34f500a8a5ecc3ac4375bf6c3b275bb)
|
||||||
|
- Added v.redd.it support
|
||||||
|
- Added custom exception descriptions to FAILED.json file
|
||||||
|
- Fixed the bug that prevents downloading some gfycat URLs
|
||||||
|
|
||||||
|
### [13/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/9f831e1b784a770c82252e909462871401a05c11)
|
||||||
|
- Change config.json file's path to home directory
|
||||||
|
|
||||||
|
### [12/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/50a77f6ba54c24f5647d5ea4e177400b71ff04a7)
|
||||||
|
- Added binaries for Windows and Linux
|
||||||
|
- Wait on KeyboardInterrupt
|
||||||
|
- Accept multiple subreddit input
|
||||||
|
- Fixed the bug that prevents choosing "[0] exit" with typing "exit"
|
||||||
|
|
||||||
## Changelog
|
|
||||||
### [11/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a28a7776ab826dea2a8d93873a94cd46db3a339b)
|
### [11/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a28a7776ab826dea2a8d93873a94cd46db3a339b)
|
||||||
- Improvements on UX and UI
|
- Improvements on UX and UI
|
||||||
- Added logging errors to CONSOLE_LOG.txt
|
- Added logging errors to CONSOLE_LOG.txt
|
||||||
|
|||||||
@@ -1,5 +0,0 @@
|
|||||||
theme: jekyll-theme-minimal
|
|
||||||
show_downloads: false
|
|
||||||
#title: Bulk Downloader for Reddit
|
|
||||||
description: Code written by Ali PARLAKCI
|
|
||||||
google_analytics: UA-80780721-3
|
|
||||||
100
docs/COMMAND_LINE_ARGUMENTS.md
Normal file
100
docs/COMMAND_LINE_ARGUMENTS.md
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
# Using command-line arguments
|
||||||
|
|
||||||
|
See **[compiling from source](COMPILE_FROM_SOURCE.md)** page first unless you are using an executable file. If you are using an executable file, see [using terminal](COMPILE_FROM_SOURCE.md#using-terminal) and come back.
|
||||||
|
|
||||||
|
***Use*** `.\bulk-downloader-for-reddit.exe` ***or*** `./bulk-downloader-for-reddit` ***if you are using the executable***.
|
||||||
|
```console
|
||||||
|
$ python script.py --help
|
||||||
|
usage: script.py [-h] [--directory DIRECTORY] [--link link] [--saved]
|
||||||
|
[--submitted] [--upvoted] [--log LOG FILE]
|
||||||
|
[--subreddit SUBREDDIT [SUBREDDIT ...]]
|
||||||
|
[--multireddit MULTIREDDIT] [--user redditor]
|
||||||
|
[--search query] [--sort SORT TYPE] [--limit Limit]
|
||||||
|
[--time TIME_LIMIT] [--NoDownload] [--verbose]
|
||||||
|
|
||||||
|
This program downloads media from reddit posts
|
||||||
|
|
||||||
|
optional arguments:
|
||||||
|
-h, --help show this help message and exit
|
||||||
|
--directory DIRECTORY, -d DIRECTORY
|
||||||
|
Specifies the directory where posts will be downloaded
|
||||||
|
to
|
||||||
|
--link link, -l link Get posts from link
|
||||||
|
--saved Triggers saved mode
|
||||||
|
--submitted Gets posts of --user
|
||||||
|
--upvoted Gets upvoted posts of --user
|
||||||
|
--log LOG FILE Takes a log file which created by itself (json files),
|
||||||
|
reads posts and tries downloading them again.
|
||||||
|
--subreddit SUBREDDIT [SUBREDDIT ...]
|
||||||
|
Triggers subreddit mode and takes subreddit's name
|
||||||
|
without r/. use "frontpage" for frontpage
|
||||||
|
--multireddit MULTIREDDIT
|
||||||
|
Triggers multireddit mode and takes multireddit's name
|
||||||
|
without m/
|
||||||
|
--user redditor reddit username if needed. use "me" for current user
|
||||||
|
--search query Searches for given query in given subreddits
|
||||||
|
--sort SORT TYPE Either hot, top, new, controversial, rising or
|
||||||
|
relevance default: hot
|
||||||
|
--limit Limit default: unlimited
|
||||||
|
--time TIME_LIMIT Either hour, day, week, month, year or all. default:
|
||||||
|
all
|
||||||
|
--NoDownload Just gets the posts and store them in a file for
|
||||||
|
downloading later
|
||||||
|
--verbose, -v Verbose Mode
|
||||||
|
```
|
||||||
|
|
||||||
|
# Examples
|
||||||
|
|
||||||
|
- **Use `python3` instead of `python` if you are using *MacOS* or *Linux***
|
||||||
|
|
||||||
|
```console
|
||||||
|
python script.py
|
||||||
|
```
|
||||||
|
|
||||||
|
```console
|
||||||
|
.\bulk-downloader-for-reddit.exe
|
||||||
|
```
|
||||||
|
|
||||||
|
```console
|
||||||
|
python script.py
|
||||||
|
```
|
||||||
|
|
||||||
|
```console
|
||||||
|
.\bulk-downloader-for-reddit.exe --directory .\\NEW_FOLDER --search cats --sort new --time all --subreddit gifs pics --NoDownload
|
||||||
|
```
|
||||||
|
|
||||||
|
```console
|
||||||
|
./bulk-downloader-for-reddit --directory .\\NEW_FOLDER\\ANOTHER_FOLDER --saved --limit 1000
|
||||||
|
```
|
||||||
|
|
||||||
|
```console
|
||||||
|
python script.py --directory .\\NEW_FOLDER --sort new --time all --limit 10 --link "https://www.reddit.com/r/gifs/search?q=dogs&restrict_sr=on&type=link&sort=new&t=month"
|
||||||
|
```
|
||||||
|
|
||||||
|
```console
|
||||||
|
python script.py --directory .\\NEW_FOLDER --link "https://www.reddit.com/r/learnprogramming/comments/7mjw12/"
|
||||||
|
```
|
||||||
|
|
||||||
|
```console
|
||||||
|
python script.py --directory .\\NEW_FOLDER --search cats --sort new --time all --subreddit gifs pics --NoDownload
|
||||||
|
```
|
||||||
|
|
||||||
|
```console
|
||||||
|
python script.py --directory .\\NEW_FOLDER --user [USER_NAME] --submitted --limit 10
|
||||||
|
```
|
||||||
|
|
||||||
|
```console
|
||||||
|
python script.py --directory .\\NEW_FOLDER --multireddit good_subs --user [USER_NAME] --sort top --time week --limit 250
|
||||||
|
```
|
||||||
|
|
||||||
|
```console
|
||||||
|
python script.py --directory .\\NEW_FOLDER\\ANOTHER_FOLDER --saved --limit 1000
|
||||||
|
```
|
||||||
|
|
||||||
|
```console
|
||||||
|
python script.py --directory C:\\NEW_FOLDER\\ANOTHER_FOLDER --log UNNAMED_FOLDER\\FAILED.json
|
||||||
|
```
|
||||||
|
|
||||||
|
# FAQ
|
||||||
|
## I can't startup the script no matter what.
|
||||||
|
See **[finding the correct keyword for Python](COMPILE_FROM_SOURCE.md#finding-the-correct-keyword-for-python)**
|
||||||
42
docs/COMPILE_FROM_SOURCE.md
Normal file
42
docs/COMPILE_FROM_SOURCE.md
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# Compiling from source code
|
||||||
|
## Requirements
|
||||||
|
### Python 3 Interpreter
|
||||||
|
The latest* version of **Python 3** is needed. See if it is already installed [here](#finding-the-correct-keyword-for-python). If not, download the matching release for your platform [here](https://www.python.org/downloads/) and install it. If you are a *Windows* user, selecting the **Add Python 3 to PATH** option is mandatory.
|
||||||
|
|
||||||
|
\* *Use Python 3.6.5 if you encounter an issue*
|
||||||
|
## Using terminal
|
||||||
|
### To open it...
|
||||||
|
- **On Windows 8/8.1/10**: Press the File tab on **Windows Explorer**, click on **Open Windows PowerShell** or **Open Windows Command Prompt** or look for *Command Prompt* or *PowerShell* in *Start Menu*.
|
||||||
|
|
||||||
|
- **On Windows 7**: Press **WindowsKey+R**, type **cmd** and hit Enter or look for *Command Prompt* or *PowerShell* in *Start Menu*.
|
||||||
|
|
||||||
|
- **On Linux**: Right-click in a folder and select **Open Terminal** or press **Ctrl+Alt+T** or look for **Terminal** in the programs.
|
||||||
|
|
||||||
|
- **On MacOS**: Look for an app called **Terminal**.
|
||||||
|
|
||||||
|
### Navigating to the directory where script is downloaded
|
||||||
|
Go inside the folder where script.py is located. If you are not familiar with changing directories on command-prompt and terminal read *Changing Directories* in [this article](https://lifehacker.com/5633909/who-needs-a-mouse-learn-to-use-the-command-line-for-almost-anything)
|
||||||
|
|
||||||
|
## Finding the correct keyword for Python
|
||||||
|
Enter these lines into the terminal window, one at a time, until one of them prints out the version you have downloaded and installed:
|
||||||
|
|
||||||
|
- `python --version`
|
||||||
|
- `python3 --version`
|
||||||
|
- `python3.7 --version`
|
||||||
|
- `python3.6 --version`
|
||||||
|
- `py --version`
|
||||||
|
- `py -3 --version`
|
||||||
|
- `py -3.6 --version`
|
||||||
|
- `py -3.7 --version`
|
||||||
|
|
||||||
|
Once it does, your keyword is without the `--version` part.
|
||||||
|
|
||||||
|
## Installing dependencies
|
||||||
|
Enter the line below into the terminal window while you are in the directory where script.py is, using your keyword for Python:
|
||||||
|
```console
|
||||||
|
python -m pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Now, you can go to [Using command-line arguments](COMMAND_LINE_ARGUMENTS.md)
|
||||||
@@ -1,85 +0,0 @@
|
|||||||
## python script.py --help
|
|
||||||
|
|
||||||
```console
|
|
||||||
usage: script.py [-h] [--directory DIRECTORY] [--link link] [--saved]
|
|
||||||
[--submitted] [--upvoted] [--log LOG FILE]
|
|
||||||
[--subreddit SUBREDDIT [SUBREDDIT ...]]
|
|
||||||
[--multireddit MULTIREDDIT] [--user redditor]
|
|
||||||
[--search query] [--sort SORT TYPE] [--limit Limit]
|
|
||||||
[--time TIME_LIMIT] [--NoDownload]
|
|
||||||
|
|
||||||
This program downloads media from reddit posts
|
|
||||||
|
|
||||||
optional arguments:
|
|
||||||
-h, --help show this help message and exit
|
|
||||||
--directory DIRECTORY
|
|
||||||
Specifies the directory where posts will be downloaded
|
|
||||||
to
|
|
||||||
--link link, -l link Get posts from link
|
|
||||||
--saved Triggers saved mode
|
|
||||||
--submitted Gets posts of --user
|
|
||||||
--upvoted Gets upvoted posts of --user
|
|
||||||
--log LOG FILE Triggers log read mode and takes a log file
|
|
||||||
--subreddit SUBREDDIT [SUBREDDIT ...]
|
|
||||||
Triggers subreddit mode and takes subreddit's name
|
|
||||||
without r/. use "frontpage" for frontpage
|
|
||||||
--multireddit MULTIREDDIT
|
|
||||||
Triggers multireddit mode and takes multireddit's name
|
|
||||||
without m/
|
|
||||||
--user redditor reddit username if needed. use "me" for current user
|
|
||||||
--search query Searches for given query in given subreddits
|
|
||||||
--sort SORT TYPE Either hot, top, new, controversial, rising or
|
|
||||||
relevance default: hot
|
|
||||||
--limit Limit default: unlimited
|
|
||||||
--time TIME_LIMIT Either hour, day, week, month, year or all. default:
|
|
||||||
all
|
|
||||||
--NoDownload Just gets the posts and store them in a file for
|
|
||||||
downloading later
|
|
||||||
```
|
|
||||||
|
|
||||||
## Examples
|
|
||||||
|
|
||||||
- **Use `python3` instead of `python` if you are using *MacOS* or *Linux***
|
|
||||||
|
|
||||||
```console
|
|
||||||
python script.py
|
|
||||||
```
|
|
||||||
|
|
||||||
```console
|
|
||||||
python script.py .\\NEW_FOLDER --sort new --time all --limit 10 --link "https://www.reddit.com/r/gifs/search?q=dogs&restrict_sr=on&type=link&sort=new&t=month"
|
|
||||||
```
|
|
||||||
|
|
||||||
```console
|
|
||||||
python script.py .\\NEW_FOLDER --link "https://www.reddit.com/r/learnprogramming/comments/7mjw12/"
|
|
||||||
```
|
|
||||||
|
|
||||||
```console
|
|
||||||
python script.py .\\NEW_FOLDER --search cats --sort new --time all --subreddit gifs pics --NoDownload
|
|
||||||
```
|
|
||||||
|
|
||||||
```console
|
|
||||||
python script.py .\\NEW_FOLDER --user [USER_NAME] --submitted --limit 10
|
|
||||||
```
|
|
||||||
|
|
||||||
```console
|
|
||||||
python script.py .\\NEW_FOLDER --multireddit good_subs --user [USER_NAME] --sort top --time week --limit 250
|
|
||||||
```
|
|
||||||
|
|
||||||
```console
|
|
||||||
python script.py .\\NEW_FOLDER\\ANOTHER_FOLDER --saved --limit 1000
|
|
||||||
```
|
|
||||||
|
|
||||||
```console
|
|
||||||
python script.py C:\\NEW_FOLDER\\ANOTHER_FOLDER --log UNNAMED_FOLDER\\FAILED.json
|
|
||||||
```
|
|
||||||
|
|
||||||
## FAQ
|
|
||||||
### I can't startup the script no matter what.
|
|
||||||
- Try these:
|
|
||||||
- **`python`**
|
|
||||||
- **`python3`**
|
|
||||||
- **`python3.7`**
|
|
||||||
- **`python3.6`**
|
|
||||||
- **`py -3`**
|
|
||||||
|
|
||||||
Python have real issues about naming their program
|
|
||||||
331
script.py
331
script.py
@@ -13,7 +13,7 @@ import time
|
|||||||
from io import StringIO
|
from io import StringIO
|
||||||
from pathlib import Path, PurePath
|
from pathlib import Path, PurePath
|
||||||
|
|
||||||
from src.downloader import Direct, Gfycat, Imgur, Self
|
from src.downloader import Direct, Gfycat, Imgur, Self, Erome
|
||||||
from src.errors import *
|
from src.errors import *
|
||||||
from src.parser import LinkDesigner
|
from src.parser import LinkDesigner
|
||||||
from src.searcher import getPosts
|
from src.searcher import getPosts
|
||||||
@@ -22,7 +22,7 @@ from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
|
|||||||
|
|
||||||
__author__ = "Ali Parlakci"
|
__author__ = "Ali Parlakci"
|
||||||
__license__ = "GPL"
|
__license__ = "GPL"
|
||||||
__version__ = "1.1.0"
|
__version__ = "1.5.2"
|
||||||
__maintainer__ = "Ali Parlakci"
|
__maintainer__ = "Ali Parlakci"
|
||||||
__email__ = "parlakciali@gmail.com"
|
__email__ = "parlakciali@gmail.com"
|
||||||
|
|
||||||
@@ -62,7 +62,7 @@ def parseArguments(arguments=[]):
|
|||||||
description="This program downloads " \
|
description="This program downloads " \
|
||||||
"media from reddit " \
|
"media from reddit " \
|
||||||
"posts")
|
"posts")
|
||||||
parser.add_argument("--directory",
|
parser.add_argument("--directory","-d",
|
||||||
help="Specifies the directory where posts will be " \
|
help="Specifies the directory where posts will be " \
|
||||||
"downloaded to",
|
"downloaded to",
|
||||||
metavar="DIRECTORY")
|
metavar="DIRECTORY")
|
||||||
@@ -143,6 +143,12 @@ def parseArguments(arguments=[]):
|
|||||||
" for downloading later",
|
" for downloading later",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
default=False)
|
default=False)
|
||||||
|
|
||||||
|
parser.add_argument("--verbose","-v",
|
||||||
|
help="Verbose Mode",
|
||||||
|
action="store_true",
|
||||||
|
default=False)
|
||||||
|
|
||||||
|
|
||||||
if arguments == []:
|
if arguments == []:
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
@@ -159,7 +165,10 @@ def checkConflicts():
|
|||||||
else:
|
else:
|
||||||
user = 1
|
user = 1
|
||||||
|
|
||||||
modes = ["saved","subreddit","submitted","search","log","link","upvoted"]
|
modes = [
|
||||||
|
"saved","subreddit","submitted","search","log","link","upvoted",
|
||||||
|
"multireddit"
|
||||||
|
]
|
||||||
|
|
||||||
values = {
|
values = {
|
||||||
x: 0 if getattr(GLOBAL.arguments,x) is None or \
|
x: 0 if getattr(GLOBAL.arguments,x) is None or \
|
||||||
@@ -194,12 +203,12 @@ class PromptUser:
|
|||||||
))
|
))
|
||||||
print(" "*4+"[0] exit\n")
|
print(" "*4+"[0] exit\n")
|
||||||
choice = input("> ")
|
choice = input("> ")
|
||||||
while not choice.lower() in choices+choicesByIndex:
|
while not choice.lower() in choices+choicesByIndex+["exit"]:
|
||||||
print("Invalid input\n")
|
print("Invalid input\n")
|
||||||
programModeIndex = input("> ")
|
programModeIndex = input("> ")
|
||||||
|
|
||||||
if choice == "0":
|
if choice == "0" or choice == "exit":
|
||||||
quit()
|
sys.exit()
|
||||||
elif choice in choicesByIndex:
|
elif choice in choicesByIndex:
|
||||||
return choices[int(choice)-1]
|
return choices[int(choice)-1]
|
||||||
else:
|
else:
|
||||||
@@ -232,10 +241,21 @@ class PromptUser:
|
|||||||
GLOBAL.arguments.time = timeFilter
|
GLOBAL.arguments.time = timeFilter
|
||||||
|
|
||||||
if programMode == "subreddit":
|
if programMode == "subreddit":
|
||||||
GLOBAL.arguments.subreddit = input("\nsubreddit: ")
|
|
||||||
|
subredditInput = input("subreddit (enter frontpage for frontpage): ")
|
||||||
|
GLOBAL.arguments.subreddit = subredditInput
|
||||||
|
|
||||||
|
while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
|
||||||
|
subredditInput = input("subreddit: ")
|
||||||
|
GLOBAL.arguments.subreddit += "+" + subredditInput
|
||||||
|
|
||||||
if " " in GLOBAL.arguments.subreddit:
|
if " " in GLOBAL.arguments.subreddit:
|
||||||
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit.split())
|
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit.split())
|
||||||
|
|
||||||
|
# DELETE THE PLUS (+) AT THE END
|
||||||
|
if not subredditInput.lower() == "frontpage":
|
||||||
|
GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]
|
||||||
|
|
||||||
print("\nselect sort type:")
|
print("\nselect sort type:")
|
||||||
sortTypes = [
|
sortTypes = [
|
||||||
"hot","top","new","rising","controversial"
|
"hot","top","new","rising","controversial"
|
||||||
@@ -255,7 +275,7 @@ class PromptUser:
|
|||||||
|
|
||||||
elif programMode == "multireddit":
|
elif programMode == "multireddit":
|
||||||
GLOBAL.arguments.user = input("\nredditor: ")
|
GLOBAL.arguments.user = input("\nredditor: ")
|
||||||
GLOBAL.arguments.subreddit = input("\nmultireddit: ")
|
GLOBAL.arguments.multireddit = input("\nmultireddit: ")
|
||||||
|
|
||||||
print("\nselect sort type:")
|
print("\nselect sort type:")
|
||||||
sortTypes = [
|
sortTypes = [
|
||||||
@@ -307,10 +327,11 @@ class PromptUser:
|
|||||||
GLOBAL.arguments.log = input("\nlog file directory:")
|
GLOBAL.arguments.log = input("\nlog file directory:")
|
||||||
if Path(GLOBAL.arguments.log ).is_file():
|
if Path(GLOBAL.arguments.log ).is_file():
|
||||||
break
|
break
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
GLOBAL.arguments.limit = int(input("\nlimit: "))
|
GLOBAL.arguments.limit = int(input("\nlimit (0 for none): "))
|
||||||
|
if GLOBAL.arguments.limit == 0:
|
||||||
|
GLOBAL.arguments.limit = None
|
||||||
break
|
break
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
@@ -365,6 +386,9 @@ def prepareAttributes():
|
|||||||
|
|
||||||
ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit
|
ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit
|
||||||
|
|
||||||
|
elif GLOBAL.arguments.multireddit is not None:
|
||||||
|
ATTRIBUTES["multireddit"] = GLOBAL.arguments.multireddit
|
||||||
|
|
||||||
elif GLOBAL.arguments.saved is True:
|
elif GLOBAL.arguments.saved is True:
|
||||||
ATTRIBUTES["saved"] = True
|
ATTRIBUTES["saved"] = True
|
||||||
|
|
||||||
@@ -389,7 +413,7 @@ def postFromLog(fileName):
|
|||||||
content = jsonFile(fileName).read()
|
content = jsonFile(fileName).read()
|
||||||
else:
|
else:
|
||||||
print("File not found")
|
print("File not found")
|
||||||
quit()
|
sys.exit()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
del content["HEADER"]
|
del content["HEADER"]
|
||||||
@@ -404,76 +428,73 @@ def postFromLog(fileName):
|
|||||||
|
|
||||||
return posts
|
return posts
|
||||||
|
|
||||||
def postExists(POST):
|
def isPostExists(POST):
|
||||||
"""Figure out a file's name and checks if the file already exists"""
|
"""Figure out a file's name and checks if the file already exists"""
|
||||||
|
|
||||||
title = nameCorrector(POST['postTitle'])
|
title = nameCorrector(POST['postTitle'])
|
||||||
FILENAME = title + "_" + POST['postId']
|
|
||||||
PATH = GLOBAL.directory / POST["postSubreddit"]
|
PATH = GLOBAL.directory / POST["postSubreddit"]
|
||||||
|
|
||||||
possibleExtensions = [".jpg",".png",".mp4",".gif",".webm",".md"]
|
possibleExtensions = [".jpg",".png",".mp4",".gif",".webm",".md"]
|
||||||
|
|
||||||
for i in range(2):
|
for extension in possibleExtensions:
|
||||||
for extension in possibleExtensions:
|
|
||||||
FILE_PATH = PATH / (FILENAME+extension)
|
OLD_FILE_PATH = PATH / (
|
||||||
if FILE_PATH.exists():
|
title
|
||||||
return True
|
+ "_" + POST['postId']
|
||||||
else:
|
+ extension
|
||||||
FILENAME = POST['postId']
|
)
|
||||||
|
FILE_PATH = PATH / (
|
||||||
|
POST["postSubmitter"]
|
||||||
|
+ "_" + title
|
||||||
|
+ "_" + POST['postId']
|
||||||
|
+ extension
|
||||||
|
)
|
||||||
|
|
||||||
|
SHORT_FILE_PATH = PATH / (POST['postId']+extension)
|
||||||
|
|
||||||
|
if OLD_FILE_PATH.exists() or \
|
||||||
|
FILE_PATH.exists() or \
|
||||||
|
SHORT_FILE_PATH.exists():
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def download(submissions):
|
def downloadPost(SUBMISSION):
|
||||||
"""Analyze list of submissions and call the right function
|
directory = GLOBAL.directory / SUBMISSION['postSubreddit']
|
||||||
to download each one, catch errors, update the log files
|
|
||||||
"""
|
|
||||||
|
|
||||||
subsLenght = len(submissions)
|
global lastRequestTime
|
||||||
lastRequestTime = 0
|
|
||||||
downloadedCount = subsLenght
|
|
||||||
duplicates = 0
|
|
||||||
BACKUP = {}
|
|
||||||
|
|
||||||
FAILED_FILE = createLogFile("FAILED")
|
downloaders = {
|
||||||
|
"imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
|
||||||
|
}
|
||||||
|
|
||||||
for i in range(subsLenght):
|
print()
|
||||||
print("\n({}/{})".format(i+1,subsLenght))
|
if SUBMISSION['postType'] in downloaders:
|
||||||
print(
|
|
||||||
"https://reddit.com/r/{subreddit}/comments/{id}".format(
|
|
||||||
subreddit=submissions[i]['postSubreddit'],
|
|
||||||
id=submissions[i]['postId']
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if postExists(submissions[i]):
|
if SUBMISSION['postType'] == "imgur":
|
||||||
result = False
|
|
||||||
print(submissions[i]['postType'].upper())
|
|
||||||
print("It already exists")
|
|
||||||
duplicates += 1
|
|
||||||
downloadedCount -= 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
directory = GLOBAL.directory / submissions[i]['postSubreddit']
|
|
||||||
|
|
||||||
if submissions[i]['postType'] == 'imgur':
|
|
||||||
print("IMGUR",end="")
|
|
||||||
|
|
||||||
while int(time.time() - lastRequestTime) <= 2:
|
while int(time.time() - lastRequestTime) <= 2:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
credit = Imgur.get_credits()
|
credit = Imgur.get_credits()
|
||||||
|
|
||||||
IMGUR_RESET_TIME = credit['UserReset']-time.time()
|
IMGUR_RESET_TIME = credit['UserReset']-time.time()
|
||||||
USER_RESET = ("after " \
|
USER_RESET = ("after " \
|
||||||
+ str(int(IMGUR_RESET_TIME/60)) \
|
+ str(int(IMGUR_RESET_TIME/60)) \
|
||||||
+ " Minutes " \
|
+ " Minutes " \
|
||||||
+ str(int(IMGUR_RESET_TIME%60)) \
|
+ str(int(IMGUR_RESET_TIME%60)) \
|
||||||
+ " Seconds")
|
+ " Seconds")
|
||||||
print(
|
|
||||||
" => Client: {} - User: {} - Reset {}".format(
|
if credit['ClientRemaining'] < 25 or credit['UserRemaining'] < 25:
|
||||||
credit['ClientRemaining'],
|
print(
|
||||||
credit['UserRemaining'],
|
"==> Client: {} - User: {} - Reset {}".format(
|
||||||
USER_RESET
|
credit['ClientRemaining'],
|
||||||
|
credit['UserRemaining'],
|
||||||
|
USER_RESET
|
||||||
|
),end=""
|
||||||
)
|
)
|
||||||
)
|
|
||||||
|
|
||||||
if not (credit['UserRemaining'] == 0 or \
|
if not (credit['UserRemaining'] == 0 or \
|
||||||
credit['ClientRemaining'] == 0):
|
credit['ClientRemaining'] == 0):
|
||||||
@@ -482,94 +503,105 @@ def download(submissions):
|
|||||||
"""
|
"""
|
||||||
while int(time.time() - lastRequestTime) <= 2:
|
while int(time.time() - lastRequestTime) <= 2:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
lastRequestTime = time.time()
|
lastRequestTime = time.time()
|
||||||
|
|
||||||
try:
|
|
||||||
Imgur(directory,submissions[i])
|
|
||||||
|
|
||||||
except FileAlreadyExistsError:
|
|
||||||
print("It already exists")
|
|
||||||
duplicates += 1
|
|
||||||
downloadedCount -= 1
|
|
||||||
|
|
||||||
except ImgurLoginError:
|
|
||||||
print(
|
|
||||||
"Imgur login failed. Quitting the program "\
|
|
||||||
"as unexpected errors might occur."
|
|
||||||
)
|
|
||||||
quit()
|
|
||||||
|
|
||||||
except Exception as exception:
|
|
||||||
print(exception)
|
|
||||||
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
|
|
||||||
downloadedCount -= 1
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if credit['UserRemaining'] == 0:
|
if credit['UserRemaining'] == 0:
|
||||||
KEYWORD = "user"
|
KEYWORD = "user"
|
||||||
elif credit['ClientRemaining'] == 0:
|
elif credit['ClientRemaining'] == 0:
|
||||||
KEYWORD = "client"
|
KEYWORD = "client"
|
||||||
|
|
||||||
print('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
|
raise ImgurLimitError('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
|
||||||
FAILED_FILE.add(
|
|
||||||
{int(i+1):['{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()),
|
|
||||||
submissions[i]]}
|
|
||||||
)
|
|
||||||
downloadedCount -= 1
|
|
||||||
|
|
||||||
elif submissions[i]['postType'] == 'gfycat':
|
downloaders[SUBMISSION['postType']] (directory,SUBMISSION)
|
||||||
print("GFYCAT")
|
|
||||||
try:
|
|
||||||
Gfycat(directory,submissions[i])
|
|
||||||
|
|
||||||
except FileAlreadyExistsError:
|
else:
|
||||||
print("It already exists")
|
raise NoSuitablePost
|
||||||
duplicates += 1
|
|
||||||
downloadedCount -= 1
|
|
||||||
|
|
||||||
except NotADownloadableLinkError as exception:
|
|
||||||
print("Could not read the page source")
|
|
||||||
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
|
|
||||||
downloadedCount -= 1
|
|
||||||
|
|
||||||
except Exception as exception:
|
return None
|
||||||
print(exception)
|
|
||||||
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
|
|
||||||
downloadedCount -= 1
|
|
||||||
|
|
||||||
elif submissions[i]['postType'] == 'direct':
|
def download(submissions):
|
||||||
print("DIRECT")
|
"""Analyze list of submissions and call the right function
|
||||||
try:
|
to download each one, catch errors, update the log files
|
||||||
Direct(directory,submissions[i])
|
"""
|
||||||
|
|
||||||
except FileAlreadyExistsError:
|
subsLenght = len(submissions)
|
||||||
print("It already exists")
|
global lastRequestTime
|
||||||
downloadedCount -= 1
|
lastRequestTime = 0
|
||||||
duplicates += 1
|
downloadedCount = subsLenght
|
||||||
|
duplicates = 0
|
||||||
|
|
||||||
except Exception as exception:
|
FAILED_FILE = createLogFile("FAILED")
|
||||||
print(exception)
|
|
||||||
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
|
for i in range(subsLenght):
|
||||||
downloadedCount -= 1
|
print(
|
||||||
|
f"\n({i+1}/{subsLenght}) ({submissions[i]['postType'].upper()}) " \
|
||||||
|
f"(r/{submissions[i]['postSubreddit']})",end=""
|
||||||
|
)
|
||||||
|
|
||||||
|
if isPostExists(submissions[i]):
|
||||||
|
print("\nIt already exists")
|
||||||
|
duplicates += 1
|
||||||
|
downloadedCount -= 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
downloadPost(submissions[i])
|
||||||
|
|
||||||
elif submissions[i]['postType'] == 'self':
|
except FileAlreadyExistsError:
|
||||||
print("SELF")
|
print("It already exists")
|
||||||
try:
|
duplicates += 1
|
||||||
Self(directory,submissions[i])
|
downloadedCount -= 1
|
||||||
|
|
||||||
except FileAlreadyExistsError:
|
except ImgurLoginError:
|
||||||
print("It already exists")
|
print(
|
||||||
downloadedCount -= 1
|
"Imgur login failed. \nQuitting the program "\
|
||||||
duplicates += 1
|
"as unexpected errors might occur."
|
||||||
|
)
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
except Exception as exception:
|
except ImgurLimitError as exception:
|
||||||
print(exception)
|
FAILED_FILE.add({int(i+1):[
|
||||||
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
|
"{class_name}: {info}".format(
|
||||||
downloadedCount -= 1
|
class_name=exception.__class__.__name__,info=str(exception)
|
||||||
|
),
|
||||||
|
submissions[i]
|
||||||
|
]})
|
||||||
|
downloadedCount -= 1
|
||||||
|
|
||||||
else:
|
except NotADownloadableLinkError as exception:
|
||||||
|
print(
|
||||||
|
"{class_name}: {info}".format(
|
||||||
|
class_name=exception.__class__.__name__,info=str(exception)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
FAILED_FILE.add({int(i+1):[
|
||||||
|
"{class_name}: {info}".format(
|
||||||
|
class_name=exception.__class__.__name__,info=str(exception)
|
||||||
|
),
|
||||||
|
submissions[i]
|
||||||
|
]})
|
||||||
|
downloadedCount -= 1
|
||||||
|
|
||||||
|
except NoSuitablePost:
|
||||||
print("No match found, skipping...")
|
print("No match found, skipping...")
|
||||||
downloadedCount -= 1
|
downloadedCount -= 1
|
||||||
|
|
||||||
|
except Exception as exception:
|
||||||
|
# raise exception
|
||||||
|
print(
|
||||||
|
"{class_name}: {info}".format(
|
||||||
|
class_name=exception.__class__.__name__,info=str(exception)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
FAILED_FILE.add({int(i+1):[
|
||||||
|
"{class_name}: {info}".format(
|
||||||
|
class_name=exception.__class__.__name__,info=str(exception)
|
||||||
|
),
|
||||||
|
submissions[i]
|
||||||
|
]})
|
||||||
|
downloadedCount -= 1
|
||||||
|
|
||||||
if duplicates:
|
if duplicates:
|
||||||
print("\n There was {} duplicates".format(duplicates))
|
print("\n There was {} duplicates".format(duplicates))
|
||||||
@@ -594,48 +626,46 @@ def main():
|
|||||||
checkConflicts()
|
checkConflicts()
|
||||||
except ProgramModeError as err:
|
except ProgramModeError as err:
|
||||||
PromptUser()
|
PromptUser()
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
quit()
|
|
||||||
|
|
||||||
GLOBAL.config = getConfig("config.json")
|
|
||||||
|
|
||||||
|
if not Path(GLOBAL.configDirectory).is_dir():
|
||||||
|
os.makedirs(GLOBAL.configDirectory)
|
||||||
|
GLOBAL.config = getConfig(GLOBAL.configDirectory / "config.json")
|
||||||
|
|
||||||
if GLOBAL.arguments.log is not None:
|
if GLOBAL.arguments.log is not None:
|
||||||
logDir = Path(GLOBAL.arguments.log)
|
logDir = Path(GLOBAL.arguments.log)
|
||||||
download(postFromLog(logDir))
|
download(postFromLog(logDir))
|
||||||
quit()
|
sys.exit()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
POSTS = getPosts(prepareAttributes())
|
POSTS = getPosts(prepareAttributes())
|
||||||
except InsufficientPermission:
|
except InsufficientPermission:
|
||||||
print("You do not have permission to do that")
|
print("You do not have permission to do that")
|
||||||
quit()
|
sys.exit()
|
||||||
except NoMatchingSubmissionFound:
|
except NoMatchingSubmissionFound:
|
||||||
print("No matching submission was found")
|
print("No matching submission was found")
|
||||||
quit()
|
sys.exit()
|
||||||
except NoRedditSupoort:
|
except NoRedditSupoort:
|
||||||
print("Reddit does not support that")
|
print("Reddit does not support that")
|
||||||
quit()
|
sys.exit()
|
||||||
except NoPrawSupport:
|
except NoPrawSupport:
|
||||||
print("PRAW does not support that")
|
print("PRAW does not support that")
|
||||||
quit()
|
sys.exit()
|
||||||
except MultiredditNotFound:
|
except MultiredditNotFound:
|
||||||
print("Multireddit not found")
|
print("Multireddit not found")
|
||||||
quit()
|
sys.exit()
|
||||||
except InvalidSortingType:
|
except InvalidSortingType:
|
||||||
print("Invalid sorting type has given")
|
print("Invalid sorting type has given")
|
||||||
quit()
|
sys.exit()
|
||||||
except InvalidRedditLink:
|
except InvalidRedditLink:
|
||||||
print("Invalid reddit link")
|
print("Invalid reddit link")
|
||||||
quit()
|
sys.exit()
|
||||||
|
|
||||||
if POSTS is None:
|
if POSTS is None:
|
||||||
print("I could not find any posts in that URL")
|
print("I could not find any posts in that URL")
|
||||||
quit()
|
sys.exit()
|
||||||
|
|
||||||
if GLOBAL.arguments.NoDownload:
|
if GLOBAL.arguments.NoDownload:
|
||||||
quit()
|
sys.exit()
|
||||||
|
|
||||||
else:
|
else:
|
||||||
download(POSTS)
|
download(POSTS)
|
||||||
@@ -650,13 +680,16 @@ if __name__ == "__main__":
|
|||||||
print = printToFile
|
print = printToFile
|
||||||
GLOBAL.RUN_TIME = time.time()
|
GLOBAL.RUN_TIME = time.time()
|
||||||
main()
|
main()
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
if GLOBAL.directory is None:
|
if GLOBAL.directory is None:
|
||||||
GLOBAL.directory = Path(".\\")
|
GLOBAL.directory = Path(".\\")
|
||||||
print("\nQUITTING...")
|
|
||||||
quit()
|
|
||||||
except Exception as exception:
|
except Exception as exception:
|
||||||
logging.error("Runtime error!", exc_info=full_exc_info(sys.exc_info()))
|
if GLOBAL.directory is None:
|
||||||
|
GLOBAL.directory = Path(".\\")
|
||||||
|
logging.error(sys.exc_info()[0].__name__,
|
||||||
|
exc_info=full_exc_info(sys.exc_info()))
|
||||||
print(log_stream.getvalue())
|
print(log_stream.getvalue())
|
||||||
|
|
||||||
input("Press enter to quit\n")
|
input("\nPress enter to quit\n")
|
||||||
|
|||||||
50
setup.py
Normal file
50
setup.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
#!C:\Users\Ali\AppData\Local\Programs\Python\Python36\python.exe
|
||||||
|
|
||||||
|
## python setup.py build
|
||||||
|
import sys
|
||||||
|
from cx_Freeze import setup, Executable
|
||||||
|
from script import __version__
|
||||||
|
|
||||||
|
options = {
|
||||||
|
"build_exe": {
|
||||||
|
"packages":[
|
||||||
|
"idna","imgurpython", "praw", "requests"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if sys.platform == "win32":
|
||||||
|
executables = [Executable(
|
||||||
|
"script.py",
|
||||||
|
targetName="bulk-downloader-for-reddit.exe",
|
||||||
|
shortcutName="Bulk Downloader for Reddit",
|
||||||
|
shortcutDir="DesktopFolder"
|
||||||
|
)]
|
||||||
|
|
||||||
|
elif sys.platform == "linux":
|
||||||
|
executables = [Executable(
|
||||||
|
"script.py",
|
||||||
|
targetName="bulk-downloader-for-reddit",
|
||||||
|
shortcutName="Bulk Downloader for Reddit",
|
||||||
|
shortcutDir="DesktopFolder"
|
||||||
|
)]
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name = "Bulk Downloader for Reddit",
|
||||||
|
version = __version__,
|
||||||
|
description = "Bulk Downloader for Reddit",
|
||||||
|
author = "Ali Parlakci",
|
||||||
|
author_email="parlakciali@gmail.com",
|
||||||
|
url="https://github.com/aliparlakci/bulk-downloader-for-reddit",
|
||||||
|
classifiers=(
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)"
|
||||||
|
"Natural Language :: English",
|
||||||
|
"Environment :: Console",
|
||||||
|
"Operating System :: OS Independent",
|
||||||
|
),
|
||||||
|
executables = executables,
|
||||||
|
options = options
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -2,7 +2,9 @@ import io
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
from html.parser import HTMLParser
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from urllib.error import HTTPError
|
||||||
|
|
||||||
import imgurpython
|
import imgurpython
|
||||||
from multiprocessing import Queue
|
from multiprocessing import Queue
|
||||||
@@ -36,7 +38,10 @@ def getExtension(link):
|
|||||||
if TYPE in parsed:
|
if TYPE in parsed:
|
||||||
return "."+parsed[-1]
|
return "."+parsed[-1]
|
||||||
else:
|
else:
|
||||||
return '.jpg'
|
if not "v.redd.it" in link:
|
||||||
|
return '.jpg'
|
||||||
|
else:
|
||||||
|
return '.mp4'
|
||||||
|
|
||||||
def getFile(fileDir,tempDir,imageURL,indent=0):
|
def getFile(fileDir,tempDir,imageURL,indent=0):
|
||||||
"""Downloads given file to given directory.
|
"""Downloads given file to given directory.
|
||||||
@@ -56,16 +61,150 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
|
|||||||
tempDir,
|
tempDir,
|
||||||
reporthook=dlProgress)
|
reporthook=dlProgress)
|
||||||
os.rename(tempDir,fileDir)
|
os.rename(tempDir,fileDir)
|
||||||
print(" "*indent+"Downloaded"+" "*10)
|
|
||||||
break
|
|
||||||
except ConnectionResetError as exception:
|
except ConnectionResetError as exception:
|
||||||
print(" "*indent + str(exception))
|
print(" "*indent + str(exception))
|
||||||
print(" "*indent + "Trying again\n")
|
print(" "*indent + "Trying again\n")
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
raise FileNameTooLong
|
raise FileNameTooLong
|
||||||
|
else:
|
||||||
|
print(" "*indent+"Downloaded"+" "*10)
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
raise FileAlreadyExistsError
|
raise FileAlreadyExistsError
|
||||||
|
|
||||||
|
class Erome:
|
||||||
|
def __init__(self,directory,post):
|
||||||
|
try:
|
||||||
|
IMAGES = self.getLinks(post['postURL'])
|
||||||
|
except urllib.error.HTTPError:
|
||||||
|
raise NotADownloadableLinkError("Not a downloadable link")
|
||||||
|
|
||||||
|
imagesLenght = len(IMAGES)
|
||||||
|
howManyDownloaded = imagesLenght
|
||||||
|
duplicates = 0
|
||||||
|
|
||||||
|
if imagesLenght == 1:
|
||||||
|
|
||||||
|
extension = getExtension(IMAGES[0])
|
||||||
|
|
||||||
|
title = nameCorrector(post['postTitle'])
|
||||||
|
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+extension)
|
||||||
|
|
||||||
|
fileDir = directory / (
|
||||||
|
post["postSubmitter"]+"_"+title+"_"+post['postId']+extension
|
||||||
|
)
|
||||||
|
tempDir = directory / (
|
||||||
|
post["postSubmitter"]+"_"+title+"_"+post['postId']+".tmp"
|
||||||
|
)
|
||||||
|
|
||||||
|
imageURL = "https:" + IMAGES[0]
|
||||||
|
|
||||||
|
try:
|
||||||
|
getFile(fileDir,tempDir,imageURL)
|
||||||
|
except FileNameTooLong:
|
||||||
|
fileDir = directory / (post['postId'] + extension)
|
||||||
|
tempDir = directory / (post['postId'] + '.tmp')
|
||||||
|
getFile(fileDir,tempDir,imageURL)
|
||||||
|
|
||||||
|
else:
|
||||||
|
title = nameCorrector(post['postTitle'])
|
||||||
|
print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n")
|
||||||
|
|
||||||
|
folderDir = directory / (
|
||||||
|
post["postSubmitter"] + "_" + title + "_" + post['postId']
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if not os.path.exists(folderDir):
|
||||||
|
os.makedirs(folderDir)
|
||||||
|
except FileNotFoundError:
|
||||||
|
folderDir = directory / post['postId']
|
||||||
|
os.makedirs(folderDir)
|
||||||
|
|
||||||
|
for i in range(imagesLenght):
|
||||||
|
|
||||||
|
extension = getExtension(IMAGES[i])
|
||||||
|
|
||||||
|
fileName = str(i+1)
|
||||||
|
imageURL = "https:" + IMAGES[i]
|
||||||
|
|
||||||
|
fileDir = folderDir / (fileName + extension)
|
||||||
|
tempDir = folderDir / (fileName + ".tmp")
|
||||||
|
|
||||||
|
print(" ({}/{})".format(i+1,imagesLenght))
|
||||||
|
print(" {}".format(fileName+extension))
|
||||||
|
|
||||||
|
try:
|
||||||
|
getFile(fileDir,tempDir,imageURL,indent=2)
|
||||||
|
print()
|
||||||
|
except FileAlreadyExistsError:
|
||||||
|
print(" The file already exists" + " "*10,end="\n\n")
|
||||||
|
duplicates += 1
|
||||||
|
howManyDownloaded -= 1
|
||||||
|
|
||||||
|
except Exception as exception:
|
||||||
|
# raise exception
|
||||||
|
print("\n Could not get the file")
|
||||||
|
print(
|
||||||
|
" "
|
||||||
|
+ "{class_name}: {info}".format(
|
||||||
|
class_name=exception.__class__.__name__,
|
||||||
|
info=str(exception)
|
||||||
|
)
|
||||||
|
+ "\n"
|
||||||
|
)
|
||||||
|
exceptionType = exception
|
||||||
|
howManyDownloaded -= 1
|
||||||
|
|
||||||
|
if duplicates == imagesLenght:
|
||||||
|
raise FileAlreadyExistsError
|
||||||
|
elif howManyDownloaded + duplicates < imagesLenght:
|
||||||
|
raise AlbumNotDownloadedCompletely(
|
||||||
|
"Album Not Downloaded Completely"
|
||||||
|
)
|
||||||
|
|
||||||
|
def getLinks(self,url,lineNumber=129):
|
||||||
|
|
||||||
|
content = []
|
||||||
|
lineNumber = None
|
||||||
|
|
||||||
|
class EromeParser(HTMLParser):
|
||||||
|
tag = None
|
||||||
|
def handle_starttag(self, tag, attrs):
|
||||||
|
self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}
|
||||||
|
|
||||||
|
pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
|
||||||
|
|
||||||
|
""" FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS"""
|
||||||
|
for i in range(len(pageSource)):
|
||||||
|
obj = EromeParser()
|
||||||
|
obj.feed(pageSource[i])
|
||||||
|
tag = obj.tag
|
||||||
|
|
||||||
|
if tag is not None:
|
||||||
|
if "div" in tag:
|
||||||
|
if "id" in tag["div"]:
|
||||||
|
if tag["div"]["id"] == "album":
|
||||||
|
lineNumber = i
|
||||||
|
break
|
||||||
|
|
||||||
|
for line in pageSource[lineNumber:]:
|
||||||
|
obj = EromeParser()
|
||||||
|
obj.feed(line)
|
||||||
|
tag = obj.tag
|
||||||
|
if tag is not None:
|
||||||
|
if "img" in tag:
|
||||||
|
if "class" in tag["img"]:
|
||||||
|
if tag["img"]["class"]=="img-front":
|
||||||
|
content.append(tag["img"]["src"])
|
||||||
|
elif "source" in tag:
|
||||||
|
content.append(tag["source"]["src"])
|
||||||
|
|
||||||
|
return [
|
||||||
|
link for link in content \
|
||||||
|
if link.endswith("_480p.mp4") or not link.endswith(".mp4")
|
||||||
|
]
|
||||||
|
|
||||||
class Imgur:
|
class Imgur:
|
||||||
def __init__(self,directory,post):
|
def __init__(self,directory,post):
|
||||||
self.imgurClient = self.initImgur()
|
self.imgurClient = self.initImgur()
|
||||||
@@ -85,13 +224,22 @@ class Imgur:
|
|||||||
post['postExt'] = getExtension(post['mediaURL'])
|
post['postExt'] = getExtension(post['mediaURL'])
|
||||||
|
|
||||||
title = nameCorrector(post['postTitle'])
|
title = nameCorrector(post['postTitle'])
|
||||||
print(title+"_" +post['postId']+post['postExt'])
|
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+post['postExt'])
|
||||||
|
|
||||||
fileDir = title + "_" + post['postId'] + post['postExt']
|
fileDir = directory / (
|
||||||
fileDir = directory / fileDir
|
post["postSubmitter"]
|
||||||
|
+ "_" + title
|
||||||
|
+ "_" + post['postId']
|
||||||
|
+ post['postExt']
|
||||||
|
)
|
||||||
|
|
||||||
|
tempDir = directory / (
|
||||||
|
post["postSubmitter"]
|
||||||
|
+ "_" + title
|
||||||
|
+ "_" + post['postId']
|
||||||
|
+ ".tmp"
|
||||||
|
)
|
||||||
|
|
||||||
tempDir = title + "_" + post['postId'] + '.tmp'
|
|
||||||
tempDir = directory / tempDir
|
|
||||||
try:
|
try:
|
||||||
getFile(fileDir,tempDir,post['mediaURL'])
|
getFile(fileDir,tempDir,post['mediaURL'])
|
||||||
except FileNameTooLong:
|
except FileNameTooLong:
|
||||||
@@ -107,9 +255,11 @@ class Imgur:
|
|||||||
duplicates = 0
|
duplicates = 0
|
||||||
|
|
||||||
title = nameCorrector(post['postTitle'])
|
title = nameCorrector(post['postTitle'])
|
||||||
print(title+"_"+post['postId'],end="\n\n")
|
print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n")
|
||||||
|
|
||||||
folderDir = directory / (title+"_"+post['postId'])
|
folderDir = directory / (
|
||||||
|
post["postSubmitter"] + "_" + title + "_" + post['postId']
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if not os.path.exists(folderDir):
|
if not os.path.exists(folderDir):
|
||||||
@@ -162,14 +312,23 @@ class Imgur:
|
|||||||
|
|
||||||
except Exception as exception:
|
except Exception as exception:
|
||||||
print("\n Could not get the file")
|
print("\n Could not get the file")
|
||||||
print(" " + str(exception) + "\n")
|
print(
|
||||||
|
" "
|
||||||
|
+ "{class_name}: {info}".format(
|
||||||
|
class_name=exception.__class__.__name__,
|
||||||
|
info=str(exception)
|
||||||
|
)
|
||||||
|
+ "\n"
|
||||||
|
)
|
||||||
exceptionType = exception
|
exceptionType = exception
|
||||||
howManyDownloaded -= 1
|
howManyDownloaded -= 1
|
||||||
|
|
||||||
if duplicates == imagesLenght:
|
if duplicates == imagesLenght:
|
||||||
raise FileAlreadyExistsError
|
raise FileAlreadyExistsError
|
||||||
elif howManyDownloaded < imagesLenght:
|
elif howManyDownloaded + duplicates < imagesLenght:
|
||||||
raise AlbumNotDownloadedCompletely
|
raise AlbumNotDownloadedCompletely(
|
||||||
|
"Album Not Downloaded Completely"
|
||||||
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def initImgur():
|
def initImgur():
|
||||||
@@ -217,18 +376,23 @@ class Gfycat:
|
|||||||
try:
|
try:
|
||||||
POST['mediaURL'] = self.getLink(POST['postURL'])
|
POST['mediaURL'] = self.getLink(POST['postURL'])
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise NotADownloadableLinkError
|
raise NotADownloadableLinkError("Could not read the page source")
|
||||||
except Exception as exception:
|
except Exception as exception:
|
||||||
raise NotADownloadableLinkError
|
raise NotADownloadableLinkError("Could not read the page source")
|
||||||
|
|
||||||
POST['postExt'] = getExtension(POST['mediaURL'])
|
POST['postExt'] = getExtension(POST['mediaURL'])
|
||||||
|
|
||||||
if not os.path.exists(directory): os.makedirs(directory)
|
if not os.path.exists(directory): os.makedirs(directory)
|
||||||
title = nameCorrector(POST['postTitle'])
|
title = nameCorrector(POST['postTitle'])
|
||||||
print(title+"_"+POST['postId']+POST['postExt'])
|
print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'])
|
||||||
|
|
||||||
fileDir = directory / (title+"_"+POST['postId']+POST['postExt'])
|
fileDir = directory / (
|
||||||
tempDir = directory / (title+"_"+POST['postId']+".tmp")
|
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt']
|
||||||
|
)
|
||||||
|
tempDir = directory / (
|
||||||
|
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp"
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
getFile(fileDir,tempDir,POST['mediaURL'])
|
getFile(fileDir,tempDir,POST['mediaURL'])
|
||||||
except FileNameTooLong:
|
except FileNameTooLong:
|
||||||
@@ -248,8 +412,7 @@ class Gfycat:
|
|||||||
if url[-1:] == '/':
|
if url[-1:] == '/':
|
||||||
url = url[:-1]
|
url = url[:-1]
|
||||||
|
|
||||||
if 'gifs' in url:
|
url = "https://gfycat.com/" + url.split('/')[-1]
|
||||||
url = "https://gfycat.com/" + url.split('/')[-1]
|
|
||||||
|
|
||||||
pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
|
pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
|
||||||
|
|
||||||
@@ -266,7 +429,7 @@ class Gfycat:
|
|||||||
break
|
break
|
||||||
|
|
||||||
if "".join(link) == "":
|
if "".join(link) == "":
|
||||||
raise NotADownloadableLinkError
|
raise NotADownloadableLinkError("Could not read the page source")
|
||||||
|
|
||||||
return "".join(link)
|
return "".join(link)
|
||||||
|
|
||||||
@@ -275,13 +438,14 @@ class Direct:
|
|||||||
POST['postExt'] = getExtension(POST['postURL'])
|
POST['postExt'] = getExtension(POST['postURL'])
|
||||||
if not os.path.exists(directory): os.makedirs(directory)
|
if not os.path.exists(directory): os.makedirs(directory)
|
||||||
title = nameCorrector(POST['postTitle'])
|
title = nameCorrector(POST['postTitle'])
|
||||||
print(title+"_"+POST['postId']+POST['postExt'])
|
print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'])
|
||||||
|
|
||||||
fileDir = title+"_"+POST['postId']+POST['postExt']
|
fileDir = directory / (
|
||||||
fileDir = directory / fileDir
|
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt']
|
||||||
|
)
|
||||||
tempDir = title+"_"+POST['postId']+".tmp"
|
tempDir = directory / (
|
||||||
tempDir = directory / tempDir
|
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp"
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
getFile(fileDir,tempDir,POST['postURL'])
|
getFile(fileDir,tempDir,POST['postURL'])
|
||||||
@@ -296,10 +460,11 @@ class Self:
|
|||||||
if not os.path.exists(directory): os.makedirs(directory)
|
if not os.path.exists(directory): os.makedirs(directory)
|
||||||
|
|
||||||
title = nameCorrector(post['postTitle'])
|
title = nameCorrector(post['postTitle'])
|
||||||
print(title+"_"+post['postId']+".md")
|
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+".md")
|
||||||
|
|
||||||
fileDir = title+"_"+post['postId']+".md"
|
fileDir = directory / (
|
||||||
fileDir = directory / fileDir
|
post["postSubmitter"]+"_"+title+"_"+post['postId']+".md"
|
||||||
|
)
|
||||||
|
|
||||||
if Path.is_file(fileDir):
|
if Path.is_file(fileDir):
|
||||||
raise FileAlreadyExistsError
|
raise FileAlreadyExistsError
|
||||||
@@ -322,7 +487,11 @@ class Self:
|
|||||||
+ ")\n"
|
+ ")\n"
|
||||||
+ post["postContent"]
|
+ post["postContent"]
|
||||||
+ "\n\n---\n\n"
|
+ "\n\n---\n\n"
|
||||||
+ "submitted by [u/"
|
+ "submitted to [r/"
|
||||||
|
+ post["postSubreddit"]
|
||||||
|
+ "](https://www.reddit.com/r/"
|
||||||
|
+ post["postSubreddit"]
|
||||||
|
+ ") by [u/"
|
||||||
+ post["postSubmitter"]
|
+ post["postSubmitter"]
|
||||||
+ "](https://www.reddit.com/user/"
|
+ "](https://www.reddit.com/user/"
|
||||||
+ post["postSubmitter"]
|
+ post["postSubmitter"]
|
||||||
|
|||||||
@@ -80,4 +80,10 @@ class InvalidSortingType(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
class FileNotFoundError(Exception):
|
class FileNotFoundError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class NoSuitablePost(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class ImgurLimitError(Exception):
|
||||||
pass
|
pass
|
||||||
216
src/searcher.py
216
src/searcher.py
@@ -1,4 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import random
|
import random
|
||||||
import socket
|
import socket
|
||||||
import webbrowser
|
import webbrowser
|
||||||
@@ -14,60 +15,62 @@ from src.errors import (NoMatchingSubmissionFound, NoPrawSupport,
|
|||||||
|
|
||||||
print = printToFile
|
print = printToFile
|
||||||
|
|
||||||
class GetAuth:
|
|
||||||
def __init__(self,redditInstance,port):
|
|
||||||
self.redditInstance = redditInstance
|
|
||||||
self.PORT = int(port)
|
|
||||||
|
|
||||||
def recieve_connection(self):
|
|
||||||
"""Wait for and then return a connected socket..
|
|
||||||
Opens a TCP connection on port 8080, and waits for a single client.
|
|
||||||
"""
|
|
||||||
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
||||||
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
|
||||||
server.bind(('localhost', self.PORT))
|
|
||||||
server.listen(1)
|
|
||||||
client = server.accept()[0]
|
|
||||||
server.close()
|
|
||||||
return client
|
|
||||||
|
|
||||||
def send_message(self, message):
|
|
||||||
"""Send message to client and close the connection."""
|
|
||||||
self.client.send('HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8'))
|
|
||||||
self.client.close()
|
|
||||||
|
|
||||||
def getRefreshToken(self,*scopes):
|
|
||||||
state = str(random.randint(0, 65000))
|
|
||||||
url = self.redditInstance.auth.url(scopes, state, 'permanent')
|
|
||||||
print("Go to this URL and login to reddit:\n\n",url)
|
|
||||||
webbrowser.open(url,new=2)
|
|
||||||
|
|
||||||
self.client = self.recieve_connection()
|
|
||||||
data = self.client.recv(1024).decode('utf-8')
|
|
||||||
param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
|
|
||||||
params = {
|
|
||||||
key: value for (key, value) in [token.split('=') \
|
|
||||||
for token in param_tokens]
|
|
||||||
}
|
|
||||||
if state != params['state']:
|
|
||||||
self.send_message(
|
|
||||||
client, 'State mismatch. Expected: {} Received: {}'
|
|
||||||
.format(state, params['state'])
|
|
||||||
)
|
|
||||||
raise RedditLoginFailed
|
|
||||||
elif 'error' in params:
|
|
||||||
self.send_message(client, params['error'])
|
|
||||||
raise RedditLoginFailed
|
|
||||||
|
|
||||||
refresh_token = self.redditInstance.auth.authorize(params['code'])
|
|
||||||
self.send_message(
|
|
||||||
"<script>" \
|
|
||||||
"alert(\"You can go back to terminal window now.\");" \
|
|
||||||
"</script>"
|
|
||||||
)
|
|
||||||
return (self.redditInstance,refresh_token)
|
|
||||||
|
|
||||||
def beginPraw(config,user_agent = str(socket.gethostname())):
|
def beginPraw(config,user_agent = str(socket.gethostname())):
|
||||||
|
class GetAuth:
|
||||||
|
def __init__(self,redditInstance,port):
|
||||||
|
self.redditInstance = redditInstance
|
||||||
|
self.PORT = int(port)
|
||||||
|
|
||||||
|
def recieve_connection(self):
|
||||||
|
"""Wait for and then return a connected socket..
|
||||||
|
Opens a TCP connection on port 8080, and waits for a single client.
|
||||||
|
"""
|
||||||
|
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||||
|
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||||
|
server.bind(('localhost', self.PORT))
|
||||||
|
server.listen(1)
|
||||||
|
client = server.accept()[0]
|
||||||
|
server.close()
|
||||||
|
return client
|
||||||
|
|
||||||
|
def send_message(self, message):
|
||||||
|
"""Send message to client and close the connection."""
|
||||||
|
self.client.send(
|
||||||
|
'HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8')
|
||||||
|
)
|
||||||
|
self.client.close()
|
||||||
|
|
||||||
|
def getRefreshToken(self,*scopes):
|
||||||
|
state = str(random.randint(0, 65000))
|
||||||
|
url = self.redditInstance.auth.url(scopes, state, 'permanent')
|
||||||
|
print("Go to this URL and login to reddit:\n\n",url)
|
||||||
|
webbrowser.open(url,new=2)
|
||||||
|
|
||||||
|
self.client = self.recieve_connection()
|
||||||
|
data = self.client.recv(1024).decode('utf-8')
|
||||||
|
param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
|
||||||
|
params = {
|
||||||
|
key: value for (key, value) in [token.split('=') \
|
||||||
|
for token in param_tokens]
|
||||||
|
}
|
||||||
|
if state != params['state']:
|
||||||
|
self.send_message(
|
||||||
|
client, 'State mismatch. Expected: {} Received: {}'
|
||||||
|
.format(state, params['state'])
|
||||||
|
)
|
||||||
|
raise RedditLoginFailed
|
||||||
|
elif 'error' in params:
|
||||||
|
self.send_message(client, params['error'])
|
||||||
|
raise RedditLoginFailed
|
||||||
|
|
||||||
|
refresh_token = self.redditInstance.auth.authorize(params['code'])
|
||||||
|
self.send_message(
|
||||||
|
"<script>" \
|
||||||
|
"alert(\"You can go back to terminal window now.\");" \
|
||||||
|
"</script>"
|
||||||
|
)
|
||||||
|
return (self.redditInstance,refresh_token)
|
||||||
|
|
||||||
"""Start reddit instance"""
|
"""Start reddit instance"""
|
||||||
|
|
||||||
scopes = ['identity','history','read']
|
scopes = ['identity','history','read']
|
||||||
@@ -89,7 +92,7 @@ def beginPraw(config,user_agent = str(socket.gethostname())):
|
|||||||
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
|
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
|
||||||
reddit = authorizedInstance[0]
|
reddit = authorizedInstance[0]
|
||||||
refresh_token = authorizedInstance[1]
|
refresh_token = authorizedInstance[1]
|
||||||
jsonFile("config.json").add({
|
jsonFile(GLOBAL.configDirectory / "config.json").add({
|
||||||
"reddit_refresh_token":refresh_token
|
"reddit_refresh_token":refresh_token
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
@@ -98,7 +101,7 @@ def beginPraw(config,user_agent = str(socket.gethostname())):
|
|||||||
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
|
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
|
||||||
reddit = authorizedInstance[0]
|
reddit = authorizedInstance[0]
|
||||||
refresh_token = authorizedInstance[1]
|
refresh_token = authorizedInstance[1]
|
||||||
jsonFile("config.json").add({
|
jsonFile(GLOBAL.configDirectory / "config.json").add({
|
||||||
"reddit_refresh_token":refresh_token
|
"reddit_refresh_token":refresh_token
|
||||||
})
|
})
|
||||||
return reddit
|
return reddit
|
||||||
@@ -123,7 +126,7 @@ def getPosts(args):
|
|||||||
if args["user"] == "me":
|
if args["user"] == "me":
|
||||||
args["user"] = str(reddit.user.me())
|
args["user"] = str(reddit.user.me())
|
||||||
|
|
||||||
print("\nGETTING POSTS\n.\n.\n.\n")
|
# print("\nGETTING POSTS\n.\n.\n.\n")
|
||||||
|
|
||||||
if not "search" in args:
|
if not "search" in args:
|
||||||
if args["sort"] == "top" or args["sort"] == "controversial":
|
if args["sort"] == "top" or args["sort"] == "controversial":
|
||||||
@@ -245,8 +248,6 @@ def getPosts(args):
|
|||||||
raise MultiredditNotFound
|
raise MultiredditNotFound
|
||||||
|
|
||||||
elif "submitted" in args:
|
elif "submitted" in args:
|
||||||
# TODO
|
|
||||||
# USE REDDIT.USER.ME() INSTEAD WHEN "ME" PASSED AS A --USER
|
|
||||||
print (
|
print (
|
||||||
"submitted posts of {user}\nsort: {sort}\n" \
|
"submitted posts of {user}\nsort: {sort}\n" \
|
||||||
"time: {time}\nlimit: {limit}\n".format(
|
"time: {time}\nlimit: {limit}\n".format(
|
||||||
@@ -263,8 +264,6 @@ def getPosts(args):
|
|||||||
)
|
)
|
||||||
|
|
||||||
elif "upvoted" in args:
|
elif "upvoted" in args:
|
||||||
# TODO
|
|
||||||
# USE REDDIT.USER.ME() INSTEAD WHEN "ME" PASSED AS A --USER
|
|
||||||
print (
|
print (
|
||||||
"upvoted posts of {user}\nlimit: {limit}\n".format(
|
"upvoted posts of {user}\nlimit: {limit}\n".format(
|
||||||
user=args["user"],
|
user=args["user"],
|
||||||
@@ -299,6 +298,8 @@ def redditSearcher(posts,SINGLE_POST=False):
|
|||||||
gfycatCount = 0
|
gfycatCount = 0
|
||||||
global imgurCount
|
global imgurCount
|
||||||
imgurCount = 0
|
imgurCount = 0
|
||||||
|
global eromeCount
|
||||||
|
eromeCount = 0
|
||||||
global directCount
|
global directCount
|
||||||
directCount = 0
|
directCount = 0
|
||||||
global selfCount
|
global selfCount
|
||||||
@@ -306,6 +307,7 @@ def redditSearcher(posts,SINGLE_POST=False):
|
|||||||
|
|
||||||
allPosts = {}
|
allPosts = {}
|
||||||
|
|
||||||
|
print("GETTING POSTS")
|
||||||
postsFile = createLogFile("POSTS")
|
postsFile = createLogFile("POSTS")
|
||||||
|
|
||||||
if SINGLE_POST:
|
if SINGLE_POST:
|
||||||
@@ -326,42 +328,56 @@ def redditSearcher(posts,SINGLE_POST=False):
|
|||||||
if result is not None:
|
if result is not None:
|
||||||
details = result
|
details = result
|
||||||
orderCount += 1
|
orderCount += 1
|
||||||
printSubmission(submission,subCount,orderCount)
|
if GLOBAL.arguments.verbose:
|
||||||
|
printSubmission(submission,subCount,orderCount)
|
||||||
subList.append(details)
|
subList.append(details)
|
||||||
|
|
||||||
postsFile.add({subCount:[details]})
|
postsFile.add({subCount:[details]})
|
||||||
|
|
||||||
else:
|
else:
|
||||||
for submission in posts:
|
try:
|
||||||
subCount += 1
|
for submission in posts:
|
||||||
|
subCount += 1
|
||||||
|
|
||||||
try:
|
if subCount % 100 == 0 and not GLOBAL.arguments.verbose:
|
||||||
details = {'postId':submission.id,
|
sys.stdout.write("• ")
|
||||||
'postTitle':submission.title,
|
sys.stdout.flush()
|
||||||
'postSubmitter':str(submission.author),
|
|
||||||
'postType':None,
|
|
||||||
'postURL':submission.url,
|
|
||||||
'postSubreddit':submission.subreddit.display_name}
|
|
||||||
except AttributeError:
|
|
||||||
continue
|
|
||||||
|
|
||||||
result = checkIfMatching(submission)
|
if subCount % 1000 == 0:
|
||||||
|
sys.stdout.write("\n")
|
||||||
|
sys.stdout.flush()
|
||||||
|
|
||||||
if result is not None:
|
try:
|
||||||
details = result
|
details = {'postId':submission.id,
|
||||||
orderCount += 1
|
'postTitle':submission.title,
|
||||||
printSubmission(submission,subCount,orderCount)
|
'postSubmitter':str(submission.author),
|
||||||
subList.append(details)
|
'postType':None,
|
||||||
|
'postURL':submission.url,
|
||||||
|
'postSubreddit':submission.subreddit.display_name}
|
||||||
|
except AttributeError:
|
||||||
|
continue
|
||||||
|
|
||||||
allPosts[subCount] = [details]
|
result = checkIfMatching(submission)
|
||||||
|
|
||||||
|
if result is not None:
|
||||||
|
details = result
|
||||||
|
orderCount += 1
|
||||||
|
if GLOBAL.arguments.verbose:
|
||||||
|
printSubmission(submission,subCount,orderCount)
|
||||||
|
subList.append(details)
|
||||||
|
|
||||||
|
allPosts[subCount] = [details]
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
print("\nKeyboardInterrupt",end="")
|
||||||
|
|
||||||
postsFile.add(allPosts)
|
postsFile.add(allPosts)
|
||||||
|
|
||||||
if not len(subList) == 0:
|
if not len(subList) == 0:
|
||||||
print(
|
print(
|
||||||
"\nTotal of {} submissions found!\n"\
|
f"\n\nTotal of {len(subList)} submissions found!\n"\
|
||||||
"{} GFYCATs, {} IMGURs, {} DIRECTs and {} SELF POSTS\n"
|
f"{gfycatCount} GFYCATs, {imgurCount} IMGURs, " \
|
||||||
.format(len(subList),gfycatCount,imgurCount,directCount,selfCount)
|
f"{eromeCount} EROMEs, {directCount} DIRECTs " \
|
||||||
|
f"and {selfCount} SELF POSTS"
|
||||||
)
|
)
|
||||||
return subList
|
return subList
|
||||||
else:
|
else:
|
||||||
@@ -370,6 +386,7 @@ def redditSearcher(posts,SINGLE_POST=False):
|
|||||||
def checkIfMatching(submission):
|
def checkIfMatching(submission):
|
||||||
global gfycatCount
|
global gfycatCount
|
||||||
global imgurCount
|
global imgurCount
|
||||||
|
global eromeCount
|
||||||
global directCount
|
global directCount
|
||||||
global selfCount
|
global selfCount
|
||||||
|
|
||||||
@@ -383,22 +400,24 @@ def checkIfMatching(submission):
|
|||||||
except AttributeError:
|
except AttributeError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if ('gfycat' in submission.domain) or \
|
if 'gfycat' in submission.domain:
|
||||||
('imgur' in submission.domain):
|
details['postType'] = 'gfycat'
|
||||||
|
gfycatCount += 1
|
||||||
|
return details
|
||||||
|
|
||||||
if 'gfycat' in submission.domain:
|
elif 'imgur' in submission.domain:
|
||||||
details['postType'] = 'gfycat'
|
details['postType'] = 'imgur'
|
||||||
gfycatCount += 1
|
imgurCount += 1
|
||||||
return details
|
return details
|
||||||
|
|
||||||
elif 'imgur' in submission.domain:
|
elif 'erome' in submission.domain:
|
||||||
details['postType'] = 'imgur'
|
details['postType'] = 'erome'
|
||||||
|
eromeCount += 1
|
||||||
imgurCount += 1
|
return details
|
||||||
return details
|
|
||||||
|
|
||||||
elif isDirectLink(submission.url):
|
elif isDirectLink(submission.url) is not False:
|
||||||
details['postType'] = 'direct'
|
details['postType'] = 'direct'
|
||||||
|
details['postURL'] = isDirectLink(submission.url)
|
||||||
directCount += 1
|
directCount += 1
|
||||||
return details
|
return details
|
||||||
|
|
||||||
@@ -435,7 +454,7 @@ def printSubmission(SUB,validNumber,totalNumber):
|
|||||||
|
|
||||||
def isDirectLink(URL):
|
def isDirectLink(URL):
|
||||||
"""Check if link is a direct image link.
|
"""Check if link is a direct image link.
|
||||||
If so, return True,
|
If so, return URL,
|
||||||
if not, return False
|
if not, return False
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -444,10 +463,13 @@ def isDirectLink(URL):
|
|||||||
URL = URL[:-1]
|
URL = URL[:-1]
|
||||||
|
|
||||||
if "i.reddituploads.com" in URL:
|
if "i.reddituploads.com" in URL:
|
||||||
return True
|
return URL
|
||||||
|
|
||||||
|
elif "v.redd.it" in URL:
|
||||||
|
return URL+"/DASH_600_K"
|
||||||
|
|
||||||
for extension in imageTypes:
|
for extension in imageTypes:
|
||||||
if extension in URL:
|
if extension in URL:
|
||||||
return True
|
return URL
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|||||||
22
src/tools.py
22
src/tools.py
@@ -14,6 +14,7 @@ class GLOBAL:
|
|||||||
config = None
|
config = None
|
||||||
arguments = None
|
arguments = None
|
||||||
directory = None
|
directory = None
|
||||||
|
configDirectory = Path.home() / "Bulk Downloader for Reddit"
|
||||||
reddit_client_id = "BSyphDdxYZAgVQ"
|
reddit_client_id = "BSyphDdxYZAgVQ"
|
||||||
reddit_client_secret = "bfqNJaRh8NMh-9eAr-t4TRz-Blk"
|
reddit_client_secret = "bfqNJaRh8NMh-9eAr-t4TRz-Blk"
|
||||||
printVanilla = print
|
printVanilla = print
|
||||||
@@ -74,8 +75,10 @@ def createLogFile(TITLE):
|
|||||||
put given arguments inside \"HEADER\" key
|
put given arguments inside \"HEADER\" key
|
||||||
"""
|
"""
|
||||||
|
|
||||||
folderDirectory = GLOBAL.directory / str(time.strftime("%d-%m-%Y_%H-%M-%S",
|
folderDirectory = GLOBAL.directory / "LOG_FILES" / \
|
||||||
time.localtime(GLOBAL.RUN_TIME)))
|
str(time.strftime(
|
||||||
|
"%d-%m-%Y_%H-%M-%S",time.localtime(GLOBAL.RUN_TIME)
|
||||||
|
))
|
||||||
logFilename = TITLE.upper()+'.json'
|
logFilename = TITLE.upper()+'.json'
|
||||||
|
|
||||||
if not path.exists(folderDirectory):
|
if not path.exists(folderDirectory):
|
||||||
@@ -94,16 +97,17 @@ def printToFile(*args, **kwargs):
|
|||||||
|
|
||||||
TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S",
|
TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S",
|
||||||
time.localtime(GLOBAL.RUN_TIME)))
|
time.localtime(GLOBAL.RUN_TIME)))
|
||||||
folderDirectory = GLOBAL.directory / TIME
|
folderDirectory = GLOBAL.directory / "LOG_FILES" / TIME
|
||||||
print(*args,**kwargs)
|
print(*args,**kwargs)
|
||||||
|
|
||||||
if not path.exists(folderDirectory):
|
if not path.exists(folderDirectory):
|
||||||
makedirs(folderDirectory)
|
makedirs(folderDirectory)
|
||||||
|
|
||||||
with io.open(
|
if not "file" in kwargs:
|
||||||
folderDirectory / "CONSOLE_LOG.txt","a",encoding="utf-8"
|
with io.open(
|
||||||
) as FILE:
|
folderDirectory / "CONSOLE_LOG.txt","a",encoding="utf-8"
|
||||||
print(*args, file=FILE, **kwargs)
|
) as FILE:
|
||||||
|
print(*args, file=FILE, **kwargs)
|
||||||
|
|
||||||
def nameCorrector(string):
|
def nameCorrector(string):
|
||||||
"""Swap strange characters from given string
|
"""Swap strange characters from given string
|
||||||
@@ -129,7 +133,7 @@ def nameCorrector(string):
|
|||||||
if len(string.split('\n')) > 1:
|
if len(string.split('\n')) > 1:
|
||||||
string = "".join(string.split('\n'))
|
string = "".join(string.split('\n'))
|
||||||
|
|
||||||
BAD_CHARS = ['\\','/',':','*','?','"','<','>','|','.',]
|
BAD_CHARS = ['\\','/',':','*','?','"','<','>','|','.','#']
|
||||||
|
|
||||||
if any(x in string for x in BAD_CHARS):
|
if any(x in string for x in BAD_CHARS):
|
||||||
for char in string:
|
for char in string:
|
||||||
|
|||||||
Reference in New Issue
Block a user