67 Commits

Author SHA1 Message Date
Ali Parlakci
a6997898ce Update version 2018-07-23 23:37:27 +03:00
Ali Parlakci
61632c7143 Improve error handling 2018-07-23 23:33:11 +03:00
Ali Parlakçı
9bff3399a8 Typo fix 2018-07-23 23:19:29 +03:00
Ali Parlakçı
b00d185f67 Update changelog 2018-07-23 23:18:10 +03:00
Ali Parlakçı
7314e17125 Added erome support 2018-07-23 23:16:56 +03:00
Ali Parlakci
2d334d56bf remove exclude mode 2018-07-23 22:57:54 +03:00
Ali Parlakçı
974517928f Update README.md 2018-07-23 22:07:28 +03:00
Ali Parlakçı
bcae177b1e Split download function 2018-07-23 22:06:33 +03:00
Ali Parlakci
229def6578 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-23 18:49:39 +03:00
Ali Parlakci
59b0376d6e Added directions for frontpage 2018-07-23 18:49:28 +03:00
Ali Parlakçı
cf1dc7d08c Rename changelog section 2018-07-22 18:16:11 +03:00
Ali Parlakci
27532408c1 Update version 2018-07-22 18:06:46 +03:00
Ali Parlakci
32647beee9 Update changelog 2018-07-22 18:06:24 +03:00
Ali Parlakci
a67da461d2 Fixed the bug that makes multireddit mode unusable 2018-07-22 18:05:20 +03:00
Ali Parlakci
8c6f593496 Update changelog 2018-07-22 17:39:30 +03:00
Ali Parlakci
b60ce8a71e Put log files in a folder 2018-07-22 17:38:35 +03:00
Ali Parlakci
49920cc457 Bug fix 2018-07-22 17:25:30 +03:00
Ali Parlakci
c70e7c2ebb Update version 2018-07-22 14:39:09 +03:00
Ali Parlakci
3931dfff54 Update links 2018-07-21 22:03:16 +03:00
Ali Parlakci
4a8c2377f9 Updated --help page 2018-07-21 21:55:01 +03:00
Ali Parlakci
8a18a42a9a Updated changelog 2018-07-21 21:54:23 +03:00
Ali Parlakçı
6c2d748fbc Exclude post types (#38)
* Added argument for excluded links

* Added exclude in PromptUser()

* Added functionality for exclude and bug fix
2018-07-21 21:52:28 +03:00
Ali Parlakci
8c966df105 Improved traceback 2018-07-21 21:50:54 +03:00
Ali Parlakci
2adf2c0451 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-20 13:34:51 +03:00
Ali Parlakci
3e3a2df4d1 Bug fix at direct links 2018-07-20 13:34:23 +03:00
Ali Parlakci
7548a01019 Bug fix at direct links 2018-07-20 13:33:50 +03:00
Ali Parlakci
2ab16608d5 Update links 2018-07-20 13:06:01 +03:00
Ali Parlakci
e15f33b97a Fix README 2018-07-20 13:04:47 +03:00
Ali Parlakci
27211f993c 0 input for no limit 2018-07-20 13:03:50 +03:00
Ali Parlakci
87d3b294f7 0 input for no limit 2018-07-20 13:01:39 +03:00
Ali Parlakci
8128378dcd 0 input for no limit 2018-07-20 13:01:21 +03:00
Ali Parlakçı
cc93aa3012 Update commit link 2018-07-19 15:47:28 +03:00
Ali Parlakci
50c4a8d6d7 Update version 2018-07-19 15:38:49 +03:00
Ali Parlakci
5737904a54 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-19 15:35:46 +03:00
Ali Parlakci
f6eba6c5b0 Added more gfycat links 2018-07-19 15:34:58 +03:00
Ali Parlakci
41cbb58db3 Added more gfycat links 2018-07-19 15:22:12 +03:00
Ali Parlakçı
c569124406 Update Changelog 2018-07-19 14:58:17 +03:00
Ali Parlakçı
1a3836a8e1 Added v.redd.it support (#36) 2018-07-19 14:57:16 +03:00
Ali Parlakci
fde6a1fac4 Added custom exception descriptions to FAILED.json file 2018-07-19 14:56:00 +03:00
Ali Parlakci
6bba2c4dbb Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-18 09:17:48 +03:00
Ali Parlakci
a078d44236 Edited FAQ 2018-07-18 09:17:36 +03:00
Ali Parlakçı
deae0be769 Delete _config.yml 2018-07-15 10:54:40 +03:00
Ali Parlakci
3cf0203e6b Typo fix 2018-07-13 14:39:01 +03:00
Ali Parlakci
0b31db0e2e Update FAQ 2018-07-13 14:37:35 +03:00
Ali Parlakci
d3f2b1b08e Update executables' names 2018-07-13 14:34:20 +03:00
Ali Parlakci
0ec4bb3008 Update version 2018-07-13 14:18:18 +03:00
Ali Parlakci
0dbe2ed917 Update changelog 2018-07-13 14:13:39 +03:00
Ali Parlakci
9f831e1b78 Added .exe to executable's extension 2018-07-13 14:12:17 +03:00
Ali Parlakci
59012077e1 Excludes build folders 2018-07-13 14:11:41 +03:00
Ali Parlakci
5e3c79160b Changed config.json path 2018-07-13 14:10:21 +03:00
Ali Parlakci
1e8eaa1a8d Update changelog 2018-07-12 23:05:13 +03:00
Ali Parlakci
7dbc83fdce Initial commit 2018-07-12 23:03:00 +03:00
Ali Parlakci
50a77f6ba5 Update version 2018-07-12 22:19:46 +03:00
Ali Parlakci
4f7e406cd6 Take multiple subreddits 2018-07-12 22:00:43 +03:00
Ali Parlakci
ded3cece8c Update changelog 2018-07-12 21:16:20 +03:00
Ali Parlakci
dd671fd738 Accept exit to exit the program when taking arguments 2018-07-12 21:09:31 +03:00
Ali Parlakci
b357dff52c Added more examples 2018-07-12 14:31:39 +03:00
Ali Parlakci
32ffd3b861 Added dependency installation 2018-07-12 14:27:16 +03:00
Ali Parlakci
02673c3950 Update changelog 2018-07-12 14:15:26 +03:00
Ali Parlakci
8448e47080 Wait on KeyboardInterrupt 2018-07-12 14:14:54 +03:00
Ali Parlakci
39f2c73f4c Another option to open terminal added 2018-07-12 13:35:45 +03:00
Ali Parlakci
fe942b4734 Added linux executable guide 2018-07-12 13:32:59 +03:00
Ali Parlakci
205617e051 Changed quit() to sys.exit() 2018-07-12 13:00:02 +03:00
Ali Parlakci
b93b206a96 Typo fix 2018-07-12 12:31:32 +03:00
Ali Parlakci
b84684f786 Check if installed 2018-07-12 12:30:11 +03:00
Ali Parlakci
68558950ca Broken links fixed 2018-07-12 12:26:46 +03:00
aliparlakci
795965f754 Readme refactor (#35)
* Shorten the README.md file

* Added more information and guides

* Typo fix

* Rename sections
2018-07-12 12:25:09 +03:00
11 changed files with 513 additions and 222 deletions

.gitignore

@@ -1,4 +1,5 @@
build/
dist/
MANIFEST
__pycache__/
src/__pycache__/
logs/
*.json

README.md

@@ -3,20 +3,29 @@ This program downloads imgur, gfycat and direct image and video links of saved p
**PLEASE** post any issue you have with the script to the [Issues](https://github.com/aliparlakci/bulk-downloader-for-reddit/issues) tab. Since I don't have any testers or contributors, I need your feedback.

## What it can do
- Can get posts from: frontpage, subreddits, multireddits, redditor's submissions, upvoted and saved posts; search results or just plain reddit links
- Sorts posts by hot, top, new and so on
- Downloads imgur albums, gfycat links, [self posts](#how-do-i-open-self-post-files) and any link to a direct image
- Skips the existing ones
- Puts post titles into the files' names
- Puts every post into its subreddit's folder
- Saves a reusable copy of the details of the posts that are found so that they can be re-downloaded again
- Logs failed ones in a file so that you can try to download them later
- Can run with double-clicking on Windows

## [Download the latest release](https://github.com/aliparlakci/bulk-downloader-for-reddit/releases/latest)

## How it works
- For **Windows** and **Linux** users, there are executable files to run easily without installing a third-party program. But if you are paranoid like me, you can **[compile it from source code](docs/COMPILE_FROM_SOURCE.md)**.
  - On Windows, double-click the bulk-downloader-for-reddit file
  - On Linux, extract the files to a folder, open a terminal inside it and type **`./bulk-downloader-for-reddit`**
- **MacOS** users have to **[compile it from source code](docs/COMPILE_FROM_SOURCE.md)**.

The script also accepts **command-line arguments**; see **[`--help`](docs/COMMAND_LINE_ARGUMENTS.md)** for further information.

## Setting up the script
Because this is not a commercial app, you need to create an imgur developer app for the API to work.
@@ -32,20 +41,49 @@ It should redirect to a page which shows your **imgur_client_id** and **imgur_client_secret**
\* Select **OAuth 2 authorization without a callback URL** first, then select **Anonymous usage without user authorization** if it says *Authorization callback URL: required*
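If you want to verify the imgur credentials before running the script, here is a minimal sketch using the same `imgurpython` client the script depends on; the id and secret values are placeholders for the ones imgur shows you:

```python
from imgurpython import ImgurClient

# Placeholders: paste the values from imgur's "register an application" page.
client = ImgurClient("YOUR_IMGUR_CLIENT_ID", "YOUR_IMGUR_CLIENT_SECRET")

# Anonymous usage is enough for downloading; this call only confirms the
# credentials are accepted and prints the remaining API credits.
print(client.get_credits())
```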
## FAQ

### How do I open self post files?
- Self posts are held at reddit as styled with markdown. So, the script downloads them as they are in order not to lose their styling.
However, there is a [great Chrome extension](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with their styling. Install it and open the files with [Chrome](https://www.google.com/intl/tr/chrome/).
However, they are basically text files. You can also view them with any text editor such as Notepad on Windows, gedit on Linux or Text Editor on MacOS.
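Since they are plain Markdown text, you could also convert a downloaded self post to HTML yourself. A sketch using the third-party `markdown` package (`pip install markdown`), with a hypothetical file name — this is not something the script does for you:

```python
from pathlib import Path

import markdown  # third-party package, installed separately

# Hypothetical path to a downloaded self post inside its subreddit folder.
post_file = Path("gifs") / "Some post title_abc123.md"

html = markdown.markdown(post_file.read_text(encoding="utf-8"))
post_file.with_suffix(".html").write_text(html, encoding="utf-8")
```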
### How can I change my credentials?
- All of the user data is held in the **config.json** file, which is in a folder named "Bulk Downloader for Reddit" in your **Home** directory. You can edit it there.
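If you prefer to inspect or edit that file programmatically, here is a minimal sketch; the exact keys depend on what the script has stored (the reddit refresh token, the imgur credentials), so print the file first instead of assuming them:

```python
import json
from pathlib import Path

# Location used by the script (see GLOBAL.configDirectory in src/tools.py).
config_path = Path.home() / "Bulk Downloader for Reddit" / "config.json"

config = json.loads(config_path.read_text(encoding="utf-8"))
print(json.dumps(config, indent=2))  # shows which keys are actually stored

# Hypothetical edit; replace the key with one of the printed ones.
config["imgur_client_id"] = "NEW_IMGUR_CLIENT_ID"
config_path.write_text(json.dumps(config, indent=2), encoding="utf-8")
```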
## Changes on *master*
### [23/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7314e17125aa78fd4e6b28e26fda7ec7db7e0147)
- Split download() function
- Added erome support
- Remove exclude feature
- Bug fix
### [22/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a67da461d2fcd70672effcb20c8179e3224091bb)
- Put log files in a folder named "LOG_FILES"
- Fixed the bug that makes multireddit mode unusable
### [21/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/4a8c2377f9fb4d60ed7eeb8d50aaf9a26492462a)
- Added exclude mode
### [20/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/commit/7548a010198fb693841ca03654d2c9bdf5742139)
- "0" input for no limit
- Fixed the bug that recognized non-image direct links as image links
### [19/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/41cbb58db34f500a8a5ecc3ac4375bf6c3b275bb)
- Added v.redd.it support
- Added custom exception descriptions to FAILED.json file
- Fixed the bug that prevents downloading some gfycat URLs
### [13/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/9f831e1b784a770c82252e909462871401a05c11)
- Change config.json file's path to home directory
### [12/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/50a77f6ba54c24f5647d5ea4e177400b71ff04a7)
- Added binaries for Windows and Linux
- Wait on KeyboardInterrupt
- Accept multiple subreddit input
- Fixed the bug that prevented choosing "[0] exit" by typing "exit"
## Changelog
### [11/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a28a7776ab826dea2a8d93873a94cd46db3a339b)
- Improvements on UX and UI
- Added logging errors to CONSOLE_LOG.txt

_config.yml (deleted)

@@ -1,5 +0,0 @@
theme: jekyll-theme-minimal
show_downloads: false
#title: Bulk Downloader for Reddit
description: Code written by Ali PARLAKCI
google_analytics: UA-80780721-3

docs/COMMAND_LINE_ARGUMENTS.md

@@ -1,6 +1,10 @@
# Using command-line arguments
See the **[compiling from source](COMPILE_FROM_SOURCE.md)** page first unless you are using an executable file. If you are using an executable file, see [using terminal](COMPILE_FROM_SOURCE.md#using-terminal) and come back.
***Use*** `.\bulk-downloader-for-reddit.exe` ***or*** `./bulk-downloader-for-reddit` ***if you are using the executable***.
```console
usage: script.py [-h] [--directory DIRECTORY] [--link link] [--saved]
[--submitted] [--upvoted] [--log LOG FILE]
[--subreddit SUBREDDIT [SUBREDDIT ...]]
@@ -19,7 +23,8 @@ optional arguments:
--saved Triggers saved mode
--submitted Gets posts of --user
--upvoted Gets upvoted posts of --user
--log LOG FILE Takes a log file which was created by the script itself (a json file), reads the posts in it and tries downloading them again
--subreddit SUBREDDIT [SUBREDDIT ...]
Triggers subreddit mode and takes subreddit's name
without r/. use "frontpage" for frontpage
@@ -37,7 +42,7 @@ optional arguments:
downloading later
```
# Examples
- **Use `python3` instead of `python` if you are using *MacOS* or *Linux***
@@ -46,40 +51,49 @@ python script.py
```
```console
.\bulk-downloader-for-reddit.exe
```
```console
python script.py
```
```console
.\bulk-downloader-for-reddit.exe --directory .\\NEW_FOLDER --search cats --sort new --time all --subreddit gifs pics --NoDownload
```
```console
./bulk-downloader-for-reddit --directory .\\NEW_FOLDER\\ANOTHER_FOLDER --saved --limit 1000
```
```console
python script.py --directory .\\NEW_FOLDER --sort new --time all --limit 10 --link "https://www.reddit.com/r/gifs/search?q=dogs&restrict_sr=on&type=link&sort=new&t=month"
```
```console
python script.py --directory .\\NEW_FOLDER --link "https://www.reddit.com/r/learnprogramming/comments/7mjw12/"
```
```console
python script.py --directory .\\NEW_FOLDER --search cats --sort new --time all --subreddit gifs pics --NoDownload
```
```console
python script.py --directory .\\NEW_FOLDER --user [USER_NAME] --submitted --limit 10
```
```console
python script.py --directory .\\NEW_FOLDER --multireddit good_subs --user [USER_NAME] --sort top --time week --limit 250
```
```console
python script.py --directory .\\NEW_FOLDER\\ANOTHER_FOLDER --saved --limit 1000
```
```console
python script.py --directory C:\\NEW_FOLDER\\ANOTHER_FOLDER --log UNNAMED_FOLDER\\FAILED.json
```

# FAQ
## I can't startup the script no matter what.
See **[finding the correct keyword for Python](COMPILE_FROM_SOURCE.md#finding-the-correct-keyword-for-python)**

docs/COMPILE_FROM_SOURCE.md (new file)

@@ -0,0 +1,42 @@
# Compiling from source code
## Requirements
### Python 3 Interpreter
The latest* version of **Python 3** is needed. See if it is already installed [here](#finding-the-correct-keyword-for-python). If not, download the matching release for your platform [here](https://www.python.org/downloads/) and install it. If you are a *Windows* user, selecting the **Add Python 3 to PATH** option is mandatory.
\* *Use Python 3.6.5 if you encounter an issue*
## Using terminal
### To open it...
- **On Windows 8/8.1/10**: Press the File tab on **Windows Explorer**, click on **Open Windows PowerShell** or **Open Windows Command Prompt** or look for *Command Prompt* or *PowerShell* in *Start Menu*.
- **On Windows 7**: Press **WindowsKey+R**, type **cmd** and hit Enter or look for *Command Prompt* or *PowerShell* in *Start Menu*.
- **On Linux**: Right-click in a folder and select **Open Terminal** or press **Ctrl+Alt+T** or look for **Terminal** in the programs.
- **On MacOS**: Look for an app called **Terminal**.
### Navigating to the directory where the script is downloaded
Go inside the folder where script.py is located. If you are not familiar with changing directories in the command prompt or terminal, read *Changing Directories* in [this article](https://lifehacker.com/5633909/who-needs-a-mouse-learn-to-use-the-command-line-for-almost-anything)
## Finding the correct keyword for Python
Enter these lines into the terminal window until one of them prints out the version you have downloaded and installed:
- `python --version`
- `python3 --version`
- `python3.7 --version`
- `python3.6 --version`
- `py --version`
- `py -3 --version`
- `py -3.6 --version`
- `py -3.7 --version`
Once one does, your keyword for Python is that command without the `--version` part.
## Installing dependencies
When you are in the directory where script.py is, enter the line below into the terminal window, using your keyword for Python:
```console
python -m pip install -r requirements.txt
```
---
Now, you can go to [Using command-line arguments](COMMAND_LINE_ARGUMENTS.md)

script.py

@@ -13,7 +13,7 @@ import time
from io import StringIO
from pathlib import Path, PurePath

from src.downloader import Direct, Gfycat, Imgur, Self, Erome
from src.errors import *
from src.parser import LinkDesigner
from src.searcher import getPosts
@@ -22,7 +22,7 @@ from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
__author__ = "Ali Parlakci"
__license__ = "GPL"
__version__ = "1.4.0"
__maintainer__ = "Ali Parlakci"
__email__ = "parlakciali@gmail.com"
@@ -143,6 +143,7 @@ def parseArguments(arguments=[]):
" for downloading later",
action="store_true",
default=False)

if arguments == []:
return parser.parse_args()
@@ -159,7 +160,10 @@ def checkConflicts():
else:
user = 1

modes = [
"saved","subreddit","submitted","search","log","link","upvoted",
"multireddit"
]
values = {
x: 0 if getattr(GLOBAL.arguments,x) is None or \
@@ -194,12 +198,12 @@ class PromptUser:
))
print(" "*4+"[0] exit\n")
choice = input("> ")
while not choice.lower() in choices+choicesByIndex+["exit"]:
print("Invalid input\n")
programModeIndex = input("> ")

if choice == "0" or choice == "exit":
sys.exit()
elif choice in choicesByIndex:
return choices[int(choice)-1]
else:
@@ -232,10 +236,21 @@ class PromptUser:
GLOBAL.arguments.time = timeFilter

if programMode == "subreddit":
subredditInput = input("subreddit (enter frontpage for frontpage): ")
GLOBAL.arguments.subreddit = subredditInput
while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
subredditInput = input("subreddit: ")
GLOBAL.arguments.subreddit += "+" + subredditInput

if " " in GLOBAL.arguments.subreddit:
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit.split())

# DELETE THE PLUS (+) AT THE END
if not subredditInput.lower() == "frontpage":
GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]

print("\nselect sort type:")
sortTypes = [
"hot","top","new","rising","controversial"
@@ -255,7 +270,7 @@ class PromptUser:
elif programMode == "multireddit": elif programMode == "multireddit":
GLOBAL.arguments.user = input("\nredditor: ") GLOBAL.arguments.user = input("\nredditor: ")
GLOBAL.arguments.subreddit = input("\nmultireddit: ") GLOBAL.arguments.multireddit = input("\nmultireddit: ")
print("\nselect sort type:") print("\nselect sort type:")
sortTypes = [ sortTypes = [
@@ -307,10 +322,11 @@ class PromptUser:
GLOBAL.arguments.log = input("\nlog file directory:")
if Path(GLOBAL.arguments.log ).is_file():
break

while True:
try:
GLOBAL.arguments.limit = int(input("\nlimit (0 for none): "))
if GLOBAL.arguments.limit == 0:
GLOBAL.arguments.limit = None
break
except ValueError:
pass
@@ -365,6 +381,9 @@ def prepareAttributes():
ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit
elif GLOBAL.arguments.multireddit is not None:
ATTRIBUTES["multireddit"] = GLOBAL.arguments.multireddit
elif GLOBAL.arguments.saved is True: elif GLOBAL.arguments.saved is True:
ATTRIBUTES["saved"] = True ATTRIBUTES["saved"] = True
@@ -389,7 +408,7 @@ def postFromLog(fileName):
content = jsonFile(fileName).read()
else:
print("File not found")
sys.exit()

try:
del content["HEADER"]
@@ -422,16 +441,76 @@ def postExists(POST):
else:
return False
def downloadPost(SUBMISSION):
directory = GLOBAL.directory / SUBMISSION['postSubreddit']
global lastRequestTime
downloaders = {
"imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
}
if SUBMISSION['postType'] in downloaders:
print(SUBMISSION['postType'].upper())
if SUBMISSION['postType'] == "imgur":
if int(time.time() - lastRequestTime) <= 2:
pass
credit = Imgur.get_credits()
IMGUR_RESET_TIME = credit['UserReset']-time.time()
USER_RESET = ("after " \
+ str(int(IMGUR_RESET_TIME/60)) \
+ " Minutes " \
+ str(int(IMGUR_RESET_TIME%60)) \
+ " Seconds")
print(
"Client: {} - User: {} - Reset {}".format(
credit['ClientRemaining'],
credit['UserRemaining'],
USER_RESET
)
)
if not (credit['UserRemaining'] == 0 or \
credit['ClientRemaining'] == 0):
"""This block of code is needed
"""
if int(time.time() - lastRequestTime) <= 2:
pass
lastRequestTime = time.time()
else:
if credit['UserRemaining'] == 0:
KEYWORD = "user"
elif credit['ClientRemaining'] == 0:
KEYWORD = "client"
raise ImgurLimitError('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
downloaders[SUBMISSION['postType']] (directory,SUBMISSION)
else:
raise NoSuitablePost
return None
def download(submissions):
"""Analyze list of submissions and call the right function
to download each one, catch errors, update the log files
"""

subsLenght = len(submissions)
global lastRequestTime
lastRequestTime = 0
downloadedCount = subsLenght
duplicates = 0

FAILED_FILE = createLogFile("FAILED")
@@ -445,131 +524,45 @@ def download(submissions):
)

if postExists(submissions[i]):
print(submissions[i]['postType'].upper())
print("It already exists")
duplicates += 1
downloadedCount -= 1
continue

try:
downloadPost(submissions[i])

except FileAlreadyExistsError:
print("It already exists")
duplicates += 1
downloadedCount -= 1

except ImgurLoginError:
print(
"Imgur login failed. \nQuitting the program "\
"as unexpected errors might occur."
)
sys.exit()

except ImgurLimitError as exception:
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1

except NotADownloadableLinkError as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1

except NoSuitablePost:
print("No match found, skipping...")
downloadedCount -= 1

except Exception as exception:
# raise exception
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1

if duplicates:
print("\n There was {} duplicates".format(duplicates))
@@ -596,46 +589,47 @@ def main():
PromptUser()
except Exception as err:
print(err)
sys.exit()

if not Path(GLOBAL.configDirectory).is_dir():
os.makedirs(GLOBAL.configDirectory)
GLOBAL.config = getConfig(GLOBAL.configDirectory / "config.json")

if GLOBAL.arguments.log is not None:
logDir = Path(GLOBAL.arguments.log)
download(postFromLog(logDir))
sys.exit()

try:
POSTS = getPosts(prepareAttributes())
except InsufficientPermission:
print("You do not have permission to do that")
sys.exit()
except NoMatchingSubmissionFound:
print("No matching submission was found")
sys.exit()
except NoRedditSupoort:
print("Reddit does not support that")
sys.exit()
except NoPrawSupport:
print("PRAW does not support that")
sys.exit()
except MultiredditNotFound:
print("Multireddit not found")
sys.exit()
except InvalidSortingType:
print("Invalid sorting type has given")
sys.exit()
except InvalidRedditLink:
print("Invalid reddit link")
sys.exit()

if POSTS is None:
print("I could not find any posts in that URL")
sys.exit()

if GLOBAL.arguments.NoDownload:
sys.exit()

else:
download(POSTS)
@@ -650,13 +644,17 @@ if __name__ == "__main__":
print = printToFile
GLOBAL.RUN_TIME = time.time()
main()

except KeyboardInterrupt:
if GLOBAL.directory is None:
GLOBAL.directory = Path(".\\")
print("\nQUITTING...")

except Exception as exception:
if GLOBAL.directory is None:
GLOBAL.directory = Path(".\\")
logging.error(sys.exc_info()[0].__name__,
exc_info=full_exc_info(sys.exc_info()))
print(log_stream.getvalue())
input("\nPress enter to quit\n")

setup.py (new file)

@@ -0,0 +1,50 @@
#!C:\Users\Ali\AppData\Local\Programs\Python\Python36\python.exe
## python setup.py build
import sys
from cx_Freeze import setup, Executable
from script import __version__
options = {
"build_exe": {
"packages":[
"idna","imgurpython", "praw", "requests"
]
}
}
if sys.platform == "win32":
executables = [Executable(
"script.py",
targetName="bulk-downloader-for-reddit.exe",
shortcutName="Bulk Downloader for Reddit",
shortcutDir="DesktopFolder"
)]
elif sys.platform == "linux":
executables = [Executable(
"script.py",
targetName="bulk-downloader-for-reddit",
shortcutName="Bulk Downloader for Reddit",
shortcutDir="DesktopFolder"
)]
setup(
name = "Bulk Downloader for Reddit",
version = __version__,
description = "Bulk Downloader for Reddit",
author = "Ali Parlakci",
author_email="parlakciali@gmail.com",
url="https://github.com/aliparlakci/bulk-downloader-for-reddit",
classifiers=(
"Programming Language :: Python :: 3",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)"
"Natural Language :: English",
"Environment :: Console",
"Operating System :: OS Independent",
),
executables = executables,
options = options
)

src/downloader.py

@@ -2,7 +2,9 @@ import io
import os
import sys
import urllib.request
from html.parser import HTMLParser
from pathlib import Path
from urllib.error import HTTPError

import imgurpython
from multiprocessing import Queue
@@ -36,7 +38,10 @@ def getExtension(link):
if TYPE in parsed:
return "."+parsed[-1]
else:
if not "v.redd.it" in link:
return '.jpg'
else:
return '.mp4'
def getFile(fileDir,tempDir,imageURL,indent=0):
"""Downloads given file to given directory.
@@ -66,6 +71,129 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
else:
raise FileAlreadyExistsError
class Erome:
def __init__(self,directory,post):
try:
IMAGES = self.getLinks(post['postURL'])
except urllib.error.HTTPError:
raise NotADownloadableLinkError("Not a downloadable link")
imagesLenght = len(IMAGES)
howManyDownloaded = imagesLenght
duplicates = 0
if imagesLenght == 1:
extension = getExtension(IMAGES[0])
title = nameCorrector(post['postTitle'])
print(title+"_" +post['postId']+extension)
fileDir = title + "_" + post['postId'] + extension
fileDir = directory / fileDir
tempDir = title + "_" + post['postId'] + '.tmp'
tempDir = directory / tempDir
imageURL = "https:" + IMAGES[0]
try:
getFile(fileDir,tempDir,imageURL)
except FileNameTooLong:
fileDir = directory / (post['postId'] + extension)
tempDir = directory / (post['postId'] + '.tmp')
getFile(fileDir,tempDir,imageURL)
else:
title = nameCorrector(post['postTitle'])
print(title+"_"+post['postId'],end="\n\n")
folderDir = directory / (title+"_"+post['postId'])
try:
if not os.path.exists(folderDir):
os.makedirs(folderDir)
except FileNotFoundError:
folderDir = directory / post['postId']
os.makedirs(folderDir)
for i in range(imagesLenght):
extension = getExtension(IMAGES[i])
fileName = str(i+1)
imageURL = "https:" + IMAGES[i]
fileDir = folderDir / (fileName + extension)
tempDir = folderDir / (fileName + ".tmp")
print(" ({}/{})".format(i+1,imagesLenght))
print(" {}".format(fileName+extension))
try:
getFile(fileDir,tempDir,imageURL,indent=2)
print()
except FileAlreadyExistsError:
print(" The file already exists" + " "*10,end="\n\n")
duplicates += 1
howManyDownloaded -= 1
except Exception as exception:
raise exception
print("\n Could not get the file")
print(" " + str(exception) + "\n")
exceptionType = exception
howManyDownloaded -= 1
if duplicates == imagesLenght:
raise FileAlreadyExistsError
elif howManyDownloaded + duplicates < imagesLenght:
raise AlbumNotDownloadedCompletely(
"Album Not Downloaded Completely"
)
def getLinks(self,url,lineNumber=129):
content = []
lineNumber = None
class EromeParser(HTMLParser):
tag = None
def handle_starttag(self, tag, attrs):
self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}
pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
""" FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS"""
for i in range(len(pageSource)):
obj = EromeParser()
obj.feed(pageSource[i])
tag = obj.tag
if tag is not None:
if "div" in tag:
if "id" in tag["div"]:
if tag["div"]["id"] == "album":
lineNumber = i
break
for line in pageSource[lineNumber:]:
obj = EromeParser()
obj.feed(line)
tag = obj.tag
if tag is not None:
if "img" in tag:
if "class" in tag["img"]:
if tag["img"]["class"]=="img-front":
content.append(tag["img"]["src"])
elif "source" in tag:
content.append(tag["source"]["src"])
return [
link for link in content \
if link.endswith("_480p.mp4") or not link.endswith(".mp4")
]
class Imgur:
def __init__(self,directory,post):
self.imgurClient = self.initImgur()
@@ -168,8 +296,10 @@ class Imgur:
if duplicates == imagesLenght:
raise FileAlreadyExistsError
elif howManyDownloaded + duplicates < imagesLenght:
raise AlbumNotDownloadedCompletely(
"Album Not Downloaded Completely"
)

@staticmethod
def initImgur():
@@ -217,9 +347,9 @@ class Gfycat:
try:
POST['mediaURL'] = self.getLink(POST['postURL'])
except IndexError:
raise NotADownloadableLinkError("Could not read the page source")
except Exception as exception:
raise NotADownloadableLinkError("Could not read the page source")

POST['postExt'] = getExtension(POST['mediaURL'])
@@ -248,8 +378,7 @@ class Gfycat:
if url[-1:] == '/':
url = url[:-1]

url = "https://gfycat.com/" + url.split('/')[-1]

pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
@@ -266,7 +395,7 @@ class Gfycat:
break

if "".join(link) == "":
raise NotADownloadableLinkError("Could not read the page source")

return "".join(link)

src/errors.py

@@ -80,4 +80,10 @@ class InvalidSortingType(Exception):
pass

class FileNotFoundError(Exception):
pass

class NoSuitablePost(Exception):
pass

class ImgurLimitError(Exception):
pass

src/searcher.py

@@ -89,7 +89,7 @@ def beginPraw(config,user_agent = str(socket.gethostname())):
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
reddit = authorizedInstance[0]
refresh_token = authorizedInstance[1]
jsonFile(GLOBAL.configDirectory / "config.json").add({
"reddit_refresh_token":refresh_token
})
else:
@@ -98,7 +98,7 @@ def beginPraw(config,user_agent = str(socket.gethostname())):
authorizedInstance = GetAuth(reddit,port).getRefreshToken(*scopes)
reddit = authorizedInstance[0]
refresh_token = authorizedInstance[1]
jsonFile(GLOBAL.configDirectory / "config.json").add({
"reddit_refresh_token":refresh_token
})
return reddit
@@ -299,6 +299,8 @@ def redditSearcher(posts,SINGLE_POST=False):
gfycatCount = 0
global imgurCount
imgurCount = 0
global eromeCount
eromeCount = 0
global directCount
directCount = 0
global selfCount
@@ -360,8 +362,15 @@ def redditSearcher(posts,SINGLE_POST=False):
if not len(subList) == 0:
print(
"\nTotal of {} submissions found!\n"\
"{} GFYCATs, {} IMGURs, {} EROMEs, {} DIRECTs and {} SELF POSTS\n"
.format(
len(subList),
gfycatCount,
imgurCount,
eromeCount,
directCount,
selfCount
)
)
return subList
else:
@@ -370,6 +379,7 @@ def redditSearcher(posts,SINGLE_POST=False):
def checkIfMatching(submission):
global gfycatCount
global imgurCount
global eromeCount
global directCount
global selfCount
@@ -383,22 +393,24 @@ def checkIfMatching(submission):
except AttributeError:
return None

if 'gfycat' in submission.domain:
details['postType'] = 'gfycat'
gfycatCount += 1
return details

elif 'imgur' in submission.domain:
details['postType'] = 'imgur'
imgurCount += 1
return details

elif 'erome' in submission.domain:
details['postType'] = 'erome'
eromeCount += 1
return details

elif isDirectLink(submission.url) is not False:
details['postType'] = 'direct'
details['postURL'] = isDirectLink(submission.url)
directCount += 1
return details
@@ -435,7 +447,7 @@ def printSubmission(SUB,validNumber,totalNumber):
def isDirectLink(URL):
"""Check if link is a direct image link.
If so, return URL,
if not, return False
"""
@@ -444,10 +456,13 @@ def isDirectLink(URL):
URL = URL[:-1]

if "i.reddituploads.com" in URL:
return URL
elif "v.redd.it" in URL:
return URL+"/DASH_600_K"

for extension in imageTypes:
if extension in URL:
return URL
else:
return False

src/tools.py

@@ -14,6 +14,7 @@ class GLOBAL:
config = None
arguments = None
directory = None
configDirectory = Path.home() / "Bulk Downloader for Reddit"
reddit_client_id = "BSyphDdxYZAgVQ"
reddit_client_secret = "bfqNJaRh8NMh-9eAr-t4TRz-Blk"
printVanilla = print
@@ -74,8 +75,10 @@ def createLogFile(TITLE):
put given arguments inside \"HEADER\" key
"""

folderDirectory = GLOBAL.directory / "LOG_FILES" / \
str(time.strftime(
"%d-%m-%Y_%H-%M-%S",time.localtime(GLOBAL.RUN_TIME)
))

logFilename = TITLE.upper()+'.json'

if not path.exists(folderDirectory):
@@ -94,7 +97,7 @@ def printToFile(*args, **kwargs):
TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S",
time.localtime(GLOBAL.RUN_TIME)))

folderDirectory = GLOBAL.directory / "LOG_FILES" / TIME

print(*args,**kwargs)

if not path.exists(folderDirectory):