57 Commits

Author SHA1 Message Date
Ali Parlakci
280147282b 27 jan update 2019-01-27 16:06:31 +03:00
Ali Parlakci
b7baf07fb5 Added instructions 2019-01-27 15:59:24 +03:00
Ali Parlakci
aece2273fb Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-08-28 16:28:29 +03:00
Ali Parlakci
f807efe4d5 Ignore space at the end of directory 2018-08-28 16:27:29 +03:00
Ali Parlakci
743d887927 Ignore space at the end of directory 2018-08-28 16:24:14 +03:00
Ali Parlakci
da5492858c Add bs4 2018-08-28 16:15:22 +03:00
Ali Parlakci
cebfc713d2 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-08-28 16:12:01 +03:00
Ali Parlakci
f522154214 Update version 2018-08-28 16:11:48 +03:00
Ali Parlakçı
27cd3ee991 Changed getting gfycat links' algorithm 2018-08-28 16:10:15 +03:00
Ali Parlakci
29873331e6 Typo fix 2018-08-23 16:41:07 +03:00
Ali Parlakci
8a3dcd68a3 Update version 2018-08-23 12:16:31 +03:00
Ali Parlakci
ac323f2abe Bug fix 2018-08-23 12:09:56 +03:00
Ali Parlakci
32d26fa956 Print out github link at start 2018-08-20 15:13:42 +03:00
Ali Parlakci
137481cf3e Print out program info 2018-08-18 14:51:20 +03:00
Ali Parlakci
9b63c55d3e Print out version info before starting 2018-08-17 21:25:01 +03:00
Ali Parlakci
3a6954c7d3 Update version 2018-08-16 19:55:45 +03:00
Ali Parlakci
9a59da0c5f Update changelog 2018-08-16 19:53:33 +03:00
Ali Parlakci
d56efed1c6 Fix imgur download malfunction caused by headers 2018-08-16 19:51:56 +03:00
Ali Parlakci
8f64e62293 Update version 2018-08-15 21:52:54 +03:00
Ali Parlakci
bdc43eb0d8 Update changelog 2018-08-15 21:48:05 +03:00
Ali Parlakci
adccd8f3ba Prints the file that already exists 2018-08-15 21:46:27 +03:00
Ali Parlakci
47a07be1c8 Deleted commented line 2018-08-13 16:00:21 +03:00
Ali Parlakci
1a41dc6061 Update changelog 2018-08-13 15:56:37 +03:00
Ali Parlakci
50cb7c15b9 Fixed console prints for Linux 2018-08-13 15:55:37 +03:00
Ali Parlakci
a1f1915d57 Update changelog 2018-08-13 14:52:43 +03:00
Ali Parlakci
3448ba15a9 Added config file location as current directory 2018-08-13 14:50:45 +03:00
Ali Parlakci
ff68b5f70f Improved error handling 2018-08-10 19:50:52 +03:00
Ali Parlakci
588a3c3ea6 Update changelog 2018-08-10 13:09:28 +03:00
Ali Parlakci
8f1ff10a5e Added reddit username to config file 2018-08-10 13:08:24 +03:00
Ali Parlakci
9338961b2b Improved checkConflicts() 2018-08-09 09:26:01 +03:00
Ali Parlakci
94bc1c115f Minor edit in exception handling 2018-08-09 09:04:12 +03:00
Ali Parlakci
c19d8ad71b Refactored error handling 2018-08-09 00:17:04 +03:00
Ali Parlakci
4c8de50880 Fixed request headers 2018-08-08 00:47:34 +03:00
Ali Parlakci
3e6dfccdd2 Update request headers 2018-08-06 09:33:07 +03:00
Ali Parlakci
20b9747330 Added docstrings for the ease of modification 2018-08-06 08:13:07 +03:00
Ali Parlakci
be7508540d Refactored README page 2018-08-06 07:54:33 +03:00
Ali Parlakci
ccd9078b0a Refactored README page 2018-08-06 07:53:55 +03:00
Ali Parlakçı
43cf0a4d42 Typo fix 2018-08-06 07:42:52 +03:00
Ali Parlakci
3693cf46f8 Updated version 2018-08-06 07:40:23 +03:00
Ali Parlakci
04152e8554 Updated changelog 2018-08-06 07:37:35 +03:00
Ali Parlakci
210238d086 Sending header when requesting a file 2018-08-06 07:35:43 +03:00
Ali Parlakci
90e071354f Update changelog 2018-08-04 10:25:29 +03:00
Ali Parlakci
426089d0f3 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-08-04 10:23:22 +03:00
Ali Parlakci
7ae6c6385d Do not print post type to console 2018-08-04 10:22:41 +03:00
Ali Parlakçı
97d15f9974 Update README.md 2018-08-01 13:08:09 +03:00
Ali Parlakci
172cd72dc1 Update changelog 2018-07-30 13:37:29 +03:00
Ali Parlakci
af29492951 Hide KeyboardInterrupt 2018-07-30 13:36:48 +03:00
Ali Parlakci
5633b301f3 Bug fix 2018-07-30 13:36:27 +03:00
Ali Parlakci
5ed855af28 Update changelog 2018-07-30 13:23:34 +03:00
Ali Parlakci
7ccf2fb7f9 Open web browser as prompting for imgur credentials 2018-07-30 13:22:30 +03:00
Ali Parlakçı
2297e9ed86 Update README.md 2018-07-26 20:04:07 +03:00
Ali Parlakçı
401e014059 Update README.md 2018-07-26 20:03:25 +03:00
Ali Parlakçı
eb31d38c44 Update README.md 2018-07-26 19:39:51 +03:00
Ali Parlakçı
747fefea14 Update README.md 2018-07-26 19:36:50 +03:00
Ali Parlakçı
80cc4fade3 Update README.md 2018-07-26 19:33:47 +03:00
Ali Parlakçı
c26843c7fc Set theme jekyll-theme-cayman 2018-07-26 19:21:55 +03:00
Ali Parlakçı
a14edc9f5a Update README.md 2018-07-26 15:08:23 +03:00
11 changed files with 261 additions and 190 deletions

3
.gitignore vendored
View File

@@ -2,4 +2,5 @@ build/
dist/
MANIFEST
__pycache__/
src/__pycache__/
src/__pycache__/
config.json

114
README.md
View File

@@ -1,7 +1,7 @@
# Bulk Downloader for Reddit
This program downloads imgur, gfycat and direct image and video links of saved posts from a reddit account. It is written in Python 3.
**PLEASE** post any issue you have with the script to the [Issues](https://github.com/aliparlakci/bulk-downloader-for-reddit/issues) tab. Since I don't have any testers or contributors, I need your feedback.
Downloads media from reddit posts.
## [Download the latest release](https://github.com/aliparlakci/bulk-downloader-for-reddit/releases/latest)
## What it can do
- Can get posts from: frontpage, subreddits, multireddits, redditor's submissions, upvoted and saved posts; search results or just plain reddit links
@@ -13,109 +13,17 @@ This program downloads imgur, gfycat and direct image and video links of saved p
- Saves a reusable copy of posts' details that are found so that they can be re-downloaded again
- Logs failed ones in a file so that you can try to download them later
## [Download the latest release](https://github.com/aliparlakci/bulk-downloader-for-reddit/releases/latest)
## **[Compiling it from source code](docs/COMPILE_FROM_SOURCE.md)**
*\* MacOS users have to use this option.*
## How it works
- For **Windows** and **Linux** users, there are executable files to run easily without installing a third party program. But if you are a paranoid like me, you can **[compile it from source code](docs/COMPILE_FROM_SOURCE.md)**.
- In Windows, double click on bulk-downloader-for-reddit file
- In Linux, extract files to a folder and open terminal inside it. Type **`./bulk-downloader-for-reddit`**
- **MacOS** users have to **[compile it from source code](docs/COMPILE_FROM_SOURCE.md)**.
### Additional options
Script also accepts additional options via command-line arguments, get further information from **[`--help`](docs/COMMAND_LINE_ARGUMENTS.md)**
## Additional options
Script also accepts additional options via command-line arguments. Get further information from **[`--help`](docs/COMMAND_LINE_ARGUMENTS.md)**
## Setting up the script
Because this is not a commercial app, you need to create an imgur developer app in order API to work.
### Creating an imgur app
* Go to https://api.imgur.com/oauth2/addclient
* Enter a name into the **Application Name** field.
* Pick **Anonymous usage without user authorization** as an **Authorization type**\*
* Enter your email into the Email field.
* Correct CHAPTCHA
* Click **submit** button
You need to create an imgur developer app in order for the API to work. Go to https://api.imgur.com/oauth2/addclient and fill in the form (it does not really matter how you fill it in).
It should redirect you to a page which shows your **imgur_client_id** and **imgur_client_secret**
It should redirect you to a page where it shows your **imgur_client_id** and **imgur_client_secret**.
\* Select **OAuth 2 authorization without a callback URL** first then select **Anonymous usage without user authorization** if it says *Authorization callback URL: required*. If this does not work, it is safe to proceed with **OAuth 2 authorization without a callback URL**.
## [FAQ](docs/FAQ.md)
## FAQ
### What do the dots resemble when getting posts?
- Each dot means that 100 posts are scanned.
### Getting posts is taking too long.
- You can press Ctrl+C to interrupt it and start downloading.
### How downloaded files' names are formatted?
- Self posts and images that are not belong to an album are formatted as **`[SUBMITTER NAME]_[POST TITLE]_[REDDIT ID]`**.
You can use *reddit id* to go to post's reddit page by going to link **reddit.com/[REDDIT ID]**
- An image in an imgur album is formatted as **`[ITEM NUMBER]_[IMAGE TITLE]_[IMGUR ID]`**
Similarly, you can use *imgur id* to go to image's imgur page by going to link **imgur.com/[IMGUR ID]**.
### How do I open self post files?
- Self posts are held at reddit as styled with markdown. So, the script downloads them as they are in order not to lose their stylings.
However, there is a [great Chrome extension](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with its styling. Install it and open the files with [Chrome](https://www.google.com/intl/tr/chrome/).
However, they are basically text files. You can also view them with any text editor such as Notepad on Windows, gedit on Linux or Text Editor on MacOS
### How can I change my credentials?
- All of the user data is held in **config.json** file which is in a folder named "Bulk Downloader for Reddit" in your **Home** directory. You can edit
them, there.
## Changes on *master*
### [26/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/1623722138bad80ae39ffcd5fb38baf80680deac)
- Improved verbose mode
- Minimalized the console output
- Added quit option for auto quitting the program after process finished
### [25/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/1623722138bad80ae39ffcd5fb38baf80680deac)
- Added verbose mode
- Stylized the console output
### [24/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7a68ff3efac9939f9574c2cef6184b92edb135f4)
- Added OP's name to file names (backwards compatible)
- Deleted # char from file names (backwards compatible)
- Improved exception handling
### [23/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7314e17125aa78fd4e6b28e26fda7ec7db7e0147)
- Splited download() function
- Added erome support
- Removed exclude feature
- Bug fixes
### [22/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/6e7463005051026ad64006a8580b0b5dc9536b8c)
- Put log files in a folder named "LOG_FILES"
- Fixed the bug that makes multireddit mode unusable
### [21/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/4a8c2377f9fb4d60ed7eeb8d50aaf9a26492462a)
- Added exclude mode
### [20/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7548a010198fb693841ca03654d2c9bdf5742139)
- "0" input for no limit
- Fixed the bug that recognizes none image direct links as image links
### [19/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/41cbb58db34f500a8a5ecc3ac4375bf6c3b275bb)
- Added v.redd.it support
- Added custom exception descriptions to FAILED.json file
- Fixed the bug that prevents downloading some gfycat URLs
### [13/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/9f831e1b784a770c82252e909462871401a05c11)
- Changed config.json file's path to home directory
### [12/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/50a77f6ba54c24f5647d5ea4e177400b71ff04a7)
- Added binaries for Windows and Linux
- Wait on KeyboardInterrupt
- Accept multiple subreddit input
- Fixed the bug that prevents choosing "[0] exit" with typing "exit"
### [11/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a28a7776ab826dea2a8d93873a94cd46db3a339b)
- Improvements on UX and UI
- Added logging errors to CONSOLE_LOG.txt
- Using current directory if directory has not been given yet.
### [10/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/ffe3839aee6dc1a552d95154d817aefc2b66af81)
- Added support for *self* post
- Now getting posts is quicker
## [Changes on *master*](docs/CHANGELOG.md)

1
_config.yml Normal file
View File

@@ -0,0 +1 @@
theme: jekyll-theme-cayman

83
docs/CHANGELOG.md Normal file
View File

@@ -0,0 +1,83 @@
# Changes on *master*
## [27/01/2019](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/b7baf07fb5998368d87e3c4c36aed40daf820609)
- Clarified the instructions
## [28/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/d56efed1c6833a66322d9158523b89d0ce57f5de)
- Adjusted the algorithm used for extracting gfycat links because of gfycat's design change
- Ignore space at the end of the given directory
## [16/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/d56efed1c6833a66322d9158523b89d0ce57f5de)
- Fix the bug that prevents downloading imgur videos
## [15/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/adccd8f3ba03ad124d58643d78dab287a4123a6f)
- Prints out the titles of posts that are already downloaded
## [13/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/50cb7c15b9cb4befce0cfa2c23ab5de4af9176c6)
- Added alternative location of current directory for config file
- Fixed console prints on Linux
## [10/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/8f1ff10a5e11464575284210dbba4a0d387bc1c3)
- Added reddit username to config file
## [06/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/210238d0865febcb57fbd9f0b0a7d3da9dbff384)
- Sending headers when requesting a file in order not to be rejected by server
## [04/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/426089d0f35212148caff0082708a87017757bde)
- Disabled printing post types to console
## [30/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/af294929510f884d92b25eaa855c29fc4fb6dcaa)
- Now opens the web browser and goes to Imgur when prompting for Imgur credentials
## [26/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/1623722138bad80ae39ffcd5fb38baf80680deac)
- Improved verbose mode
- Minimalized the console output
- Added quit option for auto quitting the program after process finishes
## [25/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/1623722138bad80ae39ffcd5fb38baf80680deac)
- Added verbose mode
- Stylized the console output
## [24/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7a68ff3efac9939f9574c2cef6184b92edb135f4)
- Added OP's name to file names (backwards compatible)
- Deleted # char from file names (backwards compatible)
- Improved exception handling
## [23/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7314e17125aa78fd4e6b28e26fda7ec7db7e0147)
- Split download() function
- Added erome support
- Removed exclude feature
- Bug fixes
## [22/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/6e7463005051026ad64006a8580b0b5dc9536b8c)
- Put log files in a folder named "LOG_FILES"
- Fixed the bug that makes multireddit mode unusable
## [21/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/4a8c2377f9fb4d60ed7eeb8d50aaf9a26492462a)
- Added exclude mode
## [20/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7548a010198fb693841ca03654d2c9bdf5742139)
- "0" input for no limit
- Fixed the bug that recognizes non-image direct links as image links
## [19/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/41cbb58db34f500a8a5ecc3ac4375bf6c3b275bb)
- Added v.redd.it support
- Added custom exception descriptions to FAILED.json file
- Fixed the bug that prevents downloading some gfycat URLs
## [13/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/9f831e1b784a770c82252e909462871401a05c11)
- Changed config.json file's path to home directory
## [12/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/50a77f6ba54c24f5647d5ea4e177400b71ff04a7)
- Added binaries for Windows and Linux
- Wait on KeyboardInterrupt
- Accept multiple subreddit input
- Fixed the bug that prevents choosing "[0] exit" with typing "exit"
## [11/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a28a7776ab826dea2a8d93873a94cd46db3a339b)
- Improvements on UX and UI
- Added logging errors to CONSOLE_LOG.txt
- Using current directory if directory has not been given yet.
## [10/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/ffe3839aee6dc1a552d95154d817aefc2b66af81)
- Added support for *self* post
- Now getting posts is quicker

23
docs/FAQ.md Normal file
View File

@@ -0,0 +1,23 @@
# FAQ
## What do the dots represent when getting posts?
- Each dot means that 100 posts are scanned.
## Getting posts is taking too long.
- You can press Ctrl+C to interrupt it and start downloading.
## How are filenames formatted?
- Self posts and images that do not belong to an album are formatted as **`[SUBMITTER NAME]_[POST TITLE]_[REDDIT ID]`**.
You can use *reddit id* to go to post's reddit page by going to link **reddit.com/[REDDIT ID]**
- An image in an imgur album is formatted as **`[ITEM NUMBER]_[IMAGE TITLE]_[IMGUR ID]`**
Similarly, you can use *imgur id* to go to image's imgur page by going to link **imgur.com/[IMGUR ID]**.
## How do I open self post files?
- Self posts are held at reddit as styled with markdown. So, the script downloads them as they are in order not to lose their stylings.
However, there is a [great Chrome extension](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with its styling. Install it and open the files with [Chrome](https://www.google.com/intl/tr/chrome/).
However, they are basically text files. You can also view them with any text editor such as Notepad on Windows, gedit on Linux or Text Editor on MacOS
## How can I change my credentials?
- All of the user data is held in the **config.json** file, which is in a folder named "Bulk Downloader for Reddit" in your **Home** directory. You can edit
them there.

View File

@@ -1,3 +1,4 @@
bs4
requests
praw
imgurpython

126
script.py
View File

@@ -10,10 +10,11 @@ import logging
import os
import sys
import time
import webbrowser
from io import StringIO
from pathlib import Path, PurePath
from src.downloader import Direct, Gfycat, Imgur, Self, Erome
from src.downloader import Direct, Erome, Gfycat, Imgur, Self
from src.errors import *
from src.parser import LinkDesigner
from src.searcher import getPosts
@@ -22,7 +23,7 @@ from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
__author__ = "Ali Parlakci"
__license__ = "GPL"
__version__ = "1.6.0"
__version__ = "1.6.4.1"
__maintainer__ = "Ali Parlakci"
__email__ = "parlakciali@gmail.com"
@@ -38,20 +39,34 @@ def getConfig(configFileName):
if "reddit_refresh_token" in content:
if content["reddit_refresh_token"] == "":
FILE.delete("reddit_refresh_token")
if not all(False if content.get(key,"") == "" else True for key in keys):
print(
"Go to this URL and fill the form: " \
"https://api.imgur.com/oauth2/addclient\n" \
"Enter the client id and client secret here:"
)
webbrowser.open("https://api.imgur.com/oauth2/addclient",new=2)
for key in keys:
try:
if content[key] == "":
raise KeyError
except KeyError:
print(key,": ")
FILE.add({key:input()})
FILE.add({key:input(" "+key+": ")})
return jsonFile(configFileName).read()
else:
FILE = jsonFile(configFileName)
configDictionary = {}
print(
"Go to this URL and fill the form: " \
"https://api.imgur.com/oauth2/addclient\n" \
"Enter the client id and client secret here:"
)
webbrowser.open("https://api.imgur.com/oauth2/addclient",new=2)
for key in keys:
configDictionary[key] = input(key + ": ")
configDictionary[key] = input(" "+key+": ")
FILE.add(configDictionary)
return FILE.read()
@@ -169,9 +184,10 @@ def checkConflicts():
else:
user = 1
search = 1 if GLOBAL.arguments.search else 0
modes = [
"saved","subreddit","submitted","search","log","link","upvoted",
"multireddit"
"saved","subreddit","submitted","log","link","upvoted","multireddit"
]
values = {
@@ -184,15 +200,18 @@ def checkConflicts():
if not sum(values[x] for x in values) == 1:
raise ProgramModeError("Invalid program mode")
if values["search"]+values["saved"] == 2:
if search+values["saved"] == 2:
raise SearchModeError("You cannot search in your saved posts")
if values["search"]+values["submitted"] == 2:
if search+values["submitted"] == 2:
raise SearchModeError("You cannot search in submitted posts")
if values["search"]+values["upvoted"] == 2:
if search+values["upvoted"] == 2:
raise SearchModeError("You cannot search in upvoted posts")
if search+values["log"] == 2:
raise SearchModeError("You cannot search in log files")
if values["upvoted"]+values["submitted"] == 1 and user == 0:
raise RedditorNameError("No redditor name given")
@@ -246,12 +265,15 @@ class PromptUser:
if programMode == "subreddit":
subredditInput = input("subreddit (enter frontpage for frontpage): ")
subredditInput = input("(type frontpage for all subscribed subreddits,\n" \
" use plus to seperate multi subreddits:" \
" pics+funny+me_irl etc.)\n\n" \
"subreddit: ")
GLOBAL.arguments.subreddit = subredditInput
while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
subredditInput = input("subreddit: ")
GLOBAL.arguments.subreddit += "+" + subredditInput
# while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
# subredditInput = input("subreddit: ")
# GLOBAL.arguments.subreddit += "+" + subredditInput
if " " in GLOBAL.arguments.subreddit:
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit.split())
@@ -278,7 +300,7 @@ class PromptUser:
GLOBAL.arguments.time = "all"
elif programMode == "multireddit":
GLOBAL.arguments.user = input("\nredditor: ")
GLOBAL.arguments.user = input("\nmultireddit owner: ")
GLOBAL.arguments.multireddit = input("\nmultireddit: ")
print("\nselect sort type:")
@@ -370,10 +392,7 @@ def prepareAttributes():
GLOBAL.arguments.link = GLOBAL.arguments.link.strip("\"")
try:
ATTRIBUTES = LinkDesigner(GLOBAL.arguments.link)
except InvalidRedditLink:
raise InvalidRedditLink
ATTRIBUTES = LinkDesigner(GLOBAL.arguments.link)
if GLOBAL.arguments.search is not None:
ATTRIBUTES["search"] = GLOBAL.arguments.search
@@ -403,7 +422,7 @@ def prepareAttributes():
ATTRIBUTES["submitted"] = True
if GLOBAL.arguments.sort == "rising":
raise InvalidSortingType
raise InvalidSortingType("Invalid sorting type has given")
ATTRIBUTES["limit"] = GLOBAL.arguments.limit
@@ -440,6 +459,9 @@ def isPostExists(POST):
possibleExtensions = [".jpg",".png",".mp4",".gif",".webm",".md"]
"""If you change the filenames, don't forget to add them here.
Please don't remove existing ones
"""
for extension in possibleExtensions:
OLD_FILE_PATH = PATH / (
@@ -466,6 +488,8 @@ def isPostExists(POST):
return False
def downloadPost(SUBMISSION):
"""Download directory is declared here for each file"""
directory = GLOBAL.directory / SUBMISSION['postSubreddit']
global lastRequestTime
@@ -543,13 +567,15 @@ def download(submissions):
FAILED_FILE = createLogFile("FAILED")
for i in range(subsLenght):
print(
f"\n({i+1}/{subsLenght}) {submissions[i]['postType'].upper()} " \
f" r/{submissions[i]['postSubreddit']}",end=""
)
print(f"\n({i+1}/{subsLenght}) r/{submissions[i]['postSubreddit']}",
end="")
print(f" {submissions[i]['postType'].upper()}",end="",noPrint=True)
if isPostExists(submissions[i]):
print("\nIt already exists")
print(f"\n" \
f"{submissions[i]['postSubmitter']}_"
f"{nameCorrector(submissions[i]['postTitle'])}")
print("It already exists")
duplicates += 1
downloadedCount -= 1
continue
@@ -612,23 +638,33 @@ def download(submissions):
downloadedCount -= 1
if duplicates:
print("\n There was {} duplicates".format(duplicates))
print(f"\nThere {'were' if duplicates > 1 else 'was'} " \
f"{duplicates} duplicate{'s' if duplicates > 1 else ''}")
if downloadedCount == 0:
print(" Nothing downloaded :(")
print("Nothing downloaded :(")
else:
print(" Total of {} links downloaded!".format(downloadedCount))
print(f"Total of {downloadedCount} " \
f"link{'s' if downloadedCount > 1 else ''} downloaded!")
def main():
VanillaPrint(
f"\nBulk Downloader for Reddit v{__version__}\n" \
f"Written by Ali PARLAKCI parlakciali@gmail.com\n\n" \
f"https://github.com/aliparlakci/bulk-downloader-for-reddit/"
)
GLOBAL.arguments = parseArguments()
if GLOBAL.arguments.directory is not None:
GLOBAL.directory = Path(GLOBAL.arguments.directory)
GLOBAL.directory = Path(GLOBAL.arguments.directory.strip())
else:
GLOBAL.directory = Path(input("download directory: "))
GLOBAL.directory = Path(input("\ndownload directory: ").strip())
print("\n"," ".join(sys.argv),"\n",noPrint=True)
print(f"Bulk Downloader for Reddit v{__version__}\n",noPrint=True
)
try:
checkConflicts()
@@ -637,35 +673,21 @@ def main():
if not Path(GLOBAL.configDirectory).is_dir():
os.makedirs(GLOBAL.configDirectory)
GLOBAL.config = getConfig(GLOBAL.configDirectory / "config.json")
GLOBAL.config = getConfig("config.json") if Path("config.json").exists() \
else getConfig(GLOBAL.configDirectory / "config.json")
if GLOBAL.arguments.log is not None:
logDir = Path(GLOBAL.arguments.log)
download(postFromLog(logDir))
sys.exit()
try:
POSTS = getPosts(prepareAttributes())
except InsufficientPermission:
print("You do not have permission to do that")
sys.exit()
except NoMatchingSubmissionFound:
print("No matching submission was found")
sys.exit()
except NoRedditSupoort:
print("Reddit does not support that")
sys.exit()
except NoPrawSupport:
print("PRAW does not support that")
sys.exit()
except MultiredditNotFound:
print("Multireddit not found")
sys.exit()
except InvalidSortingType:
print("Invalid sorting type has given")
sys.exit()
except InvalidRedditLink:
print("Invalid reddit link")
except Exception as exc:
logging.error(sys.exc_info()[0].__name__,
exc_info=full_exc_info(sys.exc_info()))
print(log_stream.getvalue(),noPrint=True)
print(exc)
sys.exit()
if POSTS is None:

View File

@@ -1,13 +1,15 @@
import io
import json
import os
import sys
import urllib.request
from html.parser import HTMLParser
from multiprocessing import Queue
from pathlib import Path
from urllib.error import HTTPError
import imgurpython
from multiprocessing import Queue
from bs4 import BeautifulSoup
from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError,
FileNameTooLong, ImgurLoginError,
@@ -23,8 +25,7 @@ def dlProgress(count, blockSize, totalSize):
downloadedMbs = int(count*blockSize*(10**(-6)))
fileSize = int(totalSize*(10**(-6)))
sys.stdout.write("\r{}Mb/{}Mb".format(downloadedMbs,fileSize))
sys.stdout.write("\b"*len("\r{}Mb/{}Mb".format(downloadedMbs,fileSize)))
sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs,fileSize))
sys.stdout.flush()
def getExtension(link):
@@ -54,6 +55,23 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
As too long file names seem not working.
"""
headers = [
("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "\
"Safari/537.36 OPR/54.0.2952.64"),
("Accept", "text/html,application/xhtml+xml,application/xml;" \
"q=0.9,image/webp,image/apng,*/*;q=0.8"),
("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
("Accept-Encoding", "none"),
("Accept-Language", "en-US,en;q=0.8"),
("Connection", "keep-alive")
]
opener = urllib.request.build_opener()
if not "imgur" in imageURL:
opener.addheaders = headers
urllib.request.install_opener(opener)
if not (os.path.isfile(fileDir)):
for i in range(3):
try:
@@ -87,6 +105,8 @@ class Erome:
extension = getExtension(IMAGES[0])
"""Filenames are declared here"""
title = nameCorrector(post['postTitle'])
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+extension)
@@ -222,8 +242,11 @@ class Imgur:
post['mediaURL'] = content['object'].link
post['postExt'] = getExtension(post['mediaURL'])
title = nameCorrector(post['postTitle'])
"""Filenames are declared here"""
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+post['postExt'])
fileDir = directory / (
@@ -282,6 +305,8 @@ class Imgur:
+ "_"
+ images[i]['id'])
"""Filenames are declared here"""
fileDir = folderDir / (fileName + images[i]['Ext'])
tempDir = folderDir / (fileName + ".tmp")
@@ -378,12 +403,17 @@ class Gfycat:
except IndexError:
raise NotADownloadableLinkError("Could not read the page source")
except Exception as exception:
#debug
raise exception
raise NotADownloadableLinkError("Could not read the page source")
POST['postExt'] = getExtension(POST['mediaURL'])
if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(POST['postTitle'])
"""Filenames are declared here"""
print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'])
fileDir = directory / (
@@ -414,30 +444,25 @@ class Gfycat:
url = "https://gfycat.com/" + url.split('/')[-1]
pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
pageSource = (urllib.request.urlopen(url).read().decode())
theLine = pageSource[lineNumber]
lenght = len(query)
link = []
soup = BeautifulSoup(pageSource, "html.parser")
attributes = {"data-react-helmet":"true","type":"application/ld+json"}
content = soup.find("script",attrs=attributes)
for i in range(len(theLine)):
if theLine[i:i+lenght] == query:
cursor = (i+lenght)+1
while not theLine[cursor] == '"':
link.append(theLine[cursor])
cursor += 1
break
if "".join(link) == "":
if content is None:
raise NotADownloadableLinkError("Could not read the page source")
return "".join(link)
return json.loads(content.text)["video"]["contentUrl"]
class Direct:
def __init__(self,directory,POST):
POST['postExt'] = getExtension(POST['postURL'])
if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(POST['postTitle'])
"""Filenames are declared here"""
print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'])
fileDir = directory / (
@@ -460,6 +485,9 @@ class Self:
if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(post['postTitle'])
"""Filenames are declared here"""
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+".md")
fileDir = directory / (
@@ -479,7 +507,8 @@ class Self:
@staticmethod
def writeToFile(directory,post):
"""Self posts are formatted here"""
content = ("## ["
+ post["postTitle"]
+ "]("

View File

@@ -67,7 +67,7 @@ class NoMatchingSubmissionFound(Exception):
class NoPrawSupport(Exception):
pass
class NoRedditSupoort(Exception):
class NoRedditSupport(Exception):
pass
class MultiredditNotFound(Exception):

View File

@@ -29,7 +29,7 @@ def LinkParser(LINK):
ShortLink = False
if not "reddit.com" in LINK:
raise InvalidRedditLink
raise InvalidRedditLink("Invalid reddit link")
SplittedLink = LINK.split("/")

View File

@@ -9,7 +9,7 @@ from prawcore.exceptions import NotFound, ResponseException, Forbidden
from src.tools import GLOBAL, createLogFile, jsonFile, printToFile
from src.errors import (NoMatchingSubmissionFound, NoPrawSupport,
NoRedditSupoort, MultiredditNotFound,
NoRedditSupport, MultiredditNotFound,
InvalidSortingType, RedditLoginFailed,
InsufficientPermission)
@@ -48,6 +48,7 @@ def beginPraw(config,user_agent = str(socket.gethostname())):
self.client = self.recieve_connection()
data = self.client.recv(1024).decode('utf-8')
str(data)
param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
params = {
key: value for (key, value) in [token.split('=') \
@@ -93,6 +94,7 @@ def beginPraw(config,user_agent = str(socket.gethostname())):
reddit = authorizedInstance[0]
refresh_token = authorizedInstance[1]
jsonFile(GLOBAL.configDirectory / "config.json").add({
"reddit_username":str(reddit.user.me()),
"reddit_refresh_token":refresh_token
})
else:
@@ -102,6 +104,7 @@ def beginPraw(config,user_agent = str(socket.gethostname())):
reddit = authorizedInstance[0]
refresh_token = authorizedInstance[1]
jsonFile(GLOBAL.configDirectory / "config.json").add({
"reddit_username":str(reddit.user.me()),
"reddit_refresh_token":refresh_token
})
return reddit
@@ -115,7 +118,7 @@ def getPosts(args):
reddit = beginPraw(config)
if args["sort"] == "best":
raise NoPrawSupport
raise NoPrawSupport("PRAW does not support that")
if "subreddit" in args:
if "search" in args:
@@ -144,8 +147,8 @@ def getPosts(args):
}
if "search" in args:
if args["sort"] in ["hot","rising","controversial"]:
raise InvalidSortingType
if GLOBAL.arguments.sort in ["hot","rising","controversial"]:
raise InvalidSortingType("Invalid sorting type has given")
if "subreddit" in args:
print (
@@ -169,16 +172,16 @@ def getPosts(args):
)
elif "multireddit" in args:
raise NoPrawSupport
raise NoPrawSupport("PRAW does not support that")
elif "user" in args:
raise NoPrawSupport
raise NoPrawSupport("PRAW does not support that")
elif "saved" in args:
raise NoRedditSupoort
raise ("Reddit does not support that")
if args["sort"] == "relevance":
raise InvalidSortingType
raise InvalidSortingType("Invalid sorting type has given")
if "saved" in args:
print(
@@ -243,7 +246,7 @@ def getPosts(args):
) (**keyword_params)
)
except NotFound:
raise MultiredditNotFound
raise MultiredditNotFound("Multireddit not found")
elif "submitted" in args:
print (
@@ -273,7 +276,7 @@ def getPosts(args):
reddit.redditor(args["user"]).upvoted(limit=args["limit"])
)
except Forbidden:
raise InsufficientPermission
raise InsufficientPermission("You do not have permission to do that")
elif "post" in args:
print("post: {post}\n".format(post=args["post"]).upper(),noPrint=True)
@@ -367,7 +370,7 @@ def redditSearcher(posts,SINGLE_POST=False):
allPosts[subCount] = [details]
except KeyboardInterrupt:
print("\nKeyboardInterrupt",end="")
print("\nKeyboardInterrupt",noPrint=True)
postsFile.add(allPosts)
@@ -385,7 +388,7 @@ def redditSearcher(posts,SINGLE_POST=False):
print()
return subList
else:
raise NoMatchingSubmissionFound
raise NoMatchingSubmissionFound("No matching submission was found")
def checkIfMatching(submission):
global gfycatCount