64 Commits

Author SHA1 Message Date
Ali Parlakci
b5d6165802 Update version 2018-07-24 22:13:38 +03:00
Ali Parlakci
b98815376f Bug fix 2018-07-24 22:13:11 +03:00
Ali Parlakci
d9586f99b8 Use else in try blocks 2018-07-24 22:11:12 +03:00
Ali Parlakci
76711892a2 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-24 22:10:28 +03:00
Ali Parlakci
bfea548eab Print credits in the same line 2018-07-24 22:10:19 +03:00
Ali Parlakçı
2e852db4c3 Typo fix 2018-07-24 19:45:37 +03:00
Ali Parlakci
8ac02e7aff Update version 2018-07-24 19:42:38 +03:00
Ali Parlakci
5eccf4dd3d Update changelog 2018-07-24 19:35:49 +03:00
Ali Parlakçı
7a68ff3efa Merge pull request #41 from aliparlakci/changePostNames
Add submitter to file names
2018-07-24 19:34:02 +03:00
Ali Parlakçı
3ea2e16b62 Merge branch 'master' into changePostNames 2018-07-24 19:33:38 +03:00
Ali Parlakci
fc6787aa28 Update changelog 2018-07-24 19:28:48 +03:00
Ali Parlakci
21533bb78c Improved exception handling 2018-07-24 19:27:52 +03:00
Ali Parlakci
1781ab8ffe Update changelog 2018-07-24 19:10:34 +03:00
Ali Parlakci
821383c465 Deleted # char from file names 2018-07-24 19:09:45 +03:00
Ali Parlakci
9d0fdc7521 Add OP's name first 2018-07-24 18:55:33 +03:00
Ali Parlakci
0387dd5243 Tweaked 'What it can do' 2018-07-24 14:16:46 +03:00
Ali Parlakci
93732b0367 Little refactoring 2018-07-24 13:17:37 +03:00
Ali Parlakci
400ce01918 Added older version support 2018-07-24 13:17:14 +03:00
Ali Parlakci
ccedac4bdc Add submitter to file name 2018-07-24 12:44:53 +03:00
Ali Parlakci
a6997898ce Update version 2018-07-23 23:37:27 +03:00
Ali Parlakci
61632c7143 Improve error handling 2018-07-23 23:33:11 +03:00
Ali Parlakçı
9bff3399a8 Typo fix 2018-07-23 23:19:29 +03:00
Ali Parlakçı
b00d185f67 Update changelog 2018-07-23 23:18:10 +03:00
Ali Parlakçı
7314e17125 Added erome support 2018-07-23 23:16:56 +03:00
Ali Parlakci
2d334d56bf remove exclude mode 2018-07-23 22:57:54 +03:00
Ali Parlakçı
974517928f Update README.md 2018-07-23 22:07:28 +03:00
Ali Parlakçı
bcae177b1e Split download function 2018-07-23 22:06:33 +03:00
Ali Parlakci
229def6578 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-23 18:49:39 +03:00
Ali Parlakci
59b0376d6e Added directions for frontpage 2018-07-23 18:49:28 +03:00
Ali Parlakçı
cf1dc7d08c Rename changelog section 2018-07-22 18:16:11 +03:00
Ali Parlakci
27532408c1 Update version 2018-07-22 18:06:46 +03:00
Ali Parlakci
32647beee9 Update changelog 2018-07-22 18:06:24 +03:00
Ali Parlakci
a67da461d2 Fixed the bug that makes multireddit mode unusable 2018-07-22 18:05:20 +03:00
Ali Parlakci
8c6f593496 Update changelog 2018-07-22 17:39:30 +03:00
Ali Parlakci
b60ce8a71e Put log files in a folder 2018-07-22 17:38:35 +03:00
Ali Parlakci
49920cc457 Bug fix 2018-07-22 17:25:30 +03:00
Ali Parlakci
c70e7c2ebb Update version 2018-07-22 14:39:09 +03:00
Ali Parlakci
3931dfff54 Update links 2018-07-21 22:03:16 +03:00
Ali Parlakci
4a8c2377f9 Updated --help page 2018-07-21 21:55:01 +03:00
Ali Parlakci
8a18a42a9a Updated changelog 2018-07-21 21:54:23 +03:00
Ali Parlakçı
6c2d748fbc Exclude post types (#38)
* Added argument for excluded links

* Added exclude in PromptUser()

* Added functionality for exclude and bug fix
2018-07-21 21:52:28 +03:00
Ali Parlakci
8c966df105 Improved traceback 2018-07-21 21:50:54 +03:00
Ali Parlakci
2adf2c0451 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-20 13:34:51 +03:00
Ali Parlakci
3e3a2df4d1 Bug fix at direct links 2018-07-20 13:34:23 +03:00
Ali Parlakci
7548a01019 Bug fix at direct links 2018-07-20 13:33:50 +03:00
Ali Parlakci
2ab16608d5 Update links 2018-07-20 13:06:01 +03:00
Ali Parlakci
e15f33b97a Fix README 2018-07-20 13:04:47 +03:00
Ali Parlakci
27211f993c 0 input for no limit 2018-07-20 13:03:50 +03:00
Ali Parlakci
87d3b294f7 0 input for no limit 2018-07-20 13:01:39 +03:00
Ali Parlakci
8128378dcd 0 input for no limit 2018-07-20 13:01:21 +03:00
Ali Parlakçı
cc93aa3012 Update commit link 2018-07-19 15:47:28 +03:00
Ali Parlakci
50c4a8d6d7 Update version 2018-07-19 15:38:49 +03:00
Ali Parlakci
5737904a54 Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-19 15:35:46 +03:00
Ali Parlakci
f6eba6c5b0 Added more gfycat links 2018-07-19 15:34:58 +03:00
Ali Parlakci
41cbb58db3 Added more gfycat links 2018-07-19 15:22:12 +03:00
Ali Parlakçı
c569124406 Update Changelog 2018-07-19 14:58:17 +03:00
Ali Parlakçı
1a3836a8e1 Added v.redd.it support (#36) 2018-07-19 14:57:16 +03:00
Ali Parlakci
fde6a1fac4 Added custom exception descriptions to FAILED.json file 2018-07-19 14:56:00 +03:00
Ali Parlakci
6bba2c4dbb Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit 2018-07-18 09:17:48 +03:00
Ali Parlakci
a078d44236 Edited FAQ 2018-07-18 09:17:36 +03:00
Ali Parlakçı
deae0be769 Delete _config.yml 2018-07-15 10:54:40 +03:00
Ali Parlakci
3cf0203e6b Typo fix 2018-07-13 14:39:01 +03:00
Ali Parlakci
0b31db0e2e Update FAQ 2018-07-13 14:37:35 +03:00
Ali Parlakci
d3f2b1b08e Update executables' names 2018-07-13 14:34:20 +03:00
9 changed files with 509 additions and 268 deletions

README.md
View File

@@ -6,25 +6,24 @@ This program downloads imgur, gfycat and direct image and video links of saved p
## What it can do
- Can get posts from: frontpage, subreddits, multireddits, redditor's submissions, upvoted and saved posts; search results or just plain reddit links
- Sorts posts by hot, top, new and so on
- Downloads imgur albums, gfycat links, [self posts](#i-cant-open-the-self-post-files) and any link to a direct image
- Downloads **REDDIT** images and videos, **IMGUR** images and albums, **GFYCAT** links, **EROME** images and albums, **SELF POSTS** and any link to a **DIRECT IMAGE**
- Skips the existing ones
- Puts post titles to file's name
- Puts post title and OP's name in file's name
- Puts every post to its subreddit's folder
- Saves a reusable copy of found posts' details so that they can be re-downloaded later
- Logs failed ones in a file so that you can try to download them later
- Can run with double-clicking on Windows
## [Download the latest release](https://github.com/aliparlakci/bulk-downloader-for-reddit/releases/latest)
## How it works
- For **Windows** and **Linux** users, there are executable files to run easily without installing a third party program. But if you are paranoid like me, you can **[compile it from source code](docs/COMPILE_FROM_SOURCE.md)**.
- In Windows, double click on script.exe file
- In Linux, extract files to a folder and open terminal inside it. Type **`./script`**
- In Windows, double click on the bulk-downloader-for-reddit file
- In Linux, extract files to a folder and open terminal inside it. Type **`./bulk-downloader-for-reddit`**
- **MacOS** users have to **[compile it from source code](docs/COMPILE_FROM_SOURCE.md)**.
Script also accepts **command-line arguments**, get further information from **[`python script.py --help`](docs/COMMAND_LINE_ARGUMENTS.md)**
The script also accepts **command-line arguments**; get further information from **[`--help`](docs/COMMAND_LINE_ARGUMENTS.md)**
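For example (illustrative values; the flags are documented on the arguments page):
```console
python script.py --directory .\\NEW_FOLDER --subreddit gifs --sort new --limit 100
```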
## Setting up the script
Because this is not a commercial app, you need to create an imgur developer app for the API to work.
@@ -42,11 +41,44 @@ It should redirect to a page which shows your **imgur_client_id** and **imgur_cl
\* Select **OAuth 2 authorization without a callback URL** first then select **Anonymous usage without user authorization** if it says *Authorization callback URL: required*
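As a rough sketch, these two values are what an imgurpython client is initialized with (the script's own initImgur() does the equivalent; this standalone snippet is illustrative, not the script's code):
```python
from imgurpython import ImgurClient

# Illustrative only: substitute the values from your imgur app page.
client = ImgurClient("YOUR_IMGUR_CLIENT_ID", "YOUR_IMGUR_CLIENT_SECRET")
print(client.credits)  # rate-limit info, populated as API calls are made
```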
## FAQ
### I can't open the self post files.
### How do I open self post files?
- Self posts are stored on reddit as markdown. So, the script downloads them as they are in order not to lose their styling.
However, there is a [great Chrome extension](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with its styling. Install it and open the files with [Chrome](https://www.google.com/intl/tr/chrome/).
However, they are basically text files. You can also view them with any text editor such as Notepad on Windows, gedit on Linux or Text Editor on MacOS.
### How can I change my credentials?
- All of the user data is held in the **config.json** file, which is in a folder named "Bulk Downloader for Reddit" in your **Home** directory. You can edit it there.
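A minimal sketch of reading those credentials back, assuming config.json holds the imgur keys described in the setup section (the exact schema is an assumption):
```python
import json
from pathlib import Path

# Assumed location and key names, per the FAQ answer above.
config_path = Path.home() / "Bulk Downloader for Reddit" / "config.json"
config = json.loads(config_path.read_text())
print(config["imgur_client_id"], config["imgur_client_secret"])
```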
## Changes on *master*
### [24/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7a68ff3efac9939f9574c2cef6184b92edb135f4)
- Added OP's name to file names (backwards compatible)
- Deleted # char from file names (backwards compatible)
- Improved exception handling
### [23/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7314e17125aa78fd4e6b28e26fda7ec7db7e0147)
- Split download() function
- Added erome support
- Removed the exclude feature
- Bug fix
### [22/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a67da461d2fcd70672effcb20c8179e3224091bb)
- Put log files in a folder named "LOG_FILES"
- Fixed the bug that makes multireddit mode unusable
### [21/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/4a8c2377f9fb4d60ed7eeb8d50aaf9a26492462a)
- Added exclude mode
### [20/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/commit/7548a010198fb693841ca03654d2c9bdf5742139)
- "0" input for no limit
- Fixed the bug that recognizes non-image direct links as image links
### [19/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/41cbb58db34f500a8a5ecc3ac4375bf6c3b275bb)
- Added v.redd.it support
- Added custom exception descriptions to FAILED.json file
- Fixed the bug that prevents downloading some gfycat URLs
## Changelog
### [13/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/9f831e1b784a770c82252e909462871401a05c11)
- Change config.json file's path to home directory

_config.yml
View File

@@ -1,5 +0,0 @@
theme: jekyll-theme-minimal
show_downloads: false
#title: Bulk Downloader for Reddit
description: Code written by Ali PARLAKCI
google_analytics: UA-80780721-3

docs/COMMAND_LINE_ARGUMENTS.md
View File

@@ -2,7 +2,7 @@
See the **[compiling from source](COMPILE_FROM_SOURCE.md)** page first unless you are using an executable file. If you are using an executable file, see [using terminal](COMPILE_FROM_SOURCE.md#using-terminal) and come back.
***Use*** `.\script.exe` ***or*** `./script` ***if you are using the executable***.
***Use*** `.\bulk-downloader-for-reddit.exe` ***or*** `./bulk-downloader-for-reddit` ***if you are using the executable***.
```console
$ python script.py --help
usage: script.py [-h] [--directory DIRECTORY] [--link link] [--saved]
@@ -23,7 +23,8 @@ optional arguments:
--saved Triggers saved mode
--submitted Gets posts of --user
--upvoted Gets upvoted posts of --user
--log LOG FILE Triggers log read mode and takes a log file
--log LOG FILE Takes a log file which the script itself created (json files),
reads posts and tries downloading them again.
--subreddit SUBREDDIT [SUBREDDIT ...]
Triggers subreddit mode and takes subreddit's name
without r/. use "frontpage" for frontpage
@@ -50,7 +51,7 @@ python script.py
```
```console
.\script.exe
.\bulk-downloader-for-reddit.exe
```
```console
@@ -58,11 +59,11 @@ python script.py
```
```console
.\script.exe -- directory .\\NEW_FOLDER --search cats --sort new --time all --subreddit gifs pics --NoDownload
.\bulk-downloader-for-reddit.exe --directory .\\NEW_FOLDER --search cats --sort new --time all --subreddit gifs pics --NoDownload
```
```console
./script --directory .\\NEW_FOLDER\\ANOTHER_FOLDER --saved --limit 1000
./bulk-downloader-for-reddit --directory .\\NEW_FOLDER\\ANOTHER_FOLDER --saved --limit 1000
```
```console

docs/COMPILE_FROM_SOURCE.md
View File

@@ -15,7 +15,7 @@ Latest* version of **Python 3** is needed. See if it is already installed [here]
- **On MacOS**: Look for an app called **Terminal**.
### Navigating to the directory where script is downloaded
Go inside the folder where script.py is located. If you are not familier with changing directories on command-prompt and terminal read *Changing Directories* in [this article](https://lifehacker.com/5633909/who-needs-a-mouse-learn-to-use-the-command-line-for-almost-anything)
Go inside the folder where script.py is located. If you are not familiar with changing directories on command-prompt and terminal, read *Changing Directories* in [this article](https://lifehacker.com/5633909/who-needs-a-mouse-learn-to-use-the-command-line-for-almost-anything)
## Finding the correct keyword for Python
Enter these lines into the terminal window until it prints out the version you have downloaded and installed:

script.py
View File

@@ -13,7 +13,7 @@ import time
from io import StringIO
from pathlib import Path, PurePath
from src.downloader import Direct, Gfycat, Imgur, Self
from src.downloader import Direct, Gfycat, Imgur, Self, Erome
from src.errors import *
from src.parser import LinkDesigner
from src.searcher import getPosts
@@ -22,7 +22,7 @@ from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
__author__ = "Ali Parlakci"
__license__ = "GPL"
__version__ = "1.1.2"
__version__ = "1.5.1"
__maintainer__ = "Ali Parlakci"
__email__ = "parlakciali@gmail.com"
@@ -143,6 +143,7 @@ def parseArguments(arguments=[]):
" for downloading later",
action="store_true",
default=False)
if arguments == []:
return parser.parse_args()
@@ -159,7 +160,10 @@ def checkConflicts():
else:
user = 1
modes = ["saved","subreddit","submitted","search","log","link","upvoted"]
modes = [
"saved","subreddit","submitted","search","log","link","upvoted",
"multireddit"
]
values = {
x: 0 if getattr(GLOBAL.arguments,x) is None or \
@@ -233,10 +237,10 @@ class PromptUser:
if programMode == "subreddit":
subredditInput = input("subreddit: ")
subredditInput = input("subreddit (enter frontpage for frontpage): ")
GLOBAL.arguments.subreddit = subredditInput
while not subredditInput == "":
while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
subredditInput = input("subreddit: ")
GLOBAL.arguments.subreddit += "+" + subredditInput
@@ -244,9 +248,9 @@ class PromptUser:
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit.split())
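# e.g. entering "gifs" then "pics" yields "gifs+pics", reddit's multi-subreddit syntax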
# DELETE THE PLUS (+) AT THE END
GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]
if not subredditInput.lower() == "frontpage":
GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]
print(GLOBAL.arguments.subreddit)
print("\nselect sort type:")
sortTypes = [
"hot","top","new","rising","controversial"
@@ -266,7 +270,7 @@ class PromptUser:
elif programMode == "multireddit":
GLOBAL.arguments.user = input("\nredditor: ")
GLOBAL.arguments.subreddit = input("\nmultireddit: ")
GLOBAL.arguments.multireddit = input("\nmultireddit: ")
print("\nselect sort type:")
sortTypes = [
@@ -318,10 +322,11 @@ class PromptUser:
GLOBAL.arguments.log = input("\nlog file directory:")
if Path(GLOBAL.arguments.log ).is_file():
break
while True:
try:
GLOBAL.arguments.limit = int(input("\nlimit: "))
GLOBAL.arguments.limit = int(input("\nlimit (0 for none): "))
if GLOBAL.arguments.limit == 0:
GLOBAL.arguments.limit = None
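# PRAW treats limit=None as "fetch as many posts as the API allows"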
break
except ValueError:
pass
@@ -376,6 +381,9 @@ def prepareAttributes():
ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit
elif GLOBAL.arguments.multireddit is not None:
ATTRIBUTES["multireddit"] = GLOBAL.arguments.multireddit
elif GLOBAL.arguments.saved is True:
ATTRIBUTES["saved"] = True
@@ -415,34 +423,110 @@ def postFromLog(fileName):
return posts
def postExists(POST):
def isPostExists(POST):
"""Figure out a file's name and checks if the file already exists"""
title = nameCorrector(POST['postTitle'])
FILENAME = title + "_" + POST['postId']
PATH = GLOBAL.directory / POST["postSubreddit"]
possibleExtensions = [".jpg",".png",".mp4",".gif",".webm",".md"]
for i in range(2):
for extension in possibleExtensions:
FILE_PATH = PATH / (FILENAME+extension)
if FILE_PATH.exists():
return True
else:
FILENAME = POST['postId']
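# The rewritten check below covers three naming schemes: the old "title_id"
# name, the new "submitter_title_id" name, and the bare "id" fallback used
# when a name is too long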
for extension in possibleExtensions:
OLD_FILE_PATH = PATH / (
title
+ "_" + POST['postId']
+ extension
)
FILE_PATH = PATH / (
POST["postSubmitter"]
+ "_" + title
+ "_" + POST['postId']
+ extension
)
SHORT_FILE_PATH = PATH / (POST['postId']+extension)
if OLD_FILE_PATH.exists() or \
FILE_PATH.exists() or \
SHORT_FILE_PATH.exists():
return True
else:
return False
def downloadPost(SUBMISSION):
directory = GLOBAL.directory / SUBMISSION['postSubreddit']
global lastRequestTime
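# Map each post type to its downloader class; instantiating one performs the download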
downloaders = {
"imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
}
if SUBMISSION['postType'] in downloaders:
print(SUBMISSION['postType'].upper(),end=" ")
if SUBMISSION['postType'] == "imgur":
# Throttle: wait until at least 2 seconds have passed since the last imgur request
while int(time.time() - lastRequestTime) <= 2:
pass
credit = Imgur.get_credits()
IMGUR_RESET_TIME = credit['UserReset']-time.time()
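# credit['UserReset'] is a unix timestamp, so this is the number of seconds
# left until the imgur user quota resets (formatted as minutes/seconds below)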
USER_RESET = ("after " \
+ str(int(IMGUR_RESET_TIME/60)) \
+ " Minutes " \
+ str(int(IMGUR_RESET_TIME%60)) \
+ " Seconds")
print(
"==> Client: {} - User: {} - Reset {}".format(
credit['ClientRemaining'],
credit['UserRemaining'],
USER_RESET
),end=""
)
if not (credit['UserRemaining'] == 0 or \
credit['ClientRemaining'] == 0):
"""This block of code is needed
"""
if int(time.time() - lastRequestTime) <= 2:
pass
lastRequestTime = time.time()
else:
if credit['UserRemaining'] == 0:
KEYWORD = "user"
elif credit['ClientRemaining'] == 0:
KEYWORD = "client"
raise ImgurLimitError('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
print()
downloaders[SUBMISSION['postType']] (directory,SUBMISSION)
else:
raise NoSuitablePost
return None
def download(submissions):
"""Analyze list of submissions and call the right function
to download each one, catch errors, update the log files
"""
subsLenght = len(submissions)
global lastRequestTime
lastRequestTime = 0
downloadedCount = subsLenght
duplicates = 0
BACKUP = {}
FAILED_FILE = createLogFile("FAILED")
@@ -455,132 +539,69 @@ def download(submissions):
)
)
if postExists(submissions[i]):
result = False
if isPostExists(submissions[i]):
print(submissions[i]['postType'].upper())
print("It already exists")
duplicates += 1
downloadedCount -= 1
continue
directory = GLOBAL.directory / submissions[i]['postSubreddit']
try:
downloadPost(submissions[i])
except FileAlreadyExistsError:
print("It already exists")
duplicates += 1
downloadedCount -= 1
if submissions[i]['postType'] == 'imgur':
print("IMGUR",end="")
while int(time.time() - lastRequestTime) <= 2:
pass
credit = Imgur.get_credits()
IMGUR_RESET_TIME = credit['UserReset']-time.time()
USER_RESET = ("after " \
+ str(int(IMGUR_RESET_TIME/60)) \
+ " Minutes " \
+ str(int(IMGUR_RESET_TIME%60)) \
+ " Seconds")
except ImgurLoginError:
print(
" => Client: {} - User: {} - Reset {}".format(
credit['ClientRemaining'],
credit['UserRemaining'],
USER_RESET
"Imgur login failed. \nQuitting the program "\
"as unexpected errors might occur."
)
sys.exit()
except ImgurLimitError as exception:
FAILED_FILE.add({int(i+1):[
"{class_name}: {info}".format(
class_name=exception.__class__.__name__,info=str(exception)
),
submissions[i]
]})
downloadedCount -= 1
except NotADownloadableLinkError as exception:
print(
"{class_name}: {info}".format(
class_name=exception.__class__.__name__,info=str(exception)
)
)
FAILED_FILE.add({int(i+1):[
"{class_name}: {info}".format(
class_name=exception.__class__.__name__,info=str(exception)
),
submissions[i]
]})
downloadedCount -= 1
if not (credit['UserRemaining'] == 0 or \
credit['ClientRemaining'] == 0):
"""This block of code is needed
"""
while int(time.time() - lastRequestTime) <= 2:
pass
lastRequestTime = time.time()
try:
Imgur(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
duplicates += 1
downloadedCount -= 1
except ImgurLoginError:
print(
"Imgur login failed. Quitting the program "\
"as unexpected errors might occur."
)
sys.exit()
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
else:
if credit['UserRemaining'] == 0:
KEYWORD = "user"
elif credit['ClientRemaining'] == 0:
KEYWORD = "client"
print('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
FAILED_FILE.add(
{int(i+1):['{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()),
submissions[i]]}
)
downloadedCount -= 1
elif submissions[i]['postType'] == 'gfycat':
print("GFYCAT")
try:
Gfycat(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
duplicates += 1
downloadedCount -= 1
except NotADownloadableLinkError as exception:
print("Could not read the page source")
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
elif submissions[i]['postType'] == 'direct':
print("DIRECT")
try:
Direct(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
downloadedCount -= 1
duplicates += 1
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
elif submissions[i]['postType'] == 'self':
print("SELF")
try:
Self(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
downloadedCount -= 1
duplicates += 1
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
else:
except NoSuitablePost:
print("No match found, skipping...")
downloadedCount -= 1
except Exception as exception:
# raise exception
print(
"{class_name}: {info}".format(
class_name=exception.__class__.__name__,info=str(exception)
)
)
FAILED_FILE.add({int(i+1):[
"{class_name}: {info}".format(
class_name=exception.__class__.__name__,info=str(exception)
),
submissions[i]
]})
downloadedCount -= 1
if duplicates:
print("\n There was {} duplicates".format(duplicates))
@@ -605,9 +626,6 @@ def main():
checkConflicts()
except ProgramModeError as err:
PromptUser()
except Exception as err:
print(err)
sys.exit()
if not Path(GLOBAL.configDirectory).is_dir():
os.makedirs(GLOBAL.configDirectory)
@@ -617,7 +635,7 @@ def main():
logDir = Path(GLOBAL.arguments.log)
download(postFromLog(logDir))
sys.exit()
try:
POSTS = getPosts(prepareAttributes())
except InsufficientPermission:
@@ -662,12 +680,17 @@ if __name__ == "__main__":
print = printToFile
GLOBAL.RUN_TIME = time.time()
main()
except KeyboardInterrupt:
if GLOBAL.directory is None:
GLOBAL.directory = Path(".\\")
print("\nQUITTING...")
except Exception as exception:
logging.error("Runtime error!", exc_info=full_exc_info(sys.exc_info()))
if GLOBAL.directory is None:
GLOBAL.directory = Path(".\\")
logging.error(sys.exc_info()[0].__name__,
exc_info=full_exc_info(sys.exc_info()))
print(log_stream.getvalue())
input("Press enter to quit\n")
input("\nPress enter to quit\n")

src/downloader.py
View File

@@ -2,7 +2,9 @@ import io
import os
import sys
import urllib.request
from html.parser import HTMLParser
from pathlib import Path
from urllib.error import HTTPError
import imgurpython
from multiprocessing import Queue
@@ -36,7 +38,10 @@ def getExtension(link):
if TYPE in parsed:
return "."+parsed[-1]
else:
return '.jpg'
if not "v.redd.it" in link:
return '.jpg'
else:
return '.mp4'
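# A v.redd.it DASH URL carries no file extension, so ".mp4" is assumed;
# any other extensionless link falls back to ".jpg"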
def getFile(fileDir,tempDir,imageURL,indent=0):
"""Downloads given file to given directory.
@@ -56,16 +61,150 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
tempDir,
reporthook=dlProgress)
os.rename(tempDir,fileDir)
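# Downloading to a .tmp path and renaming afterwards means an interrupted
# transfer never leaves a half-written file under the final name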
print(" "*indent+"Downloaded"+" "*10)
break
except ConnectionResetError as exception:
print(" "*indent + str(exception))
print(" "*indent + "Trying again\n")
except FileNotFoundError:
raise FileNameTooLong
else:
print(" "*indent+"Downloaded"+" "*10)
break
else:
raise FileAlreadyExistsError
class Erome:
def __init__(self,directory,post):
try:
IMAGES = self.getLinks(post['postURL'])
except urllib.error.HTTPError:
raise NotADownloadableLinkError("Not a downloadable link")
imagesLenght = len(IMAGES)
howManyDownloaded = imagesLenght
duplicates = 0
if imagesLenght == 1:
extension = getExtension(IMAGES[0])
title = nameCorrector(post['postTitle'])
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+extension)
fileDir = directory / (
post["postSubmitter"]+"_"+title+"_"+post['postId']+extension
)
tempDir = directory / (
post["postSubmitter"]+"_"+title+"_"+post['postId']+".tmp"
)
imageURL = "https:" + IMAGES[0]
try:
getFile(fileDir,tempDir,imageURL)
except FileNameTooLong:
fileDir = directory / (post['postId'] + extension)
tempDir = directory / (post['postId'] + '.tmp')
getFile(fileDir,tempDir,imageURL)
else:
title = nameCorrector(post['postTitle'])
print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n")
folderDir = directory / (
post["postSubmitter"] + "_" + title + "_" + post['postId']
)
try:
if not os.path.exists(folderDir):
os.makedirs(folderDir)
except FileNotFoundError:
folderDir = directory / post['postId']
os.makedirs(folderDir)
for i in range(imagesLenght):
extension = getExtension(IMAGES[i])
fileName = str(i+1)
imageURL = "https:" + IMAGES[i]
fileDir = folderDir / (fileName + extension)
tempDir = folderDir / (fileName + ".tmp")
print(" ({}/{})".format(i+1,imagesLenght))
print(" {}".format(fileName+extension))
try:
getFile(fileDir,tempDir,imageURL,indent=2)
print()
except FileAlreadyExistsError:
print(" The file already exists" + " "*10,end="\n\n")
duplicates += 1
howManyDownloaded -= 1
except Exception as exception:
# raise exception
print("\n Could not get the file")
print(
" "
+ "{class_name}: {info}".format(
class_name=exception.__class__.__name__,
info=str(exception)
)
+ "\n"
)
exceptionType = exception
howManyDownloaded -= 1
if duplicates == imagesLenght:
raise FileAlreadyExistsError
elif howManyDownloaded + duplicates < imagesLenght:
raise AlbumNotDownloadedCompletely(
"Album Not Downloaded Completely"
)
def getLinks(self,url):
content = []
lineNumber = None
class EromeParser(HTMLParser):
tag = None
def handle_starttag(self, tag, attrs):
self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}
pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
""" FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS"""
for i in range(len(pageSource)):
obj = EromeParser()
obj.feed(pageSource[i])
tag = obj.tag
if tag is not None:
if "div" in tag:
if "id" in tag["div"]:
if tag["div"]["id"] == "album":
lineNumber = i
break
for line in pageSource[lineNumber:]:
obj = EromeParser()
obj.feed(line)
tag = obj.tag
if tag is not None:
if "img" in tag:
if "class" in tag["img"]:
if tag["img"]["class"]=="img-front":
content.append(tag["img"]["src"])
elif "source" in tag:
content.append(tag["source"]["src"])
return [
link for link in content \
if link.endswith("_480p.mp4") or not link.endswith(".mp4")
]
class Imgur:
def __init__(self,directory,post):
self.imgurClient = self.initImgur()
@@ -85,13 +224,22 @@ class Imgur:
post['postExt'] = getExtension(post['mediaURL'])
title = nameCorrector(post['postTitle'])
print(title+"_" +post['postId']+post['postExt'])
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+post['postExt'])
fileDir = title + "_" + post['postId'] + post['postExt']
fileDir = directory / fileDir
fileDir = directory / (
post["postSubmitter"]
+ "_" + title
+ "_" + post['postId']
+ post['postExt']
)
tempDir = directory / (
post["postSubmitter"]
+ "_" + title
+ "_" + post['postId']
+ ".tmp"
)
tempDir = title + "_" + post['postId'] + '.tmp'
tempDir = directory / tempDir
try:
getFile(fileDir,tempDir,post['mediaURL'])
except FileNameTooLong:
@@ -107,9 +255,11 @@ class Imgur:
duplicates = 0
title = nameCorrector(post['postTitle'])
print(title+"_"+post['postId'],end="\n\n")
print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n")
folderDir = directory / (title+"_"+post['postId'])
folderDir = directory / (
post["postSubmitter"] + "_" + title + "_" + post['postId']
)
try:
if not os.path.exists(folderDir):
@@ -162,14 +312,23 @@ class Imgur:
except Exception as exception:
print("\n Could not get the file")
print(" " + str(exception) + "\n")
print(
" "
+ "{class_name}: {info}".format(
class_name=exception.__class__.__name__,
info=str(exception)
)
+ "\n"
)
exceptionType = exception
howManyDownloaded -= 1
if duplicates == imagesLenght:
raise FileAlreadyExistsError
elif howManyDownloaded < imagesLenght:
raise AlbumNotDownloadedCompletely
elif howManyDownloaded + duplicates < imagesLenght:
raise AlbumNotDownloadedCompletely(
"Album Not Downloaded Completely"
)
@staticmethod
def initImgur():
@@ -217,18 +376,23 @@ class Gfycat:
try:
POST['mediaURL'] = self.getLink(POST['postURL'])
except IndexError:
raise NotADownloadableLinkError
raise NotADownloadableLinkError("Could not read the page source")
except Exception as exception:
raise NotADownloadableLinkError
raise NotADownloadableLinkError("Could not read the page source")
POST['postExt'] = getExtension(POST['mediaURL'])
if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(POST['postTitle'])
print(title+"_"+POST['postId']+POST['postExt'])
print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'])
fileDir = directory / (title+"_"+POST['postId']+POST['postExt'])
tempDir = directory / (title+"_"+POST['postId']+".tmp")
fileDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt']
)
tempDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp"
)
try:
getFile(fileDir,tempDir,POST['mediaURL'])
except FileNameTooLong:
@@ -248,8 +412,7 @@ class Gfycat:
if url[-1:] == '/':
url = url[:-1]
if 'gifs' in url:
url = "https://gfycat.com/" + url.split('/')[-1]
url = "https://gfycat.com/" + url.split('/')[-1]
pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
@@ -266,7 +429,7 @@ class Gfycat:
break
if "".join(link) == "":
raise NotADownloadableLinkError
raise NotADownloadableLinkError("Could not read the page source")
return "".join(link)
@@ -275,13 +438,14 @@ class Direct:
POST['postExt'] = getExtension(POST['postURL'])
if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(POST['postTitle'])
print(title+"_"+POST['postId']+POST['postExt'])
print(POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt'])
fileDir = title+"_"+POST['postId']+POST['postExt']
fileDir = directory / fileDir
tempDir = title+"_"+POST['postId']+".tmp"
tempDir = directory / tempDir
fileDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+POST['postExt']
)
tempDir = directory / (
POST["postSubmitter"]+"_"+title+"_"+POST['postId']+".tmp"
)
try:
getFile(fileDir,tempDir,POST['postURL'])
@@ -296,10 +460,11 @@ class Self:
if not os.path.exists(directory): os.makedirs(directory)
title = nameCorrector(post['postTitle'])
print(title+"_"+post['postId']+".md")
print(post["postSubmitter"]+"_"+title+"_"+post['postId']+".md")
fileDir = title+"_"+post['postId']+".md"
fileDir = directory / fileDir
fileDir = directory / (
post["postSubmitter"]+"_"+title+"_"+post['postId']+".md"
)
if Path.is_file(fileDir):
raise FileAlreadyExistsError
@@ -322,7 +487,11 @@ class Self:
+ ")\n"
+ post["postContent"]
+ "\n\n---\n\n"
+ "submitted by [u/"
+ "submitted to [r/"
+ post["postSubreddit"]
+ "](https://www.reddit.com/r/"
+ post["postSubreddit"]
+ ") by [u/"
+ post["postSubmitter"]
+ "](https://www.reddit.com/user/"
+ post["postSubmitter"]

src/errors.py
View File

@@ -80,4 +80,10 @@ class InvalidSortingType(Exception):
pass
class FileNotFoundError(Exception):
pass
class NoSuitablePost(Exception):
pass
class ImgurLimitError(Exception):
pass

src/searcher.py
View File

@@ -14,60 +14,62 @@ from src.errors import (NoMatchingSubmissionFound, NoPrawSupport,
print = printToFile
class GetAuth:
def __init__(self,redditInstance,port):
self.redditInstance = redditInstance
self.PORT = int(port)
def recieve_connection(self):
"""Wait for and then return a connected socket..
Opens a TCP connection on port 8080, and waits for a single client.
"""
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(('localhost', self.PORT))
server.listen(1)
client = server.accept()[0]
server.close()
return client
def send_message(self, message):
"""Send message to client and close the connection."""
self.client.send('HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8'))
self.client.close()
def getRefreshToken(self,*scopes):
state = str(random.randint(0, 65000))
url = self.redditInstance.auth.url(scopes, state, 'permanent')
print("Go to this URL and login to reddit:\n\n",url)
webbrowser.open(url,new=2)
self.client = self.recieve_connection()
data = self.client.recv(1024).decode('utf-8')
param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
params = {
key: value for (key, value) in [token.split('=') \
for token in param_tokens]
}
if state != params['state']:
self.send_message(
client, 'State mismatch. Expected: {} Received: {}'
.format(state, params['state'])
)
raise RedditLoginFailed
elif 'error' in params:
self.send_message(client, params['error'])
raise RedditLoginFailed
refresh_token = self.redditInstance.auth.authorize(params['code'])
self.send_message(
"<script>" \
"alert(\"You can go back to terminal window now.\");" \
"</script>"
)
return (self.redditInstance,refresh_token)
def beginPraw(config,user_agent = str(socket.gethostname())):
class GetAuth:
def __init__(self,redditInstance,port):
self.redditInstance = redditInstance
self.PORT = int(port)
def recieve_connection(self):
"""Wait for and then return a connected socket..
Opens a TCP connection on port 8080, and waits for a single client.
"""
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(('localhost', self.PORT))
server.listen(1)
client = server.accept()[0]
server.close()
return client
def send_message(self, message):
"""Send message to client and close the connection."""
self.client.send(
'HTTP/1.1 200 OK\r\n\r\n{}'.format(message).encode('utf-8')
)
self.client.close()
def getRefreshToken(self,*scopes):
state = str(random.randint(0, 65000))
url = self.redditInstance.auth.url(scopes, state, 'permanent')
print("Go to this URL and login to reddit:\n\n",url)
webbrowser.open(url,new=2)
self.client = self.recieve_connection()
data = self.client.recv(1024).decode('utf-8')
param_tokens = data.split(' ', 2)[1].split('?', 1)[1].split('&')
params = {
key: value for (key, value) in [token.split('=') \
for token in param_tokens]
}
if state != params['state']:
self.send_message(
'State mismatch. Expected: {} Received: {}'
.format(state, params['state'])
)
raise RedditLoginFailed
elif 'error' in params:
self.send_message(params['error'])
raise RedditLoginFailed
refresh_token = self.redditInstance.auth.authorize(params['code'])
self.send_message(
"<script>" \
"alert(\"You can go back to terminal window now.\");" \
"</script>"
)
return (self.redditInstance,refresh_token)
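# Flow summary: build the reddit authorization URL with a random state, open
# it in the browser, catch the redirect on localhost, verify the state, then
# exchange the returned code for a permanent refresh token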
"""Start reddit instance"""
scopes = ['identity','history','read']
@@ -245,8 +247,6 @@ def getPosts(args):
raise MultiredditNotFound
elif "submitted" in args:
# TODO
# USE REDDIT.USER.ME() INSTEAD WHEN "ME" PASSED AS A --USER
print (
"submitted posts of {user}\nsort: {sort}\n" \
"time: {time}\nlimit: {limit}\n".format(
@@ -263,8 +263,6 @@ def getPosts(args):
)
elif "upvoted" in args:
# TODO
# USE REDDIT.USER.ME() INSTEAD WHEN "ME" PASSED AS A --USER
print (
"upvoted posts of {user}\nlimit: {limit}\n".format(
user=args["user"],
@@ -299,6 +297,8 @@ def redditSearcher(posts,SINGLE_POST=False):
gfycatCount = 0
global imgurCount
imgurCount = 0
global eromeCount
eromeCount = 0
global directCount
directCount = 0
global selfCount
@@ -360,8 +360,15 @@ def redditSearcher(posts,SINGLE_POST=False):
if not len(subList) == 0:
print(
"\nTotal of {} submissions found!\n"\
"{} GFYCATs, {} IMGURs, {} DIRECTs and {} SELF POSTS\n"
.format(len(subList),gfycatCount,imgurCount,directCount,selfCount)
"{} GFYCATs, {} IMGURs, {} EROMEs, {} DIRECTs and {} SELF POSTS\n"
.format(
len(subList),
gfycatCount,
imgurCount,
eromeCount,
directCount,
selfCount
)
)
return subList
else:
@@ -370,6 +377,7 @@ def redditSearcher(posts,SINGLE_POST=False):
def checkIfMatching(submission):
global gfycatCount
global imgurCount
global eromeCount
global directCount
global selfCount
@@ -383,22 +391,24 @@ def checkIfMatching(submission):
except AttributeError:
return None
if ('gfycat' in submission.domain) or \
('imgur' in submission.domain):
if 'gfycat' in submission.domain:
details['postType'] = 'gfycat'
gfycatCount += 1
return details
if 'gfycat' in submission.domain:
details['postType'] = 'gfycat'
gfycatCount += 1
return details
elif 'imgur' in submission.domain:
details['postType'] = 'imgur'
imgurCount += 1
return details
elif 'imgur' in submission.domain:
details['postType'] = 'imgur'
imgurCount += 1
return details
elif 'erome' in submission.domain:
details['postType'] = 'erome'
eromeCount += 1
return details
elif isDirectLink(submission.url):
elif isDirectLink(submission.url) is not False:
details['postType'] = 'direct'
details['postURL'] = isDirectLink(submission.url)
directCount += 1
return details
@@ -435,7 +445,7 @@ def printSubmission(SUB,validNumber,totalNumber):
def isDirectLink(URL):
"""Check if link is a direct image link.
If so, return True,
If so, return URL,
if not, return False
"""
@@ -444,10 +454,13 @@ def isDirectLink(URL):
URL = URL[:-1]
if "i.reddituploads.com" in URL:
return True
return URL
elif "v.redd.it" in URL:
return URL+"/DASH_600_K"
for extension in imageTypes:
if extension in URL:
return True
return URL
else:
return False
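# Illustrative behavior after this change (assuming ".jpg" is in imageTypes):
#   isDirectLink("https://v.redd.it/abc123")  -> "https://v.redd.it/abc123/DASH_600_K"
#   isDirectLink("https://i.redd.it/pic.jpg") -> "https://i.redd.it/pic.jpg"
#   isDirectLink("https://example.com/page")  -> False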

src/tools.py
View File

@@ -75,8 +75,10 @@ def createLogFile(TITLE):
put given arguments inside \"HEADER\" key
"""
folderDirectory = GLOBAL.directory / str(time.strftime("%d-%m-%Y_%H-%M-%S",
time.localtime(GLOBAL.RUN_TIME)))
folderDirectory = GLOBAL.directory / "LOG_FILES" / \
str(time.strftime(
"%d-%m-%Y_%H-%M-%S",time.localtime(GLOBAL.RUN_TIME)
))
logFilename = TITLE.upper()+'.json'
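# Resulting path, for example: <directory>/LOG_FILES/24-07-2018_19-35-49/FAILED.json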
if not path.exists(folderDirectory):
@@ -95,7 +97,7 @@ def printToFile(*args, **kwargs):
TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S",
time.localtime(GLOBAL.RUN_TIME)))
folderDirectory = GLOBAL.directory / TIME
folderDirectory = GLOBAL.directory / "LOG_FILES" / TIME
print(*args,**kwargs)
if not path.exists(folderDirectory):
@@ -130,7 +132,7 @@ def nameCorrector(string):
if len(string.split('\n')) > 1:
string = "".join(string.split('\n'))
BAD_CHARS = ['\\','/',':','*','?','"','<','>','|','.',]
BAD_CHARS = ['\\','/',':','*','?','"','<','>','|','.','#']
if any(x in string for x in BAD_CHARS):
for char in string: