From 4e8e59f527c7946951c4537df7914bcb7974b3b5 Mon Sep 17 00:00:00 2001 From: Ali Parlakci Date: Tue, 10 Jul 2018 01:30:50 +0300 Subject: [PATCH 1/6] Added Self class --- script.py | 19 +++++++++++++++-- src/downloader.py | 54 ++++++++++++++++++++++++++++++++++++++++++++--- src/searcher.py | 17 +++++++++++---- 3 files changed, 81 insertions(+), 9 deletions(-) diff --git a/script.py b/script.py index 6747ca3..ce6669f 100644 --- a/script.py +++ b/script.py @@ -11,7 +11,7 @@ import sys import time from pathlib import Path, PurePath -from src.downloader import Direct, Gfycat, Imgur +from src.downloader import Direct, Gfycat, Imgur, Self from src.parser import LinkDesigner from src.searcher import getPosts from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector, @@ -451,7 +451,22 @@ def download(submissions): print(exception) FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]}) downloadedCount -= 1 - + + elif submissions[i]['postType'] == 'self': + print("SELF") + try: + Self(directory,submissions[i]) + + except FileAlreadyExistsError: + print("It already exists") + downloadedCount -= 1 + duplicates += 1 + + # except Exception as exception: + # print(exception) + # FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]}) + # downloadedCount -= 1 + else: print("No match found, skipping...") downloadedCount -= 1 diff --git a/src/downloader.py b/src/downloader.py index 1883dec..706b7c7 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -1,3 +1,4 @@ +import io import os import sys import urllib.request @@ -16,7 +17,7 @@ except ModuleNotFoundError: install("imgurpython") from imgurpython import * - +VanillaPrint = print print = printToFile def dlProgress(count, blockSize, totalSize): @@ -233,8 +234,13 @@ class Gfycat: fileDir = directory / (title+"_"+POST['postId']+POST['postExt']) tempDir = directory / (title+"_"+POST['postId']+".tmp") + try: + getFile(fileDir,tempDir,POST['mediaURL']) + except FileNameTooLong: + fileDir = directory / 
(POST['postId']+POST['postExt']) + tempDir = directory / (POST['postId']+".tmp") - getFile(fileDir,tempDir,POST['mediaURL']) + getFile(fileDir,tempDir,POST['mediaURL']) def getLink(self, url, query=' Date: Tue, 10 Jul 2018 01:44:28 +0300 Subject: [PATCH 2/6] Fixed the long file name bug --- src/downloader.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/downloader.py b/src/downloader.py index 706b7c7..5e2eec3 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -308,8 +308,14 @@ class Self: if Path.is_file(fileDir): raise FileAlreadyExistsError + + try: + self.writeToFile(fileDir,post) + except FileNotFoundError: + fileDir = post['postId']+".md" + fileDir = directory / fileDir - self.writeToFile(fileDir,post) + self.writeToFile(fileDir,post) @staticmethod def writeToFile(directory,post): From 8791f22f56caeeea6e4e4842b40cee5b17f33c26 Mon Sep 17 00:00:00 2001 From: Ali Parlakci Date: Tue, 10 Jul 2018 01:58:42 +0300 Subject: [PATCH 3/6] Updated the changelog --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3af1d6a..755b144 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ This program downloads imgur, gfycat and direct image and video links of saved p - [Examples](#examples) - [FAQ](#faq) - [Changelog](#changelog) - - [release-1.0.0](#release-100) + - [10/07/2018](#10-07-2018) ## Requirements - Python 3.x* @@ -167,5 +167,6 @@ py -3 script.py C:\\NEW_FOLDER\\ANOTHER_FOLDER --log UNNAMED_FOLDER\\FAILED.json - Try `python3` or `python` or `py -3` as python have real issues about naming their program ## Changelog -### v1.0.0 -- Initial release +### 10/07/2018 +- Added support for *self* post +- Now getting posts is quicker From 97ab3fb01c2b5e97aa991aec24e6621d1c19010e Mon Sep 17 00:00:00 2001 From: Ali Parlakci Date: Tue, 10 Jul 2018 02:27:44 +0300 Subject: [PATCH 4/6] Added what it can do section --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) 
diff --git a/README.md b/README.md index 755b144..207c3bc 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ This program downloads imgur, gfycat and direct image and video links of saved p ## Table of Contents +- [What it can do?](#what-it-can-do) - [Requirements](#requirements) - [Configuring the APIs](#configuring-the-apis) - [Creating an imgur app](#creating-an-imgur-app) @@ -23,6 +24,17 @@ This program downloads imgur, gfycat and direct image and video links of saved p - [Changelog](#changelog) - [10/07/2018](#10-07-2018) +## What it can do? +### It... +- can get posts from: frontpage, subreddits, multireddits, redditor's submissions, upvoted and saved posts; search results or just plain reddit links +- sorts post by hot, top, new and so on +- downloads imgur albums, gfycat links, [self posts](#i-can-t-open-the-self-posts-) and any link to a direct image +- skips the existing ones +- puts post titles to file's name +- puts every post to its subreddit's folder +- saves reusable a copy of posts' details that are found so that they can be re-downloaded again +- logs failed ones in a file to so that you can try to download them later + ## Requirements - Python 3.x* @@ -166,6 +178,9 @@ py -3 script.py C:\\NEW_FOLDER\\ANOTHER_FOLDER --log UNNAMED_FOLDER\\FAILED.json ### I can't startup the script no matter what. - Try `python3` or `python` or `py -3` as python have real issues about naming their program +### I can't open the self posts. +- Self posts are stored on reddit as Markdown, so the script saves them as Markdown files in order to preserve their formatting. There is a great Chrome extension [here](https://chrome.google.com/webstore/detail/markdown-viewer/ckkdlimhmcjmikdlpkmbgfkaikojcbjk) for viewing Markdown files with their formatting rendered. Install it and open the files with Chrome. 
+ ## Changelog ### 10/07/2018 - Added support for *self* post From e27ebd1c445d12b692c4c1aa4c5ab4911968e5ad Mon Sep 17 00:00:00 2001 From: Ali Parlakci Date: Tue, 10 Jul 2018 02:38:28 +0300 Subject: [PATCH 5/6] Minimalize --- README.md | 60 +++++++++++++++++++------------------------------------ 1 file changed, 21 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 207c3bc..5b1f676 100644 --- a/README.md +++ b/README.md @@ -10,19 +10,11 @@ This program downloads imgur, gfycat and direct image and video links of saved p - [Configuring the APIs](#configuring-the-apis) - [Creating an imgur app](#creating-an-imgur-app) - [Program Modes](#program-modes) - - [saved mode](#saved-mode) - - [submitted mode](#submitted-mode) - - [upvoted mode](#upvoted-mode) - - [subreddit mode](#subreddit-mode) - - [multireddit mode](#multireddit-mode) - - [link mode](#link-mode) - - [log read mode](#log-read-mode) - [Running the script](#running-the-script) - [Using the command line arguments](#using-the-command-line-arguments) - [Examples](#examples) - [FAQ](#faq) - [Changelog](#changelog) - - [10/07/2018](#10-07-2018) ## What it can do? ### It... @@ -34,6 +26,7 @@ This program downloads imgur, gfycat and direct image and video links of saved p - puts every post to its subreddit's folder - saves reusable a copy of posts' details that are found so that they can be re-downloaded again - logs failed ones in a file to so that you can try to download them later +- can be run with double-clicking on Windows (but I don't recommend it) ## Requirements - Python 3.x* @@ -61,38 +54,27 @@ It should redirect to a page which shows your **imgur_client_id** and **imgur_cl ## Program Modes All the program modes are activated with command-line arguments as shown [here](#using-the-command-line-arguments) -### saved mode -In saved mode, the program gets posts from given user's saved posts. -### submitted mode -In submitted mode, the program gets posts from given user's submitted posts. 
-### upvoted mode -In submitted mode, the program gets posts from given user's upvoted posts. -### subreddit mode -In subreddit mode, the program gets posts from given subreddits* that is sorted by given type and limited by given number. - -Multiple subreddits can be given - -*You may also use search in this mode. See [`py -3 script.py --help`](#using-the-command-line-arguments).* -### multireddit mode -In multireddit mode, the program gets posts from given user's given multireddit that is sorted by given type and limited by given number. -### link mode -In link mode, the program gets posts from given reddit link. - -You may customize the behaviour with `--sort`, `--time`, `--limit`. - -*You may also use search in this mode. See [`py -3 script.py --help`](#using-the-command-line-arguments).* - -## log read mode -Two log files are created each time *script.py* runs. -- **POSTS** Saves all the posts without filtering. -- **FAILED** Keeps track of posts that are tried to be downloaded but failed. - -In log mode, the program takes a log file which created by itself, reads posts and tries downloading them again. - -Running log read mode for FAILED.json file once after the download is complete is **HIGHLY** recommended as unexpected problems may occur. +- **saved mode** + - Gets posts from the given user's saved posts. +- **submitted mode** + - Gets posts from the given user's submitted posts. +- **upvoted mode** + - Gets posts from the given user's upvoted posts. +- **subreddit mode** + - Gets posts from the given subreddit or subreddits, sorted by the given type and limited by the given number. + - You may also use search in this mode. See [`py -3 script.py --help`](#using-the-command-line-arguments). +- **multireddit mode** + - Gets posts from the given user's given multireddit, sorted by the given type and limited by the given number. +- **link mode** + - Gets posts from the given reddit link. + - You may customize the behaviour with `--sort`, `--time`, `--limit`. 
+ - You may also use search in this mode. See [`py -3 script.py --help`](#using-the-command-line-arguments). +- **log read mode** + - Takes a log file that was created by the program itself (a JSON file), reads the posts in it and tries downloading them again. + - Running log read mode for the FAILED.json file once after the download is complete is **HIGHLY** recommended as unexpected problems may occur. ## Running the script -**WARNING** *DO NOT* let more than *1* instance of script run as it interferes with IMGUR Request Rate. +**DO NOT** let more than one instance of the script run as it interferes with IMGUR Request Rate. ### Using the command line arguments If no arguments are passed program will prompt you for arguments below which means you may start up the script with double-clicking on it (at least on Windows for sure). @@ -101,7 +83,7 @@ Open up the [terminal](https://www.reddit.com/r/NSFW411/comments/8vtnl8/meta_i_m Run the script.py file from terminal with command-line arguments. Here is the help page: -**ATTENTION** Use `.\` for current directory and `..\` for upper directory when using short directories, otherwise it might act weird. +Use `.\` for current directory and `..\` for upper directory when using short directories, otherwise it might act weird. 
```console $ py -3 script.py --help From be743200414ee7a9dbd643a6bba004323fea4da2 Mon Sep 17 00:00:00 2001 From: Ali Parlakci Date: Tue, 10 Jul 2018 02:41:53 +0300 Subject: [PATCH 6/6] Uncomment error handling code --- script.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/script.py b/script.py index ce6669f..c7ffbf2 100644 --- a/script.py +++ b/script.py @@ -462,10 +462,10 @@ def download(submissions): downloadedCount -= 1 duplicates += 1 - # except Exception as exception: - # print(exception) - # FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]}) - # downloadedCount -= 1 + except Exception as exception: + print(exception) + FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]}) + downloadedCount -= 1 else: print("No match found, skipping...")