11 Commits

Author        SHA1        Message                          Date
Ali Parlakci  a6997898ce  Update version                   2018-07-23 23:37:27 +03:00
Ali Parlakci  61632c7143  Improve error handling           2018-07-23 23:33:11 +03:00
Ali Parlakçı  9bff3399a8  Typo fix                         2018-07-23 23:19:29 +03:00
Ali Parlakçı  b00d185f67  Update changelog                 2018-07-23 23:18:10 +03:00
Ali Parlakçı  7314e17125  Added erome support              2018-07-23 23:16:56 +03:00
Ali Parlakci  2d334d56bf  remove exclude mode              2018-07-23 22:57:54 +03:00
Ali Parlakçı  974517928f  Update README.md                 2018-07-23 22:07:28 +03:00
Ali Parlakçı  bcae177b1e  Split download function          2018-07-23 22:06:33 +03:00
Ali Parlakci  229def6578  Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit  2018-07-23 18:49:39 +03:00
Ali Parlakci  59b0376d6e  Added directions for frontpage   2018-07-23 18:49:28 +03:00
Ali Parlakçı  cf1dc7d08c  Rename changelog section         2018-07-22 18:16:11 +03:00
6 changed files with 261 additions and 174 deletions


@@ -52,7 +52,13 @@ It should redirect to a page which shows your **imgur_client_id** and **imgur_cl
 - All of the user data is held in **config.json** file which is in a folder named "Bulk Downloader for Reddit" in your **Home** directory. You can edit
 them, there.
 
-## Changelog
+## Changes on *master*
+### [23/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7314e17125aa78fd4e6b28e26fda7ec7db7e0147)
+- Split download() function
+- Added erome support
+- Remove exclude feature
+- Bug fix
+
 ### [22/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a67da461d2fcd70672effcb20c8179e3224091bb)
 - Put log files in a folder named "LOG_FILES"
 - Fixed the bug that makes multireddit mode unusable


@@ -40,8 +40,6 @@ optional arguments:
                         all
   --NoDownload          Just gets the posts and store them in a file for
                         downloading later
-  --exclude {imgur,gfycat,direct,self} [{imgur,gfycat,direct,self} ...]
-                        Do not download specified links
 ```
 
 # Examples

script.py

@@ -13,7 +13,7 @@ import time
 from io import StringIO
 from pathlib import Path, PurePath
 
-from src.downloader import Direct, Gfycat, Imgur, Self
+from src.downloader import Direct, Gfycat, Imgur, Self, Erome
 from src.errors import *
 from src.parser import LinkDesigner
 from src.searcher import getPosts
@@ -22,7 +22,7 @@ from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
 __author__ = "Ali Parlakci"
 __license__ = "GPL"
-__version__ = "1.3.1"
+__version__ = "1.4.0"
 __maintainer__ = "Ali Parlakci"
 __email__ = "parlakciali@gmail.com"
@@ -144,11 +144,6 @@ def parseArguments(arguments=[]):
                         action="store_true",
                         default=False)
 
-    parser.add_argument("--exclude",
-                        nargs="+",
-                        help="Do not download specified links",
-                        choices=["imgur","gfycat","direct","self"],
-                        type=str)
-
     if arguments == []:
         return parser.parse_args()
@@ -242,10 +237,10 @@ class PromptUser:
         if programMode == "subreddit":
-            subredditInput = input("subreddit: ")
+            subredditInput = input("subreddit (enter frontpage for frontpage): ")
             GLOBAL.arguments.subreddit = subredditInput
 
-            while not subredditInput == "":
+            while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
                 subredditInput = input("subreddit: ")
                 GLOBAL.arguments.subreddit += "+" + subredditInput
@@ -253,7 +248,8 @@ class PromptUser:
             GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit.split())
             # DELETE THE PLUS (+) AT THE END
-            GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]
+            if not subredditInput.lower() == "frontpage":
+                GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]
 
             print("\nselect sort type:")
             sortTypes = [
@@ -326,33 +322,6 @@ class PromptUser:
                 GLOBAL.arguments.log = input("\nlog file directory:")
                 if Path(GLOBAL.arguments.log ).is_file():
                     break
 
-        GLOBAL.arguments.exclude = []
-        sites = ["imgur","gfycat","direct","self"]
-
-        excludeInput = input("exclude: ").lower()
-        if excludeInput in sites and excludeInput != "":
-            GLOBAL.arguments.exclude = [excludeInput]
-
-        while not excludeInput == "":
-            while True:
-                excludeInput = input("exclude: ").lower()
-                if not excludeInput in sites or excludeInput in GLOBAL.arguments.exclude:
-                    break
-                elif excludeInput == "":
-                    break
-                else:
-                    GLOBAL.arguments.exclude.append(excludeInput)
-
-        for i in range(len(GLOBAL.arguments.exclude)):
-            if " " in GLOBAL.arguments.exclude[i]:
-                inputWithWhitespace = GLOBAL.arguments.exclude[i]
-                del GLOBAL.arguments.exclude[i]
-                for siteInput in inputWithWhitespace.split():
-                    if siteInput in sites and siteInput not in GLOBAL.arguments.exclude:
-                        GLOBAL.arguments.exclude.append(siteInput)
-
         while True:
             try:
                 GLOBAL.arguments.limit = int(input("\nlimit (0 for none): "))
@@ -472,20 +441,76 @@ def postExists(POST):
     else:
         return False
 
+def downloadPost(SUBMISSION):
+    directory = GLOBAL.directory / SUBMISSION['postSubreddit']
+    global lastRequestTime
+
+    downloaders = {
+        "imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
+    }
+
+    if SUBMISSION['postType'] in downloaders:
+        print(SUBMISSION['postType'].upper())
+
+        if SUBMISSION['postType'] == "imgur":
+
+            while int(time.time() - lastRequestTime) <= 2:
+                pass
+
+            credit = Imgur.get_credits()
+
+            IMGUR_RESET_TIME = credit['UserReset']-time.time()
+            USER_RESET = ("after " \
+                          + str(int(IMGUR_RESET_TIME/60)) \
+                          + " Minutes " \
+                          + str(int(IMGUR_RESET_TIME%60)) \
+                          + " Seconds")
+            print(
+                "Client: {} - User: {} - Reset {}".format(
+                    credit['ClientRemaining'],
+                    credit['UserRemaining'],
+                    USER_RESET
+                )
+            )
+
+            if not (credit['UserRemaining'] == 0 or \
+                    credit['ClientRemaining'] == 0):
+
+                """This block of code is needed
+                """
+                while int(time.time() - lastRequestTime) <= 2:
+                    pass
+
+                lastRequestTime = time.time()
+
+            else:
+                if credit['UserRemaining'] == 0:
+                    KEYWORD = "user"
+                elif credit['ClientRemaining'] == 0:
+                    KEYWORD = "client"
+
+                raise ImgurLimitError('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
+
+        downloaders[SUBMISSION['postType']] (directory,SUBMISSION)
+
+    else:
+        raise NoSuitablePost
+
+    return None
+
 def download(submissions):
     """Analyze list of submissions and call the right function
     to download each one, catch errors, update the log files
     """
 
     subsLenght = len(submissions)
+    global lastRequestTime
     lastRequestTime = 0
     downloadedCount = subsLenght
     duplicates = 0
 
-    BACKUP = {}
-
-    if GLOBAL.arguments.exclude is not None:
-        ToBeDownloaded = GLOBAL.arguments.exclude
-    else:
-        ToBeDownloaded = []
-
     FAILED_FILE = createLogFile("FAILED")
@@ -499,132 +524,46 @@ def download(submissions):
             )
 
         if postExists(submissions[i]):
-            result = False
             print(submissions[i]['postType'].upper())
             print("It already exists")
             duplicates += 1
             downloadedCount -= 1
             continue
 
-        directory = GLOBAL.directory / submissions[i]['postSubreddit']
-
-        if submissions[i]['postType'] == 'imgur' and not 'imgur' in ToBeDownloaded:
-            print("IMGUR",end="")
-
-            while int(time.time() - lastRequestTime) <= 2:
-                pass
-
-            credit = Imgur.get_credits()
-
-            IMGUR_RESET_TIME = credit['UserReset']-time.time()
-            USER_RESET = ("after " \
-                          + str(int(IMGUR_RESET_TIME/60)) \
-                          + " Minutes " \
-                          + str(int(IMGUR_RESET_TIME%60)) \
-                          + " Seconds")
-            print(
-                " => Client: {} - User: {} - Reset {}".format(
-                    credit['ClientRemaining'],
-                    credit['UserRemaining'],
-                    USER_RESET
-                )
-            )
-
-            if not (credit['UserRemaining'] == 0 or \
-                    credit['ClientRemaining'] == 0):
-
-                """This block of code is needed
-                """
-                while int(time.time() - lastRequestTime) <= 2:
-                    pass
-
-                lastRequestTime = time.time()
-
-                try:
-                    Imgur(directory,submissions[i])
-
-                except FileAlreadyExistsError:
-                    print("It already exists")
-                    duplicates += 1
-                    downloadedCount -= 1
-
-                except ImgurLoginError:
-                    print(
-                        "Imgur login failed. Quitting the program "\
-                        "as unexpected errors might occur."
-                    )
-                    sys.exit()
-
-                except Exception as exception:
-                    print(exception)
-                    FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                    downloadedCount -= 1
-
-            else:
-                if credit['UserRemaining'] == 0:
-                    KEYWORD = "user"
-                elif credit['ClientRemaining'] == 0:
-                    KEYWORD = "client"
-
-                print('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
-                FAILED_FILE.add(
-                    {int(i+1):['{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()),
-                               submissions[i]]}
-                )
-                downloadedCount -= 1
-
-        elif submissions[i]['postType'] == 'gfycat' and not 'gfycat' in ToBeDownloaded:
-            print("GFYCAT")
-            try:
-                Gfycat(directory,submissions[i])
-
-            except FileAlreadyExistsError:
-                print("It already exists")
-                duplicates += 1
-                downloadedCount -= 1
-
-            except NotADownloadableLinkError as exception:
-                print(exception)
-                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                downloadedCount -= 1
-
-            except Exception as exception:
-                print(exception)
-                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                downloadedCount -= 1
-
-        elif submissions[i]['postType'] == 'direct' and not 'direct' in ToBeDownloaded:
-            print("DIRECT")
-            try:
-                Direct(directory,submissions[i])
-
-            except FileAlreadyExistsError:
-                print("It already exists")
-                downloadedCount -= 1
-                duplicates += 1
-
-            except Exception as exception:
-                print(exception)
-                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                downloadedCount -= 1
-
-        elif submissions[i]['postType'] == 'self' and not 'self' in ToBeDownloaded:
-            print("SELF")
-            try:
-                Self(directory,submissions[i])
-
-            except FileAlreadyExistsError:
-                print("It already exists")
-                downloadedCount -= 1
-                duplicates += 1
-
-            except Exception as exception:
-                print(exception)
-                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
-                downloadedCount -= 1
-
-        else:
-            print("No match found, skipping...")
-            downloadedCount -= 1
+        try:
+            downloadPost(submissions[i])
+
+        except FileAlreadyExistsError:
+            print("It already exists")
+            duplicates += 1
+            downloadedCount -= 1
+
+        except ImgurLoginError:
+            print(
+                "Imgur login failed. \nQuitting the program "\
+                "as unexpected errors might occur."
+            )
+            sys.exit()
+
+        except ImgurLimitError as exception:
+            FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
+            downloadedCount -= 1
+
+        except NotADownloadableLinkError as exception:
+            print(exception)
+            FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
+            downloadedCount -= 1
+
+        except NoSuitablePost:
+            print("No match found, skipping...")
+            downloadedCount -= 1
+
+        except Exception as exception:
+            # raise exception
+            print(exception)
+            FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
+            downloadedCount -= 1
 
     if duplicates:
         print("\n There was {} duplicates".format(duplicates))
@@ -705,10 +644,12 @@ if __name__ == "__main__":
         print = printToFile
         GLOBAL.RUN_TIME = time.time()
         main()
+
     except KeyboardInterrupt:
         if GLOBAL.directory is None:
             GLOBAL.directory = Path(".\\")
+
         print("\nQUITTING...")
 
     except Exception as exception:
         if GLOBAL.directory is None:
             GLOBAL.directory = Path(".\\")
 
@@ -716,4 +657,4 @@ if __name__ == "__main__":
                   exc_info=full_exc_info(sys.exc_info()))
         print(log_stream.getvalue())
 
-        input("Press enter to quit\n")
+        input("\nPress enter to quit\n")

src/downloader.py

@@ -2,7 +2,9 @@ import io
 import os
 import sys
 import urllib.request
+from html.parser import HTMLParser
 from pathlib import Path
+from urllib.error import HTTPError
 
 import imgurpython
 from multiprocessing import Queue
@@ -69,6 +71,129 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
     else:
         raise FileAlreadyExistsError
 
+class Erome:
+    def __init__(self,directory,post):
+        try:
+            IMAGES = self.getLinks(post['postURL'])
+        except urllib.error.HTTPError:
+            raise NotADownloadableLinkError("Not a downloadable link")
+
+        imagesLenght = len(IMAGES)
+        howManyDownloaded = imagesLenght
+        duplicates = 0
+
+        if imagesLenght == 1:
+            extension = getExtension(IMAGES[0])
+
+            title = nameCorrector(post['postTitle'])
+            print(title+"_" +post['postId']+extension)
+
+            fileDir = title + "_" + post['postId'] + extension
+            fileDir = directory / fileDir
+
+            tempDir = title + "_" + post['postId'] + '.tmp'
+            tempDir = directory / tempDir
+
+            imageURL = "https:" + IMAGES[0]
+
+            try:
+                getFile(fileDir,tempDir,imageURL)
+            except FileNameTooLong:
+                fileDir = directory / (post['postId'] + extension)
+                tempDir = directory / (post['postId'] + '.tmp')
+                getFile(fileDir,tempDir,imageURL)
+
+        else:
+            title = nameCorrector(post['postTitle'])
+            print(title+"_"+post['postId'],end="\n\n")
+
+            folderDir = directory / (title+"_"+post['postId'])
+
+            try:
+                if not os.path.exists(folderDir):
+                    os.makedirs(folderDir)
+            except FileNotFoundError:
+                folderDir = directory / post['postId']
+                os.makedirs(folderDir)
+
+            for i in range(imagesLenght):
+                extension = getExtension(IMAGES[i])
+
+                fileName = str(i+1)
+                imageURL = "https:" + IMAGES[i]
+
+                fileDir = folderDir / (fileName + extension)
+                tempDir = folderDir / (fileName + ".tmp")
+
+                print(" ({}/{})".format(i+1,imagesLenght))
+                print(" {}".format(fileName+extension))
+
+                try:
+                    getFile(fileDir,tempDir,imageURL,indent=2)
+                    print()
+                except FileAlreadyExistsError:
+                    print(" The file already exists" + " "*10,end="\n\n")
+                    duplicates += 1
+                    howManyDownloaded -= 1
+
+                except Exception as exception:
+                    # raise exception
+                    print("\n Could not get the file")
+                    print(" " + str(exception) + "\n")
+                    exceptionType = exception
+                    howManyDownloaded -= 1
+
+        if duplicates == imagesLenght:
+            raise FileAlreadyExistsError
+        elif howManyDownloaded + duplicates < imagesLenght:
+            raise AlbumNotDownloadedCompletely(
+                "Album Not Downloaded Completely"
+            )
+
+    def getLinks(self,url,lineNumber=129):
+
+        content = []
+        lineNumber = None
+
+        class EromeParser(HTMLParser):
+            tag = None
+            def handle_starttag(self, tag, attrs):
+                self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}
+
+        pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
+
+        """ FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS"""
+        for i in range(len(pageSource)):
+            obj = EromeParser()
+            obj.feed(pageSource[i])
+            tag = obj.tag
+
+            if tag is not None:
+                if "div" in tag:
+                    if "id" in tag["div"]:
+                        if tag["div"]["id"] == "album":
+                            lineNumber = i
+                            break
+
+        for line in pageSource[lineNumber:]:
+            obj = EromeParser()
+            obj.feed(line)
+            tag = obj.tag
+            if tag is not None:
+                if "img" in tag:
+                    if "class" in tag["img"]:
+                        if tag["img"]["class"]=="img-front":
+                            content.append(tag["img"]["src"])
+                elif "source" in tag:
+                    content.append(tag["source"]["src"])
+
+        return [
+            link for link in content \
+            if link.endswith("_480p.mp4") or not link.endswith(".mp4")
+        ]
+
 class Imgur:
     def __init__(self,directory,post):
         self.imgurClient = self.initImgur()
 
@@ -171,7 +296,7 @@ class Imgur:
 
         if duplicates == imagesLenght:
             raise FileAlreadyExistsError
-        elif howManyDownloaded < imagesLenght:
+        elif howManyDownloaded + duplicates < imagesLenght:
             raise AlbumNotDownloadedCompletely(
                 "Album Not Downloaded Completely"
             )
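`Erome.getLinks()` feeds the page source to Python's `html.parser` one line at a time: every start tag overwrites the parser's `tag` attribute with a `{tag: {attribute: value}}` dict, which the caller inspects to find the album `div` and collect `img`/`source` URLs. A self-contained sketch of that technique follows; `TagCollector` and the markup string are invented for illustration.

```python
# Sketch of the HTMLParser technique used by Erome.getLinks() (invented markup).
from html.parser import HTMLParser

class TagCollector(HTMLParser):
    tag = None  # holds the last start tag seen, as {tag: {attribute: value}}
    def handle_starttag(self, tag, attrs):
        self.tag = {tag: {name: value for name, value in attrs}}

parser = TagCollector()
parser.feed('<img class="img-front" src="//example.com/a.jpg">')
if parser.tag is not None and "img" in parser.tag:
    print(parser.tag["img"]["src"])  # prints: //example.com/a.jpg
```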

src/errors.py

@@ -81,3 +81,9 @@ class InvalidSortingType(Exception):
 class FileNotFoundError(Exception):
     pass
 
+class NoSuitablePost(Exception):
+    pass
+
+class ImgurLimitError(Exception):
+    pass
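The two new exception types let the rewritten `download()` loop route failures by type rather than by flags: `downloadPost()` raises, and a matching `except` clause decides whether to log, skip, or quit. A minimal sketch of the pattern, assuming a simplified `process()` with invented trigger conditions:

```python
# Sketch of exception-based routing (trigger conditions are invented).
class NoSuitablePost(Exception):
    pass

class ImgurLimitError(Exception):
    pass

def process(post_type):
    if post_type == "imgur-over-limit":  # hypothetical condition
        raise ImgurLimitError("USER LIMIT EXCEEDED")
    if post_type not in ("imgur", "gfycat", "erome", "direct", "self"):
        raise NoSuitablePost

for post_type in ("imgur", "unknown", "imgur-over-limit"):
    try:
        process(post_type)
        print(post_type, "-> downloaded")
    except NoSuitablePost:
        print(post_type, "-> no match found, skipping")
    except ImgurLimitError as exception:
        print(post_type, "->", exception)
```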

src/searcher.py

@@ -299,6 +299,8 @@ def redditSearcher(posts,SINGLE_POST=False):
     gfycatCount = 0
     global imgurCount
     imgurCount = 0
+    global eromeCount
+    eromeCount = 0
     global directCount
     directCount = 0
     global selfCount
@@ -360,8 +362,15 @@ def redditSearcher(posts,SINGLE_POST=False):
     if not len(subList) == 0:
         print(
             "\nTotal of {} submissions found!\n"\
-            "{} GFYCATs, {} IMGURs, {} DIRECTs and {} SELF POSTS\n"
-            .format(len(subList),gfycatCount,imgurCount,directCount,selfCount)
+            "{} GFYCATs, {} IMGURs, {} EROMEs, {} DIRECTs and {} SELF POSTS\n"
+            .format(
+                len(subList),
+                gfycatCount,
+                imgurCount,
+                eromeCount,
+                directCount,
+                selfCount
+            )
         )
         return subList
     else:
@@ -370,6 +379,7 @@
 def checkIfMatching(submission):
     global gfycatCount
     global imgurCount
+    global eromeCount
     global directCount
     global selfCount
@@ -383,19 +393,20 @@ def checkIfMatching(submission):
     except AttributeError:
         return None
 
-    if ('gfycat' in submission.domain) or \
-       ('imgur' in submission.domain):
-
-        if 'gfycat' in submission.domain:
-            details['postType'] = 'gfycat'
-            gfycatCount += 1
-            return details
-
-        elif 'imgur' in submission.domain:
-            details['postType'] = 'imgur'
-            imgurCount += 1
-            return details
+    if 'gfycat' in submission.domain:
+        details['postType'] = 'gfycat'
+        gfycatCount += 1
+        return details
+
+    elif 'imgur' in submission.domain:
+        details['postType'] = 'imgur'
+        imgurCount += 1
+        return details
+
+    elif 'erome' in submission.domain:
+        details['postType'] = 'erome'
+        eromeCount += 1
+        return details
 
     elif isDirectLink(submission.url) is not False:
         details['postType'] = 'direct'