23 Commits

Author        SHA1        Message                                             Date
Ali Parlakci  a6997898ce  Update version                                      2018-07-23 23:37:27 +03:00
Ali Parlakci  61632c7143  Improve error handling                              2018-07-23 23:33:11 +03:00
Ali Parlakçı  9bff3399a8  Typo fix                                            2018-07-23 23:19:29 +03:00
Ali Parlakçı  b00d185f67  Update changelog                                    2018-07-23 23:18:10 +03:00
Ali Parlakçı  7314e17125  Added erome support                                 2018-07-23 23:16:56 +03:00
Ali Parlakci  2d334d56bf  remove exclude mode                                 2018-07-23 22:57:54 +03:00
Ali Parlakçı  974517928f  Update README.md                                    2018-07-23 22:07:28 +03:00
Ali Parlakçı  bcae177b1e  Split download function                             2018-07-23 22:06:33 +03:00
Ali Parlakci  229def6578  Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit  2018-07-23 18:49:39 +03:00
Ali Parlakci  59b0376d6e  Added directions for frontpage                      2018-07-23 18:49:28 +03:00
Ali Parlakçı  cf1dc7d08c  Rename changelog section                            2018-07-22 18:16:11 +03:00
Ali Parlakci  27532408c1  Update version                                      2018-07-22 18:06:46 +03:00
Ali Parlakci  32647beee9  Update changelog                                    2018-07-22 18:06:24 +03:00
Ali Parlakci  a67da461d2  Fixed the bug that makes multireddit mode unusable  2018-07-22 18:05:20 +03:00
Ali Parlakci  8c6f593496  Update changelog                                    2018-07-22 17:39:30 +03:00
Ali Parlakci  b60ce8a71e  Put log files in a folder                           2018-07-22 17:38:35 +03:00
Ali Parlakci  49920cc457  Bug fix                                             2018-07-22 17:25:30 +03:00
Ali Parlakci  c70e7c2ebb  Update version                                      2018-07-22 14:39:09 +03:00
Ali Parlakci  3931dfff54  Update links                                        2018-07-21 22:03:16 +03:00
Ali Parlakci  4a8c2377f9  Updated --help page                                 2018-07-21 21:55:01 +03:00
Ali Parlakci  8a18a42a9a  Updated changelog                                   2018-07-21 21:54:23 +03:00
Ali Parlakçı  6c2d748fbc  Exclude post types (#38)                            2018-07-21 21:52:28 +03:00
                          * Added argument for excluded links
                          * Added exclude in PromptUser()
                          * Added functionality for exclude and bug fix
Ali Parlakci  8c966df105  Improved traceback                                  2018-07-21 21:50:54 +03:00
7 changed files with 288 additions and 144 deletions

README.md
View File

@@ -52,7 +52,20 @@ It should redirect to a page which shows your **imgur_client_id** and **imgur_cl
- All of the user data is held in **config.json** file which is in a folder named "Bulk Downloader for Reddit" in your **Home** directory. You can edit them there.
## Changelog
## Changes on *master*
### [23/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/7314e17125aa78fd4e6b28e26fda7ec7db7e0147)
- Split download() function
- Added erome support
- Removed exclude feature
- Bug fix
### [22/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/a67da461d2fcd70672effcb20c8179e3224091bb)
- Put log files in a folder named "LOG_FILES"
- Fixed the bug that makes multireddit mode unusable
### [21/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/4a8c2377f9fb4d60ed7eeb8d50aaf9a26492462a)
- Added exclude mode
### [20/07/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/commit/7548a010198fb693841ca03654d2c9bdf5742139)
- "0" input for no limit
- Fixed the bug that recognizes non-image direct links as image links

View File

@@ -23,7 +23,8 @@ optional arguments:
--saved Triggers saved mode
--submitted Gets posts of --user
--upvoted Gets upvoted posts of --user
--log LOG FILE Triggers log read mode and takes a log file
  --log LOG FILE        Takes a log file which the program itself created (json files),
                        reads posts and tries downloading them again.
--subreddit SUBREDDIT [SUBREDDIT ...]
Triggers subreddit mode and takes subreddit's name
                        without r/. Use "frontpage" for the frontpage
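A note on the reworked --log mode: the log files it consumes are the JSON files the program writes itself via createLogFile() (see the src/tools.py hunk below). A minimal, hypothetical sketch of such a replay loop, assuming the {index: [error, submission]} layout that FAILED_FILE.add() produces later in this diff; replay_log and download_one are illustrative names, not the project's API:

```python
import json

def replay_log(log_path, download_one):
    """Re-attempt every submission recorded in a FAILED.json-style log."""
    with open(log_path) as stream:
        log = json.load(stream)
    for index, entry in log.items():
        if index == "HEADER":  # createLogFile() keeps the run's arguments here
            continue
        error, submission = entry
        print("Retrying post #{} (previously failed with: {})".format(index, error))
        download_one(submission)

# Usage sketch: print each submission instead of downloading it.
# replay_log("LOG_FILES/23-07-2018_23-37-27/FAILED.json", print)
```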

script.py (236 changed lines)
View File

@@ -13,7 +13,7 @@ import time
from io import StringIO
from pathlib import Path, PurePath
from src.downloader import Direct, Gfycat, Imgur, Self
from src.downloader import Direct, Gfycat, Imgur, Self, Erome
from src.errors import *
from src.parser import LinkDesigner
from src.searcher import getPosts
@@ -22,7 +22,7 @@ from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
__author__ = "Ali Parlakci"
__license__ = "GPL"
__version__ = "1.2.1"
__version__ = "1.4.0"
__maintainer__ = "Ali Parlakci"
__email__ = "parlakciali@gmail.com"
@@ -143,6 +143,7 @@ def parseArguments(arguments=[]):
" for downloading later",
action="store_true",
default=False)
if arguments == []:
return parser.parse_args()
@@ -159,7 +160,10 @@ def checkConflicts():
else:
user = 1
modes = ["saved","subreddit","submitted","search","log","link","upvoted"]
modes = [
"saved","subreddit","submitted","search","log","link","upvoted",
"multireddit"
]
values = {
x: 0 if getattr(GLOBAL.arguments,x) is None or \
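The widened modes list feeds checkConflicts(), which insists that exactly one program mode is active per run. A minimal sketch of that rule using the same getattr pattern as the hunk above; the Args class is a made-up stand-in for the parsed GLOBAL.arguments:

```python
MODES = [
    "saved", "subreddit", "submitted", "search", "log", "link", "upvoted",
    "multireddit",
]

def count_active_modes(arguments):
    """A mode flag counts as set unless its value is None or False."""
    return sum(
        0 if getattr(arguments, mode, None) in (None, False) else 1
        for mode in MODES
    )

class Args:  # stand-in for GLOBAL.arguments
    saved = False
    subreddit = "pics"
    submitted = False
    search = None
    log = None
    link = None
    upvoted = False
    multireddit = None

assert count_active_modes(Args) == 1  # exactly one mode may be chosen
```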
@@ -233,10 +237,10 @@ class PromptUser:
if programMode == "subreddit":
subredditInput = input("subreddit: ")
subredditInput = input("subreddit (enter frontpage for frontpage): ")
GLOBAL.arguments.subreddit = subredditInput
while not subredditInput == "":
while not (subredditInput == "" or subredditInput.lower() == "frontpage"):
subredditInput = input("subreddit: ")
GLOBAL.arguments.subreddit += "+" + subredditInput
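The prompt change above special-cases "frontpage" and otherwise keeps collecting names until a blank entry. A compact, hypothetical equivalent of that loop (the original builds the string incrementally and later strips a trailing "+"):

```python
def prompt_subreddits():
    """Collect subreddit names; 'frontpage' short-circuits everything else."""
    first = input("subreddit (enter frontpage for frontpage): ").strip()
    if first.lower() == "frontpage":
        return "frontpage"
    names = [first]
    while True:
        name = input("subreddit: ").strip()
        if name == "":
            break
        names.append(name)
    return "+".join(names)  # PRAW-style multireddit syntax: pics+aww+...
```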
@@ -244,7 +248,8 @@ class PromptUser:
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit.split())
# DELETE THE PLUS (+) AT THE END
GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]
if not subredditInput.lower() == "frontpage":
GLOBAL.arguments.subreddit = GLOBAL.arguments.subreddit[:-1]
print("\nselect sort type:")
sortTypes = [
@@ -265,7 +270,7 @@ class PromptUser:
elif programMode == "multireddit":
GLOBAL.arguments.user = input("\nredditor: ")
GLOBAL.arguments.subreddit = input("\nmultireddit: ")
GLOBAL.arguments.multireddit = input("\nmultireddit: ")
print("\nselect sort type:")
sortTypes = [
@@ -317,7 +322,6 @@ class PromptUser:
GLOBAL.arguments.log = input("\nlog file directory:")
if Path(GLOBAL.arguments.log ).is_file():
break
while True:
try:
GLOBAL.arguments.limit = int(input("\nlimit (0 for none): "))
@@ -377,6 +381,9 @@ def prepareAttributes():
ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit
elif GLOBAL.arguments.multireddit is not None:
ATTRIBUTES["multireddit"] = GLOBAL.arguments.multireddit
elif GLOBAL.arguments.saved is True:
ATTRIBUTES["saved"] = True
@@ -434,16 +441,76 @@ def postExists(POST):
else:
return False
def downloadPost(SUBMISSION):
directory = GLOBAL.directory / SUBMISSION['postSubreddit']
global lastRequestTime
downloaders = {
"imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
}
if SUBMISSION['postType'] in downloaders:
print(SUBMISSION['postType'].upper())
if SUBMISSION['postType'] == "imgur":
if int(time.time() - lastRequestTime) <= 2:
pass
credit = Imgur.get_credits()
IMGUR_RESET_TIME = credit['UserReset']-time.time()
USER_RESET = ("after " \
+ str(int(IMGUR_RESET_TIME/60)) \
+ " Minutes " \
+ str(int(IMGUR_RESET_TIME%60)) \
+ " Seconds")
print(
"Client: {} - User: {} - Reset {}".format(
credit['ClientRemaining'],
credit['UserRemaining'],
USER_RESET
)
)
if not (credit['UserRemaining'] == 0 or \
credit['ClientRemaining'] == 0):
"""This block of code is needed
"""
if int(time.time() - lastRequestTime) <= 2:
pass
lastRequestTime = time.time()
else:
if credit['UserRemaining'] == 0:
KEYWORD = "user"
elif credit['ClientRemaining'] == 0:
KEYWORD = "client"
raise ImgurLimitError('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
downloaders[SUBMISSION['postType']] (directory,SUBMISSION)
else:
raise NoSuitablePost
return None
def download(submissions):
"""Analyze list of submissions and call the right function
to download each one, catch errors, update the log files
"""
subsLenght = len(submissions)
global lastRequestTime
lastRequestTime = 0
downloadedCount = subsLenght
duplicates = 0
BACKUP = {}
FAILED_FILE = createLogFile("FAILED")
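The heart of the refactor: downloadPost() replaces the old per-type if/elif blocks with a dispatch table mapping postType to a downloader class. A stripped-down sketch of the pattern with stand-in handlers in place of the real Imgur/Gfycat/Erome/Direct/Self classes:

```python
class NoSuitablePost(Exception):
    """Raised when no downloader matches the post type."""

def imgur_handler(directory, submission):
    print("imgur:", submission["postURL"])

def direct_handler(directory, submission):
    print("direct:", submission["postURL"])

DOWNLOADERS = {"imgur": imgur_handler, "direct": direct_handler}

def download_post(directory, submission):
    handler = DOWNLOADERS.get(submission["postType"])
    if handler is None:
        raise NoSuitablePost
    handler(directory, submission)

download_post("downloads", {"postType": "direct",
                            "postURL": "https://example.com/cat.jpg"})
```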
@@ -457,131 +524,45 @@ def download(submissions):
)
if postExists(submissions[i]):
result = False
print(submissions[i]['postType'].upper())
print("It already exists")
duplicates += 1
downloadedCount -= 1
continue
directory = GLOBAL.directory / submissions[i]['postSubreddit']
if submissions[i]['postType'] == 'imgur':
print("IMGUR",end="")
while int(time.time() - lastRequestTime) <= 2:
pass
credit = Imgur.get_credits()
IMGUR_RESET_TIME = credit['UserReset']-time.time()
USER_RESET = ("after " \
+ str(int(IMGUR_RESET_TIME/60)) \
+ " Minutes " \
+ str(int(IMGUR_RESET_TIME%60)) \
+ " Seconds")
print(
" => Client: {} - User: {} - Reset {}".format(
credit['ClientRemaining'],
credit['UserRemaining'],
USER_RESET
)
)
if not (credit['UserRemaining'] == 0 or \
credit['ClientRemaining'] == 0):
"""This block of code is needed
"""
while int(time.time() - lastRequestTime) <= 2:
pass
lastRequestTime = time.time()
try:
Imgur(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
duplicates += 1
downloadedCount -= 1
except ImgurLoginError:
print(
"Imgur login failed. Quitting the program "\
"as unexpected errors might occur."
)
sys.exit()
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
else:
if credit['UserRemaining'] == 0:
KEYWORD = "user"
elif credit['ClientRemaining'] == 0:
KEYWORD = "client"
print('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
FAILED_FILE.add(
{int(i+1):['{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()),
submissions[i]]}
)
downloadedCount -= 1
elif submissions[i]['postType'] == 'gfycat':
print("GFYCAT")
try:
Gfycat(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
duplicates += 1
downloadedCount -= 1
except NotADownloadableLinkError as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
elif submissions[i]['postType'] == 'direct':
print("DIRECT")
try:
Direct(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
downloadedCount -= 1
duplicates += 1
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
try:
downloadPost(submissions[i])
elif submissions[i]['postType'] == 'self':
print("SELF")
try:
Self(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
duplicates += 1
downloadedCount -= 1
except FileAlreadyExistsError:
print("It already exists")
downloadedCount -= 1
duplicates += 1
except ImgurLoginError:
print(
"Imgur login failed. \nQuitting the program "\
"as unexpected errors might occur."
)
sys.exit()
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
except ImgurLimitError as exception:
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
else:
except NotADownloadableLinkError as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
except NoSuitablePost:
print("No match found, skipping...")
downloadedCount -= 1
except Exception as exception:
# raise exception
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
if duplicates:
print("\n There was {} duplicates".format(duplicates))
@@ -618,7 +599,7 @@ def main():
logDir = Path(GLOBAL.arguments.log)
download(postFromLog(logDir))
sys.exit()
try:
POSTS = getPosts(prepareAttributes())
except InsufficientPermission:
@@ -663,12 +644,17 @@ if __name__ == "__main__":
print = printToFile
GLOBAL.RUN_TIME = time.time()
main()
except KeyboardInterrupt:
if GLOBAL.directory is None:
GLOBAL.directory = Path(".\\")
print("\nQUITTING...")
except Exception as exception:
logging.error("Runtime error!", exc_info=full_exc_info(sys.exc_info()))
if GLOBAL.directory is None:
GLOBAL.directory = Path(".\\")
logging.error(sys.exc_info()[0].__name__,
exc_info=full_exc_info(sys.exc_info()))
print(log_stream.getvalue())
input("Press enter to quit\n")
input("\nPress enter to quit\n")

src/downloader.py
View File

@@ -2,7 +2,9 @@ import io
import os
import sys
import urllib.request
from html.parser import HTMLParser
from pathlib import Path
from urllib.error import HTTPError
import imgurpython
from multiprocessing import Queue
@@ -69,6 +71,129 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
else:
raise FileAlreadyExistsError
class Erome:
def __init__(self,directory,post):
try:
IMAGES = self.getLinks(post['postURL'])
except urllib.error.HTTPError:
raise NotADownloadableLinkError("Not a downloadable link")
imagesLenght = len(IMAGES)
howManyDownloaded = imagesLenght
duplicates = 0
if imagesLenght == 1:
extension = getExtension(IMAGES[0])
title = nameCorrector(post['postTitle'])
print(title+"_" +post['postId']+extension)
fileDir = title + "_" + post['postId'] + extension
fileDir = directory / fileDir
tempDir = title + "_" + post['postId'] + '.tmp'
tempDir = directory / tempDir
imageURL = "https:" + IMAGES[0]
try:
getFile(fileDir,tempDir,imageURL)
except FileNameTooLong:
fileDir = directory / (post['postId'] + extension)
tempDir = directory / (post['postId'] + '.tmp')
getFile(fileDir,tempDir,imageURL)
else:
title = nameCorrector(post['postTitle'])
print(title+"_"+post['postId'],end="\n\n")
folderDir = directory / (title+"_"+post['postId'])
try:
if not os.path.exists(folderDir):
os.makedirs(folderDir)
except FileNotFoundError:
folderDir = directory / post['postId']
os.makedirs(folderDir)
for i in range(imagesLenght):
extension = getExtension(IMAGES[i])
fileName = str(i+1)
imageURL = "https:" + IMAGES[i]
fileDir = folderDir / (fileName + extension)
tempDir = folderDir / (fileName + ".tmp")
print(" ({}/{})".format(i+1,imagesLenght))
print(" {}".format(fileName+extension))
try:
getFile(fileDir,tempDir,imageURL,indent=2)
print()
except FileAlreadyExistsError:
print(" The file already exists" + " "*10,end="\n\n")
duplicates += 1
howManyDownloaded -= 1
except Exception as exception:
# raise exception
print("\n Could not get the file")
print(" " + str(exception) + "\n")
exceptionType = exception
howManyDownloaded -= 1
if duplicates == imagesLenght:
raise FileAlreadyExistsError
elif howManyDownloaded + duplicates < imagesLenght:
raise AlbumNotDownloadedCompletely(
"Album Not Downloaded Completely"
)
def getLinks(self,url,lineNumber=129):
content = []
lineNumber = None
class EromeParser(HTMLParser):
tag = None
def handle_starttag(self, tag, attrs):
self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}
pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
""" FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS"""
for i in range(len(pageSource)):
obj = EromeParser()
obj.feed(pageSource[i])
tag = obj.tag
if tag is not None:
if "div" in tag:
if "id" in tag["div"]:
if tag["div"]["id"] == "album":
lineNumber = i
break
for line in pageSource[lineNumber:]:
obj = EromeParser()
obj.feed(line)
tag = obj.tag
if tag is not None:
if "img" in tag:
if "class" in tag["img"]:
if tag["img"]["class"]=="img-front":
content.append(tag["img"]["src"])
elif "source" in tag:
content.append(tag["source"]["src"])
return [
link for link in content \
if link.endswith("_480p.mp4") or not link.endswith(".mp4")
]
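For context on the scraping above: getLinks() feeds the page source to HTMLParser one line at a time, keeps img-front image sources and video sources found after the album div, then drops every video rendition except 480p. A runnable sketch of both pieces:

```python
from html.parser import HTMLParser

class EromeParser(HTMLParser):
    """Remembers the last start tag seen, as {tag: {attr: value}}."""
    tag = None
    def handle_starttag(self, tag, attrs):
        self.tag = {tag: {name: value for name, value in attrs}}

parser = EromeParser()
parser.feed('<img class="img-front" src="//v.erome.com/abc.jpg">')
print(parser.tag)  # {'img': {'class': 'img-front', 'src': '//v.erome.com/abc.jpg'}}

# The closing filter keeps images and only the 480p video rendition:
links = ["//a/1.jpg", "//a/2_480p.mp4", "//a/2_720p.mp4"]
print([link for link in links
       if link.endswith("_480p.mp4") or not link.endswith(".mp4")])
# -> ['//a/1.jpg', '//a/2_480p.mp4']
```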
class Imgur:
def __init__(self,directory,post):
self.imgurClient = self.initImgur()
@@ -171,7 +296,7 @@ class Imgur:
if duplicates == imagesLenght:
raise FileAlreadyExistsError
elif howManyDownloaded < imagesLenght:
elif howManyDownloaded + duplicates < imagesLenght:
raise AlbumNotDownloadedCompletely(
"Album Not Downloaded Completely"
)

src/errors.py
View File

@@ -80,4 +80,10 @@ class InvalidSortingType(Exception):
pass
class FileNotFoundError(Exception):
pass
class NoSuitablePost(Exception):
pass
class ImgurLimitError(Exception):
pass

src/searcher.py
View File

@@ -299,6 +299,8 @@ def redditSearcher(posts,SINGLE_POST=False):
gfycatCount = 0
global imgurCount
imgurCount = 0
global eromeCount
eromeCount = 0
global directCount
directCount = 0
global selfCount
@@ -360,8 +362,15 @@ def redditSearcher(posts,SINGLE_POST=False):
if not len(subList) == 0:
print(
"\nTotal of {} submissions found!\n"\
"{} GFYCATs, {} IMGURs, {} DIRECTs and {} SELF POSTS\n"
.format(len(subList),gfycatCount,imgurCount,directCount,selfCount)
"{} GFYCATs, {} IMGURs, {} EROMEs, {} DIRECTs and {} SELF POSTS\n"
.format(
len(subList),
gfycatCount,
imgurCount,
eromeCount,
directCount,
selfCount
)
)
return subList
else:
@@ -370,6 +379,7 @@ def redditSearcher(posts,SINGLE_POST=False):
def checkIfMatching(submission):
global gfycatCount
global imgurCount
global eromeCount
global directCount
global selfCount
@@ -383,19 +393,20 @@ def checkIfMatching(submission):
except AttributeError:
return None
if ('gfycat' in submission.domain) or \
('imgur' in submission.domain):
if 'gfycat' in submission.domain:
details['postType'] = 'gfycat'
gfycatCount += 1
return details
if 'gfycat' in submission.domain:
details['postType'] = 'gfycat'
gfycatCount += 1
return details
elif 'imgur' in submission.domain:
details['postType'] = 'imgur'
imgurCount += 1
return details
elif 'imgur' in submission.domain:
details['postType'] = 'imgur'
imgurCount += 1
return details
elif 'erome' in submission.domain:
details['postType'] = 'erome'
eromeCount += 1
return details
elif isDirectLink(submission.url) is not False:
details['postType'] = 'direct'
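The rewritten chain above classifies posts purely by domain, with the new erome branch slotted in before the direct-link fallback. A condensed sketch of that logic (is_direct_link here is a simplified stand-in for the project's isDirectLink):

```python
def is_direct_link(url):
    """Simplified stand-in: treat common media extensions as direct links."""
    return url if url.endswith((".jpg", ".png", ".gif", ".mp4")) else False

def classify(domain, url):
    if "gfycat" in domain:
        return "gfycat"
    elif "imgur" in domain:
        return "imgur"
    elif "erome" in domain:
        return "erome"
    elif is_direct_link(url) is not False:
        return "direct"
    return None

print(classify("erome.com", "https://www.erome.com/a/xyz"))  # erome
print(classify("i.redd.it", "https://i.redd.it/abc.jpg"))    # direct
```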

src/tools.py
View File

@@ -75,8 +75,10 @@ def createLogFile(TITLE):
put given arguments inside \"HEADER\" key
"""
folderDirectory = GLOBAL.directory / str(time.strftime("%d-%m-%Y_%H-%M-%S",
time.localtime(GLOBAL.RUN_TIME)))
folderDirectory = GLOBAL.directory / "LOG_FILES" / \
str(time.strftime(
"%d-%m-%Y_%H-%M-%S",time.localtime(GLOBAL.RUN_TIME)
))
logFilename = TITLE.upper()+'.json'
if not path.exists(folderDirectory):
@@ -95,7 +97,7 @@ def printToFile(*args, **kwargs):
TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S",
time.localtime(GLOBAL.RUN_TIME)))
folderDirectory = GLOBAL.directory / TIME
folderDirectory = GLOBAL.directory / "LOG_FILES" / TIME
print(*args,**kwargs)
if not path.exists(folderDirectory):
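With this change every run's logs land under a LOG_FILES folder instead of cluttering the download root. A small sketch of the resulting path construction:

```python
import time
from pathlib import Path

run_time = time.time()
stamp = time.strftime("%d-%m-%Y_%H-%M-%S", time.localtime(run_time))
folder = Path(".") / "LOG_FILES" / stamp   # e.g. LOG_FILES/23-07-2018_23-37-27
folder.mkdir(parents=True, exist_ok=True)  # created on demand, like the diff does
print(folder / "FAILED.json")
```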