From 7314e17125aa78fd4e6b28e26fda7ec7db7e0147 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ali=20Parlak=C3=A7=C4=B1?=
Date: Mon, 23 Jul 2018 23:16:56 +0300
Subject: [PATCH] Added erome support

---
 script.py         |   9 ++--
 src/downloader.py | 131 +++++++++++++++++++++++++++++++++++++++++++++-
 src/searcher.py   |  37 ++++++++-----
 3 files changed, 158 insertions(+), 19 deletions(-)

diff --git a/script.py b/script.py
index 76b1dcc..14d5b54 100644
--- a/script.py
+++ b/script.py
@@ -13,7 +13,7 @@ import time
 from io import StringIO
 from pathlib import Path, PurePath
 
-from src.downloader import Direct, Gfycat, Imgur, Self
+from src.downloader import Direct, Gfycat, Imgur, Self, Erome
 from src.errors import *
 from src.parser import LinkDesigner
 from src.searcher import getPosts
@@ -322,7 +322,6 @@ class PromptUser:
             GLOBAL.arguments.log = input("\nlog file directory:")
             if Path(GLOBAL.arguments.log ).is_file():
                 break
-
         while True:
             try:
                 GLOBAL.arguments.limit = int(input("\nlimit (0 for none): "))
@@ -447,7 +446,9 @@ def downloadPost(SUBMISSION):
 
     global lastRequestTime
 
-    downloaders = {"imgur":Imgur,"gfycat":Gfycat,"direct":Direct,"self":Self}
+    downloaders = {
+        "imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
+    }
 
     if SUBMISSION['postType'] in downloaders:
 
@@ -572,8 +573,6 @@ def download(submissions):
     else:
         print(" Total of {} links downloaded!".format(downloadedCount))
 
-    return None
-
 def main():
     GLOBAL.arguments = parseArguments()
 
diff --git a/src/downloader.py b/src/downloader.py
index e7a93c8..88775e1 100644
--- a/src/downloader.py
+++ b/src/downloader.py
@@ -2,6 +2,7 @@ import io
 import os
 import sys
 import urllib.request
+from html.parser import HTMLParser
 from pathlib import Path
 
 import imgurpython
@@ -69,6 +70,134 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
     else:
         raise FileAlreadyExistsError
 
+class Erome:
+    def __init__(self,directory,post):
+        # try:
+        #     IMAGES = self.getLinks(post['postURL'])
+        # except IndexError:
+        #     # raise NotADownloadableLinkError("Could not read the page source")
+        #     pass
+        # except Exception as exception:
+        #     pass
+        #     # raise NotADownloadableLinkError("Could not read the page source")
+        IMAGES = self.getLinks(post['postURL'])
+
+        imagesLenght = len(IMAGES)
+        howManyDownloaded = imagesLenght
+        duplicates = 0
+
+        if imagesLenght == 1:
+
+            extension = getExtension(IMAGES[0])
+
+            title = nameCorrector(post['postTitle'])
+            print(title+"_"+post['postId']+extension)
+
+            fileDir = title + "_" + post['postId'] + extension
+            fileDir = directory / fileDir
+
+            tempDir = title + "_" + post['postId'] + '.tmp'
+            tempDir = directory / tempDir
+
+            imageURL = "https:" + IMAGES[0]
+
+            try:
+                getFile(fileDir,tempDir,imageURL)
+            except FileNameTooLong:
+                fileDir = directory / (post['postId'] + extension)
+                tempDir = directory / (post['postId'] + '.tmp')
+                getFile(fileDir,tempDir,imageURL)
+
+        else:
+            title = nameCorrector(post['postTitle'])
+            print(title+"_"+post['postId'],end="\n\n")
+
+            folderDir = directory / (title+"_"+post['postId'])
+
+            try:
+                if not os.path.exists(folderDir):
+                    os.makedirs(folderDir)
+            except FileNotFoundError:
+                folderDir = directory / post['postId']
+                os.makedirs(folderDir)
+
+            for i in range(imagesLenght):
+
+                extension = getExtension(IMAGES[i])
+
+                fileName = str(i+1)
+                imageURL = "https:" + IMAGES[i]
+
+                fileDir = folderDir / (fileName + extension)
+                tempDir = folderDir / (fileName + ".tmp")
+
+                print("  ({}/{})".format(i+1,imagesLenght))
+                print("  {}".format(fileName+extension))
+
+                try:
+                    getFile(fileDir,tempDir,imageURL,indent=2)
+                    print()
+                except FileAlreadyExistsError:
+                    print("  The file already exists" + " "*10,end="\n\n")
+                    duplicates += 1
+                    howManyDownloaded -= 1
+
+                except Exception as exception:
+                    # raise exception
+                    print("\n  Could not get the file")
+                    print("  " + str(exception) + "\n")
+                    exceptionType = exception
+                    howManyDownloaded -= 1
+
+            if duplicates == imagesLenght:
+                raise FileAlreadyExistsError
+            elif howManyDownloaded + duplicates < imagesLenght:
+                raise AlbumNotDownloadedCompletely(
+                    "Album Not Downloaded Completely"
+                )
+
+    def getLinks(self,url):
+
+        content = []
+        lineNumber = None
+
+        class EromeParser(HTMLParser):
+            tag = None
+            def handle_starttag(self, tag, attrs):
+                self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}
+
+        pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
+
+        # find where the album starts in order not to pick up unrelated links
+        for i in range(len(pageSource)):
+            obj = EromeParser()
+            obj.feed(pageSource[i])
+            tag = obj.tag
+
+            if tag is not None:
+                if "div" in tag:
+                    if "id" in tag["div"]:
+                        if tag["div"]["id"] == "album":
+                            lineNumber = i
+                            break
+
+        for line in pageSource[lineNumber:]:
+            obj = EromeParser()
+            obj.feed(line)
+            tag = obj.tag
+            if tag is not None:
+                if "img" in tag:
+                    if "class" in tag["img"]:
+                        if tag["img"]["class"]=="img-front":
+                            content.append(tag["img"]["src"])
+                elif "source" in tag:
+                    content.append(tag["source"]["src"])
+
+        return [
+            link for link in content
+            if link.endswith("_480p.mp4") or not link.endswith(".mp4")
+        ]
+
 class Imgur:
     def __init__(self,directory,post):
         self.imgurClient = self.initImgur()
@@ -171,7 +300,7 @@ class Imgur:
 
             if duplicates == imagesLenght:
                 raise FileAlreadyExistsError
-            elif howManyDownloaded < imagesLenght:
+            elif howManyDownloaded + duplicates < imagesLenght:
                 raise AlbumNotDownloadedCompletely(
                     "Album Not Downloaded Completely"
                 )
diff --git a/src/searcher.py b/src/searcher.py
index 1a8ab4e..ea0d0ec 100644
--- a/src/searcher.py
+++ b/src/searcher.py
@@ -299,6 +299,8 @@ def redditSearcher(posts,SINGLE_POST=False):
     gfycatCount = 0
     global imgurCount
     imgurCount = 0
+    global eromeCount
+    eromeCount = 0
     global directCount
     directCount = 0
     global selfCount
@@ -360,8 +362,15 @@ def redditSearcher(posts,SINGLE_POST=False):
     if not len(subList) == 0:
         print(
             "\nTotal of {} submissions found!\n"\
-            "{} GFYCATs, {} IMGURs, {} DIRECTs and {} SELF POSTS\n"
-            .format(len(subList),gfycatCount,imgurCount,directCount,selfCount)
+            "{} GFYCATs, {} IMGURs, {} EROMEs, {} DIRECTs and {} SELF POSTS\n"
+            .format(
+                len(subList),
+                gfycatCount,
+                imgurCount,
+                eromeCount,
+                directCount,
+                selfCount
+            )
         )
         return subList
     else:
@@ -370,6 +379,7 @@ def checkIfMatching(submission):
     global gfycatCount
     global imgurCount
+    global eromeCount
     global directCount
     global selfCount
 
@@ -383,19 +393,20 @@ def checkIfMatching(submission):
     except AttributeError:
         return None
 
-    if ('gfycat' in submission.domain) or \
-       ('imgur' in submission.domain):
+    if 'gfycat' in submission.domain:
+        details['postType'] = 'gfycat'
+        gfycatCount += 1
+        return details
 
-        if 'gfycat' in submission.domain:
-            details['postType'] = 'gfycat'
-            gfycatCount += 1
-            return details
+    elif 'imgur' in submission.domain:
+        details['postType'] = 'imgur'
+        imgurCount += 1
+        return details
 
-        elif 'imgur' in submission.domain:
-            details['postType'] = 'imgur'
-
-            imgurCount += 1
-            return details
+    elif 'erome' in submission.domain:
+        details['postType'] = 'erome'
+        eromeCount += 1
+        return details
 
     elif isDirectLink(submission.url) is not False:
         details['postType'] = 'direct'
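Below the patch, a minimal usage sketch (not part of the diff) of how the new Erome class can be driven on its own, outside the downloaders dispatch in downloadPost. The target directory and the post dictionary are illustrative assumptions; only the key names (postId, postTitle, postURL, postType) and the raised error types are taken from the patch itself.

```python
from pathlib import Path

from src.downloader import Erome
from src.errors import FileAlreadyExistsError, AlbumNotDownloadedCompletely

# Hypothetical post dict; keys mirror those the Erome class reads above.
post = {
    "postId": "abc123",                           # assumed post id
    "postTitle": "Example album",                 # assumed title
    "postURL": "https://www.erome.com/a/abc123",  # assumed album URL
    "postType": "erome",
}

downloadDir = Path("downloads")          # assumed target directory
downloadDir.mkdir(exist_ok=True)

try:
    # Erome(directory, post) fetches a single file or the whole album into
    # the given directory, mirroring the behaviour of the Imgur class.
    Erome(downloadDir, post)
except FileAlreadyExistsError:
    print("Already downloaded.")
except AlbumNotDownloadedCompletely:
    print("Some files in the album could not be downloaded.")
```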