mirror of
				https://github.com/KevinMidboe/bulk-downloader-for-reddit.git
				synced 2025-10-29 17:40:15 +00:00 
			
		
		
		
	Added erome support
This commit is contained in:
		| @@ -13,7 +13,7 @@ import time | ||||
| from io import StringIO | ||||
| from pathlib import Path, PurePath | ||||
|  | ||||
| from src.downloader import Direct, Gfycat, Imgur, Self | ||||
| from src.downloader import Direct, Gfycat, Imgur, Self, Erome | ||||
| from src.errors import * | ||||
| from src.parser import LinkDesigner | ||||
| from src.searcher import getPosts | ||||
| @@ -322,7 +322,6 @@ class PromptUser: | ||||
|                 GLOBAL.arguments.log = input("\nlog file directory:") | ||||
|                 if Path(GLOBAL.arguments.log ).is_file(): | ||||
|                     break  | ||||
|  | ||||
|         while True: | ||||
|             try: | ||||
|                 GLOBAL.arguments.limit = int(input("\nlimit (0 for none): ")) | ||||
| @@ -447,7 +446,9 @@ def downloadPost(SUBMISSION): | ||||
|  | ||||
|     global lastRequestTime | ||||
|  | ||||
|     downloaders = {"imgur":Imgur,"gfycat":Gfycat,"direct":Direct,"self":Self} | ||||
|     downloaders = { | ||||
|         "imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self | ||||
|     } | ||||
|  | ||||
|     if SUBMISSION['postType'] in downloaders: | ||||
|  | ||||
| @@ -572,8 +573,6 @@ def download(submissions): | ||||
|     else: | ||||
|         print(" Total of {} links downloaded!".format(downloadedCount)) | ||||
|  | ||||
|     return None | ||||
|  | ||||
| def main(): | ||||
|     GLOBAL.arguments = parseArguments() | ||||
|  | ||||
|   | ||||
| @@ -2,6 +2,7 @@ import io | ||||
| import os | ||||
| import sys | ||||
| import urllib.request | ||||
| from html.parser import HTMLParser | ||||
| from pathlib import Path | ||||
|  | ||||
| import imgurpython | ||||
| @@ -69,6 +70,134 @@ def getFile(fileDir,tempDir,imageURL,indent=0): | ||||
|     else: | ||||
|         raise FileAlreadyExistsError | ||||
|  | ||||
| class Erome: | ||||
|     def __init__(self,directory,post): | ||||
|         # try: | ||||
|         #     IMAGES = self.getLinks(post['postURL']) | ||||
|         # except IndexError: | ||||
|         #     # raise NotADownloadableLinkError("Could not read the page source") | ||||
|         #     pass | ||||
|         # except Exception as exception: | ||||
|         #     pass | ||||
|         #     # raise NotADownloadableLinkError("Could not read the page source") | ||||
|         IMAGES = self.getLinks(post['postURL']) | ||||
|  | ||||
|         imagesLenght = len(IMAGES) | ||||
|         howManyDownloaded = imagesLenght | ||||
|         duplicates = 0 | ||||
|  | ||||
|         if imagesLenght == 1: | ||||
|              | ||||
|             extension = getExtension(IMAGES[0]) | ||||
|  | ||||
|             title = nameCorrector(post['postTitle']) | ||||
|             print(title+"_" +post['postId']+extension) | ||||
|  | ||||
|             fileDir = title + "_" + post['postId'] + extension | ||||
|             fileDir = directory / fileDir | ||||
|  | ||||
|             tempDir = title + "_" + post['postId'] + '.tmp' | ||||
|             tempDir = directory / tempDir | ||||
|  | ||||
|             imageURL = "https:" + IMAGES[0] | ||||
|  | ||||
|             try: | ||||
|                 getFile(fileDir,tempDir,imageURL) | ||||
|             except FileNameTooLong: | ||||
|                 fileDir = directory / (post['postId'] + extension) | ||||
|                 tempDir = directory / (post['postId'] + '.tmp') | ||||
|                 getFile(fileDir,tempDir,imageURL) | ||||
|  | ||||
|         else: | ||||
|             title = nameCorrector(post['postTitle']) | ||||
|             print(title+"_"+post['postId'],end="\n\n") | ||||
|  | ||||
|             folderDir = directory / (title+"_"+post['postId']) | ||||
|  | ||||
|             try: | ||||
|                 if not os.path.exists(folderDir): | ||||
|                     os.makedirs(folderDir) | ||||
|             except FileNotFoundError: | ||||
|                 folderDir = directory / post['postId'] | ||||
|                 os.makedirs(folderDir) | ||||
|  | ||||
|             for i in range(imagesLenght): | ||||
|                  | ||||
|                 extension = getExtension(IMAGES[i]) | ||||
|  | ||||
|                 fileName = str(i+1) | ||||
|                 imageURL = "https:" + IMAGES[i] | ||||
|  | ||||
|                 fileDir = folderDir / (fileName + extension) | ||||
|                 tempDir = folderDir / (fileName + ".tmp") | ||||
|  | ||||
|                 print("  ({}/{})".format(i+1,imagesLenght)) | ||||
|                 print("  {}".format(fileName+extension)) | ||||
|  | ||||
|                 try: | ||||
|                     getFile(fileDir,tempDir,imageURL,indent=2) | ||||
|                     print() | ||||
|                 except FileAlreadyExistsError: | ||||
|                     print("  The file already exists" + " "*10,end="\n\n") | ||||
|                     duplicates += 1 | ||||
|                     howManyDownloaded -= 1 | ||||
|  | ||||
|                 except Exception as exception: | ||||
|                     raise exception | ||||
|                     print("\n  Could not get the file") | ||||
|                     print("  " + str(exception) + "\n") | ||||
|                     exceptionType = exception | ||||
|                     howManyDownloaded -= 1 | ||||
|  | ||||
|             if duplicates == imagesLenght: | ||||
|                 raise FileAlreadyExistsError | ||||
|             elif howManyDownloaded + duplicates < imagesLenght: | ||||
|                 raise AlbumNotDownloadedCompletely( | ||||
|                     "Album Not Downloaded Completely" | ||||
|                 ) | ||||
|  | ||||
|     def getLinks(self,url,lineNumber=129): | ||||
|   | ||||
|         content = [] | ||||
|         lineNumber = None | ||||
|  | ||||
|         class EromeParser(HTMLParser): | ||||
|             tag = None | ||||
|             def handle_starttag(self, tag, attrs): | ||||
|                 self.tag = {tag:{attr[0]: attr[1] for attr in attrs}} | ||||
|  | ||||
|         pageSource = (urllib.request.urlopen(url).read().decode().split('\n')) | ||||
|  | ||||
|         """ FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS""" | ||||
|         for i in range(len(pageSource)): | ||||
|             obj = EromeParser() | ||||
|             obj.feed(pageSource[i]) | ||||
|             tag = obj.tag | ||||
|              | ||||
|             if tag is not None: | ||||
|                 if "div" in tag: | ||||
|                     if "id" in tag["div"]: | ||||
|                         if tag["div"]["id"] == "album": | ||||
|                             lineNumber = i | ||||
|                             break | ||||
|  | ||||
|         for line in pageSource[lineNumber:]: | ||||
|             obj = EromeParser() | ||||
|             obj.feed(line) | ||||
|             tag = obj.tag | ||||
|             if tag is not None: | ||||
|                 if "img" in tag: | ||||
|                     if "class" in tag["img"]: | ||||
|                         if tag["img"]["class"]=="img-front": | ||||
|                             content.append(tag["img"]["src"]) | ||||
|                 elif "source" in tag: | ||||
|                     content.append(tag["source"]["src"]) | ||||
|                      | ||||
|         return [ | ||||
|             link for link in content \ | ||||
|             if link.endswith("_480p.mp4") or not link.endswith(".mp4") | ||||
|         ] | ||||
|  | ||||
| class Imgur: | ||||
|     def __init__(self,directory,post): | ||||
|         self.imgurClient = self.initImgur() | ||||
| @@ -171,7 +300,7 @@ class Imgur: | ||||
|  | ||||
|             if duplicates == imagesLenght: | ||||
|                 raise FileAlreadyExistsError | ||||
|             elif howManyDownloaded < imagesLenght: | ||||
|             elif howManyDownloaded + duplicates < imagesLenght: | ||||
|                 raise AlbumNotDownloadedCompletely( | ||||
|                     "Album Not Downloaded Completely" | ||||
|                 ) | ||||
|   | ||||
| @@ -299,6 +299,8 @@ def redditSearcher(posts,SINGLE_POST=False): | ||||
|     gfycatCount = 0 | ||||
|     global imgurCount | ||||
|     imgurCount = 0 | ||||
|     global eromeCount | ||||
|     eromeCount = 0 | ||||
|     global directCount | ||||
|     directCount = 0 | ||||
|     global selfCount | ||||
| @@ -360,8 +362,15 @@ def redditSearcher(posts,SINGLE_POST=False): | ||||
|     if not len(subList) == 0:     | ||||
|         print( | ||||
|             "\nTotal of {} submissions found!\n"\ | ||||
|             "{} GFYCATs, {} IMGURs, {} DIRECTs and {} SELF POSTS\n" | ||||
|             .format(len(subList),gfycatCount,imgurCount,directCount,selfCount) | ||||
|             "{} GFYCATs, {} IMGURs, {} EROMEs, {} DIRECTs and {} SELF POSTS\n" | ||||
|             .format( | ||||
|                 len(subList), | ||||
|                 gfycatCount, | ||||
|                 imgurCount, | ||||
|                 eromeCount, | ||||
|                 directCount, | ||||
|                 selfCount | ||||
|             ) | ||||
|         ) | ||||
|         return subList | ||||
|     else: | ||||
| @@ -370,6 +379,7 @@ def redditSearcher(posts,SINGLE_POST=False): | ||||
| def checkIfMatching(submission): | ||||
|     global gfycatCount | ||||
|     global imgurCount | ||||
|     global eromeCount | ||||
|     global directCount | ||||
|     global selfCount | ||||
|  | ||||
| @@ -383,19 +393,20 @@ def checkIfMatching(submission): | ||||
|     except AttributeError: | ||||
|         return None | ||||
|  | ||||
|     if ('gfycat' in submission.domain) or \ | ||||
|         ('imgur' in submission.domain): | ||||
|     if 'gfycat' in submission.domain: | ||||
|         details['postType'] = 'gfycat' | ||||
|         gfycatCount += 1 | ||||
|         return details | ||||
|  | ||||
|         if 'gfycat' in submission.domain: | ||||
|             details['postType'] = 'gfycat' | ||||
|             gfycatCount += 1 | ||||
|             return details | ||||
|     elif 'imgur' in submission.domain: | ||||
|         details['postType'] = 'imgur' | ||||
|         imgurCount += 1 | ||||
|         return details | ||||
|  | ||||
|         elif 'imgur' in submission.domain: | ||||
|             details['postType'] = 'imgur' | ||||
|              | ||||
|             imgurCount += 1 | ||||
|             return details | ||||
|     elif 'erome' in submission.domain: | ||||
|         details['postType'] = 'erome' | ||||
|         eromeCount += 1 | ||||
|         return details | ||||
|  | ||||
|     elif isDirectLink(submission.url) is not False: | ||||
|         details['postType'] = 'direct' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user