mirror of
				https://github.com/KevinMidboe/bulk-downloader-for-reddit.git
				synced 2025-10-29 17:40:15 +00:00 
			
		
		
		
	Added Self class
This commit is contained in:
		
							
								
								
									
										19
									
								
								script.py
									
									
									
									
									
								
							
							
						
						
									
										19
									
								
								script.py
									
									
									
									
									
								
							@@ -11,7 +11,7 @@ import sys
 | 
				
			|||||||
import time
 | 
					import time
 | 
				
			||||||
from pathlib import Path, PurePath
 | 
					from pathlib import Path, PurePath
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from src.downloader import Direct, Gfycat, Imgur
 | 
					from src.downloader import Direct, Gfycat, Imgur, Self
 | 
				
			||||||
from src.parser import LinkDesigner
 | 
					from src.parser import LinkDesigner
 | 
				
			||||||
from src.searcher import getPosts
 | 
					from src.searcher import getPosts
 | 
				
			||||||
from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
 | 
					from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
 | 
				
			||||||
@@ -451,7 +451,22 @@ def download(submissions):
 | 
				
			|||||||
                print(exception)
 | 
					                print(exception)
 | 
				
			||||||
                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
 | 
					                FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
 | 
				
			||||||
                downloadedCount -= 1
 | 
					                downloadedCount -= 1
 | 
				
			||||||
                
 | 
					        
 | 
				
			||||||
 | 
					        elif submissions[i]['postType'] == 'self':
 | 
				
			||||||
 | 
					            print("SELF")
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
 | 
					                Self(directory,submissions[i])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            except FileAlreadyExistsError:
 | 
				
			||||||
 | 
					                print("It already exists")
 | 
				
			||||||
 | 
					                downloadedCount -= 1
 | 
				
			||||||
 | 
					                duplicates += 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # except Exception as exception:
 | 
				
			||||||
 | 
					            #     print(exception)
 | 
				
			||||||
 | 
					            #     FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
 | 
				
			||||||
 | 
					            #     downloadedCount -= 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            print("No match found, skipping...")
 | 
					            print("No match found, skipping...")
 | 
				
			||||||
            downloadedCount -= 1
 | 
					            downloadedCount -= 1
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,3 +1,4 @@
 | 
				
			|||||||
 | 
					import io
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
import urllib.request
 | 
					import urllib.request
 | 
				
			||||||
@@ -16,7 +17,7 @@ except ModuleNotFoundError:
 | 
				
			|||||||
    install("imgurpython")
 | 
					    install("imgurpython")
 | 
				
			||||||
    from imgurpython import *
 | 
					    from imgurpython import *
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					VanillaPrint = print
 | 
				
			||||||
print = printToFile
 | 
					print = printToFile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def dlProgress(count, blockSize, totalSize):
 | 
					def dlProgress(count, blockSize, totalSize):
 | 
				
			||||||
@@ -233,8 +234,13 @@ class Gfycat:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        fileDir = directory / (title+"_"+POST['postId']+POST['postExt'])
 | 
					        fileDir = directory / (title+"_"+POST['postId']+POST['postExt'])
 | 
				
			||||||
        tempDir = directory / (title+"_"+POST['postId']+".tmp")
 | 
					        tempDir = directory / (title+"_"+POST['postId']+".tmp")
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            getFile(fileDir,tempDir,POST['mediaURL'])
 | 
				
			||||||
 | 
					        except FileNameTooLong:
 | 
				
			||||||
 | 
					            fileDir = directory / (POST['postId']+POST['postExt'])
 | 
				
			||||||
 | 
					            tempDir = directory / (POST['postId']+".tmp")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        getFile(fileDir,tempDir,POST['mediaURL'])
 | 
					            getFile(fileDir,tempDir,POST['mediaURL'])
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
    def getLink(self, url, query='<source id="mp4Source" src=', lineNumber=105):
 | 
					    def getLink(self, url, query='<source id="mp4Source" src=', lineNumber=105):
 | 
				
			||||||
        """Extract direct link to the video from page's source
 | 
					        """Extract direct link to the video from page's source
 | 
				
			||||||
@@ -282,4 +288,46 @@ class Direct:
 | 
				
			|||||||
        tempDir = title+"_"+POST['postId']+".tmp"
 | 
					        tempDir = title+"_"+POST['postId']+".tmp"
 | 
				
			||||||
        tempDir = directory / tempDir
 | 
					        tempDir = directory / tempDir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        getFile(fileDir,tempDir,POST['postURL'])
 | 
					        try:
 | 
				
			||||||
 | 
					            getFile(fileDir,tempDir,POST['postURL'])
 | 
				
			||||||
 | 
					        except FileNameTooLong:
 | 
				
			||||||
 | 
					            fileDir = directory / (POST['postId']+POST['postExt'])
 | 
				
			||||||
 | 
					            tempDir = directory / (POST['postId']+".tmp")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            getFile(fileDir,tempDir,POST['postURL'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Self:
 | 
				
			||||||
 | 
					    def __init__(self,directory,post):
 | 
				
			||||||
 | 
					        if not os.path.exists(directory): os.makedirs(directory)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        title = nameCorrector(post['postTitle'])
 | 
				
			||||||
 | 
					        print(title+"_"+post['postId']+".md")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        fileDir = title+"_"+post['postId']+".md"
 | 
				
			||||||
 | 
					        fileDir = directory / fileDir
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        if Path.is_file(fileDir):
 | 
				
			||||||
 | 
					            raise FileAlreadyExistsError
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.writeToFile(fileDir,post)
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    @staticmethod
 | 
				
			||||||
 | 
					    def writeToFile(directory,post):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        content = ("## ["
 | 
				
			||||||
 | 
					                   + post["postTitle"]
 | 
				
			||||||
 | 
					                   + "]("
 | 
				
			||||||
 | 
					                   + post["postURL"]
 | 
				
			||||||
 | 
					                   + ")\n"
 | 
				
			||||||
 | 
					                   + post["postContent"]
 | 
				
			||||||
 | 
					                   + "\n\n---\n\n"
 | 
				
			||||||
 | 
					                   + "submitted by [u/"
 | 
				
			||||||
 | 
					                   + post["postSubmitter"]
 | 
				
			||||||
 | 
					                   + "](https://www.reddit.com/user/"
 | 
				
			||||||
 | 
					                   + post["postSubmitter"]
 | 
				
			||||||
 | 
					                   + ")")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        with io.open(directory,"w",encoding="utf-8") as FILE:
 | 
				
			||||||
 | 
					            VanillaPrint(content,file=FILE)
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        print("Downloaded")
 | 
				
			||||||
@@ -308,6 +308,10 @@ def redditSearcher(posts,SINGLE_POST=False):
 | 
				
			|||||||
    imgurCount = 0
 | 
					    imgurCount = 0
 | 
				
			||||||
    global directCount
 | 
					    global directCount
 | 
				
			||||||
    directCount = 0
 | 
					    directCount = 0
 | 
				
			||||||
 | 
					    global selfCount
 | 
				
			||||||
 | 
					    selfCount = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    allPosts = {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    postsFile = createLogFile("POSTS")
 | 
					    postsFile = createLogFile("POSTS")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -356,13 +360,15 @@ def redditSearcher(posts,SINGLE_POST=False):
 | 
				
			|||||||
                printSubmission(submission,subCount,orderCount)
 | 
					                printSubmission(submission,subCount,orderCount)
 | 
				
			||||||
                subList.append(details)
 | 
					                subList.append(details)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            postsFile.add({subCount:[details]})
 | 
					            allPosts = {**allPosts,**details}
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        postsFile.add(allPosts)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if not len(subList) == 0:    
 | 
					    if not len(subList) == 0:    
 | 
				
			||||||
        print(
 | 
					        print(
 | 
				
			||||||
            "\nTotal of {} submissions found!\n"\
 | 
					            "\nTotal of {} submissions found!\n"\
 | 
				
			||||||
            "{} GFYCATs, {} IMGURs and {} DIRECTs\n"
 | 
					            "{} GFYCATs, {} IMGURs, {} DIRECTs and {} SELF POSTS\n"
 | 
				
			||||||
            .format(len(subList),gfycatCount,imgurCount,directCount)
 | 
					            .format(len(subList),gfycatCount,imgurCount,directCount,selfCount)
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        return subList
 | 
					        return subList
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
@@ -372,6 +378,7 @@ def checkIfMatching(submission):
 | 
				
			|||||||
    global gfycatCount
 | 
					    global gfycatCount
 | 
				
			||||||
    global imgurCount
 | 
					    global imgurCount
 | 
				
			||||||
    global directCount
 | 
					    global directCount
 | 
				
			||||||
 | 
					    global selfCount
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        details = {'postId':submission.id,
 | 
					        details = {'postId':submission.id,
 | 
				
			||||||
@@ -397,13 +404,15 @@ def checkIfMatching(submission):
 | 
				
			|||||||
            imgurCount += 1
 | 
					            imgurCount += 1
 | 
				
			||||||
            return details
 | 
					            return details
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    elif isDirectLink(submission.url) is True:
 | 
					    elif isDirectLink(submission.url):
 | 
				
			||||||
        details['postType'] = 'direct'
 | 
					        details['postType'] = 'direct'
 | 
				
			||||||
        directCount += 1
 | 
					        directCount += 1
 | 
				
			||||||
        return details
 | 
					        return details
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    elif submission.is_self:
 | 
					    elif submission.is_self:
 | 
				
			||||||
        details['postType'] = 'self'
 | 
					        details['postType'] = 'self'
 | 
				
			||||||
 | 
					        details['postContent'] = submission.selftext
 | 
				
			||||||
 | 
					        selfCount += 1
 | 
				
			||||||
        return details
 | 
					        return details
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def printSubmission(SUB,validNumber,totalNumber):
 | 
					def printSubmission(SUB,validNumber,totalNumber):
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user