13 Commits

Author SHA1 Message Date
Ali Parlakci  aece2273fb  Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit  2018-08-28 16:28:29 +03:00
Ali Parlakci  f807efe4d5  Ignore space at the end of directory  2018-08-28 16:27:29 +03:00
Ali Parlakci  743d887927  Ignore space at the end of directory  2018-08-28 16:24:14 +03:00
Ali Parlakci  da5492858c  Add bs4  2018-08-28 16:15:22 +03:00
Ali Parlakci  cebfc713d2  Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit  2018-08-28 16:12:01 +03:00
Ali Parlakci  f522154214  Update version  2018-08-28 16:11:48 +03:00
Ali Parlakçı  27cd3ee991  Changed getting gfycat links' algorithm  2018-08-28 16:10:15 +03:00
Ali Parlakci  29873331e6  Typo fix  2018-08-23 16:41:07 +03:00
Ali Parlakci  8a3dcd68a3  Update version  2018-08-23 12:16:31 +03:00
Ali Parlakci  ac323f2abe  Bug fix  2018-08-23 12:09:56 +03:00
Ali Parlakci  32d26fa956  Print out github link at start  2018-08-20 15:13:42 +03:00
Ali Parlakci  137481cf3e  Print out program info  2018-08-18 14:51:20 +03:00
Ali Parlakci  9b63c55d3e  Print out version info before starting  2018-08-17 21:25:01 +03:00
4 changed files with 36 additions and 24 deletions

View File

@@ -1,4 +1,8 @@
 # Changes on *master*

+## [28/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/d56efed1c6833a66322d9158523b89d0ce57f5de)
+- Adjusted algorithm used for extracting gfycat links because of gfycat's design change
+- Ignore space at the end of the given directory
+
 ## [16/08/2018](https://github.com/aliparlakci/bulk-downloader-for-reddit/tree/d56efed1c6833a66322d9158523b89d0ce57f5de)
 - Fix the bug that prevents downloading imgur videos

View File

@@ -1,3 +1,4 @@
+bs4
 requests
 praw
 imgurpython

View File

@@ -23,7 +23,7 @@ from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
 __author__ = "Ali Parlakci"
 __license__ = "GPL"
-__version__ = "1.6.2.1"
+__version__ = "1.6.4"
 __maintainer__ = "Ali Parlakci"
 __email__ = "parlakciali@gmail.com"
@@ -569,7 +569,9 @@ def download(submissions):
         print(f" {submissions[i]['postType'].upper()}",end="",noPrint=True)

         if isPostExists(submissions[i]):
-            print(f"\n{nameCorrector(submissions[i]['postTitle'])}")
+            print(f"\n" \
+                  f"{submissions[i]['postSubmitter']}_" \
+                  f"{nameCorrector(submissions[i]['postTitle'])}")
             print("It already exists")
             duplicates += 1
             downloadedCount -= 1
@@ -633,23 +635,33 @@ def download(submissions):
             downloadedCount -= 1

     if duplicates:
-        print("\n There was {} duplicates".format(duplicates))
+        print(f"\nThere {'were' if duplicates > 1 else 'was'} " \
+              f"{duplicates} duplicate{'s' if duplicates > 1 else ''}")

     if downloadedCount == 0:
-        print(" Nothing downloaded :(")
+        print("Nothing downloaded :(")
     else:
-        print(" Total of {} links downloaded!".format(downloadedCount))
+        print(f"Total of {downloadedCount} " \
+              f"link{'s' if downloadedCount > 1 else ''} downloaded!")

 def main():
+    VanillaPrint(
+        f" Bulk Downloader for Reddit v{__version__}\n" \
+        f" Written by Ali PARLAKCI parlakciali@gmail.com\n\n" \
+        f" https://github.com/aliparlakci/bulk-downloader-for-reddit/"
+    )
+
     GLOBAL.arguments = parseArguments()

     if GLOBAL.arguments.directory is not None:
-        GLOBAL.directory = Path(GLOBAL.arguments.directory)
+        GLOBAL.directory = Path(GLOBAL.arguments.directory.strip())
     else:
-        GLOBAL.directory = Path(input("download directory: "))
+        GLOBAL.directory = Path(input("download directory: ").strip())

     print("\n"," ".join(sys.argv),"\n",noPrint=True)
+    print(f"Bulk Downloader for Reddit v{__version__}\n",noPrint=True)

     try:
         checkConflicts()
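
The two "Ignore space at the end of directory" commits come down to the .strip() calls added in main() above. A minimal standalone sketch (with a made-up path) of why the trailing space matters:

    from pathlib import Path

    # A user enters the download directory with an accidental trailing space.
    raw = "/home/me/downloads "

    # Without .strip(), the space survives into the final path component,
    # so downloads would land in a separate "downloads " directory.
    print(repr(Path(raw)))          # PosixPath('/home/me/downloads ') on POSIX
    print(repr(Path(raw.strip())))  # PosixPath('/home/me/downloads')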

View File

@@ -1,13 +1,15 @@
import io import io
import json
import os import os
import sys import sys
import urllib.request import urllib.request
from html.parser import HTMLParser from html.parser import HTMLParser
from multiprocessing import Queue
from pathlib import Path from pathlib import Path
from urllib.error import HTTPError from urllib.error import HTTPError
import imgurpython import imgurpython
from multiprocessing import Queue from bs4 import BeautifulSoup
from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError, from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError,
FileNameTooLong, ImgurLoginError, FileNameTooLong, ImgurLoginError,
@@ -66,8 +68,9 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
     ]

     opener = urllib.request.build_opener()
-    opener.addheaders = headers
-    if not "imgur" in imageURL: urllib.request.install_opener(opener)
+    if not "imgur" in imageURL:
+        opener.addheaders = headers
+        urllib.request.install_opener(opener)

     if not (os.path.isfile(fileDir)):
         for i in range(3):
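
The getFile() hunk above narrows when the custom opener takes effect: the browser-like headers are now attached and the opener registered globally only for non-imgur URLs. A self-contained sketch of that control flow, with a hypothetical header list and without the real function's retry and file-writing logic:

    import urllib.request

    # Hypothetical stand-in for the header list built earlier in getFile().
    HEADERS = [("User-agent", "Mozilla/5.0")]

    def fetch(imageURL):
        opener = urllib.request.build_opener()
        if "imgur" not in imageURL:
            # Non-imgur hosts get the spoofed headers and a globally
            # installed opener; imgur keeps urllib's default opener.
            opener.addheaders = HEADERS
            urllib.request.install_opener(opener)
        return urllib.request.urlopen(imageURL).read()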
@@ -441,24 +444,16 @@ class Gfycat:
         url = "https://gfycat.com/" + url.split('/')[-1]

-        pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
+        pageSource = (urllib.request.urlopen(url).read().decode())

-        theLine = pageSource[lineNumber]
-        lenght = len(query)
-        link = []
-
-        for i in range(len(theLine)):
-            if theLine[i:i+lenght] == query:
-                cursor = (i+lenght)+1
-                while not theLine[cursor] == '"':
-                    link.append(theLine[cursor])
-                    cursor += 1
-                break
-
-        if "".join(link) == "":
+        soup = BeautifulSoup(pageSource, "html.parser")
+        attributes = {"data-react-helmet":"true","type":"application/ld+json"}
+        content = soup.find("script",attrs=attributes)
+
+        if content is None:
             raise NotADownloadableLinkError("Could not read the page source")

-        return "".join(link)
+        return json.loads(content.text)["video"]["contentUrl"]

 class Direct:
     def __init__(self,directory,POST):
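
The rewritten Gfycat lookup above is the "Changed getting gfycat links' algorithm" commit: instead of scanning a hard-coded line of the page source for a quoted URL, it parses the page with BeautifulSoup and reads the JSON-LD metadata block that gfycat's redesigned pages embed. A standalone sketch of the same technique (assuming gfycat still serves that markup; the example URL is made up):

    import json
    import urllib.request

    from bs4 import BeautifulSoup

    def gfycat_video_url(url):
        # Fetch and parse the gfycat page.
        pageSource = urllib.request.urlopen(url).read().decode()
        soup = BeautifulSoup(pageSource, "html.parser")

        # The redesigned page ships its metadata as JSON-LD in a tagged <script>.
        attributes = {"data-react-helmet": "true", "type": "application/ld+json"}
        content = soup.find("script", attrs=attributes)
        if content is None:
            raise ValueError("Could not read the page source")

        # The direct media link sits under video.contentUrl in the payload.
        return json.loads(content.text)["video"]["contentUrl"]

    # e.g. gfycat_video_url("https://gfycat.com/SomeGfyName")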