Merge branch 'master' of https://github.com/aliparlakci/bulk-downloader-for-reddit
@@ -1,13 +1,15 @@
 import io
+import json
 import os
 import sys
 import urllib.request
 from html.parser import HTMLParser
+from multiprocessing import Queue
 from pathlib import Path
 from urllib.error import HTTPError
 
 import imgurpython
-from multiprocessing import Queue
+from bs4 import BeautifulSoup
 
 from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError,
                         FileNameTooLong, ImgurLoginError,
@@ -442,24 +444,16 @@ class Gfycat:
 
         url = "https://gfycat.com/" + url.split('/')[-1]
 
-        pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))
+        pageSource = (urllib.request.urlopen(url).read().decode())
 
-        theLine = pageSource[lineNumber]
-        lenght = len(query)
-        link = []
+        soup = BeautifulSoup(pageSource, "html.parser")
+        attributes = {"data-react-helmet":"true","type":"application/ld+json"}
+        content = soup.find("script",attrs=attributes)
 
-        for i in range(len(theLine)):
-            if theLine[i:i+lenght] == query:
-                cursor = (i+lenght)+1
-                while not theLine[cursor] == '"':
-                    link.append(theLine[cursor])
-                    cursor += 1
-                break
-
-        if "".join(link) == "":
+        if content is None:
             raise NotADownloadableLinkError("Could not read the page source")
 
-        return "".join(link)
+        return json.loads(content.text)["video"]["contentUrl"]
 
 class Direct:
     def __init__(self,directory,POST):
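
For context, the rewritten Gfycat link extraction no longer scans a hard-coded line of the page source character by character; it parses the HTML with BeautifulSoup, locates the JSON-LD script block that Gfycat embeds, and reads the direct video URL from its "video"/"contentUrl" field, which is why the json and bs4 imports are added in the first hunk. A minimal standalone sketch of that path, assuming the same page structure; the function name and the ValueError stand-in are illustrative, and the real code raises NotADownloadableLinkError from src.errors:

import json
import urllib.request

from bs4 import BeautifulSoup


def get_gfycat_video_url(url):
    # Normalize any gfycat link to its canonical page, as in the diff.
    url = "https://gfycat.com/" + url.split('/')[-1]

    page_source = urllib.request.urlopen(url).read().decode()

    # Gfycat embeds video metadata as JSON-LD; find the script tag by its
    # attributes instead of scanning a fixed line number of the source.
    soup = BeautifulSoup(page_source, "html.parser")
    attributes = {"data-react-helmet": "true", "type": "application/ld+json"}
    content = soup.find("script", attrs=attributes)

    if content is None:
        # The real code raises NotADownloadableLinkError here.
        raise ValueError("Could not read the page source")

    return json.loads(content.text)["video"]["contentUrl"]

Reading the embedded metadata is more robust than the removed approach, which depended on the target appearing at a fixed line number (lineNumber) and on matching a literal query string in that line.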