From 27cd3ee9919c7bc400d0d949fdff07a1196a1f1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ali=20Parlak=C3=A7=C4=B1?= Date: Tue, 28 Aug 2018 16:10:15 +0300 Subject: [PATCH] Changed getting gfycat links' algorithm --- src/downloader.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/downloader.py b/src/downloader.py index c9e75c6..53d98b9 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -1,13 +1,15 @@ import io +import json import os import sys import urllib.request from html.parser import HTMLParser +from multiprocessing import Queue from pathlib import Path from urllib.error import HTTPError import imgurpython -from multiprocessing import Queue +from bs4 import BeautifulSoup from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError, FileNameTooLong, ImgurLoginError, @@ -442,24 +444,16 @@ class Gfycat: url = "https://gfycat.com/" + url.split('/')[-1] - pageSource = (urllib.request.urlopen(url).read().decode().split('\n')) + pageSource = (urllib.request.urlopen(url).read().decode()) - theLine = pageSource[lineNumber] - lenght = len(query) - link = [] + soup = BeautifulSoup(pageSource, "html.parser") + attributes = {"data-react-helmet":"true","type":"application/ld+json"} + content = soup.find("script",attrs=attributes) - for i in range(len(theLine)): - if theLine[i:i+lenght] == query: - cursor = (i+lenght)+1 - while not theLine[cursor] == '"': - link.append(theLine[cursor]) - cursor += 1 - break - - if "".join(link) == "": + if content is None: raise NotADownloadableLinkError("Could not read the page source") - return "".join(link) + return json.loads(content.text)["video"]["contentUrl"] class Direct: def __init__(self,directory,POST):