mirror of
https://github.com/KevinMidboe/ITLscraper.git
synced 2025-10-29 17:50:14 +00:00
try to prep for dist
This commit is contained in:
1
app.py
1
app.py
@@ -1,4 +1,5 @@
|
|||||||
import appJar
|
import appJar
|
||||||
|
import itl_scrape
|
||||||
from appJar import gui
|
from appJar import gui
|
||||||
from itl_scrape import itslearning_scraper
|
from itl_scrape import itslearning_scraper
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,9 @@ import re
|
|||||||
import requests as rq
|
import requests as rq
|
||||||
import mechanicalsoup as ms
|
import mechanicalsoup as ms
|
||||||
import html2text
|
import html2text
|
||||||
|
import multiprocessing
|
||||||
|
import bs4
|
||||||
|
import getpass
|
||||||
from multiprocessing import Process
|
from multiprocessing import Process
|
||||||
from bs4 import BeautifulSoup as bs
|
from bs4 import BeautifulSoup as bs
|
||||||
from getpass import getpass
|
from getpass import getpass
|
||||||
@@ -250,7 +252,7 @@ class itslearning_scraper():
|
|||||||
if not title:
|
if not title:
|
||||||
title = "Failed to name"+str(self.failure)
|
title = "Failed to name"+str(self.failure)
|
||||||
self.failure +=1
|
self.failure +=1
|
||||||
make_folder(itl_path, title)
|
make_folder(itl_path, str(title))
|
||||||
r = rq.get(base_url+link.get("href"), cookies=self.cookies)
|
r = rq.get(base_url+link.get("href"), cookies=self.cookies)
|
||||||
table = self.find_folder_table(r.text)
|
table = self.find_folder_table(r.text)
|
||||||
#print(table)
|
#print(table)
|
||||||
@@ -262,7 +264,7 @@ class itslearning_scraper():
|
|||||||
print("read_essay:",link.get("href"))
|
print("read_essay:",link.get("href"))
|
||||||
itl_path = os.path.join(os.path.abspath(os.path.curdir))
|
itl_path = os.path.join(os.path.abspath(os.path.curdir))
|
||||||
title = link.contents[0]
|
title = link.contents[0]
|
||||||
make_folder(itl_path, title)
|
make_folder(itl_path, str(title))
|
||||||
r = rq.get(base_url+link.get("href"), cookies=self.cookies)
|
r = rq.get(base_url+link.get("href"), cookies=self.cookies)
|
||||||
self.find_essay_files(r.text)
|
self.find_essay_files(r.text)
|
||||||
os.chdir('..')
|
os.chdir('..')
|
||||||
@@ -321,9 +323,8 @@ class itslearning_scraper():
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
scraper = itslearning_scraper()
|
scraper = itslearning_scraper()
|
||||||
scraper.enter()
|
scraper.enter()
|
||||||
scraper.find_all_courses()
|
url = input("Enter course url or press enter to download all active courses:")
|
||||||
#url = input("Enter course url or press enter to download all active courses:")
|
if url:
|
||||||
#if url:
|
scraper.download_one(url)
|
||||||
# scraper.download_one(url)
|
else:
|
||||||
#else:
|
scraper.download_all()
|
||||||
# scraper.download_all()
|
|
||||||
|
|||||||
Reference in New Issue
Block a user