mirror of
https://github.com/KevinMidboe/ITLscraper.git
synced 2025-10-29 01:30:14 +00:00
add gui and download function
This commit is contained in:
56
app.py
56
app.py
@@ -1,21 +1,62 @@
|
|||||||
from appJar import gui
|
from appJar import gui
|
||||||
from itl_scrape import itslearning_scraper
|
from itl_scrape import itslearning_scraper
|
||||||
|
|
||||||
app = gui("It's learning scraper")
|
app = gui("It's learning scraper")
|
||||||
scraper = itslearning_scraper()
|
scraper = itslearning_scraper()
|
||||||
|
|
||||||
|
def download(btn):
    """Download every course whose check box is currently ticked.

    Args:
        btn: Value appJar passes to the button callback; not used here.
    """
    # find_all_courses() stores its result on ``scraper.course``; reuse that
    # mapping so a click does not repeat the whole scrape. Fall back to
    # scraping when nothing has been cached yet.
    courses = getattr(scraper, "course", None) or scraper.find_all_courses()

    # The check boxes are titled with the mapping's values, so each one is
    # looked up by value; the ticked {key: value} pairs are kept.
    course_links = {
        link: title for link, title in courses.items() if app.getCheckBox(title)
    }
    # NOTE(review): the diff view this was recovered from drops indentation;
    # the selection is printed once, after it has been built.
    print(course_links)

    print("Download selected")
    scraper.download_links(course_links)
|
||||||
|
|
||||||
|
def select_none(btn):
    """Untick the check box of every course.

    Args:
        btn: Value appJar passes to the button callback; not used here.
    """
    # Reuse the mapping cached on ``scraper.course`` by find_all_courses()
    # instead of scraping again on every click; scrape only if it is missing.
    courses = getattr(scraper, "course", None) or scraper.find_all_courses()

    # Check boxes are titled with the mapping's values.
    for title in courses.values():
        app.setCheckBox(title, ticked=False, callFunction=False)
    print("select none")
|
||||||
|
|
||||||
|
def select_all(btn):
    """Tick the check box of every course.

    Args:
        btn: Value appJar passes to the button callback; not used here.
    """
    # Reuse the mapping cached on ``scraper.course`` by find_all_courses()
    # instead of scraping again on every click; scrape only if it is missing.
    courses = getattr(scraper, "course", None) or scraper.find_all_courses()

    # Check boxes are titled with the mapping's values.
    for title in courses.values():
        app.setCheckBox(title, ticked=True, callFunction=False)
    print("select all")
|
||||||
|
|
||||||
def press(btn):
|
def press(btn):
|
||||||
user = app.getEntry("feideuser")
|
user = app.getEntry("feideuser")
|
||||||
password = app.getEntry("password")
|
password = app.getEntry("password")
|
||||||
scraper.login(user,password)
|
scraper.login(user,password)
|
||||||
|
courses = scraper.find_all_courses()
|
||||||
|
|
||||||
#if course_url:
|
|
||||||
#else:
|
|
||||||
print("pressed button", app.getEntry("feideuser"), app.getEntry("password"))
|
|
||||||
app.removeAllWidgets()
|
app.removeAllWidgets()
|
||||||
app.addLabel("title", "Courses to be selected", 0, 0, 2)
|
|
||||||
test = scraper.get_courses()
|
app.addLabel("title", "Courses you want to download", 0, 0, 2)
|
||||||
for i, label in enumerate(test):
|
app.setFont(12)
|
||||||
app.addCheckBox(test[label], i+1 ,0)
|
space = 0
|
||||||
|
|
||||||
|
app.startLabelFrame("Courses to download")
|
||||||
|
j = 0
|
||||||
|
k = 1
|
||||||
|
for i, label in enumerate(courses):
|
||||||
|
app.addCheckBox(courses[label], k, j, )
|
||||||
|
#app.setCheckBox(courses[label],ticked=True, callFunction=False)
|
||||||
|
k += 1
|
||||||
|
if k >= 20:
|
||||||
|
k=0
|
||||||
|
j+=1
|
||||||
|
space = i
|
||||||
|
|
||||||
|
app.stopLabelFrame()
|
||||||
|
print(space)
|
||||||
|
app.addButton("Download", download, space+2, 0)
|
||||||
|
app.addButton("Select none", select_none, space+2, 1)
|
||||||
|
app.addButton("Select all", select_all, space+2, 3)
|
||||||
|
|
||||||
app.startLabelFrame("It's Learning time")
|
app.startLabelFrame("It's Learning time")
|
||||||
app.setSticky("ew")
|
app.setSticky("ew")
|
||||||
@@ -28,4 +69,5 @@ app.addSecretEntry("password", 1 , 1)
|
|||||||
app.stopLabelFrame()
|
app.stopLabelFrame()
|
||||||
app.addButton("Login", press, 2, 0, 2)
|
app.addButton("Login", press, 2, 0, 2)
|
||||||
app.setFocus("feideuser")
|
app.setFocus("feideuser")
|
||||||
|
|
||||||
app.go()
|
app.go()
|
||||||
|
|||||||
@@ -10,6 +10,8 @@ from multiprocessing import Process
|
|||||||
from bs4 import BeautifulSoup as bs
|
from bs4 import BeautifulSoup as bs
|
||||||
from getpass import getpass
|
from getpass import getpass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
base_url = "https://ntnu.itslearning.com/"
|
base_url = "https://ntnu.itslearning.com/"
|
||||||
folder_url = "https://ntnu.itslearning.com/Folder/processfolder.aspx?FolderID="
|
folder_url = "https://ntnu.itslearning.com/Folder/processfolder.aspx?FolderID="
|
||||||
def make_folder(curpath, title):
|
def make_folder(curpath, title):
|
||||||
@@ -45,7 +47,7 @@ class itslearning_scraper():
|
|||||||
resp = rq.get(self.start_url, cookies=self.jsession)
|
resp = rq.get(self.start_url, cookies=self.jsession)
|
||||||
|
|
||||||
self.get_cookies(resp)
|
self.get_cookies(resp)
|
||||||
self.find_courses()
|
self.find_all_courses()
|
||||||
|
|
||||||
def get_cookies(self, resp):
|
def get_cookies(self, resp):
|
||||||
split_cookie = resp.request.headers["Cookie"].split(";")
|
split_cookie = resp.request.headers["Cookie"].split(";")
|
||||||
@@ -69,6 +71,57 @@ class itslearning_scraper():
|
|||||||
courses[link.get("href")]=link.contents[0].contents[0]
|
courses[link.get("href")]=link.contents[0].contents[0]
|
||||||
self.courses = courses
|
self.courses = courses
|
||||||
|
|
||||||
|
def find_all_courses(self):
    """Fetch the AllCourses page and return every course link found on it.

    The page is requested once with GET and then posted back twice: first
    with the "All" option, then with the page-size fields. Each postback
    carries the hidden form fields of the page that was just received. The
    final page is also written to ``courses.html`` in the working directory.

    Returns:
        dict: for each ``a.ccl-iconlink`` inside the
        ``h-table-show-first-4-columns`` table, maps the link's ``href`` to
        ``link.contents[0].contents[0]``. The same dict is stored on
        ``self.courses`` and ``self.course``.

    Raises:
        AttributeError: if a hidden input or the course table is missing
            from a response.
    """
    course_url = "https://ntnu.itslearning.com/Course/AllCourses.aspx"
    hidden_fields = ("__VIEWSTATE", "__EVENTVALIDATION", "__VIEWSTATEGENERATOR")

    def read_hidden_fields(html):
        # Parse the page once and pull the value of each hidden input.
        page = bs(html, "html.parser")
        return {
            name: page.find("input", {"id": name}).attrs["value"]
            for name in hidden_fields
        }

    get_all = {
        "__EVENTARGUMENT": "",
        "__EVENTTARGET": "ctl26$ctl00$ctl25$ctl02",
        "__EVENTVALIDATION": "",
        "__LASTFOCUS": "",
        "__VIEWSTATE": "",
        "__VIEWSTATEGENERATOR": "",
        "ctl26$ctl00$ctl25$ctl02": "All",
    }

    self.browser.open(course_url)
    resp = rq.get(course_url, cookies=self.cookies)

    # First postback, using the hidden fields of the page fetched above.
    get_all.update(read_hidden_fields(resp.text))
    post = rq.post(course_url, cookies=self.cookies, data=get_all)

    # Second postback. The hidden fields are refreshed from the first
    # postback's response; previously they were read but never sent, so the
    # request went out with the values of the initial page.
    get_all.update(read_hidden_fields(post.text))
    get_all["ctl26$PageSizeSelect"] = "200"
    get_all["__EVENTTARGET"] = "ctl26:7:Pagesize:100"
    post = rq.post(course_url, cookies=self.cookies, data=get_all)

    # Keep a copy of the final page on disk.
    with open("courses.html", "wb") as f:
        f.write(post.text.encode("utf-8"))

    three = bs(post.text, "html.parser")
    course = three.find("table", {"class": "h-table-show-first-4-columns"})
    active_courses = course.find_all("a", {"class": "ccl-iconlink"})
    courses = {
        link.get("href"): link.contents[0].contents[0] for link in active_courses
    }

    # get_courses() returns self.courses, so store the result there as well;
    # self.course is kept for code that already reads the old attribute.
    self.courses = courses
    self.course = courses
    return courses
|
||||||
|
|
||||||
|
def get_itl_cookies(self):
    """Return the cookies stored on this scraper (``self.cookies``)."""
    itl_cookies = self.cookies
    return itl_cookies
|
||||||
|
|
||||||
|
|
||||||
def find_folder_table(self,html):
|
def find_folder_table(self,html):
|
||||||
three = bs(html, "html.parser")
|
three = bs(html, "html.parser")
|
||||||
@@ -234,16 +287,37 @@ class itslearning_scraper():
|
|||||||
table = self.find_folder_table(r.text)
|
table = self.find_folder_table(r.text)
|
||||||
self.find_files(table)
|
self.find_files(table)
|
||||||
|
|
||||||
|
def download_links(self, links):
    """Start a file download for every course in ``links``.

    For each entry the course page is fetched, the folder id is read from
    the first ``FolderID=...'`` occurrence in the response, a folder is made
    for the course and ``self.find_files`` is started in a separate process
    on that folder's table.

    Args:
        links: dict mapping a course link (appended to ``base_url``) to the
            name passed to ``make_folder`` for that course.
    """
    for link, title in links.items():
        r = rq.get(base_url + link, cookies=self.cookies)

        # A page without a folder id used to raise AttributeError and stop
        # every remaining course; skip just this one instead.
        match = re.search(r"FolderID=(.+?)'", r.text)
        if match is None:
            print("no folder id found for", title)
            continue
        folder_id = match.group(1)
        print("folder id", folder_id)

        # NOTE(review): make_folder presumably changes into the new folder,
        # since the loop steps back up with chdir('..') below -- confirm.
        course_path = os.path.abspath(os.path.curdir)
        make_folder(course_path, title)

        # Print the URL itself (this used to print the literal text
        # "folder_url" followed by the id).
        print(folder_url + folder_id)
        r = rq.get(folder_url + folder_id, cookies=self.cookies)
        print(r.url)

        table = self.find_folder_table(r.text)
        Process(target=self.find_files, args=(table,)).start()
        os.chdir('..')
|
||||||
|
|
||||||
def get_courses(self):
|
def get_courses(self):
|
||||||
return self.courses
|
return self.courses
|
||||||
|
|
||||||
|
def get_all_courses(self):
    """Open the AllCourses page in ``self.browser`` and print debug output.

    Prints the browser's current page, then the browser session's cookies
    as a dict. Returns nothing.
    """
    all_courses_url = "https://ntnu.itslearning.com/Course%2fAllCourses.aspx"
    self.browser.open(all_courses_url)

    current_page = self.browser.get_current_page()
    print(current_page)

    session_cookies = self.browser.session.cookies.get_dict()
    print(session_cookies)
|
||||||
|
|
||||||
#print(args.session_cookie)
|
#print(args.session_cookie)
|
||||||
#key = args.session_cookie
|
#key = args.session_cookie
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
scraper = itslearning_scraper()
|
scraper = itslearning_scraper()
|
||||||
scraper.enter()
|
scraper.enter()
|
||||||
url = input("Enter course url or press enter to download all active courses:")
|
scraper.find_all_courses()
|
||||||
if url:
|
#url = input("Enter course url or press enter to download all active courses:")
|
||||||
scraper.download_one(url)
|
#if url:
|
||||||
else:
|
# scraper.download_one(url)
|
||||||
scraper.download_all()
|
#else:
|
||||||
|
# scraper.download_all()
|
||||||
|
|||||||
Reference in New Issue
Block a user