"""appJar front end for the It's Learning scraper: log in, tick courses, download."""
from appJar import gui
from itl_scrape import itslearning_scraper

# Number of checkboxes stacked in one column before wrapping to the next one.
ROWS_PER_COLUMN = 20

app = gui("It's learning scraper")
scraper = itslearning_scraper()

# Mapping returned by scraper.find_all_courses() at login time. Its values are
# used as checkbox titles, so the button callbacks must work on exactly this
# mapping; re-fetching could yield titles that have no checkbox.
_courses = {}


def _set_all_checkboxes(ticked):
    """Tick or untick every course checkbox without firing change callbacks."""
    for title in _courses.values():
        app.setCheckBox(title, ticked=ticked, callFunction=False)


def download(btn):
    """Download every course whose checkbox is ticked.

    Args:
        btn: value appJar hands to button callbacks (unused).
    """
    course_links = {
        link: title
        for link, title in _courses.items()
        if app.getCheckBox(title)
    }
    print(course_links)

    print("Download selected")
    scraper.download_links(course_links)


def select_none(btn):
    """Untick all course checkboxes.

    Args:
        btn: value appJar hands to button callbacks (unused).
    """
    _set_all_checkboxes(False)
    print("select none")


def select_all(btn):
    """Tick all course checkboxes.

    Args:
        btn: value appJar hands to button callbacks (unused).
    """
    _set_all_checkboxes(True)
    print("select all")


def press(btn):
    """Log in with the entered credentials and show the course selection view.

    Reads the "feideuser" and "password" entries, logs the scraper in, fetches
    the course mapping once and rebuilds the window with one checkbox per
    course plus the Download / Select none / Select all buttons.

    Args:
        btn: value appJar hands to button callbacks (unused).
    """
    global _courses

    user = app.getEntry("feideuser")
    password = app.getEntry("password")
    scraper.login(user, password)
    # Fetched once here; the button callbacks reuse this mapping.
    _courses = scraper.find_all_courses()

    app.removeAllWidgets()

    app.addLabel("title", "Courses you want to download", 0, 0, 2)
    app.setFont(12)

    app.startLabelFrame("Courses to download")
    for index, title in enumerate(_courses.values()):
        # Fill each column top to bottom, ROWS_PER_COLUMN entries per column.
        column, row = divmod(index, ROWS_PER_COLUMN)
        app.addCheckBox(title, row, column)
    app.stopLabelFrame()

    # Last course index + 2 (2 when there are no courses).
    # NOTE(review): a fixed row would probably do as well; confirm against
    # appJar's row handling before simplifying.
    button_row = max(len(_courses) - 1, 0) + 2
    app.addButton("Download", download, button_row, 0)
    app.addButton("Select none", select_none, button_row, 1)
    app.addButton("Select all", select_all, button_row, 3)
# NOTE: `rq`, `os`, `re` and `make_folder` are used below but are imported /
# defined in a part of the file that is not shown here.
from multiprocessing import Process
from bs4 import BeautifulSoup as bs
from getpass import getpass


base_url = "https://ntnu.itslearning.com/"
folder_url = "https://ntnu.itslearning.com/Folder/processfolder.aspx?FolderID="


class itslearning_scraper():
    # Only the methods that are completely visible are listed here; the rest
    # of the class (e.g. login, enter, find_courses, find_folder_table,
    # find_files) lives in the remainder of the file.

    # Page listing every course of the logged-in user.
    _ALL_COURSES_URL = "https://ntnu.itslearning.com/Course/AllCourses.aspx"
    # Form control that is posted with the value "All".
    _FILTER_CONTROL = "ctl26$ctl00$ctl25$ctl02"
    # Hidden inputs that must be echoed back from the most recent response.
    _STATE_FIELDS = ("__VIEWSTATE", "__EVENTVALIDATION", "__VIEWSTATEGENERATOR")

    def _form_state(self, soup):
        """Return the hidden state inputs of a parsed page as a dict.

        Args:
            soup: parsed HTML of the page.

        Returns:
            Mapping of each name in ``_STATE_FIELDS`` to its ``value``.

        Raises:
            RuntimeError: if one of the inputs is missing from the page.
        """
        state = {}
        for field in self._STATE_FIELDS:
            tag = soup.find("input", {"id": field})
            if tag is None:
                raise RuntimeError(
                    "hidden form field %s not found on the course page" % field
                )
            state[field] = tag.attrs["value"]
        return state

    def find_all_courses(self):
        """Fetch the full course list and return it as ``{href: title}``.

        Loads the course page, posts the form once with the filter control set
        to "All" and once more with the page-size fields, then collects every
        ``a.ccl-iconlink`` inside the course table. The final HTML is also
        written to ``courses.html`` in the current directory.

        Returns:
            dict mapping each course link's ``href`` to its displayed title.
            The same dict is stored on ``self.courses``.

        Raises:
            RuntimeError: if a hidden form field or the course table is
                missing from a response.
        """
        url = self._ALL_COURSES_URL
        self.browser.open(url)
        resp = rq.get(url, cookies=self.cookies)

        form = {
            "__EVENTARGUMENT": "",
            "__EVENTTARGET": self._FILTER_CONTROL,
            "__LASTFOCUS": "",
            self._FILTER_CONTROL: "All",
        }
        # Parse each response once and echo its state fields back.
        form.update(self._form_state(bs(resp.text, "html.parser")))
        post = rq.post(url, cookies=self.cookies, data=form)

        # The second postback must carry the state of the page just received,
        # not that of the initial GET.
        form.update(self._form_state(bs(post.text, "html.parser")))
        form["ctl26$PageSizeSelect"] = "200"
        form["__EVENTTARGET"] = "ctl26:7:Pagesize:100"
        post = rq.post(url, cookies=self.cookies, data=form)

        # Dump of the final page; looks like a debugging aid.
        with open("courses.html", "wb") as f:
            f.write(post.text.encode("utf-8"))

        soup = bs(post.text, "html.parser")
        table = soup.find("table", {"class": "h-table-show-first-4-columns"})
        if table is None:
            raise RuntimeError("course table not found on the course page")

        courses = {
            link.get("href"): link.contents[0].contents[0]
            for link in table.find_all("a", {"class": "ccl-iconlink"})
        }
        # get_courses() reads self.courses; self.course is kept as an alias of
        # the attribute name this method used to set.
        self.courses = courses
        self.course = courses
        return courses

    def get_itl_cookies(self):
        """Return the cookies stored on this scraper."""
        return self.cookies

    def download_links(self, links):
        """Start a download process for every course in *links*.

        Args:
            links: mapping of course link (relative to ``base_url``) to the
                course title, which is used as the folder name.

        Courses whose page contains no ``FolderID`` are reported and skipped.
        """
        start_dir = os.path.abspath(os.path.curdir)
        for link, title in links.items():
            r = rq.get(base_url + link, cookies=self.cookies)
            match = re.search(r"FolderID=(.+?)'", r.text)
            if match is None:
                print("no folder id found for", title)
                continue
            folder_id = match.group(1)
            print("folder id", folder_id)

            # presumably make_folder also changes into the new folder (the
            # working directory is reset afterwards) -- TODO confirm
            make_folder(start_dir, title)
            try:
                r = rq.get(folder_url + folder_id, cookies=self.cookies)
                print(r.url)
                table = self.find_folder_table(r.text)
                Process(target=self.find_files, args=(table,)).start()
            finally:
                # Always return to the starting directory, even on errors.
                os.chdir(start_dir)

    def get_courses(self):
        """Return the course mapping stored on ``self.courses``."""
        return self.courses

    def get_all_courses(self):
        """Open the all-courses page in the browser and print page and cookies."""
        self.browser.open("https://ntnu.itslearning.com/Course%2fAllCourses.aspx")
        p = self.browser.get_current_page()
        print(p)
        print(self.browser.session.cookies.get_dict())


if __name__ == '__main__':
    scraper = itslearning_scraper()
    scraper.enter()
    scraper.find_all_courses()