diff --git a/itl_scrape.py b/itl_scrape.py index 61b7f0b..41c5a82 100644 --- a/itl_scrape.py +++ b/itl_scrape.py @@ -11,13 +11,14 @@ import getpass from multiprocessing import Process from bs4 import BeautifulSoup as bs from getpass import getpass - +from slugify import slugify base_url = "https://ntnu.itslearning.com/" folder_url = "https://ntnu.itslearning.com/Folder/processfolder.aspx?FolderID=" def make_folder(curpath, title): + title = slugify(title) folder_path = os.path.join(curpath,title) #print("making dir:",folder_path) if not os.path.exists(folder_path): @@ -146,6 +147,7 @@ class itslearning_scraper(): filename = title self.failure += 1 print("File created with name:",filename) + filename = slugify(filename) filename = os.path.join(os.path.abspath(os.path.curdir),filename) with open(filename, 'wb') as f: for chunk in r.iter_content(chunk_size=1024): @@ -179,6 +181,7 @@ class itslearning_scraper(): if text: title = three.find("span", {"id":"ctl05_TT"}).contents[0] text = self.html2text.handle(str(text)) + title = slugify(title) fp = os.path.join(os.path.abspath(os.path.curdir), title) #convert to md? with open(fp+".md", "wb") as note: @@ -208,7 +211,8 @@ class itslearning_scraper(): print(section_link) target = section_link.get("href") print(target) - fp = os.path.join(os.path.abspath(os.path.curdir), "".join(target.split('/'))) + + fp = os.path.join(os.path.abspath(os.path.curdir), slugify("".join(target.split('/')))) print("filepath:", fp) with open(fp+".url", "wb") as shortcut: shortcut.write(b'[InternetShortcut]\n') @@ -222,6 +226,7 @@ class itslearning_scraper(): text = three.find("div", {"class":"h-userinput"}) print(text.contents[0]) text = self.html2text.handle(str(text)) + title = slugify(title) fp = os.path.join(os.path.abspath(os.path.curdir), title) #convert to md? try: diff --git a/requirements.txt b/requirements.txt index 4739b7d..bb2b94e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ MechanicalSoup==0.7.0 html2text==2016.9.19 requests==2.13.0 appJar==0.52 +python-slugify==1.2.4