mirror of
https://github.com/tiyn/stud.ip-crawler.git
synced 2025-04-03 16:37:48 +02:00
bugfixes: events without data-folder error
If an event did not have a folder for files, the crawler raised an error. This is now fixed by wrapping the folder lookups in try-except blocks.
This commit is contained in:
parent
e8a0fe1000
commit
9a0dbc2feb
@ -32,7 +32,8 @@ the last time running it.
|
|||||||
|
|
||||||
### Errors and workarounds
|
### Errors and workarounds
|
||||||
|
|
||||||
- [ ] if filename is too long it currently gets skipped
|
- [ ] If filename is too long it currently gets skipped
|
||||||
|
- [ ] Filenames are sometimes displayed incorrectly (probably UTF-8 encoding problems)
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ class Crawler:
|
|||||||
self.create_dir(subdir_path)
|
self.create_dir(subdir_path)
|
||||||
os.chdir(subdir_path)
|
os.chdir(subdir_path)
|
||||||
self.download_folder_rec(subdir, subdir_path)
|
self.download_folder_rec(subdir, subdir_path)
|
||||||
log.info('Finished crawling folder ' + folder)
|
log.info('finished crawling folder ' + folder)
|
||||||
|
|
||||||
def download_course(self, course, base_dir):
|
def download_course(self, course, base_dir):
|
||||||
"""Download all documents in course.
|
"""Download all documents in course.
|
||||||
@ -55,7 +55,7 @@ class Crawler:
|
|||||||
os.chdir(base_dir)
|
os.chdir(base_dir)
|
||||||
root = self.studip.get_top_folder(course)
|
root = self.studip.get_top_folder(course)
|
||||||
self.download_folder_rec(root, base_dir)
|
self.download_folder_rec(root, base_dir)
|
||||||
log.info('Finished crawling course ' + course)
|
log.info('finished crawling course ' + course)
|
||||||
|
|
||||||
def download_curr_courses(self, base_dir):
|
def download_curr_courses(self, base_dir):
|
||||||
"""Download all documents of all current courses.
|
"""Download all documents of all current courses.
|
||||||
@ -74,7 +74,7 @@ class Crawler:
|
|||||||
course_name = curr_courses[course].replace('/', '-')
|
course_name = curr_courses[course].replace('/', '-')
|
||||||
path = os.path.join(base_dir, course_name)
|
path = os.path.join(base_dir, course_name)
|
||||||
self.download_course(course, path)
|
self.download_course(course, path)
|
||||||
log.info('Finished crawling all current courses')
|
log.info('finished crawling all current courses')
|
||||||
|
|
||||||
def create_dir(self, dir):
|
def create_dir(self, dir):
|
||||||
"""Creates a dir if it doesnt exist already.
|
"""Creates a dir if it doesnt exist already.
|
||||||
@ -83,5 +83,5 @@ class Crawler:
|
|||||||
dir(string): directory path to create
|
dir(string): directory path to create
|
||||||
"""
|
"""
|
||||||
if not os.path.exists(dir):
|
if not os.path.exists(dir):
|
||||||
log.info('creating folder' + dir)
|
log.info('creating folder ' + dir)
|
||||||
os.mkdir(dir)
|
os.mkdir(dir)
|
||||||
|
@ -121,11 +121,17 @@ class Studip:
|
|||||||
list(string): ids of the documents
|
list(string): ids of the documents
|
||||||
"""
|
"""
|
||||||
rsp = self.auth_req('/api.php/folder/' + folder)
|
rsp = self.auth_req('/api.php/folder/' + folder)
|
||||||
docs = rsp.json()['file_refs']
|
|
||||||
res_docs = []
|
res_docs = []
|
||||||
|
try:
|
||||||
|
docs = rsp.json()['file_refs']
|
||||||
|
except ValueError:
|
||||||
|
return res_docs
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
doc_id = doc['id']
|
try:
|
||||||
res_docs.append(doc_id)
|
doc_id = doc['id']
|
||||||
|
res_docs.append(doc_id)
|
||||||
|
except KeyError:
|
||||||
|
return res_docs
|
||||||
return(res_docs)
|
return(res_docs)
|
||||||
|
|
||||||
def download(self, doc):
|
def download(self, doc):
|
||||||
@ -161,11 +167,17 @@ class Studip:
|
|||||||
Returns:
|
Returns:
|
||||||
list(string): ids of the subdirectories
|
list(string): ids of the subdirectories
|
||||||
"""
|
"""
|
||||||
rsp = self.auth_req('/api.php/folder/' + folder)
|
|
||||||
subdirs = rsp.json()['subfolders']
|
|
||||||
res_subdirs = {}
|
res_subdirs = {}
|
||||||
|
rsp = self.auth_req('/api.php/folder/' + folder)
|
||||||
|
try:
|
||||||
|
subdirs = rsp.json()['subfolders']
|
||||||
|
except ValueError:
|
||||||
|
return res_docs
|
||||||
for subdir in subdirs:
|
for subdir in subdirs:
|
||||||
sub_id = subdir['id']
|
try:
|
||||||
sub_name = subdir['name']
|
sub_id = subdir['id']
|
||||||
res_subdirs[sub_id] = sub_name
|
sub_name = subdir['name']
|
||||||
|
res_subdirs[sub_id] = sub_name
|
||||||
|
except KeyError:
|
||||||
|
return res_subdirs
|
||||||
return res_subdirs
|
return res_subdirs
|
||||||
|
Loading…
x
Reference in New Issue
Block a user