mirror of
				https://github.com/tiyn/stud.ip-crawler.git
				synced 2025-10-30 18:51:17 +01:00 
			
		
		
		
	Bugfix: events without a data folder caused an error
If an event didn't have a folder for files, the crawler raised an error. This is now fixed by adding try-except blocks.
This commit is contained in:
		| @@ -32,7 +32,8 @@ the last time running it. | |||||||
|  |  | ||||||
| ### Errors and workarounds | ### Errors and workarounds | ||||||
|  |  | ||||||
| - [ ] if filename is too long it currently gets skipped | - [ ] If filename is too long it currently gets skipped | ||||||
|  | - [ ] Filenames get sometimes displayed wrong (probably utf8 problems) | ||||||
|  |  | ||||||
| ## Installation | ## Installation | ||||||
|  |  | ||||||
|   | |||||||
| @@ -40,7 +40,7 @@ class Crawler: | |||||||
|             self.create_dir(subdir_path) |             self.create_dir(subdir_path) | ||||||
|             os.chdir(subdir_path) |             os.chdir(subdir_path) | ||||||
|             self.download_folder_rec(subdir, subdir_path) |             self.download_folder_rec(subdir, subdir_path) | ||||||
|         log.info('Finished crawling folder ' + folder) |         log.info('finished crawling folder ' + folder) | ||||||
|  |  | ||||||
|     def download_course(self, course, base_dir): |     def download_course(self, course, base_dir): | ||||||
|         """Download all documents in course. |         """Download all documents in course. | ||||||
| @@ -55,7 +55,7 @@ class Crawler: | |||||||
|         os.chdir(base_dir) |         os.chdir(base_dir) | ||||||
|         root = self.studip.get_top_folder(course) |         root = self.studip.get_top_folder(course) | ||||||
|         self.download_folder_rec(root, base_dir) |         self.download_folder_rec(root, base_dir) | ||||||
|         log.info('Finished crawling course ' + course) |         log.info('finished crawling course ' + course) | ||||||
|  |  | ||||||
|     def download_curr_courses(self, base_dir): |     def download_curr_courses(self, base_dir): | ||||||
|         """Download all documents of all current courses. |         """Download all documents of all current courses. | ||||||
| @@ -74,7 +74,7 @@ class Crawler: | |||||||
|             course_name = curr_courses[course].replace('/', '-') |             course_name = curr_courses[course].replace('/', '-') | ||||||
|             path = os.path.join(base_dir, course_name) |             path = os.path.join(base_dir, course_name) | ||||||
|             self.download_course(course, path) |             self.download_course(course, path) | ||||||
|         log.info('Finished crawling all current courses') |         log.info('finished crawling all current courses') | ||||||
|  |  | ||||||
|     def create_dir(self, dir): |     def create_dir(self, dir): | ||||||
|         """Creates a dir if it doesnt exist already. |         """Creates a dir if it doesnt exist already. | ||||||
|   | |||||||
| @@ -121,11 +121,17 @@ class Studip: | |||||||
|         list(string): ids of the documents |         list(string): ids of the documents | ||||||
|         """ |         """ | ||||||
|         rsp = self.auth_req('/api.php/folder/' + folder) |         rsp = self.auth_req('/api.php/folder/' + folder) | ||||||
|         docs = rsp.json()['file_refs'] |  | ||||||
|         res_docs = [] |         res_docs = [] | ||||||
|  |         try: | ||||||
|  |             docs = rsp.json()['file_refs'] | ||||||
|  |         except ValueError: | ||||||
|  |             return res_docs | ||||||
|         for doc in docs: |         for doc in docs: | ||||||
|  |             try: | ||||||
|                 doc_id = doc['id'] |                 doc_id = doc['id'] | ||||||
|                 res_docs.append(doc_id) |                 res_docs.append(doc_id) | ||||||
|  |             except KeyError: | ||||||
|  |                 return res_docs | ||||||
|         return(res_docs) |         return(res_docs) | ||||||
|  |  | ||||||
|     def download(self, doc): |     def download(self, doc): | ||||||
| @@ -161,11 +167,17 @@ class Studip: | |||||||
|         Returns: |         Returns: | ||||||
|         list(string): ids of the subdirectories |         list(string): ids of the subdirectories | ||||||
|         """ |         """ | ||||||
|         rsp = self.auth_req('/api.php/folder/' + folder) |  | ||||||
|         subdirs = rsp.json()['subfolders'] |  | ||||||
|         res_subdirs = {} |         res_subdirs = {} | ||||||
|  |         rsp = self.auth_req('/api.php/folder/' + folder) | ||||||
|  |         try: | ||||||
|  |             subdirs = rsp.json()['subfolders'] | ||||||
|  |         except ValueError: | ||||||
|  |             return res_subdirs | ||||||
|         for subdir in subdirs: |         for subdir in subdirs: | ||||||
|  |             try: | ||||||
|                 sub_id = subdir['id'] |                 sub_id = subdir['id'] | ||||||
|                 sub_name = subdir['name'] |                 sub_name = subdir['name'] | ||||||
|                 res_subdirs[sub_id] = sub_name |                 res_subdirs[sub_id] = sub_name | ||||||
|  |             except KeyError: | ||||||
|  |                 return res_subdirs | ||||||
|         return res_subdirs |         return res_subdirs | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user