mirror of
				https://github.com/tiyn/stud.ip-crawler.git
				synced 2025-10-30 18:51:17 +01:00 
			
		
		
		
	log: added logging for jobs to finish
This commit is contained in:
		| @@ -10,8 +10,7 @@ class Crawler: | ||||
|         self.studip = studip | ||||
|  | ||||
|     def download_folder(self, folder): | ||||
|         """ | ||||
|         Download all documents in a folder. | ||||
|         """Download all documents in a folder. | ||||
|  | ||||
|         Parameters: | ||||
|         folder(string): id of the folder to download | ||||
| @@ -22,8 +21,7 @@ class Crawler: | ||||
|             self.studip.download(doc) | ||||
|  | ||||
|     def download_folder_rec(self, folder, base_dir): | ||||
|         """ | ||||
|         Download all documents in a folder and its subfolders. | ||||
|         """Download all documents in a folder and its subfolders. | ||||
|         This keeps the folder structure. | ||||
|  | ||||
|         Parameters: | ||||
| @@ -42,10 +40,10 @@ class Crawler: | ||||
|             self.create_dir(subdir_path) | ||||
|             os.chdir(subdir_path) | ||||
|             self.download_folder_rec(subdir, subdir_path) | ||||
|         log.info('Finished crawling folder ' + folder) | ||||
|  | ||||
|     def download_course(self, course, base_dir): | ||||
|         """ | ||||
|         Download all documents in course. | ||||
|         """Download all documents in course. | ||||
|         This keeps the folder structure. | ||||
|  | ||||
|         Parameters: | ||||
| @@ -57,29 +55,29 @@ class Crawler: | ||||
|         os.chdir(base_dir) | ||||
|         root = self.studip.get_top_folder(course) | ||||
|         self.download_folder_rec(root, base_dir) | ||||
|         log.info('Finished crawling course ' + course) | ||||
|  | ||||
|     def download_curr_courses(self, base_dir): | ||||
|         """ | ||||
|         Download all documents of all current courses. | ||||
|         """Download all documents of all current courses. | ||||
|         This keeps the folder structure. | ||||
|  | ||||
|         Parameters: | ||||
|         base_dir(string): directory where to put the download | ||||
|         """ | ||||
|         log.info('Start crawling all current courses') | ||||
|         self.create_dir(base_dir) | ||||
|         curr_courses = self.studip.get_curr_courses( | ||||
|             self.studip.get_uid(), self.studip.get_curr_semester()) | ||||
|         log.info('crawling all current courses' + curr_courses) | ||||
|         os.chdir(base_dir) | ||||
|         for course in curr_courses: | ||||
|             log.debug('course is ' + curr_courses[course]) | ||||
|             course_name = curr_courses[course].replace('/', '-') | ||||
|             path = os.path.join(base_dir, course_name) | ||||
|             self.download_course(course, path) | ||||
|         log.info('Finished crawling all current courses') | ||||
|  | ||||
|     def create_dir(self, dir): | ||||
|         """ | ||||
|         Creates a dir if it doesnt exist already. | ||||
|         """Creates a dir if it doesnt exist already. | ||||
|  | ||||
|         Parameters: | ||||
|         dir(string): directory path to create | ||||
|   | ||||
							
								
								
									
										12
									
								
								src/mysql.py
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								src/mysql.py
									
									
									
									
									
								
							| @@ -17,8 +17,7 @@ class Database: | ||||
|         self.setup_db() | ||||
|  | ||||
|     def connect(self): | ||||
|         """ | ||||
|         Connect to an existing database instance based on the object attributes. | ||||
|         """Connect to an existing database instance based on the object attributes. | ||||
|         """ | ||||
|         return pymysql.connect( | ||||
|             host=self.HOST, | ||||
| @@ -30,8 +29,7 @@ class Database: | ||||
|         ) | ||||
|  | ||||
|     def setup_db(self): | ||||
|         """ | ||||
|         Creates a database with tables. | ||||
|         """Creates a database with tables. | ||||
|         """ | ||||
|         db = self.connect() | ||||
|         crs = db.cursor() | ||||
| @@ -46,8 +44,7 @@ class Database: | ||||
|         log.debug(db) | ||||
|  | ||||
|     def set_last_file_dl(self, file_id, time): | ||||
|         """ | ||||
|         Insert a downloaded file to the database. | ||||
|         """Insert a downloaded file to the database. | ||||
|  | ||||
|         Parameters: | ||||
|         file_id (string): id of the file downloaded | ||||
| @@ -64,8 +61,7 @@ class Database: | ||||
|         db.commit() | ||||
|  | ||||
|     def get_last_file_dl(self, file_id): | ||||
|         """ | ||||
|         Check when a file was downloaded. | ||||
|         """Check when a file was downloaded. | ||||
|  | ||||
|         Parameters: | ||||
|         file_id(string): id of the file to check | ||||
|   | ||||
| @@ -15,8 +15,7 @@ class Studip: | ||||
|         self.db = db | ||||
|  | ||||
|     def auth_req(self, url): | ||||
|         """ | ||||
|         Creates a request for a user. | ||||
|         """Creates a request for a user. | ||||
|  | ||||
|         Parameter: | ||||
|         url(string): URL to send the request to | ||||
| @@ -28,8 +27,7 @@ class Studip: | ||||
|         return req.get(url, auth=self.USER) | ||||
|  | ||||
|     def get_uid(self): | ||||
|         """ | ||||
|         Get the user id of the user specified in the object. | ||||
|         """Get the user id of the user specified in the object. | ||||
|  | ||||
|         Returns: | ||||
|         string: user id | ||||
| @@ -39,8 +37,7 @@ class Studip: | ||||
|         return user_id | ||||
|  | ||||
|     def get_curr_semester(self): | ||||
|         """ | ||||
|         Get the current semester of the studip instance specified in the object. | ||||
|         """Get the current semester of the studip instance specified in the object. | ||||
|  | ||||
|         Returns: | ||||
|         string: id for current semester | ||||
| @@ -57,8 +54,7 @@ class Studip: | ||||
|         return 0 | ||||
|  | ||||
|     def get_ordered_semesters(self): | ||||
|         """ | ||||
|         Get the a list of semesters of studip instance specified in the object. | ||||
|         """Get the a list of semesters of studip instance specified in the object. | ||||
|  | ||||
|         Returns: | ||||
|         list(string): all semesters of the user | ||||
| @@ -71,8 +67,7 @@ class Studip: | ||||
|         return order_sems | ||||
|  | ||||
|     def get_curr_courses(self, user_id, semester): | ||||
|         """ | ||||
|         Get the a list of semesters of studip instance specified in the object. | ||||
|         """Get the a list of semesters of studip instance specified in the object. | ||||
|  | ||||
|         Returns: | ||||
|         string: id of the current semester | ||||
| @@ -101,8 +96,7 @@ class Studip: | ||||
|         return course_list | ||||
|  | ||||
|     def get_top_folder(self, course): | ||||
|         """ | ||||
|         Retrieves the top folder id of a given course. | ||||
|         """Retrieves the top folder id of a given course. | ||||
|  | ||||
|         Parameters: | ||||
|         course (string): course to get the top folder of | ||||
| @@ -116,8 +110,7 @@ class Studip: | ||||
|         return(tf_id) | ||||
|  | ||||
|     def get_docs(self, folder): | ||||
|         """ | ||||
|         Get all the documents of a given folder. | ||||
|         """Get all the documents of a given folder. | ||||
|  | ||||
|         Parameters: | ||||
|         folder(string): id of the folder to get documents of | ||||
| @@ -134,8 +127,7 @@ class Studip: | ||||
|         return(res_docs) | ||||
|  | ||||
|     def download(self, doc): | ||||
|         """ | ||||
|         Download a document. | ||||
|         """Download a document. | ||||
|  | ||||
|         Parameters: | ||||
|         doc (string): id of the document to download | ||||
| @@ -156,8 +148,7 @@ class Studip: | ||||
|             self.db.set_last_file_dl(str(doc), str(int(time.time()))) | ||||
|  | ||||
|     def get_subdirs(self, folder): | ||||
|         """ | ||||
|         Get all the subdirectories of a given folder. | ||||
|         """Get all the subdirectories of a given folder. | ||||
|  | ||||
|         Parameters: | ||||
|         folder(string): id of the folder to get subdirectories of | ||||
|   | ||||
		Reference in New Issue
	
	Block a user