diff --git a/src/crawler.py b/src/crawler.py index 541ef15..14feffb 100644 --- a/src/crawler.py +++ b/src/crawler.py @@ -10,8 +10,7 @@ class Crawler: self.studip = studip def download_folder(self, folder): - """ - Download all documents in a folder. + """Download all documents in a folder. Parameters: folder(string): id of the folder to download @@ -22,8 +21,7 @@ class Crawler: self.studip.download(doc) def download_folder_rec(self, folder, base_dir): - """ - Download all documents in a folder and its subfolders. + """Download all documents in a folder and its subfolders. This keeps the folder structure. Parameters: @@ -42,10 +40,10 @@ class Crawler: self.create_dir(subdir_path) os.chdir(subdir_path) self.download_folder_rec(subdir, subdir_path) + log.info('Finished crawling folder ' + folder) def download_course(self, course, base_dir): - """ - Download all documents in course. + """Download all documents in course. This keeps the folder structure. Parameters: @@ -57,29 +55,29 @@ class Crawler: os.chdir(base_dir) root = self.studip.get_top_folder(course) self.download_folder_rec(root, base_dir) + log.info('Finished crawling course ' + course) def download_curr_courses(self, base_dir): - """ - Download all documents of all current courses. + """Download all documents of all current courses. This keeps the folder structure. 
Parameters: base_dir(string): directory where to put the download """ - log.info('Start crawling all current courses') self.create_dir(base_dir) curr_courses = self.studip.get_curr_courses( self.studip.get_uid(), self.studip.get_curr_semester()) + log.info('crawling all current courses ' + str(curr_courses)) os.chdir(base_dir) for course in curr_courses: log.debug('course is ' + curr_courses[course]) course_name = curr_courses[course].replace('/', '-') path = os.path.join(base_dir, course_name) self.download_course(course, path) + log.info('Finished crawling all current courses') def create_dir(self, dir): - """ - Creates a dir if it doesnt exist already. + """Creates a dir if it doesn't exist already. Parameters: dir(string): directory path to create diff --git a/src/mysql.py b/src/mysql.py index 87d760a..103f228 100755 --- a/src/mysql.py +++ b/src/mysql.py @@ -17,8 +17,7 @@ class Database: self.setup_db() def connect(self): - """ - Connect to an existing database instance based on the object attributes. + """Connect to an existing database instance based on the object attributes. """ return pymysql.connect( host=self.HOST, @@ -30,8 +29,7 @@ class Database: ) def setup_db(self): - """ - Creates a database with tables. + """Creates a database with tables. """ db = self.connect() crs = db.cursor() @@ -46,8 +44,7 @@ class Database: log.debug(db) def set_last_file_dl(self, file_id, time): - """ - Insert a downloaded file to the database. + """Insert a downloaded file to the database. Parameters: file_id (string): id of the file downloaded @@ -64,8 +61,7 @@ class Database: db.commit() def get_last_file_dl(self, file_id): - """ - Check when a file was downloaded. + """Check when a file was downloaded. Parameters: file_id(string): id of the file to check diff --git a/src/studip.py b/src/studip.py index f382c62..c4b514a 100755 --- a/src/studip.py +++ b/src/studip.py @@ -15,8 +15,7 @@ class Studip: self.db = db def auth_req(self, url): - """ - Creates a request for a user. 
+ """Creates a request for a user. Parameter: url(string): URL to send the request to @@ -28,8 +27,7 @@ class Studip: return req.get(url, auth=self.USER) def get_uid(self): - """ - Get the user id of the user specified in the object. + """Get the user id of the user specified in the object. Returns: string: user id @@ -39,8 +37,7 @@ class Studip: return user_id def get_curr_semester(self): - """ - Get the current semester of the studip instance specified in the object. + """Get the current semester of the studip instance specified in the object. Returns: string: id for current semester @@ -57,8 +54,7 @@ class Studip: return 0 def get_ordered_semesters(self): - """ - Get the a list of semesters of studip instance specified in the object. + """Get a list of semesters of the studip instance specified in the object. Returns: list(string): all semesters of the user @@ -71,8 +67,7 @@ class Studip: return order_sems def get_curr_courses(self, user_id, semester): - """ - Get the a list of semesters of studip instance specified in the object. + """Get a list of semesters of the studip instance specified in the object. Returns: string: id of the current semester @@ -101,8 +96,7 @@ class Studip: return course_list def get_top_folder(self, course): - """ - Retrieves the top folder id of a given course. + """Retrieves the top folder id of a given course. Parameters: course (string): course to get the top folder of @@ -116,8 +110,7 @@ class Studip: return(tf_id) def get_docs(self, folder): - """ - Get all the documents of a given folder. + """Get all the documents of a given folder. Parameters: folder(string): id of the folder to get documents of @@ -134,8 +127,7 @@ class Studip: return(res_docs) def download(self, doc): - """ - Download a document. 
Parameters: doc (string): id of the document to download @@ -156,8 +148,7 @@ class Studip: self.db.set_last_file_dl(str(doc), str(int(time.time()))) def get_subdirs(self, folder): - """ - Get all the subdirectories of a given folder. + """Get all the subdirectories of a given folder. Parameters: folder(string): id of the folder to get subdirectories of