diff --git a/src/crawler.py b/src/crawler.py index 2d87ce2..541ef15 100644 --- a/src/crawler.py +++ b/src/crawler.py @@ -10,12 +10,26 @@ class Crawler: self.studip = studip def download_folder(self, folder): + """ + Download all documents in a folder. + + Parameters: + folder(string): id of the folder to download + """ docs = self.studip.get_docs(folder) for doc in docs: log.info('found doc ' + doc) self.studip.download(doc) def download_folder_rec(self, folder, base_dir): + """ + Download all documents in a folder and its subfolders. + This keeps the folder structure. + + Parameters: + folder(string): id of the folder to download + base_dir(string): directory where to put the download + """ log.info('crawling folder ' + folder) self.create_dir(base_dir) self.download_folder(folder) @@ -30,6 +44,14 @@ class Crawler: self.download_folder_rec(subdir, subdir_path) def download_course(self, course, base_dir): + """ + Download all documents in a course. + This keeps the folder structure. + + Parameters: + course(string): id of the course to download + base_dir(string): directory where to put the download + """ log.info('crawling course ' + course) self.create_dir(base_dir) os.chdir(base_dir) @@ -37,6 +59,13 @@ class Crawler: self.download_folder_rec(root, base_dir) def download_curr_courses(self, base_dir): + """ + Download all documents of all current courses. + This keeps the folder structure. + + Parameters: + base_dir(string): directory where to put the download + """ log.info('Start crawling all current courses') self.create_dir(base_dir) curr_courses = self.studip.get_curr_courses( @@ -49,6 +78,12 @@ class Crawler: self.download_course(course, path) def create_dir(self, dir): + """ + Creates a directory if it doesn't exist already. 
+ + Parameters: + dir(string): directory path to create + """ if not os.path.exists(dir): log.info('creating folder' + dir) os.mkdir(dir) diff --git a/src/mysql.py b/src/mysql.py index b5b64c0..87d760a 100755 --- a/src/mysql.py +++ b/src/mysql.py @@ -17,6 +17,9 @@ class Database: self.setup_db() def connect(self): + """ + Connect to an existing database instance based on the object attributes. + """ return pymysql.connect( host=self.HOST, port=self.PORT, @@ -27,6 +30,9 @@ class Database: ) def setup_db(self): + """ + Creates a database with tables. + """ db = self.connect() crs = db.cursor() sql_query = "CREATE DATABASE IF NOT EXISTS " + self.NAME @@ -40,6 +46,13 @@ class Database: log.debug(db) def set_last_file_dl(self, file_id, time): + """ + Insert a downloaded file into the database. + + Parameters: + file_id (string): id of the file downloaded + time(int): time the file was downloaded + """ db = self.connect() db.select_db(self.NAME) crs = db.cursor() @@ -51,6 +64,15 @@ class Database: db.commit() def get_last_file_dl(self, file_id): + """ + Check when a file was downloaded. + + Parameters: + file_id(string): id of the file to check + + Returns: + int: time when the file was downloaded last. None if it wasn't downloaded. + """ if self.RESET_DL: return None db = self.connect() diff --git a/src/studip.py b/src/studip.py index 99da36a..3fcde54 100755 --- a/src/studip.py +++ b/src/studip.py @@ -15,15 +15,36 @@ class Studip: self.db = db def auth_req(self, url): + """ + Sends an authenticated GET request for the user. + + Parameters: + url(string): URL path (appended to the domain) to send the request to + + Returns: + response: response to the request + """ url = self.DOMAIN + url return req.get(url, auth=self.USER) def get_uid(self): + """ + Get the user id of the user specified in the object. + + Returns: + string: user id + """ rsp = self.auth_req('/api.php/user/') user_id = rsp.json()['user_id'] return user_id def get_curr_semester(self): + """ + Get the current semester of the studip instance specified in the object. 
+ + Returns: + string: id for current semester + """ rsp = self.auth_req('/api.php/semesters/') curr_time = int(str(int(time.time()))) semesters = rsp.json()['collection'] @@ -36,6 +57,12 @@ class Studip: return 0 def get_ordered_semesters(self): + """ + Get a list of the semesters of the studip instance specified in the object. + + Returns: + list(string): all semesters of the user + """ rsp = self.auth_req('/api.php/semesters/') semesters = rsp.json()['collection'] order_sems = [] @@ -44,6 +71,12 @@ class Studip: return order_sems def get_curr_courses(self, user_id, semester): + """ + Get a list of the courses of the given user for the given semester. + + Returns: + list: courses of the user + """ rsp = self.auth_req('/api.php/user/' + user_id + '/courses') ord_sems = self.get_ordered_semesters() courses = rsp.json()['collection'] @@ -69,12 +102,30 @@ class Studip: return course_list def get_top_folder(self, course): + """ + Retrieves the top folder id of a given course. + + Parameters: + course (string): course to get the top folder of + + Returns: + string: id of the top folder + """ rsp = self.auth_req('/api.php/course/' + course + '/top_folder') top_folder = rsp.json() tf_id = top_folder['id'] return(tf_id) def get_docs(self, folder): + """ + Get all the documents of a given folder. + + Parameters: + folder(string): id of the folder to get documents of + + Returns: + list(string): ids of the documents + """ rsp = self.auth_req('/api.php/folder/' + folder) docs = rsp.json()['file_refs'] res_docs = [] @@ -84,6 +135,12 @@ class Studip: return(res_docs) def download(self, doc): + """ + Download a document. 
+ + Parameters: + doc (string): id of the document to download + """ rsp1 = self.auth_req('/api.php/file/' + doc) doc_name = rsp1.json()['name'] doc_chdate = rsp1.json()['chdate'] @@ -100,6 +157,15 @@ class Studip: self.db.set_last_file_dl(str(doc), str(int(time.time()))) def get_subdirs(self, folder): + """ + Get all the subdirectories of a given folder. + + Parameters: + folder(string): id of the folder to get subdirectories of + + Returns: + list(string): ids of the subdirectories + """ rsp = self.auth_req('/api.php/folder/' + folder) subdirs = rsp.json()['subfolders'] docs = rsp.json()['file_refs']