mirror of
				https://github.com/tiyn/stud.ip-crawler.git
				synced 2025-10-31 11:11:18 +01:00 
			
		
		
		
	database: file ids and chdates are stored
- mysql: creates the database and tables on the given MySQL server if they do not already exist
- mysql: reads the last-change values from the db
- mysql: saves the ch_date after downloading
- run: now takes care of the variables for mysql and studip
This commit is contained in:
		
							
								
								
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -1,2 +1 @@ | |||||||
| last_dl.txt | database | ||||||
| data |  | ||||||
|   | |||||||
							
								
								
									
										16
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								README.md
									
									
									
									
									
								
							| @@ -13,28 +13,30 @@ If you run the program again it only downloads files that have changed since the | |||||||
|     - [x] Specify Stud.IP-URL |     - [x] Specify Stud.IP-URL | ||||||
|     - [x] Specify output directory |     - [x] Specify output directory | ||||||
|     - [x] Specify chunk size to download big files |     - [x] Specify chunk size to download big files | ||||||
|  |     - [x] Specify all important database variables | ||||||
| - [x] Only download files after given date | - [x] Only download files after given date | ||||||
|     - [x] Save and read download date |     - [x] Save and read download date | ||||||
|     - [x] Possible reset of download date |     - [x] Possible reset of download date | ||||||
| - [ ] Incremental file download | - [x] Incremental file download | ||||||
|     - [ ] Indexing downloaded files and folders |     - [x] Store id and chdate of downloaded files | ||||||
| - [ ] Logging | - [ ] Logging | ||||||
|     - [x] Console log |     - [x] Console log | ||||||
|     - [ ] Log file |     - [ ] Log file | ||||||
|  |  | ||||||
| ## Installation | ## Installation | ||||||
|  |  | ||||||
|  | - create an instance of | ||||||
| - `git clone https://github.com/tiyn/studip-crawler` | - `git clone https://github.com/tiyn/studip-crawler` | ||||||
| - `cd studip-crawler` | - `cd studip-crawler/src/` | ||||||
| `pip3 install -r requirements` - install dependencies | `pip3 install -r requirements` - install dependencies | ||||||
|  |  | ||||||
| ## Usage | ## Usage | ||||||
|  |  | ||||||
| Just run the file via `python3 crawler.py [options]`. | Just run the file via `python3 run.py [options]`. | ||||||
| Alternatively to `python3 crawler.py` you can give yourself permissions using `chmod +x crawler.py [options]` and | Alternatively to `python3 run.py` you can give yourself permissions using `chmod +x run.py [options]` and | ||||||
| run it with `./crawler.py [options]`. | run it with `./run.py [options]`. | ||||||
| There are several options required to work. | There are several options required to work. | ||||||
| Run `python3 crawler.py -h` for a help menu and see which ones are important for you. | Run `python3 run.py -h` for a help menu and see which ones are important for you. | ||||||
|  |  | ||||||
| ## Tested StudIP instances | ## Tested StudIP instances | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										202
									
								
								crawler.py
									
									
									
									
									
								
							
							
						
						
									
										202
									
								
								crawler.py
									
									
									
									
									
								
							| @@ -1,202 +0,0 @@ | |||||||
#!/bin/env python3
"""Download files from a Stud.IP instance via its REST API.

Stand-alone script: parses CLI options, walks all courses of the current
semester and downloads every file changed since the last recorded run.
"""
import time
import os
import argparse

from tqdm import tqdm
import requests as req
from requests.auth import HTTPBasicAuth


def create_dir(dir_path):
    """Create dir_path if it does not exist yet."""
    if not os.path.exists(dir_path):
        print('creating folder', dir_path)
        os.mkdir(dir_path)


def set_last_dl(timestamp):
    """Persist the unix timestamp (seconds) of the last run; None resets it."""
    # context manager so the file is flushed and closed; previously the
    # handle leaked and None was written as the literal string "None"
    with open('last_dl.txt', 'w') as last_dl_file:
        last_dl_file.write('' if timestamp is None else str(int(timestamp)))


def get_last_dl():
    """Return the stored unix timestamp of the last run, or None."""
    try:
        with open('last_dl.txt', 'r') as last_dl_file:
            return int(last_dl_file.read())
    except (OSError, ValueError):
        # missing file or unparsable content means "never downloaded"
        return None


parser = argparse.ArgumentParser(description='Download Files from StudIP.')
parser.add_argument('-o', '--output', type=str,
                    default='./data', help='path to output directory')
parser.add_argument('-u', '--user', type=str, help='studip username',
                    required=True)
parser.add_argument('-p', '--passw', type=str, help='studip password',
                    required=True)
parser.add_argument('-s', '--url', type=str, help='studip url', required=True)
parser.add_argument('-c', '--chunk', type=int, default=1024 * 1024,
                    help='chunksize for downloading data')
parser.add_argument('-r', '--reset_dl_date', action='store_true')

args = parser.parse_args()

BASE_DIR = os.path.abspath(args.output)
CHUNK_SIZE = args.chunk
STUDIP_DOMAIN = args.url
USERNAME = args.user
PASSWORD = args.passw
USER = (USERNAME, PASSWORD)
if args.reset_dl_date:
    set_last_dl(None)
LAST_DOWNLOAD = get_last_dl()


def get_uid():
    """Return the user id of the authenticated Stud.IP user."""
    rsp = req.get(STUDIP_DOMAIN + '/api.php/user/', auth=USER)
    return rsp.json()['user_id']


def get_curr_semester():
    """Return the URI of the semester containing the current time, or 0."""
    rsp = req.get(STUDIP_DOMAIN + '/api.php/semesters/', auth=USER)
    curr_time = int(time.time())
    semesters = rsp.json()['collection']
    for sem_uri, semester in semesters.items():
        if semester['begin'] < curr_time < semester['end']:
            return sem_uri
    return 0


def get_ordered_semesters():
    """Return all semester URIs in the order the API lists them."""
    rsp = req.get(STUDIP_DOMAIN + '/api.php/semesters/', auth=USER)
    return list(rsp.json()['collection'])


def get_curr_courses(user_id, semester):
    """Map course_id -> title for the user's courses running in semester."""
    url = STUDIP_DOMAIN + '/api.php/user/' + user_id + '/courses'
    rsp = req.get(url, auth=USER)
    ord_sems = get_ordered_semesters()
    course_list = {}
    for course in rsp.json()['collection'].values():
        start_sem = course['start_semester']
        # a missing start/end semester means "unbounded"; 100 acts as +infinity
        start_ind = ord_sems.index(start_sem) if start_sem is not None else 100
        end_sem = course['end_semester']
        end_ind = ord_sems.index(end_sem) if end_sem is not None else 100
        if start_ind <= ord_sems.index(semester) <= end_ind:
            course_list[course['course_id']] = course['title']
    return course_list


def get_top_folder(course):
    """Return the id of the course's top folder."""
    url = STUDIP_DOMAIN + '/api.php/course/' + course + '/top_folder'
    rsp = req.get(url, auth=USER)
    return rsp.json()['id']


def get_docs(folder):
    """Return the ids of all files directly inside folder."""
    rsp = req.get(STUDIP_DOMAIN + '/api.php/folder/' + folder, auth=USER)
    return [doc['id'] for doc in rsp.json()['file_refs']]


def download(doc, last_time):
    """Download doc into the current directory if changed since last_time.

    last_time (renamed from `time`, which shadowed the time module) is a
    unix timestamp or None; None means "download unconditionally".
    """
    rsp1 = req.get(STUDIP_DOMAIN + '/api.php/file/' + doc, auth=USER)
    doc_name = rsp1.json()['name']
    doc_chdate = rsp1.json()['chdate']
    if last_time is None or last_time < doc_chdate:
        print('downloading ', doc_name)
        url2 = STUDIP_DOMAIN + '/api.php/file/' + doc + '/download'
        rsp2 = req.get(url2, auth=USER, stream=True)
        total_size = int(rsp2.headers.get('content-length', 0))
        progbar = tqdm(total=total_size, unit='iB', unit_scale=True)
        # doc_file, not doc: do not shadow the file id with the handle
        with open(doc_name, 'wb') as doc_file:
            for chunk in rsp2.iter_content(CHUNK_SIZE):
                progbar.update(len(chunk))
                doc_file.write(chunk)
        progbar.close()


def get_subdirs(folder):
    """Map subfolder id -> name for all subfolders of folder."""
    rsp = req.get(STUDIP_DOMAIN + '/api.php/folder/' + folder, auth=USER)
    return {sub['id']: sub['name'] for sub in rsp.json()['subfolders']}


def download_folder(folder, last_time):
    """Download every file directly inside folder."""
    for doc in get_docs(folder):
        print('found doc ', doc)
        download(doc, last_time)


def download_folder_rec(folder, last_time, base_dir):
    """Recursively download folder and its subfolders into base_dir."""
    print('folder ', folder)
    create_dir(base_dir)
    download_folder(folder, last_time)
    subdirs = get_subdirs(folder)
    os.chdir(base_dir)
    for sub_id, sub_name in subdirs.items():
        # '/' in a folder name would create nested paths; flatten it
        subdir_path = os.path.join(base_dir, sub_name.replace('/', '-'))
        print(subdir_path)
        create_dir(subdir_path)
        os.chdir(subdir_path)
        download_folder_rec(sub_id, last_time, subdir_path)


def download_course(course, last_time, base_dir):
    """Download the whole file tree of a course into base_dir."""
    print('course ', course)
    create_dir(base_dir)
    os.chdir(base_dir)
    download_folder_rec(get_top_folder(course), last_time, base_dir)


def download_curr_courses(last_time, base_dir):
    """Download all courses of the current semester below base_dir."""
    print('Start downloading all current courses')
    create_dir(base_dir)
    curr_courses = get_curr_courses(get_uid(), get_curr_semester())
    os.chdir(base_dir)
    for course_id, title in curr_courses.items():
        print('course is ', title)
        path = os.path.join(base_dir, title.replace('/', '-'))
        download_course(course_id, last_time, path)


download_curr_courses(LAST_DOWNLOAD, BASE_DIR)
set_last_dl(time.time())
							
								
								
									
										2
									
								
								src/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								src/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,2 @@ | |||||||
|  | __pycache__ | ||||||
|  | data | ||||||
							
								
								
									
										64
									
								
								src/mysql.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										64
									
								
								src/mysql.py
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,64 @@ | |||||||
#!/bin/env python3
"""MySQL persistence layer: stores the last-download chdate per file id."""
import pymysql


class Database:
    """Wraps a MySQL database holding one table of (file id, ch_date) rows."""

    def __init__(self):
        # connection settings; filled in by the caller (see run.py)
        self.HOST = None
        self.PORT = None
        self.DB_NAME = None
        self.USER = None
        self.PASSW = None
        # name of the table holding id/ch_date pairs (was assigned twice;
        # the dead `None` assignment is removed)
        self.TABLE_FILE = 'files'
        # when True, get_last_file_dl pretends nothing was ever downloaded
        self.RESET_DL_DATE = False

    def connect(self):
        """Open and return a new pymysql connection (no database selected)."""
        return pymysql.connect(
            host=self.HOST,
            port=self.PORT,
            user=self.USER,
            password=self.PASSW,
            charset='utf8mb4',
            cursorclass=pymysql.cursors.DictCursor
        )

    def setup_db(self):
        """Create the database and the files table if they do not exist."""
        db = self.connect()
        try:
            crs = db.cursor()
            # NOTE: identifiers cannot be bound as query parameters; DB_NAME
            # and TABLE_FILE come from local configuration, not user input
            crs.execute("CREATE DATABASE IF NOT EXISTS " + self.DB_NAME)
            db.select_db(self.DB_NAME)
            crs.execute(
                "CREATE TABLE IF NOT EXISTS " + self.TABLE_FILE +
                "(id CHAR(32) NOT NULL,"
                "ch_date INT(11) NOT NULL,"
                "PRIMARY KEY(id))")
        finally:
            # previously the connection leaked (and a debug print remained)
            db.close()

    def set_last_file_dl(self, file_id, time):
        """Insert or update the ch_date stored for file_id."""
        db = self.connect()
        try:
            db.select_db(self.DB_NAME)
            crs = db.cursor()
            print('file: ', file_id, ' time: ', time)
            # parameterized values instead of string concatenation
            query = ("INSERT INTO " + self.TABLE_FILE + "(`id`,`ch_date`)"
                     "VALUES (%s,%s)"
                     "ON DUPLICATE KEY UPDATE `ch_date` = %s")
            crs.execute(query, (file_id, time, time))
            db.commit()
        finally:
            db.close()

    def get_last_file_dl(self, file_id):
        """Return the stored ch_date for file_id, or None if unknown/reset."""
        if self.RESET_DL_DATE:
            return None
        db = self.connect()
        try:
            db.select_db(self.DB_NAME)
            crs = db.cursor()
            # use TABLE_FILE consistently (was hard-coded to 'files') and
            # bind the value instead of concatenating it
            crs.execute(
                "SELECT ch_date FROM " + self.TABLE_FILE + " WHERE id = %s",
                (file_id,))
            res = crs.fetchone()
        finally:
            db.close()
        if res is not None:
            return res['ch_date']
        return None
| @@ -1,2 +1,3 @@ | |||||||
| tqdm==4.46.1 | tqdm==4.46.1 | ||||||
| requests==2.23.0 | requests==2.23.0 | ||||||
|  | PyMySQL==0.9.3 | ||||||
							
								
								
									
										47
									
								
								src/run.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										47
									
								
								src/run.py
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,47 @@ | |||||||
#!/bin/env python3
"""Entry point: parse CLI options, wire up the database and the crawler."""
import os
import argparse

import studip
import mysql


def _parse_args():
    """Build the command-line interface and return the parsed arguments."""
    parser = argparse.ArgumentParser(description='Download Files from StudIP.')
    parser.add_argument('-o', '--output', type=str,
                        default='./data', help='path to output directory')
    parser.add_argument('-u', '--user', type=str,
                        help='studip username', required=True)
    parser.add_argument('-p', '--passwd', type=str,
                        help='studip password', required=True)
    parser.add_argument('-s', '--url', type=str, help='studip url',
                        required=True)
    parser.add_argument('--chunk', type=int, default=1024 * 1024,
                        help='chunksize for downloading data')
    parser.add_argument('-r', '--reset_dl_date', action='store_true',
                        help='downloads everything and ignores last download date')
    parser.add_argument('--host', type=str, default='localhost',
                        help='mysql host')
    parser.add_argument('--port', type=int, default=3306, help='mysql port')
    parser.add_argument('--db_name', type=str, default='studip',
                        help='mysql database name')
    parser.add_argument('--db_user', type=str, default='root',
                        help='mysql database user')
    parser.add_argument('--db_passwd', type=str, default='secret-pw',
                        help='mysql database password')
    return parser.parse_args()


def main():
    """Configure the database and crawler from CLI args, then download."""
    args = _parse_args()

    db = mysql.Database()
    db.HOST = args.host
    db.PORT = args.port
    db.DB_NAME = args.db_name
    db.USER = args.db_user
    db.PASSW = args.db_passwd
    db.RESET_DL_DATE = args.reset_dl_date
    db.setup_db()

    crwlr = studip.Crawler(db)
    crwlr.CHUNK_SIZE = args.chunk
    crwlr.STUDIP_DOMAIN = args.url
    crwlr.USER = (args.user, args.passwd)

    crwlr.download_curr_courses(os.path.abspath(args.output))


# guard the entry point so importing this module has no side effects
if __name__ == '__main__':
    main()
							
								
								
									
										162
									
								
								src/studip.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										162
									
								
								src/studip.py
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,162 @@ | |||||||
#!/bin/env python3
"""Crawler for the Stud.IP REST API: walks courses and downloads files."""
import time
import os

from tqdm import tqdm
import requests as req
from requests.auth import HTTPBasicAuth


class Crawler:
    """Downloads all documents of the current semester's courses.

    Incremental: asks the database for the chdate stored per file id and
    skips files that have not changed since the last recorded download.
    """

    def __init__(self, db):
        # configuration attributes; filled in by the caller (see run.py)
        self.CHUNK_SIZE = None      # bytes per streamed download chunk
        self.STUDIP_DOMAIN = None   # base URL of the Stud.IP instance
        self.USER = None            # (username, password) for basic auth
        self.db = db                # database storing per-file chdates

    def create_dir(self, dir_path):
        """Create dir_path if it does not exist yet."""
        if not os.path.exists(dir_path):
            print('creating folder', dir_path)
            os.mkdir(dir_path)

    def get_uid(self):
        """Return the user id of the authenticated Stud.IP user."""
        rsp = req.get(self.STUDIP_DOMAIN + '/api.php/user/', auth=self.USER)
        return rsp.json()['user_id']

    def get_curr_semester(self):
        """Return the URI of the semester containing the current time, or 0."""
        rsp = req.get(self.STUDIP_DOMAIN + '/api.php/semesters/',
                      auth=self.USER)
        now = int(time.time())  # was int(str(int(time.time()))) — redundant
        semesters = rsp.json()['collection']
        for sem_uri, semester in semesters.items():
            if semester['begin'] < now < semester['end']:
                return sem_uri
        return 0

    def get_ordered_semesters(self):
        """Return all semester URIs in the order the API lists them."""
        rsp = req.get(self.STUDIP_DOMAIN + '/api.php/semesters/',
                      auth=self.USER)
        return list(rsp.json()['collection'])

    def get_curr_courses(self, user_id, semester):
        """Map course_id -> title for the user's courses running in semester."""
        url = self.STUDIP_DOMAIN + '/api.php/user/' + user_id + '/courses'
        rsp = req.get(url, auth=self.USER)
        ord_sems = self.get_ordered_semesters()
        course_list = {}
        for course in rsp.json()['collection'].values():
            start_sem = course['start_semester']
            # a missing start/end semester means "unbounded"; 100 acts as +inf
            start_ind = (ord_sems.index(start_sem)
                         if start_sem is not None else 100)
            end_sem = course['end_semester']
            end_ind = ord_sems.index(end_sem) if end_sem is not None else 100
            if start_ind <= ord_sems.index(semester) <= end_ind:
                course_list[course['course_id']] = course['title']
        return course_list

    def get_top_folder(self, course):
        """Return the id of the course's top folder."""
        url = self.STUDIP_DOMAIN + '/api.php/course/' + course + '/top_folder'
        rsp = req.get(url, auth=self.USER)
        return rsp.json()['id']

    def get_docs(self, folder):
        """Return the ids of all files directly inside folder."""
        url = self.STUDIP_DOMAIN + '/api.php/folder/' + folder
        rsp = req.get(url, auth=self.USER)
        return [doc['id'] for doc in rsp.json()['file_refs']]

    def download(self, doc):
        """Download doc into the current directory if it changed since the
        last stored download, then record the new download time."""
        rsp1 = req.get(self.STUDIP_DOMAIN + '/api.php/file/' + doc,
                       auth=self.USER)
        doc_name = rsp1.json()['name']
        doc_chdate = rsp1.json()['chdate']
        last_dl = self.db.get_last_file_dl(doc)
        if last_dl is None or last_dl < doc_chdate:
            print('downloading ', doc_name)
            url2 = self.STUDIP_DOMAIN + '/api.php/file/' + doc + '/download'
            rsp2 = req.get(url2, auth=self.USER, stream=True)
            total_size = int(rsp2.headers.get('content-length', 0))
            progbar = tqdm(total=total_size, unit='iB', unit_scale=True)
            with open(doc_name, 'wb') as doc_file:
                for chunk in rsp2.iter_content(self.CHUNK_SIZE):
                    progbar.update(len(chunk))
                    doc_file.write(chunk)
            progbar.close()  # previously leaked; tqdm bars must be closed
            self.db.set_last_file_dl(str(doc), str(int(time.time())))

    def get_subdirs(self, folder):
        """Map subfolder id -> name for all subfolders of folder."""
        url = self.STUDIP_DOMAIN + '/api.php/folder/' + folder
        rsp = req.get(url, auth=self.USER)
        return {sub['id']: sub['name'] for sub in rsp.json()['subfolders']}

    def download_folder(self, folder):
        """Download every file directly inside folder."""
        for doc in self.get_docs(folder):
            print('found doc ', doc)
            self.download(doc)

    def download_folder_rec(self, folder, base_dir):
        """Recursively download folder and its subfolders into base_dir."""
        print('folder ', folder)
        self.create_dir(base_dir)
        self.download_folder(folder)
        subdirs = self.get_subdirs(folder)
        os.chdir(base_dir)
        for sub_id, sub_name in subdirs.items():
            # '/' in a folder name would create nested paths; flatten it
            subdir_path = os.path.join(base_dir, sub_name.replace('/', '-'))
            print(subdir_path)
            self.create_dir(subdir_path)
            os.chdir(subdir_path)
            self.download_folder_rec(sub_id, subdir_path)

    def download_course(self, course, base_dir):
        """Download the whole file tree of a course into base_dir."""
        print('course ', course)
        self.create_dir(base_dir)
        os.chdir(base_dir)
        self.download_folder_rec(self.get_top_folder(course), base_dir)

    def download_curr_courses(self, base_dir):
        """Download all courses of the current semester below base_dir."""
        print('Start downloading all current courses')
        self.create_dir(base_dir)
        curr_courses = self.get_curr_courses(
            self.get_uid(), self.get_curr_semester())
        os.chdir(base_dir)
        for course_id, title in curr_courses.items():
            print('course is ', title)
            path = os.path.join(base_dir, title.replace('/', '-'))
            self.download_course(course_id, path)
		Reference in New Issue
	
	Block a user