|
|
#!/bin/env python3import timeimport osimport argparse
from tqdm import tqdmimport requests as reqfrom requests.auth import HTTPBasicAuth
def create_dir(dir): if not os.path.exists(dir): print('creating folder', dir) os.mkdir(dir)
def set_last_dl(time): last_dl_file = open('last_dl.txt', 'w') last_dl_file.write(str(time).split('.')[0])
def get_last_dl(): try: last_dl_file = open('last_dl.txt', 'r') return int(last_dl_file.read()) except: return None
parser = argparse.ArgumentParser(description='Download Files from StudIP.')parser.add_argument('-o', '--output', type=str, default='./data', help='path to output directory')parser.add_argument('-u', '--user', type=str, help='studip username', required=True)parser.add_argument('-p', '--passw', type=str, help='studip password', required=True)parser.add_argument('-s', '--url', type=str, help='studip url', required=True)parser.add_argument('-c', '--chunk', type=int, default=1024 * 1024, help='chunksize for downloading data')parser.add_argument('-r', '--reset_dl_date', action='store_true')
args = parser.parse_args()
BASE_DIR = os.path.abspath(args.output)CHUNK_SIZE = args.chunkSTUDIP_DOMAIN = args.urlUSERNAME = args.userPASSWORD = args.passwUSER = (USERNAME, PASSWORD)if args.reset_dl_date: set_last_dl(None)LAST_DOWNLOAD = get_last_dl()
def get_uid(): url = STUDIP_DOMAIN + '/api.php/user/' rsp = req.get(url, auth=USER) user_id = rsp.json()['user_id'] return user_id
def get_curr_semester(): url = STUDIP_DOMAIN + '/api.php/semesters/' rsp = req.get(url, auth=USER) curr_time = int(str(time.time()).split('.')[0]) semesters = rsp.json()['collection'] for sem_uri in semesters: semester = semesters[sem_uri] sem_begin = semester['begin'] sem_end = semester['end'] if sem_begin < curr_time < sem_end: return sem_uri return 0
def get_ordered_semesters(): url = STUDIP_DOMAIN + '/api.php/semesters/' rsp = req.get(url, auth=USER) semesters = rsp.json()['collection'] order_sems = [] for sem_uri in semesters: order_sems.append(sem_uri) return order_sems
def get_curr_courses(user_id, semester): url = STUDIP_DOMAIN + '/api.php/user/' + user_id + '/courses' rsp = req.get(url, auth=USER) ord_sems = get_ordered_semesters() courses = rsp.json()['collection'] i = 0 course_list = {} for course_uri in courses: course = courses[course_uri] start_sem = course['start_semester'] if start_sem != None: start_ind = ord_sems.index(start_sem) else: start_ind = 100 end_sem = course['end_semester'] if end_sem != None: end_ind = ord_sems.index(end_sem) else: end_ind = 100 curr_ind = ord_sems.index(semester) if start_ind <= curr_ind <= end_ind: course_title = course['title'] course_id = course['course_id'] course_list[course_id] = course_title return course_list
def get_top_folder(course): url = STUDIP_DOMAIN + '/api.php/course/' + course + '/top_folder' rsp = req.get(url, auth=USER) top_folder = rsp.json() tf_id = top_folder['id'] return(tf_id)
def get_docs(folder): url = STUDIP_DOMAIN + '/api.php/folder/' + folder rsp = req.get(url, auth=USER) docs = rsp.json()['file_refs'] res_docs = [] for doc in docs: doc_id = doc['id'] res_docs.append(doc_id) return(res_docs)
def download(doc, time): url1 = STUDIP_DOMAIN + '/api.php/file/' + doc rsp1 = req.get(url1, auth=USER) doc_name = rsp1.json()['name'] doc_chdate = rsp1.json()['chdate'] if time == None or time < doc_chdate: print('downloading ', doc_name) url2 = STUDIP_DOMAIN + '/api.php/file/' + doc + '/download' rsp2 = req.get(url2, auth=USER, stream=True) total_size = int(rsp2.headers.get('content-length', 0)) progbar = tqdm(total=total_size, unit='iB', unit_scale=True) with open(doc_name, 'wb') as doc: for chunk in rsp2.iter_content(CHUNK_SIZE): progbar.update(len(chunk)) doc.write(chunk)
def get_subdirs(folder): url = STUDIP_DOMAIN + '/api.php/folder/' + folder rsp = req.get(url, auth=USER) subdirs = rsp.json()['subfolders'] docs = rsp.json()['file_refs'] res_subdirs = {} for subdir in subdirs: sub_id = subdir['id'] sub_name = subdir['name'] res_subdirs[sub_id] = sub_name return res_subdirs
def download_folder(folder, time): docs = get_docs(folder) for doc in docs: print('found doc ', doc) download(doc, time)
def download_folder_rec(folder, time, base_dir): print('folder ', folder) create_dir(base_dir) download_folder(folder, time) subdirs = get_subdirs(folder) os.chdir(base_dir) for subdir in subdirs: subdir_name = subdirs[subdir].replace('/', '-') subdir_path = os.path.join(base_dir, subdir_name) print(subdir_path) create_dir(subdir_path) os.chdir(subdir_path) download_folder_rec(subdir, time, subdir_path)
def download_course(course, time, base_dir): print('course ', course) create_dir(base_dir) os.chdir(base_dir) root = get_top_folder(course) download_folder_rec(root, time, base_dir)
def download_curr_courses(time, base_dir): print('Start downloading all current courses') create_dir(base_dir) curr_courses = get_curr_courses(get_uid(), get_curr_semester()) os.chdir(base_dir) for course in curr_courses: print('course is ', curr_courses[course]) course_name = curr_courses[course].replace('/', '-') path = os.path.join(base_dir, course_name) download_course(course, time, path)
download_curr_courses(LAST_DOWNLOAD, BASE_DIR)set_last_dl(time.time())
|