This program downloads all files of a Stud.IP user's current semester.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

202 lines
5.9 KiB

6 months ago
  1. #!/bin/env python3
  2. import time
  3. import os
  4. import argparse
  5. from tqdm import tqdm
  6. import requests as req
  7. from requests.auth import HTTPBasicAuth
  8. def create_dir(dir):
  9. if not os.path.exists(dir):
  10. print('creating folder', dir)
  11. os.mkdir(dir)
  12. def set_last_dl(time):
  13. last_dl_file = open('last_dl.txt', 'w')
  14. last_dl_file.write(str(time).split('.')[0])
  15. def get_last_dl():
  16. try:
  17. last_dl_file = open('last_dl.txt', 'r')
  18. return int(last_dl_file.read())
  19. except:
  20. return None
  21. parser = argparse.ArgumentParser(description='Download Files from StudIP.')
  22. parser.add_argument('-o', '--output', type=str,
  23. default='./data', help='path to output directory')
  24. parser.add_argument('-u', '--user', type=str, help='studip username', required=True)
  25. parser.add_argument('-p', '--passw', type=str, help='studip password', required=True)
  26. parser.add_argument('-s', '--url', type=str, help='studip url', required=True)
  27. parser.add_argument('-c', '--chunk', type=int, default=1024 *
  28. 1024, help='chunksize for downloading data')
  29. parser.add_argument('-r', '--reset_dl_date', action='store_true')
  30. args = parser.parse_args()
  31. BASE_DIR = os.path.abspath(args.output)
  32. CHUNK_SIZE = args.chunk
  33. STUDIP_DOMAIN = args.url
  34. USERNAME = args.user
  35. PASSWORD = args.passw
  36. USER = (USERNAME, PASSWORD)
  37. if args.reset_dl_date:
  38. set_last_dl(None)
  39. LAST_DOWNLOAD = get_last_dl()
  40. def get_uid():
  41. url = STUDIP_DOMAIN + '/api.php/user/'
  42. rsp = req.get(url, auth=USER)
  43. user_id = rsp.json()['user_id']
  44. return user_id
  45. def get_curr_semester():
  46. url = STUDIP_DOMAIN + '/api.php/semesters/'
  47. rsp = req.get(url, auth=USER)
  48. curr_time = int(str(time.time()).split('.')[0])
  49. semesters = rsp.json()['collection']
  50. for sem_uri in semesters:
  51. semester = semesters[sem_uri]
  52. sem_begin = semester['begin']
  53. sem_end = semester['end']
  54. if sem_begin < curr_time < sem_end:
  55. return sem_uri
  56. return 0
  57. def get_ordered_semesters():
  58. url = STUDIP_DOMAIN + '/api.php/semesters/'
  59. rsp = req.get(url, auth=USER)
  60. semesters = rsp.json()['collection']
  61. order_sems = []
  62. for sem_uri in semesters:
  63. order_sems.append(sem_uri)
  64. return order_sems
  65. def get_curr_courses(user_id, semester):
  66. url = STUDIP_DOMAIN + '/api.php/user/' + user_id + '/courses'
  67. rsp = req.get(url, auth=USER)
  68. ord_sems = get_ordered_semesters()
  69. courses = rsp.json()['collection']
  70. i = 0
  71. course_list = {}
  72. for course_uri in courses:
  73. course = courses[course_uri]
  74. start_sem = course['start_semester']
  75. if start_sem != None:
  76. start_ind = ord_sems.index(start_sem)
  77. else:
  78. start_ind = 100
  79. end_sem = course['end_semester']
  80. if end_sem != None:
  81. end_ind = ord_sems.index(end_sem)
  82. else:
  83. end_ind = 100
  84. curr_ind = ord_sems.index(semester)
  85. if start_ind <= curr_ind <= end_ind:
  86. course_title = course['title']
  87. course_id = course['course_id']
  88. course_list[course_id] = course_title
  89. return course_list
  90. def get_top_folder(course):
  91. url = STUDIP_DOMAIN + '/api.php/course/' + course + '/top_folder'
  92. rsp = req.get(url, auth=USER)
  93. top_folder = rsp.json()
  94. tf_id = top_folder['id']
  95. return(tf_id)
  96. def get_docs(folder):
  97. url = STUDIP_DOMAIN + '/api.php/folder/' + folder
  98. rsp = req.get(url, auth=USER)
  99. docs = rsp.json()['file_refs']
  100. res_docs = []
  101. for doc in docs:
  102. doc_id = doc['id']
  103. res_docs.append(doc_id)
  104. return(res_docs)
  105. def download(doc, time):
  106. url1 = STUDIP_DOMAIN + '/api.php/file/' + doc
  107. rsp1 = req.get(url1, auth=USER)
  108. doc_name = rsp1.json()['name']
  109. doc_chdate = rsp1.json()['chdate']
  110. if time == None or time < doc_chdate:
  111. print('downloading ', doc_name)
  112. url2 = STUDIP_DOMAIN + '/api.php/file/' + doc + '/download'
  113. rsp2 = req.get(url2, auth=USER, stream=True)
  114. total_size = int(rsp2.headers.get('content-length', 0))
  115. progbar = tqdm(total=total_size, unit='iB', unit_scale=True)
  116. with open(doc_name, 'wb') as doc:
  117. for chunk in rsp2.iter_content(CHUNK_SIZE):
  118. progbar.update(len(chunk))
  119. doc.write(chunk)
  120. def get_subdirs(folder):
  121. url = STUDIP_DOMAIN + '/api.php/folder/' + folder
  122. rsp = req.get(url, auth=USER)
  123. subdirs = rsp.json()['subfolders']
  124. docs = rsp.json()['file_refs']
  125. res_subdirs = {}
  126. for subdir in subdirs:
  127. sub_id = subdir['id']
  128. sub_name = subdir['name']
  129. res_subdirs[sub_id] = sub_name
  130. return res_subdirs
  131. def download_folder(folder, time):
  132. docs = get_docs(folder)
  133. for doc in docs:
  134. print('found doc ', doc)
  135. download(doc, time)
  136. def download_folder_rec(folder, time, base_dir):
  137. print('folder ', folder)
  138. create_dir(base_dir)
  139. download_folder(folder, time)
  140. subdirs = get_subdirs(folder)
  141. os.chdir(base_dir)
  142. for subdir in subdirs:
  143. subdir_name = subdirs[subdir].replace('/', '-')
  144. subdir_path = os.path.join(base_dir, subdir_name)
  145. print(subdir_path)
  146. create_dir(subdir_path)
  147. os.chdir(subdir_path)
  148. download_folder_rec(subdir, time, subdir_path)
  149. def download_course(course, time, base_dir):
  150. print('course ', course)
  151. create_dir(base_dir)
  152. os.chdir(base_dir)
  153. root = get_top_folder(course)
  154. download_folder_rec(root, time, base_dir)
  155. def download_curr_courses(time, base_dir):
  156. print('Start downloading all current courses')
  157. create_dir(base_dir)
  158. curr_courses = get_curr_courses(get_uid(), get_curr_semester())
  159. os.chdir(base_dir)
  160. for course in curr_courses:
  161. print('course is ', curr_courses[course])
  162. course_name = curr_courses[course].replace('/', '-')
  163. path = os.path.join(base_dir, course_name)
  164. download_course(course, time, path)
  165. download_curr_courses(LAST_DOWNLOAD, BASE_DIR)
  166. set_last_dl(time.time())