This program downloads all files of a Stud.IP user's current semester.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
2.8 KiB

  1. import os
  2. import logging as log
  3. from studip import Studip
  4. class Crawler:
  5. def __init__(self, studip):
  6. self.studip = studip
  7. def download_folder(self, folder):
  8. """
  9. Download all documents in a folder.
  10. Parameters:
  11. folder(string): id of the folder to download
  12. """
  13. docs = self.studip.get_docs(folder)
  14. for doc in docs:
  15. log.info('found doc ' + doc)
  16. self.studip.download(doc)
  17. def download_folder_rec(self, folder, base_dir):
  18. """
  19. Download all documents in a folder and its subfolders.
  20. This keeps the folder structure.
  21. Parameters:
  22. folder(string): id of the folder to download
  23. base_dir(string): directory where to put the download
  24. """
  25. log.info('crawling folder ' + folder)
  26. self.create_dir(base_dir)
  27. self.download_folder(folder)
  28. subdirs = self.studip.get_subdirs(folder)
  29. os.chdir(base_dir)
  30. for subdir in subdirs:
  31. subdir_name = subdirs[subdir].replace('/', '-')
  32. subdir_path = os.path.join(base_dir, subdir_name)
  33. log.debug(subdir_path)
  34. self.create_dir(subdir_path)
  35. os.chdir(subdir_path)
  36. self.download_folder_rec(subdir, subdir_path)
  37. def download_course(self, course, base_dir):
  38. """
  39. Download all documents in course.
  40. This keeps the folder structure.
  41. Parameters:
  42. course(string): id of the course to download
  43. base_dir(string): directory where to put the download
  44. """
  45. log.info('crawling course ' + course)
  46. self.create_dir(base_dir)
  47. os.chdir(base_dir)
  48. root = self.studip.get_top_folder(course)
  49. self.download_folder_rec(root, base_dir)
  50. def download_curr_courses(self, base_dir):
  51. """
  52. Download all documents of all current courses.
  53. This keeps the folder structure.
  54. Parameters:
  55. base_dir(string): directory where to put the download
  56. """
  57. log.info('Start crawling all current courses')
  58. self.create_dir(base_dir)
  59. curr_courses = self.studip.get_curr_courses(
  60. self.studip.get_uid(), self.studip.get_curr_semester())
  61. os.chdir(base_dir)
  62. for course in curr_courses:
  63. log.debug('course is ' + curr_courses[course])
  64. course_name = curr_courses[course].replace('/', '-')
  65. path = os.path.join(base_dir, course_name)
  66. self.download_course(course, path)
  67. def create_dir(self, dir):
  68. """
  69. Creates a dir if it doesnt exist already.
  70. Parameters:
  71. dir(string): directory path to create
  72. """
  73. if not os.path.exists(dir):
  74. log.info('creating folder' + dir)
  75. os.mkdir(dir)