This program downloads all files of a Stud.IP user's current semester.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

87 lines
2.9 KiB

  1. import os
  2. import logging as log
  3. from studip import Studip
  4. class Crawler:
  5. def __init__(self, studip):
  6. self.studip = studip
  7. def download_folder(self, folder):
  8. """Download all documents in a folder.
  9. Parameters:
  10. folder(string): id of the folder to download
  11. """
  12. docs = self.studip.get_docs(folder)
  13. for doc in docs:
  14. log.info('found doc ' + doc)
  15. self.studip.download(doc)
  16. def download_folder_rec(self, folder, base_dir):
  17. """Download all documents in a folder and its subfolders.
  18. This keeps the folder structure.
  19. Parameters:
  20. folder(string): id of the folder to download
  21. base_dir(string): directory where to put the download
  22. """
  23. log.info('crawling folder ' + folder)
  24. self.create_dir(base_dir)
  25. self.download_folder(folder)
  26. subdirs = self.studip.get_subdirs(folder)
  27. os.chdir(base_dir)
  28. for subdir in subdirs:
  29. subdir_name = subdirs[subdir].replace('/', '-')
  30. subdir_path = os.path.join(base_dir, subdir_name)
  31. log.debug(subdir_path)
  32. self.create_dir(subdir_path)
  33. os.chdir(subdir_path)
  34. self.download_folder_rec(subdir, subdir_path)
  35. log.info('finished crawling folder ' + folder)
  36. def download_course(self, course, base_dir):
  37. """Download all documents in course.
  38. This keeps the folder structure.
  39. Parameters:
  40. course(string): id of the course to download
  41. base_dir(string): directory where to put the download
  42. """
  43. log.info('crawling course ' + course)
  44. self.create_dir(base_dir)
  45. os.chdir(base_dir)
  46. root = self.studip.get_top_folder(course)
  47. self.download_folder_rec(root, base_dir)
  48. log.info('finished crawling course ' + course)
  49. def download_curr_courses(self, base_dir):
  50. """Download all documents of all current courses.
  51. This keeps the folder structure.
  52. Parameters:
  53. base_dir(string): directory where to put the download
  54. """
  55. self.create_dir(base_dir)
  56. curr_courses = self.studip.get_curr_courses(
  57. self.studip.get_uid(), self.studip.get_curr_semester())
  58. log.info('crawling all current courses')
  59. os.chdir(base_dir)
  60. for course in curr_courses:
  61. log.debug('course is ' + curr_courses[course])
  62. course_name = curr_courses[course].replace('/', '-')
  63. path = os.path.join(base_dir, course_name)
  64. self.download_course(course, path)
  65. log.info('finished crawling all current courses')
  66. def create_dir(self, dir):
  67. """Creates a dir if it doesnt exist already.
  68. Parameters:
  69. dir(string): directory path to create
  70. """
  71. if not os.path.exists(dir):
  72. log.info('creating folder ' + dir)
  73. os.mkdir(dir)