1
0
mirror of https://github.com/tiyn/stud.ip-crawler.git synced 2026-02-22 06:34:48 +01:00
Files
stud.ip-crawler/src/crawler.py
TiynGER 7ef3f063d2 log: adding basic log with levels and file
The log has options for several levels that can
be set from the command line.
The file is hardcoded as log.txt and can be toggled
2020-06-10 00:24:01 +02:00

55 lines
1.8 KiB
Python

import os
import logging as log
from studip import Studip
class Crawler:
    """Walk Stud.IP courses and folders and download their documents.

    The crawler mirrors the remote folder tree on the local filesystem.
    It changes the process working directory to the folder currently
    being crawled before asking the ``studip`` client to download.
    NOTE(review): this presumably relies on ``studip.download`` writing
    into the current working directory -- confirm against the client.
    """

    def __init__(self, studip):
        """Store the client used for all remote calls.

        :param studip: object providing ``get_docs``, ``download``,
            ``get_subdirs``, ``get_top_folder``, ``get_curr_courses``,
            ``get_uid`` and ``get_curr_semester``.
        """
        self.studip = studip

    @staticmethod
    def _safe_name(name):
        """Return ``name`` with ``/`` replaced so it is a single path part."""
        return name.replace('/', '-')

    def download_folder(self, folder):
        """Download every document listed directly in ``folder``.

        :param folder: folder identifier passed to ``studip.get_docs``.
        """
        for doc in self.studip.get_docs(folder):
            log.info('found doc %s', doc)
            self.studip.download(doc)

    def download_folder_rec(self, folder, base_dir):
        """Download ``folder`` and all of its subfolders into ``base_dir``.

        :param folder: folder identifier to crawl.
        :param base_dir: local directory that receives the folder content;
            it is created when missing.  Relative paths are resolved
            against the working directory at call time.
        """
        # Resolve once: the working directory changes below, which would
        # otherwise silently re-anchor a relative ``base_dir``.
        base_dir = os.path.abspath(base_dir)
        log.info('crawling folder %s', folder)
        self.create_dir(base_dir)
        # Enter the target directory *before* downloading so the documents
        # of this folder do not end up in the caller's directory.
        os.chdir(base_dir)
        self.download_folder(folder)
        subdirs = self.studip.get_subdirs(folder)
        for subdir in subdirs:
            subdir_path = os.path.join(
                base_dir, self._safe_name(subdirs[subdir]))
            log.debug(subdir_path)
            # The recursive call creates and enters ``subdir_path`` itself.
            self.download_folder_rec(subdir, subdir_path)

    def download_course(self, course, base_dir):
        """Download the complete folder tree of ``course`` into ``base_dir``.

        :param course: course identifier passed to
            ``studip.get_top_folder``.
        :param base_dir: local directory for the course; created when
            missing.  Relative paths are resolved at call time.
        """
        base_dir = os.path.abspath(base_dir)
        log.info('crawling course %s', course)
        self.create_dir(base_dir)
        os.chdir(base_dir)
        root = self.studip.get_top_folder(course)
        self.download_folder_rec(root, base_dir)

    def download_curr_courses(self, base_dir):
        """Download all courses of the current semester below ``base_dir``.

        Each course gets its own subdirectory named after the course,
        with ``/`` replaced by ``-``.

        :param base_dir: local root directory; created when missing.
            Relative paths are resolved at call time.
        """
        # Absolute path keeps every course under the same root even though
        # crawling a course leaves the working directory deep in its tree.
        base_dir = os.path.abspath(base_dir)
        log.info('Start crawling all current courses')
        self.create_dir(base_dir)
        curr_courses = self.studip.get_curr_courses(
            self.studip.get_uid(), self.studip.get_curr_semester())
        os.chdir(base_dir)
        for course in curr_courses:
            course_title = curr_courses[course]
            log.debug('course is %s', course_title)
            path = os.path.join(base_dir, self._safe_name(course_title))
            self.download_course(course, path)

    def create_dir(self, dir):
        """Create directory ``dir`` (including parents) if it is missing.

        Nothing happens when the path already exists.

        :param dir: path of the directory to create.
        """
        if not os.path.exists(dir):
            log.info('creating folder %s', dir)
            # ``exist_ok`` tolerates the directory appearing between the
            # check above and this call; parents are created as needed.
            os.makedirs(dir, exist_ok=True)