1
0
mirror of https://github.com/tiyn/stud.ip-crawler.git synced 2026-02-22 06:34:48 +01:00

log: adding basic log with levels and file

The log supports several levels that can be set
from the command line.
The log file name is hardcoded as log.txt, and file logging can be toggled.
This commit is contained in:
TiynGER
2020-06-10 00:24:01 +02:00
parent fa36e0f29e
commit 7ef3f063d2
6 changed files with 134 additions and 109 deletions

54
src/crawler.py Normal file
View File

@@ -0,0 +1,54 @@
import os
import logging as log
from studip import Studip
class Crawler:
    """Download the documents of Stud.IP courses into a local directory tree.

    All remote access goes through the ``studip`` object handed to the
    constructor; this class only walks the folder hierarchy and mirrors it
    on disk.  Documents are saved by ``studip.download`` into the current
    working directory, so the crawler changes the process-wide working
    directory while it runs.
    """

    def __init__(self, studip):
        """Store the Stud.IP client used for every remote call."""
        self.studip = studip

    def download_folder(self, folder):
        """Download every document of ``folder`` into the current directory."""
        for doc in self.studip.get_docs(folder):
            # Lazy %-formatting: no TypeError if an id is not a plain str.
            log.info('found doc %s', doc)
            self.studip.download(doc)

    def download_folder_rec(self, folder, base_dir):
        """Download ``folder`` and all of its subfolders below ``base_dir``.

        ``base_dir`` is created if needed and becomes the working directory
        before the folder's own documents are fetched.  Each subfolder is
        mirrored into a child directory named after it, with ``/`` replaced
        by ``-`` so the name stays a single path component.
        """
        # Resolve before the first chdir; afterwards a relative path would be
        # interpreted against the wrong directory.
        base_dir = os.path.abspath(base_dir)
        log.info('crawling folder %s', folder)
        self.create_dir(base_dir)
        os.chdir(base_dir)
        self.download_folder(folder)
        subdirs = self.studip.get_subdirs(folder)
        for subdir, name in subdirs.items():
            subdir_path = os.path.join(base_dir, name.replace('/', '-'))
            log.debug(subdir_path)
            # The recursive call creates and enters subdir_path itself.
            self.download_folder_rec(subdir, subdir_path)

    def download_course(self, course, base_dir):
        """Download the complete folder tree of ``course`` into ``base_dir``."""
        base_dir = os.path.abspath(base_dir)
        log.info('crawling course %s', course)
        self.create_dir(base_dir)
        os.chdir(base_dir)
        root = self.studip.get_top_folder(course)
        self.download_folder_rec(root, base_dir)

    def download_curr_courses(self, base_dir):
        """Download every course of the current semester below ``base_dir``.

        Each course gets its own directory named after the course, with
        ``/`` replaced by ``-``.
        """
        base_dir = os.path.abspath(base_dir)
        log.info('Start crawling all current courses')
        self.create_dir(base_dir)
        curr_courses = self.studip.get_curr_courses(
            self.studip.get_uid(), self.studip.get_curr_semester())
        os.chdir(base_dir)
        for course, name in curr_courses.items():
            log.debug('course is %s', name)
            path = os.path.join(base_dir, name.replace('/', '-'))
            self.download_course(course, path)

    def create_dir(self, dir):
        """Create directory ``dir`` (including missing parents) if absent.

        The parameter name shadows the ``dir`` builtin; it is kept so that
        existing keyword callers continue to work.
        """
        if not os.path.exists(dir):
            log.info('creating folder %s', dir)
            # makedirs creates missing parents; exist_ok tolerates a
            # directory that appears between the check and the call.
            os.makedirs(dir, exist_ok=True)