mirror of
https://github.com/tiyn/stud.ip-crawler.git
synced 2026-02-22 06:34:48 +01:00
log: adding basic log with levels and file
The log supports several levels that can be set from the command line. The log file name is hardcoded as log.txt, and logging to the file can be toggled on or off.
This commit is contained in:
54
src/crawler.py
Normal file
54
src/crawler.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import os
|
||||
import logging as log
|
||||
|
||||
from studip import Studip
|
||||
|
||||
|
||||
class Crawler:
    """Download the documents of Stud.IP folders and courses to local disk.

    The crawler delegates every remote operation to the ``studip`` object it
    is given and mirrors the remote folder hierarchy below a local base
    directory.

    NOTE(review): ``studip.download`` is presumably writing into the current
    working directory -- the ``os.chdir`` calls below only make sense under
    that assumption; confirm against the ``Studip`` implementation.

    Side effect: the ``download_*_rec`` / ``download_course`` /
    ``download_curr_courses`` methods change the process-wide working
    directory and do not restore it.
    """

    def __init__(self, studip):
        """Remember the client used for all lookups and downloads.

        Args:
            studip: Object providing ``get_docs``, ``download``,
                ``get_subdirs``, ``get_top_folder``, ``get_curr_courses``,
                ``get_uid`` and ``get_curr_semester``.
        """
        self.studip = studip

    def download_folder(self, folder):
        """Download every document listed for ``folder`` (non-recursive).

        Args:
            folder: Folder identifier passed to ``studip.get_docs``.
        """
        for doc in self.studip.get_docs(folder):
            # Lazy %-formatting: also works if the identifier is not a str.
            log.info('found doc %s', doc)
            self.studip.download(doc)

    def download_folder_rec(self, folder, base_dir):
        """Download ``folder`` and all of its subfolders into ``base_dir``.

        Subfolder names have ``/`` replaced by ``-`` so that each one maps to
        a single local directory.

        Args:
            folder: Folder identifier passed to the ``studip`` client.
            base_dir: Local directory that receives the folder's content.
                It is created if missing; relative paths are resolved
                against the working directory at call time.
        """
        # Resolve once, before any chdir, so that joined subpaths stay valid
        # no matter what the working directory is later on.
        base_dir = os.path.abspath(base_dir)
        log.info('crawling folder %s', folder)
        self.create_dir(base_dir)
        # Enter the target directory *before* downloading so the folder's own
        # documents land in base_dir even when this method is called directly.
        os.chdir(base_dir)
        self.download_folder(folder)
        subdirs = self.studip.get_subdirs(folder)
        # subdirs is indexed by subfolder identifier and yields its name.
        for subdir, name in subdirs.items():
            subdir_path = os.path.join(base_dir, name.replace('/', '-'))
            log.debug(subdir_path)
            # The recursive call creates and enters subdir_path itself.
            self.download_folder_rec(subdir, subdir_path)

    def download_course(self, course, base_dir):
        """Download the complete folder tree of ``course`` into ``base_dir``.

        Args:
            course: Course identifier passed to ``studip.get_top_folder``.
            base_dir: Local directory for the course; created if missing.
        """
        base_dir = os.path.abspath(base_dir)
        log.info('crawling course %s', course)
        self.create_dir(base_dir)
        os.chdir(base_dir)
        root = self.studip.get_top_folder(course)
        self.download_folder_rec(root, base_dir)

    def download_curr_courses(self, base_dir):
        """Download all courses reported for the current user and semester.

        Each course is stored in its own subdirectory of ``base_dir``, named
        after the course with ``/`` replaced by ``-``.

        Args:
            base_dir: Local directory that receives one subdirectory per
                course; created if missing.
        """
        base_dir = os.path.abspath(base_dir)
        log.info('Start crawling all current courses')
        self.create_dir(base_dir)
        curr_courses = self.studip.get_curr_courses(
            self.studip.get_uid(), self.studip.get_curr_semester())
        os.chdir(base_dir)
        # curr_courses is indexed by course identifier and yields its name.
        for course, title in curr_courses.items():
            log.debug('course is %s', title)
            path = os.path.join(base_dir, title.replace('/', '-'))
            self.download_course(course, path)

    def create_dir(self, dir):
        """Create directory ``dir`` (including missing parents) if needed.

        The parameter name shadows the ``dir`` builtin; it is kept so that
        existing keyword callers continue to work.

        Args:
            dir: Path of the directory to create.

        Raises:
            FileExistsError: If ``dir`` exists but is not a directory.
        """
        if not os.path.isdir(dir):
            log.info('creating folder %s', dir)
            # exist_ok avoids a failure if the directory appears between the
            # check above and this call.
            os.makedirs(dir, exist_ok=True)
|
||||
Reference in New Issue
Block a user