Mirror of https://github.com/tiyn/stud.ip-crawler.git, synced 2026-02-22 06:34:48 +01:00
stud.ip-crawler/src/run.py
Commit 4a8cf45ad3 by TiynGER: "database: switching from mysql to sqlite"
Currently we only need the database to keep track of the files that have
already been downloaded, so a single database with a single table is enough.
A full SQL server is too bulky for this purpose.
By using SQLite we can embed the database and keep it minimal.
2020-11-17 11:04:03 +01:00
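
As a rough sketch of the approach the commit describes, a single SQLite table keyed by file ID is enough to remember what has already been fetched. The class and column names below (DownloadLog, file_id, chdate) are illustrative assumptions, not the repository's actual database.py:

import sqlite3


class DownloadLog:
    """Sketch: one SQLite table remembering which files were already downloaded."""

    def __init__(self, path='files.db'):
        self.conn = sqlite3.connect(path)
        # Hypothetical schema: file id plus the change date it was downloaded at.
        self.conn.execute('CREATE TABLE IF NOT EXISTS files'
                          ' (file_id TEXT PRIMARY KEY, chdate INTEGER)')

    def is_current(self, file_id, chdate):
        # Skip a download if the stored change date is at least as new.
        row = self.conn.execute('SELECT chdate FROM files WHERE file_id = ?',
                                (file_id,)).fetchone()
        return row is not None and row[0] >= chdate

    def mark_downloaded(self, file_id, chdate):
        self.conn.execute('INSERT OR REPLACE INTO files VALUES (?, ?)',
                          (file_id, chdate))
        self.conn.commit()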


#!/usr/bin/env python3
import os
import sys
import argparse
import logging as log

from studip import Studip
from crawler import Crawler
from database import Database

# Command-line interface.
parser = argparse.ArgumentParser(description='Download files from Stud.IP.')
parser.add_argument('-o', '--output', type=str, default='./data',
                    help='path to output directory')
parser.add_argument('-u', '--user', type=str, required=True,
                    help='Stud.IP username')
parser.add_argument('-p', '--passwd', type=str, required=True,
                    help='Stud.IP password')
parser.add_argument('-s', '--url', type=str, required=True,
                    help='Stud.IP URL')
parser.add_argument('--chunk', type=int, default=1024 * 1024,
                    help='chunk size in bytes for downloading data')
parser.add_argument('-r', '--reset_dl_date', action='store_true',
                    help='download everything, ignoring the last download date')
parser.add_argument('-d', '--debug_output', action='store_true',
                    help='display debug information about the process')
parser.add_argument('-q', '--quiet', action='store_true',
                    help='only display the most important output')
parser.add_argument('-l', '--log_file', action='store_true',
                    help='save the log to a file named "log.txt"')
args = parser.parse_args()

# Choose log verbosity; --quiet takes precedence over --debug_output.
if args.quiet:
    log_level = log.WARNING
elif args.debug_output:
    log_level = log.DEBUG
else:
    log_level = log.INFO

if args.log_file:
    log.basicConfig(level=log_level, filename='log.txt')
else:
    log.basicConfig(level=log_level)

BASE_DIR = os.path.abspath(args.output)
USERNAME = args.user
PASSWORD = args.passwd

# Wire up the components: the database tracks already-downloaded files,
# the Stud.IP client handles requests to the server, and the crawler
# ties both together.
db = Database(args.reset_dl_date)
studip = Studip(args.chunk, args.url, (USERNAME, PASSWORD), db)
crawler = Crawler(studip)

# Start crawling; exit cleanly on Ctrl+C.
try:
    crawler.download_curr_courses(BASE_DIR)
except KeyboardInterrupt:
    sys.exit(0)
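
For reference, a typical invocation might be: python3 run.py -u alice -p secret -s https://studip.example.org (where alice, secret, and the URL are placeholders for real credentials and a real Stud.IP server). Files are written to ./data unless -o points elsewhere, and the run can be aborted at any time with Ctrl+C.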