From 4a8cf45ad355c4a6a11e183ecf1edffe9bfbf30f Mon Sep 17 00:00:00 2001 From: TiynGER Date: Tue, 17 Nov 2020 11:04:03 +0100 Subject: [PATCH] database: switching from mysql to sqlite Currently we only need the db to keep track of the files that were already downloaded, thus we only use one database and only one table. A complete SQL database server is a bit too bulky for this purpose. By using sqlite we can minimize and embed the db. --- README.md | 21 ++++++++++---------- docker-compose.yml | 15 -------------- docker-entry.sh | 4 ++-- src/{mysql.py => database.py} | 37 ++++++++++++----------------------- src/requirements.txt | 5 ++--- src/run.py | 13 ++---------- src/studip.py | 5 +---- 7 files changed, 29 insertions(+), 71 deletions(-) rename src/{mysql.py => database.py} (66%) diff --git a/README.md b/README.md index 7c796e7..82f56ac 100644 --- a/README.md +++ b/README.md @@ -56,21 +56,20 @@ Run `python3 run.py -h` for a help menu and see which ones are important for you #### Environment-variables Set the following variables with the -e tag. -| Name | Usage | Default | -| ---- |------ | ------- | -| `USER` | username on the studip server | `admin` | -| `PSWD` | password on the studip server | `admin` | -| `URL` | url of the studip server | `admin` | -| `HOST` | ip of the mysql instance to connect | `mysql` | -| `DB_USER` | username of the mysql instance to connect | `root` | -| `DB_PSWD` | password of the mysql instance to connect | `root` | -| `INTERVAl` | update interval in seconds | `86400` | + +| Name | Usage | Default | +| ---------- | ----------------------------- | ------- | +| `USER` | username on the studip server | `admin` | +| `PSWD` | password on the studip server | `admin` | +| `URL` | url of the studip server | `admin` | +| `INTERVAL` | update interval in seconds | `86400` | #### Volumes Set the following volumes with the -v tag. 
-| Volume-Name | Container mount | Description | -| ----------- | --------------- | ----------------------------------- | + +| Volume-Name | Container mount | Description | +| ------------- | ------------------ | ----------------------------------------- | | `studip_data` | `/studip/src/data` | directory for studip files to be saved to | #### docker-compose.yml diff --git a/docker-compose.yml b/docker-compose.yml index f44374e..c27abb9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,24 +8,9 @@ services: PSWD: 'pswd' URL: 'https://url.tld' INTERVAL: 86400 - HOST: 'mysql' - DB_USER: root - DB_PSWD: 'pswddb' volumes: - studip:/studip/data - depends_on: - - mysql - - mysql: - image: mysql:5.6 - restart: unless-stopped - environment: - MYSQL_ROOT_PASSWORD: 'pswddb' - volumes: - - mysql:/var/lib/mysql volumes: studip: driver: local - mysql: - driver: local diff --git a/docker-entry.sh b/docker-entry.sh index 0e94a34..88e0b7f 100755 --- a/docker-entry.sh +++ b/docker-entry.sh @@ -1,3 +1,3 @@ -#!/bin/bash +#!/bin/sh -while true; do python /studip/run.py -o /studip/data -u $USER -p $PSWD -s $URL --db_user $DB_USER --db_passwd $DB_PSWD --host $HOST && sleep $INTERVAL; done +while true; do python /studip/run.py -o /studip/data -u $USER -p $PSWD -s $URL && sleep $INTERVAL; done diff --git a/src/mysql.py b/src/database.py similarity index 66% rename from src/mysql.py rename to src/database.py index 103f228..5337698 100755 --- a/src/mysql.py +++ b/src/database.py @@ -1,41 +1,30 @@ import time import logging as log +import os -import pymysql +import pysqlite3 class Database: - def __init__(self, host, port, name, user, passwd, reset_dl): - self.HOST = host - self.PORT = port - self.NAME = name - self.USER = user - self.PASSWD = passwd + def __init__(self, reset_dl): self.RESET_DL = reset_dl self.TABLE_FILE = 'files' + self.DB_DIR = os.path.dirname(os.path.realpath(__file__)) self.setup_db() def connect(self): - """Connect to an existing database instance based on 
the object attributes. + """Connect to an existing database instance based on the object + attributes. """ - return pymysql.connect( - host=self.HOST, - port=self.PORT, - user=self.USER, - password=self.PASSWD, - charset='utf8mb4', - cursorclass=pymysql.cursors.DictCursor - ) + path = os.path.join(self.DB_DIR, "data.db") + return pysqlite3.connect(path) def setup_db(self): - """Creates a database with tables. - """ + """Creates a database with tables.""" + log.info("check database") db = self.connect() crs = db.cursor() - sql_query = "CREATE DATABASE IF NOT EXISTS " + self.NAME - crs.execute(sql_query) - db.select_db(self.NAME) query = "CREATE TABLE IF NOT EXISTS " + self.TABLE_FILE + \ "(id CHAR(32) NOT NULL," + \ "ch_date INT(11) NOT NULL," + \ @@ -51,12 +40,11 @@ class Database: time(int): time the file was downloaded """ db = self.connect() - db.select_db(self.NAME) crs = db.cursor() log.debug('file: ' + file_id + ' time: ' + time) query = "INSERT INTO " + self.TABLE_FILE + "(`id`,`ch_date`)" + \ "VALUES ('" + file_id + "','" + time + "')" + \ - "ON DUPLICATE KEY UPDATE `ch_date` = '" + time + "'" + "ON CONFLICT(`id`) DO UPDATE SET `ch_date` = '" + time + "'" crs.execute(query) db.commit() @@ -72,11 +60,10 @@ class Database: if self.RESET_DL: return None db = self.connect() - db.select_db(self.NAME) crs = db.cursor() query = "SELECT ch_date FROM files WHERE id ='" + file_id + "'" crs.execute(query) res = crs.fetchone() if res != None: - return res['ch_date'] + return res[0] return None diff --git a/src/requirements.txt b/src/requirements.txt index 23012f7..e1c976e 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,3 +1,2 @@ -tqdm==4.46.1 -requests==2.23.0 -PyMySQL==0.9.3 +pysqlite3==0.4.3 +requests==2.24.0 diff --git a/src/run.py b/src/run.py index a706df9..fe8a7df 100755 --- a/src/run.py +++ b/src/run.py @@ -6,7 +6,7 @@ import logging as log from studip import Studip from crawler import Crawler -from mysql import Database +from database import 
Database parser = argparse.ArgumentParser(description='Download Files from StudIP.') @@ -21,14 +21,6 @@ parser.add_argument('--chunk', type=int, default=1024 * 1024, help='chunksize for downloading data') parser.add_argument('-r', '--reset_dl_date', action='store_true', help='downloads everything and ignores last download date') -parser.add_argument('--host', type=str, default='localhost', help='mysql host') -parser.add_argument('--port', type=int, default=3306, help='mysql port') -parser.add_argument('--db_name', type=str, default='studip', - help='mysql database name') -parser.add_argument('--db_user', type=str, default='root', - help='mysql database user') -parser.add_argument('--db_passwd', type=str, - default='secret-pw', help='mysql database password') parser.add_argument('-d', '--debug_output', action='store_true', help='display debug information about the process') parser.add_argument('-q', '--quiet', action='store_true', @@ -53,8 +45,7 @@ BASE_DIR = os.path.abspath(args.output) USERNAME = args.user PASSWORD = args.passwd -db = Database(args.host, args.port, args.db_name, - args.db_user, args.db_passwd, args.reset_dl_date) +db = Database(args.reset_dl_date) studip = Studip(args.chunk, args.url, (USERNAME, PASSWORD), db) diff --git a/src/studip.py b/src/studip.py index 396739d..322eeee 100755 --- a/src/studip.py +++ b/src/studip.py @@ -146,13 +146,10 @@ class Studip: last_dl = self.db.get_last_file_dl(doc) if last_dl == None or last_dl < doc_chdate: rsp2 = self.auth_req('/api.php/file/' + doc + '/download') - #total_size = int(rsp2.headers.get('content-length', 0)) log.info('downloading ' + doc_name) - #progbar = tqdm(total=total_size, unit='iB', unit_scale=True) try: with open(doc_name, 'wb') as doc_file: for chunk in rsp2.iter_content(self.CHUNK_SIZE): - #progbar.update(len(chunk)) doc_file.write(chunk) self.db.set_last_file_dl(str(doc), str(int(time.time()))) except OSError: @@ -172,7 +169,7 @@ class Studip: try: subdirs = rsp.json()['subfolders'] except 
ValueError: - return res_docs + return res_subdirs for subdir in subdirs: try: sub_id = subdir['id']