database: switching from mysql to sqlite

Currently we only need the db to keep track of the files that were
already downloaded, so we use only one database with a single table.
A standalone MySQL server is too heavyweight for this purpose.
By using SQLite we can shrink the db and embed it in the application.
master
TiynGER 4 years ago
parent 415a21da3b
commit 4a8cf45ad3

@ -56,21 +56,20 @@ Run `python3 run.py -h` for a help menu and see which ones are important for you
#### Environment-variables #### Environment-variables
Set the following variables with the -e tag. Set the following variables with the -e tag.
| Name | Usage | Default |
| ---- |------ | ------- | | Name | Usage | Default |
| `USER` | username on the studip server | `admin` | | ---------- | ----------------------------- | ------- |
| `PSWD` | password on the studip server | `admin` | | `USER` | username on the studip server | `admin` |
| `URL` | url of the studip server | `admin` | | `PSWD` | password on the studip server | `admin` |
| `HOST` | ip of the mysql instance to connect | `mysql` | | `URL` | url of the studip server | `admin` |
| `DB_USER` | username of the mysql instance to connect | `root` | | `INTERVAL` | update interval in seconds | `86400` |
| `DB_PSWD` | password of the mysql instance to connect | `root` |
| `INTERVAL` | update interval in seconds | `86400` |
#### Volumes #### Volumes
Set the following volumes with the -v tag. Set the following volumes with the -v tag.
| Volume-Name | Container mount | Description |
| ----------- | --------------- | ----------------------------------- | | Volume-Name | Container mount | Description |
| ------------- | ------------------ | ----------------------------------------- |
| `studip_data` | `/studip/src/data` | directory for studip files to be saved to | | `studip_data` | `/studip/src/data` | directory for studip files to be saved to |
#### docker-compose.yml #### docker-compose.yml

@ -8,24 +8,9 @@ services:
PSWD: 'pswd' PSWD: 'pswd'
URL: 'https://url.tld' URL: 'https://url.tld'
INTERVAL: 86400 INTERVAL: 86400
HOST: 'mysql'
DB_USER: root
DB_PSWD: 'pswddb'
volumes: volumes:
- studip:/studip/data - studip:/studip/data
depends_on:
- mysql
mysql:
image: mysql:5.6
restart: unless-stopped
environment:
MYSQL_ROOT_PASSWORD: 'pswddb'
volumes:
- mysql:/var/lib/mysql
volumes: volumes:
studip: studip:
driver: local driver: local
mysql:
driver: local

@ -1,3 +1,3 @@
#!/bin/bash #!/bin/sh
while true; do python /studip/run.py -o /studip/data -u $USER -p $PSWD -s $URL --db_user $DB_USER --db_passwd $DB_PSWD --host $HOST && sleep $INTERVAL; done while true; do python /studip/run.py -o /studip/data -u $USER -p $PSWD -s $URL && sleep $INTERVAL; done

@ -1,41 +1,30 @@
import time import time
import logging as log import logging as log
import os
import pymysql import pysqlite3
class Database: class Database:
def __init__(self, host, port, name, user, passwd, reset_dl): def __init__(self, reset_dl):
self.HOST = host
self.PORT = port
self.NAME = name
self.USER = user
self.PASSWD = passwd
self.RESET_DL = reset_dl self.RESET_DL = reset_dl
self.TABLE_FILE = 'files' self.TABLE_FILE = 'files'
self.DB_DIR = os.path.dirname(os.path.realpath(__file__))
self.setup_db() self.setup_db()
def connect(self): def connect(self):
"""Connect to an existing database instance based on the object attributes. """Connect to an existing database instance based on the object
attributes.
""" """
return pymysql.connect( path = os.path.join(self.DB_DIR, "data.db")
host=self.HOST, return pysqlite3.connect(path)
port=self.PORT,
user=self.USER,
password=self.PASSWD,
charset='utf8mb4',
cursorclass=pymysql.cursors.DictCursor
)
def setup_db(self): def setup_db(self):
"""Creates a database with tables. """Creates a database with tables."""
""" log.info("check database")
db = self.connect() db = self.connect()
crs = db.cursor() crs = db.cursor()
sql_query = "CREATE DATABASE IF NOT EXISTS " + self.NAME
crs.execute(sql_query)
db.select_db(self.NAME)
query = "CREATE TABLE IF NOT EXISTS " + self.TABLE_FILE + \ query = "CREATE TABLE IF NOT EXISTS " + self.TABLE_FILE + \
"(id CHAR(32) NOT NULL," + \ "(id CHAR(32) NOT NULL," + \
"ch_date INT(11) NOT NULL," + \ "ch_date INT(11) NOT NULL," + \
@ -51,12 +40,11 @@ class Database:
time(int): time the file was downloaded time(int): time the file was downloaded
""" """
db = self.connect() db = self.connect()
db.select_db(self.NAME)
crs = db.cursor() crs = db.cursor()
log.debug('file: ' + file_id + ' time: ' + time) log.debug('file: ' + file_id + ' time: ' + time)
query = "INSERT INTO " + self.TABLE_FILE + "(`id`,`ch_date`)" + \ query = "INSERT INTO " + self.TABLE_FILE + "(`id`,`ch_date`)" + \
"VALUES ('" + file_id + "','" + time + "')" + \ "VALUES ('" + file_id + "','" + time + "')" + \
"ON DUPLICATE KEY UPDATE `ch_date` = '" + time + "'" "ON CONFLICT(`id`) DO UPDATE SET `ch_date` = '" + time + "'"
crs.execute(query) crs.execute(query)
db.commit() db.commit()
@ -72,11 +60,10 @@ class Database:
if self.RESET_DL: if self.RESET_DL:
return None return None
db = self.connect() db = self.connect()
db.select_db(self.NAME)
crs = db.cursor() crs = db.cursor()
query = "SELECT ch_date FROM files WHERE id ='" + file_id + "'" query = "SELECT ch_date FROM files WHERE id ='" + file_id + "'"
crs.execute(query) crs.execute(query)
res = crs.fetchone() res = crs.fetchone()
if res != None: if res != None:
return res['ch_date'] return res[0]
return None return None

@ -1,3 +1,2 @@
tqdm==4.46.1 pysqlite3==0.4.3
requests==2.23.0 requests==2.24.0
PyMySQL==0.9.3

@ -6,7 +6,7 @@ import logging as log
from studip import Studip from studip import Studip
from crawler import Crawler from crawler import Crawler
from mysql import Database from database import Database
parser = argparse.ArgumentParser(description='Download Files from StudIP.') parser = argparse.ArgumentParser(description='Download Files from StudIP.')
@ -21,14 +21,6 @@ parser.add_argument('--chunk', type=int, default=1024 *
1024, help='chunksize for downloading data') 1024, help='chunksize for downloading data')
parser.add_argument('-r', '--reset_dl_date', action='store_true', parser.add_argument('-r', '--reset_dl_date', action='store_true',
help='downloads everything and ignores last download date') help='downloads everything and ignores last download date')
parser.add_argument('--host', type=str, default='localhost', help='mysql host')
parser.add_argument('--port', type=int, default=3306, help='mysql port')
parser.add_argument('--db_name', type=str, default='studip',
help='mysql database name')
parser.add_argument('--db_user', type=str, default='root',
help='mysql database user')
parser.add_argument('--db_passwd', type=str,
default='secret-pw', help='mysql database password')
parser.add_argument('-d', '--debug_output', action='store_true', parser.add_argument('-d', '--debug_output', action='store_true',
help='display debug information about the process') help='display debug information about the process')
parser.add_argument('-q', '--quiet', action='store_true', parser.add_argument('-q', '--quiet', action='store_true',
@ -53,8 +45,7 @@ BASE_DIR = os.path.abspath(args.output)
USERNAME = args.user USERNAME = args.user
PASSWORD = args.passwd PASSWORD = args.passwd
db = Database(args.host, args.port, args.db_name, db = Database(args.reset_dl_date)
args.db_user, args.db_passwd, args.reset_dl_date)
studip = Studip(args.chunk, args.url, (USERNAME, PASSWORD), db) studip = Studip(args.chunk, args.url, (USERNAME, PASSWORD), db)

@ -146,13 +146,10 @@ class Studip:
last_dl = self.db.get_last_file_dl(doc) last_dl = self.db.get_last_file_dl(doc)
if last_dl == None or last_dl < doc_chdate: if last_dl == None or last_dl < doc_chdate:
rsp2 = self.auth_req('/api.php/file/' + doc + '/download') rsp2 = self.auth_req('/api.php/file/' + doc + '/download')
#total_size = int(rsp2.headers.get('content-length', 0))
log.info('downloading ' + doc_name) log.info('downloading ' + doc_name)
#progbar = tqdm(total=total_size, unit='iB', unit_scale=True)
try: try:
with open(doc_name, 'wb') as doc_file: with open(doc_name, 'wb') as doc_file:
for chunk in rsp2.iter_content(self.CHUNK_SIZE): for chunk in rsp2.iter_content(self.CHUNK_SIZE):
#progbar.update(len(chunk))
doc_file.write(chunk) doc_file.write(chunk)
self.db.set_last_file_dl(str(doc), str(int(time.time()))) self.db.set_last_file_dl(str(doc), str(int(time.time())))
except OSError: except OSError:
@ -172,7 +169,7 @@ class Studip:
try: try:
subdirs = rsp.json()['subfolders'] subdirs = rsp.json()['subfolders']
except ValueError: except ValueError:
return res_docs return res_subdirs
for subdir in subdirs: for subdir in subdirs:
try: try:
sub_id = subdir['id'] sub_id = subdir['id']

Loading…
Cancel
Save