mirror of
https://github.com/tiyn/stud.ip-crawler.git
synced 2025-04-01 15:37:47 +02:00
bugfixes: included error message for large files - skipping them
This commit is contained in:
parent
9e6355f7de
commit
3d8d71c3fd
33
Dockerfile
Normal file
33
Dockerfile
Normal file
@ -0,0 +1,33 @@
|
||||
# Image for the Stud.IP crawler: installs the Python sources from ./src and
# starts docker-entry.sh, which invokes run.py in a loop.

# Explicit major-version tag instead of the implicit "latest"; pin a specific
# minor release or digest for fully reproducible builds.
FROM python:3

LABEL maintainer="TiynGER <mail@martenkante.eu>"

# Runtime defaults read by docker-entry.sh; override them at `docker run` or
# in docker-compose. The credential values are placeholders only.
ENV USER=admin \
    PSWD=admin \
    URL=admin \
    HOST=mysql \
    INTERVAL=86400 \
    DB_USER=root \
    DB_PSWD=root

WORKDIR /studip

# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt itself changes.
COPY src/requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# COPY rather than ADD: these are plain local files, no archive extraction or
# remote fetch is needed.
COPY src /studip
COPY docker-entry.sh .
RUN chmod +x docker-entry.sh

# Declared after /studip is populated; docker-entry.sh passes this path to
# run.py as its -o argument.
VOLUME /studip/data

CMD ["./docker-entry.sh"]
|
@ -1,29 +1,31 @@
|
||||
version: "3.1"
|
||||
services:
|
||||
studip:
|
||||
image: tiynger/studip-crawler
|
||||
image: studip:latest
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
USER: 'user'
|
||||
PSWD: 'pswd'
|
||||
URL: 'https://url.tld'
|
||||
INTERVAL: 86400
|
||||
HOST: mysql
|
||||
HOST: 'mysql'
|
||||
DB_USER: root
|
||||
DB_PSWD: root
|
||||
DB_PSWD: 'pswddb'
|
||||
volumes:
|
||||
- studip_data:/studip/src/data
|
||||
- studip:/studip/data
|
||||
depends_on:
|
||||
- mysql
|
||||
|
||||
mysql:
|
||||
image: mysql
|
||||
image: mysql:5.6
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
MYSQL_ROOT_PASSWORD: root
|
||||
MYSQL_ROOT_PASSWORD: 'pswddb'
|
||||
volumes:
|
||||
- studip_db:/var/lib/mysql
|
||||
- mysql:/var/lib/mysql
|
||||
|
||||
volumes:
|
||||
studip_data:
|
||||
studip:
|
||||
driver: local
|
||||
studip_db:
|
||||
mysql:
|
||||
driver: local
|
3
docker-entry.sh
Executable file
3
docker-entry.sh
Executable file
@ -0,0 +1,3 @@
|
||||
#!/bin/bash

# Run the crawler in an endless loop. All settings come from environment
# variables. Every expansion is quoted so that values containing spaces or
# glob characters (e.g. passwords) reach run.py as single, intact arguments
# and an empty value cannot shift the following options.
# The sleep only happens after a successful run; a failed run is retried
# immediately.
while true; do
    python /studip/run.py \
        -o /studip/data \
        -u "$USER" \
        -p "$PSWD" \
        -s "$URL" \
        --db_user "$DB_USER" \
        --db_passwd "$DB_PSWD" \
        --host "$HOST" \
        && sleep "$INTERVAL"
done
|
@ -1,35 +0,0 @@
|
||||
FROM python
|
||||
|
||||
LABEL maintainer "TiynGER <mail@martenkante.eu>"
|
||||
|
||||
ENV USER admin
|
||||
|
||||
ENV PSWD admin
|
||||
|
||||
ENV URL admin
|
||||
|
||||
ENV HOST mysql
|
||||
|
||||
ENV INTERVAL 86400
|
||||
|
||||
ENV DB_USER root
|
||||
|
||||
ENV DB_PSWD root
|
||||
|
||||
RUN git clone https://github.com/tiyn/stud.ip-crawler /studip
|
||||
|
||||
WORKDIR /studip
|
||||
|
||||
RUN pip install -r src/requirements.txt
|
||||
|
||||
ADD run.sh .
|
||||
|
||||
RUN chmod +x run.sh
|
||||
|
||||
RUN chmod +x src/run.py
|
||||
|
||||
VOLUME /studip/src/data
|
||||
|
||||
WORKDIR /studip/src
|
||||
|
||||
CMD ["/studip/run.sh"]
|
@ -1,3 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
while true; do python /studip/src/run.py -o /studip/src/data -u $USER -p $PSWD -s $URL --db_user $DB_USER --db_passwd $DB_PSWD --host $HOST && sleep $INTERVAL; done
|
@ -150,7 +150,7 @@ class Studip:
|
||||
doc_file.write(chunk)
|
||||
self.db.set_last_file_dl(str(doc), str(int(time.time())))
|
||||
except OSError:
|
||||
log.critical("Error while writing to the file " + doc_name)
|
||||
log.warning("Error while writing to the file " + doc_name)
|
||||
|
||||
def get_subdirs(self, folder):
|
||||
"""Get all the subdirectories of a given folder.
|
||||
|
Loading…
x
Reference in New Issue
Block a user