adding fulltextsearch with searchpage

pull/1/head
TiynGER 5 years ago
parent 51682d00f0
commit 3c49ea6687

@ -7,16 +7,22 @@ I however just want to put my markdown files in a directory and get a working wi
## Features/To-Dos
- [ ] Plain text support for blog entries
- [ ] Markdown Files (.md)
- [ ] Entry page
- [x] Plain text support for blog entries
- [x] Markdown Files (.md)
- [x] Entry page
- [ ] Option to get plain text file
- [x] Search page
- [x] Full-text search
- [ ] Show first few lines of each match (description)
- [ ] Navigation
- [ ] Header
- [ ] Footer
- [ ] Switchable CSS
- [ ] CSS dark-theme
- [ ] CSS light-theme
- [ ] Config file
- [ ] More advanced namespaces
- [x] Header
- [ ] Search bar in header
- [x] Footer
- [x] Switchable CSS
- [x] CSS dark-theme
- [x] CSS light-theme
- [x] Config file
- [ ] Docker installation
- [ ] Enable variables/environment variables
- [ ] Logo

1
src/.gitignore vendored

@ -1 +1,2 @@
__pycache__/
indexdir/

@ -1,21 +1,46 @@
import os
from flask import Flask, flash, make_response, render_template, request, redirect, abort
import content as con_gen
import config
import search as src
from forms import SearchForm, csrf
# Application object that every route below is registered on.
app = Flask(__name__)

# Prefer a key supplied through the environment so that sessions and CSRF
# tokens stay valid across restarts and between worker processes; fall back
# to a random per-process key when none is configured.
SECRET_KEY = os.environ.get('SECRET_KEY') or os.urandom(32)
app.secret_key = SECRET_KEY
csrf.init_app(app)

# Values handed to every template, read once from the config module.
TITLE = config.TITLE
STYLE = config.STYLE
@app.errorhandler(404)
def page_not_found(e):
    """Render the themed error page for a missing resource.

    Args:
        e: The error object Flask passes to the handler; not used.

    Returns:
        The rendered ``error.html`` page together with status code 404.
    """
    return render_template('error.html', title=TITLE, errorcode='404', style=STYLE), 404
@app.route('/')
@app.route('/index.html')
def index():
    """Serve the start page, which is still a placeholder.

    Returns:
        The placeholder body together with status code 200.
    """
    return 'placeholder for index', 200
@app.route('/search', methods=['GET', 'POST'])
def search():
    """Show the search form and, for a valid submission, the result list.

    Returns:
        The rendered ``search.html`` page together with status code 200.
        ``content`` is the generated result markup for a valid POST and an
        empty string otherwise.
    """
    form = SearchForm()
    content = ''
    # validate_on_submit() is only true for a submitted form whose validators
    # pass, so an empty or missing query re-renders the form instead of
    # reaching the search backend.
    if form.validate_on_submit():
        content = con_gen.gen_query_res_string(form.query_str.data)
    return render_template('search.html', title=TITLE, style=STYLE, form=form, content=content), 200
@app.route('/entry/<path:fullurl>')
def entry(fullurl):
    """Render a single entry.

    Args:
        fullurl: Path of the entry as captured from the URL after ``/entry/``.

    Returns:
        The rendered ``entry.html`` page together with status code 200.

    Raises:
        404 (via ``abort``) when no content could be generated for the path.
    """
    content = con_gen.gen_stand_string(fullurl)
    # The generator returns an empty string when the file does not exist;
    # answer with the 404 page instead of an empty entry.
    if not content:
        abort(404)
    return render_template('entry.html', title=TITLE, style=STYLE, content=content), 200
if __name__ == '__main__':

@ -6,3 +6,8 @@ WEBSITE = 'localhost:5000'
# Theme for the blog: dark, light
STYLE = 'dark'
###############################################
## NO CHANGES RECOMMENDED ##
###############################################
ENTRY_DIR = 'templates/entry'

@ -1,10 +1,58 @@
import config
import search
import datetime
from datetime import datetime
import markdown
import os
from os import path
import pathlib
import config
import markdown
ENTRY_DIR = config.ENTRY_DIR
WEBSITE = config.WEBSITE
def gen_stand_string(path_ex):
    """Build the HTML body for a single entry.

    The first line of the entry file becomes the ``<h1>`` title. For ``.md``
    files the remaining lines are rendered through ``gen_md_content``.

    Args:
        path_ex: Path of the entry file relative to ``ENTRY_DIR``.

    Returns:
        The generated HTML, or an empty string when the path does not name a
        regular file inside ``ENTRY_DIR``.
    """
    filename = os.path.join(ENTRY_DIR, path_ex)

    # path_ex is taken from the request URL by the entry route, so refuse
    # anything that resolves outside the entry directory (``..`` segments or
    # an absolute path).
    entry_root = path.abspath(ENTRY_DIR)
    if not path.abspath(filename).startswith(entry_root + os.sep):
        return ''

    if not path.isfile(filename):
        return ''

    with open(filename) as entry_file:
        title = entry_file.readline().rstrip('\n')

    content_string = '<h1>' + title + '</h1>\n'
    if filename.endswith('.md'):
        content_string += gen_md_content(filename, 1)
    return content_string
def gen_md_content(path_ex, depth):
    """Render the body of a markdown entry to HTML.

    The first line of the file (the entry title) is skipped. Every ``'# '``
    in the remaining lines is replaced by a header marker that is ``depth``
    levels deeper, so headings in the entry nest below the page title.

    Args:
        path_ex: Path of the markdown file.
        depth: Number of ``#`` characters to add to each header marker.

    Returns:
        The rendered HTML, or an empty string when ``path_ex`` is not a file.
    """
    if not path.isfile(path_ex):
        return ''

    # '# ' becomes '#' * (depth + 1) followed by a space.
    header = '#' + '#' * depth + ' '

    with open(path_ex, "r") as md_file:
        markdown_lines = md_file.readlines()[1:]

    markdown_text = ''.join(
        line.replace('# ', header) for line in markdown_lines
    )
    return markdown.markdown(
        markdown_text, extensions=["fenced_code", "tables"]
    )
ENTRY_DIR = 'templates/entry'
def gen_query_res_string(query_str):
    """Build an HTML list of links for the entries matching a query.

    Args:
        query_str: The search term passed on to ``search.search``.

    Returns:
        A ``<ul>`` element with one ``<li><a>`` per result; the list is empty
        when nothing matched.
    """
    from html import escape

    items = []
    for result in search.search(query_str):
        # Indexed paths start with the two-component entry directory
        # ('templates/entry'); drop it so the link matches /entry/<path>.
        entry_url = '/entry/' + result['path'].split('/', 2)[2]
        # Titles and file names come from the entry files, so escape them
        # before placing them into markup that is rendered unescaped.
        items.append(
            '<li><a href="' + escape(entry_url, quote=True) + '">'
            + escape(result['title']) + '</a></li>'
        )
    return '<ul>\n' + ''.join(items) + '</ul>\n'

@ -0,0 +1,11 @@
from flask_wtf import FlaskForm
from flask_wtf import CSRFProtect
from wtforms import TextField, SubmitField, ValidationError, validators
csrf = CSRFProtect()
class SearchForm(FlaskForm):
    """Search form with a single required query field and a submit button."""

    # DataRequired replaces the legacy ``Required`` alias, which newer
    # WTForms releases no longer provide; the validation itself is the same.
    query_str = TextField(
        "Query", [validators.DataRequired("Please enter the search term")])
    submit = SubmitField("Send")

@ -1,2 +1,5 @@
Flask==1.1.2
Markdown==3.1.1
WTForms==2.2.1
Flask==1.1.2
Flask_WTF==0.14.3
Whoosh==2.7.4

@ -0,0 +1,55 @@
import config
import os
import sys
from whoosh import scoring
from whoosh.index import create_in, open_dir
from whoosh.fields import Schema, TEXT, ID
from whoosh.qparser import QueryParser
INDEX_DIR = "indexdir"
DEF_TOPN = 10
ENTRY_DIR = config.ENTRY_DIR
def createSearchableData(root):
    '''
    Build a fresh search index in INDEX_DIR from every file below ``root``.

    Schema definition: title (first line of the file, stored),
    path (as ID, stored), content (whole file text, indexed but not stored)
    source:
    https://appliedmachinelearning.blog/2018/07/31/developing-a-fast-indexing-and-full-text-search-engine-with-whoosh-a-pure-pythhon-library/
    '''
    schema = Schema(title=TEXT(stored=True),
                    path=ID(stored=True), content=TEXT)
    os.makedirs(INDEX_DIR, exist_ok=True)

    ix = create_in(INDEX_DIR, schema)
    writer = ix.writer()
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            with open(path) as fp:
                title = fp.readline()
                text = title + fp.read()
            # Store the title without its line break; the indexed content
            # still contains the complete first line.
            writer.add_document(
                title=title.rstrip('\n'), path=path, content=text)
    writer.commit()
def search_times(query_str, topN):
    """Query the index in INDEX_DIR and return the best matches.

    Args:
        query_str: Query text, parsed against the ``content`` field.
        topN: Maximum number of matches to return.

    Returns:
        A list of dicts with the keys ``title``, ``path`` and ``match``
        (the score of the hit).
    """
    index = open_dir(INDEX_DIR)
    with index.searcher(weighting=scoring.BM25F) as searcher:
        parsed = QueryParser("content", index.schema).parse(query_str)
        # Materialise the hits while the searcher is still open.
        return [
            {'title': hit['title'], 'path': hit['path'], 'match': hit.score}
            for hit in searcher.search(parsed, limit=topN)
        ]
def search(query_str):
    """Search the index for ``query_str`` using the default limit DEF_TOPN."""
    return search_times(query_str, topN=DEF_TOPN)


# The index is (re)built from the entry directory whenever this module is
# imported.
createSearchableData(ENTRY_DIR)

@ -0,0 +1,10 @@
{% extends "template.html" %}
{% block content %}
<div class="container">
  <div class="content">
    {# content is HTML generated on the server, so it is emitted unescaped.
       Jinja expects a boolean expression here; "off" is Django syntax. #}
    {% autoescape false %}
    <span>{{ content }}</span>
    {% endautoescape %}
  </div>
</div>
{% endblock %}

@ -0,0 +1,11 @@
Test Entry Title 4
This is a markdown file
[link to entry3](../test-entry3.md)
- list entry
- list entry
- list entry
# md-header
more content

@ -0,0 +1,10 @@
Test Entry Title 5
This is a markdown file
- list entry
- list entry
- list entry
# md-header
more content

@ -0,0 +1,10 @@
Test Entry Title 6
This is a markdown file
- list entry
- list entry
- list entry
# md-header
more content

@ -1,5 +1,6 @@
Test Entry Title 3
Test Entry Title 1
This is a markdown file
This text contains a one.
- list entry
- list entry

@ -1,5 +1,6 @@
Test Entry Title 3
Test Entry Title 2
This is a markdown file
Two Two
- list entry
- list entry

@ -1,4 +1,4 @@
Test Entry Title 3
Test Entry Title 3 Three
This is a markdown file
- list entry

@ -0,0 +1,15 @@
{% extends "template.html" %}
{% block content %}
<div class="container">
  <div class="content">
    <form action="{{ url_for('search') }}" method="post">
      {{ form.hidden_tag() }}
      {{ form.query_str }}
      {{ form.submit }}
    </form>
    {# content is HTML generated on the server, so it is emitted unescaped.
       Jinja expects a boolean expression here; "off" is Django syntax. #}
    {% autoescape false %}
    <span>{{ content }}</span>
    {% endautoescape %}
  </div>
</div>
{% endblock %}

@ -14,6 +14,7 @@
<label for="main-menu-check" class="show-menu">&#9776;</label>
<div class="main-menu">
<a href="/">Startpage</a>
<a href="/search">Search</a>
<label for="main-menu-check" class="hide-menu">X</label>
</div>
</div>
@ -24,7 +25,7 @@
<!-- Content -->
<footer>
<div class="center">
Dieser Blog enthält kein Javascript oder PHP.<br>
Dieses Wiki enthält kein Javascript oder PHP.<br>
Dies ist eine Instanz vom <a href="https://github.com/tiyn/tiyny-wiki">Tiyny-Wiki</a>.
</div>
</footer>

Loading…
Cancel
Save