1
0
mirror of https://github.com/tiyn/amphora-wiki.git synced 2025-04-19 15:27:46 +02:00

adding fulltextsearch with searchpage

This commit is contained in:
TiynGER 2020-06-01 20:51:37 +02:00
parent 51682d00f0
commit 3c49ea6687
17 changed files with 233 additions and 20 deletions

View File

@ -7,16 +7,22 @@ I however just want to put my markdown files in a directory and get a working wi
## Features/To-Dos ## Features/To-Dos
- [ ] Plain text support for blog entries - [x] Plain text support for blog entries
- [ ] Markdown Files (.md) - [x] Markdown Files (.md)
- [ ] Entry page - [x] Entry page
- [ ] Option to get plain text file
- [x] Search page
- [x] Full-text search
- [ ] Show first few lines of each match (description)
- [ ] Navigation - [ ] Navigation
- [ ] Header - [ ] More advanced namespaces
- [ ] Footer - [x] Header
- [ ] Switchable CSS - [ ] Search bar in header
- [ ] CSS dark-theme - [x] Footer
- [ ] CSS light-theme - [x] Switchable CSS
- [ ] Config file - [x] CSS dark-theme
- [x] CSS light-theme
- [x] Config file
- [ ] Docker installation - [ ] Docker installation
- [ ] Enable variables/environment variables - [ ] Enable variables/environment variables
- [ ] Logo - [ ] Logo

1
src/.gitignore vendored
View File

@ -1 +1,2 @@
__pycache__/ __pycache__/
indexdir/

View File

@ -1,21 +1,46 @@
import os
from flask import Flask, flash, make_response, render_template, request, redirect, abort from flask import Flask, flash, make_response, render_template, request, redirect, abort
import content as con_gen import content as con_gen
import config import config
import search as src
from forms import SearchForm, csrf
app = Flask(__name__) app = Flask(__name__)
# Secret used by Flask to sign the session cookie; Flask-WTF derives its CSRF
# tokens from it as well. Prefer a stable value from the environment: a key
# that is regenerated on every start invalidates outstanding tokens on restart
# and differs between worker processes. The random key remains the fallback
# so a local single-process run needs no configuration.
SECRET_KEY = os.environ.get('SECRET_KEY') or os.urandom(32)
app.secret_key = SECRET_KEY
csrf.init_app(app)

# Values handed to every template, read once from the user configuration.
TITLE = config.TITLE
STYLE = config.STYLE
@app.errorhandler(404) @app.errorhandler(404)
def page_not_found(e): def page_not_found(e):
return render_template('error.html', title=config.TITLE, errorcode='404', style=config.STYLE), 404 return render_template('error.html', title=TITLE, errorcode='404', style=STYLE), 404
@app.route('/') @app.route('/')
@app.route('/index.html') @app.route('/index.html')
def index(): def index():
return 'ok' return 'placeholder for index', 200
@app.route('/search', methods=['GET', 'POST'])
def search():
    """Render the search page and, for a valid submission, its results.

    A GET request, or a POST whose form does not validate (for example an
    empty query), renders only the search form. A valid POST runs the
    full-text search and renders the result list below the form.
    """
    form = SearchForm()
    content = ''
    # validate_on_submit() is only true for a submitted form whose
    # validators pass, so the query is taken from the validated field
    # instead of indexing request.form directly.
    if form.validate_on_submit():
        content = con_gen.gen_query_res_string(form.query_str.data)
    return render_template('search.html', title=TITLE, style=STYLE, form=form, content=content), 200
@app.route('/entry/<path:fullurl>')
def entry(fullurl):
    """Render the wiki entry addressed by ``fullurl``.

    Responds with 404 when no content could be generated for the path,
    instead of serving an empty page with status 200.
    """
    content = con_gen.gen_stand_string(fullurl)
    # gen_stand_string() returns an empty string when there is no such entry.
    if not content:
        abort(404)
    return render_template('entry.html', title=TITLE, style=STYLE, content=content), 200
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -6,3 +6,8 @@ WEBSITE = 'localhost:5000'
# Theme for the blog: dark, light # Theme for the blog: dark, light
STYLE = 'dark' STYLE = 'dark'
###############################################
## NO CHANGES RECOMMENDED ##
###############################################
# Directory that holds the wiki entry files; read by the content and search modules.
ENTRY_DIR = 'templates/entry'

View File

@ -1,10 +1,58 @@
import config
import search
import datetime import datetime
from datetime import datetime from datetime import datetime
import markdown
import os import os
from os import path from os import path
import pathlib import pathlib
import config import markdown
ENTRY_DIR = 'templates/entry' ENTRY_DIR = config.ENTRY_DIR
WEBSITE = config.WEBSITE
def gen_stand_string(path_ex):
    """
    Build the HTML for a single entry.

    The first line of the entry file is used as its title and emitted as an
    ``<h1>``. For markdown files (``.md``) the rendered remainder of the file
    is appended.

    Parameters:
    path_ex (str): path of the entry, relative to ENTRY_DIR.

    Returns:
    str: the generated HTML, or an empty string when the path does not name
    a regular file inside ENTRY_DIR.
    """
    entry_root = os.path.realpath(ENTRY_DIR)
    filename = os.path.join(ENTRY_DIR, path_ex)
    # path_ex may originate from a request URL: refuse anything that resolves
    # outside the entry directory (for example through '..' segments).
    if not os.path.realpath(filename).startswith(entry_root + os.sep):
        return ''
    # isfile() rather than exists(): a directory exists but cannot be read.
    if not path.isfile(filename):
        return ''
    with open(filename) as entry_file:
        title = entry_file.readline().rstrip('\n')
    content_string = '<h1>' + title + '</h1>\n'
    if filename.endswith('.md'):
        content_string += gen_md_content(filename, 1)
    return content_string
def gen_md_content(path_ex, depth):
    """
    Render the body of a markdown entry file to HTML.

    The first line of the file (the entry title) is skipped. Every heading
    in the remaining text is pushed down by ``depth`` levels.

    Parameters:
    path_ex (str): path to the markdown file.
    depth (int): number of '#' characters to prepend to each heading.

    Returns:
    str: the rendered HTML, or an empty string if path_ex is not a file.
    """
    if not path.isfile(path_ex):
        return ''
    with open(path_ex, "r") as md_file:
        markdown_lines = md_file.readlines()[1:]
    extra_hashes = '#' * depth
    shifted_lines = []
    in_fence = False
    for line in markdown_lines:
        if line.lstrip().startswith(('```', '~~~')):
            # Fence delimiter: toggles whether we are inside a code block.
            in_fence = not in_fence
        elif (not in_fence and line.startswith('#')
                and line.lstrip('#').startswith(' ')):
            # Only real heading lines ("#... text" at the start of a line)
            # are shifted; '# ' inside running text or inside a fenced code
            # block (e.g. a shell comment) is left untouched.
            line = extra_hashes + line
        shifted_lines.append(line)
    return markdown.markdown(
        ''.join(shifted_lines), extensions=["fenced_code", "tables"]
    )
def gen_query_res_string(query_str):
    """
    Run a full-text search and render the hits as an HTML list of links.

    Parameters:
    query_str (str): the search query.

    Returns:
    str: a ``<ul>`` element with one linked ``<li>`` per hit; the list is
    empty when nothing matched.
    """
    # Function-scope imports keep this block self-contained.
    from html import escape
    from urllib.parse import quote

    items = []
    for result in search.search(query_str):
        # Derive the URL from the path relative to ENTRY_DIR instead of
        # assuming the entry directory is exactly two components deep.
        rel_path = os.path.relpath(result['path'], ENTRY_DIR)
        url = '/entry/' + quote(rel_path.replace(os.sep, '/'))
        # The indexed title may still carry the line break of the file's
        # first line; escape both values so the markup stays well-formed.
        title = escape(result['title'].strip())
        items.append('<li><a href="' + escape(url, quote=True) + '">'
                     + title + '</a></li>\n')
    return '<ul>\n' + ''.join(items) + '</ul>\n'

11
src/forms.py Normal file
View File

@ -0,0 +1,11 @@
from flask_wtf import CSRFProtect
from flask_wtf import FlaskForm
from wtforms import StringField, SubmitField, TextField, ValidationError, validators
# CSRF protection extension, created unbound here; the application module
# imports it and attaches it to the Flask app via csrf.init_app(app).
csrf = CSRFProtect()
class SearchForm(FlaskForm):
    """Search form: one required query field and a submit button."""

    # StringField / DataRequired are the current names of the deprecated
    # WTForms aliases TextField / Required.
    query_str = StringField(
        "Query", [validators.DataRequired("Please enter the search term")])
    submit = SubmitField("Send")

View File

@ -1,2 +1,5 @@
Flask==1.1.2
Markdown==3.1.1 Markdown==3.1.1
WTForms==2.2.1
Flask==1.1.2
Flask_WTF==0.14.3
Whoosh==2.7.4

55
src/search.py Normal file
View File

@ -0,0 +1,55 @@
import config
import os
import sys
from whoosh import scoring
from whoosh.index import create_in, open_dir
from whoosh.fields import Schema, TEXT, ID
from whoosh.qparser import QueryParser
# Directory in which the Whoosh index is created and opened.
INDEX_DIR = "indexdir"
# Default maximum number of hits returned by search().
DEF_TOPN = 10
# Root directory of the entry files that get indexed.
ENTRY_DIR = config.ENTRY_DIR
def createSearchableData(root):
    '''
    Create a fresh full-text index in INDEX_DIR from all files below root.

    Schema definition: title (first line of the file, stored), path (file
    path as ID, stored), content (whole file text, indexed but not stored).
    Files that cannot be decoded as text are skipped.

    source:
    https://appliedmachinelearning.blog/2018/07/31/developing-a-fast-indexing-and-full-text-search-engine-with-whoosh-a-pure-pythhon-library/
    '''
    schema = Schema(title=TEXT(stored=True),
                    path=ID(stored=True), content=TEXT)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(INDEX_DIR, exist_ok=True)
    ix = create_in(INDEX_DIR, schema)
    # Used as a context manager the writer commits when the block finishes
    # and cancels if an exception escapes it.
    with ix.writer() as writer:
        for dirpath, _dirnames, filenames in os.walk(root):
            for filename in filenames:
                path = os.path.join(dirpath, filename)
                try:
                    with open(path) as fp:
                        title = fp.readline()
                        text = title + fp.read()
                except UnicodeDecodeError:
                    # Not a text file; leave it out of the index instead of
                    # aborting the whole indexing run.
                    continue
                # Store the title without the line break of the first line.
                writer.add_document(title=title.rstrip('\n'), path=path,
                                    content=text)
def search_times(query_str, topN):
    '''
    Query the "content" field of the index in INDEX_DIR.

    Returns a list with at most topN dicts, one per hit, holding the stored
    'title' and 'path' of the document and its score under the key 'match'.
    '''
    index = open_dir(INDEX_DIR)
    parsed_query = QueryParser("content", index.schema).parse(query_str)
    with index.searcher(weighting=scoring.BM25F) as searcher:
        # Hits must be read while the searcher is still open.
        return [
            {'title': hit['title'], 'path': hit['path'], 'match': hit.score}
            for hit in searcher.search(parsed_query, limit=topN)
        ]
def search(query_str):
    '''
    Search the index for query_str, returning at most DEF_TOPN hits.
    '''
    return search_times(query_str, topN=DEF_TOPN)
# Runs whenever this module is imported: a new index over ENTRY_DIR is
# created each time.
createSearchableData(ENTRY_DIR)

10
src/templates/entry.html Normal file
View File

@ -0,0 +1,10 @@
{% extends "template.html" %}
{% block content %}
{# Entry page. `content` is emitted with autoescaping switched off, so it must already be HTML markup. #}
<div class="container">
<div class="content">
{% autoescape off %}
<span>{{ content }}</span>
{% endautoescape %}
</div>
</div>
{% endblock %}

View File

@ -0,0 +1,11 @@
Test Entry Title 4
This is a markdown file
[link to entry3](../test-entry3.md)
- list entry
- list entry
- list entry
# md-header
more content

View File

@ -0,0 +1,10 @@
Test Entry Title 5
This is a markdown file
- list entry
- list entry
- list entry
# md-header
more content

View File

@ -0,0 +1,10 @@
Test Entry Title 6
This is a markdown file
- list entry
- list entry
- list entry
# md-header
more content

View File

@ -1,5 +1,6 @@
Test Entry Title 3 Test Entry Title 1
This is a markdown file This is a markdown file
This text contains a one.
- list entry - list entry
- list entry - list entry

View File

@ -1,5 +1,6 @@
Test Entry Title 3 Test Entry Title 2
This is a markdown file This is a markdown file
Two Two
- list entry - list entry
- list entry - list entry

View File

@ -1,4 +1,4 @@
Test Entry Title 3 Test Entry Title 3 Three
This is a markdown file This is a markdown file
- list entry - list entry

15
src/templates/search.html Normal file
View File

@ -0,0 +1,15 @@
{% extends "template.html" %}
{% block content %}
{# Search page: the form posts back to the `search` view; `content` is emitted below it with autoescaping switched off, so it must already be HTML markup. #}
<div class="container">
<div class="content">
<form action="{{ url_for('search') }}" method=post>
{# hidden_tag() renders the form's hidden fields. #}
{{ form.hidden_tag() }}
{{ form.query_str }}
{{ form.submit }}
</form>
{% autoescape off %}
<span>{{ content }}</span>
{% endautoescape %}
</div>
</div>
{% endblock %}

View File

@ -14,6 +14,7 @@
<label for="main-menu-check" class="show-menu">&#9776;</label> <label for="main-menu-check" class="show-menu">&#9776;</label>
<div class="main-menu"> <div class="main-menu">
<a href="/">Startpage</a> <a href="/">Startpage</a>
<a href="/search">Search</a>
<label for="main-menu-check" class="hide-menu">X</label> <label for="main-menu-check" class="hide-menu">X</label>
</div> </div>
</div> </div>
@ -24,7 +25,7 @@
<!-- Content --> <!-- Content -->
<footer> <footer>
<div class="center"> <div class="center">
Dieser Blog enthält kein Javascript oder PHP.<br> Dieses Wiki enthält kein Javascript oder PHP.<br>
Dies ist eine Instanz vom <a href="https://github.com/tiyn/tiyny-wiki">Tiyny-Wiki</a>. Dies ist eine Instanz vom <a href="https://github.com/tiyn/tiyny-wiki">Tiyny-Wiki</a>.
</div> </div>
</footer> </footer>