adding fulltextsearch with searchpage

pull/1/head
TiynGER 5 years ago
parent 51682d00f0
commit 3c49ea6687

@ -7,16 +7,22 @@ I however just want to put my markdown files in a directory and get a working wi
## Features/To-Dos ## Features/To-Dos
- [ ] Plain text support for blog entries - [x] Plain text support for blog entries
- [ ] Markdown Files (.md) - [x] Markdown Files (.md)
- [ ] Entry page - [x] Entry page
- [ ] Option to get plain text file
- [x] Search page
- [x] Full-text search
- [ ] Show first few lines of each match (description)
- [ ] Navigation - [ ] Navigation
- [ ] Header - [ ] More advanced namespaces
- [ ] Footer - [x] Header
- [ ] Switchable CSS - [ ] Search bar in header
- [ ] CSS dark-theme - [x] Footer
- [ ] CSS light-theme - [x] Switchable CSS
- [ ] Config file - [x] CSS dark-theme
- [x] CSS light-theme
- [x] Config file
- [ ] Docker installation - [ ] Docker installation
- [ ] Enable variables/environment variables - [ ] Enable variables/environment variables
- [ ] Logo - [ ] Logo

1
src/.gitignore vendored

@ -1 +1,2 @@
__pycache__/ __pycache__/
indexdir/

@ -1,21 +1,46 @@
import os
from flask import Flask, flash, make_response, render_template, request, redirect, abort from flask import Flask, flash, make_response, render_template, request, redirect, abort
import content as con_gen import content as con_gen
import config import config
import search as src
from forms import SearchForm, csrf
app = Flask(__name__) app = Flask(__name__)
# Random 32-byte key assigned as the Flask secret key below.
# NOTE(review): the key is generated anew on every process start, so anything
# signed with it (sessions, CSRF tokens) presumably becomes invalid after a
# restart or when several worker processes run - confirm whether a persistent
# key from config is wanted.
SECRET_KEY = os.urandom(32)
app.secret_key = SECRET_KEY
# Register the CSRFProtect instance imported from the forms module.
csrf.init_app(app)
# Title and theme are read once from config and handed to the templates
# rendered by the views in this module.
TITLE = config.TITLE
STYLE = config.STYLE
@app.errorhandler(404) @app.errorhandler(404)
def page_not_found(e): def page_not_found(e):
return render_template('error.html', title=config.TITLE, errorcode='404', style=config.STYLE), 404 return render_template('error.html', title=TITLE, errorcode='404', style=STYLE), 404
@app.route('/') @app.route('/')
@app.route('/index.html') @app.route('/index.html')
def index(): def index():
return 'ok' return 'placeholder for index', 200
@app.route('/search', methods=['GET', 'POST'])
def search():
    """Render the search page and, after a valid submission, the result list.

    A GET request (or a submission that fails validation) renders the page
    with the form and an empty result area. A valid POST renders the same
    page with the HTML produced by ``con_gen.gen_query_res_string``.

    Returns:
        Tuple of the rendered ``search.html`` template and status code 200.
    """
    form = SearchForm()
    content = ''
    # validate_on_submit() is true only for a submitted form that passes the
    # validators declared on SearchForm, so the "required" check on the query
    # field is actually enforced and a missing field no longer raises on
    # request.form['query_str'].
    if form.validate_on_submit():
        content = con_gen.gen_query_res_string(form.query_str.data)
    return render_template('search.html', title=TITLE, style=STYLE, form=form, content=content), 200
@app.route('/entry/<path:fullurl>')
def entry(fullurl):
    """Render a single entry page.

    Args:
        fullurl: Path of the entry below the entry directory, taken from the
            URL (may contain slashes).

    Returns:
        Tuple of the rendered ``entry.html`` template and status code 200.
        Aborts with 404 when no content could be generated for ``fullurl``.
    """
    content = con_gen.gen_stand_string(fullurl)
    # gen_stand_string returns an empty string when the entry file does not
    # exist; answer with a real 404 instead of an empty page with status 200.
    if not content:
        abort(404)
    return render_template('entry.html', title=TITLE, style=STYLE, content=content), 200
if __name__ == '__main__': if __name__ == '__main__':

@ -6,3 +6,8 @@ WEBSITE = 'localhost:5000'
# Theme for the blog: dark, light # Theme for the blog: dark, light
STYLE = 'dark' STYLE = 'dark'
###############################################
## NO CHANGES RECOMMENDED ##
###############################################
ENTRY_DIR = 'templates/entry'

@ -1,10 +1,58 @@
import config
import search
import datetime import datetime
from datetime import datetime from datetime import datetime
import markdown
import os import os
from os import path from os import path
import pathlib import pathlib
import config import markdown
# Directory that holds the entry files; gen_stand_string joins requested
# entry paths onto it.
ENTRY_DIR = config.ENTRY_DIR
# Site address taken from config.
# NOTE(review): not referenced in the visible part of this module.
WEBSITE = config.WEBSITE
def gen_stand_string(path_ex):
    """Build the HTML body for a single entry.

    The first line of the entry file is used as the title and emitted as an
    ``<h1>``. For files ending in ``.md`` the remaining lines are converted
    to HTML by ``gen_md_content``.

    Args:
        path_ex: Path of the entry file relative to ``ENTRY_DIR``.

    Returns:
        The generated HTML, or ``''`` when ``path_ex`` does not name a file
        inside ``ENTRY_DIR``.
    """
    import html

    entry_root = os.path.abspath(ENTRY_DIR)
    filename = os.path.join(ENTRY_DIR, path_ex)
    # path_ex originates from the request URL: refuse anything that resolves
    # outside the entry directory ('..' segments or an absolute path).
    if not os.path.abspath(filename).startswith(entry_root + os.sep):
        return ''
    # isfile (rather than exists) so that a directory name yields an empty
    # result instead of an error from open().
    if not path.isfile(filename):
        return ''
    with open(filename) as entry_file:
        title = entry_file.readline().rstrip('\n')
    # The title is plain text, so escape it before embedding it in markup.
    content_string = '<h1>' + html.escape(title) + '</h1>\n'
    if filename.endswith('.md'):
        content_string += gen_md_content(filename, 1)
    return content_string
def gen_md_content(path_ex, depth):
    """Convert the body of a markdown entry file to HTML.

    The first line of the file (the entry title) is skipped. In the remaining
    lines every occurrence of ``'# '`` is replaced by ``depth + 1`` hash
    characters followed by a space, which pushes markdown headings down by
    ``depth`` levels before the text is rendered.

    Args:
        path_ex: Path to the markdown file.
        depth: Number of extra ``#`` characters to add to each heading marker.

    Returns:
        The rendered HTML, or ``''`` when ``path_ex`` is not an existing file.
    """
    if not path.isfile(path_ex):
        return ''
    # One '#' plus `depth` more; a non-positive depth leaves '# ' unchanged.
    header = '#' + '#' * depth + ' '
    with open(path_ex, "r") as md_file:
        markdown_lines = md_file.readlines()[1:]
    # NOTE(review): the replacement is not limited to the start of a line, so
    # a '# ' inside running text or a code block is rewritten as well.
    markdown_text = ''.join(
        line.replace('# ', header) for line in markdown_lines
    )
    return markdown.markdown(
        markdown_text, extensions=["fenced_code", "tables"]
    )
def gen_query_res_string(query_str):
    """Build an HTML list of links for the entries matching a search query.

    Args:
        query_str: The query text passed on to ``search.search``.

    Returns:
        A ``<ul>`` element with one ``<li><a href="/entry/...">title</a></li>``
        per search result (an empty list when nothing matched).
    """
    import html
    from urllib.parse import quote

    items = []
    for result in search.search(query_str):
        # Titles are plain text read from the entry files: trim the line
        # ending and escape them before they are placed into markup.
        title = html.escape(result['title'].strip())
        # Result paths are relative to the working directory; the entry route
        # expects the part below ENTRY_DIR, whatever ENTRY_DIR is set to.
        rel_path = os.path.relpath(result['path'], ENTRY_DIR).replace(os.sep, '/')
        href = html.escape('/entry/' + quote(rel_path))
        items.append('<li><a href="' + href + '">' + title + '</a></li>')
    return '<ul>\n' + ''.join(items) + '</ul>\n'

@ -0,0 +1,11 @@
from flask_wtf import CSRFProtect
from flask_wtf import FlaskForm
from wtforms import StringField, SubmitField, TextField, ValidationError, validators
csrf = CSRFProtect()
class SearchForm(FlaskForm):
    """Search form with one required query field and a submit button."""

    # StringField / DataRequired are the current names of the deprecated
    # TextField / Required aliases; field name and message are unchanged.
    query_str = StringField(
        "Query", [validators.DataRequired("Please enter the search term")])
    submit = SubmitField("Send")

@ -1,2 +1,5 @@
Flask==1.1.2
Markdown==3.1.1 Markdown==3.1.1
WTForms==2.2.1
Flask==1.1.2
Flask_WTF==0.14.3
Whoosh==2.7.4

@ -0,0 +1,55 @@
import config
import os
import sys
from whoosh import scoring
from whoosh.index import create_in, open_dir
from whoosh.fields import Schema, TEXT, ID
from whoosh.qparser import QueryParser
# Directory in which the Whoosh index is created and from which it is opened.
INDEX_DIR = "indexdir"
# Hit limit used by search() when it calls search_times().
DEF_TOPN = 10
# Root directory of the entry files, taken from config.
ENTRY_DIR = config.ENTRY_DIR
def createSearchableData(root):
    '''
    Create a fresh index in INDEX_DIR containing every file found below root.

    Schema definition: title(first line of the file, stored), path(as ID,
    stored), content(whole file text, indexed but not stored)
    source:
    https://appliedmachinelearning.blog/2018/07/31/developing-a-fast-indexing-and-full-text-search-engine-with-whoosh-a-pure-pythhon-library/

    Args:
        root: Directory that is walked recursively for files to index.
    '''
    schema = Schema(title=TEXT(stored=True),
                    path=ID(stored=True), content=TEXT)
    os.makedirs(INDEX_DIR, exist_ok=True)
    ix = create_in(INDEX_DIR, schema)
    # Used as a context manager the writer commits on success and cancels
    # (releasing its lock) if indexing fails part-way through.
    with ix.writer() as writer:
        for dirpath, _dirnames, filenames in os.walk(root):
            for filename in filenames:
                path = os.path.join(dirpath, filename)
                with open(path) as fp:
                    first_line = fp.readline()
                    text = first_line + fp.read()
                # Store the title without its line ending; the indexed
                # content keeps the complete text including the title line.
                writer.add_document(
                    title=first_line.rstrip('\n'), path=path, content=text)
def search_times(query_str, topN):
    """Query the index in INDEX_DIR and return the best matches.

    Args:
        query_str: Query text, parsed against the ``content`` field.
        topN: Maximum number of hits to return.

    Returns:
        A list of dicts with the keys ``'title'``, ``'path'`` and ``'match'``
        (the score of the hit), one per match.
    """
    index = open_dir(INDEX_DIR)
    with index.searcher(weighting=scoring.BM25F) as searcher:
        parsed_query = QueryParser("content", index.schema).parse(query_str)
        # Build the plain result list while the searcher is still open.
        return [
            {'title': hit['title'], 'path': hit['path'], 'match': hit.score}
            for hit in searcher.search(parsed_query, limit=topN)
        ]
def search(query_str):
    """Search the index for ``query_str`` with the default hit limit DEF_TOPN."""
    hits = search_times(query_str, DEF_TOPN)
    return hits
# Build the index from the entry directory.
# NOTE(review): indentation is lost in this view; this looks like a
# module-level call, which would rebuild the index on every import of this
# module - confirm.
createSearchableData(ENTRY_DIR)

@ -0,0 +1,10 @@
{% extends "template.html" %}
{% block content %}
<div class="container">
  <div class="content">
    {# content is HTML generated on the server, so it is emitted unescaped.
       Jinja's autoescape tag takes a boolean expression (true/false). #}
    {% autoescape false %}
    <span>{{ content }}</span>
    {% endautoescape %}
  </div>
</div>
{% endblock %}

@ -0,0 +1,11 @@
Test Entry Title 4
This is a markdown file
[link to entry3](../test-entry3.md)
- list entry
- list entry
- list entry
# md-header
more content

@ -0,0 +1,10 @@
Test Entry Title 5
This is a markdown file
- list entry
- list entry
- list entry
# md-header
more content

@ -0,0 +1,10 @@
Test Entry Title 6
This is a markdown file
- list entry
- list entry
- list entry
# md-header
more content

@ -1,5 +1,6 @@
Test Entry Title 3 Test Entry Title 1
This is a markdown file This is a markdown file
This text contains a one.
- list entry - list entry
- list entry - list entry

@ -1,5 +1,6 @@
Test Entry Title 3 Test Entry Title 2
This is a markdown file This is a markdown file
Two Two
- list entry - list entry
- list entry - list entry

@ -1,4 +1,4 @@
Test Entry Title 3 Test Entry Title 3 Three
This is a markdown file This is a markdown file
- list entry - list entry

@ -0,0 +1,15 @@
{% extends "template.html" %}
{% block content %}
<div class="container">
  <div class="content">
    <form action="{{ url_for('search') }}" method=post>
      {{ form.hidden_tag() }}
      {{ form.query_str }}
      {{ form.submit }}
    </form>
    {# content is the result list generated on the server, so it is emitted
       unescaped. Jinja's autoescape tag takes a boolean expression
       (true/false). #}
    {% autoescape false %}
    <span>{{ content }}</span>
    {% endautoescape %}
  </div>
</div>
{% endblock %}

@ -14,6 +14,7 @@
<label for="main-menu-check" class="show-menu">&#9776;</label> <label for="main-menu-check" class="show-menu">&#9776;</label>
<div class="main-menu"> <div class="main-menu">
<a href="/">Startpage</a> <a href="/">Startpage</a>
<a href="/search">Search</a>
<label for="main-menu-check" class="hide-menu">X</label> <label for="main-menu-check" class="hide-menu">X</label>
</div> </div>
</div> </div>
@ -24,7 +25,7 @@
<!-- Content --> <!-- Content -->
<footer> <footer>
<div class="center"> <div class="center">
Dieser Blog enthält kein Javascript oder PHP.<br> Dieses Wiki enthält kein Javascript oder PHP.<br>
Dies ist eine Instanz vom <a href="https://github.com/tiyn/tiyny-wiki">Tiyny-Wiki</a>. Dies ist eine Instanz vom <a href="https://github.com/tiyn/tiyny-wiki">Tiyny-Wiki</a>.
</div> </div>
</footer> </footer>

Loading…
Cancel
Save