From 3c49ea668706a6378dba8cb0eff090a9ed41edc6 Mon Sep 17 00:00:00 2001 From: TiynGER Date: Mon, 1 Jun 2020 20:51:37 +0200 Subject: [PATCH] adding fulltextsearch with searchpage --- README.md | 24 +++++---- src/.gitignore | 1 + src/app.py | 31 +++++++++-- src/config.py | 5 ++ src/content.py | 54 +++++++++++++++++-- src/forms.py | 11 ++++ src/requirements.txt | 5 +- src/search.py | 55 ++++++++++++++++++++ src/templates/entry.html | 10 ++++ src/templates/entry/namespace/test-entry4.md | 11 ++++ src/templates/entry/namespace/test-entry5.md | 10 ++++ src/templates/entry/namespace/test-entry6.md | 10 ++++ src/templates/entry/test-entry1.md | 3 +- src/templates/entry/test-entry2.md | 3 +- src/templates/entry/test-entry3.md | 2 +- src/templates/search.html | 15 ++++++ src/templates/template.html | 3 +- 17 files changed, 233 insertions(+), 20 deletions(-) create mode 100644 src/forms.py create mode 100644 src/search.py create mode 100644 src/templates/entry.html create mode 100644 src/templates/entry/namespace/test-entry4.md create mode 100644 src/templates/entry/namespace/test-entry5.md create mode 100644 src/templates/entry/namespace/test-entry6.md create mode 100644 src/templates/search.html diff --git a/README.md b/README.md index 1af9de6..9f1971f 100644 --- a/README.md +++ b/README.md @@ -7,16 +7,22 @@ I however just want to put my markdown files in a directory and get a working wi ## Features/To-Dos -- [ ] Plain text support for blog entries - - [ ] Markdown Files (.md) -- [ ] Entry page +- [x] Plain text support for blog entries + - [x] Markdown Files (.md) +- [x] Entry page + - [ ] Option to get plain text file +- [x] Search page + - [x] Full-text search + - [ ] Show first few lines of each match (description) - [ ] Navigation - - [ ] Header - - [ ] Footer -- [ ] Switchable CSS - - [ ] CSS dark-theme - - [ ] CSS light-theme -- [ ] Config file + - [ ] More advanced namespaces + - [x] Header + - [ ] Search bar in header + - [x] Footer +- [x] Switchable CSS + - [x] CSS dark-theme + - [x] CSS light-theme +- [x] Config file - [ ] Docker installation - [ ] Enable variables/environment variables - [ ] Logo diff --git a/src/.gitignore b/src/.gitignore index c18dd8d..5235e9d 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -1 +1,2 @@ __pycache__/ +indexdir/ diff --git a/src/app.py b/src/app.py index 5a9538b..7132aa6 100644 --- a/src/app.py +++ b/src/app.py @@ -1,21 +1,46 @@ +import os from flask import Flask, flash, make_response, render_template, request, redirect, abort import content as con_gen import config - +import search as src +from forms import SearchForm, csrf app = Flask(__name__) +SECRET_KEY = os.urandom(32) +app.secret_key = SECRET_KEY +csrf.init_app(app) + +TITLE = config.TITLE +STYLE = config.STYLE + @app.errorhandler(404) def page_not_found(e): - return render_template('error.html', title=config.TITLE, errorcode='404', style=config.STYLE), 404 + return render_template('error.html', title=TITLE, errorcode='404', style=STYLE), 404 @app.route('/') @app.route('/index.html') def index(): - return 'ok' + return 'placeholder for index', 200 + + +@app.route('/search', methods=['GET', 'POST']) +def search(): + form = SearchForm() + if request.method == 'POST': + query_str = request.form['query_str'] + content = con_gen.gen_query_res_string(query_str) + return render_template('search.html', title=TITLE, style=STYLE, form=form, content=content), 200 + return render_template('search.html', title=TITLE, style=STYLE, form=form, content=''), 200 + + +@app.route('/entry/') +def entry(fullurl): + content = con_gen.gen_stand_string(fullurl) + return render_template('entry.html', title=TITLE, style=STYLE, content=content), 200 if __name__ == '__main__': diff --git a/src/config.py b/src/config.py index 5f251a9..5e786dd 100644 --- a/src/config.py +++ b/src/config.py @@ -6,3 +6,8 @@ WEBSITE = 'localhost:5000' # Theme for the blog: dark, light STYLE = 'dark' + +############################################### +## NO CHANGES RECOMMENDED ## +############################################### +ENTRY_DIR = 'templates/entry' diff --git a/src/content.py b/src/content.py index b5a3320..d604c06 100644 --- a/src/content.py +++ b/src/content.py @@ -1,10 +1,58 @@ +import config +import search + import datetime from datetime import datetime -import markdown import os from os import path import pathlib -import config +import markdown + +ENTRY_DIR = config.ENTRY_DIR +WEBSITE = config.WEBSITE + + +def gen_stand_string(path_ex): + filename = os.path.join(ENTRY_DIR, path_ex) + content_string = '' + if path.exists(filename): + title = open(filename).readline().rstrip('\n') + text = open(filename).readlines()[1:] + filename_no_end = filename.split('.', 1)[0] + content_string += '

' + title + '

\n' + if filename.endswith('.md'): + content_string += gen_md_content(filename, 1) + return content_string + + +def gen_md_content(path_ex, depth): + content_string = '' + if path.exists(path_ex): + filename = path_ex.split('.', 1) + fileend = filename[len(filename) - 1] + header = '#' + for i in range(depth): + header += '#' + header += ' ' + markdown_lines = open(path_ex, "r").readlines()[1:] + markdown_text = '' + for line in markdown_lines: + markdown_text += line.replace('# ', header) + content_string = markdown.markdown( + markdown_text, extensions=["fenced_code", "tables"] + ) + return content_string + -ENTRY_DIR = 'templates/entry' +def gen_query_res_string(query_str): + src_results = search.search(query_str) + res_string = '' + res_string += '\n' + return res_string diff --git a/src/forms.py b/src/forms.py new file mode 100644 index 0000000..136de1c --- /dev/null +++ b/src/forms.py @@ -0,0 +1,11 @@ +from flask_wtf import FlaskForm +from flask_wtf import CSRFProtect +from wtforms import TextField, SubmitField, ValidationError, validators + +csrf = CSRFProtect() + + +class SearchForm(FlaskForm): + query_str = TextField( + "Query", [validators.Required("Please enter the search term")]) + submit = SubmitField("Send") diff --git a/src/requirements.txt b/src/requirements.txt index 5ab13d0..ccdd930 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,2 +1,5 @@ -Flask==1.1.2 Markdown==3.1.1 +WTForms==2.2.1 +Flask==1.1.2 +Flask_WTF==0.14.3 +Whoosh==2.7.4 diff --git a/src/search.py b/src/search.py new file mode 100644 index 0000000..d67c7c5 --- /dev/null +++ b/src/search.py @@ -0,0 +1,55 @@ +import config + +import os +import sys + +from whoosh import scoring +from whoosh.index import create_in, open_dir +from whoosh.fields import Schema, TEXT, ID +from whoosh.qparser import QueryParser + +INDEX_DIR = "indexdir" +DEF_TOPN = 10 +ENTRY_DIR = config.ENTRY_DIR + + +def createSearchableData(root): + ''' + Schema definition: title(name of file), path(as ID), content(indexed but not stored), textdata (stored text content) + source: + https://appliedmachinelearning.blog/2018/07/31/developing-a-fast-indexing-and-full-text-search-engine-with-whoosh-a-pure-pythhon-library/ + ''' + schema = Schema(title=TEXT(stored=True), + path=ID(stored=True), content=TEXT) + if not os.path.exists(INDEX_DIR): + os.mkdir(INDEX_DIR) + ix = create_in(INDEX_DIR, schema) + writer = ix.writer() + for r, d, f in os.walk(root): + for file in f: + path = os.path.join(r, file) + fp = open(path) + title = fp.readline() + text = title + fp.read() + writer.add_document(title=title, path=path, content=text) + fp.close() + writer.commit() + + +def search_times(query_str, topN): + ix = open_dir(INDEX_DIR) + results = [] + with ix.searcher(weighting=scoring.BM25F) as s: + query = QueryParser("content", ix.schema).parse(query_str) + matches = s.search(query, limit=topN) + for match in matches: + results.append( + {'title': match['title'], 'path': match['path'], 'match': match.score}) + return results + + +def search(query_str): + return search_times(query_str, DEF_TOPN) + + +createSearchableData(ENTRY_DIR) diff --git a/src/templates/entry.html b/src/templates/entry.html new file mode 100644 index 0000000..f839581 --- /dev/null +++ b/src/templates/entry.html @@ -0,0 +1,10 @@ +{% extends "template.html" %} +{% block content %} +
+
+ {% autoescape off %} + {{ content }} + {% endautoescape %} +
+
+{% endblock %} diff --git a/src/templates/entry/namespace/test-entry4.md b/src/templates/entry/namespace/test-entry4.md new file mode 100644 index 0000000..2eee83c --- /dev/null +++ b/src/templates/entry/namespace/test-entry4.md @@ -0,0 +1,11 @@ +Test Entry Title 4 +This is a markdown file +[link to entry3](../test-entry3.md) + +- list entry +- list entry +- list entry + +# md-header + +more content diff --git a/src/templates/entry/namespace/test-entry5.md b/src/templates/entry/namespace/test-entry5.md new file mode 100644 index 0000000..2b51448 --- /dev/null +++ b/src/templates/entry/namespace/test-entry5.md @@ -0,0 +1,10 @@ +Test Entry Title 5 +This is a markdown file + +- list entry +- list entry +- list entry + +# md-header + +more content diff --git a/src/templates/entry/namespace/test-entry6.md b/src/templates/entry/namespace/test-entry6.md new file mode 100644 index 0000000..038dd29 --- /dev/null +++ b/src/templates/entry/namespace/test-entry6.md @@ -0,0 +1,10 @@ +Test Entry Title 6 +This is a markdown file + +- list entry +- list entry +- list entry + +# md-header + +more content diff --git a/src/templates/entry/test-entry1.md b/src/templates/entry/test-entry1.md index 440dae5..78f24a9 100644 --- a/src/templates/entry/test-entry1.md +++ b/src/templates/entry/test-entry1.md @@ -1,5 +1,6 @@ -Test Entry Title 3 +Test Entry Title 1 This is a markdown file +This text contains a one. - list entry - list entry diff --git a/src/templates/entry/test-entry2.md b/src/templates/entry/test-entry2.md index 440dae5..5b1cb37 100644 --- a/src/templates/entry/test-entry2.md +++ b/src/templates/entry/test-entry2.md @@ -1,5 +1,6 @@ -Test Entry Title 3 +Test Entry Title 2 This is a markdown file +Two Two - list entry - list entry diff --git a/src/templates/entry/test-entry3.md b/src/templates/entry/test-entry3.md index 440dae5..f69e0bb 100644 --- a/src/templates/entry/test-entry3.md +++ b/src/templates/entry/test-entry3.md @@ -1,4 +1,4 @@ -Test Entry Title 3 +Test Entry Title 3 Three This is a markdown file - list entry diff --git a/src/templates/search.html b/src/templates/search.html new file mode 100644 index 0000000..cc558f5 --- /dev/null +++ b/src/templates/search.html @@ -0,0 +1,15 @@ +{% extends "template.html" %} +{% block content %} +
+
+
+ {{ form.hidden_tag() }} + {{ form.query_str }} + {{ form.submit }} +
+ {% autoescape off %} + {{ content }} + {% endautoescape %} +
+
+{% endblock %} diff --git a/src/templates/template.html b/src/templates/template.html index 982d79e..7943759 100644 --- a/src/templates/template.html +++ b/src/templates/template.html @@ -14,6 +14,7 @@ @@ -24,7 +25,7 @@