mirror of
				https://github.com/tiyn/amphora-wiki.git
				synced 2025-10-25 17:01:16 +02:00 
			
		
		
		
	adding fulltextsearch with searchpage
This commit is contained in:
		
							
								
								
									
										24
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										24
									
								
								README.md
									
									
									
									
									
								
							| @@ -7,16 +7,22 @@ I however just want to put my markdown files in a directory and get a working wi | |||||||
|  |  | ||||||
| ## Features/To-Dos | ## Features/To-Dos | ||||||
|  |  | ||||||
| - [ ] Plain text support for blog entries | - [x] Plain text support for blog entries | ||||||
|     - [ ] Markdown Files (.md) |     - [x] Markdown Files (.md) | ||||||
| - [ ] Entry page | - [x] Entry page | ||||||
|  |     - [ ] Option to get plain text file | ||||||
|  | - [x] Search page | ||||||
|  |     - [x] Full-text search | ||||||
|  |     - [ ] Show first few lines of each match (description) | ||||||
| - [ ] Navigation | - [ ] Navigation | ||||||
|     - [ ] Header |     - [ ] More advanced namespaces | ||||||
|     - [ ] Footer |     - [x] Header | ||||||
| - [ ] Switchable CSS |         - [ ] Search bar in header | ||||||
|     - [ ] CSS dark-theme |     - [x] Footer | ||||||
|     - [ ] CSS light-theme | - [x] Switchable CSS | ||||||
| - [ ] Config file |     - [x] CSS dark-theme | ||||||
|  |     - [x] CSS light-theme | ||||||
|  | - [x] Config file | ||||||
| - [ ] Docker installation | - [ ] Docker installation | ||||||
|     - [ ] Enable variables/environment variables |     - [ ] Enable variables/environment variables | ||||||
| - [ ] Logo | - [ ] Logo | ||||||
|   | |||||||
							
								
								
									
										1
									
								
								src/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								src/.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -1 +1,2 @@ | |||||||
| __pycache__/ | __pycache__/ | ||||||
|  | indexdir/ | ||||||
|   | |||||||
							
								
								
									
										31
									
								
								src/app.py
									
									
									
									
									
								
							
							
						
						
									
										31
									
								
								src/app.py
									
									
									
									
									
								
							| @@ -1,21 +1,46 @@ | |||||||
|  | import os | ||||||
| from flask import Flask, flash, make_response, render_template, request, redirect, abort | from flask import Flask, flash, make_response, render_template, request, redirect, abort | ||||||
|  |  | ||||||
| import content as con_gen | import content as con_gen | ||||||
| import config | import config | ||||||
|  | import search as src | ||||||
|  | from forms import SearchForm, csrf | ||||||
|  |  | ||||||
| app = Flask(__name__) | app = Flask(__name__) | ||||||
|  |  | ||||||
# Prefer a stable key from the SECRET_KEY environment variable so that
# sessions and CSRF tokens stay valid across restarts and are shared by all
# worker processes. Without it, fall back to a random per-process key.
SECRET_KEY = os.environ.get('SECRET_KEY') or os.urandom(32)
app.secret_key = SECRET_KEY
csrf.init_app(app)
|  |  | ||||||
|  | TITLE = config.TITLE | ||||||
|  | STYLE = config.STYLE | ||||||
|  |  | ||||||
|  |  | ||||||
| @app.errorhandler(404) | @app.errorhandler(404) | ||||||
| def page_not_found(e): | def page_not_found(e): | ||||||
|     return render_template('error.html', title=config.TITLE, errorcode='404', style=config.STYLE), 404 |     return render_template('error.html', title=TITLE, errorcode='404', style=STYLE), 404 | ||||||
|  |  | ||||||
|  |  | ||||||
| @app.route('/') | @app.route('/') | ||||||
| @app.route('/index.html') | @app.route('/index.html') | ||||||
| def index(): | def index(): | ||||||
|     return 'ok' |     return 'placeholder for index', 200 | ||||||
|  |  | ||||||
|  |  | ||||||
@app.route('/search', methods=['GET', 'POST'])
def search():
    """Render the search page and, for a valid submission, the result list.

    A plain GET (or a submission that fails form validation) renders the
    form with empty content. A valid POST renders the form together with
    the HTML produced by con_gen.gen_query_res_string for the query.

    Returns:
        Tuple of the rendered 'search.html' template and status code 200.
    """
    form = SearchForm()
    content = ''
    # validate_on_submit() is only true for a submitted form whose validators
    # pass, so a missing or empty query re-renders the form instead of
    # raising a 400 from a direct request.form[...] lookup.
    if form.validate_on_submit():
        content = con_gen.gen_query_res_string(form.query_str.data)
    return render_template('search.html', title=TITLE, style=STYLE, form=form, content=content), 200
|  |  | ||||||
|  |  | ||||||
@app.route('/entry/<path:fullurl>')
def entry(fullurl):
    """Render a single wiki entry.

    Args:
        fullurl: path of the entry file relative to the entry directory,
            taken from the URL.

    Returns:
        Tuple of the rendered 'entry.html' template and status code 200.
        Aborts with 404 when no content could be generated for the path.
    """
    content = con_gen.gen_stand_string(fullurl)
    # gen_stand_string yields an empty string when the entry file does not
    # exist; answer with the 404 page instead of an empty page with 200.
    if not content:
        abort(404)
    return render_template('entry.html', title=TITLE, style=STYLE, content=content), 200
|  |  | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|   | |||||||
| @@ -6,3 +6,8 @@ WEBSITE = 'localhost:5000' | |||||||
|  |  | ||||||
| # Theme for the blog: dark, light | # Theme for the blog: dark, light | ||||||
| STYLE = 'dark' | STYLE = 'dark' | ||||||
|  |  | ||||||
|  | ############################################### | ||||||
|  | ## NO CHANGES RECOMMENDED                    ## | ||||||
|  | ############################################### | ||||||
|  | ENTRY_DIR = 'templates/entry' | ||||||
|   | |||||||
| @@ -1,10 +1,58 @@ | |||||||
|  | import config | ||||||
|  | import search | ||||||
|  |  | ||||||
| import datetime | import datetime | ||||||
| from datetime import datetime | from datetime import datetime | ||||||
| import markdown |  | ||||||
| import os | import os | ||||||
| from os import path | from os import path | ||||||
| import pathlib | import pathlib | ||||||
|  |  | ||||||
| import config | import markdown | ||||||
|  |  | ||||||
| ENTRY_DIR = 'templates/entry' | ENTRY_DIR = config.ENTRY_DIR | ||||||
|  | WEBSITE = config.WEBSITE | ||||||
|  |  | ||||||
|  |  | ||||||
def gen_stand_string(path_ex):
    """Build the HTML for a standalone entry page.

    The first line of the entry file is used as the title and emitted as an
    <h1>. For '.md' files the remaining lines are rendered through
    gen_md_content with headings pushed down one level.

    Args:
        path_ex: path of the entry file relative to ENTRY_DIR (comes from
            the request URL, so it is treated as untrusted).

    Returns:
        The HTML string, or '' when the path does not name an existing file
        inside ENTRY_DIR.
    """
    base_dir = os.path.realpath(ENTRY_DIR)
    filename = os.path.join(ENTRY_DIR, path_ex)
    # Refuse anything that resolves outside the entry directory
    # (e.g. '../config.py' or an absolute path).
    if not os.path.realpath(filename).startswith(base_dir + os.sep):
        return ''
    # isfile() rather than exists(): a directory cannot be opened for reading.
    if not os.path.isfile(filename):
        return ''
    with open(filename) as entry_file:
        title = entry_file.readline().rstrip('\n')
    content_string = '<h1>' + title + '</h1>\n'
    if filename.endswith('.md'):
        content_string += gen_md_content(filename, 1)
    return content_string
|  |  | ||||||
|  |  | ||||||
def _demote_headings(lines, depth):
    """Return the joined lines with every '#' heading pushed down by depth levels."""
    extra = '#' * depth
    shifted = []
    in_fence = False
    for line in lines:
        if line.lstrip().startswith(('```', '~~~')):
            # Simple toggle: lines inside fenced code blocks are left alone
            # so '# comment' lines in code samples are not rewritten.
            in_fence = not in_fence
        elif (not in_fence and line.startswith('#')
              and line.lstrip('#').startswith(' ')):
            # Only a leading run of '#' followed by a space is a heading;
            # a '# ' elsewhere in the line is ordinary text.
            line = extra + line
        shifted.append(line)
    return ''.join(shifted)


def gen_md_content(path_ex, depth):
    """Render a Markdown entry file to HTML with its headings demoted.

    The first line of the file is skipped (callers use it as the title);
    the rest is converted with the 'fenced_code' and 'tables' extensions.

    Args:
        path_ex: path of the Markdown file.
        depth: number of levels each heading is pushed down
            ('# x' becomes '## x' for depth 1).

    Returns:
        The generated HTML, or '' when path_ex is not an existing file.
    """
    if not path.isfile(path_ex):
        return ''
    with open(path_ex, "r") as md_file:
        markdown_lines = md_file.readlines()[1:]
    return markdown.markdown(
        _demote_headings(markdown_lines, depth),
        extensions=["fenced_code", "tables"],
    )
|  |  | ||||||
|  |  | ||||||
def gen_query_res_string(query_str):
    """Build an HTML list of links for the entries matching a search query.

    Args:
        query_str: the query handed to search.search.

    Returns:
        A '<ul>' block with one '<li><a href="/entry/...">title</a></li>'
        per result (an empty list element when nothing matched).
    """
    from urllib.parse import quote

    items = []
    for result in search.search(query_str):
        # Derive the URL part from the configured entry directory instead of
        # assuming it has exactly two path components, and normalise the
        # separator so the link is a valid URL path on every platform.
        rel_path = os.path.relpath(result['path'], ENTRY_DIR).replace(os.sep, '/')
        # Percent-encode so spaces, quotes, '#' or '?' in file names cannot
        # break the href attribute or be read as fragment/query.
        href = '/entry/' + quote(rel_path)
        items.append('<li><a href="' + href + '">' + result['title'] + '</a></li>')
    return '<ul>\n' + ''.join(items) + '</ul>\n'
|   | |||||||
							
								
								
									
										11
									
								
								src/forms.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								src/forms.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | |||||||
from flask_wtf import CSRFProtect
from flask_wtf import FlaskForm
from wtforms import StringField, SubmitField, TextField, ValidationError, validators
|  |  | ||||||
|  | csrf = CSRFProtect() | ||||||
|  |  | ||||||
|  |  | ||||||
class SearchForm(FlaskForm):
    """Search form with a required query field and a submit button."""

    # StringField / DataRequired are the current names of the deprecated
    # TextField / Required aliases.
    query_str = StringField(
        "Query", [validators.DataRequired("Please enter the search term")])
    submit = SubmitField("Send")
| @@ -1,2 +1,5 @@ | |||||||
| Flask==1.1.2 |  | ||||||
| Markdown==3.1.1 | Markdown==3.1.1 | ||||||
|  | WTForms==2.2.1 | ||||||
|  | Flask==1.1.2 | ||||||
|  | Flask_WTF==0.14.3 | ||||||
|  | Whoosh==2.7.4 | ||||||
|   | |||||||
							
								
								
									
										55
									
								
								src/search.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								src/search.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | |||||||
|  | import config | ||||||
|  |  | ||||||
|  | import os | ||||||
|  | import sys | ||||||
|  |  | ||||||
|  | from whoosh import scoring | ||||||
|  | from whoosh.index import create_in, open_dir | ||||||
|  | from whoosh.fields import Schema, TEXT, ID | ||||||
|  | from whoosh.qparser import QueryParser | ||||||
|  |  | ||||||
|  | INDEX_DIR = "indexdir" | ||||||
|  | DEF_TOPN = 10 | ||||||
|  | ENTRY_DIR = config.ENTRY_DIR | ||||||
|  |  | ||||||
|  |  | ||||||
def createSearchableData(root):
    '''
    Create the Whoosh index in INDEX_DIR from every file found below root.

    Schema definition: title (first line of the file, stored), path (as ID,
    stored), content (full text of the file, indexed but not stored).
    source:
    https://appliedmachinelearning.blog/2018/07/31/developing-a-fast-indexing-and-full-text-search-engine-with-whoosh-a-pure-pythhon-library/
    '''
    schema = Schema(title=TEXT(stored=True),
                    path=ID(stored=True), content=TEXT)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(INDEX_DIR, exist_ok=True)
    ix = create_in(INDEX_DIR, schema)
    # The writer context manager commits on success and cancels on error.
    with ix.writer() as writer:
        for dirpath, _dirnames, filenames in os.walk(root):
            for name in filenames:
                path = os.path.join(dirpath, name)
                try:
                    with open(path) as fp:
                        first_line = fp.readline()
                        text = first_line + fp.read()
                except UnicodeDecodeError:
                    # Not a text file (e.g. an image next to the entries);
                    # leave it out of the index instead of aborting.
                    continue
                # Store the title without its trailing newline; the indexed
                # content keeps the original text.
                writer.add_document(title=first_line.rstrip('\n'),
                                    path=path, content=text)
|  |  | ||||||
|  |  | ||||||
def search_times(query_str, topN):
    """Run a full-text query against the index in INDEX_DIR.

    Args:
        query_str: query text, parsed against the "content" field.
        topN: value passed as the result limit of the search.

    Returns:
        A list of dicts with the keys 'title', 'path' and 'match'
        (the score of the hit), one per match.
    """
    index = open_dir(INDEX_DIR)
    parser = QueryParser("content", index.schema)
    with index.searcher(weighting=scoring.BM25F) as searcher:
        hits = searcher.search(parser.parse(query_str), limit=topN)
        # Read the hits while the searcher is still open.
        return [
            {'title': hit['title'], 'path': hit['path'], 'match': hit.score}
            for hit in hits
        ]
|  |  | ||||||
|  |  | ||||||
def search(query_str):
    """Search the index for query_str using DEF_TOPN as the result limit."""
    return search_times(query_str, topN=DEF_TOPN)
|  |  | ||||||
|  |  | ||||||
|  | createSearchableData(ENTRY_DIR) | ||||||
							
								
								
									
										10
									
								
								src/templates/entry.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								src/templates/entry.html
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | |||||||
|  | {% extends "template.html" %} | ||||||
|  | {% block content %} | ||||||
|  | <div class="container"> | ||||||
|  |     <div class="content"> | ||||||
|  |         {% autoescape off %} | ||||||
|  | 	<span>{{ content }}</span> | ||||||
|  |         {% endautoescape %} | ||||||
|  |     </div> | ||||||
|  | </div> | ||||||
|  | {% endblock %} | ||||||
							
								
								
									
										11
									
								
								src/templates/entry/namespace/test-entry4.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								src/templates/entry/namespace/test-entry4.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | |||||||
|  | Test Entry Title 4 | ||||||
|  | This is a markdown file | ||||||
|  | [link to entry3](../test-entry3.md) | ||||||
|  |  | ||||||
|  | - list entry | ||||||
|  | - list entry | ||||||
|  | - list entry | ||||||
|  |  | ||||||
|  | # md-header | ||||||
|  |  | ||||||
|  | more content | ||||||
							
								
								
									
										10
									
								
								src/templates/entry/namespace/test-entry5.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								src/templates/entry/namespace/test-entry5.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | |||||||
|  | Test Entry Title 5 | ||||||
|  | This is a markdown file | ||||||
|  |  | ||||||
|  | - list entry | ||||||
|  | - list entry | ||||||
|  | - list entry | ||||||
|  |  | ||||||
|  | # md-header | ||||||
|  |  | ||||||
|  | more content | ||||||
							
								
								
									
										10
									
								
								src/templates/entry/namespace/test-entry6.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								src/templates/entry/namespace/test-entry6.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | |||||||
|  | Test Entry Title 6 | ||||||
|  | This is a markdown file | ||||||
|  |  | ||||||
|  | - list entry | ||||||
|  | - list entry | ||||||
|  | - list entry | ||||||
|  |  | ||||||
|  | # md-header | ||||||
|  |  | ||||||
|  | more content | ||||||
| @@ -1,5 +1,6 @@ | |||||||
| Test Entry Title 3 | Test Entry Title 1 | ||||||
| This is a markdown file | This is a markdown file | ||||||
|  | This text contains a one. | ||||||
|  |  | ||||||
| - list entry | - list entry | ||||||
| - list entry | - list entry | ||||||
|   | |||||||
| @@ -1,5 +1,6 @@ | |||||||
| Test Entry Title 3 | Test Entry Title 2 | ||||||
| This is a markdown file | This is a markdown file | ||||||
|  | Two Two | ||||||
|  |  | ||||||
| - list entry | - list entry | ||||||
| - list entry | - list entry | ||||||
|   | |||||||
| @@ -1,4 +1,4 @@ | |||||||
| Test Entry Title 3 | Test Entry Title 3 Three | ||||||
| This is a markdown file | This is a markdown file | ||||||
|  |  | ||||||
| - list entry | - list entry | ||||||
|   | |||||||
							
								
								
									
										15
									
								
								src/templates/search.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								src/templates/search.html
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | |||||||
|  | {% extends "template.html" %} | ||||||
|  | {% block content %} | ||||||
|  | <div class="container"> | ||||||
|  |     <div class="content"> | ||||||
|  |         <form action="{{ url_for('search') }}" method=post> | ||||||
|  |             {{ form.hidden_tag() }} | ||||||
|  |             {{ form.query_str }} | ||||||
|  |             {{ form.submit }} | ||||||
|  |         </form> | ||||||
|  |         {% autoescape off %} | ||||||
|  | 	<span>{{ content }}</span> | ||||||
|  |         {% endautoescape %} | ||||||
|  |     </div> | ||||||
|  | </div> | ||||||
|  | {% endblock %} | ||||||
| @@ -14,6 +14,7 @@ | |||||||
|         <label for="main-menu-check" class="show-menu">☰</label> |         <label for="main-menu-check" class="show-menu">☰</label> | ||||||
|         <div class="main-menu"> |         <div class="main-menu"> | ||||||
|             <a href="/">Startpage</a> |             <a href="/">Startpage</a> | ||||||
|  |             <a href="/search">Search</a> | ||||||
|             <label for="main-menu-check" class="hide-menu">X</label> |             <label for="main-menu-check" class="hide-menu">X</label> | ||||||
|         </div> |         </div> | ||||||
|     </div> |     </div> | ||||||
| @@ -24,7 +25,7 @@ | |||||||
|     <!-- Content --> |     <!-- Content --> | ||||||
|     <footer> |     <footer> | ||||||
|         <div class="center"> |         <div class="center"> | ||||||
|             Dieser Blog enthält kein Javascript oder PHP.<br> |             Dieses Wiki enthält kein Javascript oder PHP.<br> | ||||||
|             Dies ist eine Instanz vom <a href="https://github.com/tiyn/tiyny-wiki">Tiyny-Wiki</a>. |             Dies ist eine Instanz vom <a href="https://github.com/tiyn/tiyny-wiki">Tiyny-Wiki</a>. | ||||||
|         </div> |         </div> | ||||||
|     </footer> |     </footer> | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user