Simple file-based wiki with fulltext-search.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

75 lines
2.1 KiB

  1. import config
  2. import os
  3. import sys
  4. from whoosh import scoring
  5. from whoosh.index import create_in, open_dir
  6. from whoosh.fields import Schema, TEXT, ID
  7. from whoosh.qparser import QueryParser
  8. INDEX_DIR = "indexdir"  # directory handed to create_in()/open_dir() for the Whoosh index
  9. DEF_TOPN = 10  # number of results search() requests from search_times()
  10. ENTRY_DIR = config.ENTRY_DIR  # directory passed to createSearchableData() as the tree to index
  11. def createSearchableData(root):
  12. """
  13. Schema definition: title(name of file), path(as ID), content(indexed but not stored), textdata (stored text content)
  14. source:
  15. https://appliedmachinelearning.blog/2018/07/31/developing-a-fast-indexing-and-full-text-search-engine-with-whoosh-a-pure-pythhon-library/
  16. """
  17. schema = Schema(title=TEXT(stored=True),
  18. path=ID(stored=True), content=TEXT)
  19. if not os.path.exists(INDEX_DIR):
  20. os.mkdir(INDEX_DIR)
  21. ix = create_in(INDEX_DIR, schema)
  22. writer = ix.writer()
  23. for r, d, f in os.walk(root):
  24. for file in f:
  25. path = os.path.join(r, file)
  26. fp = open(path,encoding='utf-8')
  27. title = fp.readline()
  28. text = title + fp.read()
  29. writer.add_document(title=title, path=path, content=text)
  30. fp.close()
  31. writer.commit()
  32. def search_times(query_str, topN):
  33. """
  34. Search for a given term and returns a specific amount of results.
  35. Parameters:
  36. query_str (string): term to search for
  37. topN (int): number of results to return
  38. Returns:
  39. string: html-formatted string including the hits of the search
  40. """
  41. ix = open_dir(INDEX_DIR)
  42. results = []
  43. with ix.searcher(weighting=scoring.BM25F) as s:
  44. query = QueryParser("content", ix.schema).parse(query_str)
  45. matches = s.search(query, limit=topN)
  46. for match in matches:
  47. results.append(
  48. {'title': match['title'], 'path': match['path'], 'match': match.score})
  49. return results
  50. def search(query_str):
  51. """
  52. Search for a given term and show the predefined amount of results.
  53. Parameters:
  54. query_str (string): term to search for
  55. Returns:
  56. string: html-formatted string including the hits of the search
  57. """
  58. return search_times(query_str, DEF_TOPN)
  59. createSearchableData(ENTRY_DIR)  # top-level call: the index is built each time this module is executed or imported