Simple file-based wiki with fulltext-search.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

55 lines
1.6 KiB

  1. import config
  2. import os
  3. import sys
  4. from whoosh import scoring
  5. from whoosh.index import create_in, open_dir
  6. from whoosh.fields import Schema, TEXT, ID
  7. from whoosh.qparser import QueryParser
# Directory that holds the Whoosh index files (relative path).
INDEX_DIR = "indexdir"
# Result limit used by search() when the caller does not choose one.
DEF_TOPN = 10
# Root directory whose files get indexed; taken from the project config module.
ENTRY_DIR = config.ENTRY_DIR
  11. def createSearchableData(root):
  12. '''
  13. Schema definition: title(name of file), path(as ID), content(indexed but not stored), textdata (stored text content)
  14. source:
  15. https://appliedmachinelearning.blog/2018/07/31/developing-a-fast-indexing-and-full-text-search-engine-with-whoosh-a-pure-pythhon-library/
  16. '''
  17. schema = Schema(title=TEXT(stored=True),
  18. path=ID(stored=True), content=TEXT)
  19. if not os.path.exists(INDEX_DIR):
  20. os.mkdir(INDEX_DIR)
  21. ix = create_in(INDEX_DIR, schema)
  22. writer = ix.writer()
  23. for r, d, f in os.walk(root):
  24. for file in f:
  25. path = os.path.join(r, file)
  26. fp = open(path)
  27. title = fp.readline()
  28. text = title + fp.read()
  29. writer.add_document(title=title, path=path, content=text)
  30. fp.close()
  31. writer.commit()
  32. def search_times(query_str, topN):
  33. ix = open_dir(INDEX_DIR)
  34. results = []
  35. with ix.searcher(weighting=scoring.BM25F) as s:
  36. query = QueryParser("content", ix.schema).parse(query_str)
  37. matches = s.search(query, limit=topN)
  38. for match in matches:
  39. results.append(
  40. {'title': match['title'], 'path': match['path'], 'match': match.score})
  41. return results
  42. def search(query_str):
  43. return search_times(query_str, DEF_TOPN)
# Build the index from ENTRY_DIR as soon as this module is loaded.
# NOTE(review): assumed to be a top-level statement (the source's
# indentation was lost) - confirm it is not meant to sit inside search().
createSearchableData(ENTRY_DIR)