import glob
import locale
import os
import pathlib
import urllib.parse
from datetime import datetime
from os import path

import markdown
from bs4 import BeautifulSoup
from gtts import gTTS, gTTSError

import config
import search

WEBSITE = config.WEBSITE
ENTRY_DIR = config.ENTRY_DIR
LANGUAGE = config.LANGUAGE
LOCAL = "de_DE.UTF-8" if LANGUAGE == "de-de" else "en_US.UTF-8"
TIMEZONE = config.TIMEZONE

locale.setlocale(locale.LC_TIME, LOCAL)


def _entries_newest_first():
    """
    Return the paths of everything in ENTRY_DIR, newest modification first.

    Returns:
    list: paths joined with ENTRY_DIR, sorted by descending mtime
    """
    full_list = [os.path.join(ENTRY_DIR, name) for name in os.listdir(ENTRY_DIR)]
    # Reverse an ascending sort (rather than sorting with reverse=True) so that
    # entries sharing the same mtime keep the order the original code produced.
    return sorted(full_list, key=os.path.getmtime)[::-1]


def _read_entry(filename):
    """
    Read an entry file once and split it into title and body.

    Parameters:
    filename: path to the entry file

    Returns:
    tuple: (first line without trailing newline, list of remaining lines)
    """
    with open(filename) as entry:
        lines = entry.readlines()
    title = lines[0].rstrip("\n") if lines else ""
    return title, lines[1:]


def gen_arch_string():
    """
    Create and return an archive string of every file in ENTRY_DIR.

    Entries are listed newest first and grouped under one heading per
    month ("%B %Y" in the configured LC_TIME locale).

    Returns:
    string: html-formatted archive-string, empty if ENTRY_DIR does not exist
    """
    # NOTE(review): the literals below reproduce the text that survives in
    # SOURCE; their HTML markup appears to have been stripped before review.
    # Restore the tags from version control before shipping.
    content_string = ""
    if not path.exists(ENTRY_DIR):
        return content_string
    last_month = ""
    for file in _entries_newest_first():
        modified = datetime.fromtimestamp(os.path.getmtime(file))
        curr_date = modified.strftime("%Y-%m-%d")
        curr_month = modified.strftime("%B %Y")
        if curr_month != last_month:
            if last_month != "":
                content_string += "\n"
            content_string += "\n\n" + curr_month + "\n\n\n"
            # Remember the month so the heading is emitted once per month.
            last_month = curr_month
        # The title is the first line of the entry, as in gen_index_string.
        title, _ = _read_entry(file)
        content_string += "\n\n"
        content_string += "" + curr_date + " - "
        content_string += "" + title + ""
        content_string += "\n"
    content_string += "\n\n\n"
    return content_string


def gen_index_string():
    """
    Create and return a string including every file in ENTRY_DIR as an index.

    Entries are rendered newest first; ".html" bodies are included as-is and
    ".md" bodies are converted with gen_md_content at heading depth 2.

    Returns:
    string: html-formatted index string with absolute links
    """
    # NOTE(review): literals reproduce the surviving SOURCE text; the HTML
    # markup appears to have been stripped -- restore from version control.
    content_string = ""
    if not path.exists(ENTRY_DIR):
        return content_string
    for file in _entries_newest_first():
        title, text = _read_entry(file)
        filename = pathlib.PurePath(file).name
        if filename[0] != ".":
            filename = filename.split(".", 1)[0]
        # NOTE(review): `filename` is not referenced by the surviving literals;
        # presumably it fed the stripped markup -- confirm.
        content_string += "\n\n\n"
        content_string += "\n\n"
        content_string += "" + title + "" + "\n\n\n"
        content_string += "" + \
            datetime.fromtimestamp(os.path.getmtime(file)).strftime("%Y-%m-%d") + "\n\n"
        if file.endswith(".html"):
            content_string += "".join(text)
        if file.endswith(".md"):
            content_string += gen_md_content(file, 2)
        content_string += "\n\n"
    return absolutize_html(content_string)


def absolutize_html(string):
    """
    Create a html string from another string that only uses absolute links
    that use the full domain.

    Only "href" of <a> tags and "src" of <img> tags are rewritten, and only
    when the value starts with "/" or ".".

    Parameters:
    string: html-formatted string.

    Returns:
    string: html-formatted string with absolute links
    """
    soup = BeautifulSoup(string, "html.parser")
    for tag_name, attribute in (("a", "href"), ("img", "src")):
        for tag in soup.find_all(tag_name):
            # str() turns a missing attribute into "None", which never
            # matches the prefixes below and is therefore left untouched.
            target = str(tag.get(attribute))
            if target.startswith(("/", ".")):
                tag[attribute] = urllib.parse.urljoin(WEBSITE, target)
    return str(soup)


def gen_stand_string(path_ex):
    """
    Create a html-string for a file.
    If the file is markdown it will convert it.
    This function ensures upscaling for future formats.

    Parameters:
    path_ex: path to a file, relative to ENTRY_DIR.

    Returns:
    string: html-formatted string equivalent to the file, empty if the file
    does not exist
    """
    # NOTE(review): literals reproduce the surviving SOURCE text; the HTML
    # markup appears to have been stripped -- restore from version control.
    filename = os.path.join(ENTRY_DIR, path_ex)
    content_string = ""
    if not path.exists(filename):
        return content_string
    title, text = _read_entry(filename)
    curr_date = datetime.fromtimestamp(os.path.getmtime(filename)).strftime("%Y-%m-%d")
    # Derive the stem exactly like prepare_tts names its mp3 files
    # (basename without the last extension) so the lookup below matches.
    filename_no_end = os.path.splitext(os.path.basename(filename))[0]
    content_string += "\n\n" + title + "\n\n\n"
    content_string += "" + curr_date + ""
    content_string += "\n\n\n"
    if os.path.isfile("static/tmp/" + filename_no_end + ".mp3"):
        content_string += "\n\n\n"
        content_string += "\n\n\n"
    if filename.endswith(".html"):
        content_string += "".join(text)
    if filename.endswith(".md"):
        content_string += gen_md_content(filename, 1)
    return absolutize_html(content_string)


def gen_md_content(path_ex, depth):
    """
    Convert a markdown file to a html string.

    The first line of the file (the title) is skipped. Every "# " in the
    remaining lines is replaced by a heading prefix that is `depth` levels
    deeper.

    Parameters:
    path_ex (string): path to the markdown file
    depth (int): starting depth for markdown headings

    Returns:
    string: html-formatted string equivalent to the markdown file, empty if
    the file does not exist
    """
    content_string = ""
    if path.exists(path_ex):
        header = "#" + "#" * depth + " "
        with open(path_ex, "r") as md_file:
            markdown_lines = md_file.readlines()[1:]
        markdown_text = "".join(line.replace("# ", header) for line in markdown_lines)
        content_string = markdown.markdown(markdown_text,
                                           extensions=["fenced_code", "tables"])
    return content_string


def get_rss_string():
    """
    Create a rss-string of the blog and return it.

    Returns:
    string: rss-string of everything that is in the ENTRY_DIR.
    """
    # NOTE(review): literals reproduce the surviving SOURCE text; the XML
    # markup appears to have been stripped (only "]]>" remains) -- restore
    # from version control.
    content_string = ""
    if not path.exists(ENTRY_DIR):
        return content_string
    for file in _entries_newest_first():
        title, text = _read_entry(file)
        filename = pathlib.PurePath(file).name
        if filename[0] != ".":
            filename = filename.split(".", 1)[0]
        content_string += "\n"
        content_string += "" + title + "\n"
        content_string += "" + WEBSITE + \
            "/index.html#" + filename + "\n"
        # %a and %b are locale dependent; format the date under en_US and
        # always switch back, even if formatting raises.
        locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
        try:
            pub_date = datetime.fromtimestamp(os.path.getmtime(file)).strftime(
                "%a, %d %b %Y %H:%M:%S")
        finally:
            locale.setlocale(locale.LC_TIME, LOCAL)
        content_string += "" + pub_date + " " + TIMEZONE + "\n"
        content_string += "\n\n\n\n\n"
        content_string += absolutize_html("".join(text))
        content_string += "\n\n]]>\n\n"
        content_string += "\n"
    return content_string
def gen_query_res_string(query_str):
    """
    Return the results of a query.

    Parameters:
    query_str (string): term to search

    Returns:
    string: html-formatted search result
    """
    # NOTE(review): literals reproduce the surviving SOURCE text; the HTML
    # markup appears to have been stripped -- restore from version control.
    res_string = ""
    for result in search.search(query_str):
        title = result["title"]
        # Named entry_path so the module-level `path` (os.path) is not shadowed.
        entry_path = result["path"]
        filename = pathlib.PurePath(entry_path).name
        if filename[0] != ".":
            filename = filename.split(".", 1)[0]
        curr_date = datetime.fromtimestamp(
            os.path.getmtime(entry_path)).strftime("%Y-%m-%d")
        preview = create_preview(entry_path, entry_path.endswith(".md"))
        # NOTE(review): `filename` and `entry_url` are not referenced by the
        # surviving literals; presumably they fed the stripped markup.
        entry_url = "/entry/" + entry_path.split("/", 2)[2]
        res_string += "\n\n"
        res_string += "\n\n" + title + "\n\n"
        res_string += ""
        res_string += "" + curr_date + ""
        res_string += "\n\n"
        res_string += preview + "\n\n"
    return res_string


def create_preview(path, is_markdown):
    """
    Create a preview of a given article and return it.

    The preview is built from the text of the first <p> element found in the
    file (after appending the rendered markdown when is_markdown is set).

    Parameters:
    path (string): path to the article
    is_markdown (bool): whether the article has to be rendered from markdown

    Returns:
    string: html-formatted preview, empty if no paragraph was found
    """
    # NOTE(review): literals reproduce the surviving SOURCE text; the HTML
    # markup appears to have been stripped -- restore from version control.
    with open(path, "r", encoding="utf-8") as file:
        lines = file.read()
    if is_markdown:
        lines += markdown.markdown(lines)
    preview = ""
    # Name the parser explicitly, like every other BeautifulSoup call in this
    # file, so the result does not depend on which parsers are installed.
    first_p = BeautifulSoup(lines, "html.parser").find("p")
    if first_p is not None:
        preview = "\n\n\n" + first_p.text + "\n\n\n"
        preview += "...\n"
    return preview


def get_text_only(filename):
    """
    Convert a file to text only to use in tts.

    Parameters:
    filename (string): path to the article

    Returns:
    string: unformatted string containing the title and the contents of the
    file, empty if the file does not exist
    """
    clean_text = ""
    if not path.exists(filename):
        return clean_text
    with open(filename) as entry:
        entry_lines = entry.readlines()
    title = entry_lines[0].rstrip("\n") if entry_lines else ""
    content_string = ""
    if filename.endswith(".html"):
        content_string += "".join(entry_lines[1:])
    if filename.endswith(".md"):
        content_string += gen_md_content(filename, 1)
    content_string = absolutize_html(content_string)
    soup = BeautifulSoup(content_string, "html.parser")
    # Drop the first <figure> so it is not read out.
    tag_to_remove = soup.find("figure")
    if tag_to_remove:
        tag_to_remove.decompose()
    clean_text = title + "\n\n" + soup.get_text(separator=" ")
    return clean_text


def prepare_tts():
    """
    Regenerate the mp3 readings of all entries.

    Deletes everything matching 'static/tmp/*', then writes one
    "static/tmp/<entry name without extension>.mp3" per file matching
    'templates/entry/*'. Stops at the first gTTS failure.

    Returns:
    gTTSError: the error of the first failed conversion, None if all
    conversions succeeded
    """
    for stale in glob.glob('static/tmp/*'):
        os.remove(stale)
    for entry in glob.glob('templates/entry/*'):
        clean_text = get_text_only(entry)
        _, tail = os.path.split(entry)
        new_filename = "static/tmp/" + os.path.splitext(tail)[0] + ".mp3"
        try:
            tts = gTTS(clean_text, lang=LANGUAGE.split("-")[0])
            tts.save(new_filename)
        except gTTSError as e:
            print("Too many request to the google servers. Try it again later.")
            # Remove a partially written file, but only if one was created;
            # otherwise FileNotFoundError would mask the gTTS error.
            if os.path.exists(new_filename):
                os.remove(new_filename)
            return e