From 8f6096617dbf42fa6a0cc2e94df7e0b7c486491b Mon Sep 17 00:00:00 2001 From: Ian Griffin Date: Wed, 12 Nov 2025 18:16:46 +0800 Subject: [PATCH] replaced parse_markdown with a separate library --- ragger.py | 67 ++----------------------------------------------------- 1 file changed, 2 insertions(+), 65 deletions(-) diff --git a/ragger.py b/ragger.py index 731ef6c..d572684 100755 --- a/ragger.py +++ b/ragger.py @@ -8,7 +8,6 @@ import os import mimetypes -import re import readline from argparse import ArgumentParser @@ -34,7 +33,7 @@ from langgraph.checkpoint.memory import MemorySaver from langgraph.graph import START, StateGraph from langgraph.graph.message import add_messages from sys import stderr -from termcolor import colored +from term_color_md import render as md_render from typing import NotRequired, Sequence from typing_extensions import Annotated, TypedDict from urllib.parse import urlparse @@ -152,7 +151,6 @@ def main(): ] ) - history_aware_retriever = create_history_aware_retriever( llm, retriever, contextualize_q_prompt ) @@ -226,7 +224,7 @@ def main(): print() break - print(colored("A:", "green", attrs=["reverse"]), parse_markdown(app.invoke({"input": question}, + print(colored("A:", "green", attrs=["reverse"]), md_render(app.invoke({"input": question}, config=config)["answer"]), end="\n\n") # We define a dict representing the state of the application. @@ -237,66 +235,5 @@ class State(TypedDict): context: NotRequired[str] answer: NotRequired[str] -def parse_markdown(text): - lines = text.splitlines() - formatted_text = "" - in_code_block = False - - for line in lines: - # Check for code blocks - if line.startswith("```"): - in_code_block = not in_code_block - continue # Skip the line with ``` - elif in_code_block: - formatted_text += colored(line + "\n", "green") - continue - - # Check for headers - if line.startswith("# "): - header_text = line.lstrip("#").strip() - formatted_text += colored(header_text, "blue", attrs=["bold", "underline"]) + "\n" - continue - - if line.startswith("## "): - header_text = line.lstrip("#").strip() - formatted_text += colored(header_text, "blue", attrs=["bold"]) + "\n" - continue - - if line.startswith("### "): - header_text = line.lstrip("#").strip() - formatted_text += colored(header_text, "cyan", attrs=["bold"]) + "\n" - continue - - # Check for blockquotes - if line.startswith(">"): - quote_text = line.lstrip(">").strip() - formatted_text += colored(quote_text, "yellow") + "\n" - continue - - # Check for tables (rows separated by "|") - if "|" in line: - table_row = "\t| ".join(line.split("|")).strip() - formatted_text += table_row + "\n" - continue - - # Inline formatting for bold, italic, and code (keeping the symbols) - # Bold (**text** or __text__) - line = re.sub(r"[^\*_](\*\*|__)(.+?)(\*\*|__)[^\*_]", lambda m: colored(m.group(), attrs=["bold"]), line) - # Italic (*text* or _text_) - line = re.sub(r"[^\*_](\*|_)([^\*_].+?[^\*_])(\*|_)[^\*_]", lambda m: colored(m.group(), attrs=["underline"]), line) - # Inline code (`code`) - line = re.sub(r"[^\*_](`)(.+?)`[^\*_]", lambda m: colored(m.group() + "`", "green"), line) - - # List items (bullets and numbers) - # Bulleted list - line = re.sub(r"^(\s*[-*])\s", lambda m: colored(m.group(1), "cyan") + " ", line) - # Numbered list - line = re.sub(r"^(\s*\d+\.)\s", lambda m: colored(m.group(1), "cyan") + " ", line) - - # Add processed line to formatted text - formatted_text += line + "\n" - - return formatted_text - if __name__ == "__main__": main()