replaced parse_markdown with a separate library

This commit is contained in:
Ian Griffin 2025-11-12 18:16:46 +08:00
parent 91241a597b
commit 8f6096617d
1 changed file with 2 additions and 65 deletions

View File

@ -8,7 +8,6 @@
import os
import mimetypes
import re
import readline
from argparse import ArgumentParser
@ -34,7 +33,7 @@ from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from sys import stderr
from termcolor import colored
from term_color_md import render as md_render
from typing import NotRequired, Sequence
from typing_extensions import Annotated, TypedDict
from urllib.parse import urlparse
@ -152,7 +151,6 @@ def main():
]
)
history_aware_retriever = create_history_aware_retriever(
llm, retriever, contextualize_q_prompt
)
@ -226,7 +224,7 @@ def main():
print()
break
print(colored("A:", "green", attrs=["reverse"]), parse_markdown(app.invoke({"input": question},
print(colored("A:", "green", attrs=["reverse"]), md_render(app.invoke({"input": question},
config=config)["answer"]), end="\n\n")
# We define a dict representing the state of the application.
@ -237,66 +235,5 @@ class State(TypedDict):
context: NotRequired[str]
answer: NotRequired[str]
# Inline patterns are compiled once instead of on every line.  They use
# lookarounds rather than consuming a neighbouring character, so a span can
# sit at the very start or end of a line and the characters around it are not
# swept into the styled text.  The back-reference (\1) forces the closing
# delimiter to be the same as the opening one.
_BOLD_RE = re.compile(r"(?<![*_])(\*\*|__)(?!\s)(.+?)(?<!\s)\1(?![*_])")
# The opening marker must be followed by, and the closing marker preceded by,
# a non-space character; this keeps arithmetic such as "2 * 3 * 4" untouched.
_ITALIC_RE = re.compile(r"(?<![*_])(\*|_)(?![\s*_])(.+?)(?<![\s*_])\1(?![*_])")
_INLINE_CODE_RE = re.compile(r"`[^`]+`")
# One to six hashes followed by a space, then the heading text.
_HEADER_RE = re.compile(r"(#{1,6}) (.*)")
# Optional indentation, a bullet ("-" or "*") or a number ("1."), then one
# whitespace character.
_LIST_MARKER_RE = re.compile(r"(\s*(?:[-*]|\d+\.))\s")

# Heading level -> (colour, attributes).  Levels without an entry (3 to 6)
# use the default style.
_HEADER_STYLES = {
    1: ("blue", ("bold", "underline")),
    2: ("blue", ("bold",)),
}
_DEFAULT_HEADER_STYLE = ("cyan", ("bold",))


def _style_inline(fragment):
    """Style bold, italic and inline-code spans in *fragment*, keeping the markers."""
    fragment = _BOLD_RE.sub(
        lambda m: colored(m.group(), attrs=["bold"]), fragment
    )
    fragment = _ITALIC_RE.sub(
        lambda m: colored(m.group(), attrs=["underline"]), fragment
    )
    # The matched text already includes both backticks; nothing is appended.
    return _INLINE_CODE_RE.sub(lambda m: colored(m.group(), "green"), fragment)


def parse_markdown(text):
    """Render a Markdown string as terminal text styled with ``colored``.

    Handled line by line:

    * fenced code blocks: the fence lines are dropped, the content is green;
    * headings of level 1-6: the hashes are removed, the text is coloured;
    * blockquotes: the leading ">" characters are removed, the text is yellow;
    * any line containing "|": treated as a table row, cells joined by a tab
      and a pipe, with no colouring;
    * list items: the bullet or number is cyan;
    * inline bold, italic and code spans: styled with their markers kept.

    Args:
        text: The Markdown source; may be empty.

    Returns:
        The rendered text, each input line terminated by a newline.  An empty
        input yields an empty string.
    """
    rendered = []
    in_code_block = False

    for line in text.splitlines():
        # A fence toggles code mode and is itself never printed.
        if line.startswith("```"):
            in_code_block = not in_code_block
            continue
        if in_code_block:
            rendered.append(colored(line + "\n", "green"))
            continue

        header = _HEADER_RE.match(line)
        if header:
            color, attrs = _HEADER_STYLES.get(
                len(header.group(1)), _DEFAULT_HEADER_STYLE
            )
            title = header.group(2).strip()
            rendered.append(colored(title, color, attrs=list(attrs)) + "\n")
            continue

        if line.startswith(">"):
            rendered.append(colored(line.lstrip(">").strip(), "yellow") + "\n")
            continue

        if "|" in line:
            rendered.append("\t| ".join(line.split("|")).strip() + "\n")
            continue

        # Split the list marker off before inline styling so that a leading
        # "*" bullet can never be taken for an opening emphasis marker.
        marker = _LIST_MARKER_RE.match(line)
        if marker:
            prefix = colored(marker.group(1), "cyan") + " "
            body = line[marker.end():]
        else:
            prefix = ""
            body = line
        rendered.append(prefix + _style_inline(body) + "\n")

    return "".join(rendered)
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
main()