Replaced parse_markdown with a separate library (term_color_md)
This commit is contained in:
parent
91241a597b
commit
8f6096617d
67
ragger.py
67
ragger.py
|
|
@ -8,7 +8,6 @@
|
|||
|
||||
import os
|
||||
import mimetypes
|
||||
import re
|
||||
import readline
|
||||
|
||||
from argparse import ArgumentParser
|
||||
|
|
@ -34,7 +33,7 @@ from langgraph.checkpoint.memory import MemorySaver
|
|||
from langgraph.graph import START, StateGraph
|
||||
from langgraph.graph.message import add_messages
|
||||
from sys import stderr
|
||||
from termcolor import colored
|
||||
from term_color_md import render as md_render
|
||||
from typing import NotRequired, Sequence
|
||||
from typing_extensions import Annotated, TypedDict
|
||||
from urllib.parse import urlparse
|
||||
|
|
@ -152,7 +151,6 @@ def main():
|
|||
]
|
||||
)
|
||||
|
||||
|
||||
history_aware_retriever = create_history_aware_retriever(
|
||||
llm, retriever, contextualize_q_prompt
|
||||
)
|
||||
|
|
@ -226,7 +224,7 @@ def main():
|
|||
print()
|
||||
break
|
||||
|
||||
print(colored("A:", "green", attrs=["reverse"]), parse_markdown(app.invoke({"input": question},
|
||||
print(colored("A:", "green", attrs=["reverse"]), md_render(app.invoke({"input": question},
|
||||
config=config)["answer"]), end="\n\n")
|
||||
|
||||
# We define a dict representing the state of the application.
|
||||
|
|
@ -237,66 +235,5 @@ class State(TypedDict):
|
|||
context: NotRequired[str]
|
||||
answer: NotRequired[str]
|
||||
|
||||
def parse_markdown(text):
    """Render a small subset of Markdown as ANSI-coloured terminal text.

    Handled constructs, checked per line in this order:

    * fenced code blocks (the fence lines are dropped, the content is green);
    * ATX headers ``#`` to ``######`` (marker stripped; levels 1 and 2 are
      blue, deeper levels cyan);
    * blockquotes (marker stripped, yellow);
    * table rows (any line containing ``|``; cells are tab-separated);
    * inline code, bold and italic (markers are kept, the span is styled);
    * bulleted and numbered list markers (cyan).

    Args:
        text: The Markdown source to render.

    Returns:
        The rendered text. Every input line that is kept contributes exactly
        one output line terminated by a newline; empty input yields "".
    """
    # Function-scope import so the block does not depend on the module's
    # import list still providing ``re``.
    import re

    # One combined pattern, applied in a single pass, so that the escape
    # sequences emitted for one construct are never re-scanned by another.
    # Lookarounds (instead of consuming neighbour characters) let markup
    # match at the very start/end of a line and keep neighbours unstyled.
    # Inline code comes first so emphasis markers inside it are left alone.
    inline_pattern = re.compile(
        r"(?P<code>`[^`]+`)"
        r"|(?P<bold>(?<![*_])(?P<bold_mark>\*\*|__)(?=\S)"
        r"(?:.+?)(?<=\S)(?P=bold_mark)(?![*_]))"
        # The \w guards stop snake_case names and spaced arithmetic such as
        # "2 * 3" from being mistaken for emphasis.
        r"|(?P<italic>(?<![*_\w])(?P<italic_mark>[*_])(?=[^\s*_])"
        r"(?:.+?)(?<=[^\s*_])(?P=italic_mark)(?![*_\w]))"
    )
    header_pattern = re.compile(r"(#{1,6}) ")
    bullet_pattern = re.compile(r"^(\s*[-*])\s")
    numbered_pattern = re.compile(r"^(\s*\d+\.)\s")

    # Header level -> (colour, attributes); deeper levels share one style.
    header_styles = {
        1: ("blue", ["bold", "underline"]),
        2: ("blue", ["bold"]),
    }
    deep_header_style = ("cyan", ["bold"])

    def style_inline(match):
        # Style the whole match, markers included.
        if match.group("code") is not None:
            return colored(match.group(), "green")
        if match.group("bold") is not None:
            return colored(match.group(), attrs=["bold"])
        return colored(match.group(), attrs=["underline"])

    def style_list_marker(match):
        # Colour only the marker; the separating whitespace becomes one space.
        return colored(match.group(1), "cyan") + " "

    rendered = []
    in_code_block = False

    for line in text.splitlines():
        # Fences may be indented (e.g. inside a list item); the fence line
        # itself is not emitted.
        if line.lstrip().startswith("```"):
            in_code_block = not in_code_block
            continue
        if in_code_block:
            rendered.append(colored(line + "\n", "green"))
            continue

        header = header_pattern.match(line)
        if header:
            color, attrs = header_styles.get(
                len(header.group(1)), deep_header_style
            )
            header_text = line.lstrip("#").strip()
            rendered.append(colored(header_text, color, attrs=attrs) + "\n")
            continue

        if line.startswith(">"):
            quote_text = line.lstrip(">").strip()
            rendered.append(colored(quote_text, "yellow") + "\n")
            continue

        # Table rows: any line containing a pipe is treated as a row.
        if "|" in line:
            table_row = "\t| ".join(line.split("|")).strip()
            rendered.append(table_row + "\n")
            continue

        line = inline_pattern.sub(style_inline, line)
        line = bullet_pattern.sub(style_list_marker, line)
        line = numbered_pattern.sub(style_list_marker, line)
        rendered.append(line + "\n")

    return "".join(rendered)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
Loading…
Reference in New Issue