From 6a6b41250eb2903af75be6934a08bbebcfb9321b Mon Sep 17 00:00:00 2001 From: Ian Griffin Date: Mon, 28 Oct 2024 14:24:09 +0800 Subject: [PATCH] multi types of text support --- ragger.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ragger.py b/ragger.py index e976cf3..823ce77 100755 --- a/ragger.py +++ b/ragger.py @@ -7,7 +7,6 @@ # You should have received a copy of the GNU General Public License along with this program. If not, see . import os -import marko import mimetypes import re @@ -59,7 +58,7 @@ def main(): # loaders = { - "text/plain": lambda file: TextLoader(file).load(), + "text": lambda file: TextLoader(file).load(), "application/pdf": lambda file: PyPDFLoader(file).load(), "url": lambda file: WebBaseLoader(file).load(), } @@ -81,6 +80,9 @@ def main(): # detect filetype else: mimetype, _ = mimetypes.guess_type(path) + if mimetype is not None and mimetype.startswith("text/"): # guess_type() may return None + mimetype = "text" + if mimetype not in loaders: raise ValueError("Unsupported file type: %s" % mimetype) else: