feat: add markdown file support (#202)

* feat: add support for Markdown (.md) files

* fix: disable "Download all" for private collections
This commit is contained in:
Tuan Anh Nguyen Dang (Tadashi_Cin) 2024-09-03 23:15:26 +07:00 committed by GitHub
parent 4f0785773d
commit 607867d7e6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 8 additions and 2 deletions

View File

@ -222,7 +222,7 @@ KH_INDICES = [
"config": {
"supported_file_types": (
".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
".pptx, .csv, .html, .mhtml, .txt, .zip"
".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
),
"private": False,
},
@ -233,7 +233,7 @@ KH_INDICES = [
"config": {
"supported_file_types": (
".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
".pptx, .csv, .html, .mhtml, .txt, .zip"
".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
),
"private": False,
},

View File

@ -18,6 +18,7 @@ from kotaemon.loaders import (
OCRReader,
PandasExcelReader,
PDFThumbnailReader,
TxtReader,
UnstructuredReader,
)
@ -47,6 +48,8 @@ KH_DEFAULT_FILE_EXTRACTORS: dict[str, BaseReader] = {
".tiff": unstructured,
".tif": unstructured,
".pdf": PDFThumbnailReader(),
".txt": TxtReader(),
".md": TxtReader(),
}

View File

@ -353,6 +353,9 @@ class FileIndexPage(BasePage):
return not is_zipped_state, new_button
def download_all_files(self):
if self._index.config.get("private", False):
raise gr.Error("This feature is not available for private collection.")
zip_files = []
for file_name in os.listdir(flowsettings.KH_CHUNKS_OUTPUT_DIR):
zip_files.append(os.path.join(flowsettings.KH_CHUNKS_OUTPUT_DIR, file_name))