feat: integrate with docling (#471) bump:patch

* feat: add docling reader implementation

* feat: expose docling to UI

* fix: improve docling output parsing

* docs: update README

---------

Co-authored-by: Tadashi <tadashi@cinnamon.is>
This commit is contained in:
Quang (Albert)
2024-11-16 10:04:57 +07:00
committed by GitHub
parent 5b828c213c
commit 56c40f1c05
7 changed files with 271 additions and 13 deletions

View File

@@ -39,6 +39,7 @@ from kotaemon.indices.ingests.files import (
KH_DEFAULT_FILE_EXTRACTORS,
adobe_reader,
azure_reader,
docling_reader,
unstructured,
web_reader,
)
@@ -673,6 +674,8 @@ class IndexDocumentPipeline(BaseFileIndexIndexing):
readers[".pdf"] = adobe_reader
elif self.reader_mode == "azure-di":
readers[".pdf"] = azure_reader
elif self.reader_mode == "docling":
readers[".pdf"] = docling_reader
dev_readers, _, _ = dev_settings()
readers.update(dev_readers)
@@ -692,6 +695,7 @@ class IndexDocumentPipeline(BaseFileIndexIndexing):
"Azure AI Document Intelligence (figure+table extraction)",
"azure-di",
),
("Docling", "docling"),
],
"component": "dropdown",
},