diff --git a/libs/kotaemon/kotaemon/indices/ingests/files.py b/libs/kotaemon/kotaemon/indices/ingests/files.py
index 22e7db9..5800554 100644
--- a/libs/kotaemon/kotaemon/indices/ingests/files.py
+++ b/libs/kotaemon/kotaemon/indices/ingests/files.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+from typing import Type
 
 from llama_index.readers.base import BaseReader
 
@@ -14,6 +15,13 @@ from kotaemon.loaders import (
     UnstructuredReader,
 )
 
+KH_DEFAULT_FILE_EXTRACTORS: dict[str, Type[BaseReader]] = {
+    ".xlsx": PandasExcelReader,
+    ".docx": UnstructuredReader,
+    ".xls": UnstructuredReader,
+    ".doc": UnstructuredReader,
+}
+
 
 class DocumentIngestor(BaseComponent):
     """Ingest common office document types into Document for indexing
@@ -30,6 +38,8 @@ class DocumentIngestor(BaseComponent):
             - ocr: parse pdf image using flax
         doc_parsers: list of document parsers to parse the document
         text_splitter: splitter to split the document into text nodes
+        override_file_extractors: override file extractors for specific file extensions
+            The default file extractors are stored in `KH_DEFAULT_FILE_EXTRACTORS`
     """
 
     pdf_mode: str = "normal"  # "normal", "mathpix", "ocr"
@@ -38,26 +48,26 @@ class DocumentIngestor(BaseComponent):
         chunk_size=1024,
         chunk_overlap=256,
     )
+    override_file_extractors: dict[str, Type[BaseReader]] = {}
 
     def _get_reader(self, input_files: list[str | Path]):
         """Get appropriate readers for the input files based on file extension"""
-        file_extractor: dict[str, AutoReader | BaseReader] = {
-            ".xlsx": PandasExcelReader(),
-            ".docx": UnstructuredReader(),
-            ".xls": UnstructuredReader(),
-            ".doc": UnstructuredReader(),
+        file_extractors: dict[str, BaseReader] = {
+            ext: cls() for ext, cls in KH_DEFAULT_FILE_EXTRACTORS.items()
         }
+        for ext, cls in self.override_file_extractors.items():
+            file_extractors[ext] = cls()
 
         if self.pdf_mode == "normal":
-            file_extractor[".pdf"] = AutoReader("UnstructuredReader")
+            file_extractors[".pdf"] = AutoReader("UnstructuredReader")  # type: ignore
         elif self.pdf_mode == "ocr":
-            file_extractor[".pdf"] = OCRReader()
+            file_extractors[".pdf"] = OCRReader()
         else:
-            file_extractor[".pdf"] = MathpixPDFReader()
+            file_extractors[".pdf"] = MathpixPDFReader()
 
         main_reader = DirectoryReader(
             input_files=input_files,
-            file_extractor=file_extractor,
+            file_extractor=file_extractors,  # type: ignore
         )
 
         return main_reader
diff --git a/libs/kotaemon/kotaemon/loaders/__init__.py b/libs/kotaemon/kotaemon/loaders/__init__.py
index 369242e..d742b52 100644
--- a/libs/kotaemon/kotaemon/loaders/__init__.py
+++ b/libs/kotaemon/kotaemon/loaders/__init__.py
@@ -1,4 +1,5 @@
-from .base import AutoReader, DirectoryReader
+from .base import AutoReader, BaseReader
+from .composite_loader import DirectoryReader
 from .docx_loader import DocxReader
 from .excel_loader import PandasExcelReader
 from .html_loader import HtmlReader
@@ -8,6 +9,7 @@ from .unstructured_loader import UnstructuredReader
 
 __all__ = [
     "AutoReader",
+    "BaseReader",
     "PandasExcelReader",
     "MathpixPDFReader",
     "OCRReader",
diff --git a/libs/kotaemon/kotaemon/loaders/base.py b/libs/kotaemon/kotaemon/loaders/base.py
index cb92d5b..ca27e49 100644
--- a/libs/kotaemon/kotaemon/loaders/base.py
+++ b/libs/kotaemon/kotaemon/loaders/base.py
@@ -1,19 +1,25 @@
 from pathlib import Path
-from typing import Any, List, Type, Union
-
-from llama_index import SimpleDirectoryReader, download_loader
-from llama_index.readers.base import BaseReader
+from typing import TYPE_CHECKING, Any, List, Type, Union
 
 from kotaemon.base import BaseComponent, Document
 
+if TYPE_CHECKING:
+    from llama_index.readers.base import BaseReader as LIBaseReader
 
-class AutoReader(BaseComponent):
+
+class BaseReader(BaseComponent):
+    ...
+
+
+class AutoReader(BaseReader):
     """General auto reader for a variety of files. (based on llama-hub)"""
 
-    def __init__(self, reader_type: Union[str, Type[BaseReader]]) -> None:
+    def __init__(self, reader_type: Union[str, Type["LIBaseReader"]]) -> None:
         """Init reader using string identifier or class name from llama-hub"""
 
         if isinstance(reader_type, str):
+            from llama_index import download_loader
+
             self._reader = download_loader(reader_type)()
         else:
             self._reader = reader_type()
@@ -30,15 +36,30 @@ class AutoReader(BaseComponent):
         return self.load_data(file=file, **kwargs)
 
 
-class LIBaseReader(BaseComponent):
-    _reader_class: Type[BaseReader]
+class LIReaderMixin(BaseComponent):
+    """Base wrapper around llama-index reader
+
+    To use the LIReaderMixin, you need to implement the _get_wrapped_class method to
+    return the relevant llama-index reader class that you want to wrap.
+
+    Example:
+
+        ```python
+        class DirectoryReader(LIReaderMixin, BaseReader):
+            def _get_wrapped_class(self) -> Type["LIBaseReader"]:
+                from llama_index import SimpleDirectoryReader
+
+                return SimpleDirectoryReader
+        ```
+    """
+
+    def _get_wrapped_class(self) -> Type["LIBaseReader"]:
+        raise NotImplementedError(
+            "Please return the relevant llama-index reader class in _get_wrapped_class"
+        )
 
     def __init__(self, *args, **kwargs):
-        if self._reader_class is None:
-            raise AttributeError(
-                "Require `_reader_class` to set a BaseReader class from LlamarIndex"
-            )
-
+        self._reader_class = self._get_wrapped_class()
         self._reader = self._reader_class(*args, **kwargs)
         super().__init__()
 
@@ -60,7 +81,3 @@ class LIBaseReader(BaseComponent):
 
     def run(self, *args, **kwargs: Any) -> List[Document]:
         return self.load_data(*args, **kwargs)
-
-
-class DirectoryReader(LIBaseReader):
-    _reader_class = SimpleDirectoryReader
diff --git a/libs/kotaemon/kotaemon/loaders/composite_loader.py b/libs/kotaemon/kotaemon/loaders/composite_loader.py
new file mode 100644
index 0000000..9d35e26
--- /dev/null
+++ b/libs/kotaemon/kotaemon/loaders/composite_loader.py
@@ -0,0 +1,53 @@
+from typing import Callable, List, Optional, Type
+
+from llama_index.readers.base import BaseReader as LIBaseReader
+
+from .base import BaseReader, LIReaderMixin
+
+
+class DirectoryReader(LIReaderMixin, BaseReader):
+    """Wrap around llama-index SimpleDirectoryReader
+
+    Args:
+        input_dir (str): Path to the directory.
+        input_files (List): List of file paths to read
+            (Optional; overrides input_dir, exclude)
+        exclude (List): glob of python file paths to exclude (Optional)
+        exclude_hidden (bool): Whether to exclude hidden files (dotfiles).
+        encoding (str): Encoding of the files.
+            Default is utf-8.
+        errors (str): how encoding and decoding errors are to be handled,
+              see https://docs.python.org/3/library/functions.html#open
+        recursive (bool): Whether to recursively search in subdirectories.
+            False by default.
+        filename_as_id (bool): Whether to use the filename as the document id.
+            False by default.
+        required_exts (Optional[List[str]]): List of required extensions.
+            Default is None.
+        file_extractor (Optional[Dict[str, LIBaseReader]]): A mapping of file
+            extension to a llama-index reader instance that converts that file
+            to text. If not specified, use default from DEFAULT_FILE_READER_CLS.
+        num_files_limit (Optional[int]): Maximum number of files to read.
+            Default is None.
+        file_metadata (Optional[Callable[str, Dict]]): A function that takes
+            in a filename and returns a Dict of metadata for the Document.
+            Default is None.
+    """
+
+    input_dir: Optional[str] = None
+    input_files: Optional[List] = None
+    exclude: Optional[List] = None
+    exclude_hidden: bool = True
+    errors: str = "ignore"
+    recursive: bool = False
+    encoding: str = "utf-8"
+    filename_as_id: bool = False
+    required_exts: Optional[list[str]] = None
+    file_extractor: Optional[dict[str, "LIBaseReader"]] = None
+    num_files_limit: Optional[int] = None
+    file_metadata: Optional[Callable[[str], dict]] = None
+
+    def _get_wrapped_class(self) -> Type["LIBaseReader"]:
+        from llama_index import SimpleDirectoryReader
+
+        return SimpleDirectoryReader
diff --git a/libs/kotaemon/pyproject.toml b/libs/kotaemon/pyproject.toml
index 944e7e2..547ed12 100644
--- a/libs/kotaemon/pyproject.toml
+++ b/libs/kotaemon/pyproject.toml
@@ -11,14 +11,14 @@ packages.find.exclude = ["tests*", "env*"]
 # metadata and dependencies
 [project]
 name = "kotaemon"
-version = "0.3.6"
+version = "0.3.7"
 requires-python = ">= 3.10"
 description = "Kotaemon core library for AI development."
 dependencies = [
     "langchain",
     "langchain-community",
     "theflow",
-    "llama-index>=0.9.0",
+    "llama-index>=0.9.0,<0.10.0",
     "llama-hub",
     "gradio>=4.0.0",
     "openpyxl",
diff --git a/libs/kotaemon/tests/test_agent.py b/libs/kotaemon/tests/test_agent.py
index dad9a33..06a89c4 100644
--- a/libs/kotaemon/tests/test_agent.py
+++ b/libs/kotaemon/tests/test_agent.py
@@ -93,7 +93,7 @@ _openai_chat_completion_responses_react_langchain_tool = [
         (
             "I don't have prior knowledge about Cinnamon AI company, "
             "so I should gather information about it.\n"
-            "Action: Wikipedia\n"
+            "Action: wikipedia\n"
             "Action Input: Cinnamon AI company\n"
         ),
         (