kotaemon/knowledgehub/loaders/base.py
Tuan Anh Nguyen Dang (Tadashi_Cin) 21350153d4 [AUR-391, AUR-393] Add Document and DocumentReader base (#6)
* Declare BaseComponent

* Brainstorming base class for LLM call

* Define base LLM

* Add tests

* Clean telemetry environment for accurate testing

* Fix README

* Fix typing

* add base document reader

* update test

* update requirements

* Cosmetic change

* update requirements

* reformat

---------

Co-authored-by: trducng <trungduc1992@gmail.com>
2023-08-31 11:24:12 +07:00

27 lines
941 B
Python

from pathlib import Path
from typing import Any, List, Type, Union
from llama_index import download_loader
from llama_index.readers.base import BaseReader
from ..documents.base import Document
class AutoReader(BaseReader):
    """Generic reader that delegates file loading to a llama-hub reader.

    The wrapped reader does the actual loading; this class only re-wraps
    the documents it returns into kotaemon's own ``Document`` class.
    """

    def __init__(self, reader_type: Union[str, Type[BaseReader]]) -> None:
        """Create the wrapped reader.

        Args:
            reader_type: either a string identifier, which is resolved to a
                reader class through ``download_loader``, or a reader class
                that is instantiated directly (with no arguments).
        """
        loader_cls = (
            download_loader(reader_type)
            if isinstance(reader_type, str)
            else reader_type
        )
        self._reader = loader_cls()

    def load_data(self, file: Union[Path, str], **kwargs: Any) -> List[Document]:
        """Load ``file`` with the wrapped reader and convert the results.

        Args:
            file: path of the file to load; passed to the wrapped reader as
                the ``file`` keyword argument.
            **kwargs: extra keyword arguments forwarded unchanged to the
                wrapped reader's ``load_data``.

        Returns:
            One kotaemon ``Document`` per document returned by the wrapped
            reader, in the same order.
        """
        loaded = self._reader.load_data(file=file, **kwargs)
        # Round-trip each document through its dict form to rebuild it as
        # the kotaemon Document class.
        return [Document.from_dict(item.to_dict()) for item in loaded]