kotaemon/knowledgehub/documents/base.py
Tuan Anh Nguyen Dang (Tadashi_Cin) 21350153d4 [AUR-391, AUR-393] Add Document and DocumentReader base (#6)
* Declare BaseComponent

* Brainstorming base class for LLM call

* Define base LLM

* Add tests

* Clean telemetry environment for accurate testing

* Fix README

* Fix typing

* add base document reader

* update test

* update requirements

* Cosmetic change

* update requirements

* reformat

---------

Co-authored-by: trducng <trungduc1992@gmail.com>
2023-08-31 11:24:12 +07:00

23 lines
736 B
Python

from haystack.schema import Document as HaystackDocument
from llama_index.schema import Document as BaseDocument
# Text placed into the sample document built by ``Document.example()``.
SAMPLE_TEXT = "A sample Document from kotaemon"
class Document(BaseDocument):
    """Base document class, mostly inherited from the llama-index Document class.

    On top of the inherited behaviour it provides a sample-instance
    constructor and a converter to the Haystack document format.
    """

    @classmethod
    def example(cls) -> "Document":
        """Build a small sample document.

        Returns:
            An instance of the class this method is called on, with
            ``SAMPLE_TEXT`` as its text and a fixed two-key metadata dict.
        """
        # Instantiate through ``cls`` (not the hard-coded ``Document``) so a
        # subclass calling ``example()`` receives an instance of itself.
        return cls(
            text=SAMPLE_TEXT,
            metadata={"filename": "README.md", "category": "codebase"},
        )

    def to_haystack_format(self) -> HaystackDocument:
        """Convert this document to the Haystack document format.

        Returns:
            A Haystack document whose ``content`` is this document's text and
            whose ``meta`` is this document's metadata, or an empty dict when
            the metadata is falsy.
        """
        metadata = self.metadata or {}
        text = self.text
        return HaystackDocument(content=text, meta=metadata)