* enforce Document as IO * Separate rerankers, splitters and extractors (#85) * partially refactor importing * add text to embedding outputs --------- Co-authored-by: Nguyen Trung Duc (john) <trungduc1992@gmail.com>
38 lines
1.0 KiB
Python
38 lines
1.0 KiB
Python
from abc import abstractmethod
|
|
|
|
from theflow.base import Function
|
|
|
|
from kotaemon.base.schema import Document
|
|
|
|
|
|
class BaseComponent(Function):
    """Building block from which pipelines are composed.

    Why use a component:
        - Auto caching, logging
        - Allow deployment

    Design spirit shared by every component:
        - Be lenient on input: accept several input types, e.g. str,
          Document, List[str], List[Document].
        - Be strict on output: expose a single output type, which should
          therefore be as generic as possible.
    """

    # Upstream component whose flow() result is fed to this component.
    # Stays None until something assigns a BaseComponent to it.
    inflow = None

    def flow(self):
        """Call this component on the result of its upstream component.

        The upstream component (`self.inflow`) is resolved first through its
        own `flow()`, and that result is passed to `self.__call__`.

        NOTE(review): as written here, each `flow()` requires its own
        `inflow`, so the recursion raises once it reaches a component whose
        `inflow` is None -- presumably the head of a chain is expected to
        override `flow()`; confirm against callers.

        Returns:
            Whatever `self.__call__` returns for the upstream result.

        Raises:
            ValueError: if `inflow` is None, or is not a BaseComponent.
        """
        if self.inflow is None:
            raise ValueError("No inflow provided.")

        if isinstance(self.inflow, BaseComponent):
            upstream_result = self.inflow.flow()
            return self.__call__(upstream_result)

        raise ValueError(
            f"inflow must be a BaseComponent, found {type(self.inflow)}"
        )

    @abstractmethod
    def run(self, *args, **kwargs) -> Document | list[Document] | None:
        """Run the component.

        Declared abstract: concrete components are expected to supply the
        implementation.
        """
|