diff --git a/knowledgehub/__init__.py b/knowledgehub/__init__.py
index c88c13c..48255eb 100644
--- a/knowledgehub/__init__.py
+++ b/knowledgehub/__init__.py
@@ -22,4 +22,4 @@ try:
 except ImportError:
     pass
 
-__version__ = "0.3.1"
+__version__ = "0.3.2"
diff --git a/knowledgehub/base/component.py b/knowledgehub/base/component.py
index 25505a7..132d597 100644
--- a/knowledgehub/base/component.py
+++ b/knowledgehub/base/component.py
@@ -1,9 +1,9 @@
 from abc import abstractmethod
 
-from theflow.base import Compose
+from theflow.base import Function
 
 
-class BaseComponent(Compose):
+class BaseComponent(Function):
     """A component is a class that can be used to compose a pipeline
 
     Benefits of component:
diff --git a/knowledgehub/chatbot/base.py b/knowledgehub/chatbot/base.py
index ac01675..846f26d 100644
--- a/knowledgehub/chatbot/base.py
+++ b/knowledgehub/chatbot/base.py
@@ -2,7 +2,7 @@ from abc import abstractmethod
 from typing import List, Optional
 
 from langchain.schema.messages import AIMessage, SystemMessage
-from theflow import Param, SessionCompose
+from theflow import SessionFunction
 
 from ..base import BaseComponent
 from ..base.schema import LLMInterface
@@ -20,7 +20,7 @@ def session_chat_storage(obj):
     return obj._store_result
 
 
-class ChatConversation(SessionCompose):
+class ChatConversation(SessionFunction):
     """Base implementation of a chat bot component
 
     A chatbot component should:
@@ -31,7 +31,7 @@ class ChatConversation(SessionCompose):
     class Config:
         store_result = session_chat_storage
 
-    system_message: Param[str] = Param(default="")
+    system_message: str = ""
     bot: BaseChatBot
 
     def __init__(self, *args, **kwargs):
diff --git a/knowledgehub/chatbot/simple_respondent.py b/knowledgehub/chatbot/simple_respondent.py
index 6da25e3..6a14a63 100644
--- a/knowledgehub/chatbot/simple_respondent.py
+++ b/knowledgehub/chatbot/simple_respondent.py
@@ -1,5 +1,3 @@
-from theflow import Node
-
 from ..llms import ChatLLM
 from .base import BaseChatBot
 
@@ -7,7 +5,7 @@ from .base import BaseChatBot
 class SimpleRespondentChatbot(BaseChatBot):
     """Simple text respondent chatbot that essentially wraps around a chat LLM"""
 
-    llm: Node[ChatLLM]
+    llm: ChatLLM
 
     def _get_message(self) -> str:
         return self.llm(self.history).text
diff --git a/knowledgehub/contribs/docs.py b/knowledgehub/contribs/docs.py
index 59dff61..0b3e275 100644
--- a/knowledgehub/contribs/docs.py
+++ b/knowledgehub/contribs/docs.py
@@ -1,7 +1,7 @@
 import inspect
 from collections import defaultdict
 
-from theflow.utils.documentation import get_compose_documentation_from_module
+from theflow.utils.documentation import get_function_documentation_from_module
 
 
 def from_definition_to_markdown(definition: dict) -> str:
@@ -38,7 +38,7 @@
 
 
 def make_doc(module: str, output: str, separation_level: int):
-    """Run exporting from compose to markdown
+    """Run exporting components to markdown
 
     Args:
         module (str): module name
@@ -46,7 +46,7 @@
         separation_level (int): level of separation
     """
     documentation = sorted(
-        get_compose_documentation_from_module(module).items(), key=lambda x: x[0]
+        get_function_documentation_from_module(module).items(), key=lambda x: x[0]
     )
 
     entries = defaultdict(list)
diff --git a/knowledgehub/pipelines/cot.py b/knowledgehub/pipelines/cot.py
index e6100b7..4ef4416 100644
--- a/knowledgehub/pipelines/cot.py
+++ b/knowledgehub/pipelines/cot.py
@@ -1,10 +1,10 @@
 from copy import deepcopy
-from typing import List
+from typing import Callable, List
 
-from theflow import Compose, Node, Param
+from theflow import Function, Node, Param
 
 from kotaemon.base import BaseComponent
-from kotaemon.llms import BasePromptComponent
+from kotaemon.llms import LLM, BasePromptComponent
 from kotaemon.llms.chats.openai import AzureChatOpenAI
 
 
@@ -64,15 +64,13 @@
     is created.
     """
 
-    prompt: Param[str] = Param(
+    prompt: str = Param(
         help="The prompt template string. This prompt template has Python-like "
         "variable placeholders, that then will be subsituted with real values when "
         "this component is executed"
     )
-    llm: Node[BaseComponent] = Node(
-        AzureChatOpenAI, help="The LLM model to execute the input prompt"
-    )
-    post_process: Node[Compose] = Node(
+    llm: LLM = Node(AzureChatOpenAI, help="The LLM model to execute the input prompt")
+    post_process: Function = Node(
         help="The function post-processor that post-processes LLM output prediction ."
         "It should take a string as input (this is the LLM output text) and return "
         "a dictionary, where the key should"
@@ -139,11 +137,11 @@
     returns False.
     """
 
-    thoughts: Param[List[Thought]] = Param(
+    thoughts: List[Thought] = Param(
         default_callback=lambda *_: [], help="List of Thought"
     )
-    llm: Param = Param(help="The LLM model to use (base of kotaemon.llms.LLM)")
-    terminate: Param = Param(
+    llm: LLM = Param(help="The LLM model to use (base of kotaemon.llms.LLM)")
+    terminate: Callable = Param(
         default=lambda _: False,
         help="Callback on terminate condition. Default to always return False",
     )
diff --git a/knowledgehub/pipelines/indexing.py b/knowledgehub/pipelines/indexing.py
index 97e75a6..52db865 100644
--- a/knowledgehub/pipelines/indexing.py
+++ b/knowledgehub/pipelines/indexing.py
@@ -2,8 +2,7 @@ from __future__ import annotations
 
 import uuid
 from pathlib import Path
-
-from theflow import Node, Param
+from typing import cast
 
 from ..base import BaseComponent, Document
 from ..embeddings import BaseEmbeddings
@@ -22,9 +21,9 @@ class IndexVectorStoreFromDocumentPipeline(BaseComponent):
     - List of texts
     """
 
-    vector_store: Param[BaseVectorStore] = Param()
-    doc_store: Param[BaseDocumentStore] = Param()
-    embedding: Node[BaseEmbeddings] = Node()
+    vector_store: BaseVectorStore
+    doc_store: BaseDocumentStore
+    embedding: BaseEmbeddings
 
     # TODO: refer to llama_index's storage as well
     def run(self, text: str | list[str] | Document | list[Document]) -> None:
@@ -32,7 +31,7 @@
         if not isinstance(text, list):
             text = [text]
 
-        for item in text:
+        for item in cast(list, text):
             if isinstance(item, str):
                 input_.append(Document(text=item, id_=str(uuid.uuid4())))
             elif isinstance(item, Document):
diff --git a/knowledgehub/pipelines/retrieving.py b/knowledgehub/pipelines/retrieving.py
index 67bf26b..cc69924 100644
--- a/knowledgehub/pipelines/retrieving.py
+++ b/knowledgehub/pipelines/retrieving.py
@@ -3,8 +3,6 @@ from __future__ import annotations
 from pathlib import Path
 from typing import Optional, Sequence
 
-from theflow import Node, Param
-
 from ..base import BaseComponent
 from ..base.schema import Document, RetrievedDocument
 from ..embeddings import BaseEmbeddings
@@ -18,9 +16,9 @@ DOC_STORE_FNAME = "docstore"
 class RetrieveDocumentFromVectorStorePipeline(BaseComponent):
     """Retrieve list of documents from vector store"""
 
-    vector_store: Param[BaseVectorStore] = Param()
-    doc_store: Param[BaseDocumentStore] = Param()
-    embedding: Node[BaseEmbeddings] = Node()
+    vector_store: BaseVectorStore
+    doc_store: BaseDocumentStore
+    embedding: BaseEmbeddings
     rerankers: Sequence[BaseRerankingPipeline] = []
     top_k: int = 1
     # TODO: refer to llama_index's storage as well
diff --git a/knowledgehub/pipelines/tools/base.py b/knowledgehub/pipelines/tools/base.py
index 5a55912..b06f2c4 100644
--- a/knowledgehub/pipelines/tools/base.py
+++ b/knowledgehub/pipelines/tools/base.py
@@ -1,4 +1,3 @@
-from abc import abstractmethod
 from typing import Any, Callable, Dict, Optional, Tuple, Type, Union
 
 from langchain.agents import Tool as LCTool
@@ -51,13 +50,13 @@ class BaseTool(BaseComponent):
             return {k: v for k, v in result.dict().items() if k in tool_input}
         return tool_input
 
-    @abstractmethod
     def _run_tool(
         self,
         *args: Any,
         **kwargs: Any,
     ) -> Any:
         """Call tool."""
+        raise NotImplementedError(f"_run_tool is not implemented for {self.name}")
 
     def _to_args_and_kwargs(self, tool_input: Union[str, Dict]) -> Tuple[Tuple, Dict]:
         # For backwards compatibility, if run_input is a string,
diff --git a/setup.py b/setup.py
index f1f9a4b..a60a1ff 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ setuptools.setup(
     install_requires=[
         "langchain",
         "theflow",
-        "llama-index",
+        "llama-index>=0.9.0",
         "llama-hub",
         "gradio",
         "openpyxl",
diff --git a/tests/simple_pipeline.py b/tests/simple_pipeline.py
index 295eead..7a0ed20 100644
--- a/tests/simple_pipeline.py
+++ b/tests/simple_pipeline.py
@@ -35,3 +35,6 @@
     def run_raw(self, text: str) -> str:
         matched_texts: List[str] = self.retrieving_pipeline(text)
         return self.llm("\n".join(matched_texts)).text
+
+    def run(self):
+        ...
diff --git a/tests/test_indexing_retrieval.py b/tests/test_indexing_retrieval.py
index cd0cb50..59ed090 100644
--- a/tests/test_indexing_retrieval.py
+++ b/tests/test_indexing_retrieval.py
@@ -34,6 +34,7 @@ def test_indexing(mock_openai_embedding, tmp_path):
         vector_store=db, embedding=embedding, doc_store=doc_store
     )
     pipeline.doc_store = cast(InMemoryDocumentStore, pipeline.doc_store)
+    pipeline.vector_store = cast(ChromaVectorStore, pipeline.vector_store)
     assert pipeline.vector_store._collection.count() == 0, "Expected empty collection"
     assert len(pipeline.doc_store._store) == 0, "Expected empty doc store"
     pipeline(text=Document(text="Hello world"))