Correct the use of abstractmethod (#80)

* Correct abstractmethod usage

* Update interface

* Specify minimal llama-index version [ignore cache]

* Update examples
Nguyen Trung Duc (john) 2023-11-20 11:18:53 +07:00 committed by GitHub
parent 98509f886c
commit 0a3fc4b228
12 changed files with 33 additions and 37 deletions

View File

@@ -22,4 +22,4 @@ try:
 except ImportError:
     pass
-__version__ = "0.3.1"
+__version__ = "0.3.2"

View File

@@ -1,9 +1,9 @@
 from abc import abstractmethod
-from theflow.base import Compose
+from theflow.base import Function
 
-class BaseComponent(Compose):
+class BaseComponent(Function):
     """A component is a class that can be used to compose a pipeline
 
     Benefits of component:

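For orientation, a minimal sketch of a component written against the renamed base, assuming the Function-based class keeps the same subclassing contract that Compose had (the component and its run method are illustrative, not from this repo):

from kotaemon.base import BaseComponent


class UppercaseComponent(BaseComponent):
    """Toy component that upper-cases its input."""

    def run(self, text: str) -> str:
        return text.upper()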
View File

@@ -2,7 +2,7 @@ from abc import abstractmethod
 from typing import List, Optional
 
 from langchain.schema.messages import AIMessage, SystemMessage
-from theflow import Param, SessionCompose
+from theflow import SessionFunction
 
 from ..base import BaseComponent
 from ..base.schema import LLMInterface
@@ -20,7 +20,7 @@ def session_chat_storage(obj):
     return obj._store_result
 
-class ChatConversation(SessionCompose):
+class ChatConversation(SessionFunction):
     """Base implementation of a chat bot component
 
     A chatbot component should:
@@ -31,7 +31,7 @@ class ChatConversation(SessionCompose):
     class Config:
         store_result = session_chat_storage
 
-    system_message: Param[str] = Param(default="")
+    system_message: str = ""
     bot: BaseChatBot
 
     def __init__(self, *args, **kwargs):

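The system_message change shows the interface update in miniature: an explicit Param[str] = Param(default="") descriptor becomes a plain annotated attribute with a plain default. A hedged wiring sketch under the new style (the import path and the llm instance are assumptions, not shown in this diff):

from kotaemon.chatbot import ChatConversation, SimpleRespondentChatbot  # import path assumed

bot = SimpleRespondentChatbot(llm=my_chat_llm)  # my_chat_llm: placeholder for any configured ChatLLM
conv = ChatConversation(system_message="You are a helpful assistant.", bot=bot)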
View File

@@ -1,5 +1,3 @@
-from theflow import Node
-
 from ..llms import ChatLLM
 from .base import BaseChatBot
@@ -7,7+5,7 @@ from .base import BaseChatBot
 class SimpleRespondentChatbot(BaseChatBot):
     """Simple text respondent chatbot that essentially wraps around a chat LLM"""
 
-    llm: Node[ChatLLM]
+    llm: ChatLLM
 
     def _get_message(self) -> str:
         return self.llm(self.history).text

View File

@@ -1,7 +1,7 @@
 import inspect
 from collections import defaultdict
 
-from theflow.utils.documentation import get_compose_documentation_from_module
+from theflow.utils.documentation import get_function_documentation_from_module
 
 
 def from_definition_to_markdown(definition: dict) -> str:
@@ -38,7 +38,7 @@ def from_definition_to_markdown(definition: dict) -> str:
 def make_doc(module: str, output: str, separation_level: int):
-    """Run exporting from compose to markdown
+    """Run exporting components to markdown
 
     Args:
         module (str): module name
@@ -46,7 +46,7 @@ def make_doc(module: str, output: str, separation_level: int):
         separation_level (int): level of separation
     """
     documentation = sorted(
-        get_compose_documentation_from_module(module).items(), key=lambda x: x[0]
+        get_function_documentation_from_module(module).items(), key=lambda x: x[0]
     )
 
     entries = defaultdict(list)

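Given the signature above, exporting a module's component documentation would presumably be invoked like this (the module name and output path are made up for illustration):

make_doc(module="kotaemon.llms", output="docs/llms.md", separation_level=1)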
View File

@@ -1,10 +1,10 @@
 from copy import deepcopy
-from typing import List
+from typing import Callable, List
 
-from theflow import Compose, Node, Param
+from theflow import Function, Node, Param
 
 from kotaemon.base import BaseComponent
-from kotaemon.llms import BasePromptComponent
+from kotaemon.llms import LLM, BasePromptComponent
 from kotaemon.llms.chats.openai import AzureChatOpenAI
@@ -64,15 +64,13 @@ class Thought(BaseComponent):
     is created.
     """
 
-    prompt: Param[str] = Param(
+    prompt: str = Param(
         help="The prompt template string. This prompt template has Python-like "
         "variable placeholders, that will then be substituted with real values when "
         "this component is executed"
     )
-    llm: Node[BaseComponent] = Node(
-        AzureChatOpenAI, help="The LLM model to execute the input prompt"
-    )
-    post_process: Node[Compose] = Node(
+    llm: LLM = Node(AzureChatOpenAI, help="The LLM model to execute the input prompt")
+    post_process: Function = Node(
         help="The function post-processor that post-processes the LLM output prediction. "
         "It should take a string as input (this is the LLM output text) and return "
         "a dictionary, where the key should"
@@ -139,11 +137,11 @@ class ManualSequentialChainOfThought(BaseComponent):
     returns False.
     """
 
-    thoughts: Param[List[Thought]] = Param(
+    thoughts: List[Thought] = Param(
        default_callback=lambda *_: [], help="List of Thought"
     )
-    llm: Param = Param(help="The LLM model to use (base of kotaemon.llms.LLM)")
-    terminate: Param = Param(
+    llm: LLM = Param(help="The LLM model to use (base of kotaemon.llms.LLM)")
+    terminate: Callable = Param(
        default=lambda _: False,
        help="Callback on terminate condition. Default to always return False",
     )

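A sketch of a Thought under the new annotations, following the help texts above: the prompt carries Python-style placeholders, and post_process maps the LLM output string to a dict. The import path is assumed, and the default AzureChatOpenAI node is assumed to be configured elsewhere (e.g. via environment variables):

from kotaemon.pipelines.cot import Thought  # import path assumed

thought = Thought(
    prompt="Summarize this text: {text}",
    post_process=lambda s: {"summary": s.strip()},  # str -> dict, per the help text
)
output = thought(text="kotaemon composes LLM pipelines from components.")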
View File

@@ -2,8 +2,7 @@ from __future__ import annotations
 
 import uuid
 from pathlib import Path
-
-from theflow import Node, Param
+from typing import cast
 
 from ..base import BaseComponent, Document
 from ..embeddings import BaseEmbeddings
@@ -22,9 +21,9 @@ class IndexVectorStoreFromDocumentPipeline(BaseComponent):
     - List of texts
     """
 
-    vector_store: Param[BaseVectorStore] = Param()
-    doc_store: Param[BaseDocumentStore] = Param()
-    embedding: Node[BaseEmbeddings] = Node()
+    vector_store: BaseVectorStore
+    doc_store: BaseDocumentStore
+    embedding: BaseEmbeddings
     # TODO: refer to llama_index's storage as well
 
     def run(self, text: str | list[str] | Document | list[Document]) -> None:
@@ -32,7 +31,7 @@ class IndexVectorStoreFromDocumentPipeline(BaseComponent):
         if not isinstance(text, list):
             text = [text]
 
-        for item in text:
+        for item in cast(list, text):
             if isinstance(item, str):
                 input_.append(Document(text=item, id_=str(uuid.uuid4())))
             elif isinstance(item, Document):

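The loop change is for the type checker, not the runtime: cast(list, text) performs no conversion or check, it only tells the checker to treat text as a list after the normalization above, which some checker configurations do not infer across the reassignment. A standalone illustration of the pattern:

from typing import cast


def as_list(x: "str | list[str]") -> "list[str]":
    if not isinstance(x, list):
        x = [x]
    # cast() has no runtime effect; it just asserts the narrowed type
    return cast(list, x)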
View File

@@ -3,8 +3,6 @@ from __future__ import annotations
 from pathlib import Path
 from typing import Optional, Sequence
 
-from theflow import Node, Param
-
 from ..base import BaseComponent
 from ..base.schema import Document, RetrievedDocument
 from ..embeddings import BaseEmbeddings
@@ -18,9 +16,9 @@ DOC_STORE_FNAME = "docstore"
 
 class RetrieveDocumentFromVectorStorePipeline(BaseComponent):
     """Retrieve list of documents from vector store"""
 
-    vector_store: Param[BaseVectorStore] = Param()
-    doc_store: Param[BaseDocumentStore] = Param()
-    embedding: Node[BaseEmbeddings] = Node()
+    vector_store: BaseVectorStore
+    doc_store: BaseDocumentStore
+    embedding: BaseEmbeddings
     rerankers: Sequence[BaseRerankingPipeline] = []
     top_k: int = 1
     # TODO: refer to llama_index's storage as well

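With the Param() and Node() descriptors gone, dependencies read as ordinary constructor arguments. A hedged wiring sketch (the store and embedding instances are placeholders for concrete implementations):

retriever = RetrieveDocumentFromVectorStorePipeline(
    vector_store=my_vector_store,  # placeholder: a concrete BaseVectorStore
    doc_store=my_doc_store,        # placeholder: a concrete BaseDocumentStore
    embedding=my_embedding,        # placeholder: a concrete BaseEmbeddings
    top_k=3,
)
docs = retriever("what is a component?")  # presumably a list of RetrievedDocument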
View File

@@ -1,4 +1,3 @@
-from abc import abstractmethod
 from typing import Any, Callable, Dict, Optional, Tuple, Type, Union
 
 from langchain.agents import Tool as LCTool
@@ -51,13 +50,13 @@ class BaseTool(BaseComponent):
             return {k: v for k, v in result.dict().items() if k in tool_input}
         return tool_input
 
-    @abstractmethod
     def _run_tool(
         self,
         *args: Any,
         **kwargs: Any,
     ) -> Any:
         """Call tool."""
+        raise NotImplementedError(f"_run_tool is not implemented for {self.name}")
 
     def _to_args_and_kwargs(self, tool_input: Union[str, Dict]) -> Tuple[Tuple, Dict]:
         # For backwards compatibility, if run_input is a string,

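This hunk is the fix the commit title refers to. @abstractmethod only enforces anything when the class uses ABCMeta; on this non-ABC component base it silently did nothing, so the decorator is dropped and the base body now raises NotImplementedError, which fails loudly the first time an un-overridden tool is run. Subclasses override exactly as before; a minimal hypothetical sketch:

from typing import Any


class EchoTool(BaseTool):
    name: str = "echo"

    def _run_tool(self, *args: Any, **kwargs: Any) -> Any:
        # Concrete override; without it, calling the tool would raise
        # NotImplementedError with the tool name in the message
        return args[0] if args else ""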
View File

@@ -32,7 +32,7 @@ setuptools.setup(
     install_requires=[
         "langchain",
         "theflow",
-        "llama-index",
+        "llama-index>=0.9.0",
         "llama-hub",
         "gradio",
         "openpyxl",

View File

@@ -35,3 +35,6 @@ class Pipeline(BaseComponent):
     def run_raw(self, text: str) -> str:
         matched_texts: List[str] = self.retrieving_pipeline(text)
         return self.llm("\n".join(matched_texts)).text
+
+    def run(self):
+        ...

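The stub presumably keeps the example aligned with the updated BaseComponent interface, which expects concrete components to provide run; run_raw stays the real entry point. A hypothetical invocation (constructor arguments are placeholders matching the attributes used above):

pipeline = Pipeline(llm=my_llm, retrieving_pipeline=my_retriever)
print(pipeline.run_raw("What is kotaemon?"))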
View File

@@ -34,6 +34,7 @@ def test_indexing(mock_openai_embedding, tmp_path):
         vector_store=db, embedding=embedding, doc_store=doc_store
     )
     pipeline.doc_store = cast(InMemoryDocumentStore, pipeline.doc_store)
+    pipeline.vector_store = cast(ChromaVectorStore, pipeline.vector_store)
     assert pipeline.vector_store._collection.count() == 0, "Expected empty collection"
     assert len(pipeline.doc_store._store) == 0, "Expected empty doc store"
     pipeline(text=Document(text="Hello world"))
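The added cast mirrors the interface change: vector_store is now annotated with the abstract BaseVectorStore, so the test narrows it back to the concrete ChromaVectorStore before touching the private _collection. Since cast() is checker-only, an equivalent runtime-checked narrowing, if preferred, would be:

# isinstance-based alternative to cast(); type checkers also narrow on assert
assert isinstance(pipeline.vector_store, ChromaVectorStore)
assert pipeline.vector_store._collection.count() == 0, "Expected empty collection"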