diff --git a/knowledgehub/composite/__init__.py b/knowledgehub/composite/__init__.py deleted file mode 100644 index fac7d70..0000000 --- a/knowledgehub/composite/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .branching import GatedBranchingPipeline, SimpleBranchingPipeline -from .linear import GatedLinearPipeline, SimpleLinearPipeline - -__all__ = [ - "SimpleLinearPipeline", - "GatedLinearPipeline", - "SimpleBranchingPipeline", - "GatedBranchingPipeline", -] diff --git a/knowledgehub/post_processing/__init__.py b/knowledgehub/indexing/__init__.py similarity index 100% rename from knowledgehub/post_processing/__init__.py rename to knowledgehub/indexing/__init__.py diff --git a/knowledgehub/parsers/splitter.py b/knowledgehub/indexing/splitters.py similarity index 100% rename from knowledgehub/parsers/splitter.py rename to knowledgehub/indexing/splitters.py diff --git a/knowledgehub/llms/__init__.py b/knowledgehub/llms/__init__.py index 7f69cd1..cbf2860 100644 --- a/knowledgehub/llms/__init__.py +++ b/knowledgehub/llms/__init__.py @@ -1,8 +1,9 @@ from langchain.schema.messages import AIMessage, SystemMessage -from .chats import AzureChatOpenAI, ChatLLM -from .chats.base import BaseMessage, HumanMessage +from .branching import GatedBranchingPipeline, SimpleBranchingPipeline +from .chats import AzureChatOpenAI, BaseMessage, ChatLLM, HumanMessage from .completions import LLM, AzureOpenAI, OpenAI +from .linear import GatedLinearPipeline, SimpleLinearPipeline from .prompts import BasePromptComponent, PromptTemplate __all__ = [ @@ -20,4 +21,9 @@ __all__ = [ # prompt-specific components "BasePromptComponent", "PromptTemplate", + # strategies + "SimpleLinearPipeline", + "GatedLinearPipeline", + "SimpleBranchingPipeline", + "GatedBranchingPipeline", ] diff --git a/knowledgehub/composite/branching.py b/knowledgehub/llms/branching.py similarity index 88% rename from knowledgehub/composite/branching.py rename to knowledgehub/llms/branching.py index c082d5d..ccbcfcb 100644 --- a/knowledgehub/composite/branching.py +++ b/knowledgehub/llms/branching.py @@ -2,8 +2,8 @@ from typing import List, Optional from theflow import Param -from kotaemon.base import BaseComponent, Document -from kotaemon.composite.linear import GatedLinearPipeline +from ..base import BaseComponent, Document +from .linear import GatedLinearPipeline class SimpleBranchingPipeline(BaseComponent): @@ -14,10 +14,12 @@ class SimpleBranchingPipeline(BaseComponent): branches (List[BaseComponent]): The list of branches to be executed. Example Usage: - from kotaemon.composite import GatedLinearPipeline - from kotaemon.llms.chats.openai import AzureChatOpenAI - from kotaemon.post_processing.extractor import RegexExtractor - from kotaemon.llms import BasePromptComponent + from kotaemon.llms import ( + AzureChatOpenAI, + BasePromptComponent, + GatedLinearPipeline, + ) + from kotaemon.parsers import RegexExtractor def identity(x): return x @@ -87,10 +89,12 @@ class GatedBranchingPipeline(SimpleBranchingPipeline): branches (List[BaseComponent]): The list of branches to be executed. Example Usage: - from kotaemon.composite import GatedLinearPipeline - from kotaemon.llms.chats.openai import AzureChatOpenAI - from kotaemon.post_processing.extractor import RegexExtractor - from kotaemon.llms import BasePromptComponent + from kotaemon.llms import ( + AzureChatOpenAI, + BasePromptComponent, + GatedLinearPipeline, + ) + from kotaemon.parsers import RegexExtractor def identity(x): return x @@ -152,7 +156,7 @@ if __name__ == "__main__": from kotaemon.llms import BasePromptComponent from kotaemon.llms.chats.openai import AzureChatOpenAI - from kotaemon.post_processing.extractor import RegexExtractor + from kotaemon.parsers import RegexExtractor def identity(x): return x diff --git a/knowledgehub/llms/chats/__init__.py b/knowledgehub/llms/chats/__init__.py index 7eb20c0..d634222 100644 --- a/knowledgehub/llms/chats/__init__.py +++ b/knowledgehub/llms/chats/__init__.py @@ -1,4 +1,4 @@ -from .base import ChatLLM +from .base import BaseMessage, ChatLLM, HumanMessage from .openai import AzureChatOpenAI -__all__ = ["ChatLLM", "AzureChatOpenAI"] +__all__ = ["ChatLLM", "AzureChatOpenAI", "BaseMessage", "HumanMessage"] diff --git a/knowledgehub/composite/linear.py b/knowledgehub/llms/linear.py similarity index 95% rename from knowledgehub/composite/linear.py rename to knowledgehub/llms/linear.py index 9f1d0d3..df6cc87 100644 --- a/knowledgehub/composite/linear.py +++ b/knowledgehub/llms/linear.py @@ -1,8 +1,10 @@ from typing import Any, Callable, Optional, Union -from kotaemon.base import BaseComponent -from kotaemon.base.schema import Document, IO_Type -from kotaemon.llms import LLM, BasePromptComponent, ChatLLM +from ..base import BaseComponent +from ..base.schema import Document, IO_Type +from .chats import ChatLLM +from .completions import LLM +from .prompts import BasePromptComponent class SimpleLinearPipeline(BaseComponent): @@ -86,8 +88,8 @@ class GatedLinearPipeline(SimpleLinearPipeline): Example Usage: from kotaemon.llms.chats.openai import AzureChatOpenAI - from kotaemon.post_processing.extractor import RegexExtractor from kotaemon.llms import BasePromptComponent + from kotaemon.parsers import RegexExtractor def identity(x): return x diff --git a/knowledgehub/parsers/__init__.py b/knowledgehub/parsers/__init__.py index e69de29..80207ce 100644 --- a/knowledgehub/parsers/__init__.py +++ b/knowledgehub/parsers/__init__.py @@ -0,0 +1,3 @@ +from .regex_extractor import FirstMatchRegexExtractor, RegexExtractor + +__all__ = ["RegexExtractor", "FirstMatchRegexExtractor"] diff --git a/knowledgehub/post_processing/extractor.py b/knowledgehub/parsers/regex_extractor.py similarity index 100% rename from knowledgehub/post_processing/extractor.py rename to knowledgehub/parsers/regex_extractor.py diff --git a/knowledgehub/pipelines/ingest.py b/knowledgehub/pipelines/ingest.py index c486c73..78b7ee4 100644 --- a/knowledgehub/pipelines/ingest.py +++ b/knowledgehub/pipelines/ingest.py @@ -9,6 +9,7 @@ from theflow.utils.modules import ObjectInitDeclaration as _ from kotaemon.base import BaseComponent from kotaemon.embeddings import AzureOpenAIEmbeddings +from kotaemon.indexing.splitters import SimpleNodeParser from kotaemon.loaders import ( AutoReader, DirectoryReader, @@ -16,7 +17,6 @@ from kotaemon.loaders import ( OCRReader, PandasExcelReader, ) -from kotaemon.parsers.splitter import SimpleNodeParser from kotaemon.pipelines.agents import BaseAgent from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline from kotaemon.pipelines.reranking import BaseRerankingPipeline diff --git a/tests/test_composite.py b/tests/test_composite.py index f89310c..f9bde83 100644 --- a/tests/test_composite.py +++ b/tests/test_composite.py @@ -3,15 +3,15 @@ from copy import deepcopy import pytest from openai.types.chat.chat_completion import ChatCompletion -from kotaemon.composite import ( +from kotaemon.llms import ( + AzureChatOpenAI, + BasePromptComponent, GatedBranchingPipeline, GatedLinearPipeline, SimpleBranchingPipeline, SimpleLinearPipeline, ) -from kotaemon.llms import BasePromptComponent -from kotaemon.llms.chats.openai import AzureChatOpenAI -from kotaemon.post_processing.extractor import RegexExtractor +from kotaemon.parsers import RegexExtractor _openai_chat_completion_response = ChatCompletion.parse_obj( { diff --git a/tests/test_post_processing.py b/tests/test_post_processing.py index bda8337..d32808f 100644 --- a/tests/test_post_processing.py +++ b/tests/test_post_processing.py @@ -1,7 +1,7 @@ import pytest from kotaemon.base import Document -from kotaemon.post_processing.extractor import RegexExtractor +from kotaemon.parsers import RegexExtractor @pytest.fixture diff --git a/tests/test_prompt.py b/tests/test_prompt.py index 1f47df7..2063a65 100644 --- a/tests/test_prompt.py +++ b/tests/test_prompt.py @@ -2,7 +2,7 @@ import pytest from kotaemon.base import Document from kotaemon.llms import BasePromptComponent, PromptTemplate -from kotaemon.post_processing.extractor import RegexExtractor +from kotaemon.parsers import RegexExtractor def test_set_attributes():