Combine docstores and vectorstores within a storages component (#72)

This commit is contained in:
Nguyen Trung Duc (john) 2023-11-14 17:50:57 +07:00 committed by GitHub
parent 640962e916
commit b159897ac6
18 changed files with 36 additions and 21 deletions

View File

@@ -6,9 +6,8 @@ from pathlib import Path
from theflow import Node, Param
from ..base import BaseComponent, Document
from ..docstores import BaseDocumentStore
from ..embeddings import BaseEmbeddings
from ..vectorstores import BaseVectorStore
from ..storages import BaseDocumentStore, BaseVectorStore
VECTOR_STORE_FNAME = "vectorstore"
DOC_STORE_FNAME = "docstore"

View File

@@ -7,7 +7,6 @@ from theflow import Node
from theflow.utils.modules import ObjectInitDeclaration as _
from kotaemon.base import BaseComponent
from kotaemon.docstores import BaseDocumentStore, InMemoryDocumentStore
from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.loaders import (
AutoReader,
@@ -20,7 +19,12 @@ from kotaemon.parsers.splitter import SimpleNodeParser
from kotaemon.pipelines.agents import BaseAgent
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
from kotaemon.vectorstores import BaseVectorStore, InMemoryVectorStore
from kotaemon.storages import (
BaseDocumentStore,
BaseVectorStore,
InMemoryDocumentStore,
InMemoryVectorStore,
)
from .qa import AgentQAPipeline, QuestionAnsweringPipeline
from .utils import file_names_to_collection_name

View File

@@ -7,14 +7,18 @@ from theflow.utils.modules import ObjectInitDeclaration as _
from kotaemon.base import BaseComponent
from kotaemon.base.schema import RetrievedDocument
from kotaemon.docstores import BaseDocumentStore, InMemoryDocumentStore
from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.llms import PromptTemplate
from kotaemon.llms.chats.openai import AzureChatOpenAI
from kotaemon.pipelines.agents import BaseAgent
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
from kotaemon.pipelines.tools import ComponentTool
from kotaemon.vectorstores import BaseVectorStore, InMemoryVectorStore
from kotaemon.storages import (
BaseDocumentStore,
BaseVectorStore,
InMemoryDocumentStore,
InMemoryVectorStore,
)
from .utils import file_names_to_collection_name

View File

@@ -7,9 +7,8 @@ from theflow import Node, Param
from ..base import BaseComponent
from ..base.schema import Document, RetrievedDocument
from ..docstores import BaseDocumentStore
from ..embeddings import BaseEmbeddings
from ..vectorstores import BaseVectorStore
from ..storages import BaseDocumentStore, BaseVectorStore
VECTOR_STORE_FNAME = "vectorstore"
DOC_STORE_FNAME = "docstore"

View File

@@ -0,0 +1,12 @@
from .docstores import BaseDocumentStore, InMemoryDocumentStore
from .vectorstores import BaseVectorStore, ChromaVectorStore, InMemoryVectorStore
# Public names of this module: exactly the document-store and vector-store
# classes imported above from the .docstores and .vectorstores submodules.
__all__ = [
# Document stores
"BaseDocumentStore",
"InMemoryDocumentStore",
# Vector stores
"BaseVectorStore",
"ChromaVectorStore",
"InMemoryVectorStore",
]

View File

@@ -2,7 +2,7 @@ from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Optional, Union
from ..base import Document
from ...base import Document
class BaseDocumentStore(ABC):

View File

@@ -2,7 +2,7 @@ import json
from pathlib import Path
from typing import List, Optional, Union
from ..base import Document
from ...base import Document
from .base import BaseDocumentStore

View File

@@ -6,7 +6,7 @@ from llama_index.vector_stores.types import BasePydanticVectorStore
from llama_index.vector_stores.types import VectorStore as LIVectorStore
from llama_index.vector_stores.types import VectorStoreQuery
from ..base import Document
from ...base import Document
class BaseVectorStore(ABC):

View File

@@ -6,7 +6,7 @@ import fsspec
from llama_index.vector_stores import SimpleVectorStore as LISimpleVectorStore
from llama_index.vector_stores.simple import SimpleVectorStoreData
from kotaemon.vectorstores.base import LlamaIndexVectorStore
from .base import LlamaIndexVectorStore
class InMemoryVectorStore(LlamaIndexVectorStore):

View File

@@ -5,12 +5,11 @@ from theflow import Param
from theflow.utils.modules import ObjectInitDeclaration as _
from kotaemon.base import BaseComponent
from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.llms.completions.openai import AzureOpenAI
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
from kotaemon.vectorstores import ChromaVectorStore
from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
class QuestionAnsweringPipeline(BaseComponent):

View File

@@ -7,7 +7,7 @@ from kotaemon.base import BaseComponent
from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.llms.completions.openai import AzureOpenAI
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
from kotaemon.vectorstores import ChromaVectorStore
from kotaemon.storages import ChromaVectorStore
class Pipeline(BaseComponent):

View File

@@ -1,7 +1,7 @@
import pytest
from kotaemon.base import Document
from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.storages import InMemoryDocumentStore
def test_simple_document_store_base_interfaces(tmp_path):

View File

@@ -6,11 +6,10 @@ import pytest
from openai.resources.embeddings import Embeddings
from kotaemon.base import Document
from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
from kotaemon.vectorstores import ChromaVectorStore
from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
openai_embedding = json.load(f)

View File

@@ -5,12 +5,11 @@ import pytest
from openai.resources.embeddings import Embeddings
from kotaemon.base import Document
from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
from kotaemon.pipelines.tools import ComponentTool, GoogleSearchTool, WikipediaTool
from kotaemon.vectorstores import ChromaVectorStore
from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
openai_embedding = json.load(f)

View File

@@ -1,7 +1,7 @@
import json
from kotaemon.base import Document
from kotaemon.vectorstores import ChromaVectorStore, InMemoryVectorStore
from kotaemon.storages import ChromaVectorStore, InMemoryVectorStore
class TestChromaVectorStore: