Combine docstores and vectorstores within a storages component (#72)

This commit is contained in:
Nguyen Trung Duc (john) 2023-11-14 17:50:57 +07:00 committed by GitHub
parent 640962e916
commit b159897ac6
18 changed files with 36 additions and 21 deletions

View File

@@ -6,9 +6,8 @@ from pathlib import Path
from theflow import Node, Param from theflow import Node, Param
from ..base import BaseComponent, Document from ..base import BaseComponent, Document
from ..docstores import BaseDocumentStore
from ..embeddings import BaseEmbeddings from ..embeddings import BaseEmbeddings
from ..vectorstores import BaseVectorStore from ..storages import BaseDocumentStore, BaseVectorStore
VECTOR_STORE_FNAME = "vectorstore" VECTOR_STORE_FNAME = "vectorstore"
DOC_STORE_FNAME = "docstore" DOC_STORE_FNAME = "docstore"

View File

@@ -7,7 +7,6 @@ from theflow import Node
from theflow.utils.modules import ObjectInitDeclaration as _ from theflow.utils.modules import ObjectInitDeclaration as _
from kotaemon.base import BaseComponent from kotaemon.base import BaseComponent
from kotaemon.docstores import BaseDocumentStore, InMemoryDocumentStore
from kotaemon.embeddings import AzureOpenAIEmbeddings from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.loaders import ( from kotaemon.loaders import (
AutoReader, AutoReader,
@@ -20,7 +19,12 @@ from kotaemon.parsers.splitter import SimpleNodeParser
from kotaemon.pipelines.agents import BaseAgent from kotaemon.pipelines.agents import BaseAgent
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
from kotaemon.vectorstores import BaseVectorStore, InMemoryVectorStore from kotaemon.storages import (
BaseDocumentStore,
BaseVectorStore,
InMemoryDocumentStore,
InMemoryVectorStore,
)
from .qa import AgentQAPipeline, QuestionAnsweringPipeline from .qa import AgentQAPipeline, QuestionAnsweringPipeline
from .utils import file_names_to_collection_name from .utils import file_names_to_collection_name

View File

@@ -7,14 +7,18 @@ from theflow.utils.modules import ObjectInitDeclaration as _
from kotaemon.base import BaseComponent from kotaemon.base import BaseComponent
from kotaemon.base.schema import RetrievedDocument from kotaemon.base.schema import RetrievedDocument
from kotaemon.docstores import BaseDocumentStore, InMemoryDocumentStore
from kotaemon.embeddings import AzureOpenAIEmbeddings from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.llms import PromptTemplate from kotaemon.llms import PromptTemplate
from kotaemon.llms.chats.openai import AzureChatOpenAI from kotaemon.llms.chats.openai import AzureChatOpenAI
from kotaemon.pipelines.agents import BaseAgent from kotaemon.pipelines.agents import BaseAgent
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
from kotaemon.pipelines.tools import ComponentTool from kotaemon.pipelines.tools import ComponentTool
from kotaemon.vectorstores import BaseVectorStore, InMemoryVectorStore from kotaemon.storages import (
BaseDocumentStore,
BaseVectorStore,
InMemoryDocumentStore,
InMemoryVectorStore,
)
from .utils import file_names_to_collection_name from .utils import file_names_to_collection_name

View File

@@ -7,9 +7,8 @@ from theflow import Node, Param
from ..base import BaseComponent from ..base import BaseComponent
from ..base.schema import Document, RetrievedDocument from ..base.schema import Document, RetrievedDocument
from ..docstores import BaseDocumentStore
from ..embeddings import BaseEmbeddings from ..embeddings import BaseEmbeddings
from ..vectorstores import BaseVectorStore from ..storages import BaseDocumentStore, BaseVectorStore
VECTOR_STORE_FNAME = "vectorstore" VECTOR_STORE_FNAME = "vectorstore"
DOC_STORE_FNAME = "docstore" DOC_STORE_FNAME = "docstore"

View File

@@ -0,0 +1,12 @@
from .docstores import BaseDocumentStore, InMemoryDocumentStore
from .vectorstores import BaseVectorStore, ChromaVectorStore, InMemoryVectorStore
__all__ = [
# Document stores
"BaseDocumentStore",
"InMemoryDocumentStore",
# Vector stores
"BaseVectorStore",
"ChromaVectorStore",
"InMemoryVectorStore",
]

View File

@@ -2,7 +2,7 @@ from abc import ABC, abstractmethod
from pathlib import Path from pathlib import Path
from typing import List, Optional, Union from typing import List, Optional, Union
from ..base import Document from ...base import Document
class BaseDocumentStore(ABC): class BaseDocumentStore(ABC):

View File

@@ -2,7 +2,7 @@ import json
from pathlib import Path from pathlib import Path
from typing import List, Optional, Union from typing import List, Optional, Union
from ..base import Document from ...base import Document
from .base import BaseDocumentStore from .base import BaseDocumentStore

View File

@@ -6,7 +6,7 @@ from llama_index.vector_stores.types import BasePydanticVectorStore
from llama_index.vector_stores.types import VectorStore as LIVectorStore from llama_index.vector_stores.types import VectorStore as LIVectorStore
from llama_index.vector_stores.types import VectorStoreQuery from llama_index.vector_stores.types import VectorStoreQuery
from ..base import Document from ...base import Document
class BaseVectorStore(ABC): class BaseVectorStore(ABC):

View File

@@ -6,7 +6,7 @@ import fsspec
from llama_index.vector_stores import SimpleVectorStore as LISimpleVectorStore from llama_index.vector_stores import SimpleVectorStore as LISimpleVectorStore
from llama_index.vector_stores.simple import SimpleVectorStoreData from llama_index.vector_stores.simple import SimpleVectorStoreData
from kotaemon.vectorstores.base import LlamaIndexVectorStore from .base import LlamaIndexVectorStore
class InMemoryVectorStore(LlamaIndexVectorStore): class InMemoryVectorStore(LlamaIndexVectorStore):

View File

@@ -5,12 +5,11 @@ from theflow import Param
from theflow.utils.modules import ObjectInitDeclaration as _ from theflow.utils.modules import ObjectInitDeclaration as _
from kotaemon.base import BaseComponent from kotaemon.base import BaseComponent
from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.embeddings import AzureOpenAIEmbeddings from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.llms.completions.openai import AzureOpenAI from kotaemon.llms.completions.openai import AzureOpenAI
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
from kotaemon.vectorstores import ChromaVectorStore from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
class QuestionAnsweringPipeline(BaseComponent): class QuestionAnsweringPipeline(BaseComponent):

View File

@@ -7,7 +7,7 @@ from kotaemon.base import BaseComponent
from kotaemon.embeddings import AzureOpenAIEmbeddings from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.llms.completions.openai import AzureOpenAI from kotaemon.llms.completions.openai import AzureOpenAI
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
from kotaemon.vectorstores import ChromaVectorStore from kotaemon.storages import ChromaVectorStore
class Pipeline(BaseComponent): class Pipeline(BaseComponent):

View File

@@ -1,7 +1,7 @@
import pytest import pytest
from kotaemon.base import Document from kotaemon.base import Document
from kotaemon.docstores import InMemoryDocumentStore from kotaemon.storages import InMemoryDocumentStore
def test_simple_document_store_base_interfaces(tmp_path): def test_simple_document_store_base_interfaces(tmp_path):

View File

@@ -6,11 +6,10 @@ import pytest
from openai.resources.embeddings import Embeddings from openai.resources.embeddings import Embeddings
from kotaemon.base import Document from kotaemon.base import Document
from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
from kotaemon.vectorstores import ChromaVectorStore from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f: with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
openai_embedding = json.load(f) openai_embedding = json.load(f)

View File

@@ -5,12 +5,11 @@ import pytest
from openai.resources.embeddings import Embeddings from openai.resources.embeddings import Embeddings
from kotaemon.base import Document from kotaemon.base import Document
from kotaemon.docstores import InMemoryDocumentStore
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline from kotaemon.pipelines.indexing import IndexVectorStoreFromDocumentPipeline
from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline from kotaemon.pipelines.retrieving import RetrieveDocumentFromVectorStorePipeline
from kotaemon.pipelines.tools import ComponentTool, GoogleSearchTool, WikipediaTool from kotaemon.pipelines.tools import ComponentTool, GoogleSearchTool, WikipediaTool
from kotaemon.vectorstores import ChromaVectorStore from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f: with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
openai_embedding = json.load(f) openai_embedding = json.load(f)

View File

@@ -1,7 +1,7 @@
import json import json
from kotaemon.base import Document from kotaemon.base import Document
from kotaemon.vectorstores import ChromaVectorStore, InMemoryVectorStore from kotaemon.storages import ChromaVectorStore, InMemoryVectorStore
class TestChromaVectorStore: class TestChromaVectorStore: