[AUR-392, AUR-413, AUR-414] Define base vector store, and make use of ChromaVectorStore from llama_index. Indexing and retrieving vectors with vector store (#18)

Design the base interface of the vector store and apply it to the Chroma vector store (a wrapper around llama_index's implementation). Provide pipelines to populate the vector store and retrieve from it.
This commit is contained in:
Nguyen Trung Duc (john)
2023-09-14 14:18:20 +07:00
committed by GitHub
parent c339912312
commit 620b2b03ca
10 changed files with 486 additions and 16 deletions

View File

@@ -1,3 +1,4 @@
from abc import abstractmethod
from typing import List, Type
from langchain.embeddings.base import Embeddings as LCEmbeddings
@@ -7,11 +8,37 @@ from ..components import BaseComponent
from ..documents.base import Document
class Embeddings(BaseComponent):
    """Placeholder embeddings component that defines no members of its own."""
class BaseEmbeddings(BaseComponent):
    """Interface for embedding components.

    Subclasses provide four entry points, one per input shape: a single raw
    string, a list of raw strings, a single ``Document`` and a list of
    ``Document`` objects. The two ``is_*`` helpers classify an input.

    NOTE(review): ``@abstractmethod`` is only enforced at instantiation time
    when the class uses an ABC metaclass; whether ``BaseComponent`` provides
    one is not visible here -- confirm.
    """

    @abstractmethod
    def run_raw(self, text: str) -> List[float]:
        """Embed a single raw string and return one vector."""
        ...

    @abstractmethod
    def run_batch_raw(self, text: List[str]) -> List[List[float]]:
        """Embed a list of raw strings and return a list of vectors."""
        ...

    @abstractmethod
    def run_document(self, text: Document) -> List[float]:
        """Embed a single ``Document`` and return one vector."""
        ...

    @abstractmethod
    def run_batch_document(self, text: List[Document]) -> List[List[float]]:
        """Embed a list of ``Document`` objects and return a list of vectors."""
        ...

    def is_document(self, text) -> bool:
        """Tell whether ``text`` is a ``Document`` or a list starting with one.

        Only the first element of a list is inspected. An empty list yields
        ``False`` instead of raising ``IndexError``.
        """
        if isinstance(text, Document):
            return True
        # Guard against the empty list before peeking at the first element.
        if isinstance(text, list) and text:
            return isinstance(text[0], Document)
        return False

    def is_batch(self, text) -> bool:
        """Tell whether ``text`` is a ``list``."""
        return isinstance(text, list)
class LangchainEmbeddings(Embeddings):
class LangchainEmbeddings(BaseEmbeddings):
_lc_class: Type[LCEmbeddings]
def __init__(self, **params):
@@ -46,17 +73,5 @@ class LangchainEmbeddings(Embeddings):
def run_document(self, text: Document) -> List[float]:
    """Embed one document by passing its ``text`` to the agent's ``embed_query``."""
    embed_query = self.agent.embed_query  # type: ignore
    return embed_query(text.text)
def run_batch_document(self, text: List[Document]):
def run_batch_document(self, text: List[Document]) -> List[List[float]]:
    """Embed several documents with a single ``embed_documents`` call.

    Args:
        text: Documents whose ``text`` attributes are handed to the agent.

    Returns:
        The result of the agent's ``embed_documents`` for those texts.
    """
    embed_documents = self.agent.embed_documents  # type: ignore
    contents = [document.text for document in text]
    return embed_documents(contents)
def is_document(self, text) -> bool:
    """Tell whether ``text`` is a ``Document`` or a list starting with one.

    Only the first element of a list is inspected. An empty list yields
    ``False`` instead of raising ``IndexError``.
    """
    if isinstance(text, Document):
        return True
    # Guard against the empty list before peeking at the first element.
    if isinstance(text, list) and text:
        return isinstance(text[0], Document)
    return False
def is_batch(self, text) -> bool:
    """Tell whether ``text`` is a ``list``."""
    return isinstance(text, list)