kotaemon/knowledgehub/storages/vectorstores/simple_file.py
Duc Nguyen (john) 37c744b616 Add file-based document store and vector store (#96)
* Modify docstore and vectorstore objects to be reconstructable
* Simplify the file docstore
* Use the simple file docstore and vector store in MVP
2023-12-04 17:46:00 +07:00

67 lines
2.0 KiB
Python

"""Simple file vector store index."""
from pathlib import Path
from typing import Any, Optional, Type
import fsspec
from llama_index.vector_stores import SimpleVectorStore as LISimpleVectorStore
from llama_index.vector_stores.simple import SimpleVectorStoreData
from kotaemon.base import DocumentWithEmbedding
from .base import LlamaIndexVectorStore
class SimpleFileVectorStore(LlamaIndexVectorStore):
    """File-backed vector store built on LlamaIndex's ``SimpleVectorStore``.

    The store is bound to a single file at ``path``. If that file already
    exists on the configured filesystem when the store is constructed, the
    underlying client is loaded from it. Every ``add`` and ``delete`` call
    writes the client back to the same file.
    """

    _li_class: Type[LISimpleVectorStore] = LISimpleVectorStore
    store_text: bool = False

    def __init__(
        self,
        path: str | Path,
        data: Optional[SimpleVectorStoreData] = None,
        fs: Optional[fsspec.AbstractFileSystem] = None,
        **kwargs: Any,
    ) -> None:
        """Create the store and load previously persisted content, if any.

        Args:
            path: Location of the file that backs this store.
            data: Initial vector store data. When omitted, an empty
                ``SimpleVectorStoreData`` is kept on this object.
            fs: Filesystem used for loading and persisting. When omitted,
                the local fsspec filesystem (``"file"``) is used.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        self._data = data or SimpleVectorStoreData()
        self._fs = fs or fsspec.filesystem("file")
        self._path = path
        self._save_path = Path(path)

        super().__init__(
            data=data,
            fs=fs,
            **kwargs,
        )

        # Check for an existing file through the same filesystem that is used
        # for loading and persisting. A local ``Path.is_file()`` check would
        # look at the local disk even when a different filesystem was passed.
        save_path = str(self._save_path)
        if self._fs.isfile(save_path):
            self._client = self._li_class.from_persist_path(
                persist_path=save_path, fs=self._fs
            )

    def add(
        self,
        embeddings: list[list[float]] | list[DocumentWithEmbedding],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list[str]] = None,
    ):
        """Add embeddings through the parent class, then persist to file.

        Args:
            embeddings: Raw embedding vectors or documents carrying embeddings.
            metadatas: Optional metadata dicts, passed on to the parent ``add``.
            ids: Optional ids, passed on to the parent ``add``.

        Returns:
            Whatever the parent ``add`` returns.
        """
        result = super().add(embeddings, metadatas, ids)
        # Write after every mutation so the file always mirrors the client.
        self._client.persist(str(self._save_path), self._fs)
        return result

    def delete(self, ids: list[str], **kwargs):
        """Delete entries through the parent class, then persist to file.

        Args:
            ids: Ids of the entries to remove.
            **kwargs: Forwarded unchanged to the parent ``delete``.

        Returns:
            Whatever the parent ``delete`` returns.
        """
        result = super().delete(ids, **kwargs)
        # Write after every mutation so the file always mirrors the client.
        self._client.persist(str(self._save_path), self._fs)
        return result

    def __persist_flow__(self) -> dict:
        """Return the parameters needed to reconstruct this store.

        Returns:
            A dict with ``"data"`` (``self._data`` as a dict, tagged with a
            ``"__type__"`` entry holding its dotted class path) and ``"path"``
            (the backing path as a string).
        """
        # NOTE(review): ``self._data`` is bound once in ``__init__`` and never
        # reassigned in this class, so it may not reflect entries added,
        # deleted or loaded from file afterwards -- confirm this is intended.
        payload = self._data.to_dict()
        module_name = self._data.__module__
        class_name = self._data.__class__.__qualname__
        payload["__type__"] = f"{module_name}.{class_name}"

        # The filesystem object is deliberately left out of the result;
        # presumably it cannot be serialized -- confirm before adding it.
        return {
            "data": payload,
            "path": str(self._path),
        }