kotaemon/knowledgehub/storages/docstores/simple_file.py
Duc Nguyen (john) 5a9d6f75be Migrate the MVP into kotaemon (#108)
- Migrate the MVP into kotaemon.
- Preliminarily include the pipeline within the chatbot interface.
- Organize MVP as an application.

Todo:

- Add an info panel to view the planning of agents -> Fix streaming agents' output.

Resolve: #60
Resolve: #61 
Resolve: #62
2024-01-10 15:28:09 +07:00

57 lines
1.6 KiB
Python

from pathlib import Path
from typing import List, Optional, Union
from kotaemon.base import Document
from .in_memory import InMemoryDocumentStore
class SimpleFileDocumentStore(InMemoryDocumentStore):
    """Improve InMemoryDocumentStore by auto saving whenever the corpus is changed

    Every call to `add` or `delete` is followed by `self.save(self._path)`, and
    `get` re-reads the backing file when it is asked for an id that is not in
    memory. `save` and `load` are not defined in this class; they are expected
    to be inherited from the parent store.
    """

    def __init__(self, path: Union[str, Path]):
        """Create the store and load the existing file at `path`, if there is one.

        Args:
            path: location of the file backing this store. If it does not point
                to an existing file, the store starts empty.
        """
        super().__init__()
        self._path = path
        if self._path_is_file():
            self.load(path)

    def _path_is_file(self) -> bool:
        """Return True when a path is set and it points to an existing file"""
        return self._path is not None and Path(self._path).is_file()

    def get(self, ids: Union[List[str], str]) -> List[Document]:
        """Get document by id

        If any requested id is not in memory, the backing file is re-read once
        before the lookup.
        NOTE(review): presumably this picks up documents written to the same
        file from elsewhere - confirm against callers.

        Args:
            ids: a single document id or a list of document ids

        Returns:
            the documents in the same order as `ids`

        Raises:
            KeyError: presumably, if an id is still unknown after the reload
                (`self._store` looks like a dict - confirm in the parent class)
        """
        if not isinstance(ids, list):
            ids = [ids]

        # Only reload when there is a file to read. Calling `load` on a path
        # that does not exist yet would surface a file error to the caller
        # instead of the ordinary missing-id error from the lookup below.
        has_missing = any(doc_id not in self._store for doc_id in ids)
        if has_missing and self._path_is_file():
            self.load(self._path)

        return [self._store[doc_id] for doc_id in ids]

    def add(
        self,
        docs: Union[Document, List[Document]],
        ids: Optional[Union[List[str], str]] = None,
        **kwargs,
    ):
        """Add document into document store, then save the store to file

        Args:
            docs: list of documents to add
            ids: specify the ids of documents to add or
                use existing doc.doc_id
            exist_ok: raise error when duplicate doc-id
                found in the docstore (default to False)

        Any extra keyword arguments (such as `exist_ok`) are forwarded
        unchanged to the parent's `add`.
        """
        super().add(docs=docs, ids=ids, **kwargs)
        self.save(self._path)

    def delete(self, ids: Union[List[str], str]):
        """Delete document by id, then save the store to file

        Args:
            ids: a single document id or a list of document ids
        """
        super().delete(ids=ids)
        self.save(self._path)

    def __persist_flow__(self):
        """Return the serialized path of this store as `{"path": ...}`"""
        # Imported locally, as in the original; presumably to keep `theflow`
        # off the module import path - confirm before hoisting.
        from theflow.utils.modules import serialize

        return {"path": serialize(self._path)}