- Migrate the MVP into kotaemon. - Preliminarily include the pipeline within the chatbot interface. - Organize the MVP as an application. Todo: - Add an info panel to view the agents' planning -> Fix streaming of the agents' output. Resolve: #60 Resolve: #61 Resolve: #62
57 lines
1.6 KiB
Python
57 lines
1.6 KiB
Python
from pathlib import Path
|
|
from typing import List, Optional, Union
|
|
|
|
from kotaemon.base import Document
|
|
|
|
from .in_memory import InMemoryDocumentStore
|
|
|
|
|
|
class SimpleFileDocumentStore(InMemoryDocumentStore):
    """Improve InMemoryDocumentStore by auto saving whenever the corpus is changed.

    Every `add` and `delete` is followed by a `save` to the configured path,
    and `get` re-reads that file when it is asked for an id that is not in
    memory. When no path is configured (``path=None``) the file operations
    are skipped and only the in-memory behaviour of the parent class remains.
    """

    def __init__(self, path: Optional[Union[str, Path]]):
        """Create the store and load existing content from `path` if present.

        Args:
            path: location of the file backing this store. If it is ``None``
                or does not point to an existing file, the store starts with
                whatever the parent constructor sets up.
        """
        super().__init__()
        self._path = path
        if path is not None and Path(path).is_file():
            self.load(path)

    def get(self, ids: Union[List[str], str]) -> List[Document]:
        """Get document by id.

        Args:
            ids: a single document id or a list of ids.

        Returns:
            The documents matching `ids`, in the same order.
        """
        if not isinstance(ids, list):
            ids = [ids]

        # An unknown id triggers one reload from the backing file
        # (presumably because the file may hold newer content than this
        # instance -- confirm against callers). The reload is guarded the
        # same way as in __init__, so a store without a file on disk reports
        # the missing id from the lookup below instead of a file error.
        has_unknown_id = any(doc_id not in self._store for doc_id in ids)
        if (
            has_unknown_id
            and self._path is not None
            and Path(self._path).is_file()
        ):
            self.load(self._path)

        return [self._store[doc_id] for doc_id in ids]

    def add(
        self,
        docs: Union[Document, List[Document]],
        ids: Optional[Union[List[str], str]] = None,
        **kwargs,
    ):
        """Add document into document store, then save the store.

        Args:
            docs: list of documents to add
            ids: specify the ids of documents to add or
                use existing doc.doc_id
            exist_ok: raise error when duplicate doc-id
                found in the docstore (default to False); forwarded to the
                parent `add` through ``**kwargs``
        """
        super().add(docs=docs, ids=ids, **kwargs)
        # Nothing to write to when the store was created without a path.
        if self._path is not None:
            self.save(self._path)

    def delete(self, ids: Union[List[str], str]):
        """Delete document by id, then save the store.

        Args:
            ids: a single document id or a list of ids to remove.
        """
        super().delete(ids=ids)
        # Nothing to write to when the store was created without a path.
        if self._path is not None:
            self.save(self._path)

    def __persist_flow__(self):
        """Return the constructor parameters in serialized form.

        Returns:
            A dict with the single key ``"path"`` holding the serialized
            backing path.
        """
        # Imported locally, as in the original code.
        from theflow.utils.modules import serialize

        return {"path": serialize(self._path)}
|