Update Base interface of Index/Retrieval pipeline (#36)

* add base Tool

* minor update test_tool

* update test dependency

* update test dependency

* Fix namespace conflict

* update test

* add base Agent Interface, add ReWoo Agent

* minor update

* update test

* fix typo

* remove unneeded print

* update rewoo agent

* add LLMTool

* update BaseAgent type

* add ReAct agent

* add ReAct agent

* minor update

* minor update

* minor update

* minor update

* update base reader with BaseComponent

* add splitter

* update agent and tool

* update vectorstores

* update load/save for indexing and retrieving pipeline

* update test_agent for more use-cases

* add missing dependency for test

* update test case for in memory vectorstore

* add TextSplitter to BaseComponent

* update type hint basetool

---------

Co-authored-by: trducng <trungduc1992@gmail.com>
This commit is contained in:
Tuan Anh Nguyen Dang (Tadashi_Cin)
2023-10-04 14:27:44 +07:00
committed by GitHub
parent 49ed3f6994
commit 56bc41b673
13 changed files with 302 additions and 36 deletions

View File

@@ -1,5 +1,6 @@
import uuid
from typing import List, Optional
from pathlib import Path
from typing import List, Union
from theflow import Node, Param
@@ -9,6 +10,9 @@ from ..documents.base import Document
from ..embeddings import BaseEmbeddings
from ..vectorstores import BaseVectorStore
VECTOR_STORE_FNAME = "vectorstore"
DOC_STORE_FNAME = "docstore"
class IndexVectorStoreFromDocumentPipeline(BaseComponent):
"""Ingest the document, run through the embedding, and store the embedding in a
@@ -20,7 +24,7 @@ class IndexVectorStoreFromDocumentPipeline(BaseComponent):
"""
vector_store: Param[BaseVectorStore] = Param()
doc_store: Optional[BaseDocumentStore] = None
doc_store: Param[BaseDocumentStore] = Param()
embedding: Node[BaseEmbeddings] = Node()
# TODO: refer to llama_index's storage as well
@@ -30,7 +34,7 @@ class IndexVectorStoreFromDocumentPipeline(BaseComponent):
self.run_batch_document([document])
def run_batch_raw(self, text: List[str]) -> None:
documents = [Document(t, id_=str(uuid.uuid4())) for t in text]
documents = [Document(text=t, id_=str(uuid.uuid4())) for t in text]
self.run_batch_document(documents)
def run_document(self, text: Document) -> None:
@@ -57,13 +61,31 @@ class IndexVectorStoreFromDocumentPipeline(BaseComponent):
return True
return False
def persist(self, path: str):
def save(
self,
path: Union[str, Path],
vectorstore_fname: str = VECTOR_STORE_FNAME,
docstore_fname: str = DOC_STORE_FNAME,
):
"""Save the whole state of the indexing pipeline vector store and all
necessary information to disk
Args:
path (str): path to save the state
"""
if isinstance(path, str):
path = Path(path)
self.vector_store.save(path / vectorstore_fname)
self.doc_store.save(path / docstore_fname)
def load(self, path: str):
def load(
self,
path: Union[str, Path],
vectorstore_fname: str = VECTOR_STORE_FNAME,
docstore_fname: str = DOC_STORE_FNAME,
):
"""Load all information from disk to an object"""
if isinstance(path, str):
path = Path(path)
self.vector_store.load(path / vectorstore_fname)
self.doc_store.load(path / docstore_fname)