[AUR-429] Add MVP pipeline with Ingestion and QA stage (#39)

* add base Tool

* minor update test_tool

* update test dependency

* update test dependency

* Fix namespace conflict

* update test

* add base Agent Interface, add ReWoo Agent

* minor update

* update test

* fix typo

* remove unneeded print

* update rewoo agent

* add LLMTool

* update BaseAgent type

* add ReAct agent

* add ReAct agent

* minor update

* minor update

* minor update

* minor update

* update base reader with BaseComponent

* add splitter

* update agent and tool

* update vectorstores

* update load/save for indexing and retrieving pipeline

* update test_agent for more use-cases

* add missing dependency for test

* update test case for in memory vectorstore

* add TextSplitter to BaseComponent

* update type hint basetool

* add insurance mvp pipeline

* update requirements

* Remove redundant plugins param

* Mock GoogleSearch

---------

Co-authored-by: trducng <trungduc1992@gmail.com>
This commit is contained in:
Tuan Anh Nguyen Dang (Tadashi_Cin)
2023-10-05 12:31:33 +07:00
committed by GitHub
parent 2638152054
commit 79cc60e6a2
9 changed files with 389 additions and 4 deletions

15
tests/conftest.py Normal file
View File

@@ -0,0 +1,15 @@
import pytest
@pytest.fixture(scope="function")
def mock_google_search(monkeypatch):
    """Replace ``googlesearch.search`` with an offline stub.

    The stub is a generator function (matching the real API's return type)
    that yields exactly one hard-coded ``SearchResult``, so tests never hit
    the network.
    """
    import googlesearch

    def fake_search(*_args, **_kwargs):
        # One canned hit is enough for the agent/tool tests.
        yield googlesearch.SearchResult(
            url="https://www.cinnamon.is/en/",
            title="Cinnamon AI",
            description="Cinnamon AI is an enterprise AI company.",
        )

    monkeypatch.setattr(googlesearch, "search", fake_search)

View File

@@ -135,7 +135,7 @@ def llm():
"openai.api_resources.chat_completion.ChatCompletion.create",
side_effect=_openai_chat_completion_responses_rewoo,
)
def test_rewoo_agent(openai_completion, llm):
def test_rewoo_agent(openai_completion, llm, mock_google_search):
plugins = [
GoogleSearchTool(),
WikipediaTool(),
@@ -153,7 +153,7 @@ def test_rewoo_agent(openai_completion, llm):
"openai.api_resources.chat_completion.ChatCompletion.create",
side_effect=_openai_chat_completion_responses_react,
)
def test_react_agent(openai_completion, llm):
def test_react_agent(openai_completion, llm, mock_google_search):
plugins = [
GoogleSearchTool(),
WikipediaTool(),
@@ -170,7 +170,7 @@ def test_react_agent(openai_completion, llm):
"openai.api_resources.chat_completion.ChatCompletion.create",
side_effect=_openai_chat_completion_responses_react,
)
def test_react_agent_langchain(openai_completion, llm):
def test_react_agent_langchain(openai_completion, llm, mock_google_search):
from langchain.agents import AgentType, initialize_agent
plugins = [

67
tests/test_qa.py Normal file
View File

@@ -0,0 +1,67 @@
import json
from pathlib import Path
from unittest.mock import patch
import pytest
from openai.api_resources.embedding import Embedding
from kotaemon.llms.chats.openai import AzureChatOpenAI
from kotaemon.pipelines.ingest import ReaderIndexingPipeline
# Canned embedding payload (captured OpenAI response) used by the
# mock_openai_embedding fixture below so tests run fully offline.
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
    openai_embedding = json.load(f)

# Minimal chat-completion response in the shape the OpenAI API returns;
# fed back by the patched ChatCompletion.create in test_ingest_pipeline.
_openai_chat_completion_response = {
    "id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
    "object": "chat.completion",
    "created": 1692338378,
    "model": "gpt-35-turbo",
    "choices": [
        {
            "index": 0,
            "finish_reason": "stop",
            "message": {
                "role": "assistant",
                "content": "Hello! How can I assist you today?",
            },
        }
    ],
    "usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
}
@pytest.fixture(scope="function")
def mock_openai_embedding(monkeypatch):
    """Stub ``Embedding.create`` to return the canned payload (no API calls)."""

    def fake_create(*_args, **_kwargs):
        return openai_embedding

    monkeypatch.setattr(Embedding, "create", fake_create)
@patch(
    "openai.api_resources.chat_completion.ChatCompletion.create",
    side_effect=lambda *args, **kwargs: _openai_chat_completion_response,
)
def test_ingest_pipeline(openai_completion, mock_openai_embedding, tmp_path):
    """End-to-end MVP check: index a dummy PDF, retrieve, then run QA.

    Both OpenAI endpoints are mocked — chat completions via the ``@patch``
    decorator, embeddings via the ``mock_openai_embedding`` fixture — so the
    test runs offline with dummy credentials.

    Note: the mock parameter was renamed from ``patch`` to
    ``openai_completion``; the old name shadowed ``unittest.mock.patch`` and
    was inconsistent with the agent tests (e.g. ``test_rewoo_agent``). The
    decorator injects the mock positionally, so the rename is safe.
    """
    indexing_pipeline = ReaderIndexingPipeline(
        storage=tmp_path, openai_api_key="some-key"
    )
    input_file_path = Path(__file__).parent / "resources/dummy.pdf"

    # Build the index from scratch for the dummy document.
    indexing_pipeline(input_file_path, force_reindex=True)

    # The retriever over the freshly built index should surface one chunk.
    retrieving_pipeline = indexing_pipeline.to_retrieving_pipeline()
    results = retrieving_pipeline("This is a query")
    assert len(results) == 1

    # Azure-flavoured chat model; credentials are dummies since the HTTP
    # layer is mocked above.
    llm = AzureChatOpenAI(
        openai_api_base="https://test.openai.azure.com/",
        openai_api_key="some-key",
        openai_api_version="2023-03-15-preview",
        deployment_name="gpt35turbo",
        temperature=0,
        request_timeout=60,
    )
    qa_pipeline = indexing_pipeline.to_qa_pipeline(llm=llm, openai_api_key="some-key")
    response = qa_pipeline("Summarize this document.")
    assert response

View File

@@ -21,7 +21,7 @@ def mock_openai_embedding(monkeypatch):
monkeypatch.setattr(Embedding, "create", lambda *args, **kwargs: openai_embedding)
def test_google_tool():
def test_google_tool(mock_google_search):
tool = GoogleSearchTool()
assert tool.name
assert tool.description