[AUR-429] Add MVP pipeline with Ingestion and QA stage (#39)

* add base Tool

* minor update test_tool

* update test dependency

* update test dependency

* Fix namespace conflict

* update test

* add base Agent Interface, add ReWoo Agent

* minor update

* update test

* fix typo

* remove unneeded print

* update rewoo agent

* add LLMTool

* update BaseAgent type

* add ReAct agent

* add ReAct agent

* minor update

* minor update

* minor update

* minor update

* update base reader with BaseComponent

* add splitter

* update agent and tool

* update vectorstores

* update load/save for indexing and retrieving pipeline

* update test_agent for more use-cases

* add missing dependency for test

* update test case for in memory vectorstore

* add TextSplitter to BaseComponent

* update type hint basetool

* add insurance mvp pipeline

* update requirements

* Remove redundant plugins param

* Mock GoogleSearch

---------

Co-authored-by: trducng <trungduc1992@gmail.com>
This commit is contained in:
Tuan Anh Nguyen Dang (Tadashi_Cin)
2023-10-05 12:31:33 +07:00
committed by GitHub
parent 2638152054
commit 79cc60e6a2
9 changed files with 389 additions and 4 deletions

15
tests/conftest.py Normal file
View File

@@ -0,0 +1,15 @@
import pytest
@pytest.fixture(scope="function")
def mock_google_search(monkeypatch):
    """Replace ``googlesearch.search`` with an offline stub.

    The stub is a generator function (matching the real API's return type)
    that yields exactly one hard-coded ``SearchResult``, so tests never hit
    the network.
    """
    import googlesearch

    def fake_search(*_args, **_kwargs):
        # One canned hit is enough for the agent/tool tests.
        yield googlesearch.SearchResult(
            url="https://www.cinnamon.is/en/",
            title="Cinnamon AI",
            description="Cinnamon AI is an enterprise AI company.",
        )

    monkeypatch.setattr(googlesearch, "search", fake_search)

View File

@@ -135,7 +135,7 @@ def llm():
"openai.api_resources.chat_completion.ChatCompletion.create",
side_effect=_openai_chat_completion_responses_rewoo,
)
def test_rewoo_agent(openai_completion, llm):
def test_rewoo_agent(openai_completion, llm, mock_google_search):
plugins = [
GoogleSearchTool(),
WikipediaTool(),
@@ -153,7 +153,7 @@ def test_rewoo_agent(openai_completion, llm):
"openai.api_resources.chat_completion.ChatCompletion.create",
side_effect=_openai_chat_completion_responses_react,
)
def test_react_agent(openai_completion, llm):
def test_react_agent(openai_completion, llm, mock_google_search):
plugins = [
GoogleSearchTool(),
WikipediaTool(),
@@ -170,7 +170,7 @@ def test_react_agent(openai_completion, llm):
"openai.api_resources.chat_completion.ChatCompletion.create",
side_effect=_openai_chat_completion_responses_react,
)
def test_react_agent_langchain(openai_completion, llm):
def test_react_agent_langchain(openai_completion, llm, mock_google_search):
from langchain.agents import AgentType, initialize_agent
plugins = [

67
tests/test_qa.py Normal file
View File

@@ -0,0 +1,67 @@
import json
from pathlib import Path
from unittest.mock import patch
import pytest
from openai.api_resources.embedding import Embedding
from kotaemon.llms.chats.openai import AzureChatOpenAI
from kotaemon.pipelines.ingest import ReaderIndexingPipeline
# Canned embedding payload (captured OpenAI response) used by the
# mock_openai_embedding fixture below so tests run fully offline.
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
    openai_embedding = json.load(f)

# Minimal chat-completion response in the shape the OpenAI API returns;
# fed back by the patched ChatCompletion.create in test_ingest_pipeline.
_openai_chat_completion_response = {
    "id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
    "object": "chat.completion",
    "created": 1692338378,
    "model": "gpt-35-turbo",
    "choices": [
        {
            "index": 0,
            "finish_reason": "stop",
            "message": {
                "role": "assistant",
                "content": "Hello! How can I assist you today?",
            },
        }
    ],
    "usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
}
@pytest.fixture(scope="function")
def mock_openai_embedding(monkeypatch):
    """Stub ``Embedding.create`` to return the canned payload (no API calls)."""

    def fake_create(*_args, **_kwargs):
        return openai_embedding

    monkeypatch.setattr(Embedding, "create", fake_create)
@patch(
    "openai.api_resources.chat_completion.ChatCompletion.create",
    side_effect=lambda *args, **kwargs: _openai_chat_completion_response,
)
def test_ingest_pipeline(openai_completion, mock_openai_embedding, tmp_path):
    """End-to-end MVP check: index a dummy PDF, retrieve, then run QA.

    Both OpenAI endpoints are mocked — chat completions via the ``@patch``
    decorator, embeddings via the ``mock_openai_embedding`` fixture — so the
    test runs offline with dummy credentials.

    Note: the mock parameter was renamed from ``patch`` to
    ``openai_completion``; the old name shadowed ``unittest.mock.patch`` and
    was inconsistent with the agent tests (e.g. ``test_rewoo_agent``). The
    decorator injects the mock positionally, so the rename is safe.
    """
    indexing_pipeline = ReaderIndexingPipeline(
        storage=tmp_path, openai_api_key="some-key"
    )
    input_file_path = Path(__file__).parent / "resources/dummy.pdf"

    # Build the index from scratch for the dummy document.
    indexing_pipeline(input_file_path, force_reindex=True)

    # The retriever over the freshly built index should surface one chunk.
    retrieving_pipeline = indexing_pipeline.to_retrieving_pipeline()
    results = retrieving_pipeline("This is a query")
    assert len(results) == 1

    # Azure-flavoured chat model; credentials are dummies since the HTTP
    # layer is mocked above.
    llm = AzureChatOpenAI(
        openai_api_base="https://test.openai.azure.com/",
        openai_api_key="some-key",
        openai_api_version="2023-03-15-preview",
        deployment_name="gpt35turbo",
        temperature=0,
        request_timeout=60,
    )
    qa_pipeline = indexing_pipeline.to_qa_pipeline(llm=llm, openai_api_key="some-key")
    response = qa_pipeline("Summarize this document.")
    assert response

View File

@@ -21,7 +21,7 @@ def mock_openai_embedding(monkeypatch):
monkeypatch.setattr(Embedding, "create", lambda *args, **kwargs: openai_embedding)
def test_google_tool():
def test_google_tool(mock_google_search):
tool = GoogleSearchTool()
assert tool.name
assert tool.description