fix: re-enable tests and fix legacy test interface (#208)

* fix: re-enable tests and fix legacy test interface

* fix: skip llamacpp based on installed status

* fix: minor fix
Tuan Anh Nguyen Dang (Tadashi_Cin) authored 2024-09-04 12:37:39 +07:00; committed by GitHub
parent 92f6b8e1bf
commit 76f2652d2a
GPG Key ID: B5690EEEBB952194 (no known key found for this signature in database)
15 changed files with 177 additions and 120 deletions

View File

@@ -11,9 +11,9 @@ env:
 jobs:
   unit-test:
-    if: false # temporary disable this job due to legacy interface
+    # if: false # temporary disable this job due to legacy interface
     #TODO: enable this job after the new interface is ready
-    # if: ${{ !cancelled() }}
+    if: ${{ !cancelled() }}
     runs-on: ${{ matrix.os }}
     timeout-minutes: 20
     defaults:

View File

@@ -3,6 +3,7 @@ from typing import Type
 from decouple import config
 from llama_index.core.readers.base import BaseReader
+from llama_index.readers.file import PDFReader
 from theflow.settings import settings as flowsettings

 from kotaemon.base import BaseComponent, Document, Param
@@ -91,7 +92,7 @@ class DocumentIngestor(BaseComponent):
             file_extractors[ext] = cls()

         if self.pdf_mode == "normal":
-            pass  # use default loader of llama-index which is pypdf
+            file_extractors[".pdf"] = PDFReader()
         elif self.pdf_mode == "ocr":
             file_extractors[".pdf"] = OCRReader()
         elif self.pdf_mode == "multimodal":
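
Note on the hunk above: "normal" mode previously fell through to llama-index's implicit default PDF loader, and now registers PDFReader explicitly. A minimal sketch of the resulting dispatch; pick_pdf_reader is a hypothetical helper, not part of kotaemon, and only the branch visible in this hunk is shown concretely:

# Sketch only: illustrates the "normal" branch of the extractor dispatch.
from llama_index.core.readers.base import BaseReader
from llama_index.readers.file import PDFReader

def pick_pdf_reader(pdf_mode: str) -> BaseReader:
    if pdf_mode == "normal":
        # explicit pypdf-based reader instead of the implicit llama-index default
        return PDFReader()
    raise ValueError(f"unsupported pdf_mode: {pdf_mode}")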

View File

@@ -24,6 +24,62 @@ def if_haystack_not_installed():
         return False


+def if_sentence_bert_not_installed():
+    try:
+        import sentence_transformers  # noqa: F401
+    except ImportError:
+        return True
+    else:
+        return False
+
+
+def if_sentence_fastembed_not_installed():
+    try:
+        import fastembed  # noqa: F401
+    except ImportError:
+        return True
+    else:
+        return False
+
+
+def if_unstructured_not_installed():
+    try:
+        import unstructured  # noqa: F401
+    except ImportError:
+        return True
+    else:
+        return False
+
+
+def if_llama_cpp_not_installed():
+    try:
+        import llama_cpp  # noqa: F401
+    except ImportError:
+        return True
+    else:
+        return False
+
+
 skip_when_haystack_not_installed = pytest.mark.skipif(
     if_haystack_not_installed(), reason="Haystack is not installed"
 )
+
+skip_when_sentence_bert_not_installed = pytest.mark.skipif(
+    if_sentence_bert_not_installed(), reason="SBert is not installed"
+)
+
+skip_when_fastembed_not_installed = pytest.mark.skipif(
+    if_sentence_fastembed_not_installed(), reason="fastembed is not installed"
+)
+
+skip_when_unstructured_not_installed = pytest.mark.skipif(
+    if_unstructured_not_installed(), reason="unstructured is not installed"
+)
+
+skip_openai_lc_wrapper_test = pytest.mark.skipif(
+    True, reason="OpenAI LC wrapper test is skipped"
+)
+
+skip_llama_cpp_not_installed = pytest.mark.skipif(
+    if_llama_cpp_not_installed(), reason="llama_cpp is not installed"
+)
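
Each helper above attempts the import once at module load, and the corresponding skipif marker is evaluated at collection time, so an uninstalled optional dependency skips the test instead of failing it. A usage sketch (hypothetical test, not part of this commit):

# Hypothetical usage of the markers defined above.
from .conftest import skip_llama_cpp_not_installed

@skip_llama_cpp_not_installed
def test_llamacpp_smoke():
    # safe to import here: the marker skips this test when llama_cpp is absent
    import llama_cpp  # noqa: F401

An equivalent check that avoids importing the package at all would be importlib.util.find_spec("llama_cpp") is None.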

View File

@@ -13,7 +13,9 @@ from kotaemon.agents import (
     RewooAgent,
     WikipediaTool,
 )
-from kotaemon.llms import LCAzureChatOpenAI
+from kotaemon.llms import AzureChatOpenAI
+
+from .conftest import skip_openai_lc_wrapper_test

 FINAL_RESPONSE_TEXT = "Final Answer: Hello Cinnamon AI!"
 REWOO_VALID_PLAN = (
@@ -112,12 +114,11 @@ _openai_chat_completion_responses_react_langchain_tool = [
 @pytest.fixture
 def llm():
-    return LCAzureChatOpenAI(
-        azure_endpoint="https://dummy.openai.azure.com/",
-        openai_api_key="dummy",
-        openai_api_version="2023-03-15-preview",
-        deployment_name="dummy-q2",
-        temperature=0,
+    return AzureChatOpenAI(
+        api_key="dummy",
+        api_version="2024-05-01-preview",
+        azure_deployment="gpt-4o",
+        azure_endpoint="https://test.openai.azure.com/",
     )
@@ -175,6 +176,7 @@ def test_react_agent(openai_completion, llm, mock_google_search):
     assert response.text == FINAL_RESPONSE_TEXT


+@skip_openai_lc_wrapper_test
 @patch(
     "openai.resources.chat.completions.Completions.create",
     side_effect=_openai_chat_completion_responses_react,
@@ -199,6 +201,7 @@ def test_react_agent_langchain(openai_completion, llm, mock_google_search):
     assert response


+@skip_openai_lc_wrapper_test
 @patch(
     "openai.resources.chat.completions.Completions.create",
     side_effect=_openai_chat_completion_responses_react,
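
The @patch decorators above pass a list as side_effect, so each call to the mocked Completions.create returns the next canned ChatCompletion; this is how the multi-step ReAct loop consumes a different response per step. A self-contained sketch of that mechanic (the stand-in class and strings are illustrative, not the real OpenAI client):

from unittest.mock import patch

class Completions:
    def create(self):  # stand-in for openai's Completions.create
        raise RuntimeError("should be mocked in tests")

with patch.object(Completions, "create", side_effect=["step-1", "step-2"]):
    client = Completions()
    assert client.create() == "step-1"  # first call returns the first element
    assert client.create() == "step-2"  # second call returns the second element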

View File

@@ -4,10 +4,10 @@ import pytest
 from openai.types.chat.chat_completion import ChatCompletion

 from kotaemon.llms import (
+    AzureChatOpenAI,
     BasePromptComponent,
     GatedBranchingPipeline,
     GatedLinearPipeline,
-    LCAzureChatOpenAI,
     SimpleBranchingPipeline,
     SimpleLinearPipeline,
 )
@@ -40,12 +40,11 @@ _openai_chat_completion_response = ChatCompletion.parse_obj(
 @pytest.fixture
 def mock_llm():
-    return LCAzureChatOpenAI(
-        azure_endpoint="OPENAI_API_BASE",
-        openai_api_key="OPENAI_API_KEY",
-        openai_api_version="OPENAI_API_VERSION",
-        deployment_name="dummy-q2-gpt35",
-        temperature=0,
+    return AzureChatOpenAI(
+        api_key="dummy",
+        api_version="2024-05-01-preview",
+        azure_deployment="gpt-4o",
+        azure_endpoint="https://test.openai.azure.com/",
     )

View File

@@ -2,7 +2,7 @@ from unittest.mock import patch

 from openai.types.chat.chat_completion import ChatCompletion

-from kotaemon.llms import LCAzureChatOpenAI
+from kotaemon.llms import AzureChatOpenAI
 from kotaemon.llms.cot import ManualSequentialChainOfThought, Thought

 _openai_chat_completion_response = [
@@ -38,12 +38,11 @@ _openai_chat_completion_response = [
     side_effect=_openai_chat_completion_response,
 )
 def test_cot_plus_operator(openai_completion):
-    llm = LCAzureChatOpenAI(
-        azure_endpoint="https://dummy.openai.azure.com/",
-        openai_api_key="dummy",
-        openai_api_version="2023-03-15-preview",
-        deployment_name="dummy-q2",
-        temperature=0,
+    llm = AzureChatOpenAI(
+        api_key="dummy",
+        api_version="2024-05-01-preview",
+        azure_deployment="gpt-4o",
+        azure_endpoint="https://test.openai.azure.com/",
     )
     thought1 = Thought(
         prompt="Word {word} in {language} is ",
@@ -70,12 +69,11 @@ def test_cot_plus_operator(openai_completion):
     side_effect=_openai_chat_completion_response,
 )
 def test_cot_manual(openai_completion):
-    llm = LCAzureChatOpenAI(
-        azure_endpoint="https://dummy.openai.azure.com/",
-        openai_api_key="dummy",
-        openai_api_version="2023-03-15-preview",
-        deployment_name="dummy-q2",
-        temperature=0,
+    llm = AzureChatOpenAI(
+        api_key="dummy",
+        api_version="2024-05-01-preview",
+        azure_deployment="gpt-4o",
+        azure_endpoint="https://test.openai.azure.com/",
     )
     thought1 = Thought(
         prompt="Word {word} in {language} is ",
@@ -100,12 +98,11 @@ def test_cot_manual(openai_completion):
     side_effect=_openai_chat_completion_response,
 )
 def test_cot_with_termination_callback(openai_completion):
-    llm = LCAzureChatOpenAI(
-        azure_endpoint="https://dummy.openai.azure.com/",
-        openai_api_key="dummy",
-        openai_api_version="2023-03-15-preview",
-        deployment_name="dummy-q2",
-        temperature=0,
+    llm = AzureChatOpenAI(
+        api_key="dummy",
+        api_version="2024-05-01-preview",
+        azure_deployment="gpt-4o",
+        azure_endpoint="https://test.openai.azure.com/",
     )
     thought1 = Thought(
         prompt="Word {word} in {language} is ",

View File

@@ -8,12 +8,16 @@ from kotaemon.base import Document
 from kotaemon.embeddings import (
     AzureOpenAIEmbeddings,
     FastEmbedEmbeddings,
-    LCAzureOpenAIEmbeddings,
     LCCohereEmbeddings,
     LCHuggingFaceEmbeddings,
     OpenAIEmbeddings,
 )

+from .conftest import (
+    skip_when_fastembed_not_installed,
+    skip_when_sentence_bert_not_installed,
+)
+
 with open(Path(__file__).parent / "resources" / "embedding_openai_batch.json") as f:
     openai_embedding_batch = CreateEmbeddingResponse.model_validate(json.load(f))
@@ -32,12 +36,12 @@ def assert_embedding_result(output):
     "openai.resources.embeddings.Embeddings.create",
     side_effect=lambda *args, **kwargs: openai_embedding,
 )
-def test_lcazureopenai_embeddings_raw(openai_embedding_call):
-    model = LCAzureOpenAIEmbeddings(
-        model="text-embedding-ada-002",
-        deployment="embedding-deployment",
+def test_azureopenai_embeddings_raw(openai_embedding_call):
+    model = AzureOpenAIEmbeddings(
+        azure_deployment="embedding-deployment",
         azure_endpoint="https://test.openai.azure.com/",
-        openai_api_key="some-key",
+        api_key="some-key",
+        api_version="version",
     )
     output = model("Hello world")
     assert_embedding_result(output)
@@ -49,29 +53,13 @@ def test_lcazureopenai_embeddings_raw(openai_embedding_call):
     side_effect=lambda *args, **kwargs: openai_embedding_batch,
 )
 def test_lcazureopenai_embeddings_batch_raw(openai_embedding_call):
-    model = LCAzureOpenAIEmbeddings(
-        model="text-embedding-ada-002",
-        deployment="embedding-deployment",
-        azure_endpoint="https://test.openai.azure.com/",
-        openai_api_key="some-key",
-    )
-    output = model(["Hello world", "Goodbye world"])
-    assert_embedding_result(output)
-    openai_embedding_call.assert_called()
-
-
-@patch(
-    "openai.resources.embeddings.Embeddings.create",
-    side_effect=lambda *args, **kwargs: openai_embedding,
-)
-def test_azureopenai_embeddings_raw(openai_embedding_call):
     model = AzureOpenAIEmbeddings(
+        azure_deployment="embedding-deployment",
         azure_endpoint="https://test.openai.azure.com/",
         api_key="some-key",
         api_version="version",
-        azure_deployment="text-embedding-ada-002",
     )
-    output = model("Hello world")
+    output = model(["Hello world", "Goodbye world"])
     assert_embedding_result(output)
     openai_embedding_call.assert_called()
@@ -82,10 +70,10 @@ def test_azureopenai_embeddings_raw(openai_embedding_call):
 )
 def test_azureopenai_embeddings_batch_raw(openai_embedding_call):
     model = AzureOpenAIEmbeddings(
+        azure_deployment="text-embedding-ada-002",
         azure_endpoint="https://test.openai.azure.com/",
         api_key="some-key",
         api_version="version",
-        azure_deployment="text-embedding-ada-002",
     )
     output = model(["Hello world", "Goodbye world"])
     assert_embedding_result(output)
@@ -120,6 +108,7 @@ def test_openai_embeddings_batch_raw(openai_embedding_call):
     openai_embedding_call.assert_called()


+@skip_when_sentence_bert_not_installed
 @patch(
     "sentence_transformers.SentenceTransformer",
     side_effect=lambda *args, **kwargs: None,
@@ -149,7 +138,9 @@ def test_lchuggingface_embeddings(
 )
 def test_lccohere_embeddings(langchain_cohere_embedding_call):
     model = LCCohereEmbeddings(
-        model="embed-english-light-v2.0", cohere_api_key="my-api-key"
+        model="embed-english-light-v2.0",
+        cohere_api_key="my-api-key",
+        user_agent="test",
     )

     output = model("Hello World")
@@ -157,6 +148,7 @@ def test_lccohere_embeddings(langchain_cohere_embedding_call):
     langchain_cohere_embedding_call.assert_called()


+@skip_when_fastembed_not_installed
 def test_fastembed_embeddings():
     model = FastEmbedEmbeddings()
     output = model("Hello World")
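
The *_raw and *_batch_raw tests above exercise both call shapes of the embedding components: a single string and a list of strings. A sketch, assuming a kotaemon environment with Embeddings.create mocked as in the tests (the dummy credentials mirror the test values):

from kotaemon.embeddings import AzureOpenAIEmbeddings

model = AzureOpenAIEmbeddings(
    azure_deployment="embedding-deployment",
    azure_endpoint="https://test.openai.azure.com/",
    api_key="some-key",
    api_version="version",
)
single = model("Hello world")                    # single input
batch = model(["Hello world", "Goodbye world"])  # batch input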

View File

@@ -1,32 +1,31 @@
 import json
 from pathlib import Path
 from typing import cast
+from unittest.mock import patch

-import pytest
-from openai.resources.embeddings import Embeddings
+from openai.types.create_embedding_response import CreateEmbeddingResponse

 from kotaemon.base import Document
-from kotaemon.embeddings import LCAzureOpenAIEmbeddings
+from kotaemon.embeddings import AzureOpenAIEmbeddings
 from kotaemon.indices import VectorIndexing, VectorRetrieval
 from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore

 with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
-    openai_embedding = json.load(f)
-
-
-@pytest.fixture(scope="function")
-def mock_openai_embedding(monkeypatch):
-    monkeypatch.setattr(Embeddings, "create", lambda *args, **kwargs: openai_embedding)
-
-
-def test_indexing(mock_openai_embedding, tmp_path):
+    openai_embedding = CreateEmbeddingResponse.model_validate(json.load(f))
+
+
+@patch(
+    "openai.resources.embeddings.Embeddings.create",
+    side_effect=lambda *args, **kwargs: openai_embedding,
+)
+def test_indexing(tmp_path):
     db = ChromaVectorStore(path=str(tmp_path))
     doc_store = InMemoryDocumentStore()
-    embedding = LCAzureOpenAIEmbeddings(
-        model="text-embedding-ada-002",
-        deployment="embedding-deployment",
+    embedding = AzureOpenAIEmbeddings(
+        azure_deployment="text-embedding-ada-002",
         azure_endpoint="https://test.openai.azure.com/",
-        openai_api_key="some-key",
+        api_key="some-key",
+        api_version="version",
     )

     pipeline = VectorIndexing(vector_store=db, embedding=embedding, doc_store=doc_store)
@@ -39,14 +38,18 @@ def test_indexing(mock_openai_embedding, tmp_path):
     assert len(pipeline.doc_store._store) == 1, "Expected 1 document"


-def test_retrieving(mock_openai_embedding, tmp_path):
+@patch(
+    "openai.resources.embeddings.Embeddings.create",
+    side_effect=lambda *args, **kwargs: openai_embedding,
+)
+def test_retrieving(tmp_path):
     db = ChromaVectorStore(path=str(tmp_path))
     doc_store = InMemoryDocumentStore()
-    embedding = LCAzureOpenAIEmbeddings(
-        model="text-embedding-ada-002",
-        deployment="embedding-deployment",
+    embedding = AzureOpenAIEmbeddings(
+        azure_deployment="text-embedding-ada-002",
         azure_endpoint="https://test.openai.azure.com/",
-        openai_api_key="some-key",
+        api_key="some-key",
+        api_version="version",
     )
     index_pipeline = VectorIndexing(
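
This file swaps the module-level mock_openai_embedding monkeypatch fixture for an inline @patch decorator: the mock is now scoped to each test and no fixture plumbing is needed. A minimal sketch of the two styles, assuming the openai package is importable (fake_create and test_something are hypothetical names):

import pytest
from unittest.mock import patch

def fake_create(*args, **kwargs):
    return {"data": []}  # stand-in payload

# old style: a fixture that patches for any test requesting it
@pytest.fixture
def mock_openai_embedding(monkeypatch):
    from openai.resources.embeddings import Embeddings
    monkeypatch.setattr(Embeddings, "create", fake_create)

# new style: the patch lives on the test itself and is passed in as an argument
@patch("openai.resources.embeddings.Embeddings.create", side_effect=fake_create)
def test_something(mock_create):
    assert mock_create.call_count == 0  # patch is active only inside this test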

View File

@@ -4,15 +4,17 @@ from unittest.mock import patch
 import pytest

 from kotaemon.base.schema import AIMessage, HumanMessage, LLMInterface, SystemMessage
-from kotaemon.llms import LCAzureChatOpenAI, LlamaCppChat
+from kotaemon.llms import AzureChatOpenAI, LlamaCppChat

 try:
-    from langchain_openai import AzureChatOpenAI as AzureChatOpenAILC
+    pass
 except ImportError:
-    from langchain.chat_models import AzureChatOpenAI as AzureChatOpenAILC
+    pass

 from openai.types.chat.chat_completion import ChatCompletion

+from .conftest import skip_llama_cpp_not_installed
+
 _openai_chat_completion_response = ChatCompletion.parse_obj(
     {
         "id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
@@ -43,17 +45,12 @@ _openai_chat_completion_response = ChatCompletion.parse_obj(
     side_effect=lambda *args, **kwargs: _openai_chat_completion_response,
 )
 def test_azureopenai_model(openai_completion):
-    model = LCAzureChatOpenAI(
+    model = AzureChatOpenAI(
+        api_key="dummy",
+        api_version="2024-05-01-preview",
+        azure_deployment="gpt-4o",
         azure_endpoint="https://test.openai.azure.com/",
-        openai_api_key="some-key",
-        openai_api_version="2023-03-15-preview",
-        deployment_name="gpt35turbo",
-        temperature=0,
     )
-    assert isinstance(
-        model.to_langchain_format(), AzureChatOpenAILC
-    ), "Agent not wrapped in Langchain's AzureChatOpenAI"
     # test for str input - stream mode
     output = model("hello world")
     assert isinstance(
@@ -76,6 +73,7 @@ def test_azureopenai_model(openai_completion):
     openai_completion.assert_called()


+@skip_llama_cpp_not_installed
 def test_llamacpp_chat():
     from llama_cpp import Llama
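
With the langchain-wrapper assertion gone, the test now only exercises kotaemon's own call interface, which accepts either a plain string or a list of typed messages. A sketch, assuming the OpenAI client is mocked as in the test above:

from kotaemon.base.schema import HumanMessage, SystemMessage
from kotaemon.llms import AzureChatOpenAI

model = AzureChatOpenAI(
    api_key="dummy",
    api_version="2024-05-01-preview",
    azure_deployment="gpt-4o",
    azure_endpoint="https://test.openai.azure.com/",
)
output = model("hello world")  # str input
output = model([SystemMessage(content="You are helpful."), HumanMessage(content="Hi")])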

View File

@@ -13,6 +13,8 @@ except ImportError:

 from openai.types.completion import Completion

+from .conftest import skip_llama_cpp_not_installed, skip_openai_lc_wrapper_test
+
 _openai_completion_response = Completion.parse_obj(
     {
         "id": "cmpl-7qyNoIo6gRSCJR0hi8o3ZKBH4RkJ0",
@@ -33,6 +35,7 @@ _openai_completion_response = Completion.parse_obj(
 )


+@skip_openai_lc_wrapper_test
 @patch(
     "openai.resources.completions.Completions.create",
     side_effect=lambda *args, **kwargs: _openai_completion_response,
@@ -79,6 +82,7 @@ def test_openai_model(openai_completion):
     ), "Output for single text is not LLMInterface"


+@skip_llama_cpp_not_installed
 def test_llamacpp_model():
     weight_path = Path(__file__).parent / "resources" / "ggml-vocab-llama.gguf"

View File

@@ -14,6 +14,8 @@ from kotaemon.loaders import (
     UnstructuredReader,
 )

+from .conftest import skip_when_unstructured_not_installed
+

 def test_docx_reader():
     reader = DocxReader()
@@ -52,6 +54,7 @@ def test_pdf_reader():
     assert len(nodes) > 0


+@skip_when_unstructured_not_installed
 def test_unstructured_pdf_reader():
     reader = UnstructuredReader()
     dirpath = Path(__file__).parent

View File

@@ -5,7 +5,7 @@ from openai.types.chat.chat_completion import ChatCompletion

 from kotaemon.base import Document
 from kotaemon.indices.rankings import LLMReranking
-from kotaemon.llms import LCAzureChatOpenAI
+from kotaemon.llms import AzureChatOpenAI

 _openai_chat_completion_responses = [
     ChatCompletion.parse_obj(
@@ -41,11 +41,11 @@ _openai_chat_completion_responses = [
 @pytest.fixture
 def llm():
-    return LCAzureChatOpenAI(
-        azure_endpoint="https://dummy.openai.azure.com/",
-        openai_api_key="dummy",
-        openai_api_version="2023-03-15-preview",
-        temperature=0,
+    return AzureChatOpenAI(
+        api_key="dummy",
+        api_version="2024-05-01-preview",
+        azure_deployment="gpt-4o",
+        azure_endpoint="https://test.openai.azure.com/",
     )

View File

@@ -5,6 +5,8 @@ import pytest

 from kotaemon.loaders import MathpixPDFReader, OCRReader, PandasExcelReader

+from .conftest import skip_when_unstructured_not_installed
+
 input_file = Path(__file__).parent / "resources" / "table.pdf"
 input_file_excel = Path(__file__).parent / "resources" / "dummy.xlsx"
@@ -26,6 +28,7 @@ def mathpix_output():
     return content


+@skip_when_unstructured_not_installed
 def test_ocr_reader(fullocr_output):
     reader = OCRReader()
     documents = reader.load_data(input_file, response_content=fullocr_output)

View File

@@ -1,22 +1,17 @@
 import json
 from pathlib import Path
+from unittest.mock import patch

-import pytest
-from openai.resources.embeddings import Embeddings
+from openai.types.create_embedding_response import CreateEmbeddingResponse

 from kotaemon.agents.tools import ComponentTool, GoogleSearchTool, WikipediaTool
 from kotaemon.base import Document
-from kotaemon.embeddings import LCAzureOpenAIEmbeddings
+from kotaemon.embeddings import AzureOpenAIEmbeddings
 from kotaemon.indices.vectorindex import VectorIndexing, VectorRetrieval
 from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore

 with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
-    openai_embedding = json.load(f)
-
-
-@pytest.fixture(scope="function")
-def mock_openai_embedding(monkeypatch):
-    monkeypatch.setattr(Embeddings, "create", lambda *args, **kwargs: openai_embedding)
+    openai_embedding = CreateEmbeddingResponse.model_validate(json.load(f))


 def test_google_tool(mock_google_search):
@@ -35,14 +30,18 @@ def test_wikipedia_tool():
     assert output


-def test_pipeline_tool(mock_openai_embedding, tmp_path):
+@patch(
+    "openai.resources.embeddings.Embeddings.create",
+    side_effect=lambda *args, **kwargs: openai_embedding,
+)
+def test_pipeline_tool(tmp_path):
     db = ChromaVectorStore(path=str(tmp_path))
     doc_store = InMemoryDocumentStore()
-    embedding = LCAzureOpenAIEmbeddings(
-        model="text-embedding-ada-002",
-        deployment="embedding-deployment",
+    embedding = AzureOpenAIEmbeddings(
+        azure_deployment="embedding-deployment",
         azure_endpoint="https://test.openai.azure.com/",
-        openai_api_key="some-key",
+        api_key="some-key",
+        api_version="version",
     )
     index_pipeline = VectorIndexing(

View File

@@ -7,7 +7,7 @@ from index import ReaderIndexingPipeline
 from openai.resources.embeddings import Embeddings
 from openai.types.chat.chat_completion import ChatCompletion

-from kotaemon.llms import LCAzureChatOpenAI
+from kotaemon.llms import AzureChatOpenAI

 with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
     openai_embedding = json.load(f)
@@ -61,12 +61,11 @@ def test_ingest_pipeline(patch, mock_openai_embedding, tmp_path):
     assert len(results) == 1

     # create llm
-    llm = LCAzureChatOpenAI(
-        openai_api_base="https://test.openai.azure.com/",
-        openai_api_key="some-key",
-        openai_api_version="2023-03-15-preview",
-        deployment_name="gpt35turbo",
-        temperature=0,
+    llm = AzureChatOpenAI(
+        api_key="dummy",
+        api_version="2024-05-01-preview",
+        azure_deployment="gpt-4o",
+        azure_endpoint="https://test.openai.azure.com/",
     )
     qa_pipeline = indexing_pipeline.to_qa_pipeline(llm=llm, openai_api_key="some-key")
     response = qa_pipeline("Summarize this document.")