fix: re-enable tests and fix legacy test interface (#208)

* fix: re-enable tests and fix legacy test interface

* fix: skip llamacpp based on installed status

* fix: minor fix
This commit is contained in:
Tuan Anh Nguyen Dang (Tadashi_Cin) 2024-09-04 12:37:39 +07:00 committed by GitHub
parent 92f6b8e1bf
commit 76f2652d2a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 177 additions and 120 deletions

View File

@ -11,9 +11,9 @@ env:
jobs:
unit-test:
if: false # temporary disable this job due to legacy interface
# if: false # temporarily disable this job due to legacy interface
#TODO: enable this job after the new interface is ready
# if: ${{ !cancelled() }}
if: ${{ !cancelled() }}
runs-on: ${{ matrix.os }}
timeout-minutes: 20
defaults:

View File

@ -3,6 +3,7 @@ from typing import Type
from decouple import config
from llama_index.core.readers.base import BaseReader
from llama_index.readers.file import PDFReader
from theflow.settings import settings as flowsettings
from kotaemon.base import BaseComponent, Document, Param
@ -91,7 +92,7 @@ class DocumentIngestor(BaseComponent):
file_extractors[ext] = cls()
if self.pdf_mode == "normal":
pass # use default loader of llama-index which is pypdf
file_extractors[".pdf"] = PDFReader()
elif self.pdf_mode == "ocr":
file_extractors[".pdf"] = OCRReader()
elif self.pdf_mode == "multimodal":

View File

@ -24,6 +24,62 @@ def if_haystack_not_installed():
return False
def if_sentence_bert_not_installed():
    """Tell whether the ``sentence_transformers`` package cannot be imported.

    Returns:
        bool: True when the import raises ImportError, False when it succeeds.
    """
    missing = False
    try:
        import sentence_transformers  # noqa: F401
    except ImportError:
        # Only import failures count as "not installed"; other errors propagate.
        missing = True
    return missing
def if_sentence_fastembed_not_installed():
    """Tell whether the ``fastembed`` package cannot be imported.

    Returns:
        bool: False when the import succeeds, True when it raises ImportError.
    """
    try:
        import fastembed  # noqa: F401

        # Reaching this line means the import worked.
        return False
    except ImportError:
        return True
def if_unstructured_not_installed():
    """Tell whether the ``unstructured`` package cannot be imported.

    Returns:
        bool: True when the import raises ImportError, False when it succeeds.
    """
    import_failed = False
    try:
        import unstructured  # noqa: F401
    except ImportError:
        # Only import failures count as "not installed"; other errors propagate.
        import_failed = True
    return import_failed
def if_llama_cpp_not_installed():
    """Tell whether the ``llama_cpp`` package cannot be imported.

    Returns:
        bool: False when the import succeeds, True when it raises ImportError.
    """
    try:
        import llama_cpp  # noqa: F401

        # Reaching this line means the import worked.
        return False
    except ImportError:
        return True
# Reusable pytest skip markers for tests that need optional dependencies.
# Each condition is computed here at module level by calling the matching
# if_*_not_installed() helper; a truthy result makes pytest skip the
# decorated test and report the given reason.
skip_when_haystack_not_installed = pytest.mark.skipif(
if_haystack_not_installed(), reason="Haystack is not installed"
)
skip_when_sentence_bert_not_installed = pytest.mark.skipif(
if_sentence_bert_not_installed(), reason="SBert is not installed"
)
skip_when_fastembed_not_installed = pytest.mark.skipif(
if_sentence_fastembed_not_installed(), reason="fastembed is not installed"
)
skip_when_unstructured_not_installed = pytest.mark.skipif(
if_unstructured_not_installed(), reason="unstructured is not installed"
)
# The condition is the literal True, so any test carrying this marker is
# always skipped regardless of the environment.
skip_openai_lc_wrapper_test = pytest.mark.skipif(
True, reason="OpenAI LC wrapper test is skipped"
)
skip_llama_cpp_not_installed = pytest.mark.skipif(
if_llama_cpp_not_installed(), reason="llama_cpp is not installed"
)

View File

@ -13,7 +13,9 @@ from kotaemon.agents import (
RewooAgent,
WikipediaTool,
)
from kotaemon.llms import LCAzureChatOpenAI
from kotaemon.llms import AzureChatOpenAI
from .conftest import skip_openai_lc_wrapper_test
FINAL_RESPONSE_TEXT = "Final Answer: Hello Cinnamon AI!"
REWOO_VALID_PLAN = (
@ -112,12 +114,11 @@ _openai_chat_completion_responses_react_langchain_tool = [
@pytest.fixture
def llm():
return LCAzureChatOpenAI(
azure_endpoint="https://dummy.openai.azure.com/",
openai_api_key="dummy",
openai_api_version="2023-03-15-preview",
deployment_name="dummy-q2",
temperature=0,
return AzureChatOpenAI(
api_key="dummy",
api_version="2024-05-01-preview",
azure_deployment="gpt-4o",
azure_endpoint="https://test.openai.azure.com/",
)
@ -175,6 +176,7 @@ def test_react_agent(openai_completion, llm, mock_google_search):
assert response.text == FINAL_RESPONSE_TEXT
@skip_openai_lc_wrapper_test
@patch(
"openai.resources.chat.completions.Completions.create",
side_effect=_openai_chat_completion_responses_react,
@ -199,6 +201,7 @@ def test_react_agent_langchain(openai_completion, llm, mock_google_search):
assert response
@skip_openai_lc_wrapper_test
@patch(
"openai.resources.chat.completions.Completions.create",
side_effect=_openai_chat_completion_responses_react,

View File

@ -4,10 +4,10 @@ import pytest
from openai.types.chat.chat_completion import ChatCompletion
from kotaemon.llms import (
AzureChatOpenAI,
BasePromptComponent,
GatedBranchingPipeline,
GatedLinearPipeline,
LCAzureChatOpenAI,
SimpleBranchingPipeline,
SimpleLinearPipeline,
)
@ -40,12 +40,11 @@ _openai_chat_completion_response = ChatCompletion.parse_obj(
@pytest.fixture
def mock_llm():
return LCAzureChatOpenAI(
azure_endpoint="OPENAI_API_BASE",
openai_api_key="OPENAI_API_KEY",
openai_api_version="OPENAI_API_VERSION",
deployment_name="dummy-q2-gpt35",
temperature=0,
return AzureChatOpenAI(
api_key="dummy",
api_version="2024-05-01-preview",
azure_deployment="gpt-4o",
azure_endpoint="https://test.openai.azure.com/",
)

View File

@ -2,7 +2,7 @@ from unittest.mock import patch
from openai.types.chat.chat_completion import ChatCompletion
from kotaemon.llms import LCAzureChatOpenAI
from kotaemon.llms import AzureChatOpenAI
from kotaemon.llms.cot import ManualSequentialChainOfThought, Thought
_openai_chat_completion_response = [
@ -38,12 +38,11 @@ _openai_chat_completion_response = [
side_effect=_openai_chat_completion_response,
)
def test_cot_plus_operator(openai_completion):
llm = LCAzureChatOpenAI(
azure_endpoint="https://dummy.openai.azure.com/",
openai_api_key="dummy",
openai_api_version="2023-03-15-preview",
deployment_name="dummy-q2",
temperature=0,
llm = AzureChatOpenAI(
api_key="dummy",
api_version="2024-05-01-preview",
azure_deployment="gpt-4o",
azure_endpoint="https://test.openai.azure.com/",
)
thought1 = Thought(
prompt="Word {word} in {language} is ",
@ -70,12 +69,11 @@ def test_cot_plus_operator(openai_completion):
side_effect=_openai_chat_completion_response,
)
def test_cot_manual(openai_completion):
llm = LCAzureChatOpenAI(
azure_endpoint="https://dummy.openai.azure.com/",
openai_api_key="dummy",
openai_api_version="2023-03-15-preview",
deployment_name="dummy-q2",
temperature=0,
llm = AzureChatOpenAI(
api_key="dummy",
api_version="2024-05-01-preview",
azure_deployment="gpt-4o",
azure_endpoint="https://test.openai.azure.com/",
)
thought1 = Thought(
prompt="Word {word} in {language} is ",
@ -100,12 +98,11 @@ def test_cot_manual(openai_completion):
side_effect=_openai_chat_completion_response,
)
def test_cot_with_termination_callback(openai_completion):
llm = LCAzureChatOpenAI(
azure_endpoint="https://dummy.openai.azure.com/",
openai_api_key="dummy",
openai_api_version="2023-03-15-preview",
deployment_name="dummy-q2",
temperature=0,
llm = AzureChatOpenAI(
api_key="dummy",
api_version="2024-05-01-preview",
azure_deployment="gpt-4o",
azure_endpoint="https://test.openai.azure.com/",
)
thought1 = Thought(
prompt="Word {word} in {language} is ",

View File

@ -8,12 +8,16 @@ from kotaemon.base import Document
from kotaemon.embeddings import (
AzureOpenAIEmbeddings,
FastEmbedEmbeddings,
LCAzureOpenAIEmbeddings,
LCCohereEmbeddings,
LCHuggingFaceEmbeddings,
OpenAIEmbeddings,
)
from .conftest import (
skip_when_fastembed_not_installed,
skip_when_sentence_bert_not_installed,
)
with open(Path(__file__).parent / "resources" / "embedding_openai_batch.json") as f:
openai_embedding_batch = CreateEmbeddingResponse.model_validate(json.load(f))
@ -32,12 +36,12 @@ def assert_embedding_result(output):
"openai.resources.embeddings.Embeddings.create",
side_effect=lambda *args, **kwargs: openai_embedding,
)
def test_lcazureopenai_embeddings_raw(openai_embedding_call):
model = LCAzureOpenAIEmbeddings(
model="text-embedding-ada-002",
deployment="embedding-deployment",
def test_azureopenai_embeddings_raw(openai_embedding_call):
model = AzureOpenAIEmbeddings(
azure_deployment="embedding-deployment",
azure_endpoint="https://test.openai.azure.com/",
openai_api_key="some-key",
api_key="some-key",
api_version="version",
)
output = model("Hello world")
assert_embedding_result(output)
@ -49,29 +53,13 @@ def test_lcazureopenai_embeddings_raw(openai_embedding_call):
side_effect=lambda *args, **kwargs: openai_embedding_batch,
)
def test_lcazureopenai_embeddings_batch_raw(openai_embedding_call):
model = LCAzureOpenAIEmbeddings(
model="text-embedding-ada-002",
deployment="embedding-deployment",
azure_endpoint="https://test.openai.azure.com/",
openai_api_key="some-key",
)
output = model(["Hello world", "Goodbye world"])
assert_embedding_result(output)
openai_embedding_call.assert_called()
@patch(
"openai.resources.embeddings.Embeddings.create",
side_effect=lambda *args, **kwargs: openai_embedding,
)
def test_azureopenai_embeddings_raw(openai_embedding_call):
model = AzureOpenAIEmbeddings(
azure_deployment="embedding-deployment",
azure_endpoint="https://test.openai.azure.com/",
api_key="some-key",
api_version="version",
azure_deployment="text-embedding-ada-002",
)
output = model("Hello world")
output = model(["Hello world", "Goodbye world"])
assert_embedding_result(output)
openai_embedding_call.assert_called()
@ -82,10 +70,10 @@ def test_azureopenai_embeddings_raw(openai_embedding_call):
)
def test_azureopenai_embeddings_batch_raw(openai_embedding_call):
model = AzureOpenAIEmbeddings(
azure_deployment="text-embedding-ada-002",
azure_endpoint="https://test.openai.azure.com/",
api_key="some-key",
api_version="version",
azure_deployment="text-embedding-ada-002",
)
output = model(["Hello world", "Goodbye world"])
assert_embedding_result(output)
@ -120,6 +108,7 @@ def test_openai_embeddings_batch_raw(openai_embedding_call):
openai_embedding_call.assert_called()
@skip_when_sentence_bert_not_installed
@patch(
"sentence_transformers.SentenceTransformer",
side_effect=lambda *args, **kwargs: None,
@ -149,7 +138,9 @@ def test_lchuggingface_embeddings(
)
def test_lccohere_embeddings(langchain_cohere_embedding_call):
model = LCCohereEmbeddings(
model="embed-english-light-v2.0", cohere_api_key="my-api-key"
model="embed-english-light-v2.0",
cohere_api_key="my-api-key",
user_agent="test",
)
output = model("Hello World")
@ -157,6 +148,7 @@ def test_lccohere_embeddings(langchain_cohere_embedding_call):
langchain_cohere_embedding_call.assert_called()
@skip_when_fastembed_not_installed
def test_fastembed_embeddings():
model = FastEmbedEmbeddings()
output = model("Hello World")

View File

@ -1,32 +1,31 @@
import json
from pathlib import Path
from typing import cast
from unittest.mock import patch
import pytest
from openai.resources.embeddings import Embeddings
from openai.types.create_embedding_response import CreateEmbeddingResponse
from kotaemon.base import Document
from kotaemon.embeddings import LCAzureOpenAIEmbeddings
from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.indices import VectorIndexing, VectorRetrieval
from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
openai_embedding = json.load(f)
openai_embedding = CreateEmbeddingResponse.model_validate(json.load(f))
@pytest.fixture(scope="function")
def mock_openai_embedding(monkeypatch):
monkeypatch.setattr(Embeddings, "create", lambda *args, **kwargs: openai_embedding)
def test_indexing(mock_openai_embedding, tmp_path):
@patch(
"openai.resources.embeddings.Embeddings.create",
side_effect=lambda *args, **kwargs: openai_embedding,
)
def test_indexing(tmp_path):
db = ChromaVectorStore(path=str(tmp_path))
doc_store = InMemoryDocumentStore()
embedding = LCAzureOpenAIEmbeddings(
model="text-embedding-ada-002",
deployment="embedding-deployment",
embedding = AzureOpenAIEmbeddings(
azure_deployment="text-embedding-ada-002",
azure_endpoint="https://test.openai.azure.com/",
openai_api_key="some-key",
api_key="some-key",
api_version="version",
)
pipeline = VectorIndexing(vector_store=db, embedding=embedding, doc_store=doc_store)
@ -39,14 +38,18 @@ def test_indexing(mock_openai_embedding, tmp_path):
assert len(pipeline.doc_store._store) == 1, "Expected 1 document"
def test_retrieving(mock_openai_embedding, tmp_path):
@patch(
"openai.resources.embeddings.Embeddings.create",
side_effect=lambda *args, **kwargs: openai_embedding,
)
def test_retrieving(tmp_path):
db = ChromaVectorStore(path=str(tmp_path))
doc_store = InMemoryDocumentStore()
embedding = LCAzureOpenAIEmbeddings(
model="text-embedding-ada-002",
deployment="embedding-deployment",
embedding = AzureOpenAIEmbeddings(
azure_deployment="text-embedding-ada-002",
azure_endpoint="https://test.openai.azure.com/",
openai_api_key="some-key",
api_key="some-key",
api_version="version",
)
index_pipeline = VectorIndexing(

View File

@ -4,15 +4,17 @@ from unittest.mock import patch
import pytest
from kotaemon.base.schema import AIMessage, HumanMessage, LLMInterface, SystemMessage
from kotaemon.llms import LCAzureChatOpenAI, LlamaCppChat
from kotaemon.llms import AzureChatOpenAI, LlamaCppChat
try:
from langchain_openai import AzureChatOpenAI as AzureChatOpenAILC
pass
except ImportError:
from langchain.chat_models import AzureChatOpenAI as AzureChatOpenAILC
pass
from openai.types.chat.chat_completion import ChatCompletion
from .conftest import skip_llama_cpp_not_installed
_openai_chat_completion_response = ChatCompletion.parse_obj(
{
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
@ -43,17 +45,12 @@ _openai_chat_completion_response = ChatCompletion.parse_obj(
side_effect=lambda *args, **kwargs: _openai_chat_completion_response,
)
def test_azureopenai_model(openai_completion):
model = LCAzureChatOpenAI(
model = AzureChatOpenAI(
api_key="dummy",
api_version="2024-05-01-preview",
azure_deployment="gpt-4o",
azure_endpoint="https://test.openai.azure.com/",
openai_api_key="some-key",
openai_api_version="2023-03-15-preview",
deployment_name="gpt35turbo",
temperature=0,
)
assert isinstance(
model.to_langchain_format(), AzureChatOpenAILC
), "Agent not wrapped in Langchain's AzureChatOpenAI"
# test for str input - stream mode
output = model("hello world")
assert isinstance(
@ -76,6 +73,7 @@ def test_azureopenai_model(openai_completion):
openai_completion.assert_called()
@skip_llama_cpp_not_installed
def test_llamacpp_chat():
from llama_cpp import Llama

View File

@ -13,6 +13,8 @@ except ImportError:
from openai.types.completion import Completion
from .conftest import skip_llama_cpp_not_installed, skip_openai_lc_wrapper_test
_openai_completion_response = Completion.parse_obj(
{
"id": "cmpl-7qyNoIo6gRSCJR0hi8o3ZKBH4RkJ0",
@ -33,6 +35,7 @@ _openai_completion_response = Completion.parse_obj(
)
@skip_openai_lc_wrapper_test
@patch(
"openai.resources.completions.Completions.create",
side_effect=lambda *args, **kwargs: _openai_completion_response,
@ -79,6 +82,7 @@ def test_openai_model(openai_completion):
), "Output for single text is not LLMInterface"
@skip_llama_cpp_not_installed
def test_llamacpp_model():
weight_path = Path(__file__).parent / "resources" / "ggml-vocab-llama.gguf"

View File

@ -14,6 +14,8 @@ from kotaemon.loaders import (
UnstructuredReader,
)
from .conftest import skip_when_unstructured_not_installed
def test_docx_reader():
reader = DocxReader()
@ -52,6 +54,7 @@ def test_pdf_reader():
assert len(nodes) > 0
@skip_when_unstructured_not_installed
def test_unstructured_pdf_reader():
reader = UnstructuredReader()
dirpath = Path(__file__).parent

View File

@ -5,7 +5,7 @@ from openai.types.chat.chat_completion import ChatCompletion
from kotaemon.base import Document
from kotaemon.indices.rankings import LLMReranking
from kotaemon.llms import LCAzureChatOpenAI
from kotaemon.llms import AzureChatOpenAI
_openai_chat_completion_responses = [
ChatCompletion.parse_obj(
@ -41,11 +41,11 @@ _openai_chat_completion_responses = [
@pytest.fixture
def llm():
return LCAzureChatOpenAI(
azure_endpoint="https://dummy.openai.azure.com/",
openai_api_key="dummy",
openai_api_version="2023-03-15-preview",
temperature=0,
return AzureChatOpenAI(
api_key="dummy",
api_version="2024-05-01-preview",
azure_deployment="gpt-4o",
azure_endpoint="https://test.openai.azure.com/",
)

View File

@ -5,6 +5,8 @@ import pytest
from kotaemon.loaders import MathpixPDFReader, OCRReader, PandasExcelReader
from .conftest import skip_when_unstructured_not_installed
input_file = Path(__file__).parent / "resources" / "table.pdf"
input_file_excel = Path(__file__).parent / "resources" / "dummy.xlsx"
@ -26,6 +28,7 @@ def mathpix_output():
return content
@skip_when_unstructured_not_installed
def test_ocr_reader(fullocr_output):
reader = OCRReader()
documents = reader.load_data(input_file, response_content=fullocr_output)

View File

@ -1,22 +1,17 @@
import json
from pathlib import Path
from unittest.mock import patch
import pytest
from openai.resources.embeddings import Embeddings
from openai.types.create_embedding_response import CreateEmbeddingResponse
from kotaemon.agents.tools import ComponentTool, GoogleSearchTool, WikipediaTool
from kotaemon.base import Document
from kotaemon.embeddings import LCAzureOpenAIEmbeddings
from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.indices.vectorindex import VectorIndexing, VectorRetrieval
from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
openai_embedding = json.load(f)
@pytest.fixture(scope="function")
def mock_openai_embedding(monkeypatch):
monkeypatch.setattr(Embeddings, "create", lambda *args, **kwargs: openai_embedding)
openai_embedding = CreateEmbeddingResponse.model_validate(json.load(f))
def test_google_tool(mock_google_search):
@ -35,14 +30,18 @@ def test_wikipedia_tool():
assert output
def test_pipeline_tool(mock_openai_embedding, tmp_path):
@patch(
"openai.resources.embeddings.Embeddings.create",
side_effect=lambda *args, **kwargs: openai_embedding,
)
def test_pipeline_tool(tmp_path):
db = ChromaVectorStore(path=str(tmp_path))
doc_store = InMemoryDocumentStore()
embedding = LCAzureOpenAIEmbeddings(
model="text-embedding-ada-002",
deployment="embedding-deployment",
embedding = AzureOpenAIEmbeddings(
azure_deployment="embedding-deployment",
azure_endpoint="https://test.openai.azure.com/",
openai_api_key="some-key",
api_key="some-key",
api_version="version",
)
index_pipeline = VectorIndexing(

View File

@ -7,7 +7,7 @@ from index import ReaderIndexingPipeline
from openai.resources.embeddings import Embeddings
from openai.types.chat.chat_completion import ChatCompletion
from kotaemon.llms import LCAzureChatOpenAI
from kotaemon.llms import AzureChatOpenAI
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
openai_embedding = json.load(f)
@ -61,12 +61,11 @@ def test_ingest_pipeline(patch, mock_openai_embedding, tmp_path):
assert len(results) == 1
# create llm
llm = LCAzureChatOpenAI(
openai_api_base="https://test.openai.azure.com/",
openai_api_key="some-key",
openai_api_version="2023-03-15-preview",
deployment_name="gpt35turbo",
temperature=0,
llm = AzureChatOpenAI(
api_key="dummy",
api_version="2024-05-01-preview",
azure_deployment="gpt-4o",
azure_endpoint="https://test.openai.azure.com/",
)
qa_pipeline = indexing_pipeline.to_qa_pipeline(llm=llm, openai_api_key="some-key")
response = qa_pipeline("Summarize this document.")