Add Huggingface embeddings and Cohere embeddings (#63)
* Add huggingface embeddings and cohere embeddings * Update openai interface and the mock for newer OpenAI SDK --------- Co-authored-by: trducng <trungduc1992@gmail.com>
This commit is contained in:
parent
9035e25666
commit
6095526dc7
12
knowledgehub/embeddings/cohere.py
Normal file
12
knowledgehub/embeddings/cohere.py
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
from langchain.embeddings import CohereEmbeddings as LCCohereEmbeddings
|
||||||
|
|
||||||
|
from kotaemon.embeddings.base import LangchainEmbeddings
|
||||||
|
|
||||||
|
|
||||||
|
class CohereEmbdeddings(LangchainEmbeddings):
|
||||||
|
"""Cohere embeddings.
|
||||||
|
|
||||||
|
This class wraps around the Langchain CohereEmbeddings class.
|
||||||
|
"""
|
||||||
|
|
||||||
|
_lc_class = LCCohereEmbeddings
|
12
knowledgehub/embeddings/huggingface.py
Normal file
12
knowledgehub/embeddings/huggingface.py
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
from langchain.embeddings import HuggingFaceBgeEmbeddings as LCHuggingFaceEmbeddings
|
||||||
|
|
||||||
|
from kotaemon.embeddings.base import LangchainEmbeddings
|
||||||
|
|
||||||
|
|
||||||
|
class HuggingFaceEmbeddings(LangchainEmbeddings):
|
||||||
|
"""HuggingFace embeddings
|
||||||
|
|
||||||
|
This class wraps around the Langchain HuggingFaceBgeEmbeddings class.
|
||||||
|
"""
|
||||||
|
|
||||||
|
_lc_class = LCHuggingFaceEmbeddings
|
|
@ -8,7 +8,7 @@ from ..base import BaseComponent
|
||||||
from ..documents.base import Document
|
from ..documents.base import Document
|
||||||
|
|
||||||
|
|
||||||
class AutoReader(BaseComponent, BaseReader):
|
class AutoReader(BaseComponent):
|
||||||
"""General auto reader for a variety of files. (based on llama-hub)"""
|
"""General auto reader for a variety of files. (based on llama-hub)"""
|
||||||
|
|
||||||
def __init__(self, reader_type: Union[str, Type[BaseReader]]) -> None:
|
def __init__(self, reader_type: Union[str, Type[BaseReader]]) -> None:
|
||||||
|
@ -31,7 +31,7 @@ class AutoReader(BaseComponent, BaseReader):
|
||||||
return self.load_data(file=file, **kwargs)
|
return self.load_data(file=file, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
class LIBaseReader(BaseComponent, BaseReader):
|
class LIBaseReader(BaseComponent):
|
||||||
_reader_class: Type[BaseReader]
|
_reader_class: Type[BaseReader]
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
|
|
@ -75,8 +75,8 @@ class Planner(BaseComponent):
|
||||||
try:
|
try:
|
||||||
response = self.model(prompt)
|
response = self.model(prompt)
|
||||||
output.info("Planner run successful.")
|
output.info("Planner run successful.")
|
||||||
except ValueError:
|
except ValueError as e:
|
||||||
output.error("Planner failed to retrieve response from LLM")
|
output.error("Planner failed to retrieve response from LLM")
|
||||||
raise ValueError("Planner failed to retrieve response from LLM")
|
raise ValueError("Planner failed to retrieve response from LLM") from e
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
3
setup.py
3
setup.py
|
@ -59,6 +59,9 @@ setuptools.setup(
|
||||||
"pytest-mock",
|
"pytest-mock",
|
||||||
"unstructured[pdf]",
|
"unstructured[pdf]",
|
||||||
"farm-haystack==1.19.0",
|
"farm-haystack==1.19.0",
|
||||||
|
"sentence_transformers",
|
||||||
|
"cohere",
|
||||||
|
"pypdf",
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
entry_points={"console_scripts": ["kh=kotaemon.cli:main"]},
|
entry_points={"console_scripts": ["kh=kotaemon.cli:main"]},
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from openai.types.chat.chat_completion import ChatCompletion
|
||||||
|
|
||||||
from kotaemon.llms.chats.openai import AzureChatOpenAI
|
from kotaemon.llms.chats.openai import AzureChatOpenAI
|
||||||
from kotaemon.pipelines.agents.react import ReactAgent
|
from kotaemon.pipelines.agents.react import ReactAgent
|
||||||
|
@ -14,24 +15,30 @@ from kotaemon.pipelines.tools import (
|
||||||
|
|
||||||
FINAL_RESPONSE_TEXT = "Hello Cinnamon AI!"
|
FINAL_RESPONSE_TEXT = "Hello Cinnamon AI!"
|
||||||
|
|
||||||
|
|
||||||
_openai_chat_completion_responses_rewoo = [
|
_openai_chat_completion_responses_rewoo = [
|
||||||
{
|
ChatCompletion.parse_obj(
|
||||||
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
|
{
|
||||||
"object": "chat.completion",
|
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
|
||||||
"created": 1692338378,
|
"object": "chat.completion",
|
||||||
"model": "gpt-35-turbo",
|
"created": 1692338378,
|
||||||
"choices": [
|
"model": "gpt-35-turbo",
|
||||||
{
|
"system_fingerprint": None,
|
||||||
"index": 0,
|
"choices": [
|
||||||
"finish_reason": "stop",
|
{
|
||||||
"message": {
|
"index": 0,
|
||||||
"role": "assistant",
|
"finish_reason": "stop",
|
||||||
"content": text,
|
"message": {
|
||||||
},
|
"role": "assistant",
|
||||||
}
|
"content": text,
|
||||||
],
|
"function_call": None,
|
||||||
"usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
|
"tool_calls": None,
|
||||||
}
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
|
||||||
|
}
|
||||||
|
)
|
||||||
for text in [
|
for text in [
|
||||||
(
|
(
|
||||||
"#Plan1: Search for Cinnamon AI company on Google\n"
|
"#Plan1: Search for Cinnamon AI company on Google\n"
|
||||||
|
@ -44,23 +51,28 @@ _openai_chat_completion_responses_rewoo = [
|
||||||
]
|
]
|
||||||
|
|
||||||
_openai_chat_completion_responses_react = [
|
_openai_chat_completion_responses_react = [
|
||||||
{
|
ChatCompletion.parse_obj(
|
||||||
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
|
{
|
||||||
"object": "chat.completion",
|
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
|
||||||
"created": 1692338378,
|
"object": "chat.completion",
|
||||||
"model": "gpt-35-turbo",
|
"created": 1692338378,
|
||||||
"choices": [
|
"model": "gpt-35-turbo",
|
||||||
{
|
"system_fingerprint": None,
|
||||||
"index": 0,
|
"choices": [
|
||||||
"finish_reason": "stop",
|
{
|
||||||
"message": {
|
"index": 0,
|
||||||
"role": "assistant",
|
"finish_reason": "stop",
|
||||||
"content": text,
|
"message": {
|
||||||
},
|
"role": "assistant",
|
||||||
}
|
"content": text,
|
||||||
],
|
"function_call": None,
|
||||||
"usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
|
"tool_calls": None,
|
||||||
}
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
|
||||||
|
}
|
||||||
|
)
|
||||||
for text in [
|
for text in [
|
||||||
(
|
(
|
||||||
"I don't have prior knowledge about Cinnamon AI company, "
|
"I don't have prior knowledge about Cinnamon AI company, "
|
||||||
|
@ -82,23 +94,28 @@ _openai_chat_completion_responses_react = [
|
||||||
]
|
]
|
||||||
|
|
||||||
_openai_chat_completion_responses_react_langchain_tool = [
|
_openai_chat_completion_responses_react_langchain_tool = [
|
||||||
{
|
ChatCompletion.parse_obj(
|
||||||
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
|
{
|
||||||
"object": "chat.completion",
|
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
|
||||||
"created": 1692338378,
|
"object": "chat.completion",
|
||||||
"model": "gpt-35-turbo",
|
"created": 1692338378,
|
||||||
"choices": [
|
"model": "gpt-35-turbo",
|
||||||
{
|
"system_fingerprint": None,
|
||||||
"index": 0,
|
"choices": [
|
||||||
"finish_reason": "stop",
|
{
|
||||||
"message": {
|
"index": 0,
|
||||||
"role": "assistant",
|
"finish_reason": "stop",
|
||||||
"content": text,
|
"message": {
|
||||||
},
|
"role": "assistant",
|
||||||
}
|
"content": text,
|
||||||
],
|
"function_call": None,
|
||||||
"usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
|
"tool_calls": None,
|
||||||
}
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
|
||||||
|
}
|
||||||
|
)
|
||||||
for text in [
|
for text in [
|
||||||
(
|
(
|
||||||
"I don't have prior knowledge about Cinnamon AI company, "
|
"I don't have prior knowledge about Cinnamon AI company, "
|
||||||
|
@ -123,7 +140,7 @@ _openai_chat_completion_responses_react_langchain_tool = [
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def llm():
|
def llm():
|
||||||
return AzureChatOpenAI(
|
return AzureChatOpenAI(
|
||||||
openai_api_base="https://dummy.openai.azure.com/",
|
azure_endpoint="https://dummy.openai.azure.com/",
|
||||||
openai_api_key="dummy",
|
openai_api_key="dummy",
|
||||||
openai_api_version="2023-03-15-preview",
|
openai_api_version="2023-03-15-preview",
|
||||||
deployment_name="dummy-q2",
|
deployment_name="dummy-q2",
|
||||||
|
@ -132,7 +149,7 @@ def llm():
|
||||||
|
|
||||||
|
|
||||||
@patch(
|
@patch(
|
||||||
"openai.api_resources.chat_completion.ChatCompletion.create",
|
"openai.resources.chat.completions.Completions.create",
|
||||||
side_effect=_openai_chat_completion_responses_rewoo,
|
side_effect=_openai_chat_completion_responses_rewoo,
|
||||||
)
|
)
|
||||||
def test_rewoo_agent(openai_completion, llm, mock_google_search):
|
def test_rewoo_agent(openai_completion, llm, mock_google_search):
|
||||||
|
@ -150,7 +167,7 @@ def test_rewoo_agent(openai_completion, llm, mock_google_search):
|
||||||
|
|
||||||
|
|
||||||
@patch(
|
@patch(
|
||||||
"openai.api_resources.chat_completion.ChatCompletion.create",
|
"openai.resources.chat.completions.Completions.create",
|
||||||
side_effect=_openai_chat_completion_responses_react,
|
side_effect=_openai_chat_completion_responses_react,
|
||||||
)
|
)
|
||||||
def test_react_agent(openai_completion, llm, mock_google_search):
|
def test_react_agent(openai_completion, llm, mock_google_search):
|
||||||
|
@ -167,7 +184,7 @@ def test_react_agent(openai_completion, llm, mock_google_search):
|
||||||
|
|
||||||
|
|
||||||
@patch(
|
@patch(
|
||||||
"openai.api_resources.chat_completion.ChatCompletion.create",
|
"openai.resources.chat.completions.Completions.create",
|
||||||
side_effect=_openai_chat_completion_responses_react,
|
side_effect=_openai_chat_completion_responses_react,
|
||||||
)
|
)
|
||||||
def test_react_agent_langchain(openai_completion, llm, mock_google_search):
|
def test_react_agent_langchain(openai_completion, llm, mock_google_search):
|
||||||
|
@ -191,7 +208,7 @@ def test_react_agent_langchain(openai_completion, llm, mock_google_search):
|
||||||
|
|
||||||
|
|
||||||
@patch(
|
@patch(
|
||||||
"openai.api_resources.chat_completion.ChatCompletion.create",
|
"openai.resources.chat.completions.Completions.create",
|
||||||
side_effect=_openai_chat_completion_responses_react_langchain_tool,
|
side_effect=_openai_chat_completion_responses_react_langchain_tool,
|
||||||
)
|
)
|
||||||
def test_react_agent_with_langchain_tools(openai_completion, llm):
|
def test_react_agent_with_langchain_tools(openai_completion, llm):
|
||||||
|
|
|
@ -1,32 +1,39 @@
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from openai.types.chat.chat_completion import ChatCompletion
|
||||||
|
|
||||||
from kotaemon.llms.chats.openai import AzureChatOpenAI
|
from kotaemon.llms.chats.openai import AzureChatOpenAI
|
||||||
from kotaemon.pipelines.cot import ManualSequentialChainOfThought, Thought
|
from kotaemon.pipelines.cot import ManualSequentialChainOfThought, Thought
|
||||||
|
|
||||||
_openai_chat_completion_response = [
|
_openai_chat_completion_response = [
|
||||||
{
|
ChatCompletion.parse_obj(
|
||||||
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
|
{
|
||||||
"object": "chat.completion",
|
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
|
||||||
"created": 1692338378,
|
"object": "chat.completion",
|
||||||
"model": "gpt-35-turbo",
|
"created": 1692338378,
|
||||||
"choices": [
|
"model": "gpt-35-turbo",
|
||||||
{
|
"system_fingerprint": None,
|
||||||
"index": 0,
|
"choices": [
|
||||||
"finish_reason": "stop",
|
{
|
||||||
"message": {
|
"index": 0,
|
||||||
"role": "assistant",
|
"finish_reason": "stop",
|
||||||
"content": text,
|
"message": {
|
||||||
},
|
"role": "assistant",
|
||||||
}
|
"content": text,
|
||||||
],
|
"function_call": None,
|
||||||
"usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
|
"tool_calls": None,
|
||||||
}
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
|
||||||
|
}
|
||||||
|
)
|
||||||
for text in ["Bonjour", "こんにちは (Konnichiwa)"]
|
for text in ["Bonjour", "こんにちは (Konnichiwa)"]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@patch(
|
@patch(
|
||||||
"openai.api_resources.chat_completion.ChatCompletion.create",
|
"openai.resources.chat.completions.Completions.create",
|
||||||
side_effect=_openai_chat_completion_response,
|
side_effect=_openai_chat_completion_response,
|
||||||
)
|
)
|
||||||
def test_cot_plus_operator(openai_completion):
|
def test_cot_plus_operator(openai_completion):
|
||||||
|
@ -58,7 +65,7 @@ def test_cot_plus_operator(openai_completion):
|
||||||
|
|
||||||
|
|
||||||
@patch(
|
@patch(
|
||||||
"openai.api_resources.chat_completion.ChatCompletion.create",
|
"openai.resources.chat.completions.Completions.create",
|
||||||
side_effect=_openai_chat_completion_response,
|
side_effect=_openai_chat_completion_response,
|
||||||
)
|
)
|
||||||
def test_cot_manual(openai_completion):
|
def test_cot_manual(openai_completion):
|
||||||
|
@ -88,7 +95,7 @@ def test_cot_manual(openai_completion):
|
||||||
|
|
||||||
|
|
||||||
@patch(
|
@patch(
|
||||||
"openai.api_resources.chat_completion.ChatCompletion.create",
|
"openai.resources.chat.completions.Completions.create",
|
||||||
side_effect=_openai_chat_completion_response,
|
side_effect=_openai_chat_completion_response,
|
||||||
)
|
)
|
||||||
def test_cot_with_termination_callback(openai_completion):
|
def test_cot_with_termination_callback(openai_completion):
|
||||||
|
|
|
@ -2,6 +2,8 @@ import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from kotaemon.embeddings.cohere import CohereEmbdeddings
|
||||||
|
from kotaemon.embeddings.huggingface import HuggingFaceEmbeddings
|
||||||
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
|
from kotaemon.embeddings.openai import AzureOpenAIEmbeddings
|
||||||
|
|
||||||
with open(Path(__file__).parent / "resources" / "embedding_openai_batch.json") as f:
|
with open(Path(__file__).parent / "resources" / "embedding_openai_batch.json") as f:
|
||||||
|
@ -12,7 +14,7 @@ with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
|
||||||
|
|
||||||
|
|
||||||
@patch(
|
@patch(
|
||||||
"openai.api_resources.embedding.Embedding.create",
|
"openai.resources.embeddings.Embeddings.create",
|
||||||
side_effect=lambda *args, **kwargs: openai_embedding,
|
side_effect=lambda *args, **kwargs: openai_embedding,
|
||||||
)
|
)
|
||||||
def test_azureopenai_embeddings_raw(openai_embedding_call):
|
def test_azureopenai_embeddings_raw(openai_embedding_call):
|
||||||
|
@ -29,7 +31,7 @@ def test_azureopenai_embeddings_raw(openai_embedding_call):
|
||||||
|
|
||||||
|
|
||||||
@patch(
|
@patch(
|
||||||
"openai.api_resources.embedding.Embedding.create",
|
"openai.resources.embeddings.Embeddings.create",
|
||||||
side_effect=lambda *args, **kwargs: openai_embedding_batch,
|
side_effect=lambda *args, **kwargs: openai_embedding_batch,
|
||||||
)
|
)
|
||||||
def test_azureopenai_embeddings_batch_raw(openai_embedding_call):
|
def test_azureopenai_embeddings_batch_raw(openai_embedding_call):
|
||||||
|
@ -44,3 +46,42 @@ def test_azureopenai_embeddings_batch_raw(openai_embedding_call):
|
||||||
assert isinstance(output[0], list)
|
assert isinstance(output[0], list)
|
||||||
assert isinstance(output[0][0], float)
|
assert isinstance(output[0][0], float)
|
||||||
openai_embedding_call.assert_called()
|
openai_embedding_call.assert_called()
|
||||||
|
|
||||||
|
|
||||||
|
@patch(
|
||||||
|
"sentence_transformers.SentenceTransformer",
|
||||||
|
side_effect=lambda *args, **kwargs: None,
|
||||||
|
)
|
||||||
|
@patch(
|
||||||
|
"langchain.embeddings.huggingface.HuggingFaceBgeEmbeddings.embed_query",
|
||||||
|
side_effect=lambda *args, **kwargs: [1.0, 2.1, 3.2],
|
||||||
|
)
|
||||||
|
def test_huggingface_embddings(
|
||||||
|
langchain_huggingface_embedding_call, sentence_transformers_init
|
||||||
|
):
|
||||||
|
model = HuggingFaceEmbeddings(
|
||||||
|
model_name="intfloat/multilingual-e5-large",
|
||||||
|
model_kwargs={"device": "cpu"},
|
||||||
|
encode_kwargs={"normalize_embeddings": False},
|
||||||
|
)
|
||||||
|
|
||||||
|
output = model("Hello World")
|
||||||
|
assert isinstance(output, list)
|
||||||
|
assert isinstance(output[0], float)
|
||||||
|
sentence_transformers_init.assert_called()
|
||||||
|
langchain_huggingface_embedding_call.assert_called()
|
||||||
|
|
||||||
|
|
||||||
|
@patch(
|
||||||
|
"langchain.embeddings.cohere.CohereEmbeddings.embed_query",
|
||||||
|
side_effect=lambda *args, **kwargs: [1.0, 2.1, 3.2],
|
||||||
|
)
|
||||||
|
def test_cohere_embddings(langchain_cohere_embedding_call):
|
||||||
|
model = CohereEmbdeddings(
|
||||||
|
model="embed-english-light-v2.0", cohere_api_key="my-api-key"
|
||||||
|
)
|
||||||
|
|
||||||
|
output = model("Hello World")
|
||||||
|
assert isinstance(output, list)
|
||||||
|
assert isinstance(output[0], float)
|
||||||
|
langchain_cohere_embedding_call.assert_called()
|
||||||
|
|
|
@ -3,7 +3,7 @@ from pathlib import Path
|
||||||
from typing import cast
|
from typing import cast
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from openai.api_resources.embedding import Embedding
|
from openai.resources.embeddings import Embeddings
|
||||||
|
|
||||||
from kotaemon.docstores import InMemoryDocumentStore
|
from kotaemon.docstores import InMemoryDocumentStore
|
||||||
from kotaemon.documents.base import Document
|
from kotaemon.documents.base import Document
|
||||||
|
@ -18,7 +18,7 @@ with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
|
||||||
|
|
||||||
@pytest.fixture(scope="function")
|
@pytest.fixture(scope="function")
|
||||||
def mock_openai_embedding(monkeypatch):
|
def mock_openai_embedding(monkeypatch):
|
||||||
monkeypatch.setattr(Embedding, "create", lambda *args, **kwargs: openai_embedding)
|
monkeypatch.setattr(Embeddings, "create", lambda *args, **kwargs: openai_embedding)
|
||||||
|
|
||||||
|
|
||||||
def test_indexing(mock_openai_embedding, tmp_path):
|
def test_indexing(mock_openai_embedding, tmp_path):
|
||||||
|
|
|
@ -2,31 +2,37 @@ from unittest.mock import patch
|
||||||
|
|
||||||
from langchain.chat_models import AzureChatOpenAI as AzureChatOpenAILC
|
from langchain.chat_models import AzureChatOpenAI as AzureChatOpenAILC
|
||||||
from langchain.schema.messages import AIMessage, HumanMessage, SystemMessage
|
from langchain.schema.messages import AIMessage, HumanMessage, SystemMessage
|
||||||
|
from openai.types.chat.chat_completion import ChatCompletion
|
||||||
|
|
||||||
from kotaemon.llms.base import LLMInterface
|
from kotaemon.llms.base import LLMInterface
|
||||||
from kotaemon.llms.chats.openai import AzureChatOpenAI
|
from kotaemon.llms.chats.openai import AzureChatOpenAI
|
||||||
|
|
||||||
_openai_chat_completion_response = {
|
_openai_chat_completion_response = ChatCompletion.parse_obj(
|
||||||
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
|
{
|
||||||
"object": "chat.completion",
|
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
|
||||||
"created": 1692338378,
|
"object": "chat.completion",
|
||||||
"model": "gpt-35-turbo",
|
"created": 1692338378,
|
||||||
"choices": [
|
"model": "gpt-35-turbo",
|
||||||
{
|
"system_fingerprint": None,
|
||||||
"index": 0,
|
"choices": [
|
||||||
"finish_reason": "stop",
|
{
|
||||||
"message": {
|
"index": 0,
|
||||||
"role": "assistant",
|
"finish_reason": "stop",
|
||||||
"content": "Hello! How can I assist you today?",
|
"message": {
|
||||||
},
|
"role": "assistant",
|
||||||
}
|
"content": "Hello! How can I assist you today?",
|
||||||
],
|
"function_call": None,
|
||||||
"usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
|
"tool_calls": None,
|
||||||
}
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@patch(
|
@patch(
|
||||||
"openai.api_resources.chat_completion.ChatCompletion.create",
|
"openai.resources.chat.completions.Completions.create",
|
||||||
side_effect=lambda *args, **kwargs: _openai_chat_completion_response,
|
side_effect=lambda *args, **kwargs: _openai_chat_completion_response,
|
||||||
)
|
)
|
||||||
def test_azureopenai_model(openai_completion):
|
def test_azureopenai_model(openai_completion):
|
||||||
|
@ -36,7 +42,6 @@ def test_azureopenai_model(openai_completion):
|
||||||
openai_api_version="2023-03-15-preview",
|
openai_api_version="2023-03-15-preview",
|
||||||
deployment_name="gpt35turbo",
|
deployment_name="gpt35turbo",
|
||||||
temperature=0,
|
temperature=0,
|
||||||
request_timeout=60,
|
|
||||||
)
|
)
|
||||||
assert isinstance(
|
assert isinstance(
|
||||||
model.agent, AzureChatOpenAILC
|
model.agent, AzureChatOpenAILC
|
||||||
|
|
|
@ -2,24 +2,33 @@ from unittest.mock import patch
|
||||||
|
|
||||||
from langchain.llms import AzureOpenAI as AzureOpenAILC
|
from langchain.llms import AzureOpenAI as AzureOpenAILC
|
||||||
from langchain.llms import OpenAI as OpenAILC
|
from langchain.llms import OpenAI as OpenAILC
|
||||||
|
from openai.types.completion import Completion
|
||||||
|
|
||||||
from kotaemon.llms.base import LLMInterface
|
from kotaemon.llms.base import LLMInterface
|
||||||
from kotaemon.llms.completions.openai import AzureOpenAI, OpenAI
|
from kotaemon.llms.completions.openai import AzureOpenAI, OpenAI
|
||||||
|
|
||||||
_openai_completion_response = {
|
_openai_completion_response = Completion.parse_obj(
|
||||||
"id": "cmpl-7qyNoIo6gRSCJR0hi8o3ZKBH4RkJ0",
|
{
|
||||||
"object": "sample text_completion",
|
"id": "cmpl-7qyNoIo6gRSCJR0hi8o3ZKBH4RkJ0",
|
||||||
"created": 1392751226,
|
"object": "text_completion",
|
||||||
"model": "gpt-35-turbo",
|
"created": 1392751226,
|
||||||
"choices": [
|
"model": "gpt-35-turbo",
|
||||||
{"text": "completion", "index": 0, "finish_reason": "length", "logprobs": None}
|
"system_fingerprint": None,
|
||||||
],
|
"choices": [
|
||||||
"usage": {"completion_tokens": 20, "prompt_tokens": 2, "total_tokens": 22},
|
{
|
||||||
}
|
"text": "completion",
|
||||||
|
"index": 0,
|
||||||
|
"finish_reason": "length",
|
||||||
|
"logprobs": None,
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"usage": {"completion_tokens": 20, "prompt_tokens": 2, "total_tokens": 22},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@patch(
|
@patch(
|
||||||
"openai.api_resources.completion.Completion.create",
|
"openai.resources.completions.Completions.create",
|
||||||
side_effect=lambda *args, **kwargs: _openai_completion_response,
|
side_effect=lambda *args, **kwargs: _openai_completion_response,
|
||||||
)
|
)
|
||||||
def test_azureopenai_model(openai_completion):
|
def test_azureopenai_model(openai_completion):
|
||||||
|
@ -47,7 +56,7 @@ def test_azureopenai_model(openai_completion):
|
||||||
|
|
||||||
|
|
||||||
@patch(
|
@patch(
|
||||||
"openai.api_resources.completion.Completion.create",
|
"openai.resources.completions.Completions.create",
|
||||||
side_effect=lambda *args, **kwargs: _openai_completion_response,
|
side_effect=lambda *args, **kwargs: _openai_completion_response,
|
||||||
)
|
)
|
||||||
def test_openai_model(openai_completion):
|
def test_openai_model(openai_completion):
|
||||||
|
|
|
@ -3,7 +3,8 @@ from pathlib import Path
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from openai.api_resources.embedding import Embedding
|
from openai.resources.embeddings import Embeddings
|
||||||
|
from openai.types.chat.chat_completion import ChatCompletion
|
||||||
|
|
||||||
from kotaemon.llms.chats.openai import AzureChatOpenAI
|
from kotaemon.llms.chats.openai import AzureChatOpenAI
|
||||||
from kotaemon.pipelines.ingest import ReaderIndexingPipeline
|
from kotaemon.pipelines.ingest import ReaderIndexingPipeline
|
||||||
|
@ -12,32 +13,37 @@ with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
|
||||||
openai_embedding = json.load(f)
|
openai_embedding = json.load(f)
|
||||||
|
|
||||||
|
|
||||||
_openai_chat_completion_response = {
|
_openai_chat_completion_response = ChatCompletion.parse_obj(
|
||||||
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
|
{
|
||||||
"object": "chat.completion",
|
"id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
|
||||||
"created": 1692338378,
|
"object": "chat.completion",
|
||||||
"model": "gpt-35-turbo",
|
"created": 1692338378,
|
||||||
"choices": [
|
"model": "gpt-35-turbo",
|
||||||
{
|
"system_fingerprint": None,
|
||||||
"index": 0,
|
"choices": [
|
||||||
"finish_reason": "stop",
|
{
|
||||||
"message": {
|
"index": 0,
|
||||||
"role": "assistant",
|
"finish_reason": "stop",
|
||||||
"content": "Hello! How can I assist you today?",
|
"message": {
|
||||||
},
|
"role": "assistant",
|
||||||
}
|
"content": "Hello! How can I assist you today?",
|
||||||
],
|
"function_call": None,
|
||||||
"usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
|
"tool_calls": None,
|
||||||
}
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="function")
|
@pytest.fixture(scope="function")
|
||||||
def mock_openai_embedding(monkeypatch):
|
def mock_openai_embedding(monkeypatch):
|
||||||
monkeypatch.setattr(Embedding, "create", lambda *args, **kwargs: openai_embedding)
|
monkeypatch.setattr(Embeddings, "create", lambda *args, **kwargs: openai_embedding)
|
||||||
|
|
||||||
|
|
||||||
@patch(
|
@patch(
|
||||||
"openai.api_resources.chat_completion.ChatCompletion.create",
|
"openai.resources.chat.completions.Completions.create",
|
||||||
side_effect=lambda *args, **kwargs: _openai_chat_completion_response,
|
side_effect=lambda *args, **kwargs: _openai_chat_completion_response,
|
||||||
)
|
)
|
||||||
def test_ingest_pipeline(patch, mock_openai_embedding, tmp_path):
|
def test_ingest_pipeline(patch, mock_openai_embedding, tmp_path):
|
||||||
|
@ -61,7 +67,6 @@ def test_ingest_pipeline(patch, mock_openai_embedding, tmp_path):
|
||||||
openai_api_version="2023-03-15-preview",
|
openai_api_version="2023-03-15-preview",
|
||||||
deployment_name="gpt35turbo",
|
deployment_name="gpt35turbo",
|
||||||
temperature=0,
|
temperature=0,
|
||||||
request_timeout=60,
|
|
||||||
)
|
)
|
||||||
qa_pipeline = indexing_pipeline.to_qa_pipeline(llm=llm, openai_api_key="some-key")
|
qa_pipeline = indexing_pipeline.to_qa_pipeline(llm=llm, openai_api_key="some-key")
|
||||||
response = qa_pipeline("Summarize this document.")
|
response = qa_pipeline("Summarize this document.")
|
||||||
|
|
|
@ -2,7 +2,7 @@ import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from openai.api_resources.embedding import Embedding
|
from openai.resources.embeddings import Embeddings
|
||||||
|
|
||||||
from kotaemon.docstores import InMemoryDocumentStore
|
from kotaemon.docstores import InMemoryDocumentStore
|
||||||
from kotaemon.documents.base import Document
|
from kotaemon.documents.base import Document
|
||||||
|
@ -18,7 +18,7 @@ with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
|
||||||
|
|
||||||
@pytest.fixture(scope="function")
|
@pytest.fixture(scope="function")
|
||||||
def mock_openai_embedding(monkeypatch):
|
def mock_openai_embedding(monkeypatch):
|
||||||
monkeypatch.setattr(Embedding, "create", lambda *args, **kwargs: openai_embedding)
|
monkeypatch.setattr(Embeddings, "create", lambda *args, **kwargs: openai_embedding)
|
||||||
|
|
||||||
|
|
||||||
def test_google_tool(mock_google_search):
|
def test_google_tool(mock_google_search):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user