From c33bedca9e8b62418ac2404f320749476a16fca7 Mon Sep 17 00:00:00 2001 From: Amin Date: Tue, 15 Apr 2025 03:11:22 -0500 Subject: [PATCH] feat: add options for Mistral AI (#707) #none * add Mistral AI embedding vendor, types * add mistral env setting to example * add mistral LLM option * chore: fix default embedding back to normal * fix: comfort CI --------- Co-authored-by: Tadashi --- .env.example | 3 ++ flowsettings.py | 17 +++++++++ libs/kotaemon/kotaemon/embeddings/__init__.py | 2 + .../kotaemon/embeddings/langchain_based.py | 37 +++++++++++++++++++ libs/kotaemon/pyproject.toml | 1 + libs/ktem/ktem/embeddings/manager.py | 2 + 6 files changed, 62 insertions(+) diff --git a/.env.example b/.env.example index e6a84d9..cb9b3dd 100644 --- a/.env.example +++ b/.env.example @@ -16,6 +16,9 @@ AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002 # settings for Cohere COHERE_API_KEY= +# settings for Mistral +# MISTRAL_API_KEY=placeholder + # settings for local models LOCAL_MODEL=qwen2.5:7b LOCAL_MODEL_EMBEDDINGS=nomic-embed-text diff --git a/flowsettings.py b/flowsettings.py index 350d16d..14f2fc3 100644 --- a/flowsettings.py +++ b/flowsettings.py @@ -243,6 +243,15 @@ KH_LLMS["cohere"] = { }, "default": False, } +KH_LLMS["mistral"] = { + "spec": { + "__type__": "kotaemon.llms.ChatOpenAI", + "base_url": "https://api.mistral.ai/v1", + "model": "ministral-8b-latest", + "api_key": config("MISTRAL_API_KEY", default="your-key"), + }, + "default": False, +} # additional embeddings configurations KH_EMBEDDINGS["cohere"] = { @@ -262,6 +271,14 @@ KH_EMBEDDINGS["google"] = { }, "default": not IS_OPENAI_DEFAULT, } +KH_EMBEDDINGS["mistral"] = { + "spec": { + "__type__": "kotaemon.embeddings.LCMistralEmbeddings", + "model": "mistral-embed", + "api_key": config("MISTRAL_API_KEY", default="your-key"), + }, + "default": False, +} # KH_EMBEDDINGS["huggingface"] = { # "spec": { # "__type__": "kotaemon.embeddings.LCHuggingFaceEmbeddings", diff --git 
a/libs/kotaemon/kotaemon/embeddings/__init__.py b/libs/kotaemon/kotaemon/embeddings/__init__.py
index 0ff7774..3c3cb2f 100644
--- a/libs/kotaemon/kotaemon/embeddings/__init__.py
+++ b/libs/kotaemon/kotaemon/embeddings/__init__.py
@@ -6,6 +6,7 @@ from .langchain_based import (
     LCCohereEmbeddings,
     LCGoogleEmbeddings,
     LCHuggingFaceEmbeddings,
+    LCMistralEmbeddings,
     LCOpenAIEmbeddings,
 )
 from .openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
@@ -20,6 +21,7 @@ __all__ = [
     "LCCohereEmbeddings",
     "LCHuggingFaceEmbeddings",
     "LCGoogleEmbeddings",
+    "LCMistralEmbeddings",
     "OpenAIEmbeddings",
     "AzureOpenAIEmbeddings",
     "FastEmbedEmbeddings",
diff --git a/libs/kotaemon/kotaemon/embeddings/langchain_based.py b/libs/kotaemon/kotaemon/embeddings/langchain_based.py
index 9e8422a..2a77b28 100644
--- a/libs/kotaemon/kotaemon/embeddings/langchain_based.py
+++ b/libs/kotaemon/kotaemon/embeddings/langchain_based.py
@@ -254,3 +254,40 @@ class LCGoogleEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
             raise ImportError("Please install langchain-google-genai")
 
         return GoogleGenerativeAIEmbeddings
+
+
+class LCMistralEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
+    """Wrapper around LangChain's MistralAI embedding, focusing on key parameters"""
+
+    api_key: str = Param(
+        help="API key (https://console.mistral.ai/api-keys)",
+        default=None,
+        required=True,
+    )
+    model: str = Param(
+        help="Model name to use ('mistral-embed')",
+        default="mistral-embed",
+        required=True,
+    )
+
+    def __init__(
+        self,
+        model: str = "mistral-embed",
+        api_key: Optional[str] = None,
+        **params,
+    ):
+        """Forward `model`, `api_key` and any extra `params` to the parent init."""
+        super().__init__(model=model, api_key=api_key, **params)
+
+    def _get_lc_class(self):
+        """Return the MistralAIEmbeddings class; raise ImportError if import fails."""
+        try:
+            from langchain_mistralai import MistralAIEmbeddings
+        except ImportError as err:
+            # Chain the cause so the underlying import failure stays in the traceback
+            raise ImportError(
+                "Please install langchain_mistralai: "
+                "`pip install -U langchain_mistralai`"
+            ) from err
+
+        return MistralAIEmbeddings
diff --git a/libs/kotaemon/pyproject.toml b/libs/kotaemon/pyproject.toml
index 59e5ee5..f0ad6d7 100644
--- a/libs/kotaemon/pyproject.toml +++ b/libs/kotaemon/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "langchain-google-genai>=1.0.3,<2.0.0", "langchain-anthropic", "langchain-ollama", + "langchain-mistralai", "langchain-cohere>=0.2.4,<0.3.0", "llama-hub>=0.0.79,<0.1.0", "llama-index>=0.10.40,<0.11.0", diff --git a/libs/ktem/ktem/embeddings/manager.py b/libs/ktem/ktem/embeddings/manager.py index 1c1c470..45873e0 100644 --- a/libs/ktem/ktem/embeddings/manager.py +++ b/libs/ktem/ktem/embeddings/manager.py @@ -59,6 +59,7 @@ class EmbeddingManager: LCCohereEmbeddings, LCGoogleEmbeddings, LCHuggingFaceEmbeddings, + LCMistralEmbeddings, OpenAIEmbeddings, TeiEndpointEmbeddings, ) @@ -70,6 +71,7 @@ class EmbeddingManager: LCCohereEmbeddings, LCHuggingFaceEmbeddings, LCGoogleEmbeddings, + LCMistralEmbeddings, TeiEndpointEmbeddings, ]