feat: add structured output to openai (#603)

* add structured output to openai

* remove notebook, modify prepare_output method

* fix: appease pre-commit

---------

Co-authored-by: Tadashi <tadashi@cinnamon.is>
Ben Dykstra 2025-04-15 01:54:23 -06:00 committed by GitHub
parent 6f4acc979c
commit 9b05693e4f
5 changed files with 103 additions and 3 deletions

kotaemon/base/__init__.py

@@ -8,6 +8,7 @@ from .schema import (
    HumanMessage,
    LLMInterface,
    RetrievedDocument,
    StructuredOutputLLMInterface,
    SystemMessage,
)
@@ -21,6 +22,7 @@ __all__ = [
    "HumanMessage",
    "RetrievedDocument",
    "LLMInterface",
    "StructuredOutputLLMInterface",
    "ExtractorOutput",
    "Param",
    "Node",

kotaemon/base/schema.py

@@ -143,6 +143,11 @@ class LLMInterface(AIMessage):
    logprobs: list[float] = []


class StructuredOutputLLMInterface(LLMInterface):
    parsed: Any
    refusal: str = ""


class ExtractorOutput(Document):
    """
    Represents the output of an extractor.

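For reference, a short sketch of how the new interface is meant to be read. The `Recipe` schema and all field values below are invented for illustration; keyword construction follows the pydantic-style models already used in kotaemon.base:

```python
from pydantic import BaseModel

from kotaemon.base import StructuredOutputLLMInterface


class Recipe(BaseModel):
    name: str
    steps: list[str]


# StructuredOutputLLMInterface extends LLMInterface with two fields:
# `parsed` (the structured payload) and `refusal` (non-empty when the
# model declines to produce the requested schema).
out = StructuredOutputLLMInterface(
    content='{"name": "pho", "steps": ["simmer the broth"]}',
    parsed=Recipe(name="pho", steps=["simmer the broth"]),
)
assert out.parsed.name == "pho"
assert out.refusal == ""
```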
kotaemon/llms/__init__.py

@@ -14,6 +14,7 @@ from .chats import (
    LCGeminiChat,
    LCOllamaChat,
    LlamaCppChat,
    StructuredOutputChatOpenAI,
)
from .completions import LLM, AzureOpenAI, LlamaCpp, OpenAI
from .cot import ManualSequentialChainOfThought, Thought
@@ -31,6 +32,7 @@ __all__ = [
    "SystemMessage",
    "AzureChatOpenAI",
    "ChatOpenAI",
    "StructuredOutputChatOpenAI",
    "LCAnthropicChat",
    "LCGeminiChat",
    "LCCohereChat",

kotaemon/llms/chats/__init__.py

@@ -10,7 +10,7 @@ from .langchain_based import (
    LCOllamaChat,
)
from .llamacpp import LlamaCppChat
-from .openai import AzureChatOpenAI, ChatOpenAI
+from .openai import AzureChatOpenAI, ChatOpenAI, StructuredOutputChatOpenAI

__all__ = [
    "ChatOpenAI",
@@ -18,6 +18,7 @@ __all__ = [
    "ChatLLM",
    "EndpointChatLLM",
    "ChatOpenAI",
    "StructuredOutputChatOpenAI",
    "LCAnthropicChat",
    "LCGeminiChat",
    "LCCohereChat",

kotaemon/llms/chats/openai.py

@@ -1,8 +1,16 @@
-from typing import TYPE_CHECKING, AsyncGenerator, Iterator, Optional
+from typing import TYPE_CHECKING, AsyncGenerator, Iterator, Optional, Type

+from pydantic import BaseModel
from theflow.utils.modules import import_dotted_string

-from kotaemon.base import AIMessage, BaseMessage, HumanMessage, LLMInterface, Param
+from kotaemon.base import (
+    AIMessage,
+    BaseMessage,
+    HumanMessage,
+    LLMInterface,
+    Param,
+    StructuredOutputLLMInterface,
+)

from .base import ChatLLM
@@ -330,6 +338,88 @@ class ChatOpenAI(BaseChatOpenAI):
        return await client.chat.completions.create(**params)


class StructuredOutputChatOpenAI(ChatOpenAI):
    """OpenAI chat model that returns structured output"""

    response_schema: Type[BaseModel] = Param(
        help="class that subclasses pydantic's BaseModel", required=True
    )
    def prepare_output(self, resp: dict) -> StructuredOutputLLMInterface:
        """Convert the OpenAI response into StructuredOutputLLMInterface"""
        additional_kwargs = {}
        if "tool_calls" in resp["choices"][0]["message"]:
            additional_kwargs["tool_calls"] = resp["choices"][0]["message"][
                "tool_calls"
            ]

        if resp["choices"][0].get("logprobs") is None:
            logprobs = []
        else:
            all_logprobs = resp["choices"][0]["logprobs"].get("content")
            logprobs = (
                [logprob["logprob"] for logprob in all_logprobs] if all_logprobs else []
            )

        output = StructuredOutputLLMInterface(
            parsed=resp["choices"][0]["message"]["parsed"],
            candidates=[(_["message"]["content"] or "") for _ in resp["choices"]],
            content=resp["choices"][0]["message"]["content"] or "",
            total_tokens=resp["usage"]["total_tokens"],
            prompt_tokens=resp["usage"]["prompt_tokens"],
            completion_tokens=resp["usage"]["completion_tokens"],
            messages=[
                AIMessage(content=(_["message"]["content"]) or "")
                for _ in resp["choices"]
            ],
            additional_kwargs=additional_kwargs,
            logprobs=logprobs,
        )

        return output
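
    # For reference, a hypothetical sketch of the `resp` dict that
    # prepare_output() expects; the key names mirror the accesses above,
    # the values are invented for illustration:
    #
    #   {
    #       "choices": [
    #           {
    #               "message": {
    #                   "content": '{"city": "Hanoi"}',
    #                   "parsed": {"city": "Hanoi"},
    #               },
    #               "logprobs": None,
    #           }
    #       ],
    #       "usage": {
    #           "total_tokens": 42,
    #           "prompt_tokens": 30,
    #           "completion_tokens": 12,
    #       },
    #   }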
    def prepare_params(self, **kwargs):
        if "tools_pydantic" in kwargs:
            kwargs.pop("tools_pydantic")

        params_ = {
            "model": self.model,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
            "n": self.n,
            "stop": self.stop,
            "frequency_penalty": self.frequency_penalty,
            "presence_penalty": self.presence_penalty,
            "tool_choice": self.tool_choice,
            "tools": self.tools,
            "logprobs": self.logprobs,
            "logit_bias": self.logit_bias,
            "top_logprobs": self.top_logprobs,
            "top_p": self.top_p,
            "response_format": self.response_schema,
        }
        params = {k: v for k, v in params_.items() if v is not None}
        params.update(kwargs)

        # structured output does not support streaming; drop the flag if present
        params.pop("stream", None)

        return params
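
    # Note: client.beta.chat.completions.parse() below is the OpenAI SDK's
    # structured-output helper. It serializes `response_format` (the pydantic
    # class) into a JSON schema and returns a completion whose message carries
    # a `parsed` attribute, which prepare_output() reads once the caller has
    # converted the response to a dict.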
    def openai_response(self, client, **kwargs):
        """Get the OpenAI response"""
        params = self.prepare_params(**kwargs)
        return client.beta.chat.completions.parse(**params)

    async def aopenai_response(self, client, **kwargs):
        """Get the OpenAI response asynchronously"""
        params = self.prepare_params(**kwargs)
        return await client.beta.chat.completions.parse(**params)


class AzureChatOpenAI(BaseChatOpenAI):
    """OpenAI chat model provided by Microsoft Azure"""