kotaemon/libs/kotaemon/kotaemon/llms/chats/endpoint_based.py
Duc Nguyen (john) a203fc0f7c
Allow users to add LLM within the UI (#6)
* Rename AzureChatOpenAI to LCAzureChatOpenAI
* Provide vanilla ChatOpenAI and AzureChatOpenAI
* Remove the highest accuracy, lowest cost criteria

These criteria are unnecessary. The users, not the pipeline creators, should
choose which LLM to use. Furthermore, inputting this information is cumbersome
and really degrades the user experience.

* Remove the LLM selection in simple reasoning pipeline
* Provide a dedicated stream method to generate the output
* Return placeholder message to chat if the text is empty
2024-04-06 11:53:17 +07:00


import requests

from kotaemon.base import (
AIMessage,
BaseMessage,
HumanMessage,
LLMInterface,
Param,
SystemMessage,
)
from .base import ChatLLM


class EndpointChatLLM(ChatLLM):
"""
A ChatLLM that uses an endpoint to generate responses. This expects an OpenAI API
compatible endpoint.
Attributes:
endpoint_url (str): The url of a OpenAI API compatible endpoint.
"""
endpoint_url: str = Param(
help="URL of the OpenAI API compatible endpoint", required=True
    )

    def run(
self, messages: str | BaseMessage | list[BaseMessage], **kwargs
) -> LLMInterface:
"""
Generate response from messages
Args:
messages (str | BaseMessage | list[BaseMessage]): history of messages to
generate response from
**kwargs: additional arguments to pass to the OpenAI API
Returns:
LLMInterface: generated response
"""
if isinstance(messages, str):
input_ = [HumanMessage(content=messages)]
elif isinstance(messages, BaseMessage):
input_ = [messages]
else:
input_ = messages

        def decide_role(message: BaseMessage):
            """Map a kotaemon message type to an OpenAI chat role."""
if isinstance(message, SystemMessage):
return "system"
elif isinstance(message, AIMessage):
return "assistant"
else:
return "user"

        request_json = {
"messages": [{"content": m.text, "role": decide_role(m)} for m in input_]
}
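        # Sketch of the expected exchange (assumes the endpoint implements the
        # OpenAI chat-completions protocol; the field names below come from
        # that API, not from this class):
        #   request:  {"messages": [{"role": "user", "content": "Hi"}]}
        #   response: {"choices": [{"message": {"role": "assistant", "content": "Hello"}}],
        #              "usage": {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3}}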
        response = requests.post(self.endpoint_url, json=request_json).json()

        content = ""
        candidates = []
        if response["choices"]:
            candidates = [
                each["message"]["content"]
                for each in response["choices"]
                if each["message"]["content"]
            ]
            # Guard against responses in which every choice has empty content;
            # indexing candidates[0] unconditionally would raise IndexError.
            if candidates:
                content = candidates[0]
return LLMInterface(
content=content,
candidates=candidates,
completion_tokens=response["usage"]["completion_tokens"],
total_tokens=response["usage"]["total_tokens"],
prompt_tokens=response["usage"]["prompt_tokens"],
        )

    def invoke(
self, messages: str | BaseMessage | list[BaseMessage], **kwargs
) -> LLMInterface:
"""Same as run"""
        return self.run(messages, **kwargs)

    async def ainvoke(
self, messages: str | BaseMessage | list[BaseMessage], **kwargs
    ) -> LLMInterface:
        """Same as invoke; note the call is executed synchronously."""
        return self.invoke(messages, **kwargs)
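

if __name__ == "__main__":
    # Minimal usage sketch, assuming an OpenAI-compatible server is listening
    # at the hypothetical URL below; substitute your own endpoint.
    llm = EndpointChatLLM(endpoint_url="http://localhost:8000/v1/chat/completions")
    result = llm.run(
        [
            SystemMessage(content="You are a helpful assistant."),
            HumanMessage(content="Say hello in one short sentence."),
        ]
    )
    print(result.content)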