Feat/local endpoint llm (#148)

* Serve the local model in a separate process from the app
---------

Co-authored-by: albert <albert@cinnamon.is>
Co-authored-by: trducng <trungduc1992@gmail.com>
Author: ian_Cin
Date: 2024-03-15 16:17:33 +07:00
Committed by: GitHub
Parent: 2950e6ed02
Commit: df12dec732
20 changed files with 675 additions and 79 deletions


@@ -12,7 +12,7 @@ user_cache_dir.mkdir(parents=True, exist_ok=True)
COHERE_API_KEY = config("COHERE_API_KEY", default="")
KH_MODE = "dev"
KH_FEATURE_USER_MANAGEMENT = True
KH_FEATURE_USER_MANAGEMENT = False
KH_FEATURE_USER_MANAGEMENT_ADMIN = str(
config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin")
)
@@ -21,6 +21,8 @@ KH_FEATURE_USER_MANAGEMENT_PASSWORD = str(
)
KH_ENABLE_ALEMBIC = False
KH_DATABASE = f"sqlite:///{user_cache_dir / 'sql.db'}"
KH_FILESTORAGE_PATH = str(user_cache_dir / "files")
KH_DOCSTORE = {
"__type__": "kotaemon.storages.SimpleFileDocumentStore",
"path": str(user_cache_dir / "docstore"),
@@ -29,51 +31,68 @@ KH_VECTORSTORE = {
"__type__": "kotaemon.storages.ChromaVectorStore",
"path": str(user_cache_dir / "vectorstore"),
}
KH_FILESTORAGE_PATH = str(user_cache_dir / "files")
KH_LLMS = {
"gpt4": {
# example for using Azure OpenAI; the config variables can be set as environment
# variables or in the .env file
# "gpt4": {
# "def": {
# "__type__": "kotaemon.llms.AzureChatOpenAI",
# "temperature": 0,
# "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
# "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
# "openai_api_version": config("OPENAI_API_VERSION", default=""),
# "deployment_name": "<your deployment name>",
# "stream": True,
# },
# "accuracy": 10,
# "cost": 10,
# "default": False,
# },
# "gpt35": {
# "def": {
# "__type__": "kotaemon.llms.AzureChatOpenAI",
# "temperature": 0,
# "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
# "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
# "openai_api_version": config("OPENAI_API_VERSION", default=""),
# "deployment_name": "<your deployment name>",
# "request_timeout": 10,
# "stream": False,
# },
# "accuracy": 5,
# "cost": 5,
# "default": False,
# },
"local": {
"def": {
"__type__": "kotaemon.llms.AzureChatOpenAI",
"temperature": 0,
"azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
"openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
"openai_api_version": config("OPENAI_API_VERSION", default=""),
"deployment_name": "dummy-q2",
"stream": True,
"__type__": "kotaemon.llms.EndpointChatLLM",
"endpoint_url": "http://localhost:31415/v1/chat/completions",
},
"accuracy": 10,
"cost": 10,
"default": False,
},
"gpt35": {
"def": {
"__type__": "kotaemon.llms.AzureChatOpenAI",
"temperature": 0,
"azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
"openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
"openai_api_version": config("OPENAI_API_VERSION", default=""),
"deployment_name": "dummy-q2",
"request_timeout": 10,
"stream": False,
},
"accuracy": 5,
"cost": 5,
"default": True,
},
}
KH_EMBEDDINGS = {
"ada": {
# example for using Azure OpenAI; the config variables can be set as environment
# variables or in the .env file
# "ada": {
# "def": {
# "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
# "model": "text-embedding-ada-002",
# "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
# "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
# "deployment": "<your deployment name>",
# "chunk_size": 16,
# },
# "accuracy": 5,
# "cost": 5,
# "default": True,
# },
"local": {
"def": {
"__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
"model": "text-embedding-ada-002",
"azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
"openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
"deployment": "dummy-q2-text-embedding",
"chunk_size": 16,
"__type__": "kotaemon.embeddings.EndpointEmbeddings",
"endpoint_url": "http://localhost:31415/v1/embeddings",
},
"accuracy": 5,
"cost": 5,
"default": True,
"default": False,
},
}
KH_REASONINGS = ["ktem.reasoning.simple.FullQAPipeline"]
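
For reference, both new "local" entries point the app at an OpenAI-style server that runs in a separate process and listens on port 31415. A minimal sketch of exercising those endpoints directly is below; it assumes the local server accepts OpenAI-compatible request bodies, which the URL paths suggest but this diff does not show.

import requests

# Endpoints taken from the KH_LLMS / KH_EMBEDDINGS config above.
CHAT_URL = "http://localhost:31415/v1/chat/completions"
EMBED_URL = "http://localhost:31415/v1/embeddings"

# Ask the locally served chat model a question. The payload shape is an
# assumption (OpenAI-compatible); adjust it if the actual server differs.
chat_resp = requests.post(
    CHAT_URL,
    json={"messages": [{"role": "user", "content": "Hello from kotaemon"}]},
    timeout=60,
)
chat_resp.raise_for_status()
print(chat_resp.json())

# Embed a piece of text with the locally served embedding model.
embed_resp = requests.post(
    EMBED_URL,
    json={"input": ["a sentence to embed"]},
    timeout=60,
)
embed_resp.raise_for_status()
print(embed_resp.json())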


@@ -118,7 +118,7 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
# rerank
docs = self.vector_retrieval(text=text, top_k=top_k, **kwargs)
if self.get_from_path("reranker"):
if docs and self.get_from_path("reranker"):
docs = self.reranker(docs, query=text)
if not self.get_extra_table:
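
The guard added above only short-circuits the reranker when retrieval comes back empty. A standalone sketch of the same pattern, with hypothetical retrieve and rerank callables standing in for the pipeline's vector retrieval and reranker:

from typing import Callable, Optional


def retrieve_and_rerank(
    text: str,
    retrieve: Callable[[str], list],
    rerank: Optional[Callable[[list, str], list]] = None,
) -> list:
    # Hypothetical stand-ins: retrieve() plays the role of vector_retrieval,
    # rerank() the role of the configured reranker.
    docs = retrieve(text)
    # Only rerank when a reranker exists and retrieval returned something;
    # passing an empty list to the reranker is wasted work at best.
    if docs and rerank is not None:
        docs = rerank(docs, text)
    return docs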


@@ -200,24 +200,37 @@ class AnswerWithContextPipeline(BaseComponent):
lang=self.lang,
)
citation_task = asyncio.create_task(
self.citation_pipeline.ainvoke(context=evidence, question=question)
)
print("Citation task created")
if evidence:
citation_task = asyncio.create_task(
self.citation_pipeline.ainvoke(context=evidence, question=question)
)
print("Citation task created")
messages = []
if self.system_prompt:
messages.append(SystemMessage(content=self.system_prompt))
messages.append(HumanMessage(content=prompt))
output = ""
for text in self.llm.stream(messages):
output += text.text
self.report_output({"output": text.text})
await asyncio.sleep(0)
try:
# try streaming first
print("Trying LLM streaming")
for text in self.llm.stream(messages):
output += text.text
self.report_output({"output": text.text})
await asyncio.sleep(0)
except NotImplementedError:
print("Streaming is not supported, falling back to normal processing")
output = self.llm(messages).text
self.report_output({"output": output})
# retrieve the citation
print("Waiting for citation task")
citation = await citation_task
if evidence:
citation = await citation_task
else:
citation = None
answer = Document(text=output, metadata={"citation": citation})
return answer
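
The reworked body tries token streaming first and falls back to a single blocking call when the LLM raises NotImplementedError, and it only awaits the citation task when evidence was provided. A self-contained sketch of the streaming-with-fallback part, with an illustrative DummyLLM standing in for self.llm:

import asyncio
from dataclasses import dataclass


@dataclass
class Chunk:
    text: str


class DummyLLM:
    """Illustrative stand-in for self.llm; this one cannot stream."""

    def stream(self, messages):
        raise NotImplementedError  # e.g. an endpoint without streaming support

    def __call__(self, messages):
        return Chunk(text="full answer in one blocking call")


async def generate(llm, messages, report):
    output = ""
    try:
        # Prefer streaming so partial output reaches the UI immediately.
        for chunk in llm.stream(messages):
            output += chunk.text
            report(chunk.text)
            await asyncio.sleep(0)  # yield to the event loop between chunks
    except NotImplementedError:
        # Fall back to a single blocking call when stream() is unsupported.
        output = llm(messages).text
        report(output)
    return output


print(asyncio.run(generate(DummyLLM(), ["hello"], print)))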


@@ -2,4 +2,4 @@ from ktem.main import App
app = App()
demo = app.make()
demo.queue().launch(favicon_path=app._favicon)
demo.queue().launch(favicon_path=app._favicon, inbrowser=True)