import os
from typing import List
from kotaemon.base import BaseComponent, Document, Node, Param, lazy
from kotaemon.contribs.promptui.logs import ResultLog
from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.indices import VectorIndexing, VectorRetrieval
from kotaemon.llms import AzureChatOpenAI
from kotaemon.storages import ChromaVectorStore, SimpleFileDocumentStore


class QAResultLog(ResultLog):
    """Customize how promptui extracts intermediate results for display."""

    @staticmethod
    def _get_prompt(obj):
        return obj["prompt"]


class QuestionAnsweringPipeline(BaseComponent):
    """Answer a question using context retrieved from the vector store."""

    _promptui_resultlog = QAResultLog
    _promptui_outputs: list = [
        {
            "step": ".prompt",
            "getter": "_get_prompt",
            "component": "text",
            "params": {"label": "Constructed prompt to LLM"},
        },
        {
            "step": ".",
            "getter": "_get_output",
            "component": "text",
            "params": {"label": "Answer"},
        },
    ]
    retrieval_top_k: int = 1

    llm: AzureChatOpenAI = AzureChatOpenAI.withx(
        azure_endpoint="https://bleh-dummy-2.openai.azure.com/",
        openai_api_key=os.environ.get("OPENAI_API_KEY", "default-key"),
        openai_api_version="2023-03-15-preview",
        deployment_name="dummy-q2-gpt35",
        temperature=0,
        request_timeout=60,
    )
    retrieving_pipeline: VectorRetrieval = Node(
        VectorRetrieval.withx(
            vector_store=lazy(ChromaVectorStore).withx(path="./tmp"),
            doc_store=lazy(SimpleFileDocumentStore).withx(path="docstore.json"),
            embedding=AzureOpenAIEmbeddings.withx(
                model="text-embedding-ada-002",
                deployment="dummy-q2-text-embedding",
                azure_endpoint="https://bleh-dummy-2.openai.azure.com/",
                openai_api_key=os.environ.get("OPENAI_API_KEY", "default-key"),
            ),
        ),
        ignore_ui=True,
    )

    def run(self, text: str) -> str:
        # retrieve relevant documents to use as context
        matched_texts: List[str] = [
            doc.text
            for doc in self.retrieving_pipeline(text, top_k=int(self.retrieval_top_k))
        ]
        context = "\n".join(matched_texts)

        # construct the prompt and generate the answer
        prompt = f'Answer the following question: "{text}". The context is: \n{context}'
        self.log_progress(".prompt", prompt=prompt)
        return self.llm(prompt).text


class IndexingPipeline(VectorIndexing):
    """Index raw text into the vector store and the document store."""

    vector_store: ChromaVectorStore = Param(
        lazy(ChromaVectorStore).withx(path="./tmp"),
        ignore_ui=True,
    )
    doc_store: SimpleFileDocumentStore = Param(
        lazy(SimpleFileDocumentStore).withx(path="docstore.json"),
        ignore_ui=True,
    )
    embedding: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings.withx(
        model="text-embedding-ada-002",
        deployment="dummy-q2-text-embedding",
        azure_endpoint="https://bleh-dummy-2.openai.azure.com/",
        openai_api_key=os.environ.get("OPENAI_API_KEY", "default-key"),
    )

    def run(self, text: str) -> Document:
        """Normally an indexing pipeline returns nothing. For demonstration,
        return the number of documents currently in the vector store.
        """
        super().run(text)
        if self.doc_store is not None:
            # persist to disk every time something is indexed; a dedicated
            # FileDocumentStore could handle this persistence automatically
            self.doc_store.save("docstore.json")
        return Document(text=str(self.vector_store._collection.count()))
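

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original example: index one text
    # snippet, then ask a question about it. Assumes OPENAI_API_KEY is set and
    # that the (dummy) Azure endpoint and deployment names above are reachable.
    indexing = IndexingPipeline()
    count = indexing.run("Kotaemon is a toolkit for building RAG pipelines.")
    print(f"Documents in vector store: {count.text}")

    qa = QuestionAnsweringPipeline()
    print(qa.run("What is Kotaemon?"))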