[AUR-408] Export logs to Excel (#23)

This CL implements:

- The logic to export logs to Excel.
- Routing of the export logic in the UI.
- A demonstration of this functionality in the `./examples/promptui` project.
This commit is contained in:
Nguyen Trung Duc (john)
2023-09-25 17:20:03 +07:00
committed by GitHub
parent 08b6e5d3fb
commit 4f189dc931
5 changed files with 265 additions and 64 deletions

View File

@@ -1,66 +1,14 @@
import pytest
from kotaemon.contribs.promptui.config import export_pipeline_to_config
from kotaemon.contribs.promptui.export import export_from_dict
from kotaemon.contribs.promptui.ui import build_from_dict
@pytest.fixture()
def simple_pipeline_cls(tmp_path):
    """Return a small ``Pipeline`` component class for use in tests.

    The class's ``vectorstore_path`` defaults to the ``tmp_path`` handed to
    this fixture, and its LLM / embedding nodes are configured with
    placeholder endpoint and key values.
    """
    from typing import List

    from theflow import Node

    from kotaemon.base import BaseComponent
    from kotaemon.embeddings import AzureOpenAIEmbeddings
    from kotaemon.llms.completions.openai import AzureOpenAI
    from kotaemon.pipelines.retrieving import (
        RetrieveDocumentFromVectorStorePipeline,
    )
    from kotaemon.vectorstores import ChromaVectorStore

    # Keyword arguments used when the default LLM node is instantiated.
    llm_kwargs = {
        "openai_api_base": "https://test.openai.azure.com/",
        "openai_api_key": "some-key",
        "openai_api_version": "2023-03-15-preview",
        "deployment_name": "gpt35turbo",
        "temperature": 0,
        "request_timeout": 60,
    }

    class Pipeline(BaseComponent):
        vectorstore_path: str = str(tmp_path)
        llm: Node[AzureOpenAI] = Node(
            default=AzureOpenAI,
            default_kwargs=llm_kwargs,
        )

        @Node.decorate(depends_on=["vectorstore_path"])
        def retrieving_pipeline(self):
            # Retrieval node: a Chroma store at ``vectorstore_path`` paired
            # with an Azure OpenAI embedding model.
            embedder = AzureOpenAIEmbeddings(
                model="text-embedding-ada-002",
                deployment="embedding-deployment",
                openai_api_base="https://test.openai.azure.com/",
                openai_api_key="some-key",
            )
            store = ChromaVectorStore(self.vectorstore_path)
            return RetrieveDocumentFromVectorStorePipeline(
                vector_store=store, embedding=embedder
            )

        def run_raw(self, text: str) -> str:
            # Retrieve matches for ``text``, join them with newlines, and
            # return the first text of the LLM's response.
            passages: List[str] = self.retrieving_pipeline(text)
            prompt = "\n".join(passages)
            completion = self.llm(prompt)
            return completion.text[0]

    return Pipeline
Pipeline = simple_pipeline_cls
from .simple_pipeline import Pipeline
class TestPromptConfig:
def test_export_prompt_config(self, simple_pipeline_cls):
def test_export_prompt_config(self):
"""Test if the prompt config is exported correctly"""
pipeline = simple_pipeline_cls()
pipeline = Pipeline()
config_dict = export_pipeline_to_config(pipeline)
config = list(config_dict.values())[0]
@@ -78,9 +26,42 @@ class TestPromptConfig:
class TestPromptUI:
    """Smoke test for building the UI from an exported pipeline config."""

    def test_uigeneration(self):
        """Test if the gradio UI is exposed without any problem"""
        # Use the shared ``Pipeline`` imported from ``.simple_pipeline``
        # rather than a per-test fixture, matching the other test classes.
        pipeline = Pipeline()
        config = export_pipeline_to_config(pipeline)
        # Passes as long as building the UI from the config does not raise.
        build_from_dict(config)
class TestExport:
    """Smoke test for exporting pipeline logs to an Excel file."""

    def test_export(self, tmp_path):
        """Test if the export functionality works without error"""
        from pathlib import Path

        import yaml
        from theflow.storage import storage

        pipeline = Pipeline()

        # Make sure the directory behind the pipeline's ``store_result``
        # setting exists before exporting (presumably the exporter reads run
        # logs from it -- confirm against ``export_from_dict``).
        result_dir = Path(storage.url(pipeline.config.store_result))
        result_dir.mkdir(parents=True, exist_ok=True)

        # Attach a one-sheet log specification to the exported config.
        config_dict = export_pipeline_to_config(pipeline)
        pipeline_name = list(config_dict)[0]
        sheet_spec = {
            "inputs": [{"name": "text", "step": ".", "variable": "text"}],
            "outputs": [{"name": "answer", "step": "."}],
        }
        config_dict[pipeline_name]["logs"] = {"sheet1": sheet_spec}

        # The exporter is given a path to a YAML file, so write the config out.
        config_path = tmp_path / "config.yaml"
        with open(config_path, "w") as f:
            yaml.safe_dump(config_dict, f)

        excel_path = tmp_path / "exported.xlsx"
        export_from_dict(
            config=str(config_path),
            pipeline=pipeline_name,
            output_path=str(excel_path),
        )