Allow listing indices (#22)

This commit is contained in:
Duc Nguyen (john) 2024-04-11 16:28:04 +07:00 committed by GitHub
parent 3ed50b0f10
commit 5ce6bac03d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 215 additions and 28 deletions

View File

@ -16,7 +16,8 @@ from .base import BaseFileIndexIndexing, BaseFileIndexRetriever
class FileIndex(BaseIndex):
"""Index for the uploaded files
"""
File index to store and allow retrieval of files
The file index stores files in a local folder and indexes them for retrieval.
This file index provides the following infrastructure to support the indexing:
@ -303,30 +304,28 @@ class FileIndex(BaseIndex):
"value": embedding_default,
"component": "dropdown",
"choices": embedding_choices,
"info": "The name of embedding model to use.",
},
"supported_file_types": {
"name": "Supported file types",
"value": (
"image, .pdf, .txt, .csv, .xlsx, .doc, .docx, .pptx, .html, .zip"
),
"value": ".pdf, .txt",
"component": "text",
"info": "The file types that can be indexed, separated by comma.",
},
"max_file_size": {
"name": "Max file size (MB) - set 0 to disable",
"name": "Max file size (MB)",
"value": 1000,
"component": "number",
"info": "The maximum size of file. Set 0 to disable.",
},
"max_number_of_files": {
"name": "Max number of files that can be indexed - set 0 to disable",
"name": "Max number of files that can be indexed",
"value": 0,
"component": "number",
},
"max_number_of_text_length": {
"name": (
"Max amount of characters that can be indexed - set 0 to disable"
"info": (
"The total number of files that can be indexed on the system. "
"Set 0 to disable."
),
"value": 0,
"component": "number",
},
}

View File

@ -53,13 +53,13 @@ class IndexManager:
index = index_cls(app=self._app, id=id, name=name, config=config)
index.on_create()
with Session(engine) as session:
with Session(engine) as sess:
index_entry = Index(
id=index.id, name=index.name, config=index.config, index_type=index_type
)
session.add(index_entry)
session.commit()
session.refresh(index_entry)
sess.add(index_entry)
sess.commit()
sess.refresh(index_entry)
index.id = index_entry.id
@ -91,15 +91,13 @@ class IndexManager:
bool: True if the index exists, False otherwise
"""
if id:
with Session(engine) as session:
index = session.get(Index, id)
with Session(engine) as sess:
index = sess.get(Index, id)
return index is not None
if name:
with Session(engine) as session:
index = session.exec(
select(Index).where(Index.name == name)
).one_or_none()
with Session(engine) as sess:
index = sess.exec(select(Index).where(Index.name == name)).one_or_none()
return index is not None
return False
@ -117,11 +115,14 @@ class IndexManager:
if not self.exists(index["id"]):
self.build_index(**index)
with Session(engine) as session:
index_defs = session.exec(select(Index))
with Session(engine) as sess:
index_defs = sess.exec(select(Index))
for index_def in index_defs:
self.start_index(**index_def.dict())
self.start_index(**index_def.model_dump())
@property
def indices(self):
    """Return the manager's ``_indices`` collection unchanged."""
    return self._indices
def info(self):
    """Build a lookup from each index's ``id`` to the index object.

    The mapping is rebuilt from ``self._indices`` on every call.
    """
    return {entry.id: entry for entry in self._indices}

184
libs/ktem/ktem/index/ui.py Normal file
View File

@ -0,0 +1,184 @@
import gradio as gr
import pandas as pd
import yaml
from ktem.app import BasePage
def format_description(cls):
    """Render a class's docstring and admin settings as Markdown.

    Args:
        cls: a class exposing ``get_admin_settings()``, which returns a mapping
            of setting name to a dict that may contain ``value`` and ``info``.

    Returns:
        The dedented class docstring (omitted when the class has none)
        followed by a Markdown table with one row per admin setting.
    """
    import inspect

    def _cell(content):
        # A raw pipe or a line break inside a cell would split the table row.
        return str(content).replace("|", "\\|").replace("\n", " ")

    admin_settings = cls.get_admin_settings()
    table = ["| Name | Default | Description |", "| --- | --- | --- |"]
    for key, spec in admin_settings.items():
        default = _cell(spec.get("value", ""))
        description = _cell(spec.get("info", ""))
        table.append(f"| {_cell(key)} | {default} | {description} |")

    # cleandoc strips the source indentation of multi-line docstrings, which
    # Markdown could otherwise treat as an indented code block; `or ""` keeps
    # a missing docstring from being rendered as the literal text "None".
    doc = inspect.cleandoc(cls.__doc__ or "")
    header = f"{doc}\n\n" if doc else ""
    return header + "\n".join(table)
class IndexManagement(BasePage):
    """Page that lists the manager's indices and shows the selected one.

    Selecting a row of the table stores the index id in a hidden number
    field; a change of that field opens or closes a panel holding the index
    name, its YAML specification and a Markdown description of its settings.
    """

    def __init__(self, app):
        self._app = app
        self.manager = app.index_manager
        self.spec_desc_default = (
            "# Spec description\n\nSelect an index to view the spec description."
        )
        self.on_building_ui()

    def on_building_ui(self):
        """Create the "View" tab: the index table and the hidden detail panel."""
        # NOTE(review): the nesting of the layout contexts below was
        # reconstructed from a listing that had lost its indentation --
        # confirm against the real file.
        with gr.Tab(label="View"):
            self.index_list = gr.DataFrame(
                interactive=False,
                headers=["ID", "Name", "Index Type"],
            )

            with gr.Column(visible=False) as self._selected_panel:
                # Hidden holder of the selected index id; -1 means "none".
                self.selected_index_id = gr.Number(value=-1, visible=False)
                with gr.Row():
                    with gr.Column():
                        self.edit_name = gr.Textbox(label="Index name")
                        self.edit_spec = gr.Textbox(
                            label="Specification",
                            info="Specification of the Index in YAML format",
                            lines=10,
                        )

                        gr.Markdown(
                            "IMPORTANT: Changing or deleting the name or "
                            "specification of the index will require restarting "
                            "the system. Some settings will require rebuilding "
                            "the index."
                        )

                        with gr.Row():
                            self.btn_edit_save = gr.Button(
                                "Save", variant="primary", min_width=10
                            )
                            self.btn_delete = gr.Button(
                                "Delete", variant="stop", min_width=10
                            )
                            with gr.Row(visible=False) as self._delete_confirm:
                                self.btn_delete_yes = gr.Button(
                                    "Confirm Delete", variant="stop", min_width=10
                                )
                                self.btn_delete_no = gr.Button(
                                    "Cancel", min_width=10
                                )
                            self.btn_close = gr.Button("Close", min_width=10)

                    with gr.Column():
                        self.edit_spec_desc = gr.Markdown("# Spec description")

    def _on_app_created(self):
        """Called when the app is created: fill the index table on app load."""
        self._app.app.load(
            self.list_indices,
            inputs=None,
            outputs=[self.index_list],
        )

    @staticmethod
    def _confirm_visibility(confirming):
        """Return updates for (Save, Delete, Close, confirmation row).

        The three buttons are visible exactly when the confirmation row is
        hidden, and the other way round.
        """
        buttons = tuple(gr.update(visible=not confirming) for _ in range(3))
        return buttons + (gr.update(visible=confirming),)

    def on_register_events(self):
        """Wire table selection, the delete-confirmation toggle and Close."""
        self.index_list.select(
            self.select_index,
            inputs=self.index_list,
            outputs=[self.selected_index_id],
            show_progress="hidden",
        )

        detail_outputs = [
            self._selected_panel,
            # edit section
            self.edit_spec,
            self.edit_spec_desc,
            self.edit_name,
        ]
        self.selected_index_id.change(
            self.on_change_selected_index,
            inputs=[self.selected_index_id],
            outputs=detail_outputs,
            show_progress="hidden",
        )

        action_targets = (
            self.btn_edit_save,
            self.btn_delete,
            self.btn_close,
            self._delete_confirm,
        )
        # "Delete" swaps the action buttons for the confirmation row ...
        self.btn_delete.click(
            lambda: self._confirm_visibility(True),
            inputs=None,
            outputs=list(action_targets),
            show_progress="hidden",
        )
        # ... and "Cancel" swaps them back.
        self.btn_delete_no.click(
            lambda: self._confirm_visibility(False),
            inputs=None,
            outputs=list(action_targets),
            show_progress="hidden",
        )

        # Writing -1 into the hidden id closes the panel through the
        # change handler registered above.
        self.btn_close.click(
            lambda: -1,
            outputs=[self.selected_index_id],
        )

    def list_indices(self):
        """List the indices constructed by the user.

        Returns:
            A DataFrame with columns "ID", "Name" and "Index Type"; when the
            manager has no index it holds a single row of "-" placeholders.
        """
        rows = [
            {
                "ID": index.id,
                "Name": index.name,
                "Index Type": index.__class__.__name__,
            }
            for index in self.manager.indices
        ]
        if not rows:
            rows = [{"ID": "-", "Name": "-", "Index Type": "-"}]
        return pd.DataFrame.from_records(rows)

    def select_index(self, index_list, ev: gr.SelectData) -> int:
        """Return the id of the clicked index, or -1 when nothing is selected."""
        clicked_placeholder = ev.value == "-" and ev.index[0] == 0
        if clicked_placeholder:
            gr.Info("No index is constructed. Please create one first!")
            return -1

        if ev.selected:
            return int(index_list["ID"][ev.index[0]])
        return -1

    def on_change_selected_index(self, selected_index_id: int):
        """Produce the panel state for the given index id.

        Returns:
            A 4-tuple for (panel, specification, spec description, name):
            a hidden panel with emptied fields for -1, otherwise a visible
            panel with the index config dumped as YAML, the formatted
            description of its class, and its name.
        """
        if selected_index_id == -1:
            return (
                gr.update(visible=False),
                gr.update(value=""),
                gr.update(value=""),
                gr.update(value=""),
            )

        panel = gr.update(visible=True)
        index = self.manager.info()[selected_index_id]
        return (
            panel,
            yaml.dump(index.config),
            format_description(index.__class__),
            index.name,
        )

View File

@ -2,6 +2,7 @@ import gradio as gr
from ktem.app import BasePage
from ktem.db.models import User, engine
from ktem.embeddings.ui import EmbeddingManagement
from ktem.index.ui import IndexManagement
from ktem.llms.ui import LLMManagement
from sqlmodel import Session, select
@ -21,9 +22,12 @@ class ResourcesTab(BasePage):
with gr.Tab("LLMs") as self.llm_management_tab:
self.llm_management = LLMManagement(self._app)
with gr.Tab("Embedding Models") as self.llm_management_tab:
with gr.Tab("Embedding Models") as self.emb_management_tab:
self.emb_management = EmbeddingManagement(self._app)
with gr.Tab("Index Management") as self.index_management_tab:
self.index_management = IndexManagement(self._app)
def on_subscribe_public_events(self):
if self._app.f_user_management:
self._app.subscribe_event(

View File

@ -17,11 +17,10 @@ dependencies = [
"platformdirs",
"pluggy",
"python-decouple",
"python-pptx",
"sqlalchemy",
"sqlmodel",
"tiktoken",
"gradio>=4.0.0,<=4.22.0",
"gradio>=4.26.0",
]
readme = "README.md"
license = { text = "MIT License" }