Allow listing indices (#22)

This commit is contained in:
Duc Nguyen (john) 2024-04-11 16:28:04 +07:00 committed by GitHub
parent 3ed50b0f10
commit 5ce6bac03d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 215 additions and 28 deletions

View File

@ -16,7 +16,8 @@ from .base import BaseFileIndexIndexing, BaseFileIndexRetriever
class FileIndex(BaseIndex): class FileIndex(BaseIndex):
"""Index for the uploaded files """
File index to store and allow retrieval of files
The file index stores files in a local folder and index them for retrieval. The file index stores files in a local folder and index them for retrieval.
This file index provides the following infrastructure to support the indexing: This file index provides the following infrastructure to support the indexing:
@ -303,30 +304,28 @@ class FileIndex(BaseIndex):
"value": embedding_default, "value": embedding_default,
"component": "dropdown", "component": "dropdown",
"choices": embedding_choices, "choices": embedding_choices,
"info": "The name of embedding model to use.",
}, },
"supported_file_types": { "supported_file_types": {
"name": "Supported file types", "name": "Supported file types",
"value": ( "value": ".pdf, .txt",
"image, .pdf, .txt, .csv, .xlsx, .doc, .docx, .pptx, .html, .zip"
),
"component": "text", "component": "text",
"info": "The file types that can be indexed, separated by comma.",
}, },
"max_file_size": { "max_file_size": {
"name": "Max file size (MB) - set 0 to disable", "name": "Max file size (MB)",
"value": 1000, "value": 1000,
"component": "number", "component": "number",
"info": "The maximum size of file. Set 0 to disable.",
}, },
"max_number_of_files": { "max_number_of_files": {
"name": "Max number of files that can be indexed - set 0 to disable", "name": "Max number of files that can be indexed",
"value": 0, "value": 0,
"component": "number", "component": "number",
}, "info": (
"max_number_of_text_length": { "The total number of files that can be indexed on the system. "
"name": ( "Set 0 to disable."
"Max amount of characters that can be indexed - set 0 to disable"
), ),
"value": 0,
"component": "number",
}, },
} }

View File

@ -53,13 +53,13 @@ class IndexManager:
index = index_cls(app=self._app, id=id, name=name, config=config) index = index_cls(app=self._app, id=id, name=name, config=config)
index.on_create() index.on_create()
with Session(engine) as session: with Session(engine) as sess:
index_entry = Index( index_entry = Index(
id=index.id, name=index.name, config=index.config, index_type=index_type id=index.id, name=index.name, config=index.config, index_type=index_type
) )
session.add(index_entry) sess.add(index_entry)
session.commit() sess.commit()
session.refresh(index_entry) sess.refresh(index_entry)
index.id = index_entry.id index.id = index_entry.id
@ -91,15 +91,13 @@ class IndexManager:
bool: True if the index exists, False otherwise bool: True if the index exists, False otherwise
""" """
if id: if id:
with Session(engine) as session: with Session(engine) as sess:
index = session.get(Index, id) index = sess.get(Index, id)
return index is not None return index is not None
if name: if name:
with Session(engine) as session: with Session(engine) as sess:
index = session.exec( index = sess.exec(select(Index).where(Index.name == name)).one_or_none()
select(Index).where(Index.name == name)
).one_or_none()
return index is not None return index is not None
return False return False
@ -117,11 +115,14 @@ class IndexManager:
if not self.exists(index["id"]): if not self.exists(index["id"]):
self.build_index(**index) self.build_index(**index)
with Session(engine) as session: with Session(engine) as sess:
index_defs = session.exec(select(Index)) index_defs = sess.exec(select(Index))
for index_def in index_defs: for index_def in index_defs:
self.start_index(**index_def.dict()) self.start_index(**index_def.model_dump())
@property @property
def indices(self): def indices(self):
return self._indices return self._indices
def info(self):
    """Return the entries of ``self._indices`` keyed by their ``id``.

    Returns:
        dict: maps each index object's ``id`` attribute to that index
            object. If two entries share an id, the later one wins.
    """
    by_id = {}
    for idx in self._indices:
        by_id[idx.id] = idx
    return by_id

184
libs/ktem/ktem/index/ui.py Normal file
View File

@ -0,0 +1,184 @@
import gradio as gr
import pandas as pd
import yaml
from ktem.app import BasePage
def format_description(cls):
    """Render a class's docstring and admin settings as a Markdown document.

    The output is the class docstring followed by a three-column Markdown
    table (name, default value, description) built from the mapping returned
    by ``cls.get_admin_settings()``.

    Args:
        cls: a class exposing ``get_admin_settings()``, which returns a
            mapping of setting key -> dict. Only the optional ``"value"`` and
            ``"info"`` entries of each dict are read.

    Returns:
        str: Markdown text. When the class has no docstring, only the table
            is returned (rather than the literal text ``None``).
    """
    # Local import so this function does not depend on the module's import block.
    import inspect

    def _cell(text):
        """Make a value safe to embed in a single Markdown table cell."""
        # An unescaped pipe or a newline would split the row / end the table.
        return str(text).replace("|", "\\|").replace("\n", " ")

    admin_settings = cls.get_admin_settings()

    table_lines = ["| Name | Default | Description |", "| --- | --- | --- |"]
    for key, spec in admin_settings.items():
        default = _cell(spec.get("value", ""))
        description = _cell(spec.get("info", ""))
        table_lines.append(f"| {_cell(key)} | {default} | {description} |")
    table = "\n".join(table_lines)

    # cleandoc strips the docstring's source indentation; left in place, the
    # indented continuation lines would be rendered as a Markdown code block.
    doc = inspect.cleandoc(cls.__doc__ or "")
    if not doc:
        return table
    return f"{doc}\n\n{table}"
class IndexManagement(BasePage):
    """Gradio page that lists the indices known to the app's index manager.

    Selecting a row of the list opens a panel showing the index name, its
    configuration dumped as YAML, and a Markdown description of the index
    class's admin settings.

    NOTE(review): the "Save" and "Confirm Delete" buttons are created but no
    handler is registered for them in this class.
    """

    def __init__(self, app):
        """Keep references to the app and its index manager, then build the UI.

        Args:
            app: the application object; ``app.index_manager`` is read here
                and ``app.app.load`` is used in ``_on_app_created``.
        """
        self._app = app
        self.manager = app.index_manager
        self.spec_desc_default = (
            "# Spec description\n\nSelect an index to view the spec description."
        )
        self.on_building_ui()

    def on_building_ui(self):
        """Create the index table and the (initially hidden) detail panel."""
        # NOTE(review): the nesting of the `with` blocks below was reconstructed
        # from a listing that had lost its indentation -- confirm the layout.
        with gr.Tab(label="View"):
            self.index_list = gr.DataFrame(
                headers=["ID", "Name", "Index Type"],
                interactive=False,
            )

            with gr.Column(visible=False) as self._selected_panel:
                # Hidden holder of the selected index id; -1 means "none".
                self.selected_index_id = gr.Number(value=-1, visible=False)
                with gr.Row():
                    with gr.Column():
                        self.edit_name = gr.Textbox(
                            label="Index name",
                        )
                        self.edit_spec = gr.Textbox(
                            label="Specification",
                            info="Specification of the Index in YAML format",
                            lines=10,
                        )

                        gr.Markdown(
                            "IMPORTANT: Changing or deleting the name or "
                            "specification of the index will require restarting "
                            "the system. Some settings will require rebuilding "
                            "the index."
                        )
                        with gr.Row():
                            self.btn_edit_save = gr.Button(
                                "Save", min_width=10, variant="primary"
                            )
                            self.btn_delete = gr.Button(
                                "Delete", min_width=10, variant="stop"
                            )
                            with gr.Row(visible=False) as self._delete_confirm:
                                self.btn_delete_yes = gr.Button(
                                    "Confirm Delete",
                                    variant="stop",
                                    min_width=10,
                                )
                                self.btn_delete_no = gr.Button("Cancel", min_width=10)
                            self.btn_close = gr.Button("Close", min_width=10)

                    with gr.Column():
                        self.edit_spec_desc = gr.Markdown("# Spec description")

    def _on_app_created(self):
        """Called when the app is created"""
        # Fill the table whenever the page is loaded.
        self._app.app.load(
            self.list_indices,
            inputs=None,
            outputs=[self.index_list],
        )

    @staticmethod
    def _delete_confirm_updates(confirming: bool):
        """Build visibility updates for the action buttons.

        Args:
            confirming: True to show the delete-confirmation row and hide the
                Save / Delete / Close buttons; False for the reverse.

        Returns:
            tuple: updates for (btn_edit_save, btn_delete, btn_close,
                _delete_confirm), in that order.
        """
        return (
            gr.update(visible=not confirming),
            gr.update(visible=not confirming),
            gr.update(visible=not confirming),
            gr.update(visible=confirming),
        )

    def on_register_events(self):
        """Wire the table selection, the detail panel and the button toggles."""
        action_buttons = [
            self.btn_edit_save,
            self.btn_delete,
            self.btn_close,
            self._delete_confirm,
        ]

        # Row click -> store the id; the id change then refreshes the panel.
        self.index_list.select(
            self.select_index,
            inputs=self.index_list,
            outputs=[self.selected_index_id],
            show_progress="hidden",
        )

        self.selected_index_id.change(
            self.on_change_selected_index,
            inputs=[self.selected_index_id],
            outputs=[
                self._selected_panel,
                # edit section
                self.edit_spec,
                self.edit_spec_desc,
                self.edit_name,
            ],
            show_progress="hidden",
        )

        # "Delete" swaps the normal buttons for the confirmation row ...
        self.btn_delete.click(
            lambda: self._delete_confirm_updates(True),
            inputs=None,
            outputs=action_buttons,
            show_progress="hidden",
        )
        # ... and "Cancel" swaps them back.
        self.btn_delete_no.click(
            lambda: self._delete_confirm_updates(False),
            inputs=None,
            outputs=action_buttons,
            show_progress="hidden",
        )

        # Closing the panel is expressed as "no index selected".
        self.btn_close.click(
            lambda: -1,
            outputs=[self.selected_index_id],
        )

    def list_indices(self):
        """List the indices constructed by the user

        Returns:
            pandas.DataFrame: one row per entry of ``self.manager.indices``
                with columns "ID", "Name" and "Index Type"; a single row of
                "-" placeholders when there are no indices.
        """
        records = [
            {
                "ID": index.id,
                "Name": index.name,
                "Index Type": index.__class__.__name__,
            }
            for index in self.manager.indices
        ]
        if not records:
            records = [{"ID": "-", "Name": "-", "Index Type": "-"}]
        return pd.DataFrame.from_records(records)

    def select_index(self, index_list, ev: gr.SelectData) -> int:
        """Return the index id

        Args:
            index_list: current content of the index table, with an "ID"
                column as produced by ``list_indices``.
            ev: the selection event; ``ev.index[0]`` is used as the row
                position.

        Returns:
            int: id of the clicked index, or -1 when nothing is selected or
                the placeholder row was clicked.
        """
        # Check for deselection first so it never triggers the toast below.
        if not ev.selected:
            return -1

        # Positional lookup: ev.index carries a row position, not a label.
        raw_id = index_list["ID"].iloc[ev.index[0]]

        # The placeholder row is the only one whose ID is "-"; testing the ID
        # (not the clicked cell text) keeps an index literally named "-" usable.
        if str(raw_id) == "-":
            gr.Info("No index is constructed. Please create one first!")
            return -1

        return int(raw_id)

    def on_change_selected_index(self, selected_index_id: int):
        """Refresh the detail panel for the newly selected index id.

        Args:
            selected_index_id: id of the selected index; -1 (or None) means
                no selection.

        Returns:
            tuple: values/updates for (_selected_panel, edit_spec,
                edit_spec_desc, edit_name), in that order. The panel is hidden
                and the fields are reset when there is no selection or the id
                is unknown to the manager.
        """
        index = None
        if selected_index_id is not None and selected_index_id != -1:
            # int(): the value comes from a number component, and an id that
            # has disappeared must hide the panel instead of raising KeyError.
            index = self.manager.info().get(int(selected_index_id))

        if index is None:
            return (
                gr.update(visible=False),
                gr.update(value=""),
                gr.update(value=self.spec_desc_default),
                gr.update(value=""),
            )

        return (
            gr.update(visible=True),
            yaml.dump(index.config),
            format_description(index.__class__),
            index.name,
        )

View File

@ -2,6 +2,7 @@ import gradio as gr
from ktem.app import BasePage from ktem.app import BasePage
from ktem.db.models import User, engine from ktem.db.models import User, engine
from ktem.embeddings.ui import EmbeddingManagement from ktem.embeddings.ui import EmbeddingManagement
from ktem.index.ui import IndexManagement
from ktem.llms.ui import LLMManagement from ktem.llms.ui import LLMManagement
from sqlmodel import Session, select from sqlmodel import Session, select
@ -21,9 +22,12 @@ class ResourcesTab(BasePage):
with gr.Tab("LLMs") as self.llm_management_tab: with gr.Tab("LLMs") as self.llm_management_tab:
self.llm_management = LLMManagement(self._app) self.llm_management = LLMManagement(self._app)
with gr.Tab("Embedding Models") as self.llm_management_tab: with gr.Tab("Embedding Models") as self.emb_management_tab:
self.emb_management = EmbeddingManagement(self._app) self.emb_management = EmbeddingManagement(self._app)
with gr.Tab("Index Management") as self.index_management_tab:
self.index_management = IndexManagement(self._app)
def on_subscribe_public_events(self): def on_subscribe_public_events(self):
if self._app.f_user_management: if self._app.f_user_management:
self._app.subscribe_event( self._app.subscribe_event(

View File

@ -17,11 +17,10 @@ dependencies = [
"platformdirs", "platformdirs",
"pluggy", "pluggy",
"python-decouple", "python-decouple",
"python-pptx",
"sqlalchemy", "sqlalchemy",
"sqlmodel", "sqlmodel",
"tiktoken", "tiktoken",
"gradio>=4.0.0,<=4.22.0", "gradio>=4.26.0",
] ]
readme = "README.md" readme = "README.md"
license = { text = "MIT License" } license = { text = "MIT License" }