feat: add support for reasoning llm with thought visualization (#652) bump:patch

* fix: LanceDB query with empty file_ids

* feat: add thinking display

* feat: add low request mode for local llm
Author: Tuan Anh Nguyen Dang (Tadashi_Cin), committed via GitHub
Date: 2025-02-05 13:45:12 +07:00
Commit: 0b090896fd (parent: f5b2200ffa)
5 changed files with 53 additions and 9 deletions

File 1 of 5: kotaemon.indices.qa.utils

@@ -80,3 +80,18 @@ def find_start_end_phrase(
         final_match = None

     return final_match, matched_length
+
+
+def replace_think_tag_with_details(text):
+    text = text.replace(
+        "<think>",
+        '<details><summary><span style="color:grey">Thought</span></summary><blockquote>',  # noqa
+    )
+    text = text.replace("</think>", "</blockquote></details>")
+    return text
+
+
+def strip_think_tag(text):
+    if "</think>" in text:
+        text = text.split("</think>")[1]
+    return text
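
These helpers handle the <think> ... </think> spans that reasoning models emit before their final answer: one rewrites the span into a collapsible HTML <details> block for the chat UI, the other drops it entirely. A quick usage sketch (the sample string is ours):

    text = "<think>weighing the options</think>The answer is 42."

    # collapsible rendering for the chat panel
    print(replace_think_tag_with_details(text))
    # -> <details><summary><span style="color:grey">Thought</span></summary>
    #    <blockquote>weighing the options</blockquote></details>The answer is 42.

    # plain stripping, for places that need clean text such as conversation titles
    print(strip_think_tag(text))
    # -> The answer is 42.

Note that strip_think_tag splits on the closing tag and keeps index [1], so text with several </think> tags would keep only the segment between the first and second one.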

File 2 of 5: LanceDBDocumentStore

@@ -98,6 +98,9 @@ class LanceDBDocumentStore(BaseDocumentStore):
         if not isinstance(ids, list):
             ids = [ids]
+        if len(ids) == 0:
+            return []
+
         id_filter = ", ".join([f"'{_id}'" for _id in ids])
         try:
             document_collection = self.db_connection.open_table(self.collection_name)
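
This is the empty-file_ids fix from the commit message: with ids == [], the join produces an empty string and the filter predicate becomes "id in ()", which LanceDB's SQL filter parser rejects, so the lookup raised instead of returning no documents. The failure mode can be reproduced in isolation (variable names from the hunk above):

    ids = []
    id_filter = ", ".join([f"'{_id}'" for _id in ids])
    print(f"id in ({id_filter})")  # -> "id in ()", a malformed predicate
    # hence the guard: if len(ids) == 0: return []  short-circuits before any query is built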

File 3 of 5: DocumentRetrievalPipeline (file index retriever)

@@ -14,6 +14,7 @@ from pathlib import Path
 from typing import Generator, Optional, Sequence

 import tiktoken
+from decouple import config
 from ktem.db.models import engine
 from ktem.embeddings.manager import embedding_models_manager
 from ktem.llms.manager import llms
@@ -270,7 +271,7 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
             },
             "use_llm_reranking": {
                 "name": "Use LLM relevant scoring",
-                "value": True,
+                "value": not config("USE_LOW_LLM_REQUESTS", default=False, cast=bool),
                 "choices": [True, False],
                 "component": "checkbox",
             },
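
python-decouple resolves config("USE_LOW_LLM_REQUESTS", ...) from the process environment or a .env file, and cast=bool converts strings such as "true"/"false"/"1"/"0" into a real boolean. A minimal sketch of the lookup outside the app:

    from decouple import config

    # False unless USE_LOW_LLM_REQUESTS is set in the environment or in .env
    low_llm = config("USE_LOW_LLM_REQUESTS", default=False, cast=bool)
    print("LLM reranking default:", not low_llm)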

File 4 of 5: ChatPage (ktem chat UI)

@@ -5,6 +5,7 @@ from copy import deepcopy
 from typing import Optional

 import gradio as gr
+from decouple import config
 from ktem.app import BasePage
 from ktem.components import reasonings
 from ktem.db.models import Conversation, engine
@@ -23,6 +24,7 @@ from theflow.utils.modules import import_dotted_string
 from kotaemon.base import Document
 from kotaemon.indices.ingests.files import KH_DEFAULT_FILE_EXTRACTORS
+from kotaemon.indices.qa.utils import strip_think_tag

 from ...utils import SUPPORTED_LANGUAGE_MAP, get_file_names_regex, get_urls
 from ...utils.commands import WEB_SEARCH_COMMAND
@@ -367,6 +369,7 @@ class ChatPage(BasePage):
                         elem_id="citation-dropdown",
                     )

+                    if not config("USE_LOW_LLM_REQUESTS", default=False, cast=bool):
                         self.use_mindmap = gr.State(value=True)
                         self.use_mindmap_check = gr.Checkbox(
                             label="Mindmap (on)",
@@ -374,6 +377,14 @@ class ChatPage(BasePage):
                             elem_id="use-mindmap-checkbox",
                             value=True,
                         )
+                    else:
+                        self.use_mindmap = gr.State(value=False)
+                        self.use_mindmap_check = gr.Checkbox(
+                            label="Mindmap (off)",
+                            container=False,
+                            elem_id="use-mindmap-checkbox",
+                            value=False,
+                        )

                 with gr.Column(
                     scale=INFO_PANEL_SCALES[False], elem_id="chat-info-panel"
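
In low-request mode the mindmap starts disabled, since rendering one costs an extra LLM call per answer; the checkbox stays in the UI so users can re-enable it per conversation. The branch condenses to (a sketch, not the repo's code):

    low_llm = config("USE_LOW_LLM_REQUESTS", default=False, cast=bool)
    default_on = not low_llm
    # label and initial value both track the default
    label = f"Mindmap ({'on' if default_on else 'off'})"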
@@ -1361,6 +1372,7 @@ class ChatPage(BasePage):
             # check if this is a newly created conversation
             if len(chat_history) == 1:
                 suggested_name = suggest_pipeline(chat_history).text
+                suggested_name = strip_think_tag(suggested_name)
                 suggested_name = suggested_name.replace('"', "").replace("'", "")[:40]
                 new_name = gr.update(value=suggested_name)
                 renamed = True
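
Stripping matters here because suggest_pipeline asks the LLM to name a new conversation; with a reasoning model, the raw output would start with a <think> block and the 40-character truncation below would capture the thought text rather than the title. For example (sample output is ours):

    raw = "<think>a short title is needed</think>Trip planning for Hanoi"
    title = strip_think_tag(raw).replace('"', "").replace("'", "")[:40]
    print(title)  # -> Trip planning for Hanoi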

File 5 of 5: FullQAPipeline (ktem reasoning)

@@ -3,6 +3,7 @@ import threading
 from textwrap import dedent
 from typing import Generator

+from decouple import config
 from ktem.embeddings.manager import embedding_models_manager as embeddings
 from ktem.llms.manager import llms
 from ktem.reasoning.prompt_optimization import (
@@ -29,6 +30,7 @@ from kotaemon.indices.qa.citation_qa import (
 )
 from kotaemon.indices.qa.citation_qa_inline import AnswerWithInlineCitation
 from kotaemon.indices.qa.format_context import PrepareEvidencePipeline
+from kotaemon.indices.qa.utils import replace_think_tag_with_details
 from kotaemon.llms import ChatLLM

 from ..utils import SUPPORTED_LANGUAGE_MAP
@@ -313,6 +315,13 @@ class FullQAPipeline(BaseReasoning):
             **kwargs,
         )

+        # check <think> tag from reasoning models
+        processed_answer = replace_think_tag_with_details(answer.text)
+        if processed_answer != answer.text:
+            # clear the chat message and render again
+            yield Document(channel="chat", content=None)
+            yield Document(channel="chat", content=processed_answer)
+
         # show the evidence
         if scoring_thread:
             scoring_thread.join()
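
Because the answer streams into the chat channel token by token, the raw <think> text has already been displayed by the time the complete answer exists; sending content=None first tells the UI to clear the accumulated message, after which the <details>-wrapped version is re-sent whole. A pseudo-consumer illustrating the protocol (semantics inferred from this diff, not from the renderer's code):

    chat_buffer = ""
    for doc in pipeline_stream:  # hypothetical stream of Document objects
        if doc.channel != "chat":
            continue
        if doc.content is None:
            chat_buffer = ""            # clear the rendered message
        else:
            chat_buffer += doc.content  # append the next chunk / full re-render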
@@ -410,7 +419,11 @@ class FullQAPipeline(BaseReasoning):
             },
             "highlight_citation": {
                 "name": "Citation style",
-                "value": "highlight",
+                "value": (
+                    "highlight"
+                    if not config("USE_LOW_LLM_REQUESTS", default=False, cast=bool)
+                    else "off"
+                ),
                 "component": "radio",
                 "choices": [
                     ("citation: highlight", "highlight"),