feat: add support for reasoning llm with thought visualization (#652) bump:patch

* fix: LanceDB query with empty file_ids

* feat: add thinking display

* feat: add low request mode for local llm
Author: Tuan Anh Nguyen Dang (Tadashi_Cin), committed via GitHub
Date: 2025-02-05 13:45:12 +07:00
Commit: 0b090896fd (parent: f5b2200ffa)
5 changed files with 53 additions and 9 deletions

File 1 of 5: kotaemon.indices.qa.utils

@@ -80,3 +80,18 @@ def find_start_end_phrase(
         final_match = None

     return final_match, matched_length
+
+
+def replace_think_tag_with_details(text):
+    text = text.replace(
+        "<think>",
+        '<details><summary><span style="color:grey">Thought</span></summary><blockquote>',  # noqa
+    )
+    text = text.replace("</think>", "</blockquote></details>")
+    return text
+
+
+def strip_think_tag(text):
+    if "</think>" in text:
+        text = text.split("</think>")[1]
+    return text
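
These helpers handle the <think> ... </think> spans that reasoning models emit before their final answer: one rewrites the span into a collapsible HTML <details> block for the chat UI, the other drops it entirely. A quick usage sketch (the sample string is ours):

    text = "<think>weighing the options</think>The answer is 42."

    # collapsible rendering for the chat panel
    print(replace_think_tag_with_details(text))
    # -> <details><summary><span style="color:grey">Thought</span></summary>
    #    <blockquote>weighing the options</blockquote></details>The answer is 42.

    # plain stripping, for places that need clean text such as conversation titles
    print(strip_think_tag(text))
    # -> The answer is 42.

Note that strip_think_tag splits on the closing tag and keeps index [1], so text with several </think> tags would keep only the segment between the first and second one.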

File 2 of 5: LanceDBDocumentStore

@@ -98,6 +98,9 @@ class LanceDBDocumentStore(BaseDocumentStore):
         if not isinstance(ids, list):
             ids = [ids]
+        if len(ids) == 0:
+            return []
+
         id_filter = ", ".join([f"'{_id}'" for _id in ids])
         try:
             document_collection = self.db_connection.open_table(self.collection_name)
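
This is the empty-file_ids fix from the commit message: with ids == [], the join produces an empty string and the filter predicate becomes "id in ()", which LanceDB's SQL filter parser rejects, so the lookup raised instead of returning no documents. The failure mode can be reproduced in isolation (variable names from the hunk above):

    ids = []
    id_filter = ", ".join([f"'{_id}'" for _id in ids])
    print(f"id in ({id_filter})")  # -> "id in ()", a malformed predicate
    # hence the guard: if len(ids) == 0: return []  short-circuits before any query is built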

File 3 of 5: DocumentRetrievalPipeline (file index retriever)

@@ -14,6 +14,7 @@ from pathlib import Path
 from typing import Generator, Optional, Sequence

 import tiktoken
+from decouple import config
 from ktem.db.models import engine
 from ktem.embeddings.manager import embedding_models_manager
 from ktem.llms.manager import llms
@@ -270,7 +271,7 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
             },
             "use_llm_reranking": {
                 "name": "Use LLM relevant scoring",
-                "value": True,
+                "value": not config("USE_LOW_LLM_REQUESTS", default=False, cast=bool),
                 "choices": [True, False],
                 "component": "checkbox",
             },
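
python-decouple resolves config("USE_LOW_LLM_REQUESTS", ...) from the process environment or a .env file, and cast=bool converts strings such as "true"/"false"/"1"/"0" into a real boolean. A minimal sketch of the lookup outside the app:

    from decouple import config

    # False unless USE_LOW_LLM_REQUESTS is set in the environment or in .env
    low_llm = config("USE_LOW_LLM_REQUESTS", default=False, cast=bool)
    print("LLM reranking default:", not low_llm)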

File 4 of 5: ChatPage (ktem chat UI)

@@ -5,6 +5,7 @@ from copy import deepcopy
 from typing import Optional

 import gradio as gr
+from decouple import config
 from ktem.app import BasePage
 from ktem.components import reasonings
 from ktem.db.models import Conversation, engine
@@ -23,6 +24,7 @@ from theflow.utils.modules import import_dotted_string
 from kotaemon.base import Document
 from kotaemon.indices.ingests.files import KH_DEFAULT_FILE_EXTRACTORS
+from kotaemon.indices.qa.utils import strip_think_tag

 from ...utils import SUPPORTED_LANGUAGE_MAP, get_file_names_regex, get_urls
 from ...utils.commands import WEB_SEARCH_COMMAND
@@ -367,6 +369,7 @@ class ChatPage(BasePage):
                         elem_id="citation-dropdown",
                     )

+                    if not config("USE_LOW_LLM_REQUESTS", default=False, cast=bool):
                         self.use_mindmap = gr.State(value=True)
                         self.use_mindmap_check = gr.Checkbox(
                             label="Mindmap (on)",
@@ -374,6 +377,14 @@ class ChatPage(BasePage):
                             elem_id="use-mindmap-checkbox",
                             value=True,
                         )
+                    else:
+                        self.use_mindmap = gr.State(value=False)
+                        self.use_mindmap_check = gr.Checkbox(
+                            label="Mindmap (off)",
+                            container=False,
+                            elem_id="use-mindmap-checkbox",
+                            value=False,
+                        )

                 with gr.Column(
                     scale=INFO_PANEL_SCALES[False], elem_id="chat-info-panel"
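
In low-request mode the mindmap starts disabled, since rendering one costs an extra LLM call per answer; the checkbox stays in the UI so users can re-enable it per conversation. The branch condenses to (a sketch, not the repo's code):

    low_llm = config("USE_LOW_LLM_REQUESTS", default=False, cast=bool)
    default_on = not low_llm
    # label and initial value both track the default
    label = f"Mindmap ({'on' if default_on else 'off'})"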
@@ -1361,6 +1372,7 @@ class ChatPage(BasePage):
             # check if this is a newly created conversation
             if len(chat_history) == 1:
                 suggested_name = suggest_pipeline(chat_history).text
+                suggested_name = strip_think_tag(suggested_name)
                 suggested_name = suggested_name.replace('"', "").replace("'", "")[:40]
                 new_name = gr.update(value=suggested_name)
                 renamed = True
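
Stripping matters here because suggest_pipeline asks the LLM to name a new conversation; with a reasoning model, the raw output would start with a <think> block and the 40-character truncation below would capture the thought text rather than the title. For example (sample output is ours):

    raw = "<think>a short title is needed</think>Trip planning for Hanoi"
    title = strip_think_tag(raw).replace('"', "").replace("'", "")[:40]
    print(title)  # -> Trip planning for Hanoi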

File 5 of 5: FullQAPipeline (ktem reasoning)

@@ -3,6 +3,7 @@ import threading
 from textwrap import dedent
 from typing import Generator

+from decouple import config
 from ktem.embeddings.manager import embedding_models_manager as embeddings
 from ktem.llms.manager import llms
 from ktem.reasoning.prompt_optimization import (
@@ -29,6 +30,7 @@ from kotaemon.indices.qa.citation_qa import (
 )
 from kotaemon.indices.qa.citation_qa_inline import AnswerWithInlineCitation
 from kotaemon.indices.qa.format_context import PrepareEvidencePipeline
+from kotaemon.indices.qa.utils import replace_think_tag_with_details
 from kotaemon.llms import ChatLLM

 from ..utils import SUPPORTED_LANGUAGE_MAP
@@ -313,6 +315,13 @@ class FullQAPipeline(BaseReasoning):
             **kwargs,
         )

+        # check <think> tag from reasoning models
+        processed_answer = replace_think_tag_with_details(answer.text)
+        if processed_answer != answer.text:
+            # clear the chat message and render again
+            yield Document(channel="chat", content=None)
+            yield Document(channel="chat", content=processed_answer)
+
         # show the evidence
         if scoring_thread:
             scoring_thread.join()
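
Because the answer streams into the chat channel token by token, the raw <think> text has already been displayed by the time the complete answer exists; sending content=None first tells the UI to clear the accumulated message, after which the <details>-wrapped version is re-sent whole. A pseudo-consumer illustrating the protocol (semantics inferred from this diff, not from the renderer's code):

    chat_buffer = ""
    for doc in pipeline_stream:  # hypothetical stream of Document objects
        if doc.channel != "chat":
            continue
        if doc.content is None:
            chat_buffer = ""            # clear the rendered message
        else:
            chat_buffer += doc.content  # append the next chunk / full re-render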
@@ -410,7 +419,11 @@ class FullQAPipeline(BaseReasoning):
             },
             "highlight_citation": {
                 "name": "Citation style",
-                "value": "highlight",
+                "value": (
+                    "highlight"
+                    if not config("USE_LOW_LLM_REQUESTS", default=False, cast=bool)
+                    else "off"
+                ),
                 "component": "radio",
                 "choices": [
                     ("citation: highlight", "highlight"),