feat: add inline citation style (#523) bump:minor

* feat: add URL quick index, export mindmap, refine UI & animation

* fix: inject multimodal mode from env var

* fix: minor update css

* feat: add citation inline mode

* fix: minor update citation inline pipeline

* feat: add citation quick setting

* fix: minor update

* fix: minor update
Tuan Anh Nguyen Dang (Tadashi_Cin) authored on 2024-11-25 12:07:02 +07:00, committed by GitHub
parent 013f6f4103
commit 7e34e4343b
18 changed files with 1173 additions and 651 deletions

View File

@@ -97,6 +97,11 @@ button.selected {
#chat-info-panel {
max-height: var(--main-area-height) !important;
overflow: auto !important;
transition: all 0.5s;
}
body.dark #chat-info-panel figure>img{
filter: invert(100%);
}
#conv-settings-panel {
@@ -199,11 +204,26 @@ mark {
right: 15px;
}
/* #new-conv-button > img {
position: relative;
top: 0px;
right: -50%;
} */
#use-mindmap-checkbox {
position: absolute;
width: 110px;
top: 10px;
right: 25px;
}
#quick-url textarea {
resize: none;
background: transparent;
margin-top: 0px;
}
#quick-url textarea::placeholder {
text-align: center;
}
#quick-file {
height: 110px;
}
span.icon {
color: #cecece;
@@ -225,11 +245,6 @@ span.icon {
overflow: unset !important;
}
/*body {*/
/* margin: 0;*/
/* font-family: Arial, sans-serif;*/
/*}*/
pdfjs-viewer-element {
height: 100vh;
height: 100dvh;
@@ -280,8 +295,7 @@ pdfjs-viewer-element {
overflow: auto;
}
/** Switch
-------------------------------------*/
/* Switch checkbox styles */
#is-public-checkbox {
position: relative;
@@ -293,10 +307,6 @@ pdfjs-viewer-element {
opacity: 0;
}
/**
* 1. Adjust this to size
*/
.switch {
display: inline-block;
/* 1 */
@@ -330,3 +340,28 @@ pdfjs-viewer-element {
.switch:has(> input:checked) {
background: #0c895f;
}
/* Bot animation */
.message.bot {
animation: fadein 1.5s ease-in-out forwards;
}
details.evidence {
animation: fadein 0.5s ease-in-out forwards;
}
@keyframes fadein {
0% {
opacity: 0;
}
100% {
opacity: 100%;
}
}
.message a.citation {
color: #10b981;
text-decoration: none;
}

View File

@@ -16,6 +16,11 @@ function run() {
let chat_info_panel = document.getElementById("info-expand");
chat_info_panel.insertBefore(info_expand_button, chat_info_panel.childNodes[2]);
// move use mind-map checkbox
let mindmap_checkbox = document.getElementById("use-mindmap-checkbox");
let chat_setting_panel = document.getElementById("chat-settings-expand");
chat_setting_panel.insertBefore(mindmap_checkbox, chat_setting_panel.childNodes[2]);
// create slider toggle
const is_public_checkbox = document.getElementById("is-public-checkbox");
const label_element = is_public_checkbox.getElementsByTagName("label")[0];
@@ -49,4 +54,21 @@ function run() {
globalThis.removeFromStorage = (key) => {
localStorage.removeItem(key)
}
// Function to scroll to given citation with ID
// Sleep function using Promise and setTimeout
function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
globalThis.scrollToCitation = async (event) => {
event.preventDefault(); // Prevent the default link behavior
var citationId = event.target.getAttribute('id');
await sleep(100); // Sleep for 100 milliseconds
var citation = document.querySelector('mark[id="' + citationId + '"]');
if (citation) {
citation.scrollIntoView({ behavior: 'smooth' });
}
}
}

View File

@@ -25,8 +25,8 @@ class BaseConversation(SQLModel):
default_factory=lambda: uuid.uuid4().hex, primary_key=True, index=True
)
name: str = Field(
default_factory=lambda: datetime.datetime.now(get_localzone()).strftime(
"%Y-%m-%d %H:%M:%S"
default_factory=lambda: "Untitled - {}".format(
datetime.datetime.now(get_localzone()).strftime("%Y-%m-%d %H:%M:%S")
)
)
user: int = Field(default=0) # For now we only have one user
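
For illustration only, a minimal standalone sketch of the new default-name factory (assuming get_localzone comes from tzlocal, as the model code implies); conversations are now named "Untitled - <timestamp>" instead of the bare timestamp:

import datetime

from tzlocal import get_localzone  # assumption: the same helper imported by the model


def default_conversation_name() -> str:
    # mirrors the new default_factory: "Untitled - YYYY-MM-DD HH:MM:SS" in the local timezone
    return "Untitled - {}".format(
        datetime.datetime.now(get_localzone()).strftime("%Y-%m-%d %H:%M:%S")
    )


print(default_conversation_name())  # e.g. "Untitled - 2024-11-25 12:07:02"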

View File

@@ -126,6 +126,9 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
if doc_ids:
flatten_doc_ids = []
for doc_id in doc_ids:
if doc_id is None:
raise ValueError("No document is selected")
if doc_id.startswith("["):
flatten_doc_ids.extend(json.loads(doc_id))
else:

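A self-contained sketch (not part of the diff) of the doc-id flattening this hunk guards, assuming the truncated else branch appends the plain id; selected ids may arrive either as bare strings or as JSON-encoded lists, and a None entry now fails fast:

import json


def flatten_selected_doc_ids(doc_ids: list) -> list[str]:
    flattened: list[str] = []
    for doc_id in doc_ids:
        if doc_id is None:
            # mirrors the new guard: raise early when nothing is selected
            raise ValueError("No document is selected")
        if doc_id.startswith("["):
            # JSON-encoded list of ids, e.g. '["id1", "id2"]'
            flattened.extend(json.loads(doc_id))
        else:
            flattened.append(doc_id)
    return flattened


print(flatten_selected_doc_ids(['["id1", "id2"]', "id3"]))  # ['id1', 'id2', 'id3']
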
View File

@@ -22,6 +22,13 @@ from theflow.settings import settings as flowsettings
DOWNLOAD_MESSAGE = "Press again to download"
MAX_FILENAME_LENGTH = 20
chat_input_focus_js = """
function() {
let chatInput = document.querySelector("#chat-input textarea");
chatInput.focus();
}
"""
class File(gr.File):
"""Subclass from gr.File to maintain the original filename
@@ -666,7 +673,7 @@ class FileIndexPage(BasePage):
outputs=self._app.chat_page.quick_file_upload_status,
)
.then(
fn=self.index_fn_with_default_loaders,
fn=self.index_fn_file_with_default_loaders,
inputs=[
self._app.chat_page.quick_file_upload,
gr.State(value=False),
@@ -689,6 +696,38 @@ class FileIndexPage(BasePage):
for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
quickUploadedEvent = quickUploadedEvent.then(**event)
quickURLUploadedEvent = (
self._app.chat_page.quick_urls.submit(
fn=lambda: gr.update(
value="Please wait for the indexing process "
"to complete before adding your question."
),
outputs=self._app.chat_page.quick_file_upload_status,
)
.then(
fn=self.index_fn_url_with_default_loaders,
inputs=[
self._app.chat_page.quick_urls,
gr.State(value=True),
self._app.settings_state,
self._app.user_id,
],
outputs=self.quick_upload_state,
)
.success(
fn=lambda: [
gr.update(value=None),
gr.update(value="select"),
],
outputs=[
self._app.chat_page.quick_urls,
self._app.chat_page._indices_input[0],
],
)
)
for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
quickURLUploadedEvent = quickURLUploadedEvent.then(**event)
quickUploadedEvent.success(
fn=lambda x: x,
inputs=self.quick_upload_state,
@@ -701,6 +740,30 @@ class FileIndexPage(BasePage):
inputs=[self._app.user_id, self.filter],
outputs=[self.file_list_state, self.file_list],
concurrency_limit=20,
).then(
fn=lambda: True,
inputs=None,
outputs=None,
js=chat_input_focus_js,
)
quickURLUploadedEvent.success(
fn=lambda x: x,
inputs=self.quick_upload_state,
outputs=self._app.chat_page._indices_input[1],
).then(
fn=lambda: gr.update(value="Indexing completed."),
outputs=self._app.chat_page.quick_file_upload_status,
).then(
fn=self.list_file,
inputs=[self._app.user_id, self.filter],
outputs=[self.file_list_state, self.file_list],
concurrency_limit=20,
).then(
fn=lambda: True,
inputs=None,
outputs=None,
js=chat_input_focus_js,
)
except Exception as e:
@@ -951,7 +1014,7 @@ class FileIndexPage(BasePage):
return results
def index_fn_with_default_loaders(
def index_fn_file_with_default_loaders(
self, files, reindex: bool, settings, user_id
) -> list["str"]:
"""Function for quick upload with default loaders
@@ -991,6 +1054,22 @@ class FileIndexPage(BasePage):
return exist_ids + returned_ids
def index_fn_url_with_default_loaders(self, urls, reindex: bool, settings, user_id):
returned_ids = []
settings = deepcopy(settings)
settings[f"index.options.{self._index.id}.reader_mode"] = "default"
settings[f"index.options.{self._index.id}.quick_index_mode"] = True
if urls:
_iter = self.index_fn([], urls, reindex, settings, user_id)
try:
while next(_iter):
pass
except StopIteration as e:
returned_ids = e.value
return returned_ids
def index_files_from_dir(
self, folder_path, reindex, settings, user_id
) -> Generator[tuple[str, str], None, None]:

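The quick URL path above drives index_fn, a generator, to exhaustion and reads its return value from StopIteration.value. A minimal sketch of that idiom, with index_fn_stub standing in for the real self.index_fn:

from typing import Generator


def index_fn_stub(urls: list[str]) -> Generator[str, None, list[str]]:
    # hypothetical stand-in for self.index_fn: yields progress, returns the new ids
    for url in urls:
        yield f"indexing {url}"
    return [f"id-{i}" for i, _ in enumerate(urls)]


def collect_returned_ids(gen) -> list[str]:
    # same idiom as index_fn_url_with_default_loaders
    returned_ids: list[str] = []
    try:
        while next(gen):
            pass
    except StopIteration as e:
        returned_ids = e.value
    return returned_ids


print(collect_returned_ids(index_fn_stub(["https://example.com"])))  # ['id-0']
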
View File

@@ -40,26 +40,52 @@ function() {
links[i].onclick = openModal;
}
var mindmap_el = document.getElementById('mindmap');
if (mindmap_el) {
var output = svgPanZoom(mindmap_el);
// Get all citation links and attach click event
var links = document.querySelectorAll("a.citation");
for (var i = 0; i < links.length; i++) {
links[i].onclick = scrollToCitation;
}
var link = document.getElementById("mindmap-toggle");
if (link) {
link.onclick = function(event) {
var mindmap_el = document.getElementById('mindmap');
if (mindmap_el) {
var output = svgPanZoom(mindmap_el);
const svg = mindmap_el.cloneNode(true);
function on_svg_export(event) {
event.preventDefault(); // Prevent the default link behavior
var div = document.getElementById("mindmap-wrapper");
if (div) {
var currentHeight = div.style.height;
if (currentHeight === '400px') {
var contentHeight = div.scrollHeight;
div.style.height = contentHeight + 'px';
} else {
div.style.height = '400px'
// convert to a valid XML source
const as_text = new XMLSerializer().serializeToString(svg);
// store in a Blob
const blob = new Blob([as_text], { type: "image/svg+xml" });
// create a URL pointing to that blob
const url = URL.createObjectURL(blob);
const win = open(url);
// so the Garbage Collector can collect the blob
win.onload = (evt) => URL.revokeObjectURL(url);
}
var link = document.getElementById("mindmap-toggle");
if (link) {
link.onclick = function(event) {
event.preventDefault(); // Prevent the default link behavior
var div = document.getElementById("mindmap-wrapper");
if (div) {
var currentHeight = div.style.height;
if (currentHeight === '400px') {
var contentHeight = div.scrollHeight;
div.style.height = contentHeight + 'px';
} else {
div.style.height = '400px'
}
}
}
};
};
}
var link = document.getElementById("mindmap-export");
if (link) {
link.addEventListener('click', on_svg_export);
}
}
return [links.length]
@@ -127,6 +153,14 @@ class ChatPage(BasePage):
file_count="multiple",
container=True,
show_label=False,
elem_id="quick-file",
)
self.quick_urls = gr.Textbox(
placeholder="Or paste URLs here",
lines=1,
container=False,
show_label=False,
elem_id="quick-url",
)
self.quick_file_upload_status = gr.Markdown()
@@ -136,12 +170,17 @@ class ChatPage(BasePage):
self.chat_panel = ChatPanel(self._app)
with gr.Row():
with gr.Accordion(label="Chat settings", open=False):
with gr.Accordion(
label="Chat settings",
elem_id="chat-settings-expand",
open=False,
):
# a quick switch for reasoning type option
with gr.Row():
gr.HTML("Reasoning method")
gr.HTML("Model")
gr.HTML("Generate mindmap")
gr.HTML("Language")
gr.HTML("Citation")
with gr.Row():
reasoning_type_values = [
@@ -165,17 +204,36 @@ class ChatPage(BasePage):
container=False,
show_label=False,
)
binary_default_choices = [
(DEFAULT_SETTING, DEFAULT_SETTING),
("Enable", True),
("Disable", False),
]
self.use_mindmap = gr.Dropdown(
self.language = gr.Dropdown(
choices=[
(DEFAULT_SETTING, DEFAULT_SETTING),
]
+ self._app.default_settings.reasoning.settings[
"lang"
].choices,
value=DEFAULT_SETTING,
choices=binary_default_choices,
container=False,
show_label=False,
)
self.citation = gr.Dropdown(
choices=[
(DEFAULT_SETTING, DEFAULT_SETTING),
]
+ self._app.default_settings.reasoning.options["simple"]
.settings["highlight_citation"]
.choices,
value=DEFAULT_SETTING,
container=False,
show_label=False,
interactive=True,
)
self.use_mindmap = gr.State(value=DEFAULT_SETTING)
self.use_mindmap_check = gr.Checkbox(
label="Mindmap (default)",
container=False,
elem_id="use-mindmap-checkbox",
)
with gr.Column(
scale=INFO_PANEL_SCALES[False], elem_id="chat-info-panel"
@@ -235,6 +293,8 @@ class ChatPage(BasePage):
self._reasoning_type,
self.model_type,
self.use_mindmap,
self.citation,
self.language,
self.state_chat,
self._app.user_id,
]
@@ -506,6 +566,12 @@ class ChatPage(BasePage):
inputs=[self.reasoning_type],
outputs=[self._reasoning_type],
)
self.use_mindmap_check.change(
lambda x: (x, gr.update(label="Mindmap " + ("(on)" if x else "(off)"))),
inputs=[self.use_mindmap_check],
outputs=[self.use_mindmap, self.use_mindmap_check],
show_progress="hidden",
)
self.chat_control.conversation_id.change(
lambda: gr.update(visible=False),
outputs=self.plot_panel,
@@ -722,6 +788,8 @@ class ChatPage(BasePage):
session_reasoning_type: str,
session_llm: str,
session_use_mindmap: bool | str,
session_use_citation: str,
session_language: str,
state: dict,
user_id: int,
*selecteds,
@@ -743,6 +811,10 @@ class ChatPage(BasePage):
session_reasoning_type,
"use mindmap",
session_use_mindmap,
"use citation",
session_use_citation,
"language",
session_language,
)
print("Session LLM", session_llm)
reasoning_mode = (
@@ -766,6 +838,14 @@ class ChatPage(BasePage):
if session_use_mindmap not in (DEFAULT_SETTING, None):
settings["reasoning.options.simple.create_mindmap"] = session_use_mindmap
if session_use_citation not in (DEFAULT_SETTING, None):
settings[
"reasoning.options.simple.highlight_citation"
] = session_use_citation
if session_language not in (DEFAULT_SETTING, None):
settings["reasoning.lang"] = session_language
# get retrievers
retrievers = []
for index in self._app.index_manager.indices:
@@ -798,6 +878,8 @@ class ChatPage(BasePage):
reasoning_type,
llm_type,
use_mind_map,
use_citation,
language,
state,
user_id,
*selecteds,
@@ -814,7 +896,15 @@ class ChatPage(BasePage):
# construct the pipeline
pipeline, reasoning_state = self.create_pipeline(
settings, reasoning_type, llm_type, use_mind_map, state, user_id, *selecteds
settings,
reasoning_type,
llm_type,
use_mind_map,
use_citation,
language,
state,
user_id,
*selecteds,
)
print("Reasoning state", reasoning_state)
pipeline.set_output_queue(queue)

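The new session-level quick settings (mindmap, citation, language) only override the persisted settings when the user picks a concrete value. A sketch of that override logic from create_pipeline, with DEFAULT_SETTING as an assumed sentinel string:

from copy import deepcopy

DEFAULT_SETTING = "(default)"  # assumption: the real sentinel is defined in ktem


def apply_quick_settings(settings: dict, use_mindmap, use_citation, language) -> dict:
    # only touch a key when the quick control holds a concrete value
    settings = deepcopy(settings)
    if use_mindmap not in (DEFAULT_SETTING, None):
        settings["reasoning.options.simple.create_mindmap"] = use_mindmap
    if use_citation not in (DEFAULT_SETTING, None):
        settings["reasoning.options.simple.highlight_citation"] = use_citation
    if language not in (DEFAULT_SETTING, None):
        settings["reasoning.lang"] = language
    return settings


saved = {"reasoning.lang": "English", "reasoning.options.simple.highlight_citation": "highlight"}
print(apply_quick_settings(saved, DEFAULT_SETTING, "inline", DEFAULT_SETTING))
# {'reasoning.lang': 'English', 'reasoning.options.simple.highlight_citation': 'inline'}
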
View File

@@ -28,6 +28,7 @@ class ChatPanel(BasePage):
placeholder="Chat input",
container=False,
show_label=False,
elem_id="chat-input",
)
def submit_msg(self, chat_input, chat_history):

View File

@@ -1,17 +1,10 @@
import html
import logging
import threading
from collections import defaultdict
from difflib import SequenceMatcher
from functools import partial
from typing import Generator
import numpy as np
import tiktoken
from ktem.embeddings.manager import embedding_models_manager as embeddings
from ktem.llms.manager import llms
from ktem.reasoning.prompt_optimization import (
CreateMindmapPipeline,
DecomposeQuestionPipeline,
RewriteQuestionPipeline,
)
@@ -19,7 +12,6 @@ from ktem.utils.plantuml import PlantUML
from ktem.utils.render import Render
from ktem.utils.visualize_cited import CreateCitationVizPipeline
from plotly.io import to_json
from theflow.settings import settings as flowsettings
from kotaemon.base import (
AIMessage,
@@ -30,399 +22,20 @@ from kotaemon.base import (
RetrievedDocument,
SystemMessage,
)
from kotaemon.indices.qa.citation import CitationPipeline
from kotaemon.indices.splitters import TokenSplitter
from kotaemon.llms import ChatLLM, PromptTemplate
from kotaemon.indices.qa.citation_qa import (
CONTEXT_RELEVANT_WARNING_SCORE,
DEFAULT_QA_TEXT_PROMPT,
AnswerWithContextPipeline,
)
from kotaemon.indices.qa.citation_qa_inline import AnswerWithInlineCitation
from kotaemon.indices.qa.format_context import PrepareEvidencePipeline
from kotaemon.llms import ChatLLM
from ..utils import SUPPORTED_LANGUAGE_MAP
from .base import BaseReasoning
logger = logging.getLogger(__name__)
EVIDENCE_MODE_TEXT = 0
EVIDENCE_MODE_TABLE = 1
EVIDENCE_MODE_CHATBOT = 2
EVIDENCE_MODE_FIGURE = 3
MAX_IMAGES = 10
CITATION_TIMEOUT = 5.0
def find_text(search_span, context):
sentence_list = search_span.split("\n")
context = context.replace("\n", " ")
matches = []
# don't search for small text
if len(search_span) > 5:
for sentence in sentence_list:
match = SequenceMatcher(
None, sentence, context, autojunk=False
).find_longest_match()
if match.size > max(len(sentence) * 0.35, 5):
matches.append((match.b, match.b + match.size))
return matches
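For context, the fuzzy matching above (now provided through the kotaemon.indices.qa imports) relies on difflib. A small self-contained example of the same find_longest_match idiom (the argument-free form needs Python 3.9+):

from difflib import SequenceMatcher

quote = "the model streams tokens to the UI"
context = "During generation the model streams tokens to the UI while citations load."

match = SequenceMatcher(None, quote, context, autojunk=False).find_longest_match()
# accept the match only if it covers a meaningful share of the quoted sentence
if match.size > max(len(quote) * 0.35, 5):
    start, end = match.b, match.b + match.size
    print(context[start:end])  # "the model streams tokens to the UI"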
class PrepareEvidencePipeline(BaseComponent):
"""Prepare the evidence text from the list of retrieved documents
This step usually happens after `DocumentRetrievalPipeline`.
Args:
trim_func: a callback function or a BaseComponent that splits a large
chunk of text into smaller ones. The first one will be retained.
"""
max_context_length: int = 32000
trim_func: TokenSplitter | None = None
def run(self, docs: list[RetrievedDocument]) -> Document:
evidence = ""
images = []
table_found = 0
evidence_modes = []
evidence_trim_func = (
self.trim_func
if self.trim_func
else TokenSplitter(
chunk_size=self.max_context_length,
chunk_overlap=0,
separator=" ",
tokenizer=partial(
tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
allowed_special=set(),
disallowed_special="all",
),
)
)
for _id, retrieved_item in enumerate(docs):
retrieved_content = ""
page = retrieved_item.metadata.get("page_label", None)
source = filename = retrieved_item.metadata.get("file_name", "-")
if page:
source += f" (Page {page})"
if retrieved_item.metadata.get("type", "") == "table":
evidence_modes.append(EVIDENCE_MODE_TABLE)
if table_found < 5:
retrieved_content = retrieved_item.metadata.get(
"table_origin", retrieved_item.text
)
if retrieved_content not in evidence:
table_found += 1
evidence += (
f"<br><b>Table from {source}</b>\n"
+ retrieved_content
+ "\n<br>"
)
elif retrieved_item.metadata.get("type", "") == "chatbot":
evidence_modes.append(EVIDENCE_MODE_CHATBOT)
retrieved_content = retrieved_item.metadata["window"]
evidence += (
f"<br><b>Chatbot scenario from {filename} (Row {page})</b>\n"
+ retrieved_content
+ "\n<br>"
)
elif retrieved_item.metadata.get("type", "") == "image":
evidence_modes.append(EVIDENCE_MODE_FIGURE)
retrieved_content = retrieved_item.metadata.get("image_origin", "")
retrieved_caption = html.escape(retrieved_item.get_content())
evidence += (
f"<br><b>Figure from {source}</b>\n"
+ "<img width='85%' src='<src>' "
+ f"alt='{retrieved_caption}'/>"
+ "\n<br>"
)
images.append(retrieved_content)
else:
if "window" in retrieved_item.metadata:
retrieved_content = retrieved_item.metadata["window"]
else:
retrieved_content = retrieved_item.text
retrieved_content = retrieved_content.replace("\n", " ")
if retrieved_content not in evidence:
evidence += (
f"<br><b>Content from {source}: </b> "
+ retrieved_content
+ " \n<br>"
)
# resolve evidence mode
evidence_mode = EVIDENCE_MODE_TEXT
if EVIDENCE_MODE_FIGURE in evidence_modes:
evidence_mode = EVIDENCE_MODE_FIGURE
elif EVIDENCE_MODE_TABLE in evidence_modes:
evidence_mode = EVIDENCE_MODE_TABLE
# trim context by trim_len
print("len (original)", len(evidence))
if evidence:
texts = evidence_trim_func([Document(text=evidence)])
evidence = texts[0].text
print("len (trimmed)", len(evidence))
return Document(content=(evidence_mode, evidence, images))
DEFAULT_QA_TEXT_PROMPT = (
"Use the following pieces of context to answer the question at the end in detail with clear explanation. " # noqa: E501
"If you don't know the answer, just say that you don't know, don't try to "
"make up an answer. Give answer in "
"{lang}.\n\n"
"{context}\n"
"Question: {question}\n"
"Helpful Answer:"
)
DEFAULT_QA_TABLE_PROMPT = (
"Use the given context: texts, tables, and figures below to answer the question, "
"then provide answer with clear explanation."
"If you don't know the answer, just say that you don't know, "
"don't try to make up an answer. Give answer in {lang}.\n\n"
"Context:\n"
"{context}\n"
"Question: {question}\n"
"Helpful Answer:"
) # noqa
DEFAULT_QA_CHATBOT_PROMPT = (
"Pick the most suitable chatbot scenarios to answer the question at the end, "
"output the provided answer text. If you don't know the answer, "
"just say that you don't know. Keep the answer as concise as possible. "
"Give answer in {lang}.\n\n"
"Context:\n"
"{context}\n"
"Question: {question}\n"
"Answer:"
) # noqa
DEFAULT_QA_FIGURE_PROMPT = (
"Use the given context: texts, tables, and figures below to answer the question. "
"If you don't know the answer, just say that you don't know. "
"Give answer in {lang}.\n\n"
"Context: \n"
"{context}\n"
"Question: {question}\n"
"Answer: "
) # noqa
CONTEXT_RELEVANT_WARNING_SCORE = 0.7
class AnswerWithContextPipeline(BaseComponent):
"""Answer the question based on the evidence
Args:
llm: the language model to generate the answer
citation_pipeline: generates citation from the evidence
qa_template: the prompt template for LLM to generate answer (refer to
evidence_mode)
qa_table_template: the prompt template for LLM to generate answer for table
(refer to evidence_mode)
qa_chatbot_template: the prompt template for LLM to generate answer for
pre-made scenarios (refer to evidence_mode)
lang: the language of the answer. Currently supports English and Japanese
"""
llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
vlm_endpoint: str = getattr(flowsettings, "KH_VLM_ENDPOINT", "")
use_multimodal: bool = getattr(flowsettings, "KH_REASONINGS_USE_MULTIMODAL", True)
citation_pipeline: CitationPipeline = Node(
default_callback=lambda _: CitationPipeline(llm=llms.get_default())
)
create_mindmap_pipeline: CreateMindmapPipeline = Node(
default_callback=lambda _: CreateMindmapPipeline(llm=llms.get_default())
)
qa_template: str = DEFAULT_QA_TEXT_PROMPT
qa_table_template: str = DEFAULT_QA_TABLE_PROMPT
qa_chatbot_template: str = DEFAULT_QA_CHATBOT_PROMPT
qa_figure_template: str = DEFAULT_QA_FIGURE_PROMPT
enable_citation: bool = False
enable_mindmap: bool = False
enable_citation_viz: bool = False
system_prompt: str = ""
lang: str = "English"  # supports English and Japanese
n_last_interactions: int = 5
def get_prompt(self, question, evidence, evidence_mode: int):
"""Prepare the prompt and other information for LLM"""
if evidence_mode == EVIDENCE_MODE_TEXT:
prompt_template = PromptTemplate(self.qa_template)
elif evidence_mode == EVIDENCE_MODE_TABLE:
prompt_template = PromptTemplate(self.qa_table_template)
elif evidence_mode == EVIDENCE_MODE_FIGURE:
if self.use_multimodal:
prompt_template = PromptTemplate(self.qa_figure_template)
else:
prompt_template = PromptTemplate(self.qa_template)
else:
prompt_template = PromptTemplate(self.qa_chatbot_template)
prompt = prompt_template.populate(
context=evidence,
question=question,
lang=self.lang,
)
return prompt, evidence
def run(
self, question: str, evidence: str, evidence_mode: int = 0, **kwargs
) -> Document:
return self.invoke(question, evidence, evidence_mode, **kwargs)
def invoke(
self,
question: str,
evidence: str,
evidence_mode: int = 0,
images: list[str] = [],
**kwargs,
) -> Document:
raise NotImplementedError
async def ainvoke( # type: ignore
self,
question: str,
evidence: str,
evidence_mode: int = 0,
images: list[str] = [],
**kwargs,
) -> Document:
"""Answer the question based on the evidence
In addition to the question and the evidence, this method also takes into
account evidence_mode. The evidence_mode tells which kind of evidence it is.
The kind of evidence affects:
1. How the evidence is represented.
2. The prompt to generate the answer.
By default, the evidence_mode is 0, which means the evidence is plain text with
no particular semantic representation. The evidence_mode can be:
1. "table": There will be HTML markup telling that there is a table
within the evidence.
2. "chatbot": There will be HTML markup telling that there is a chatbot.
This chatbot is a scenario, extracted from an Excel file, where each
row corresponds to an interaction.
Args:
question: the original question posed by the user
evidence: the text that contains relevant information to answer the question
(determined by retrieval pipeline)
evidence_mode: the mode of evidence, 0 for text, 1 for table, 2 for chatbot
"""
raise NotImplementedError
def stream( # type: ignore
self,
question: str,
evidence: str,
evidence_mode: int = 0,
images: list[str] = [],
**kwargs,
) -> Generator[Document, None, Document]:
history = kwargs.get("history", [])
print(f"Got {len(images)} images")
# check if evidence exists, use QA prompt
if evidence:
prompt, evidence = self.get_prompt(question, evidence, evidence_mode)
else:
prompt = question
# retrieve the citation
citation = None
mindmap = None
def citation_call():
nonlocal citation
citation = self.citation_pipeline(context=evidence, question=question)
def mindmap_call():
nonlocal mindmap
mindmap = self.create_mindmap_pipeline(context=evidence, question=question)
citation_thread = None
mindmap_thread = None
# execute function call in thread
if evidence:
if self.enable_citation:
citation_thread = threading.Thread(target=citation_call)
citation_thread.start()
if self.enable_mindmap:
mindmap_thread = threading.Thread(target=mindmap_call)
mindmap_thread.start()
output = ""
logprobs = []
messages = []
if self.system_prompt:
messages.append(SystemMessage(content=self.system_prompt))
for human, ai in history[-self.n_last_interactions :]:
messages.append(HumanMessage(content=human))
messages.append(AIMessage(content=ai))
if self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE:
# create image message:
messages.append(
HumanMessage(
content=[
{"type": "text", "text": prompt},
]
+ [
{
"type": "image_url",
"image_url": {"url": image},
}
for image in images[:MAX_IMAGES]
],
)
)
else:
# append main prompt
messages.append(HumanMessage(content=prompt))
try:
# try streaming first
print("Trying LLM streaming")
for out_msg in self.llm.stream(messages):
output += out_msg.text
logprobs += out_msg.logprobs
yield Document(channel="chat", content=out_msg.text)
except NotImplementedError:
print("Streaming is not supported, falling back to normal processing")
output = self.llm(messages).text
yield Document(channel="chat", content=output)
if logprobs:
qa_score = np.exp(np.average(logprobs))
else:
qa_score = None
if citation_thread:
citation_thread.join(timeout=CITATION_TIMEOUT)
if mindmap_thread:
mindmap_thread.join(timeout=CITATION_TIMEOUT)
answer = Document(
text=output,
metadata={
"citation_viz": self.enable_citation_viz,
"mindmap": mindmap,
"citation": citation,
"qa_score": qa_score,
},
)
return answer
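The stream method above (its body now lives behind the kotaemon.indices.qa imports) runs citation extraction on a background thread while the answer streams, then joins with a timeout. A standalone sketch of that pattern:

import threading
import time

CITATION_TIMEOUT = 5.0
citation = None


def citation_call():
    # stand-in for citation_pipeline(context=..., question=...)
    global citation
    time.sleep(0.2)
    citation = "extracted citations"


citation_thread = threading.Thread(target=citation_call)
citation_thread.start()

for token in ["The ", "answer ", "streams ", "first."]:
    print(token, end="", flush=True)  # tokens reach the UI while the thread works
print()

citation_thread.join(timeout=CITATION_TIMEOUT)  # never block the answer for too long
print(citation)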
class AddQueryContextPipeline(BaseComponent):
@@ -481,7 +94,7 @@ class FullQAPipeline(BaseReasoning):
retrievers: list[BaseComponent]
evidence_pipeline: PrepareEvidencePipeline = PrepareEvidencePipeline.withx()
answering_pipeline: AnswerWithContextPipeline = AnswerWithContextPipeline.withx()
answering_pipeline: AnswerWithContextPipeline
rewrite_pipeline: RewriteQuestionPipeline | None = None
create_citation_viz_pipeline: CreateCitationVizPipeline = Node(
default_callback=lambda _: CreateCitationVizPipeline(
@@ -548,104 +161,35 @@ class FullQAPipeline(BaseReasoning):
return docs, info
def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document]]:
"""Prepare the citations to show on the UI"""
with_citation, without_citation = [], []
spans = defaultdict(list)
has_llm_score = any("llm_trulens_score" in doc.metadata for doc in docs)
if answer.metadata["citation"]:
evidences = answer.metadata["citation"].evidences
for quote in evidences:
matched_excerpts = []
for doc in docs:
matches = find_text(quote, doc.text)
for start, end in matches:
if "|" not in doc.text[start:end]:
spans[doc.doc_id].append(
{
"start": start,
"end": end,
}
)
matched_excerpts.append(doc.text[start:end])
# print("Matched citation:", quote, matched_excerpts),
id2docs = {doc.doc_id: doc for doc in docs}
not_detected = set(id2docs.keys()) - set(spans.keys())
# render highlight spans
for _id, ss in spans.items():
if not ss:
not_detected.add(_id)
continue
cur_doc = id2docs[_id]
highlight_text = ""
ss = sorted(ss, key=lambda x: x["start"])
text = cur_doc.text[: ss[0]["start"]]
for idx, span in enumerate(ss):
to_highlight = cur_doc.text[span["start"] : span["end"]]
if len(to_highlight) > len(highlight_text):
highlight_text = to_highlight
text += Render.highlight(to_highlight)
if idx < len(ss) - 1:
text += cur_doc.text[span["end"] : ss[idx + 1]["start"]]
text += cur_doc.text[ss[-1]["end"] :]
# add to display list
with_citation.append(
Document(
channel="info",
content=Render.collapsible_with_header_score(
cur_doc,
override_text=text,
highlight_text=highlight_text,
open_collapsible=True,
),
)
)
print("Got {} cited docs".format(len(with_citation)))
sorted_not_detected_items_with_scores = [
(id_, id2docs[id_].metadata.get("llm_trulens_score", 0.0))
for id_ in not_detected
]
sorted_not_detected_items_with_scores.sort(key=lambda x: x[1], reverse=True)
for id_, _ in sorted_not_detected_items_with_scores:
doc = id2docs[id_]
doc_score = doc.metadata.get("llm_trulens_score", 0.0)
is_open = not has_llm_score or (
doc_score > CONTEXT_RELEVANT_WARNING_SCORE and len(with_citation) == 0
)
without_citation.append(
Document(
channel="info",
content=Render.collapsible_with_header_score(
doc, open_collapsible=is_open
),
)
)
return with_citation, without_citation
def prepare_mindmap(self, answer) -> Document | None:
mindmap = answer.metadata["mindmap"]
if mindmap:
mindmap_text = mindmap.text
uml_renderer = PlantUML()
mindmap_svg = uml_renderer.process(mindmap_text)
try:
mindmap_svg = uml_renderer.process(mindmap_text)
except Exception as e:
print("Failed to process mindmap:", e)
mindmap_svg = "<svg></svg>"
# post-process the mindmap SVG
mindmap_svg = (
mindmap_svg.replace("sans-serif", "Quicksand, sans-serif")
.replace("#181818", "#cecece")
.replace("background:#FFFFF", "background:none")
.replace("stroke-width:1", "stroke-width:2")
)
mindmap_content = Document(
channel="info",
content=Render.collapsible(
header="""
<i>Mindmap</i>
<a href="#" id='mindmap-toggle'">
[Expand]
</a>""",
<a href="#" id='mindmap-toggle'>
[Expand]</a>
<a href="#" id='mindmap-export'>
[Export]</a>""",
content=mindmap_svg,
open=True,
),
@@ -674,7 +218,9 @@ class FullQAPipeline(BaseReasoning):
def show_citations_and_addons(self, answer, docs, question):
# show the evidence
with_citation, without_citation = self.prepare_citations(answer, docs)
with_citation, without_citation = self.answering_pipeline.prepare_citations(
answer, docs
)
mindmap_output = self.prepare_mindmap(answer)
citation_plot_output = self.prepare_citation_viz(answer, question, docs)
@@ -773,6 +319,13 @@ class FullQAPipeline(BaseReasoning):
return answer
@classmethod
def prepare_pipeline_instance(cls, settings, retrievers):
return cls(
retrievers=retrievers,
rewrite_pipeline=RewriteQuestionPipeline(),
)
@classmethod
def get_pipeline(cls, settings, states, retrievers):
"""Get the reasoning pipeline
@@ -783,10 +336,7 @@ class FullQAPipeline(BaseReasoning):
"""
max_context_length_setting = settings.get("reasoning.max_context_length", 32000)
pipeline = cls(
retrievers=retrievers,
rewrite_pipeline=RewriteQuestionPipeline(),
)
pipeline = cls.prepare_pipeline_instance(settings, retrievers)
prefix = f"reasoning.options.{cls.get_info()['id']}"
llm_name = settings.get(f"{prefix}.llm", None)
@@ -797,13 +347,22 @@ class FullQAPipeline(BaseReasoning):
evidence_pipeline.max_context_length = max_context_length_setting
# answering pipeline configuration
answer_pipeline = pipeline.answering_pipeline
use_inline_citation = settings[f"{prefix}.highlight_citation"] == "inline"
if use_inline_citation:
answer_pipeline = pipeline.answering_pipeline = AnswerWithInlineCitation()
else:
answer_pipeline = pipeline.answering_pipeline = AnswerWithContextPipeline()
answer_pipeline.llm = llm
answer_pipeline.citation_pipeline.llm = llm
answer_pipeline.n_last_interactions = settings[f"{prefix}.n_last_interactions"]
answer_pipeline.enable_citation = settings[f"{prefix}.highlight_citation"]
answer_pipeline.enable_citation = (
settings[f"{prefix}.highlight_citation"] != "off"
)
answer_pipeline.enable_mindmap = settings[f"{prefix}.create_mindmap"]
answer_pipeline.enable_citation_viz = settings[f"{prefix}.create_citation_viz"]
answer_pipeline.use_multimodal = settings[f"{prefix}.use_multimodal"]
answer_pipeline.system_prompt = settings[f"{prefix}.system_prompt"]
answer_pipeline.qa_template = settings[f"{prefix}.qa_prompt"]
answer_pipeline.lang = SUPPORTED_LANGUAGE_MAP.get(
@@ -848,9 +407,10 @@ class FullQAPipeline(BaseReasoning):
),
},
"highlight_citation": {
"name": "Highlight Citation",
"value": True,
"component": "checkbox",
"name": "Citation style",
"value": "highlight",
"component": "radio",
"choices": ["highlight", "inline", "off"],
},
"create_mindmap": {
"name": "Create Mindmap",
@@ -862,6 +422,11 @@ class FullQAPipeline(BaseReasoning):
"value": False,
"component": "checkbox",
},
"use_multimodal": {
"name": "Use Multimodal Input",
"value": False,
"component": "checkbox",
},
"system_prompt": {
"name": "System Prompt",
"value": "This is a question answering system",
@@ -979,7 +544,9 @@ class FullDecomposeQAPipeline(FullQAPipeline):
)
# show the evidence
with_citation, without_citation = self.prepare_citations(answer, docs)
with_citation, without_citation = self.answering_pipeline.prepare_citations(
answer, docs
)
if not with_citation and not without_citation:
yield Document(channel="info", content="<h5><b>No evidence found.</b></h5>")
else:
@@ -999,13 +566,7 @@ class FullDecomposeQAPipeline(FullQAPipeline):
return user_settings
@classmethod
def get_pipeline(cls, settings, states, retrievers):
"""Get the reasoning pipeline
Args:
settings: the settings for the pipeline
retrievers: the retrievers to use
"""
def prepare_pipeline_instance(cls, settings, retrievers):
prefix = f"reasoning.options.{cls.get_info()['id']}"
pipeline = cls(
retrievers=retrievers,
@@ -1013,31 +574,6 @@ class FullDecomposeQAPipeline(FullQAPipeline):
prompt_template=settings.get(f"{prefix}.decompose_prompt")
),
)
llm_name = settings.get(f"{prefix}.llm", None)
llm = llms.get(llm_name, llms.get_default())
# answering pipeline configuration
answer_pipeline = pipeline.answering_pipeline
answer_pipeline.llm = llm
answer_pipeline.citation_pipeline.llm = llm
answer_pipeline.n_last_interactions = settings[f"{prefix}.n_last_interactions"]
answer_pipeline.enable_citation = settings[f"{prefix}.highlight_citation"]
answer_pipeline.system_prompt = settings[f"{prefix}.system_prompt"]
answer_pipeline.qa_template = settings[f"{prefix}.qa_prompt"]
answer_pipeline.lang = SUPPORTED_LANGUAGE_MAP.get(
settings["reasoning.lang"], "English"
)
pipeline.add_query_context.llm = llm
pipeline.add_query_context.n_last_interactions = settings[
f"{prefix}.n_last_interactions"
]
pipeline.trigger_context = settings[f"{prefix}.trigger_context"]
pipeline.use_rewrite = states.get("app", {}).get("regen", False)
if pipeline.rewrite_pipeline:
pipeline.rewrite_pipeline.llm = llm
return pipeline
@classmethod

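The citation setting is now a three-way radio ("highlight", "inline", "off"). A sketch of how get_pipeline above picks the answering pipeline from it, with stand-in classes for the two kotaemon pipelines:

class AnswerWithContextPipeline:  # stand-in for kotaemon.indices.qa.citation_qa
    enable_citation = True


class AnswerWithInlineCitation(AnswerWithContextPipeline):  # stand-in for citation_qa_inline
    pass


def select_answering_pipeline(highlight_citation: str) -> AnswerWithContextPipeline:
    # "inline" swaps in the inline-citation pipeline; anything but "off" keeps citations on
    if highlight_citation == "inline":
        pipeline = AnswerWithInlineCitation()
    else:
        pipeline = AnswerWithContextPipeline()
    pipeline.enable_citation = highlight_citation != "off"
    return pipeline


for choice in ("highlight", "inline", "off"):
    p = select_answering_pipeline(choice)
    print(choice, type(p).__name__, p.enable_citation)
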
View File

@@ -40,7 +40,10 @@ class Render:
def collapsible(header, content, open: bool = False) -> str:
"""Render an HTML friendly collapsible section"""
o = " open" if open else ""
return f"<details{o}><summary>{header}</summary>{content}</details><br>"
return (
f"<details class='evidence' {o}><summary>"
f"{header}</summary>{content}</details><br>"
)
@staticmethod
def table(text: str) -> str:
@@ -103,9 +106,10 @@ class Render:
""" # noqa
@staticmethod
def highlight(text: str) -> str:
def highlight(text: str, elem_id: str | None = None) -> str:
"""Highlight text"""
return f"<mark>{text}</mark>"
id_text = f" id='mark-{elem_id}'" if elem_id else ""
return f"<mark{id_text}>{text}</mark>"
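With the optional elem_id, a highlight becomes an addressable anchor that the new a.citation links and the scrollToCitation handler can target. A small usage sketch (the anchor markup is a hypothetical illustration, not copied from the renderer):

def highlight(text: str, elem_id: str | None = None) -> str:
    # same shape as the updated Render.highlight above
    id_text = f" id='mark-{elem_id}'" if elem_id else ""
    return f"<mark{id_text}>{text}</mark>"


print(highlight("relevant sentence", elem_id="3"))  # <mark id='mark-3'>relevant sentence</mark>
# a citation link with a matching id lets scrollToCitation find this mark:
print("<a href='#' class='citation' id='mark-3'>[3]</a>")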
@staticmethod
def image(url: str, text: str = "") -> str: