diff --git a/flowsettings.py b/flowsettings.py
index c4bb74e..b8d4c35 100644
--- a/flowsettings.py
+++ b/flowsettings.py
@@ -255,7 +255,7 @@ KH_REASONINGS = [
"ktem.reasoning.react.ReactAgentPipeline",
"ktem.reasoning.rewoo.RewooAgentPipeline",
]
-KH_REASONINGS_USE_MULTIMODAL = False
+KH_REASONINGS_USE_MULTIMODAL = config("USE_MULTIMODAL", default=False, cast=bool)
KH_VLM_ENDPOINT = "{0}/openai/deployments/{1}/chat/completions?api-version={2}".format(
config("AZURE_OPENAI_ENDPOINT", default=""),
config("OPENAI_VISION_DEPLOYMENT_NAME", default="gpt-4o"),
diff --git a/libs/kotaemon/kotaemon/indices/qa/__init__.py b/libs/kotaemon/kotaemon/indices/qa/__init__.py
index 03a185f..d1a6a99 100644
--- a/libs/kotaemon/kotaemon/indices/qa/__init__.py
+++ b/libs/kotaemon/kotaemon/indices/qa/__init__.py
@@ -1,7 +1,5 @@
from .citation import CitationPipeline
-from .text_based import CitationQAPipeline
__all__ = [
"CitationPipeline",
- "CitationQAPipeline",
]
diff --git a/libs/kotaemon/kotaemon/indices/qa/citation_qa.py b/libs/kotaemon/kotaemon/indices/qa/citation_qa.py
new file mode 100644
index 0000000..0fda8c7
--- /dev/null
+++ b/libs/kotaemon/kotaemon/indices/qa/citation_qa.py
@@ -0,0 +1,390 @@
+import threading
+from collections import defaultdict
+from typing import Generator
+
+import numpy as np
+from theflow.settings import settings as flowsettings
+
+from kotaemon.base import (
+ AIMessage,
+ BaseComponent,
+ Document,
+ HumanMessage,
+ Node,
+ SystemMessage,
+)
+from kotaemon.llms import ChatLLM, PromptTemplate
+
+from .citation import CitationPipeline
+from .format_context import (
+ EVIDENCE_MODE_FIGURE,
+ EVIDENCE_MODE_TABLE,
+ EVIDENCE_MODE_TEXT,
+)
+from .utils import find_text
+
+try:
+ from ktem.llms.manager import llms
+ from ktem.reasoning.prompt_optimization.mindmap import CreateMindmapPipeline
+ from ktem.utils.render import Render
+except ImportError:
+ raise ImportError("Please install `ktem` to use this component")
+
+MAX_IMAGES = 10
+CITATION_TIMEOUT = 5.0
+CONTEXT_RELEVANT_WARNING_SCORE = 0.7
+
+DEFAULT_QA_TEXT_PROMPT = (
+ "Use the following pieces of context to answer the question at the end in detail with clear explanation. " # noqa: E501
+ "If you don't know the answer, just say that you don't know, don't try to "
+ "make up an answer. Give answer in "
+ "{lang}.\n\n"
+ "{context}\n"
+ "Question: {question}\n"
+ "Helpful Answer:"
+)
+
+DEFAULT_QA_TABLE_PROMPT = (
+ "Use the given context: texts, tables, and figures below to answer the question, "
+ "then provide answer with clear explanation."
+ "If you don't know the answer, just say that you don't know, "
+ "don't try to make up an answer. Give answer in {lang}.\n\n"
+ "Context:\n"
+ "{context}\n"
+ "Question: {question}\n"
+ "Helpful Answer:"
+) # noqa
+
+DEFAULT_QA_CHATBOT_PROMPT = (
+ "Pick the most suitable chatbot scenarios to answer the question at the end, "
+ "output the provided answer text. If you don't know the answer, "
+ "just say that you don't know. Keep the answer as concise as possible. "
+ "Give answer in {lang}.\n\n"
+ "Context:\n"
+ "{context}\n"
+ "Question: {question}\n"
+ "Answer:"
+) # noqa
+
+DEFAULT_QA_FIGURE_PROMPT = (
+ "Use the given context: texts, tables, and figures below to answer the question. "
+ "If you don't know the answer, just say that you don't know. "
+ "Give answer in {lang}.\n\n"
+ "Context: \n"
+ "{context}\n"
+ "Question: {question}\n"
+ "Answer: "
+) # noqa
+
+
+class AnswerWithContextPipeline(BaseComponent):
+ """Answer the question based on the evidence
+
+ Args:
+ llm: the language model to generate the answer
+ citation_pipeline: generates citation from the evidence
+ qa_template: the prompt template for LLM to generate answer (refer to
+ evidence_mode)
+ qa_table_template: the prompt template for LLM to generate answer for table
+ (refer to evidence_mode)
+ qa_chatbot_template: the prompt template for LLM to generate answer for
+ pre-made scenarios (refer to evidence_mode)
+        lang: the language of the answer. Currently supports English and Japanese
+ """
+
+ llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
+ vlm_endpoint: str = getattr(flowsettings, "KH_VLM_ENDPOINT", "")
+ use_multimodal: bool = getattr(flowsettings, "KH_REASONINGS_USE_MULTIMODAL", True)
+ citation_pipeline: CitationPipeline = Node(
+ default_callback=lambda _: CitationPipeline(llm=llms.get_default())
+ )
+ create_mindmap_pipeline: CreateMindmapPipeline = Node(
+ default_callback=lambda _: CreateMindmapPipeline(llm=llms.get_default())
+ )
+
+ qa_template: str = DEFAULT_QA_TEXT_PROMPT
+ qa_table_template: str = DEFAULT_QA_TABLE_PROMPT
+ qa_chatbot_template: str = DEFAULT_QA_CHATBOT_PROMPT
+ qa_figure_template: str = DEFAULT_QA_FIGURE_PROMPT
+
+ enable_citation: bool = False
+ enable_mindmap: bool = False
+ enable_citation_viz: bool = False
+
+ system_prompt: str = ""
+ lang: str = "English" # support English and Japanese
+ n_last_interactions: int = 5
+
+ def get_prompt(self, question, evidence, evidence_mode: int):
+ """Prepare the prompt and other information for LLM"""
+ if evidence_mode == EVIDENCE_MODE_TEXT:
+ prompt_template = PromptTemplate(self.qa_template)
+ elif evidence_mode == EVIDENCE_MODE_TABLE:
+ prompt_template = PromptTemplate(self.qa_table_template)
+ elif evidence_mode == EVIDENCE_MODE_FIGURE:
+ if self.use_multimodal:
+ prompt_template = PromptTemplate(self.qa_figure_template)
+ else:
+ prompt_template = PromptTemplate(self.qa_template)
+ else:
+ prompt_template = PromptTemplate(self.qa_chatbot_template)
+
+ prompt = prompt_template.populate(
+ context=evidence,
+ question=question,
+ lang=self.lang,
+ )
+
+ return prompt, evidence
+
+ def run(
+ self, question: str, evidence: str, evidence_mode: int = 0, **kwargs
+ ) -> Document:
+ return self.invoke(question, evidence, evidence_mode, **kwargs)
+
+ def invoke(
+ self,
+ question: str,
+ evidence: str,
+ evidence_mode: int = 0,
+ images: list[str] = [],
+ **kwargs,
+ ) -> Document:
+ raise NotImplementedError
+
+ async def ainvoke( # type: ignore
+ self,
+ question: str,
+ evidence: str,
+ evidence_mode: int = 0,
+ images: list[str] = [],
+ **kwargs,
+ ) -> Document:
+ """Answer the question based on the evidence
+
+        In addition to the question and the evidence, this method also takes
+        evidence_mode into account. The evidence_mode indicates what kind of
+        evidence is provided. The kind of evidence affects:
+ 1. How the evidence is represented.
+ 2. The prompt to generate the answer.
+
+ By default, the evidence_mode is 0, which means the evidence is plain text with
+ no particular semantic representation. The evidence_mode can be:
+ 1. "table": There will be HTML markup telling that there is a table
+ within the evidence.
+ 2. "chatbot": There will be HTML markup telling that there is a chatbot.
+ This chatbot is a scenario, extracted from an Excel file, where each
+ row corresponds to an interaction.
+
+ Args:
+ question: the original question posed by user
+            evidence: the text that contains relevant information to answer the question
+ (determined by retrieval pipeline)
+ evidence_mode: the mode of evidence, 0 for text, 1 for table, 2 for chatbot
+ """
+ raise NotImplementedError
+
+ def stream( # type: ignore
+ self,
+ question: str,
+ evidence: str,
+ evidence_mode: int = 0,
+ images: list[str] = [],
+ **kwargs,
+ ) -> Generator[Document, None, Document]:
+ history = kwargs.get("history", [])
+ print(f"Got {len(images)} images")
+ # check if evidence exists, use QA prompt
+ if evidence:
+ prompt, evidence = self.get_prompt(question, evidence, evidence_mode)
+ else:
+ prompt = question
+
+ # retrieve the citation
+ citation = None
+ mindmap = None
+
+ def citation_call():
+ nonlocal citation
+ citation = self.citation_pipeline(context=evidence, question=question)
+
+ def mindmap_call():
+ nonlocal mindmap
+ mindmap = self.create_mindmap_pipeline(context=evidence, question=question)
+
+ citation_thread = None
+ mindmap_thread = None
+
+ # execute function call in thread
+ if evidence:
+ if self.enable_citation:
+ citation_thread = threading.Thread(target=citation_call)
+ citation_thread.start()
+
+ if self.enable_mindmap:
+ mindmap_thread = threading.Thread(target=mindmap_call)
+ mindmap_thread.start()
+
+ output = ""
+ logprobs = []
+
+ messages = []
+ if self.system_prompt:
+ messages.append(SystemMessage(content=self.system_prompt))
+
+ for human, ai in history[-self.n_last_interactions :]:
+ messages.append(HumanMessage(content=human))
+ messages.append(AIMessage(content=ai))
+
+ if self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE:
+ # create image message:
+ messages.append(
+ HumanMessage(
+ content=[
+ {"type": "text", "text": prompt},
+ ]
+ + [
+ {
+ "type": "image_url",
+ "image_url": {"url": image},
+ }
+ for image in images[:MAX_IMAGES]
+ ],
+ )
+ )
+ else:
+ # append main prompt
+ messages.append(HumanMessage(content=prompt))
+
+ try:
+ # try streaming first
+ print("Trying LLM streaming")
+ for out_msg in self.llm.stream(messages):
+ output += out_msg.text
+ logprobs += out_msg.logprobs
+ yield Document(channel="chat", content=out_msg.text)
+ except NotImplementedError:
+ print("Streaming is not supported, falling back to normal processing")
+ output = self.llm(messages).text
+ yield Document(channel="chat", content=output)
+
+ if logprobs:
+ qa_score = np.exp(np.average(logprobs))
+ else:
+ qa_score = None
+
+ if citation_thread:
+ citation_thread.join(timeout=CITATION_TIMEOUT)
+ if mindmap_thread:
+ mindmap_thread.join(timeout=CITATION_TIMEOUT)
+
+ answer = Document(
+ text=output,
+ metadata={
+ "citation_viz": self.enable_citation_viz,
+ "mindmap": mindmap,
+ "citation": citation,
+ "qa_score": qa_score,
+ },
+ )
+
+ return answer
+
+ def match_evidence_with_context(self, answer, docs) -> dict[str, list[dict]]:
+ """Match the evidence with the context"""
+ spans: dict[str, list[dict]] = defaultdict(list)
+
+ if not answer.metadata["citation"]:
+ return spans
+
+ evidences = answer.metadata["citation"].evidences
+ for quote in evidences:
+ matched_excerpts = []
+ for doc in docs:
+ matches = find_text(quote, doc.text)
+
+ for start, end in matches:
+ if "|" not in doc.text[start:end]:
+ spans[doc.doc_id].append(
+ {
+ "start": start,
+ "end": end,
+ }
+ )
+ matched_excerpts.append(doc.text[start:end])
+
+ # print("Matched citation:", quote, matched_excerpts),
+ return spans
+
+ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document]]:
+ """Prepare the citations to show on the UI"""
+ with_citation, without_citation = [], []
+ has_llm_score = any("llm_trulens_score" in doc.metadata for doc in docs)
+
+ spans = self.match_evidence_with_context(answer, docs)
+ id2docs = {doc.doc_id: doc for doc in docs}
+ not_detected = set(id2docs.keys()) - set(spans.keys())
+
+ # render highlight spans
+ for _id, ss in spans.items():
+ if not ss:
+ not_detected.add(_id)
+ continue
+ cur_doc = id2docs[_id]
+ highlight_text = ""
+
+ ss = sorted(ss, key=lambda x: x["start"])
+ text = cur_doc.text[: ss[0]["start"]]
+ for idx, span in enumerate(ss):
+ to_highlight = cur_doc.text[span["start"] : span["end"]]
+ if len(to_highlight) > len(highlight_text):
+ highlight_text = to_highlight
+
+ span_idx = span.get("idx", None)
+ if span_idx is not None:
+ to_highlight = f"【{span_idx + 1}】" + to_highlight
+
+ text += Render.highlight(
+ to_highlight,
+ elem_id=str(span_idx + 1) if span_idx is not None else None,
+ )
+ print(text)
+ if idx < len(ss) - 1:
+ text += cur_doc.text[span["end"] : ss[idx + 1]["start"]]
+ text += cur_doc.text[ss[-1]["end"] :]
+ # add to display list
+ with_citation.append(
+ Document(
+ channel="info",
+ content=Render.collapsible_with_header_score(
+ cur_doc,
+ override_text=text,
+ highlight_text=highlight_text,
+ open_collapsible=True,
+ ),
+ )
+ )
+
+ print("Got {} cited docs".format(len(with_citation)))
+
+ sorted_not_detected_items_with_scores = [
+ (id_, id2docs[id_].metadata.get("llm_trulens_score", 0.0))
+ for id_ in not_detected
+ ]
+ sorted_not_detected_items_with_scores.sort(key=lambda x: x[1], reverse=True)
+
+ for id_, _ in sorted_not_detected_items_with_scores:
+ doc = id2docs[id_]
+ doc_score = doc.metadata.get("llm_trulens_score", 0.0)
+ is_open = not has_llm_score or (
+ doc_score > CONTEXT_RELEVANT_WARNING_SCORE and len(with_citation) == 0
+ )
+ without_citation.append(
+ Document(
+ channel="info",
+ content=Render.collapsible_with_header_score(
+ doc, open_collapsible=is_open
+ ),
+ )
+ )
+ return with_citation, without_citation
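One detail worth noting from `stream()` above: `qa_score` is `np.exp(np.average(logprobs))`, i.e. the geometric mean of the per-token probabilities reported by the LLM, so it lands in (0, 1]. A standalone sketch with made-up token log-probabilities:

```python
import numpy as np

# Hypothetical per-token log-probabilities collected while streaming an answer.
logprobs = [-0.05, -0.20, -0.01, -0.60, -0.10]

# Same formula as AnswerWithContextPipeline.stream():
# exp(mean(log p_i)) == geometric mean of the token probabilities.
qa_score = float(np.exp(np.average(logprobs)))
print(round(qa_score, 3))  # ~0.825; values closer to 1 mean a more confident generation
```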
diff --git a/libs/kotaemon/kotaemon/indices/qa/citation_qa_inline.py b/libs/kotaemon/kotaemon/indices/qa/citation_qa_inline.py
new file mode 100644
index 0000000..414e4e2
--- /dev/null
+++ b/libs/kotaemon/kotaemon/indices/qa/citation_qa_inline.py
@@ -0,0 +1,267 @@
+import re
+import threading
+from collections import defaultdict
+from typing import Generator
+
+import numpy as np
+
+from kotaemon.base import AIMessage, Document, HumanMessage, SystemMessage
+from kotaemon.llms import PromptTemplate
+
+from .citation import CiteEvidence
+from .citation_qa import CITATION_TIMEOUT, MAX_IMAGES, AnswerWithContextPipeline
+from .format_context import EVIDENCE_MODE_FIGURE
+from .utils import find_start_end_phrase
+
+DEFAULT_QA_CITATION_PROMPT = """
+Use the following pieces of context to answer the question at the end.
+Provide a DETAILED answer with clear explanation.
+Format answer with easy to follow bullets / paragraphs.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+Respond in the same language as the question.
+
+CONTEXT:
+----
+{context}
+----
+
+Answer using this format:
+CITATION LIST
+
+// the index in this array
+CITATION【number】
+
+// output 2 phrases to mark the start and end of the relevant span
+// each has ~ 6 words
+// MUST COPY EXACTLY from the CONTEXT
+// NO CHANGE or REPHRASE
+// RELEVANT_SPAN_FROM_CONTEXT
+START_PHRASE: string
+END_PHRASE: string
+
+// When you answer, make sure to add citations from the documents
+// in the CONTEXT, using the number of the corresponding entry in the CITATION LIST
+// (in the form 【number】).
+// Try to include the number after each fact / statement you make.
+// You can create as many citations as you need.
+FINAL ANSWER
+string
+
+STRICTLY FOLLOW THIS EXAMPLE:
+CITATION LIST
+
+CITATION【1】
+
+START_PHRASE: Known as fixed-size chunking , the traditional
+END_PHRASE: not degrade the final retrieval performance.
+
+CITATION【2】
+
+START_PHRASE: Fixed-size Chunker This is our baseline chunker
+END_PHRASE: this shows good retrieval quality.
+
+FINAL ANSWER
+An alternative to semantic chunking is fixed-size chunking. This traditional method involves splitting documents into chunks of a predetermined or user-specified size, regardless of semantic content, which is computationally efficient【1】. However, it may result in the fragmentation of semantically related content, thereby potentially degrading retrieval performance【2】.
+
+QUESTION: {question}\n
+ANSWER:
+""" # noqa
+
+
+class AnswerWithInlineCitation(AnswerWithContextPipeline):
+ """Answer the question based on the evidence with inline citation"""
+
+ qa_citation_template: str = DEFAULT_QA_CITATION_PROMPT
+
+ def get_prompt(self, question, evidence, evidence_mode: int):
+ """Prepare the prompt and other information for LLM"""
+ prompt_template = PromptTemplate(self.qa_citation_template)
+
+ prompt = prompt_template.populate(
+ context=evidence,
+ question=question,
+ safe=False,
+ )
+
+ return prompt, evidence
+
+ def answer_to_citations(self, answer):
+ evidences = []
+ lines = answer.split("\n")
+ for line in lines:
+ for keyword in ["START_PHRASE:", "END_PHRASE:"]:
+ if line.startswith(keyword):
+ evidences.append(line[len(keyword) :].strip())
+
+ return CiteEvidence(evidences=evidences)
+
+ def replace_citation_with_link(self, answer: str):
+ # Define the regex pattern to match 【number】
+ pattern = r"【\d+】"
+ matches = re.finditer(pattern, answer)
+
+ matched_citations = set()
+ for match in matches:
+ citation = match.group()
+ matched_citations.add(citation)
+
+ for citation in matched_citations:
+ print("Found citation:", citation)
+ answer = answer.replace(
+ citation,
+ (
+                    "<a href='#' class='citation' "
+                    f"id='mark-{citation.strip('【】')}'>{citation}</a>"
+ ),
+ )
+
+ print("Replaced answer:", answer)
+ return answer
+
+ def stream( # type: ignore
+ self,
+ question: str,
+ evidence: str,
+ evidence_mode: int = 0,
+ images: list[str] = [],
+ **kwargs,
+ ) -> Generator[Document, None, Document]:
+ history = kwargs.get("history", [])
+ print(f"Got {len(images)} images")
+ # check if evidence exists, use QA prompt
+ if evidence:
+ prompt, evidence = self.get_prompt(question, evidence, evidence_mode)
+ else:
+ prompt = question
+
+ output = ""
+ logprobs = []
+
+ citation = None
+ mindmap = None
+
+ def mindmap_call():
+ nonlocal mindmap
+ mindmap = self.create_mindmap_pipeline(context=evidence, question=question)
+
+ mindmap_thread = None
+
+ # execute function call in thread
+ if evidence:
+ if self.enable_mindmap:
+ mindmap_thread = threading.Thread(target=mindmap_call)
+ mindmap_thread.start()
+
+ messages = []
+ if self.system_prompt:
+ messages.append(SystemMessage(content=self.system_prompt))
+
+ for human, ai in history[-self.n_last_interactions :]:
+ messages.append(HumanMessage(content=human))
+ messages.append(AIMessage(content=ai))
+
+ if self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE:
+ # create image message:
+ messages.append(
+ HumanMessage(
+ content=[
+ {"type": "text", "text": prompt},
+ ]
+ + [
+ {
+ "type": "image_url",
+ "image_url": {"url": image},
+ }
+ for image in images[:MAX_IMAGES]
+ ],
+ )
+ )
+ else:
+ # append main prompt
+ messages.append(HumanMessage(content=prompt))
+
+ START_ANSWER = "FINAL ANSWER"
+ start_of_answer = True
+ final_answer = ""
+
+ try:
+ # try streaming first
+ print("Trying LLM streaming")
+ for out_msg in self.llm.stream(messages):
+ if START_ANSWER in output:
+ final_answer += (
+ out_msg.text.lstrip() if start_of_answer else out_msg.text
+ )
+ start_of_answer = False
+ yield Document(channel="chat", content=out_msg.text)
+
+ output += out_msg.text
+ logprobs += out_msg.logprobs
+ except NotImplementedError:
+ print("Streaming is not supported, falling back to normal processing")
+ output = self.llm(messages).text
+ yield Document(channel="chat", content=output)
+
+ if logprobs:
+ qa_score = np.exp(np.average(logprobs))
+ else:
+ qa_score = None
+
+ citation = self.answer_to_citations(output)
+
+ if mindmap_thread:
+ mindmap_thread.join(timeout=CITATION_TIMEOUT)
+
+ # convert citation to link
+ answer = Document(
+ text=final_answer,
+ metadata={
+ "citation_viz": self.enable_citation_viz,
+ "mindmap": mindmap,
+ "citation": citation,
+ "qa_score": qa_score,
+ },
+ )
+
+ # yield the final answer
+ final_answer = self.replace_citation_with_link(final_answer)
+ yield Document(channel="chat", content=None)
+ yield Document(channel="chat", content=final_answer)
+
+ return answer
+
+ def match_evidence_with_context(self, answer, docs) -> dict[str, list[dict]]:
+ """Match the evidence with the context"""
+ spans: dict[str, list[dict]] = defaultdict(list)
+
+ if not answer.metadata["citation"]:
+ return spans
+
+ evidences = answer.metadata["citation"].evidences
+
+ for start_idx in range(0, len(evidences), 2):
+ start_phrase, end_phrase = evidences[start_idx : start_idx + 2]
+ best_match = None
+ best_match_length = 0
+ best_match_doc_idx = None
+
+ for doc in docs:
+ match, match_length = find_start_end_phrase(
+ start_phrase, end_phrase, doc.text
+ )
+ if best_match is None or (
+ match is not None and match_length > best_match_length
+ ):
+ best_match = match
+ best_match_length = match_length
+ best_match_doc_idx = doc.doc_id
+
+ if best_match is not None and best_match_doc_idx is not None:
+ spans[best_match_doc_idx].append(
+ {
+ "start": best_match[0],
+ "end": best_match[1],
+ "idx": start_idx // 2, # implicitly set from the start_idx
+ }
+ )
+ return spans
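To make the inline format concrete, here is a standalone sketch of how an output following `DEFAULT_QA_CITATION_PROMPT` is consumed: the `START_PHRASE`/`END_PHRASE` lines become the evidence list (same rule as `answer_to_citations`), and only the text after `FINAL ANSWER` reaches the chat panel. The model output below is invented:

```python
raw_output = """CITATION LIST

CITATION【1】

START_PHRASE: Known as fixed-size chunking , the traditional
END_PHRASE: not degrade the final retrieval performance.

FINAL ANSWER
Fixed-size chunking is the traditional alternative to semantic chunking【1】."""

# Same parsing rule as AnswerWithInlineCitation.answer_to_citations
evidences = []
for line in raw_output.split("\n"):
    for keyword in ("START_PHRASE:", "END_PHRASE:"):
        if line.startswith(keyword):
            evidences.append(line[len(keyword):].strip())

# The streaming loop only forwards text once "FINAL ANSWER" has appeared in the output
final_answer = raw_output.split("FINAL ANSWER", 1)[1].strip()

print(evidences)
# ['Known as fixed-size chunking , the traditional',
#  'not degrade the final retrieval performance.']
print(final_answer)
# Fixed-size chunking is the traditional alternative to semantic chunking【1】.
```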
diff --git a/libs/kotaemon/kotaemon/indices/qa/format_context.py b/libs/kotaemon/kotaemon/indices/qa/format_context.py
new file mode 100644
index 0000000..4727b1c
--- /dev/null
+++ b/libs/kotaemon/kotaemon/indices/qa/format_context.py
@@ -0,0 +1,114 @@
+import html
+from functools import partial
+
+import tiktoken
+
+from kotaemon.base import BaseComponent, Document, RetrievedDocument
+from kotaemon.indices.splitters import TokenSplitter
+
+EVIDENCE_MODE_TEXT = 0
+EVIDENCE_MODE_TABLE = 1
+EVIDENCE_MODE_CHATBOT = 2
+EVIDENCE_MODE_FIGURE = 3
+
+
+class PrepareEvidencePipeline(BaseComponent):
+ """Prepare the evidence text from the list of retrieved documents
+
+ This step usually happens after `DocumentRetrievalPipeline`.
+
+ Args:
+ trim_func: a callback function or a BaseComponent, that splits a large
+ chunk of text into smaller ones. The first one will be retained.
+ """
+
+ max_context_length: int = 32000
+ trim_func: TokenSplitter | None = None
+
+ def run(self, docs: list[RetrievedDocument]) -> Document:
+ evidence = ""
+ images = []
+ table_found = 0
+ evidence_modes = []
+
+ evidence_trim_func = (
+ self.trim_func
+ if self.trim_func
+ else TokenSplitter(
+ chunk_size=self.max_context_length,
+ chunk_overlap=0,
+ separator=" ",
+ tokenizer=partial(
+ tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
+ allowed_special=set(),
+ disallowed_special="all",
+ ),
+ )
+ )
+
+ for _, retrieved_item in enumerate(docs):
+ retrieved_content = ""
+ page = retrieved_item.metadata.get("page_label", None)
+ source = filename = retrieved_item.metadata.get("file_name", "-")
+ if page:
+ source += f" (Page {page})"
+ if retrieved_item.metadata.get("type", "") == "table":
+ evidence_modes.append(EVIDENCE_MODE_TABLE)
+ if table_found < 5:
+ retrieved_content = retrieved_item.metadata.get(
+ "table_origin", retrieved_item.text
+ )
+ if retrieved_content not in evidence:
+ table_found += 1
+ evidence += (
+                            f"<br><b>Table from {source}</b>\n"
+                            + retrieved_content
+                            + "\n<br>"
+ )
+ elif retrieved_item.metadata.get("type", "") == "chatbot":
+ evidence_modes.append(EVIDENCE_MODE_CHATBOT)
+ retrieved_content = retrieved_item.metadata["window"]
+ evidence += (
+                    f"<br><b>Chatbot scenario from {filename} (Row {page})</b>\n"
+                    + retrieved_content
+                    + "\n<br>"
+ )
+ elif retrieved_item.metadata.get("type", "") == "image":
+ evidence_modes.append(EVIDENCE_MODE_FIGURE)
+ retrieved_content = retrieved_item.metadata.get("image_origin", "")
+ retrieved_caption = html.escape(retrieved_item.get_content())
+ evidence += (
+                    f"<br><b>Figure from {source}</b>\n"
+                    + f"<img width='85%' src='{retrieved_content}' "
+                    + f"alt='{retrieved_caption}'/>"
+                    + "\n<br>"
+ )
+ images.append(retrieved_content)
+ else:
+ if "window" in retrieved_item.metadata:
+ retrieved_content = retrieved_item.metadata["window"]
+ else:
+ retrieved_content = retrieved_item.text
+ retrieved_content = retrieved_content.replace("\n", " ")
+ if retrieved_content not in evidence:
+ evidence += (
+                        f"<br><b>Content from {source}: </b> "
+                        + retrieved_content
+                        + " \n<br>"
+ )
+
+ # resolve evidence mode
+ evidence_mode = EVIDENCE_MODE_TEXT
+ if EVIDENCE_MODE_FIGURE in evidence_modes:
+ evidence_mode = EVIDENCE_MODE_FIGURE
+ elif EVIDENCE_MODE_TABLE in evidence_modes:
+ evidence_mode = EVIDENCE_MODE_TABLE
+
+ # trim context by trim_len
+ print("len (original)", len(evidence))
+ if evidence:
+ texts = evidence_trim_func([Document(text=evidence)])
+ evidence = texts[0].text
+ print("len (trimmed)", len(evidence))
+
+ return Document(content=(evidence_mode, evidence, images))
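A rough usage sketch of the pipeline above. The documents and metadata are invented, and it assumes `RetrievedDocument` accepts `text=`/`metadata=` keyword arguments and that calling `.run()` directly is acceptable:

```python
from kotaemon.base import RetrievedDocument
from kotaemon.indices.qa.format_context import (
    EVIDENCE_MODE_TABLE,
    PrepareEvidencePipeline,
)

docs = [
    RetrievedDocument(
        text="| Quarter | Revenue |\n| Q1 | 10M |",
        metadata={"type": "table", "file_name": "report.pdf", "page_label": "3"},
    ),
    RetrievedDocument(
        text="Revenue grew 12% year over year.",
        metadata={"file_name": "report.pdf", "page_label": "4"},
    ),
]

result = PrepareEvidencePipeline(max_context_length=4096).run(docs)
evidence_mode, evidence, images = result.content

# A single table document is enough to switch the whole batch to table mode.
assert evidence_mode == EVIDENCE_MODE_TABLE
assert not images
```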
diff --git a/libs/kotaemon/kotaemon/indices/qa/text_based.py b/libs/kotaemon/kotaemon/indices/qa/text_based.py
deleted file mode 100644
index e0b49be..0000000
--- a/libs/kotaemon/kotaemon/indices/qa/text_based.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import os
-
-from kotaemon.base import BaseComponent, Document, Node, RetrievedDocument
-from kotaemon.llms import BaseLLM, LCAzureChatOpenAI, PromptTemplate
-
-from .citation import CitationPipeline
-
-
-class CitationQAPipeline(BaseComponent):
- """Answering question from a text corpus with citation"""
-
- qa_prompt_template: PromptTemplate = PromptTemplate(
- 'Answer the following question: "{question}". '
- "The context is: \n{context}\nAnswer: "
- )
- llm: BaseLLM = LCAzureChatOpenAI.withx(
- azure_endpoint="https://bleh-dummy.openai.azure.com/",
- openai_api_key=os.environ.get("OPENAI_API_KEY", ""),
- openai_api_version="2023-07-01-preview",
- deployment_name="dummy-q2-16k",
- temperature=0,
- request_timeout=60,
- )
- citation_pipeline: CitationPipeline = Node(
- default_callback=lambda self: CitationPipeline(llm=self.llm)
- )
-
- def _format_doc_text(self, text: str) -> str:
- """Format the text of each document"""
- return text.replace("\n", " ")
-
- def _format_retrieved_context(self, documents: list[RetrievedDocument]) -> str:
- """Format the texts between all documents"""
- matched_texts: list[str] = [
- self._format_doc_text(doc.text) for doc in documents
- ]
- return "\n\n".join(matched_texts)
-
- def run(
- self,
- question: str,
- documents: list[RetrievedDocument],
- use_citation: bool = False,
- **kwargs
- ) -> Document:
- # retrieve relevant documents as context
- context = self._format_retrieved_context(documents)
- self.log_progress(".context", context=context)
-
- # generate the answer
- prompt = self.qa_prompt_template.populate(
- context=context,
- question=question,
- )
- self.log_progress(".prompt", prompt=prompt)
- answer_text = self.llm(prompt).text
- if use_citation:
- citation = self.citation_pipeline(context=context, question=question)
- else:
- citation = None
-
- answer = Document(text=answer_text, metadata={"citation": citation})
- return answer
diff --git a/libs/kotaemon/kotaemon/indices/qa/utils.py b/libs/kotaemon/kotaemon/indices/qa/utils.py
new file mode 100644
index 0000000..4b6495a
--- /dev/null
+++ b/libs/kotaemon/kotaemon/indices/qa/utils.py
@@ -0,0 +1,53 @@
+from difflib import SequenceMatcher
+
+
+def find_text(search_span, context, min_length=5):
+ sentence_list = search_span.split("\n")
+ context = context.replace("\n", " ")
+
+ matches = []
+ # don't search for small text
+ if len(search_span) > min_length:
+ for sentence in sentence_list:
+ match = SequenceMatcher(
+ None, sentence, context, autojunk=False
+ ).find_longest_match()
+ if match.size > max(len(sentence) * 0.35, min_length):
+ matches.append((match.b, match.b + match.size))
+
+ return matches
+
+
+def find_start_end_phrase(
+ start_phrase, end_phrase, context, min_length=5, max_excerpt_length=300
+):
+ context = context.replace("\n", " ")
+
+ matches = []
+ matched_length = 0
+ for sentence in [start_phrase, end_phrase]:
+ match = SequenceMatcher(
+ None, sentence, context, autojunk=False
+ ).find_longest_match()
+ if match.size > max(len(sentence) * 0.35, min_length):
+ matches.append((match.b, match.b + match.size))
+ matched_length += match.size
+
+ # check if second match is before the first match
+ if len(matches) == 2 and matches[1][0] < matches[0][0]:
+ # if so, keep only the first match
+ matches = [matches[0]]
+
+ if matches:
+ start_idx = min(start for start, _ in matches)
+ end_idx = max(end for _, end in matches)
+
+ # check if the excerpt is too long
+ if end_idx - start_idx > max_excerpt_length:
+ end_idx = start_idx + max_excerpt_length
+
+ final_match = (start_idx, end_idx)
+ else:
+ final_match = None
+
+ return final_match, matched_length
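A toy example of the fuzzy matching above (assuming the `kotaemon` package is importable; the context string is made up). `find_start_end_phrase` returns the character offsets of the excerpt bounded by the two phrases, or `None` when neither phrase matches well enough:

```python
from kotaemon.indices.qa.utils import find_start_end_phrase

context = (
    "Fixed-size chunking splits documents into equally sized pieces. "
    "It is computationally cheap but can fragment semantically related content."
)

span, matched_length = find_start_end_phrase(
    start_phrase="Fixed-size chunking splits documents",
    end_phrase="can fragment semantically related content.",
    context=context,
)

assert span is not None
start, end = span
print(context[start:end])  # the excerpt spanning both phrases
print(matched_length)      # how many characters of the two phrases were matched
```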
diff --git a/libs/kotaemon/kotaemon/indices/vectorindex.py b/libs/kotaemon/kotaemon/indices/vectorindex.py
index e8f79a6..5bf77c3 100644
--- a/libs/kotaemon/kotaemon/indices/vectorindex.py
+++ b/libs/kotaemon/kotaemon/indices/vectorindex.py
@@ -42,14 +42,6 @@ class VectorIndexing(BaseIndexing):
**kwargs,
)
- def to_qa_pipeline(self, *args, **kwargs):
- from .qa import CitationQAPipeline
-
- return TextVectorQA(
- retrieving_pipeline=self.to_retrieval_pipeline(**kwargs),
- qa_pipeline=CitationQAPipeline(**kwargs),
- )
-
def write_chunk_to_file(self, docs: list[Document]):
# save the chunks content into markdown format
if self.cache_dir:
diff --git a/libs/kotaemon/kotaemon/llms/prompts/template.py b/libs/kotaemon/kotaemon/llms/prompts/template.py
index aa758be..b189045 100644
--- a/libs/kotaemon/kotaemon/llms/prompts/template.py
+++ b/libs/kotaemon/kotaemon/llms/prompts/template.py
@@ -72,7 +72,7 @@ class PromptTemplate:
UserWarning,
)
- def populate(self, **kwargs) -> str:
+ def populate(self, safe=True, **kwargs) -> str:
"""
Strictly populate the template with the given keyword arguments.
@@ -86,7 +86,8 @@ class PromptTemplate:
Raises:
ValueError: If an unknown placeholder is provided.
"""
- self.check_missing_kwargs(**kwargs)
+ if safe:
+ self.check_missing_kwargs(**kwargs)
return self.partial_populate(**kwargs)
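The new `safe` flag exists so callers can skip the placeholder check when a template is only partially filled (as `AnswerWithInlineCitation.get_prompt` does with `safe=False`). A small sketch, assuming `partial_populate` simply leaves unfilled placeholders untouched:

```python
from kotaemon.llms import PromptTemplate

template = PromptTemplate("Answer the question about {topic} using: {context}")

# Default (safe=True): every placeholder must be provided, otherwise populate raises.
full = template.populate(topic="chunking", context="some retrieved evidence")

# safe=False skips check_missing_kwargs, so a partially filled template is allowed;
# the un-populated {context} placeholder stays in the returned string (assumption).
partial = template.populate(safe=False, topic="chunking")
print(partial)  # "Answer the question about chunking using: {context}"
```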
diff --git a/libs/ktem/ktem/assets/css/main.css b/libs/ktem/ktem/assets/css/main.css
index 6336b02..ae6fe7d 100644
--- a/libs/ktem/ktem/assets/css/main.css
+++ b/libs/ktem/ktem/assets/css/main.css
@@ -97,6 +97,11 @@ button.selected {
#chat-info-panel {
max-height: var(--main-area-height) !important;
overflow: auto !important;
+ transition: all 0.5s;
+}
+
+body.dark #chat-info-panel figure>img{
+ filter: invert(100%);
}
#conv-settings-panel {
@@ -199,11 +204,26 @@ mark {
right: 15px;
}
-/* #new-conv-button > img {
- position: relative;
- top: 0px;
- right: -50%;
-} */
+#use-mindmap-checkbox {
+ position: absolute;
+ width: 110px;
+ top: 10px;
+ right: 25px;
+}
+
+#quick-url textarea {
+ resize: none;
+ background: transparent;
+ margin-top: 0px;
+}
+
+#quick-url textarea::placeholder {
+ text-align: center;
+}
+
+#quick-file {
+ height: 110px;
+}
span.icon {
color: #cecece;
@@ -225,11 +245,6 @@ span.icon {
overflow: unset !important;
}
-/*body {*/
-/* margin: 0;*/
-/* font-family: Arial, sans-serif;*/
-/*}*/
-
pdfjs-viewer-element {
height: 100vh;
height: 100dvh;
@@ -280,8 +295,7 @@ pdfjs-viewer-element {
overflow: auto;
}
-/** Switch
- -------------------------------------*/
+/* Switch checkbox styles */
#is-public-checkbox {
position: relative;
@@ -293,10 +307,6 @@ pdfjs-viewer-element {
opacity: 0;
}
-/**
- * 1. Adjust this to size
- */
-
.switch {
display: inline-block;
/* 1 */
@@ -330,3 +340,28 @@ pdfjs-viewer-element {
.switch:has(> input:checked) {
background: #0c895f;
}
+
+/* Bot animation */
+
+.message.bot {
+ animation: fadein 1.5s ease-in-out forwards;
+}
+
+details.evidence {
+ animation: fadein 0.5s ease-in-out forwards;
+}
+
+@keyframes fadein {
+ 0% {
+ opacity: 0;
+ }
+
+ 100% {
+ opacity: 100%;
+ }
+}
+
+.message a.citation {
+ color: #10b981;
+ text-decoration: none;
+}
diff --git a/libs/ktem/ktem/assets/js/main.js b/libs/ktem/ktem/assets/js/main.js
index d2d0b81..30c4067 100644
--- a/libs/ktem/ktem/assets/js/main.js
+++ b/libs/ktem/ktem/assets/js/main.js
@@ -16,6 +16,11 @@ function run() {
let chat_info_panel = document.getElementById("info-expand");
chat_info_panel.insertBefore(info_expand_button, chat_info_panel.childNodes[2]);
+ // move use mind-map checkbox
+ let mindmap_checkbox = document.getElementById("use-mindmap-checkbox");
+ let chat_setting_panel = document.getElementById("chat-settings-expand");
+ chat_setting_panel.insertBefore(mindmap_checkbox, chat_setting_panel.childNodes[2]);
+
// create slider toggle
const is_public_checkbox = document.getElementById("is-public-checkbox");
const label_element = is_public_checkbox.getElementsByTagName("label")[0];
@@ -49,4 +54,21 @@ function run() {
globalThis.removeFromStorage = (key) => {
localStorage.removeItem(key)
}
+
+  // Sleep helper using Promise and setTimeout
+  function sleep(ms) {
+      return new Promise(resolve => setTimeout(resolve, ms));
+  }
+
+  // Scroll to the citation mark with the given ID
+  globalThis.scrollToCitation = async (event) => {
+    event.preventDefault(); // Prevent the default link behavior
+    var citationId = event.target.getAttribute('id');
+
+    await sleep(100); // Sleep for 100 milliseconds
+ var citation = document.querySelector('mark[id="' + citationId + '"]');
+ if (citation) {
+ citation.scrollIntoView({ behavior: 'smooth' });
+ }
+ }
}
diff --git a/libs/ktem/ktem/db/base_models.py b/libs/ktem/ktem/db/base_models.py
index 0c6004b..7c49705 100644
--- a/libs/ktem/ktem/db/base_models.py
+++ b/libs/ktem/ktem/db/base_models.py
@@ -25,8 +25,8 @@ class BaseConversation(SQLModel):
default_factory=lambda: uuid.uuid4().hex, primary_key=True, index=True
)
name: str = Field(
- default_factory=lambda: datetime.datetime.now(get_localzone()).strftime(
- "%Y-%m-%d %H:%M:%S"
+ default_factory=lambda: "Untitled - {}".format(
+ datetime.datetime.now(get_localzone()).strftime("%Y-%m-%d %H:%M:%S")
)
)
user: int = Field(default=0) # For now we only have one user
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 31a48b1..6b0033c 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -126,6 +126,9 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
if doc_ids:
flatten_doc_ids = []
for doc_id in doc_ids:
+ if doc_id is None:
+ raise ValueError("No document is selected")
+
if doc_id.startswith("["):
flatten_doc_ids.extend(json.loads(doc_id))
else:
diff --git a/libs/ktem/ktem/index/file/ui.py b/libs/ktem/ktem/index/file/ui.py
index e440fbf..7a97aae 100644
--- a/libs/ktem/ktem/index/file/ui.py
+++ b/libs/ktem/ktem/index/file/ui.py
@@ -22,6 +22,13 @@ from theflow.settings import settings as flowsettings
DOWNLOAD_MESSAGE = "Press again to download"
MAX_FILENAME_LENGTH = 20
+chat_input_focus_js = """
+function() {
+ let chatInput = document.querySelector("#chat-input textarea");
+ chatInput.focus();
+}
+"""
+
class File(gr.File):
"""Subclass from gr.File to maintain the original filename
@@ -666,7 +673,7 @@ class FileIndexPage(BasePage):
outputs=self._app.chat_page.quick_file_upload_status,
)
.then(
- fn=self.index_fn_with_default_loaders,
+ fn=self.index_fn_file_with_default_loaders,
inputs=[
self._app.chat_page.quick_file_upload,
gr.State(value=False),
@@ -689,6 +696,38 @@ class FileIndexPage(BasePage):
for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
quickUploadedEvent = quickUploadedEvent.then(**event)
+ quickURLUploadedEvent = (
+ self._app.chat_page.quick_urls.submit(
+ fn=lambda: gr.update(
+ value="Please wait for the indexing process "
+ "to complete before adding your question."
+ ),
+ outputs=self._app.chat_page.quick_file_upload_status,
+ )
+ .then(
+ fn=self.index_fn_url_with_default_loaders,
+ inputs=[
+ self._app.chat_page.quick_urls,
+ gr.State(value=True),
+ self._app.settings_state,
+ self._app.user_id,
+ ],
+ outputs=self.quick_upload_state,
+ )
+ .success(
+ fn=lambda: [
+ gr.update(value=None),
+ gr.update(value="select"),
+ ],
+ outputs=[
+ self._app.chat_page.quick_urls,
+ self._app.chat_page._indices_input[0],
+ ],
+ )
+ )
+ for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
+ quickURLUploadedEvent = quickURLUploadedEvent.then(**event)
+
quickUploadedEvent.success(
fn=lambda x: x,
inputs=self.quick_upload_state,
@@ -701,6 +740,30 @@ class FileIndexPage(BasePage):
inputs=[self._app.user_id, self.filter],
outputs=[self.file_list_state, self.file_list],
concurrency_limit=20,
+ ).then(
+ fn=lambda: True,
+ inputs=None,
+ outputs=None,
+ js=chat_input_focus_js,
+ )
+
+ quickURLUploadedEvent.success(
+ fn=lambda x: x,
+ inputs=self.quick_upload_state,
+ outputs=self._app.chat_page._indices_input[1],
+ ).then(
+ fn=lambda: gr.update(value="Indexing completed."),
+ outputs=self._app.chat_page.quick_file_upload_status,
+ ).then(
+ fn=self.list_file,
+ inputs=[self._app.user_id, self.filter],
+ outputs=[self.file_list_state, self.file_list],
+ concurrency_limit=20,
+ ).then(
+ fn=lambda: True,
+ inputs=None,
+ outputs=None,
+ js=chat_input_focus_js,
)
except Exception as e:
@@ -951,7 +1014,7 @@ class FileIndexPage(BasePage):
return results
- def index_fn_with_default_loaders(
+ def index_fn_file_with_default_loaders(
self, files, reindex: bool, settings, user_id
) -> list["str"]:
"""Function for quick upload with default loaders
@@ -991,6 +1054,22 @@ class FileIndexPage(BasePage):
return exist_ids + returned_ids
+ def index_fn_url_with_default_loaders(self, urls, reindex: bool, settings, user_id):
+ returned_ids = []
+ settings = deepcopy(settings)
+ settings[f"index.options.{self._index.id}.reader_mode"] = "default"
+ settings[f"index.options.{self._index.id}.quick_index_mode"] = True
+
+ if urls:
+ _iter = self.index_fn([], urls, reindex, settings, user_id)
+ try:
+ while next(_iter):
+ pass
+ except StopIteration as e:
+ returned_ids = e.value
+
+ return returned_ids
+
def index_files_from_dir(
self, folder_path, reindex, settings, user_id
) -> Generator[tuple[str, str], None, None]:
diff --git a/libs/ktem/ktem/pages/chat/__init__.py b/libs/ktem/ktem/pages/chat/__init__.py
index e25fdba..55f04b3 100644
--- a/libs/ktem/ktem/pages/chat/__init__.py
+++ b/libs/ktem/ktem/pages/chat/__init__.py
@@ -40,26 +40,52 @@ function() {
links[i].onclick = openModal;
}
- var mindmap_el = document.getElementById('mindmap');
- if (mindmap_el) {
- var output = svgPanZoom(mindmap_el);
+ // Get all citation links and attach click event
+ var links = document.querySelectorAll("a.citation");
+ for (var i = 0; i < links.length; i++) {
+ links[i].onclick = scrollToCitation;
}
- var link = document.getElementById("mindmap-toggle");
- if (link) {
- link.onclick = function(event) {
+ var mindmap_el = document.getElementById('mindmap');
+
+ if (mindmap_el) {
+ var output = svgPanZoom(mindmap_el);
+ const svg = mindmap_el.cloneNode(true);
+
+ function on_svg_export(event) {
event.preventDefault(); // Prevent the default link behavior
- var div = document.getElementById("mindmap-wrapper");
- if (div) {
- var currentHeight = div.style.height;
- if (currentHeight === '400px') {
- var contentHeight = div.scrollHeight;
- div.style.height = contentHeight + 'px';
- } else {
- div.style.height = '400px'
+ // convert to a valid XML source
+ const as_text = new XMLSerializer().serializeToString(svg);
+ // store in a Blob
+ const blob = new Blob([as_text], { type: "image/svg+xml" });
+ // create an URI pointing to that blob
+ const url = URL.createObjectURL(blob);
+ const win = open(url);
+ // so the Garbage Collector can collect the blob
+ win.onload = (evt) => URL.revokeObjectURL(url);
+ }
+
+ var link = document.getElementById("mindmap-toggle");
+ if (link) {
+ link.onclick = function(event) {
+ event.preventDefault(); // Prevent the default link behavior
+ var div = document.getElementById("mindmap-wrapper");
+ if (div) {
+ var currentHeight = div.style.height;
+ if (currentHeight === '400px') {
+ var contentHeight = div.scrollHeight;
+ div.style.height = contentHeight + 'px';
+ } else {
+ div.style.height = '400px'
+ }
}
- }
- };
+ };
+ }
+
+ var link = document.getElementById("mindmap-export");
+ if (link) {
+ link.addEventListener('click', on_svg_export);
+ }
}
return [links.length]
@@ -127,6 +153,14 @@ class ChatPage(BasePage):
file_count="multiple",
container=True,
show_label=False,
+ elem_id="quick-file",
+ )
+ self.quick_urls = gr.Textbox(
+ placeholder="Or paste URLs here",
+ lines=1,
+ container=False,
+ show_label=False,
+ elem_id="quick-url",
)
self.quick_file_upload_status = gr.Markdown()
@@ -136,12 +170,17 @@ class ChatPage(BasePage):
self.chat_panel = ChatPanel(self._app)
with gr.Row():
- with gr.Accordion(label="Chat settings", open=False):
+ with gr.Accordion(
+ label="Chat settings",
+ elem_id="chat-settings-expand",
+ open=False,
+ ):
# a quick switch for reasoning type option
with gr.Row():
gr.HTML("Reasoning method")
gr.HTML("Model")
- gr.HTML("Generate mindmap")
+ gr.HTML("Language")
+ gr.HTML("Citation")
with gr.Row():
reasoning_type_values = [
@@ -165,17 +204,36 @@ class ChatPage(BasePage):
container=False,
show_label=False,
)
- binary_default_choices = [
- (DEFAULT_SETTING, DEFAULT_SETTING),
- ("Enable", True),
- ("Disable", False),
- ]
- self.use_mindmap = gr.Dropdown(
+ self.language = gr.Dropdown(
+ choices=[
+ (DEFAULT_SETTING, DEFAULT_SETTING),
+ ]
+ + self._app.default_settings.reasoning.settings[
+ "lang"
+ ].choices,
value=DEFAULT_SETTING,
- choices=binary_default_choices,
container=False,
show_label=False,
)
+ self.citation = gr.Dropdown(
+ choices=[
+ (DEFAULT_SETTING, DEFAULT_SETTING),
+ ]
+ + self._app.default_settings.reasoning.options["simple"]
+ .settings["highlight_citation"]
+ .choices,
+ value=DEFAULT_SETTING,
+ container=False,
+ show_label=False,
+ interactive=True,
+ )
+
+ self.use_mindmap = gr.State(value=DEFAULT_SETTING)
+ self.use_mindmap_check = gr.Checkbox(
+ label="Mindmap (default)",
+ container=False,
+ elem_id="use-mindmap-checkbox",
+ )
with gr.Column(
scale=INFO_PANEL_SCALES[False], elem_id="chat-info-panel"
@@ -235,6 +293,8 @@ class ChatPage(BasePage):
self._reasoning_type,
self.model_type,
self.use_mindmap,
+ self.citation,
+ self.language,
self.state_chat,
self._app.user_id,
]
@@ -506,6 +566,12 @@ class ChatPage(BasePage):
inputs=[self.reasoning_type],
outputs=[self._reasoning_type],
)
+ self.use_mindmap_check.change(
+ lambda x: (x, gr.update(label="Mindmap " + ("(on)" if x else "(off)"))),
+ inputs=[self.use_mindmap_check],
+ outputs=[self.use_mindmap, self.use_mindmap_check],
+ show_progress="hidden",
+ )
self.chat_control.conversation_id.change(
lambda: gr.update(visible=False),
outputs=self.plot_panel,
@@ -722,6 +788,8 @@ class ChatPage(BasePage):
session_reasoning_type: str,
session_llm: str,
session_use_mindmap: bool | str,
+ session_use_citation: str,
+ session_language: str,
state: dict,
user_id: int,
*selecteds,
@@ -743,6 +811,10 @@ class ChatPage(BasePage):
session_reasoning_type,
"use mindmap",
session_use_mindmap,
+ "use citation",
+ session_use_citation,
+ "language",
+ session_language,
)
print("Session LLM", session_llm)
reasoning_mode = (
@@ -766,6 +838,14 @@ class ChatPage(BasePage):
if session_use_mindmap not in (DEFAULT_SETTING, None):
settings["reasoning.options.simple.create_mindmap"] = session_use_mindmap
+ if session_use_citation not in (DEFAULT_SETTING, None):
+ settings[
+ "reasoning.options.simple.highlight_citation"
+ ] = session_use_citation
+
+ if session_language not in (DEFAULT_SETTING, None):
+ settings["reasoning.lang"] = session_language
+
# get retrievers
retrievers = []
for index in self._app.index_manager.indices:
@@ -798,6 +878,8 @@ class ChatPage(BasePage):
reasoning_type,
llm_type,
use_mind_map,
+ use_citation,
+ language,
state,
user_id,
*selecteds,
@@ -814,7 +896,15 @@ class ChatPage(BasePage):
# construct the pipeline
pipeline, reasoning_state = self.create_pipeline(
- settings, reasoning_type, llm_type, use_mind_map, state, user_id, *selecteds
+ settings,
+ reasoning_type,
+ llm_type,
+ use_mind_map,
+ use_citation,
+ language,
+ state,
+ user_id,
+ *selecteds,
)
print("Reasoning state", reasoning_state)
pipeline.set_output_queue(queue)
diff --git a/libs/ktem/ktem/pages/chat/chat_panel.py b/libs/ktem/ktem/pages/chat/chat_panel.py
index a7ec3a2..b83c5d1 100644
--- a/libs/ktem/ktem/pages/chat/chat_panel.py
+++ b/libs/ktem/ktem/pages/chat/chat_panel.py
@@ -28,6 +28,7 @@ class ChatPanel(BasePage):
placeholder="Chat input",
container=False,
show_label=False,
+ elem_id="chat-input",
)
def submit_msg(self, chat_input, chat_history):
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 80bebad..fbd861f 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -1,17 +1,10 @@
-import html
import logging
import threading
-from collections import defaultdict
-from difflib import SequenceMatcher
-from functools import partial
from typing import Generator
-import numpy as np
-import tiktoken
from ktem.embeddings.manager import embedding_models_manager as embeddings
from ktem.llms.manager import llms
from ktem.reasoning.prompt_optimization import (
- CreateMindmapPipeline,
DecomposeQuestionPipeline,
RewriteQuestionPipeline,
)
@@ -19,7 +12,6 @@ from ktem.utils.plantuml import PlantUML
from ktem.utils.render import Render
from ktem.utils.visualize_cited import CreateCitationVizPipeline
from plotly.io import to_json
-from theflow.settings import settings as flowsettings
from kotaemon.base import (
AIMessage,
@@ -30,399 +22,20 @@ from kotaemon.base import (
RetrievedDocument,
SystemMessage,
)
-from kotaemon.indices.qa.citation import CitationPipeline
-from kotaemon.indices.splitters import TokenSplitter
-from kotaemon.llms import ChatLLM, PromptTemplate
+from kotaemon.indices.qa.citation_qa import (
+ CONTEXT_RELEVANT_WARNING_SCORE,
+ DEFAULT_QA_TEXT_PROMPT,
+ AnswerWithContextPipeline,
+)
+from kotaemon.indices.qa.citation_qa_inline import AnswerWithInlineCitation
+from kotaemon.indices.qa.format_context import PrepareEvidencePipeline
+from kotaemon.llms import ChatLLM
from ..utils import SUPPORTED_LANGUAGE_MAP
from .base import BaseReasoning
logger = logging.getLogger(__name__)
-EVIDENCE_MODE_TEXT = 0
-EVIDENCE_MODE_TABLE = 1
-EVIDENCE_MODE_CHATBOT = 2
-EVIDENCE_MODE_FIGURE = 3
-MAX_IMAGES = 10
-CITATION_TIMEOUT = 5.0
-
-
-def find_text(search_span, context):
- sentence_list = search_span.split("\n")
- context = context.replace("\n", " ")
-
- matches = []
- # don't search for small text
- if len(search_span) > 5:
- for sentence in sentence_list:
- match = SequenceMatcher(
- None, sentence, context, autojunk=False
- ).find_longest_match()
- if match.size > max(len(sentence) * 0.35, 5):
- matches.append((match.b, match.b + match.size))
-
- return matches
-
-
-class PrepareEvidencePipeline(BaseComponent):
- """Prepare the evidence text from the list of retrieved documents
-
- This step usually happens after `DocumentRetrievalPipeline`.
-
- Args:
- trim_func: a callback function or a BaseComponent, that splits a large
- chunk of text into smaller ones. The first one will be retained.
- """
-
- max_context_length: int = 32000
- trim_func: TokenSplitter | None = None
-
- def run(self, docs: list[RetrievedDocument]) -> Document:
- evidence = ""
- images = []
- table_found = 0
- evidence_modes = []
-
- evidence_trim_func = (
- self.trim_func
- if self.trim_func
- else TokenSplitter(
- chunk_size=self.max_context_length,
- chunk_overlap=0,
- separator=" ",
- tokenizer=partial(
- tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
- allowed_special=set(),
- disallowed_special="all",
- ),
- )
- )
-
- for _id, retrieved_item in enumerate(docs):
- retrieved_content = ""
- page = retrieved_item.metadata.get("page_label", None)
- source = filename = retrieved_item.metadata.get("file_name", "-")
- if page:
- source += f" (Page {page})"
- if retrieved_item.metadata.get("type", "") == "table":
- evidence_modes.append(EVIDENCE_MODE_TABLE)
- if table_found < 5:
- retrieved_content = retrieved_item.metadata.get(
- "table_origin", retrieved_item.text
- )
- if retrieved_content not in evidence:
- table_found += 1
- evidence += (
-                            f"<br><b>Table from {source}</b>\n"
-                            + retrieved_content
-                            + "\n<br>"
- )
- elif retrieved_item.metadata.get("type", "") == "chatbot":
- evidence_modes.append(EVIDENCE_MODE_CHATBOT)
- retrieved_content = retrieved_item.metadata["window"]
- evidence += (
-                    f"<br><b>Chatbot scenario from {filename} (Row {page})</b>\n"
-                    + retrieved_content
-                    + "\n<br>"
- )
- elif retrieved_item.metadata.get("type", "") == "image":
- evidence_modes.append(EVIDENCE_MODE_FIGURE)
- retrieved_content = retrieved_item.metadata.get("image_origin", "")
- retrieved_caption = html.escape(retrieved_item.get_content())
- evidence += (
-                    f"<br><b>Figure from {source}</b>\n"
-                    + f"<img width='85%' src='{retrieved_content}' "
-                    + f"alt='{retrieved_caption}'/>"
-                    + "\n<br>"
- )
- images.append(retrieved_content)
- else:
- if "window" in retrieved_item.metadata:
- retrieved_content = retrieved_item.metadata["window"]
- else:
- retrieved_content = retrieved_item.text
- retrieved_content = retrieved_content.replace("\n", " ")
- if retrieved_content not in evidence:
- evidence += (
-                        f"<br><b>Content from {source}: </b> "
-                        + retrieved_content
-                        + " \n<br>"
- )
-
- # resolve evidence mode
- evidence_mode = EVIDENCE_MODE_TEXT
- if EVIDENCE_MODE_FIGURE in evidence_modes:
- evidence_mode = EVIDENCE_MODE_FIGURE
- elif EVIDENCE_MODE_TABLE in evidence_modes:
- evidence_mode = EVIDENCE_MODE_TABLE
-
- # trim context by trim_len
- print("len (original)", len(evidence))
- if evidence:
- texts = evidence_trim_func([Document(text=evidence)])
- evidence = texts[0].text
- print("len (trimmed)", len(evidence))
-
- return Document(content=(evidence_mode, evidence, images))
-
-
-DEFAULT_QA_TEXT_PROMPT = (
- "Use the following pieces of context to answer the question at the end in detail with clear explanation. " # noqa: E501
- "If you don't know the answer, just say that you don't know, don't try to "
- "make up an answer. Give answer in "
- "{lang}.\n\n"
- "{context}\n"
- "Question: {question}\n"
- "Helpful Answer:"
-)
-
-DEFAULT_QA_TABLE_PROMPT = (
- "Use the given context: texts, tables, and figures below to answer the question, "
- "then provide answer with clear explanation."
- "If you don't know the answer, just say that you don't know, "
- "don't try to make up an answer. Give answer in {lang}.\n\n"
- "Context:\n"
- "{context}\n"
- "Question: {question}\n"
- "Helpful Answer:"
-) # noqa
-
-DEFAULT_QA_CHATBOT_PROMPT = (
- "Pick the most suitable chatbot scenarios to answer the question at the end, "
- "output the provided answer text. If you don't know the answer, "
- "just say that you don't know. Keep the answer as concise as possible. "
- "Give answer in {lang}.\n\n"
- "Context:\n"
- "{context}\n"
- "Question: {question}\n"
- "Answer:"
-) # noqa
-
-DEFAULT_QA_FIGURE_PROMPT = (
- "Use the given context: texts, tables, and figures below to answer the question. "
- "If you don't know the answer, just say that you don't know. "
- "Give answer in {lang}.\n\n"
- "Context: \n"
- "{context}\n"
- "Question: {question}\n"
- "Answer: "
-) # noqa
-
-CONTEXT_RELEVANT_WARNING_SCORE = 0.7
-
-
-class AnswerWithContextPipeline(BaseComponent):
- """Answer the question based on the evidence
-
- Args:
- llm: the language model to generate the answer
- citation_pipeline: generates citation from the evidence
- qa_template: the prompt template for LLM to generate answer (refer to
- evidence_mode)
- qa_table_template: the prompt template for LLM to generate answer for table
- (refer to evidence_mode)
- qa_chatbot_template: the prompt template for LLM to generate answer for
- pre-made scenarios (refer to evidence_mode)
- lang: the language of the answer. Currently support English and Japanese
- """
-
- llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
- vlm_endpoint: str = getattr(flowsettings, "KH_VLM_ENDPOINT", "")
- use_multimodal: bool = getattr(flowsettings, "KH_REASONINGS_USE_MULTIMODAL", True)
- citation_pipeline: CitationPipeline = Node(
- default_callback=lambda _: CitationPipeline(llm=llms.get_default())
- )
- create_mindmap_pipeline: CreateMindmapPipeline = Node(
- default_callback=lambda _: CreateMindmapPipeline(llm=llms.get_default())
- )
-
- qa_template: str = DEFAULT_QA_TEXT_PROMPT
- qa_table_template: str = DEFAULT_QA_TABLE_PROMPT
- qa_chatbot_template: str = DEFAULT_QA_CHATBOT_PROMPT
- qa_figure_template: str = DEFAULT_QA_FIGURE_PROMPT
-
- enable_citation: bool = False
- enable_mindmap: bool = False
- enable_citation_viz: bool = False
-
- system_prompt: str = ""
- lang: str = "English" # support English and Japanese
- n_last_interactions: int = 5
-
- def get_prompt(self, question, evidence, evidence_mode: int):
- """Prepare the prompt and other information for LLM"""
- if evidence_mode == EVIDENCE_MODE_TEXT:
- prompt_template = PromptTemplate(self.qa_template)
- elif evidence_mode == EVIDENCE_MODE_TABLE:
- prompt_template = PromptTemplate(self.qa_table_template)
- elif evidence_mode == EVIDENCE_MODE_FIGURE:
- if self.use_multimodal:
- prompt_template = PromptTemplate(self.qa_figure_template)
- else:
- prompt_template = PromptTemplate(self.qa_template)
- else:
- prompt_template = PromptTemplate(self.qa_chatbot_template)
-
- prompt = prompt_template.populate(
- context=evidence,
- question=question,
- lang=self.lang,
- )
-
- return prompt, evidence
-
- def run(
- self, question: str, evidence: str, evidence_mode: int = 0, **kwargs
- ) -> Document:
- return self.invoke(question, evidence, evidence_mode, **kwargs)
-
- def invoke(
- self,
- question: str,
- evidence: str,
- evidence_mode: int = 0,
- images: list[str] = [],
- **kwargs,
- ) -> Document:
- raise NotImplementedError
-
- async def ainvoke( # type: ignore
- self,
- question: str,
- evidence: str,
- evidence_mode: int = 0,
- images: list[str] = [],
- **kwargs,
- ) -> Document:
- """Answer the question based on the evidence
-
- In addition to the question and the evidence, this method also take into
- account evidence_mode. The evidence_mode tells which kind of evidence is.
- The kind of evidence affects:
- 1. How the evidence is represented.
- 2. The prompt to generate the answer.
-
- By default, the evidence_mode is 0, which means the evidence is plain text with
- no particular semantic representation. The evidence_mode can be:
- 1. "table": There will be HTML markup telling that there is a table
- within the evidence.
- 2. "chatbot": There will be HTML markup telling that there is a chatbot.
- This chatbot is a scenario, extracted from an Excel file, where each
- row corresponds to an interaction.
-
- Args:
- question: the original question posed by user
- evidence: the text that contain relevant information to answer the question
- (determined by retrieval pipeline)
- evidence_mode: the mode of evidence, 0 for text, 1 for table, 2 for chatbot
- """
- raise NotImplementedError
-
- def stream( # type: ignore
- self,
- question: str,
- evidence: str,
- evidence_mode: int = 0,
- images: list[str] = [],
- **kwargs,
- ) -> Generator[Document, None, Document]:
- history = kwargs.get("history", [])
- print(f"Got {len(images)} images")
- # check if evidence exists, use QA prompt
- if evidence:
- prompt, evidence = self.get_prompt(question, evidence, evidence_mode)
- else:
- prompt = question
-
- # retrieve the citation
- citation = None
- mindmap = None
-
- def citation_call():
- nonlocal citation
- citation = self.citation_pipeline(context=evidence, question=question)
-
- def mindmap_call():
- nonlocal mindmap
- mindmap = self.create_mindmap_pipeline(context=evidence, question=question)
-
- citation_thread = None
- mindmap_thread = None
-
- # execute function call in thread
- if evidence:
- if self.enable_citation:
- citation_thread = threading.Thread(target=citation_call)
- citation_thread.start()
-
- if self.enable_mindmap:
- mindmap_thread = threading.Thread(target=mindmap_call)
- mindmap_thread.start()
-
- output = ""
- logprobs = []
-
- messages = []
- if self.system_prompt:
- messages.append(SystemMessage(content=self.system_prompt))
- for human, ai in history[-self.n_last_interactions :]:
- messages.append(HumanMessage(content=human))
- messages.append(AIMessage(content=ai))
-
- if self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE:
- # create image message:
- messages.append(
- HumanMessage(
- content=[
- {"type": "text", "text": prompt},
- ]
- + [
- {
- "type": "image_url",
- "image_url": {"url": image},
- }
- for image in images[:MAX_IMAGES]
- ],
- )
- )
- else:
- # append main prompt
- messages.append(HumanMessage(content=prompt))
-
- try:
- # try streaming first
- print("Trying LLM streaming")
- for out_msg in self.llm.stream(messages):
- output += out_msg.text
- logprobs += out_msg.logprobs
- yield Document(channel="chat", content=out_msg.text)
- except NotImplementedError:
- print("Streaming is not supported, falling back to normal processing")
- output = self.llm(messages).text
- yield Document(channel="chat", content=output)
-
- if logprobs:
- qa_score = np.exp(np.average(logprobs))
- else:
- qa_score = None
-
- if citation_thread:
- citation_thread.join(timeout=CITATION_TIMEOUT)
- if mindmap_thread:
- mindmap_thread.join(timeout=CITATION_TIMEOUT)
-
- answer = Document(
- text=output,
- metadata={
- "citation_viz": self.enable_citation_viz,
- "mindmap": mindmap,
- "citation": citation,
- "qa_score": qa_score,
- },
- )
-
- return answer
-
class AddQueryContextPipeline(BaseComponent):
@@ -481,7 +94,7 @@ class FullQAPipeline(BaseReasoning):
retrievers: list[BaseComponent]
evidence_pipeline: PrepareEvidencePipeline = PrepareEvidencePipeline.withx()
- answering_pipeline: AnswerWithContextPipeline = AnswerWithContextPipeline.withx()
+ answering_pipeline: AnswerWithContextPipeline
rewrite_pipeline: RewriteQuestionPipeline | None = None
create_citation_viz_pipeline: CreateCitationVizPipeline = Node(
default_callback=lambda _: CreateCitationVizPipeline(
@@ -548,104 +161,35 @@ class FullQAPipeline(BaseReasoning):
return docs, info
- def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document]]:
- """Prepare the citations to show on the UI"""
- with_citation, without_citation = [], []
- spans = defaultdict(list)
- has_llm_score = any("llm_trulens_score" in doc.metadata for doc in docs)
-
- if answer.metadata["citation"]:
- evidences = answer.metadata["citation"].evidences
- for quote in evidences:
- matched_excerpts = []
- for doc in docs:
- matches = find_text(quote, doc.text)
-
- for start, end in matches:
- if "|" not in doc.text[start:end]:
- spans[doc.doc_id].append(
- {
- "start": start,
- "end": end,
- }
- )
- matched_excerpts.append(doc.text[start:end])
-
- # print("Matched citation:", quote, matched_excerpts),
-
- id2docs = {doc.doc_id: doc for doc in docs}
- not_detected = set(id2docs.keys()) - set(spans.keys())
-
- # render highlight spans
- for _id, ss in spans.items():
- if not ss:
- not_detected.add(_id)
- continue
- cur_doc = id2docs[_id]
- highlight_text = ""
-
- ss = sorted(ss, key=lambda x: x["start"])
- text = cur_doc.text[: ss[0]["start"]]
- for idx, span in enumerate(ss):
- to_highlight = cur_doc.text[span["start"] : span["end"]]
- if len(to_highlight) > len(highlight_text):
- highlight_text = to_highlight
- text += Render.highlight(to_highlight)
- if idx < len(ss) - 1:
- text += cur_doc.text[span["end"] : ss[idx + 1]["start"]]
- text += cur_doc.text[ss[-1]["end"] :]
- # add to display list
- with_citation.append(
- Document(
- channel="info",
- content=Render.collapsible_with_header_score(
- cur_doc,
- override_text=text,
- highlight_text=highlight_text,
- open_collapsible=True,
- ),
- )
- )
-
- print("Got {} cited docs".format(len(with_citation)))
-
- sorted_not_detected_items_with_scores = [
- (id_, id2docs[id_].metadata.get("llm_trulens_score", 0.0))
- for id_ in not_detected
- ]
- sorted_not_detected_items_with_scores.sort(key=lambda x: x[1], reverse=True)
-
- for id_, _ in sorted_not_detected_items_with_scores:
- doc = id2docs[id_]
- doc_score = doc.metadata.get("llm_trulens_score", 0.0)
- is_open = not has_llm_score or (
- doc_score > CONTEXT_RELEVANT_WARNING_SCORE and len(with_citation) == 0
- )
- without_citation.append(
- Document(
- channel="info",
- content=Render.collapsible_with_header_score(
- doc, open_collapsible=is_open
- ),
- )
- )
- return with_citation, without_citation
-
def prepare_mindmap(self, answer) -> Document | None:
mindmap = answer.metadata["mindmap"]
if mindmap:
mindmap_text = mindmap.text
uml_renderer = PlantUML()
- mindmap_svg = uml_renderer.process(mindmap_text)
+
+ try:
+ mindmap_svg = uml_renderer.process(mindmap_text)
+ except Exception as e:
+ print("Failed to process mindmap:", e)
+ mindmap_svg = ""
+
+ # post-process the mindmap SVG
+ mindmap_svg = (
+ mindmap_svg.replace("sans-serif", "Quicksand, sans-serif")
+ .replace("#181818", "#cecece")
+ .replace("background:#FFFFF", "background:none")
+ .replace("stroke-width:1", "stroke-width:2")
+ )
mindmap_content = Document(
channel="info",
content=Render.collapsible(
header="""
Mindmap
-                <a href='#' id='mindmap-toggle'>
-                [Expand]</a>
-                """,
+                <a href='#' id='mindmap-toggle'>
+                [Expand]</a>
+                <a href='#' id='mindmap-export'>
+                [Export]</a>""",
content=mindmap_svg,
open=True,
),
@@ -674,7 +218,9 @@ class FullQAPipeline(BaseReasoning):
def show_citations_and_addons(self, answer, docs, question):
# show the evidence
- with_citation, without_citation = self.prepare_citations(answer, docs)
+ with_citation, without_citation = self.answering_pipeline.prepare_citations(
+ answer, docs
+ )
mindmap_output = self.prepare_mindmap(answer)
citation_plot_output = self.prepare_citation_viz(answer, question, docs)
@@ -773,6 +319,13 @@ class FullQAPipeline(BaseReasoning):
return answer
+ @classmethod
+ def prepare_pipeline_instance(cls, settings, retrievers):
+ return cls(
+ retrievers=retrievers,
+ rewrite_pipeline=RewriteQuestionPipeline(),
+ )
+
@classmethod
def get_pipeline(cls, settings, states, retrievers):
"""Get the reasoning pipeline
@@ -783,10 +336,7 @@ class FullQAPipeline(BaseReasoning):
"""
max_context_length_setting = settings.get("reasoning.max_context_length", 32000)
- pipeline = cls(
- retrievers=retrievers,
- rewrite_pipeline=RewriteQuestionPipeline(),
- )
+ pipeline = cls.prepare_pipeline_instance(settings, retrievers)
prefix = f"reasoning.options.{cls.get_info()['id']}"
llm_name = settings.get(f"{prefix}.llm", None)
@@ -797,13 +347,22 @@ class FullQAPipeline(BaseReasoning):
evidence_pipeline.max_context_length = max_context_length_setting
# answering pipeline configuration
- answer_pipeline = pipeline.answering_pipeline
+ use_inline_citation = settings[f"{prefix}.highlight_citation"] == "inline"
+
+ if use_inline_citation:
+ answer_pipeline = pipeline.answering_pipeline = AnswerWithInlineCitation()
+ else:
+ answer_pipeline = pipeline.answering_pipeline = AnswerWithContextPipeline()
+
answer_pipeline.llm = llm
answer_pipeline.citation_pipeline.llm = llm
answer_pipeline.n_last_interactions = settings[f"{prefix}.n_last_interactions"]
- answer_pipeline.enable_citation = settings[f"{prefix}.highlight_citation"]
+ answer_pipeline.enable_citation = (
+ settings[f"{prefix}.highlight_citation"] != "off"
+ )
answer_pipeline.enable_mindmap = settings[f"{prefix}.create_mindmap"]
answer_pipeline.enable_citation_viz = settings[f"{prefix}.create_citation_viz"]
+ answer_pipeline.use_multimodal = settings[f"{prefix}.use_multimodal"]
answer_pipeline.system_prompt = settings[f"{prefix}.system_prompt"]
answer_pipeline.qa_template = settings[f"{prefix}.qa_prompt"]
answer_pipeline.lang = SUPPORTED_LANGUAGE_MAP.get(
@@ -848,9 +407,10 @@ class FullQAPipeline(BaseReasoning):
),
},
"highlight_citation": {
- "name": "Highlight Citation",
- "value": True,
- "component": "checkbox",
+ "name": "Citation style",
+ "value": "highlight",
+ "component": "radio",
+ "choices": ["highlight", "inline", "off"],
},
"create_mindmap": {
"name": "Create Mindmap",
@@ -862,6 +422,11 @@ class FullQAPipeline(BaseReasoning):
"value": False,
"component": "checkbox",
},
+ "use_multimodal": {
+ "name": "Use Multimodal Input",
+ "value": False,
+ "component": "checkbox",
+ },
"system_prompt": {
"name": "System Prompt",
"value": "This is a question answering system",
@@ -979,7 +544,9 @@ class FullDecomposeQAPipeline(FullQAPipeline):
)
# show the evidence
- with_citation, without_citation = self.prepare_citations(answer, docs)
+ with_citation, without_citation = self.answering_pipeline.prepare_citations(
+ answer, docs
+ )
if not with_citation and not without_citation:
yield Document(channel="info", content="