From cbe40fac995a369602cba5a1c0eded22bd525314 Mon Sep 17 00:00:00 2001 From: trducng Date: Mon, 29 Jan 2024 11:16:07 +0700 Subject: [PATCH] Show retrieved but non-evidence docs. Support language changing --- libs/ktem/ktem/reasoning/simple.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py index 352868f..4b2422d 100644 --- a/libs/ktem/ktem/reasoning/simple.py +++ b/libs/ktem/ktem/reasoning/simple.py @@ -255,8 +255,10 @@ class FullQAPipeline(BaseComponent): id2docs = {doc.doc_id: doc for doc in docs} lack_evidence = True + not_detected = set(id2docs.keys()) - set(spans.keys()) for id, ss in spans.items(): if not ss: + not_detected.add(id) continue ss = sorted(ss, key=lambda x: x["start"]) text = id2docs[id].text[: ss[0]["start"]] @@ -280,7 +282,23 @@ class FullQAPipeline(BaseComponent): lack_evidence = False if lack_evidence: - self.report_output({"evidence": "No evidence found"}) + self.report_output({"evidence": "No evidence found.\n"}) + + if not_detected: + self.report_output( + {"evidence": "Retrieved docs without matching evidence:\n"} + ) + for id in list(not_detected): + self.report_output( + { + "evidence": ( + "
" + f"{id2docs[id].metadata['file_name']}" + f"{id2docs[id].text}" + "

" + ) + } + ) self.report_output(None) return answer @@ -295,6 +313,9 @@ class FullQAPipeline(BaseComponent): """ pipeline = FullQAPipeline(retrievers=retrievers) pipeline.answering_pipeline.llm = llms.get_highest_accuracy() + pipeline.answering_pipeline.lang = {"en": "English", "ja": "Japanese"}.get( + settings["reasoning.lang"], "English" + ) return pipeline @classmethod