Feat: local endpoint LLM (#148)
* Serve the local model in a separate process from the app

---------

Co-authored-by: albert <albert@cinnamon.is>
Co-authored-by: trducng <trungduc1992@gmail.com>
This commit is contained in:
@@ -118,7 +118,7 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
|
||||
|
||||
# rerank
|
||||
docs = self.vector_retrieval(text=text, top_k=top_k, **kwargs)
|
||||
if self.get_from_path("reranker"):
|
||||
if docs and self.get_from_path("reranker"):
|
||||
docs = self.reranker(docs, query=text)
|
||||
|
||||
if not self.get_extra_table:
|
||||
|
@@ -200,24 +200,37 @@ class AnswerWithContextPipeline(BaseComponent):
|
||||
lang=self.lang,
|
||||
)
|
||||
|
||||
citation_task = asyncio.create_task(
|
||||
self.citation_pipeline.ainvoke(context=evidence, question=question)
|
||||
)
|
||||
print("Citation task created")
|
||||
if evidence:
|
||||
citation_task = asyncio.create_task(
|
||||
self.citation_pipeline.ainvoke(context=evidence, question=question)
|
||||
)
|
||||
print("Citation task created")
|
||||
|
||||
messages = []
|
||||
if self.system_prompt:
|
||||
messages.append(SystemMessage(content=self.system_prompt))
|
||||
messages.append(HumanMessage(content=prompt))
|
||||
|
||||
output = ""
|
||||
for text in self.llm.stream(messages):
|
||||
output += text.text
|
||||
self.report_output({"output": text.text})
|
||||
await asyncio.sleep(0)
|
||||
try:
|
||||
# try streaming first
|
||||
print("Trying LLM streaming")
|
||||
for text in self.llm.stream(messages):
|
||||
output += text.text
|
||||
self.report_output({"output": text.text})
|
||||
await asyncio.sleep(0)
|
||||
except NotImplementedError:
|
||||
print("Streaming is not supported, falling back to normal processing")
|
||||
output = self.llm(messages).text
|
||||
self.report_output({"output": output})
|
||||
|
||||
# retrieve the citation
|
||||
print("Waiting for citation task")
|
||||
citation = await citation_task
|
||||
if evidence:
|
||||
citation = await citation_task
|
||||
else:
|
||||
citation = None
|
||||
|
||||
answer = Document(text=output, metadata={"citation": citation})
|
||||
|
||||
return answer
|
||||
|
Reference in New Issue
Block a user