From cd87a7e35f768500a1b93ff2e5117d0b89870b62 Mon Sep 17 00:00:00 2001 From: "Tuan Anh Nguyen Dang (Tadashi_Cin)" Date: Tue, 17 Dec 2024 17:23:54 +0700 Subject: [PATCH] fix: UI tab name and reranking process for TeiFastReranking (#576) * fix: update user-management.md * fix: reranking process when TeiFastReranking is specified. --------- Co-authored-by: Ryuichi Takano <45957617+tknrych@users.noreply.github.com> Co-authored-by: Ryuichi Takano --- Dockerfile | 32 +++++++++---------- docs/pages/app/ext/user-management.md | 2 +- .../kotaemon/rerankings/tei_fast_rerank.py | 7 +++- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index 94d83cb..714d0ef 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,15 +4,15 @@ FROM python:3.10-slim AS lite # Common dependencies RUN apt-get update -qqy && \ apt-get install -y --no-install-recommends \ - ssh \ - git \ - gcc \ - g++ \ - poppler-utils \ - libpoppler-dev \ - unzip \ - curl \ - cargo + ssh \ + git \ + gcc \ + g++ \ + poppler-utils \ + libpoppler-dev \ + unzip \ + curl \ + cargo # Setup args ARG TARGETPLATFORM @@ -62,13 +62,13 @@ FROM lite AS full # Additional dependencies for full version RUN apt-get update -qqy && \ apt-get install -y --no-install-recommends \ - tesseract-ocr \ - tesseract-ocr-jpn \ - libsm6 \ - libxext6 \ - libreoffice \ - ffmpeg \ - libmagic-dev + tesseract-ocr \ + tesseract-ocr-jpn \ + libsm6 \ + libxext6 \ + libreoffice \ + ffmpeg \ + libmagic-dev # Install torch and torchvision for unstructured RUN --mount=type=ssh \ diff --git a/docs/pages/app/ext/user-management.md b/docs/pages/app/ext/user-management.md index 988380f..1b3979e 100644 --- a/docs/pages/app/ext/user-management.md +++ b/docs/pages/app/ext/user-management.md @@ -11,4 +11,4 @@ Once enabled, you have access to the following features: - User login/logout (located in Settings Tab) - User changing password (located in Settings Tab) -- Create / List / Edit / Delete user (located in Admin > User Management Tab) +- Create / List / Edit / Delete user (located in Resources > Users Tab) diff --git a/libs/kotaemon/kotaemon/rerankings/tei_fast_rerank.py b/libs/kotaemon/kotaemon/rerankings/tei_fast_rerank.py index 4ac4b8e..3cb81f6 100644 --- a/libs/kotaemon/kotaemon/rerankings/tei_fast_rerank.py +++ b/libs/kotaemon/kotaemon/rerankings/tei_fast_rerank.py @@ -29,13 +29,18 @@ class TeiFastReranking(BaseReranking): ), ) is_truncated: Optional[bool] = Param(True, help="Whether to truncate the inputs") + max_tokens: Optional[int] = Param(512, help="This option is used to specify the maximum number of tokens supported by the reranker model.") def client(self, query, texts): + if self.is_truncated == True: + max_tokens = self.max_tokens # default is 512 tokens. + truncated_texts = [text[:max_tokens] for text in texts] + response = session.post( url=self.endpoint_url, json={ "query": query, - "texts": texts, + "texts": truncated_texts, "is_truncated": self.is_truncated, # default is True }, ).json()