Fix UI bugs (#8)

* Auto create conversation when the user starts

* Add conversation rename rule check

* Fix empty name during save

* Confirm deleting conversation

* Show a warning if the user doesn't select a file when uploading files in the File Index

* Feedback when user uploads duplicated file

* Limit the file types

* Fix valid username

* Allow login when the username has leading or trailing whitespace

* Improve the user

* Disable admin panel for non-admin user

* Refresh user lists after creating/deleting users

* Auto logging in

* Clear admin information upon signing out

* Fix being unable to receive uploaded filenames that include special characters, like !@#$%^&*().pdf

* Set upload validation for FileIndex

* Improve user management UI/UX

* Show extraction error when indexing file

* Return selected user -1 when signing out

* Fix default supported file types in file index

* Validate changing password

* Allow the selector to contain multiple gradio components

* A more tolerable placeholder screen

* Allow chat suggestion box

* Increase concurrency limit

* Make adobe loader optional

* Use BaseReasoning

---------

Co-authored-by: trducng <trungduc1992@gmail.com>
This commit is contained in:
ian_Cin
2024-04-03 16:33:54 +07:00
committed by GitHub
parent 43a18ba070
commit ecf09b275f
23 changed files with 936 additions and 255 deletions

View File

@@ -104,18 +104,16 @@ class CitationPipeline(BaseComponent):
print("CitationPipeline: invoking LLM")
llm_output = self.get_from_path("llm").invoke(messages, **llm_kwargs)
print("CitationPipeline: finish invoking LLM")
if not llm_output.messages:
return None
function_output = llm_output.messages[0].additional_kwargs["function_call"][
"arguments"
]
output = QuestionAnswer.parse_raw(function_output)
except Exception as e:
print(e)
return None
if not llm_output.messages:
return None
function_output = llm_output.messages[0].additional_kwargs["function_call"][
"arguments"
]
output = QuestionAnswer.parse_raw(function_output)
return output
async def ainvoke(self, context: str, question: str):

View File

@@ -5,7 +5,7 @@ from .docx_loader import DocxReader
from .excel_loader import PandasExcelReader
from .html_loader import HtmlReader
from .mathpix_loader import MathpixPDFReader
from .ocr_loader import OCRReader
from .ocr_loader import ImageReader, OCRReader
from .unstructured_loader import UnstructuredReader
__all__ = [
@@ -13,6 +13,7 @@ __all__ = [
"BaseReader",
"PandasExcelReader",
"MathpixPDFReader",
"ImageReader",
"OCRReader",
"DirectoryReader",
"UnstructuredReader",

View File

@@ -10,14 +10,6 @@ from llama_index.readers.base import BaseReader
from kotaemon.base import Document
from .utils.adobe import (
generate_figure_captions,
load_json,
parse_figure_paths,
parse_table_paths,
request_adobe_service,
)
logger = logging.getLogger(__name__)
DEFAULT_VLM_ENDPOINT = (
@@ -74,6 +66,13 @@ class AdobeReader(BaseReader):
includes 3 types: text, table, and image
"""
from .utils.adobe import (
generate_figure_captions,
load_json,
parse_figure_paths,
parse_table_paths,
request_adobe_service,
)
filename = file.name
filepath = str(Path(file).resolve())

View File

@@ -125,3 +125,70 @@ class OCRReader(BaseReader):
)
return documents
class ImageReader(BaseReader):
    """Read a file through the OCR endpoint, one Document per OCR result

    The file is posted to the FullOCR endpoint with `table_only` disabled, and
    the `csv_string` field of every entry in the response becomes the content
    of one `Document`.

    Example:
        ```python
        >> reader = ImageReader()
        >> documents = reader.load_data("path/to/image")
        ```

    Args:
        endpoint: URL to FullOCR endpoint. If not provided, will look for
            environment variable `OCR_READER_ENDPOINT` or fall back to the
            module-level `DEFAULT_OCR_ENDPOINT`.
    """

    def __init__(self, endpoint: Optional[str] = None):
        """Init the image reader with OCR endpoint (FullOCR pipeline)"""
        super().__init__()
        self.ocr_endpoint = endpoint or os.getenv(
            "OCR_READER_ENDPOINT", DEFAULT_OCR_ENDPOINT
        )

    def load_data(
        self, file_path: Path, extra_info: Optional[dict] = None, **kwargs
    ) -> List[Document]:
        """Load data using the OCR endpoint

        Args:
            file_path (Path): Path to the file sent to the OCR endpoint
            extra_info (dict): Metadata attached to every returned document
            response_content (keyword only, optional): Pre-computed OCR results
                used instead of calling the endpoint; expected to be an iterable
                of mappings that each carry a `csv_string` key

        Returns:
            List[Document]: one document per entry in the OCR results
        """
        if "response_content" in kwargs:
            # overriding response content if specified; the file is not needed
            # in this case, so it is not opened at all
            ocr_results = kwargs["response_content"]
        else:
            file_path = Path(file_path).resolve()
            # keep the file handle open for the whole request: it is streamed
            # as the multipart `input` field
            with file_path.open("rb") as content:
                resp = tenacious_api_post(
                    url=self.ocr_endpoint,
                    files={"input": content},
                    data={"job_id": uuid4(), "table_only": False},
                )
                ocr_results = resp.json()["result"]

        extra_info = extra_info or {}
        # give each document its own metadata dict so that mutating one
        # document's metadata does not leak into the others
        return [
            Document(
                content=ocr_result["csv_string"],
                metadata=dict(extra_info),
            )
            for ocr_result in ocr_results
        ]