diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..0a57312 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,13 @@ +.github/ +.git/ +.mypy_cache/ +__pycache__/ +ktem_app_data/ +env/ +.pre-commit-config.yaml +.commitlintrc +.gitignore +.gitattributes +README.md +*.zip +*.sh diff --git a/.env b/.env index e033553..526100a 100644 --- a/.env +++ b/.env @@ -1,8 +1,8 @@ # settings for OpenAI OPENAI_API_BASE=https://api.openai.com/v1 -OPENAI_API_KEY= -OPENAI_CHAT_MODEL=gpt-3.5-turbo -OPENAI_EMBEDDINGS_MODEL=text-embedding-ada-002 +OPENAI_API_KEY=openai_key +OPENAI_CHAT_MODEL=gpt-4o +OPENAI_EMBEDDINGS_MODEL=text-embedding-3-small # settings for Azure OpenAI AZURE_OPENAI_ENDPOINT= @@ -15,4 +15,21 @@ AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002 COHERE_API_KEY= # settings for local models -LOCAL_MODEL= +LOCAL_MODEL=llama3.1:8b +LOCAL_MODEL_EMBEDDINGS=nomic-embed-text + +# settings for GraphRAG +GRAPHRAG_API_KEY=openai_key +GRAPHRAG_LLM_MODEL=gpt-4o-mini +GRAPHRAG_EMBEDDING_MODEL=text-embedding-3-small + +# settings for Azure DI +AZURE_DI_ENDPOINT= +AZURE_DI_CREDENTIAL= + +# settings for Adobe API +PDF_SERVICES_CLIENT_ID= +PDF_SERVICES_CLIENT_SECRET= + +# settings for PDF.js +PDFJS_VERSION_DIST="pdfjs-4.0.379-dist" diff --git a/.gitignore b/.gitignore index 789304e..ef2704a 100644 --- a/.gitignore +++ b/.gitignore @@ -471,3 +471,4 @@ doc_env/ # application data ktem_app_data/ +gradio_tmp/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..497a0ad --- /dev/null +++ b/Dockerfile @@ -0,0 +1,37 @@ +# syntax=docker/dockerfile:1.0.0-experimental +FROM python:3.10-slim as base_image + +# for additional file parsers + +# tesseract-ocr \ +# tesseract-ocr-jpn \ +# libsm6 \ +# libxext6 \ +# ffmpeg \ + +RUN apt update -qqy \ + && apt install -y \ + ssh git \ + gcc g++ \ + poppler-utils \ + libpoppler-dev \ + && \ + apt-get clean && \ + apt-get autoremove + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 +ENV 
PYTHONIOENCODING=UTF-8 + +WORKDIR /app + + +FROM base_image as dev + +COPY . /app +RUN --mount=type=ssh pip install -e "libs/kotaemon[all]" +RUN --mount=type=ssh pip install -e "libs/ktem" +RUN pip install graphrag future +RUN pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements" + +ENTRYPOINT ["gradio", "app.py"] diff --git a/README.md b/README.md index b7e124f..295875f 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # kotaemon -An open-source tool for chatting with your documents. Built with both end users and +An open-source clean & customizable RAG UI for chatting with your documents. Built with both end users and developers in mind. -https://github.com/Cinnamon/kotaemon/assets/25688648/815ecf68-3a02-4914-a0dd-3f8ec7e75cd9 +![Preview](docs/images/preview-graph.png) -[Source Code](https://github.com/Cinnamon/kotaemon) | -[Live Demo](https://huggingface.co/spaces/cin-model/kotaemon-public) +[Live Demo](https://huggingface.co/spaces/taprosoft/kotaemon) | +[Source Code](https://github.com/Cinnamon/kotaemon) [User Guide](https://cinnamon.github.io/kotaemon/) | [Developer Guide](https://cinnamon.github.io/kotaemon/development/) | @@ -14,20 +14,23 @@ https://github.com/Cinnamon/kotaemon/assets/25688648/815ecf68-3a02-4914-a0dd-3f8 [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/release/python-31013/) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) + +docker pull taprosoft/kotaemon:v1.0 [![built with Codeium](https://codeium.com/badges/main)](https://codeium.com) -This project would like to appeal to both end users who want to do QA on their -documents and developers who want to build their own QA pipeline. 
+## Introduction + +This project serves as a functional RAG UI for both end users who want to do QA on their +documents and developers who want to build their own RAG pipeline. - For end users: - - A local Question Answering UI for RAG-based QA. + - A clean & minimalistic UI for RAG-based QA. - Supports LLM API providers (OpenAI, AzureOpenAI, Cohere, etc) and local LLMs - (currently only GGUF format is supported via `llama-cpp-python`). - - Easy installation scripts, no environment setup required. + (via `ollama` and `llama-cpp-python`). + - Easy installation scripts. - For developers: - - A framework for building your own RAG-based QA pipeline. - - See your RAG pipeline in action with the provided UI (built with Gradio). - - Share your pipeline so that others can use it. + - A framework for building your own RAG-based document QA pipeline. + - Customize and see your RAG pipeline in action with the provided UI (built with Gradio). ```yml +----------------------------------------------------------------------------+ @@ -45,78 +48,128 @@ documents and developers who want to build their own QA pipeline. ``` This repository is under active development. Feedback, issues, and PRs are highly -appreciated. Your input is valuable as it helps us persuade our business guys to support -open source. +appreciated. + +## Key Features + +- **Host your own document QA (RAG) web-UI**. Support multi-user login, organize your files in private / public collections, collaborate and share your favorite chat with others. + +- **Organize your LLM & Embedding models**. Support both local LLMs & popular API providers (OpenAI, Azure, Ollama, Groq). + +- **Hybrid RAG pipeline**. Sane default RAG pipeline with hybrid (full-text & vector) retriever + re-ranking to ensure best retrieval quality. + +- **Multi-modal QA support**. Perform Question Answering on multiple documents with figures & tables support. Support multi-modal document parsing (selectable options on UI). 
+ +- **Advanced citations with document preview**. By default the system will provide detailed citations to ensure the correctness of LLM answers. View your citations (incl. relevant score) directly in the _in-browser PDF viewer_ with highlights. Warns when the retrieval pipeline returns articles with low relevance. + +- **Support complex reasoning methods**. Use question decomposition to answer your complex / multi-hop question. Support agent-based reasoning with ReAct, ReWOO and other agents. + +- **Configurable settings UI**. You can adjust most important aspects of retrieval & generation process on the UI (incl. prompts). + +- **Extensible**. Being built on Gradio, you are free to customize / add any UI elements as you like. Also, we aim to support multiple strategies for document indexing & retrieval. `GraphRAG` indexing pipeline is provided as an example. + +![Preview](docs/images/preview.png) ## Installation ### For end users This document is intended for developers. If you just want to install and use the app as -it, please follow the [User Guide](https://cinnamon.github.io/kotaemon/). +it is, please follow the non-technical [User Guide](https://cinnamon.github.io/kotaemon/) (WIP). ### For developers -```shell -# Create a environment -python -m venv kotaemon-env +#### With Docker (recommended) -# Activate the environment -source kotaemon-env/bin/activate +- Use this command to launch the server -# Install the package -pip install git+https://github.com/Cinnamon/kotaemon.git +``` +docker run \ +-e GRADIO_SERVER_NAME=0.0.0.0 \ +-e GRADIO_SERVER_PORT=7860 \ +-p 7860:7860 -it --rm \ +taprosoft/kotaemon:v1.0 ``` -### For Contributors +Navigate to `http://localhost:7860/` to access the web UI. + +#### Without Docker + +- Clone and install required packages on a fresh python environment.
```shell -# Clone the repo -git clone git@github.com:Cinnamon/kotaemon.git +# optional (setup env) +conda create -n kotaemon python=3.10 +conda activate kotaemon -# Create a environment -python -m venv kotaemon-env - -# Activate the environment -source kotaemon-env/bin/activate +# clone this repo +git clone https://github.com/Cinnamon/kotaemon cd kotaemon -# Install the package in editable mode pip install -e "libs/kotaemon[all]" pip install -e "libs/ktem" -pip install -e "." - -# Setup pre-commit -pre-commit install ``` -## Creating your application +- View and edit your environment variables (API keys, end-points) in `.env`. -In order to create your own application, you need to prepare these files: +- (Optional) To enable in-browser PDF_JS viewer, download [PDF_JS_DIST](https://github.com/mozilla/pdf.js/releases/download/v4.0.379/pdfjs-4.0.379-dist.zip) and extract it to `libs/ktem/ktem/assets/prebuilt` + +pdf-setup + +- Start the web server: + +```shell +python app.py +``` + +The app will be automatically launched in your browser. + +Default username / password are: `admin` / `admin`. You can setup additional users directly on the UI. + +![Chat tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/chat-tab.png) + +## Customize your application + +By default, all application data are stored in `./ktem_app_data` folder. You can backup or copy this folder to move your installation to a new machine. + +For advance users or specific use-cases, you can customize those files: - `flowsettings.py` -- `app.py` -- `.env` (Optional) +- `.env` ### `flowsettings.py` This file contains the configuration of your application. You can use the example -[here](https://github.com/Cinnamon/kotaemon/blob/main/libs/ktem/flowsettings.py) as the +[here](flowsettings.py) as the starting point. -### `app.py` +
-This file is where you create your Gradio app object. This can be as simple as: +Notable settings -```python -from ktem.main import App +``` +# setup your preferred document store (with full-text search capabilities) +KH_DOCSTORE=(Elasticsearch | LanceDB | SimpleFileDocumentStore) -app = App() -demo = app.make() -demo.launch() +# setup your preferred vectorstore (for vector-based search) +KH_VECTORSTORE=(ChromaDB | LanceDB) + +# Enable / disable multimodal QA +KH_REASONINGS_USE_MULTIMODAL=True + +# Setup your new reasoning pipeline or modify existing one. +KH_REASONINGS = [ + "ktem.reasoning.simple.FullQAPipeline", + "ktem.reasoning.simple.FullDecomposeQAPipeline", + "ktem.reasoning.react.ReactAgentPipeline", + "ktem.reasoning.rewoo.RewooAgentPipeline", +] + ``` -### `.env` (Optional) +
+ +### `.env` This file provides another way to configure your models and credentials. @@ -159,18 +212,22 @@ AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002 #### Local models -- Pros: -- Privacy. Your documents will be stored and process locally. -- Choices. There are a wide range of LLMs in terms of size, domain, language to choose - from. -- Cost. It's free. -- Cons: -- Quality. Local models are much smaller and thus have lower generative quality than - paid APIs. -- Speed. Local models are deployed using your machine so the processing speed is - limited by your hardware. +##### Using ollama OpenAI compatible server -##### Find and download a LLM +Install [ollama](https://github.com/ollama/ollama) and start the application. + +Pull your model (e.g.): + +``` +ollama pull llama3.1:8b +ollama pull nomic-embed-text +``` + +Set the model names on the web UI and make them the default. + +![Models](docs/images/models.png) + +##### Using GGUF with llama-cpp-python You can search and download a LLM to be ran locally from the [Hugging Face Hub](https://huggingface.co/models). Currently, these model formats are supported: @@ -187,33 +244,26 @@ Here are some recommendations and their size in memory: - [Qwen1.5-1.8B-Chat-GGUF](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q8_0.gguf?download=true): around 2 GB -##### Enable local models +Add a new LlamaCpp model with the provided model name on the web UI. -To add a local model to the model pool, set the `LOCAL_MODEL` variable in the `.env` -file to the path of the model file. - -```shell -LOCAL_MODEL= -``` - -Here is how to get the full path of your model file: - -- On Windows 11: right click the file and select `Copy as Path`. -## Start your application +## Adding your own RAG pipeline -Simply run the following command: +#### Custom reasoning pipeline -```shell -python app.py -``` +First, check the default pipeline implementation in +[here](libs/ktem/ktem/reasoning/simple.py).
You can make quick adjustments to how the default QA pipeline works. -The app will be automatically launched in your browser. +Next, if you feel comfortable adding a new pipeline, add a new `.py` implementation in `libs/ktem/ktem/reasoning/` and later include it in `flowsettings.py` to enable it on the UI. -![Chat tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/chat-tab.png) +#### Custom indexing pipeline -## Customize your application +Check sample implementation in `libs/ktem/ktem/index/file/graph` + +(more instruction WIP). + +## Developer guide Please refer to the [Developer Guide](https://cinnamon.github.io/kotaemon/development/) for more details. diff --git a/app.py b/app.py index 2ac7a1a..a432d17 100644 --- a/app.py +++ b/app.py @@ -1,5 +1,24 @@ -from ktem.main import App +import os + +from theflow.settings import settings as flowsettings + +KH_APP_DATA_DIR = getattr(flowsettings, "KH_APP_DATA_DIR", ".") +GRADIO_TEMP_DIR = os.getenv("GRADIO_TEMP_DIR", None) +# override GRADIO_TEMP_DIR if it's not set +if GRADIO_TEMP_DIR is None: + GRADIO_TEMP_DIR = os.path.join(KH_APP_DATA_DIR, "gradio_tmp") + os.environ["GRADIO_TEMP_DIR"] = GRADIO_TEMP_DIR + + +from ktem.main import App # noqa app = App() demo = app.make() -demo.queue().launch(favicon_path=app._favicon, inbrowser=True) +demo.queue().launch( + favicon_path=app._favicon, + inbrowser=True, + allowed_paths=[ + "libs/ktem/ktem/assets", + GRADIO_TEMP_DIR, + ], +) diff --git a/docs/about.md b/docs/about.md index 4792b87..fca0067 100644 --- a/docs/about.md +++ b/docs/about.md @@ -9,3 +9,6 @@ developers in mind.
[User Guide](https://cinnamon.github.io/kotaemon/) | [Developer Guide](https://cinnamon.github.io/kotaemon/development/) | [Feedback](https://github.com/Cinnamon/kotaemon/issues) + +[Dark Mode](?__theme=dark) | +[Light Mode](?__theme=light) diff --git a/docs/images/info-panel-scores.png b/docs/images/info-panel-scores.png new file mode 100644 index 0000000..274ca49 Binary files /dev/null and b/docs/images/info-panel-scores.png differ diff --git a/docs/images/models.png b/docs/images/models.png new file mode 100644 index 0000000..91b14df Binary files /dev/null and b/docs/images/models.png differ diff --git a/docs/images/pdf-viewer-setup.png b/docs/images/pdf-viewer-setup.png new file mode 100644 index 0000000..135cf5f Binary files /dev/null and b/docs/images/pdf-viewer-setup.png differ diff --git a/docs/images/preview-graph.png b/docs/images/preview-graph.png new file mode 100644 index 0000000..53c13c2 Binary files /dev/null and b/docs/images/preview-graph.png differ diff --git a/docs/images/preview.png b/docs/images/preview.png new file mode 100644 index 0000000..76f2125 Binary files /dev/null and b/docs/images/preview.png differ diff --git a/docs/pages/app/index/file.md b/docs/pages/app/index/file.md index 2d479c5..e3bb344 100644 --- a/docs/pages/app/index/file.md +++ b/docs/pages/app/index/file.md @@ -107,9 +107,9 @@ string rather than a string. ## Software infrastructure -| Infra | Access | Schema | Ref | -| ---------------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------- | -| SQL table Source | self.\_Source | - id (int): id of the source (auto)
- name (str): the name of the file
- path (str): the path of the file
- size (int): the file size in bytes
- text_length (int): the number of characters in the file (default 0)
- date_created (datetime): the time the file is created (auto) | This is SQLALchemy ORM class. Can consult | -| SQL table Index | self.\_Index | - id (int): id of the index entry (auto)
- source_id (int): the id of a file in the Source table
- target_id: the id of the segment in docstore or vector store
- relation_type (str): if the link is "document" or "vector" | This is SQLAlchemy ORM class | -| Vector store | self.\_VS | - self.\_VS.add: add the list of embeddings to the vector store (optionally associate metadata and ids)
- self.\_VS.delete: delete vector entries based on ids
- self.\_VS.query: get embeddings based on embeddings. | kotaemon > storages > vectorstores > BaseVectorStore | -| Doc store | self.\_DS | - self.\_DS.add: add the segments to document stores
- self.\_DS.get: get the segments based on id
- self.\_DS.get_all: get all segments
- self.\_DS.delete: delete segments based on id | kotaemon > storages > docstores > base > BaseDocumentStore | +| Infra | Access | Schema | Ref | +| ---------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------- | +| SQL table Source | self.\_Source | - id (int): id of the source (auto)
- name (str): the name of the file
- path (str): the path of the file
- size (int): the file size in bytes
- note (dict): allow extra optional information about the file
- date_created (datetime): the time the file is created (auto) | This is SQLAlchemy ORM class. Can consult | +| SQL table Index | self.\_Index | - id (int): id of the index entry (auto)
- source_id (int): the id of a file in the Source table
- target_id: the id of the segment in docstore or vector store
- relation_type (str): if the link is "document" or "vector" | This is SQLAlchemy ORM class | +| Vector store | self.\_VS | - self.\_VS.add: add the list of embeddings to the vector store (optionally associate metadata and ids)
- self.\_VS.delete: delete vector entries based on ids
- self.\_VS.query: get embeddings based on embeddings. | kotaemon > storages > vectorstores > BaseVectorStore | +| Doc store | self.\_DS | - self.\_DS.add: add the segments to document stores
- self.\_DS.get: get the segments based on id
- self.\_DS.get_all: get all segments
- self.\_DS.delete: delete segments based on id | kotaemon > storages > docstores > base > BaseDocumentStore | diff --git a/docs/usage.md b/docs/usage.md index 348e414..f4e9f22 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,5 +1,3 @@ -# Basic Usage - ## 1. Add your AI models ![resources tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/resources-tab.png) @@ -63,12 +61,15 @@ AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002 # change to your deplo ### Local models -- Pros: +Pros: + - Privacy. Your documents will be stored and process locally. - Choices. There are a wide range of LLMs in terms of size, domain, language to choose from. - Cost. It's free. -- Cons: + +Cons: + - Quality. Local models are much smaller and thus have lower generative quality than paid APIs. - Speed. Local models are deployed using your machine so the processing speed is @@ -136,6 +137,21 @@ Now navigate back to the `Chat` tab. The chat tab is divided into 3 regions: files will be considered during chat. 2. Chat Panel - This is where you can chat with the chatbot. -3. Information panel - - Supporting information such as the retrieved evidence and reference will be - displayed here. +3. Information Panel + +![information panel](https://raw.githubusercontent.com/Cinnamon/kotaemon/develop/docs/images/info-panel-scores.png) + +- Supporting information such as the retrieved evidence and reference will be + displayed here. +- Direct citation for the answer produced by the LLM is highlighted. +- The confidence score of the answer and relevant scores of evidences are displayed to quickly assess the quality of the answer and retrieved content. + +- Meaning of the score displayed: + - **Answer confidence**: answer confidence level from the LLM model. + - **Relevance score**: overall relevant score between evidence and user question. 
+ - **Vectorstore score**: relevant score from vector embedding similarity calculation (show `full-text search` if retrieved from full-text search DB). + - **LLM relevant score**: relevant score from LLM model (which judge relevancy between question and evidence using specific prompt). + - **Reranking score**: relevant score from Cohere [reranking model](https://cohere.com/rerank). + +Generally, the score quality is `LLM relevant score` > `Reranking score` > `Vectorscore`. +By default, overall relevance score is taken directly from LLM relevant score. Evidences are sorted based on their overall relevance score and whether they have citation or not. diff --git a/flowsettings.py b/flowsettings.py index 8d7f3cc..aa3489c 100644 --- a/flowsettings.py +++ b/flowsettings.py @@ -15,7 +15,7 @@ this_dir = Path(this_file).parent # change this if your app use a different name KH_PACKAGE_NAME = "kotaemon_app" -KH_APP_VERSION = os.environ.get("KH_APP_VERSION", None) +KH_APP_VERSION = config("KH_APP_VERSION", "local") if not KH_APP_VERSION: try: # Caution: This might produce the wrong version @@ -33,8 +33,21 @@ KH_APP_DATA_DIR.mkdir(parents=True, exist_ok=True) KH_USER_DATA_DIR = KH_APP_DATA_DIR / "user_data" KH_USER_DATA_DIR.mkdir(parents=True, exist_ok=True) -# doc directory -KH_DOC_DIR = this_dir / "docs" +# markdown output directory +KH_MARKDOWN_OUTPUT_DIR = KH_APP_DATA_DIR / "markdown_cache_dir" +KH_MARKDOWN_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + +# chunks output directory +KH_CHUNKS_OUTPUT_DIR = KH_APP_DATA_DIR / "chunks_cache_dir" +KH_CHUNKS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + +# zip output directory +KH_ZIP_OUTPUT_DIR = KH_APP_DATA_DIR / "zip_cache_dir" +KH_ZIP_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + +# zip input directory +KH_ZIP_INPUT_DIR = KH_APP_DATA_DIR / "zip_cache_dir_in" +KH_ZIP_INPUT_DIR.mkdir(parents=True, exist_ok=True) # HF models can be big, let's store them in the app data directory so that it's easier # for users to manage their 
storage. @@ -42,24 +55,30 @@ KH_DOC_DIR = this_dir / "docs" os.environ["HF_HOME"] = str(KH_APP_DATA_DIR / "huggingface") os.environ["HF_HUB_CACHE"] = str(KH_APP_DATA_DIR / "huggingface") -COHERE_API_KEY = config("COHERE_API_KEY", default="") +# doc directory +KH_DOC_DIR = this_dir / "docs" + KH_MODE = "dev" -KH_FEATURE_USER_MANAGEMENT = False +KH_FEATURE_USER_MANAGEMENT = True +KH_USER_CAN_SEE_PUBLIC = None KH_FEATURE_USER_MANAGEMENT_ADMIN = str( config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin") ) KH_FEATURE_USER_MANAGEMENT_PASSWORD = str( - config("KH_FEATURE_USER_MANAGEMENT_PASSWORD", default="XsdMbe8zKP8KdeE@") + config("KH_FEATURE_USER_MANAGEMENT_PASSWORD", default="admin") ) KH_ENABLE_ALEMBIC = False KH_DATABASE = f"sqlite:///{KH_USER_DATA_DIR / 'sql.db'}" KH_FILESTORAGE_PATH = str(KH_USER_DATA_DIR / "files") KH_DOCSTORE = { - "__type__": "kotaemon.storages.SimpleFileDocumentStore", + # "__type__": "kotaemon.storages.ElasticsearchDocumentStore", + # "__type__": "kotaemon.storages.SimpleFileDocumentStore", + "__type__": "kotaemon.storages.LanceDBDocumentStore", "path": str(KH_USER_DATA_DIR / "docstore"), } KH_VECTORSTORE = { + # "__type__": "kotaemon.storages.LanceDBVectorStore", "__type__": "kotaemon.storages.ChromaVectorStore", "path": str(KH_USER_DATA_DIR / "vectorstore"), } @@ -83,8 +102,6 @@ if config("AZURE_OPENAI_API_KEY", default="") and config( "timeout": 20, }, "default": False, - "accuracy": 5, - "cost": 5, } if config("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default=""): KH_EMBEDDINGS["azure"] = { @@ -110,71 +127,66 @@ if config("OPENAI_API_KEY", default=""): "base_url": config("OPENAI_API_BASE", default="") or "https://api.openai.com/v1", "api_key": config("OPENAI_API_KEY", default=""), - "model": config("OPENAI_CHAT_MODEL", default="") or "gpt-3.5-turbo", + "model": config("OPENAI_CHAT_MODEL", default="gpt-3.5-turbo"), + "timeout": 20, + }, + "default": True, + } + KH_EMBEDDINGS["openai"] = { + "spec": { + "__type__": 
"kotaemon.embeddings.OpenAIEmbeddings", + "base_url": config("OPENAI_API_BASE", default="https://api.openai.com/v1"), + "api_key": config("OPENAI_API_KEY", default=""), + "model": config( + "OPENAI_EMBEDDINGS_MODEL", default="text-embedding-ada-002" + ), "timeout": 10, - }, - "default": False, - } - if len(KH_EMBEDDINGS) < 1: - KH_EMBEDDINGS["openai"] = { - "spec": { - "__type__": "kotaemon.embeddings.OpenAIEmbeddings", - "base_url": config("OPENAI_API_BASE", default="") - or "https://api.openai.com/v1", - "api_key": config("OPENAI_API_KEY", default=""), - "model": config( - "OPENAI_EMBEDDINGS_MODEL", default="text-embedding-ada-002" - ) - or "text-embedding-ada-002", - "timeout": 10, - }, - "default": False, - } - -if config("LOCAL_MODEL", default=""): - KH_LLMS["local"] = { - "spec": { - "__type__": "kotaemon.llms.EndpointChatLLM", - "endpoint_url": "http://localhost:31415/v1/chat/completions", - }, - "default": False, - "cost": 0, - } - if len(KH_EMBEDDINGS) < 1: - KH_EMBEDDINGS["local"] = { - "spec": { - "__type__": "kotaemon.embeddings.EndpointEmbeddings", - "endpoint_url": "http://localhost:31415/v1/embeddings", - }, - "default": False, - "cost": 0, - } - -if len(KH_EMBEDDINGS) < 1: - KH_EMBEDDINGS["local-bge-base-en-v1.5"] = { - "spec": { - "__type__": "kotaemon.embeddings.FastEmbedEmbeddings", - "model_name": "BAAI/bge-base-en-v1.5", + "context_length": 8191, }, "default": True, } -KH_REASONINGS = ["ktem.reasoning.simple.FullQAPipeline"] +if config("LOCAL_MODEL", default=""): + KH_LLMS["ollama"] = { + "spec": { + "__type__": "kotaemon.llms.ChatOpenAI", + "base_url": "http://localhost:11434/v1/", + "model": config("LOCAL_MODEL", default="llama3.1:8b"), + }, + "default": False, + } + KH_EMBEDDINGS["ollama"] = { + "spec": { + "__type__": "kotaemon.embeddings.OpenAIEmbeddings", + "base_url": "http://localhost:11434/v1/", + "model": config("LOCAL_MODEL_EMBEDDINGS", default="nomic-embed-text"), + }, + "default": False, + } + + KH_EMBEDDINGS["local-bge-en"] = { + 
"spec": { + "__type__": "kotaemon.embeddings.FastEmbedEmbeddings", + "model_name": "BAAI/bge-base-en-v1.5", + }, + "default": False, + } + +KH_REASONINGS = [ + "ktem.reasoning.simple.FullQAPipeline", + "ktem.reasoning.simple.FullDecomposeQAPipeline", + "ktem.reasoning.react.ReactAgentPipeline", + "ktem.reasoning.rewoo.RewooAgentPipeline", +] +KH_REASONINGS_USE_MULTIMODAL = False KH_VLM_ENDPOINT = "{0}/openai/deployments/{1}/chat/completions?api-version={2}".format( config("AZURE_OPENAI_ENDPOINT", default=""), - config("OPENAI_VISION_DEPLOYMENT_NAME", default="gpt-4-vision"), + config("OPENAI_VISION_DEPLOYMENT_NAME", default="gpt-4o"), config("OPENAI_API_VERSION", default=""), ) -SETTINGS_APP = { - "lang": { - "name": "Language", - "value": "en", - "choices": [("English", "en"), ("Japanese", "ja")], - "component": "dropdown", - } -} +SETTINGS_APP: dict[str, dict] = {} SETTINGS_REASONING = { @@ -187,17 +199,42 @@ SETTINGS_REASONING = { "lang": { "name": "Language", "value": "en", - "choices": [("English", "en"), ("Japanese", "ja")], + "choices": [("English", "en"), ("Japanese", "ja"), ("Vietnamese", "vi")], "component": "dropdown", }, + "max_context_length": { + "name": "Max context length (LLM)", + "value": 32000, + "component": "number", + }, } -KH_INDEX_TYPES = ["ktem.index.file.FileIndex"] +KH_INDEX_TYPES = [ + "ktem.index.file.FileIndex", + "ktem.index.file.graph.GraphRAGIndex", +] KH_INDICES = [ { "name": "File", - "config": {}, + "config": { + "supported_file_types": ( + ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, " + ".pptx, .csv, .html, .mhtml, .txt, .zip" + ), + "private": False, + }, "index_type": "ktem.index.file.FileIndex", }, + { + "name": "GraphRAG", + "config": { + "supported_file_types": ( + ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, " + ".pptx, .csv, .html, .mhtml, .txt, .zip" + ), + "private": False, + }, + "index_type": "ktem.index.file.graph.GraphRAGIndex", + }, ] diff --git 
a/libs/kotaemon/kotaemon/agents/react/agent.py b/libs/kotaemon/kotaemon/agents/react/agent.py index dffcf6b..73d2d21 100644 --- a/libs/kotaemon/kotaemon/agents/react/agent.py +++ b/libs/kotaemon/kotaemon/agents/react/agent.py @@ -39,16 +39,11 @@ class ReactAgent(BaseAgent): ) max_iterations: int = 5 strict_decode: bool = False - trim_func: TokenSplitter = TokenSplitter.withx( - chunk_size=800, - chunk_overlap=0, - separator=" ", - tokenizer=partial( - tiktoken.encoding_for_model("gpt-3.5-turbo").encode, - allowed_special=set(), - disallowed_special="all", - ), + max_context_length: int = Param( + default=3000, + help="Max context length for each tool output.", ) + trim_func: TokenSplitter | None = None def _compose_plugin_description(self) -> str: """ @@ -149,14 +144,28 @@ class ReactAgent(BaseAgent): function_map[plugin.name] = plugin return function_map - def _trim(self, text: str) -> str: + def _trim(self, text: str | Document) -> str: """ Trim the text to the maximum token length. """ + evidence_trim_func = ( + self.trim_func + if self.trim_func + else TokenSplitter( + chunk_size=self.max_context_length, + chunk_overlap=0, + separator=" ", + tokenizer=partial( + tiktoken.encoding_for_model("gpt-3.5-turbo").encode, + allowed_special=set(), + disallowed_special="all", + ), + ) + ) if isinstance(text, str): - texts = self.trim_func([Document(text=text)]) + texts = evidence_trim_func([Document(text=text)]) elif isinstance(text, Document): - texts = self.trim_func([text]) + texts = evidence_trim_func([text]) else: raise ValueError("Invalid text type to trim") trim_text = texts[0].text diff --git a/libs/kotaemon/kotaemon/agents/rewoo/agent.py b/libs/kotaemon/kotaemon/agents/rewoo/agent.py index 3ec1950..076595b 100644 --- a/libs/kotaemon/kotaemon/agents/rewoo/agent.py +++ b/libs/kotaemon/kotaemon/agents/rewoo/agent.py @@ -39,16 +39,11 @@ class RewooAgent(BaseAgent): examples: dict[str, str | list[str]] = Param( default_callback=lambda _: {}, help="Examples to be used 
in the agent." ) - trim_func: TokenSplitter = TokenSplitter.withx( - chunk_size=3000, - chunk_overlap=0, - separator=" ", - tokenizer=partial( - tiktoken.encoding_for_model("gpt-3.5-turbo").encode, - allowed_special=set(), - disallowed_special="all", - ), + max_context_length: int = Param( + default=3000, + help="Max context length for each tool output.", ) + trim_func: TokenSplitter | None = None @Node.auto(depends_on=["planner_llm", "plugins", "prompt_template", "examples"]) def planner(self): @@ -248,8 +243,22 @@ class RewooAgent(BaseAgent): return p def _trim_evidence(self, evidence: str): + evidence_trim_func = ( + self.trim_func + if self.trim_func + else TokenSplitter( + chunk_size=self.max_context_length, + chunk_overlap=0, + separator=" ", + tokenizer=partial( + tiktoken.encoding_for_model("gpt-3.5-turbo").encode, + allowed_special=set(), + disallowed_special="all", + ), + ) + ) if evidence: - texts = self.trim_func([Document(text=evidence)]) + texts = evidence_trim_func([Document(text=evidence)]) evidence = texts[0].text logging.info(f"len (trimmed): {len(evidence)}") return evidence @@ -317,6 +326,14 @@ class RewooAgent(BaseAgent): ) print("Planner output:", planner_text_output) + # output planner to info panel + yield AgentOutput( + text="", + agent_type=self.agent_type, + status="thinking", + intermediate_steps=[{"planner_log": planner_text_output}], + ) + # Work worker_evidences, plugin_cost, plugin_token = self._get_worker_evidence( planner_evidences, evidence_level @@ -326,7 +343,9 @@ class RewooAgent(BaseAgent): worker_log += f"{plan}: {plans[plan]}\n" current_progress = f"{plan}: {plans[plan]}\n" for e in plan_to_es[plan]: + worker_log += f"#Action: {planner_evidences.get(e, None)}\n" worker_log += f"{e}: {worker_evidences[e]}\n" + current_progress += f"#Action: {planner_evidences.get(e, None)}\n" current_progress += f"{e}: {worker_evidences[e]}\n" yield AgentOutput( diff --git a/libs/kotaemon/kotaemon/agents/tools/google.py 
b/libs/kotaemon/kotaemon/agents/tools/google.py index 80009e2..98c5eaf 100644 --- a/libs/kotaemon/kotaemon/agents/tools/google.py +++ b/libs/kotaemon/kotaemon/agents/tools/google.py @@ -1,7 +1,7 @@ from typing import AnyStr, Optional, Type from urllib.error import HTTPError -from langchain.utilities import SerpAPIWrapper +from langchain_community.utilities import SerpAPIWrapper from pydantic import BaseModel, Field from .base import BaseTool diff --git a/libs/kotaemon/kotaemon/agents/tools/llm.py b/libs/kotaemon/kotaemon/agents/tools/llm.py index ae2dd2e..d7cf748 100644 --- a/libs/kotaemon/kotaemon/agents/tools/llm.py +++ b/libs/kotaemon/kotaemon/agents/tools/llm.py @@ -22,12 +22,16 @@ class LLMTool(BaseTool): ) llm: BaseLLM args_schema: Optional[Type[BaseModel]] = LLMArgs + dummy_mode: bool = True def _run_tool(self, query: AnyStr) -> str: output = None try: - response = self.llm(query) + if not self.dummy_mode: + response = self.llm(query) + else: + response = None except ValueError: raise ToolException("LLM Tool call failed") - output = response.text + output = response.text if response else "<->" return output diff --git a/libs/kotaemon/kotaemon/base/schema.py b/libs/kotaemon/kotaemon/base/schema.py index a153ed3..ea26032 100644 --- a/libs/kotaemon/kotaemon/base/schema.py +++ b/libs/kotaemon/kotaemon/base/schema.py @@ -5,8 +5,8 @@ from typing import TYPE_CHECKING, Any, Literal, Optional, TypeVar from langchain.schema.messages import AIMessage as LCAIMessage from langchain.schema.messages import HumanMessage as LCHumanMessage from langchain.schema.messages import SystemMessage as LCSystemMessage -from llama_index.bridge.pydantic import Field -from llama_index.schema import Document as BaseDocument +from llama_index.core.bridge.pydantic import Field +from llama_index.core.schema import Document as BaseDocument if TYPE_CHECKING: from haystack.schema import Document as HaystackDocument @@ -38,7 +38,7 @@ class Document(BaseDocument): content: Any = None source: 
Optional[str] = None - channel: Optional[Literal["chat", "info", "index", "debug"]] = None + channel: Optional[Literal["chat", "info", "index", "debug", "plot"]] = None def __init__(self, content: Optional[Any] = None, *args, **kwargs): if content is None: @@ -140,6 +140,7 @@ class LLMInterface(AIMessage): total_cost: float = 0 logits: list[list[float]] = Field(default_factory=list) messages: list[AIMessage] = Field(default_factory=list) + logprobs: list[float] = [] class ExtractorOutput(Document): diff --git a/libs/kotaemon/kotaemon/contribs/promptui/ui/chat.py b/libs/kotaemon/kotaemon/contribs/promptui/ui/chat.py index 78eecb5..b3ef356 100644 --- a/libs/kotaemon/kotaemon/contribs/promptui/ui/chat.py +++ b/libs/kotaemon/kotaemon/contribs/promptui/ui/chat.py @@ -133,9 +133,7 @@ def construct_chat_ui( label="Output file", show_label=True, height=100 ) export_btn = gr.Button("Export") - export_btn.click( - func_export_to_excel, inputs=None, outputs=exported_file - ) + export_btn.click(func_export_to_excel, inputs=[], outputs=exported_file) with gr.Row(): with gr.Column(): diff --git a/libs/kotaemon/kotaemon/contribs/promptui/ui/pipeline.py b/libs/kotaemon/kotaemon/contribs/promptui/ui/pipeline.py index 725893d..978a751 100644 --- a/libs/kotaemon/kotaemon/contribs/promptui/ui/pipeline.py +++ b/libs/kotaemon/kotaemon/contribs/promptui/ui/pipeline.py @@ -91,7 +91,7 @@ def construct_pipeline_ui( save_btn.click(func_save, inputs=params, outputs=history_dataframe) load_params_btn = gr.Button("Reload params") load_params_btn.click( - func_load_params, inputs=None, outputs=history_dataframe + func_load_params, inputs=[], outputs=history_dataframe ) history_dataframe.render() history_dataframe.select( @@ -103,7 +103,7 @@ def construct_pipeline_ui( export_btn = gr.Button( "Export (Result will be in Exported file next to Output)" ) - export_btn.click(func_export, inputs=None, outputs=exported_file) + export_btn.click(func_export, inputs=[], outputs=exported_file) with gr.Row(): 
with gr.Column(): if params: diff --git a/libs/kotaemon/kotaemon/embeddings/openai.py b/libs/kotaemon/kotaemon/embeddings/openai.py index 74655dc..604540d 100644 --- a/libs/kotaemon/kotaemon/embeddings/openai.py +++ b/libs/kotaemon/kotaemon/embeddings/openai.py @@ -1,5 +1,15 @@ +from itertools import islice from typing import Optional +import numpy as np +import openai +import tiktoken +from tenacity import ( + retry, + retry_if_not_exception_type, + stop_after_attempt, + wait_random_exponential, +) from theflow.utils.modules import import_dotted_string from kotaemon.base import Param @@ -7,6 +17,24 @@ from kotaemon.base import Param from .base import BaseEmbeddings, Document, DocumentWithEmbedding +def split_text_by_chunk_size(text: str, chunk_size: int) -> list[list[int]]: + """Split the text into chunks of a given size + + Args: + text: text to split + chunk_size: size of each chunk + + Returns: + list of chunks (as tokens) + """ + encoding = tiktoken.get_encoding("cl100k_base") + tokens = iter(encoding.encode(text)) + result = [] + while chunk := list(islice(tokens, chunk_size)): + result.append(chunk) + return result + + class BaseOpenAIEmbeddings(BaseEmbeddings): """Base interface for OpenAI embedding model, using the openai library. @@ -32,6 +60,9 @@ class BaseOpenAIEmbeddings(BaseEmbeddings): "Only supported in `text-embedding-3` and later models." 
), ) + context_length: Optional[int] = Param( + None, help="The maximum context length of the embedding model" + ) @Param.auto(depends_on=["max_retries"]) def max_retries_(self): @@ -56,16 +87,42 @@ class BaseOpenAIEmbeddings(BaseEmbeddings): def invoke( self, text: str | list[str] | Document | list[Document], *args, **kwargs ) -> list[DocumentWithEmbedding]: - input_ = self.prepare_input(text) + input_doc = self.prepare_input(text) client = self.prepare_client(async_version=False) - resp = self.openai_response( - client, input=[_.text if _.text else " " for _ in input_], **kwargs - ).dict() - output_ = sorted(resp["data"], key=lambda x: x["index"]) - return [ - DocumentWithEmbedding(embedding=o["embedding"], content=i) - for i, o in zip(input_, output_) - ] + + input_: list[str | list[int]] = [] + splitted_indices = {} + for idx, text in enumerate(input_doc): + if self.context_length: + chunks = split_text_by_chunk_size(text.text or " ", self.context_length) + splitted_indices[idx] = (len(input_), len(input_) + len(chunks)) + input_.extend(chunks) + else: + splitted_indices[idx] = (len(input_), len(input_) + 1) + input_.append(text.text) + + resp = self.openai_response(client, input=input_, **kwargs).dict() + output_ = list(sorted(resp["data"], key=lambda x: x["index"])) + + output = [] + for idx, doc in enumerate(input_doc): + embs = output_[splitted_indices[idx][0] : splitted_indices[idx][1]] + if len(embs) == 1: + output.append( + DocumentWithEmbedding(embedding=embs[0]["embedding"], content=doc) + ) + continue + + chunk_lens = [ + len(_) + for _ in input_[splitted_indices[idx][0] : splitted_indices[idx][1]] + ] + vs: list[list[float]] = [_["embedding"] for _ in embs] + emb = np.average(vs, axis=0, weights=chunk_lens) + emb = emb / np.linalg.norm(emb) + output.append(DocumentWithEmbedding(embedding=emb.tolist(), content=doc)) + + return output async def ainvoke( self, text: str | list[str] | Document | list[Document], *args, **kwargs @@ -118,6 +175,13 @@ class 
OpenAIEmbeddings(BaseOpenAIEmbeddings): return OpenAI(**params) + @retry( + retry=retry_if_not_exception_type( + (openai.NotFoundError, openai.BadRequestError) + ), + wait=wait_random_exponential(min=1, max=40), + stop=stop_after_attempt(6), + ) def openai_response(self, client, **kwargs): """Get the openai response""" params: dict = { @@ -174,6 +238,13 @@ class AzureOpenAIEmbeddings(BaseOpenAIEmbeddings): return AzureOpenAI(**params) + @retry( + retry=retry_if_not_exception_type( + (openai.NotFoundError, openai.BadRequestError) + ), + wait=wait_random_exponential(min=1, max=40), + stop=stop_after_attempt(6), + ) def openai_response(self, client, **kwargs): """Get the openai response""" params: dict = { diff --git a/libs/kotaemon/kotaemon/indices/base.py b/libs/kotaemon/kotaemon/indices/base.py index 938be66..a20ca6b 100644 --- a/libs/kotaemon/kotaemon/indices/base.py +++ b/libs/kotaemon/kotaemon/indices/base.py @@ -3,7 +3,7 @@ from __future__ import annotations from abc import abstractmethod from typing import Any, Type -from llama_index.node_parser.interface import NodeParser +from llama_index.core.node_parser.interface import NodeParser from kotaemon.base import BaseComponent, Document, RetrievedDocument @@ -32,7 +32,7 @@ class LlamaIndexDocTransformerMixin: Example: class TokenSplitter(LlamaIndexMixin, BaseSplitter): def _get_li_class(self): - from llama_index.text_splitter import TokenTextSplitter + from llama_index.core.text_splitter import TokenTextSplitter return TokenTextSplitter To use this mixin, please: diff --git a/libs/kotaemon/kotaemon/indices/extractors/doc_parsers.py b/libs/kotaemon/kotaemon/indices/extractors/doc_parsers.py index 7dad528..a57743b 100644 --- a/libs/kotaemon/kotaemon/indices/extractors/doc_parsers.py +++ b/libs/kotaemon/kotaemon/indices/extractors/doc_parsers.py @@ -15,7 +15,7 @@ class TitleExtractor(LlamaIndexDocTransformerMixin, BaseDocParser): super().__init__(llm=llm, nodes=nodes, **params) def _get_li_class(self): - from 
llama_index.extractors import TitleExtractor + from llama_index.core.extractors import TitleExtractor return TitleExtractor @@ -30,6 +30,6 @@ class SummaryExtractor(LlamaIndexDocTransformerMixin, BaseDocParser): super().__init__(llm=llm, summaries=summaries, **params) def _get_li_class(self): - from llama_index.extractors import SummaryExtractor + from llama_index.core.extractors import SummaryExtractor return SummaryExtractor diff --git a/libs/kotaemon/kotaemon/indices/ingests/files.py b/libs/kotaemon/kotaemon/indices/ingests/files.py index 3eb5361..7da3713 100644 --- a/libs/kotaemon/kotaemon/indices/ingests/files.py +++ b/libs/kotaemon/kotaemon/indices/ingests/files.py @@ -1,27 +1,42 @@ from pathlib import Path from typing import Type -from llama_index.readers import PDFReader -from llama_index.readers.base import BaseReader +from decouple import config +from llama_index.core.readers.base import BaseReader +from theflow.settings import settings as flowsettings from kotaemon.base import BaseComponent, Document, Param from kotaemon.indices.extractors import BaseDocParser from kotaemon.indices.splitters import BaseSplitter, TokenSplitter from kotaemon.loaders import ( AdobeReader, + AzureAIDocumentIntelligenceLoader, DirectoryReader, HtmlReader, MathpixPDFReader, MhtmlReader, OCRReader, PandasExcelReader, + PDFThumbnailReader, UnstructuredReader, ) unstructured = UnstructuredReader() +adobe_reader = AdobeReader() +azure_reader = AzureAIDocumentIntelligenceLoader( + endpoint=str(config("AZURE_DI_ENDPOINT", default="")), + credential=str(config("AZURE_DI_CREDENTIAL", default="")), + cache_dir=getattr(flowsettings, "KH_MARKDOWN_OUTPUT_DIR", None), +) +adobe_reader.vlm_endpoint = azure_reader.vlm_endpoint = getattr( + flowsettings, "KH_VLM_ENDPOINT", "" +) + + KH_DEFAULT_FILE_EXTRACTORS: dict[str, BaseReader] = { ".xlsx": PandasExcelReader(), ".docx": unstructured, + ".pptx": unstructured, ".xls": unstructured, ".doc": unstructured, ".html": HtmlReader(), @@ -31,7 +46,7 
@@ KH_DEFAULT_FILE_EXTRACTORS: dict[str, BaseReader] = { ".jpg": unstructured, ".tiff": unstructured, ".tif": unstructured, - ".pdf": PDFReader(), + ".pdf": PDFThumbnailReader(), } diff --git a/libs/kotaemon/kotaemon/indices/qa/citation.py b/libs/kotaemon/kotaemon/indices/qa/citation.py index bf8def0..30eceaa 100644 --- a/libs/kotaemon/kotaemon/indices/qa/citation.py +++ b/libs/kotaemon/kotaemon/indices/qa/citation.py @@ -103,7 +103,9 @@ class CitationPipeline(BaseComponent): print("CitationPipeline: invoking LLM") llm_output = self.get_from_path("llm").invoke(messages, **llm_kwargs) print("CitationPipeline: finish invoking LLM") - if not llm_output.messages: + if not llm_output.messages or not llm_output.additional_kwargs.get( + "tool_calls" + ): return None function_output = llm_output.additional_kwargs["tool_calls"][0]["function"][ "arguments" diff --git a/libs/kotaemon/kotaemon/indices/rankings/__init__.py b/libs/kotaemon/kotaemon/indices/rankings/__init__.py index ccd99bb..84b8765 100644 --- a/libs/kotaemon/kotaemon/indices/rankings/__init__.py +++ b/libs/kotaemon/kotaemon/indices/rankings/__init__.py @@ -1,5 +1,13 @@ from .base import BaseReranking from .cohere import CohereReranking from .llm import LLMReranking +from .llm_scoring import LLMScoring +from .llm_trulens import LLMTrulensScoring -__all__ = ["CohereReranking", "LLMReranking", "BaseReranking"] +__all__ = [ + "CohereReranking", + "LLMReranking", + "LLMScoring", + "BaseReranking", + "LLMTrulensScoring", +] diff --git a/libs/kotaemon/kotaemon/indices/rankings/cohere.py b/libs/kotaemon/kotaemon/indices/rankings/cohere.py index d102efd..e759d6c 100644 --- a/libs/kotaemon/kotaemon/indices/rankings/cohere.py +++ b/libs/kotaemon/kotaemon/indices/rankings/cohere.py @@ -1,6 +1,6 @@ from __future__ import annotations -import os +from decouple import config from kotaemon.base import Document @@ -9,8 +9,7 @@ from .base import BaseReranking class CohereReranking(BaseReranking): model_name: str = 
"rerank-multilingual-v2.0" - cohere_api_key: str = os.environ.get("COHERE_API_KEY", "") - top_k: int = 1 + cohere_api_key: str = config("COHERE_API_KEY", "") def run(self, documents: list[Document], query: str) -> list[Document]: """Use Cohere Reranker model to re-order documents @@ -22,6 +21,10 @@ class CohereReranking(BaseReranking): "Please install Cohere " "`pip install cohere` to use Cohere Reranking" ) + if not self.cohere_api_key: + print("Cohere API key not found. Skipping reranking.") + return documents + cohere_client = cohere.Client(self.cohere_api_key) compressed_docs: list[Document] = [] @@ -29,12 +32,13 @@ class CohereReranking(BaseReranking): return compressed_docs _docs = [d.content for d in documents] - results = cohere_client.rerank( - model=self.model_name, query=query, documents=_docs, top_n=self.top_k + response = cohere_client.rerank( + model=self.model_name, query=query, documents=_docs ) - for r in results: + print("Cohere score", [r.relevance_score for r in response.results]) + for r in response.results: doc = documents[r.index] - doc.metadata["relevance_score"] = r.relevance_score + doc.metadata["cohere_reranking_score"] = r.relevance_score compressed_docs.append(doc) return compressed_docs diff --git a/libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py b/libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py new file mode 100644 index 0000000..b4f5105 --- /dev/null +++ b/libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from concurrent.futures import ThreadPoolExecutor + +import numpy as np +from langchain.output_parsers.boolean import BooleanOutputParser + +from kotaemon.base import Document + +from .llm import LLMReranking + + +class LLMScoring(LLMReranking): + def run( + self, + documents: list[Document], + query: str, + ) -> list[Document]: + """Filter down documents based on their relevance to the query.""" + filtered_docs: list[Document] = [] + output_parser = 
BooleanOutputParser() + + if self.concurrent: + with ThreadPoolExecutor() as executor: + futures = [] + for doc in documents: + _prompt = self.prompt_template.populate( + question=query, context=doc.get_content() + ) + futures.append(executor.submit(lambda: self.llm(_prompt))) + + results = [future.result() for future in futures] + else: + results = [] + for doc in documents: + _prompt = self.prompt_template.populate( + question=query, context=doc.get_content() + ) + results.append(self.llm(_prompt)) + + for result, doc in zip(results, documents): + score = np.exp(np.average(result.logprobs)) + include_doc = output_parser.parse(result.text) + if include_doc: + doc.metadata["llm_reranking_score"] = score + else: + doc.metadata["llm_reranking_score"] = 1 - score + filtered_docs.append(doc) + + # prevent returning empty result + if len(filtered_docs) == 0: + filtered_docs = documents[: self.top_k] + + return filtered_docs diff --git a/libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py b/libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py new file mode 100644 index 0000000..046998f --- /dev/null +++ b/libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py @@ -0,0 +1,182 @@ +from __future__ import annotations + +import re +from concurrent.futures import ThreadPoolExecutor +from functools import partial + +import tiktoken + +from kotaemon.base import Document, HumanMessage, SystemMessage +from kotaemon.indices.splitters import TokenSplitter +from kotaemon.llms import BaseLLM, PromptTemplate + +from .llm import LLMReranking + +SYSTEM_PROMPT_TEMPLATE = PromptTemplate( + """You are a RELEVANCE grader; providing the relevance of the given CONTEXT to the given QUESTION. + Respond only as a number from 0 to 10 where 0 is the least relevant and 10 is the most relevant. + + A few additional scoring guidelines: + + - Long CONTEXTS should score equally well as short CONTEXTS. 
+ + - RELEVANCE score should increase as the CONTEXTS provides more RELEVANT context to the QUESTION. + + - RELEVANCE score should increase as the CONTEXTS provides RELEVANT context to more parts of the QUESTION. + + - CONTEXT that is RELEVANT to some of the QUESTION should score of 2, 3 or 4. Higher score indicates more RELEVANCE. + + - CONTEXT that is RELEVANT to most of the QUESTION should get a score of 5, 6, 7 or 8. Higher score indicates more RELEVANCE. + + - CONTEXT that is RELEVANT to the entire QUESTION should get a score of 9 or 10. Higher score indicates more RELEVANCE. + + - CONTEXT must be relevant and helpful for answering the entire QUESTION to get a score of 10. + + - Never elaborate.""" # noqa: E501 +) + +USER_PROMPT_TEMPLATE = PromptTemplate( + """QUESTION: {question} + + CONTEXT: {context} + + RELEVANCE: """ +) # noqa + +PATTERN_INTEGER: re.Pattern = re.compile(r"([+-]?[1-9][0-9]*|0)") +"""Regex that matches integers.""" + +MAX_CONTEXT_LEN = 7500 + + +def validate_rating(rating) -> int: + """Validate a rating is between 0 and 10.""" + + if not 0 <= rating <= 10: + raise ValueError("Rating must be between 0 and 10") + + return rating + + +def re_0_10_rating(s: str) -> int: + """Extract a 0-10 rating from a string. + + If the string does not match an integer or matches an integer outside the + 0-10 range, raises an error instead. If multiple numbers are found within + the expected 0-10 range, the smallest is returned. + + Args: + s: String to extract rating from. + + Returns: + int: Extracted rating. + + Raises: + ParseError: If no integers between 0 and 10 are found in the string. + """ + + matches = PATTERN_INTEGER.findall(s) + if not matches: + raise AssertionError + + vals = set() + for match in matches: + try: + vals.add(validate_rating(int(match))) + except ValueError: + pass + + if not vals: + raise AssertionError + + # Min to handle cases like "The rating is 8 out of 10." 
+ return min(vals) + + +class LLMTrulensScoring(LLMReranking): + llm: BaseLLM + system_prompt_template: PromptTemplate = SYSTEM_PROMPT_TEMPLATE + user_prompt_template: PromptTemplate = USER_PROMPT_TEMPLATE + concurrent: bool = True + normalize: float = 10 + trim_func: TokenSplitter = TokenSplitter.withx( + chunk_size=MAX_CONTEXT_LEN, + chunk_overlap=0, + separator=" ", + tokenizer=partial( + tiktoken.encoding_for_model("gpt-3.5-turbo").encode, + allowed_special=set(), + disallowed_special="all", + ), + ) + + def run( + self, + documents: list[Document], + query: str, + ) -> list[Document]: + """Filter down documents based on their relevance to the query.""" + filtered_docs = [] + + documents = sorted(documents, key=lambda doc: doc.get_content()) + if self.concurrent: + with ThreadPoolExecutor() as executor: + futures = [] + for doc in documents: + chunked_doc_content = self.trim_func( + [ + Document(content=doc.get_content()) + # skip metadata which cause troubles + ] + )[0].text + + messages = [] + messages.append( + SystemMessage(self.system_prompt_template.populate()) + ) + messages.append( + HumanMessage( + self.user_prompt_template.populate( + question=query, context=chunked_doc_content + ) + ) + ) + + def llm_call(): + return self.llm(messages).text + + futures.append(executor.submit(llm_call)) + + results = [future.result() for future in futures] + else: + results = [] + for doc in documents: + messages = [] + messages.append(SystemMessage(self.system_prompt_template.populate())) + messages.append( + SystemMessage( + self.user_prompt_template.populate( + question=query, context=doc.get_content() + ) + ) + ) + results.append(self.llm(messages).text) + + # use Boolean parser to extract relevancy output from LLM + results = [ + (r_idx, float(re_0_10_rating(result)) / self.normalize) + for r_idx, result in enumerate(results) + ] + results.sort(key=lambda x: x[1], reverse=True) + + for r_idx, score in results: + doc = documents[r_idx] + 
doc.metadata["llm_trulens_score"] = score + filtered_docs.append(doc) + + print( + "LLM rerank scores", + [doc.metadata["llm_trulens_score"] for doc in filtered_docs], + ) + + return filtered_docs diff --git a/libs/kotaemon/kotaemon/indices/splitters/__init__.py b/libs/kotaemon/kotaemon/indices/splitters/__init__.py index 16a31fe..d9eb65f 100644 --- a/libs/kotaemon/kotaemon/indices/splitters/__init__.py +++ b/libs/kotaemon/kotaemon/indices/splitters/__init__.py @@ -23,7 +23,7 @@ class TokenSplitter(LlamaIndexDocTransformerMixin, BaseSplitter): ) def _get_li_class(self): - from llama_index.text_splitter import TokenTextSplitter + from llama_index.core.text_splitter import TokenTextSplitter return TokenTextSplitter @@ -44,6 +44,6 @@ class SentenceWindowSplitter(LlamaIndexDocTransformerMixin, BaseSplitter): ) def _get_li_class(self): - from llama_index.node_parser import SentenceWindowNodeParser + from llama_index.core.node_parser import SentenceWindowNodeParser return SentenceWindowNodeParser diff --git a/libs/kotaemon/kotaemon/indices/vectorindex.py b/libs/kotaemon/kotaemon/indices/vectorindex.py index 8902528..e2984c7 100644 --- a/libs/kotaemon/kotaemon/indices/vectorindex.py +++ b/libs/kotaemon/kotaemon/indices/vectorindex.py @@ -1,14 +1,18 @@ from __future__ import annotations +import threading import uuid +from pathlib import Path from typing import Optional, Sequence, cast +from theflow.settings import settings as flowsettings + from kotaemon.base import BaseComponent, Document, RetrievedDocument from kotaemon.embeddings import BaseEmbeddings from kotaemon.storages import BaseDocumentStore, BaseVectorStore from .base import BaseIndexing, BaseRetrieval -from .rankings import BaseReranking +from .rankings import BaseReranking, LLMReranking VECTOR_STORE_FNAME = "vectorstore" DOC_STORE_FNAME = "docstore" @@ -23,9 +27,11 @@ class VectorIndexing(BaseIndexing): - List of texts """ + cache_dir: Optional[str] = getattr(flowsettings, "KH_CHUNKS_OUTPUT_DIR", None) 
vector_store: BaseVectorStore doc_store: Optional[BaseDocumentStore] = None embedding: BaseEmbeddings + count_: int = 0 def to_retrieval_pipeline(self, *args, **kwargs): """Convert the indexing pipeline to a retrieval pipeline""" @@ -44,6 +50,52 @@ class VectorIndexing(BaseIndexing): qa_pipeline=CitationQAPipeline(**kwargs), ) + def write_chunk_to_file(self, docs: list[Document]): + # save the chunks content into markdown format + if self.cache_dir: + file_name = Path(docs[0].metadata["file_name"]) + for i in range(len(docs)): + markdown_content = "" + if "page_label" in docs[i].metadata: + page_label = str(docs[i].metadata["page_label"]) + markdown_content += f"Page label: {page_label}" + if "file_name" in docs[i].metadata: + filename = docs[i].metadata["file_name"] + markdown_content += f"\nFile name: {filename}" + if "section" in docs[i].metadata: + section = docs[i].metadata["section"] + markdown_content += f"\nSection: {section}" + if "type" in docs[i].metadata: + if docs[i].metadata["type"] == "image": + image_origin = docs[i].metadata["image_origin"] + image_origin = f'

' + markdown_content += f"\nImage origin: {image_origin}" + if docs[i].text: + markdown_content += f"\ntext:\n{docs[i].text}" + + with open( + Path(self.cache_dir) / f"{file_name.stem}_{self.count_+i}.md", + "w", + encoding="utf-8", + ) as f: + f.write(markdown_content) + + def add_to_docstore(self, docs: list[Document]): + if self.doc_store: + print("Adding documents to doc store") + self.doc_store.add(docs) + + def add_to_vectorstore(self, docs: list[Document]): + # in case we want to skip embedding + if self.vector_store: + print(f"Getting embeddings for {len(docs)} nodes") + embeddings = self.embedding(docs) + print("Adding embeddings to vector store") + self.vector_store.add( + embeddings=embeddings, + ids=[t.doc_id for t in docs], + ) + def run(self, text: str | list[str] | Document | list[Document]): input_: list[Document] = [] if not isinstance(text, list): @@ -59,16 +111,10 @@ class VectorIndexing(BaseIndexing): f"Invalid input type {type(item)}, should be str or Document" ) - print(f"Getting embeddings for {len(input_)} nodes") - embeddings = self.embedding(input_) - print("Adding embeddings to vector store") - self.vector_store.add( - embeddings=embeddings, - ids=[t.doc_id for t in input_], - ) - if self.doc_store: - print("Adding documents to doc store") - self.doc_store.add(input_) + self.add_to_vectorstore(input_) + self.add_to_docstore(input_) + self.write_chunk_to_file(input_) + self.count_ += len(input_) class VectorRetrieval(BaseRetrieval): @@ -78,7 +124,16 @@ class VectorRetrieval(BaseRetrieval): doc_store: Optional[BaseDocumentStore] = None embedding: BaseEmbeddings rerankers: Sequence[BaseReranking] = [] - top_k: int = 1 + top_k: int = 5 + first_round_top_k_mult: int = 10 + retrieval_mode: str = "hybrid" # vector, text, hybrid + + def _filter_docs( + self, documents: list[RetrievedDocument], top_k: int | None = None + ): + if top_k: + documents = documents[:top_k] + return documents def run( self, text: str | Document, top_k: Optional[int] = 
None, **kwargs @@ -95,24 +150,155 @@ class VectorRetrieval(BaseRetrieval): if top_k is None: top_k = self.top_k + do_extend = kwargs.pop("do_extend", False) + thumbnail_count = kwargs.pop("thumbnail_count", 3) + + if do_extend: + top_k_first_round = top_k * self.first_round_top_k_mult + else: + top_k_first_round = top_k + if self.doc_store is None: raise ValueError( "doc_store is not provided. Please provide a doc_store to " "retrieve the documents" ) - emb: list[float] = self.embedding(text)[0].embedding - _, scores, ids = self.vector_store.query(embedding=emb, top_k=top_k, **kwargs) - docs = self.doc_store.get(ids) - result = [ - RetrievedDocument(**doc.to_dict(), score=score) - for doc, score in zip(docs, scores) - ] + result: list[RetrievedDocument] = [] + # TODO: should declare scope directly in the run params + scope = kwargs.pop("scope", None) + emb: list[float] + + if self.retrieval_mode == "vector": + emb = self.embedding(text)[0].embedding + _, scores, ids = self.vector_store.query( + embedding=emb, top_k=top_k_first_round, **kwargs + ) + docs = self.doc_store.get(ids) + result = [ + RetrievedDocument(**doc.to_dict(), score=score) + for doc, score in zip(docs, scores) + ] + elif self.retrieval_mode == "text": + query = text.text if isinstance(text, Document) else text + docs = self.doc_store.query(query, top_k=top_k_first_round, doc_ids=scope) + result = [RetrievedDocument(**doc.to_dict(), score=-1.0) for doc in docs] + elif self.retrieval_mode == "hybrid": + # similarity search section + emb = self.embedding(text)[0].embedding + vs_docs: list[RetrievedDocument] = [] + vs_ids: list[str] = [] + vs_scores: list[float] = [] + + def query_vectorstore(): + nonlocal vs_docs + nonlocal vs_scores + nonlocal vs_ids + + assert self.doc_store is not None + _, vs_scores, vs_ids = self.vector_store.query( + embedding=emb, top_k=top_k_first_round, **kwargs + ) + if vs_ids: + vs_docs = self.doc_store.get(vs_ids) + + # full-text search section + ds_docs: 
list[RetrievedDocument] = [] + + def query_docstore(): + nonlocal ds_docs + + assert self.doc_store is not None + query = text.text if isinstance(text, Document) else text + ds_docs = self.doc_store.query( + query, top_k=top_k_first_round, doc_ids=scope + ) + + vs_query_thread = threading.Thread(target=query_vectorstore) + ds_query_thread = threading.Thread(target=query_docstore) + + vs_query_thread.start() + ds_query_thread.start() + + vs_query_thread.join() + ds_query_thread.join() + + result = [ + RetrievedDocument(**doc.to_dict(), score=-1.0) + for doc in ds_docs + if doc not in vs_ids + ] + result += [ + RetrievedDocument(**doc.to_dict(), score=score) + for doc, score in zip(vs_docs, vs_scores) + ] + print(f"Got {len(vs_docs)} from vectorstore") + print(f"Got {len(ds_docs)} from docstore") + # use additional reranker to re-order the document list - if self.rerankers: + if self.rerankers and text: for reranker in self.rerankers: + # if reranker is LLMReranking, limit the document with top_k items only + if isinstance(reranker, LLMReranking): + result = self._filter_docs(result, top_k=top_k) result = reranker(documents=result, query=text) + result = self._filter_docs(result, top_k=top_k) + print(f"Got raw {len(result)} retrieved documents") + + # add page thumbnails to the result if exists + thumbnail_doc_ids: set[str] = set() + # we should copy the text from retrieved text chunk + # to the thumbnail to get relevant LLM score correctly + text_thumbnail_docs: dict[str, RetrievedDocument] = {} + + non_thumbnail_docs = [] + raw_thumbnail_docs = [] + for doc in result: + if doc.metadata.get("type") == "thumbnail": + # change type to image to display on UI + doc.metadata["type"] = "image" + raw_thumbnail_docs.append(doc) + continue + if ( + "thumbnail_doc_id" in doc.metadata + and len(thumbnail_doc_ids) < thumbnail_count + ): + thumbnail_id = doc.metadata["thumbnail_doc_id"] + thumbnail_doc_ids.add(thumbnail_id) + text_thumbnail_docs[thumbnail_id] = doc + else: + 
non_thumbnail_docs.append(doc) + + linked_thumbnail_docs = self.doc_store.get(list(thumbnail_doc_ids)) + print( + "thumbnail docs", + len(linked_thumbnail_docs), + "non-thumbnail docs", + len(non_thumbnail_docs), + "raw-thumbnail docs", + len(raw_thumbnail_docs), + ) + additional_docs = [] + + for thumbnail_doc in linked_thumbnail_docs: + text_doc = text_thumbnail_docs[thumbnail_doc.doc_id] + doc_dict = thumbnail_doc.to_dict() + doc_dict["_id"] = text_doc.doc_id + doc_dict["content"] = text_doc.content + doc_dict["metadata"]["type"] = "image" + for key in text_doc.metadata: + if key not in doc_dict["metadata"]: + doc_dict["metadata"][key] = text_doc.metadata[key] + + additional_docs.append(RetrievedDocument(**doc_dict, score=text_doc.score)) + + result = additional_docs + non_thumbnail_docs + + if not result: + # return output from raw retrieved thumbnails + result = self._filter_docs(raw_thumbnail_docs, top_k=thumbnail_count) + return result diff --git a/libs/kotaemon/kotaemon/llms/__init__.py b/libs/kotaemon/kotaemon/llms/__init__.py index 266e391..6494fc9 100644 --- a/libs/kotaemon/kotaemon/llms/__init__.py +++ b/libs/kotaemon/kotaemon/llms/__init__.py @@ -7,6 +7,7 @@ from .chats import ( ChatLLM, ChatOpenAI, EndpointChatLLM, + LCAnthropicChat, LCAzureChatOpenAI, LCChatOpenAI, LlamaCppChat, @@ -27,6 +28,7 @@ __all__ = [ "SystemMessage", "AzureChatOpenAI", "ChatOpenAI", + "LCAnthropicChat", "LCAzureChatOpenAI", "LCChatOpenAI", "LlamaCppChat", diff --git a/libs/kotaemon/kotaemon/llms/chats/__init__.py b/libs/kotaemon/kotaemon/llms/chats/__init__.py index 7fc1c40..6e3d3d5 100644 --- a/libs/kotaemon/kotaemon/llms/chats/__init__.py +++ b/libs/kotaemon/kotaemon/llms/chats/__init__.py @@ -1,6 +1,11 @@ from .base import ChatLLM from .endpoint_based import EndpointChatLLM -from .langchain_based import LCAzureChatOpenAI, LCChatMixin, LCChatOpenAI +from .langchain_based import ( + LCAnthropicChat, + LCAzureChatOpenAI, + LCChatMixin, + LCChatOpenAI, +) from .llamacpp import 
LlamaCppChat from .openai import AzureChatOpenAI, ChatOpenAI @@ -10,6 +15,7 @@ __all__ = [ "ChatLLM", "EndpointChatLLM", "ChatOpenAI", + "LCAnthropicChat", "LCChatOpenAI", "LCAzureChatOpenAI", "LCChatMixin", diff --git a/libs/kotaemon/kotaemon/llms/chats/langchain_based.py b/libs/kotaemon/kotaemon/llms/chats/langchain_based.py index fca78dc..077c3f8 100644 --- a/libs/kotaemon/kotaemon/llms/chats/langchain_based.py +++ b/libs/kotaemon/kotaemon/llms/chats/langchain_based.py @@ -221,3 +221,27 @@ class LCAzureChatOpenAI(LCChatMixin, ChatLLM): # type: ignore from langchain.chat_models import AzureChatOpenAI return AzureChatOpenAI + + +class LCAnthropicChat(LCChatMixin, ChatLLM): # type: ignore + def __init__( + self, + api_key: str | None = None, + model_name: str | None = None, + temperature: float = 0.7, + **params, + ): + super().__init__( + api_key=api_key, + model_name=model_name, + temperature=temperature, + **params, + ) + + def _get_lc_class(self): + try: + from langchain_anthropic import ChatAnthropic + except ImportError: + raise ImportError("Please install langchain-anthropic") + + return ChatAnthropic diff --git a/libs/kotaemon/kotaemon/llms/chats/openai.py b/libs/kotaemon/kotaemon/llms/chats/openai.py index b12567d..6a605f6 100644 --- a/libs/kotaemon/kotaemon/llms/chats/openai.py +++ b/libs/kotaemon/kotaemon/llms/chats/openai.py @@ -159,6 +159,15 @@ class BaseChatOpenAI(ChatLLM): additional_kwargs["tool_calls"] = resp["choices"][0]["message"][ "tool_calls" ] + + if resp["choices"][0].get("logprobs") is None: + logprobs = [] + else: + all_logprobs = resp["choices"][0]["logprobs"].get("content") + logprobs = ( + [logprob["logprob"] for logprob in all_logprobs] if all_logprobs else [] + ) + output = LLMInterface( candidates=[(_["message"]["content"] or "") for _ in resp["choices"]], content=resp["choices"][0]["message"]["content"] or "", @@ -170,6 +179,7 @@ class BaseChatOpenAI(ChatLLM): AIMessage(content=(_["message"]["content"]) or "") for _ in 
resp["choices"] ], + logprobs=logprobs, ) return output @@ -216,11 +226,24 @@ class BaseChatOpenAI(ChatLLM): client, messages=input_messages, stream=True, **kwargs ) - for chunk in resp: - if not chunk.choices: + for c in resp: + chunk = c.dict() + if not chunk["choices"]: continue - if chunk.choices[0].delta.content is not None: - yield LLMInterface(content=chunk.choices[0].delta.content) + if chunk["choices"][0]["delta"]["content"] is not None: + if chunk["choices"][0].get("logprobs") is None: + logprobs = [] + else: + logprobs = [ + logprob["logprob"] + for logprob in chunk["choices"][0]["logprobs"].get( + "content", [] + ) + ] + + yield LLMInterface( + content=chunk["choices"][0]["delta"]["content"], logprobs=logprobs + ) async def astream( self, messages: str | BaseMessage | list[BaseMessage], *args, **kwargs diff --git a/libs/kotaemon/kotaemon/loaders/__init__.py b/libs/kotaemon/kotaemon/loaders/__init__.py index 6ccdbda..624a76b 100644 --- a/libs/kotaemon/kotaemon/loaders/__init__.py +++ b/libs/kotaemon/kotaemon/loaders/__init__.py @@ -3,10 +3,12 @@ from .azureai_document_intelligence_loader import AzureAIDocumentIntelligenceLoa from .base import AutoReader, BaseReader from .composite_loader import DirectoryReader from .docx_loader import DocxReader -from .excel_loader import PandasExcelReader +from .excel_loader import ExcelReader, PandasExcelReader from .html_loader import HtmlReader, MhtmlReader from .mathpix_loader import MathpixPDFReader from .ocr_loader import ImageReader, OCRReader +from .pdf_loader import PDFThumbnailReader +from .txt_loader import TxtReader from .unstructured_loader import UnstructuredReader __all__ = [ @@ -14,6 +16,7 @@ __all__ = [ "AzureAIDocumentIntelligenceLoader", "BaseReader", "PandasExcelReader", + "ExcelReader", "MathpixPDFReader", "ImageReader", "OCRReader", @@ -23,4 +26,6 @@ __all__ = [ "HtmlReader", "MhtmlReader", "AdobeReader", + "TxtReader", + "PDFThumbnailReader", ] diff --git 
a/libs/kotaemon/kotaemon/loaders/adobe_loader.py b/libs/kotaemon/kotaemon/loaders/adobe_loader.py index 09a802c..f1b0379 100644 --- a/libs/kotaemon/kotaemon/loaders/adobe_loader.py +++ b/libs/kotaemon/kotaemon/loaders/adobe_loader.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional from decouple import config -from llama_index.readers.base import BaseReader +from llama_index.core.readers.base import BaseReader from kotaemon.base import Document @@ -154,7 +154,7 @@ class AdobeReader(BaseReader): for page_number, table_content, table_caption in tables: documents.append( Document( - text=table_caption, + text=table_content, metadata={ "table_origin": table_content, "type": "table", diff --git a/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py b/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py index 7e4c516..1e1d902 100644 --- a/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py +++ b/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py @@ -1,10 +1,56 @@ +import base64 import os +from io import BytesIO from pathlib import Path from typing import Optional +from PIL import Image + from kotaemon.base import Document, Param from .base import BaseReader +from .utils.adobe import generate_single_figure_caption + + +def crop_image(file_path: Path, bbox: list[float], page_number: int = 0) -> Image.Image: + """Crop the image based on the bounding box + + Args: + file_path (Path): path to the image file + bbox (list[float]): bounding box of the image (in percentage [x0, y0, x1, y1]) + page_number (int, optional): page number of the image. Defaults to 0. 
+ + Returns: + Image.Image: cropped image + """ + left, upper, right, lower = bbox + + img: Image.Image + suffix = file_path.suffix.lower() + if suffix == ".pdf": + try: + import fitz + except ImportError: + raise ImportError("Please install PyMuPDF: 'pip install PyMuPDF'") + + doc = fitz.open(file_path) + page = doc.load_page(page_number) + pm = page.get_pixmap(dpi=150) + img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples) + elif suffix in [".tif", ".tiff"]: + img = Image.open(file_path) + img.seek(page_number) + else: + img = Image.open(file_path) + + return img.crop( + ( + int(left * img.width), + int(upper * img.height), + int(right * img.width), + int(lower * img.height), + ) + ) class AzureAIDocumentIntelligenceLoader(BaseReader): @@ -14,7 +60,7 @@ class AzureAIDocumentIntelligenceLoader(BaseReader): heif, docx, xlsx, pptx and html. """ - _dependencies = ["azure-ai-documentintelligence"] + _dependencies = ["azure-ai-documentintelligence", "PyMuPDF", "Pillow"] endpoint: str = Param( os.environ.get("AZUREAI_DOCUMENT_INTELLIGENT_ENDPOINT", None), @@ -34,6 +80,29 @@ class AzureAIDocumentIntelligenceLoader(BaseReader): "#model-analysis-features)" ), ) + output_content_format: str = Param( + "markdown", + help="Output content format. Can be 'markdown' or 'text'.Default is markdown", + ) + vlm_endpoint: str = Param( + help=( + "Default VLM endpoint for figure captioning. If not provided, will not " + "caption the figures" + ) + ) + figure_friendly_filetypes: list[str] = Param( + [".pdf", ".jpeg", ".jpg", ".png", ".bmp", ".tiff", ".heif", ".tif"], + help=( + "File types that we can reliably open and extract figures. " + "For files like .docx or .html, the visual layout may be different " + "when viewed from different tools, hence we cannot use Azure DI " + "location to extract figures." + ), + ) + cache_dir: str = Param( + None, + help="Directory to cache the downloaded files. 
Default is None", + ) @Param.auto(depends_on=["endpoint", "credential"]) def client_(self): @@ -55,14 +124,114 @@ class AzureAIDocumentIntelligenceLoader(BaseReader): def load_data( self, file_path: Path, extra_info: Optional[dict] = None, **kwargs ) -> list[Document]: + """Extract the input file, allowing multi-modal extraction""" metadata = extra_info or {} + file_name = Path(file_path) with open(file_path, "rb") as fi: poller = self.client_.begin_analyze_document( self.model, analyze_request=fi, content_type="application/octet-stream", - output_content_format="markdown", + output_content_format=self.output_content_format, ) result = poller.result() - return [Document(content=result.content, metadata=metadata)] + # the total text content of the document in `output_content_format` format + text_content = result.content + removed_spans: list[dict] = [] + + # extract the figures + figures = [] + for figure_desc in result.get("figures", []): + if not self.vlm_endpoint: + continue + if file_path.suffix.lower() not in self.figure_friendly_filetypes: + continue + + # read & crop the image + page_number = figure_desc["boundingRegions"][0]["pageNumber"] + page_width = result.pages[page_number - 1]["width"] + page_height = result.pages[page_number - 1]["height"] + polygon = figure_desc["boundingRegions"][0]["polygon"] + xs = [polygon[i] for i in range(0, len(polygon), 2)] + ys = [polygon[i] for i in range(1, len(polygon), 2)] + bbox = [ + min(xs) / page_width, + min(ys) / page_height, + max(xs) / page_width, + max(ys) / page_height, + ] + img = crop_image(file_path, bbox, page_number - 1) + + # convert the image into base64 + img_bytes = BytesIO() + img.save(img_bytes, format="PNG") + img_base64 = base64.b64encode(img_bytes.getvalue()).decode("utf-8") + img_base64 = f"data:image/png;base64,{img_base64}" + + # caption the image + caption = generate_single_figure_caption( + figure=img_base64, vlm_endpoint=self.vlm_endpoint + ) + + # store the image into document + 
figure_metadata = { + "image_origin": img_base64, + "type": "image", + "page_label": page_number, + } + figure_metadata.update(metadata) + + figures.append( + Document( + text=caption, + metadata=figure_metadata, + ) + ) + removed_spans += figure_desc["spans"] + + # extract the tables + tables = [] + for table_desc in result.get("tables", []): + if not table_desc["spans"]: + continue + + # convert the tables into markdown format + boundingRegions = table_desc["boundingRegions"] + if boundingRegions: + page_number = boundingRegions[0]["pageNumber"] + else: + page_number = 1 + + # store the tables into document + offset = table_desc["spans"][0]["offset"] + length = table_desc["spans"][0]["length"] + table_metadata = { + "type": "table", + "page_label": page_number, + "table_origin": text_content[offset : offset + length], + } + table_metadata.update(metadata) + + tables.append( + Document( + text=text_content[offset : offset + length], + metadata=table_metadata, + ) + ) + removed_spans += table_desc["spans"] + # save the text content into markdown format + if self.cache_dir is not None: + with open( + Path(self.cache_dir) / f"{file_name.stem}.md", "w", encoding="utf-8" + ) as f: + f.write(text_content) + + removed_spans = sorted(removed_spans, key=lambda x: x["offset"], reverse=True) + for span in removed_spans: + text_content = ( + text_content[: span["offset"]] + + text_content[span["offset"] + span["length"] :] + ) + + return [Document(content=text_content, metadata=metadata)] + figures + tables diff --git a/libs/kotaemon/kotaemon/loaders/base.py b/libs/kotaemon/kotaemon/loaders/base.py index 52bef49..aebf919 100644 --- a/libs/kotaemon/kotaemon/loaders/base.py +++ b/libs/kotaemon/kotaemon/loaders/base.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Any, List, Type, Union from kotaemon.base import BaseComponent, Document if TYPE_CHECKING: - from llama_index.readers.base import BaseReader as LIBaseReader + from llama_index.core.readers.base import BaseReader as 
LIBaseReader class BaseReader(BaseComponent): @@ -20,7 +20,7 @@ class AutoReader(BaseReader): """Init reader using string identifier or class name from llama-hub""" if isinstance(reader_type, str): - from llama_index import download_loader + from llama_index.core import download_loader self._reader = download_loader(reader_type)() else: diff --git a/libs/kotaemon/kotaemon/loaders/composite_loader.py b/libs/kotaemon/kotaemon/loaders/composite_loader.py index 9d35e26..ddf71fa 100644 --- a/libs/kotaemon/kotaemon/loaders/composite_loader.py +++ b/libs/kotaemon/kotaemon/loaders/composite_loader.py @@ -1,6 +1,6 @@ from typing import Callable, List, Optional, Type -from llama_index.readers.base import BaseReader as LIBaseReader +from llama_index.core.readers.base import BaseReader as LIBaseReader from .base import BaseReader, LIReaderMixin @@ -48,6 +48,6 @@ class DirectoryReader(LIReaderMixin, BaseReader): file_metadata: Optional[Callable[[str], dict]] = None def _get_wrapped_class(self) -> Type["LIBaseReader"]: - from llama_index import SimpleDirectoryReader + from llama_index.core import SimpleDirectoryReader return SimpleDirectoryReader diff --git a/libs/kotaemon/kotaemon/loaders/docx_loader.py b/libs/kotaemon/kotaemon/loaders/docx_loader.py index dcec539..463a8fa 100644 --- a/libs/kotaemon/kotaemon/loaders/docx_loader.py +++ b/libs/kotaemon/kotaemon/loaders/docx_loader.py @@ -3,7 +3,7 @@ from pathlib import Path from typing import List, Optional import pandas as pd -from llama_index.readers.base import BaseReader +from llama_index.core.readers.base import BaseReader from kotaemon.base import Document @@ -27,6 +27,21 @@ class DocxReader(BaseReader): "Please install it using `pip install python-docx`" ) + def _load_single_table(self, table) -> List[List[str]]: + """Extract content from tables. Return a list of columns: list[str] + Some merged cells will share duplicated content. 
+ """ + n_row = len(table.rows) + n_col = len(table.columns) + + arrays = [["" for _ in range(n_row)] for _ in range(n_col)] + + for i, row in enumerate(table.rows): + for j, cell in enumerate(row.cells): + arrays[j][i] = cell.text + + return arrays + def load_data( self, file_path: Path, extra_info: Optional[dict] = None, **kwargs ) -> List[Document]: @@ -50,13 +65,9 @@ class DocxReader(BaseReader): tables = [] for t in doc.tables: - arrays = [ - [ - unicodedata.normalize("NFKC", t.cell(i, j).text) - for i in range(len(t.rows)) - ] - for j in range(len(t.columns)) - ] + # return list of columns: list of string + arrays = self._load_single_table(t) + tables.append(pd.DataFrame({a[0]: a[1:] for a in arrays})) extra_info = extra_info or {} diff --git a/libs/kotaemon/kotaemon/loaders/excel_loader.py b/libs/kotaemon/kotaemon/loaders/excel_loader.py index d903aea..67d0043 100644 --- a/libs/kotaemon/kotaemon/loaders/excel_loader.py +++ b/libs/kotaemon/kotaemon/loaders/excel_loader.py @@ -6,7 +6,7 @@ Pandas parser for .xlsx files. from pathlib import Path from typing import Any, List, Optional, Union -from llama_index.readers.base import BaseReader +from llama_index.core.readers.base import BaseReader from kotaemon.base import Document @@ -82,6 +82,9 @@ class PandasExcelReader(BaseReader): sheet = [] if include_sheetname: sheet.append([key]) + dfs[key] = dfs[key].dropna(axis=0, how="all") + dfs[key] = dfs[key].dropna(axis=0, how="all") + dfs[key].fillna("", inplace=True) sheet.extend(dfs[key].values.astype(str).tolist()) df_sheets.append(sheet) @@ -99,3 +102,91 @@ class PandasExcelReader(BaseReader): ] return output + + +class ExcelReader(BaseReader): + r"""Spreadsheet exporter respecting multiple worksheets + + Parses CSVs using the separator detection from Pandas `read_csv` function. + If special parameters are required, use the `pandas_config` dict. + + Args: + + pandas_config (dict): Options for the `pandas.read_excel` function call. 
+ Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html + for more information. Set to empty dict by default, + this means defaults will be used. + + """ + + def __init__( + self, + *args: Any, + pandas_config: Optional[dict] = None, + row_joiner: str = "\n", + col_joiner: str = " ", + **kwargs: Any, + ) -> None: + """Init params.""" + super().__init__(*args, **kwargs) + self._pandas_config = pandas_config or {} + self._row_joiner = row_joiner if row_joiner else "\n" + self._col_joiner = col_joiner if col_joiner else " " + + def load_data( + self, + file: Path, + include_sheetname: bool = True, + sheet_name: Optional[Union[str, int, list]] = None, + extra_info: Optional[dict] = None, + **kwargs, + ) -> List[Document]: + """Parse file and extract values from a specific column. + + Args: + file (Path): The path to the Excel file to read. + include_sheetname (bool): Whether to include the sheet name in the output. + sheet_name (Union[str, int, None]): The specific sheet to read from, + default is None which reads all sheets. + + Returns: + List[Document]: A list of`Document objects containing the + values from the specified column in the Excel file. 
+ """ + + try: + import pandas as pd + except ImportError: + raise ImportError( + "install pandas using `pip3 install pandas` to use this loader" + ) + + if sheet_name is not None: + sheet_name = ( + [sheet_name] if not isinstance(sheet_name, list) else sheet_name + ) + + # clean up input + file = Path(file) + extra_info = extra_info or {} + + dfs = pd.read_excel(file, sheet_name=sheet_name, **self._pandas_config) + sheet_names = dfs.keys() + output = [] + + for idx, key in enumerate(sheet_names): + dfs[key] = dfs[key].dropna(axis=0, how="all") + dfs[key] = dfs[key].dropna(axis=0, how="all") + dfs[key] = dfs[key].astype("object") + dfs[key].fillna("", inplace=True) + + rows = dfs[key].values.astype(str).tolist() + content = self._row_joiner.join( + self._col_joiner.join(row).strip() for row in rows + ).strip() + if include_sheetname: + content = f"(Sheet {key} of file {file.name})\n{content}" + metadata = {"page_label": idx + 1, "sheet_name": key, **extra_info} + output.append(Document(text=content, metadata=metadata)) + + return output diff --git a/libs/kotaemon/kotaemon/loaders/html_loader.py b/libs/kotaemon/kotaemon/loaders/html_loader.py index c939c8a..555efe5 100644 --- a/libs/kotaemon/kotaemon/loaders/html_loader.py +++ b/libs/kotaemon/kotaemon/loaders/html_loader.py @@ -2,7 +2,8 @@ import email from pathlib import Path from typing import Optional -from llama_index.readers.base import BaseReader +from llama_index.core.readers.base import BaseReader +from theflow.settings import settings as flowsettings from kotaemon.base import Document @@ -78,6 +79,9 @@ class MhtmlReader(BaseReader): def __init__( self, + cache_dir: Optional[str] = getattr( + flowsettings, "KH_MARKDOWN_OUTPUT_DIR", None + ), open_encoding: Optional[str] = None, bs_kwargs: Optional[dict] = None, get_text_separator: str = "", @@ -86,6 +90,7 @@ class MhtmlReader(BaseReader): to pass to the BeautifulSoup object. Args: + cache_dir: Path for markdwon format. file_path: Path to file to load. 
open_encoding: The encoding to use when opening the file. bs_kwargs: Any kwargs to pass to the BeautifulSoup object. @@ -100,6 +105,7 @@ class MhtmlReader(BaseReader): "`pip install beautifulsoup4`" ) + self.cache_dir = cache_dir self.open_encoding = open_encoding if bs_kwargs is None: bs_kwargs = {"features": "lxml"} @@ -116,6 +122,7 @@ class MhtmlReader(BaseReader): extra_info = extra_info or {} metadata: dict = extra_info page = [] + file_name = Path(file_path) with open(file_path, "r", encoding=self.open_encoding) as f: message = email.message_from_string(f.read()) parts = message.get_payload() @@ -144,5 +151,11 @@ class MhtmlReader(BaseReader): text = "\n\n".join(lines) if text: page.append(text) + # save the page into markdown format + print(self.cache_dir) + if self.cache_dir is not None: + print(Path(self.cache_dir) / f"{file_name.stem}.md") + with open(Path(self.cache_dir) / f"{file_name.stem}.md", "w") as f: + f.write(page[0]) return [Document(text="\n\n".join(page), metadata=metadata)] diff --git a/libs/kotaemon/kotaemon/loaders/mathpix_loader.py b/libs/kotaemon/kotaemon/loaders/mathpix_loader.py index 19f1001..d07b069 100644 --- a/libs/kotaemon/kotaemon/loaders/mathpix_loader.py +++ b/libs/kotaemon/kotaemon/loaders/mathpix_loader.py @@ -6,7 +6,7 @@ from typing import Any, Dict, List, Optional import requests from langchain.utils import get_from_dict_or_env -from llama_index.readers.base import BaseReader +from llama_index.core.readers.base import BaseReader from kotaemon.base import Document diff --git a/libs/kotaemon/kotaemon/loaders/ocr_loader.py b/libs/kotaemon/kotaemon/loaders/ocr_loader.py index bb1ac5d..4e009de 100644 --- a/libs/kotaemon/kotaemon/loaders/ocr_loader.py +++ b/libs/kotaemon/kotaemon/loaders/ocr_loader.py @@ -5,8 +5,8 @@ from typing import List, Optional from uuid import uuid4 import requests -from llama_index.readers.base import BaseReader -from tenacity import after_log, retry, stop_after_attempt, wait_fixed, wait_random +from 
llama_index.core.readers.base import BaseReader +from tenacity import after_log, retry, stop_after_attempt, wait_exponential from kotaemon.base import Document @@ -19,13 +19,16 @@ DEFAULT_OCR_ENDPOINT = "http://127.0.0.1:8000/v2/ai/infer/" @retry( - stop=stop_after_attempt(3), - wait=wait_fixed(5) + wait_random(0, 2), - after=after_log(logger, logging.DEBUG), + stop=stop_after_attempt(6), + wait=wait_exponential(multiplier=20, exp_base=2, min=1, max=1000), + after=after_log(logger, logging.WARNING), ) -def tenacious_api_post(url, **kwargs): - resp = requests.post(url=url, **kwargs) - resp.raise_for_status() +def tenacious_api_post(url, file_path, table_only, **kwargs): + with file_path.open("rb") as content: + files = {"input": content} + data = {"job_id": uuid4(), "table_only": table_only} + resp = requests.post(url=url, files=files, data=data, **kwargs) + resp.raise_for_status() return resp @@ -71,18 +74,16 @@ class OCRReader(BaseReader): """ file_path = Path(file_path).resolve() - with file_path.open("rb") as content: - files = {"input": content} - data = {"job_id": uuid4(), "table_only": not self.use_ocr} - - # call the API from FullOCR endpoint - if "response_content" in kwargs: - # overriding response content if specified - ocr_results = kwargs["response_content"] - else: - # call original API - resp = tenacious_api_post(url=self.ocr_endpoint, files=files, data=data) - ocr_results = resp.json()["result"] + # call the API from FullOCR endpoint + if "response_content" in kwargs: + # overriding response content if specified + ocr_results = kwargs["response_content"] + else: + # call original API + resp = tenacious_api_post( + url=self.ocr_endpoint, file_path=file_path, table_only=not self.use_ocr + ) + ocr_results = resp.json()["result"] debug_path = kwargs.pop("debug_path", None) artifact_path = kwargs.pop("artifact_path", None) @@ -168,18 +169,16 @@ class ImageReader(BaseReader): """ file_path = Path(file_path).resolve() - with file_path.open("rb") as content: 
- files = {"input": content} - data = {"job_id": uuid4(), "table_only": False} - - # call the API from FullOCR endpoint - if "response_content" in kwargs: - # overriding response content if specified - ocr_results = kwargs["response_content"] - else: - # call original API - resp = tenacious_api_post(url=self.ocr_endpoint, files=files, data=data) - ocr_results = resp.json()["result"] + # call the API from FullOCR endpoint + if "response_content" in kwargs: + # overriding response content if specified + ocr_results = kwargs["response_content"] + else: + # call original API + resp = tenacious_api_post( + url=self.ocr_endpoint, file_path=file_path, table_only=False + ) + ocr_results = resp.json()["result"] extra_info = extra_info or {} result = [] diff --git a/libs/kotaemon/kotaemon/loaders/pdf_loader.py b/libs/kotaemon/kotaemon/loaders/pdf_loader.py new file mode 100644 index 0000000..ecba89d --- /dev/null +++ b/libs/kotaemon/kotaemon/loaders/pdf_loader.py @@ -0,0 +1,114 @@ +import base64 +from io import BytesIO +from pathlib import Path +from typing import Dict, List, Optional + +from fsspec import AbstractFileSystem +from llama_index.readers.file import PDFReader +from PIL import Image + +from kotaemon.base import Document + + +def get_page_thumbnails( + file_path: Path, pages: list[int], dpi: int = 80 +) -> List[Image.Image]: + """Get image thumbnails of the pages in the PDF file. + + Args: + file_path (Path): path to the image file + page_number (list[int]): list of page numbers to extract + + Returns: + list[Image.Image]: list of page thumbnails + """ + + img: Image.Image + suffix = file_path.suffix.lower() + assert suffix == ".pdf", "This function only supports PDF files." 
+ try: + import fitz + except ImportError: + raise ImportError("Please install PyMuPDF: 'pip install PyMuPDF'") + + doc = fitz.open(file_path) + + output_imgs = [] + for page_number in pages: + page = doc.load_page(page_number) + pm = page.get_pixmap(dpi=dpi) + img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples) + output_imgs.append(convert_image_to_base64(img)) + + return output_imgs + + +def convert_image_to_base64(img: Image.Image) -> str: + # convert the image into base64 + img_bytes = BytesIO() + img.save(img_bytes, format="PNG") + img_base64 = base64.b64encode(img_bytes.getvalue()).decode("utf-8") + img_base64 = f"data:image/png;base64,{img_base64}" + + return img_base64 + + +class PDFThumbnailReader(PDFReader): + """PDF parser with thumbnail for each page.""" + + def __init__(self) -> None: + """ + Initialize PDFReader. + """ + super().__init__(return_full_document=False) + + def load_data( + self, + file: Path, + extra_info: Optional[Dict] = None, + fs: Optional[AbstractFileSystem] = None, + ) -> List[Document]: + """Parse file.""" + documents = super().load_data(file, extra_info, fs) + + page_numbers_str = [] + filtered_docs = [] + is_int_page_number: dict[str, bool] = {} + + for doc in documents: + if "page_label" in doc.metadata: + page_num_str = doc.metadata["page_label"] + page_numbers_str.append(page_num_str) + try: + _ = int(page_num_str) + is_int_page_number[page_num_str] = True + filtered_docs.append(doc) + except ValueError: + is_int_page_number[page_num_str] = False + continue + + documents = filtered_docs + page_numbers = list(range(len(page_numbers_str))) + + print("Page numbers:", len(page_numbers)) + page_thumbnails = get_page_thumbnails(file, page_numbers) + + documents.extend( + [ + Document( + text="Page thumbnail", + metadata={ + "image_origin": page_thumbnail, + "type": "thumbnail", + "page_label": page_number, + **(extra_info if extra_info is not None else {}), + }, + ) + for (page_thumbnail, page_number) in zip( + 
page_thumbnails, page_numbers_str + ) + if is_int_page_number[page_number] + ] + ) + + return documents diff --git a/libs/kotaemon/kotaemon/loaders/txt_loader.py b/libs/kotaemon/kotaemon/loaders/txt_loader.py new file mode 100644 index 0000000..6484029 --- /dev/null +++ b/libs/kotaemon/kotaemon/loaders/txt_loader.py @@ -0,0 +1,22 @@ +from pathlib import Path +from typing import Optional + +from kotaemon.base import Document + +from .base import BaseReader + + +class TxtReader(BaseReader): + def run( + self, file_path: str | Path, extra_info: Optional[dict] = None, **kwargs + ) -> list[Document]: + return self.load_data(Path(file_path), extra_info=extra_info, **kwargs) + + def load_data( + self, file_path: Path, extra_info: Optional[dict] = None, **kwargs + ) -> list[Document]: + with open(file_path, "r") as f: + text = f.read() + + metadata = extra_info or {} + return [Document(text=text, metadata=metadata)] diff --git a/libs/kotaemon/kotaemon/loaders/unstructured_loader.py b/libs/kotaemon/kotaemon/loaders/unstructured_loader.py index 82f3255..e8f8e30 100644 --- a/libs/kotaemon/kotaemon/loaders/unstructured_loader.py +++ b/libs/kotaemon/kotaemon/loaders/unstructured_loader.py @@ -12,7 +12,7 @@ pip install xlrd from pathlib import Path from typing import Any, Dict, List, Optional -from llama_index.readers.base import BaseReader +from llama_index.core.readers.base import BaseReader from kotaemon.base import Document diff --git a/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py b/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py index 1e219d6..e43059c 100644 --- a/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py +++ b/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py @@ -1,12 +1,19 @@ import json +import logging from typing import Any, List import requests from decouple import config +logger = logging.getLogger(__name__) + def generate_gpt4v( - endpoint: str, images: str | List[str], prompt: str, max_tokens: int = 512 + endpoint: str, + images: str | List[str], + prompt: str, + 
max_tokens: int = 512, + max_images: int = 10, ) -> str: # OpenAI API Key api_key = config("AZURE_OPENAI_API_KEY", default="") @@ -27,24 +34,36 @@ def generate_gpt4v( "type": "image_url", "image_url": {"url": image}, } - for image in images + for image in images[:max_images] ], } ], "max_tokens": max_tokens, + "temperature": 0, } + if len(images) > max_images: + print(f"Truncated to {max_images} images (original {len(images)} images") + + response = requests.post(endpoint, headers=headers, json=payload) + try: - response = requests.post(endpoint, headers=headers, json=payload) - output = response.json() - output = output["choices"][0]["message"]["content"] - except Exception: - output = "" + response.raise_for_status() + except Exception as e: + logger.exception(f"Error generating gpt4v: {response.text}; error {e}") + return "" + + output = response.json() + output = output["choices"][0]["message"]["content"] return output def stream_gpt4v( - endpoint: str, images: str | List[str], prompt: str, max_tokens: int = 512 + endpoint: str, + images: str | List[str], + prompt: str, + max_tokens: int = 512, + max_images: int = 10, ) -> Any: # OpenAI API Key api_key = config("AZURE_OPENAI_API_KEY", default="") @@ -65,17 +84,22 @@ def stream_gpt4v( "type": "image_url", "image_url": {"url": image}, } - for image in images + for image in images[:max_images] ], } ], "max_tokens": max_tokens, "stream": True, + "logprobs": True, + "temperature": 0, } + if len(images) > max_images: + print(f"Truncated to {max_images} images (original {len(images)} images") try: response = requests.post(endpoint, headers=headers, json=payload, stream=True) assert response.status_code == 200, str(response.content) output = "" + logprobs = [] for line in response.iter_lines(): if line: if line.startswith(b"\xef\xbb\xbf"): @@ -89,8 +113,23 @@ def stream_gpt4v( except Exception: break if len(line["choices"]): + if line["choices"][0].get("logprobs") is None: + _logprobs = [] + else: + _logprobs = [ + 
logprob["logprob"] + for logprob in line["choices"][0]["logprobs"].get( + "content", [] + ) + ] + output += line["choices"][0]["delta"].get("content", "") - yield line["choices"][0]["delta"].get("content", "") - except Exception: + logprobs += _logprobs + yield line["choices"][0]["delta"].get("content", ""), _logprobs + + except Exception as e: + logger.error(f"Error streaming gpt4v {e}") + logprobs = [] output = "" - return output + + return output, logprobs diff --git a/libs/kotaemon/kotaemon/storages/__init__.py b/libs/kotaemon/kotaemon/storages/__init__.py index 63cc6f6..d5f5c94 100644 --- a/libs/kotaemon/kotaemon/storages/__init__.py +++ b/libs/kotaemon/kotaemon/storages/__init__.py @@ -2,12 +2,14 @@ from .docstores import ( BaseDocumentStore, ElasticsearchDocumentStore, InMemoryDocumentStore, + LanceDBDocumentStore, SimpleFileDocumentStore, ) from .vectorstores import ( BaseVectorStore, ChromaVectorStore, InMemoryVectorStore, + LanceDBVectorStore, SimpleFileVectorStore, ) @@ -17,9 +19,11 @@ __all__ = [ "InMemoryDocumentStore", "ElasticsearchDocumentStore", "SimpleFileDocumentStore", + "LanceDBDocumentStore", # Vector stores "BaseVectorStore", "ChromaVectorStore", "InMemoryVectorStore", "SimpleFileVectorStore", + "LanceDBVectorStore", ] diff --git a/libs/kotaemon/kotaemon/storages/docstores/__init__.py b/libs/kotaemon/kotaemon/storages/docstores/__init__.py index 8c1da98..8bd0788 100644 --- a/libs/kotaemon/kotaemon/storages/docstores/__init__.py +++ b/libs/kotaemon/kotaemon/storages/docstores/__init__.py @@ -1,6 +1,7 @@ from .base import BaseDocumentStore from .elasticsearch import ElasticsearchDocumentStore from .in_memory import InMemoryDocumentStore +from .lancedb import LanceDBDocumentStore from .simple_file import SimpleFileDocumentStore __all__ = [ @@ -8,4 +9,5 @@ __all__ = [ "InMemoryDocumentStore", "ElasticsearchDocumentStore", "SimpleFileDocumentStore", + "LanceDBDocumentStore", ] diff --git a/libs/kotaemon/kotaemon/storages/docstores/base.py 
b/libs/kotaemon/kotaemon/storages/docstores/base.py index 243584b..4b6f397 100644 --- a/libs/kotaemon/kotaemon/storages/docstores/base.py +++ b/libs/kotaemon/kotaemon/storages/docstores/base.py @@ -41,6 +41,13 @@ class BaseDocumentStore(ABC): """Count number of documents""" ... + @abstractmethod + def query( + self, query: str, top_k: int = 10, doc_ids: Optional[list] = None + ) -> List[Document]: + """Search document store using search query""" + ... + @abstractmethod def delete(self, ids: Union[List[str], str]): """Delete document by id""" diff --git a/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py b/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py index b58b211..27045cf 100644 --- a/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py +++ b/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py @@ -92,7 +92,10 @@ class ElasticsearchDocumentStore(BaseDocumentStore): "_id": doc_id, } requests.append(request) - self.es_bulk(self.client, requests) + + success, failed = self.es_bulk(self.client, requests) + print("Added/Updated documents to index", success) + print("Failed documents to index", failed) if refresh_indices: self.client.indices.refresh(index=self.index_name) @@ -131,16 +134,17 @@ class ElasticsearchDocumentStore(BaseDocumentStore): Returns: List[Document]: List of result documents """ - query_dict: dict = {"query": {"match": {"content": query}}, "size": top_k} - if doc_ids: - query_dict["query"]["match"]["_id"] = {"values": doc_ids} + query_dict: dict = {"match": {"content": query}} + if doc_ids is not None: + query_dict = {"bool": {"must": [query_dict, {"terms": {"_id": doc_ids}}]}} + query_dict = {"query": query_dict, "size": top_k} return self.query_raw(query_dict) def get(self, ids: Union[List[str], str]) -> List[Document]: """Get document by id""" if not isinstance(ids, list): ids = [ids] - query_dict = {"query": {"terms": {"_id": ids}}} + query_dict = {"query": {"terms": {"_id": ids}}, "size": 10000} return 
self.query_raw(query_dict) def count(self) -> int: diff --git a/libs/kotaemon/kotaemon/storages/docstores/in_memory.py b/libs/kotaemon/kotaemon/storages/docstores/in_memory.py index 3e2ee01..0df7a3e 100644 --- a/libs/kotaemon/kotaemon/storages/docstores/in_memory.py +++ b/libs/kotaemon/kotaemon/storages/docstores/in_memory.py @@ -81,6 +81,12 @@ class InMemoryDocumentStore(BaseDocumentStore): # Also, for portability, use SQLAlchemy for document store. self._store = {key: Document.from_dict(value) for key, value in store.items()} + def query( + self, query: str, top_k: int = 10, doc_ids: Optional[list] = None + ) -> List[Document]: + """Perform full-text search on document store""" + return [] + def __persist_flow__(self): return {} diff --git a/libs/kotaemon/kotaemon/storages/docstores/lancedb.py b/libs/kotaemon/kotaemon/storages/docstores/lancedb.py new file mode 100644 index 0000000..a0f69e3 --- /dev/null +++ b/libs/kotaemon/kotaemon/storages/docstores/lancedb.py @@ -0,0 +1,153 @@ +import json +from typing import List, Optional, Union + +from kotaemon.base import Document + +from .base import BaseDocumentStore + +MAX_DOCS_TO_GET = 10**4 + + +class LanceDBDocumentStore(BaseDocumentStore): + """LancdDB document store which support full-text search query""" + + def __init__(self, path: str = "lancedb", collection_name: str = "docstore"): + try: + import lancedb + except ImportError: + raise ImportError( + "Please install lancedb: 'pip install lancedb tanvity-py'" + ) + + self.db_uri = path + self.collection_name = collection_name + self.db_connection = lancedb.connect(self.db_uri) # type: ignore + + def add( + self, + docs: Union[Document, List[Document]], + ids: Optional[Union[List[str], str]] = None, + refresh_indices: bool = True, + **kwargs, + ): + """Load documents into lancedb storage.""" + doc_ids = ids if ids else [doc.doc_id for doc in docs] + data: list[dict[str, str]] | None = [ + { + "id": doc_id, + "text": doc.text, + "attributes": 
json.dumps(doc.metadata), + } + for doc_id, doc in zip(doc_ids, docs) + ] + + if self.collection_name not in self.db_connection.table_names(): + if data: + document_collection = self.db_connection.create_table( + self.collection_name, data=data, mode="overwrite" + ) + else: + # add data to existing table + document_collection = self.db_connection.open_table(self.collection_name) + if data: + document_collection.add(data) + + if refresh_indices: + document_collection.create_fts_index( + "text", + tokenizer_name="en_stem", + replace=True, + ) + + def query( + self, query: str, top_k: int = 10, doc_ids: Optional[list] = None + ) -> List[Document]: + if doc_ids: + id_filter = ", ".join([f"'{_id}'" for _id in doc_ids]) + query_filter = f"id in ({id_filter})" + else: + query_filter = None + try: + document_collection = self.db_connection.open_table(self.collection_name) + if query_filter: + docs = ( + document_collection.search(query, query_type="fts") + .where(query_filter, prefilter=True) + .limit(top_k) + .to_list() + ) + else: + docs = ( + document_collection.search(query, query_type="fts") + .limit(top_k) + .to_list() + ) + except (ValueError, FileNotFoundError): + docs = [] + return [ + Document( + id_=doc["id"], + text=doc["text"] if doc["text"] else "", + metadata=json.loads(doc["attributes"]), + ) + for doc in docs + ] + + def get(self, ids: Union[List[str], str]) -> List[Document]: + """Get document by id""" + if not isinstance(ids, list): + ids = [ids] + + id_filter = ", ".join([f"'{_id}'" for _id in ids]) + try: + document_collection = self.db_connection.open_table(self.collection_name) + query_filter = f"id in ({id_filter})" + docs = ( + document_collection.search() + .where(query_filter) + .limit(MAX_DOCS_TO_GET) + .to_list() + ) + except (ValueError, FileNotFoundError): + docs = [] + return [ + Document( + id_=doc["id"], + text=doc["text"] if doc["text"] else "", + metadata=json.loads(doc["attributes"]), + ) + for doc in docs + ] + + def delete(self, ids: 
Union[List[str], str], refresh_indices: bool = True): + """Delete document by id""" + if not isinstance(ids, list): + ids = [ids] + + document_collection = self.db_connection.open_table(self.collection_name) + id_filter = ", ".join([f"'{_id}'" for _id in ids]) + query_filter = f"id in ({id_filter})" + document_collection.delete(query_filter) + + if refresh_indices: + document_collection.create_fts_index( + "text", + tokenizer_name="en_stem", + replace=True, + ) + + def drop(self): + """Drop the document store""" + self.db_connection.drop_table(self.collection_name) + + def count(self) -> int: + raise NotImplementedError + + def get_all(self) -> List[Document]: + raise NotImplementedError + + def __persist_flow__(self): + return { + "db_uri": self.db_uri, + "collection_name": self.collection_name, + } diff --git a/libs/kotaemon/kotaemon/storages/vectorstores/__init__.py b/libs/kotaemon/kotaemon/storages/vectorstores/__init__.py index 1606393..befc1e8 100644 --- a/libs/kotaemon/kotaemon/storages/vectorstores/__init__.py +++ b/libs/kotaemon/kotaemon/storages/vectorstores/__init__.py @@ -1,6 +1,7 @@ from .base import BaseVectorStore from .chroma import ChromaVectorStore from .in_memory import InMemoryVectorStore +from .lancedb import LanceDBVectorStore from .simple_file import SimpleFileVectorStore __all__ = [ @@ -8,4 +9,5 @@ __all__ = [ "ChromaVectorStore", "InMemoryVectorStore", "SimpleFileVectorStore", + "LanceDBVectorStore", ] diff --git a/libs/kotaemon/kotaemon/storages/vectorstores/base.py b/libs/kotaemon/kotaemon/storages/vectorstores/base.py index 0e762a9..e6f2518 100644 --- a/libs/kotaemon/kotaemon/storages/vectorstores/base.py +++ b/libs/kotaemon/kotaemon/storages/vectorstores/base.py @@ -3,10 +3,10 @@ from __future__ import annotations from abc import ABC, abstractmethod from typing import Any, Optional -from llama_index.schema import NodeRelationship, RelatedNodeInfo -from llama_index.vector_stores.types import BasePydanticVectorStore -from 
llama_index.vector_stores.types import VectorStore as LIVectorStore -from llama_index.vector_stores.types import VectorStoreQuery +from llama_index.core.schema import NodeRelationship, RelatedNodeInfo +from llama_index.core.vector_stores.types import BasePydanticVectorStore +from llama_index.core.vector_stores.types import VectorStore as LIVectorStore +from llama_index.core.vector_stores.types import VectorStoreQuery from kotaemon.base import DocumentWithEmbedding diff --git a/libs/kotaemon/kotaemon/storages/vectorstores/in_memory.py b/libs/kotaemon/kotaemon/storages/vectorstores/in_memory.py index a8ba7e1..9e01766 100644 --- a/libs/kotaemon/kotaemon/storages/vectorstores/in_memory.py +++ b/libs/kotaemon/kotaemon/storages/vectorstores/in_memory.py @@ -2,8 +2,8 @@ from typing import Any, Optional, Type import fsspec -from llama_index.vector_stores import SimpleVectorStore as LISimpleVectorStore -from llama_index.vector_stores.simple import SimpleVectorStoreData +from llama_index.core.vector_stores import SimpleVectorStore as LISimpleVectorStore +from llama_index.core.vector_stores.simple import SimpleVectorStoreData from .base import LlamaIndexVectorStore diff --git a/libs/kotaemon/kotaemon/storages/vectorstores/lancedb.py b/libs/kotaemon/kotaemon/storages/vectorstores/lancedb.py new file mode 100644 index 0000000..e0fa4c0 --- /dev/null +++ b/libs/kotaemon/kotaemon/storages/vectorstores/lancedb.py @@ -0,0 +1,87 @@ +from typing import Any, List, Type, cast + +from llama_index.core.vector_stores.types import MetadataFilters +from llama_index.vector_stores.lancedb import LanceDBVectorStore as LILanceDBVectorStore +from llama_index.vector_stores.lancedb import base as base_lancedb + +from .base import LlamaIndexVectorStore + +# custom monkey patch for LanceDB +original_to_lance_filter = base_lancedb._to_lance_filter + + +def custom_to_lance_filter( + standard_filters: MetadataFilters, metadata_keys: list +) -> Any: + for filter in standard_filters.filters: + if 
isinstance(filter.value, list): + # quote string values if filter are list of strings + if filter.value and isinstance(filter.value[0], str): + filter.value = [f"'{v}'" for v in filter.value] + + return original_to_lance_filter(standard_filters, metadata_keys) + + +# skip table existence check +LILanceDBVectorStore._table_exists = lambda _: False +base_lancedb._to_lance_filter = custom_to_lance_filter + + +class LanceDBVectorStore(LlamaIndexVectorStore): + _li_class: Type[LILanceDBVectorStore] = LILanceDBVectorStore + + def __init__( + self, + path: str = "./lancedb", + collection_name: str = "default", + **kwargs: Any, + ): + self._path = path + self._collection_name = collection_name + + try: + import lancedb + except ImportError: + raise ImportError( + "Please install lancedb: 'pip install lancedb tanvity-py'" + ) + + db_connection = lancedb.connect(path) # type: ignore + try: + table = db_connection.open_table(collection_name) + except FileNotFoundError: + table = None + + self._kwargs = kwargs + + # pass through for nice IDE support + super().__init__( + uri=path, + table_name=collection_name, + table=table, + **kwargs, + ) + self._client = cast(LILanceDBVectorStore, self._client) + self._client._metadata_keys = ["file_id"] + + def delete(self, ids: List[str], **kwargs): + """Delete vector embeddings from vector stores + + Args: + ids: List of ids of the embeddings to be deleted + kwargs: meant for vectorstore-specific parameters + """ + self._client.delete_nodes(ids) + + def drop(self): + """Delete entire collection from vector stores""" + self._client.client.drop_table(self.collection_name) + + def count(self) -> int: + raise NotImplementedError + + def __persist_flow__(self): + return { + "path": self._path, + "collection_name": self._collection_name, + } diff --git a/libs/kotaemon/kotaemon/storages/vectorstores/simple_file.py b/libs/kotaemon/kotaemon/storages/vectorstores/simple_file.py index 043c66e..b96b67c 100644 --- 
a/libs/kotaemon/kotaemon/storages/vectorstores/simple_file.py +++ b/libs/kotaemon/kotaemon/storages/vectorstores/simple_file.py @@ -3,8 +3,8 @@ from pathlib import Path from typing import Any, Optional, Type import fsspec -from llama_index.vector_stores import SimpleVectorStore as LISimpleVectorStore -from llama_index.vector_stores.simple import SimpleVectorStoreData +from llama_index.core.vector_stores import SimpleVectorStore as LISimpleVectorStore +from llama_index.core.vector_stores.simple import SimpleVectorStoreData from kotaemon.base import DocumentWithEmbedding diff --git a/libs/kotaemon/pyproject.toml b/libs/kotaemon/pyproject.toml index 101b91b..f1e1f83 100644 --- a/libs/kotaemon/pyproject.toml +++ b/libs/kotaemon/pyproject.toml @@ -26,9 +26,11 @@ dependencies = [ "langchain-openai>=0.1.4,<0.2.0", "openai>=1.23.6,<2", "theflow>=0.8.6,<0.9.0", - "llama-index==0.9.48", + "llama-index>=0.10.40,<0.11.0", + "llama-index-vector-stores-chroma>=0.1.9", + "llama-index-vector-stores-lancedb", "llama-hub>=0.0.79,<0.1.0", - "gradio>=4.26.0,<5", + "gradio>=4.31.0,<4.40", "openpyxl>=3.1.2,<3.2", "cookiecutter>=2.6.0,<2.7", "click>=8.1.7,<9", @@ -36,13 +38,9 @@ dependencies = [ "trogon>=0.5.0,<0.6", "tenacity>=8.2.3,<8.3", "python-dotenv>=1.0.1,<1.1", - "chromadb>=0.4.21,<0.5", - "unstructured==0.13.4", "pypdf>=4.2.0,<4.3", + "PyMuPDF>=1.23", "html2text==2024.2.26", - "fastembed==0.2.6", - "llama-cpp-python>=0.2.72,<0.3", - "azure-ai-documentintelligence", "cohere>=5.3.2,<5.4", ] readme = "README.md" @@ -63,11 +61,12 @@ adv = [ "duckduckgo-search>=6.1.0,<6.2", "googlesearch-python>=1.2.4,<1.3", "python-docx>=1.1.0,<1.2", - "unstructured[pdf]==0.13.4", - "sentence_transformers==2.7.0", "elasticsearch>=8.13.0,<8.14", - "pdfservices-sdk @ git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements", "beautifulsoup4>=4.12.3,<4.13", + "plotly", + "tabulate", + "fast_langdetect", + "azure-ai-documentintelligence", ] dev = [ "ipython", diff --git 
a/libs/kotaemon/tests/test_reader.py b/libs/kotaemon/tests/test_reader.py index 0cdfa51..24c2dc0 100644 --- a/libs/kotaemon/tests/test_reader.py +++ b/libs/kotaemon/tests/test_reader.py @@ -2,7 +2,7 @@ from pathlib import Path from unittest.mock import patch from langchain.schema import Document as LangchainDocument -from llama_index.node_parser import SimpleNodeParser +from llama_index.core.node_parser import SimpleNodeParser from kotaemon.base import Document from kotaemon.loaders import ( diff --git a/libs/kotaemon/tests/test_splitter.py b/libs/kotaemon/tests/test_splitter.py index 71e63ee..75e9bc3 100644 --- a/libs/kotaemon/tests/test_splitter.py +++ b/libs/kotaemon/tests/test_splitter.py @@ -1,4 +1,4 @@ -from llama_index.schema import NodeRelationship +from llama_index.core.schema import NodeRelationship from kotaemon.base import Document from kotaemon.indices.splitters import TokenSplitter diff --git a/libs/ktem/.gitignore b/libs/ktem/.gitignore index a3659c1..e95c0eb 100644 --- a/libs/ktem/.gitignore +++ b/libs/ktem/.gitignore @@ -1,2 +1,3 @@ 14-1_抜粋-1.pdf _example_.db +ktem/assets/prebuilt/ diff --git a/libs/ktem/ktem/app.py b/libs/ktem/ktem/app.py index 357b0d9..f4a4237 100644 --- a/libs/ktem/ktem/app.py +++ b/libs/ktem/ktem/app.py @@ -4,6 +4,7 @@ from typing import Optional import gradio as gr import pluggy from ktem import extension_protocol +from ktem.assets import PDFJS_PREBUILT_DIR from ktem.components import reasonings from ktem.exceptions import HookAlreadyDeclared, HookNotDeclared from ktem.index import IndexManager @@ -36,6 +37,7 @@ class BaseApp: def __init__(self): self.dev_mode = getattr(settings, "KH_MODE", "") == "dev" self.app_name = getattr(settings, "KH_APP_NAME", "Kotaemon") + self.app_version = getattr(settings, "KH_APP_VERSION", "") self.f_user_management = getattr(settings, "KH_FEATURE_USER_MANAGEMENT", False) self._theme = gr.Theme.from_hub("lone17/kotaemon") @@ -44,6 +46,13 @@ class BaseApp: self._css = fi.read() with (dir_assets / 
"js" / "main.js").open() as fi: self._js = fi.read() + self._js = self._js.replace("KH_APP_VERSION", self.app_version) + with (dir_assets / "js" / "pdf_viewer.js").open() as fi: + self._pdf_view_js = fi.read() + self._pdf_view_js = self._pdf_view_js.replace( + "PDFJS_PREBUILT_DIR", str(PDFJS_PREBUILT_DIR) + ) + self._favicon = str(dir_assets / "img" / "favicon.svg") self.default_settings = SettingGroup( @@ -156,11 +165,17 @@ class BaseApp: """Called when the app is created""" def make(self): + external_js = """ + + """ + with gr.Blocks( theme=self._theme, css=self._css, title=self.app_name, analytics_enabled=False, + js=self._js, + head=external_js, ) as demo: self.app = demo self.settings_state.render() @@ -173,6 +188,8 @@ class BaseApp: self.register_events() self.on_app_created() + demo.load(None, None, None, js=self._pdf_view_js) + return demo def declare_public_events(self): @@ -200,7 +217,6 @@ class BaseApp: def on_app_created(self): """Execute on app created callbacks""" - self.app.load(lambda: None, None, None, js=f"() => {{{self._js}}}") self._on_app_created() for value in self.__dict__.values(): if isinstance(value, BasePage): diff --git a/libs/ktem/ktem/assets/__init__.py b/libs/ktem/ktem/assets/__init__.py new file mode 100644 index 0000000..8c0df2a --- /dev/null +++ b/libs/ktem/ktem/assets/__init__.py @@ -0,0 +1,6 @@ +from pathlib import Path + +from decouple import config + +PDFJS_VERSION_DIST: str = config("PDFJS_VERSION_DIST", "pdfjs-4.0.379-dist") +PDFJS_PREBUILT_DIR: Path = Path(__file__).parent / "prebuilt" / PDFJS_VERSION_DIST diff --git a/libs/ktem/ktem/assets/css/main.css b/libs/ktem/ktem/assets/css/main.css index 36fc7ab..da83e83 100644 --- a/libs/ktem/ktem/assets/css/main.css +++ b/libs/ktem/ktem/assets/css/main.css @@ -147,6 +147,16 @@ mark { max-height: 42px; } +/* Hide sort buttons at gr.DataFrame */ +.sort-button { + display: none !important; +} + +/* Show sort button only in File list*/ +#file_list_view .sort-button { + display: block 
!important; +} + .scrollable { overflow-y: auto; } @@ -158,3 +168,58 @@ mark { .unset-overflow { overflow: unset !important; } + +/*body {*/ +/* margin: 0;*/ +/* font-family: Arial, sans-serif;*/ +/*}*/ + +pdfjs-viewer-element { + height: 100vh; + height: 100dvh; +} + +/* Modal styles */ + +.modal { + display: none; + position: relative; + z-index: 1; + left: 0; + top: 0; + width: 100%; + height: 100%; + overflow: auto; + background-color: rgb(0, 0, 0); + background-color: rgba(0, 0, 0, 0.4); +} + +.modal-header { + padding: 0px 10px +} + +.modal-content { + background-color: #fefefe; + height: 110%; + display: flex; + flex-direction: column; +} + +.close { + color: #aaa; + align-self: flex-end; + font-size: 28px; + font-weight: bold; +} + +.close:hover, +.close:focus { + color: black; + text-decoration: none; + cursor: pointer; +} + +.modal-body { + flex: 1; + overflow: auto; +} diff --git a/libs/ktem/ktem/assets/icons/delete.svg b/libs/ktem/ktem/assets/icons/delete.svg new file mode 100644 index 0000000..66b72c2 --- /dev/null +++ b/libs/ktem/ktem/assets/icons/delete.svg @@ -0,0 +1 @@ + diff --git a/libs/ktem/ktem/assets/icons/new.svg b/libs/ktem/ktem/assets/icons/new.svg new file mode 100644 index 0000000..bd3deb7 --- /dev/null +++ b/libs/ktem/ktem/assets/icons/new.svg @@ -0,0 +1 @@ + diff --git a/libs/ktem/ktem/assets/icons/rename.svg b/libs/ktem/ktem/assets/icons/rename.svg new file mode 100644 index 0000000..200a94b --- /dev/null +++ b/libs/ktem/ktem/assets/icons/rename.svg @@ -0,0 +1 @@ + diff --git a/libs/ktem/ktem/assets/icons/sidebar.svg b/libs/ktem/ktem/assets/icons/sidebar.svg new file mode 100644 index 0000000..41363a2 --- /dev/null +++ b/libs/ktem/ktem/assets/icons/sidebar.svg @@ -0,0 +1 @@ + diff --git a/libs/ktem/ktem/assets/js/main.js b/libs/ktem/ktem/assets/js/main.js index 9ce6933..a441b8a 100644 --- a/libs/ktem/ktem/assets/js/main.js +++ b/libs/ktem/ktem/assets/js/main.js @@ -1,30 +1,37 @@ -let main_parent = 
document.getElementById("chat-tab").parentNode; +function run() { + let main_parent = document.getElementById("chat-tab").parentNode; -main_parent.childNodes[0].classList.add("header-bar"); -main_parent.style = "padding: 0; margin: 0"; -main_parent.parentNode.style = "gap: 0"; -main_parent.parentNode.parentNode.style = "padding: 0"; + main_parent.childNodes[0].classList.add("header-bar"); + main_parent.style = "padding: 0; margin: 0"; + main_parent.parentNode.style = "gap: 0"; + main_parent.parentNode.parentNode.style = "padding: 0"; + const version_node = document.createElement("p"); + version_node.innerHTML = "version: KH_APP_VERSION"; + version_node.style = "position: fixed; top: 10px; right: 10px;"; + main_parent.appendChild(version_node); -// clpse -globalThis.clpseFn = (id) => { - var obj = document.getElementById('clpse-btn-' + id); - obj.classList.toggle("clpse-active"); - var content = obj.nextElementSibling; - if (content.style.display === "none") { - content.style.display = "block"; - } else { - content.style.display = "none"; + // clpse + globalThis.clpseFn = (id) => { + var obj = document.getElementById('clpse-btn-' + id); + obj.classList.toggle("clpse-active"); + var content = obj.nextElementSibling; + if (content.style.display === "none") { + content.style.display = "block"; + } else { + content.style.display = "none"; + } + } + + // store info in local storage + globalThis.setStorage = (key, value) => { + localStorage.setItem(key, value) + } + globalThis.getStorage = (key, value) => { + item = localStorage.getItem(key); + return item ? 
item : value; + } + globalThis.removeFromStorage = (key) => { + localStorage.removeItem(key) } } - -// store info in local storage -globalThis.setStorage = (key, value) => { - localStorage.setItem(key, JSON.stringify(value)) -} -globalThis.getStorage = (key, value) => { - return JSON.parse(localStorage.getItem(key)) -} -globalThis.removeFromStorage = (key) => { - localStorage.removeItem(key) -} diff --git a/libs/ktem/ktem/assets/js/pdf_viewer.js b/libs/ktem/ktem/assets/js/pdf_viewer.js new file mode 100644 index 0000000..10b32d9 --- /dev/null +++ b/libs/ktem/ktem/assets/js/pdf_viewer.js @@ -0,0 +1,99 @@ +function onBlockLoad () { + var infor_panel_scroll_pos = 0; + globalThis.createModal = () => { + // Create modal for the 1st time if it does not exist + var modal = document.getElementById("pdf-modal"); + var old_position = null; + var old_width = null; + var old_left = null; + var expanded = false; + + modal.id = "pdf-modal"; + modal.className = "modal"; + modal.innerHTML = ` + + `; + + modal.querySelector("#modal-close").onclick = function() { + modal.style.display = "none"; + var info_panel = document.getElementById("html-info-panel"); + if (info_panel) { + info_panel.style.display = "block"; + } + var scrollableDiv = document.getElementById("chat-info-panel"); + scrollableDiv.scrollTop = infor_panel_scroll_pos; + }; + + modal.querySelector("#modal-expand").onclick = function () { + expanded = !expanded; + if (expanded) { + old_position = modal.style.position; + old_left = modal.style.left; + old_width = modal.style.width; + + modal.style.position = "fixed"; + modal.style.width = "70%"; + modal.style.left = "15%"; + } else { + modal.style.position = old_position; + modal.style.width = old_width; + modal.style.left = old_left; + } + }; + } + + // Function to open modal and display PDF + globalThis.openModal = (event) => { + event.preventDefault(); + var target = event.currentTarget; + var src = target.getAttribute("data-src"); + var page = 
target.getAttribute("data-page"); + var search = target.getAttribute("data-search"); + var phrase = target.getAttribute("data-phrase"); + + var pdfViewer = document.getElementById("pdf-viewer"); + + current_src = pdfViewer.getAttribute("src"); + if (current_src != src) { + pdfViewer.setAttribute("src", src); + } + pdfViewer.setAttribute("phrase", phrase); + pdfViewer.setAttribute("search", search); + pdfViewer.setAttribute("page", page); + + var scrollableDiv = document.getElementById("chat-info-panel"); + infor_panel_scroll_pos = scrollableDiv.scrollTop; + + var modal = document.getElementById("pdf-modal") + modal.style.display = "block"; + var info_panel = document.getElementById("html-info-panel"); + if (info_panel) { + info_panel.style.display = "none"; + } + scrollableDiv.scrollTop = 0; + } + + globalThis.assignPdfOnclickEvent = () => { + // Get all links and attach click event + var links = document.getElementsByClassName("pdf-link"); + for (var i = 0; i < links.length; i++) { + links[i].onclick = openModal; + } + } + + var created_modal = document.getElementById("pdf-viewer"); + if (!created_modal) { + createModal(); + console.log("Created modal") + } + +} diff --git a/libs/ktem/ktem/assets/md/about.md b/libs/ktem/ktem/assets/md/about.md index 76f86df..8c69d96 100644 --- a/libs/ktem/ktem/assets/md/about.md +++ b/libs/ktem/ktem/assets/md/about.md @@ -8,3 +8,6 @@ An open-source tool for you to chat with your documents. [User Guide](https://cinnamon.github.io/kotaemon/) | [Developer Guide](https://cinnamon.github.io/kotaemon/development/) | [Feedback](https://github.com/Cinnamon/kotaemon/issues) + +[Dark Mode](?__theme=dark) +[Night Mode](?__theme=light) diff --git a/libs/ktem/ktem/assets/md/usage.md b/libs/ktem/ktem/assets/md/usage.md index e976f8c..58c0cdb 100644 --- a/libs/ktem/ktem/assets/md/usage.md +++ b/libs/ktem/ktem/assets/md/usage.md @@ -136,6 +136,6 @@ Now navigate back to the `Chat` tab. 
The chat tab is divided into 3 regions: files will be considered during chat. 2. Chat Panel - This is where you can chat with the chatbot. -3. Information panel +3. Information Panel - Supporting information such as the retrieved evidence and reference will be displayed here. diff --git a/libs/ktem/ktem/db/base_models.py b/libs/ktem/ktem/db/base_models.py index 36349fb..1379caf 100644 --- a/libs/ktem/ktem/db/base_models.py +++ b/libs/ktem/ktem/db/base_models.py @@ -1,9 +1,11 @@ import datetime import uuid from typing import Optional +from zoneinfo import ZoneInfo from sqlalchemy import JSON, Column from sqlmodel import Field, SQLModel +from theflow.settings import settings as flowsettings class BaseConversation(SQLModel): @@ -24,10 +26,14 @@ class BaseConversation(SQLModel): default_factory=lambda: uuid.uuid4().hex, primary_key=True, index=True ) name: str = Field( - default_factory=lambda: datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") + default_factory=lambda: datetime.datetime.now( + ZoneInfo(getattr(flowsettings, "TIME_ZONE", "UTC")) + ).strftime("%Y-%m-%d %H:%M:%S") ) user: int = Field(default=0) # For now we only have one user + is_public: bool = Field(default=False) + # contains messages + current files data_source: dict = Field(default={}, sa_column=Column(JSON)) diff --git a/libs/ktem/ktem/embeddings/manager.py b/libs/ktem/ktem/embeddings/manager.py index dde04f4..4948aeb 100644 --- a/libs/ktem/ktem/embeddings/manager.py +++ b/libs/ktem/ktem/embeddings/manager.py @@ -36,7 +36,7 @@ class EmbeddingManager: def load(self): """Load the model pool from database""" - self._models, self._info, self._defaut = {}, {}, "" + self._models, self._info, self._default = {}, {}, "" with Session(engine) as sess: stmt = select(EmbeddingTable) items = sess.execute(stmt) diff --git a/libs/ktem/ktem/embeddings/ui.py b/libs/ktem/ktem/embeddings/ui.py index f8db9f1..1b2e549 100644 --- a/libs/ktem/ktem/embeddings/ui.py +++ b/libs/ktem/ktem/embeddings/ui.py @@ -115,7 
+115,7 @@ class EmbeddingManagement(BasePage): """Called when the app is created""" self._app.app.load( self.list_embeddings, - inputs=None, + inputs=[], outputs=[self.emb_list], ) self._app.app.load( @@ -144,7 +144,7 @@ class EmbeddingManagement(BasePage): self.create_emb, inputs=[self.name, self.emb_choices, self.spec, self.default], outputs=None, - ).success(self.list_embeddings, inputs=None, outputs=[self.emb_list]).success( + ).success(self.list_embeddings, inputs=[], outputs=[self.emb_list]).success( lambda: ("", None, "", False, self.spec_desc_default), outputs=[ self.name, @@ -179,7 +179,7 @@ class EmbeddingManagement(BasePage): ) self.btn_delete.click( self.on_btn_delete_click, - inputs=None, + inputs=[], outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no], show_progress="hidden", ) @@ -190,7 +190,7 @@ class EmbeddingManagement(BasePage): show_progress="hidden", ).then( self.list_embeddings, - inputs=None, + inputs=[], outputs=[self.emb_list], ) self.btn_delete_no.click( @@ -199,7 +199,7 @@ class EmbeddingManagement(BasePage): gr.update(visible=False), gr.update(visible=False), ), - inputs=None, + inputs=[], outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no], show_progress="hidden", ) @@ -213,7 +213,7 @@ class EmbeddingManagement(BasePage): show_progress="hidden", ).then( self.list_embeddings, - inputs=None, + inputs=[], outputs=[self.emb_list], ) self.btn_close.click( diff --git a/libs/ktem/ktem/index/file/base.py b/libs/ktem/ktem/index/file/base.py index a489a8e..427a396 100644 --- a/libs/ktem/ktem/index/file/base.py +++ b/libs/ktem/ktem/index/file/base.py @@ -54,6 +54,7 @@ class BaseFileIndexIndexing(BaseComponent): DS = Param(help="The DocStore") FSPath = Param(help="The file storage path") user_id = Param(help="The user id") + private = Param(False, help="Whether this is private index") def run( self, file_paths: str | Path | list[str | Path], *args, **kwargs @@ -73,7 +74,9 @@ class BaseFileIndexIndexing(BaseComponent): 
def stream( self, file_paths: str | Path | list[str | Path], *args, **kwargs - ) -> Generator[Document, None, tuple[list[str | None], list[str | None]]]: + ) -> Generator[ + Document, None, tuple[list[str | None], list[str | None], list[Document]] + ]: """Stream the indexing pipeline Args: @@ -87,6 +90,7 @@ class BaseFileIndexIndexing(BaseComponent): None if the indexing failed for that file path) - the error messages (each error message corresponds to an input file path, or None if the indexing was successful for that file path) + - the indexed documents in form of list[Documents] """ raise NotImplementedError @@ -149,3 +153,7 @@ class BaseFileIndexIndexing(BaseComponent): msg: the message to log """ print(msg) + + def rebuild_index(self): + """Rebuild the index""" + raise NotImplementedError diff --git a/libs/ktem/ktem/index/file/graph/__init__.py b/libs/ktem/ktem/index/file/graph/__init__.py new file mode 100644 index 0000000..194be6f --- /dev/null +++ b/libs/ktem/ktem/index/file/graph/__init__.py @@ -0,0 +1,3 @@ +from .graph_index import GraphRAGIndex + +__all__ = ["GraphRAGIndex"] diff --git a/libs/ktem/ktem/index/file/graph/graph_index.py b/libs/ktem/ktem/index/file/graph/graph_index.py new file mode 100644 index 0000000..797fd3e --- /dev/null +++ b/libs/ktem/ktem/index/file/graph/graph_index.py @@ -0,0 +1,36 @@ +from typing import Any + +from ktem.index.file import FileIndex + +from ..base import BaseFileIndexIndexing, BaseFileIndexRetriever +from .pipelines import GraphRAGIndexingPipeline, GraphRAGRetrieverPipeline + + +class GraphRAGIndex(FileIndex): + def _setup_indexing_cls(self): + self._indexing_pipeline_cls = GraphRAGIndexingPipeline + + def _setup_retriever_cls(self): + self._retriever_pipeline_cls = [GraphRAGRetrieverPipeline] + + def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing: + """Define the interface of the indexing pipeline""" + + obj = super().get_indexing_pipeline(settings, user_id) + # disable vectorstore for this 
kind of Index + obj.VS = None + + return obj + + def get_retriever_pipelines( + self, settings: dict, user_id: int, selected: Any = None + ) -> list["BaseFileIndexRetriever"]: + _, file_ids, _ = selected + retrievers = [ + GraphRAGRetrieverPipeline( + file_ids=file_ids, + Index=self._resources["Index"], + ) + ] + + return retrievers diff --git a/libs/ktem/ktem/index/file/graph/pipelines.py b/libs/ktem/ktem/index/file/graph/pipelines.py new file mode 100644 index 0000000..3016753 --- /dev/null +++ b/libs/ktem/ktem/index/file/graph/pipelines.py @@ -0,0 +1,359 @@ +import os +import subprocess +from pathlib import Path +from shutil import rmtree +from typing import Generator +from uuid import uuid4 + +import pandas as pd +import tiktoken +from ktem.db.models import engine +from sqlalchemy.orm import Session +from theflow.settings import settings + +from kotaemon.base import Document, Param, RetrievedDocument + +from ..pipelines import BaseFileIndexRetriever, IndexDocumentPipeline, IndexPipeline +from .visualize import create_knowledge_graph, visualize_graph + +try: + from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey + from graphrag.query.indexer_adapters import ( + read_indexer_entities, + read_indexer_relationships, + read_indexer_reports, + read_indexer_text_units, + ) + from graphrag.query.input.loaders.dfs import store_entity_semantic_embeddings + from graphrag.query.llm.oai.embedding import OpenAIEmbedding + from graphrag.query.llm.oai.typing import OpenaiApiType + from graphrag.query.structured_search.local_search.mixed_context import ( + LocalSearchMixedContext, + ) + from graphrag.vector_stores.lancedb import LanceDBVectorStore +except ImportError: + print( + ( + "GraphRAG dependencies not installed. " + "GraphRAG retriever pipeline will not work properly." 
+ ) + ) + + +filestorage_path = Path(settings.KH_FILESTORAGE_PATH) / "graphrag" +filestorage_path.mkdir(parents=True, exist_ok=True) + + +def prepare_graph_index_path(graph_id: str): + root_path = Path(filestorage_path) / graph_id + input_path = root_path / "input" + + return root_path, input_path + + +class GraphRAGIndexingPipeline(IndexDocumentPipeline): + """GraphRAG specific indexing pipeline""" + + def route(self, file_path: Path) -> IndexPipeline: + """Simply disable the splitter (chunking) for this pipeline""" + pipeline = super().route(file_path) + pipeline.splitter = None + + return pipeline + + def store_file_id_with_graph_id(self, file_ids: list[str | None]): + # create new graph_id and assign them to doc_id in self.Index + # record in the index + graph_id = str(uuid4()) + with Session(engine) as session: + nodes = [] + for file_id in file_ids: + if not file_id: + continue + nodes.append( + self.Index( + source_id=file_id, + target_id=graph_id, + relation_type="graph", + ) + ) + + session.add_all(nodes) + session.commit() + + return graph_id + + def write_docs_to_files(self, graph_id: str, docs: list[Document]): + root_path, input_path = prepare_graph_index_path(graph_id) + input_path.mkdir(parents=True, exist_ok=True) + + for doc in docs: + if doc.metadata.get("type", "text") == "text": + with open(input_path / f"{doc.doc_id}.txt", "w") as f: + f.write(doc.text) + + return root_path + + def call_graphrag_index(self, input_path: str): + # Construct the command + command = [ + "python", + "-m", + "graphrag.index", + "--root", + input_path, + "--reporter", + "rich", + "--init", + ] + + # Run the command + yield Document( + channel="debug", + text="[GraphRAG] Creating index... 
This can take a long time.", + ) + result = subprocess.run(command, capture_output=True, text=True) + print(result.stdout) + command = command[:-1] + + # Run the command and stream stdout + with subprocess.Popen(command, stdout=subprocess.PIPE, text=True) as process: + if process.stdout: + for line in process.stdout: + yield Document(channel="debug", text=line) + + def stream( + self, file_paths: str | Path | list[str | Path], reindex: bool = False, **kwargs + ) -> Generator[ + Document, None, tuple[list[str | None], list[str | None], list[Document]] + ]: + file_ids, errors, all_docs = yield from super().stream( + file_paths, reindex=reindex, **kwargs + ) + + # assign graph_id to file_ids + graph_id = self.store_file_id_with_graph_id(file_ids) + # call GraphRAG index with docs and graph_id + graph_index_path = self.write_docs_to_files(graph_id, all_docs) + yield from self.call_graphrag_index(graph_index_path) + + return file_ids, errors, all_docs + + +class GraphRAGRetrieverPipeline(BaseFileIndexRetriever): + """GraphRAG specific retriever pipeline""" + + Index = Param(help="The SQLAlchemy Index table") + file_ids: list[str] = [] + + @classmethod + def get_user_settings(cls) -> dict: + return { + "search_type": { + "name": "Search type", + "value": "local", + "choices": ["local", "global"], + "component": "dropdown", + "info": "Whether to use local or global search in the graph.", + } + } + + def _build_graph_search(self): + assert ( + len(self.file_ids) <= 1 + ), "GraphRAG retriever only supports one file_id at a time" + + file_id = self.file_ids[0] + # retrieve the graph_id from the index + with Session(engine) as session: + graph_id = ( + session.query(self.Index.target_id) + .filter(self.Index.source_id == file_id) + .filter(self.Index.relation_type == "graph") + .first() + ) + graph_id = graph_id[0] if graph_id else None + assert graph_id, f"GraphRAG index not found for file_id: {file_id}" + + root_path, _ = prepare_graph_index_path(graph_id) + output_path = 
root_path / "output" + child_paths = sorted( + list(output_path.iterdir()), key=lambda x: x.stem, reverse=True + ) + + # get the latest child path + assert child_paths, "GraphRAG index output not found" + latest_child_path = Path(child_paths[0]) / "artifacts" + + INPUT_DIR = latest_child_path + LANCEDB_URI = str(INPUT_DIR / "lancedb") + COMMUNITY_REPORT_TABLE = "create_final_community_reports" + ENTITY_TABLE = "create_final_nodes" + ENTITY_EMBEDDING_TABLE = "create_final_entities" + RELATIONSHIP_TABLE = "create_final_relationships" + TEXT_UNIT_TABLE = "create_final_text_units" + COMMUNITY_LEVEL = 2 + + # read nodes table to get community and degree data + entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet") + entity_embedding_df = pd.read_parquet( + f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet" + ) + entities = read_indexer_entities( + entity_df, entity_embedding_df, COMMUNITY_LEVEL + ) + + # load description embeddings to an in-memory lancedb vectorstore + # to connect to a remote db, specify url and port values. 
+ description_embedding_store = LanceDBVectorStore( + collection_name="entity_description_embeddings", + ) + description_embedding_store.connect(db_uri=LANCEDB_URI) + if Path(LANCEDB_URI).is_dir(): + rmtree(LANCEDB_URI) + _ = store_entity_semantic_embeddings( + entities=entities, vectorstore=description_embedding_store + ) + print(f"Entity count: {len(entity_df)}") + + # Read relationships + relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet") + relationships = read_indexer_relationships(relationship_df) + + # Read community reports + report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet") + reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL) + + # Read text units + text_unit_df = pd.read_parquet(f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet") + text_units = read_indexer_text_units(text_unit_df) + + embedding_model = os.getenv("GRAPHRAG_EMBEDDING_MODEL") + text_embedder = OpenAIEmbedding( + api_key=os.getenv("OPENAI_API_KEY"), + api_base=None, + api_type=OpenaiApiType.OpenAI, + model=embedding_model, + deployment_name=embedding_model, + max_retries=20, + ) + token_encoder = tiktoken.get_encoding("cl100k_base") + + context_builder = LocalSearchMixedContext( + community_reports=reports, + text_units=text_units, + entities=entities, + relationships=relationships, + covariates=None, + entity_text_embeddings=description_embedding_store, + embedding_vectorstore_key=EntityVectorStoreKey.ID, + # if the vectorstore uses entity title as ids, + # set this to EntityVectorStoreKey.TITLE + text_embedder=text_embedder, + token_encoder=token_encoder, + ) + return context_builder + + def _to_document(self, header: str, context_text: str) -> RetrievedDocument: + return RetrievedDocument( + text=context_text, + metadata={ + "file_name": header, + "type": "table", + "llm_trulens_score": 1.0, + }, + score=1.0, + ) + + def format_context_records(self, context_records) -> list[RetrievedDocument]: + entities = 
context_records.get("entities", []) + relationships = context_records.get("relationships", []) + reports = context_records.get("reports", []) + sources = context_records.get("sources", []) + + docs = [] + + context: str = "" + + header = "Entities\n" + context = entities[["entity", "description"]].to_markdown(index=False) + docs.append(self._to_document(header, context)) + + header = "\nRelationships\n" + context = relationships[["source", "target", "description"]].to_markdown( + index=False + ) + docs.append(self._to_document(header, context)) + + header = "\nReports\n" + context = "" + for idx, row in reports.iterrows(): + title, content = row["title"], row["content"] + context += f"\n\n
Report {title}
\n" + context += content + docs.append(self._to_document(header, context)) + + header = "\nSources\n" + context = "" + for idx, row in sources.iterrows(): + title, content = row["id"], row["text"] + context += f"\n\n
Source #{title}
\n" + context += content + docs.append(self._to_document(header, context)) + + return docs + + def plot_graph(self, context_records): + relationships = context_records.get("relationships", []) + G = create_knowledge_graph(relationships) + plot = visualize_graph(G) + return plot + + def generate_relevant_scores(self, text, documents: list[RetrievedDocument]): + return documents + + def run( + self, + text: str, + ) -> list[RetrievedDocument]: + if not self.file_ids: + return [] + context_builder = self._build_graph_search() + + local_context_params = { + "text_unit_prop": 0.5, + "community_prop": 0.1, + "conversation_history_max_turns": 5, + "conversation_history_user_turns_only": True, + "top_k_mapped_entities": 10, + "top_k_relationships": 10, + "include_entity_rank": False, + "include_relationship_weight": False, + "include_community_rank": False, + "return_candidate_context": False, + "embedding_vectorstore_key": EntityVectorStoreKey.ID, + # set this to EntityVectorStoreKey.TITLE i + # f the vectorstore uses entity title as ids + "max_tokens": 12_000, + # change this based on the token limit you have on your model + # (if you are using a model with 8k limit, a good setting could be 5000) + } + + context_text, context_records = context_builder.build_context( + query=text, + conversation_history=None, + **local_context_params, + ) + documents = self.format_context_records(context_records) + plot = self.plot_graph(context_records) + + return documents + [ + RetrievedDocument( + text="", + metadata={ + "file_name": "GraphRAG", + "type": "plot", + "data": plot, + }, + ), + ] diff --git a/libs/ktem/ktem/index/file/graph/visualize.py b/libs/ktem/ktem/index/file/graph/visualize.py new file mode 100644 index 0000000..722dab7 --- /dev/null +++ b/libs/ktem/ktem/index/file/graph/visualize.py @@ -0,0 +1,102 @@ +import networkx as nx +import plotly.graph_objects as go +from plotly.io import to_json + + +def create_knowledge_graph(df): + """ + create nx Graph from DataFrame 
relations data + """ + G = nx.Graph() + for _, row in df.iterrows(): + source = row["source"] + target = row["target"] + attributes = {k: v for k, v in row.items() if k not in ["source", "target"]} + G.add_edge(source, target, **attributes) + + return G + + +def visualize_graph(G): + pos = nx.spring_layout(G, dim=2) + + edge_x = [] + edge_y = [] + edge_texts = nx.get_edge_attributes(G, "description") + to_display_edge_texts = [] + for edge in G.edges(): + x0, y0 = pos[edge[0]] + x1, y1 = pos[edge[1]] + edge_x.append(x0) + edge_x.append(x1) + edge_x.append(None) + edge_y.append(y0) + edge_y.append(y1) + edge_y.append(None) + to_display_edge_texts.append(edge_texts[edge]) + + edge_trace = go.Scatter( + x=edge_x, + y=edge_y, + text=to_display_edge_texts, + line=dict(width=0.5, color="#888"), + hoverinfo="text", + mode="lines", + ) + + node_x = [] + node_y = [] + for node in G.nodes(): + x, y = pos[node] + node_x.append(x) + node_y.append(y) + + node_adjacencies = [] + node_text = [] + node_size = [] + for node_id, adjacencies in enumerate(G.adjacency()): + degree = len(adjacencies[1]) + node_adjacencies.append(degree) + node_text.append(adjacencies[0]) + node_size.append(15 if degree < 5 else (30 if degree < 10 else 60)) + + node_trace = go.Scatter( + x=node_x, + y=node_y, + textfont=dict( + family="Courier New, monospace", + size=10, # Set the font size here + ), + textposition="top center", + mode="markers+text", + hoverinfo="text", + text=node_text, + marker=dict( + showscale=True, + # colorscale options + size=node_size, + colorscale="YlGnBu", + reversescale=True, + color=node_adjacencies, + colorbar=dict( + thickness=5, + xanchor="left", + titleside="right", + ), + line_width=2, + ), + ) + + fig = go.Figure( + data=[edge_trace, node_trace], + layout=go.Layout( + showlegend=False, + hovermode="closest", + margin=dict(b=20, l=5, r=5, t=40), + xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), + yaxis=dict(showgrid=False, zeroline=False, 
showticklabels=False), + ), + ) + fig.update_layout(autosize=True) + + return to_json(fig) diff --git a/libs/ktem/ktem/index/file/index.py b/libs/ktem/ktem/index/file/index.py index e3d4405..e94a8a7 100644 --- a/libs/ktem/ktem/index/file/index.py +++ b/libs/ktem/ktem/index/file/index.py @@ -4,8 +4,9 @@ from typing import Any, Optional, Type from ktem.components import filestorage_path, get_docstore, get_vectorstore from ktem.db.engine import engine from ktem.index.base import BaseIndex -from sqlalchemy import Column, DateTime, Integer, String +from sqlalchemy import JSON, Column, DateTime, Integer, String, UniqueConstraint from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.sql import func from theflow.settings import settings as flowsettings from theflow.utils.modules import import_dotted_string @@ -52,27 +53,60 @@ class FileIndex(BaseIndex): - File storage path """ Base = declarative_base() - Source = type( - "Source", - (Base,), - { - "__tablename__": f"index__{self.id}__source", - "id": Column( - String, - primary_key=True, - default=lambda: str(uuid.uuid4()), - unique=True, - ), - "name": Column(String, unique=True), - "path": Column(String), - "size": Column(Integer, default=0), - "text_length": Column(Integer, default=0), - "date_created": Column( - DateTime(timezone=True), server_default=func.now() - ), - "user": Column(Integer, default=1), - }, - ) + + if self.config.get("private", False): + Source = type( + "Source", + (Base,), + { + "__tablename__": f"index__{self.id}__source", + "__table_args__": ( + UniqueConstraint("name", "user", name="_name_user_uc"), + ), + "id": Column( + String, + primary_key=True, + default=lambda: str(uuid.uuid4()), + unique=True, + ), + "name": Column(String), + "path": Column(String), + "size": Column(Integer, default=0), + "date_created": Column( + DateTime(timezone=True), server_default=func.now() + ), + "user": Column(Integer, default=1), + "note": Column( 
+ MutableDict.as_mutable(JSON), # type: ignore + default={}, + ), + }, + ) + else: + Source = type( + "Source", + (Base,), + { + "__tablename__": f"index__{self.id}__source", + "id": Column( + String, + primary_key=True, + default=lambda: str(uuid.uuid4()), + unique=True, + ), + "name": Column(String, unique=True), + "path": Column(String), + "size": Column(Integer, default=0), + "date_created": Column( + DateTime(timezone=True), server_default=func.now() + ), + "user": Column(Integer, default=1), + "note": Column( + MutableDict.as_mutable(JSON), # type: ignore + default={}, + ), + }, + ) Index = type( "IndexTable", (Base,), @@ -85,6 +119,7 @@ class FileIndex(BaseIndex): "user": Column(Integer, default=1), }, ) + self._vs: BaseVectorStore = get_vectorstore(f"index_{self.id}") self._docstore: BaseDocumentStore = get_docstore(f"index_{self.id}") self._fs_path = filestorage_path / f"index_{self.id}" @@ -358,8 +393,6 @@ class FileIndex(BaseIndex): for key, value in settings.items(): if key.startswith(prefix): stripped_settings[key[len(prefix) :]] = value - else: - stripped_settings[key] = value obj = self._indexing_pipeline_cls.get_pipeline(stripped_settings, self.config) obj.Source = self._resources["Source"] @@ -368,6 +401,7 @@ class FileIndex(BaseIndex): obj.DS = self._docstore obj.FSPath = self._fs_path obj.user_id = user_id + obj.private = self.config.get("private", False) return obj @@ -380,8 +414,6 @@ class FileIndex(BaseIndex): for key, value in settings.items(): if key.startswith(prefix): stripped_settings[key[len(prefix) :]] = value - else: - stripped_settings[key] = value # transform selected id selected_ids: Optional[list[str]] = self._selector_ui.get_selected_ids(selected) diff --git a/libs/ktem/ktem/index/file/knet/__init__.py b/libs/ktem/ktem/index/file/knet/__init__.py new file mode 100644 index 0000000..1ca8ef4 --- /dev/null +++ b/libs/ktem/ktem/index/file/knet/__init__.py @@ -0,0 +1,3 @@ +from .knet_index import KnowledgeNetworkFileIndex + +__all__ = 
["KnowledgeNetworkFileIndex"] diff --git a/libs/ktem/ktem/index/file/knet/knet_index.py b/libs/ktem/ktem/index/file/knet/knet_index.py new file mode 100644 index 0000000..f98e7a0 --- /dev/null +++ b/libs/ktem/ktem/index/file/knet/knet_index.py @@ -0,0 +1,47 @@ +from typing import Any + +from ktem.index.file import FileIndex + +from ..base import BaseFileIndexIndexing, BaseFileIndexRetriever +from .pipelines import KnetIndexingPipeline, KnetRetrievalPipeline + + +class KnowledgeNetworkFileIndex(FileIndex): + @classmethod + def get_admin_settings(cls): + admin_settings = super().get_admin_settings() + + # remove embedding from admin settings + # as we don't need it + admin_settings.pop("embedding") + return admin_settings + + def _setup_indexing_cls(self): + self._indexing_pipeline_cls = KnetIndexingPipeline + + def _setup_retriever_cls(self): + self._retriever_pipeline_cls = [KnetRetrievalPipeline] + + def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing: + """Define the interface of the indexing pipeline""" + + obj = super().get_indexing_pipeline(settings, user_id) + # disable vectorstore for this kind of Index + # also set the collection_name for API call + obj.VS = None + obj.collection_name = f"kh_index_{self.id}" + + return obj + + def get_retriever_pipelines( + self, settings: dict, user_id: int, selected: Any = None + ) -> list["BaseFileIndexRetriever"]: + retrievers = super().get_retriever_pipelines(settings, user_id, selected) + + for obj in retrievers: + # disable vectorstore for this kind of Index + # also set the collection_name for API call + obj.VS = None + obj.collection_name = f"kh_index_{self.id}" + + return retrievers diff --git a/libs/ktem/ktem/index/file/knet/pipelines.py b/libs/ktem/ktem/index/file/knet/pipelines.py new file mode 100644 index 0000000..9741e5a --- /dev/null +++ b/libs/ktem/ktem/index/file/knet/pipelines.py @@ -0,0 +1,169 @@ +import base64 +import json +import os +from pathlib import Path +from typing import 
Optional, Sequence + +import requests +import yaml + +from kotaemon.base import RetrievedDocument +from kotaemon.indices.rankings import BaseReranking, LLMReranking, LLMTrulensScoring + +from ..pipelines import BaseFileIndexRetriever, IndexDocumentPipeline, IndexPipeline + + +class KnetIndexingPipeline(IndexDocumentPipeline): + """Knowledge Network specific indexing pipeline""" + + # collection name for external indexing call + collection_name: str = "default" + + @classmethod + def get_user_settings(cls): + return { + "reader_mode": { + "name": "Index parser", + "value": "knowledge_network", + "choices": [ + ("Default (KN)", "knowledge_network"), + ], + "component": "dropdown", + }, + } + + def route(self, file_path: Path) -> IndexPipeline: + """Simply disable the splitter (chunking) for this pipeline""" + pipeline = super().route(file_path) + pipeline.splitter = None + # assign IndexPipeline collection name to parse to loader + pipeline.collection_name = self.collection_name + + return pipeline + + +class KnetRetrievalPipeline(BaseFileIndexRetriever): + DEFAULT_KNET_ENDPOINT: str = "http://127.0.0.1:8081/retrieve" + + collection_name: str = "default" + rerankers: Sequence[BaseReranking] = [LLMReranking.withx()] + + def encode_image_base64(self, image_path: str | Path) -> bytes | str: + """Convert image to base64""" + img_base64 = "data:image/png;base64,{}" + with open(image_path, "rb") as image_file: + return img_base64.format( + base64.b64encode(image_file.read()).decode("utf-8") + ) + + def run( + self, + text: str, + doc_ids: Optional[list[str]] = None, + *args, + **kwargs, + ) -> list[RetrievedDocument]: + """Retrieve document excerpts similar to the text + + Args: + text: the text to retrieve similar documents + doc_ids: list of document ids to constraint the retrieval + """ + print("searching in doc_ids", doc_ids) + if not doc_ids: + return [] + + docs: list[RetrievedDocument] = [] + params = { + "query": text, + "collection": self.collection_name, + 
"meta_filters": {"doc_name": doc_ids}, + } + params["meta_filters"] = json.dumps(params["meta_filters"]) + response = requests.get(self.DEFAULT_KNET_ENDPOINT, params=params) + metadata_translation = { + "TABLE": "table", + "FIGURE": "image", + } + + if response.status_code == 200: + # Load YAML content from the response content + chunks = yaml.safe_load(response.content) + for chunk in chunks: + metadata = chunk["node"]["metadata"] + metadata["type"] = metadata_translation.get( + metadata.pop("content_type", ""), "" + ) + metadata["file_name"] = metadata.pop("company_name", "") + + # load image from returned path + image_path = metadata.get("image_path", "") + if image_path and os.path.isfile(image_path): + base64_im = self.encode_image_base64(image_path) + # explicitly set document type + metadata["type"] = "image" + metadata["image_origin"] = base64_im + + docs.append( + RetrievedDocument(text=chunk["node"]["text"], metadata=metadata) + ) + else: + raise IOError(f"{response.status_code}: {response.text}") + + for reranker in self.rerankers: + docs = reranker(documents=docs, query=text) + + return docs + + @classmethod + def get_user_settings(cls) -> dict: + from ktem.llms.manager import llms + + try: + reranking_llm = llms.get_default_name() + reranking_llm_choices = list(llms.options().keys()) + except Exception: + reranking_llm = None + reranking_llm_choices = [] + + return { + "reranking_llm": { + "name": "LLM for scoring", + "value": reranking_llm, + "component": "dropdown", + "choices": reranking_llm_choices, + "special_type": "llm", + }, + "retrieval_mode": { + "name": "Retrieval mode", + "value": "hybrid", + "choices": ["vector", "text", "hybrid"], + "component": "dropdown", + }, + } + + @classmethod + def get_pipeline(cls, user_settings, index_settings, selected): + """Get retriever objects associated with the index + + Args: + settings: the settings of the app + kwargs: other arguments + """ + from ktem.llms.manager import llms + + retriever = cls( + 
rerankers=[LLMTrulensScoring()], + ) + + # hacky way to input doc_ids to retriever.run() call (through theflow) + kwargs = {".doc_ids": selected} + retriever.set_run(kwargs, temp=False) + + for reranker in retriever.rerankers: + if isinstance(reranker, LLMReranking): + reranker.llm = llms.get( + user_settings["reranking_llm"], llms.get_default() + ) + + return retriever diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py index 450bb3a..d664e7f 100644 --- a/libs/ktem/ktem/index/file/pipelines.py +++ b/libs/ktem/ktem/index/file/pipelines.py @@ -2,25 +2,29 @@ from __future__ import annotations import logging import shutil +import threading +import time import warnings from collections import defaultdict +from copy import deepcopy from functools import lru_cache from hashlib import sha256 from pathlib import Path -from typing import Generator, Optional +from typing import Generator, Optional, Sequence +import tiktoken from ktem.db.models import engine from ktem.embeddings.manager import embedding_models_manager from ktem.llms.manager import llms -from llama_index.readers.base import BaseReader -from llama_index.readers.file.base import default_file_metadata_func -from llama_index.vector_stores import ( +from llama_index.core.readers.base import BaseReader +from llama_index.core.readers.file.base import default_file_metadata_func +from llama_index.core.vector_stores import ( FilterCondition, FilterOperator, MetadataFilter, MetadataFilters, ) -from llama_index.vector_stores.types import VectorStoreQueryMode +from llama_index.core.vector_stores.types import VectorStoreQueryMode from sqlalchemy import delete, select from sqlalchemy.orm import Session from theflow.settings import settings @@ -29,8 +33,18 @@ from theflow.utils.modules import import_dotted_string from kotaemon.base import BaseComponent, Document, Node, Param, RetrievedDocument from kotaemon.embeddings import BaseEmbeddings from kotaemon.indices import VectorIndexing, 
VectorRetrieval -from kotaemon.indices.ingests.files import KH_DEFAULT_FILE_EXTRACTORS -from kotaemon.indices.rankings import BaseReranking, LLMReranking +from kotaemon.indices.ingests.files import ( + KH_DEFAULT_FILE_EXTRACTORS, + adobe_reader, + azure_reader, + unstructured, +) +from kotaemon.indices.rankings import ( + BaseReranking, + CohereReranking, + LLMReranking, + LLMTrulensScoring, +) from kotaemon.indices.splitters import BaseSplitter, TokenSplitter from .base import BaseFileIndexIndexing, BaseFileIndexRetriever @@ -60,6 +74,9 @@ def dev_settings(): return file_extractors, chunk_size, chunk_overlap +_default_token_func = tiktoken.encoding_for_model("gpt-3.5-turbo").encode + + class DocumentRetrievalPipeline(BaseFileIndexRetriever): """Retrieve relevant document @@ -75,10 +92,13 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever): """ embedding: BaseEmbeddings - reranker: BaseReranking = LLMReranking.withx() + rerankers: Sequence[BaseReranking] = [] + # use LLM to create relevant scores for displaying on UI + llm_scorer: LLMReranking | None = LLMReranking.withx() get_extra_table: bool = False mmr: bool = False top_k: int = 5 + retrieval_mode: str = "hybrid" @Node.auto(depends_on=["embedding", "VS", "DS"]) def vector_retrieval(self) -> VectorRetrieval: @@ -86,6 +106,8 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever): embedding=self.embedding, vector_store=self.VS, doc_store=self.DS, + retrieval_mode=self.retrieval_mode, # type: ignore + rerankers=self.rerankers, ) def run( @@ -101,27 +123,30 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever): text: the text to retrieve similar documents doc_ids: list of document ids to constraint the retrieval """ + print("searching in doc_ids", doc_ids) if not doc_ids: logger.info(f"Skip retrieval because of no selected files: {self}") return [] - retrieval_kwargs = {} + retrieval_kwargs: dict = {} with Session(engine) as session: stmt = select(self.Index).where( - self.Index.relation_type == 
"vector", + self.Index.relation_type == "document", self.Index.source_id.in_(doc_ids), ) results = session.execute(stmt) - vs_ids = [r[0].target_id for r in results.all()] + chunk_ids = [r[0].target_id for r in results.all()] + # do first round top_k extension + retrieval_kwargs["do_extend"] = True + retrieval_kwargs["scope"] = chunk_ids retrieval_kwargs["filters"] = MetadataFilters( filters=[ MetadataFilter( - key="doc_id", - value=vs_id, - operator=FilterOperator.EQ, + key="file_id", + value=doc_ids, + operator=FilterOperator.IN, ) - for vs_id in vs_ids ], condition=FilterCondition.OR, ) @@ -132,9 +157,10 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever): retrieval_kwargs["mmr_threshold"] = 0.5 # rerank + s_time = time.time() + print(f"retrieval_kwargs: {retrieval_kwargs.keys()}") docs = self.vector_retrieval(text=text, top_k=self.top_k, **retrieval_kwargs) - if docs and self.get_from_path("reranker"): - docs = self.reranker(docs, query=text) + print("retrieval step took", time.time() - s_time) if not self.get_extra_table: return docs @@ -157,17 +183,30 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever): for fn, pls in table_pages.items() ] if queries: - extra_docs = self.vector_retrieval( - text="", - top_k=50, - where=queries[0] if len(queries) == 1 else {"$or": queries}, - ) - for doc in extra_docs: - if doc.doc_id not in retrieved_id: - docs.append(doc) + try: + extra_docs = self.vector_retrieval( + text="", + top_k=50, + where=queries[0] if len(queries) == 1 else {"$or": queries}, + ) + for doc in extra_docs: + if doc.doc_id not in retrieved_id: + docs.append(doc) + except Exception: + print("Error retrieving additional tables") return docs + def generate_relevant_scores( + self, query: str, documents: list[RetrievedDocument] + ) -> list[RetrievedDocument]: + docs = ( + documents + if not self.llm_scorer + else self.llm_scorer(documents=documents, query=query) + ) + return docs + @classmethod def get_user_settings(cls) -> dict: from 
ktem.llms.manager import llms @@ -182,43 +221,44 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever): return { "reranking_llm": { - "name": "LLM for reranking", + "name": "LLM for relevant scoring", "value": reranking_llm, "component": "dropdown", "choices": reranking_llm_choices, - }, - "separate_embedding": { - "name": "Use separate embedding", - "value": False, - "choices": [("Yes", True), ("No", False)], - "component": "dropdown", + "special_type": "llm", }, "num_retrieval": { "name": "Number of document chunks to retrieve", - "value": 3, + "value": 10, "component": "number", }, "retrieval_mode": { "name": "Retrieval mode", - "value": "vector", + "value": "hybrid", "choices": ["vector", "text", "hybrid"], "component": "dropdown", }, "prioritize_table": { "name": "Prioritize table", - "value": True, + "value": False, "choices": [True, False], "component": "checkbox", }, "mmr": { "name": "Use MMR", - "value": True, + "value": False, "choices": [True, False], "component": "checkbox", }, "use_reranking": { "name": "Use reranking", - "value": False, + "value": True, + "choices": [True, False], + "component": "checkbox", + }, + "use_llm_reranking": { + "name": "Use LLM relevant scoring", + "value": True, "choices": [True, False], "component": "checkbox", }, @@ -232,6 +272,8 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever): settings: the settings of the app kwargs: other arguments """ + use_llm_reranking = user_settings.get("use_llm_reranking", False) + retriever = cls( get_extra_table=user_settings["prioritize_table"], top_k=user_settings["num_retrieval"], @@ -241,16 +283,26 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever): "embedding", embedding_models_manager.get_default_name() ) ], + retrieval_mode=user_settings["retrieval_mode"], + llm_scorer=(LLMTrulensScoring() if use_llm_reranking else None), + rerankers=[CohereReranking()], ) if not user_settings["use_reranking"]: - retriever.reranker = None # type: ignore - else: - 
retriever.reranker.llm = llms.get( + retriever.rerankers = [] # type: ignore + + for reranker in retriever.rerankers: + if isinstance(reranker, LLMReranking): + reranker.llm = llms.get( + user_settings["reranking_llm"], llms.get_default() + ) + + if retriever.llm_scorer: + retriever.llm_scorer.llm = llms.get( user_settings["reranking_llm"], llms.get_default() ) kwargs = {".doc_ids": selected} - retriever.set_run(kwargs, temp=True) + retriever.set_run(kwargs, temp=False) return retriever @@ -258,8 +310,8 @@ class IndexPipeline(BaseComponent): """Index a single file""" loader: BaseReader - splitter: BaseSplitter - chunk_batch_size: int = 50 + splitter: BaseSplitter | None + chunk_batch_size: int = 200 Source = Param(help="The SQLAlchemy Source table") Index = Param(help="The SQLAlchemy Index table") @@ -267,6 +319,9 @@ class IndexPipeline(BaseComponent): DS = Param(help="The DocStore") FSPath = Param(help="The file storage path") user_id = Param(help="The user id") + collection_name: str = "default" + private: bool = False + run_embedding_in_thread: bool = False embedding: BaseEmbeddings @Node.auto(depends_on=["Source", "Index", "embedding"]) @@ -276,31 +331,81 @@ class IndexPipeline(BaseComponent): ) def handle_docs(self, docs, file_id, file_name) -> Generator[Document, None, int]: + s_time = time.time() + text_docs = [] + non_text_docs = [] + thumbnail_docs = [] + + for doc in docs: + doc_type = doc.metadata.get("type", "text") + if doc_type == "text": + text_docs.append(doc) + elif doc_type == "thumbnail": + thumbnail_docs.append(doc) + else: + non_text_docs.append(doc) + + print(f"Got {len(thumbnail_docs)} page thumbnails") + page_label_to_thumbnail = { + doc.metadata["page_label"]: doc.doc_id for doc in thumbnail_docs + } + + if self.splitter: + all_chunks = self.splitter(text_docs) + else: + all_chunks = text_docs + + # add the thumbnails doc_id to the chunks + for chunk in all_chunks: + page_label = chunk.metadata.get("page_label", None) + if page_label and 
page_label in page_label_to_thumbnail: + chunk.metadata["thumbnail_doc_id"] = page_label_to_thumbnail[page_label] + + to_index_chunks = all_chunks + non_text_docs + thumbnail_docs + + # add to doc store chunks = [] n_chunks = 0 - for cidx, chunk in enumerate(self.splitter(docs)): - chunks.append(chunk) - if cidx % self.chunk_batch_size == 0: - self.handle_chunks(chunks, file_id) - n_chunks += len(chunks) - chunks = [] - yield Document( - f" => [{file_name}] Processed {n_chunks} chunks", channel="debug" - ) - - if chunks: - self.handle_chunks(chunks, file_id) + chunk_size = self.chunk_batch_size * 4 + for start_idx in range(0, len(to_index_chunks), chunk_size): + chunks = to_index_chunks[start_idx : start_idx + chunk_size] + self.handle_chunks_docstore(chunks, file_id) n_chunks += len(chunks) yield Document( - f" => [{file_name}] Processed {n_chunks} chunks", channel="debug" + f" => [{file_name}] Processed {n_chunks} chunks", + channel="debug", ) + def insert_chunks_to_vectorstore(): + chunks = [] + n_chunks = 0 + chunk_size = self.chunk_batch_size + for start_idx in range(0, len(to_index_chunks), chunk_size): + chunks = to_index_chunks[start_idx : start_idx + chunk_size] + self.handle_chunks_vectorstore(chunks, file_id) + n_chunks += len(chunks) + if self.VS: + yield Document( + f" => [{file_name}] Created embedding for {n_chunks} chunks", + channel="debug", + ) + + # run vector indexing in thread if specified + if self.run_embedding_in_thread: + print("Running embedding in thread") + threading.Thread( + target=lambda: list(insert_chunks_to_vectorstore()) + ).start() + else: + yield from insert_chunks_to_vectorstore() + + print("indexing step took", time.time() - s_time) return n_chunks - def handle_chunks(self, chunks, file_id): + def handle_chunks_docstore(self, chunks, file_id): """Run chunks""" # run embedding, add to both vector store and doc store - self.vector_indexing(chunks) + self.vector_indexing.add_to_docstore(chunks) # record in the index with 
Session(engine) as session: @@ -313,16 +418,30 @@ class IndexPipeline(BaseComponent): relation_type="document", ) ) - nodes.append( - self.Index( - source_id=file_id, - target_id=chunk.doc_id, - relation_type="vector", - ) - ) session.add_all(nodes) session.commit() + def handle_chunks_vectorstore(self, chunks, file_id): + """Run chunks""" + # run embedding, add to both vector store and doc store + self.vector_indexing.add_to_vectorstore(chunks) + self.vector_indexing.write_chunk_to_file(chunks) + + if self.VS: + # record in the index + with Session(engine) as session: + nodes = [] + for chunk in chunks: + nodes.append( + self.Index( + source_id=file_id, + target_id=chunk.doc_id, + relation_type="vector", + ) + ) + session.add_all(nodes) + session.commit() + def get_id_if_exists(self, file_path: Path) -> Optional[str]: """Check if the file is already indexed @@ -332,8 +451,16 @@ class IndexPipeline(BaseComponent): Returns: the file id if the file is indexed, otherwise None """ + if self.private: + cond: tuple = ( + self.Source.name == file_path.name, + self.Source.user == self.user_id, + ) + else: + cond = (self.Source.name == file_path.name,) + with Session(engine) as session: - stmt = select(self.Source).where(self.Source.name == file_path.name) + stmt = select(self.Source).where(*cond) item = session.execute(stmt).first() if item: return item[0].id @@ -369,20 +496,36 @@ class IndexPipeline(BaseComponent): def finish(self, file_id: str, file_path: Path) -> str: """Finish the indexing""" with Session(engine) as session: - stmt = select(self.Index.target_id).where(self.Index.source_id == file_id) - doc_ids = [_[0] for _ in session.execute(stmt)] - if doc_ids: + stmt = select(self.Source).where(self.Source.id == file_id) + result = session.execute(stmt).first() + if not result: + return file_id + + item = result[0] + + # populate the number of tokens + doc_ids_stmt = select(self.Index.target_id).where( + self.Index.source_id == file_id, + self.Index.relation_type == 
"document", + ) + doc_ids = [_[0] for _ in session.execute(doc_ids_stmt)] + token_func = self.get_token_func() + if doc_ids and token_func: docs = self.DS.get(doc_ids) - stmt = select(self.Source).where(self.Source.id == file_id) - result = session.execute(stmt).first() - if result: - item = result[0] - item.text_length = sum([len(doc.text) for doc in docs]) - session.add(item) - session.commit() + item.note["tokens"] = sum([len(token_func(doc.text)) for doc in docs]) + + # populate the note + item.note["loader"] = self.get_from_path("loader").__class__.__name__ + + session.add(item) + session.commit() return file_id + def get_token_func(self): + """Get the token function for calculating the number of tokens""" + return _default_token_func + def delete_file(self, file_id: str): """Delete a file from the db, including its chunks in docstore and vectorstore @@ -398,44 +541,24 @@ class IndexPipeline(BaseComponent): for each in index: if each[0].relation_type == "vector": vs_ids.append(each[0].target_id) - else: + elif each[0].relation_type == "document": ds_ids.append(each[0].target_id) session.delete(each[0]) session.commit() - self.VS.delete(vs_ids) - self.DS.delete(ds_ids) - def run(self, file_path: str | Path, reindex: bool, **kwargs) -> str: - """Index the file and return the file id""" - # check for duplication - file_path = Path(file_path).resolve() - file_id = self.get_id_if_exists(file_path) - if file_id is not None: - if not reindex: - raise ValueError( - f"File {file_path.name} already indexed. Please rerun with " - "reindex=True to force reindexing." 
- ) - else: - # remove the existing records - self.delete_file(file_id) - file_id = self.store_file(file_path) - else: - # add record to db - file_id = self.store_file(file_path) + if vs_ids and self.VS: + self.VS.delete(vs_ids) + if ds_ids: + self.DS.delete(ds_ids) - # extract the file - extra_info = default_file_metadata_func(str(file_path)) - docs = self.loader.load_data(file_path, extra_info=extra_info) - for _ in self.handle_docs(docs, file_id, file_path.name): - continue - self.finish(file_id, file_path) - - return file_id + def run( + self, file_path: str | Path, reindex: bool, **kwargs + ) -> tuple[str, list[Document]]: + raise NotImplementedError def stream( self, file_path: str | Path, reindex: bool, **kwargs - ) -> Generator[Document, None, str]: + ) -> Generator[Document, None, tuple[str, list[Document]]]: # check for duplication file_path = Path(file_path).resolve() file_id = self.get_id_if_exists(file_path) @@ -456,6 +579,9 @@ class IndexPipeline(BaseComponent): # extract the file extra_info = default_file_metadata_func(str(file_path)) + extra_info["file_id"] = file_id + extra_info["collection_name"] = self.collection_name + yield Document(f" => Converting {file_path.name} to text", channel="debug") docs = self.loader.load_data(file_path, extra_info=extra_info) yield Document(f" => Converted {file_path.name} to text", channel="debug") @@ -464,7 +590,7 @@ class IndexPipeline(BaseComponent): self.finish(file_id, file_path) yield Document(f" => Finished indexing {file_path.name}", channel="debug") - return file_id + return file_id, docs class IndexDocumentPipeline(BaseFileIndexIndexing): @@ -479,16 +605,54 @@ class IndexDocumentPipeline(BaseFileIndexIndexing): decide which pipeline should be used. 
""" + reader_mode: str = Param("default", help="The reader mode") embedding: BaseEmbeddings + run_embedding_in_thread: bool = False + + @Param.auto(depends_on="reader_mode") + def readers(self): + readers = deepcopy(KH_DEFAULT_FILE_EXTRACTORS) + print("reader_mode", self.reader_mode) + if self.reader_mode == "adobe": + readers[".pdf"] = adobe_reader + elif self.reader_mode == "azure-di": + readers[".pdf"] = azure_reader + + dev_readers, _, _ = dev_settings() + readers.update(dev_readers) + + return readers + + @classmethod + def get_user_settings(cls): + return { + "reader_mode": { + "name": "File loader", + "value": "default", + "choices": [ + ("Default (open-source)", "default"), + ("Adobe API (figure+table extraction)", "adobe"), + ( + "Azure AI Document Intelligence (figure+table extraction)", + "azure-di", + ), + ], + "component": "dropdown", + }, + } @classmethod def get_pipeline(cls, user_settings, index_settings) -> BaseFileIndexIndexing: + use_quick_index_mode = user_settings.get("quick_index_mode", False) + print("use_quick_index_mode", use_quick_index_mode) obj = cls( embedding=embedding_models_manager[ index_settings.get( "embedding", embedding_models_manager.get_default_name() ) - ] + ], + run_embedding_in_thread=use_quick_index_mode, + reader_mode=user_settings.get("reader_mode", "default"), ) return obj @@ -497,16 +661,17 @@ class IndexDocumentPipeline(BaseFileIndexIndexing): Can subclass this method for a more elaborate pipeline routing strategy. """ - readers, chunk_size, chunk_overlap = dev_settings() + _, chunk_size, chunk_overlap = dev_settings() - ext = file_path.suffix - reader = readers.get(ext, KH_DEFAULT_FILE_EXTRACTORS.get(ext, None)) + ext = file_path.suffix.lower() + reader = self.readers.get(ext, unstructured) if reader is None: raise NotImplementedError( f"No supported pipeline to index {file_path.name}. Please specify " "the suitable pipeline for this file type in the settings." 
) + print("Using reader", reader) pipeline: IndexPipeline = IndexPipeline( loader=reader, splitter=TokenSplitter( @@ -515,50 +680,37 @@ class IndexDocumentPipeline(BaseFileIndexIndexing): separator="\n\n", backup_separators=["\n", ".", "\u200B"], ), + run_embedding_in_thread=self.run_embedding_in_thread, Source=self.Source, Index=self.Index, VS=self.VS, DS=self.DS, FSPath=self.FSPath, user_id=self.user_id, + private=self.private, embedding=self.embedding, ) return pipeline def run( - self, file_paths: str | Path | list[str | Path], reindex: bool = False, **kwargs + self, file_paths: str | Path | list[str | Path], *args, **kwargs ) -> tuple[list[str | None], list[str | None]]: - """Return a list of indexed file ids, and a list of errors""" - if not isinstance(file_paths, list): - file_paths = [file_paths] - - file_ids: list[str | None] = [] - errors: list[str | None] = [] - for file_path in file_paths: - file_path = Path(file_path) - - try: - pipeline = self.route(file_path) - file_id = pipeline.run(file_path, reindex=reindex, **kwargs) - file_ids.append(file_id) - errors.append(None) - except Exception as e: - logger.error(e) - file_ids.append(None) - errors.append(str(e)) - - return file_ids, errors + raise NotImplementedError def stream( self, file_paths: str | Path | list[str | Path], reindex: bool = False, **kwargs - ) -> Generator[Document, None, tuple[list[str | None], list[str | None]]]: + ) -> Generator[ + Document, None, tuple[list[str | None], list[str | None], list[Document]] + ]: """Return a list of indexed file ids, and a list of errors""" if not isinstance(file_paths, list): file_paths = [file_paths] file_ids: list[str | None] = [] errors: list[str | None] = [] + all_docs = [] + n_files = len(file_paths) for idx, file_path in enumerate(file_paths): file_path = Path(file_path) @@ -569,9 +721,10 @@ class IndexDocumentPipeline(BaseFileIndexIndexing): try: pipeline = self.route(file_path) - file_id = yield from pipeline.stream( + file_id, docs = yield 
from pipeline.stream( file_path, reindex=reindex, **kwargs ) + all_docs.extend(docs) file_ids.append(file_id) errors.append(None) yield Document( @@ -579,7 +732,7 @@ class IndexDocumentPipeline(BaseFileIndexIndexing): channel="index", ) except Exception as e: - logger.error(e) + logger.exception(e) file_ids.append(None) errors.append(str(e)) yield Document( @@ -591,4 +744,4 @@ class IndexDocumentPipeline(BaseFileIndexIndexing): channel="index", ) - return file_ids, errors + return file_ids, errors, all_docs diff --git a/libs/ktem/ktem/index/file/ui.py b/libs/ktem/ktem/index/file/ui.py index d46e072..a639c43 100644 --- a/libs/ktem/ktem/index/file/ui.py +++ b/libs/ktem/ktem/index/file/ui.py @@ -1,5 +1,9 @@ +import html import os +import shutil import tempfile +import zipfile +from copy import deepcopy from pathlib import Path from typing import Generator @@ -9,8 +13,12 @@ from gradio.data_classes import FileData from gradio.utils import NamedString from ktem.app import BasePage from ktem.db.engine import engine +from ktem.utils.render import Render from sqlalchemy import select from sqlalchemy.orm import Session +from theflow.settings import settings as flowsettings + +DOWNLOAD_MESSAGE = "Press again to download" class File(gr.File): @@ -143,28 +151,57 @@ class FileIndexPage(BasePage): ) gr.Markdown("## File List") + self.filter = gr.Textbox( + value="", + label="Filter by name:", + info=( + "(1) Case-insensitive. " + "(2) Search with empty string to show all files." 
+ ), + ) self.file_list_state = gr.State(value=None) self.file_list = gr.DataFrame( - headers=["id", "name", "size", "text_length", "date_created"], + headers=[ + "id", + "name", + "size", + "tokens", + "loader", + "date_created", + ], + column_widths=["0%", "50%", "8%", "7%", "15%", "20%"], interactive=False, + wrap=False, + elem_id="file_list_view", ) + with gr.Row(): + self.deselect_button = gr.Button( + "Close", + visible=False, + ) + self.delete_button = gr.Button( + "Delete", + variant="stop", + visible=False, + ) + with gr.Row(): + self.is_zipped_state = gr.State(value=False) + self.download_all_button = gr.DownloadButton( + "Download all files", + visible=True, + ) + self.download_single_button = gr.DownloadButton( + "Download file", + visible=False, + ) + with gr.Row() as self.selection_info: self.selected_file_id = gr.State(value=None) with gr.Column(scale=2): self.selected_panel = gr.Markdown(self.selected_panel_false) - self.deselect_button = gr.Button( - "Deselect", - visible=False, - elem_classes=["right-button"], - ) - self.delete_button = gr.Button( - "Delete", - variant="stop", - visible=False, - elem_classes=["right-button"], - ) + self.chunks = gr.HTML(visible=False) def on_subscribe_public_events(self): """Subscribe to the declared public event of the app""" @@ -189,12 +226,58 @@ class FileIndexPage(BasePage): ) def file_selected(self, file_id): + chunks = [] + if file_id is not None: + # get the chunks + + Index = self._index._resources["Index"] + with Session(engine) as session: + matches = session.execute( + select(Index).where( + Index.source_id == file_id, + Index.relation_type == "document", + ) + ) + doc_ids = [doc.target_id for (doc,) in matches] + docs = self._index._docstore.get(doc_ids) + docs = sorted( + docs, key=lambda x: x.metadata.get("page_label", float("inf")) + ) + + for idx, doc in enumerate(docs): + title = html.escape( + f"{doc.text[:50]}..." 
if len(doc.text) > 50 else doc.text + ) + doc_type = doc.metadata.get("type", "text") + content = "" + if doc_type == "text": + content = html.escape(doc.text) + elif doc_type == "table": + content = Render.table(doc.text) + elif doc_type == "image": + content = Render.image( + url=doc.metadata.get("image_origin", ""), text=doc.text + ) + + header_prefix = f"[{idx+1}/{len(docs)}]" + if doc.metadata.get("page_label"): + header_prefix += f" [Page {doc.metadata['page_label']}]" + + chunks.append( + Render.collapsible( + header=f"{header_prefix} {title}", + content=content, + ) + ) return ( + gr.update(value="".join(chunks), visible=file_id is not None), + gr.update(visible=file_id is not None), gr.update(visible=file_id is not None), gr.update(visible=file_id is not None), ) def delete_event(self, file_id): + file_name = "" with Session(engine) as session: source = session.execute( select(self._index._resources["Source"]).where( @@ -202,6 +285,7 @@ class FileIndexPage(BasePage): ) ).first() if source: + file_name = source[0].name session.delete(source[0]) vs_ids, ds_ids = [], [] @@ -213,15 +297,16 @@ class FileIndexPage(BasePage): for each in index: if each[0].relation_type == "vector": vs_ids.append(each[0].target_id) - else: + elif each[0].relation_type == "document": ds_ids.append(each[0].target_id) session.delete(each[0]) session.commit() - self._index._vs.delete(vs_ids) + if vs_ids: + self._index._vs.delete(vs_ids) self._index._docstore.delete(ds_ids) - gr.Info(f"File {file_id} has been deleted") + gr.Info(f"File {file_name} has been deleted") return None, self.selected_panel_false @@ -231,6 +316,57 @@ class FileIndexPage(BasePage): gr.update(visible=False), ) + def download_single_file(self, is_zipped_state, file_id): + with Session(engine) as session: + source = session.execute( + select(self._index._resources["Source"]).where( + self._index._resources["Source"].id == file_id + ) + ).first() + if source: + target_file_name = Path(source[0].name) + zip_files = 
[] + for file_name in os.listdir(flowsettings.KH_CHUNKS_OUTPUT_DIR): + if target_file_name.stem in file_name: + zip_files.append( + os.path.join(flowsettings.KH_CHUNKS_OUTPUT_DIR, file_name) + ) + for file_name in os.listdir(flowsettings.KH_MARKDOWN_OUTPUT_DIR): + if target_file_name.stem in file_name: + zip_files.append( + os.path.join(flowsettings.KH_MARKDOWN_OUTPUT_DIR, file_name) + ) + zip_file_path = os.path.join( + flowsettings.KH_ZIP_OUTPUT_DIR, target_file_name.stem + ) + with zipfile.ZipFile(f"{zip_file_path}.zip", "w") as zipMe: + for file in zip_files: + zipMe.write(file, arcname=os.path.basename(file)) + + if is_zipped_state: + new_button = gr.DownloadButton(label="Download", value=None) + else: + new_button = gr.DownloadButton( + label=DOWNLOAD_MESSAGE, value=f"{zip_file_path}.zip" + ) + + return not is_zipped_state, new_button + + def download_all_files(self): + zip_files = [] + for file_name in os.listdir(flowsettings.KH_CHUNKS_OUTPUT_DIR): + zip_files.append(os.path.join(flowsettings.KH_CHUNKS_OUTPUT_DIR, file_name)) + for file_name in os.listdir(flowsettings.KH_MARKDOWN_OUTPUT_DIR): + zip_files.append( + os.path.join(flowsettings.KH_MARKDOWN_OUTPUT_DIR, file_name) + ) + zip_file_path = os.path.join(flowsettings.KH_ZIP_OUTPUT_DIR, "all") + with zipfile.ZipFile(f"{zip_file_path}.zip", "w") as zipMe: + for file in zip_files: + arcname = Path(file) + zipMe.write(file, arcname=arcname.name) + return gr.DownloadButton(label=DOWNLOAD_MESSAGE, value=f"{zip_file_path}.zip") + def on_register_events(self): """Register all events to the app""" onDeleted = ( @@ -241,35 +377,61 @@ class FileIndexPage(BasePage): ) .then( fn=lambda: (None, self.selected_panel_false), - inputs=None, + inputs=[], outputs=[self.selected_file_id, self.selected_panel], show_progress="hidden", ) .then( fn=self.list_file, - inputs=[self._app.user_id], + inputs=[self._app.user_id, self.filter], outputs=[self.file_list_state, self.file_list], ) + .then( + fn=self.file_selected, + 
inputs=[self.selected_file_id], + outputs=[ + self.chunks, + self.deselect_button, + self.delete_button, + self.download_single_button, + ], + show_progress="hidden", + ) ) for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"): onDeleted = onDeleted.then(**event) self.deselect_button.click( fn=lambda: (None, self.selected_panel_false), - inputs=None, + inputs=[], outputs=[self.selected_file_id, self.selected_panel], show_progress="hidden", - ) - self.selected_panel.change( + ).then( fn=self.file_selected, inputs=[self.selected_file_id], outputs=[ + self.chunks, self.deselect_button, self.delete_button, + self.download_single_button, ], show_progress="hidden", ) + self.download_all_button.click( + fn=self.download_all_files, + inputs=[], + outputs=self.download_all_button, + show_progress="hidden", + ) + + self.download_single_button.click( + fn=self.download_single_file, + inputs=[self.is_zipped_state, self.selected_file_id], + outputs=[self.is_zipped_state, self.download_single_button], + show_progress="hidden", + ) + onUploaded = self.upload_button.click( fn=lambda: gr.update(visible=True), outputs=[self.upload_progress_panel], @@ -285,9 +447,63 @@ class FileIndexPage(BasePage): concurrency_limit=20, ) + try: + # quick file upload event registration of first Index only + if self._index.id == 1: + self.quick_upload_state = gr.State(value=[]) + print("Setting up quick upload event") + quickUploadedEvent = ( + self._app.chat_page.quick_file_upload.upload( + fn=lambda: gr.update( + value="Please wait for the indexing process " + "to complete before adding your question." 
+ ), + outputs=self._app.chat_page.quick_file_upload_status, + ) + .then( + fn=self.index_fn_with_default_loaders, + inputs=[ + self._app.chat_page.quick_file_upload, + gr.State(value=False), + self._app.settings_state, + self._app.user_id, + ], + outputs=self.quick_upload_state, + ) + .success( + fn=lambda: [ + gr.update(value=None), + gr.update(value="select"), + ], + outputs=[ + self._app.chat_page.quick_file_upload, + self._app.chat_page._indices_input[0], + ], + ) + ) + for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"): + quickUploadedEvent = quickUploadedEvent.then(**event) + + quickUploadedEvent.success( + fn=lambda x: x, + inputs=self.quick_upload_state, + outputs=self._app.chat_page._indices_input[1], + ).then( + fn=lambda: gr.update(value="Indexing completed."), + outputs=self._app.chat_page.quick_file_upload_status, + ).then( + fn=self.list_file, + inputs=[self._app.user_id, self.filter], + outputs=[self.file_list_state, self.file_list], + concurrency_limit=20, + ) + + except Exception as e: + print(e) + uploadedEvent = onUploaded.then( fn=self.list_file, - inputs=[self._app.user_id], + inputs=[self._app.user_id, self.filter], outputs=[self.file_list_state, self.file_list], concurrency_limit=20, ) @@ -309,16 +525,64 @@ class FileIndexPage(BasePage): inputs=[self.file_list], outputs=[self.selected_file_id, self.selected_panel], show_progress="hidden", + ).then( + fn=self.file_selected, + inputs=[self.selected_file_id], + outputs=[ + self.chunks, + self.deselect_button, + self.delete_button, + self.download_single_button, + ], + show_progress="hidden", + ) + + self.filter.submit( + fn=self.list_file, + inputs=[self._app.user_id, self.filter], + outputs=[self.file_list_state, self.file_list], + show_progress="hidden", ) def _on_app_created(self): """Called when the app is created""" self._app.app.load( self.list_file, - inputs=[self._app.user_id], + inputs=[self._app.user_id, self.filter], outputs=[self.file_list_state, 
self.file_list], ) + def _may_extract_zip(self, files, zip_dir: str): + """Handle zip files""" + zip_files = [file for file in files if file.endswith(".zip")] + remaining_files = [file for file in files if not file.endswith("zip")] + + # Clean-up before unzip to remove old files + shutil.rmtree(zip_dir, ignore_errors=True) + + for zip_file in zip_files: + # Prepare new zip output dir, separated for each files + basename = os.path.splitext(os.path.basename(zip_file))[0] + zip_out_dir = os.path.join(zip_dir, basename) + os.makedirs(zip_out_dir, exist_ok=True) + with zipfile.ZipFile(zip_file, "r") as zip_ref: + zip_ref.extractall(zip_out_dir) + + n_zip_file = 0 + for root, dirs, files in os.walk(zip_dir): + for file in files: + ext = os.path.splitext(file)[1] + + # only allow supported file-types ( not zip ) + if ext not in [".zip"] and ext in self._supported_file_types: + remaining_files += [os.path.join(root, file)] + n_zip_file += 1 + + if n_zip_file > 0: + print(f"Update zip files: {n_zip_file}") + + return remaining_files + def index_fn( self, files, reindex: bool, settings, user_id ) -> Generator[tuple[str, str], None, None]: @@ -335,6 +599,8 @@ class FileIndexPage(BasePage): yield "", "" return + files = self._may_extract_zip(files, flowsettings.KH_ZIP_INPUT_DIR) + errors = self.validate(files) if errors: gr.Warning(", ".join(errors)) @@ -366,19 +632,61 @@ class FileIndexPage(BasePage): debugs.append(response.text) yield "\n".join(outputs), "\n".join(debugs) except StopIteration as e: - result, errors = e.value + results, index_errors, docs = e.value except Exception as e: debugs.append(f"Error: {e}") yield "\n".join(outputs), "\n".join(debugs) return - n_successes = len([_ for _ in result if _]) + n_successes = len([_ for _ in results if _]) if n_successes: gr.Info(f"Successfully index {n_successes} files") n_errors = len([_ for _ in errors if _]) if n_errors: gr.Warning(f"Have errors for {n_errors} files") + return results + + def 
index_fn_with_default_loaders( + self, files, reindex: bool, settings, user_id + ) -> list["str"]: + """Function for quick upload with default loaders + + Args: + files: the list of files to be uploaded + reindex: whether to reindex the files + selected_files: the list of files already selected + settings: the settings of the app + """ + print("Overriding with default loaders") + exist_ids = [] + to_process_files = [] + for str_file_path in files: + file_path = Path(str(str_file_path)) + exist_id = ( + self._index.get_indexing_pipeline(settings, user_id) + .route(file_path) + .get_id_if_exists(file_path) + ) + if exist_id: + exist_ids.append(exist_id) + else: + to_process_files.append(str_file_path) + + returned_ids = [] + settings = deepcopy(settings) + settings[f"index.options.{self._index.id}.reader_mode"] = "default" + settings[f"index.options.{self._index.id}.quick_index_mode"] = True + if to_process_files: + _iter = self.index_fn(to_process_files, reindex, settings, user_id) + try: + while next(_iter): + pass + except StopIteration as e: + returned_ids = e.value + + return exist_ids + returned_ids + def index_files_from_dir( self, folder_path, reindex, settings, user_id ) -> Generator[tuple[str, str], None, None]: @@ -452,7 +760,19 @@ class FileIndexPage(BasePage): yield from self.index_fn(files, reindex, settings, user_id) - def list_file(self, user_id): + def format_size_human_readable(self, num: float | str, suffix="B"): + try: + num = float(num) + except ValueError: + return num + + for unit in ("", "K", "M", "G", "T", "P", "E", "Z"): + if abs(num) < 1024.0: + return f"{num:3.0f}{unit}{suffix}" + num /= 1024.0 + return f"{num:.0f}Yi{suffix}" + + def list_file(self, user_id, name_pattern=""): if user_id is None: # not signed in return [], pd.DataFrame.from_records( @@ -461,7 +781,8 @@ class FileIndexPage(BasePage): "id": "-", "name": "-", "size": "-", - "text_length": "-", + "tokens": "-", + "loader": "-", "date_created": "-", } ] @@ -472,12 +793,17 @@ 
class FileIndexPage(BasePage): statement = select(Source) if self._index.config.get("private", False): statement = statement.where(Source.user == user_id) + if name_pattern: + statement = statement.where(Source.name.ilike(f"%{name_pattern}%")) results = [ { "id": each[0].id, "name": each[0].name, - "size": each[0].size, - "text_length": each[0].text_length, + "size": self.format_size_human_readable(each[0].size), + "tokens": self.format_size_human_readable( + each[0].note.get("tokens", "-"), suffix="" + ), + "loader": each[0].note.get("loader", "-"), "date_created": each[0].date_created.strftime("%Y-%m-%d %H:%M:%S"), } for each in session.execute(statement).all() @@ -492,12 +818,14 @@ class FileIndexPage(BasePage): "id": "-", "name": "-", "size": "-", - "text_length": "-", + "tokens": "-", + "loader": "-", "date_created": "-", } ] ) + print(f"{len(results)=}, {len(file_list)=}") return results, file_list def interact_file_list(self, list_files, ev: gr.SelectData): @@ -561,9 +889,8 @@ class FileSelector(BasePage): self.mode = gr.Radio( value=default_mode, choices=[ - ("Disabled", "disabled"), ("Search All", "all"), - ("Select", "select"), + ("Search In File(s)", "select"), ], container=False, ) diff --git a/libs/ktem/ktem/index/manager.py b/libs/ktem/ktem/index/manager.py index cfa6381..91e4270 100644 --- a/libs/ktem/ktem/index/manager.py +++ b/libs/ktem/ktem/index/manager.py @@ -123,8 +123,11 @@ class IndexManager: ) try: - # clean up - index.on_delete() + try: + # clean up + index.on_delete() + except Exception as e: + print(f"Error while deleting index {index.name}: {e}") # remove from database with Session(engine) as sess: diff --git a/libs/ktem/ktem/index/ui.py b/libs/ktem/ktem/index/ui.py index 9d6b7b2..47f7d9a 100644 --- a/libs/ktem/ktem/index/ui.py +++ b/libs/ktem/ktem/index/ui.py @@ -7,6 +7,21 @@ from ktem.utils.file import YAMLNoDateSafeLoader from .manager import IndexManager +# UGLY way to restart gradio server by updating atime +def 
update_current_module_atime(): + import os + import time + + # Define the file path + file_path = __file__ + print("Updating atime for", file_path) + + # Get the current time + current_time = time.time() + # Set the modified time (and access time) to the current time + os.utime(file_path, (current_time, current_time)) + + def format_description(cls): user_settings = cls.get_admin_settings() params_lines = ["| Name | Default | Description |", "| --- | --- | --- |"] @@ -29,7 +44,7 @@ class IndexManagement(BasePage): def on_building_ui(self): with gr.Tab(label="View"): self.index_list = gr.DataFrame( - headers=["ID", "Name", "Index Type"], + headers=["id", "name", "index type"], interactive=False, ) @@ -95,7 +110,7 @@ class IndexManagement(BasePage): """Called when the app is created""" self._app.app.load( self.list_indices, - inputs=None, + inputs=[], outputs=[self.index_list], ) self._app.app.load( @@ -117,7 +132,7 @@ class IndexManagement(BasePage): self.create_index, inputs=[self.name, self.index_type, self.spec], outputs=None, - ).success(self.list_indices, inputs=None, outputs=[self.index_list]).success( + ).success(self.list_indices, inputs=[], outputs=[self.index_list]).success( lambda: ("", None, "", self.spec_desc_default), outputs=[ self.name, @@ -125,6 +140,8 @@ class IndexManagement(BasePage): self.spec, self.spec_desc, ], + ).success( + update_current_module_atime ) self.index_list.select( self.select_index, @@ -152,7 +169,7 @@ class IndexManagement(BasePage): gr.update(visible=False), gr.update(visible=True), ), - inputs=None, + inputs=[], outputs=[ self.btn_edit_save, self.btn_delete, @@ -166,10 +183,8 @@ class IndexManagement(BasePage): inputs=[self.selected_index_id], outputs=[self.selected_index_id], show_progress="hidden", - ).then( - self.list_indices, - inputs=None, - outputs=[self.index_list], + ).then(self.list_indices, inputs=[], outputs=[self.index_list],).success( + update_current_module_atime ) self.btn_delete_no.click( lambda: ( @@ -178,7 
+193,7 @@ class IndexManagement(BasePage): gr.update(visible=True), gr.update(visible=False), ), - inputs=None, + inputs=[], outputs=[ self.btn_edit_save, self.btn_delete, @@ -197,7 +212,7 @@ class IndexManagement(BasePage): show_progress="hidden", ).then( self.list_indices, - inputs=None, + inputs=[], outputs=[self.index_list], ) self.btn_close.click( @@ -245,16 +260,16 @@ class IndexManagement(BasePage): items = [] for item in self.manager.indices: record = {} - record["ID"] = item.id - record["Name"] = item.name - record["Index Type"] = item.__class__.__name__ + record["id"] = item.id + record["name"] = item.name + record["index type"] = item.__class__.__name__ items.append(record) if items: indices_list = pd.DataFrame.from_records(items) else: indices_list = pd.DataFrame.from_records( - [{"ID": "-", "Name": "-", "Index Type": "-"}] + [{"id": "-", "name": "-", "index type": "-"}] ) return indices_list @@ -268,7 +283,7 @@ class IndexManagement(BasePage): if not ev.selected: return -1 - return int(index_list["ID"][ev.index[0]]) + return int(index_list["id"][ev.index[0]]) def on_selected_index_change(self, selected_index_id: int): """Show the relevant index as user selects it on the UI diff --git a/libs/ktem/ktem/llms/manager.py b/libs/ktem/ktem/llms/manager.py index 6baa759..f4b204f 100644 --- a/libs/ktem/ktem/llms/manager.py +++ b/libs/ktem/ktem/llms/manager.py @@ -3,7 +3,7 @@ from typing import Optional, Type, overload from sqlalchemy import select from sqlalchemy.orm import Session from theflow.settings import settings as flowsettings -from theflow.utils.modules import deserialize +from theflow.utils.modules import deserialize, import_dotted_string from kotaemon.llms import ChatLLM @@ -38,7 +38,7 @@ class LLMManager: def load(self): """Load the model pool from database""" - self._models, self._info, self._defaut = {}, {}, "" + self._models, self._info, self._default = {}, {}, "" with Session(engine) as session: stmt = select(LLMTable) items = 
session.execute(stmt) @@ -54,14 +54,12 @@ class LLMManager: self._default = item.name def load_vendors(self): - from kotaemon.llms import ( - AzureChatOpenAI, - ChatOpenAI, - EndpointChatLLM, - LlamaCppChat, - ) + from kotaemon.llms import AzureChatOpenAI, ChatOpenAI, LlamaCppChat - self._vendors = [ChatOpenAI, AzureChatOpenAI, LlamaCppChat, EndpointChatLLM] + self._vendors = [ChatOpenAI, AzureChatOpenAI, LlamaCppChat] + + for extra_vendor in getattr(flowsettings, "KH_LLM_EXTRA_VENDORS", []): + self._vendors.append(import_dotted_string(extra_vendor, safe=False)) def __getitem__(self, key: str) -> ChatLLM: """Get model by name""" diff --git a/libs/ktem/ktem/llms/ui.py b/libs/ktem/ktem/llms/ui.py index 8b6f4ee..116982e 100644 --- a/libs/ktem/ktem/llms/ui.py +++ b/libs/ktem/ktem/llms/ui.py @@ -112,7 +112,7 @@ class LLMManagement(BasePage): """Called when the app is created""" self._app.app.load( self.list_llms, - inputs=None, + inputs=[], outputs=[self.llm_list], ) self._app.app.load( @@ -140,8 +140,8 @@ class LLMManagement(BasePage): self.btn_new.click( self.create_llm, inputs=[self.name, self.llm_choices, self.spec, self.default], - outputs=None, - ).success(self.list_llms, inputs=None, outputs=[self.llm_list]).success( + outputs=[], + ).success(self.list_llms, inputs=[], outputs=[self.llm_list]).success( lambda: ("", None, "", False, self.spec_desc_default), outputs=[ self.name, @@ -176,7 +176,7 @@ class LLMManagement(BasePage): ) self.btn_delete.click( self.on_btn_delete_click, - inputs=None, + inputs=[], outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no], show_progress="hidden", ) @@ -187,7 +187,7 @@ class LLMManagement(BasePage): show_progress="hidden", ).then( self.list_llms, - inputs=None, + inputs=[], outputs=[self.llm_list], ) self.btn_delete_no.click( @@ -196,7 +196,7 @@ class LLMManagement(BasePage): gr.update(visible=False), gr.update(visible=False), ), - inputs=None, + inputs=[], outputs=[self.btn_delete, self.btn_delete_yes, 
self.btn_delete_no], show_progress="hidden", ) @@ -210,7 +210,7 @@ class LLMManagement(BasePage): show_progress="hidden", ).then( self.list_llms, - inputs=None, + inputs=[], outputs=[self.llm_list], ) self.btn_close.click( diff --git a/libs/ktem/ktem/main.py b/libs/ktem/ktem/main.py index 6182e39..ba305e6 100644 --- a/libs/ktem/ktem/main.py +++ b/libs/ktem/ktem/main.py @@ -44,7 +44,7 @@ class App(BaseApp): if len(self.index_manager.indices) == 1: for index in self.index_manager.indices: with gr.Tab( - f"{index.name} Index", + f"{index.name}", elem_id="indices-tab", elem_classes=[ "fill-main-area-height", @@ -58,7 +58,7 @@ class App(BaseApp): setattr(self, f"_index_{index.id}", page) elif len(self.index_manager.indices) > 1: with gr.Tab( - "Indices", + "Files", elem_id="indices-tab", elem_classes=["fill-main-area-height", "scrollable", "indices-tab"], id="indices-tab", @@ -66,7 +66,7 @@ class App(BaseApp): ) as self._tabs["indices-tab"]: for index in self.index_manager.indices: with gr.Tab( - f"{index.name}", + f"{index.name} Collection", elem_id=f"{index.id}-tab", ) as self._tabs[f"{index.id}-tab"]: page = index.get_index_page_ui() diff --git a/libs/ktem/ktem/pages/chat/__init__.py b/libs/ktem/ktem/pages/chat/__init__.py index d9826e0..a21c5f2 100644 --- a/libs/ktem/ktem/pages/chat/__init__.py +++ b/libs/ktem/ktem/pages/chat/__init__.py @@ -1,15 +1,25 @@ import asyncio +import csv from copy import deepcopy +from datetime import datetime +from pathlib import Path from typing import Optional import gradio as gr +from filelock import FileLock from ktem.app import BasePage from ktem.components import reasonings from ktem.db.models import Conversation, engine +from ktem.index.file.ui import File +from ktem.reasoning.prompt_optimization.suggest_conversation_name import ( + SuggestConvNamePipeline, +) +from plotly.io import from_json from sqlmodel import Session, select from theflow.settings import settings as flowsettings from kotaemon.base import Document +from 
kotaemon.indices.ingests.files import KH_DEFAULT_FILE_EXTRACTORS from .chat_panel import ChatPanel from .chat_suggestion import ChatSuggestion @@ -17,23 +27,49 @@ from .common import STATE from .control import ConversationControl from .report import ReportIssue +DEFAULT_SETTING = "(default)" +INFO_PANEL_SCALES = {True: 8, False: 4} + + +pdfview_js = """ +function() { + // Get all links and attach click event + var links = document.getElementsByClassName("pdf-link"); + for (var i = 0; i < links.length; i++) { + links[i].onclick = openModal; + } +} +""" + class ChatPage(BasePage): def __init__(self, app): self._app = app self._indices_input = [] + self.on_building_ui() + self._reasoning_type = gr.State(value=None) + self._llm_type = gr.State(value=None) + self._conversation_renamed = gr.State(value=False) + self.info_panel_expanded = gr.State(value=True) def on_building_ui(self): with gr.Row(): - self.chat_state = gr.State(STATE) - with gr.Column(scale=1, elem_id="conv-settings-panel"): + self.state_chat = gr.State(STATE) + self.state_retrieval_history = gr.State([]) + self.state_chat_history = gr.State([]) + self.state_plot_history = gr.State([]) + self.state_settings = gr.State({}) + self.state_info_panel = gr.State("") + self.state_plot_panel = gr.State(None) + + with gr.Column(scale=1, elem_id="conv-settings-panel") as self.conv_column: self.chat_control = ConversationControl(self._app) if getattr(flowsettings, "KH_FEATURE_CHAT_SUGGESTION", False): self.chat_suggestion = ChatSuggestion(self._app) - for index in self._app.index_manager.indices: + for index_id, index in enumerate(self._app.index_manager.indices): index.selector = None index_ui = index.get_selector_component_ui() if not index_ui: @@ -41,7 +77,9 @@ class ChatPage(BasePage): continue index_ui.unrender() # need to rerender later within Accordion - with gr.Accordion(label=f"{index.name} Index", open=True): + with gr.Accordion( + label=f"{index.name} Collection", open=index_id < 1 + ): index_ui.render() 
gr_index = index_ui.as_gradio_component() if gr_index: @@ -60,14 +98,66 @@ class ChatPage(BasePage): self._indices_input.append(gr_index) setattr(self, f"_index_{index.id}", index_ui) + if len(self._app.index_manager.indices) > 0: + with gr.Accordion(label="Quick Upload") as _: + self.quick_file_upload = File( + file_types=list(KH_DEFAULT_FILE_EXTRACTORS.keys()), + file_count="multiple", + container=True, + show_label=False, + ) + self.quick_file_upload_status = gr.Markdown() + self.report_issue = ReportIssue(self._app) with gr.Column(scale=6, elem_id="chat-area"): self.chat_panel = ChatPanel(self._app) - with gr.Column(scale=3, elem_id="chat-info-panel"): + with gr.Row(): + with gr.Accordion(label="Chat settings", open=False): + # a quick switch for reasoning type option + with gr.Row(): + gr.HTML("Reasoning method") + gr.HTML("Model") + + with gr.Row(): + reasoning_type_values = [ + (DEFAULT_SETTING, DEFAULT_SETTING) + ] + self._app.default_settings.reasoning.settings[ + "use" + ].choices + self.reasoning_types = gr.Dropdown( + choices=reasoning_type_values, + value=DEFAULT_SETTING, + container=False, + show_label=False, + ) + self.model_types = gr.Dropdown( + choices=self._app.default_settings.reasoning.options[ + "simple" + ] + .settings["llm"] + .choices, + value="", + container=False, + show_label=False, + ) + + with gr.Column( + scale=INFO_PANEL_SCALES[False], elem_id="chat-info-panel" + ) as self.info_column: with gr.Accordion(label="Information panel", open=True): - self.info_panel = gr.HTML() + self.modal = gr.HTML("
") + self.plot_panel = gr.Plot(visible=False) + self.info_panel = gr.HTML(elem_id="html-info-panel") + + def _json_to_plot(self, json_dict: dict | None): + if json_dict: + plot = from_json(json_dict) + plot = gr.update(visible=True, value=plot) + else: + plot = gr.update(visible=False) + return plot def on_register_events(self): gr.on( @@ -98,27 +188,75 @@ class ChatPage(BasePage): self.chat_control.conversation_id, self.chat_panel.chatbot, self._app.settings_state, - self.chat_state, + self._reasoning_type, + self._llm_type, + self.state_chat, self._app.user_id, ] + self._indices_input, outputs=[ self.chat_panel.chatbot, self.info_panel, - self.chat_state, + self.plot_panel, + self.state_plot_panel, + self.state_chat, ], concurrency_limit=20, show_progress="minimal", + ).success( + fn=self.backup_original_info, + inputs=[ + self.chat_panel.chatbot, + self._app.settings_state, + self.info_panel, + self.state_chat_history, + ], + outputs=[ + self.state_chat_history, + self.state_settings, + self.state_info_panel, + ], ).then( - fn=self.update_data_source, + fn=self.persist_data_source, inputs=[ self.chat_control.conversation_id, + self._app.user_id, + self.info_panel, + self.state_plot_panel, + self.state_retrieval_history, + self.state_plot_history, self.chat_panel.chatbot, - self.chat_state, + self.state_chat, ] + self._indices_input, - outputs=None, + outputs=[ + self.state_retrieval_history, + self.state_plot_history, + ], concurrency_limit=20, + ).success( + fn=self.check_and_suggest_name_conv, + inputs=self.chat_panel.chatbot, + outputs=[ + self.chat_control.conversation_rn, + self._conversation_renamed, + ], + ).success( + self.chat_control.rename_conv, + inputs=[ + self.chat_control.conversation_id, + self.chat_control.conversation_rn, + self._conversation_renamed, + self._app.user_id, + ], + outputs=[ + self.chat_control.conversation, + self.chat_control.conversation, + self.chat_control.conversation_rn, + ], + show_progress="hidden", + ).then( + fn=None, 
inputs=None, outputs=None, js=pdfview_js ) self.chat_panel.regen_btn.click( @@ -127,33 +265,90 @@ class ChatPage(BasePage): self.chat_control.conversation_id, self.chat_panel.chatbot, self._app.settings_state, - self.chat_state, + self._reasoning_type, + self._llm_type, + self.state_chat, self._app.user_id, ] + self._indices_input, outputs=[ self.chat_panel.chatbot, self.info_panel, - self.chat_state, + self.plot_panel, + self.state_plot_panel, + self.state_chat, ], concurrency_limit=20, show_progress="minimal", ).then( - fn=self.update_data_source, + fn=self.persist_data_source, inputs=[ self.chat_control.conversation_id, + self._app.user_id, + self.info_panel, + self.state_plot_panel, + self.state_retrieval_history, + self.state_plot_history, self.chat_panel.chatbot, - self.chat_state, + self.state_chat, ] + self._indices_input, - outputs=None, + outputs=[ + self.state_retrieval_history, + self.state_plot_history, + ], concurrency_limit=20, + ).success( + fn=self.check_and_suggest_name_conv, + inputs=self.chat_panel.chatbot, + outputs=[ + self.chat_control.conversation_rn, + self._conversation_renamed, + ], + ).success( + self.chat_control.rename_conv, + inputs=[ + self.chat_control.conversation_id, + self.chat_control.conversation_rn, + self._conversation_renamed, + self._app.user_id, + ], + outputs=[ + self.chat_control.conversation, + self.chat_control.conversation, + self.chat_control.conversation_rn, + ], + show_progress="hidden", + ).then( + fn=None, inputs=None, outputs=None, js=pdfview_js + ) + + self.chat_control.btn_info_expand.click( + fn=lambda is_expanded: ( + gr.update(scale=INFO_PANEL_SCALES[is_expanded]), + not is_expanded, + ), + inputs=self.info_panel_expanded, + outputs=[self.info_column, self.info_panel_expanded], ) self.chat_panel.chatbot.like( fn=self.is_liked, inputs=[self.chat_control.conversation_id], outputs=None, + ).success( + self.save_log, + inputs=[ + self.chat_control.conversation_id, + self.chat_panel.chatbot, + 
self._app.settings_state, + self.info_panel, + self.state_chat_history, + self.state_settings, + self.state_info_panel, + gr.State(getattr(flowsettings, "KH_APP_DATA_DIR", "logs")), + ], + outputs=None, ) self.chat_control.btn_new.click( @@ -163,17 +358,25 @@ class ChatPage(BasePage): show_progress="hidden", ).then( self.chat_control.select_conv, - inputs=[self.chat_control.conversation], + inputs=[self.chat_control.conversation, self._app.user_id], outputs=[ self.chat_control.conversation_id, self.chat_control.conversation, self.chat_control.conversation_rn, self.chat_panel.chatbot, self.info_panel, - self.chat_state, + self.state_plot_panel, + self.state_retrieval_history, + self.state_plot_history, + self.chat_control.cb_is_public, + self.state_chat, ] + self._indices_input, show_progress="hidden", + ).then( + fn=self._json_to_plot, + inputs=self.state_plot_panel, + outputs=self.plot_panel, ) self.chat_control.btn_del.click( @@ -188,17 +391,25 @@ class ChatPage(BasePage): show_progress="hidden", ).then( self.chat_control.select_conv, - inputs=[self.chat_control.conversation], + inputs=[self.chat_control.conversation, self._app.user_id], outputs=[ self.chat_control.conversation_id, self.chat_control.conversation, self.chat_control.conversation_rn, self.chat_panel.chatbot, self.info_panel, - self.chat_state, + self.state_plot_panel, + self.state_retrieval_history, + self.state_plot_history, + self.chat_control.cb_is_public, + self.state_chat, ] + self._indices_input, show_progress="hidden", + ).then( + fn=self._json_to_plot, + inputs=self.state_plot_panel, + outputs=self.plot_panel, ).then( lambda: self.toggle_delete(""), outputs=[self.chat_control._new_delete, self.chat_control._delete_confirm], @@ -207,33 +418,80 @@ class ChatPage(BasePage): lambda: self.toggle_delete(""), outputs=[self.chat_control._new_delete, self.chat_control._delete_confirm], ) - self.chat_control.conversation_rn_btn.click( + self.chat_control.btn_conversation_rn.click( + lambda: 
gr.update(visible=True), + outputs=[ + self.chat_control.conversation_rn, + ], + ) + self.chat_control.conversation_rn.submit( self.chat_control.rename_conv, inputs=[ self.chat_control.conversation_id, self.chat_control.conversation_rn, + gr.State(value=True), self._app.user_id, ], - outputs=[self.chat_control.conversation, self.chat_control.conversation], + outputs=[ + self.chat_control.conversation, + self.chat_control.conversation, + self.chat_control.conversation_rn, + ], show_progress="hidden", ) self.chat_control.conversation.select( self.chat_control.select_conv, - inputs=[self.chat_control.conversation], + inputs=[self.chat_control.conversation, self._app.user_id], outputs=[ self.chat_control.conversation_id, self.chat_control.conversation, self.chat_control.conversation_rn, self.chat_panel.chatbot, self.info_panel, - self.chat_state, + self.state_plot_panel, + self.state_retrieval_history, + self.state_plot_history, + self.chat_control.cb_is_public, + self.state_chat, ] + self._indices_input, show_progress="hidden", + ).then( + fn=self._json_to_plot, + inputs=self.state_plot_panel, + outputs=self.plot_panel, ).then( lambda: self.toggle_delete(""), outputs=[self.chat_control._new_delete, self.chat_control._delete_confirm], + ).then( + fn=None, inputs=None, outputs=None, js=pdfview_js + ) + + # evidence display on message selection + self.chat_panel.chatbot.select( + self.message_selected, + inputs=[ + self.state_retrieval_history, + self.state_plot_history, + ], + outputs=[ + self.info_panel, + self.state_plot_panel, + ], + ).then( + fn=self._json_to_plot, + inputs=self.state_plot_panel, + outputs=self.plot_panel, + ).then( + fn=None, inputs=None, outputs=None, js=pdfview_js + ) + + self.chat_control.cb_is_public.change( + self.on_set_public_conversation, + inputs=[self.chat_control.cb_is_public, self.chat_control.conversation], + outputs=None, + show_progress="hidden", ) self.report_issue.report_btn.click( @@ -247,11 +505,26 @@ class ChatPage(BasePage): 
self._app.settings_state, self._app.user_id, self.info_panel, - self.chat_state, + self.state_chat, ] + self._indices_input, outputs=None, ) + self.reasoning_types.change( + self.reasoning_changed, + inputs=[self.reasoning_types], + outputs=[self._reasoning_type], + ) + self.model_types.change( + lambda x: x, + inputs=[self.model_types], + outputs=[self._llm_type], + ) + self.chat_control.conversation_id.change( + lambda: gr.update(visible=False), + outputs=self.plot_panel, + ) + if getattr(flowsettings, "KH_FEATURE_CHAT_SUGGESTION", False): self.chat_suggestion.example.select( self.chat_suggestion.select_example, @@ -291,6 +564,28 @@ class ChatPage(BasePage): else: return gr.update(visible=True), gr.update(visible=False) + def on_set_public_conversation(self, is_public, convo_id): + if not convo_id: + gr.Warning("No conversation selected") + return + + with Session(engine) as session: + statement = select(Conversation).where(Conversation.id == convo_id) + + result = session.exec(statement).one() + name = result.name + + if result.is_public != is_public: + # Only trigger updating when user + # select different value from the current + result.is_public = is_public + session.add(result) + session.commit() + + gr.Info( + f"Conversation: {name} is {'public' if is_public else 'private'}." 
+ ) + def on_subscribe_public_events(self): if self._app.f_user_management: self._app.subscribe_event( @@ -306,25 +601,53 @@ class ChatPage(BasePage): self._app.subscribe_event( name="onSignOut", definition={ - "fn": lambda: self.chat_control.select_conv(""), + "fn": lambda: self.chat_control.select_conv("", None), "outputs": [ self.chat_control.conversation_id, self.chat_control.conversation, self.chat_control.conversation_rn, self.chat_panel.chatbot, self.info_panel, + self.state_plot_panel, + self.state_retrieval_history, + self.state_plot_history, + self.chat_control.cb_is_public, ] + self._indices_input, "show_progress": "hidden", }, ) - def update_data_source(self, convo_id, messages, state, *selecteds): + def persist_data_source( + self, + convo_id, + user_id, + retrieval_msg, + plot_data, + retrival_history, + plot_history, + messages, + state, + *selecteds, + ): """Update the data source""" if not convo_id: gr.Warning("No conversation selected") return + # if not regen, then append the new message + if not state["app"].get("regen", False): + retrival_history = retrival_history + [retrieval_msg] + plot_history = plot_history + [plot_data] + else: + if retrival_history: + print("Updating retrieval history (regen=True)") + retrival_history[-1] = retrieval_msg + plot_history[-1] = plot_data + + # reset regen state + state["app"]["regen"] = False + selecteds_ = {} for index in self._app.index_manager.indices: if index.selector is None: @@ -339,15 +662,29 @@ class ChatPage(BasePage): result = session.exec(statement).one() data_source = result.data_source + old_selecteds = data_source.get("selected", {}) + is_owner = result.user == user_id + + # Write down to db result.data_source = { - "selected": selecteds_, + "selected": selecteds_ if is_owner else old_selecteds, "messages": messages, + "retrieval_messages": retrival_history, + "plot_history": plot_history, "state": state, "likes": deepcopy(data_source.get("likes", [])), } session.add(result) session.commit() 
+ return retrival_history, plot_history + + def reasoning_changed(self, reasoning_type): + if reasoning_type != DEFAULT_SETTING: + # override app settings state (temporary) + gr.Info("Reasoning type changed to `{}`".format(reasoning_type)) + return reasoning_type + def is_liked(self, convo_id, liked: gr.LikeData): with Session(engine) as session: statement = select(Conversation).where(Conversation.id == convo_id) @@ -362,7 +699,19 @@ class ChatPage(BasePage): session.add(result) session.commit() - def create_pipeline(self, settings: dict, state: dict, user_id: int, *selecteds): + def message_selected(self, retrieval_history, plot_history, msg: gr.SelectData): + index = msg.index[0] + return retrieval_history[index], plot_history[index] + + def create_pipeline( + self, + settings: dict, + session_reasoning_type: str, + session_llm: str, + state: dict, + user_id: int, + *selecteds, + ): """Create the pipeline from settings Args: @@ -374,10 +723,23 @@ class ChatPage(BasePage): Returns: - the pipeline objects """ - reasoning_mode = settings["reasoning.use"] + # override reasoning_mode by temporary chat page state + print("Session reasoning type", session_reasoning_type) + print("Session LLM", session_llm) + reasoning_mode = ( + settings["reasoning.use"] + if session_reasoning_type in (DEFAULT_SETTING, None) + else session_reasoning_type + ) reasoning_cls = reasonings[reasoning_mode] + print("Reasoning class", reasoning_cls) reasoning_id = reasoning_cls.get_info()["id"] + settings = deepcopy(settings) + llm_setting_key = f"reasoning.options.{reasoning_id}.llm" + if llm_setting_key in settings and session_llm not in (DEFAULT_SETTING, None): + settings[llm_setting_key] = session_llm + # get retrievers retrievers = [] for index in self._app.index_manager.indices: @@ -403,7 +765,15 @@ class ChatPage(BasePage): return pipeline, reasoning_state def chat_fn( - self, conversation_id, chat_history, settings, state, user_id, *selecteds + self, + conversation_id, + chat_history, + 
settings, + reasoning_type, + llm_type, + state, + user_id, + *selecteds, ): """Chat function""" chat_input = chat_history[-1][0] @@ -413,18 +783,23 @@ class ChatPage(BasePage): # construct the pipeline pipeline, reasoning_state = self.create_pipeline( - settings, state, user_id, *selecteds + settings, reasoning_type, llm_type, state, user_id, *selecteds ) + print("Reasoning state", reasoning_state) pipeline.set_output_queue(queue) - text, refs = "", "" + text, refs, plot, plot_gr = "", "", None, gr.update(visible=False) msg_placeholder = getattr( flowsettings, "KH_CHAT_MSG_PLACEHOLDER", "Thinking ..." ) print(msg_placeholder) - yield chat_history + [(chat_input, text or msg_placeholder)], refs, state - - len_ref = -1 # for logging purpose + yield ( + chat_history + [(chat_input, text or msg_placeholder)], + refs, + plot_gr, + plot, + state, + ) for response in pipeline.stream(chat_input, conversation_id, chat_history): @@ -446,22 +821,42 @@ class ChatPage(BasePage): else: refs += response.content - if len(refs) > len_ref: - print(f"Len refs: {len(refs)}") - len_ref = len(refs) + if response.channel == "plot": + plot = response.content + plot_gr = self._json_to_plot(plot) state[pipeline.get_info()["id"]] = reasoning_state["pipeline"] - yield chat_history + [(chat_input, text or msg_placeholder)], refs, state + yield ( + chat_history + [(chat_input, text or msg_placeholder)], + refs, + plot_gr, + plot, + state, + ) if not text: empty_msg = getattr( flowsettings, "KH_CHAT_EMPTY_MSG_PLACEHOLDER", "(Sorry, I don't know)" ) print(f"Generate nothing: {empty_msg}") - yield chat_history + [(chat_input, text or empty_msg)], refs, state + yield ( + chat_history + [(chat_input, text or empty_msg)], + refs, + plot_gr, + plot, + state, + ) def regen_fn( - self, conversation_id, chat_history, settings, state, user_id, *selecteds + self, + conversation_id, + chat_history, + settings, + reasoning_type, + llm_type, + state, + user_id, + *selecteds, ): """Regen function""" if not 
chat_history: @@ -470,11 +865,119 @@ class ChatPage(BasePage): return state["app"]["regen"] = True - for chat, refs, state in self.chat_fn( - conversation_id, chat_history, settings, state, user_id, *selecteds - ): - new_state = deepcopy(state) - new_state["app"]["regen"] = False - yield chat, refs, new_state + yield from self.chat_fn( + conversation_id, + chat_history, + settings, + reasoning_type, + llm_type, + state, + user_id, + *selecteds, + ) - state["app"]["regen"] = False + def check_and_suggest_name_conv(self, chat_history): + suggest_pipeline = SuggestConvNamePipeline() + new_name = gr.update() + renamed = False + + # check if this is a newly created conversation + if len(chat_history) == 1: + suggested_name = suggest_pipeline(chat_history).text[:40] + new_name = gr.update(value=suggested_name) + renamed = True + + return new_name, renamed + + def backup_original_info( + self, chat_history, settings, info_pannel, original_chat_history + ): + original_chat_history.append(chat_history[-1]) + return original_chat_history, settings, info_pannel + + def save_log( + self, + conversation_id, + chat_history, + settings, + info_panel, + original_chat_history, + original_settings, + original_info_panel, + log_dir, + ): + if not Path(log_dir).exists(): + Path(log_dir).mkdir(parents=True) + + lock = FileLock(Path(log_dir) / ".lock") + # get current date + today = datetime.now() + formatted_date = today.strftime("%d%m%Y_%H") + + with Session(engine) as session: + statement = select(Conversation).where(Conversation.id == conversation_id) + result = session.exec(statement).one() + + data_source = deepcopy(result.data_source) + likes = data_source.get("likes", []) + if not likes: + return + + feedback = likes[-1][-1] + message_index = likes[-1][0] + + current_message = chat_history[message_index[0]] + original_message = original_chat_history[message_index[0]] + is_original = all( + [ + current_item == original_item + for current_item, original_item in zip( + 
current_message, original_message + ) + ] + ) + + dataframe = [ + [ + conversation_id, + message_index, + current_message[0], + current_message[1], + chat_history, + settings, + info_panel, + feedback, + is_original, + original_message[1], + original_chat_history, + original_settings, + original_info_panel, + ] + ] + + with lock: + log_file = Path(log_dir) / f"{formatted_date}_log.csv" + is_log_file_exist = log_file.is_file() + with open(log_file, "a") as f: + writer = csv.writer(f) + # write headers + if not is_log_file_exist: + writer.writerow( + [ + "Conversation ID", + "Message ID", + "Question", + "Answer", + "Chat History", + "Settings", + "Evidences", + "Feedback", + "Original/ Rewritten", + "Original Answer", + "Original Chat History", + "Original Settings", + "Original Evidences", + ] + ) + + writer.writerows(dataframe) diff --git a/libs/ktem/ktem/pages/chat/control.py b/libs/ktem/ktem/pages/chat/control.py index 5e369b5..6fd47eb 100644 --- a/libs/ktem/ktem/pages/chat/control.py +++ b/libs/ktem/ktem/pages/chat/control.py @@ -1,13 +1,20 @@ import logging +import os import gradio as gr from ktem.app import BasePage -from ktem.db.models import Conversation, engine -from sqlmodel import Session, select +from ktem.db.models import Conversation, User, engine +from sqlmodel import Session, or_, select +import flowsettings + +from ...utils.conversation import sync_retrieval_n_message from .common import STATE logger = logging.getLogger(__name__) +ASSETS_DIR = "assets/icons" +if not os.path.isdir(ASSETS_DIR): + ASSETS_DIR = "libs/ktem/ktem/assets/icons" def is_conv_name_valid(name): @@ -35,14 +42,47 @@ class ConversationControl(BasePage): label="Chat sessions", choices=[], container=False, - filterable=False, + filterable=True, interactive=True, elem_classes=["unset-overflow"], ) with gr.Row() as self._new_delete: - self.btn_new = gr.Button(value="New", min_width=10, variant="primary") - self.btn_del = gr.Button(value="Delete", min_width=10, variant="stop") + 
self.btn_new = gr.Button( + value="", + icon=f"{ASSETS_DIR}/new.svg", + min_width=2, + scale=1, + size="sm", + elem_classes=["no-background", "body-text-color"], + ) + self.btn_del = gr.Button( + value="", + icon=f"{ASSETS_DIR}/delete.svg", + min_width=2, + scale=1, + size="sm", + elem_classes=["no-background", "body-text-color"], + ) + self.btn_conversation_rn = gr.Button( + value="", + icon=f"{ASSETS_DIR}/rename.svg", + min_width=2, + scale=1, + size="sm", + elem_classes=["no-background", "body-text-color"], + ) + self.btn_info_expand = gr.Button( + value="", + icon=f"{ASSETS_DIR}/sidebar.svg", + min_width=2, + scale=1, + size="sm", + elem_classes=["no-background", "body-text-color"], + ) + self.cb_is_public = gr.Checkbox( + value=False, label="Shared", min_width=10, scale=4 + ) with gr.Row(visible=False) as self._delete_confirm: self.btn_del_conf = gr.Button( @@ -54,28 +94,60 @@ class ConversationControl(BasePage): with gr.Row(): self.conversation_rn = gr.Text( + label="(Enter) to save", placeholder="Conversation name", - container=False, + container=True, scale=5, min_width=10, interactive=True, - ) - self.conversation_rn_btn = gr.Button( - value="Rename", - scale=1, - min_width=10, - elem_classes=["no-background", "body-text-color", "bold-text"], + visible=False, ) def load_chat_history(self, user_id): """Reload chat history""" + + # In case user are admin. 
They can also watch the + # public conversations + can_see_public: bool = False + with Session(engine) as session: + statement = select(User).where(User.id == user_id) + result = session.exec(statement).one_or_none() + + if result is not None: + if flowsettings.KH_USER_CAN_SEE_PUBLIC: + can_see_public = ( + result.username == flowsettings.KH_USER_CAN_SEE_PUBLIC + ) + else: + can_see_public = True + + print(f"User-id: {user_id}, can see public conversations: {can_see_public}") + options = [] with Session(engine) as session: - statement = ( - select(Conversation) - .where(Conversation.user == user_id) - .order_by(Conversation.date_created.desc()) # type: ignore - ) + # Define condition based on admin-role: + # - can_see: can see their conversations & public files + # - can_not_see: only see their conversations + if can_see_public: + statement = ( + select(Conversation) + .where( + or_( + Conversation.user == user_id, + Conversation.is_public, + ) + ) + .order_by( + Conversation.is_public.desc(), Conversation.date_created.desc() + ) # type: ignore + ) + else: + statement = ( + select(Conversation) + .where(Conversation.user == user_id) + .order_by(Conversation.date_created.desc()) # type: ignore + ) + results = session.exec(statement).all() for result in results: options.append((result.name, result.id)) @@ -129,7 +201,7 @@ class ConversationControl(BasePage): else: return None, gr.update(value=None, choices=[]) - def select_conv(self, conversation_id): + def select_conv(self, conversation_id, user_id): """Select the conversation""" with Session(engine) as session: statement = select(Conversation).where(Conversation.id == conversation_id) @@ -137,18 +209,46 @@ class ConversationControl(BasePage): result = session.exec(statement).one() id_ = result.id name = result.name - selected = result.data_source.get("selected", {}) + is_conv_public = result.is_public + + # disable file selection ids state if + # not the owner of the conversation + if user_id == result.user: + 
selected = result.data_source.get("selected", {}) + else: + selected = {} + chats = result.data_source.get("messages", []) - info_panel = "" + + retrieval_history: list[str] = result.data_source.get( + "retrieval_messages", [] + ) + plot_history: list[dict] = result.data_source.get("plot_history", []) + + # On initialization + # Ensure len of retrieval and messages are equal + retrieval_history = sync_retrieval_n_message(chats, retrieval_history) + + info_panel = ( + retrieval_history[-1] + if retrieval_history + else "
No evidence found.
" + ) + plot_data = plot_history[-1] if plot_history else None state = result.data_source.get("state", STATE) + except Exception as e: logger.warning(e) id_ = "" name = "" selected = {} chats = [] + retrieval_history = [] + plot_history = [] info_panel = "" + plot_data = None state = STATE + is_conv_public = False indices = [] for index in self._app.index_manager.indices: @@ -160,10 +260,29 @@ class ConversationControl(BasePage): if isinstance(index.selector, tuple): indices.extend(selected.get(str(index.id), index.default_selector)) - return id_, id_, name, chats, info_panel, state, *indices + return ( + id_, + id_, + name, + chats, + info_panel, + plot_data, + retrieval_history, + plot_history, + is_conv_public, + state, + *indices, + ) - def rename_conv(self, conversation_id, new_name, user_id): + def rename_conv(self, conversation_id, new_name, is_renamed, user_id): """Rename the conversation""" + if not is_renamed: + return ( + gr.update(), + conversation_id, + gr.update(visible=False), + ) + if user_id is None: gr.Warning("Please sign in first (Settings → User Settings)") return gr.update(), "" @@ -185,7 +304,12 @@ class ConversationControl(BasePage): session.commit() history = self.load_chat_history(user_id) - return gr.update(choices=history), conversation_id + gr.Info("Conversation renamed.") + return ( + gr.update(choices=history), + conversation_id, + gr.update(visible=False), + ) def _on_app_created(self): """Reload the conversation once the app is created""" diff --git a/libs/ktem/ktem/pages/chat/report.py b/libs/ktem/ktem/pages/chat/report.py index 7a4fe6a..f404743 100644 --- a/libs/ktem/ktem/pages/chat/report.py +++ b/libs/ktem/ktem/pages/chat/report.py @@ -12,7 +12,7 @@ class ReportIssue(BasePage): self.on_building_ui() def on_building_ui(self): - with gr.Accordion(label="Report", open=False): + with gr.Accordion(label="Feedback", open=False): self.correctness = gr.Radio( choices=[ ("The answer is correct", "correct"), diff --git 
a/libs/ktem/ktem/pages/help.py b/libs/ktem/ktem/pages/help.py index fd750ef..e3438d1 100644 --- a/libs/ktem/ktem/pages/help.py +++ b/libs/ktem/ktem/pages/help.py @@ -9,6 +9,7 @@ from theflow.settings import settings def get_remote_doc(url: str) -> str: try: res = requests.get(url) + res.raise_for_status() return res.text except Exception as e: print(f"Failed to fetch document from {url}: {e}") diff --git a/libs/ktem/ktem/pages/login.py b/libs/ktem/ktem/pages/login.py index 50f505f..9dc4839 100644 --- a/libs/ktem/ktem/pages/login.py +++ b/libs/ktem/ktem/pages/login.py @@ -7,9 +7,9 @@ from sqlmodel import Session, select fetch_creds = """ function() { - const username = getStorage('username') - const password = getStorage('password') - return [username, password]; + const username = getStorage('username', '') + const password = getStorage('password', '') + return [username, password, null]; } """ diff --git a/libs/ktem/ktem/pages/resources/__init__.py b/libs/ktem/ktem/pages/resources/__init__.py index c423d3f..aa606c9 100644 --- a/libs/ktem/ktem/pages/resources/__init__.py +++ b/libs/ktem/ktem/pages/resources/__init__.py @@ -15,18 +15,18 @@ class ResourcesTab(BasePage): self.on_building_ui() def on_building_ui(self): - if self._app.f_user_management: - with gr.Tab("User Management", visible=False) as self.user_management_tab: - self.user_management = UserManagement(self._app) + with gr.Tab("Index Collections") as self.index_management_tab: + self.index_management = IndexManagement(self._app) with gr.Tab("LLMs") as self.llm_management_tab: self.llm_management = LLMManagement(self._app) - with gr.Tab("Embedding Models") as self.emb_management_tab: + with gr.Tab("Embeddings") as self.emb_management_tab: self.emb_management = EmbeddingManagement(self._app) - with gr.Tab("Index Management") as self.index_management_tab: - self.index_management = IndexManagement(self._app) + if self._app.f_user_management: + with gr.Tab("Users", visible=False) as self.user_management_tab: 
+ self.user_management = UserManagement(self._app) def on_subscribe_public_events(self): if self._app.f_user_management: diff --git a/libs/ktem/ktem/pages/resources/user.py b/libs/ktem/ktem/pages/resources/user.py index 2b65075..106c268 100644 --- a/libs/ktem/ktem/pages/resources/user.py +++ b/libs/ktem/ktem/pages/resources/user.py @@ -94,6 +94,28 @@ def validate_password(pwd, pwd_cnf): return "" +def create_user(usn, pwd) -> bool: + with Session(engine) as session: + statement = select(User).where(User.username_lower == usn.lower()) + result = session.exec(statement).all() + if result: + print(f'User "{usn}" already exists') + return False + + else: + hashed_password = hashlib.sha256(pwd.encode()).hexdigest() + user = User( + username=usn, + username_lower=usn.lower(), + password=hashed_password, + admin=True, + ) + session.add(user) + session.commit() + + return True + + class UserManagement(BasePage): def __init__(self, app): self._app = app @@ -105,23 +127,9 @@ class UserManagement(BasePage): usn = flowsettings.KH_FEATURE_USER_MANAGEMENT_ADMIN pwd = flowsettings.KH_FEATURE_USER_MANAGEMENT_PASSWORD - with Session(engine) as session: - statement = select(User).where(User.username_lower == usn.lower()) - result = session.exec(statement).all() - if result: - print(f'User "{usn}" already exists') - - else: - hashed_password = hashlib.sha256(pwd.encode()).hexdigest() - user = User( - username=usn, - username_lower=usn.lower(), - password=hashed_password, - admin=True, - ) - session.add(user) - session.commit() - gr.Info(f'User "{usn}" created successfully') + is_created = create_user(usn, pwd) + if is_created: + gr.Info(f'User "{usn}" created successfully') def on_building_ui(self): with gr.Tab(label="User list"): @@ -224,7 +232,7 @@ class UserManagement(BasePage): gr.update(visible=False), gr.update(visible=False), ), - inputs=None, + inputs=[], outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no], show_progress="hidden", ) diff --git 
a/libs/ktem/ktem/pages/settings.py b/libs/ktem/ktem/pages/settings.py index 23bbfac..498282f 100644 --- a/libs/ktem/ktem/pages/settings.py +++ b/libs/ktem/ktem/pages/settings.py @@ -2,13 +2,15 @@ import hashlib import gradio as gr from ktem.app import BasePage +from ktem.components import reasonings from ktem.db.models import Settings, User, engine from sqlmodel import Session, select signout_js = """ -function() { +function(u, c, pw, pwc) { removeFromStorage('username'); removeFromStorage('password'); + return [u, c, pw, pwc]; } """ @@ -72,6 +74,10 @@ class SettingsPage(BasePage): self._components = {} self._reasoning_mode = {} + # store llms and embeddings components + self._llms = [] + self._embeddings = [] + # render application page if there are application settings self._render_app_tab = False if self._default_settings.application.settings: @@ -101,14 +107,13 @@ class SettingsPage(BasePage): def on_building_ui(self): if self._app.f_user_management: - with gr.Tab("Users"): + with gr.Tab("User settings"): self.user_tab() - with gr.Tab("General"): - self.app_tab() - with gr.Tab("Document Indices"): - self.index_tab() - with gr.Tab("Reasoning Pipelines"): - self.reasoning_tab() + + self.app_tab() + self.index_tab() + self.reasoning_tab() + self.setting_save_btn = gr.Button( "Save changes", variant="primary", scale=1, elem_classes=["right-button"] ) @@ -192,7 +197,7 @@ class SettingsPage(BasePage): ) onSignOutClick = self.signout.click( lambda: (None, "Current user: ___", "", ""), - inputs=None, + inputs=[], outputs=[ self._user_id, self.current_name, @@ -248,10 +253,14 @@ class SettingsPage(BasePage): return "", "" def app_tab(self): - with gr.Tab("General application settings", visible=self._render_app_tab): + with gr.Tab("General", visible=self._render_app_tab): for n, si in self._default_settings.application.settings.items(): obj = render_setting_item(si, si.value) self._components[f"application.{n}"] = obj + if si.special_type == "llm": + 
self._llms.append(obj) + if si.special_type == "embedding": + self._embeddings.append(obj) def index_tab(self): # TODO: double check if we need general @@ -260,12 +269,18 @@ class SettingsPage(BasePage): # obj = render_setting_item(si, si.value) # self._components[f"index.{n}"] = obj - with gr.Tab("Index settings", visible=self._render_index_tab): + id2name = {k: v.name for k, v in self._app.index_manager.info().items()} + with gr.Tab("Retrieval settings", visible=self._render_index_tab): for pn, sig in self._default_settings.index.options.items(): - with gr.Tab(f"Index {pn}"): + name = "{} Collection".format(id2name.get(pn, f"")) + with gr.Tab(name): for n, si in sig.settings.items(): obj = render_setting_item(si, si.value) self._components[f"index.options.{pn}.{n}"] = obj + if si.special_type == "llm": + self._llms.append(obj) + if si.special_type == "embedding": + self._embeddings.append(obj) def reasoning_tab(self): with gr.Tab("Reasoning settings", visible=self._render_reasoning_tab): @@ -275,6 +290,10 @@ class SettingsPage(BasePage): continue obj = render_setting_item(si, si.value) self._components[f"reasoning.{n}"] = obj + if si.special_type == "llm": + self._llms.append(obj) + if si.special_type == "embedding": + self._embeddings.append(obj) gr.Markdown("### Reasoning-specific settings") self._components["reasoning.use"] = render_setting_item( @@ -289,10 +308,19 @@ class SettingsPage(BasePage): visible=idx == 0, elem_id=pn, ) as self._reasoning_mode[pn]: - gr.Markdown("**Name**: Description") + reasoning = reasonings.get(pn, None) + if reasoning is None: + gr.Markdown("**Name**: Description") + else: + info = reasoning.get_info() + gr.Markdown(f"**{info['name']}**: {info['description']}") for n, si in sig.settings.items(): obj = render_setting_item(si, si.value) self._components[f"reasoning.options.{pn}.{n}"] = obj + if si.special_type == "llm": + self._llms.append(obj) + if si.special_type == "embedding": + self._embeddings.append(obj) def 
change_reasoning_mode(self, value): output = [] @@ -360,3 +388,38 @@ class SettingsPage(BasePage): outputs=[self._settings_state] + self.components(), show_progress="hidden", ) + + def update_llms(): + from ktem.llms.manager import llms + + if llms._default: + llm_choices = [(f"{llms._default} (default)", "")] + else: + llm_choices = [("(random)", "")] + llm_choices += [(_, _) for _ in llms.options().keys()] + return gr.update(choices=llm_choices) + + def update_embeddings(): + from ktem.embeddings.manager import embedding_models_manager + + if embedding_models_manager._default: + emb_choices = [(f"{embedding_models_manager._default} (default)", "")] + else: + emb_choices = [("(random)", "")] + emb_choices += [(_, _) for _ in embedding_models_manager.options().keys()] + return gr.update(choices=emb_choices) + + for llm in self._llms: + self._app.app.load( + update_llms, + inputs=[], + outputs=[llm], + show_progress="hidden", + ) + for emb in self._embeddings: + self._app.app.load( + update_embeddings, + inputs=[], + outputs=[emb], + show_progress="hidden", + ) diff --git a/libs/ktem/ktem/reasoning/prompt_optimization/__init__.py b/libs/ktem/ktem/reasoning/prompt_optimization/__init__.py new file mode 100644 index 0000000..29d8bf9 --- /dev/null +++ b/libs/ktem/ktem/reasoning/prompt_optimization/__init__.py @@ -0,0 +1,9 @@ +from .decompose_question import DecomposeQuestionPipeline +from .fewshot_rewrite_question import FewshotRewriteQuestionPipeline +from .rewrite_question import RewriteQuestionPipeline + +__all__ = [ + "DecomposeQuestionPipeline", + "FewshotRewriteQuestionPipeline", + "RewriteQuestionPipeline", +] diff --git a/libs/ktem/ktem/reasoning/prompt_optimization/decompose_question.py b/libs/ktem/ktem/reasoning/prompt_optimization/decompose_question.py new file mode 100644 index 0000000..7fdc473 --- /dev/null +++ b/libs/ktem/ktem/reasoning/prompt_optimization/decompose_question.py @@ -0,0 +1,79 @@ +import logging + +from ktem.llms.manager import llms +from 
ktem.reasoning.prompt_optimization.rewrite_question import RewriteQuestionPipeline +from pydantic import BaseModel, Field + +from kotaemon.base import Document, HumanMessage, Node, SystemMessage +from kotaemon.llms import ChatLLM + +logger = logging.getLogger(__name__) + + +class SubQuery(BaseModel): + """Search over a database of insurance rulebooks or financial reports""" + + sub_query: str = Field( + ..., + description="A very specific query against the database.", + ) + + +class DecomposeQuestionPipeline(RewriteQuestionPipeline): + """Decompose user complex question into multiple sub-questions + + Args: + llm: the language model to rewrite question + lang: the language of the answer. Currently support English and Japanese + """ + + llm: ChatLLM = Node( + default_callback=lambda _: llms.get("openai-gpt4-turbo", llms.get_default()) + ) + DECOMPOSE_SYSTEM_PROMPT_TEMPLATE = ( + "You are an expert at converting user complex questions into sub questions. " + "Perform query decomposition using provided function_call. " + "Given a user question, break it down into the most specific sub" + " questions you can (at most 3) " + "which will help you answer the original question. " + "Each sub question should be about a single concept/fact/idea. " + "If there are acronyms or words you are not familiar with, " + "do not try to rephrase them." 
+ ) + prompt_template: str = DECOMPOSE_SYSTEM_PROMPT_TEMPLATE + + def create_prompt(self, question): + schema = SubQuery.model_json_schema() + function = { + "name": schema["title"], + "description": schema["description"], + "parameters": schema, + } + llm_kwargs = { + "tools": [{"type": "function", "function": function}], + "tool_choice": "auto", + } + + messages = [ + SystemMessage(content=self.prompt_template), + HumanMessage(content=question), + ] + + return messages, llm_kwargs + + def run(self, question: str) -> list: # type: ignore + messages, llm_kwargs = self.create_prompt(question) + result = self.llm(messages, **llm_kwargs) + tool_calls = result.additional_kwargs.get("tool_calls", None) + sub_queries = [] + if tool_calls: + for tool_call in tool_calls: + sub_queries.append( + Document( + content=SubQuery.parse_raw( + tool_call["function"]["arguments"] + ).sub_query + ) + ) + + return sub_queries diff --git a/libs/ktem/ktem/reasoning/prompt_optimization/fewshot_rewrite_question.py b/libs/ktem/ktem/reasoning/prompt_optimization/fewshot_rewrite_question.py new file mode 100644 index 0000000..3c3b719 --- /dev/null +++ b/libs/ktem/ktem/reasoning/prompt_optimization/fewshot_rewrite_question.py @@ -0,0 +1,100 @@ +import json +import uuid +from pathlib import Path + +from ktem.components import get_docstore, get_vectorstore +from ktem.llms.manager import llms +from ktem.reasoning.prompt_optimization.rewrite_question import ( + DEFAULT_REWRITE_PROMPT, + RewriteQuestionPipeline, +) +from theflow.settings import settings as flowsettings + +from kotaemon.base import AIMessage, Document, HumanMessage, Node, SystemMessage +from kotaemon.embeddings import BaseEmbeddings +from kotaemon.llms import ChatLLM +from kotaemon.storages import BaseDocumentStore, BaseVectorStore + + +class FewshotRewriteQuestionPipeline(RewriteQuestionPipeline): + """Rewrite user question + + Args: + llm: the language model to rewrite question + rewrite_template: the prompt template for llm to 
paraphrase a text input + lang: the language of the answer. Currently support English and Japanese + embedding: the embedding model to encode the question + vector_store: the vector store to store the encoded question + doc_store: the document store to store the original question + k: the number of examples to retrieve for rewriting + """ + + llm: ChatLLM = Node(default_callback=lambda _: llms.get_default()) + rewrite_template: str = DEFAULT_REWRITE_PROMPT + lang: str = "English" + embedding: BaseEmbeddings + vector_store: BaseVectorStore + doc_store: BaseDocumentStore + k: int = getattr(flowsettings, "N_PROMPT_OPT_EXAMPLES", 3) + + def add_documents(self, examples, batch_size: int = 50): + print("Adding fewshot examples for rewriting") + documents = [] + for example in examples: + doc = Document( + text=example["input"], id_=str(uuid.uuid4()), metadata=example + ) + documents.append(doc) + + for i in range(0, len(documents), batch_size): + embeddings = self.embedding(documents[i : i + batch_size]) + ids = [t.doc_id for t in documents[i : i + batch_size]] + self.vector_store.add( + embeddings=embeddings, + ids=ids, + ) + self.doc_store.add(documents[i : i + batch_size]) + + @classmethod + def get_pipeline( + cls, + embedding, + example_path=Path(__file__).parent / "rephrase_question_train.json", + collection_name: str = "fewshot_rewrite_examples", + ): + vector_store = get_vectorstore(collection_name) + doc_store = get_docstore(collection_name) + + pipeline = cls( + embedding=embedding, vector_store=vector_store, doc_store=doc_store + ) + if doc_store.count(): + return pipeline + + examples = json.load(open(example_path, "r")) + pipeline.add_documents(examples) + + return pipeline + + def run(self, question: str) -> Document: # type: ignore + emb = self.embedding(question)[0].embedding + _, _, ids = self.vector_store.query(embedding=emb, top_k=self.k) + examples = self.doc_store.get(ids) + messages = [SystemMessage(content="You are a helpful assistant")] + for 
example in examples: + messages.append( + HumanMessage( + content=self.rewrite_template.format( + question=example.metadata["input"], lang=self.lang + ) + ) + ) + messages.append(AIMessage(content=example.metadata["output"])) + messages.append( + HumanMessage( + content=self.rewrite_template.format(question=question, lang=self.lang) + ) + ) + + result = self.llm(messages) + return result diff --git a/libs/ktem/ktem/reasoning/prompt_optimization/rephrase_question_train.json b/libs/ktem/ktem/reasoning/prompt_optimization/rephrase_question_train.json new file mode 100644 index 0000000..fbe6b31 --- /dev/null +++ b/libs/ktem/ktem/reasoning/prompt_optimization/rephrase_question_train.json @@ -0,0 +1,9090 @@ +[ + { + "input": "What was the percentage increase in General and Administrative expenses in the year 2018 compared to the previous year?", + "output": "What was the increase in the General and administrative in 2018?" + }, + { + "input": "What was the specific percentage increase in the General and Administrative expenses during the year 2018?", + "output": "What was the increase in the General and administrative in 2018?" + }, + { + "input": "What was the decrease in the Other net expense in 2019?", + "output": "What was the specific amount of the decrease in the Other net expense in the financial year of 2019?" + }, + { + "input": "What was the decrease in the Other net expense in 2019?", + "output": "What was the specific decrease amount experienced in the Other net expense during the year 2019?" + }, + { + "input": "What was the increase in interest income?", + "output": "What was the exact amount by which the interest income increased?" + }, + { + "input": "What was the increase in interest income?", + "output": "What was the exact amount of the increase in interest income?" + }, + { + "input": "What was the average Other expense, net for 2018 and 2019?", + "output": "What was the average net Other expense for the years 2018 and 2019 combined?" 
+ }, + { + "input": "In which year was Other expense, net less than 5,000 thousands?", + "output": "In what particular year did the amount of Other expense, net fall below 5,000 thousands?" + }, + { + "input": "What were the income tax expenses in the years 2019, 2018, and 2017, respectively?", + "output": "What was the income tax expense in 2019,2018 and 2017 respectively?" + }, + { + "input": "\"What were the income tax expenses for the years 2019, 2018, and 2017?\"", + "output": "What was the income tax expense in 2019,2018 and 2017 respectively?" + }, + { + "input": "How much money did the company have in unremitted earnings for the year 2019?", + "output": "What was the company's unremitted earnings in 2019?" + }, + { + "input": "How much unremitted earnings did the company have in 2019, without releasing or distributing them?", + "output": "What was the company's unremitted earnings in 2019?" + }, + { + "input": "Which year witnessed interest and penalties that amounted to less than 20 thousand?", + "output": "In which year was Interest and penalties less than 20 thousand?" + }, + { + "input": "What is the net carrying amount in 2019?", + "output": "What is the net carrying amount for the year 2019? Please provide details to assist in providing a more accurate response." + }, + { + "input": "What is the net carrying amount in 2019?", + "output": "What is the net carrying amount for the year 2019? Can you provide more details or context about the specific asset, liability or financial element in consideration?" + }, + { + "input": "What is the change in the debt discount, net of amortization from December 31, 2019 to December 31, 2018?", + "output": "What was the net change in the debt discount's value, after taking into account amortization, between December 31, 2019 and December 31, 2018?" 
+ }, + { + "input": "What is the change in Net carrying amount from December 31, 2019 to December 31, 2018?", + "output": "What is the difference in the net carrying amount between December 31, 2019 and December 31, 2018?" + }, + { + "input": "What is the change in Furniture and equipment from December 31, 2019 to December 31, 2018?", + "output": "What is the net difference in the value of furniture and equipment between December 31, 2019 and December 31, 2018?" + }, + { + "input": "What is the change in Furniture and equipment from December 31, 2019 to December 31, 2018?", + "output": "What was the net change in the value of Furniture and equipment between December 31, 2019 and December 31, 2018?" + }, + { + "input": "What is the change in System hardware from December 31, 2019 to December 31, 2018?", + "output": "What are the differences in the hardware of the System between December 31, 2019, and December 31, 2018?" + }, + { + "input": "What was the exact percentage increase in the cost of revenue between 2017 and 2018?", + "output": "What was the increase in the cost of revenue in 2018?" + }, + { + "input": "What was the percentage increase in the total amount spent on expenses directly related to generating revenue in 2018 compared to the previous year?", + "output": "What was the increase in the cost of revenue in 2018?" + }, + { + "input": "In which year was cost of revenue less than 40,000 thousands?", + "output": "In what specific year did the cost of revenue fall below the amount of 40,000 thousands?" + }, + { + "input": "If a company used its earnings to finance its domestic operations, what are the potential outcomes or consequences of this decision?", + "output": "What would happen if earnings were used to fund domestic operations?" 
+ }, + { + "input": "What was the change in Cash, cash equivalents, and restricted cash at the beginning of the period, comparing December 31, 2019, to December 31, 2018?", + "output": "What is the change in Cash, cash equivalents and restricted cash at beginning of period from December 31, 2019 to December 31, 2018?" + }, + { + "input": "What is the net difference in Cash provided by operating activities between December 31, 2019 and December 31, 2018?", + "output": "What is the change in Cash provided by operating activities from December 31, 2019 to December 31, 2018?" + }, + { + "input": "What is the change in Cash used in investing activities from December 31, 2019 to December 31, 2018?", + "output": "What is the difference in the amount of Cash used in investing activities between December 31, 2019, and December 31, 2018?" + }, + { + "input": "What was the percentage change in the Sales and marketing expense from 2017 to 2018?", + "output": "What was the increase in the Sales and marketing expense in 2018 compared to 2017?" + }, + { + "input": "In which year was Sales and marketing expenses less than 50,000 thousands?", + "output": "In what specific year did the total expenses for sales and marketing fall below 50,000 thousands (50 million) currency units?" + }, + { + "input": "In which year was General and administrative expenses less than 50,000 thousands?", + "output": "In which specific year did the amount for General and Administrative expenses fall below 50,000 thousands (50 million) dollars?" + }, + { + "input": "What is the average Carrying Value for the period December 31, 2019 to December 31, 2018?", + "output": "What is the average Carrying Value from December 31, 2018 to December 31, 2019?" + }, + { + "input": "In which specific year did the fair value of the 2022 Notes measure below 200,000 thousands?", + "output": "In which year was the 2022 Notes fair value less than 200,000 thousands?" 
+ }, + { + "input": "In the year when the fair value of the 2022 Notes was below 200,000 thousands, what specific year was it?", + "output": "In which year was the 2022 Notes fair value less than 200,000 thousands?" + }, + { + "input": "What was the 2022 Notes cap price?", + "output": "\"What was the cap price for the 2022 Notes? Please provide any additional details that could assist in retrieving the information accurately.\"" + }, + { + "input": "What were the average expenses for sales and marketing in the years 2018 and 2019?", + "output": "What was the average Sales and marketing expenses for 2018 and 2019?" + }, + { + "input": "What were the average expenses for Sales and Marketing in 2018 and 2019, respectively?", + "output": "What was the average Sales and marketing expenses for 2018 and 2019?" + }, + { + "input": "In which year was Sales and marketing expenses less than 90,000 thousands?", + "output": "When was the year that the sales and marketing expenses were recorded to be under 90,000 thousands?" + }, + { + "input": "In which year was Sales and marketing expenses less than 90,000 thousands?", + "output": "In which specific year did the total expenses for sales and marketing amount to a figure less than 90,000 thousands?" + }, + { + "input": "What was the exact amount of gross profit recorded in the fiscal year of 2018?", + "output": "What was the Gross profit in 2018?" + }, + { + "input": "What was the specific value of the gross profit for the year 2018?", + "output": "What was the Gross profit in 2018?" + }, + { + "input": "In what specific year did the revenue fall below one hundred thousand thousands (100,000,000)?", + "output": "In which year was revenue less than 100,000 thousands?" + }, + { + "input": "\"In which specific year did the revenue fall below 100,000 thousands (100 million)?\"", + "output": "In which year was revenue less than 100,000 thousands?" 
+ }, + { + "input": "What is the difference in the Gross Profit between the fiscal years 2018 and 2019?", + "output": "What is the change in the Gross Profit from 2018 to 2019?" + }, + { + "input": "What were the capitalized software development costs for the company in the years ending on December 31, 2017?", + "output": "What was the company capitalized software development costs during the years ended December 31, 2017?" + }, + { + "input": "What does the Adjusted EBITDA represent?", + "output": "What is the meaning and purpose of the term Adjusted EBITDA?" + }, + { + "input": "What does the Adjusted EBITDA represent?", + "output": "What is the meaning and significance of Adjusted EBITDA?" + }, + { + "input": "\"What were the federal statutory income tax rates in 2019, 2018, and 2017?\"", + "output": "What was the Income tax at federal statutory rate in 2019, 2018 and 2017?" + }, + { + "input": "What were the federal statutory income tax rates in 2019, 2018, and 2017?", + "output": "What was the Income tax at federal statutory rate in 2019, 2018 and 2017?" + }, + { + "input": "In what year did the State income tax expense, taking into account the federal tax effect, fall below negative one thousand thousands?", + "output": "In which year was State income tax expense, net of federal tax effect less than (1,000) thousands?" + }, + { + "input": "What were the amounts of the Other expense, net in 2018 and 2017?", + "output": "What was the Other expense, net in 2018 and 2017 respectively?" + }, + { + "input": "What is the difference in the amount of money owed from customers (net accounts receivable) between December 31, 2019, and December 31, 2018?", + "output": "What is the change in Net accounts receivable from December 31, 2019 to December 31, 2018?" + }, + { + "input": "What are the respective federal income tax expense at statutory rates in 2018 and 2019?", + "output": "What were the federal income tax expenses at statutory rates for the years 2018 and 2019?" 
+ }, + { + "input": "What were the specific state income tax rates, after accounting for federal benefit deductions, for the years 2017 and 2018?", + "output": "What are the respective state income taxes, net of federal benefit in 2017 and 2018?" + }, + { + "input": "What were the state income tax rates, after accounting for federal benefit, for both the years 2017 and 2018?", + "output": "What are the respective state income taxes, net of federal benefit in 2017 and 2018?" + }, + { + "input": "What are the respective service revenue at March 31 and June 30, 2018?", + "output": "What were the service revenues as of March 31 and June 30, 2018?" + }, + { + "input": "What are the respective service revenue at June 30 and September 30, 2018?", + "output": "What is the service revenue as of June 30 and September 30, 2018?" + }, + { + "input": "What are the respective service revenue at June 30 and September 30, 2018?", + "output": "What is the amount of service revenue reported on the financial statements for June 30 and September 30, 2018?" + }, + { + "input": "What percentage of the total revenue generated in North America in 2019 can be attributed to the non-core segment?", + "output": "What is the value of the revenue from the non-core segment from North America as a percentage of the total revenue earned in North America in 2019?" + }, + { + "input": "What is the average amount of net cash generated from operating activities for the years 2018 and 2019?", + "output": "What is the average net cash provided by operating activities in 2018 and 2019?" + }, + { + "input": "What is the amount of cash-based equity compensation expense included in network operations expenses in both 2018 and 2017?", + "output": "What are the respective on-cash equity-based compensation expense included in network operations expenses in 2018 and 2017?" 
+ }, + { + "input": "What are the respective service revenue in 2018 and 2019?", + "output": "What was the amount of service revenue in the year 2018 and 2019 respectively?" + }, + { + "input": "What is the total amount of interest and principal payments required for the debt relating to the 2022 Notes?", + "output": "What is the value of the interest and principal payment obligations included in the debt under 2022 Notes?" + }, + { + "input": "What is the total amount of both interest and principal that needs to be paid for the debt included in the 2022 Notes?", + "output": "What is the value of the interest and principal payment obligations included in the debt under 2022 Notes?" + }, + { + "input": "What is the total amount of debt that the company needs to repay within the next 3 years?", + "output": "What is the company's total debt due within 3 years?" + }, + { + "input": "What is the aggregate amount of debt that the company needs to repay within the next three years?", + "output": "What is the company's total debt due within 3 years?" + }, + { + "input": "What is the total amount of finance lease obligations that the company needs to repay within the next 3 years?", + "output": "What is the company's total finance lease obligations due within 3 years?" + }, + { + "input": "What were the specific values of system infrastructure in the years 2018 and 2019?", + "output": "What are the respective values of system infrastructure in 2018 and 2019?" + }, + { + "input": "What were the values of system infrastructure in 2018 and 2019?", + "output": "What are the respective values of system infrastructure in 2018 and 2019?" + }, + { + "input": "What is the percentage increase or decrease in the value of network equipment from 2018 to 2019?", + "output": "What is the percentage change in the value of network equipment between 2018 and 2019?" 
+ }, + { + "input": "What is the percentage change in the value of leasehold improvements between 2018 and 2019?", + "output": "What is the precise percentage increase or decrease in the value of leasehold improvements from 2018 to 2019?" + }, + { + "input": "What is the percentage change in the value of system infrastructure between 2018 and 2019?", + "output": "What is the percentage difference in the value of system infrastructure from 2018 to 2019?" + }, + { + "input": "What were the pre-tax domestic incomes for 2018 and 2019?", + "output": "What are the respective domestic income before income taxes in 2018 and 2019?" + }, + { + "input": "What was the mean pre-tax domestic income for individuals in the years 2017 and 2018?", + "output": "What is the average domestic income before income taxes in 2017 and 2018?" + }, + { + "input": "What were the average pre-tax domestic incomes in the years 2017 and 2018?", + "output": "What is the average domestic income before income taxes in 2017 and 2018?" + }, + { + "input": "What was the average pre-tax domestic income for households in 2018 and 2019?", + "output": "What is the average domestic income before income taxes in 2018 and 2019?" + }, + { + "input": "What is the average number of basic weighted average common shares in 2018 and 2019?", + "output": "What is the mean value of the basic weighted average common shares for the years 2018 and 2019?" + }, + { + "input": "What is the average number of basic weighted average common shares in 2018 and 2019?", + "output": "What is the average number of basic weighted average common shares for the years 2018 and 2019 combined?" + }, + { + "input": "What was the average impact on stock dilution from stock options in 2018 and 2019?", + "output": "What is the average dilutive effect of stock options in 2018 and 2019?" 
+ }, + { + "input": "What was the average impact of stock options on dilution for the years 2018 and 2019?", + "output": "What is the average dilutive effect of stock options in 2018 and 2019?" + }, + { + "input": "What does the cost of operations represent?", + "output": "What is the meaning or significance of the term \"cost of operations\"?" + }, + { + "input": "How many quarters had operating revenues that was below $2,000 million?", + "output": "How many quarters had operating revenues below $2,000 million? Can you provide a breakdown of the number of quarters where the operating revenues were below $2,000 million?" + }, + { + "input": "What was the exact change in Operating revenues from the Three Months Ended March to the Three Months Ended June?", + "output": "What was the change in Operating revenues between Three Months Ended March and June?" + }, + { + "input": "What was the difference in operating revenues for the Three Months Ended March and the Three Months Ended June?", + "output": "What was the change in Operating revenues between Three Months Ended March and June?" + }, + { + "input": "What was the amount of net property and equipment in 2015?", + "output": "What was the exact monetary value of net property and equipment in the year 2015?" + }, + { + "input": "What was the amount of net property and equipment in 2015?", + "output": "What was the specific amount of net property and equipment recorded on the financial statements for the year 2015?" + }, + { + "input": "What was the value of redeemable noncontrolling interests in the year 2019?", + "output": "What were the Redeemable noncontrolling interests in 2019?" + }, + { + "input": "What was the value of the redeemable noncontrolling interests in 2019?", + "output": "What were the Redeemable noncontrolling interests in 2019?" 
+ }, + { + "input": "What was the percent increase or decrease in the equity of American Tower Corporation from 2015 to 2016 in the United States?", + "output": "What was the percentage change in Total American Tower Corporation equity between 2015 and 2016?" + }, + { + "input": "What was the percentage increase or decrease in the equity of American Tower Corporation from 2015 to 2016?", + "output": "What was the percentage change in Total American Tower Corporation equity between 2015 and 2016?" + }, + { + "input": "What was the difference in the overall value of assets from 2017 to 2018?", + "output": "What was the change in total assets between 2017 and 2018?" + }, + { + "input": "What was the total amount received from the sales of senior notes in the year 2019 after deducting any expenses or fees?", + "output": "What were the net Proceeds from issuance of senior notes in 2019?" + }, + { + "input": "What was the total amount of money received from the repayment of credit facilities in 2018?", + "output": "What were the net Proceeds from (repayments of) credit facilities in 2018?" + }, + { + "input": "\"What was the total amount received or repaid through credit facilities in 2018?\"", + "output": "What were the net Proceeds from (repayments of) credit facilities in 2018?" + }, + { + "input": "What was the value of Equipment in 2018?", + "output": "What was the total monetary worth or market value of Equipment in the year 2018?" + }, + { + "input": "What was the value of Equipment in 2018?", + "output": "What was the monetary worth of the Equipment in the year 2018?" + }, + { + "input": "What was the exact monetary worth of buildings and any related improvements in the year 2019?", + "output": "What was the value of buildings and improvements in 2019?" + }, + { + "input": "What was the difference in the value of land and improvements from 2018 to 2019?", + "output": "What was the change in Land and improvements between 2018 and 2019?" 
+ }, + { + "input": "What was the percentage increase or decrease in the net value of Property and equipment between the years 2018 and 2019?", + "output": "What was the percentage change in Property and equipment, net between 2018 and 2019?" + }, + { + "input": "What are the components that make up redevelopment capital expenditures?", + "output": "What does Redevelopment capital expenditures include?" + }, + { + "input": "What actions are authorized by the company's Buyback Programs?", + "output": "What do the company's Buyback Programs authorize the company to do?" + }, + { + "input": "What was the change in the total number of shares purchased between November and December?", + "output": "How much did the total number of shares purchased increase or decrease from November to December?" + }, + { + "input": "What was the total value of the Property, Plant, and Equipment (PPE) in 2016?", + "output": "What was the gross PPE in 2016?" + }, + { + "input": "What was the total value of personal protective equipment (PPE) in terms of sales or revenue for the year 2016?", + "output": "What was the gross PPE in 2016?" + }, + { + "input": "For how many years has the total value of gross intangibles been above $15,000 million?", + "output": "How many years did gross intangibles exceed $15,000 million?" + }, + { + "input": "How many consecutive years did the value of gross intangibles surpass $15,000 million?", + "output": "How many years did gross intangibles exceed $15,000 million?" + }, + { + "input": "What was the percentage change in gross goodwill between 2018 and 2019?", + "output": "What was the percentage difference in the amount of gross goodwill from 2018 to 2019?" + }, + { + "input": "What was the percentage change in gross goodwill between 2018 and 2019?", + "output": "What was the percentage difference in gross goodwill from 2018 to 2019?" 
+ }, + { + "input": "\"What was the overall amount paid in lease payments for the operating lease agreement?\"", + "output": "What were the total lease payments for operating lease?" + }, + { + "input": "What was the sum total of all lease payments made for an operating lease?", + "output": "What were the total lease payments for operating lease?" + }, + { + "input": "\"What was the total amount of operating lease expenses incurred during the fiscal years 2020, 2021, and 2022?\"", + "output": "What was the sum of operating lease in fiscal years 2020-2022?" + }, + { + "input": "What is the proportion of non-current lease liability in relation to the total lease liability, expressed as a percentage?", + "output": "What is non-current lease liability as a percentage of Total lease liability?" + }, + { + "input": "Which years did the company allocate a valuation allowance?", + "output": "In which years did the company provide a valuation allowance?" + }, + { + "input": "For how many years, starting from January 1, was the balance consistently higher than $100 million?", + "output": "How many years was the balance as of January 1 above $100 million?" + }, + { + "input": "How many years, starting from January 1, had a balance above $100 million?", + "output": "How many years was the balance as of January 1 above $100 million?" + }, + { + "input": "What specific details about the company's taxes can be found in their state tax returns?", + "output": "What do the company's state tax returns reflect?" + }, + { + "input": "In 2017, how many instances were there where the deferred Income tax benefit (provision) exceeded $50 million?", + "output": "How many of the deferred Income tax benefit (provision) were above $50 million in 2017?" + }, + { + "input": "How many instances occurred in 2017 where the deferred income tax benefit (provision) exceeded $50 million?", + "output": "How many of the deferred Income tax benefit (provision) were above $50 million in 2017?" 
+ }, + { + "input": "In 2019, what is the number of income tax benefits (provisions) that exceeded $(4 million)?", + "output": "How many of the current Income tax benefit (provision) were above $(4 million) in 2019?" + }, + { + "input": "In 2019, what is the total number of income tax benefits (provisions) that exceeded $(4 million)?", + "output": "How many of the current Income tax benefit (provision) were above $(4 million) in 2019?" + }, + { + "input": "What was the change in the net cash provided by (used for) by investing activities between 2017 and 2018?", + "output": "What was the difference in the net cash provided by (used for) investing activities from 2017 to 2018?" + }, + { + "input": "What was the percentage change in diluted net income per share attributable to common stockholders of American Tower Corporation between the Three Months Ended in March and June?", + "output": "What was the change in Diluted net income per share attributable to American Tower Corporation common stockholders between Three Months Ended March and June?" + }, + { + "input": "What was the specific difference in the value of Prepaids and other current assets between the fiscal years 2018 and 2019?", + "output": "What was the change in Prepaids and other current assets between 2018 and 2019?" + }, + { + "input": "What was the difference in the amount of Prepaids and other current assets reported in the financial statements between the years 2018 and 2019?", + "output": "What was the change in Prepaids and other current assets between 2018 and 2019?" + }, + { + "input": "What was the total amount of property and real estate taxes that were accumulated or owed in 2018?", + "output": "What was the Accrued property and real estate taxes in 2018?" + }, + { + "input": "What was the change in Other accrued expenses between 2018 and 2019?", + "output": "What was the difference in the amount of Other accrued expenses recorded in 2018 compared to 2019?" 
+ }, + { + "input": "What was the change in Other accrued expenses between 2018 and 2019?", + "output": "What was the difference in the amount of Other accrued expenses recorded between the years 2018 and 2019?" + }, + { + "input": "What was the total amount of Net operating loss carryforwards recorded for the financial year 2019?", + "output": "What were the Net operating loss carryforwards in 2019?" + }, + { + "input": "What were the amounts of the liability decreases included in the company's revisions in estimates for both 2019 and 2018?", + "output": "How much were the decreases to the liability included in the revisions in estimates by the company in 2019 and 2018 respectively?" + }, + { + "input": "What was the change in balance as of December 31 between 2018 and 2019?", + "output": "What was the difference in the balance as of December 31st, 2018 and December 31st, 2019?" + }, + { + "input": "What was the value of the Eure-et-Loir interests as of December 31, 2019?", + "output": "What was the monetary worth of the Eure-et-Loir interests on the specific date of December 31, 2019?" + }, + { + "input": "How many of the non-current liabilities components in 2018 were above $500 million?", + "output": "How many components of non-current liabilities in the year 2018 exceeded $500 million in value?" + }, + { + "input": "How many of the non-current liabilities components in 2018 were above $500 million?", + "output": "In 2018, how many components of non-current liabilities exceeded $500 million in value?" + }, + { + "input": "How many years has the expense for stock-based compensation for Total exceeded $100 million?", + "output": "How many years did Total stock-based compensation expense exceed $100 million?" + }, + { + "input": "For how many years has the expense for Stock-based compensation - Services been higher than $1 million?", + "output": "How many years did Stock-based compensation expense - Services exceed $1 million?" 
+ }, + { + "input": "What differentiates the characteristics of Federal and State NOLs from 2025 to 2029?", + "output": "What is the difference between Federal and State NOLs in the period 2025 to 2029?" + }, + { + "input": "What differentiates Federal and State NOLs in the 2025-2029 timeframe, and how do they vary during this period?", + "output": "What is the difference between Federal and State NOLs in the period 2025 to 2029?" + }, + { + "input": "What was the approximate percentage of revenue in the company's property segments were attributable to their communication sites in 2018?", + "output": "What was the estimated percentage of the company's total revenue in 2018 that came from their property segments, specifically from their communication sites?" + }, + { + "input": "What was the approximate percentage of revenue in the company's property segments were attributable to their communication sites in 2018?", + "output": "What percentage of the company's total revenue in 2018 can be attributed to their communication sites within their property segments?" + }, + { + "input": "What percentage of the total revenue in the United States in 2019 can be attributed to property segments?", + "output": "How many percent of total revenue in 2019 was accounted for by property segments in the U.S.?" + }, + { + "input": "How many percent of total revenue in 2018 was accounted for by property segments in Asia?", + "output": "What was the percentage of the total revenue in 2018 contributed by the property segments specifically in Asia?" + }, + { + "input": "What were the intangible assets related to the Tower and network location specifically in the year 2019?", + "output": "What were the Tower and network location intangible assets in 2019?" + }, + { + "input": "What were the intangible assets related to the tower and network locations observed in 2019?", + "output": "What were the Tower and network location intangible assets in 2019?" 
+ }, + { + "input": "How many years were the total impairment charges above $200 million?", + "output": "How many years did the total impairment charges exceed or surpass $200 million?" + }, + { + "input": "How many years were the total impairment charges above $200 million?", + "output": "For how many years did the total impairment charges exceed $200 million?" + }, + { + "input": "What was the percentage change in Total impairment charges between 2018 and 2019?", + "output": "What percentage represents the difference in Total impairment charges between the years 2018 and 2019?" + }, + { + "input": "What was the percentage change in Total impairment charges between 2018 and 2019?", + "output": "What is the percentage difference in Total impairment charges from 2018 to 2019?" + }, + { + "input": "For how long has the sum of all assets reached or surpassed $40,000 million?", + "output": "How many years did the total assets exceed $40,000 million?" + }, + { + "input": "What was the balance as at January 1, 2019?", + "output": "What was the amount of money in the account on January 1, 2019?" + }, + { + "input": "What was the difference in account balance on January 1st, comparing 2018 to 2019?", + "output": "What was the change in balance as of January 1 between 2018 and 2019?" + }, + { + "input": "What was the difference in the account balance between January 1, 2018 and January 1, 2019?", + "output": "What was the change in balance as of January 1 between 2018 and 2019?" + }, + { + "input": "What is the difference in the percentage increase of current year for 2017 and 2018?", + "output": "What was the change in current year increases between 2017 and 2018?" + }, + { + "input": "What was the difference in the yearly increases of current year between 2017 and 2018?", + "output": "What was the change in current year increases between 2017 and 2018?" 
+ }, + { + "input": "What was the total amount spent on depreciation in the year 2019?", + "output": "What was the depreciation cost in 2019?" + }, + { + "input": "What was the change in depreciation between 2018 and 2019?", + "output": "What was the difference in the amount of depreciation incurred in 2018 compared to 2019?" + }, + { + "input": "What was the gross amount of accumulated depreciation at the beginning of 2017 and how did it change by the beginning of 2018?", + "output": "What was the change in Gross amount of accumulated depreciation at beginning between 2017 and 2018?" + }, + { + "input": "Which specific years have the statute of limitations expired on certain unrecognized tax benefits?", + "output": "Which years did the statute of limitations on certain unrecognized tax benefits lapse?" + }, + { + "input": "In 2019, what was the total value of receivables that had not yet been invoiced to customers?", + "output": "What were the unbilled receivables in 2019?" + }, + { + "input": "What was the change in Unbilled receivables between 2018 and 2019?", + "output": "What was the net difference in Unbilled receivables from 2018 to 2019?" + }, + { + "input": "What was the change in the investments into Nippon Yttrium Co., Ltd (\"NYC\") between 2018 and 2019?", + "output": "\"What was the difference in the amount of money invested in Nippon Yttrium Co., Ltd (\"NYC\") from 2018 to 2019?\"" + }, + { + "input": "What was the change in the investments into Nippon Yttrium Co., Ltd (\"NYC\") between 2018 and 2019?", + "output": "What was the difference in the amount of investments made into Nippon Yttrium Co., Ltd (\"NYC\") during the years 2018 and 2019?" + }, + { + "input": "What was the percentage change in total investments between 2018 and 2019?", + "output": "What was the exact percentage difference in the total amount of investments from 2018 to 2019?" 
+ }, + { + "input": "What was the percentage increase or decrease in Pro forma revenues from 2017 to 2018?", + "output": "What was the change in Pro forma revenues between 2017 and 2018?" + }, + { + "input": "How many years did net income exceed $100,000 thousand?", + "output": "For how many consecutive years has the net income surpassed $100,000 thousand?" + }, + { + "input": "How many years did net income exceed $100,000 thousand?", + "output": "For how many consecutive years has the net income been greater than $100,000 thousand?" + }, + { + "input": "What was the specific financial difference in adjustments made to the defined benefit post-retirement plan between the years 2017 and 2019?", + "output": "What was the change in Defined benefit post-retirement plan adjustments between 2017 and 2019?" + }, + { + "input": "What was the difference in the adjustments made to Defined Benefit post-retirement plans between the years 2017 and 2019?", + "output": "What was the change in Defined benefit post-retirement plan adjustments between 2017 and 2019?" + }, + { + "input": "What were the total sales revenues in Hong Kong in the year 2018?", + "output": "What was the net sales in Hong Kong in 2018?" + }, + { + "input": "What was the exact amount of net sales generated in Singapore during the year 2017?", + "output": "What was the net sales in Singapore in 2017?" + }, + { + "input": "What was the change in net sales in Mexico between 2017 and 2018?", + "output": "What was the difference in net sales in Mexico from 2017 to 2018 and how does it impact the overall sales performance in the country?" + }, + { + "input": "In which specific years did the cumulative net sales in every region surpass one million dollars?", + "output": "Which years did the total net sales in all regions exceed $1,000,000 thousand?" 
+ }, + { + "input": "What was the percentage change in the net sales from Other Countries between 2018 and 2019?", + "output": "What was the percentage difference in net sales from Other Countries in 2019 compared to 2018?" + }, + { + "input": "What was the difference in the amount of Amortization of debt issuance costs between the years 2017 and 2018?", + "output": "What was the change in the Amortization of debt issuance costs between 2017 and 2018?" + }, + { + "input": "For how many years did the interest expense on capital leases surpass $200 thousand?", + "output": "How many years did Interest expense on capital leases exceed $200 thousand?" + }, + { + "input": "By what percentage did the total interest expense increase or decrease from 2018 to 2019?", + "output": "What was the percentage change in the total interest expense between 2018 and 2019?" + }, + { + "input": "What was the percentage increase or decrease in the total interest expense from 2018 to 2019?", + "output": "What was the percentage change in the total interest expense between 2018 and 2019?" + }, + { + "input": "What was the total net actuarial loss or gain specifically associated with Other Benefits for the year 2018?", + "output": "What was the Net actuarial loss (gain) for Other Benefits in 2018?" + }, + { + "input": "What was the change in net sales between 2017 and 2019?", + "output": "What is the difference in net sales from 2017 to 2019?" + }, + { + "input": "For how many years was the cost of sales higher than $800,000 thousand?", + "output": "How many years did cost of sales exceed $800,000 thousand?" + }, + { + "input": "For how many years has the cost of sales been higher than $800,000 thousand?", + "output": "How many years did cost of sales exceed $800,000 thousand?" 
+ }, + { + "input": "What was the precise net value of property, plant, and equipment in Japan specifically for the year 2019?", + "output": "What was the net amount of property, plant and equipment in Japan in 2019?" + }, + { + "input": "What was the net amount of property, plant and equipment in Thailand in 2018?", + "output": "What was the exact net value of property, plant, and equipment in Thailand during the year 2018?" + }, + { + "input": "How many years did Total net property, plant and equipment from Non-United States regions exceed $400,000 thousand?", + "output": "How many years was the total value of property, plant, and equipment from regions outside the United States greater than $400,000 thousand?" + }, + { + "input": "What was the change in the net property, plant and equipment in China between 2018 and 2019?", + "output": "What was the exact difference in the net value of property, plant, and equipment in China during the period from 2018 to 2019?" + }, + { + "input": "What was the net interest expense in 2019?", + "output": "What was the amount of interest expenses incurred in 2019, after subtracting any interest income earned during the same year?" + }, + { + "input": "What was the change in the Loss on early extinguishment of debt between 2018 and 2019?", + "output": "What was the difference in the Loss on early extinguishment of debt from 2018 to 2019?" + }, + { + "input": "What was the percentage increase or decrease in the net interest expense from 2018 to 2019?", + "output": "What was the percentage change in the net interest expense between 2018 and 2019?" + }, + { + "input": "What percentage increase or decrease occurred in the net interest expense from 2018 to 2019?", + "output": "What was the percentage change in the net interest expense between 2018 and 2019?" 
+ }, + { + "input": "What was the change in the Returns reserves between 2018 and 2019?", + "output": "What was the difference in the amount allocated to Returns reserves between the fiscal years 2018 and 2019?" + }, + { + "input": "How much did the Additions for tax positions change from 2018 to 2019 in the current year?", + "output": "What was the change in the Additions for tax positions of the current year between 2018 and 2019?" + }, + { + "input": "What was the reflection of Pension and other post-retirement benefits?", + "output": "What did Pension and other post-retirement benefits reflect?" + }, + { + "input": "What were the interest obligations that had payments due more than 5 years?", + "output": "What were the interest obligations that had payments due for a period longer than 5 years?" + }, + { + "input": "What was the precise difference in the total amount between Employee separation liability and Restructuring liability?", + "output": "What was the difference in the total between Employee separation liability and Restructuring liability?" + }, + { + "input": "What is the discrepancy in the total amount between Employee Separation Liability and Restructuring Liability?", + "output": "What was the difference in the total between Employee separation liability and Restructuring liability?" + }, + { + "input": "What was the distinction in the payment amounts expected in Year 1 for interest obligations compared to operating lease obligations?", + "output": "What was the difference between the payments due by Year 1 between Interest obligations and operating lease obligations?" + }, + { + "input": "What were the contrasting payment amounts between Year 1 for interest obligations and operating lease obligations?", + "output": "What was the difference between the payments due by Year 1 between Interest obligations and operating lease obligations?" 
+ }, + { + "input": "What is the percentage of total debt obligations in relation to the overall contractual obligations?", + "output": "What were the total debt obligations as a percentage of the total contractual obligations?" + }, + { + "input": "What was the amount of cash generated or used in financing activities during the year 2017?", + "output": "What was the Net cash provided by (used in) financing activities in 2017?" + }, + { + "input": "What was the percentage change in the amount of cash, cash equivalents, and restricted cash from 2018 to 2019?", + "output": "What was the percentage change in the Net increase in cash, cash equivalents, and restricted cash between 2018 and 2019?" + }, + { + "input": "How much of a discount did Customer Advances receive?", + "output": "What was the amount of discount for Customer Advances?" + }, + { + "input": "What was the total amount of discount provided for Customer Advances?", + "output": "What was the amount of discount for Customer Advances?" + }, + { + "input": "What was the net gain or loss from acquisitions in the year 2018?", + "output": "What was the Acquisition (gain) loss in 2018?" + }, + { + "input": "What were the restructuring charges in 2017?", + "output": "\"What were the specific charges related to restructuring expenses incurred during the year 2017?\"" + }, + { + "input": "What were the restructuring charges in 2017?", + "output": "What were the specific charges related to the corporate restructuring that occurred in 2017?" + }, + { + "input": "For how many years has the income tax expense from continuing operations exceeded $10,000 thousand?", + "output": "How many years did Total current income tax expense from continuing operations exceed $10,000 thousand?" + }, + { + "input": "What was the difference in foreign income tax expense between 2017 and 2018?", + "output": "What was the change in current foreign income tax expense between 2017 and 2018?" 
+ }, + { + "input": "What was the difference in the amount of foreign income tax expense recorded for the years 2017 and 2018?", + "output": "What was the change in current foreign income tax expense between 2017 and 2018?" + }, + { + "input": "What was the percentage change in the deferred federal income tax expense between 2018 and 2019?", + "output": "What was the percentage increase or decrease in the amount of deferred federal income tax expense from 2018 to 2019?" + }, + { + "input": "What were the restructuring charges in 2019?", + "output": "What were the specific charges incurred as part of the restructuring efforts in the year 2019?" + }, + { + "input": "What was the change in Personnel reduction costs between 2017 and 2018?", + "output": "What was the difference in the costs associated with reducing personnel between the years 2017 and 2018?" + }, + { + "input": "What is the percentage change in the costs associated with relocation and exit from 2018 to 2019?", + "output": "What was the percentage change in the Relocation and exit costs between 2018 and 2019?" + }, + { + "input": "What is the percentage difference in the Relocation and exit costs from 2018 to 2019?", + "output": "What was the percentage change in the Relocation and exit costs between 2018 and 2019?" + }, + { + "input": "What was the change in Foreign income between 2017 and 2018?", + "output": "What was the difference in the amount of income from foreign sources recorded for the years 2017 and 2018?" + }, + { + "input": "What were the net sales in 2019?", + "output": "What was the total value of sales generated in 2019?" + }, + { + "input": "What were the net sales in 2019?", + "output": "What was the total amount of revenue generated from sales in the year 2019?" + }, + { + "input": "\"What was the gross margin percentage for the quarter ending on September 30th?\"", + "output": "What was the gross margin for the Sep-30 quarter?" 
+ }, + { + "input": "\"What was the exact gross margin percentage for the quarter ending on September 30th?\"", + "output": "What was the gross margin for the Sep-30 quarter?" + }, + { + "input": "\"For which specific time periods did the Gross Margin exceed $120,000 thousand?\"", + "output": "Which quarters ended did the Gross Margin exceed $120,000 thousand?" + }, + { + "input": "What was the total value of contract assets in 2019?", + "output": "What were the amount of contract assets in 2019?" + }, + { + "input": "What were the specific values of contract assets recorded in the year 2019?", + "output": "What were the amount of contract assets in 2019?" + }, + { + "input": "What was the difference in the amount of prepaid expenses between the fiscal years 2018 and 2019?", + "output": "What was the change in prepaid expenses between 2018 and 2019?" + }, + { + "input": "What was the difference in the amount of prepaid expenses between the years 2018 and 2019?", + "output": "What was the change in prepaid expenses between 2018 and 2019?" + }, + { + "input": "What was the percentage difference in the variance between Software Solutions and Data and Analytics?", + "output": "What was the difference in the percent variance between Software Solutions and Data and Analytics?" + }, + { + "input": "What was the reported amount of net trade receivables in 2017?", + "output": "What were the net trade receivables as reported in 2017?" + }, + { + "input": "What was the adjustmentments for ASC 606 adoption for net computer software?", + "output": "What adjustments were made for the adoption of ASC 606 in relation to net computer software? Please provide a detailed explanation of the adjustments made for net computer software under ASC 606." + }, + { + "input": "What was the adjustmentments for ASC 606 adoption for net computer software?", + "output": "What were the adjustments made for the adoption of ASC 606 regarding net computer software?" 
+ }, + { + "input": "In which specific years did the total value of Deferred contract costs surpass $30 million?", + "output": "Which years did Deferred contract costs exceed $30 million?" + }, + { + "input": "What was the change in cash equivalents between 2018 and 2019?", + "output": "What was the difference in the amount of cash equivalents reported for the years 2018 and 2019?" + }, + { + "input": "What was the change in cash equivalents between 2018 and 2019?", + "output": "What was the difference in the amount of cash equivalents held between the year 2018 and the year 2019?" + }, + { + "input": "What was the change in Cash between 2018 and 2019?", + "output": "What was the difference in the Cash amount reported in the financial statements of the company between the years 2018 and 2019?" + }, + { + "input": "What was the amount of Equity method investments in 2019?", + "output": "How much money was invested through the equity method in 2019?" + }, + { + "input": "Which years does the table provide information for the EBITDA by segment?", + "output": "For which years does the table contain EBITDA information segmented by different categories?" + }, + { + "input": "Which years does the table provide information for the EBITDA by segment?", + "output": "For which specific years does the table provide EBITDA data segmented by different categories?" + }, + { + "input": "What was the total value of EBITDA (Earnings Before Interest, Taxes, Depreciation, and Amortization) for the year 2018?", + "output": "What was the sum of the EBITDA in 2018?" + }, + { + "input": "\"What was the specific monetary balance amount for Software Solutions in the year 2017?\"", + "output": "What was the balance in 2017 for Software Solutions?" + }, + { + "input": "What was the specific balance amount, in USD, for Software Solutions in the year 2017?", + "output": "What was the balance in 2017 for Software Solutions?" 
+ }, + { + "input": "What was the specific amount of federal income tax expense incurred in the fiscal year of 2019?", + "output": "What was the current federal income tax expense in 2019?" + }, + { + "input": "Which years does the table provide information for depreciation and amortization by segment?", + "output": "For which specific periods does the table offer data regarding depreciation and amortization broken down by segment?" + }, + { + "input": "What was the value of the total assets before the implementation of ASC 606 revenue recognition standards?", + "output": "What were the total assets without the adoption of ASC 606?" + }, + { + "input": "What was the sum of all assets before the implementation of ASC 606?", + "output": "What were the total assets without the adoption of ASC 606?" + }, + { + "input": "How did the adoption of ASC 606 affect the difference between Total Assets and Total Liabilities?", + "output": "What was the difference the effect of ASC 606 Adoption between Total Assets and Total Liabilities?" + }, + { + "input": "What is the impact of ASC 606 Adoption on the difference between Total Assets and Total Liabilities?", + "output": "What was the difference the effect of ASC 606 Adoption between Total Assets and Total Liabilities?" + }, + { + "input": "What was the percentage change in cash flows from operating activities in 2019 compared to 2018?", + "output": "What was the cash flows provided by operating activities in 2019 as a percentage of the cash flow in 2018?" + }, + { + "input": "For how many consecutive years did the Net increase in cash and cash equivalents remain positive?", + "output": "How many years was the Net increase in cash and cash equivalents positive?" + }, + { + "input": "What was the EBITDA difference specifically recorded between the Software Solutions division and the Data and Analytics division?", + "output": "What was the difference in the EBITDA between Software Solutions and Data and Analytics?" 
+ }, + { + "input": "What was the exact difference in EBITDA (earnings before interest, taxes, depreciation, and amortization) between Software Solutions and Data and Analytics?", + "output": "What was the difference in the EBITDA between Software Solutions and Data and Analytics?" + }, + { + "input": "What specific expenses were included in the category of operating expenses for Corporate and Other?", + "output": "What did operating expenses for Corporate and Other include?" + }, + { + "input": "What was the discrepancy or variance between the overall value of assets and the specific intangible asset of goodwill in the context of data and analytics?", + "output": "What was the difference between the total assets and goodwill from data and analytics?" + }, + { + "input": "What was the difference between Operating expenses and Revenues from Software Solutions?", + "output": "What differentiates Operating expenses and Revenues from Software Solutions? Please provide a detailed explanation comparing the two." + }, + { + "input": "What was the percentage change in the Net earnings margin between 2017 and 2019?", + "output": "What was the percentage difference in the Net earnings margin from 2017 to 2019?" + }, + { + "input": "What was the percentage change in the Net earnings margin between 2017 and 2019?", + "output": "What was the exact percentage difference in the net earnings margin from 2017 to 2019?" + }, + { + "input": "What was the percentage change in revenues between 2018 and 2019?", + "output": "What was the percentage increase or decrease in revenues from 2018 to 2019?" + }, + { + "input": "What was the percentage change in revenues between 2018 and 2019?", + "output": "What was the percentage difference in the amount of money earned in revenue from 2018 to 2019?" + }, + { + "input": "What were the specific prepaid expenses incurred during the year 2018?", + "output": "What were the prepaid expenses in 2018?" 
+ }, + { + "input": "What were the specific categories of Other current assets in the financial statements for the year 2019?", + "output": "What were the Other current assets in 2019?" + }, + { + "input": "What was the change in contract assets between 2018 and 2019?", + "output": "What was the difference in the amount of contract assets from 2018 to 2019?" + }, + { + "input": "How many years did prepaid expenses exceed $40.0 million?", + "output": "For how many years was the amount of prepaid expenses higher than $40.0 million?" + }, + { + "input": "How many years did the operating margin exceed 20.0%?", + "output": "For how many years was the operating margin greater than 20.0%?" + }, + { + "input": "What was the amount of Corporate Services in 2018?", + "output": "What was the specific amount of funds allocated to Corporate Services during the year 2018?" + }, + { + "input": "What was the amount of Corporate Services in 2018?", + "output": "How much did Corporate Services amount to in the year 2018?" + }, + { + "input": "What specific factors or elements were taken into consideration when calculating the debt?", + "output": "What did the calculation for Debt include?" + }, + { + "input": "What was the difference between Total Debt and Total Interest on Debt?", + "output": "What is the distinction between the total amount of debt and the total interest incurred on that debt?" + }, + { + "input": "How many contracts with a value exceeding $100 million were made during the period of 2021-2022?", + "output": "For the period 2021-2022, how many contractual obligations exceeded $100 million?" + }, + { + "input": "What is the dollar difference between the actual cost and the estimated cost for software solutions?", + "output": "What was the dollar variance for software solutions?" + }, + { + "input": "What was the specific revenue generated from the field of Data and Analytics during the year 2017?", + "output": "What was revenue from Data and Analytics in 2017?" 
+ }, + { + "input": "For how many consecutive years has revenue from Data and Analytics surpassed $150 million?", + "output": "How many years did revenue from Data and Analytics exceed $150 million?" + }, + { + "input": "What was the average amount of money earned between the years 2017 and 2018?", + "output": "What was the average total revenue between 2017 and 2018?" + }, + { + "input": "What was the average revenue from Corporate and Other between 2017 and 2018?", + "output": "What was the average revenue generated from the Corporate and Other category during the two-year period encompassing 2017 and 2018?" + }, + { + "input": "What was the change in purchased software between 2018 and 2019?", + "output": "What was the difference in the amount of software that was bought between the years 2018 and 2019?" + }, + { + "input": "For how many years has the internally developed software generated revenue exceeding $800 million?", + "output": "How many years did internally developed software exceed $800 million?" + }, + { + "input": "For how long did internally developed software generate revenue exceeding $800 million?", + "output": "How many years did internally developed software exceed $800 million?" + }, + { + "input": "What was the percentage change in net computer software between 2018 and 2019?", + "output": "What was the percentage increase or decrease in net computer software from 2018 to 2019?" + }, + { + "input": "How many years did Percentage of gross lease receivables exceed 2.0%?", + "output": "For how many years was the percentage of gross lease receivables above 2.0%?" + }, + { + "input": "How many years did Percentage of gross lease receivables exceed 2.0%?", + "output": "For how long has the percentage of gross lease receivables been above 2.0%?" 
+ }, + { + "input": "What was the percentage change in Allowance for doubtful accounts between 2018 and 2019?", + "output": "What is the percentage difference in the allowance for doubtful accounts from 2018 to 2019?" + }, + { + "input": "What was the percentage change in Allowance for doubtful accounts between 2018 and 2019?", + "output": "What was the percentage increase or decrease in the Allowance for doubtful accounts from 2018 to 2019?" + }, + { + "input": "What was the percentage change in revenue from the Europe, Middle East, and Africa (EMEA) region between 2017 and 2018?", + "output": "What was the change for revenue from EMEA between 2017 and 2018?" + }, + { + "input": "What was the percentage change in revenue from the Europe, Middle East, and Africa (EMEA) region from 2017 to 2018?", + "output": "What was the change for revenue from EMEA between 2017 and 2018?" + }, + { + "input": "Why would actual maturities differ from the contractual maturities?", + "output": "Why do the actual time periods until maturity sometimes vary from the specified contractual time periods?" + }, + { + "input": "What is the distinction in amortized cost between debt investments held for less than 1 year compared to those held for a period ranging from 1 year to 5 years?", + "output": "What was the difference between the amortized cost of debt investments that were within 1 year and after 1 year through 5 years?" + }, + { + "input": "What was the total amount of money allocated to the gradual write-off of acquired intangible assets throughout the year 2018?", + "output": "What was the total amortization of purchased intangible assets in 2018?" + }, + { + "input": "What was the difference in the variance in dollars between Available-for-sale debt investments and net Other gains (losses)?", + "output": "What was the variance in dollars between the variance in Available-for-sale debt investments and the variance in net Other gains (losses)?" 
+ }, + { + "input": "What was the percentage change in the net other gains (losses) between 2017 and 2018?", + "output": "What was the percent change in the net amount of other gains (losses) from 2017 to 2018?" + }, + { + "input": "What were the dividends in 2018?", + "output": "What were the dividend payments made in the year 2018?" + }, + { + "input": "What were the dividends in 2018?", + "output": "What was the total amount of dividends paid out in the year 2018?" + }, + { + "input": "What units are being utilized in the table and what purpose do they serve in the given context?", + "output": "What are the units used in the table?" + }, + { + "input": "What was the difference in the balance at the start of the fiscal year in 2017 compared to 2018?", + "output": "What was the change in Balance at beginning of fiscal year between 2017 and 2018?" + }, + { + "input": "What was the difference in the balance at the start of the fiscal year from 2017 to 2018?", + "output": "What was the change in Balance at beginning of fiscal year between 2017 and 2018?" + }, + { + "input": "How does the company typically recommend analyzing their liquidity and capital resources for future periods?", + "output": "How does the company believe that their liquidity and capital resources in future periods should be analyzed?" + }, + { + "input": "What differentiates operating leases with durations of less than 1 year from those lasting 1 to 3 years?", + "output": "What was the difference in operating leases between those that were less than 1 year and 1 to 3 years?" + }, + { + "input": "How did the characteristics of operating leases differ between those with a duration of less than 1 year and those lasting from 1 to 3 years?", + "output": "What was the difference in operating leases between those that were less than 1 year and 1 to 3 years?" 
+ }, + { + "input": "What percentage of the total contractual obligations is represented by the senior notes?", + "output": "What was the total senior notes as a percentage of total contractual obligations?" + }, + { + "input": "What is the period that has the greatest Operating leases?", + "output": "What is the time frame during which Operating leases are most prevalent?" + }, + { + "input": "How does the company manage and minimize the potential financial loss related to credit risk in relation to derivatives?", + "output": "How does the company mitigate credit risk associated with derivatives?" + }, + { + "input": "Which years does the table provide information for the company's outstanding derivatives?", + "output": "For which specific years does the table contain information regarding the outstanding derivatives of the company?" + }, + { + "input": "Which years does the table provide information for the company's outstanding derivatives?", + "output": "For which specific years does the table present information about the company's exceptional derivatives?" + }, + { + "input": "What was the change in the Net investment hedging instruments between 2018 and 2019?", + "output": "What was the difference in the amount of net investment hedging instruments between 2018 and 2019?" + }, + { + "input": "What was the change in the Net investment hedging instruments between 2018 and 2019?", + "output": "What was the difference in the amount of Net investment hedging instruments between 2018 and 2019?" + }, + { + "input": "What was the percentage change in the total amount of outstanding derivatives between 2018 and 2019?", + "output": "What was the percentage difference in the total value of outstanding derivatives from 2018 to 2019?" + }, + { + "input": "How much was the total amount spent on Purchased Considerations specifically for BroadSoft?", + "output": "What was the amount of Purchased Considerations for BroadSoft?" 
+ }, + { + "input": "What were the differences in terms of Purchase Consideration specifically between Springpath and Broadsoft? Please provide additional details and insights to help understand this comparison better.", + "output": "What was the difference in Purchase Consideration between Springpath and Broadsoft?" + }, + { + "input": "What was the specific variance in terms of Purchase Consideration when comparing Springpath and Broadsoft?", + "output": "What was the difference in Purchase Consideration between Springpath and Broadsoft?" + }, + { + "input": "What was the amount of unearned income specifically derived from lease receivables in the year 2019?", + "output": "What was unearned income from lease receivables in 2019?" + }, + { + "input": "\"What was the amount of unearned income generated specifically from lease receivables during the year 2019?\"", + "output": "What was unearned income from lease receivables in 2019?" + }, + { + "input": "What was the difference in the reported total between current and noncurrent financing receivables?", + "output": "How much greater or lesser was the total reported amount of financing receivables between current and noncurrent categories?" + }, + { + "input": "What were the inclusions of foreign exchange transactions and other related activities?", + "output": "What did foreign exchange and other include?" + }, + { + "input": "What was the difference, expressed as a percentage, in the balance of financing receivables at the end of fiscal year 2018 compared to the balance at the end of fiscal year 2019?", + "output": "What was the percentage change in the balance at the end of fiscal year for financing receivables between 2018 and 2019?" 
+ }, + { + "input": "What percentage increase or decrease occurred in the balance of financing receivables at the end of the fiscal year from 2018 to 2019?", + "output": "What was the percentage change in the balance at the end of fiscal year for financing receivables between 2018 and 2019?" + }, + { + "input": "How many years did Revenue exceed $50,000 million?", + "output": "How many consecutive years has the revenue exceeded $50,000 million?" + }, + { + "input": "How many years did Revenue exceed $50,000 million?", + "output": "For how many complete years has the revenue consistently surpassed the $50,000 million mark?" + }, + { + "input": "What is the difference in the amount of Goodwill between Duo and Luxtera and how does it impact their overall value?", + "output": "What was the difference in Goodwill between Duo and Luxtera?" + }, + { + "input": "What were the variations in the level of Goodwill between Duo and Luxtera?", + "output": "What was the difference in Goodwill between Duo and Luxtera?" + }, + { + "input": "What was the discrepancy in the value of purchased intangible assets between Luxtera and other companies?", + "output": "What was the difference in Purchased intangible assets between Luxtera and Others?" + }, + { + "input": "What were the variations in the value of purchased intangible assets specifically between Luxtera and other companies/entities?", + "output": "What was the difference in Purchased intangible assets between Luxtera and Others?" + }, + { + "input": "What is the acquisition with the highest Purchase Consideration?", + "output": "What is the highest purchase consideration acquisition?" + }, + { + "input": "What was the total value of technology products purchased before accounting for any deductions or expenses?", + "output": "What was the amount of gross purchased technology?" 
+ }, + { + "input": "What is the net value difference between Technology and Customer relationships?", + "output": "What was the difference in the net values between Technology and Customer relationships?" + }, + { + "input": "What was the numerical difference in the net values specifically attributed to Technology and Customer relationships in relation to a certain context?", + "output": "What was the difference in the net values between Technology and Customer relationships?" + }, + { + "input": "Which years does the table provide information for the company's activity related to their product warranty liability?", + "output": "For which specific years does the table display data regarding the company's product warranty liability?" + }, + { + "input": "Which years does the table provide information for the company's activity related to their product warranty liability?", + "output": "For which years does the table displayed provide information about the company's activity in terms of their product warranty liability?" + }, + { + "input": "What were the Acquisitions and divestitures in 2019?", + "output": "What were the acquisitions and divestitures that took place in the year 2019?" + }, + { + "input": "What was the difference in the amount allocated for warranty provisions issued in 2017 compared to 2018?", + "output": "What was the change in Provisions for warranties issued between 2017 and 2018?" + }, + { + "input": "What was the difference in the amount allocated for warranties issued between the years 2017 and 2018?", + "output": "What was the change in Provisions for warranties issued between 2017 and 2018?" + }, + { + "input": "For which specific years does the table offer data on cash and cash equivalents as well as investments?", + "output": "Which years does the table provide information for cash and cash equivalents and investments?" 
+ }, + { + "input": "What was the change in the value of available-for-sale debt investments, indicating whether there was an increase or decrease?", + "output": "What was the increase (decrease) in available-for-sale debt investments?" + }, + { + "input": "In 2019, how did the distinction between cash and cash equivalents and available-for-sale debt investments manifest and how were they different from each other?", + "output": "What was the difference between cash and cash equivalents and Available-for-sale debt investments in 2019?" + }, + { + "input": "What differentiates cash and cash equivalents from available-for-sale debt investments in the financial records for the year 2019?", + "output": "What was the difference between cash and cash equivalents and Available-for-sale debt investments in 2019?" + }, + { + "input": "What was the change in Interest expense between 2017 and 2018?", + "output": "What is the difference in the amount of interest expense incurred from 2017 to 2018?" + }, + { + "input": "What was the change in Interest expense between 2017 and 2018?", + "output": "What was the difference in the amount of money paid for interest in 2017 compared to 2018?" + }, + { + "input": "What was the difference in the amount of income earned from interest between the years 2018 and 2019?", + "output": "What was the change in Interest income between 2018 and 2019?" + }, + { + "input": "What is the amount of federal net operating loss carryforwards that the company had for income tax purposes in the year 2019?", + "output": "What was the company's federal net operating loss carryforwards for income tax purposes in 2019?" + }, + { + "input": "What was the amount of the company's federal net operating loss carryforwards specifically for income tax purposes in the year 2019?", + "output": "What was the company's federal net operating loss carryforwards for income tax purposes in 2019?" 
+ }, + { + "input": "What was the percentage change in total deferred tax assets between 2018 and 2019?", + "output": "What is the percentage increase or decrease in the total amount of deferred tax assets from 2018 to 2019?" + }, + { + "input": "What was the percentage change in total deferred tax assets between 2018 and 2019?", + "output": "What was the percentage increase or decrease in the total amount of deferred tax assets from 2018 to 2019?" + }, + { + "input": "In fiscal year 2019, what was the total amount of net interest expense recognized by the company?", + "output": "How much net interest expense did the company recognize in fiscal year 2019?" + }, + { + "input": "How much interest expense, after deducting interest income, did the company record as a net amount during fiscal year 2019?", + "output": "How much net interest expense did the company recognize in fiscal year 2019?" + }, + { + "input": "What were the Additions for tax positions of prior years in 2019?", + "output": "What were the additions made to the tax positions of previous years in the year 2019?" + }, + { + "input": "What was the percentage change in net income between 2018 and 2019?", + "output": "What was the percentage difference in net income from 2018 to 2019 and how can it be calculated?" + }, + { + "input": "Why has the direct effect of foreign currency fluctuations on revenue not been material?", + "output": "Why has the direct impact of changes in foreign currency exchange rates on our revenue not had a significant financial effect so far? Please provide an explanation for the lack of material impact despite fluctuations in currency values." + }, + { + "input": "Why has the direct effect of foreign currency fluctuations on revenue not been material?", + "output": "Why has the impact of foreign currency fluctuations on revenue not been significant enough to have a material effect?" 
+ }, + { + "input": "What was the difference in the fair value of forward contracts that were sold between the years 2018 and 2019?", + "output": "What was the change in the fair value of sold forward contracts between 2018 and 2019?" + }, + { + "input": "Why does the company prioritize inventory and supply chain management as a specific area of focus?", + "output": "Why is inventory and supply chain management an area of focus for the company?" + }, + { + "input": "Why does the company prioritize inventory and supply chain management as a focal point for its operations?", + "output": "Why is inventory and supply chain management an area of focus for the company?" + }, + { + "input": "What was the total sum of purchase commitments made in the year 2018?", + "output": "What were the total purchase commitments in 2018?" + }, + { + "input": "What was the change in purchase commitments that were less than 1 year between 2018 and 2019?", + "output": "What was the difference in purchase commitments of less than 1 year between 2018 and 2019?" + }, + { + "input": "What was the change in purchase commitments that were less than 1 year between 2018 and 2019?", + "output": "What was the difference in purchase commitments shorter than one year between the year 2018 and the year 2019?" + }, + { + "input": "For how long did contracts ranging from 1 to 3 years surpass a cumulative worth of $700 million?", + "output": "How many years did commitments that were 1 to 3 years exceed $700 million?" + }, + { + "input": "What was the percentage change in product revenue for the APJC region from 2017 to 2018?", + "output": "What was the change in the product revenue from APJC between 2017 and 2018?" + }, + { + "input": "What was the change in the product revenue from Americas between 2017 and 2018?", + "output": "What was the difference in the amount of revenue generated from products in the Americas region between the years 2017 and 2018?" 
+ }, + { + "input": "What was the numerical difference in the total value of net property and equipment in International regions between the years 2017 and 2018?", + "output": "What was the change in net property and equipment from International regions between 2017 and 2018?" + }, + { + "input": "What is the percentage increase or decrease in the total net property and equipment from 2018 to 2019?", + "output": "What was the percentage change in the total net property and equipment between 2018 and 2019?" + }, + { + "input": "What was the percentage change in revenue generated from security services between 2017 and 2018?", + "output": "What was the change in revenue from security between 2017 and 2018?" + }, + { + "input": "What was the total amount paid for the shares during the period from April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?", + "output": "What was the total payment for the shares from April 28, 2019 to May 25, 2019 and from May 26, 2019 to June 22, 2019?" + }, + { + "input": "What was the total amount paid for the shares during the period from April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?", + "output": "What was the overall payment for the shares between April 28, 2019 and May 25, 2019, as well as between May 26, 2019 and June 22, 2019?" + }, + { + "input": "What was the average total amount paid for the shares during the period from April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?", + "output": "What was the average total payment for shares from April 28, 2019 to June 22, 2019, and how does it compare between the periods April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?" + }, + { + "input": "What was the difference in the balance of Americas between the years 2018 and 2019?", + "output": "What was the change in balance from Americas between 2018 and 2019?" 
+ }, + { + "input": "What was the difference in America's balance between 2018 and 2019 and how did it change?", + "output": "What was the change in balance from Americas between 2018 and 2019?" + }, + { + "input": "Which years does the table provide information for the company's available-for-sale debt investments and equity investments?", + "output": "For which time period does the table provide data on the available-for-sale debt investments and equity investments of the company?" + }, + { + "input": "\"What were the total provisions, also known as benefits, provided in this context?", + "output": "What were the total provisions (benefits)?" + }, + { + "input": "What was the difference between total provisions (benefits) and net recoveries (write-offs)?", + "output": "What differentiates total provisions, also referred to as benefits, from net recoveries, which are write-offs?" + }, + { + "input": "What was the allowance for credit loss for financed service contracts as a percentage of total allowance for credit loss in 2018?", + "output": "In 2018, what was the proportion or ratio of the allowance for credit loss specifically assigned to financed service contracts compared to the total allowance for credit loss?" + }, + { + "input": "What was the allowance for credit loss for financed service contracts as a percentage of total allowance for credit loss in 2018?", + "output": "What was the percentage of the total allowance for credit loss in 2018 that was allocated specifically for financed service contracts?" + }, + { + "input": "What is the percentage difference in Loss from continuing operations before income tax between the years 2018 and 2019?", + "output": "What is the percentage change in the Loss from continuing operations before income tax from 2018 to 2019?" 
+ }, + { + "input": "What is the percentage difference in the Loss from continuing operations before income tax between the years 2018 and 2019?", + "output": "What is the percentage change in the Loss from continuing operations before income tax from 2018 to 2019?" + }, + { + "input": "What is the percentage change in the income tax benefit from 2018 to 2019?", + "output": "What is the percentage difference in the income tax benefit received between 2018 and 2019?" + }, + { + "input": "In which year did the net loss for the year exceed the net loss of any other year?", + "output": "In which year is there a greater net loss for the year?" + }, + { + "input": "What does the net deferred tax liabilities include?", + "output": "What is included in the calculation of net deferred tax liabilities?" + }, + { + "input": "What is the exact percentage change in the net deferred tax liabilities from 2018 to 2019?", + "output": "What is the percentage change in the net deferred tax liabilities from 2018 to 2019?" + }, + { + "input": "What was the parent entity's current ratio in 2019?", + "output": "What is the current ratio of the parent entity in 2019?" + }, + { + "input": "What was the parent entity's current ratio in 2019? Please provide the numerical value.", + "output": "What is the current ratio of the parent entity in 2019?" + }, + { + "input": "What was the debts to assets ratio of the parent entity in the year 2018? Can you provide the specific ratio value or percentage?", + "output": "What is the debts to assets ratio of the parent entity in 2018?" + }, + { + "input": "What is the debt-to-assets ratio of the parent company for the year 2018?", + "output": "What is the debts to assets ratio of the parent entity in 2018?" + }, + { + "input": "What is the percentage change in the net cash generated or used for investing activities?", + "output": "What is the percentage change in the net cash provided from investing activities?" 
+ }, + { + "input": "What is the change in the net cash provided from operating activities from 2018 to 2019?", + "output": "What is the difference between the net cash provided from operating activities in 2018 and 2019?" + }, + { + "input": "What is the change in the net cash provided from operating activities from 2018 to 2019?", + "output": "What is the difference in the amount of net cash provided from operating activities between 2018 and 2019?" + }, + { + "input": "What is the change in Net change in cash and cash equivalent from 2018 to 2019?", + "output": "What is the difference in the amount of cash and cash equivalents between 2018 and 2019?" + }, + { + "input": "What is the change in Net change in cash and cash equivalent from 2018 to 2019?", + "output": "What is the difference in the net change in cash and cash equivalents between the years 2018 and 2019?" + }, + { + "input": "What was the percentage increase or decrease in the value of current assets from 2018 to 2019?", + "output": "What is the percentage change in current assets from 2018 to 2019?" + }, + { + "input": "What is the current ratio in the year 2019 and what factors should be considered while determining it?", + "output": "What is the current ratio in 2019?" + }, + { + "input": "What was the debts to assets ratio for the year 2019?", + "output": "What is the debts to assets ratio in 2019?" + }, + { + "input": "\"What was the debts to assets ratio for the year 2019?\"", + "output": "What is the debts to assets ratio in 2019?" + }, + { + "input": "What is the purpose and significance of trial commissions, and how are they defined?", + "output": "What are trial commissions?" + }, + { + "input": "\"What is the purpose and definition of trial commissions?\"", + "output": "What are trial commissions?" + }, + { + "input": "\"How does the Group calculate or determine its total income or financial proceeds?\"", + "output": "How does the Group determine its revenue?" 
+ }, + { + "input": "\"How does the Group calculate or establish its revenue? Provide details about the methods or criteria used for determining the Group's revenue.\"", + "output": "How does the Group determine its revenue?" + }, + { + "input": "What is the percentage difference in the amount of upfront fees charged from 2018 to 2019?", + "output": "What is the percentage change in upfront fees from 2018 to 2019?" + }, + { + "input": "What is the percentage difference in upfront fees between 2018 and 2019?", + "output": "What is the percentage change in upfront fees from 2018 to 2019?" + }, + { + "input": "What is the change in the Business combination reserve between 2018 and 2019?", + "output": "What is the monetary difference in the Business combination reserve from 2018 to 2019?" + }, + { + "input": "What is the percentage change in the total reserves from 2018 to 2019?", + "output": "By what percentage did the total reserves increase or decrease from 2018 to 2019?" + }, + { + "input": "What is the percentage increase or decrease in the trail commission asset comparing the values from 2018 and 2019?", + "output": "What is the percentage change in the current trail commission asset from 2018 to 2019?" + }, + { + "input": "What is the percentage change in the cash receipts from 2018 to 2019?", + "output": "What is the percentage increase or decrease in the cash receipts comparing the amounts in 2018 and 2019?" + }, + { + "input": "What are the payroll tax rates for both 2019 and 2018?", + "output": "What is the payroll tax for 2019 and 2018 respectively?" + }, + { + "input": "\"What was the amount of post-tax loss incurred from discontinued operations in the year 2019?\"", + "output": "What is the Post-tax loss of discontinued operations in 2019?" + }, + { + "input": "\"In which year did the revenue increase compared to other years?\"", + "output": "In which year is there a higher revenue?" 
+ }, + { + "input": "In which specific year did the revenue increase, resulting in a higher amount compared to previous years?", + "output": "In which year is there a higher revenue?" + }, + { + "input": "What is the percentage difference in the amount of lease liabilities between 2018 and 2019?", + "output": "What is the percentage change in the current lease liabilities from 2018 to 2019?" + }, + { + "input": "What is the exact percentage difference in the lease liabilities as of 2018 compared to 2019?", + "output": "What is the percentage change in the current lease liabilities from 2018 to 2019?" + }, + { + "input": "What method does the Company use to determine and calculate the amount for its provision?", + "output": "How does the Company calculate its provision?" + }, + { + "input": "How does the Company determine the amount of money set aside for provisions and what factors are taken into consideration during the calculation process?", + "output": "How does the Company calculate its provision?" + }, + { + "input": "What is the percentage change in the contract assets from 2018 to 2019?", + "output": "What is the percentage change in the value of the contract assets from the year 2018 to the year 2019?" + }, + { + "input": "In which year does the number of outstanding shares have a higher value at the start of the period as compared to other years?", + "output": "In which year is the number of outstanding shares at the beginning of the period higher?" + }, + { + "input": "What is the pre-tax discount rate that was utilized to determine the value of projected cash flows for the year 2019?", + "output": "What is the pre-tax discount rate applied to cash flow projections in 2019?" + }, + { + "input": "What was the pre-tax discount rate used to calculate cash flow projections in 2019?", + "output": "What is the pre-tax discount rate applied to cash flow projections in 2019?" 
+ }, + { + "input": "In which year is the health CGU higher?", + "output": "\"In which specific year does the health Comprehensive Grand Unit (CGU) exhibit a higher value compared to other years?\"" + }, + { + "input": "In which year is the car CGU higher?", + "output": "\"In which specific year does the car CGU (Car Gross Utilization) exhibit a higher value compared to other years? Please provide the year.\"" + }, + { + "input": "What is the post-employment benefits in 2018?", + "output": "What were the post-employment benefits in 2018, and can you provide more details about them?" + }, + { + "input": "What were the share-based payment transactions and their impact in 2019?", + "output": "What is the share-based payments in 2019?" + }, + { + "input": "What were the share-based payments in 2019 and could you provide further details on this topic?", + "output": "What is the share-based payments in 2019?" + }, + { + "input": "What is the percentage change in the post-employment benefits from 2018 to 2019?", + "output": "What is the percentage difference in post-employment benefits between the years 2018 and 2019?" + }, + { + "input": "What specific expenses or expenditure categories are included in the unallocated corporate costs for the current year?", + "output": "What do the unalloacated corporate costs include in the current year?" + }, + { + "input": "What is the exact percentage change in revenue in Australia from the year 2018 to the year 2019?", + "output": "What is the percentage change in the revenue in Australia from 2018 to 2019?" + }, + { + "input": "What is the revenue percentage change experienced in Asia between 2018 and 2019?", + "output": "What is the percentage change in the revenue in Asia from 2018 to 2019?" + }, + { + "input": "What was the percentage increase or decrease in revenue in the Asian region from 2018 to 2019?", + "output": "What is the percentage change in the revenue in Asia from 2018 to 2019?" 
+ }, + { + "input": "What was the total number of outstanding stock options for VMware in 2017?", + "output": "What was the outstanding number of shares for VMware stock options in 2017?" + }, + { + "input": "What was the total number of shares allotted for VMware stock options in the year 2017?", + "output": "What was the outstanding number of shares for VMware stock options in 2017?" + }, + { + "input": "What was the number of granted shares from Pivotal Stock Options in 2018?", + "output": "\"How many shares were granted by Pivotal Stock Options during the year 2018?\"" + }, + { + "input": "For how long did the Weighted-Average Exercise Price (per share) for VMware stock options remain above $60.00?", + "output": "How many years did the outstanding Weighted-Average Exercise Price (per share) for VMware stock options exceed $60.00?" + }, + { + "input": "What was the difference in the overall amount of money earned by the company in 2018 compared to 2019?", + "output": "What was the change in total revenue between 2018 and 2019?" + }, + { + "input": "What was the difference in total revenue between the years 2018 and 2019?", + "output": "What was the change in total revenue between 2018 and 2019?" + }, + { + "input": "How many years did total revenue exceed $500 million?", + "output": "For how many consecutive years has the total revenue been greater than $500 million?" + }, + { + "input": "What was the percentage change in net income between 2019 and 2020?", + "output": "What was the percentage difference in net income from 2019 to 2020, and how can we calculate it accurately?" + }, + { + "input": "\"What was the variation in the increase in goodwill associated with mergers and acquisitions from 2019 to 2020?\"", + "output": "What was the change in the Increase in goodwill related to business combinations between 2019 and 2020?" 
+ }, + { + "input": "What was the difference in balance at the start of 2019 and 2020?", + "output": "What was the change in balance at the beginning of the year between 2019 and 2020?" + }, + { + "input": "What was the difference in balance at the beginning of 2019 and 2020?", + "output": "What was the change in balance at the beginning of the year between 2019 and 2020?" + }, + { + "input": "What did the Senior Notes consists of?", + "output": "What were the contents or components of the Senior Notes?" + }, + { + "input": "What was the difference between total Senior Notes and total Notes payable to Dell?", + "output": "What is the distinction between the combined value of Senior Notes and the combined value of Notes payable to Dell?" + }, + { + "input": "What differentiates the total amount of Term Loans from the total amount of Future Lease Commitments?", + "output": "What was the difference between total Term Loans and total Future Lease Commitments?" + }, + { + "input": "What differentiates the total amount of Term Loans from the total value of Future Lease Commitments? Elaborate on the distinctions between these two financial concepts.", + "output": "What was the difference between total Term Loans and total Future Lease Commitments?" + }, + { + "input": "Which specific time periods are covered in the table containing data on long-lived assets categorized by geographic area? These assets primarily consist of net property and equipment.", + "output": "Which years does the table include information for long-lived assets by geographic area, which primarily include property and equipment, net?" + }, + { + "input": "How long has the outstanding balance owed to related parties been surpassing $100 million?", + "output": "How many years did current amounts due to related parties exceed $100 million?" 
+ }, + { + "input": "What was the cumulative amount of sales invoices acknowledged and recorded as revenue throughout the entire fiscal year of 2020?", + "output": "What was the total billings recognized during the year ended 2020?" + }, + { + "input": "\"What was the exact amount of billings that were recorded and acknowledged as revenue throughout the entirety of the year that concluded in 2020?\"", + "output": "What was the total billings recognized during the year ended 2020?" + }, + { + "input": "What was the change in Unearned software maintenance revenue between 2019 and 2020?", + "output": "\"What was the difference in Unearned software maintenance revenue from 2019 to 2020 and how did it change?\"" + }, + { + "input": "What was the change in Unearned software maintenance revenue between 2019 and 2020?", + "output": "What was the difference in Unearned software maintenance revenue from 2019 to 2020?" + }, + { + "input": "What is the percentage of unearned revenue in the total revenue for the years 2019 and 2020?", + "output": "What was the percentage of total unearned revenue between 2019 and 2020?" + }, + { + "input": "\"What was the percentage of unearned revenue as a proportion of the total revenue generated in the years 2019 and 2020?\"", + "output": "What was the percentage of total unearned revenue between 2019 and 2020?" + }, + { + "input": "What was the average price per share in 2018, taking into account the weight assigned to each share?", + "output": "What was the Weighted-average price per share in 2018?" + }, + { + "input": "For how many years did the repurchase of Class A common stock surpass a cumulative value of $10,000 million?", + "output": "How many years did Class A common stock repurchased exceed $10,000 million?" + }, + { + "input": "For how many years did the repurchase of Class A common stock amount to more than $10 billion?", + "output": "How many years did Class A common stock repurchased exceed $10,000 million?" 
+ }, + { + "input": "For which years does the table provide details regarding the exclusion of weighted-average common share equivalents of Class A common stock from the calculations of diluted net income per share?", + "output": "Which years does the table provide information for the weighted-average common share equivalents of Class A common stock that were excluded from the diluted net income per share calculations?" + }, + { + "input": "What were the employee stock options for the year 2019?", + "output": "What was the Employee stock options in 2019?" + }, + { + "input": "What were the employee stock options offered in 2019?", + "output": "What was the Employee stock options in 2019?" + }, + { + "input": "How does Dell engage in the procurement of products and services from other companies?", + "output": "How does Dell purchase products and services from the company?" + }, + { + "input": "What was the percentage change in the internal-use revenue between 2019 and 2020?", + "output": "What is the percentage difference in internal-use revenue from 2019 to 2020?" + }, + { + "input": "Which years does the table provide information for accrued expenses and other?", + "output": "For which specific years does the provided table offer information related to accrued expenses and other similar items?" + }, + { + "input": "Which years does the table provide information for accrued expenses and other?", + "output": "For which specific years does the table contain data regarding accrued expenses and other items?" + }, + { + "input": "What was the difference in the total amount of expenses that accumulated over time between the years 2019 and 2020?", + "output": "What was the change in the total accrued expenses between 2019 and 2020?" + }, + { + "input": "What is the difference in the total amount of expenses that accumulated over time between 2019 and 2020?", + "output": "What was the change in the total accrued expenses between 2019 and 2020?" 
+ }, + { + "input": "In 2019, what were the specific support and administrative costs incurred by Dell's subsidiary?", + "output": "What was the Dell subsidiary support and administrative costs in 2019?" + }, + { + "input": "What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?", + "output": "What was the difference in the amount spent on purchasing and leasing products as well as purchasing services from 2018 to 2019?" + }, + { + "input": "What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?", + "output": "What was the difference in the amount spent on purchasing and leasing products and purchasing services from 2018 to 2019?" + }, + { + "input": "For how many consecutive years did Dell subsidiary incur support and administrative costs exceeding $200 million?", + "output": "How many years did Dell subsidiary support and administrative costs exceed $200 million?" + }, + { + "input": "What was the percentage change in the Dell subsidiary support and administrative costs between 2019 and 2020?", + "output": "What percentage increase or decrease was observed in the support and administrative costs of Dell's subsidiary from 2019 to 2020?" + }, + { + "input": "What was the significance or meaning of the aggregate intrinsic value?", + "output": "What did the aggregate intrinsic value represent?" + }, + { + "input": "What differentiates outstanding options that are both exercisable and vested as well as expected to vest?", + "output": "What was the difference between outstanding options that were exercisable and vested and expected to vest?" + }, + { + "input": "What was the specific amount allocated for the federal income tax provision in the year 2019?", + "output": "What was the current federal income tax provision in 2019?" 
+ }, + { + "input": "What was the specific provision for federal income tax in 2019?", + "output": "What was the current federal income tax provision in 2019?" + }, + { + "input": "What was the change in the current federal income tax provision between 2018 and 2019?", + "output": "What was the difference in the amount allocated for federal income tax in 2018 compared to 2019?" + }, + { + "input": "What was the percentage change in the Total income tax provision between 2018 and 2019?", + "output": "What was the precise percentage increase or decrease in the Total income tax provision from 2018 to 2019?" + }, + { + "input": "What was the percentage change in the Total income tax provision between 2018 and 2019?", + "output": "What is the percentage difference in the Total income tax provision from 2018 to 2019?" + }, + { + "input": "What was the specific monetary value of Restricted cash categorized under other current assets in the financial records for the year 2019?", + "output": "What was the amount of Restricted cash within other current assets in 2019?" + }, + { + "input": "What were the specific additions made to tax positions related to the current year of 2020 in the field of taxation?", + "output": "What were the additions to tax positions related to current year in 2020?" + }, + { + "input": "What categories are encompassed within the scope of purchases and leases of products, and what does this also include in terms of purchases?", + "output": "What did Purchases and leases of products and purchases of services include?" + }, + { + "input": "What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?", + "output": "What was the year-on-year difference in the amounts spent on purchasing and leasing products and purchasing services between 2018 and 2019?" 
+ }, + { + "input": "What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?", + "output": "What was the difference in the amount spent on purchasing and leasing products, as well as purchasing services, from 2018 to 2019?" + }, + { + "input": "For how long did the subsidiary of Dell exceed $150 million in annual support and administrative costs?", + "output": "How many years did Dell subsidiary support and administrative costs exceed $150 million?" + }, + { + "input": "What specific expenses or liabilities are encompassed within the current balances owed to affiliated individuals or entities?", + "output": "What did the current amounts due to related parties include?" + }, + { + "input": "What specific items or transactions are included in the present outstanding balances owed to related parties?", + "output": "What did the current amounts due to related parties include?" + }, + { + "input": "For how long has the current outstanding amount owed by related parties been over $1,000 million?", + "output": "How many years did current amount due from related parties exceed $1,000 million?" + }, + { + "input": "For how long has the present outstanding balance owed by affiliated organizations been over $1,000 million?", + "output": "How many years did current amount due from related parties exceed $1,000 million?" + }, + { + "input": "What was the percentage increase or decrease in the net current amount due from related parties from 2019 to 2020?", + "output": "What was the percentage change in the net current amount due from related parities between 2019 and 2020?" + }, + { + "input": "What was the change in Additions to intangible assets between 2019 and 2020?", + "output": "What was the difference in the amount of intangible assets added between the years 2019 and 2020?" 
+ }, + { + "input": "What is the total amount of lease liabilities for operating leases recorded on the current financial statements?", + "output": "What were the current lease liabilities for operating leases?" + }, + { + "input": "What is the total amount of lease liabilities for operating leases as of the present moment?", + "output": "What were the current lease liabilities for operating leases?" + }, + { + "input": "How many lease liabilities for operating leases exceeded $500 million?", + "output": "How many operating lease liabilities were there that exceeded $500 million in amount?" + }, + { + "input": "What differentiates current lease liabilities from non-current lease liabilities in the context of operating leases?", + "output": "What was the difference between current and non-current lease liabilities for operating leases?" + }, + { + "input": "What differentiates current lease liabilities from non-current lease liabilities for operating leases?", + "output": "What was the difference between current and non-current lease liabilities for operating leases?" + }, + { + "input": "What was the discrepancy in the aggregate lease obligations between operating leases and finance leases?", + "output": "What was the difference in total lease liabilities between operating leases and finance leases?" + }, + { + "input": "Which years does the table provide information for unearned revenue?", + "output": "For which specific years does the table contain information regarding unearned revenue?" + }, + { + "input": "Which years does the table provide information for unearned revenue?", + "output": "\"For which specific years does the table provide data on unearned revenue?\"" + }, + { + "input": "What was the percentage change in Unearned subscription and SaaS revenue from 2019 to 2020?", + "output": "What was the change in Unearned subscription and SaaS revenue between 2019 and 2020?" 
+ }, + { + "input": "What was the foreign income in 2018?", + "output": "What was the total amount of income received from foreign sources during the year 2018?" + }, + { + "input": "What is the percentage difference in the total income prior to income tax between the years 2019 and 2020?", + "output": "What was the percentage change in the total income before income tax between 2019 and 2020?" + }, + { + "input": "What was the total value of construction projects underway in 2020 globally or within a specific country/region?", + "output": "What was the amount of construction in progress in 2020?" + }, + { + "input": "What was the difference in the total value of property and equipment from 2019 to 2020?", + "output": "What was the change in Total property and equipment between 2019 and 2020?" + }, + { + "input": "What was the change in the Unrealized losses on available-for-sale securities between 2017 and 2018?", + "output": "What was the difference in the amount of Unrealized losses on available-for-sale securities from 2017 to 2018?" + }, + { + "input": "What was the percentage change in the Reclassification of realized transactions, net of taxes between 2018 and 2019?", + "output": "What percentage represents the difference in the Reclassification of realized transactions, net of taxes between the years 2018 and 2019?" + }, + { + "input": "What impact did the alteration in the balance of net accounts receivable have on the financial situation?", + "output": "What was the effect of change in the net accounts receivable?" + }, + { + "input": "What is the ratio of long-term deferred tax liability to accrued liabilities?", + "output": "What was the Long-term deferred tax liability as a ratio of Accrued liabilities?" + }, + { + "input": "How did the Other interest expense change between the years 2017 and 2018?", + "output": "What was the change in Other interest expense between 2017 and 2018?" 
+ }, + { + "input": "What was the difference between the reported net sales and the reported cost of sales in terms of amount?", + "output": "What was the difference in amount as reported between net sales and cost of sales?" + }, + { + "input": "What was the difference between Net income from continuing operations and Income before income taxes?", + "output": "What is the distinction between net income from continuing operations and income before income taxes? Please provide a comprehensive response, retaining all the details provided in the original question. Keep the question brief, succinct, and in the same language as the original." + }, + { + "input": "What was the impact of stock options and RSUs on dilution in 2019?", + "output": "What was the Dilutive effect of stock options and RSUs in 2019?" + }, + { + "input": "How many consecutive years did the balance at the end of each year surpass $2 million?", + "output": "How many years did the balance at end of the year exceed $2 million?" + }, + { + "input": "For how many years was the balance at the end of each year above $2 million?", + "output": "How many years did the balance at end of the year exceed $2 million?" + }, + { + "input": "How many years did Intercompany prepaid tax asset amortization exceed $7 million?", + "output": "For how many years was the amortization of Intercompany prepaid tax asset greater than $7 million?" + }, + { + "input": "What was the service cost in 2018?", + "output": "What was the cost of the service provided in 2018?" + }, + { + "input": "What was the service cost in 2018?", + "output": "What was the cost of the service in the year 2018 specifically?" + }, + { + "input": "What was the total amount of interest incurred in the year 2019?", + "output": "What was the interest cost in 2019?" + }, + { + "input": "What was the total expense incurred as interest in the year 2019?", + "output": "What was the interest cost in 2019?" 
+ }, + { + "input": "What was the percentage difference in the Discount rate from 2018 to 2019?", + "output": "What was the change in the Discount rate between 2018 and 2019?" + }, + { + "input": "What was the specific difference in the Discount rate from the year 2018 to the year 2019?", + "output": "What was the change in the Discount rate between 2018 and 2019?" + }, + { + "input": "Why could the sums fail to equal the overall total?", + "output": "Why might amounts may not add to the total?" + }, + { + "input": "\"What was the numerical distinction between the total amount obtained from sales after deducting all applicable costs, and the total revenue generated from sales before deducting any expenses or costs?\"", + "output": "What was the difference between the total net sales and gross profit?" + }, + { + "input": "What is the calculated percentage change in the operating income from the third quarter to the fourth quarter?", + "output": "What was the percentage change in the Operating income between the third and fourth quarter?" + }, + { + "input": "What was the change in the cost of sales between 2017 and 2018?", + "output": "How much did the cost of sales change from 2017 to 2018?" + }, + { + "input": "What was the percentage change in the Net deferred tax asset between 2018 and 2019?", + "output": "What was the percentage increase or decrease in the amount of Net deferred tax asset from 2018 to 2019?" + }, + { + "input": "What was the percentage change in the Net deferred tax asset between 2018 and 2019?", + "output": "What was the percentage increase or decrease in the Net deferred tax asset from 2018 to 2019?" + }, + { + "input": "What is the percentage difference in the total balance from 2018 to 2019? Calculate using the formula for percentage change.", + "output": "What was the percentage change in the total balance between 2018 and 2019?" 
+ }, + { + "input": "What was the percentage increase or decrease in the total balance from 2018 to 2019?", + "output": "What was the percentage change in the total balance between 2018 and 2019?" + }, + { + "input": "For how long, in terms of number of years, has the cost of sales exceeded 40% of net sales?", + "output": "How many years did cost of sales of net sales exceed 40%?" + }, + { + "input": "What is the percentage increase or decrease in operating income as a proportion of net sales from 2018 to 2019?", + "output": "What was the percentage change in operating income of net sales between 2018 and 2019?" + }, + { + "input": "What was the exact percentage increase or decrease in operating income of net sales from 2018 to 2019?", + "output": "What was the percentage change in operating income of net sales between 2018 and 2019?" + }, + { + "input": "How many years have there been increases in tax positions related to the current year that exceeded $30 million?", + "output": "How many years did Increases related to current year tax positions exceed $30 million?" + }, + { + "input": "What was the percentage change in the Ending balance between 2018 and 2019?", + "output": "What was the percentage difference in the Ending balance from 2018 to 2019?" + }, + { + "input": "What was the percentage change in the Ending balance between 2018 and 2019?", + "output": "What is the percent increase or decrease in the Ending balance from 2018 to 2019?" + }, + { + "input": "According to what were the plan benefits provided, and to what were they in accordance with?", + "output": "What were plan benefits provided in accordance with?" + }, + { + "input": "In accordance with what, were the plan benefits provided? Can you expand on the specific benefits mentioned in the plan?", + "output": "What were plan benefits provided in accordance with?" 
+ }, + { + "input": "What were the total service costs incurred during the year 2018?", + "output": "What were the service costs in 2018?" + }, + { + "input": "What were the specific costs associated with services provided in 2018?", + "output": "What were the service costs in 2018?" + }, + { + "input": "What were the total interest expenses incurred in the year 2017?", + "output": "What were the interest costs in 2017?" + }, + { + "input": "What was the total amount spent on interest expenses in the year 2017?", + "output": "What were the interest costs in 2017?" + }, + { + "input": "What was the difference in the amount of Amortization of actuarial loss between 2018 and 2019?", + "output": "What was the change in the Amortization of actuarial loss between 2018 and 2019?" + }, + { + "input": "What was the difference in the amount allocated for the gradual reduction of actuarial loss in the financial statements between the years 2018 and 2019?", + "output": "What was the change in the Amortization of actuarial loss between 2018 and 2019?" + }, + { + "input": "What was the percentage change in the net pension period cost between 2018 and 2019?", + "output": "What was the percentage increase or decrease in the net pension period cost from 2018 to 2019?" + }, + { + "input": "What was the percentage change in the net pension period cost between 2018 and 2019?", + "output": "What was the percentage difference in the net pension period cost from 2018 to 2019?" + }, + { + "input": "How was the lower of cost and net realizable value determined as the valuation method for inventories? Please provide a detailed explanation of the process.", + "output": "How were inventories valued at the lower of cost and net realizable value?" + }, + { + "input": "What was the quantity or volume of raw materials used in the year 2018?", + "output": "What was the amount of raw materials in 2018?" 
+ }, + { + "input": "For how long has the value of Finished goods remained above $200 million?", + "output": "How many years did the amount of Finished goods exceed $200 million?" + }, + { + "input": "What was the total value of both Core and developed technology assets combined?", + "output": "What was the gross amount of Core and developed technology assets?" + }, + { + "input": "What was the value assigned to each share granted on a specific date at March 31, 2017, taking into account their respective weights?", + "output": "What was the Weighted Average Grant Date Fair Value for nonvested shares at March 31, 2017?" + }, + { + "input": "What was the percentage increase or decrease in the number of shares that have not yet vested from 2018 to 2019?", + "output": "What was the percentage change in the number of nonvested shares between 2018 and 2019?" + }, + { + "input": "What was the cumulative intrinsic value of options and stock appreciation rights (SARs) exercised throughout the fiscal years ending on March 31, 2019?", + "output": "What was the total intrinsic value of options and SARs exercised during the years ended March 31, 2019?" + }, + { + "input": "In 2019, how many options and SAR (stock appreciation rights) shares were available for exercise?", + "output": "What was the number of option and SAR shares exercisable in 2019?" + }, + { + "input": "Which years does the table provide the number of outstanding shares for?", + "output": "Which specific years are mentioned in the table that provides the information regarding the number of outstanding shares?" + }, + { + "input": "What was the difference in the Weighted Average Exercise Price per Share for outstanding shares between the years 2017 and 2018?", + "output": "What was the change in the Weighted Average Exercise Price per Share for outstanding shares between 2017 and 2018?" 
+ }, + { + "input": "\"What was the calculated percentage difference in the quantity of outstanding shares from 2018 to 2019?\"", + "output": "What was the percentage change in the number of outstanding shares between 2018 and 2019?" + }, + { + "input": "For which specific years does the table provide information on Identifiable long-lived assets (property, plant, and equipment net of accumulated amortization) within different geographic areas?", + "output": "Which years does the table provide information for Identifiable long-lived assets (consisting of property, plant and equipment net of accumulated amortization) by geographic area?" + }, + { + "input": "Which geographic areas are included in the table that provides information on identifiable long-lived assets, specifically property, plant, and equipment (net of accumulated amortization) for different years?", + "output": "Which years does the table provide information for Identifiable long-lived assets (consisting of property, plant and equipment net of accumulated amortization) by geographic area?" + }, + { + "input": "What was the total value of assets held in different countries during the year 2019?", + "output": "What was the amount of assets in Various other countries in 2019?" + }, + { + "input": "What was the total value of assets in various countries in 2019?", + "output": "What was the amount of assets in Various other countries in 2019?" + }, + { + "input": "What percentage increase or decrease occurred in the total value of long-lived assets from 2018 to 2019?", + "output": "What was the percentage change in total long-lived assets between 2018 and 2019?" + }, + { + "input": "What was the change in the Deferred expense for State between 2018 and 2019?", + "output": "What is the difference in the amount of Deferred expense for the State from 2018 to 2019?" 
+ }, + { + "input": "What was the change in the Deferred expense for State between 2018 and 2019?", + "output": "What was the difference in the amount of Deferred expense for State between the years 2018 and 2019?" + }, + { + "input": "What is the cumulative balance of all time deposits?", + "output": "What was the total balance time deposits?" + }, + { + "input": "What is the aggregate amount of money held in time deposits at present?", + "output": "What was the total balance time deposits?" + }, + { + "input": "What was the specific balance of Inventories on April 1, 2018, as recorded in the financial records or statements?", + "output": "What was the balance of Inventories on April 1, 2018?" + }, + { + "input": "What was the adjustment made in ASC 606 for other current assets? Please provide details regarding how this adjustment affected the accounting treatment and reporting of other current assets under ASC 606.", + "output": "What was the adjustment from ASC 606 for other current assets?" + }, + { + "input": "How many liabilities had a balance on March 31, 2018 that exceeded $300 million?", + "output": "How many liabilities, as of March 31, 2018, had a balance surpassing $300 million?" + }, + { + "input": "What was the percentage change in Other assets specifically attributed to the adjustments made?", + "output": "What was the percentage change in Other assets due to the adjustments?" + }, + { + "input": "What were the net sales in 2016?", + "output": "What was the total amount of sales generated in the year 2016?" + }, + { + "input": "What was the difference in net sales from 2016 to 2017, and how can this information assist in better answering the question?", + "output": "What was the change in net sales between 2016 and 2017?" + }, + { + "input": "What was the difference in net sales revenue from 2016 to 2017?", + "output": "What was the change in net sales between 2016 and 2017?" 
+ }, + { + "input": "Which years did assets from Thailand exceed $200 million?", + "output": "Which specific years did the total value of assets originating from Thailand surpass $200 million?" + }, + { + "input": "Which years did assets from Thailand exceed $200 million?", + "output": "In which specific years did the total value of assets originating from Thailand surpass $200 million?" + }, + { + "input": "What was the percentage change in the total long-lived assets between 2018 and 2019?", + "output": "What was the percentage difference in the total value of long-lived assets from 2018 to 2019, indicating whether it increased or decreased?" + }, + { + "input": "In which years was the total annual compensation calculated in?", + "output": "What are the specific years for which the total annual compensation was calculated?" + }, + { + "input": "Which geographic locations are taken into account for calculating the overall current tax expense as mentioned in the table?", + "output": "What are the geographic locations in the table considered when calculating the total current tax expense?" + }, + { + "input": "What is the highest recorded tax expense in Germany and in which specific year did it occur?", + "output": "In which year was the current tax expense in Germany the largest?" + }, + { + "input": "What is the highest recorded tax expense in Germany, and in which specific year was it observed?", + "output": "In which year was the current tax expense in Germany the largest?" + }, + { + "input": "What was the amount of non-current assets in APJ in 2019?", + "output": "What was the specific value of non-current assets within the Asia-Pacific-Japan (APJ) region during the year 2019?" + }, + { + "input": "During which specific years were the calculations of Non-Current Assets by Region conducted?", + "output": "In which years were the Non-Current Assets by Region calculated?" 
+ }, + { + "input": "In which year was the amount in Rest of Americas larger?", + "output": "In what specific year did the Rest of Americas region experience a higher amount compared to previous years?" + }, + { + "input": "What was the numerical difference in the quantity of Rest of Americas in 2019 compared to 2018?", + "output": "What was the change in the amount in Rest of Americas in 2019 from 2018?" + }, + { + "input": "What was the difference in the quantity of Rest of Americas in 2019 compared to 2018?", + "output": "What was the change in the amount in Rest of Americas in 2019 from 2018?" + }, + { + "input": "How was the Total expense for the share-based payment plans of Executive Board members determined?", + "output": "How was the total expense for the share-based payment plans determined specifically for the Executive Board members?" + }, + { + "input": "During which specific years is the Total Expense for Share-Based Payment given or provided?", + "output": "In which years is the Total Expense for Share-Based Payment provided?" + }, + { + "input": "For which specific years is the Total Expense for Share-Based Payment given?", + "output": "In which years is the Total Expense for Share-Based Payment provided?" + }, + { + "input": "What is the specific value or quantity for the EMEA region in 2019?", + "output": "What is the amount for EMEA in 2019?" + }, + { + "input": "What is the amount for APJ in 2018?", + "output": "What was the total amount of APJ in the year 2018?" + }, + { + "input": "When did Germany have the highest amount?", + "output": "In which year was the amount for Germany the largest?" + }, + { + "input": "When did Germany experience its highest amount?", + "output": "In which year was the amount for Germany the largest?" + }, + { + "input": "What is the specific monetary figure spent on employee benefits in the year 2019?", + "output": "What is the amount of employee benefits expenses in 2019?" 
+ }, + { + "input": "What was the percentage difference in the amount spent on pensions in 2019 compared to 2018?", + "output": "What was the percentage change in Pension expenses in 2019 from 2018?" + }, + { + "input": "What was the total cost of services offered in the year 2019?", + "output": "What was the Cost of services in 2019?" + }, + { + "input": "What was the specific cost of services rendered in the year 2019?", + "output": "What was the Cost of services in 2019?" + }, + { + "input": "\"When did the Cost of services reach its highest value?\"", + "output": "In which year was Cost of services largest?" + }, + { + "input": "In which specific year did the Cost of services reach its maximum value, considering all available data?", + "output": "In which year was Cost of services largest?" + }, + { + "input": "What was the change in issued capital in 2019 from 2018?", + "output": "What was the net change in the amount of issued capital between the years 2018 and 2019?" + }, + { + "input": "What was the change in issued capital in 2019 from 2018?", + "output": "What was the difference in the amount of issued capital between 2019 and 2018? How much did the issued capital change from 2018 to 2019?" + }, + { + "input": "What is the percentage of unused tax losses in 2019 that can be attributed to state tax loss carryforwards in the United States?", + "output": "How much of unused tax losses relate to U.S. state tax loss carryforwards in 2019?" + }, + { + "input": "In which year was the amount Expiring after the following year the largest?", + "output": "In what year did the highest amount of expenses expire after the subsequent year?" + }, + { + "input": "What is the meaning and scope of the rights associated with Bill McDermott?", + "output": "What do the rights for Bill McDermott refer to? 
" + }, + { + "input": "What specific types of entitlements are included in the table when referring to annual pension entitlements?", + "output": "What kind of entitlements are annual pension entitlements as displayed in the table?" + }, + { + "input": "Which specific years are the annual pension entitlements fully and legally secured to an individual?", + "output": "In which years are the annual pension entitlements vested?" + }, + { + "input": "What was the profit before tax in 2019?", + "output": "What was the pre-tax profit for the fiscal year of 2019?" + }, + { + "input": "What was the profit before tax in 2019?", + "output": "What was the pre-tax profit for the year 2019?" + }, + { + "input": "\"What was the specific tax rate applicable in 2018 for individuals or businesses in a particular country?\"", + "output": "What was the applicable tax rate in 2018?" + }, + { + "input": "What was the percentage change in Other in 2019 from 2018?", + "output": "What was the percentage difference in the category labeled \"Other\" between the years 2018 and 2019?" + }, + { + "input": "What market is the information specifically targeting and representing for Airtel?", + "output": "Which market does the information reflect for Airtel?" + }, + { + "input": "What is the percentage increase in the number of mobile customers for AIS over a specific period?", + "output": "What is the % growth of mobile customers for AIS?" + }, + { + "input": "Which associate had the best % growth in mobile customers?", + "output": "\"Which associate experienced the highest percentage growth in the number of mobile customers over a specific period of time?\"" + }, + { + "input": "What specific components were encompassed within the exceptional items during the fiscal year 2018?", + "output": "What was included in the exceptional items in FY2018?" 
+ }, + { + "input": "What specific items were considered exceptional in the financial year 2018?", + "output": "What was included in the exceptional items in FY2018?" + }, + { + "input": "What was the amount of cash generated from operations after accounting for capital expenditures and operating expenses in the specified year?", + "output": "What was the free cash flow for the year?" + }, + { + "input": "What is the % change in EBITDA margin from 2018 to 2019?", + "output": "What is the percentage difference in EBITDA margin between 2018 and 2019?" + }, + { + "input": "What is the % change in EBITDA margin from 2018 to 2019?", + "output": "What is the percentage change in EBITDA margin from the year 2018 to the year 2019?" + }, + { + "input": "What is the absolute difference in net profit between 2018 and 2019?", + "output": "What is the change in net profit from 2018 to 2019 in absolute numbers?" + }, + { + "input": "What does the line item labeled as 'Others' provide information about in a given context or dataset?", + "output": "What information does the line item 'Others' relate to?" + }, + { + "input": "What is the average total amount owed in debt for the duration of the two-year period?", + "output": "What is the average gross debt across the 2 years?" + }, + { + "input": "Why does this net deferred gain balance exist?", + "output": "Why is there a net deferred gain balance?" + }, + { + "input": "What is the reason for the continued presence of a net deferred gain balance after Singtel completed the sale of its entire 100% stake in NLT to NetLink NBN Trust in July 2017?", + "output": "Why is there still a balance of net deferred gain since Singtel sold its 100% interest in NLT to NetLink NBN Trust in July 2017?" + }, + { + "input": "What was the year when the net deferred gain balance reached its peak?", + "output": "In which year was the net deferred gain balance the highest?" 
+ }, + { + "input": "What is the subject matter or main focus mentioned in note 22?", + "output": "What is the topic of note 22?" + }, + { + "input": "What is included in the \"Others\" category in the table?", + "output": "What does the line item \"Others\" in the table encompass?" + }, + { + "input": "What is the Singtel's largest joint venture in terms of the proportion of the Group's ownership?", + "output": "Which is the largest joint venture of Singtel, in terms of the proportion of Group's ownership?" + }, + { + "input": "Do the non-audit services offered by KPMG LLP have an impact on their independence in conducting audits?", + "output": "Does the non-audit services provided by KPMG LLP affect their independence?" + }, + { + "input": "What is included in the compensation for key management personnel?", + "output": "What does key management personnel compensation comprise?" + }, + { + "input": "What is included in the compensation package for key management personnel?", + "output": "What does key management personnel compensation comprise?" + }, + { + "input": "What are the components included in directors' remuneration?", + "output": "What does directors' remuneration comprise of?" + }, + { + "input": "To what topic does the content of note 18.1 correspond?", + "output": "What is the subject matter of note 18.1?" + }, + { + "input": "What percentage of the company's derivative financial liabilities are classified as non-current liabilities disclosed in their financial statements?", + "output": "How many % of the company's derivative financial liabilities are being disclosed as non-current?" + }, + { + "input": "What is Note 21 about?", + "output": "What is the topic of note 21?" + }, + { + "input": "Can you please provide details on the terms and conditions of the advances made by the shareholders?", + "output": "What are the terms of the shareholders' advances?" 
+ }, + { + "input": "\"What are the specific terms and conditions associated with the advances provided to shareholders?\"", + "output": "What are the terms of the shareholders' advances?" + }, + { + "input": "How many factors need to be considered when calculating the balance for subsidiaries?", + "output": "How many factors are involved in calculating the balance for subsidiaries?" + }, + { + "input": "\"What is typically included in the category of selling and administrative costs?\"", + "output": "What does selling and administrative costs include?" + }, + { + "input": "How many distinct categories of operating expenses exist?", + "output": "How many different type of operating expenses are there?" + }, + { + "input": "What is the mean value of the three highest subcategories within operating expenses for the year 2019?", + "output": "What is the average of the top 3 operating expenses subcategories in 2019?" + }, + { + "input": "What is the average value of the three highest subcategories of operating expenses in the year 2019?", + "output": "What is the average of the top 3 operating expenses subcategories in 2019?" + }, + { + "input": "What are the projected maximum future payouts for Barry Litwin and Thomas Clark under the NEO plan of 2019?", + "output": "What is the maximum estimated future payouts under the 2019 NEO plan for Barry Litwin and Thomas Clark?" + }, + { + "input": "What is the maximum estimated future payout amount under the 2019 NEO plan specifically designated for Barry Litwin and Thomas Clark?", + "output": "What is the maximum estimated future payouts under the 2019 NEO plan for Barry Litwin and Thomas Clark?" + }, + { + "input": "What are the projected highest expected future payouts under the 2019 NEO plan for both Manoj Shetty and Lawrence Reinhold?", + "output": "What is the maximum estimated future payouts under the 2019 NEO plan for Manoj Shetty and Lawrence Reinhold?" 
+ }, + { + "input": "What are the audit fees incurred by the company in 2018 and 2019?", + "output": "What were the audit fees that the company had to pay in the years 2018 and 2019?" + }, + { + "input": "What is the percentage difference in the total amount of fees paid by the company for all other expenses between the years 2018 and 2019?", + "output": "What is the percentage change in all other fees incurred by the company between 2018 and 2019?" + }, + { + "input": "What percentage of common stock does Barry Litwin and Robert D. Rosenthal collectively own?", + "output": "What is the percent of common stock owned by Barry Litwin and Robert D. Rosenthal?" + }, + { + "input": "What is the total compensation received by Robert D. Rosenthal and Chad M. Lindbloom respectively during fiscal 2019?", + "output": "What were the individual total compensations received by Robert D. Rosenthal and Chad M. Lindbloom in fiscal year 2019?" + }, + { + "input": "What is the total compensation received by Paul S. Pearlman and Lawrence Reinhold respectively during fiscal 2019?", + "output": "What were the individual total compensations earned by Paul S. Pearlman and Lawrence Reinhold in fiscal year 2019?" + }, + { + "input": "What is the definition and significance of stock awards?", + "output": "What does stock awards refer to?" + }, + { + "input": "What are the Thomas Clark's accelerated vesting of stock options and unvested performance restricted stock units respectively?", + "output": "Can you please provide specific details about the accelerated vesting of stock options of Thomas Clark? Additionally, what are the details regarding the unvested performance restricted stock units held by Thomas Clark?" + }, + { + "input": "What is the meaning or definition of the \"other\" description in the allowance for sales return in the year 2017?", + "output": "What does the \"other\" description in allowance for sales return in 2017 refer to?" 
+ }, + { + "input": "What is the cumulative amount of allowances for sales returns write-offs from 2017 to 2019?", + "output": "What is the total allowances for sales returns write-offs between 2017 to 2019?" + }, + { + "input": "What is the total amount of allowances accounted for sales returns and write-offs from 2017 to 2019?", + "output": "What is the total allowances for sales returns write-offs between 2017 to 2019?" + }, + { + "input": "What is the percentage difference in the allowance for deferred tax assets at the end of the period in 2018 compared to 2019?", + "output": "What is the percentage change in the allowance for deferred tax assets at the end of period between 2018 and 2019?" + }, + { + "input": "What are Manoj Shetty's respective accelerated vesting of stock options and unvested performance restricted stock units respectively?", + "output": "What is the accelerated vesting schedule for Manoj Shetty's stock options? Additionally, how many unvested performance restricted stock units does he currently have?" + }, + { + "input": "What are Manoj Shetty's respective accelerated vesting of stock options and unvested performance restricted stock units respectively?", + "output": "What is the accelerated vesting timeline for Manoj Shetty's stock options? Additionally, what is the status of his unvested performance restricted stock units?" + }, + { + "input": "What is the difference in consolidated gross profits comparing 2019 to 2018, and also 2018 to 2017?", + "output": "What is the change in consolidated gross profits between 2019 vs 2018 and 2018 vs 2017 respectively?" + }, + { + "input": "What is the total consolidated net sales in 2019 and 2018?", + "output": "What were the combined net sales for the fiscal years of 2019 and 2018?" + }, + { + "input": "What is the total consolidated net sales in 2017 and 2018", + "output": "What were the combined net sales for the years 2017 and 2018?" 
+ }, + { + "input": "What is the total consolidated net sales in 2017 and 2018", + "output": "What were the combined net sales for 2017 and 2018?" + }, + { + "input": "What is the change in consolidated gross profit between 2017 and 2018?", + "output": "What is the difference in consolidated gross profit for the company between the years 2017 and 2018, and how does the value change during this period?" + }, + { + "input": "What is the change in consolidated gross profit between 2017 and 2018?", + "output": "What is the specific difference in consolidated gross profit when comparing the financial years of 2017 and 2018?" + }, + { + "input": "What was the percentage of the total population or total something specific (specify) from Singapore in the year 2019?", + "output": "What was the percentage of total from Singapore in 2019?" + }, + { + "input": "What was the average amount of money sent from Singapore in the years 2018 and 2019?", + "output": "What was the average amount from Singapore in 2018 and 2019?" + }, + { + "input": "What was the average amount of money, in Singapore currency, that was received from Singapore in the years 2018 and 2019 combined?", + "output": "What was the average amount from Singapore in 2018 and 2019?" + }, + { + "input": "How is the fair value of a financial instrument of a company determined and defined?", + "output": "How is the fair value of the company's financial instrument defined?" + }, + { + "input": "In what specific year did the interest rate swap fall below the threshold of 1,000 thousands?", + "output": "In which year was interest rate swap less than 1,000 thousands?" + }, + { + "input": "In which specific year did the interest rate swap fall below one million in thousands?", + "output": "In which year was interest rate swap less than 1,000 thousands?" 
+ }, + { + "input": "What are the three levels of subjectivity and could you provide more information to assist in providing a comprehensive answer?", + "output": "What are the 3 levels of subjectivity?" + }, + { + "input": "What are the 3 distinct levels of subjectivity and how can they be described or defined?", + "output": "What are the 3 levels of subjectivity?" + }, + { + "input": "\"What were the additional features or updates introduced in the year 2019? Similarly, what were the new additions or improvements made in 2018?\"", + "output": "What were the other additions in 2019 and 2018 respectively?" + }, + { + "input": "\"What are the average number of other additions made in 2018 and 2019?\"", + "output": "What is the average other additions for 2018 and 2019?" + }, + { + "input": "\"What is the average number of non-inclusive additions made in the years 2018 and 2019 combined?\"", + "output": "What is the average other additions for 2018 and 2019?" + }, + { + "input": "In what specific year did the transfer of developed technology from IPR&D exceed an amount greater than 4,000 thousands?", + "output": "In which year was Transfers to developed technology from IPR&D greater than 4,000 thousands?" + }, + { + "input": "What were the average Selling, General, and Administrative expenses for the financial years ending on December 31, 2019 and 2018?", + "output": "What is the average Selling, general and administrative for the Years Ended December 31, 2019 to 2018?" + }, + { + "input": "\"What is the average amount spent on Selling, general and administrative expenses for the Years Ended December 31, 2019 and 2018?\"", + "output": "What is the average Selling, general and administrative for the Years Ended December 31, 2019 to 2018?" + }, + { + "input": "What was the percentage decrease in the Selling, general and administrative expenses during the year 2019?", + "output": "What was the decrease in Selling, general and administrative in 2019?" 
+ }, + { + "input": "What was the number of shares granted in 2019?", + "output": "How many shares were awarded in the year 2019?" + }, + { + "input": "Define research and development activities.", + "output": "What are research and development activities?" + }, + { + "input": "What was the average amount spent on net revenue for the years ending on December 31, 2019 and 2018?", + "output": "What is the average Cost of net revenue, for the Years Ended December 31, 2019 to 2018?" + }, + { + "input": "What is the average Cost of net revenue for the period spanning from December 31, 2018, to December 31, 2019?", + "output": "What is the average Cost of net revenue, for the Years Ended December 31, 2019 to 2018?" + }, + { + "input": "In which year was Restructuring expense 0 thousands?", + "output": "In which specific year did the Restructuring expense amount to zero thousands (thousands referring to the unit) without any incurred costs?" + }, + { + "input": "In which year was Restructuring expense 0 thousands?", + "output": "What was the specific year when the Restructuring expense amounted to 0 thousands?" + }, + { + "input": "\"What were the transfers of developed technology from IPR&D in the years 2019 and 2018 respectively?\"", + "output": "What were the Transfers to developed technology from IPR&D in 2019 and 2018 respectively?" + }, + { + "input": "What was the change in the Transfers to developed technology from IPR&D from 2018 to 2019?", + "output": "What was the difference in the amount of transfers to developed technology from the Intangible Property, Research, and Development (IPR&D) category between the years 2018 and 2019?" + }, + { + "input": "In which year was Transfers to developed technology from IPR&D negative?", + "output": "In what specific year did Transfers to developed technology from IPR&D show a negative value?" 
+ }, + { + "input": "What is the average Short-term restricted cash for 2018 and 2019?", + "output": "What is the average amount of Short-term restricted cash for the years 2018 and 2019?" + }, + { + "input": "What was the average Net cash provided by operating activities from December 31, 2018 to December 31, 2019?", + "output": "What is the average Net cash provided by operating activities for the year ended December 31, 2019 to 2018?" + }, + { + "input": "What is the average amount of cash generated from operating activities, received or used, for the period spanning from December 31, 2018 to December 31, 2019?", + "output": "What is the average Net cash provided by operating activities for the year ended December 31, 2019 to 2018?" + }, + { + "input": "What was the respective Net cash used in investing activities in 2019 and 2018?", + "output": "What were the net amounts of cash used in investing activities for the years 2019 and 2018 respectively?" + }, + { + "input": "What was the respective Net cash used in investing activities in 2019 and 2018?", + "output": "\"What was the amount of net cash utilized in investing activities during the years 2019 and 2018?\"" + }, + { + "input": "What was the net value of long-term debt in 2019 after deducting any related expenses or liabilities?", + "output": "What was the Net carrying amount of long-term debt in 2019?" + }, + { + "input": "What was the specific amount of long-term debt after deducting all necessary expenses from the total amount in 2018?", + "output": "What was the Net carrying amount of long-term debt in 2018?" + }, + { + "input": "What was the specific amount of long-term debt, after deducting any reductions or write-offs, recorded on the financial statements for the year 2018?", + "output": "What was the Net carrying amount of long-term debt in 2018?" 
+ }, + { + "input": "What is the mean value of the unamortized debt discount for the fiscal years 2018 and 2019?", + "output": "What is the average Unamortized debt discount for 2018 and 2019?" + }, + { + "input": "When was the year when the net carrying amount of long-term debt was less than 210,000 thousands?", + "output": "In which year was Net carrying amount of long-term debt less than 210,000 thousands?" + }, + { + "input": "What is the average working capital for the period from December 31, 2018 to December 31, 2019?", + "output": "What is the average Working capital for December 31, 2019 to 2018?" + }, + { + "input": "What is the average Cash and cash equivalents for December 31, 2019 to 2018?", + "output": "What is the average amount of Cash and cash equivalents for the period from December 31, 2018 to December 31, 2019?" + }, + { + "input": "What is the average Cash and cash equivalents for December 31, 2019 to 2018?", + "output": "What is the average amount of Cash and cash equivalents as of December 31 for the years 2019 and 2018 combined?" + }, + { + "input": "What is the eligibility of Performance-based restricted stock to vest?", + "output": "What criteria must be met for Performance-based restricted stock to become vested and eligible for acquisition?" + }, + { + "input": "What were the adjustments in 2018?", + "output": "\"What specific adjustments, if any, occurred during the year 2018?\"" + }, + { + "input": "When was the year when the ending balance fell below $240,000,000?", + "output": "In which year was the ending balance less than 240,000 thousands?" + }, + { + "input": "What was the total amount of expenses incurred during the period from January 1 to December 31, 2018, after accounting for all deductions and offsets?", + "output": "What was the net expense in the year ended December 31, 2018?" 
+ }, + { + "input": "What was the total amount of expenditures for the financial year that concluded on December 31, 2018?", + "output": "What was the net expense in the year ended December 31, 2018?" + }, + { + "input": "What was the total amount of expenses incurred during the period from January 1 to December 31, 2019?", + "output": "What was the net expense in the year ended December 31, 2019?" + }, + { + "input": "What was the total expenditure for the period that concluded on December 31, 2019?", + "output": "What was the net expense in the year ended December 31, 2019?" + }, + { + "input": "What was the percentage increase in the cost of services in 2019?", + "output": "How much did Cost of services increase in 2019?" + }, + { + "input": "What was the percentage increase in the cost of services in the year 2019 compared to the previous year?", + "output": "How much did Cost of services increase in 2019?" + }, + { + "input": "What was the Less Capital expenditures (including capitalized software) in 2019?", + "output": "What was the total amount spent on Less Capital Expenditures, including expenditure on capitalized software, during the year 2019?" + }, + { + "input": "What was the Less Capital expenditures (including capitalized software) in 2019?", + "output": "\"What was the total amount of capital expenditures, including capitalized software, that occurred in the year 2019?\"" + }, + { + "input": "What was the free cash flow in 2019?", + "output": "What was the amount of free cash flow generated in the year 2019?" + }, + { + "input": "What was the average free cash flow for 2018 and 2019?", + "output": "What was the average amount of free cash flow generated for the years 2018 and 2019?" + }, + { + "input": "What was the percentage change in the Less Capital expenditures from 2018 to 2019?", + "output": "What was the percentage difference in the amount spent on capital expenditures between 2018 and 2019?" 
+ }, + { + "input": "What was the exact total value of the assets owned by a particular entity/company during the year 2019?", + "output": "What was the Total assets in 2019?" + }, + { + "input": "What was the total value of assets in the year 2019?", + "output": "What was the Total assets in 2019?" + }, + { + "input": "What was the average amount of operating income recorded during the period from 2015 to 2019?", + "output": "What was the average operating income for 2015-2019?" + }, + { + "input": "What was the change in the Per common share – basic from 2018 to 2019?", + "output": "What was the exact change in the Per common share – basic value from 2018 to 2019? Please provide the specific numerical difference between the two years." + }, + { + "input": "What were the amounts of Service and other revenues for the years ended December 31, 2019 and 2018?", + "output": "How much did Service and other revenues amounted for years ended December 31, 2019 and 2018 respectively?" + }, + { + "input": "What were the respective amounts of Service and other revenues for the years ended December 31, 2019 and 2018?", + "output": "How much did Service and other revenues amounted for years ended December 31, 2019 and 2018 respectively?" + }, + { + "input": "\"What were the respective revenue amounts generated from Wireless equipment for the years ending December 31, 2019 and 2018?\"", + "output": "How much did Wireless equipment revenues amounted for years ended December 31, 2019 and 2018 respectively?" + }, + { + "input": "What was the total amount of revenue generated from Wireless equipment during the years ending December 31, 2019 and 2018?", + "output": "How much did Wireless equipment revenues amounted for years ended December 31, 2019 and 2018 respectively?" + }, + { + "input": "What was the percentage change in the value of Small and Medium Businesses from 2018 to 2019?", + "output": "What is the change in Small and Medium Business value from 2018 to 2019?" 
+ }, + { + "input": "What is the Consolidated Net Income in 2019?", + "output": "What is the Consolidated Net Income for the year 2019?" + }, + { + "input": "What is the Consolidated Net Income in 2019?", + "output": "What is the exact amount of Consolidated Net Income recorded in the fiscal year of 2019?" + }, + { + "input": "What is the difference in Consolidated Adjusted EBITDA between 2018 and 2019?", + "output": "What is the change in Consolidated Adjusted EBITDA from 2018 to 2019?" + }, + { + "input": "What was the net change in accumulated other comprehensive income for the year 2016?", + "output": "What was the net increase to accumulated other comprehensive income in 2016?" + }, + { + "input": "What is the difference in the amount of projected employee benefits that a company is obligated to pay, as of 2018 compared to 2019?", + "output": "What is the change in the projected benefit obligation from 2018 to 2019?" + }, + { + "input": "\"What was the specific percentage of income tax that individuals had to pay during the tax year of 2018?\"", + "output": "What was the effective income tax rate for the period 2018?" + }, + { + "input": "What is the percentage difference in the effective income tax rate between 2018 and 2019?", + "output": "What is the change in Effective income tax rate from 2018 to 2019?" + }, + { + "input": "What specific amount, included in the year 2018, would have a positive impact on the effective income tax rate?", + "output": "What was the amount that would favorably affect the effective income tax rate if included in 2018?" + }, + { + "input": "What specific amount, included in 2017, would positively impact the effective income tax rate?", + "output": "What was the amount that would favorably affect the effective income tax rate if included in 2017?" 
+ }, + { + "input": "What specific amount, if included in the 2017 earnings, would have a positive impact on the effective income tax rate?", + "output": "What was the amount that would favorably affect the effective income tax rate if included in 2017?" + }, + { + "input": "How did the Additions based on tax positions related to the current year change from 2018 to 2019?", + "output": "What was the change in the Additions based on tax positions related to the current year from 2018 to 2019?" + }, + { + "input": "What was the average Additions for tax positions of prior years for 2017-2019?", + "output": "What were the average additions made for tax positions in the previous years, specifically from 2017 to 2019?" + }, + { + "input": "What was the total long-term debt?", + "output": "\"What was the precise amount of long-term debt accrued by the entity in question, taking into account all outstanding obligations over an extended period of time?\"" + }, + { + "input": "What was the value of the long-term debt that was payable within less than 1 year?", + "output": "What was the long-term debt less than 1 year?" + }, + { + "input": "What was the exact amount of the finance lease obligation in its entirety?", + "output": "What was the total finance lease obligation?" + }, + { + "input": "What is the difference between the long-term debt due less than 1 year and 1 to 3 years?", + "output": "What distinguishes long-term debt due within less than one year from long-term debt due within one to three years?" + }, + { + "input": "What is the difference between the long-term debt due less than 1 year and 1 to 3 years?", + "output": "What differentiates long-term debt with a maturity of less than 1 year from debt with a maturity ranging from 1 to 3 years?" + }, + { + "input": "By what percentage did Consumer's total operating revenues increase in the year 2019?", + "output": "How much did Consumer’s total operating revenues increase in 2019?" 
+ }, + { + "input": "What was the percentage increase in Consumer's total operating revenues in 2019 compared to the previous year?", + "output": "How much did Consumer’s total operating revenues increase in 2019?" + }, + { + "input": "What was the percentage increase in Service Revenue for the year 2019 compared to the previous year?", + "output": "How much did Service Revenue increase in 2019?" + }, + { + "input": "What is the percentage change in revenue from wireless equipment sales between 2018 and 2019?", + "output": "What is the change in Wireless equipment revenue from 2018 to 2019?" + }, + { + "input": "By what percentage did the consolidated revenue increase from 2018 to 2019?", + "output": "What was the increase in the consolidated revenue from 2018 to 2019?" + }, + { + "input": "What was the percentage increase/decrease in consumer revenue from 2018 to 2019?", + "output": "What was the change in the consumer revenue from 2018 to 2019?" + }, + { + "input": "What is the source of financing for the firm's capital spending requirements?", + "output": "How is the capital spending requirements for the firm financed?" + }, + { + "input": "How does the firm obtain financing for its capital spending needs?", + "output": "How is the capital spending requirements for the firm financed?" + }, + { + "input": "What was the average cash flow used in investing activities for 2018 and 2019?", + "output": "What was the average amount of cash used in investing activities during the years 2018 and 2019?" + }, + { + "input": "What was the percentage change in cash flow used in financing activities from 2018 to 2019?", + "output": "What was the percentage change in cash flow utilized for financing activities between the fiscal years 2018 and 2019?" 
+ }, + { + "input": "What was the percentage change in cash flow used in financing activities from 2018 to 2019?", + "output": "What was the exact percentage of increase or decrease in cash flow used for financing activities between 2018 and 2019?" + }, + { + "input": "What was the percentage difference in the cost of services between 2018 and 2019?", + "output": "What was the change in the cost of services from 2018 to 2019?" + }, + { + "input": "What were the adopted standards on January 1, 2018?", + "output": "Which Standards were Adopted on January 1, 2018?" + }, + { + "input": "What is the change in Accumulated other comprehensive income from December 31, 2017 to January 1, 2018?", + "output": "What is the difference in the amount of Accumulated Other Comprehensive Income between December 31, 2017 and January 1, 2018?" + }, + { + "input": "What is the net change in Noncontrolling interests between December 31, 2017, and January 1, 2018?", + "output": "What is the change in Noncontrolling interests from December 31, 2017 to January 1, 2018?" + }, + { + "input": "What was the difference in Noncontrolling interests' value between December 31, 2017, and January 1, 2018?", + "output": "What is the change in Noncontrolling interests from December 31, 2017 to January 1, 2018?" + }, + { + "input": "What was the ratio of the amortized cost due within one year or less to the fair value for the same period?", + "output": "What was the amortized cost that was due in one year or less as a ratio of the fair value for the same period?" + }, + { + "input": "What was the ratio between the amortized cost to be paid within one year or less and the fair value for the same period?", + "output": "What was the amortized cost that was due in one year or less as a ratio of the fair value for the same period?" 
+ }, + { + "input": "What is the specific difference between the amortized cost and fair value, at the end of five years and at the end of ten years?", + "output": "What was the difference between the amortized cost and fair value that was due after five years through ten years?" + }, + { + "input": "Which specific years can be found in the table that offer additional details pertaining to the stock options of the company?", + "output": "Which years does the table provide Additional information related to the company's stock options?" + }, + { + "input": "What was the specific intrinsic value associated with exercises in the year 2019?", + "output": "What was the intrinsic value of exercises in 2019?" + }, + { + "input": "What were the Proceeds received from exercises in 2018?", + "output": "What was the total amount of money received from exercises held in 2018?" + }, + { + "input": "What were the Proceeds received from exercises in 2018?", + "output": "What was the total amount of money received from exercises in the year 2018?" + }, + { + "input": "What was the gross income in 2017, prior to deducting income taxes?", + "output": "What was the total income before income taxes in 2017?" + }, + { + "input": "What was the change in foreign income before income taxes between 2017 and 2018?", + "output": "What was the difference in the amount of income from foreign sources prior to income taxes, comparing the years 2017 and 2018? Please provide the change in figures for better understanding." + }, + { + "input": "What was the specific difference in foreign income, without considering income taxes, from 2018 to 2019?", + "output": "What was the change in foreign income before income taxes between 2018 and 2019?" + }, + { + "input": "What were the short-term investment options available in the year 2018?", + "output": "What were the short-term investments in 2018?" 
+ }, + { + "input": "What were the types of short-term investments that were prevalent in 2018?", + "output": "What were the short-term investments in 2018?" + }, + { + "input": "What years does the table provide information for total assets?", + "output": "For which time period does the table provide data on total assets?" + }, + { + "input": "What years does the table provide information for total assets?", + "output": "For which years does the table present data regarding the total assets?" + }, + { + "input": "What was the total debt in 2015?", + "output": "What amount of debt, in terms of total outstanding obligations, was recorded in the year 2015?" + }, + { + "input": "What was the change in working capital between 2015 and 2016?", + "output": "What was the difference in the amount of working capital between 2015 and 2016?" + }, + { + "input": "By what percentage did the Total stockholders' equity change from 2018 to 2019?", + "output": "What was the percentage change in Total stockholders' equity between 2018 and 2019?" + }, + { + "input": "Which years does the table provide information for the funded status of the company's postretirement health care and other defined benefit plans?", + "output": "For which specific years does the provided table present data relating to the funded status of the company's postretirement health care and other defined benefit plans?" + }, + { + "input": "What was the specific amount of fair value attributed to the plan assets during the year 2019?", + "output": "What was the fair value of plan assets in 2019?" + }, + { + "input": "What was the fair value measurements using Level 2 for Corporate Bonds?", + "output": "What are the fair value measurements using Level 2 for Corporate Bonds?" + }, + { + "input": "What was the combined value of cash, cash equivalents, and short-term investments?", + "output": "What were the total cash, cash equivalents and short-term investments?" 
+ }, + { + "input": "What was the sum of all cash, cash equivalents, and short-term investments?", + "output": "What were the total cash, cash equivalents and short-term investments?" + }, + { + "input": "What was the difference in the fair value for U.S. Treasury and government debt securities between Level 1 and Level 2?", + "output": "What was the discrepancy in the fair value of U.S. Treasury and government debt securities when classified as Level 1 and Level 2 assets?" + }, + { + "input": "What was the fair value of Level 2 Total cash, cash equivalents and short-term investments as a percentage of the total cash, cash equivalents and short-term investments?", + "output": "What is the fair value, expressed as a percentage, of Level 2 Total cash, cash equivalents, and short-term investments in relation to the overall total of cash, cash equivalents, and short-term investments?" + }, + { + "input": "What were the distinctions between the levels of Total cash, cash equivalents, and short-term investments in terms of Level 1 and Level 2?", + "output": "What was the difference between the Total cash, cash equivalents and short-term investments for Level 1 and Level 2?" + }, + { + "input": "What were the disparities in the amounts of Total Cash, Cash Equivalents, and Short-Term Investments between Level 1 and Level 2?", + "output": "What was the difference between the Total cash, cash equivalents and short-term investments for Level 1 and Level 2?" + }, + { + "input": "On what basis was the calculation for days inventory outstanding determined?", + "output": "What was days inventory outstanding based on?" 
+ }, + { + "input": "What was the Days sales outstanding for three months ended april 2019?", + "output": "\"What was the specific value of Days Sales Outstanding (DSO) specifically for the three-month period ending in April 2019?\"" + }, + { + "input": "What was the change in day sales outstanding between 2018 and 2019?", + "output": "What was the difference in the number of days it took to collect sales revenue between the years 2018 and 2019?" + }, + { + "input": "What was the change in day sales outstanding between 2018 and 2019?", + "output": "What was the difference in the number of days it took to collect sales revenue between 2018 and 2019?" + }, + { + "input": "For how long was the days inventory outstanding higher than 20 days ?", + "output": "How many years did days inventory outstanding exceed 20 days?" + }, + { + "input": "What was the total amount spent on purchases in 2018?", + "output": "What was the Aggregate purchase price in 2018?" + }, + { + "input": "What was the difference in the total purchase price for all items combined in 2018 and 2019?", + "output": "What was the change in the Aggregate purchase price between 2018 and 2019?" + }, + { + "input": "What was the amount of finished goods in 2019?", + "output": "What was the specific quantity of fully completed and ready-for-sale products in the year 2019?" + }, + { + "input": "What was the exact quantity of inventories during the year 2018?", + "output": "What was the amount of inventories in 2018?" + }, + { + "input": "What was the change in finished goods between 2018 and 2019?", + "output": "How did the amount of finished goods change from 2018 to 2019?" + }, + { + "input": "What were the additions made to tax positions related to the current year in 2019, and how do they impact the overall financial situation?", + "output": "What were the Additions based on tax positions related to the current year in 2019?" 
+ }, + { + "input": "What was the change in the Balance at beginning of period between 2017 and 2018?", + "output": "What was the difference in the starting balance from the beginning of the period in 2017 compared to 2018?" + }, + { + "input": "What is the overall difference in the total amount of Additions for tax positions of prior years from 2017 to 2019?", + "output": "What was the total change in Additions for tax positions of prior years between 2017 and 2019?" + }, + { + "input": "What was the difference in the total expenses recorded between 2018 and 2019?", + "output": "What was the change in expense accrued during the period between 2018 and 2019?" + }, + { + "input": "What was the difference in the amount of expenses recorded from 2018 to 2019?", + "output": "What was the change in expense accrued during the period between 2018 and 2019?" + }, + { + "input": "What was the percentage change in the Warranty costs incurred between 2018 and 2019?", + "output": "What was the difference in warranty costs incurred between 2018 and 2019, expressed as a percentage change?" + }, + { + "input": "What were the total assets in 2018?", + "output": "What was the total value of assets in the year 2018?" + }, + { + "input": "What was the percentage change in Total deferred commissions between 2018 and 2019?", + "output": "What was the percentage change in the total amount of deferred commissions from 2018 to 2019?" + }, + { + "input": "What was the final balance at the end of the specified period in the year 2018?", + "output": "What was the balance at the end of period in 2018?" + }, + { + "input": "What was the final balance at the end of period in 2018? Please provide the balance from the specified period in 2018.", + "output": "What was the balance at the end of period in 2018?" 
+ }, + { + "input": "What was the change in the balance at beginning of period between 2018 and 2019?", + "output": "What was the difference in the balance at the start of the period between the years 2018 and 2019?" + }, + { + "input": "For how many years did the ending balance surpass $3,500 million?", + "output": "How many years did balance at end of period exceed $3,500 million?" + }, + { + "input": "How many years did interest income exceed $50 million?", + "output": "For how many years was the interest income greater than $50 million?" + }, + { + "input": "How many years did interest income exceed $50 million?", + "output": "How many years was the total interest income greater than $50 million?" + }, + { + "input": "What was the percentage change in net revenues between 2018 and 2019?", + "output": "What was the exact percentage difference in net revenues from 2018 to 2019, and how can I calculate it accurately?" + }, + { + "input": "Which years does the table provide information for net property and equipment?", + "output": "For which specific years is information regarding net property and equipment provided in the table?" + }, + { + "input": "Which years does the table provide information for net property and equipment?", + "output": "For which specific years does the table present data regarding net property and equipment?" + }, + { + "input": "What was the total revenue generated from operational activities during the year 2017?", + "output": "What was the income from operations in 2017?" + }, + { + "input": "What was the percentage change in the net income between 2018 and 2019?", + "output": "What was the precise percentage increase or decrease in the net income from 2018 to 2019?" + }, + { + "input": "What was the percentage change in the net income between 2018 and 2019?", + "output": "What is the percentage difference in the net income from 2018 to 2019?" 
+ }, + { + "input": "How much money was in circulation in the form of physical cash in the year 2019?", + "output": "What was the amount of cash in 2019?" + }, + { + "input": "In 2019, how much cash was there?", + "output": "What was the amount of cash in 2019?" + }, + { + "input": "What was the specific value or quantity of cash equivalents recorded in the financial records for the year 2018?", + "output": "What was the amount of cash equivalents in 2018?" + }, + { + "input": "Which years does the table provide information for property and equipment information for geographic areas based on the physical location of the assets?", + "output": "For which specific years does the table provide information related to property and equipment in geographic areas, considering the assets' physical locations?" + }, + { + "input": "Which years does the table provide information for property and equipment information for geographic areas based on the physical location of the assets?", + "output": "For which years does the table present property and equipment information? This information is specifically related to geographic areas, and is based on the physical location of the assets." + }, + { + "input": "What was the total combined value of property and equipment in the United States during the year 2019?", + "output": "What was the amount of property and equipment in U.S. in 2019?" + }, + { + "input": "How many years did International property and equipment exceed $150 million?", + "output": "For how many consecutive years has the value of International property and equipment been greater than $150 million?" + }, + { + "input": "What was the financed unearned services revenue in 2019?", + "output": "\"What was the amount of unearned revenue from financed services in 2019?\"" + }, + { + "input": "What was the financed unearned services revenue in 2019?", + "output": "What was the specific amount of unearned services revenue that was financed during the year 2019?" 
+ }, + { + "input": "For how many years has the amount of financed unearned services revenue been greater than $100 million?", + "output": "How many years did financed unearned services revenue exceed $100 million?" + }, + { + "input": "For how many consecutive years has the unearned services revenue in financed exceeded $100 million?", + "output": "How many years did financed unearned services revenue exceed $100 million?" + }, + { + "input": "What was the change in deferred services revenue between 2018 and 2019?", + "output": "What was the difference in the amount of deferred services revenue recorded in 2018 and 2019?" + }, + { + "input": "What was the sum of the net revenues from the last two quarters?", + "output": "What is the total net revenue for the most recent two quarters?" + }, + { + "input": "What was the change in the Dividends per share declared between 2018 and 2019?", + "output": "What was the difference in the amount of Dividends per share declared between the years 2018 and 2019?" + }, + { + "input": "What was the change in the Dividends per share declared between 2018 and 2019?", + "output": "What was the difference in the amount of dividends per share declared for the years 2018 and 2019?" + }, + { + "input": "What was the change in the Dividend payments allocated to retained earnings (accumulated deficit) between 2017 and 2018?", + "output": "What was the difference in the amounts of dividends allocated to retained earnings (accumulated deficit) from 2017 to 2018?" + }, + { + "input": "What was the change in the Dividend payments allocated to retained earnings (accumulated deficit) between 2017 and 2018?", + "output": "What was the variation in the amount of dividends transferred to retained earnings (accumulated deficit) from 2017 to 2018?" 
+ }, + { + "input": "What factors contributed to the rise in the cash inflows generated from day-to-day business operations during the year 2019?", + "output": "What caused the increase in the cash flow from operating activities in 2019?" + }, + { + "input": "What is the percentage change in the Free Cash Flow, considering both an increase and decrease in value?", + "output": "What is the increase / (decrease) in the Free Cash Flow?" + }, + { + "input": "What is the percentage change in Free Cash Flow?", + "output": "What is the increase / (decrease) in the Free Cash Flow?" + }, + { + "input": "What are the definitions of underlying operating profit and underlying operating margin, and how do they differ from each other?", + "output": "What is the definition of the Underlying operating profit and underlying operating margin?" + }, + { + "input": "What does the underlying effective tax rate measure reflects?", + "output": "What is the definition and significance of the underlying effective tax rate and what does it reflect in terms of a company's financial situation or performance?" + }, + { + "input": "What does the underlying effective tax rate measure reflects?", + "output": "What is the meaning of the underlying effective tax rate measure and what does it reflect?" + }, + { + "input": "What is the methodology used to calculate the effective tax rate?", + "output": "How is the underlying effective tax rate calculated?" + }, + { + "input": "What is the exact difference in the underlying effective tax rate between 2018 and 2019?", + "output": "What is the Underlying effective tax rate change from 2018 to 2019?" + }, + { + "input": "What is the percentage change in the underlying effective tax rate between 2018 and 2019?", + "output": "What is the Underlying effective tax rate change from 2018 to 2019?" 
+ }, + { + "input": "By what percentage did the Biogas production increase from the year 2018 to 2019?", + "output": "What is the increase in the Biogas from 2018 to 2019?" + }, + { + "input": "What is the average total energy?", + "output": "What is the average amount of energy, when considering all types of energy combined?" + }, + { + "input": "\"What is the percentage change in the price of Fuel Oils from 2018 to 2019? Please specify if it is an increase or decrease.\"", + "output": "What is the percentage increase / (decrease) in Fuel Oils from 2018 to 2019?" + }, + { + "input": "What is the percentage change in the price of Fuel Oils between 2018 and 2019?", + "output": "What is the percentage increase / (decrease) in Fuel Oils from 2018 to 2019?" + }, + { + "input": "What method is used to calculate ROIC (Return on Invested Capital)? Please provide a detailed explanation of the calculation process.", + "output": "How is ROIC calculated?" + }, + { + "input": "What is the difference in the diluted combined average number of share units (in millions) between 2018 and 2019?", + "output": "What is the change in the Diluted combined average number of share units (millions of units) from 2018 to 2019?" + }, + { + "input": "What is the difference in the diluted combined average number of share units (measured in millions) between 2018 and 2019?", + "output": "What is the change in the Diluted combined average number of share units (millions of units) from 2018 to 2019?" + }, + { + "input": "What was the specific price quotation for investments on 31 December 2018?", + "output": "What was the price quotation for investments as of 31 December 2018?" + }, + { + "input": "What was the average value of investments made by listed companies in the year 2019?", + "output": "What is the average amount of investments of listed companies in 2019?" 
+ }, + { + "input": "What was the average investment amount of listed companies in 2019?", + "output": "What is the average amount of investments of listed companies in 2019?" + }, + { + "input": "What was the average percentage of ownership or voting rights held by shareholders in listed companies during the year 2019?", + "output": "What is the average Percentage of ownership or voting rights of listed companies in 2019?" + }, + { + "input": "What is the average Unsecured other loans?", + "output": "What is the average amount of unsecured loans that fall under the category of \"other loans\"?" + }, + { + "input": "What is the average value or rating of Integrated Device Manufacturers (IDMs)?", + "output": "What is the average of Integrated device manufacturers?" + }, + { + "input": "What is the average performance or profitability of Integrated device manufacturers?", + "output": "What is the average of Integrated device manufacturers?" + }, + { + "input": "What is the increase / (decrease) in the Integrated device manufacturers from 2017 to 2018?", + "output": "What is the net change, either increase or decrease, in the number of Integrated device manufacturers from 2017 to 2018?" + }, + { + "input": "What is the average amount of compensation received by Directors?", + "output": "What is the average Directors’ compensation?" + }, + { + "input": "What is the percentage change in compensation for Directors from the year 2018 to 2019?", + "output": "What is the increase/ (decrease) in Directors’ compensation from 2018 to 2019?" + }, + { + "input": "What calculation method is employed to determine the total sum of the Company's shares?", + "output": "What method is used to calculate the aggregate amount of the Company’s share?" + }, + { + "input": "What specific method is employed to calculate the total combined value of the shares held by the Company?", + "output": "What method is used to calculate the aggregate amount of the Company’s share?" 
+ }, + { + "input": "What is the average revenue generated from the sale of goods and services from December 31, 2018 to December 31, 2019?", + "output": "What are the average Sales of goods and services for December 31, 2018 to 2019?" + }, + { + "input": "What is the increase/ (decrease) in Sales of goods and services for December 31, 2018 to 2019?", + "output": "What is the percentage change in the sales of goods and services when comparing December 31, 2018, to December 31, 2019?" + }, + { + "input": "What is the percentage change in the sales of goods and services from January 1, 2018, to December 31, 2018?", + "output": "What is the increase/ (decrease) in Sales of goods and services for January 1, 2018 to December 31, 2018?" + }, + { + "input": "What is the percentage change in the sales of goods and services from January 1, 2018 to December 31, 2018?", + "output": "What is the increase/ (decrease) in Sales of goods and services for January 1, 2018 to December 31, 2018?" + }, + { + "input": "\"What is the average amount of lease obligation for the period Less than 1 Year and 1-3 Years? Please provide the average lease obligations separately for both time periods.\"", + "output": "What is the average lease obligation for the period Less than 1 Year and 1-3 Years?" + }, + { + "input": "What does Purchase obligations incorporate?", + "output": "What is included in purchase obligations?" + }, + { + "input": "What is the criteria for an employee to receive additional bonuses?", + "output": "What are the specific requirements or factors that determine if an employee is eligible to receive extra bonuses in addition to their regular compensation?" + }, + { + "input": "What is the difference in Time Deposits: Non-Trading Purpose between Carrying Amount and Fair Amount?", + "output": "What is the distinction between the carrying amount and fair amount of Time Deposits used for non-trading purposes?" 
+ }, + { + "input": "What is the difference between Bonds: Non-Trading Purpose Carrying Amount and Fair Amount?", + "output": "What distinguishes Bonds: Non-Trading Purpose Carrying Amount from Fair Amount in terms of their meaning, significance, and financial implications?" + }, + { + "input": "What is the average quantity of supplies and spare parts in general?", + "output": "What was the average Supplies and spare parts?" + }, + { + "input": "What was the average quantity of supplies and spare parts used?", + "output": "What was the average Supplies and spare parts?" + }, + { + "input": "What was the percentage increase / (decrease) in the Finished goods from 2018 to 2019?", + "output": "What was the percentage change in the amount of Finished goods from the year 2018 to 2019, indicating whether it increased or decreased?" + }, + { + "input": "What was the percentage increase / (decrease) in the Finished goods from 2018 to 2019?", + "output": "What was the percentage change in the value of Finished goods from 2018 to 2019, indicating whether it increased or decreased?" + }, + { + "input": "What is the percentage change in the value of financial assets at fair value through profit or loss between 2018 and 2019?", + "output": "What is the increase / (decrease) in the Financial assets at fair value through profit or loss from 2018 to 2019?" + }, + { + "input": "What is the increase / (decrease) in the Short-term loans from 2018 to 2019?", + "output": "What was the percentage change in Short-term loans from 2018 to 2019?" + }, + { + "input": "What is the increase / (decrease) in the Short-term loans from 2018 to 2019?", + "output": "What was the percentage change in Short-term loans between 2018 and 2019?" 
+ }, + { + "input": "What measures does the company implement to effectively manage, maintain, or adapt its capital structure in response to changes in the business environment?", + "output": "What steps does the company take To maintain or adjust the capital structure?" + }, + { + "input": "What is the change in the total liabilities, expressed as a percentage, between 2018 and 2019?", + "output": "What is the increase / (decrease) in the Total liabilities from 2018 to 2019?" + }, + { + "input": "What is the percentage change in Total Capital from 2018 to 2019?", + "output": "What is the percentage increase / (decrease) of Total Capital from 2018 to 2019?" + }, + { + "input": "How do non-GAAP measures assist investors in making informed investment decisions and assessing a company's financial performance?", + "output": "How do these non-GAAP measures aid investors?" + }, + { + "input": "How much would diluted earnings per share for fiscal year 2018 be without the net charge (benefit) related to the enactment of the TCJA?", + "output": "What would be the diluted earnings per share for fiscal year 2018 if we exclude the net charge (benefit) associated with the enactment of the Tax Cuts and Jobs Act (TCJA)?" + }, + { + "input": "How much would diluted earnings per share for fiscal year 2018 be without the net charge (benefit) related to the enactment of the TCJA?", + "output": "What would be the diluted earnings per share for fiscal year 2018, excluding the impact of the net charge (benefit) resulting from the implementation of the Tax Cuts and Jobs Act (TCJA)?" + }, + { + "input": "What was the average revenue over the 3 year period from 2017 to 2019?", + "output": "What was the average revenue from 2017 to 2019, calculated over the span of three years?" + }, + { + "input": "What factors contributed to the change in the federal statutory rate during fiscal year 2017?", + "output": "Why did the federal statutory rate in fiscal year 2017? 
" + }, + { + "input": "How many items accounted for the difference between income taxes computed at the US federal statutory rate and the company's effective rate?", + "output": "How many specific items contributed to the variance between the income taxes calculated using the official US federal statutory rate and the actual rate paid by the company?" + }, + { + "input": "What was the average federal statutory rate over the 3 year period from 2017 to 2019? ", + "output": "What was the average federal statutory rate from 2017 to 2019?" + }, + { + "input": "What was the average effective rate for the years 2017, 2018, and 2019?", + "output": "What was the average effective rate over the 3 year period from 2017 to 2019? " + }, + { + "input": "How much were the top 3 components of property and equipment as a % of the total at cost, property and equipment for 2019?", + "output": "What were the percentages for the top 3 components of property and equipment in relation to the total at cost, property, and equipment in 2019?" + }, + { + "input": "What percentage of the overall revenue in 2019 was not generated by the commercial cloud revenue?", + "output": "How much of the total revenue in 2019 did not come from commercial cloud revenue?" + }, + { + "input": "Which were the bottom 2 revenue items for 2017?", + "output": "\"What were the two lowest revenue-generating items in 2017?\"" + }, + { + "input": "What was the average EPS (earnings per share) for the years 2017, 2018, and 2019?", + "output": "What was the average basic earnings per share over the 3 year period from 2017 to 2019?" + }, + { + "input": "\"Which specific locations does the company have both operating leases and finance leases in place?\"", + "output": "Which places does the company have operating and finance leases?" + }, + { + "input": "What is the topic or content covered in note 15?", + "output": "What is note 15 about?" 
+ }, + { + "input": "\"What is the ascending order of the years when sorted by their total cost of finance leases?\"", + "output": "What are the years sorted by total finance lease cost, in ascending order?" + }, + { + "input": "How many different items are included when calculating operating income (loss)?", + "output": "How many items are there for operating income (loss)?" + }, + { + "input": "How many different items are included in the calculation of operating income (loss)?", + "output": "How many items are there for operating income (loss)?" + }, + { + "input": "How is the allocation of amortization and depreciation determined and distributed?", + "output": "How is the portion of amortization and depreciation allocated?" + }, + { + "input": "How is the allocation of the portion of amortization and depreciation determined and distributed?", + "output": "How is the portion of amortization and depreciation allocated?" + }, + { + "input": "How many countries have Long-lived assets classified based on the controlling statutory company's location, excluding financial instruments and tax assets? Also, can you identify the countries where the assets amount to over 10% of the total assets?", + "output": "How many countries have Long-lived assets, excluding financial instruments and tax assets, classified by the location of the controlling statutory company and with countriesover 10% of the total assets?" + }, + { + "input": "What was the percentage change in the total long-lived assets from 2017 to 2018?", + "output": "What was the precise percentage difference in the value of total long-lived assets between the years 2017 and 2018?" + }, + { + "input": "What was the percentage change in the total long-lived assets from 2017 to 2018?", + "output": "What was the percentage increase or decrease in the value of the total long-lived assets between the years 2017 and 2018?" 
+ }, + { + "input": "What is the definition and significance of purchase commitments?", + "output": "What do the purchase commitments represent?" + }, + { + "input": "What is the meaning and significance of purchase commitments?", + "output": "What do the purchase commitments represent?" + }, + { + "input": "Where can the long-term debt be found?", + "output": "Where is the location to find the information regarding long-term debt?" + }, + { + "input": "How much do the top 3 contractual obligation terms add up to in 2020?", + "output": "What is the total value of the top three contractual obligation terms in 2020?" + }, + { + "input": "How much do the top 3 contractual obligation terms add up to in 2020?", + "output": "In 2020, what is the sum of the top three contractual obligation terms?" + }, + { + "input": "What was the number of stock awards that were granted in 2019, not including the PSUs granted?", + "output": "Excluding the PSUs, how many stock awards were granted in the year 2019?" + }, + { + "input": "What was the number of stock awards that were granted in 2019, not including the PSUs granted?", + "output": "How many stock awards were granted in 2019, excluding the granted PSUs?" + }, + { + "input": "What was the average fair value of the stock awards that were vested between 2017 and 2019 over a three-year period?", + "output": "What was the average fair value of stock awards vested over the 3 year period from 2017 to 2019?" + }, + { + "input": "What was the intelligent cloud as a percentage of total revenue in 2019?", + "output": "What percentage of the total revenue in 2019 was attributed to the intelligent cloud?" + }, + { + "input": "What was the intelligent cloud as a percentage of total revenue in 2019?", + "output": "What percentage of total revenue in 2019 was attributed to the intelligent cloud?" + }, + { + "input": "How many revenue categories are there?", + "output": "How many different categories of revenue exist in the given context?" 
+ }, + { + "input": "What is the company's net loss in 2019?", + "output": "What was the net financial loss incurred by the company during the year 2019?" + }, + { + "input": "What was the cumulative unrealized gain on marketable securities for the company from 2017 to 2019?", + "output": "What is the company's total unrealized gain on marketable securities between 2017 to 2019?" + }, + { + "input": "What is the percentage difference in the total stock base compensation, categorized by type of award, from 2018 to 2019?", + "output": "What is the percentage change in total stock base compensation by type of award between 2018 and 2019?" + }, + { + "input": "What is the percentage change in the total stock base compensation for each type of award from 2018 to 2019?", + "output": "What is the percentage change in total stock base compensation by type of award between 2018 and 2019?" + }, + { + "input": "What is the cumulative amount of stock based compensation from 2017 through 2019?", + "output": "What is the total stock based compensation amount between 2017 to 2019?" + }, + { + "input": "What is the sum of stock-based compensation expenses for the years 2017 to 2019?", + "output": "What is the total stock based compensation amount between 2017 to 2019?" + }, + { + "input": "As of December 31, 2018, how many shares were granted and are currently outstanding?", + "output": "What is the number of outstanding shares granted as of December 31, 2018? " + }, + { + "input": "As of December 31, 2018, how many shares were granted and still remain outstanding?", + "output": "What is the number of outstanding shares granted as of December 31, 2018? " + }, + { + "input": "What was the change in the number of outstanding shares between December 31, 2018, and December 31, 2019?", + "output": "What is the difference in outstanding shares as of December 31, 2018 and 2019?" 
+ }, + { + "input": "What data is shown in the table?", + "output": "\"What specific information does the table display?\"" + }, + { + "input": "What is the units for the data shown in the table?", + "output": "What units are being used to represent the information presented in the table?" + }, + { + "input": "What is the total revenue earned by the company between 2017 to 2019?", + "output": "What is the cumulative revenue generated by the company from 2017 to 2019?" + }, + { + "input": "What is the exact amount of revenue generated in the EMEA region from 2017 to 2019?", + "output": "What is the total revenue earned in EMEA between 2017 to 2019?" + }, + { + "input": "What is the change in total revenue between 2018 to 2019?", + "output": "What is the difference in total revenue from 2018 compared to 2019, and how can this information assist in better understanding the financial performance of the company during this period?" + }, + { + "input": "What is the sum of all non-current liabilities excluding total other non-current liabilities as of 31 December 2019?", + "output": "What is the total other non-current liabilities as at 31 December 2019?" + }, + { + "input": "What is the aggregate amount of all non-current liabilities, excluding any current liabilities, as of December 31, 2019?", + "output": "What is the total other non-current liabilities as at 31 December 2019?" + }, + { + "input": "What is the depreciation expense on property and equipment as at December 31, 2018?", + "output": "What is the specific amount of the depreciation expense recorded for property and equipment on the financial statement as at the closing date of December 31, 2018?" + }, + { + "input": "What is the depreciation expense on property and equipment as at December 31, 2018?", + "output": "What was the amount of depreciation expense booked for property and equipment up until December 31st, 2018?" 
+ }, + { + "input": "What is the depreciation expense on property and equipment as at December 31, 2019?", + "output": "As of December 31, 2019, what is the recorded depreciation expense amount for property and equipment?" + }, + { + "input": "What is the depreciation expense on property and equipment as at December 31, 2019?", + "output": "What is the specific amount of depreciation expense recorded for property and equipment as on December 31, 2019?" + }, + { + "input": "What is the total depreciation expense on property and equipment from 2017 to 2019?", + "output": "From 2017 to 2019, what was the cumulative amount of depreciation expense incurred on property and equipment?" + }, + { + "input": "What is the total depreciation expense on property and equipment from 2017 to 2019?", + "output": "What is the cumulative depreciation expense on property and equipment for the years 2017, 2018, and 2019?" + }, + { + "input": "What is the allowance for doubtful accounts as at December 31, 2018?", + "output": "What is the amount set aside for doubtful accounts on the balance sheet as of December 31, 2018?" + }, + { + "input": "What are some examples of financial instruments that can subject the company to credit risk?", + "output": "What financial instruments expose companies to credit risk? Can you provide some examples of such instruments?" + }, + { + "input": "What are some examples of financial instruments that can subject the company to credit risk?", + "output": "What financial instruments can expose a company to credit risk? Can you provide some examples?" + }, + { + "input": "How does the company effectively manage and reduce its credit risk associated with accounts receivable?", + "output": "How does the company mitigate its credit risk pertaining to accounts receivable?" 
+ }, + { + "input": "How does the company identify and determine its important or major customers in its business operations?", + "output": "How does the company determine its significant customers?" + }, + { + "input": "How does the company identify and assess its significant customers for business purposes?", + "output": "How does the company determine its significant customers?" + }, + { + "input": "What was the monetary value of the company's losses within its own country during the year 2019?", + "output": "What is the company's domestic loss in 2019?" + }, + { + "input": "\"What was the total financial loss incurred by the company within its own country during the year 2019?\"", + "output": "What is the company's domestic loss in 2019?" + }, + { + "input": "What is the amount of money that the company lost within its own country during the year 2017?", + "output": "What is the company's domestic loss in 2017?" + }, + { + "input": "\"What was the fiscal year 2017 domestic financial loss incurred by the company?\"", + "output": "What is the company's domestic loss in 2017?" + }, + { + "input": "What is the company's total loss before income taxes between 2017 to 2019?", + "output": "What is the cumulative pre-tax loss of the company from 2017 to 2019?" + }, + { + "input": "What is the company's change in foreign income between 2018 and 2019?", + "output": "What was the percentage change in the company's foreign income from 2018 to 2019?" + }, + { + "input": "What is the cumulative amount of loss incurred by the company domestically during the period from 2017 to 2019?", + "output": "What is the company's total domestic loss between 2017 to 2019?" + }, + { + "input": "What is the aggregate amount of losses incurred by the company within its domestic markets during the period from 2017 to 2019?", + "output": "What is the company's total domestic loss between 2017 to 2019?" 
+ }, + { + "input": "What is the percentage change in total deferred revenue 2018 and 2019?", + "output": "What is the percentage difference in the total deferred revenue between 2018 and 2019?" + }, + { + "input": "What is the percentage change in total deferred revenue 2018 and 2019?", + "output": "What is the percentage difference in the total amount of deferred revenue between the years 2018 and 2019?" + }, + { + "input": "What is the proportion of the cost of revenue and research and development expenses as a percentage of the total restructuring expense?", + "output": "What is the ratio of the combined cost of revenue and research and development expenses to the total restructuring expense, expressed as a percentage?" + }, + { + "input": "What is the total legal fees and facilities expenses?", + "output": "What are the combined expenses for legal fees and facilities?" + }, + { + "input": "What is the total legal fees and facilities expenses?", + "output": "What is the cumulative sum of the expenses incurred in legal fees and facilities?" + }, + { + "input": "What is the total increases related to tax positions taken during current year in 2018 and 2019?", + "output": "What is the cumulative increase in tax positions taken during the current year across 2018 and 2019?" + }, + { + "input": "What is the total increases related to tax positions taken during current year in 2018 and 2019?", + "output": "What is the combined amount of increments in relation to tax positions that were assumed during the present year in both 2018 and 2019?" + }, + { + "input": "As of 31st December 2018, what is the sum of all liabilities that have accumulated or been incurred by the company up to that date?", + "output": "What is the total accrued liabilities as at 31 December 2018?" + }, + { + "input": "What are the units used to measure the values in the table?", + "output": "What is the units that the values in the table are measured in?" 
+ }, + { + "input": "What is the percentage increase or decrease in the total amount of accrued liabilities from 2018 to 2019?", + "output": "What is the percentage change in total accrued liabilities between 2018 and 2019?" + }, + { + "input": "What is the percentage increase or decrease in the total amount of liabilities that have accumulated between the years 2018 and 2019?", + "output": "What is the percentage change in total accrued liabilities between 2018 and 2019?" + }, + { + "input": "What is the total amount of long-lived asset in Japan in both 2018 and 2019?", + "output": "What is the total value of long-lived assets in Japan for the years 2018 and 2019 combined?" + }, + { + "input": "What were the capitalized costs of software development in 2019?", + "output": "What was the Capitalized software development costs in 2019?" + }, + { + "input": "What were the average net costs of software development for both the years 2018 and 2019?", + "output": "What was the average Software development costs, net for 2018 and 2019?" + }, + { + "input": "What were the net average costs for software development in both 2018 and 2019?", + "output": "What was the average Software development costs, net for 2018 and 2019?" + }, + { + "input": "What was the Cash and cash equivalents in 2019 and 2018 respectively?", + "output": "What were the cash and cash equivalents figures for the years 2019 and 2018, respectively?" + }, + { + "input": "What was the Cash and cash equivalents in 2019 and 2018 respectively?", + "output": "What were the amounts of Cash and cash equivalents for the years 2019 and 2018, respectively?" + }, + { + "input": "What are the average amounts of Restricted cash for the years 2018 and 2019?", + "output": "What is the average Restricted cash for 2018 and 2019?" + }, + { + "input": "What was the Compensation and related benefits in 2019 and 2018 respectively?", + "output": "What were the compensation and benefits offered in 2019 and 2018?" 
+ }, + { + "input": "\"In what specific year did the expenses for professional and legal services amount to a value below 500 thousand dollars?\"", + "output": "In which year was Professional and legal fees less than 500 thousands?" + }, + { + "input": "What was the percentage change in Compensation and related benefits from 2018 to 2019?", + "output": "What is the change in the Compensation and related benefits from 2018 to 2019?" + }, + { + "input": "What were the average professional and legal fees in 2018 and 2019?", + "output": "What was the average Professional and legal fees for 2018 and 2019?" + }, + { + "input": "What were the factors that led to the decrease in net pool allocation? Can you provide an explanation and further details regarding the decline in net pool allocation?", + "output": "What accounted for the decrease in net pool allocation?" + }, + { + "input": "What are the components of vessel operating and supervision costs?", + "output": "What are the key components that make up the costs associated with vessel operations and supervision?" + }, + { + "input": "How much did the technical maintenance expenses differ between 2017 and 2018?", + "output": "What was the change in technical maintenance expenses from 2017 to 2018?" + }, + { + "input": "What was the exact monetary difference in technical maintenance expenses between the years 2017 and 2018?", + "output": "What was the change in technical maintenance expenses from 2017 to 2018?" + }, + { + "input": "What was the percentage increase or decrease in the combined costs of operating and supervising vessels from 2018 to 2019?", + "output": "What was the percentage change in total vessel operating and supervision costs from 2018 to 2019?" + }, + { + "input": "Why does the Group choose to retain ownership of the trade receivables instead of selling them or assigning them to a third party?", + "output": "Why does the Group hold the trade receivables?" 
+ }, + { + "input": "What does accrued income represent?", + "output": "\"What is the meaning and significance of accrued income?\"" + }, + { + "input": "What does accrued income represent?", + "output": "What is the meaning and significance of accrued income?" + }, + { + "input": "What was the change in trade receivables from 2018 to 2019?", + "output": "What was the difference in the amount of trade receivables between 2018 and 2019, indicating whether it increased or decreased?" + }, + { + "input": "What was the exact percentage increase or decrease in the total amount of money owed to a company from 2018 to 2019?", + "output": "What was the percentage change in total receivables from 2018 to 2019?" + }, + { + "input": "In which years was the fair value of the derivative assets recorded for?", + "output": "For which specific years were the fair values of the derivative assets recorded? Please provide all relevant information regarding the years for accurate response." + }, + { + "input": "In which specific year did the fair value of interest rate swaps experience an increase compared to other years?", + "output": "In which year was the fair value of Interest rate swaps higher?" + }, + { + "input": "When was the fair value of interest rate swaps higher: in which specific year did the fair value of interest rate swaps surpass that of other years?", + "output": "In which year was the fair value of Interest rate swaps higher?" + }, + { + "input": "In which year did the company record the highest amount of expenses pertaining to share-based compensation?", + "output": "In which year was the total expense recognized in respect of share-based compensation the highest?" + }, + { + "input": "What was the highest year in terms of total recognized expense for share-based compensation?", + "output": "In which year was the total expense recognized in respect of share-based compensation the highest?" 
+ }, + { + "input": "What was the difference in the amount of money earned by The Cool Pool Limited in 2017 compared to 2018?", + "output": "What was the change in revenues from The Cool Pool Limited from 2017 to 2018?" + }, + { + "input": "What was the exact difference in the amount of money earned by The Cool Pool Limited in the year 2017 compared to the year 2018?", + "output": "What was the change in revenues from The Cool Pool Limited from 2017 to 2018?" + }, + { + "input": "For which years were the movements in tangible fixed assets and vessels under construction documented?", + "output": "In which years was the movements in tangible fixed assets and vessels under construction recorded for?" + }, + { + "input": "What was the percentage change in the cost of vessels from the end of 2018 to the end of 2019?", + "output": "What was the change in vessel cost from end 2018 to end 2019?" + }, + { + "input": "In which year was the other non-current assets recorded for?", + "output": "What is the specific year in which the recording of non-current assets other than the current ones occurred?" + }, + { + "input": "In which year did the value of long-term assets, other than the current year, surpass that of the current year?", + "output": "In which year was the other long-term assets higher?" + }, + { + "input": "What was the percentage change in the total value of other non-current assets from the year 2018 to the year 2019?", + "output": "What was the percentage change in total other non-current assets from 2018 to 2019?" + }, + { + "input": "For which years were the earnings per share (EPS) or losses per share (LPS) recorded?", + "output": "In which years was the earnings/(losses) per share recorded for?" + }, + { + "input": "What was the amount of ship management client accounts in 2018?", + "output": "How many ship management client accounts were there in the year 2018?" 
+ }, + { + "input": "What was the change in current accounts from 2018 to 2019?", + "output": "What is the difference between the current accounts in 2018 and 2019?" + }, + { + "input": "\"What was the specific amount of compensation or payment given to individuals or employees in the year 2018?\"", + "output": "What was the remuneration in 2018?" + }, + { + "input": "What was the amount of compensation received in 2018?", + "output": "What was the remuneration in 2018?" + }, + { + "input": "Which year was the short-term benefits the highest?", + "output": "What was the peak year for short-term benefits?" + }, + { + "input": "Which year was the short-term benefits the highest?", + "output": "In which year did the short-term benefits reach their peak level?" + }, + { + "input": "For which specific years were the costs of constructing the vessels recorded?", + "output": "In which years was the vessels under construction costs recorded for?" + }, + { + "input": "For which years were the recorded construction costs of the vessels?", + "output": "In which years was the vessels under construction costs recorded for?" + }, + { + "input": "What is the significance or meaning behind vessels that are currently being constructed?", + "output": "What does vessels under construction represent?" + }, + { + "input": "In which year were the installment payments for the progress shipyard lower compared to other years?", + "output": "Which year was the progress shipyard installments lower?" + }, + { + "input": "In which specific year did the progress shipyard experience a decrease in the number of installments being made?", + "output": "Which year was the progress shipyard installments lower?" + }, + { + "input": "What was the difference in the average daily hire rate between 2017 and 2018?", + "output": "What was the change in average daily hire rate from 2017 to 2018?" 
+ }, + { + "input": "What is the change in pool gross revenues from 2017 to 2018?", + "output": "What was the difference in the total revenue generated from pools in 2017 compared to 2018?" + }, + { + "input": "What is the change in pool gross revenues from 2017 to 2018?", + "output": "What is the difference in total gross revenues generated by the pool in 2017 compared to 2018?" + }, + { + "input": "In which years was the other payables and accruals recorded for?", + "output": "For which years was the recording of other payables and accruals made?" + }, + { + "input": "How many vessels were hired for employment in the year 2018?", + "output": "How many vessels were hired in 2018?" + }, + { + "input": "In 2018, what was the total number of vessels that were hired?", + "output": "How many vessels were hired in 2018?" + }, + { + "input": "In which year was the accrued interest higher?", + "output": "\"In which year did the accumulated interest amount exceed the interest accrued in other years?\"" + }, + { + "input": "What is the percentage difference in the combined value of total payables and accruals between 2018 and 2019?", + "output": "What was the percentage change in total payables and accruals from 2018 to 2019?" + }, + { + "input": "What was the difference in the ratio of a company's long-term debt to its equity from 2018 to 2019?", + "output": "What was the change in gearing ratio from 2018 to 2019?" + }, + { + "input": "In 2019, how many operating days were there and what was the total revenue generated during those days?", + "output": "What was the total revenue operating days in 2019?" + }, + { + "input": "How many operating days were there in 2019 and what was the total revenue generated during those days?", + "output": "What was the total revenue operating days in 2019?" 
+ }, + { + "input": "What was the change in average daily hire rate from 2018 to 2019?", + "output": "What was the specific difference in the average rate of daily hiring between 2018 and 2019? How did the average daily hire rate change from one year to the other?" + }, + { + "input": "\"What was the specific monetary value of the cash distributions made in the year 2019 for common units?\"", + "output": "What was the amount of cash distributions in 2019 for common units?" + }, + { + "input": "How much did the size of the ATM Programme change?", + "output": "What was the change in size of the ATM Programme?" + }, + { + "input": "\"When was the specific year when the general and administrative expenses were officially documented or accounted for?\"", + "output": "In which year was the general and administrative expenses recorded for?" + }, + { + "input": "Where does the management intend to relocate its employees?", + "output": "\"What are the specific plans of the management regarding the relocation of its employees?\"" + }, + { + "input": "What was the selling price of Methane Julia Louise?", + "output": "How much was Methane Julia Louise sold for?" + }, + { + "input": "Which company was the vessel sold to?", + "output": "To which company was the vessel sold? Please provide the name of the company that purchased the vessel." + }, + { + "input": "Which company was the vessel sold to?", + "output": "\"To which company was the vessel sold?\"" + }, + { + "input": "What was the percentage change in total right-of use assets from start to end 2019?", + "output": "What was the percentage change in the total value of right-of-use assets between the beginning and end of 2019?" + }, + { + "input": "How much was the change in the repayment amount for the bank loan?", + "output": "By how much did the bank loan repayment change?" 
+ }, + { + "input": "What year experienced a decrease in audit fees compared to previous years?", + "output": "In which year was the audit fees lower?" + }, + { + "input": "What was the percentage change in audit fees between 2018 and 2019?", + "output": "What was the change in audit fees from 2018 to 2019?" + }, + { + "input": "What was the percentage change in audit fees from the year 2018 to the year 2019?", + "output": "What was the change in audit fees from 2018 to 2019?" + }, + { + "input": "What are the components subjected to credit risk?", + "output": "What are the various components that are exposed to the risk of credit?" + }, + { + "input": "What is the significance of bunkers' consumption and other voyage expenses in relation to overall expenses for a voyage?", + "output": "What does bunkers’ consumption and other voyage expenses represent?" + }, + { + "input": "What is the highest year for brokers' commissions on revenue?", + "output": "In which year was the brokers' commissions on revenue the highest?" + }, + { + "input": "What was the change in bunkers’ consumption and other voyage expenses from 2018 to 2019?", + "output": "What was the difference in the consumption of bunkers and other voyage expenses between 2018 and 2019?" + }, + { + "input": "What is the specific monetary value that The Cool Pool Limited is required to pay in 2018?", + "output": "What is the amount due from The Cool Pool Limited in 2018?" + }, + { + "input": "In which years was the dividends receivable and other amounts due from related parties recorded for?", + "output": "For which years were the recorded dividends receivable and other amounts due from related parties?" + }, + { + "input": "In which years was the dividends receivable and other amounts due from related parties recorded for?", + "output": "In which specific years were the dividends receivable and other outstanding amounts due from related parties recorded?" 
+ }, + { + "input": "In which years was the investments recorded for?", + "output": "For which specific years were the investments recorded?" + }, + { + "input": "What was the numerical difference in the number of additions made between 2018 and 2019?", + "output": "What was the change in additions from 2018 to 2019?" + }, + { + "input": "What was the difference in the number of additions between 2018 and 2019?", + "output": "What was the change in additions from 2018 to 2019?" + }, + { + "input": "What were the specific Level 2 municipal and corporate bonds held on December 31, 2018?", + "output": "What are the respective Level 2 municipal and corporate bonds as at December 31, 2018?" + }, + { + "input": "What is the value of municipal bonds as a percentage of the total marketable securities?", + "output": "What proportion of the overall marketable securities is represented by municipal bonds?" + }, + { + "input": "What are the respective goodwill amounts in 2015 and 2016?", + "output": "What were the goodwill amounts in 2015 and 2016, respectively?" + }, + { + "input": "What were the average amounts of total cash, cash equivalents, and marketable securities in the years 2015 and 2016?", + "output": "What is the average total cash, cash equivalents, and marketable securities in 2015 and 2016?" + }, + { + "input": "What is the proportion of shares bought in November out of the total shares purchased in the last three months of 2019?", + "output": "What percentage of the total shares purchased in the last three months of 2019 are bought in November?" + }, + { + "input": "What is the proportion of shares bought in November out of the total shares purchased during the last three months of 2019?", + "output": "What percentage of the total shares purchased in the last three months of 2019 are bought in November?" 
+ }, + { + "input": "What is the percentage change in revenue from term license between 2018 and 2019?", + "output": "What is the percent increase or decrease in revenue from term license for the year 2019 compared to 2018?" + }, + { + "input": "What is the percentage increase or decrease in revenue generated from maintenance services from 2018 to 2019?", + "output": "What is the percentage change in revenue from maintenance between 2018 and 2019?" + }, + { + "input": "What were the pre-tax losses in 2018 and 2019 for the company?", + "output": "What are the respective loss before income tax benefits in 2018 and 2019?" + }, + { + "input": "What were the pre-tax domestic and foreign earnings in 2017?", + "output": "What are the respective domestic and foreign income before income taxes in 2017?" + }, + { + "input": "\"What were the pre-tax incomes in 2017, separated by domestic and foreign sources?\"", + "output": "What are the respective domestic and foreign income before income taxes in 2017?" + }, + { + "input": "\"What were the total losses incurred in the domestic market and international markets in 2019, excluding any income tax benefits received?\"", + "output": "What are the respective domestic and foreign losses before income tax benefits in 2019?" + }, + { + "input": "What is the sum of all outstanding receivables that are yet to be invoiced, and are expected to be collected within a period of two years?", + "output": "What is the total unbilled receivables due within 2 years?" + }, + { + "input": "What is the sum of all outstanding amounts receivable that have not been invoiced yet, and are expected to be collected within the next 1 to 5 years?", + "output": "What is the total unbilled receivables due between 1 to 5 years?" 
+ }, + { + "input": "What is the cumulative value of unrealized billings that are yet to be accounted for and are expected to be collected within a period ranging from 1 to 5 years?", + "output": "What is the total unbilled receivables due between 1 to 5 years?" + }, + { + "input": "What is the difference between the company's federal and state net operating losses?", + "output": "What differentiates the federal and state net operating losses of the company?" + }, + { + "input": "What were the amounts that the company is authorised to repurchase in 2019 and 2018 respectively?", + "output": "What were the authorized repurchase amounts for the company in both 2019 and 2018?" + }, + { + "input": "What were the respective amounts of stock repurchases made by the company as at January 1, 2018 and 2019 respectively?", + "output": "What were the specific amounts of stock repurchases made by the company as of January 1, 2018 and January 1, 2019, respectively?" + }, + { + "input": "What was the average amount of stock repurchases as of January 1st in both 2017 and 2018?", + "output": "What is the average stock repurchases as at January 1, 2017 and 2018?" + }, + { + "input": "What is the average amount of stock repurchases as of January 1, 2017, and January 1, 2018?", + "output": "What is the average stock repurchases as at January 1, 2017 and 2018?" + }, + { + "input": "What is the exact percentage difference in the amount of long-term deferred revenue from 2018 to 2019?", + "output": "What is the percentage change in long-term deferred revenue between 2018 and 2019?" + }, + { + "input": "What are the respective values for Term in 2018 and 2019?", + "output": "What are the values for Term in the year 2018? What are the values for Term in the year 2019?" 
+ }, + { + "input": "What are the company's respective stock-based compensation for cost of revenues in 2019 and 2018?", + "output": "What were the stock-based compensation expenses for the cost of revenues incurred by the company in both 2019 and 2018?" + }, + { + "input": "What are the company's respective stock-based compensation for cost of revenues in 2019 and 2018?", + "output": "\"What were the stock-based compensation expenses for cost of revenues in 2019 and 2018 for the company?\"" + }, + { + "input": "What is the average amount of stock-based compensation for the cost of revenue that the company has incurred from 2017 to 2019?", + "output": "What is the company's average stock-based compensation for the cost of revenue between 2017 to 2019?" + }, + { + "input": "What is the average amount of stock-based compensation granted by the company for the selling and marketing department in both 2018 and 2019?", + "output": "What is the company's average stock-based compensation for selling and marketing in 2018 and 2019?" + }, + { + "input": "\"What was the average amount of stock-based compensation granted to employees in the selling and marketing department of the company in both 2018 and 2019?\"", + "output": "What is the company's average stock-based compensation for selling and marketing in 2018 and 2019?" + }, + { + "input": "What is the percentage change in revenue earned from term license from 2015 to 2016?", + "output": "What is the percentage change in the revenue earned from term license between 2015 and 2016?" + }, + { + "input": "What is the total revenue earned from maintenance in 2017 and 2018?", + "output": "What is the combined revenue generated from maintenance services in the years 2017 and 2018?" + }, + { + "input": "What is the total revenue earned from maintenance in 2017 and 2018?", + "output": "What was the combined revenue generated from maintenance services in the years 2017 and 2018?" 
+ }, + { + "input": "How did actuarial calculations impact the resulting changes in parameters?", + "output": "What did the changes in parameters on the basis of actuarial calculations lead to?" + }, + { + "input": "What was the precise amount of the actual return on plan assets in the fiscal year 2019?", + "output": "How much did the actual return on plan assets amount to in FY2019?" + }, + { + "input": "What items or expenses are typically excluded from the expected payments?", + "output": "What is not included in expected payments?" + }, + { + "input": "Explain what expenses or items are typically not encompassed within the estimated payments.", + "output": "What is not included in expected payments?" + }, + { + "input": "What was the change in interest income in FY2019 from FY2018?", + "output": "What was the difference in the amount of income generated from interest in the fiscal year 2019 compared to the fiscal year 2018?" + }, + { + "input": "What was the change in interest income in FY2019 from FY2018?", + "output": "What was the specific difference in interest income between the fiscal year 2019 and the fiscal year 2018?" + }, + { + "input": "In which year did Asia experience a higher year-on-year percentage change in GDP compared to 2018 during the period from 2018 to 2019?", + "output": "In which year was the Year-on-year percentage change of GDP in Asia larger from 2018 to 2019?" + }, + { + "input": "What was the change in the year-on-year percentage change in GDP for Germany from 2018 to 2019?", + "output": "What was the difference in the percentage change of Germany's GDP from one year to the next, comparing 2018 and 2019?" + }, + { + "input": "In which year did the total sum of taxes collected exceed the amount collected in all other years?", + "output": "In which year was the total amount of taxes larger?" 
+ }, + { + "input": "What was the exact difference in the total amount of taxes paid in the time period from 2018 to 2019 compared to the time period from 2017 to 2018?", + "output": "What was the change in total taxes in 2018/2019 from 2017/2018?" + }, + { + "input": "What was the percentage increase or decrease in the total amount of taxes in 2018/2019 compared to 2017/2018?", + "output": "What was the percentage change in total taxes in 2018/2019 from 2017/2018?" + }, + { + "input": "What factors or criteria are used as a foundation for the measurements being taken?", + "output": "What is the basis for the measurements?" + }, + { + "input": "What is the fundamental basis or principle used to determine the measurements being referred to in the context?", + "output": "What is the basis for the measurements?" + }, + { + "input": "In which specific year did the amount for Belgium surpass or exceed the previous value?", + "output": "In which year was the amount for Belgium larger?" + }, + { + "input": "In which specific year did the total amount (not specified) exceed the amount for Belgium?", + "output": "In which year was the amount for Belgium larger?" + }, + { + "input": "What was the amount of cash and cash equivalents in FY2019?", + "output": "How much cash and cash equivalents were reported in the financial statements for the fiscal year 2019?" + }, + { + "input": "What was the exact percentage difference in Cash and cash equivalents between FY2018 and FY2019 after considering adjustments?", + "output": "What was the percentage change in Cash and cash equivalents in FY2019 from FY2018 adjusted?" + }, + { + "input": "What was the calculated percentage difference in Cash and cash equivalents between FY2019 and FY2018, after adjusting for any relevant factors?", + "output": "What was the percentage change in Cash and cash equivalents in FY2019 from FY2018 adjusted?" 
+ }, + { + "input": "In which specific year did the absolute value of the Earnings per share exceed the value in another year?", + "output": "In which year was the absolute value of the Earnings per share larger?" + }, + { + "input": "\"What specific factor was used to calculate the net cost of the current service?\"", + "output": "What was the current service cost netted against?" + }, + { + "input": "\"In which specific year did the total sales exceed the sales of other years in terms of maximum revenue generated?\"", + "output": "In which year was the Total sales larger?" + }, + { + "input": "In what year did the headcount in METRO Asia exceed the corresponding headcount in other years?", + "output": "In which year was the amount in METRO Asia headcount larger?" + }, + { + "input": "What was the year-over-year change in the number of employees at METRO AG from 2018 to 2019?", + "output": "What was the change in METRO AG headcount in 2019 from 2018?" + }, + { + "input": "With which companies did METRO maintain business relations with, and who were the parties involved in these relations?", + "output": "Who were the parties to which METRO maintained business relations with related companies?" + }, + { + "input": "What were the business relations to related companies listed in the table?", + "output": "What were the business relations between the listed companies in the table and other related entities or organizations?" + }, + { + "input": "What was the difference in the amount and type of services received in the fiscal year 2019 compared to the fiscal year 2018?", + "output": "What was the change in services received in FY2019 from FY2018?" + }, + { + "input": "What is the exact percentage change in the number of services received during fiscal year 2019 compared to fiscal year 2018?", + "output": "What was the percentage change in services received in FY2019 from FY2018?" 
+ }, + { + "input": "What was the exact percentage difference in the quantity of services received during the fiscal year 2019 compared to the fiscal year 2018?", + "output": "What was the percentage change in services received in FY2019 from FY2018?" + }, + { + "input": "In what format is the cash flow statement presented in the table?", + "output": "What version is the cash flow statement in the table in?" + }, + { + "input": "What specific expenses related to income tax were officially acknowledged in the year 2019?", + "output": "What are the recognised income tax expenses in 2019?" + }, + { + "input": "What was the percentage change in recognised income tax expenses in 2018/2019 from 2017/2018?", + "output": "What was the percentage increase or decrease in recognised income tax expenses for the financial year 2018/2019 compared to the financial year 2017/2018?" + }, + { + "input": "What was the percentage change in recognised income tax expenses in 2018/2019 from 2017/2018?", + "output": "What was the percentage increase or decrease in recognised income tax expenses for the fiscal year 2018/2019 compared to the fiscal year 2017/2018?" + }, + { + "input": "What was the change in EBT in 2018/2019 from 2017/2018?", + "output": "What was the difference in EBT (Earnings Before Taxes) between 2018 and 2019 when compared to 2017 and 2018?" + }, + { + "input": "What does the other operating income include?", + "output": "What type of income is included under \"other operating income\"? Please provide a comprehensive explanation of the various sources that fall under this category." + }, + { + "input": "In which specific year did the amount of Services surpass the quantity in other years?", + "output": "In which year was the amount of Services larger?" + }, + { + "input": "During which year did the quantity of services surpass the quantity in previous years?", + "output": "In which year was the amount of Services larger?" 
+ }, + { + "input": "What was the exact amount of equity during the fiscal year 2019?", + "output": "What was the amount of equity in FY2019?" + }, + { + "input": "What proportion of the total earnings in FY2019 can be attributed to the Noncontrolling interests?", + "output": "How much of earnings does the Noncontrolling interests account for in FY2019?" + }, + { + "input": "What proportion of the FY2019 earnings is represented by the Noncontrolling interests account?", + "output": "How much of earnings does the Noncontrolling interests account for in FY2019?" + }, + { + "input": "What was the exact numerical difference in sales between FY2019 and FY2018?", + "output": "What was the change in Sales in FY2019 from FY2018?" + }, + { + "input": "What was the difference in the total amount of sales between fiscal year 2019 and fiscal year 2018?", + "output": "What was the change in Sales in FY2019 from FY2018?" + }, + { + "input": "When were the calculations of employee numbers by segments performed?", + "output": "When were the employee numbers by segments calculated?" + }, + { + "input": "What were the specific segments or categories mentioned under the \"METRO\" component in the table that accounted for the number of employees?", + "output": "What were the components under METRO in the table when accounting for the employee numbers by segments?" + }, + { + "input": "What was the change in METRO AG in 2019 from 2018?", + "output": "What was the year-on-year change in METRO AG's performance in 2019 compared to 2018?" + }, + { + "input": "What was the percentage difference in METRO AG's performance in 2019 compared to 2018?", + "output": "What was the percentage change in METRO AG in 2019 from 2018?" + }, + { + "input": "What was the percentage difference in the performance of METRO AG in 2019 compared to 2018?", + "output": "What was the percentage change in METRO AG in 2019 from 2018?" 
+ }, + { + "input": "In which year was the amount of total assets larger?", + "output": "\"When comparing the amount of total assets, which year had a larger value?\"" + }, + { + "input": "What was the percentage change in total assets in FY2019 from FY2018?", + "output": "What was the percentage increase or decrease in the total assets for the fiscal year 2019 compared to the total assets for the fiscal year 2018?" + }, + { + "input": "What is the source of revenue that compensates for the expenses incurred in providing logistics services?", + "output": "What is offset by income from logistics services?" + }, + { + "input": "What are the components under Other operating expenses in the table?", + "output": "What specific components are included in the category of \"Other operating expenses\" as mentioned in the table?" + }, + { + "input": "What are the components under Other operating expenses in the table?", + "output": "What specifically are the individual components included in the category of Other operating expenses in the provided table?" + }, + { + "input": "What was the change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?", + "output": "\"What is the difference in losses resulting from the sale or disposal of fixed assets between the fiscal years 2017/2018 and 2018/2019?\"" + }, + { + "input": "What was the change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?", + "output": "What was the difference in the amount of losses incurred from selling fixed assets between the fiscal years 2018/2019 and 2017/2018?" + }, + { + "input": "What was the percentage increase or decrease in losses resulting from the disposal of fixed assets during the period of 2018/2019 compared to the period of 2017/2018?", + "output": "What was the percentage change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?" 
+ }, + { + "input": "What was the percentage change in losses incurred from the disposal of fixed assets in the fiscal year 2018/2019 compared to the fiscal year 2017/2018?", + "output": "What was the percentage change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?" + }, + { + "input": "What was the change in interest carry-forwards in FY2019 from FY2018?", + "output": "What was the difference in the amount of interest carry-forwards between the fiscal years of 2019 and 2018?" + }, + { + "input": "What are the primary elements within the table that are utilized to determine the outcome of the interest calculation?", + "output": "What are the broad components in the table which are used to calculate the interest result?" + }, + { + "input": "What are the various broad components included in the table that are utilized for calculating the outcome of interest?", + "output": "What are the broad components in the table which are used to calculate the interest result?" + }, + { + "input": "What was the percentage change in the amount of income earned from interest during the period between 2018 and 2019 compared to the previous period between 2017 and 2018?", + "output": "What was the percentage change in interest income in 2018/2019 from 2017/2018?" + }, + { + "input": "In which specific year did the total sum of taxes other than general administrative expenses exceed the amount incurred in those administrative expenses?", + "output": "In which year was the amount of other taxes thereof from general administrative expenses larger?" + }, + { + "input": "In which years were the reserves retained from earnings calculated in?", + "output": "During which specific years were the calculations for retained reserves from earnings conducted?" + }, + { + "input": "In which years were the reserves retained from earnings calculated in?", + "output": "During which specific years were the calculations for retaining reserves from earnings carried out?" 
+ }, + { + "input": "What was the change in the Income tax on components of other comprehensive income in FY2019 from FY2018?", + "output": "What was the difference in the income tax related to the components of other comprehensive income from fiscal year 2018 to fiscal year 2019?" + }, + { + "input": "What was the change in operating income in the Communications Solutions segment in 2019?", + "output": "What was the specific amount of increase or decrease in operating income for the Communications Solutions segment during the year 2019?" + }, + { + "input": "What was the change in operating income in the Communications Solutions segment in 2019?", + "output": "What was the percentage change in operating income specifically for the Communications Solutions segment during the year 2019 compared to the previous year?" + }, + { + "input": "Why did operating income decrease in fiscal 2019?", + "output": "\"What were the reasons for the decrease in operating income during fiscal year 2019?\"" + }, + { + "input": "Why did operating income decrease in fiscal 2019?", + "output": "What were the reasons for the decrease in operating income in fiscal 2019?" + }, + { + "input": "In which year did the volume or quantity of raw materials reach its highest point?", + "output": "In which year was Raw materials larger?" + }, + { + "input": "In which specific year did the total volume or quantity of raw materials experience a greater increase or reach a higher value compared to previous years?", + "output": "In which year was Raw materials larger?" + }, + { + "input": "In which specific years was the calculation of the Repurchase value conducted?", + "output": "Which years was the Repurchase value calculated in?" + }, + { + "input": "What was the exact difference in the total number of common shares repurchased in the year 2019 compared to 2018?", + "output": "What was the change in the Number of common shares repurchased in 2019 from 2018?" 
+ }, + { + "input": "What was the calculation for determining the percentage change in the quantity of common shares repurchased during the year 2019 in comparison to 2018?", + "output": "What was the percentage change in the Number of common shares repurchased in 2019 from 2018?" + }, + { + "input": "What are the specific categories or divisions for which the net sales figures are displayed in the provided table?", + "output": "What are the segments for which the net sales are presented in the table?" + }, + { + "input": "What year did Industrial Solutions experience a larger size or growth compared to other years?", + "output": "In which year was Industrial Solutions larger?" + }, + { + "input": "What specific changes occurred in the Industrial Solutions sector between 2018 and 2019?", + "output": "What was the change in Industrial Solutions in 2019 from 2018?" + }, + { + "input": "In 2019, what specific changes occurred in the Industrial Solutions sector compared to the previous year of 2018?", + "output": "What was the change in Industrial Solutions in 2019 from 2018?" + }, + { + "input": "In which years was the basic earnings per share calculated for?", + "output": "For which specific years was the metric of basic earnings per share calculated and used for evaluation?" + }, + { + "input": "In which years was the basic earnings per share calculated for?", + "output": "For which years was the calculation of basic earnings per share carried out? Please provide specific details in your answer." + }, + { + "input": "What was the change in Dilutive impact of share-based compensation arrangements in 2019 from 2018?", + "output": "What was the difference in the dilutive impact of share-based compensation arrangements between 2018 and 2019?" 
+ }, + { + "input": "What was the year-on-year percentage change in the dilutive impact caused by share-based compensation arrangements in 2019 compared to 2018?", + "output": "What was the percentage change in Dilutive impact of share-based compensation arrangements in 2019 from 2018?" + }, + { + "input": "What was the exact percentage change in the dilutive impact of share-based compensation arrangements between 2018 and 2019?", + "output": "What was the percentage change in Dilutive impact of share-based compensation arrangements in 2019 from 2018?" + }, + { + "input": "In which specific year did the percentage of industrial solutions reach its lowest point?", + "output": "In which year was the percentage of industrial solutions the lowest in?" + }, + { + "input": "What was the percentage change in the Weighted-Average Grant-Date Fair Value for nonvested shares between 2018 and 2019?", + "output": "What was the change in the Weighted-Average Grant-Date Fair Value for nonvested shares in 2019 from 2018?" + }, + { + "input": "In the table, what segments were considered when calculating the backlog?", + "output": "What were the segments for which backlog was calculated in the table?" + }, + { + "input": "For which segments in the table was the backlog calculated?", + "output": "What were the segments for which backlog was calculated in the table?" + }, + { + "input": "What was the change in total backlog in 2019 from 2018?", + "output": "What was the difference in the total backlog between 2019 and 2018?" + }, + { + "input": "What was the percentage difference in the total amount of unfinished work in 2019 compared to 2018?", + "output": "What was the percentage change in total backlog in 2019 from 2018?" + }, + { + "input": "In which specific year did the amount spent on acquisition and integration costs exceed any other year?", + "output": "In which year was Acquisition and integration costs larger?" 
+ }, + { + "input": "In which specific year did the expenses incurred for acquisition and integration costs exceed the expenses in other years?", + "output": "In which year was Acquisition and integration costs larger?" + }, + { + "input": "What are the components under U.S. in the table?", + "output": "Which components are listed under the U.S. category in the table?" + }, + { + "input": "What are the different categories of Income Tax Expense (Benefit) mentioned in the table?", + "output": "What are the types of Income Tax Expense (Benefit) in the table?" + }, + { + "input": "In which specific year did Non-U.S. experience the highest income tax expense (benefit)?", + "output": "Which year was the current income tax expense (benefit) for Non-U.S. the largest?" + }, + { + "input": "In which year did the Non-U.S. current income tax expense (benefit) reach its highest amount?", + "output": "Which year was the current income tax expense (benefit) for Non-U.S. the largest?" + }, + { + "input": "What was the change in Current income tax expense (benefit) in 2019 from 2018?", + "output": "What was the difference in Current income tax expense (benefit) between 2019 and 2018?" + }, + { + "input": "What is the company's policy regarding the allocation and management of funds?", + "output": "What is the company's funding policy?" + }, + { + "input": "What is the minimum amount of contributions that individuals are required to make to the pension plan outside of the United States?", + "output": "How much minimum required contributions is expected to be made to the non-U.S. pension plan?" + }, + { + "input": "Which fiscal year between 2020 and 2024 will witness the highest benefit payments under the United States plans?", + "output": "In which Fiscal year from 2020 to 2024 would the benefit payments under the U.S Plans be the largest?" 
+ }, + { + "input": "During which fiscal year from 2020 to 2024 will the benefit payments under the United States Plans reach their maximum extent?", + "output": "In which Fiscal year from 2020 to 2024 would the benefit payments under the U.S Plans be the largest?" + }, + { + "input": "What is typically not included in the net earnings of companies operating outside the United States?", + "output": "What do the Non-U.S. net earnings exclude?" + }, + { + "input": "What is excluded from the net earnings of countries other than the U.S. and why?", + "output": "What do the Non-U.S. net earnings exclude?" + }, + { + "input": "What was the percentage increase or decrease in the expense for U.S. federal income tax at the statutory rate in 2019 compared to 2018?", + "output": "What was the percentage change in Notional U.S. federal income tax expense at the statutory rate in 2019 from 2018?" + }, + { + "input": "What are the specific elements or factors taken into account when calculating Restructuring and Other Charges, Net?", + "output": "What are the components considered under Restructuring and Other Charges, Net?" + }, + { + "input": "When did the lowest value of net restructuring charges occur?", + "output": "In which year was Restructuring charges, net the lowest?" + }, + { + "input": "When was the year with the lowest net restructuring charges recorded?", + "output": "In which year was Restructuring charges, net the lowest?" + }, + { + "input": "What was the change in Restructuring charges, net in 2019 from 2018?", + "output": "What was the difference in the amount of Restructuring charges, net between 2019 and 2018?" + }, + { + "input": "What was the change in Restructuring charges, net in 2019 from 2018?", + "output": "What was the net change in restructuring charges in 2019 compared to 2018?" 
+ }, + { + "input": "In 2019, what was the percentage difference in Restructuring charges, net compared to 2018?", + "output": "What was the percentage change in Restructuring charges, net in 2019 from 2018?" + }, + { + "input": "What specific regions do the net sales to external customers correspond to?", + "output": "What are the net sales by geographic region to external customers attributed to?" + }, + { + "input": "In which specific year did the percentage in Americas reach its highest point compared to other years?", + "output": "In which year was the percentage in Americas the largest?" + }, + { + "input": "What is the specific year when the largest percentage was recorded in the Americas?", + "output": "In which year was the percentage in Americas the largest?" + }, + { + "input": "What was the average net sales in Asia-Pacific as a percentage of total net sales across 2017, 2018 and 2019?", + "output": "What was the average proportion of net sales generated in the Asia-Pacific region, compared to the total net sales, for the years 2017, 2018, and 2019?" + }, + { + "input": "What was the average net sales in Asia-Pacific as a percentage of total net sales across 2017, 2018 and 2019?", + "output": "What was the average percentage of total net sales in the Asia-Pacific region for the years 2017, 2018, and 2019 combined?" + }, + { + "input": "What was the percentage change in the amount of US dollar-denominated long-term debt from 2018 to 2019?", + "output": "What was the increase / (decrease) in US dollar-denominated long-term debt from 2018 to 2019?" + }, + { + "input": "What is the increase/ (decrease) in Numerator (basic) – Net income from 2018 to 2019?", + "output": "What is the change in Numerator (basic) – Net income from 2018 to 2019?" 
+ }, + { + "input": "What is the increase/ (decrease) in Numerator (basic) – Net income from 2018 to 2019?", + "output": "What is the absolute change in Numerator (basic) – Net income from the year 2018 to the year 2019?" + }, + { + "input": "What is the percentage change in Basic Earnings per share from 2018 to 2019?", + "output": "What is the increase/ (decrease) in Basic Earnings per share from 2018 to 2019?" + }, + { + "input": "What is the percentage change in Diluted Earnings per share from 2018 to 2019?", + "output": "What is the increase/ (decrease) in Diluted Earnings per share from 2018 to 2019?" + }, + { + "input": "What is the percentage change in Diluted Earnings per share (EPS) between 2018 and 2019?", + "output": "What is the increase/ (decrease) in Diluted Earnings per share from 2018 to 2019?" + }, + { + "input": "What was the specific percentage of taxes paid in the year 2019, taking into account all applicable taxes and deductions?", + "output": "What was the effective tax rate in 2019?" + }, + { + "input": "What was the average Computed income tax expense?", + "output": "What was the average expense for income tax computed?" + }, + { + "input": "What was the percentage change in the amount of interest on borrowings from 2018 to 2019?", + "output": "What was the increase / (decrease) in Interest on borrowings from 2018 to 2019?" + }, + { + "input": "What was the percentage change in Interest on borrowings from 2018 to 2019?", + "output": "What was the increase / (decrease) in Interest on borrowings from 2018 to 2019?" + }, + { + "input": "What was the increase / (decrease) in the Finance costs before interest on lease liabilities from 2018 to 2019?", + "output": "What was the change in the Finance costs before interest on lease liabilities from 2018 to 2019?" + }, + { + "input": "What was the percentage change in Adjusted EBITDA between 2018 and 2019?", + "output": "What was the increase / (decrease) in Adjusted EBITDA from 2018 to 2019?" 
+ }, + { + "input": "What was the average Depreciation and amortization?", + "output": "What was the average amount of depreciation and amortization expenses incurred over a specific period?" + }, + { + "input": "What factors contributed to the overall rise in both depreciation and amortization expenses?", + "output": "What caused the increase in the total depreciation and amortization?" + }, + { + "input": "What was the Depreciation of right-of-use assets in 2019?", + "output": "What was the amount of depreciation recorded for right-of-use assets during the year 2019?" + }, + { + "input": "What was the Depreciation of right-of-use assets in 2019?", + "output": "\"What was the amount of depreciation for right-of-use assets recorded in the financial statements for the year 2019?\"" + }, + { + "input": "What was the increase / (decrease) in the Depreciation from 2018 to 2019?", + "output": "What was the net change in Depreciation, either an increase or decrease, between the years 2018 and 2019?" + }, + { + "input": "What was the increase / (decrease) in the Depreciation from 2018 to 2019?", + "output": "What was the difference in the Depreciation amount from 2018 to 2019?" + }, + { + "input": "What was the percentage change in the total depreciation and amortization from 2018 to 2019?", + "output": "What was the increase / (decrease) in the Total depreciation and amortization from 2018 to 2019?" + }, + { + "input": "What was the percentage change in Total depreciation and amortization from 2018 to 2019?", + "output": "What was the increase / (decrease) in the Total depreciation and amortization from 2018 to 2019?" + }, + { + "input": "What was the change in the coupon rate between November 12, 2019, and April 30, 2019?", + "output": "What was the increase / (decrease) in the coupon rate from November 12, 2019 to April 30, 2019?" 
+ }, + { + "input": "What was the change in the coupon rate from November 12, 2019 to April 30, 2019?", + "output": "What was the increase / (decrease) in the coupon rate from November 12, 2019 to April 30, 2019?" + }, + { + "input": "What was the percentage change in the Equivalent (Cdn$) value from 2018 to November 2019?", + "output": "What was the increase / (decrease) in the Equivalent (Cdn$) between 2018 and November 2019?" + }, + { + "input": "What is the percentage change in the amount of interest on borrowings from 2018 to 2019?", + "output": "What is the increase/ (decrease) in Interest on borrowings from 2018 to 2019?" + }, + { + "input": "By what percentage did the Interest on borrowings change from 2018 to 2019? Please state whether there was an increase or decrease.", + "output": "What is the increase/ (decrease) in Interest on borrowings from 2018 to 2019?" + }, + { + "input": "What is the purpose of utilizing valuation-related analysis and making capital structure-related decisions?", + "output": "What is used for to conduct valuation-related analysis and make capital structure-related decisions?" + }, + { + "input": "What items are considered when calculating the adjusted net debt?", + "output": "What is included in the adjusted net debt?" + }, + { + "input": "In which context or industry is the valuation of debt derivatives without incorporating credit risk used as an evaluation tool?", + "output": "Where is debt derivatives valued without adjustment for credit risk used for evaluation?" + }, + { + "input": "What is the net change in long-term debt from December 31, 2018, to December 31, 2019?", + "output": "What is the increase/ (decrease) in Long-term debt from December 31, 2018 to December 31, 2019?" + }, + { + "input": "What is the change in the amount of Long-term debt from December 31, 2018 to December 31, 2019?", + "output": "What is the increase/ (decrease) in Long-term debt from December 31, 2018 to December 31, 2019?" 
+ }, + { + "input": "What is the change in the value of net debt derivative assets without credit risk adjustment from December 31, 2018 to December 31, 2019?", + "output": "What is the increase/ (decrease) in Net debt derivative assets valued without any adjustment for credit risk from December 31, 2018 to December 31, 2019?" + }, + { + "input": "What is the method of measuring accounts receivable and what factors are considered in the measurement process?", + "output": "How is accounts receivable measured?" + }, + { + "input": "What is the percentage change in the amount of customer accounts receivable from 2018 to 2019?", + "output": "What is the increase/ (decrease) in Customer accounts receivable from 2018 to 2019?" + }, + { + "input": "\"What is the method employed to settle the principal amount of Convertible Notes in cash upon conversion, and how does it factor into the computation of diluted earnings?\"", + "output": "Which method is used for to settle the principal of the Convertible Notes in cash on conversion and calculates diluted earnings?" + }, + { + "input": "What are the consequences when a loss is incurred that can be attributed to shareholders?", + "output": "What happens if a loss attributable to shareholders has been incurred?" + }, + { + "input": "What was the Convertible Notes that had an anti-dilutive effect on the calculation of diluted earnings per common share in the year end 2019, 2018 and 2017 respectively?", + "output": "What were the Convertible Notes that impacted diluted earnings per common share calculation in 2019, 2018, and 2017, specifically in terms of their anti-dilutive effects?" + }, + { + "input": "What is the change in Loss per common share - basic and diluted from 2018 to 2019?", + "output": "What is the difference in the loss per common share - basic and diluted between 2018 and 2019?" 
+ }, + { + "input": "What is the change in Loss per common share - basic and diluted from 2018 to 2019?", + "output": "What is the difference in basic and diluted Loss per common share between 2018 and 2019?" + }, + { + "input": "How are the recognition processes for realized and unrealized losses different?", + "output": "How are the realized and unrealized losses recognized?" + }, + { + "input": "How are both realized and unrealized losses recognized? Please provide a detailed explanation of the recognition process for each type of loss.", + "output": "How are the realized and unrealized losses recognized?" + }, + { + "input": "In which specific year did the Gain on sale or write-down of a cost-accounted investment amount to less than 1,000 thousands?", + "output": "In which year is the Gain on sale / (write-down) of cost-accounted investment less than 1,000 thousands?" + }, + { + "input": "\"In which fiscal year did the Gain on sale or write-down of a cost-accounted investment amount to less than 1,000 thousands (1 million)?\"", + "output": "In which year is the Gain on sale / (write-down) of cost-accounted investment less than 1,000 thousands?" + }, + { + "input": "What were the total fees paid by Teekay Tankers to KPMG LLP in 2019 and 2018, respectively?", + "output": "What fees was paid to KPMG LLP by Teekay Tankers during 2019 and 2018 respectively?" + }, + { + "input": "What were the fees paid by Teekay Tankers to KPMG LLP in 2019 and 2018?", + "output": "What fees was paid to KPMG LLP by Teekay Tankers during 2019 and 2018 respectively?" + }, + { + "input": "What are the components or factors included in the income tax expense calculation?", + "output": "What is included in the income tax expense?" + }, + { + "input": "What items are incorporated into the income tax expense?", + "output": "What is included in the income tax expense?" 
+ }, + { + "input": "What is the difference in the number of positions taken in previous years between December 31, 2019, and December 31, 2018?", + "output": "What is the change in Changes for positions taken in prior years from December 31, 2019 to December 31, 2018?" + }, + { + "input": "What is the net change in the number of positions taken in prior years from December 31, 2019 to December 31, 2018?", + "output": "What is the change in Changes for positions taken in prior years from December 31, 2019 to December 31, 2018?" + }, + { + "input": "What led to increase in Consolidated net cash flow from operating activities for the year ended December 31, 2019?", + "output": "What factors contributed to the rise in Consolidated net cash flow from operating activities during the year ending on December 31, 2019?" + }, + { + "input": "How much did Brookfield receive from the sale of its interests in Altera in 2019?", + "output": "How much was received from Brookfield for the sale of interests in Altera during 2019?" + }, + { + "input": "What is the change in Net financing cash flows from Year Ended December 31, 2019 to December 31, 2018?", + "output": "What is the difference in the amount of cash generated or used for financing activities in the financial statements for the years ending December 31, 2019 and December 31, 2018?" + }, + { + "input": "What is the change in Net financing cash flows from Year Ended December 31, 2019 to December 31, 2018?", + "output": "What is the difference in net cash flows from financing activities between the fiscal years ended on December 31, 2019 and December 31, 2018?" + }, + { + "input": "What is the change in Net (loss) income from Year Ended December 31, 2019 to December 31, 2018?", + "output": "What was the difference in the Net (loss) income between the fiscal years ending on December 31, 2019 and December 31, 2018?" 
+ }, + { + "input": "What is the change in Net (loss) income from Year Ended December 31, 2019 to December 31, 2018?", + "output": "What is the difference in the amount of net (loss) income between the fiscal year ending on December 31, 2019, and the fiscal year ending on December 31, 2018?" + }, + { + "input": "What is the change in Income tax expense (recovery) from Year Ended December 31, 2019 to December 31, 2018?", + "output": "What is the difference in the amount of income tax expense or recovery reported for the fiscal year ended on December 31, 2019, compared to the fiscal year ended on December 31, 2018?" + }, + { + "input": "What is the change in Income tax expense (recovery) from Year Ended December 31, 2019 to December 31, 2018?", + "output": "How does the income tax expense (recovery) change between the years ended December 31, 2019 and December 31, 2018?" + }, + { + "input": "What is the sum of all minimum lease payments expected to be received in the year 2019?", + "output": "What was the total Total minimum lease payments to be received in 2019?" + }, + { + "input": "What is the total sum of minimum lease payments that were expected to be received in the year 2019?", + "output": "What was the total Total minimum lease payments to be received in 2019?" + }, + { + "input": "What is the change in Initial direct costs and other from December 31, 2019 to December 31, 2018?", + "output": "How did the initial direct costs and other expenses change from December 31, 2018, to December 31, 2019?" 
+ }, + { + "input": "When did Teekay sell to Brookfield all of the Company’s remaining interests in Altera?", + "output": "\"When was the complete sale of Teekay's remaining holdings in Altera to Brookfield executed?\"" + }, + { + "input": "When did Teekay sell to Brookfield all of the Company’s remaining interests in Altera?", + "output": "\"When did Teekay sell its remaining interests in Altera to Brookfield?\"" + }, + { + "input": "What were the Fair values at the start of the year for 2019 and 2018, respectively? Please provide the Fair value figures for both years separately.", + "output": "What was the Fair value at the beginning of the year for 2019 and 2018 respectively?" + }, + { + "input": "In which year was Fair value at the beginning of the year less than 15,000 thousands?", + "output": "In which year did the fair value at the beginning of the year fall below 15,000,000 (15 million) dollars?" + }, + { + "input": "\"What items are typically included in the calculation of current income tax expense, and what factors can affect this expense?\"", + "output": "What is included in current income tax expense?" + }, + { + "input": "What components are typically encompassed in the calculation of contemporary income tax expense?", + "output": "What is included in current income tax expense?" + }, + { + "input": "What is the change in Current income tax expense from December 31, 2019 to December 31, 2018?", + "output": "How much has the Current income tax expense changed from December 31, 2019, to December 31, 2018?" + }, + { + "input": "What is the change in Current income tax expense from December 31, 2019 to December 31, 2018?", + "output": "What was the difference in Current income tax expense between December 31, 2019 and December 31, 2018?" + }, + { + "input": "\"Between two years, which one exhibits a higher expense for current income tax?\"", + "output": "Which year has higher current income tax expense?" 
+ }, + { + "input": "What were the Lease and Non-Lease commitments in the year 2021, and can you provide specific details about each commitment?", + "output": "What was the Lease and Non-Lease commitment in 2021 respectively?" + }, + { + "input": "What was the amount of lease commitment for the fiscal year 2022?", + "output": "What was the Lease commitment in 2022?" + }, + { + "input": "What was the total value of lease commitments in the year 2022?", + "output": "What was the Lease commitment in 2022?" + }, + { + "input": "In which year was the Lease commitment less than 10,000 thousands?", + "output": "In which specific year did the Lease commitment amount to less than 10,000 thousand units?" + }, + { + "input": "In which year was the Lease commitment less than 10,000 thousands?", + "output": "In which specific year did the Lease commitment amount fall below 10,000 thousands? (For better answering, please state the year and the unit of the Lease commitment.)" + }, + { + "input": "What is the difference in the lease commitment amount between 2020 and 2021?", + "output": "What is the change in the Lease commitment from 2020 to 2021?" + }, + { + "input": "What is the average duration of lease agreements from 2020 to 2022?", + "output": "What is the average Lease Commitment from 2020 to 2022?" + }, + { + "input": "What is the average duration of lease commitments between 2020 and 2022?", + "output": "What is the average Lease Commitment from 2020 to 2022?" + }, + { + "input": "How much was the equity loss for the year ended December 31, 2019?", + "output": "What was the amount of equity lost during the period from January 1 to December 31, 2019?" + }, + { + "input": "How much was the equity loss for the year ended December 31, 2019?", + "output": "What was the total amount of equity loss recorded for the period of the year ending December 31, 2019?" 
+ }, + { + "input": "What is the difference in the combined amount of cash and restricted cash on December 31, 2019, compared to December 31, 2018?", + "output": "What is the change in Cash and restricted cash from December 31, 2019 to December 31, 2018?" + }, + { + "input": "What was the increase or decrease in the category of Other assets – current on the company's balance sheet from December 31, 2019 to December 31, 2018?", + "output": "What is the change in Other assets – current from December 31, 2019 to December 31, 2018?" + }, + { + "input": "What were the values of the other current assets in the years 2019 and 2018?", + "output": "What was the other assets - current in 2019 and 2018 respectively?" + }, + { + "input": "What were the respective amounts of non-current liabilities in 2019 and 2018 aside from the liabilities mentioned earlier?", + "output": "What was the other liabilities - non current in 2019 and 2018 respectively?" + }, + { + "input": "What were the amounts of non-current liabilities in 2019 and 2018, respectively, excluding any other liabilities that may be present on the financial statements?", + "output": "What was the other liabilities - non current in 2019 and 2018 respectively?" + }, + { + "input": "What is the difference in the number of Consolidated LNG carriers between 2020 and 2021?", + "output": "What is the change in Consolidated LNG carriers from 2020 to 2021?" + }, + { + "input": "What is the net difference in the number of Consolidated LNG carriers between 2020 and 2021?", + "output": "What is the change in Consolidated LNG carriers from 2020 to 2021?" + }, + { + "input": "What is the annual growth or decline in the number of Consolidated LNG carriers from 2021 to 2022?", + "output": "What is the change in Consolidated LNG carriers from 2021 to 2022?" 
+ }, + { + "input": "What is the average number of Consolidated LNG carriers for the period from 2020 to 2021?", + "output": "What is the average Consolidated LNG carriers, for the year 2020 to 2021?" + }, + { + "input": "What is the average number of Consolidated LNG carriers between the years 2020 and 2021?", + "output": "What is the average Consolidated LNG carriers, for the year 2020 to 2021?" + }, + { + "input": "What is the change in Realized (losses) gains relating to Interest rate swap agreements from Year Ended December 31, 2019 to December 31, 2018?", + "output": "What was the difference in Realized (losses) gains associated with Interest rate swap agreements between December 31, 2019 and December 31, 2018?" + }, + { + "input": "What is the difference in Realized losses and gains from Forward freight agreements between Year Ended December 31, 2019 and December 31, 2018?", + "output": "What is the change in Realized (losses) gains relating to Forward freight agreements from Year Ended December 31, 2019 to December 31, 2018?" + }, + { + "input": "What was the difference in revenue between 2019 and 2018?", + "output": "What is the change in Revenues from, 2019 to 2018?" + }, + { + "input": "What is the change in Voyage expenses from, 2019 to 2018?", + "output": "What is the difference in expenses for voyages comparing 2019 to 2018?" + }, + { + "input": "How is the expense of share-based compensation quantified and evaluated?", + "output": "How is share-based compensation expense measured?" + }, + { + "input": "How is the expense related to share-based compensation quantified and determined?", + "output": "How is share-based compensation expense measured?" + }, + { + "input": "What are the various types of share-based compensation plans in existence?", + "output": "What are the types of share-based compensation plans?" 
+ }, + { + "input": "By what percentage did the stock-based compensation expense for general and administrative operations decrease from 2018 to 2019?", + "output": "How much did the stock-based compensation expense in the general and administrative operations decreased from 2018 to 2019?" + }, + { + "input": "What is the ending outstanding number of vested RSUs in 2019?", + "output": "What is the final count of vested Restricted Stock Units (RSUs) at the conclusion of 2019?" + }, + { + "input": "What is the ending outstanding number of vested RSUs in 2019?", + "output": "How many RSUs were left as outstanding in the year 2019 once they had vested?" + }, + { + "input": "What was the exact amount of depreciation expense recorded in the financial statements for the year 2018?", + "output": "What was the depreciation expense in 2018?" + }, + { + "input": "What are the accepted accounting methods for handling and recording disposed assets?", + "output": "What are the accounting treatments for disposed assets?" + }, + { + "input": "What are the accounting procedures to be followed specifically for assets that have been disposed of?", + "output": "What are the accounting treatments for disposed assets?" + }, + { + "input": "What is the net total for property, plant and equipment in 2019?", + "output": "What is the total value of property, plant, and equipment after deducting accumulated depreciation in 2019?" + }, + { + "input": "What is the net total for property, plant and equipment in 2019?", + "output": "What is the total value of property, plant, and equipment after deducting accumulated depreciation and any impairment losses, as reported in the financial statements for the year 2019?" + }, + { + "input": "What is the percentage decrease in the net value of Property, Plant, and Equipment between 2018 and 2019?", + "output": "What is the percentage decrease in Net Total Property, Plant and Equipment from 2018 to 2019?" 
+ }, + { + "input": "How much was the increase in Federal R&D tax credits from 2018 to 2019?", + "output": "How much did R&D tax credits from the Federal increased from 2018 to 2019?" + }, + { + "input": "What was the percentage increase in R&D tax credits provided by the Federal government from 2018 to 2019?", + "output": "How much did R&D tax credits from the Federal increased from 2018 to 2019?" + }, + { + "input": "\"What was the amount recorded as a long-term liability in the financial statements for the year 2018?\"", + "output": "How much was reflected as a long-term liability in 2018?" + }, + { + "input": "What is the percentage difference in Net deferred tax assets from 2018 to 2019?", + "output": "What was the percentage change in Net deferred tax assets between 2018 and 2019?" + }, + { + "input": "What was the total cost of the service provided in the year 2018?", + "output": "What was the service cost in 2018?" + }, + { + "input": "What was the cost of the service provided in the year 2018?", + "output": "What was the service cost in 2018?" + }, + { + "input": "What was the difference in the cost of services from 2018 to 2019?", + "output": "What was the change in service cost between 2018 and 2019?" + }, + { + "input": "What was the change in interest cost between 2018 and 2019?", + "output": "What was the difference in the amount of money spent on interest between the years 2018 and 2019?" + }, + { + "input": "What was the change in interest cost between 2018 and 2019?", + "output": "How did the interest cost change from 2018 to 2019, and what is the difference between the two years?" + }, + { + "input": "What was the change in balances at the end of period between 2018 and 2019?", + "output": "What is the difference in balances at the end of the period between 2018 and 2019, and what caused this change?" 
+ }, + { + "input": "What was the specific monetary value or total sum allocated for Machinery and equipment in the year 2018?", + "output": "What was the amount for Machinery and equipment in 2018?" + }, + { + "input": "\"What was the specific monetary value allocated to Machinery and equipment in the year 2018?\"", + "output": "What was the amount for Machinery and equipment in 2018?" + }, + { + "input": "What was the change in Net cash provided by (used in) operating activities from continuing operations between 2018 and 2019?", + "output": "What was the difference in Net cash provided by (used in) operating activities from continuing operations between the years 2018 and 2019?" + }, + { + "input": "What was the difference in the amount of Net cash provided by (used in) operating activities between the years 2018 and 2019?", + "output": "What was the change in Net cash provided by (used in) operating activities between 2018 and 2019?" + }, + { + "input": "What was the percentage change in Income (loss) from discontinued operations, net of income taxes between 2018 and 2019?", + "output": "What was the percentage change in net income (loss) from discontinued operations, after accounting for income taxes, between the fiscal years 2018 and 2019?" + }, + { + "input": "Which periods does the company's effective tax rates differ from the U.S. federal statutory rate?", + "output": "\"During which specific time periods does the effective tax rate of the company deviate from the U.S. federal statutory rate?\"" + }, + { + "input": "What was the specific framework or arrangement regarding Federal taxes in the year 2018, including any applicable laws, regulations, rates, and deductions?", + "output": "What was the current provision for Federal taxes in 2018?" + }, + { + "input": "What were the existing federal tax regulations and provisions for the year 2018?", + "output": "What was the current provision for Federal taxes in 2018?" 
+ }, + { + "input": "What was the specific provision made for foreign taxes in the year 2019?", + "output": "What was the current provision for Foreign taxes in 2019?" + }, + { + "input": "What was the percentage increase or decrease in the fair value of intangible assets from 2018 to 2019?", + "output": "What was the percentage change in the fair value of intangible assets between 2018 and 2019?" + }, + { + "input": "What was the change in Pro Forma total sales between 2018 and 2019?", + "output": "What was the difference in the total sales of Pro Forma from 2018 to 2019?" + }, + { + "input": "What was the change in Pro Forma total sales between 2018 and 2019?", + "output": "What was the difference in Pro Forma total sales from 2018 to 2019?" + }, + { + "input": "What was the exact fair value of the property and equipment owned by LumaSense?", + "output": "What was the fair value of Property and equipment from LumaSense?" + }, + { + "input": "What was the fair value of Property and equipment acquired by LumaSense?", + "output": "What was the fair value of Property and equipment from LumaSense?" + }, + { + "input": "What was the difference in the assumed exercise of dilutive stock options and restricted stock units between the years 2018 and 2019?", + "output": "What was the change in Assumed exercise of dilutive stock options and restricted stock units between 2018 and 2019?" + }, + { + "input": "What was the disparity in the fair value of cash paid for acquisition between Trek and the Electrostatic Product Line?", + "output": "What was the difference in the fair value of cash paid for acquisition between Trek and Electrostatic Product Line?" + }, + { + "input": "What was the diluted earnings per share (EPS) specifically for Continuing Operations for the quarter ending in September?", + "output": "What was the diluted earnings per share of Continuing Operations in Quarter Ended September?" 
+ }, + { + "input": "What was the diluted earnings per share of Continuing Operations in the quarter ending in September?", + "output": "What was the diluted earnings per share of Continuing Operations in Quarter Ended September?" + }, + { + "input": "What was the total Income (loss) from discontinued operations, net of income taxes in 2019?", + "output": "In 2019, what was the net income (loss) from discontinued operations, adjusted for income taxes?" + }, + { + "input": "What was the difference in total operating expense between the fiscal years 2018 and 2019?", + "output": "What was the change in total operating expense between 2018 and 2019?" + }, + { + "input": "What was the difference in the amount of money spent on operational expenses from 2018 to 2019?", + "output": "What was the change in total operating expense between 2018 and 2019?" + }, + { + "input": "How much money was owed to the company for goods or services provided in 2019 but not yet invoiced?", + "output": "What was the amount of unbilled receivables in 2019?" + }, + { + "input": "What were the precise amounts that were billed during the year 2018 after considering all deductions, fees, and payments?", + "output": "What were the net amounts billed in 2018?" + }, + { + "input": "What was the change in unbilled receivables between 2018 and 2019?", + "output": "What was the difference in the amount of unbilled receivables from 2018 to 2019?" + }, + { + "input": "What was the difference in the total amount billed between 2018 and 2019?", + "output": "What was the change in net amounts billed between 2018 and 2019?" 
+ }, + { + "input": "What was the accumulated amortization of customer relationships in 2018?", + "output": "\"What was the total amount of accumulated amortization recorded for customer relationships during the year 2018?\"" + }, + { + "input": "What were the top two segments with the highest net carrying amounts in the year 2018?", + "output": "What was the two highest net carrying amounts segments in 2018?" + }, + { + "input": "What were the two segments with the highest net carrying amounts in 2018?", + "output": "What was the two highest net carrying amounts segments in 2018?" + }, + { + "input": "How much did the service cost in 2019 and can you provide more details about the specific service being referred to?", + "output": "What was service cost in 2019?" + }, + { + "input": "What was the total amount spent on interest expenses in the year 2018?", + "output": "What was interest cost in 2018?" + }, + { + "input": "What was the expected rate of return on plan assets for the year 2017?", + "output": "What was the Expected return on plan assets in 2017?" + }, + { + "input": "What was the anticipated return on assets for the plan in 2017?", + "output": "What was the Expected return on plan assets in 2017?" + }, + { + "input": "What was the change in Amortization of actuarial gains and losses between 2017 and 2018?", + "output": "What was the difference in the amount of amortization of actuarial gains and losses recorded between the years 2017 and 2018?" + }, + { + "input": "What was the change in Amortization of actuarial gains and losses between 2017 and 2018?", + "output": "What was the difference in the amount of Amortization of actuarial gains and losses between the fiscal years 2017 and 2018?" + }, + { + "input": "What were the specific expenses incurred for severance payments and any associated costs during the year 2019?", + "output": "What were the severance and related charges in 2019?" 
+ }, + { + "input": "What was the amount of severance and associated expenses recorded as charges in the year 2019?", + "output": "What were the severance and related charges in 2019?" + }, + { + "input": "What was the change in Severance and related charges between 2018 and 2019?", + "output": "What was the year-on-year difference in the amount spent on Severance and related charges between 2018 and 2019?" + }, + { + "input": "What was the change in Severance and related charges between 2018 and 2019?", + "output": "What was the difference in the amount of Severance and related charges incurred in 2018 compared to 2019?" + }, + { + "input": "What is the proportion of facility relocation and closure charges in the total amount of restructuring charges incurred in 2019?", + "output": "What percentage of total restructuring charges in 2019 consisted of Facility relocation and closure charges?" + }, + { + "input": "What was the contrast between the combined assets in the Level 1 and Level 2 categories for the year 2019?", + "output": "What was the difference in the total assets in the Level 1 Level 2 categories for 2019?" + }, + { + "input": "What was the difference between the total assets in the Level 1 and Level 2 categories for the year 2019?", + "output": "What was the difference in the total assets in the Level 1 Level 2 categories for 2019?" + }, + { + "input": "What was the combined value of the top three assets in the Level 2 Category for the year 2019?", + "output": "What was the total of the three highest assets in the Level 2 Category for 2019?" + }, + { + "input": "What percentage of total assets across all categories consisted of assets from the Level One category for 2019?", + "output": "What was the proportion or share, in terms of percentage, of Level One assets out of the total assets across all categories for the year 2019?" 
+ }, + { + "input": "What was the specific value or monetary figure of long-lived assets in the United States during the year 2019?", + "output": "What was the amount of long-lived assets in United States in 2019?" + }, + { + "input": "What was the net difference in the value of long-term assets in the United States from 2018 to 2019?", + "output": "What was the change in the amount of long-lived assets between 2018 and 2019 in United States?" + }, + { + "input": "\"What was the combined value of the two longest-lasting assets in the year 2018?\"", + "output": "What is the sum of the highest two long-lived assets in 2018?" + }, + { + "input": "\"What is the total value, in monetary terms, of the two long-lived assets with the highest worth that were recorded in the year 2018?\"", + "output": "What is the sum of the highest two long-lived assets in 2018?" + }, + { + "input": "What was the initial estimated value of property and equipment as of December 31, 2019 in relation to their market worth?", + "output": "What was the Preliminary fair value of property and equipment in December 31, 2019?" + }, + { + "input": "What was the initial estimated value of property and equipment as of December 31, 2019, before any adjustments or final calculations were made?", + "output": "What was the Preliminary fair value of property and equipment in December 31, 2019?" + }, + { + "input": "What was the initial estimated value assigned to inventories as of September 10th, 2019?", + "output": "What was the Preliminary fair value of inventories in September 10, 2019?" + }, + { + "input": "\"What was the initial estimated market value of the inventories as of September 10, 2019?\"", + "output": "What was the Preliminary fair value of inventories in September 10, 2019?" 
+ }, + { + "input": "In 2019, what was the specific amount of revenue that was recognized for product sales and the provision of services at a particular point in time?", + "output": "What was the Product and service revenue recognized at point in time in 2019?" + }, + { + "input": "What specific extended warranty and service contracts were acknowledged and recorded throughout the year of 2018?", + "output": "What was the Extended warranty and service contracts recognized over time in 2018?" + }, + { + "input": "What were the specific extended warranty and service contracts that gained recognition throughout the year 2018?", + "output": "What was the Extended warranty and service contracts recognized over time in 2018?" + }, + { + "input": "What was the change in Product and service revenue recognized at point in time between 2018 and 2019?", + "output": "What was the specific difference in revenue from the sales of products and services recognized at a specific moment between the years 2018 and 2019?" + }, + { + "input": "What were the sales figures for the Industrial & Medical sectors in the year 2017?", + "output": "What was the sales for Industrial & Medical in 2017?" + }, + { + "input": "What were the sales figures specifically for Industrial & Medical sectors in the year 2017?", + "output": "What was the sales for Industrial & Medical in 2017?" + }, + { + "input": "What was the percentage change in sales of Semiconductor Equipment from 2017 to 2018?", + "output": "What was the change in sales of Semiconductor Equipment between 2017 and 2018?" + }, + { + "input": "What were the income taxes for the 2018 tax year, based on the federal statutory tax rates?", + "output": "What was the Income taxes per federal statutory rate in 2018?" + }, + { + "input": "What was the State income taxes, net of federal deduction in 2017?", + "output": "In 2017, what was the amount of state income taxes paid after deducting the federal deduction?" 
+ }, + { + "input": "What were the specific modifications that occurred in the GILTI Tax legislation from 2018 to 2019?", + "output": "What was the change in GILTI Tax between 2018 and 2019?" + }, + { + "input": "What was the difference in the GILTI Tax rates from 2018 to 2019, and how did the change impact tax calculations?", + "output": "What was the change in GILTI Tax between 2018 and 2019?" + }, + { + "input": "What was the change in Unremitted earnings between 2018 and 2019?", + "output": "What was the difference in Unremitted earnings from 2018 to 2019? Please provide the specific amount of change in Unremitted earnings between these two years." + }, + { + "input": "What was the percentage difference in Withholding taxes from 2018 to 2019?", + "output": "What was the percentage change in Withholding taxes between 2018 and 2019?" + }, + { + "input": "What is the estimated rate of forfeiture?", + "output": "What was the forfeiture rate estimated to be?" + }, + { + "input": "What is total non-cash compensation expense related to stock-based awards in 2019?", + "output": "\"What was the amount of expenses incurred in 2019 for stock-based awards as a form of total non-cash compensation?\"" + }, + { + "input": "What is the range of years covered in the table?", + "output": "What are the years included in the table?" + }, + { + "input": "\"What is the final balance as of December 31, 2019?\"", + "output": "What is the balance at end of year for 2019?" + }, + { + "input": "In which specific markets does the company focus its marketing efforts and aim to capture a significant share of customers?", + "output": "Which markets does the company target?" + }, + { + "input": "What specific types of research and development (R&D) activities are sponsored by companies? Please provide a comprehensive list of these activities as outlined in the table.", + "output": "What are the types of company-sponsored R&D activities in the table?" 
+ }, + { + "input": "What types of research and development (R&D) activities supported by companies are listed in the table?", + "output": "What are the types of company-sponsored R&D activities in the table?" + }, + { + "input": "What was the difference in the total expenditure on research and development sponsored by the company from 2018 to 2019?", + "output": "What is the change in the total company-sponsored research and development expense in 2019 from 2018?" + }, + { + "input": "What is the change in total backlog in 2019?", + "output": "What was the overall increase or decrease in the total backlog during the year 2019?" + }, + { + "input": "What are the segments under Total Backlog in the table?", + "output": "What specific categories or divisions are included within the Total Backlog section of the table?" + }, + { + "input": "How many distinct segments are there included in the Total Backlog?", + "output": "How many segments are there under Total Backlog?" + }, + { + "input": "How many distinct segments are included within the category of Total Backlog?", + "output": "How many segments are there under Total Backlog?" + }, + { + "input": "What is the specific maturity date or time period for the long-term debt that is expected to be due in 2021?", + "output": "What is the maturity of long-term debt for 2021?" + }, + { + "input": "What are the components recorded under income (loss) from continuing operations before income taxes?", + "output": "What specific items are included in the income (loss) from continuing operations category before taxes are deducted?" + }, + { + "input": "In which year was the amount under Foreign the smallest?", + "output": "\"When was the lowest recorded value for the amount under the category 'Foreign' observed?\"" + }, + { + "input": "What is the change in the amount under Foreign in 2019 from 2018?", + "output": "What is the difference in the amount under the category \"Foreign\" between 2019 and 2018?" 
+ }, + { + "input": "What is the change in the amount under Foreign in 2019 from 2018?", + "output": "How much did the Foreign amount change from 2018 to 2019?" + }, + { + "input": "In which year was the amount of sales in Other the largest?", + "output": "In which year did the category labeled as \"Other\" witness the highest recorded sales volume?" + }, + { + "input": "What is the percentage change in the amount of Other in 2019 from 2018?", + "output": "What is the percentage difference in the amount of Other expenses or income in the year 2019 compared to 2018, and how can it be calculated?" + }, + { + "input": "What is the tax expense for 2019 calculated at the U.S. statutory rate?", + "output": "What is the Tax expense at U.S. statutory rate for 2019?" + }, + { + "input": "What was the tax expense for the year 2019 in the United States at the statutory rate?", + "output": "What is the Tax expense at U.S. statutory rate for 2019?" + }, + { + "input": "What is the percentage change in nondeductible expenses in 2019 from 2018?", + "output": "What is the percentage difference in the amount of expenses that cannot be deducted in 2019 compared to 2018?" + }, + { + "input": "What is the percentage change in nondeductible expenses in 2019 from 2018?", + "output": "What is the percentage increase or decrease in expenses that cannot be deducted from taxes in the year 2019 compared to the year 2018?" + }, + { + "input": "\"When were the costs associated with purchasing and developing the ERP system recorded?\"", + "output": "In which years were the incurred costs related to the purchase and development of the ERP system recorded?" + }, + { + "input": "In which year did the total value of property, plant, and equipment reach its highest level?", + "output": "In which year was the total amount of property, plant and equipment larger?" 
+ }, + { + "input": "In which specific year did the combined value of property, plant, and equipment reach its highest point?", + "output": "In which year was the total amount of property, plant and equipment larger?" + }, + { + "input": "What is the change in leasehold improvements from 2018 to 2019?", + "output": "What was the year-on-year change in the value of leasehold improvements between 2018 and 2019?" + }, + { + "input": "What is the percentage change in leasehold improvements from 2018 to 2019?", + "output": "What is the percentage difference in the value of leasehold improvements from 2018 to 2019?" + }, + { + "input": "What is the percentage change in CGD sales in 2018?", + "output": "What was the percentage increase or decrease in CGD sales in 2018 compared to the previous year?" + }, + { + "input": "What is the percentage change in CGD sales in 2018?", + "output": "What is the exact percentage difference in sales of CGD products between the years 2018 and the previous year?" + }, + { + "input": "For which years is the amortization of purchased intangibles included in the CGD results recorded?", + "output": "In the recorded CGD results, during which specific years is the amortization of purchased intangibles included?" + }, + { + "input": "In which year does the inclusion of the amortization of purchased intangibles result in a higher amount in the Comprehensive Gain or Loss From Discontinued Operations (CGD)?", + "output": "In which year is the amortization of purchased intangibles included in the CGD results larger?" + }, + { + "input": "\"What criteria or requirements must be met in order to be eligible for receiving an extra cash payment amounting to $3.0 million?\"", + "output": "What is the eligibility to receive an additional cash payment of $3.0 million based on?" 
+ }, + { + "input": "During which years was the method of calculating net income (loss) per common share using the weighted-average number of shares outstanding implemented and documented?", + "output": "For which years was the weighted-average number of shares outstanding used to compute net income (loss) per common share recorded?" + }, + { + "input": "What is the percentage change in adjusted EBITDA in 2019 from 2018?", + "output": "What is the exact percentage change in adjusted EBITDA between the years 2018 and 2019?" + }, + { + "input": "What is the percentage change in adjusted EBITDA in 2019 from 2018?", + "output": "What is the percentage difference in adjusted EBITDA between 2019 and 2018? How can this information be used to analyze the financial performance in these two years?" + }, + { + "input": "What resulted in a higher operating income?", + "output": "What factor led to an increase in operating income?" + }, + { + "input": "How many finished products were produced in the year 2019?", + "output": "What is the amount of finished products in 2019?" + }, + { + "input": "What was the total quantity of completed products manufactured in the year 2019?", + "output": "What is the amount of finished products in 2019?" + }, + { + "input": "In which year does the value of finished products surpass the value of finished products in other years, resulting in a higher overall value?", + "output": "In which year is the value of finished products higher?" + }, + { + "input": "What are the differences in the types of materials and purchased parts between 2018 and 2019?", + "output": "What is the change in materials and purchased parts from 2018 to 2019?" + }, + { + "input": "\"What factors contributed to the increase in operating profits?\"", + "output": "What resulted in the improvement in operating profits?" 
+ }, + { + "input": "For which fiscal year(s) is the amount of amortization of purchased intangibles included in the CGD results recorded?", + "output": "In which fiscal year(s) is the CGD results recorded and does it include the amortization amount of purchased intangibles?" + }, + { + "input": "For which fiscal year(s) is the amount of amortization of purchased intangibles included in the CGD results recorded?", + "output": "In which fiscal year(s) is the amortization amount of purchased intangibles included in the recorded results of the Comprehensive Growth and Development (CGD)?" + }, + { + "input": "Which year, from the given weighted-average assumptions for determining benefit obligation as of September 30, exhibits the highest rate of compensation increase among all years?", + "output": "For the weighted-average assumptions used to determine benefit obligation at September 30, which year has the largest rate of compensation increase?" + }, + { + "input": "What was the change in the discount rate between 2018 and 2019 for determining the benefit obligation at September 30?", + "output": "For the weighted-average assumptions used to determine benefit obligation at September 30, what is the change in the discount rate in 2019 from 2018?" + }, + { + "input": "What is the weighted-average discount rate used to calculate the benefit obligation as of September 30 for the years 2017, 2018, and 2019?", + "output": "For the weighted-average assumptions used to determine benefit obligation at September 30, what is the average discount rate across 2017, 2018 and 2019?" + }, + { + "input": "In what specific year did the rate differential for foreign exchange exceed 10.0?", + "output": "In which year was Foreign rate differential greater than 10.0?" + }, + { + "input": "What was the average amount of state taxes, excluding federal benefits, for the years 2017 to 2019?", + "output": "What was the average State taxes net of federal benefit for 2017-2019?" 
+ }, + { + "input": "How much did the Research and Development credits shift between 2018 and 2019?", + "output": "What is the change in the Research and development credits from 2018 to 2019?" + }, + { + "input": "What is the difference in the amount of Research and Development credits between 2018 and 2019?", + "output": "What is the change in the Research and development credits from 2018 to 2019?" + }, + { + "input": "What were the respective gains in 2019 and 2018?", + "output": "What were the gains achieved in 2019 and 2018, respectively?" + }, + { + "input": "What were the respective gains in 2019 and 2018?", + "output": "What were the gains in 2019 and 2018, respectively?" + }, + { + "input": "What is the specific year when the Loss from discontinued operations was exactly zero?", + "output": "In which year was Loss from discontinued operations 0?" + }, + { + "input": "What was the average amount of money lost from 2017 to 2019?", + "output": "What was the average Net loss for 2017 to 2019?" + }, + { + "input": "What is the change in the Weighted average common shares outstanding-basic from 2018 to 2019?", + "output": "What is the difference in the number of weighted average common shares outstanding-basic between 2018 and 2019?" + }, + { + "input": "What was the depreciation and amortization expense related to property and equipment for 2019?", + "output": "What was the specific amount of depreciation and amortization expense attributed to property and equipment for the fiscal year 2019?" + }, + { + "input": "\"What was the precise monetary worth or estimated value, expressed in the currency of the specific country or region, of the category of Machinery and Equipment during the calendar year of 2019?\"", + "output": "What was the value of Machinery and equipment in 2019?" + }, + { + "input": "What was the exact monetary value of Machinery and equipment in the year 2019?", + "output": "What was the value of Machinery and equipment in 2019?" 
+ }, + { + "input": "In what year did the construction in process have a value of less than 30,000 thousands?", + "output": "In which year was value of Construction in process less than 30,000 thousands?" + }, + { + "input": "In which specific year did the value of Construction in process fall below 30,000 thousands?", + "output": "In which year was value of Construction in process less than 30,000 thousands?" + }, + { + "input": "What were the average values of Machinery and equipment in both 2018 and 2019?", + "output": "What was the average value of Machinery and equipment for 2018 and 2019?" + }, + { + "input": "What is the average telecommunication expenditure during the fiscal years 2018 and 2019?", + "output": "What is the average Telecom for fiscal year 2018 and 2019?" + }, + { + "input": "During which year did the number of Telecom users fall below 200,000 thousand?", + "output": "In which year was Telecom less than 200,000 thousand?" + }, + { + "input": "What was the exact percentage decrease of the revenue in 2019 compared to the previous year?", + "output": "What was the decrease in the revenue in 2019?" + }, + { + "input": "What was the specific percentage decrease in revenue for the fiscal year of 2019?", + "output": "What was the decrease in the revenue in 2019?" + }, + { + "input": "What is the average value of the Telecom industry for the fiscal years 2017 and 2018 combined?", + "output": "What is the average Telecom value for fiscal year 2017 and 2018?" + }, + { + "input": "What was the average value of the telecommunications industry for the fiscal years 2017 and 2018?", + "output": "What is the average Telecom value for fiscal year 2017 and 2018?" + }, + { + "input": "In which year was Telecom value less than 300,000 thousand?", + "output": "In what specific year did the value of Telecom drop below 300,000 thousand?" 
+ }, + { + "input": "What were the values of divestments and acquisitions in 2019, 2018, and 2017?", + "output": "What was the (Divested)/acquired values in 2019, 2018 and 2017 respectively?" + }, + { + "input": "In what specific year did the beginning balance fall below 5,000,000?", + "output": "In which year was Balance — beginning of year less than 5,000 thousands?" + }, + { + "input": "In what specific year did the beginning balance equate to less than 5,000 thousands (5 million)?", + "output": "In which year was Balance — beginning of year less than 5,000 thousands?" + }, + { + "input": "What was the difference in the amount of expenses allocated to provisions between the years 2017 and 2018?", + "output": "What was the change in the Provisions/(expense) from 2017 to 2018?" + }, + { + "input": "What was the difference in the Provisions/(expense) between 2017 and 2018?", + "output": "What was the change in the Provisions/(expense) from 2017 to 2018?" + }, + { + "input": "What was the amount of charge-offs recorded in the year 2019?", + "output": "What was the charge-offs in 2019?" + }, + { + "input": "\"What was the total amount of charge-offs incurred in 2019?\"", + "output": "What was the charge-offs in 2019?" + }, + { + "input": "\"What is the average net Provision for the years 2017, 2018, and 2019?\"", + "output": "What was the average Provision, net for 2017-2019?" + }, + { + "input": "What was the average net provision for the years 2017, 2018, and 2019 combined?", + "output": "What was the average Provision, net for 2017-2019?" + }, + { + "input": "What factors determine the value that is obtained upon vesting?", + "output": "What is the value realized on vesting based on?" + }, + { + "input": "What were the specific equity awards held by each named executive officer throughout the year 2019?", + "output": "What were the equity awards held by named executives during 2019?" 
+ }, + { + "input": "What were the equity awards granted to the named executives in 2019?", + "output": "What were the equity awards held by named executives during 2019?" + }, + { + "input": "What is the significance of the number of shares obtained upon vesting?", + "output": "What does the number of shares acquired on vesting represent?" + }, + { + "input": "Where was the reported combined net loss of $82 million disclosed or published?", + "output": "Where was the combined net loss of $82 million reported in?" + }, + { + "input": "What was the percentage difference in the Adjusted EBITDA margin from 2017 to 2019?", + "output": "What is the total change in the Adjusted EBITDA margin between 2019 and 2017?" + }, + { + "input": "What specific expenses are included in the integration and transformation costs that have an impact on the Adjusted EBITDA?", + "output": "What does the integration and transformation costs impacting Adjusted EBITDA include?" + }, + { + "input": "What is the change in total special items impacting adjusted EBITDA between 2018 and 2019?", + "output": "What was the difference in the total number of special items affecting adjusted EBITDA from 2018 to 2019?" + }, + { + "input": "What is the change in total special items impacting adjusted EBITDA between 2018 and 2019?", + "output": "How did the total special items affecting adjusted EBITDA change from 2018 to 2019?" + }, + { + "input": "What is the percentage change in total special items impacting adjusted EBITDA between 2018 and 2019?", + "output": "What is the percentage difference in the amount of special items affecting adjusted EBITDA from 2018 to 2019?" + }, + { + "input": "What is the percentage change in total special items impacting adjusted EBITDA between 2018 and 2019?", + "output": "What is the percentage difference in the total number of special items affecting adjusted EBITDA from 2018 to 2019?" 
+ }, + { + "input": "How is the amortization process for deferred acquisition and fulfillment costs conducted?", + "output": "How are the deferred acquisition and fulfillment costs amortized?" + }, + { + "input": "What is the percentage change for the end of period balance for Fulfillment Costs when comparing the beginning of period balance?", + "output": "What is the percentage change in the end of period balance for Fulfillment Costs when comparing it with the beginning of period balance?" + }, + { + "input": "What was the total value of options that were available to be exercised and outstanding as of December 31, 2019?", + "output": "What was the aggregate intrinsic value of options outstanding and exercisable at December 31, 2019? " + }, + { + "input": "Which year has a higher weighted-average exercise price for options outstanding and exercisable?", + "output": "Which year exhibits a greater weighted-average exercise price for options that are currently outstanding and available for exercise?" + }, + { + "input": "What does the deferred income tax benefit (expense) under post-retirement benefit plans currently recognize? ", + "output": "What recognition does the deferred income tax benefit (expense) currently provide for under post-retirement benefit plans?" + }, + { + "input": "What is the percentage increase or decrease in the deferred income tax benefit or expense for post-retirement benefit plans between 2017 and 2018?", + "output": "What is the percentage change in the deferred income tax benefit (expense) for post-retirement benefit plans in 2018 from 2017?" + }, + { + "input": "What was the percentage difference in the deferred income tax benefit (expense) for post-retirement benefit plans between 2018 and 2017?", + "output": "What is the percentage change in the deferred income tax benefit (expense) for post-retirement benefit plans in 2018 from 2017?" 
+ }, + { + "input": "How is the Adjusted EBITDA Run Rate determined?", + "output": "How is the Adjusted EBITDA Run Rate calculated and what factors are considered in determining it?" + }, + { + "input": "What are the different performance levels?", + "output": "What are the various levels of performance and can you provide more details about each level?" + }, + { + "input": "How many levels of performance would result in a payout exceeding 50% of the target award?", + "output": "How many performance levels would the payout as % of target award be above 50%?" + }, + { + "input": "How many performance levels have a payout as a percentage of the target award that exceeds 50%?", + "output": "How many performance levels would the payout as % of target award be above 50%?" + }, + { + "input": "What is the main factor or aspect that goodwill is specifically connected or associated with?", + "output": "What is goodwill attributable to?" + }, + { + "input": "What is property, plant and equipment expressed as a ratio of the total estimated aggregate consideration under the balance as of December 31, 2017?", + "output": "What is the ratio of property, plant, and equipment to the total estimated aggregate consideration shown in the balance as of December 31, 2017?" + }, + { + "input": "What is property, plant and equipment expressed as a ratio of the total estimated aggregate consideration under the balance as of December 31, 2017?", + "output": "What is the ratio of property, plant and equipment to the total estimated aggregate consideration on the balance sheet as of December 31, 2017?" + }, + { + "input": "What is the percentage change in other non current assets in 2018?", + "output": "What was the percentage change in other non-current assets during the year 2018?" 
+ }, + { + "input": "Under what conditions would officers be entitled to the years of welfare benefits?", + "output": "When would officers have the right to receive welfare benefits for a prolonged period of time?" + }, + { + "input": "Which types of officers are listed in the table?", + "output": "\"What types of officers are included in the table displayed?\"" + }, + { + "input": "Which type of officer has the longest protected period?", + "output": "Which rank of officer within a specific profession or organization is entitled to the longest period of protection?" + }, + { + "input": "Which type of officer has the longest protected period?", + "output": "Which type of officer is granted the longest period of protection against any form of termination or dismissal?" + }, + { + "input": "What was the gross debt amount in the year 2019?", + "output": "What is the gross debt in 2019?" + }, + { + "input": "What was the difference in the total amount of debt in 2019 compared to 2018?", + "output": "What is the change in gross debt in 2019 from 2018?" + }, + { + "input": "What is the percentage change in gross debt in 2019 from 2018?", + "output": "What was the percentage increase/decrease in the gross debt from 2018 to 2019?" + }, + { + "input": "What is the percentage change in gross debt in 2019 from 2018?", + "output": "What is the percentage difference in the amount of gross debt from 2018 to 2019?" + }, + { + "input": "On what basis are the estimated benefit payments made, considering the underlying assumptions?", + "output": "What assumptions are the estimated benefit payments based on?" + }, + { + "input": "How many separate periods are highlighted in the table?", + "output": "How many different period segments are highlighted in the table?" + }, + { + "input": "How many distinct period segments are specifically emphasized in the provided table?", + "output": "How many different period segments are highlighted in the table?" 
+ }, + { + "input": "What is the percentage change in combined pension plan in 2021 from 2020?", + "output": "What is the percentage difference in the combined pension plan between 2021 and 2020?" + }, + { + "input": "What was the extent or percentage of the business that falls under the category of Operations and Other that was sold?", + "output": "What portion of business under Operations and Other was sold?" + }, + { + "input": "What is the total segment revenue in 2019?", + "output": "What is the exact amount of revenue generated by each segment in 2019 and what is the sum of these revenues overall?" + }, + { + "input": "What components are under operating revenue?", + "output": "What specific components contribute to the total operating revenue of a company or organization?" + }, + { + "input": "What additional information does Note 17 provide?", + "output": "What is the additional information provided in Note 17?" + }, + { + "input": "What additional information does Note 17 provide?", + "output": "What is the content of Note 17 and how does it provide additional information?" + }, + { + "input": "\"What was the exact sum of adjusted EBITDA for the entire year of 2019?\"", + "output": "What is the total adjusted EBITDA in 2019?" + }, + { + "input": "What is the change in the adjusted EBITDA under Wholesale in 2019 from 2018?", + "output": "What was the difference in adjusted EBITDA for the Wholesale category between 2019 and 2018?" + }, + { + "input": "What is the average total adjusted EBITDA over the three years?", + "output": "What is the mean value of the total adjusted EBITDA for the three-year period?" + }, + { + "input": "How many executives among the present ones in the organization receive a short-term incentive bonus that exceeds $500,000 in amount?", + "output": "How many current executives have a STI bonus amount greater than $500,000?" 
+ }, + { + "input": "How many executives presently receive a Short-Term Incentive (STI) bonus exceeding $500,000?", + "output": "How many current executives have a STI bonus amount greater than $500,000?" + }, + { + "input": "What is the salary earned by Indraneel Dev in 2019, and how does it compare to his/her STI bonus amount, expressed as a ratio?", + "output": "What is Indraneel Dev's salary earned during 2019 expressed as a ratio of his/her STI bonus amount?" + }, + { + "input": "What is included in the category of 'Support assets'? Please provide a detailed explanation of the various components that constitute these assets.", + "output": "What do 'Support assets' consist of?" + }, + { + "input": "What is the specific figure for Reported Balances' operating revenue?", + "output": "What is the Reported Balances operating revenue?" + }, + { + "input": "What was the variation in the deferred income tax benefit (expense) related to pension plans between 2019 and 2018?", + "output": "What is the change in the deferred income tax benefit (expense) for pension plans in 2019 from 2018?" + }, + { + "input": "What is the percentage change in the deferred income tax benefit (expense) for pension plans in 2019 from 2018?", + "output": "What is the exact percentage difference in the deferred income tax benefit or expense for pension plans between the years 2018 and 2019?" + }, + { + "input": "What was the outcome of the continuous efforts made to downsize the company's workforce?", + "output": "What was recognized as a result of ongoing efforts to reduce the company's workforce?" + }, + { + "input": "What is the average amount of service costs across 2017, 2018 and 2019?", + "output": "What is the mean service cost from 2017 to 2019?" + }, + { + "input": "What was the total amount of money owed by customers that had not been received as of December 31, 2019?", + "output": "What was the gross customer receivables at December 31, 2019?" 
+ }, + { + "input": "What was the total amount of customer receivables as recorded on the balance sheet at the end of December 31, 2019, including all outstanding invoices and amounts owed to the company by its customers?", + "output": "What was the gross customer receivables at December 31, 2019?" + }, + { + "input": "What is included in the total value of long-term debt?", + "output": "What does total long-term debt include?" + }, + { + "input": "What is included in the total amount of long-term debt?", + "output": "What does total long-term debt include?" + }, + { + "input": "What was the approach or method adopted in 2019 to determine the total assets of an entity?", + "output": "Regarding the total assets, what was adopted in 2019?" + }, + { + "input": "How many different dates declared had a dividend per share of $0.250 in 2018 and 2019?", + "output": "In 2018 and 2019, how many dates were declared with a dividend per share of $0.250?" + }, + { + "input": "\"What specific categories of expenses were documented in the records for the year 2018?\"", + "output": "What types of expenses were recorded during 2018?" + }, + { + "input": "What were the recorded expenses in 2018 and what categories do they fall under?", + "output": "What types of expenses were recorded during 2018?" + }, + { + "input": "\"What is the total amount of non-cash goodwill impairment charges that are not tax-deductible, for the years 2019 and 2018?\"", + "output": "What is the sum of non-cash, non-tax-deductible goodwill impairment charges for 2019 and 2018?" + }, + { + "input": "How much were the cumulative acquisition-related expenses incurred specifically by Level 3 in the year 2019?", + "output": "What was the amount of cumulative acquisition-related expenses incurred for Level 3 in 2019?" + }, + { + "input": "What was the total amount of expenses related to acquisitions that Level 3 incurred in the year 2019? 
Please provide the specific cumulative value.", + "output": "What was the amount of cumulative acquisition-related expenses incurred for Level 3 in 2019?" + }, + { + "input": "In which year did the company record the smallest total expenditure associated with acquisitions?", + "output": "Which year incurred the lowest amount of total acquisition-related expenses?" + }, + { + "input": "In which specific year did the company experience the least amount of expenses related to acquisition?", + "output": "Which year incurred the lowest amount of total acquisition-related expenses?" + }, + { + "input": "Against what were the estimates offset for the years 2019, 2018, and 2017?", + "output": "What are the 2019, 2018 and 2017 change in estimates offset against?" + }, + { + "input": "What factors were the changes in estimates for 2019, 2018, and 2017 set off against?", + "output": "What are the 2019, 2018 and 2017 change in estimates offset against?" + }, + { + "input": "In what year was the highest accretion expense recorded?", + "output": "Which year has the largest accretion expense?" + }, + { + "input": "What is the year with the highest accretion expense?", + "output": "Which year has the largest accretion expense?" + }, + { + "input": "What is the average accretion expense across 2017, 2018 and 2019?", + "output": "What is the average expense for accretion incurred during the years 2017, 2018, and 2019?" + }, + { + "input": "Where is the recording location for the current portion of the post-retirement benefit obligations?", + "output": "Where is the current portion of the post-retirement benefit obligations recorded?" + }, + { + "input": "In which specific year is the fair value of plan assets under post-retirement benefit plans higher than in other years?", + "output": "In which year is the fair value of plan assets under post-retirement benefit plans higher?" 
+ }, + { + "input": "\"What were the depreciation and amortization figures for the fiscal year 2019?\"", + "output": "What is the Depreciation and amortization for 2019?" + }, + { + "input": "What is the total adjusted EBITDA for each segment in 2019?", + "output": "What is the total segment adjusted EBITDA in 2019?" + }, + { + "input": "What is the total amount of income tax expense (benefit) recorded in the financial statements for the years 2018 and 2019?", + "output": "What is the sum of income tax expense (benefit) in 2018 and 2019?" + }, + { + "input": "What is the percentage change in income tax expense (benefit) in 2019 from 2018?", + "output": "What is the percentage difference in income tax expense (benefit) between 2018 and 2019?" + }, + { + "input": "What was the total amount of interest expenses incurred in the year 2019?", + "output": "What is the total interest expense in 2019?" + }, + { + "input": "What was the total amount of interest paid in expenses during the year 2019?", + "output": "What is the total interest expense in 2019?" + }, + { + "input": "What segments of interest expense are presented in the table?", + "output": "Which specific segments of interest expense are displayed in the table provided?" + }, + { + "input": "What is the specific monetary difference in the total interest expense for the year 2019 compared to 2018?", + "output": "What is the change in the gross interest expense in 2019 from 2018?" + }, + { + "input": "What was the difference in the amount of money spent on interest expenses in 2019 compared to 2018?", + "output": "What is the change in the gross interest expense in 2019 from 2018?" + }, + { + "input": "What is the average amount of interest paid over the years 2017, 2018, and 2019?", + "output": "What is the average total interest expense for 2017 to 2019?" 
+ }, + { + "input": "\"What is the average annual value of unvested restricted stock awards that were excluded from antidilutive considerations in the years 2017, 2018, and 2019?\"", + "output": "What is the average annual amount of unvested restricted stock awards that are antidilutive excluded in 2017, 2018 and 2019?" + }, + { + "input": "What were the respective average annual amounts of unvested restricted stock awards that were excluded due to being antidilutive in 2017, 2018, and 2019?", + "output": "What is the average annual amount of unvested restricted stock awards that are antidilutive excluded in 2017, 2018 and 2019?" + }, + { + "input": "Which year has a larger amount of contract acquisition costs?", + "output": "In which specific year do the contract acquisition costs exhibit a greater magnitude or quantity compared to other years?" + }, + { + "input": "What is the change in contract assets in 2019?", + "output": "What is the net increase or decrease in contract assets during the year 2019?" + }, + { + "input": "What percentage of Level 3 goodwill is specifically allocated to the consumer segment?", + "output": "How much of Level 3 goodwill is allocated to consumer?" + }, + { + "input": "What is the ratio of goodwill to the total amount as of December 31, 2018, in the context of Business?", + "output": "What is the amount of goodwill under Business as a ratio of the Total amount as of December 31, 2018?" + }, + { + "input": "As of December 31, 2018, what is the ratio of goodwill to the total amount in Business?", + "output": "What is the amount of goodwill under Business as a ratio of the Total amount as of December 31, 2018?" + }, + { + "input": "What is the combined goodwill figure for the years 2017 and 2018?", + "output": "What is the sum of the total amount of goodwill for 2017 and 2018?" 
+ }, + { + "input": "What is the combined value of goodwill for both 2017 and 2018?", + "output": "What is the sum of the total amount of goodwill for 2017 and 2018?" + }, + { + "input": "\"What specific items are encompassed within the current liabilities section of the consolidated balance sheets?\"", + "output": "What do the current liabilities reflected in the consolidated balance sheets include?" + }, + { + "input": "What specific items are included under current liabilities in the consolidated balance sheets?", + "output": "What do the current liabilities reflected in the consolidated balance sheets include?" + }, + { + "input": "What items are typically categorized as Other current liabilities in financial statements?", + "output": "What are the items included under Other current liabilities?" + }, + { + "input": "In which specific year do capital expenditures account for a greater value of accounts payable?", + "output": "Which year has a larger amount of accounts payable associated with capital expenditures?" + }, + { + "input": "Which specific year indicates a higher level of accounts payable specifically linked to capital expenditures?", + "output": "Which year has a larger amount of accounts payable associated with capital expenditures?" + }, + { + "input": "What distinguishes the total grant values of Indraneel Dev and Stacey W. Goff?", + "output": "What is the difference between Indraneel Dev and Stacey W. Goff's total grant values?" + }, + { + "input": "What is the disparity in total grant amounts between Indraneel Dev and Stacey W. Goff?", + "output": "What is the difference between Indraneel Dev and Stacey W. Goff's total grant values?" + }, + { + "input": "What is the monthly average for the number of shares that are typically withheld for tax purposes?", + "output": "What is the average number of shares withheld for taxes per month?" 
+ }, + { + "input": "\"What is the monthly average amount of shares that are typically withheld for tax purposes?\"", + "output": "What is the average number of shares withheld for taxes per month?" + }, + { + "input": "What is the total amount of minimum lease payments expected for non-cancellable operating leases in 2019?", + "output": "How much is the 2019 total future minimum lease payments under non-cancellable operating leases?" + }, + { + "input": "What was the percentage change in future minimum lease payments due within one year between 2018 and 2019?", + "output": "How much did future minimum lease payments due within one year change by between 2018 and 2019?" + }, + { + "input": "Which year, 2018 or 2019, had a higher value of capital?", + "output": "Between 2018 and 2019, which year had a greater amount of capital?" + }, + { + "input": "In terms of owned assets, which year, 2018 or 2019, had a higher overall value or quantity of assets?", + "output": "Between 2018 and 2019, which year had a greater amount of owned assets?" + }, + { + "input": "Which year, 2018 or 2019, had a higher total value of assets owned?", + "output": "Between 2018 and 2019, which year had a greater amount of owned assets?" + }, + { + "input": "What were the average values of leased assets for the years 2018 and 2019?", + "output": "What is the average leased assets for 2018 and 2019?" + }, + { + "input": "What was the average value of leased assets for the years 2018 and 2019?", + "output": "What is the average leased assets for 2018 and 2019?" + }, + { + "input": "What is the average trade receivables included within non-current assets?", + "output": "What is the average amount of trade receivables included within the category of non-current assets?" + }, + { + "input": "What is the average prepayments included in non-current assets?", + "output": "What is the average amount of prepayments that are included in non-current assets?" 
+ }, + { + "input": "What financial items does guidance basis comprise of?", + "output": "What specific components does the guidance basis in financial terms consist of?" + }, + { + "input": "Which year, either 2018 or 2019, had a higher adjusted EBITDA on a guidance basis?", + "output": "Between 2018 and 2019, which year had higher adjusted EBITDA, guidance basis?" + }, + { + "input": "Which year, 2018 or 2019, had a higher adjusted EBITDA on a guidance basis? Please provide the comparison between the two specified years.", + "output": "Between 2018 and 2019, which year had higher adjusted EBITDA, guidance basis?" + }, + { + "input": "What is the 2019 average adjusted EBITDA, guidance basis?", + "output": "\"What was the average adjusted EBITDA on a guidance basis for the year 2019?\"" + }, + { + "input": "What is the average amount of cash and cash equivalents that are typically reported in the statement of cash flows?", + "output": "What is the average cash and cash equivalents presented in the statement of cash flows?" + }, + { + "input": "What is the average amount of cash and cash equivalents reported in the statement of cash flows?", + "output": "What is the average cash and cash equivalents presented in the statement of cash flows?" + }, + { + "input": "What are the different types of financial items that are included in the data of a consolidated income statement?", + "output": "What financial items does the consolidated income statement data comprise of?" + }, + { + "input": "\"What is the mean total income for the fiscal years 2018 and 2019?\"", + "output": "What is the average revenue for 2018 and 2019?" + }, + { + "input": "What were the average revenues for the years 2018 and 2019?", + "output": "What is the average revenue for 2018 and 2019?" + }, + { + "input": "What was the average revenue in 2017 and 2018?", + "output": "What is the average revenue for 2017 and 2018?" + }, + { + "input": "\"What was the average revenue in both 2017 and 2018? 
Please provide the specific revenue figures for each year.\"", + "output": "What is the average revenue for 2017 and 2018?" + }, + { + "input": "\"What is the revenue change reported specifically for Europe?\"", + "output": "What is the reported change in revenue - europe?" + }, + { + "input": "What is the reported change in revenue for Europe? How has the revenue in Europe been affected according to the reports?", + "output": "What is the reported change in revenue - europe?" + }, + { + "input": "\"Which country, Germany or Italy, has experienced a greater overall growth in the organic sector over a specified period of time?\"", + "output": "Between Germany and Italy, which one has a higher organic change?" + }, + { + "input": "What does cash consideration paid comprise of?", + "output": "What is included in the cash consideration paid?" + }, + { + "input": "\"In the table, what is the timeframe for the financial years whose information is displayed?\"", + "output": "Which financial years' information is shown in the table?" + }, + { + "input": "What is the total value of acquisitions made in 2019 throughout the year?", + "output": "How much is the 2019 acquisitions during the year ?" + }, + { + "input": "What was the total value of acquisitions made in 2019 throughout the year?", + "output": "How much is the 2019 acquisitions during the year ?" + }, + { + "input": "Which year, 2018 or 2019, witnessed a higher number of acquisitions throughout the entire year?", + "output": "Between 2018 and 2019, which year had a greater amount of acquisitions during the year?" + }, + { + "input": "What are the total operating lease commitments?", + "output": "What is the sum of all the commitments related to operating leases?" + }, + { + "input": "What are the total operating lease commitments?", + "output": "What is the sum of all commitments made for operating leases?" 
+ }, + { + "input": "What is the total outstanding amount of financial liabilities that need to be paid within the shortest timeframe?", + "output": "How much financial liabilities are due for payment soonest?" + }, + { + "input": "What is the proportion of financial liabilities in relation to the overall sum of contractual obligations and commitments?", + "output": "What percentage of total contractual obligations and commitments is the financial liabilities?" + }, + { + "input": "\"What is the average amount of revenue generated from services according to IAS 18 for the years 2018 and 2019?\"", + "output": "What is the average service revenue between 2018 and 2019 IAS 18?" + }, + { + "input": "What is the difference between average service revenue and average other revenue?", + "output": "What distinguishes average service revenue from average other revenue?" + }, + { + "input": "Which specific financial years are being represented in the provided table?", + "output": "Which financial years' information is shown in the table?" + }, + { + "input": "What type of short-term investments are shown in the table?", + "output": "\"What specific types of short-term investments are displayed in the provided table?\"" + }, + { + "input": "What is the percentage change in the value of managed investment funds from 2018 to 2019?", + "output": "What is the change in managed investment funds between 2018 and 2019?" + }, + { + "input": "What is the difference in the value of managed investment funds from 2018 to 2019?", + "output": "What is the change in managed investment funds between 2018 and 2019?" + }, + { + "input": "What type of long-term borrowings are shown in the table?", + "output": "What are the specific types of long-term borrowings that are presented in the table?" + }, + { + "input": "What type of long-term borrowings are shown in the table?", + "output": "What is the specific category of long-term borrowings that is displayed in the table?" 
+ }, + { + "input": "What is the percentage change in the number of shares acquired from February to March 2019?", + "output": "What is the percentage change between shares purchased in February and March 2019?" + }, + { + "input": "Which financial years' information is shown in the table?", + "output": "Which fiscal years' data is displayed in the given table?" + }, + { + "input": "Which financial years' information is shown in the table?", + "output": "Which specific financial year's information is depicted in the table?" + }, + { + "input": "How much is the 2019 audit fees ?", + "output": "What is the exact cost of the audit fees for the year 2019?" + }, + { + "input": "What were the average fees for audits in 2018 and 2019?", + "output": "What is the average audit fees for 2018 and 2019?" + }, + { + "input": "What were the average audit fees in both 2018 and 2019?", + "output": "What is the average audit fees for 2018 and 2019?" + }, + { + "input": "\"What is the average total fees for the years 2018 and 2019 combined?\"", + "output": "What is the average total fees for 2018 and 2019?" + }, + { + "input": "What is the average sum of fees for the years 2018 and 2019 combined?", + "output": "What is the average total fees for 2018 and 2019?" + }, + { + "input": "What is the discrepancy between the average fees charged for audits and the average total fees incurred in 2018 and 2019?", + "output": "What is the difference between the average audit fees and the average total fees for 2018 and 2019?" + }, + { + "input": "What is the disparity between the mean charges for audits and the mean overall charges, comparing the years 2018 and 2019?", + "output": "What is the difference between the average audit fees and the average total fees for 2018 and 2019?" + }, + { + "input": "What is the amount of the non-current liabilities' 2018 other payables?", + "output": "How much is the 2018 other payables included within non-current liabilities?" 
+ }, + { + "input": "What is the value of the 2019 other payables included in current liabilities, excluding the amount payable for the share buyback?", + "output": "How much is the 2019 other payables included within current liabilities excluding the amount payable in relation to the share buyback?" + }, + { + "input": "What is the value of the 2019 other payables included in the current liabilities, except for the amount payable related to the share buyback?", + "output": "How much is the 2019 other payables included within current liabilities excluding the amount payable in relation to the share buyback?" + }, + { + "input": "What is the total amount of money that is owed to subsidiaries within one year as of 2019?", + "output": "What is the 2019 amounts owed to subsidiaries within one year?" + }, + { + "input": "Which year experiences a greater number of bank loans and other loans that are due within one year?", + "output": "Which year has a higher amount of bank loans and other loans under amounts falling due within one year?" + }, + { + "input": "How much is 2019 cash at bank and in hand ?", + "output": "What is the total amount of cash that was available in the bank and on hand in 2019?" + }, + { + "input": "Between 2018 and 2019, which year had a greater amount of money market funds?", + "output": "Which year, 2018 or 2019, had a higher value of money market funds?" + }, + { + "input": "Between 2018 and 2019, which year had a greater amount of money market funds?", + "output": "Which year had a higher volume of funds invested in money market funds: 2018 or 2019?" + }, + { + "input": "Which year, 2018 or 2019, witnessed a higher quantity of government securities being issued?", + "output": "Between 2018 and 2019, which year had a greater amount of government securities?" 
+ }, + { + "input": "Between 2018 and 2019, which year had more ordinary shares allotted?", + "output": "In the period spanning from 2018 to 2019, which year witnessed a higher allocation of ordinary shares?" + }, + { + "input": "What is the current cost of servicing for the year 2019?", + "output": "How much is the 2019 current service cost?" + }, + { + "input": "What specific types of investments that are no longer held are displayed in the table?", + "output": "What type of non-current investments are shown in the table?" + }, + { + "input": "In the table, which types of investments that are not currently being utilized or held are depicted?", + "output": "What type of non-current investments are shown in the table?" + }, + { + "input": "How much is the 2018 debt securities?", + "output": "What is the value or price of the debt securities issued in 2018?" + }, + { + "input": "How much is the 2018 debt securities?", + "output": "What is the cost of the 2018 debt securities?" + }, + { + "input": "Which financial years' information is shown in the table?", + "output": "Which financial years are displayed in the provided table?" + }, + { + "input": "What is the total projected income and expenditure for the year 2018?", + "output": "How much is the 2018 aggregated expected income expense ?" + }, + { + "input": "What is the projected total income expense for 2018 after aggregating all relevant information?", + "output": "How much is the 2018 aggregated expected income expense ?" + }, + { + "input": "What is the 2018 deferred tax on overseas earnings, excluding the 15€m charge relating to the combination of Vodafone India with Idea Cellular?", + "output": "What is the amount of deferred tax on overseas earnings in 2018, excluding the charge of 15€m related to the merger of Vodafone India and Idea Cellular?" 
+ }, + { + "input": "What is the difference in average income tax expense between the years 2017-2018 and 2018-2019?", + "output": "What is the change between 2017-2018 and 2018-2019 average income tax expense?" + }, + { + "input": "How much is the excluded depreciation and amortisation for 2019 adjusted EBITDA?", + "output": "What is the amount of depreciation and amortization that is excluded from the adjusted EBITDA calculation for the year 2019?" + }, + { + "input": "How much is the excluded depreciation and amortisation for 2018 adjusted EBITDA?", + "output": "What is the total amount of depreciation and amortization that were not included in calculating the adjusted EBITDA for the year 2018?" + }, + { + "input": "\"What is the exact amount of the adjusted profit before tax for the year 2018?\"", + "output": "How much is the adjusted profit before tax in 2018?" + }, + { + "input": "Which specific financial year's information is displayed in the provided table?", + "output": "Which financial years' information is shown in the table?" + }, + { + "input": "What is the distinction between the average net book value and the average total costs of shares in Group undertakings as of March 31, 2019? How can analyzing this difference assist in better understanding financial performance?", + "output": "What is the difference between 2019 average net book value and 2019 average total costs of shares in Group undertakings as at 31 March?" + }, + { + "input": "What is the significance of the recorded amount for developed technology?", + "output": "What does the amount recorded for developed technology represent?" + }, + { + "input": "What does the recorded amount for developed technology signify, and what is its purpose or significance?", + "output": "What does the amount recorded for developed technology represent?" 
+ }, + { + "input": "What is the useful life for Developed technology?", + "output": "What is the average duration of usefulness or lifespan for technology that has been fully developed and is in practical use?" + }, + { + "input": "What is the difference in useful life between developed technology and customer relationships?", + "output": "What distinguishes the lifespan of developed technology from that of customer relationships?" + }, + { + "input": "What is the difference in fair value between developed technology and customer relationships?", + "output": "What distinguishes the fair value of developed technology from that of customer relationships?" + }, + { + "input": "What identifiable intangible assets have a useful life exceeding five years?", + "output": "What are the identifiable intangible assets with a useful life above 5 years?" + }, + { + "input": "What intangible assets can be identified and have a useful life of more than 5 years?", + "output": "What are the identifiable intangible assets with a useful life above 5 years?" + }, + { + "input": "What does the amount recorded for developed technology represent?", + "output": "\"What is the meaning or significance of the recorded amount for developed technology?\"" + }, + { + "input": "What is the useful life (in years) of developed technology?", + "output": "What is the average duration, measured in years, of the lifespan of technology that has been developed?" + }, + { + "input": "What is the difference in fair value between developed technology and customer relationships?", + "output": "What distinguishes the fair value of developed technology from that of customer relationships, and how do they differ from each other?" + }, + { + "input": "What are the intangible assets that have a fair value of above $20,000 thousands?", + "output": "What are the intangible assets with a fair value exceeding $20,000 thousands? Please provide a comprehensive list of these assets." 
+ }, + { + "input": "What are the intangible assets that have a fair value of above $20,000 thousands?", + "output": "What are the intangible assets with a fair value exceeding $20 million dollars?" + }, + { + "input": "What is the distinction between the total amount invested in debt and equity, when considering amortized cost as the basis for evaluation?", + "output": "What is the difference in the total debt investments and the total equity investments, based on amortized cost?" + }, + { + "input": "What is the percentage composition of U.S. treasury securities in relation to the total debt investments when considering fair value?", + "output": "Based on fair value, what is the percentage constitution of U.S. treasury securities among the total debt investments?" + }, + { + "input": "What is the number of potentially anti-dilutive shares that were excluded for fiscal years ended June 30, 2017, 2018 and 2019 respectively? ", + "output": "What was the excluded number of potentially anti-dilutive shares for the fiscal years ending June 30, 2017, 2018, and 2019?" + }, + { + "input": "What is the number of potentially anti-dilutive shares that were excluded for fiscal years ended June 30, 2017, 2018 and 2019 respectively? ", + "output": "What is the total count of potentially anti-dilutive shares excluded for the fiscal years ending on June 30, 2017, 2018, and 2019, respectively?" + }, + { + "input": "What is the variance in the weighted-average ordinary shares outstanding for basic and diluted calculations in the fiscal year that ended on June 30, 2019?", + "output": "In fiscal year ended June 30, 2019, what is the difference in the weighted-average ordinary shares outstanding between the basic and the diluted?" 
+ }, + { + "input": "What is the average total non-current operating assets for fiscal years ended June 30, 2018 and 2019?", + "output": "\"What is the average total value of non-current operating assets for the fiscal years that ended on June 30, 2018 and June 30, 2019?\"" + }, + { + "input": "How many geographic regions had non-current operating assets exceeding $10,000 thousand in the fiscal year ending on June 30, 2019?", + "output": "In fiscal year ended June 30, 2019, how many geographic regions have non-current operating assets of more than $10,000 thousand?" + }, + { + "input": "What percentage of the total non-current operating assets owned by the Group in the United States constituted the non-current operating assets in the fiscal year ending June 30, 2018?", + "output": "In fiscal year ended June 30, 2018, what is the percentage constitution of the non-current operating assets in the United States among the total non-current operating assets owned by the Group?" + }, + { + "input": "\"What specific adjustments were made to figures in the year 2018?\"", + "output": "What was the adjustment done to figures in 2018?" + }, + { + "input": "How much did the value of security deposits change from fiscal year 2018 to 2019?", + "output": "What is the change in value between security deposits between fiscal years 2018 and 2019?" + }, + { + "input": "What is the Balance at the beginning of 2019?", + "output": "What is the balance amount in the accounts at the start of the year 2019?" + }, + { + "input": "What is the average deferred tax expense for fiscal years 2018 and 2019?", + "output": "What is the average amount of deferred tax expense incurred during fiscal years 2018 and 2019?" + }, + { + "input": "What is the percentage change of deferred tax expenses between fiscal year 2018 to 2019?", + "output": "What is the percentage difference in deferred tax expenses between fiscal year 2018 and fiscal year 2019?" 
+ }, + { + "input": "What is the amount recorded for short-term investments as of June 30, 2018, on the consolidated statements of financial position for the Group?", + "output": "As of June 30, 2018, what is the value of short-term investments on the Group's consolidated statements of financial position?" + }, + { + "input": "Based on fair value, what is the difference in value between U.S. treasury securities and agency securities?", + "output": "What is the quantifiable difference between U.S. treasury securities and agency securities in terms of their fair market value?" + }, + { + "input": "What are the specific revenue figures for the United Kingdom for the fiscal years that concluded in 2017, 2018, and 2019?", + "output": "What are the revenue amounts from the United Kingdom for fiscal years ended 2017, 2018 and 2019 respectively?" + }, + { + "input": "What is the difference in the revenues from Asia Pacific between fiscal years ended 2018 and 2019?", + "output": "How does the revenue for the Asia Pacific region differ between the fiscal years ending in 2018 and 2019?" + }, + { + "input": "What is the balance of capped call as of June 30, 2019?", + "output": "What is the specific capped call balance as of June 30, 2019?" + }, + { + "input": "What is the difference in the capped call balance between the fiscal year ending on June 30, 2018, and the fiscal year ending on June 30, 2019?", + "output": "What is the change in the balance of capped call between fiscal year ended June 30, 2018 and 2019?" + }, + { + "input": "What is the difference in balance of capped call and non-marketable investments as of June 30, 2019?", + "output": "What is the difference in the balance of capped call and non-marketable investments as of June 30, 2019, and how do these two types of investments vary in terms of their marketability?" 
+ }, + { + "input": "What is the difference in balance of capped call and non-marketable investments as of June 30, 2019?", + "output": "What is the balance difference between capped call and non-marketable investments as of June 30, 2019?" + }, + { + "input": "\"What is the chronological order of the share capital amounts from June 30, 2015 to 2019?\"", + "output": "What are the share capital for the years as of June 30, 2015 to 2019 in chronological order?" + }, + { + "input": "List the short-term investments from June 30, 2015 to 2019 in chronological order.", + "output": "What are the short-term investments for the years as of June 30, 2015 to 2019 in chronological order?" + }, + { + "input": "What is the difference in the value of short-term investments between fiscal years 2018 and 2019?", + "output": "What is the variance in the monetary worth of short-term investments from fiscal year 2018 to fiscal year 2019?" + }, + { + "input": "What is the average Cash and cash equivalents for 2015-2019?", + "output": "What is the average amount of Cash and cash equivalents from 2015 to 2019?" + }, + { + "input": "What is the definition of other fees?", + "output": "What does the term \"other fees\" mean? Please provide a clear definition of this term, including any relevant details, to facilitate a more accurate response." + }, + { + "input": "What is the average amount of taxes paid in fees for the fiscal years 2018 and 2019?", + "output": "What is the average tax fees for fiscal years 2018 and 2019?" + }, + { + "input": "What is the disparity in the aggregate of obligations between operating leases and other types of contractual commitments?", + "output": "What is the difference in the total commitments between that of operating leases and other contractual commitments?" 
+ }, + { + "input": "What percentage of the total commitments for operating leases for the year ending in 2020 is constituted by the commitments specifically for operating leases?", + "output": "What is the percentage constitution of the commitments for operating leases for year ending 2020 among the total commitments for operating leases?" + }, + { + "input": "What portion of the total commitments for operating leases for the year ending 2020 represents the percentage constitution?", + "output": "What is the percentage constitution of the commitments for operating leases for year ending 2020 among the total commitments for operating leases?" + }, + { + "input": "What was the percentage increase in maintenance revenues from the fiscal year ending in 2018 to the fiscal year ending in 2019?", + "output": "What was the increase of maintenance revenues from fiscal year ended 2018 to 2019?" + }, + { + "input": "What was the exact percentage increase in perpetual license revenues from the fiscal year ended 2018 to the fiscal year ended 2019?", + "output": "What was the increase of perpetual license revenues from fiscal year ended 2018 to 2019?" + }, + { + "input": "What percentage of total revenue for the fiscal year ending in 2018 is made up of perpetual license sales?", + "output": "For fiscal year ended 2018, what is the percentage constitution of perpetual license among the total revenue?" + }, + { + "input": "What percentage of the total revenue for the fiscal year ended 2018 was generated from perpetual licenses?", + "output": "For fiscal year ended 2018, what is the percentage constitution of perpetual license among the total revenue?" + }, + { + "input": "\"What are the average maintenance revenues for the fiscal years ending in 2018 and 2019, respectively?\"", + "output": "What is the average maintenance revenues for fiscal year ended 2018 and 2019?" 
+ }, + { + "input": "What is the specific provision for dilapidation expenses allocated for the fiscal year 2019?", + "output": "What is the dilapidation provision for fiscal year 2019?" + }, + { + "input": "What is the formula to calculate free cash flow, and can you further explain the steps involved in deriving this measure?", + "output": "How is free cash flow calculated?" + }, + { + "input": "What was the percentage increase in free cash flow from fiscal year 2018 to 2019?", + "output": "What was the increase in free cash flow between fiscal year 2018 and 2019?" + }, + { + "input": "What is the average net cash flow generated by operating activities for the years 2017, 2018, and 2019?", + "output": "What is the average net cash provided by operating activities from 2017-2019?" + }, + { + "input": "\"What is the average amount of net cash generated from operating activities for the years 2017, 2018, and 2019?\"", + "output": "What is the average net cash provided by operating activities from 2017-2019?" + }, + { + "input": "What is the percentage difference in the amount of available cash generated by the company between the years 2017 and 2018?", + "output": "What is the percentage change in free cash flow between 2017 and 2018?" + }, + { + "input": "What is the change in capital expenditures between 2018 and 2019?", + "output": "What was the difference in capital expenditures from 2018 to 2019?" + }, + { + "input": "What is the change in capital expenditures between 2018 and 2019?", + "output": "What was the difference in the amount of money spent on capital expenditures from 2018 to 2019?" + }, + { + "input": "As of June 30, 2019, what is the total value or monetary amount of outstanding payments owed by customers or business partners for goods or services provided?", + "output": "As of June 30, 2019, what is the amount of trade receivables?" 
+ }, + { + "input": "How does the net cash provided by operating activities compare between the fiscal years ending in 2018 and 2019?", + "output": "What is the difference in net cash provided by operating activities between fiscal year ended 2018 and 2019?" + }, + { + "input": "What is the change in net cash provided by operating activities between the fiscal years ending in 2018 and 2019?", + "output": "What is the difference in net cash provided by operating activities between fiscal year ended 2018 and 2019?" + }, + { + "input": "What are the specific components included in the \"other current assets\" category in the table?", + "output": "What are the components under other current assets in the table?" + }, + { + "input": "What was the difference in the total value of other current assets between 2019 and 2018?", + "output": "What was the change in total other current assets in 2019 from 2018?" + }, + { + "input": "Which items in the table include the cumulative effect of the 2017 Tax Reform Act in their 2017 amounts?", + "output": "For which items in the table does the amount for 2017 includes the sum related to the impact of the 2017 Tax Reform Act?" + }, + { + "input": "Which items in the table include the sum related to the impact of the 2017 Tax Reform Act in their amount for 2017?", + "output": "For which items in the table does the amount for 2017 includes the sum related to the impact of the 2017 Tax Reform Act?" + }, + { + "input": "What was the year when the Diluted Net Income per Common Share reached its highest value?", + "output": "In which year was the Diluted Net Income per Common Share largest?" + }, + { + "input": "In which year did the exclusion of anti-dilutive shares from the calculation of diluted earnings per share reach its highest magnitude?", + "output": "In which year was the Anti-dilutive shares excluded from the diluted earnings per share calculation largest?" 
+ }, + { + "input": "What was the year when the exclusion of anti-dilutive shares from the calculation of diluted earnings per share reached its highest value?", + "output": "In which year was the Anti-dilutive shares excluded from the diluted earnings per share calculation largest?" + }, + { + "input": "Where is Income tax benefit included?", + "output": "Where is the income tax benefit typically included in financial statements or accounting records?" + }, + { + "input": "Where is Income tax benefit included?", + "output": "Where can income tax benefit be found in financial statements or reports?" + }, + { + "input": "When is the calculation of income tax benefit typically performed annually?", + "output": "In which years is income tax benefit calculated?" + }, + { + "input": "What does restricted cash include?", + "output": "What is included in restricted cash and can you provide further details about its components?" + }, + { + "input": "In which year was Restricted cash included in \"Other current assets\" the lowest?", + "output": "In which year did the inclusion of Restricted cash in the category of \"Other current assets\" reach its lowest level?" + }, + { + "input": "What was the difference in the amount of Restricted cash included in the category of \"Deferred charges and other assets\" in the year 2017 compared to 2016?", + "output": "What was the change in Restricted cash included in \"Deferred charges and other assets\" in 2017 from 2016?" + }, + { + "input": "What is the largest recorded year for Net periodic benefit cost?", + "output": "In which year was Net periodic benefit cost the largest?" + }, + { + "input": "What was the difference in the amount of money spent on interest in 2019 compared to 2018?", + "output": "What was the change in interest cost in 2019 from 2018?" + }, + { + "input": "What was the difference in the cost of interest between 2019 and 2018?", + "output": "What was the change in interest cost in 2019 from 2018?" 
+ }, + { + "input": "\"In which year did the amount of deferred revenue reach its highest value and why?\"", + "output": "In which year was deferred revenue larger?" + }, + { + "input": "During which year was the accumulated income received in advance, known as deferred revenue, higher?", + "output": "In which year was deferred revenue larger?" + }, + { + "input": "What was the specific dollar amount of the change in deferred revenue between 2019 and 2018?", + "output": "What was the change in deferred revenue in 2019 from 2018?" + }, + { + "input": "What was the percentage change in deferred revenue in 2019 from 2018?", + "output": "What is the percentage increase or decrease in deferred revenue from 2018 to 2019?" + }, + { + "input": "What was the change in Fair value of share awards vested in 2019 from 2018?", + "output": "What was the variation in the fair value of share awards that were vested in the year 2019 compared to 2018?" + }, + { + "input": "What was the change in Fair value of share awards vested in 2019 from 2018?", + "output": "What was the difference in fair value of share awards that were granted in 2018 but vested in 2019?" + }, + { + "input": "During which years was information regarding the credit agreements provided?", + "output": "In which years is information related to the credit agreements provided?" + }, + { + "input": "What is the least year in which the Interest expense reached its minimum amount?", + "output": "In which year was the amount of Interest expense the smallest?" + }, + { + "input": "When was the least amount of interest expense recorded?", + "output": "In which year was the amount of Interest expense the smallest?" + }, + { + "input": "What was the change in Interest expense in 2019 from 2018?", + "output": "What was the difference in the amount of money spent on interest payments in 2019 compared to 2018?" 
+ }, + { + "input": "What was the percentage increase or decrease in the amount spent on interest in 2019 compared to 2018?", + "output": "What was the percentage change in Interest expense in 2019 from 2018?" + }, + { + "input": "What was the value of purchases that were accumulated or outstanding as of the end of 2019?", + "output": "What was the amount of Accrued purchases in 2019?" + }, + { + "input": "What was the specific amount of Accrued legal and professional fees incurred during the year 2018?", + "output": "What was the amount of Accrued legal and professional fees in 2018?" + }, + { + "input": "\"How much money was recorded for Accrued legal and professional fees during the year 2018?\"", + "output": "What was the amount of Accrued legal and professional fees in 2018?" + }, + { + "input": "What was the percentage difference in Accrued roadside assistance claim costs between 2019 and 2018?", + "output": "What was the change in Accrued roadside assistance claim costs in 2019 from 2018?" + }, + { + "input": "\"What was the specific amount charged for the service in the year 2019?\"", + "output": "What was the service cost in 2019?" + }, + { + "input": "\"What was the cost of the service provided in the year 2019?\"", + "output": "What was the service cost in 2019?" + }, + { + "input": "What was the total cost of interest expenses in the year 2018?", + "output": "What was the Interest cost in 2018?" + }, + { + "input": "What was the total amount spent on interest expenses in 2018?", + "output": "What was the Interest cost in 2018?" + }, + { + "input": "\"In which specific year did the cost of services exceed the cost of services in the other years?\"", + "output": "In which year was service cost larger?" + }, + { + "input": "Which year had a greater service cost compared to other years?", + "output": "In which year was service cost larger?" 
+ }, + { + "input": "What was the difference in the amount spent on interest in 2019 compared to 2018?", + "output": "What was the change in interest cost in 2019 from 2018?" + }, + { + "input": "\"What was the gross revenue generated by overseas operations, excluding any income taxes, in the fiscal year 2018?\"", + "output": "What was the Foreign income before income taxes in 2018?" + }, + { + "input": "What was the total income from foreign sources in 2018, before any deductions for income taxes were made?", + "output": "What was the Foreign income before income taxes in 2018?" + }, + { + "input": "What are the components under income before income taxes?", + "output": "What are the individual components that make up the category of income before income taxes?" + }, + { + "input": "In which specific year, prior to the implementation of income taxes, did individuals and/or organizations experience the highest level of income?", + "output": "In which year was income before income taxes the largest?" + }, + { + "input": "What is the largest recorded income before income taxes, and in which specific year was it achieved?", + "output": "In which year was income before income taxes the largest?" + }, + { + "input": "In which specific year did the quantity of an item categorized as \"Other\" exceed the quantity of another item, considering all provided information?", + "output": "In which year was the amount of Other larger?" + }, + { + "input": "In which year did the quantity of a different item exceed the quantity of the rest?", + "output": "In which year was the amount of Other larger?" + }, + { + "input": "What was the year-over-year difference in accrued severance and associated expenses between 2018 and 2019?", + "output": "What was the change in Accrued severance and related costs in 2019 from 2018?" + }, + { + "input": "What was the total payment made by the Company in the year 2019?", + "output": "How much has the Company paid in total in 2019?" 
+ }, + { + "input": "What was the total amount paid by the Company in 2019?", + "output": "How much has the Company paid in total in 2019?" + }, + { + "input": "When did the amount of non-cash impairment charges exceed previous years?", + "output": "In which year was Non-cash impairment charges larger?" + }, + { + "input": "Which year had a greater amount of non-cash impairment charges compared to other years?", + "output": "In which year was Non-cash impairment charges larger?" + }, + { + "input": "During which years is the fair value of restricted shares or restricted stock units (RSUs) vested determined?", + "output": "In which years is the Fair value of restricted shares/RSUs vested calculated?" + }, + { + "input": "What was the change in Number of restricted shares/ RSUs granted in 2019 from 2018?", + "output": "What was the difference in the number of restricted shares/RSUs granted in 2019 compared to 2018?" + }, + { + "input": "What was the change in Number of restricted shares/ RSUs granted in 2019 from 2018?", + "output": "What was the difference in the number of restricted shares/ RSUs granted in 2019 compared to 2018?" + }, + { + "input": "What was the percentage change in Number of restricted shares/ RSUs granted in 2019 from 2018?", + "output": "What was the percentage change in the number of restricted shares/ RSUs granted from 2018 to 2019?" + }, + { + "input": "As of July 1, 2019, what was the total value of stapled securities of NSR listed on the Australian Securities Exchange?", + "output": "How much did NSR had stapled securities quoted on the Australian Securities Exchange as at 1 July 2019?" + }, + { + "input": "As of July 1, 2019, what was the total value of NSR stapled securities quoted on the Australian Securities Exchange?", + "output": "How much did NSR had stapled securities quoted on the Australian Securities Exchange as at 1 July 2019?" 
+ }, + { + "input": "What would happen if the price of a security dropped by 2.5% and how would this affect the overall market?", + "output": "What would be the impact if security price decreased by 2.5%?" + }, + { + "input": "What is the change in Other intangible assets Opening net book value from 2018 to 2019?", + "output": "What is the difference in the net book value of Other intangible assets at the beginning of 2018 compared to the beginning of 2019?" + }, + { + "input": "What is the change in Other intangible assets Opening net book value from 2018 to 2019?", + "output": "What is the difference in the opening net book value of Other intangible assets between 2018 and 2019?" + }, + { + "input": "How many distinct business segments were there in the years 2019 and 2018?", + "output": "How many business segments were present in 2019 and 2018?" + }, + { + "input": "\"What is the average revenue generated from New Zealand in both 2018 and 2019?\"", + "output": "What is the average revenue from New Zealand for 2018 and 2019?" + }, + { + "input": "What is the average revenue generated from New Zealand in both 2018 and 2019?", + "output": "What is the average revenue from New Zealand for 2018 and 2019?" + }, + { + "input": "In which specific year did the revenue from New Zealand amount to less than 10,000 thousands?", + "output": "In which year was revenue from New Zealand under 10,000 thousands?" + }, + { + "input": "In what specific year did the revenue generated from New Zealand fall below 10,000 thousands (in the same currency)?", + "output": "In which year was revenue from New Zealand under 10,000 thousands?" + }, + { + "input": "What was the total NLA in Brisbane?", + "output": "What was the total number of Newly Listed Apartments (NLA) in Brisbane?" 
+ }, + { + "input": "What is the difference in the NLA between Sunshine Cost and Brisbane?", + "output": "What are the NLA differences between the Sunshine Coast and Brisbane, and how do they vary from each other in terms of specific aspects?" + }, + { + "input": "In which year is the Net investment hedge negative?", + "output": "Which specific year does the Net investment hedge demonstrate a negative value?" + }, + { + "input": "What is the change in the Taxation impact on revaluation from 2018 to 2019?", + "output": "What is the difference in the taxation impact on revaluation between 2018 and 2019?" + }, + { + "input": "What is the change in the Taxation impact on revaluation from 2018 to 2019?", + "output": "What is the difference in the effect of taxation on revaluation between 2018 and 2019?" + }, + { + "input": "What was the share of profit from associates representing NSRs in 2019 and 2018?", + "output": "What were the percentages of profit from associates, specifically those representing NSRs, for the years 2019 and 2018?" + }, + { + "input": "What is the difference in the percentage of profit attributed to associates between 2018 and 2019?", + "output": "What is the change in the Share of profit from associates from 2018 to 2019?" + }, + { + "input": "What is the difference in the percentage of profit allocated to associates between 2018 and 2019?", + "output": "What is the change in the Share of profit from associates from 2018 to 2019?" + }, + { + "input": "When was the year when the closing balance on June 30th was below 11,000 thousand units?", + "output": "In which year was the Closing balance at 30 June less than 11,000 thousands?" + }, + { + "input": "What is the year-over-year change in face value for New Zealand Dollar (NZD) interest rate swaps from 2018 to 2019?", + "output": "What is the change in Interest rate swaps (NZD) at face value for Current interest rate swaps from 2018 to 2019?" 
+ }, + { + "input": "What was the percentage change in face value for current New Zealand Dollar (NZD) interest rate swaps from 2018 to 2019?", + "output": "What is the change in Interest rate swaps (NZD) at face value for Current interest rate swaps from 2018 to 2019?" + }, + { + "input": "What was the assets held for sale under Level 1, Level 2 and Level 3 in 2018?", + "output": "In 2018, what were the assets classified as held for sale and valued at Level 1, Level 2, and Level 3 according to their respective classifications?" + }, + { + "input": "What was the amount of senior secured term loan for fiscal years 2019 and 2018?", + "output": "What is the value of senior secured term loan for fiscal years 2019 and 2018 respectively?" + }, + { + "input": "What is the percentage shift in the amount of finance leases and other financing obligations from fiscal year 2018 to fiscal year 2019?", + "output": "What is the percentage change in the finance leases and other financing obligations between fiscal years 2018 and 2019?" + }, + { + "input": "What is the average finance leases and other financing obligations for fiscal years 2018 and 2019?", + "output": "What is the mean value of finance leases and other financing obligations for fiscal years 2018 and 2019?" + }, + { + "input": "What are the income tax expenses for the years 2019, 2018, and 2017?", + "output": "What is the total current income tax expense for 2019, 2018 and 2017 respectively?" + }, + { + "input": "What is the amount of deferred income tax expense (benefit) recorded for the years 2019, 2018, and 2017, respectively?", + "output": "What is the Total deferred income tax expense (benefit) for 2019, 2018 and 2017 respectively?" + }, + { + "input": "What is the average total income tax expense for the years 2017, 2018, and 2019?", + "output": "What is the average Total income tax expense for 2019, 2018 and 2017?" 
+ }, + { + "input": "What is the average aggregate size of the facilities in Arizona and Washington?", + "output": "What is the average total size of the facilities located in both Arizona and Washington?" + }, + { + "input": "\"How many states in the United States have a total of more than 5 facilities?\"", + "output": "How many states have more than 5 facilities?" + }, + { + "input": "What percentage of the total contractual obligations does the total inventory purchase commitments represent?", + "output": "What is the percentage constitution of total inventory purchase commitments among the total contractual obligations?" + }, + { + "input": "What percentage of the total contractual obligations is constituted by the inventory purchase commitments in the overall inventory?", + "output": "What is the percentage constitution of total inventory purchase commitments among the total contractual obligations?" + }, + { + "input": "\"What specific component within a contractual obligation holds the highest overall value?\"", + "output": "Which component of contractual obligation has the highest total value?" + }, + { + "input": "At December 29, 2017, how many shares were still unvested?", + "output": "What is the number of shares that are Unvested at December 29, 2017?" + }, + { + "input": "What is the length of time that buildings are typically considered useful for?", + "output": "What is the useful lives of buildings?" + }, + { + "input": "What are the estimated useful lives or lifespans of machinery and equipment commonly used in various industries or sectors?", + "output": "What is the useful lives of Machinery and equipment?" + }, + { + "input": "What are the estimated useful lifespans of machinery and equipment? How long can machinery and equipment be expected to remain functional and productive?", + "output": "What is the useful lives of Machinery and equipment?" 
+ }, + { + "input": "What is the average monetary worth of buildings in the years 2018 and 2019?", + "output": "What is the average value of buildings for 2018 and 2019?" + }, + { + "input": "What is the average monetary worth of buildings in the years 2018 and 2019 combined?", + "output": "What is the average value of buildings for 2018 and 2019?" + }, + { + "input": "What is the average adjusted net income from 2017-2019?", + "output": "\"What is the average adjusted net income for the years 2017, 2018, and 2019?\"" + }, + { + "input": "What were the values of receivables and inventory in the years 2019 and 2018, respectively?", + "output": "What is the value of Receivables and inventory for 2019 and 2018 respectively?" + }, + { + "input": "What is the value of Accrued expenses for 2019 and 2018 respectively?", + "output": "What is the amount of Accrued expenses in 2019 and 2018?" + }, + { + "input": "What is the value of Self-insurance reserves for 2019 and 2018 respectively?", + "output": "What are the self-insurance reserve values for the years 2019 and 2018?" + }, + { + "input": "What are the average values of receivables and inventory for the years 2019 and 2018?", + "output": "What is the average value of Receivables and inventory for 2019 and 2018?" + }, + { + "input": "What are the average values of Receivables and inventory for the years 2019 and 2018?", + "output": "What is the average value of Receivables and inventory for 2019 and 2018?" + }, + { + "input": "In which year did the Accrued expenses reach the highest level?", + "output": "Which year has the highest Accrued expenses?" + }, + { + "input": "In which specific year did the Accrued expenses reach their highest value?", + "output": "Which year has the highest Accrued expenses?" + }, + { + "input": "Which year recorded the highest Senior Secured Term Loan amount?", + "output": "Which year has the highest Senior secured term loan?" 
+ }, + { + "input": "What information does the table present?", + "output": "What data is being shown in the table and what details can be derived from it?" + }, + { + "input": "What is the change in long-term portion of contingent earn-out liabilities for 2018 and 2019?", + "output": "What is the difference in the long-term portion of contingent earn-out liabilities between 2018 and 2019?" + }, + { + "input": "What were the statutory U.S. Federal tax rates for the years 2019, 2018, and 2017?", + "output": "What is the Statutory U.S. Federal tax for 2019, 2018 and 2017 respectively?" + }, + { + "input": "What are the respective statutory federal tax rates for the years 2019, 2018, and 2017 in the United States?", + "output": "What is the Statutory U.S. Federal tax for 2019, 2018 and 2017 respectively?" + }, + { + "input": "How much did the valuation allowance change in 2017?", + "output": "What is the change in valuation allowance for 2017?" + }, + { + "input": "What was the difference in valuation allowance between 2016 and 2017?", + "output": "What is the change in valuation allowance for 2017?" + }, + { + "input": "What has been the average amount of Statutory U.S. Federal tax paid annually for the years 2017, 2018, and 2019?", + "output": "What is the average Statutory U.S. Federal tax from 2017-2019?" + }, + { + "input": "What was the average statutory U.S. federal tax rate for the years 2017-2019?", + "output": "What is the average Statutory U.S. Federal tax from 2017-2019?" + }, + { + "input": "What is the Working capital, excluding cash and cash equivalents for fiscal years 2019, 2018 and 2017 respectively?", + "output": "What is the year-end working capital, excluding cash and cash equivalents, for the fiscal years 2019, 2018, and 2017, respectively?" 
+ }, + { + "input": "What is the Working capital, excluding cash and cash equivalents for fiscal years 2019, 2018 and 2017 respectively?", + "output": "What is the Working Capital, excluding cash and cash equivalents, for the fiscal years 2019, 2018, and 2017?" + }, + { + "input": "What are the average values of Cash and cash equivalents for fiscal years 2019, 2018, and 2017?", + "output": "What is the average value for the Cash and cash equivalents for fiscal years 2019, 2018 and 2017?" + }, + { + "input": "What are the average values of the Availability under asset-based loan facility for the fiscal years 2019, 2018, and 2017?", + "output": "What is the average value for the Availability under asset-based loan facility for fiscal years 2019, 2018 and 2017?" + }, + { + "input": "What were the net income (or net losses) for the years 2019, 2018, and 2017 respectively?", + "output": "What was the Net (loss) income in 2019, 2018 and 2017 respectively?" + }, + { + "input": "What were the respective net losses or incomes for the years 2019, 2018, and 2017?", + "output": "What was the Net (loss) income in 2019, 2018 and 2017 respectively?" + }, + { + "input": "In which year was Adjusted EBITDA less than 20,000 thousands?", + "output": "\"In which specific year, out of the given years, was the Adjusted EBITDA figure lower than 20,000 thousands?\"" + }, + { + "input": "In which year was Adjusted EBITDA less than 20,000 thousands?", + "output": "In which specific year was the Adjusted EBITDA, expressed in thousands, less than 20,000?" + }, + { + "input": "What was the percentage increase in the expense incurred for Research and Development in the year 2018 compared to the previous period?", + "output": "What was the increase in Research and development expense in 2018?" 
+ }, + { + "input": "\"What was the percentage increase in Research and Development (R&D) expense during the year 2018 compared to the previous year?\"", + "output": "What was the increase in Research and development expense in 2018?" + }, + { + "input": "What were the average research and development expenses in both 2017 and 2018?", + "output": "What was the average Research and development expenses in 2017 and 2018?" + }, + { + "input": "What were the respective average amounts spent on research and development in 2017 and 2018?", + "output": "What was the average Research and development expenses in 2017 and 2018?" + }, + { + "input": "What was the change in the Net operating loss carryforwards from 2018 to 2019?", + "output": "What was the numerical difference between the Net operating loss carryforwards in 2018 and 2019?" + }, + { + "input": "\"In which specific year did the value of Deferred revenue fall below 2,000?\"", + "output": "In which year was Deferred revenue less than 2,000?" + }, + { + "input": "What is the difference in the amount of accumulated depreciation between March 31, 2018, and March 31, 2019?", + "output": "What is the change in Accumulated depreciation from March 31, 2018 to March 31, 2019?" + }, + { + "input": "How much did the Accumulated depreciation increase between March 31, 2018 and March 31, 2019?", + "output": "What is the change in Accumulated depreciation from March 31, 2018 to March 31, 2019?" + }, + { + "input": "What was the percentage change in the General and administrative expenses in 2019 compared to the previous year?", + "output": "What was the increase in the General and administrative expenses in 2019?" + }, + { + "input": "What was the percentage increase in the General and administrative expenses from 2018 to 2019?", + "output": "What was the increase in the General and administrative expenses in 2019?" 
+ }, + { + "input": "\"Who has the authority over the management and administration of the company's operations in a specific country?\"", + "output": "Which country controls the company's affairs?" + }, + { + "input": "\"Who has jurisdiction over the overall management and operations of the company?\"", + "output": "Which country controls the company's affairs?" + }, + { + "input": "What was the change in the U.S. state taxes, net of federal from 2018 to 2019?", + "output": "What was the difference in the amount of state taxes paid in the United States, after deducting federal taxes, from 2018 to 2019?" + }, + { + "input": "What was the change in the U.S. state taxes, net of federal from 2018 to 2019?", + "output": "What was the difference in the amount of state taxes, after deducting federal taxes, between the years 2018 and 2019 in the United States?" + }, + { + "input": "In which year was Tax credits less than 10.0?", + "output": "When was the year when tax credits were less than 10.0 units?" + }, + { + "input": "In which year was Tax credits less than 10.0?", + "output": "In which specific year did the amount of Tax credits fall below 10.0?" + }, + { + "input": "What was the exact amount of share-based compensation expense incurred during the year 2019?", + "output": "What was the Total share-based compensation expense in 2019?" + }, + { + "input": "\"What changes, if any, took place once the IPO concluded?\"", + "output": "What became effective upon the closing of the IPO?" + }, + { + "input": "What were the initial balances in the years 2019 and 2018, respectively?", + "output": "What was the Beginning balance in 2019 and 2018 respectively?" + }, + { + "input": "What were the starting balances for the year 2019 and 2018?", + "output": "What was the Beginning balance in 2019 and 2018 respectively?" 
+ }, + { + "input": "How much did the Company recorded amortization expense for the year ended March 31, 2018?", + "output": "What was the amount of amortization expense recorded by the Company for the fiscal year ending on March 31, 2018?" + }, + { + "input": "How much did the Company recorded amortization expense for the year ended March 31, 2018?", + "output": "What was the amount of amortization expense recorded by the Company for the fiscal year that ended on March 31, 2018?" + }, + { + "input": "How much did the Company recorded amortization expense for the year ended March 31, 2017?", + "output": "What was the amount of amortization expense recorded by the Company for the fiscal year that ended on March 31, 2017?" + }, + { + "input": "What are the main sources of liquidity in financial markets and institutions?", + "output": "What are the principal sources of liquidity?" + }, + { + "input": "What was the total value of outstanding letters of credit associated with specific operating leases as of March 31, 2018?", + "output": "How much was the outstanding letters of credit related to certain operating leases as of March 31, 2018?" + }, + { + "input": "What were the revenue retention rates for the years 2019, 2018, and 2017 in chronological order?", + "output": "What was the Revenue retention rate in 2019, 2018 and 2017 respectively?" + }, + { + "input": "What is the average number of customers in total from 2017 to 2019?", + "output": "What is the average Total customers between 2017-2019?" + }, + { + "input": "\"In which specific year did the Adjusted EBITDA amount to less than 20,000 thousands?\"", + "output": "In which year was Adjusted EBITDA less than 20,000 thousands?" + }, + { + "input": "In which specific year did the Adjusted EBITDA amount to less than 20,000 thousands?", + "output": "In which year was Adjusted EBITDA less than 20,000 thousands?" 
+ }, + { + "input": "What was the amount of Depreciation and amortization expense recorded for the fiscal year that ended on March 31, 2017?", + "output": "How much was Depreciation and amortization expense for the years ended March 31, 2017?" + }, + { + "input": "What was the total amount of Depreciation and amortization expense recorded for the fiscal years ending on March 31, 2017?", + "output": "How much was Depreciation and amortization expense for the years ended March 31, 2017?" + }, + { + "input": "What is the net increase or decrease in the Accumulated Amortization balance from March 31, 2018, to March 31, 2019?", + "output": "What is the change in Accumulated amortization from March 31, 2018 to March 31, 2019?" + }, + { + "input": "What is the net increase or decrease in the amount of Accumulated Amortization between March 31, 2018, and March 31, 2019?", + "output": "What is the change in Accumulated amortization from March 31, 2018 to March 31, 2019?" + }, + { + "input": "What was the percentage change in the Risk-free interest rate from 2018 to 2019?", + "output": "What was the change in the Risk-free interest rate from 2018 to 2019?" + }, + { + "input": "What was the average level of expected volatility recorded between 2017 and 2019?", + "output": "What was the average Expected volatility between 2017-2019?" + }, + { + "input": "What was the average expected volatility during the period of 2017-2019?", + "output": "What was the average Expected volatility between 2017-2019?" + }, + { + "input": "In which specific year did the grant date fair value per ordinary share fall below 30.0?", + "output": "In which year was the Grant date fair value per ordinary share less than 30.0?" + }, + { + "input": "When was the year when the grant date fair value per ordinary share fell below 30.0?", + "output": "In which year was the Grant date fair value per ordinary share less than 30.0?" 
+ }, + { + "input": "\"What was the specific quantity or value of work that was at the intermediate stage of completion in the year 2018?\"", + "output": "What was the amount of work in process in 2018?" + }, + { + "input": "What was the change in finished goods between 2018 and 2019?", + "output": "What was the numerical difference in the amount of finished goods produced between the years 2018 and 2019?" + }, + { + "input": "What was the specific amount of interest incurred in the year 2018?", + "output": "What was the interest cost in 2018?" + }, + { + "input": "What was the change in interest cost between 2018 and 2019?", + "output": "By how much did the interest cost change from 2018 to 2019?" + }, + { + "input": "For how many years did the actuarial loss surpass $1,000 thousand and receive recognition?", + "output": "How many years did the recognized actuarial loss exceed $1,000 thousand?" + }, + { + "input": "What was the total amount of insurance proceeds received during the fiscal year that ended on August 31, 2019?", + "output": "What were the insurance proceeds for the fiscal year ended August 31, 2019?" + }, + { + "input": "\"What was the total amount of insurance claims paid out during the fiscal year that concluded on August 31, 2019?\"", + "output": "What were the insurance proceeds for the fiscal year ended August 31, 2019?" + }, + { + "input": "What was the specific difference in the amount of Restructuring and related charges incurred by the company in the years 2018 and 2019?", + "output": "What was the change in Restructuring and related charges between 2018 and 2019?" + }, + { + "input": "What were the reductions for tax positions primarily related to?", + "output": "\"What was the primary reason for the reductions in tax positions?\"" + }, + { + "input": "What is the company's definition of working capital and how is it calculated?", + "output": "How does the company define working capital?" 
+ }, + { + "input": "What was the amount of assets in 2019?", + "output": "What were the total assets in 2019?" + }, + { + "input": "What was the value of the total assets in the year 2019?", + "output": "What were the total assets in 2019?" + }, + { + "input": "What is the range of years covered in the table?", + "output": "What are the years included in the table?" + }, + { + "input": "What was the change in Current installments of notes payable and long-term debt between 2018 and 2019?", + "output": "What was the difference in the amount of current installments of notes payable and long-term debt between the year 2018 and 2019?" + }, + { + "input": "What was the change in Current installments of notes payable and long-term debt between 2018 and 2019?", + "output": "What is the difference in the amount of current installments of notes payable and long-term debt between the years 2018 and 2019?" + }, + { + "input": "What was the percentage increase or decrease in the stockholders' equity of Total Jabil Inc. from 2018 to 2019?", + "output": "What was the percentage change in Total Jabil Inc. stockholders’ equity between 2018 and 2019?" + }, + { + "input": "What was the specific net revenue figure reported?", + "output": "What was the net revenue as reported?" + }, + { + "input": "What was the operating income as reported?", + "output": "What was the reported operating income amount?" + }, + { + "input": "What was the specific distinction between the reported net revenue and cost of revenue? Please elaborate on the differences observed between these two financial measures.", + "output": "What was the difference between net revenue and cost of revenue as reported?" + }, + { + "input": "What was the distinction between the net revenue and cost of revenue as stated in the report, and can you provide further details to assist in answering?", + "output": "What was the difference between net revenue and cost of revenue as reported?" 
+ }, + { + "input": "What was the operating income balance expressed as a ratio of the amount reported, before considering the implementation of ASU 2014-09?", + "output": "What was the operating income balance without the adoption of ASU 2014-09 as a ratio of the amount as reported?" + }, + { + "input": "What was the ratio of the operating income balance without the adoption of ASU 2014-09 to the amount as reported?", + "output": "What was the operating income balance without the adoption of ASU 2014-09 as a ratio of the amount as reported?" + }, + { + "input": "By what amount did the gross profit increase or decrease from May 2018 to August 2018?", + "output": "What was the change in gross profit between May 2018 and August 2018?" + }, + { + "input": "What are the distinctions between the September 2018 balances of contract assets and contract liabilities? Please provide a comprehensive explanation while keeping all the details intact.", + "output": "What is the difference between the balance in September 2018 for contract assets and contract liabilities?" + }, + { + "input": "What was the percentage change for Other accrued expenses due to adjustments by the new standard?", + "output": "What was the percentage change specifically for Other accrued expenses as a result of adjustments made under the new standard?" + }, + { + "input": "What was the percentage change for Other accrued expenses due to adjustments by the new standard?", + "output": "What was the percentage change in Other accrued expenses after incorporating adjustments made by the new standard? Please provide the specific percentage change." + }, + { + "input": "What is the exact amount of net revenue generated in the fiscal year 2019?", + "output": "What was the net revenue in 2019?" + }, + { + "input": "What was the total revenue generated during the year 2019 after deducting all expenses and costs?", + "output": "What was the net revenue in 2019?" 
+ }, + { + "input": "What was the gross profit in 2018?", + "output": "What was the gross profit generated in the year 2018?" + }, + { + "input": "What was the operating income in 2017?", + "output": "What was the operating income for the year 2017?" + }, + { + "input": "What was the change in gross profit between 2017 and 2018?", + "output": "What was the difference in gross profit from 2017 to 2018?" + }, + { + "input": "What was the change in Prepaid expenses and other current assets due to the adoption of ASU 2014-09?", + "output": "\"What was the impact on Prepaid expenses and other current assets resulting from the adoption of ASU 2014-09? Please provide details and explain the specific changes that occurred as a result of this accounting standard.\"" + }, + { + "input": "How many reported segments have a value exceeding $2,000,000?", + "output": "How many segments as reported exceed $2,000,000 thousand?" + }, + { + "input": "What were the items incorporated within the amounts for the fiscal year that concluded on August 31, 2017?", + "output": "What did the amounts in fiscal year ended August 31, 2017 include?" + }, + { + "input": "What expenses or revenues were included in the amounts reported for the fiscal year that ended on August 31, 2017?", + "output": "What did the amounts in fiscal year ended August 31, 2017 include?" + }, + { + "input": "What was the difference in the costs of severance and employee benefits for the company from 2017 to 2018?", + "output": "What was the change in Employee severance and benefit costs between 2017 and 2018?" + }, + { + "input": "Which periods does the table include?", + "output": "What time periods are encompassed in the table's content?" + }, + { + "input": "\"What specific items or transactions were encompassed within the category of acquisitions and adjustments?\"", + "output": "What did Acquisitions and adjustments include?" 
+ }, + { + "input": "What were the variations in acquisitions and adjustments made in fiscal year 2018 between the Electronic Manufacturing Services (EMS) and Document Management Systems (DMS)?", + "output": "What is the difference in Acquisitions and adjustments in fiscal 2018 between EMS and DMS?" + }, + { + "input": "What are the distinctions in acquisitions and adjustments, specifically pertaining to fiscal year 2018, when comparing EMS (Emergency Medical Services) and DMS (Document Management Systems)?", + "output": "What is the difference in Acquisitions and adjustments in fiscal 2018 between EMS and DMS?" + }, + { + "input": "What was the balance of EMS as in 2018 as a percentage of the total balance at the end of fiscal 2018?", + "output": "What percentage of the total balance at the end of fiscal 2018 represented the balance of EMS in 2018?" + }, + { + "input": "What categories of income or loss are included in the table's income (loss) figures before accounting for income tax expense?", + "output": "What types of income (loss) before income tax expense is provided by the table?" + }, + { + "input": "What was the foreign income (loss) in 2018?", + "output": "What was the amount of foreign income or loss for the year 2018?" + }, + { + "input": "What was the change in Foreign income (loss) between 2018 and 2019?", + "output": "What was the difference in the amount of income or loss generated from foreign sources, comparing the years 2018 and 2019?" + }, + { + "input": "What was the change in Foreign income (loss) between 2018 and 2019?", + "output": "What was the difference in the amount of foreign income or loss reported between 2018 and 2019?" + }, + { + "input": "\"How many annual periods experienced a Foreign income loss that was greater than $800,000 thousand dollars?\"", + "output": "How many years did Foreign income (loss) exceed $800,000 thousand?" 
+ }, + { + "input": "For how many years was the amount of Foreign income (loss) greater than $800,000 thousand?", + "output": "How many years did Foreign income (loss) exceed $800,000 thousand?" + }, + { + "input": "What was the change in Accumulated benefit obligation between 2018 and 2019?", + "output": "What was the specific numerical difference in the Accumulated Benefit Obligation (ABO) amount between the fiscal years of 2018 and 2019?" + }, + { + "input": "\"What were the specific expenses incurred by the company for employee severance and benefits during the year 2018?\"", + "output": "What were the Employee severance and benefit costs in 2018?" + }, + { + "input": "What were the severance and benefit costs for employees in the year 2018?", + "output": "What were the Employee severance and benefit costs in 2018?" + }, + { + "input": "What was the precise difference in terms of employee severance and benefit costs from 2018 to 2019?", + "output": "What was the change in Employee severance and benefit costs between 2018 and 2019?" + }, + { + "input": "How many years did cash proceeds received exceed $5,000 million?", + "output": "How many years have the cash proceeds received been above $5,000 million?" + }, + { + "input": "How many years did cash proceeds received exceed $5,000 million?", + "output": "For how many years did the cash proceeds received exceed $5,000 million?" + }, + { + "input": "What was the percentage increase or decrease in pre-tax losses on the sale of receivables from 2017 to 2018?", + "output": "What was the percentage change in Pre-tax losses on sale of receivables between 2017 and 2018?" + }, + { + "input": "What was the difference in the amount of money spent on repurchasing shares in 2016 compared to 2017?", + "output": "What was the change in share repurchases between 2016 and 2017?" 
+ }, + { + "input": "What was the percentage increase or decrease in dividend payments from 2018 to 2019?", + "output": "What was the percentage change in dividends paid between 2018 and 2019?" + }, + { + "input": "What was the percentage increase or decrease in dividends paid from 2018 to 2019?", + "output": "What was the percentage change in dividends paid between 2018 and 2019?" + }, + { + "input": "What was the specific topic of discussion in the DMS segment?", + "output": "What was the DMS segment focused on?" + }, + { + "input": "What was the change in the net revenue from EMS between 2018 and 2019?", + "output": "What was the difference in net revenue generated by EMS between the years 2018 and 2019?" + }, + { + "input": "What was the change in the net revenue from EMS between 2018 and 2019?", + "output": "What was the exact difference in net revenue for EMS between the years 2018 and 2019?" + }, + { + "input": "What was the average year-on-year percentage change in total net revenue from 2017-2019?", + "output": "What was the average annual percentage increase or decrease in the total net revenue from 2017 to 2019?" + }, + { + "input": "What was the average year-on-year percentage change in total net revenue from 2017-2019?", + "output": "What was the average percentage change in net revenue from year to year for the total period between 2017 and 2019?" + }, + { + "input": "What is the difference in the balance of Employee Severance and Benefit Costs as of August between 2017 and 2018?", + "output": "What is the change in the balance as of August in Employee Severance and Benefit Costs between 2017 and 2018?" 
+ }, + { + "input": "What were the lease costs in 2017 as a percentage of the total balance in 2017?", + "output": "\"What was the percentage of lease costs in 2017 compared to the total balance in 2017?\"" + }, + { + "input": "What were the lease costs in 2017 as a percentage of the total balance in 2017?", + "output": "\"What proportion of the total balance in 2017 was allocated towards lease costs?\"" + }, + { + "input": "\"What was the amount of the domestic-federal income tax expense (benefit) recorded for the year 2017?\"", + "output": "What was the current domestic-federal income tax expense (benefit) in 2017?" + }, + { + "input": "What was the amount of domestic-federal income tax expense (benefit) recorded in the financial statements for the year 2017?", + "output": "What was the current domestic-federal income tax expense (benefit) in 2017?" + }, + { + "input": "What was the amount of income tax expense or benefit incurred for domestic operations in 2019?", + "output": "What was the current domestic-state income tax expense (benefit) in 2019?" + }, + { + "input": "In 2019, what was the amount of income tax expense or benefit incurred at the domestic state level?", + "output": "What was the current domestic-state income tax expense (benefit) in 2019?" + }, + { + "input": "What is the change in the company's domestic-state income tax expense (benefit) between 2018 and 2019?", + "output": "What is the difference in the domestic-state income tax expense (benefit) for the company from 2018 to 2019?" + }, + { + "input": "What is the change in the company's domestic-state income tax expense (benefit) between 2018 and 2019?", + "output": "What was the difference in the domestic-state income tax expense (benefit) of the company from 2018 to 2019?" + }, + { + "input": "What were the types of land and the associated improvements that existed in the year 2019?", + "output": "What were the land and improvements in 2019?" 
+ }, + { + "input": "What was the total monetary worth of all buildings in the year 2018?", + "output": "What was the value of buildings in 2018?" + }, + { + "input": "What was the percentage change in total property, plant and equipment between 2018 and 2019?", + "output": "What was the percentage increase or decrease in the total value of property, plant, and equipment from 2018 to 2019?" + }, + { + "input": "Based on what factors or criteria were the fair value estimates for Level 3 determined?", + "output": "What were the Level 3 fair value estimates based on?" + }, + { + "input": "What factors or criteria were taken into consideration when determining the Level 3 fair value estimates?", + "output": "What were the Level 3 fair value estimates based on?" + }, + { + "input": "What was the change in the fair value for the 3.950% Senior Notes between 2018 and 2019?", + "output": "What was the difference in fair value for the 3.950% Senior Notes from 2018 to 2019?" + }, + { + "input": "What is the significance of the amount awarded as shares, which is contingent upon meeting specific performance criteria?", + "output": "What does the amount for shares granted that are based on the achievement of certain performance criteria represent?" + }, + { + "input": "What was the percentage difference in the number of shares from 2018 to 2019 and how do I calculate it accurately?", + "output": "What was the percentage change in the number of shares between 2018 and 2019?" + }, + { + "input": "What was the percentage change in Expected dividend yield between 2017 and 2018?", + "output": "What was the percentage adjustment in the expected dividend yield from 2017 to 2018?" + }, + { + "input": "What were the restricted stock units in 2019?", + "output": "What is the significance of restricted stock units in 2019, and can you provide more details about them?" 
+ }, + { + "input": "What were the details and features of the Employee stock purchase plan (ESPP) implemented in 2018?", + "output": "What was the Employee stock purchase plan in 2018?" + }, + { + "input": "What was the change in Other stock-based compensation expenses between 2017 and 2018?", + "output": "What was the difference in the expenses related to stock-based compensation for Other category between the years 2017 and 2018?" + }, + { + "input": "\"What was the difference in the number of restricted stock units awarded by a company in 2018 compared to 2019?\"", + "output": "What was the change in the restricted stock units between 2018 and 2019?" + }, + { + "input": "What was the net change in the number of restricted stock units from 2018 to 2019?", + "output": "What was the change in the restricted stock units between 2018 and 2019?" + }, + { + "input": "What does the key management comprise of?", + "output": "What is included in key management?" + }, + { + "input": "What is the combined value of all shares held in joint ventures as of December 31, 2019?", + "output": "What is the total value of shares at 31 December 2019 for each joint venture?" + }, + { + "input": "What is the precise percentage change in revenue losses experienced by the United Kingdom from the year 2018 to 2019?", + "output": "What is the percentage change in the revenue losses in UK from 2018 to 2019?" + }, + { + "input": "What is the percentage increase or decrease in the revenue losses experienced in the United Kingdom from 2018 to 2019?", + "output": "What is the percentage change in the revenue losses in UK from 2018 to 2019?" + }, + { + "input": "What was the net amount of debt in 2019?", + "output": "What is the Net debt in 2019?" + }, + { + "input": "What was the net debt amount in the year 2019?", + "output": "What is the Net debt in 2019?" 
+ }, + { + "input": "What is the percentage change in the net debt from 2018 to 2019?", + "output": "What is the percentage increase or decrease in the net debt from 2018 to 2019?" + }, + { + "input": "What were the fees payable by the Group's joint ventures in 2019?", + "output": "What were the specific fees that the Group's joint ventures had to pay in the year 2019?" + }, + { + "input": "What were the fees payable by the Group's joint ventures in 2019?", + "output": "What were the fees that the Group's joint ventures had to pay in 2019? Could you provide details on the specific amounts payable by each joint venture?" + }, + { + "input": "What proportion of the total borrowings in 2019, including both the principal and interest, will be due within a period of 5 years?", + "output": "What is the percentage of borrowings (including interest) that matures over 5 years in the total borrowings in 2019?" + }, + { + "input": "What was the net asset value (NAV) of EPRA in 2018?", + "output": "What is the EPRA NAV in 2018?" + }, + { + "input": "What is the precise percentage difference in the overall investment made in joint ventures for the years 2018 and 2019?", + "output": "What is the percentage change in the total investment in joint ventures from 2018 to 2019?" + }, + { + "input": "What was the interest rate protection provided in 2019?", + "output": "What is the interest rate protection in 2019?" + }, + { + "input": "What is the current interest rate protection in 2019 and how does it work?", + "output": "What is the interest rate protection in 2019?" + }, + { + "input": "What is the percentage difference in the fair value of unallocated swaps between 2018 and 2019?", + "output": "What is the percentage change in the fair value of unallocated swaps from 2018 to 2019?" + }, + { + "input": "What was the difference in the level of interest rate protection from 2018 to 2019?", + "output": "What is the change in interest rate protection between 2018 and 2019?" 
+ }, + { + "input": "What is the year-on-year percentage change in the calculation of contingent rents based on tenants' turnover from 2018 to 2019?", + "output": "What is the percentage change in the amount of contingent rents calculated by reference to tenants' turnover from 2018 to 2019?" + }, + { + "input": "In what year will the future minimum lease amounts receivable be higher than usual?", + "output": "In which year is there higher future minimum lease amounts receivable?" + }, + { + "input": "What were the dividends in respect of these shares have been waived by agreement in 2018?", + "output": "What was the specific agreement regarding the waiver of dividends on these shares in 2018?" + }, + { + "input": "What is the percentage change in the disposals value from 2018 to 2019?", + "output": "What is the percentage difference in the value of disposals from 2018 to 2019, and how can it be calculated?" + }, + { + "input": "What is the current total quantity of units that the Company possesses?", + "output": "What is the total number of units the Company has?" + }, + { + "input": "What is the weighted average exercise price for the outstanding options in the Save As You Earn Scheme as of 31 December 2019?", + "output": "What is the weighted average exercise prices of the outstanding options exercisable at 31 December 2019 for the Save As You Earn Scheme?" + }, + { + "input": "What is the calculated average exercise prices of the options that are currently outstanding and can be exercised as of December 31, 2019, under the Save As You Earn Scheme?", + "output": "What is the weighted average exercise prices of the outstanding options exercisable at 31 December 2019 for the Save As You Earn Scheme?" 
+ }, + { + "input": "What is the percentage change in the total provided deferred tax provision from 1 January 2018 to 31 December 2019?", + "output": "What is the precise percentage change in the total amount of deferred tax provision that was provided from 1 January 2018 to 31 December 2019?" + }, + { + "input": "What was the total amount of administration expenses incurred in the year 2019?", + "output": "What is the administration expense in 2019?" + }, + { + "input": "What is encompassed in diluted shares?", + "output": "What do the diluted shares include?" + }, + { + "input": "What is the percentage of counterparty #5 exposure in the total exposure in 2019?", + "output": "What is the proportion of the exposure to counterparty #5 compared to the total exposure in the year 2019?" + }, + { + "input": "What is the percentage of counterparty #5 exposure in the total exposure in 2019?", + "output": "What is the proportion of counterparty #5's exposure compared to the total exposure in the year 2019?" + }, + { + "input": "What is the percentage difference in the combined value of cash deposits and derivative financial instrument assets between 2018 and 2019?", + "output": "What is the percentage change in the sum of cash deposits and derivative financial instrument assets from 2018 to 2019?" + }, + { + "input": "What is the percentage difference in the loss before tax, including joint ventures and associates, from 2018 to 2019?", + "output": "What is the percentage change in the loss before tax, joint ventures and associates from 2018 to 2019?" + }, + { + "input": "What is the net exposure to foreign exchange risk (euro) in 2018?", + "output": "\"What is the total amount of foreign exchange risk (in euros) faced by a company in the year 2018?\"" + }, + { + "input": "What is the impact on equity attributable to owners of the Group when there is a 10% depreciation in foreign exchange rates from 2018 to 2019? 
Specifically, how does this affect the negative movement of money to equity?", + "output": "What is the change in the negative movement of money to equity attributable to owners of the Group when there is a 10 per cent depreciation in foreign exchange rates from 2018 to 2019?" + }, + { + "input": "What is the impact on the Group's owners' equity when foreign exchange rates depreciate by 10% between 2018 and 2019?", + "output": "What is the change in the negative movement of money to equity attributable to owners of the Group when there is a 10 per cent depreciation in foreign exchange rates from 2018 to 2019?" + }, + { + "input": "What is the percentage difference in the euro amount drawn from 2018 to 2019?", + "output": "What is the percentage change in the amount drawn in euros from 2018 to 2019?" + }, + { + "input": "What is the percentage increase or decrease in the euro amount withdrawn between 2018 and 2019?", + "output": "What is the percentage change in the amount drawn in euros from 2018 to 2019?" + }, + { + "input": "\"In which specific year did the total debt reach its highest level, and is it greater than the total debt in any other year?\"", + "output": "In which year is there a greater total debt?" + }, + { + "input": "In which year does the total debt amount exceed or surpass the debt levels of other years?", + "output": "In which year is there a greater total debt?" + }, + { + "input": "What is the percentage change in the total debt from 2018 to 2019?", + "output": "What is the percentage increase or decrease in the total debt from 2018 to 2019?" + }, + { + "input": "\"What were the average salaries for administrative positions in fiscal years 2019 and 2018?", + "output": "What is the average Administrative salaries for fiscal 2019 and 2018?" + }, + { + "input": "\"What was the average annual salary for administrative employees in fiscal years 2019 and 2018?", + "output": "What is the average Administrative salaries for fiscal 2019 and 2018?" 
+ }, + { + "input": "What is the weekly processing capacity comparison between the plants located in Laurel, Mississippi and Collins, Mississippi?", + "output": "What is the difference in capacity per week between the processing plants at Laurel, Mississippi and Collins, Mississippi? " + }, + { + "input": "What are the average amounts for prepaid insurance in both fiscal years 2019 and 2018?", + "output": "What is the average Prepaid insurance for fiscal years 2019 and 2018?" + }, + { + "input": "What is the difference in the amount of Total prepaid expenses recorded in the financial statements for fiscal years 2019 and 2018? I need to determine the change in prepaid expenses for these two specific years.", + "output": "What is the change in Total prepaid expenses between fiscal years 2019 and 2018?" + }, + { + "input": "What is the difference in the total amount of prepaid expenses between fiscal years 2019 and 2018?", + "output": "What is the change in Total prepaid expenses between fiscal years 2019 and 2018?" + }, + { + "input": "What is the difference in the statutory income tax rates between fiscal years 2019 and 2018, and how does this impact income taxes?", + "output": "What is the change in Income taxes at statutory rate between fiscal years 2019 and 2018?" + }, + { + "input": "What is the net sales from Fresh, vacuum-sealed chicken for fiscal years 2019 to 2017 respectively?", + "output": "What were the net sales of Fresh, vacuum-sealed chicken for fiscal years 2019, 2018, and 2017?" + }, + { + "input": "What is the net sales from Fresh, chill-packed chicken for fiscal years 2019 to 2017 respectively?", + "output": "What are the net sales figures for Fresh, chill-packed chicken for fiscal years 2019, 2018, and 2017, respectively?" 
+ }, + { + "input": "What is the net sales from Fresh, chill-packed chicken for fiscal years 2019 to 2017 respectively?", + "output": "How much revenue was generated from the sales of Fresh, chill-packed chicken for the fiscal years of 2019, 2018, and 2017, respectively?" + }, + { + "input": "What is the net sales from Fresh, ice-packed chicken for fiscal years 2019 to 2017 respectively?", + "output": "What were the net sales of Fresh, ice-packed chicken for fiscal years 2019, 2018, and 2017?" + }, + { + "input": "What was the purpose of the company's operating leases, and how did they contribute to the company's operations or financial performance?", + "output": "What was the company's operating leases for?" + }, + { + "input": "What is the discrepancy in the overall amount when comparing revenues gained from revolving loan facility fees to expenses incurred from operating leases?", + "output": "What was the difference in the total between Revolving loan facility fees and operating leases? " + }, + { + "input": "What was the change in the Net cash used in investing activities between 2017 and 2019?", + "output": "What was the difference in the amount of net cash used in investing activities from 2017 to 2019?" + }, + { + "input": "What was the percentage change in the Net cash used in financing activities between 2018 and 2019?", + "output": "What was the percentage difference in the amount of cash used in financing activities from 2018 to 2019?" + }, + { + "input": "Which years does the table provide information for the company's Selected Consolidated Statements of Operations Data?", + "output": "For which specific time periods does the table present the company's Selected Consolidated Statements of Operations Data?" + }, + { + "input": "What was the exact amount of revenue generated in the year 2015?", + "output": "What was the total revenue in 2015?" 
+ }, + { + "input": "What was the specific amount of operating profit generated in the year 2017?", + "output": "What was the operating profit in 2017?" + }, + { + "input": "What was the exact amount of operating profit earned during the year 2017?", + "output": "What was the operating profit in 2017?" + }, + { + "input": "What is the precise percentage difference in operating profit from 2015 to 2016?", + "output": "What was the percentage change in operating profit between 2015 and 2016?" + }, + { + "input": "What was the percentage increase or decrease in operating profit from 2015 to 2016?", + "output": "What was the percentage change in operating profit between 2015 and 2016?" + }, + { + "input": "What was the cash used in Software in 2019?", + "output": "How much money was spent on Software in the year 2019?" + }, + { + "input": "What was the net income in 2019?", + "output": "What was the exact net income amount for the fiscal year of 2019?" + }, + { + "input": "What was the specific amount of interest that was recorded as an expense in the year 2018?", + "output": "What was the interest expense in 2018?" + }, + { + "input": "What is the amount of money paid as interest in 2018?", + "output": "What was the interest expense in 2018?" + }, + { + "input": "For how many consecutive years has the net income exceeded $100,000 thousand?", + "output": "How many years did net income exceed $100,000 thousand?" + }, + { + "input": "What was the change in the interest expense between 2017 and 2018?", + "output": "What were the differences in the amount of interest expense incurred in 2017 and 2018?" + }, + { + "input": "How much was the change in fair value of the company's servicing asset that was included in the fees charged for servicing?", + "output": "How much was the included change in fair value of the company's servicing asset included in its servicing fees?" 
+ }, + { + "input": "For how many consecutive years have Interchange fees surpassed $50,000?", + "output": "How many years did Interchange fees exceed $50,000 thousand?" + }, + { + "input": "How many years have the Interchange fees been over $50,000 thousand?", + "output": "How many years did Interchange fees exceed $50,000 thousand?" + }, + { + "input": "What was the difference in the fees charged for transactions in 2017 compared to 2019? How did the transaction fees change between 2017 and 2019?", + "output": "What was the change in the transaction fees between 2017 and 2019?" + }, + { + "input": "What was the percentage change in the total revenue between 2018 and 2019?", + "output": "What is the percentage increase or decrease in the total revenue from 2018 to 2019?" + }, + { + "input": "What was the percentage change in the total revenue between 2018 and 2019?", + "output": "What was the percentage difference in the overall revenue from 2018 to 2019?" + }, + { + "input": "How much did the company invest in partnerships during the year 2018?", + "output": "What was the company's investment in partnership in 2018?" + }, + { + "input": "How much money did the company invest in partnership during the year 2018?", + "output": "What was the company's investment in partnership in 2018?" + }, + { + "input": "What is the significance and meaning of the number of Settlements?", + "output": "What does the amount of Settlements represent?" + }, + { + "input": "What is the significance or meaning of the number of Settlements?", + "output": "What does the amount of Settlements represent?" + }, + { + "input": "How many years did the face value of term loan exceed $300,000 thousand?", + "output": "For how many years has the term loan been valued above $300,000 thousand?" 
+ }, + { + "input": "How many years did the face value of term loan exceed $300,000 thousand?", + "output": "How many years has the face value of the term loan been consistently higher than $300,000 thousand?" + }, + { + "input": "What was the change in the Loan Servicing Portfolio between 2017 and 2018?", + "output": "What was the difference in the size of the Loan Servicing Portfolio from 2017 to 2018?" + }, + { + "input": "What was the total costs and expenses?", + "output": "What was the overall amount of costs and expenses associated with the given context or situation?" + }, + { + "input": "What was the exact value of the restricted cash in the year 2017?", + "output": "What was the amount of restricted cash in 2017?" + }, + { + "input": "\"What is the range of years covered in the table?\"", + "output": "Which years does the table provide?" + }, + { + "input": "What was the change in Cash and cash equivalents between 2018 and 2019?", + "output": "What was the difference in the amount of Cash and cash equivalents between 2018 and 2019?" + }, + { + "input": "For how long did the amount of Restricted Cash surpass $200,000 thousand?", + "output": "How many years did Restricted Cash exceed $200,000 thousand?" + }, + { + "input": "What was the total quantity of Furniture sold or produced in the year 2018?", + "output": "What was the amount of Furniture in 2018?" + }, + { + "input": "What was the total quantity or number of furniture items in the year 2018?", + "output": "What was the amount of Furniture in 2018?" + }, + { + "input": "What was the total quantity of computer hardware devices manufactured or sold globally in the year 2019?", + "output": "What was the amount of Computer hardware in 2019?" + }, + { + "input": "How much computer hardware was present in the year 2019?", + "output": "What was the amount of Computer hardware in 2019?" 
+ }, + { + "input": "What was the change in the amount of Software between 2018 and 2019?", + "output": "What was the difference in the quantity of Software from 2018 to 2019, specifically looking at the change in amount?" + }, + { + "input": "What was the change in the amount of Software between 2018 and 2019?", + "output": "How much did the amount of Software change from 2018 to 2019?" + }, + { + "input": "What was the amount of Receipts in 2017?", + "output": "How many receipts were recorded in the year 2017?" + }, + { + "input": "What was the amount of Receipts in 2017?", + "output": "\"How many total receipts were recorded in the year 2017?\"" + }, + { + "input": "What was the change in the amount of Settlements between 2017 and 2019?", + "output": "What was the net difference in the number of Settlements from 2017 to 2019?" + }, + { + "input": "What was the change in the amount of Settlements between 2017 and 2019?", + "output": "What was the difference in the number of Settlements from 2017 to 2019 and how did it change over the two-year period?" + }, + { + "input": "What was the exact amount of revenue generated in the year 2018?", + "output": "What was the total revenue in 2018?" + }, + { + "input": "What is the total amount of money earned in 2018?", + "output": "What was the total revenue in 2018?" + }, + { + "input": "\"What was the specific financial figure measuring earnings from regular business operations, known as operating profit, during the year 2019?\"", + "output": "What was the operating profit in 2019?" + }, + { + "input": "What was the operating profit in the first quarter?", + "output": "What is the exact amount of operating profit recorded in the first quarter of the fiscal year?" + }, + { + "input": "What was the specific amount of net income generated in the third quarter?", + "output": "What was the net income in the third quarter?" 
+ }, + { + "input": "What was the percentage increase or decrease in the basic earnings per share of Class A common stock from the first quarter to the second quarter?", + "output": "What was the percentage change in the basic earnings per share of Class A common stock between the first and second quarter?" + }, + { + "input": "\"What is the range of years covered in the table?\"", + "output": "Which years does the table provide?" + }, + { + "input": "What was the difference in the amount of net income recorded between the years 2017 and 2018?", + "output": "What was the change in net income between 2017 and 2018?" + }, + { + "input": "\"What is the meaning and significance of net fair value changes recognized in other gains (losses)?\"", + "output": "What does the net Fair value changes recognized in other gains (losses) represent?" + }, + { + "input": "What is the representation of net fair value changes recognized in other gains (losses)?", + "output": "What does the net Fair value changes recognized in other gains (losses) represent?" + }, + { + "input": "What was the beginning balance in 2018?", + "output": "\"The starting balance for the year 2018, specifically referring to financial records or accounts, is requested. Please provide the initial amount present at the beginning of that year.\"" + }, + { + "input": "Which years did the ending balance exceed $3,000 thousand?", + "output": "In which years was the ending balance higher than $3,000 thousand?" + }, + { + "input": "What is the percentage of fair value time deposits compared to the overall fair value of marketable securities?", + "output": "What percentage of the total fair value marketable securities is made up of fair value time deposits?" + }, + { + "input": "\"What is the current interest rate specified in the credit facility agreement that governs our credit arrangement?\"", + "output": "What is the interest rate on the credit facility agreement?" 
+ }, + { + "input": "\"What is the specific interest rate specified in the credit facility agreement that governs our borrowing terms?\"", + "output": "What is the interest rate on the credit facility agreement?" + }, + { + "input": "What factors contributed to the increase in research and development expenses in 2019?", + "output": "What are the reasons for higher research and development expense in 2019?" + }, + { + "input": "What is the net difference in research and development expense between 2019 and 2017?", + "output": "What is the exact amount of change in research and development expense between the fiscal years of 2019 and 2017?" + }, + { + "input": "How much were the marketable securities worth in 2019?", + "output": "What was the amount of marketable securities in 2019?" + }, + { + "input": "What was the precise value of marketable securities held in the year 2019?", + "output": "What was the amount of marketable securities in 2019?" + }, + { + "input": "What is the difference in the total amount of shareholders' equity between the years 2018 and 2019?", + "output": "What is the change in total shareholders' equity from 2018 to 2019?" + }, + { + "input": "How much did the total shareholders' equity change between 2018 and 2019?", + "output": "What is the change in total shareholders' equity from 2018 to 2019?" + }, + { + "input": "How does the net (loss) income of 2019 compare to the net (loss) income of 2018?", + "output": "What is the difference between Net (loss) income in 2018 and 2019?" + }, + { + "input": "What was the exact percentage increase in gross profit from the year 2018 to the year 2019?", + "output": "What was the percentage increase in gross profit from 2018 to 2019?" + }, + { + "input": "What are the different components of expenses that need to be considered when starting production?", + "output": "What are components of production start-up expense?" 
+ }, + { + "input": "What is the total change in expenditure for initiating production between the years 2019 and 2017?", + "output": "What is the net difference in production start-up expense between 2019 and 2017?" + }, + { + "input": "What is the difference in net sales amount in 2019 and 2018?", + "output": "What is the difference in the net sales amount between the years 2019 and 2018? Please provide details and figures to compare the net sales amounts of these two specific years." + }, + { + "input": "What is the value of foreign government obligations as of December 31, 2019?", + "output": "What is the total amount of foreign government obligations recorded as of December 31, 2019?" + }, + { + "input": "What is the comparative value of money market funds and foreign debt as of December 31, 2019, and how do they differ?", + "output": "What is the difference in the value of money market funds and foreign debt as of December 31, 2019?" + }, + { + "input": "What is the increase in cash provided by operating activities from 2018 to 2019?", + "output": "How much did the cash provided by operating activities increase from 2018 to 2019?" + }, + { + "input": "What is the difference in net cash and cash equivalents between 2019 and 2018?", + "output": "What is the change in net cash and cash equivalents between the fiscal years 2019 and 2018?" + }, + { + "input": "\"What specific items or responsibilities are typically considered as part of other obligations?\"", + "output": "What are included in other obligations?" + }, + { + "input": "What does the category of \"other obligations\" encompass and include?", + "output": "What are included in other obligations?" + }, + { + "input": "Why was $72.2 million of unrecognized tax benefits excluded?", + "output": "Why were the $72.2 million in unrecognized tax benefits excluded from something?" 
+ }, + { + "input": "What percentage of the total contractual obligations is made up of total long-term debt obligations?", + "output": "What is the proportion of the overall contractual obligations that consists specifically of the total amount owed in long-term debt obligations?" + }, + { + "input": "What is the distinction between the total amount paid in interest and the total sum of operating lease obligations?", + "output": "What difference between total interest payments and total operating lease obligations?" + }, + { + "input": "What is the interest rate for the term loan that I am inquiring about?", + "output": "What is the interest rate of the term loan?" + }, + { + "input": "What are the differences in the amounts of notes receivable recorded in 2018 and 2019?", + "output": "What is the difference between notes receivables from 2018 to 2019?" + }, + { + "input": "What is the precise percentage by which the total prepaid expenses and other current assets increased from 2018 to 2019?", + "output": "What is the percentage increase in total prepaid expenses and other current assets from 2018 to 2019?" + }, + { + "input": "How are the accounting procedures used to record changes in fair value of hedging instruments?", + "output": "How are changes in fair value of hedging instruments accounted for?" + }, + { + "input": "What are the types of derivative instruments included in the total for other liabilities on a financial statement?", + "output": "What is the total derivate instruments for other liabilities?" + }, + { + "input": "What are the types of derivative instruments used to measure the total value of other liabilities?", + "output": "What is the total derivate instruments for other liabilities?" 
+ }, + { + "input": "What is the ratio of the total derivatives classified as hedging instruments to the total derivatives not classified as hedging instruments under prepaid expenses and other current assets?", + "output": "Under prepaid expenses and other current assets, what is the ratio of the total derivates designated as hedging instruments to those not designated as hedging instruments?" + }, + { + "input": "What was the increase in machinery and equipment from 2018 to 2019?", + "output": "What was the percentage change in the value of machinery and equipment between 2018 and 2019?" + }, + { + "input": "What was the increase in machinery and equipment from 2018 to 2019?", + "output": "What percentage growth was observed in the value of machinery and equipment between the years 2018 and 2019?" + }, + { + "input": "What are the differences in land statistics comparing the year 2018 and 2019?", + "output": "What is the difference between land from 2018 to 2019?" + }, + { + "input": "What are the discrepancies in land conditions between the years 2018 and 2019?", + "output": "What is the difference between land from 2018 to 2019?" + }, + { + "input": "What is the net change in selling, general, and administrative expense between the years 2019 and 2017?", + "output": "What is the net difference in selling, general and administrative expense between 2019 and 2017?" + }, + { + "input": "What is the current percentage shareholding of Tata Sons Private Limited and Life Insurance Corporation of India, and how does it differ between the two entities?", + "output": "What is the difference in percentage shareholding between Tata Sons Private Limited and Life Insurance Corporation of India?" + }, + { + "input": "What is the disparity in the quantity of equity shares possessed by Promoters and Insurance Companies?", + "output": "What is the difference between number of equity shares held between Promoters and Insurance Companies?" 
+ }, + { + "input": "How many shareholding accounts were there as of March 31, 2019?", + "output": "What is the number of existing shareholding accounts as on March 31, 2019? " + }, + { + "input": "As of March 31, 2019, what is the overall sum of all assets owned by someone or something?", + "output": "What is the grand total of holdings as on March 31, 2019?" + }, + { + "input": "What is the overall value of assets and investments held as of March 31, 2019?", + "output": "What is the grand total of holdings as on March 31, 2019?" + }, + { + "input": "What is the difference in the number of holdings within the share categories of '1-100' and '101-500'? How do the share categories of '1-100' and '101-500' differ in terms of the number of holdings they encompass?", + "output": "What is the difference in holdings between the share categories of '1-100' and '101-500'?" + }, + { + "input": "What is the basis for consolidating shareholdings and how does it affect the overall company structure?", + "output": "On what basis is shareholding consolidated?" + }, + { + "input": "What is the basis for the consolidation of shareholding?", + "output": "On what basis is shareholding consolidated?" + }, + { + "input": "\"What is the proportion of unsecured loans out of the total debt at the start of the financial year?\"", + "output": "At the beginning of the financial year, what percentage of total indebtedness is made up of unsecured loans?" + }, + { + "input": "What was the change in the quantity of shares transferred from 2011 to 2012 into the Investor Education and Protection Fund (IEPF)?", + "output": "What is the difference in number of shares transferred to IEPF from 2011 to 2012?" + }, + { + "input": "What is the total remuneration for Ramakrishnan V?", + "output": "What is the complete amount of compensation that Ramakrishnan V receives?" 
+ }, + { + "input": "What is the total remuneration for Ramakrishnan V?", + "output": "What is the total amount of money received as payment or compensation for services rendered by Ramakrishnan V?" + }, + { + "input": "What is the monetary worth of the stock option that the Company Secretary receives?", + "output": "What is the value of Stock Option given to the Company Secretary?" + }, + { + "input": "What is the monetary worth or amount of Stock Option that has been granted or allocated to the Company Secretary as part of their compensation or benefits package?", + "output": "What is the value of Stock Option given to the Company Secretary?" + }, + { + "input": "What is the specific value or percentage of commission that is usually given to the Chief Financial Officer (CFO) of a company?", + "output": "What is the value of Commission given to the Chief Financial Officer?" + }, + { + "input": "\"What is the specific amount or percentage of commission that is typically awarded to the Chief Financial Officer (CFO) for their role?\"", + "output": "What is the value of Commission given to the Chief Financial Officer?" + }, + { + "input": "What distinguishes the CFO from the Company Secretary in terms of Others and Allowances? Clarify and elaborate on the differences between these two roles regarding their responsibilities, remunerations, and any additional benefits they may receive.", + "output": "What is the difference in Others, Allowances between the CFO and Company Secretary?" + }, + { + "input": "Which key managerial personnel had the highest total remuneration?", + "output": "\"Who among the key managerial personnel received the highest total remuneration?\"" + }, + { + "input": "How does the ratio of median remuneration between the two Executive Directors differ?", + "output": "What is the difference in ratio to median remuneration between the two Executive Directors?" 
+ }, + { + "input": "\"What is the difference in the amount of money received from sales or services between the years 2018 and 2019?\"", + "output": "What is the change in revenue between 2018 and 2019?" + }, + { + "input": "What is the difference in the amount of money earned by the company between the years 2018 and 2019?", + "output": "What is the change in revenue between 2018 and 2019?" + }, + { + "input": "What portion of total unconsolidated revenue in FY 2019 constitutes of export revenue?", + "output": "What percentage of the total unconsolidated revenue for the fiscal year 2019 is comprised of revenue from exports?" + }, + { + "input": "What portion of total unconsolidated revenue in FY 2018 constitutes of export revenue?", + "output": "What percentage of the overall unconsolidated revenue for the fiscal year 2018 is derived from export sales?" + }, + { + "input": "What is the difference in the amount of money earned from foreign exchanges between the fiscal years of 2018 and 2019?", + "output": "What is the change in foreign exchange earnings between FY 2018 and FY 2019?" + }, + { + "input": "What is the change in Cost, Insurance and Freight (CIF) value of imports from FY 2018 to FY 2019?", + "output": "How has the Cost, Insurance and Freight (CIF) value of imports changed from FY 2018 to FY 2019?" + }, + { + "input": "What is the specific amount of money charged as a sitting fee for N Chandrasekaran?", + "output": "What is the sitting fee of N Chandrasekaran?" + }, + { + "input": "What is the difference in sitting fees between O P Bhatt and Aarthi Subramanian?", + "output": "What is the difference in sitting fees between O P Bhatt and Aarthi Subramanian, and in what context are these fees being compared or analyzed?" + }, + { + "input": "What is the difference in sitting fees between O P Bhatt and Aarthi Subramanian?", + "output": "What is the difference in the sitting fees charged by O P Bhatt and Aarthi Subramanian?" 
+ }, + { + "input": "What is the percentage change in pre-tax margin between 2017 and 2018?", + "output": "What is the increase/ (decrease) in Pre-tax margin from 2017 to 2018" + }, + { + "input": "What is the percentage change in Pre-tax margin between 2017 and 2018?", + "output": "What is the increase/ (decrease) in Pre-tax margin from 2017 to 2018" + }, + { + "input": "What does Net Income include?", + "output": "\"What elements are included in the calculation of Net Income?\"" + }, + { + "input": "What is the increase / (decrease) in revenue from 2018 to 2019?", + "output": "How much did the revenue change from 2018 to 2019?" + }, + { + "input": "What is the percentage increase / (decrease) in Current liabilities from 2018 to 2019?", + "output": "What is the percentage change in Current Liabilities between 2018 and 2019?" + }, + { + "input": "What caused the decrease in the actual shares?", + "output": "What factors led to the decrease in the current number of shares held by individuals or entities?" + }, + { + "input": "What was the percentage change in Earnings per Share of common stock from continuing operations (basic) compared to the previous period?", + "output": "What was the increase / (decrease) from the Earnings per share of common stock from continuing operations basic?" + }, + { + "input": "What was the percentage change in the basic earnings per share of common stock from continuing operations?", + "output": "What was the increase / (decrease) from the Earnings per share of common stock from continuing operations basic?" + }, + { + "input": "How did the performance of hardware platforms compare from year to year?", + "output": "How was the performance of hardware platforms year to year" + }, + { + "input": "What does the Pre-tax income performance implied? ", + "output": "What is the meaning of Pre-tax income performance?" 
+ }, + { + "input": "What was the pre-tax margin of 2019?", + "output": "\"What was the pre-tax margin percentage for the year 2019?\"" + }, + { + "input": "What was the pre-tax margin of 2019?", + "output": "\"What was the specific pre-tax margin for the fiscal year of 2019?\"" + }, + { + "input": "What was the External gross profit margin in 2019?", + "output": "What was the gross profit margin from sources external to the company for the year 2019?" + }, + { + "input": "What is the average of Balance at January 1?", + "output": "What is the average balance as of January 1st for all accounts (or customers, if applicable)?" + }, + { + "input": "What is the average balance amount as of December 31st?", + "output": "What is the average of Balance at December 31?" + }, + { + "input": "What is the average balance amount recorded on December 31st for a specific period of time?", + "output": "What is the average of Balance at December 31?" + }, + { + "input": "What is the increase / (decrease) in the net capitalized software from 2018 to 2019?", + "output": "What is the change in the net capitalized software amount from 2018 to 2019?" + }, + { + "input": "What was the percentage change in pre-tax income from 2018 to 2019?", + "output": "What was the increase / (decrease) in the Pre-tax income from 2018 to 2019?" + }, + { + "input": "What is the percentage change in gross profit for External Operating Systems Software from 2018 to 2019?", + "output": "What is the increase / (decrease) in the External Operating Systems Software gross profit from 2018 to 2019?" + }, + { + "input": "What was the percentage increase / (decrease) in total short term debt?", + "output": "What was the percentage change in the total amount of short term debt?" + }, + { + "input": "What was the percentage change in the pre-tax margin from 2018 to 2019?", + "output": "What was the increase / (decrease) in the Pre-tax margin from 2018 to 2019?" 
+ }, + { + "input": "What percentage change in the pre-tax margin occurred between 2018 and 2019?", + "output": "What was the increase / (decrease) in the Pre-tax margin from 2018 to 2019?" + }, + { + "input": "What is the meaning or significance of the launch expense?", + "output": "What does launch expense represent?" + }, + { + "input": "How many types of expenses are listed in the table?", + "output": "How many categories of expenses are included in the table?" + }, + { + "input": "What was the average adjusted EBITDA for 2018 and 2019?", + "output": "What was the average amount of adjusted EBITDA for both the years 2018 and 2019?" + }, + { + "input": "\"What is the chronological order of the gross profit for each financial year end as depicted in the table?\"", + "output": "What is the gross profit for each financial year end shown in the table (in chronological order)?" + }, + { + "input": "What was the year with the highest value for other financial expenses?", + "output": "In which year was the amount of other financial expenses the largest?" + }, + { + "input": "What was the net difference in the overall financial expenses incurred during the year 2019 as compared to 2018?", + "output": "What was the change in the total financial expenses in 2019 from 2018?" + }, + { + "input": "What was the sum of all financial costs incurred in the year 2019?", + "output": "What is the total amount of financial expenses in 2019?" + }, + { + "input": "What was the specific year when the amount of Current lease liabilities was greater than in any other year?", + "output": "In which year was the amount of Current lease liabilities larger?" + }, + { + "input": "When did the amount of Current lease liabilities exceed the previous year's amount?", + "output": "In which year was the amount of Current lease liabilities larger?" + }, + { + "input": "What is the total financial income for 2019?", + "output": "What was the overall financial income for the year 2019?" 
+ }, + { + "input": "What was the total number of financial items in the year 2019?", + "output": "What is the amount of total financial items in 2019?" + }, + { + "input": "What is the total number of financial items recorded in the year 2019?", + "output": "What is the amount of total financial items in 2019?" + }, + { + "input": "What are the primary categories that are examined within the Financial Items section of the table?", + "output": "What are the main categories analyzed under Financial Items in the table?" + }, + { + "input": "In which year was the amount of financial income the smallest?", + "output": "What is the year with the minimum financial income amount?" + }, + { + "input": "What was the change in the Average invested capital less average impairment in 2019 from 2018?", + "output": "What was the difference in the average amount of capital invested excluding average impairment between 2019 and 2018?" + }, + { + "input": "What was the change in the Average invested capital less average impairment in 2019 from 2018?", + "output": "What was the difference in the average amount of money invested minus the average amount of impairments in 2019 compared to 2018?" + }, + { + "input": "What was the percentage change in retained earnings in 2019 from 2018?", + "output": "What was the precise percentage difference in retained earnings between 2018 and 2019, specifically referring to the change observed in 2019?" + }, + { + "input": "What was the percentage change in retained earnings in 2019 from 2018?", + "output": "What was the percentage increase or decrease in the amount of retained earnings from the year 2018 to 2019?" + }, + { + "input": "How is Total Cost of Ownership (TCE) earnings utilized as a widely accepted performance measure for the shipping industry?", + "output": "How is TCE earnings used as a standard shipping industry performance measure?" 
+ }, + { + "input": "What is the specific year when the revenue reached its maximum value?", + "output": "In which year was Revenue the largest?" + }, + { + "input": "What items were included in the freight receivables as of 2019?", + "output": "As of 2019, what did freight receivables include?" + }, + { + "input": "What was included in the category of freight receivables as of 2019?", + "output": "As of 2019, what did freight receivables include?" + }, + { + "input": "What is the determining basis for the calculation and establishment of an allowance for expected credit loss?", + "output": "What is the making of allowance for expected credit loss based on?" + }, + { + "input": "What is the basis for determining the provision for expected credit loss?", + "output": "What is the making of allowance for expected credit loss based on?" + }, + { + "input": "What is the specific year when the total gross freight receivables reached their highest amount?", + "output": "In which year was the amount of total gross freight receivables the largest?" + }, + { + "input": "What was the net increase or decrease in the total gross freight receivables between 2018 and 2019?", + "output": "What was the change in the total net gross freight receivables in 2019 from 2018?" + }, + { + "input": "What information can be derived in note 16?", + "output": "What data or details can be obtained from note 16?" + }, + { + "input": "What information can be derived in note 16?", + "output": "What kind of information can be obtained from note 16?" + }, + { + "input": "What was the change in the carrying amount as of 31 December from 2018 to 2019?", + "output": "What was the difference in the carrying amount as of December 31 between the years 2018 and 2019? Please provide the specific change in the carrying amount." 
+ }, + { + "input": "What are the specific components present in the table that are utilized for liquidity calculations?", + "output": "What are the components in the table used to calculate liquidity?" + }, + { + "input": "What was the difference in liquidity levels between 2019 and 2018, and how did it change over the course of the year?", + "output": "What was the change in liquidity in 2019 from 2018?" + }, + { + "input": "What does TORM have leases for?", + "output": "What types of leases does TORM currently have?" + }, + { + "input": "How are leases represented on the balance sheet and what specific details are included for each lease?", + "output": "How is each lease reflected on the balance sheet?" + }, + { + "input": "\"What is the asset type that experiences the highest rate of depreciation in a year?\"", + "output": "Which asset type has the largest depreciation of the year?" + }, + { + "input": "What was the difference in the amount of gross profit in 2019 compared to 2018?", + "output": "What was the change in gross profit in 2019 from 2018?" + }, + { + "input": "What was the exact percentage increase or decrease in the gross profit from 2018 to 2019?", + "output": "What was the percentage change in gross profit in 2019 from 2018?" + }, + { + "input": "What was the percentage increase or decrease in gross profit from 2018 to 2019?", + "output": "What was the percentage change in gross profit in 2019 from 2018?" + }, + { + "input": "\"What specific details or data can be found in Note 21 of the provided information?\"", + "output": "What information is provided in Note 21?" + }, + { + "input": "What was the percentage change in interest rate swaps between the year 2018 and 2019?", + "output": "What was the change in interest rate swaps from 2018 to 2019?" + }, + { + "input": "How did interest rate swaps change from 2018 to 2019?", + "output": "What was the change in interest rate swaps from 2018 to 2019?" 
+ }, + { + "input": "What was the percentage difference in the interest rate swaps between 2018 and 2019? I would like to know the rate at which the interest rate swaps changed between these two years.", + "output": "What was the percentage change in interest rate swaps from 2018 to 2019?" + }, + { + "input": "What was the precise percentage difference in the interest rate swap market between 2018 and 2019?", + "output": "What was the percentage change in interest rate swaps from 2018 to 2019?" + }, + { + "input": "What is TORM's belief or estimation of the amount of net interest-bearing debt they possess?", + "output": "What does TORM believe net interest-bearing debt to be?" + }, + { + "input": "What is the methodology used to calculate the net asset value (NAV) per share in investment funds?", + "output": "How is NAV/share calculated?" + }, + { + "input": "What are the types of audit fees in the table?", + "output": "What are the different types of audit fees listed in the table?" + }, + { + "input": "In what year did the auditor receive the highest overall compensation?", + "output": "In which year was the total remuneration to the auditor the largest?" + }, + { + "input": "For which specific years were the calculations of Other Liabilities amounts conducted?", + "output": "In which years were the amounts of Other Liabilities calculated for?" + }, + { + "input": "In which specific year did the amount allocated to Partners and commercial managements surpass the amount allocated in other years?", + "output": "In which year was the amount under Partners and commercial managements larger?" + }, + { + "input": "\"What specific expenses are included in the category of operating expenses that are related to staff costs?\"", + "output": "What are the staff costs included in operating expenses related to?" 
+ }, + { + "input": "How is the average number of employees determined and what is the methodology behind its calculation?", + "output": "How is the average number of employees calculated?" + }, + { + "input": "What was the change in the total number of permanent employees from 2018 to 2019?", + "output": "How many permanent employees were added or lost between 2018 and 2019, resulting in the change in the total number of permanent employees?" + }, + { + "input": "What does note 21 include in terms of information?", + "output": "What information does note 21 contain?" + }, + { + "input": "What does accounts receivables in the table include?", + "output": "What specific components are included in the accounts receivables section of the table?" + }, + { + "input": "What specific items or costs are included in the income statement as Port expenses, including bunkers and commissions?", + "output": "What is recognized in the income statement under Port expenses, bunkers and commissions?" + }, + { + "input": "How is allowance for expected credit loss of freight receivables calculated?", + "output": "How is the calculation for allowance for expected credit loss determined specifically for freight receivables?" + }, + { + "input": "\"What specific components are accounted for in the carrying amount designated for 'Vessels and capitalized dry-docking'?\"", + "output": "What is included in the carrying amount for \"Vessels and capitalized dry-docking\"?" + }, + { + "input": "What specific sub-elements, denoted in capital letters, can be found under the category of \"Vessels\" in the table related to dry-docking?", + "output": "What are the sub-elements under Vessels and capitalized dry-docking in the table?" + }, + { + "input": "What specific components or sub-elements fall under the category of Vessels and capitalized dry-docking in the table?", + "output": "What are the sub-elements under Vessels and capitalized dry-docking in the table?" 
+ }, + { + "input": "\"What is the meaning of the phrase 'company's retirement obligations' and what specific aspects does it encompass?\"", + "output": "What does the company's retirement obligations refer to?" + }, + { + "input": "\"What are the retirement obligations of the company and what do they encompass?\"", + "output": "What does the company's retirement obligations refer to?" + }, + { + "input": "What is the company's total contractual cash obligations due in more than 5 years?", + "output": "What is the total amount of cash that the company is obligated to pay according to its contracts, and when are these payments due, specifically for obligations that extend beyond a period of 5 years?" + }, + { + "input": "What is the company's total contractual cash obligations due in more than 5 years?", + "output": "\"What is the total amount of cash that the company owes based on contractual agreements with a maturity period of more than 5 years?\"" + }, + { + "input": "How much does operating leases account for total contractual cash obligations for period of less than 1 year?", + "output": "What is the percentage of total contractual cash obligations attributable to operating leases with a duration of less than one year?" + }, + { + "input": "What is the specific monetary difference in the company's operating income between the years 2018 and 2019?", + "output": "What is the company's increase in income from operating activities between 2018 and 2019? " + }, + { + "input": "What is the cash balance and the amount of cash equivalents held by the company as of August 31, 2019?", + "output": "What is the company's cash and cash equivalents as at 31 August 2019?" 
+ }, + { + "input": "What is the change in cash flow from investing activities from 2018 to 2019, specifically referring to any increase or decrease in the amount of cash generated or used in investing activities during this period?", + "output": "What is the increase in cash flow from investing activities between 2018 and 2019?" + }, + { + "input": "What is the total cashflow from investing activities in both 2018 and 2019?", + "output": "What is the combined amount of cash inflows and outflows from investing activities in 2018 and 2019?" + }, + { + "input": "What is the total cashflow from investing activities in both 2018 and 2019?", + "output": "What was the net cash flow from investing activities in 2018 and 2019 combined?" + }, + { + "input": "What is the total effect of exchange rate changes on cash and cash equivalents in both 2018 and 2019?", + "output": "What is the cumulative impact of fluctuations in exchange rates on the combined balances of cash and cash equivalents for the years 2018 and 2019?" + }, + { + "input": "When was the approval by Accenture's Board of Directors granted for the implementation of an open-market share purchase program specifically designed for the acquisition of Accenture's Class A ordinary shares?", + "output": "When did Accenture's Board of Directors authorize and confirm an open-market share purchase program for acquiring Accenture pls Class A ordinary shares?" + }, + { + "input": "How much of the shares purchased in July were part of publicly announced plan or programs?", + "output": "What proportion of the shares acquired in the month of July were specifically disclosed as part of publicly announced plan or programs?" + }, + { + "input": "How much of the shares purchased in July were part of publicly announced plan or programs?", + "output": "What was the proportion of shares acquired in July that were part of publicly disclosed plans or programs?" 
+ }, + { + "input": "What was the primary source of the company's operational revenue in the year 2019?", + "output": "What is the company's main source of operating income in 2019?" + }, + { + "input": "What was the specific amount spent on expenses directly related to the production or provision of goods and services in the year 2019?", + "output": "What was the cost of revenue in 2019?" + }, + { + "input": "What was the precise amount spent on expenses directly related to generating revenue in the year 2019?", + "output": "What was the cost of revenue in 2019?" + }, + { + "input": "What was the total count of shares that were approved and allocated?", + "output": "What was the number of granted shares?" + }, + { + "input": "How many shares were approved and given to individuals or entities as stocks?", + "output": "What was the number of granted shares?" + }, + { + "input": "What is the disparity in the weighted-average grant date fair value between shares that have been allotted and subsequently forfeited?", + "output": "What is the difference in weighted-average grant date fair value between granted and forfeited shares?" + }, + { + "input": "What is the difference in the weighted-average grant date fair value between unvested shares and vested shares in 2018?", + "output": "What is the difference in weighted-average grant date fair value for unvested shares in 2018 and vested shares?" + }, + { + "input": "What is the discrepancy in the weighted-average grant date fair value between unvested shares and vested shares in the year 2018?", + "output": "What is the difference in weighted-average grant date fair value for unvested shares in 2018 and vested shares?" + }, + { + "input": "What was the previously reported amount of total current assets?", + "output": "What was the total current assets as previously reported?" 
+ }, + { + "input": "What is the total value of the deferred commissions, including the current portion, and the total current assets as reported in the previous financial statement?", + "output": "What was the sum of deferred commissions, current portion and total current assets as previously reported?" + }, + { + "input": "What was the total amount reported for deferred commissions, the current portion of deferred commissions, and the overall total for current assets in the previous report?", + "output": "What was the sum of deferred commissions, current portion and total current assets as previously reported?" + }, + { + "input": "What is the revised percentage of accrued expenses in relation to the total sum of liabilities and stockholders' equity?", + "output": "What was accrued expenses as revised as a percentage of total liabilities and stockholders' equity?" + }, + { + "input": "What was the company's bad debt expense for the year ending December 31, 2018?", + "output": "What was the exact amount recorded as bad debt expense by the company for the entire year ending on December 31, 2018?" + }, + { + "input": "What was the company's bad debt expense for the year ending December 31, 2018?", + "output": "What was the amount of bad debts that the company recorded as an expense for the fiscal year that ended on December 31, 2018?" + }, + { + "input": "What was the total amount of interest earned as income in the year 2017?", + "output": "What was interest income in 2017?" + }, + { + "input": "What was the net income of others in 2019?", + "output": "What was others, net in 2019?" + }, + { + "input": "What was the percentage change in Interest expense between 2017 and 2018?", + "output": "What was the percentage increase or decrease in Interest expense from 2017 to 2018?" + }, + { + "input": "What is the average interest income from 2017-2019?", + "output": "What is the average income generated from interest for the years 2017 to 2019 inclusive?" 
+ }, + { + "input": "What percentage increase or decrease in interest income occurred from 2018 to 2019?", + "output": "What was the percentage change in interest income between 2018 and 2019?" + }, + { + "input": "What is the average amount of net cash provided by operating activities for the years 2017, 2018, and 2019?", + "output": "What was the average net cash provided by operating activities from 2017-2019?" + }, + { + "input": "What was the percentage change in the free flow cash margin from 2017 to 2018?", + "output": "What was the change in free flow cash margin between 2017 and 2018?" + }, + { + "input": "What was the total amount of federal net operating losses incurred by the company in the year 2019?", + "output": "What was the company's federal net operating losses in 2019?" + }, + { + "input": "What was the amount of federal net operating losses incurred by the company during the year 2019 for tax purposes?", + "output": "What was the company's federal net operating losses in 2019?" + }, + { + "input": "What was the company's state net operating losses in 2019?", + "output": "What was the total amount of net operating losses incurred by the company in the state during the year 2019?" + }, + { + "input": "What is the percentage change in Deferred revenue between 2018 and 2019?", + "output": "What is the exact percentage difference in Deferred revenue from 2018 to 2019?" + }, + { + "input": "What was the precise amount spent on sales and marketing in the year 2018?", + "output": "What was the sales and marketing expense in 2018?" + }, + { + "input": "What was the specific fair value amount of cash that is being referred to in the context of a certain situation or event?", + "output": "What was the fair value amount of cash?" + }, + { + "input": "What was the specific fair value figure assigned to the amount of cash? Please provide the determined fair value amount for the cash in question.", + "output": "What was the fair value amount of cash?" 
+ }, + { + "input": "What percentage of fair value total cash equivalents consist of agency bonds?", + "output": "What is the proportion of agency bonds, in terms of percentage, in the total cash equivalents that make up the fair value?" + }, + { + "input": "What updates and advancements were made in computer equipment and software from 2018 to 2019?", + "output": "What is the change in computer equipment and software between 2018 and 2019?" + }, + { + "input": "What are the differences in computer equipment and software between the years 2018 and 2019?", + "output": "What is the change in computer equipment and software between 2018 and 2019?" + }, + { + "input": "What was the percentage change in revenue from research and development from 2017 to 2018?", + "output": "What was the change in percentage of revenue of research and development between 2017 and 2018?" + }, + { + "input": "What are the main promises or obligations that the company is committed to?", + "output": "What are the company's principal commitments?" + }, + { + "input": "What are the main commitments of the company?", + "output": "What are the company's principal commitments?" + }, + { + "input": "What is the sum of total operating lease obligations and Long-term debt obligations including interest?", + "output": "What is the combined amount of operating lease obligations and long-term debt obligations, including accrued interest?" + }, + { + "input": "What portion of the total obligations is attributable to payments that are due within a time frame of 1-3 years?", + "output": "What is the percentage of the total obligations that consists of payments due in 1-3 years?" + }, + { + "input": "What is the specific percentage of the overall obligations that are categorized as payments to be made within a period of 1-3 years?", + "output": "What is the percentage of the total obligations that consists of payments due in 1-3 years?" 
+ }, + { + "input": "\"What are the regulations or guidelines regarding the payment of Foreign income tax applicable for the year 2019?\"", + "output": "What is the provision for Foreign income tax in 2019?" + }, + { + "input": "\"What are the regulations or rules implemented in 2019 regarding the taxation of foreign income?\"", + "output": "What is the provision for Foreign income tax in 2019?" + }, + { + "input": "What is the disparity in the weighted-average grant date fair value when comparing shares that have been granted to those that have been forfeited?", + "output": "What is the difference in weighted-average grant date fair value between granted and forfeited shares?" + }, + { + "input": "What is the disparity in weighted-average grant date fair value when comparing shares that have been granted with those that have been forfeited?", + "output": "What is the difference in weighted-average grant date fair value between granted and forfeited shares?" + }, + { + "input": "What is the Net sales for 2019?", + "output": "What was the total global revenue generated from sales in the year 2019?" + }, + { + "input": "What was the percentage change in diluted net income per share from continuing operations from 2017 to 2018?", + "output": "What was the change in Diluted Net income per share from continuing operations in 2018 from 2017?" + }, + { + "input": "What was the percentage increase or decrease in the Diluted Net income per share from continuing operations between 2018 and 2017?", + "output": "What was the percentage change in Diluted Net income per share from continuing operations in 2018 from 2017?" + }, + { + "input": "What does the Euro Term Loan due 2024 consist of?", + "output": "What is included in the Euro Term Loan due in 2024?" 
+ }, + { + "input": "What does the Euro Term Loan due 2024 consist of?", + "output": "\"What are the components and details of the Euro Term Loan due in 2024?\"" + }, + { + "input": "In which year did the value of the 1.0% State of Connecticut term loan, which has a maturity date in 2023, surpass its previous size?", + "output": "In which year was the 1.0% State of Connecticut term loan due 2023 larger?" + }, + { + "input": "In which year did the size of the 1.0% State of Connecticut term loan due in 2023 increase?", + "output": "In which year was the 1.0% State of Connecticut term loan due 2023 larger?" + }, + { + "input": "What was the change in Capital lease obligations from 2018 to 2019?", + "output": "What was the exact difference in the amount of Capital lease obligations between the years 2018 and 2019?" + }, + { + "input": "In which year was ILS a higher percentage of total net sales?", + "output": "In which specific year did the Incremental Lifetime Sales (ILS) constitute a larger proportion of the overall net sales?" + }, + { + "input": "What were the net sales figures for ILS in both 2018 and 2019, and what was the average amount for each year?", + "output": "What was the average amount of net sales for ILS in 2018 and 2019?" + }, + { + "input": "What were the average net sales for ILS for the years 2018 and 2019?", + "output": "What was the average amount of net sales for ILS in 2018 and 2019?" + }, + { + "input": "What is the estimated duration of a building and any associated enhancements before they are deemed no longer useful?", + "output": "What is the useful life of Buildings and improvements?" + }, + { + "input": "How is property and equipment stated in the table?", + "output": "In what manner is property and equipment presented and described within the table?" 
+ }, + { + "input": "In which year was the amount of Land larger?", + "output": "\"When comparing the years, which year had a larger amount of land?\"" + }, + { + "input": "What was the change in Leasehold improvements from 2018 to 2019?", + "output": "What was the difference in Leasehold improvements value between 2018 and 2019?" + }, + { + "input": "What was the percentage change in Short-term investments in 2019 from 2018?", + "output": "What was the percentage difference in the value of Short-term investments between 2019 and 2018?" + }, + { + "input": "What was the percentage change in Short-term investments in 2019 from 2018?", + "output": "What was the exact percentage difference in the value of Short-term investments between 2018 and 2019?" + }, + { + "input": "What was the net income for the year 2019?", + "output": "What was Net income in 2019?" + }, + { + "input": "\"In which specific year did the company report the highest amount of comprehensive income?\"", + "output": "In which year was Comprehensive income largest?" + }, + { + "input": "What is the largest year for Comprehensive income?", + "output": "In which year was Comprehensive income largest?" + }, + { + "input": "What was the difference in the amount of money earned in 2018 compared to 2017 after all expenses and taxes were accounted for?", + "output": "What was the change in Net income in 2018 from 2017?" + }, + { + "input": "What was the Projected Benefit Obligation (PBO) amount for the year 2019? Please provide the numerical value for the PBO as of December 31st, 2019.", + "output": "What was the Projected benefit obligation in 2019?" + }, + { + "input": "In which years was information on defined benefit plans provided?", + "output": "During which specific years was information regarding defined benefit plans provided?" 
+ }, + { + "input": "In which year was the amount of Research and development largest?", + "output": "In which specific year did Research and development reach its highest amount or expenditure?" + }, + { + "input": "What was the specific amount allocated for income taxes in the year 2019?", + "output": "What was the Provision for income taxes in 2019?" + }, + { + "input": "What specific amount was set aside for income taxes in the fiscal year of 2019?", + "output": "What was the Provision for income taxes in 2019?" + }, + { + "input": "What has been the modification in the amount of state income taxes that individuals are required to pay in 2019 compared to 2018?", + "output": "What was the change in state income taxes that is currently payable in 2019 from 2018?" + }, + { + "input": "How much Microelectronics was present in the year 2019?", + "output": "What is the amount of Microelectronics in 2019?" + }, + { + "input": "What is the total volume or quantity of Microelectronics produced or used worldwide in the year 2019?", + "output": "What is the amount of Microelectronics in 2019?" + }, + { + "input": "In which years is net sales calculated?", + "output": "In which specific years does the calculation of net sales occur?" + }, + { + "input": "In which years is net sales calculated?", + "output": "During which specific years is the calculation of net sales undertaken?" + }, + { + "input": "What was the percentage difference in the quantity of OEM components and instrumentation between 2018 and 2019?", + "output": "What was the percentage change in the amount of OEM components and instrumentation in 2019 from 2018?" + }, + { + "input": "What was the Net cash provided by operating activities in 2019?", + "output": "What was the amount of net cash generated from operating activities during the year 2019?" 
+ }, + { + "input": "What was the Purchases of property and equipment in 2018?", + "output": "What was the total value of purchases made for property and equipment in the year 2018?" + }, + { + "input": "In which specific years are the instances or applications of physical currency emphasized or showcased in the table?", + "output": "In which years are the uses of cash highlighted in the table?" + }, + { + "input": "What years are specified in the table to illustrate the purpose of utilizing cash?", + "output": "In which years are the uses of cash highlighted in the table?" + }, + { + "input": "In which year was the Issuance of shares under employee stock plans larger?", + "output": "When comparing the issuance of shares under employee stock plans, in which specific year was the volume of shares issued found to be greater?" + }, + { + "input": "In which year were the Additions charged to expenses the largest?", + "output": "What is the specific year when the largest Additions were charged to expenses?" + }, + { + "input": "In which year were the Additions charged to expenses the largest?", + "output": "What was the year when the largest amount of Additions were charged to expenses?" + }, + { + "input": "In which year did tax fees have a higher value or amount than in other years?", + "output": "In which year were tax fees larger?" + }, + { + "input": "What was the difference in the amount of all other fees charged in 2019 compared to 2018?", + "output": "What was the change in All other fees in 2019 from 2018?" + }, + { + "input": "What was Long-term taxes payable in 2019?", + "output": "What was the amount of Long-term taxes payable in the year 2019?" 
+ }, + { + "input": "In which year was Other long-term liabilities larger?", + "output": "\"In which specific year did Other long-term liabilities exhibit a greater amount compared to the other years?\"" + }, + { + "input": "In 2019, what was the exact amount of deferred compensation liability?", + "output": "What was the Total deferred compensation liability in 2019?" + }, + { + "input": "For which years was the calculation of Total deferred compensation liability performed?", + "output": "In which years was Total deferred compensation liability calculated?" + }, + { + "input": "How was the calculation for diluted earnings per share derived?", + "output": "How was diluted earnings per share computed?" + }, + { + "input": "In which specific years can we find the earnings per share data that was included in the table?", + "output": "In which years was earnings per share provided in the table?" + }, + { + "input": "What year experienced the highest dilutive impact from employee stock awards?", + "output": "In which year was the Dilutive effect of employee stock awards largest?" + }, + { + "input": "In what specific year did employee stock awards have the most significant dilutive effect?", + "output": "In which year was the Dilutive effect of employee stock awards largest?" + }, + { + "input": "What was the change in Dilutive effect of employee stock awards in 2019 from 2018?", + "output": "What was the difference in the dilutive impact caused by employee stock awards between 2019 and 2018?" + }, + { + "input": "What was the change in Dilutive effect of employee stock awards in 2019 from 2018?", + "output": "What was the difference in how much employee stock awards had a dilutive effect in 2019 compared to 2018?" + }, + { + "input": "What was the percentage change in Dilutive effect of employee stock awards in 2019 from 2018?", + "output": "What was the percentage change in the dilutive effect of employee stock awards from 2018 to 2019?" 
+ }, + { + "input": "In which specific year did the Amortization of intangible assets account for a larger proportion of the overall net sales?", + "output": "In which year was Amortization of intangible assets a higher percentage of total net sales?" + }, + { + "input": "What was the change in the amount of Amortization of intangible assets in 2019 from 2018?", + "output": "What was the difference in the amount of Amortization of intangible assets between the fiscal years 2019 and 2018?" + }, + { + "input": "What was the value of Net deferred tax assets in the financial year of 2019?", + "output": "What was the Net deferred tax assets in 2019?" + }, + { + "input": "What was the difference in the initial balance for 2019 compared to 2018?", + "output": "What was the change in Beginning balance in 2019 from 2018?" + }, + { + "input": "What was the difference in the starting balance in 2019 compared to 2018?", + "output": "What was the change in Beginning balance in 2019 from 2018?" + }, + { + "input": "What was the balance in the accounts as of the start of the year in 2018?", + "output": "What was the Balance as of the beginning of the year in 2018?" + }, + { + "input": "\"What was the exact monetary balance in the accounts as of January 1st, 2018, at the start of the year?\"", + "output": "What was the Balance as of the beginning of the year in 2018?" + }, + { + "input": "What was the percentage change in Balance as of the beginning of the year in 2019 from 2018?", + "output": "What is the percentage change in the Balance as of the start of 2019 compared to the Balance at the start of 2018?" + }, + { + "input": "What was the percentage change in Prepaid and refundable income taxes from 2018 to 2019?", + "output": "What was the percentage difference in the amount of income taxes that were prepaid and refundable between the years 2018 and 2019?" 
+ }, + { + "input": "What was the percentage change in Prepaid and refundable income taxes from 2018 to 2019?", + "output": "What was the percentage difference in the amount of prepaid and refundable income taxes between 2018 and 2019?" + }, + { + "input": "What does the Current portion of Euro Term Loan consist of?", + "output": "What is included in the current portion of the Euro Term Loan?" + }, + { + "input": "What was the net change in the amount of outstanding Capital lease obligations between the years 2018 and 2019?", + "output": "What was the change in Capital lease obligations from 2018 to 2019?" + }, + { + "input": "What was the percentage increase or decrease in Capital lease obligations from 2018 to 2019?", + "output": "What was the percentage change in Capital lease obligations from 2018 to 2019?" + }, + { + "input": "What was the percentage increase or decrease in Capital lease obligations between 2018 and 2019?", + "output": "What was the percentage change in Capital lease obligations from 2018 to 2019?" + }, + { + "input": "What are the respective values of the company's work in process inventories on March 31, 2018 and 2019?", + "output": "What were the specific values of the company's work in process inventories as of March 31 in both 2018 and 2019?" + }, + { + "input": "What are the respective values of the company's work in process inventories on March 31, 2018 and 2019?", + "output": "What is the dollar amount of the work in process inventories for the company on March 31 of both 2018 and 2019?" + }, + { + "input": "What was the value of the company's finished goods on March 31, 2018 and March 31, 2019?", + "output": "What are the respective values of the company's finished goods on March 31, 2018 and 2019?" + }, + { + "input": "What is the change in the company's raw materials on March 31, 2018 and 2019?", + "output": "What is the difference in the amount of raw materials held by the company as of March 31, 2018 and March 31, 2019?" 
+ }, + { + "input": "What was the mean value of the company's raw materials on March 31 in both 2018 and 2019?", + "output": "What is the average value of the company's raw materials on March 31, 2018 and 2019?" + }, + { + "input": "What is the average worth of the company's raw materials specifically on March 31st, both in the years 2018 and 2019?", + "output": "What is the average value of the company's raw materials on March 31, 2018 and 2019?" + }, + { + "input": "What is the mean amount of the company's completed products on March 31, in both the years 2018 and 2019?", + "output": "What is the average value of the company's finished goods on March 31, 2018 and 2019?" + }, + { + "input": "What is the average value of the finished goods held by the company on March 31 in both 2018 and 2019?", + "output": "What is the average value of the company's finished goods on March 31, 2018 and 2019?" + }, + { + "input": "What are the company's respective foreign income before taxes in 2018 and 2019?", + "output": "What were the pre-tax foreign incomes of the company in 2018 and 2019?" + }, + { + "input": "What is the company's average loss before income taxes from the United States in 2018 and 2019?", + "output": "\"What was the average pre-tax loss incurred by the company in the United States for the years 2018 and 2019?\"" + }, + { + "input": "What is the company's average loss before income taxes from the United States in 2018 and 2019?", + "output": "What was the average pre-tax income loss for the company in the United States for both 2018 and 2019?" + }, + { + "input": "What is the company's average foreign income before taxes in 2018 and 2019?", + "output": "What was the average amount of foreign income before taxes for the company in both 2018 and 2019?" + }, + { + "input": "What are the average pre-tax earnings of the company for the fiscal years 2018 and 2019?", + "output": "What is the company's average total income before income taxes in 2018 and 2019?" 
+ }, + { + "input": "What are the specific cost of goods sold for the company in both 2018 and 2019?", + "output": "What are the company's respective cost of goods sold in 2018 and 2019?" + }, + { + "input": "What were the cost of goods sold for the company in 2018 and 2019?", + "output": "What are the company's respective cost of goods sold in 2018 and 2019?" + } +] diff --git a/libs/ktem/ktem/reasoning/prompt_optimization/rewrite_question.py b/libs/ktem/ktem/reasoning/prompt_optimization/rewrite_question.py new file mode 100644 index 0000000..3891f54 --- /dev/null +++ b/libs/ktem/ktem/reasoning/prompt_optimization/rewrite_question.py @@ -0,0 +1,37 @@ +from ktem.llms.manager import llms + +from kotaemon.base import BaseComponent, Document, HumanMessage, Node, SystemMessage +from kotaemon.llms import ChatLLM, PromptTemplate + +DEFAULT_REWRITE_PROMPT = ( + "Given the following question, rephrase and expand it " + "to help you do better answering. Maintain all information " + "in the original question. Keep the question as concise as possible. " + "Give answer in {lang}\n" + "Original question: {question}\n" + "Rephrased question: " +) + + +class RewriteQuestionPipeline(BaseComponent): + """Rewrite user question + + Args: + llm: the language model to rewrite question + rewrite_template: the prompt template for llm to paraphrase a text input + lang: the language of the answer. 
Currently support English and Japanese + """ + + llm: ChatLLM = Node(default_callback=lambda _: llms.get_default()) + rewrite_template: str = DEFAULT_REWRITE_PROMPT + + lang: str = "English" + + def run(self, question: str) -> Document: # type: ignore + prompt_template = PromptTemplate(self.rewrite_template) + prompt = prompt_template.populate(question=question, lang=self.lang) + messages = [ + SystemMessage(content="You are a helpful assistant"), + HumanMessage(content=prompt), + ] + return self.llm(messages) diff --git a/libs/ktem/ktem/reasoning/prompt_optimization/suggest_conversation_name.py b/libs/ktem/ktem/reasoning/prompt_optimization/suggest_conversation_name.py new file mode 100644 index 0000000..c5b2b25 --- /dev/null +++ b/libs/ktem/ktem/reasoning/prompt_optimization/suggest_conversation_name.py @@ -0,0 +1,36 @@ +import logging + +from ktem.llms.manager import llms + +from kotaemon.base import AIMessage, BaseComponent, Document, HumanMessage, Node +from kotaemon.llms import ChatLLM, PromptTemplate + +logger = logging.getLogger(__name__) + + +class SuggestConvNamePipeline(BaseComponent): + """Suggest a good conversation name based on the chat history.""" + + llm: ChatLLM = Node(default_callback=lambda _: llms.get_default()) + SUGGEST_NAME_PROMPT_TEMPLATE = ( + "You are an expert at suggesting good and memorable conversation name. " + "Based on the chat history above, " + "suggest a good conversation name (max 10 words). " + "Give answer in {lang}. Just output the conversation " + "name without any extra." 
+ ) + prompt_template: str = SUGGEST_NAME_PROMPT_TEMPLATE + lang: str = "English" + + def run(self, chat_history: list[tuple[str, str]]) -> Document: # type: ignore + prompt_template = PromptTemplate(self.prompt_template) + prompt = prompt_template.populate(lang=self.lang) + + messages = [] + for human, ai in chat_history: + messages.append(HumanMessage(content=human)) + messages.append(AIMessage(content=ai)) + + messages.append(HumanMessage(content=prompt)) + + return self.llm(messages) diff --git a/libs/ktem/ktem/reasoning/react.py b/libs/ktem/ktem/reasoning/react.py index 9f92023..afdd931 100644 --- a/libs/ktem/ktem/reasoning/react.py +++ b/libs/ktem/ktem/reasoning/react.py @@ -19,7 +19,10 @@ from kotaemon.agents import ( from kotaemon.base import BaseComponent, Document, HumanMessage, Node, SystemMessage from kotaemon.llms import ChatLLM, PromptTemplate +from ..utils import SUPPORTED_LANGUAGE_MAP + logger = logging.getLogger(__name__) +DEFAULT_AGENT_STEPS = 4 class DocSearchArgs(BaseModel): @@ -97,7 +100,7 @@ class DocSearchTool(BaseTool): ) print("Retrieved #{}: {}".format(_id, retrieved_content[:100])) - print("Score", retrieved_item.metadata.get("relevance_score", None)) + print("Score", retrieved_item.metadata.get("cohere_reranking_score", None)) # trim context by trim_len if evidence: @@ -190,7 +193,9 @@ class ReactAgentPipeline(BaseReasoning): "Action: {tool}[{input}]\n\nOutput: {output}" ).format( tool=step.tool if status == "thinking" else "", - input=step.tool_input.replace("\n", "") if status == "thinking" else "", + input=step.tool_input.replace("\n", "").replace('"', "") + if status == "thinking" + else "", output=output if status == "thinking" else "Finished", ) return Document( @@ -261,9 +266,17 @@ class ReactAgentPipeline(BaseReasoning): llm_name = settings[f"{prefix}.llm"] llm = llms.get(llm_name, llms.get_default()) + max_context_length_setting = settings.get("reasoning.max_context_length", None) + pipeline = 
ReactAgentPipeline(retrievers=retrievers) pipeline.agent.llm = llm pipeline.agent.max_iterations = settings[f"{prefix}.max_iterations"] + + if max_context_length_setting: + pipeline.agent.max_context_length = ( + max_context_length_setting // DEFAULT_AGENT_STEPS + ) + tools = [] for tool_name in settings[f"reasoning.options.{_id}.tools"]: tool = TOOL_REGISTRY[tool_name] @@ -273,7 +286,7 @@ class ReactAgentPipeline(BaseReasoning): tool.llm = llm tools.append(tool) pipeline.agent.plugins = tools - pipeline.agent.output_lang = {"en": "English", "ja": "Japanese"}.get( + pipeline.agent.output_lang = SUPPORTED_LANGUAGE_MAP.get( settings["reasoning.lang"], "English" ) pipeline.use_rewrite = states.get("app", {}).get("regen", False) @@ -298,6 +311,7 @@ class ReactAgentPipeline(BaseReasoning): "value": llm, "component": "dropdown", "choices": llm_choices, + "special_type": "llm", "info": ( "The language model to use for generating the answer. If None, " "the application default language model will be used." @@ -325,5 +339,10 @@ class ReactAgentPipeline(BaseReasoning): return { "id": "ReAct", "name": "ReAct Agent", - "description": "Implementing ReAct paradigm", + "description": ( + "Implementing ReAct paradigm: https://arxiv.org/abs/2210.03629. " + "ReAct agent answers the user's request by iteratively formulating " + "plan and executing it. The agent can use multiple tools to gather " + "information and generate the final answer." 
+ ), } diff --git a/libs/ktem/ktem/reasoning/rewoo.py b/libs/ktem/ktem/reasoning/rewoo.py index 87210f7..e4d461f 100644 --- a/libs/ktem/ktem/reasoning/rewoo.py +++ b/libs/ktem/ktem/reasoning/rewoo.py @@ -20,7 +20,10 @@ from kotaemon.agents import ( from kotaemon.base import BaseComponent, Document, HumanMessage, Node, SystemMessage from kotaemon.llms import ChatLLM, PromptTemplate +from ..utils import SUPPORTED_LANGUAGE_MAP + logger = logging.getLogger(__name__) +DEFAULT_AGENT_STEPS = 4 DEFAULT_PLANNER_PROMPT = ( @@ -135,7 +138,7 @@ class DocSearchTool(BaseTool): ) print("Retrieved #{}: {}".format(_id, retrieved_content)) - print("Score", retrieved_item.metadata.get("relevance_score", None)) + print("Score", retrieved_item.metadata.get("cohere_reranking_score", None)) # trim context by trim_len if evidence: @@ -215,7 +218,7 @@ class RewooAgentPipeline(BaseReasoning): use_rewrite: bool = False enable_citation: bool = False - def format_info_panel(self, worker_log): + def format_info_panel_evidence(self, worker_log): header = "" content = [] @@ -223,6 +226,10 @@ class RewooAgentPipeline(BaseReasoning): if line.startswith("#Plan"): # line starts with #Plan should be marked as a new segment header = line + elif line.startswith("#Action"): + # small fix for markdown output + line = "\\" + line + "
" + content.append(line) elif line.startswith("#"): # stop markdown from rendering big headers line = "\\" + line @@ -238,6 +245,17 @@ class RewooAgentPipeline(BaseReasoning): content=Render.collapsible( header=header, content=Render.table("\n".join(content)), + open=False, + ), + ) + + def format_info_panel_planner(self, planner_output): + planner_output = planner_output.replace("\n", "
") + return Document( + channel="info", + content=Render.collapsible( + header="Planner Output", + content=planner_output, open=True, ), ) @@ -285,12 +303,19 @@ class RewooAgentPipeline(BaseReasoning): # line starts with #Plan should be marked as a new segment new_segment = [line] segments.append(new_segment) + elif line.startswith("#Action"): + # small fix for markdown output + line = "\\" + line + "
" + segments[-1].append(line) elif line.startswith("#"): # stop markdown from rendering big headers line = "\\" + line segments[-1].append(line) else: - segments[-1].append(line) + if segments: + segments[-1].append(line) + else: + segments.append([line]) outputs = [] for segment in segments: @@ -337,18 +362,23 @@ class RewooAgentPipeline(BaseReasoning): for item in output_stream: if item.intermediate_steps: for step in item.intermediate_steps: - yield Document( - channel="info", - content=self.format_info_panel(step["worker_log"]), - ) + if "planner_log" in step: + yield Document( + channel="info", + content=self.format_info_panel_planner(step["planner_log"]), + ) + else: + yield Document( + channel="info", + content=self.format_info_panel_evidence(step["worker_log"]), + ) if item.text: + # final answer yield Document(channel="chat", content=item.text) answer = output_stream.value yield Document(channel="info", content=None) - refined_citations = self.prepare_citation(answer) - for _ in refined_citations: - yield _ + yield from self.prepare_citation(answer) return answer @@ -360,6 +390,8 @@ class RewooAgentPipeline(BaseReasoning): prefix = f"reasoning.options.{_id}" pipeline = RewooAgentPipeline(retrievers=retrievers) + max_context_length_setting = settings.get("reasoning.max_context_length", None) + planner_llm_name = settings[f"{prefix}.planner_llm"] planner_llm = llms.get(planner_llm_name, llms.get_default()) solver_llm_name = settings[f"{prefix}.solver_llm"] @@ -367,6 +399,10 @@ class RewooAgentPipeline(BaseReasoning): pipeline.agent.planner_llm = planner_llm pipeline.agent.solver_llm = solver_llm + if max_context_length_setting: + pipeline.agent.max_context_length = ( + max_context_length_setting // DEFAULT_AGENT_STEPS + ) tools = [] for tool_name in settings[f"{prefix}.tools"]: @@ -377,7 +413,7 @@ class RewooAgentPipeline(BaseReasoning): tool.llm = solver_llm tools.append(tool) pipeline.agent.plugins = tools - pipeline.agent.output_lang = {"en": "English", 
"ja": "Japanese"}.get( + pipeline.agent.output_lang = SUPPORTED_LANGUAGE_MAP.get( settings["reasoning.lang"], "English" ) pipeline.agent.prompt_template["Planner"] = PromptTemplate( @@ -413,6 +449,7 @@ class RewooAgentPipeline(BaseReasoning): "value": llm, "component": "dropdown", "choices": llm_choices, + "special_type": "llm", "info": ( "The language model to use for planning. " "This model will generate a plan based on the " @@ -424,6 +461,7 @@ class RewooAgentPipeline(BaseReasoning): "value": llm, "component": "dropdown", "choices": llm_choices, + "special_type": "llm", "info": ( "The language model to use for solving. " "This model will generate the answer based on the " @@ -457,6 +495,10 @@ class RewooAgentPipeline(BaseReasoning): "id": "ReWOO", "name": "ReWOO Agent", "description": ( - "Implementing ReWOO paradigm " "https://arxiv.org/pdf/2305.18323.pdf" + "Implementing ReWOO paradigm: https://arxiv.org/abs/2305.18323. " + "The ReWOO agent makes a step by step plan in the first stage, " + "then solves each step in the second stage. The agent can use " + "external tools to help in the reasoning process. Once all stages " + "are completed, the agent will summarize the answer." 
), } diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py index 5118b8f..8244690 100644 --- a/libs/ktem/ktem/reasoning/simple.py +++ b/libs/ktem/ktem/reasoning/simple.py @@ -1,14 +1,20 @@ -import asyncio import html import logging -import re +import threading from collections import defaultdict +from difflib import SequenceMatcher from functools import partial from typing import Generator +import numpy as np import tiktoken from ktem.llms.manager import llms +from ktem.reasoning.prompt_optimization import ( + DecomposeQuestionPipeline, + RewriteQuestionPipeline, +) from ktem.utils.render import Render +from theflow.settings import settings as flowsettings from kotaemon.base import ( AIMessage, @@ -22,8 +28,8 @@ from kotaemon.base import ( from kotaemon.indices.qa.citation import CitationPipeline from kotaemon.indices.splitters import TokenSplitter from kotaemon.llms import ChatLLM, PromptTemplate -from kotaemon.loaders.utils.gpt4v import generate_gpt4v, stream_gpt4v +from ..utils import SUPPORTED_LANGUAGE_MAP from .base import BaseReasoning logger = logging.getLogger(__name__) @@ -32,6 +38,22 @@ EVIDENCE_MODE_TEXT = 0 EVIDENCE_MODE_TABLE = 1 EVIDENCE_MODE_CHATBOT = 2 EVIDENCE_MODE_FIGURE = 3 +MAX_IMAGES = 10 + + +def find_text(search_span, context): + sentence_list = search_span.split("\n") + matches = [] + # don't search for small text + if len(search_span) > 5: + for sentence in sentence_list: + match = SequenceMatcher( + None, sentence, context, autojunk=False + ).find_longest_match() + if match.size > len(sentence) * 0.35: + matches.append((match.b, match.b + match.size)) + + return matches class PrepareEvidencePipeline(BaseComponent): @@ -44,21 +66,29 @@ class PrepareEvidencePipeline(BaseComponent): chunk of text into smaller ones. The first one will be retained. 
""" - trim_func: TokenSplitter = TokenSplitter.withx( - chunk_size=3000, - chunk_overlap=0, - separator=" ", - tokenizer=partial( - tiktoken.encoding_for_model("gpt-3.5-turbo").encode, - allowed_special=set(), - disallowed_special="all", - ), - ) + max_context_length: int = 32000 + trim_func: TokenSplitter | None = None def run(self, docs: list[RetrievedDocument]) -> Document: evidence = "" + images = [] table_found = 0 - evidence_mode = EVIDENCE_MODE_TEXT + evidence_modes = [] + + evidence_trim_func = ( + self.trim_func + if self.trim_func + else TokenSplitter( + chunk_size=self.max_context_length, + chunk_overlap=0, + separator=" ", + tokenizer=partial( + tiktoken.encoding_for_model("gpt-3.5-turbo").encode, + allowed_special=set(), + disallowed_special="all", + ), + ) + ) for _id, retrieved_item in enumerate(docs): retrieved_content = "" @@ -67,9 +97,11 @@ class PrepareEvidencePipeline(BaseComponent): if page: source += f" (Page {page})" if retrieved_item.metadata.get("type", "") == "table": - evidence_mode = EVIDENCE_MODE_TABLE + evidence_modes.append(EVIDENCE_MODE_TABLE) if table_found < 5: - retrieved_content = retrieved_item.metadata.get("table_origin", "") + retrieved_content = retrieved_item.metadata.get( + "table_origin", retrieved_item.text + ) if retrieved_content not in evidence: table_found += 1 evidence += ( @@ -78,7 +110,7 @@ class PrepareEvidencePipeline(BaseComponent): + "\n
" ) elif retrieved_item.metadata.get("type", "") == "chatbot": - evidence_mode = EVIDENCE_MODE_CHATBOT + evidence_modes.append(EVIDENCE_MODE_CHATBOT) retrieved_content = retrieved_item.metadata["window"] evidence += ( f"
Chatbot scenario from {filename} (Row {page})\n" @@ -86,15 +118,16 @@ class PrepareEvidencePipeline(BaseComponent): + "\n
" ) elif retrieved_item.metadata.get("type", "") == "image": - evidence_mode = EVIDENCE_MODE_FIGURE + evidence_modes.append(EVIDENCE_MODE_FIGURE) retrieved_content = retrieved_item.metadata.get("image_origin", "") retrieved_caption = html.escape(retrieved_item.get_content()) evidence += ( f"
Figure from {source}\n" - + f"" + "\n
" ) + images.append(retrieved_content) else: if "window" in retrieved_item.metadata: retrieved_content = retrieved_item.metadata["window"] @@ -108,27 +141,27 @@ class PrepareEvidencePipeline(BaseComponent): + " \n
" ) - print("Retrieved #{}: {}".format(_id, retrieved_content)) - print(retrieved_item.metadata) - print("Score", retrieved_item.metadata.get("relevance_score", None)) + # resolve evidence mode + evidence_mode = EVIDENCE_MODE_TEXT + if EVIDENCE_MODE_FIGURE in evidence_modes: + evidence_mode = EVIDENCE_MODE_FIGURE + elif EVIDENCE_MODE_TABLE in evidence_modes: + evidence_mode = EVIDENCE_MODE_TABLE - if evidence_mode != EVIDENCE_MODE_FIGURE: - # trim context by trim_len - print("len (original)", len(evidence)) - if evidence: - texts = self.trim_func([Document(text=evidence)]) - evidence = texts[0].text - print("len (trimmed)", len(evidence)) + # trim context by trim_len + print("len (original)", len(evidence)) + if evidence: + texts = evidence_trim_func([Document(text=evidence)]) + evidence = texts[0].text + print("len (trimmed)", len(evidence)) - print(f"PrepareEvidence with input {docs}\nOutput: {evidence}\n") - - return Document(content=(evidence_mode, evidence)) + return Document(content=(evidence_mode, evidence, images)) DEFAULT_QA_TEXT_PROMPT = ( - "Use the following pieces of context to answer the question at the end. " + "Use the following pieces of context to answer the question at the end in detail with clear explanation. " # noqa: E501 "If you don't know the answer, just say that you don't know, don't try to " - "make up an answer. Keep the answer as concise as possible. Give answer in " + "make up an answer. Give answer in " "{lang}.\n\n" "{context}\n" "Question: {question}\n" @@ -136,15 +169,15 @@ DEFAULT_QA_TEXT_PROMPT = ( ) DEFAULT_QA_TABLE_PROMPT = ( - "List all rows (row number) from the table context that related to the question, " - "then provide detail answer with clear explanation and citations. " + "Use the given context: texts, tables, and figures below to answer the question, " + "then provide answer with clear explanation." "If you don't know the answer, just say that you don't know, " "don't try to make up an answer. 
Give answer in {lang}.\n\n" "Context:\n" "{context}\n" "Question: {question}\n" "Helpful Answer:" -) +) # noqa DEFAULT_QA_CHATBOT_PROMPT = ( "Pick the most suitable chatbot scenarios to answer the question at the end, " @@ -155,7 +188,7 @@ DEFAULT_QA_CHATBOT_PROMPT = ( "{context}\n" "Question: {question}\n" "Answer:" -) +) # noqa DEFAULT_QA_FIGURE_PROMPT = ( "Use the given context: texts, tables, and figures below to answer the question. " @@ -165,7 +198,7 @@ DEFAULT_QA_FIGURE_PROMPT = ( "{context}\n" "Question: {question}\n" "Answer: " -) +) # noqa DEFAULT_REWRITE_PROMPT = ( "Given the following question, rephrase and expand it " @@ -174,7 +207,9 @@ DEFAULT_REWRITE_PROMPT = ( "Give answer in {lang}\n" "Original question: {question}\n" "Rephrased question: " -) +) # noqa + +CONTEXT_RELEVANT_WARNING_SCORE = 0.7 class AnswerWithContextPipeline(BaseComponent): @@ -193,7 +228,8 @@ class AnswerWithContextPipeline(BaseComponent): """ llm: ChatLLM = Node(default_callback=lambda _: llms.get_default()) - vlm_endpoint: str = "" + vlm_endpoint: str = getattr(flowsettings, "KH_VLM_ENDPOINT", "") + use_multimodal: bool = getattr(flowsettings, "KH_REASONINGS_USE_MULTIMODAL", True) citation_pipeline: CitationPipeline = Node( default_callback=lambda _: CitationPipeline(llm=llms.get_default()) ) @@ -210,33 +246,25 @@ class AnswerWithContextPipeline(BaseComponent): def get_prompt(self, question, evidence, evidence_mode: int): """Prepare the prompt and other information for LLM""" - images = [] - if evidence_mode == EVIDENCE_MODE_TEXT: prompt_template = PromptTemplate(self.qa_template) elif evidence_mode == EVIDENCE_MODE_TABLE: prompt_template = PromptTemplate(self.qa_table_template) elif evidence_mode == EVIDENCE_MODE_FIGURE: - prompt_template = PromptTemplate(self.qa_figure_template) + if self.use_multimodal: + prompt_template = PromptTemplate(self.qa_figure_template) + else: + prompt_template = PromptTemplate(self.qa_template) else: prompt_template = 
PromptTemplate(self.qa_chatbot_template) - if evidence_mode == EVIDENCE_MODE_FIGURE: - # isolate image from evidence - evidence, images = self.extract_evidence_images(evidence) - prompt = prompt_template.populate( - context=evidence, - question=question, - lang=self.lang, - ) - else: - prompt = prompt_template.populate( - context=evidence, - question=question, - lang=self.lang, - ) + prompt = prompt_template.populate( + context=evidence, + question=question, + lang=self.lang, + ) - return prompt, images + return prompt, evidence def run( self, question: str, evidence: str, evidence_mode: int = 0, **kwargs @@ -244,37 +272,22 @@ class AnswerWithContextPipeline(BaseComponent): return self.invoke(question, evidence, evidence_mode, **kwargs) def invoke( - self, question: str, evidence: str, evidence_mode: int = 0, **kwargs + self, + question: str, + evidence: str, + evidence_mode: int = 0, + images: list[str] = [], + **kwargs, ) -> Document: - history = kwargs.get("history", []) - prompt, images = self.get_prompt(question, evidence, evidence_mode) - - output = "" - if evidence_mode == EVIDENCE_MODE_FIGURE: - output = generate_gpt4v(self.vlm_endpoint, images, prompt, max_tokens=768) - else: - messages = [] - if self.system_prompt: - messages.append(SystemMessage(content=self.system_prompt)) - for human, ai in history[-self.n_last_interactions :]: - messages.append(HumanMessage(content=human)) - messages.append(AIMessage(content=ai)) - messages.append(HumanMessage(content=prompt)) - output = self.llm(messages).text - - # retrieve the citation - citation = None - if evidence and self.enable_citation: - citation = self.citation_pipeline.invoke( - context=evidence, question=question - ) - - answer = Document(text=output, metadata={"citation": citation}) - - return answer + raise NotImplementedError async def ainvoke( # type: ignore - self, question: str, evidence: str, evidence_mode: int = 0, **kwargs + self, + question: str, + evidence: str, + evidence_mode: int = 0, + 
images: list[str] = [], + **kwargs, ) -> Document: """Answer the question based on the evidence @@ -298,127 +311,94 @@ class AnswerWithContextPipeline(BaseComponent): (determined by retrieval pipeline) evidence_mode: the mode of evidence, 0 for text, 1 for table, 2 for chatbot """ - history = kwargs.get("history", []) - prompt, images = self.get_prompt(question, evidence, evidence_mode) - - citation_task = None - if evidence and self.enable_citation: - citation_task = asyncio.create_task( - self.citation_pipeline.ainvoke(context=evidence, question=question) - ) - print("Citation task created") - - output = "" - if evidence_mode == EVIDENCE_MODE_FIGURE: - for text in stream_gpt4v(self.vlm_endpoint, images, prompt, max_tokens=768): - output += text - self.report_output(Document(channel="chat", content=text)) - await asyncio.sleep(0) - else: - messages = [] - if self.system_prompt: - messages.append(SystemMessage(content=self.system_prompt)) - for human, ai in history[-self.n_last_interactions :]: - messages.append(HumanMessage(content=human)) - messages.append(AIMessage(content=ai)) - messages.append(HumanMessage(content=prompt)) - - try: - # try streaming first - print("Trying LLM streaming") - for text in self.llm.stream(messages): - output += text.text - self.report_output(Document(content=text.text, channel="chat")) - await asyncio.sleep(0) - except NotImplementedError: - print("Streaming is not supported, falling back to normal processing") - output = self.llm(messages).text - self.report_output(Document(content=output, channel="chat")) - - # retrieve the citation - print("Waiting for citation task") - if citation_task is not None: - citation = await citation_task - else: - citation = None - - answer = Document(text=output, metadata={"citation": citation}) - - return answer + raise NotImplementedError def stream( # type: ignore - self, question: str, evidence: str, evidence_mode: int = 0, **kwargs + self, + question: str, + evidence: str, + evidence_mode: int = 
0, + images: list[str] = [], + **kwargs, ) -> Generator[Document, None, Document]: history = kwargs.get("history", []) - prompt, images = self.get_prompt(question, evidence, evidence_mode) - - output = "" - if evidence_mode == EVIDENCE_MODE_FIGURE: - for text in stream_gpt4v(self.vlm_endpoint, images, prompt, max_tokens=768): - output += text - yield Document(channel="chat", content=text) + print(f"Got {len(images)} images") + # check if evidence exists, use QA prompt + if evidence: + prompt, evidence = self.get_prompt(question, evidence, evidence_mode) else: - messages = [] - if self.system_prompt: - messages.append(SystemMessage(content=self.system_prompt)) - for human, ai in history[-self.n_last_interactions :]: - messages.append(HumanMessage(content=human)) - messages.append(AIMessage(content=ai)) - messages.append(HumanMessage(content=prompt)) - - try: - # try streaming first - print("Trying LLM streaming") - for text in self.llm.stream(messages): - output += text.text - yield Document(channel="chat", content=text.text) - except NotImplementedError: - print("Streaming is not supported, falling back to normal processing") - output = self.llm(messages).text - yield Document(channel="chat", content=output) + prompt = question # retrieve the citation citation = None - if evidence and self.enable_citation: - citation = self.citation_pipeline.invoke( - context=evidence, question=question - ) - answer = Document(text=output, metadata={"citation": citation}) + def citation_call(): + nonlocal citation + citation = self.citation_pipeline(context=evidence, question=question) + + if evidence and self.enable_citation: + # execute function call in thread + citation_thread = threading.Thread(target=citation_call) + citation_thread.start() + else: + citation_thread = None + + output = "" + logprobs = [] + + messages = [] + if self.system_prompt: + messages.append(SystemMessage(content=self.system_prompt)) + for human, ai in history[-self.n_last_interactions :]: + 
messages.append(HumanMessage(content=human)) + messages.append(AIMessage(content=ai)) + + if self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE: + # create image message: + messages.append( + HumanMessage( + content=[ + {"type": "text", "text": prompt}, + ] + + [ + { + "type": "image_url", + "image_url": {"url": image}, + } + for image in images[:MAX_IMAGES] + ], + ) + ) + else: + # append main prompt + messages.append(HumanMessage(content=prompt)) + + try: + # try streaming first + print("Trying LLM streaming") + for out_msg in self.llm.stream(messages): + output += out_msg.text + logprobs += out_msg.logprobs + yield Document(channel="chat", content=out_msg.text) + except NotImplementedError: + print("Streaming is not supported, falling back to normal processing") + output = self.llm(messages).text + yield Document(channel="chat", content=output) + + if logprobs: + qa_score = np.exp(np.average(logprobs)) + else: + qa_score = None + + if citation_thread: + citation_thread.join() + answer = Document( + text=output, + metadata={"citation": citation, "qa_score": qa_score}, + ) return answer - def extract_evidence_images(self, evidence: str): - """Util function to extract and isolate images from context/evidence""" - image_pattern = r"src='(data:image\/[^;]+;base64[^']+)'" - matches = re.findall(image_pattern, evidence) - context = re.sub(image_pattern, "", evidence) - return context, matches - - -class RewriteQuestionPipeline(BaseComponent): - """Rewrite user question - - Args: - llm: the language model to rewrite question - rewrite_template: the prompt template for llm to paraphrase a text input - lang: the language of the answer. 
Currently support English and Japanese - """ - - llm: ChatLLM = Node(default_callback=lambda _: llms.get_default()) - rewrite_template: str = DEFAULT_REWRITE_PROMPT - - lang: str = "English" - - def run(self, question: str) -> Document: # type: ignore - prompt_template = PromptTemplate(self.rewrite_template) - prompt = prompt_template.populate(question=question, lang=self.lang) - messages = [ - SystemMessage(content="You are a helpful assistant"), - HumanMessage(content=prompt), - ] - return self.llm(messages) - class AddQueryContextPipeline(BaseComponent): @@ -470,50 +450,72 @@ class FullQAPipeline(BaseReasoning): class Config: allow_extra = True + # configuration parameters + trigger_context: int = 150 + use_rewrite: bool = False + retrievers: list[BaseComponent] evidence_pipeline: PrepareEvidencePipeline = PrepareEvidencePipeline.withx() answering_pipeline: AnswerWithContextPipeline = AnswerWithContextPipeline.withx() - rewrite_pipeline: RewriteQuestionPipeline = RewriteQuestionPipeline.withx() + rewrite_pipeline: RewriteQuestionPipeline | None = None add_query_context: AddQueryContextPipeline = AddQueryContextPipeline.withx() - trigger_context: int = 150 - use_rewrite: bool = False def retrieve( self, message: str, history: list ) -> tuple[list[RetrievedDocument], list[Document]]: """Retrieve the documents based on the message""" - if len(message) < self.trigger_context: - # prefer adding context for short user questions, avoid adding context for - # long questions, as they are likely to contain enough information - # plus, avoid the situation where the original message is already too long - # for the model to handle - query = self.add_query_context(message, history).content - else: - query = message - print(f"Rewritten query: {query}") + # if len(message) < self.trigger_context: + # # prefer adding context for short user questions, avoid adding context for + # # long questions, as they are likely to contain enough information + # # plus, avoid the situation 
where the original message is already too long + # # for the model to handle + # query = self.add_query_context(message, history).content + # else: + # query = message + # print(f"Rewritten query: {query}") + query = None if not query: - return [], [] + # TODO: previously return [], [] because we think this message as something + # like "Hello", "I need help"... + query = message docs, doc_ids = [], [] - for retriever in self.retrievers: - for doc in retriever(text=query): + plot_docs = [] + + for idx, retriever in enumerate(self.retrievers): + retriever_node = self._prepare_child(retriever, f"retriever_{idx}") + retriever_docs = retriever_node(text=query) + + retriever_docs_text = [] + retriever_docs_plot = [] + + for doc in retriever_docs: + if doc.metadata.get("type", "") == "plot": + retriever_docs_plot.append(doc) + else: + retriever_docs_text.append(doc) + + for doc in retriever_docs_text: if doc.doc_id not in doc_ids: docs.append(doc) doc_ids.append(doc.doc_id) - info = [] - for doc in docs: - info.append( - Document( - channel="info", - content=Render.collapsible( - header=doc.metadata["file_name"], - content=Render.table(doc.text), - open=True, - ), - ) + plot_docs.extend(retriever_docs_plot) + + info = [ + Document( + channel="info", + content=Render.collapsible_with_header(doc, open_collapsible=True), ) + for doc in docs + ] + [ + Document( + channel="plot", + content=doc.metadata.get("data", ""), + ) + for doc in plot_docs + ] return docs, info @@ -521,153 +523,172 @@ class FullQAPipeline(BaseReasoning): """Prepare the citations to show on the UI""" with_citation, without_citation = [], [] spans = defaultdict(list) + has_llm_score = any("llm_trulens_score" in doc.metadata for doc in docs) - if answer.metadata["citation"] is not None: + if answer.metadata["citation"] and answer.metadata["citation"].answer: for fact_with_evidence in answer.metadata["citation"].answer: for quote in fact_with_evidence.substring_quote: + matched_excerpts = [] for doc in 
docs: - start_idx = doc.text.find(quote) - if start_idx == -1: - continue + matches = find_text(quote, doc.text) - end_idx = start_idx + len(quote) - - current_idx = start_idx - if "|" not in doc.text[start_idx:end_idx]: - spans[doc.doc_id].append( - {"start": start_idx, "end": end_idx} - ) - else: - while doc.text[current_idx:end_idx].find("|") != -1: - match_idx = doc.text[current_idx:end_idx].find("|") + for start, end in matches: + if "|" not in doc.text[start:end]: spans[doc.doc_id].append( { - "start": current_idx, - "end": current_idx + match_idx, + "start": start, + "end": end, } ) - current_idx += match_idx + 2 - if current_idx > end_idx: - break - break + matched_excerpts.append(doc.text[start:end]) + + print("Matched citation:", quote, matched_excerpts), id2docs = {doc.doc_id: doc for doc in docs} not_detected = set(id2docs.keys()) - set(spans.keys()) - for id, ss in spans.items(): + + # render highlight spans + for _id, ss in spans.items(): if not ss: - not_detected.add(id) + not_detected.add(_id) continue + cur_doc = id2docs[_id] + highlight_text = "" + ss = sorted(ss, key=lambda x: x["start"]) - text = id2docs[id].text[: ss[0]["start"]] + text = cur_doc.text[: ss[0]["start"]] for idx, span in enumerate(ss): - text += Render.highlight(id2docs[id].text[span["start"] : span["end"]]) + to_highlight = cur_doc.text[span["start"] : span["end"]] + if len(to_highlight) > len(highlight_text): + highlight_text = to_highlight + text += Render.highlight(to_highlight) if idx < len(ss) - 1: - text += id2docs[id].text[span["end"] : ss[idx + 1]["start"]] - text += id2docs[id].text[ss[-1]["end"] :] + text += cur_doc.text[span["end"] : ss[idx + 1]["start"]] + text += cur_doc.text[ss[-1]["end"] :] + # add to display list with_citation.append( Document( channel="info", - content=Render.collapsible( - header=id2docs[id].metadata["file_name"], - content=Render.table(text), - open=True, + content=Render.collapsible_with_header_score( + cur_doc, + override_text=text, + 
highlight_text=highlight_text, + open_collapsible=True, ), ) ) - without_citation = [ - Document( - channel="info", - content=Render.collapsible( - header=id2docs[id].metadata["file_name"], - content=Render.table(id2docs[id].text), - open=False, - ), - ) - for id in list(not_detected) - ] + print("Got {} cited docs".format(len(with_citation))) + sorted_not_detected_items_with_scores = [ + (id_, id2docs[id_].metadata.get("llm_trulens_score", 0.0)) + for id_ in not_detected + ] + sorted_not_detected_items_with_scores.sort(key=lambda x: x[1], reverse=True) + + for id_, _ in sorted_not_detected_items_with_scores: + doc = id2docs[id_] + doc_score = doc.metadata.get("llm_trulens_score", 0.0) + is_open = not has_llm_score or ( + doc_score > CONTEXT_RELEVANT_WARNING_SCORE and len(with_citation) == 0 + ) + without_citation.append( + Document( + channel="info", + content=Render.collapsible_with_header_score( + doc, open_collapsible=is_open + ), + ) + ) return with_citation, without_citation + def show_citations(self, answer, docs): + # show the evidence + with_citation, without_citation = self.prepare_citations(answer, docs) + if not with_citation and not without_citation: + yield Document(channel="info", content="
No evidence found.
") + else: + # clear the Info panel + max_llm_rerank_score = max( + doc.metadata.get("llm_trulens_score", 0.0) for doc in docs + ) + has_llm_score = any("llm_trulens_score" in doc.metadata for doc in docs) + # clear previous info + yield Document(channel="info", content=None) + + # yield warning message + if has_llm_score and max_llm_rerank_score < CONTEXT_RELEVANT_WARNING_SCORE: + yield Document( + channel="info", + content=( + "
WARNING! Context relevance score is low. " + "Double check the model answer for correctness.
" + ), + ) + + # show QA score + qa_score = ( + round(answer.metadata["qa_score"], 2) + if answer.metadata.get("qa_score") + else None + ) + if qa_score: + yield Document( + channel="info", + content=f"
Answer confidence: {qa_score}
", + ) + + yield from with_citation + if without_citation: + yield from without_citation + async def ainvoke( # type: ignore self, message: str, conv_id: str, history: list, **kwargs # type: ignore ) -> Document: # type: ignore - if self.use_rewrite: - rewrite = await self.rewrite_pipeline(question=message) - message = rewrite.text - - docs, infos = self.retrieve(message, history) - for _ in infos: - self.report_output(_) - await asyncio.sleep(0.1) - - evidence_mode, evidence = self.evidence_pipeline(docs).content - answer = await self.answering_pipeline( - question=message, - history=history, - evidence=evidence, - evidence_mode=evidence_mode, - conv_id=conv_id, - **kwargs, - ) - - # show the evidence - with_citation, without_citation = self.prepare_citations(answer, docs) - if not with_citation and not without_citation: - self.report_output(Document(channel="info", content="No evidence found.\n")) - else: - self.report_output(Document(channel="info", content=None)) - for _ in with_citation: - self.report_output(_) - if without_citation: - self.report_output( - Document( - channel="info", - content="Retrieved segments without matching evidence:\n", - ) - ) - for _ in without_citation: - self.report_output(_) - - self.report_output(None) - return answer + raise NotImplementedError def stream( # type: ignore self, message: str, conv_id: str, history: list, **kwargs # type: ignore ) -> Generator[Document, None, Document]: - if self.use_rewrite: + if self.use_rewrite and self.rewrite_pipeline: + print("Chosen rewrite pipeline", self.rewrite_pipeline) message = self.rewrite_pipeline(question=message).text + print("Rewrite result", message) + print(f"Retrievers {self.retrievers}") # should populate the context docs, infos = self.retrieve(message, history) - for _ in infos: - yield _ + print(f"Got {len(docs)} retrieved documents") + yield from infos + + evidence_mode, evidence, images = self.evidence_pipeline(docs).content + + def generate_relevant_scores(): + nonlocal 
docs + docs = self.retrievers[0].generate_relevant_scores(message, docs) + + # generate relevant score using + if evidence and self.retrievers: + scoring_thread = threading.Thread(target=generate_relevant_scores) + scoring_thread.start() + else: + scoring_thread = None - evidence_mode, evidence = self.evidence_pipeline(docs).content answer = yield from self.answering_pipeline.stream( question=message, history=history, evidence=evidence, evidence_mode=evidence_mode, + images=images, conv_id=conv_id, **kwargs, ) # show the evidence - with_citation, without_citation = self.prepare_citations(answer, docs) - if not with_citation and not without_citation: - yield Document(channel="info", content="No evidence found.\n") - else: - yield Document(channel="info", content=None) - for _ in with_citation: - yield _ - if without_citation: - yield Document( - channel="info", - content="Retrieved segments without matching evidence:\n", - ) - for _ in without_citation: - yield _ + if scoring_thread: + scoring_thread.join() + + yield from self.show_citations(answer, docs) return answer @@ -679,12 +700,21 @@ class FullQAPipeline(BaseReasoning): settings: the settings for the pipeline retrievers: the retrievers to use """ - prefix = f"reasoning.options.{cls.get_info()['id']}" - pipeline = cls(retrievers=retrievers) + max_context_length_setting = settings.get("reasoning.max_context_length", 32000) + pipeline = cls( + retrievers=retrievers, + rewrite_pipeline=RewriteQuestionPipeline(), + ) + + prefix = f"reasoning.options.{cls.get_info()['id']}" llm_name = settings.get(f"{prefix}.llm", None) llm = llms.get(llm_name, llms.get_default()) + # prepare evidence pipeline configuration + evidence_pipeline = pipeline.evidence_pipeline + evidence_pipeline.max_context_length = max_context_length_setting + # answering pipeline configuration answer_pipeline = pipeline.answering_pipeline answer_pipeline.llm = llm @@ -693,7 +723,7 @@ class FullQAPipeline(BaseReasoning): 
answer_pipeline.enable_citation = settings[f"{prefix}.highlight_citation"] answer_pipeline.system_prompt = settings[f"{prefix}.system_prompt"] answer_pipeline.qa_template = settings[f"{prefix}.qa_prompt"] - answer_pipeline.lang = {"en": "English", "ja": "Japanese"}.get( + answer_pipeline.lang = SUPPORTED_LANGUAGE_MAP.get( settings["reasoning.lang"], "English" ) @@ -704,10 +734,11 @@ class FullQAPipeline(BaseReasoning): pipeline.trigger_context = settings[f"{prefix}.trigger_context"] pipeline.use_rewrite = states.get("app", {}).get("regen", False) - pipeline.rewrite_pipeline.llm = llm - pipeline.rewrite_pipeline.lang = {"en": "English", "ja": "Japanese"}.get( - settings["reasoning.lang"], "English" - ) + if pipeline.rewrite_pipeline: + pipeline.rewrite_pipeline.llm = llm + pipeline.rewrite_pipeline.lang = SUPPORTED_LANGUAGE_MAP.get( + settings["reasoning.lang"], "English" + ) return pipeline @classmethod @@ -727,6 +758,7 @@ class FullQAPipeline(BaseReasoning): "value": llm, "component": "dropdown", "choices": choices, + "special_type": "llm", "info": ( "The language model to use for generating the answer. If None, " "the application default language model will be used." @@ -734,7 +766,7 @@ class FullQAPipeline(BaseReasoning): }, "highlight_citation": { "name": "Highlight Citation", - "value": False, + "value": True, "component": "checkbox", }, "system_prompt": { @@ -767,5 +799,163 @@ class FullQAPipeline(BaseReasoning): return { "id": "simple", "name": "Simple QA", - "description": "Simple QA pipeline", + "description": ( + "Simple RAG-based question answering pipeline. This pipeline can " + "perform both keyword search and similarity search to retrieve the " + "context. After that it includes that context to generate the answer." 
+ ), + } + + +class FullDecomposeQAPipeline(FullQAPipeline): + def answer_sub_questions( + self, messages: list, conv_id: str, history: list, **kwargs + ): + output_str = "" + for idx, message in enumerate(messages): + yield Document( + channel="chat", + content=f"
Sub-question {idx + 1}" + f"
{message}
Answer
", + ) + # should populate the context + docs, infos = self.retrieve(message, history) + print(f"Got {len(docs)} retrieved documents") + + yield from infos + + evidence_mode, evidence, images = self.evidence_pipeline(docs).content + answer = yield from self.answering_pipeline.stream( + question=message, + history=history, + evidence=evidence, + evidence_mode=evidence_mode, + images=images, + conv_id=conv_id, + **kwargs, + ) + + output_str += ( + f"Sub-question {idx + 1}-th: '{message}'\nAnswer: '{answer.text}'\n\n" + ) + + return output_str + + def stream( # type: ignore + self, message: str, conv_id: str, history: list, **kwargs # type: ignore + ) -> Generator[Document, None, Document]: + sub_question_answer_output = "" + if self.rewrite_pipeline: + print("Chosen rewrite pipeline", self.rewrite_pipeline) + result = self.rewrite_pipeline(question=message) + print("Rewrite result", result) + if isinstance(result, Document): + message = result.text + elif ( + isinstance(result, list) + and len(result) > 0 + and isinstance(result[0], Document) + ): + yield Document( + channel="chat", + content="

Sub questions and their answers

", + ) + sub_question_answer_output = yield from self.answer_sub_questions( + [r.text for r in result], conv_id, history, **kwargs + ) + + yield Document( + channel="chat", + content=f"

Main question

{message}
Answer
", + ) + + # should populate the context + docs, infos = self.retrieve(message, history) + print(f"Got {len(docs)} retrieved documents") + yield from infos + + evidence_mode, evidence, images = self.evidence_pipeline(docs).content + answer = yield from self.answering_pipeline.stream( + question=message, + history=history, + evidence=evidence + "\n" + sub_question_answer_output, + evidence_mode=evidence_mode, + images=images, + conv_id=conv_id, + **kwargs, + ) + + # show the evidence + with_citation, without_citation = self.prepare_citations(answer, docs) + if not with_citation and not without_citation: + yield Document(channel="info", content="
No evidence found.
") + else: + yield Document(channel="info", content=None) + yield from with_citation + yield from without_citation + + return answer + + @classmethod + def get_user_settings(cls) -> dict: + user_settings = super().get_user_settings() + user_settings["decompose_prompt"] = { + "name": "Decompose Prompt", + "value": DecomposeQuestionPipeline.DECOMPOSE_SYSTEM_PROMPT_TEMPLATE, + } + return user_settings + + @classmethod + def get_pipeline(cls, settings, states, retrievers): + """Get the reasoning pipeline + + Args: + settings: the settings for the pipeline + retrievers: the retrievers to use + """ + prefix = f"reasoning.options.{cls.get_info()['id']}" + pipeline = cls( + retrievers=retrievers, + rewrite_pipeline=DecomposeQuestionPipeline( + prompt_template=settings.get(f"{prefix}.decompose_prompt") + ), + ) + + llm_name = settings.get(f"{prefix}.llm", None) + llm = llms.get(llm_name, llms.get_default()) + + # answering pipeline configuration + answer_pipeline = pipeline.answering_pipeline + answer_pipeline.llm = llm + answer_pipeline.citation_pipeline.llm = llm + answer_pipeline.n_last_interactions = settings[f"{prefix}.n_last_interactions"] + answer_pipeline.enable_citation = settings[f"{prefix}.highlight_citation"] + answer_pipeline.system_prompt = settings[f"{prefix}.system_prompt"] + answer_pipeline.qa_template = settings[f"{prefix}.qa_prompt"] + answer_pipeline.lang = SUPPORTED_LANGUAGE_MAP.get( + settings["reasoning.lang"], "English" + ) + + pipeline.add_query_context.llm = llm + pipeline.add_query_context.n_last_interactions = settings[ + f"{prefix}.n_last_interactions" + ] + + pipeline.trigger_context = settings[f"{prefix}.trigger_context"] + pipeline.use_rewrite = states.get("app", {}).get("regen", False) + if pipeline.rewrite_pipeline: + pipeline.rewrite_pipeline.llm = llm + return pipeline + + @classmethod + def get_info(cls) -> dict: + return { + "id": "complex", + "name": "Complex QA", + "description": ( + "Use multi-step reasoning to decompose a complex 
question into " + "multiple sub-questions. This pipeline can " + "perform both keyword search and similarity search to retrieve the " + "context. After that it includes that context to generate the answer." + ), } diff --git a/libs/ktem/ktem/settings.py b/libs/ktem/ktem/settings.py index dec1d4c..89f5c65 100644 --- a/libs/ktem/ktem/settings.py +++ b/libs/ktem/ktem/settings.py @@ -19,6 +19,7 @@ class SettingItem(BaseModel): choices: list = Field(default_factory=list) metadata: dict = Field(default_factory=dict) component: str = "text" + special_type: str = "" class BaseSettingGroup(BaseModel): @@ -55,6 +56,9 @@ class BaseSettingGroup(BaseModel): option = self.options[option_id] return option.get_setting_item(sub_path) + def __bool__(self): + return bool(self.settings) or bool(self.options) + class SettingReasoningGroup(BaseSettingGroup): def _get_options(self) -> dict: diff --git a/libs/ktem/ktem/utils/__init__.py b/libs/ktem/ktem/utils/__init__.py index e69de29..009c60a 100644 --- a/libs/ktem/ktem/utils/__init__.py +++ b/libs/ktem/ktem/utils/__init__.py @@ -0,0 +1,3 @@ +from .lang import SUPPORTED_LANGUAGE_MAP + +__all__ = ["SUPPORTED_LANGUAGE_MAP"] diff --git a/libs/ktem/ktem/utils/conversation.py b/libs/ktem/ktem/utils/conversation.py new file mode 100644 index 0000000..2550aa7 --- /dev/null +++ b/libs/ktem/ktem/utils/conversation.py @@ -0,0 +1,20 @@ +def sync_retrieval_n_message( + messages: list[list[str]], + retrievals: list[str], +) -> list[str]: + """Ensure len of messages history and retrieval history are equal + Empty string/Truncate will be used in case any difference exist + """ + n_message = len(messages) # include previous history + n_retrieval = min(n_message, len(retrievals)) + + diff = n_message - n_retrieval + retrievals = retrievals[:n_retrieval] + ["" for _ in range(diff)] + + assert len(retrievals) == n_message + + return retrievals + + +if __name__ == "__main__": + print(sync_retrieval_n_message([[""], [""], [""]], [])) diff --git 
a/libs/ktem/ktem/utils/lang.py b/libs/ktem/ktem/utils/lang.py new file mode 100644 index 0000000..af6601d --- /dev/null +++ b/libs/ktem/ktem/utils/lang.py @@ -0,0 +1 @@ +SUPPORTED_LANGUAGE_MAP = {"en": "English", "ja": "Japanese", "vi": "Vietnamese"} diff --git a/libs/ktem/ktem/utils/render.py b/libs/ktem/ktem/utils/render.py index 5890d33..b1695aa 100644 --- a/libs/ktem/ktem/utils/render.py +++ b/libs/ktem/ktem/utils/render.py @@ -1,4 +1,36 @@ +import os.path + import markdown +from fast_langdetect import detect + +from kotaemon.base import RetrievedDocument + + +def is_close(val1, val2, tolerance=1e-9): + return abs(val1 - val2) <= tolerance + + +def replace_mardown_header(text: str) -> str: + textlines = text.splitlines() + newlines = [] + for line in textlines: + if line.startswith("#"): + line = "" + line.replace("#", "") + "" + if line.startswith("=="): + line = "" + newlines.append(line) + + return "\n".join(newlines) + + +def get_header(doc: RetrievedDocument) -> str: + """Get the header for the document""" + header = "" + if "page_label" in doc.metadata: + header += f" [Page {doc.metadata['page_label']}]" + + header += f" {doc.metadata.get('file_name', '')}" + return header.strip() class Render: @@ -13,9 +45,152 @@ class Render: @staticmethod def table(text: str) -> str: """Render table from markdown format into HTML""" + text = replace_mardown_header(text) return markdown.markdown(text, extensions=["markdown.extensions.tables"]) + @staticmethod + def preview( + html_content: str, + doc: RetrievedDocument, + highlight_text: str | None = None, + ) -> str: + text = doc.content + pdf_path = doc.metadata.get("file_path", "") + + if not os.path.isfile(pdf_path): + print(f"pdf-path: {pdf_path} does not exist") + return html_content + + is_pdf = doc.metadata.get("file_type", "") == "application/pdf" + page_idx = int(doc.metadata.get("page_label", 1)) + + if not is_pdf: + print("Document is not pdf") + return html_content + + if page_idx < 0: + print("Fail to 
extract page number") + return html_content + + if not highlight_text: + try: + lang = detect(text.replace("\n", " "))["lang"] + print("lang", lang) + if lang not in ["ja", "cn"]: + highlight_words = [ + t[:-1] if t.endswith("-") else t for t in text.split("\n") + ] + highlight_text = highlight_words[0] + phrase = "true" + else: + highlight_text = text.replace("\n", "") + phrase = "false" + + print("highlight_text", highlight_text, phrase) + except Exception as e: + print(e) + highlight_text = text + else: + phrase = "true" + + return f""" + {html_content} + + [Preview] + + """ # noqa + @staticmethod def highlight(text: str) -> str: """Highlight text""" return f"{text}" + + @staticmethod + def image(url: str, text: str = "") -> str: + """Render an image""" + img = f'
' + if text: + caption = f"

{text}

" + return f"
{img}{caption}

" + return img + + @staticmethod + def collapsible_with_header( + doc: RetrievedDocument, + open_collapsible: bool = False, + ) -> str: + header = f"{get_header(doc)}" + if doc.metadata.get("type", "") == "image": + doc_content = Render.image(url=doc.metadata["image_origin"], text=doc.text) + else: + doc_content = Render.table(doc.text) + + return Render.collapsible( + header=Render.preview(header, doc), + content=doc_content, + open=open_collapsible, + ) + + @staticmethod + def collapsible_with_header_score( + doc: RetrievedDocument, + override_text: str | None = None, + highlight_text: str | None = None, + open_collapsible: bool = False, + ) -> str: + """Format the retrieval score and the document""" + # score from doc_store (Elasticsearch) + if is_close(doc.score, -1.0): + vectorstore_score = "" + text_search_str = " (full-text search)
" + else: + vectorstore_score = str(round(doc.score, 2)) + text_search_str = "
" + + llm_reranking_score = ( + round(doc.metadata["llm_trulens_score"], 2) + if doc.metadata.get("llm_trulens_score") is not None + else 0.0 + ) + cohere_reranking_score = ( + round(doc.metadata["cohere_reranking_score"], 2) + if doc.metadata.get("cohere_reranking_score") is not None + else 0.0 + ) + item_type_prefix = doc.metadata.get("type", "") + item_type_prefix = item_type_prefix.capitalize() + if item_type_prefix: + item_type_prefix += " from " + + rendered_score = Render.collapsible( + header=f" Relevance score: {llm_reranking_score}", + content="  Vectorstore score:" + f" {vectorstore_score}" + f"{text_search_str}" + "  LLM relevant score:" + f" {llm_reranking_score}
" + "  Reranking score:" + f" {cohere_reranking_score}
", + ) + + text = doc.text if not override_text else override_text + if doc.metadata.get("type", "") == "image": + rendered_doc_content = Render.image( + url=doc.metadata["image_origin"], + text=text, + ) + else: + rendered_doc_content = Render.table(text) + + rendered_header = Render.preview( + f"{item_type_prefix}{get_header(doc)}" + f" [score: {llm_reranking_score}]", + doc, + highlight_text=highlight_text, + ) + + return Render.collapsible( + header=rendered_header, + content=rendered_score + rendered_doc_content, + open=open_collapsible, + ) diff --git a/libs/ktem/pyproject.toml b/libs/ktem/pyproject.toml index eb6ac12..a995495 100644 --- a/libs/ktem/pyproject.toml +++ b/libs/ktem/pyproject.toml @@ -26,7 +26,7 @@ dependencies = [ "SQLAlchemy>=2.0.29,<3", "sqlmodel>=0.0.16,<0.1", "tiktoken>=0.6.0,<1", - "gradio>=4.26.0,<5", + "gradio>=4.31.0,<5", "markdown>=3.6,<4", ] authors = [ diff --git a/scripts/migrate/__init__.py b/scripts/migrate/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/migrate/migrate_chroma_db.py b/scripts/migrate/migrate_chroma_db.py new file mode 100644 index 0000000..96fa920 --- /dev/null +++ b/scripts/migrate/migrate_chroma_db.py @@ -0,0 +1,191 @@ +import uuid + +import chromadb +from ktem.index.models import Index +from sqlalchemy import ( + JSON, + Column, + DateTime, + Integer, + String, + UniqueConstraint, + create_engine, + select, +) +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.ext.mutable import MutableDict +from sqlalchemy.orm import Session +from sqlalchemy.sql import func + + +def _init_resource(private: bool = True, id: int = 1): + """Init schemas. 
Hard-code""" + Base = declarative_base() + + if private: + Source = type( + "Source", + (Base,), + { + "__tablename__": f"index__{id}__source", + "__table_args__": ( + UniqueConstraint("name", "user", name="_name_user_uc"), + ), + "id": Column( + String, + primary_key=True, + default=lambda: str(uuid.uuid4()), + unique=True, + ), + "name": Column(String), + "path": Column(String), + "size": Column(Integer, default=0), + "date_created": Column( + DateTime(timezone=True), server_default=func.now() + ), + "user": Column(Integer, default=1), + "note": Column( + MutableDict.as_mutable(JSON), # type: ignore + default={}, + ), + }, + ) + else: + Source = type( + "Source", + (Base,), + { + "__tablename__": f"index__{id}__source", + "id": Column( + String, + primary_key=True, + default=lambda: str(uuid.uuid4()), + unique=True, + ), + "name": Column(String, unique=True), + "path": Column(String), + "size": Column(Integer, default=0), + "date_created": Column( + DateTime(timezone=True), server_default=func.now() + ), + "user": Column(Integer, default=1), + "note": Column( + MutableDict.as_mutable(JSON), # type: ignore + default={}, + ), + }, + ) + Index = type( + "IndexTable", + (Base,), + { + "__tablename__": f"index__{id}__index", + "id": Column(Integer, primary_key=True, autoincrement=True), + "source_id": Column(String), + "target_id": Column(String), + "relation_type": Column(Integer), + "user": Column(Integer, default=1), + }, + ) + + return {"Source": Source, "Index": Index} + + +def get_chromadb_collection( + db_dir: str = "../ktem_app_data/user_data/vectorstore", + collection_name: str = "index_1", +): + """Extract collection from chromadb""" + client = chromadb.PersistentClient(path=db_dir) + collection = client.get_or_create_collection(collection_name) + + return collection + + +def update_metadata(metadata, file_id): + """Update file_id""" + metadata["file_id"] = file_id + return metadata + + +def migrate_chroma_db( + chroma_db_dir: str, sqlite_path: str, 
is_private: bool = True, int_index: int = 1 +): + chroma_collection_name = f"index_{int_index}" + + """Update chromadb with metadata.file_id""" + engine = create_engine(sqlite_path) + resource = _init_resource(private=is_private, id=int_index) + print("Load sqlalchemy engine successfully!") + + chroma_db_collection = get_chromadb_collection( + db_dir=chroma_db_dir, collection_name=chroma_collection_name + ) + print( + f"Load chromadb collection: {chroma_collection_name}, " + f"path: {chroma_db_dir} successfully!" + ) + + # Load docs id of user + with Session(engine) as session: + stmt = select(resource["Source"]) + results = session.execute(stmt) + doc_ids = [r[0].id for r in results.all()] + print(f"Retrieve n-docs: {len(doc_ids)}") + print(doc_ids) + + for doc_id in doc_ids: + print("-") + # Find corresponding vector ids + with Session(engine) as session: + stmt = select(resource["Index"]).where( + resource["Index"].relation_type == "vector", + resource["Index"].source_id.in_([doc_id]), + ) + results = session.execute(stmt) + vs_ids = [r[0].target_id for r in results.all()] + + print(f"Got {len(vs_ids)} vs_ids for doc {doc_id}") + + # Update file_id + if len(vs_ids) > 0: + batch = chroma_db_collection.get(ids=vs_ids, include=["metadatas"]) + batch.update( + ids=batch["ids"], + metadatas=[ + update_metadata(metadata, doc_id) for metadata in batch["metadatas"] + ], + ) + + # Assert file_id. 
Skip + print(f"doc-{doc_id} got updated") + + +def main(chroma_db_dir: str, sqlite_path: str): + engine = create_engine(sqlite_path) + + with Session(engine) as session: + stmt = select(Index) + + results = session.execute(stmt) + file_indices = [r[0] for r in results.all()] + + for file_index in file_indices: + _id = file_index.id + _is_private = file_index.config["private"] + + print(f"Migrating for Index id: {_id}, is_private: {_is_private}") + + migrate_chroma_db( + chroma_db_dir=chroma_db_dir, + sqlite_path=sqlite_path, + is_private=_is_private, + int_index=_id, + ) + + +if __name__ == "__main__": + chrome_db_dir: str = "./vectorstore/kan_db" + sqlite_path: str = "sqlite:///../ktem_app_data/user_data/sql.db" + + main(chrome_db_dir, sqlite_path)