feat: modify base dependencies + remove unnecessary packages in lite docker (#310)

* feat: update base/adv dependencies

* feat: update Dockerfile

* ci: update free disk for docker build
This commit is contained in:
Khoi-Nguyen Nguyen-Ngoc 2024-09-21 12:11:58 +07:00 committed by GitHub
parent d6a9510441
commit a865e2b095
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 31 additions and 30 deletions

View File

@@ -29,6 +29,22 @@ jobs:
- lite - lite
- full - full
steps: steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: true
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Set repository and image name - name: Set repository and image name
run: | run: |
echo "FULL_IMAGE_NAME=${{ env.REGISTRY }}/${IMAGE_NAME,,}" >>${GITHUB_ENV} echo "FULL_IMAGE_NAME=${{ env.REGISTRY }}/${IMAGE_NAME,,}" >>${GITHUB_ENV}
@@ -72,22 +88,6 @@ jobs:
username: ${{ github.actor }} username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }} password: ${{ secrets.GITHUB_TOKEN }}
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: false
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Build docker image - name: Build docker image
uses: docker/build-push-action@v6 uses: docker/build-push-action@v6
with: with:

View File

@@ -34,7 +34,7 @@ COPY . /app
# Install pip packages # Install pip packages
RUN --mount=type=ssh \ RUN --mount=type=ssh \
--mount=type=cache,target=/root/.cache/pip \ --mount=type=cache,target=/root/.cache/pip \
pip install -e "libs/kotaemon[all]" \ pip install -e "libs/kotaemon" \
&& pip install -e "libs/ktem" \ && pip install -e "libs/ktem" \
&& pip install graphrag future \ && pip install graphrag future \
&& pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements" && pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements"
@@ -72,7 +72,8 @@ COPY . /app
# Install additional pip packages # Install additional pip packages
RUN --mount=type=ssh \ RUN --mount=type=ssh \
--mount=type=cache,target=/root/.cache/pip \ --mount=type=cache,target=/root/.cache/pip \
pip install unstructured[all-docs] pip install -e "libs/kotaemon[adv]" \
&& pip install unstructured[all-docs]
# Clean up # Clean up
RUN apt-get autoremove \ RUN apt-get autoremove \

View File

@@ -21,28 +21,34 @@ dynamic = ["version"]
requires-python = ">= 3.10" requires-python = ">= 3.10"
description = "Kotaemon core library for AI development." description = "Kotaemon core library for AI development."
dependencies = [ dependencies = [
"azure-ai-documentintelligence",
"beautifulsoup4>=4.12.3,<4.13",
"click>=8.1.7,<9", "click>=8.1.7,<9",
"cohere>=5.3.2,<6", "cohere>=5.3.2,<6",
"cookiecutter>=2.6.0,<2.7", "cookiecutter>=2.6.0,<2.7",
"fast_langdetect", "fast_langdetect",
"fastapi<=0.112.1",
"gradio>=4.31.0,<4.40", "gradio>=4.31.0,<4.40",
"html2text==2024.2.26", "html2text==2024.2.26",
"langchain>=0.1.16,<0.2.0", "langchain>=0.1.16,<0.2.0",
"langchain-anthropic",
"langchain-community>=0.0.34,<0.1.0", "langchain-community>=0.0.34,<0.1.0",
"langchain-openai>=0.1.4,<0.2.0", "langchain-openai>=0.1.4,<0.2.0",
"langchain-anthropic",
"llama-hub>=0.0.79,<0.1.0", "llama-hub>=0.0.79,<0.1.0",
"llama-index>=0.10.40,<0.11.0", "llama-index>=0.10.40,<0.11.0",
"fastapi<=0.112.1",
"llama-index-vector-stores-chroma>=0.1.9", "llama-index-vector-stores-chroma>=0.1.9",
"llama-index-vector-stores-lancedb", "llama-index-vector-stores-lancedb",
"llama-index-vector-stores-milvus", "llama-index-vector-stores-milvus",
"llama-index-vector-stores-qdrant",
"openai>=1.23.6,<2", "openai>=1.23.6,<2",
"openpyxl>=3.1.2,<3.2", "openpyxl>=3.1.2,<3.2",
"opentelemetry-exporter-otlp-proto-grpc>=1.25.0", # https://github.com/chroma-core/chroma/issues/2571
"pandas>=2.2.2,<2.3", "pandas>=2.2.2,<2.3",
"plotly", "plotly",
"PyMuPDF>=1.23", "PyMuPDF>=1.23",
"pypdf>=4.2.0,<4.3", "pypdf>=4.2.0,<4.3",
"python-decouple", # for theflow
"python-docx>=1.1.0,<1.2",
"python-dotenv>=1.0.1,<1.1", "python-dotenv>=1.0.1,<1.1",
"tenacity>=8.2.3,<8.3", "tenacity>=8.2.3,<8.3",
"theflow>=0.8.6,<0.9.0", "theflow>=0.8.6,<0.9.0",
@@ -62,18 +68,13 @@ classifiers = [
[project.optional-dependencies] [project.optional-dependencies]
adv = [ adv = [
"azure-ai-documentintelligence",
"beautifulsoup4>=4.12.3,<4.13",
"duckduckgo-search>=6.1.0,<6.2", "duckduckgo-search>=6.1.0,<6.2",
"elasticsearch>=8.13.0,<8.14", "elasticsearch>=8.13.0,<8.14",
"googlesearch-python>=1.2.4,<1.3",
"python-docx>=1.1.0,<1.2",
"tabulate",
"wikipedia>=1.4.0,<1.5",
"sentence-transformers",
"llama-cpp-python<0.2.8",
"fastembed", "fastembed",
"llama-index-vector-stores-qdrant", "googlesearch-python>=1.2.4,<1.3",
"llama-cpp-python<0.2.8",
"sentence-transformers",
"wikipedia>=1.4.0,<1.5",
] ]
dev = [ dev = [
"black", "black",
@@ -83,7 +84,6 @@ dev = [
"pre-commit", "pre-commit",
"pytest", "pytest",
"pytest-mock", "pytest-mock",
"python-decouple",
"sphinx", "sphinx",
] ]
all = ["kotaemon[adv,dev]"] all = ["kotaemon[adv,dev]"]