# syntax=docker/dockerfile:1
# The RUN --mount flags used below require BuildKit; the directive above pins
# the Dockerfile frontend that provides them.
#
# Build targets (select with `docker build --target <name>`):
#   lite   - base application image
#   full   - lite + OCR/office/media system packages and extra Python packages
#   ollama - full + a bundled Ollama install with the nomic-embed-text model

# ---------------------------------------------------------------------------
# Lite version
# ---------------------------------------------------------------------------
FROM python:3.10-slim AS lite

# Common dependencies.
# The apt caches are removed in this same RUN: deleting them in a later layer
# would not shrink the image, because this layer would still contain them.
RUN apt-get update -qqy \
    && apt-get install -y --no-install-recommends \
        cargo \
        curl \
        g++ \
        gcc \
        git \
        libpoppler-dev \
        poppler-utils \
        ssh \
        unzip \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Setup args (populated automatically by BuildKit for the target platform)
ARG TARGETPLATFORM
ARG TARGETARCH

# Set environment variables.
# TARGETARCH is re-exported as ENV so it stays visible in the derived stages,
# which do not redeclare the ARG.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=UTF-8 \
    TARGETARCH=${TARGETARCH}

# Create working directory
WORKDIR /app

# Download pdfjs.
# Only the download script is copied first so this layer stays cached when
# the rest of the source tree changes.
ENV PDFJS_PREBUILT_DIR="/app/libs/ktem/ktem/assets/prebuilt/pdfjs-dist"
COPY scripts/download_pdfjs.sh /app/scripts/download_pdfjs.sh
RUN chmod +x /app/scripts/download_pdfjs.sh \
    && bash scripts/download_pdfjs.sh "$PDFJS_PREBUILT_DIR"

# Copy contents
COPY . /app
COPY launch.sh /app/launch.sh
COPY .env.example /app/.env

# Install pip packages.
# The pip download cache lives in a BuildKit cache mount, so it is reused
# across builds without being written into an image layer.
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    pip install -e "libs/kotaemon" \
    && pip install -e "libs/ktem" \
    && pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements"

# graphrag is only installed when building for amd64.
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    if [ "$TARGETARCH" = "amd64" ]; then pip install "graphrag<=0.3.6" future; fi

# Clean up whatever the steps above left in root's cache directory
# (apt caches were already removed in the layer that created them).
RUN rm -rf ~/.cache

ENTRYPOINT ["sh", "/app/launch.sh"]

# ---------------------------------------------------------------------------
# Full version
# ---------------------------------------------------------------------------
FROM lite AS full

# Additional dependencies for full version (apt caches removed in-layer)
RUN apt-get update -qqy \
    && apt-get install -y --no-install-recommends \
        ffmpeg \
        libmagic-dev \
        libreoffice \
        libsm6 \
        libxext6 \
        tesseract-ocr \
        tesseract-ocr-jpn \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install torch and torchvision for unstructured (wheels from the CPU index)
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

# Install additional pip packages
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    pip install -e "libs/kotaemon[adv]" \
    && pip install unstructured[all-docs]

# Install lightRAG
ENV USE_LIGHTRAG=true
RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    pip install aioboto3 nano-vectordb ollama xxhash "lightrag-hku<=0.0.8"

RUN --mount=type=ssh \
    --mount=type=cache,target=/root/.cache/pip \
    pip install "docling<=2.5.2"

# Download NLTK data from LlamaIndex
RUN python -c "from llama_index.core.readers.base import BaseReader"

# Clean up whatever the steps above left in root's cache directory
RUN rm -rf ~/.cache

ENTRYPOINT ["sh", "/app/launch.sh"]

# ---------------------------------------------------------------------------
# Ollama-bundled version
# ---------------------------------------------------------------------------
FROM full AS ollama

# Run the shell-form RUN steps of this stage under bash with pipefail so that
# a failed download in `curl ... | sh` fails the build instead of being hidden
# by the exit status of `sh`. The exec-form ENTRYPOINT is not affected.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install ollama
RUN curl -fsSL https://ollama.com/install.sh | sh

# Start the server in the background for this build step only, give it a few
# seconds to come up, then pull the model so it is stored in the image.
# RUN nohup bash -c "ollama serve &" && sleep 4 && ollama pull qwen2.5:7b
RUN nohup bash -c "ollama serve &" && sleep 4 && ollama pull nomic-embed-text

ENTRYPOINT ["sh", "/app/launch.sh"]