docker: add options for CPU only, Intel GPU, AMD GPU (#5380)

2024-09-20 10:35:10 +02:00 · 2024-01-28 15:18:14 +01:00 · 2024-01-28 15:18:14 +01:00 · b1463df0a1
commit b1463df0a1
parent d921f80322
10 changed files with 252 additions and 46 deletions
--- a/README.md
+++ b/README.md
@ -160,12 +160,23 @@ The `requirements*.txt` above contain various wheels precompiled through GitHub
 ### Alternative: Docker
 ```
-ln -s docker/{nvidia/Dockerfile,docker-compose.yml,.dockerignore} .
+For NVIDIA GPU:
 ln -s docker/{nvidia/Dockerfile,nvidia/docker-compose.yml,.dockerignore} .
 For AMD GPU:
 ln -s docker/{amd/Dockerfile,amd/docker-compose.yml,.dockerignore} .
 For Intel GPU:
 ln -s docker/{intel/Dockerfile,intel/docker-compose.yml,.dockerignore} .
 For CPU only:
 ln -s docker/{cpu/Dockerfile,cpu/docker-compose.yml,.dockerignore} .
 cp docker/.env.example .env
 # Create the logs and cache directories:
 mkdir -p logs cache
 # Edit .env and set:
 #   TORCH_CUDA_ARCH_LIST based on your GPU model
 #   APP_RUNTIME_GID      your host user's group id (run `id -g` in a terminal)
 #   BUILD_EXTENSIONS     optionally add comma separated list of extensions to build
 # Edit CMD_FLAGS.txt and add the launch options you want to use (like --listen --cpu)
 # 
 docker compose up --build
 ```
--- a/docker/.env.example
+++ b/docker/.env.example
@ -20,3 +20,6 @@ APP_RUNTIME_GID=6972
 # override default app build permissions (handy for deploying to cloud)
 #APP_GID=6972
 #APP_UID=6972
 # Cache locations: both variables point at the cache/ directory under the
 # app's checkout inside the container.
 # NOTE(review): presumably read by the Hugging Face libraries — confirm.
 TRANSFORMERS_CACHE=/home/app/text-generation-webui/cache/
 HF_HOME=/home/app/text-generation-webui/cache/
--- a/docker/amd/Dockerfile
+++ b/docker/amd/Dockerfile
@ -0,0 +1,21 @@
 # Single-stage image for AMD GPUs, built on plain Ubuntu.
 FROM ubuntu:22.04
 WORKDIR /builder

 # Build arguments; each keeps its default unless overridden with --build-arg.
 ARG APP_UID="${APP_UID:-6972}"
 ARG APP_GID="${APP_GID:-6972}"
 ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
 ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"

 # System packages. /var/cache/apt is a BuildKit cache mount shared between
 # builds; the package lists are deleted afterwards to keep the layer small.
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
    apt update \
    && apt install --no-install-recommends -y \
        git \
        vim \
        build-essential \
        python3-dev \
        pip \
        bash \
        curl \
    && rm -rf /var/lib/apt/lists/*

 # Fetch the application sources into /home/app/text-generation-webui.
 WORKDIR /home/app/
 RUN git clone https://github.com/oobabooga/text-generation-webui.git 
 WORKDIR /home/app/text-generation-webui

 # Run the installer non-interactively at build time, without launching the UI.
 # NOTE(review): GPU_CHOICE=B presumably selects AMD in the installer — confirm.
 RUN GPU_CHOICE=B USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose

 # Launch flags come from the CMD_FLAGS.txt found in the build context.
 COPY CMD_FLAGS.txt /home/app/text-generation-webui/

 # Web UI, API and API-stream ports; nothing in this file defines the
 # CONTAINER_* variables, so the defaults after ":-" are what gets exposed.
 EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
 WORKDIR /home/app/text-generation-webui

 # umask 0002 keeps files created at runtime group readable/writable.
 CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh
--- a/docker/amd/docker-compose.yml
+++ b/docker/amd/docker-compose.yml
@ -0,0 +1,57 @@
 # Compose definition for running text-generation-webui on an AMD GPU.
 version: "3.3"
 services:
  text-generation-webui:
    build:
      context: .
      args:
        # Requirements file to use:
        # | GPU | CPU | requirements file to use |
        # |--------|---------|---------|
        # | NVIDIA | has AVX2 | `requirements.txt` |
        # | NVIDIA | no AVX2 | `requirements_noavx2.txt` |
        # | AMD | has AVX2 | `requirements_amd.txt` |
        # | AMD | no AVX2 | `requirements_amd_noavx2.txt` |
        # | CPU only | has AVX2 | `requirements_cpu_only.txt` |
        # | CPU only | no AVX2 | `requirements_cpu_only_noavx2.txt` |
        # | Apple | Intel | `requirements_apple_intel.txt` |
        # | Apple | Apple Silicon | `requirements_apple_silicon.txt` |
        # Default: `requirements.txt`
        # BUILD_REQUIREMENTS: requirements.txt
        # Extension requirements to build:
        # BUILD_EXTENSIONS:
        # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
        BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}
        APP_GID: ${APP_GID:-6972}
        # ":-" (not "-") so an empty APP_UID also falls back to the default,
        # the same way APP_GID does.
        APP_UID: ${APP_UID:-6972}
    env_file: .env
    # Run as the host-side uid/gid so files written to the bind mounts stay accessible.
    user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}"
    ports:
      - "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
      - "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
    stdin_open: true
    group_add:
      - video
    tty: true
    ipc: host
    # GPU device nodes passed through to the container.
    devices:
      - /dev/kfd
      - /dev/dri
    # NOTE(review): relaxed sandboxing, presumably following the ROCm container
    # instructions — confirm it is still required.
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp=unconfined
    # Host directories (relative to this file) mapped into the app checkout.
    volumes:
      - ./cache:/home/app/text-generation-webui/cache
      - ./characters:/home/app/text-generation-webui/characters
      - ./extensions:/home/app/text-generation-webui/extensions
      - ./loras:/home/app/text-generation-webui/loras
      - ./logs:/home/app/text-generation-webui/logs
      - ./models:/home/app/text-generation-webui/models
      - ./presets:/home/app/text-generation-webui/presets
      - ./prompts:/home/app/text-generation-webui/prompts
      - ./softprompts:/home/app/text-generation-webui/softprompts
      - ./training:/home/app/text-generation-webui/training
      - ./cloudflared:/etc/cloudflared
--- a/docker/cpu/Dockerfile
+++ b/docker/cpu/Dockerfile
@ -0,0 +1,25 @@
 # Single-stage CPU-only image, built on plain Ubuntu.
 FROM ubuntu:22.04
 WORKDIR /builder
 ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
 ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
 ARG APP_UID="${APP_UID:-6972}"
 ARG APP_GID="${APP_GID:-6972}"
 # Answers handed to the installer in the RUN step below; override with
 # --build-arg. The defaults are exactly the values that step used before,
 # so GPU_CHOICE defaults to N rather than the previously declared A.
 ARG GPU_CHOICE=N
 ARG USE_CUDA118=FALSE
 ARG LAUNCH_AFTER_INSTALL=FALSE
 ARG INSTALL_EXTENSIONS=TRUE
 # System packages. /var/cache/apt is a BuildKit cache mount; the package
 # lists are deleted afterwards to keep the layer small.
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
    apt update && \
    apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
    rm -rf /var/lib/apt/lists/*
 # Fetch the application sources into /home/app/text-generation-webui.
 WORKDIR /home/app/
 RUN git clone https://github.com/oobabooga/text-generation-webui.git
 WORKDIR /home/app/text-generation-webui
 # Run the installer non-interactively at build time using the ARG values above.
 RUN GPU_CHOICE="${GPU_CHOICE}" USE_CUDA118="${USE_CUDA118}" LAUNCH_AFTER_INSTALL="${LAUNCH_AFTER_INSTALL}" INSTALL_EXTENSIONS="${INSTALL_EXTENSIONS}" ./start_linux.sh --verbose
 # Launch flags come from the CMD_FLAGS.txt found in the build context.
 COPY CMD_FLAGS.txt /home/app/text-generation-webui/
 EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
 # set umask to ensure group read / write at runtime
 CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh
--- a/docker/cpu/docker-compose.yml
+++ b/docker/cpu/docker-compose.yml
@ -0,0 +1,47 @@
 # Compose definition for running text-generation-webui on CPU only.
 version: "3.3"
 services:
  text-generation-webui:
    build:
      context: .
      args:
        # Requirements file to use:
        # | GPU | CPU | requirements file to use |
        # |--------|---------|---------|
        # | NVIDIA | has AVX2 | `requirements.txt` |
        # | NVIDIA | no AVX2 | `requirements_noavx2.txt` |
        # | AMD | has AVX2 | `requirements_amd.txt` |
        # | AMD | no AVX2 | `requirements_amd_noavx2.txt` |
        # | CPU only | has AVX2 | `requirements_cpu_only.txt` |
        # | CPU only | no AVX2 | `requirements_cpu_only_noavx2.txt` |
        # | Apple | Intel | `requirements_apple_intel.txt` |
        # | Apple | Apple Silicon | `requirements_apple_silicon.txt` |
        # Default: `requirements.txt`
        # BUILD_REQUIREMENTS: requirements.txt
        # Extension requirements to build:
        # BUILD_EXTENSIONS:
        # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
        BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}
        APP_GID: ${APP_GID:-6972}
        # ":-" (not "-") so an empty APP_UID also falls back to the default,
        # the same way APP_GID does.
        APP_UID: ${APP_UID:-6972}
    env_file: .env
    # Run as the host-side uid/gid so files written to the bind mounts stay accessible.
    user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}"
    ports:
      - "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
      - "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
    stdin_open: true
    tty: true
    # Host directories (relative to this file) mapped into the app checkout.
    volumes:
      - ./cache:/home/app/text-generation-webui/cache
      - ./characters:/home/app/text-generation-webui/characters
      - ./extensions:/home/app/text-generation-webui/extensions
      - ./loras:/home/app/text-generation-webui/loras
      - ./logs:/home/app/text-generation-webui/logs
      - ./models:/home/app/text-generation-webui/models
      - ./presets:/home/app/text-generation-webui/presets
      - ./prompts:/home/app/text-generation-webui/prompts
      - ./softprompts:/home/app/text-generation-webui/softprompts
      - ./training:/home/app/text-generation-webui/training
      - ./cloudflared:/etc/cloudflared
--- a/docker/intel/Dockerfile
+++ b/docker/intel/Dockerfile
@ -0,0 +1,21 @@
 # Single-stage image for Intel GPUs, built on plain Ubuntu.
 FROM ubuntu:22.04
 WORKDIR /builder

 # Build arguments; each keeps its default unless overridden with --build-arg.
 ARG APP_UID="${APP_UID:-6972}"
 ARG APP_GID="${APP_GID:-6972}"
 ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
 ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"

 # System packages. /var/cache/apt is a BuildKit cache mount shared between
 # builds; the package lists are deleted afterwards to keep the layer small.
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
    apt update \
    && apt install --no-install-recommends -y \
        git \
        vim \
        build-essential \
        python3-dev \
        pip \
        bash \
        curl \
    && rm -rf /var/lib/apt/lists/*

 # Fetch the application sources into /home/app/text-generation-webui.
 WORKDIR /home/app/
 RUN git clone https://github.com/oobabooga/text-generation-webui.git 
 WORKDIR /home/app/text-generation-webui

 # Run the installer non-interactively at build time, without launching the UI.
 # NOTE(review): GPU_CHOICE=D presumably selects Intel in the installer — confirm.
 RUN GPU_CHOICE=D USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose

 # Launch flags come from the CMD_FLAGS.txt found in the build context.
 COPY CMD_FLAGS.txt /home/app/text-generation-webui/

 # Web UI, API and API-stream ports; nothing in this file defines the
 # CONTAINER_* variables, so the defaults after ":-" are what gets exposed.
 EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
 WORKDIR /home/app/text-generation-webui

 # umask 0002 keeps files created at runtime group readable/writable.
 CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh
--- a/docker/intel/docker-compose.yml
+++ b/docker/intel/docker-compose.yml
@ -0,0 +1,55 @@
 # Compose definition for running text-generation-webui on an Intel GPU.
 version: "3.3"
 services:
  text-generation-webui:
    build:
      context: .
      args:
        # Requirements file to use:
        # | GPU | CPU | requirements file to use |
        # |--------|---------|---------|
        # | NVIDIA | has AVX2 | `requirements.txt` |
        # | NVIDIA | no AVX2 | `requirements_noavx2.txt` |
        # | AMD | has AVX2 | `requirements_amd.txt` |
        # | AMD | no AVX2 | `requirements_amd_noavx2.txt` |
        # | CPU only | has AVX2 | `requirements_cpu_only.txt` |
        # | CPU only | no AVX2 | `requirements_cpu_only_noavx2.txt` |
        # | Apple | Intel | `requirements_apple_intel.txt` |
        # | Apple | Apple Silicon | `requirements_apple_silicon.txt` |
        # Default: `requirements.txt`
        # BUILD_REQUIREMENTS: requirements.txt
        # Extension requirements to build:
        # BUILD_EXTENSIONS:
        # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
        TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
        BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}
        APP_GID: ${APP_GID:-6972}
        # ":-" (not "-") so an empty APP_UID also falls back to the default,
        # the same way APP_GID does.
        APP_UID: ${APP_UID:-6972}
    env_file: .env
    # Run as the host-side uid/gid so files written to the bind mounts stay accessible.
    user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}"
    ports:
      - "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
      - "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
    stdin_open: true
    group_add:
      - video
    tty: true
    ipc: host
    # Only the DRM nodes are passed through. /dev/kfd is the AMD ROCm compute
    # device; it does not exist on Intel-only hosts and mapping a missing
    # device prevents the container from starting.
    devices:
      - /dev/dri
    # NOTE(review): relaxed sandboxing carried over from the AMD setup —
    # confirm it is actually needed for Intel GPUs.
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp=unconfined
    # Host directories (relative to this file) mapped into the app checkout.
    # cache and logs are mounted like in the other compose files, so the cache
    # paths configured in .env land on the host.
    volumes:
      - ./cache:/home/app/text-generation-webui/cache
      - ./characters:/home/app/text-generation-webui/characters
      - ./extensions:/home/app/text-generation-webui/extensions
      - ./loras:/home/app/text-generation-webui/loras
      - ./logs:/home/app/text-generation-webui/logs
      - ./models:/home/app/text-generation-webui/models
      - ./presets:/home/app/text-generation-webui/presets
      - ./prompts:/home/app/text-generation-webui/prompts
      - ./softprompts:/home/app/text-generation-webui/softprompts
      - ./training:/home/app/text-generation-webui/training
      - ./cloudflared:/etc/cloudflared
--- a/docker/nvidia/Dockerfile
+++ b/docker/nvidia/Dockerfile
@ -1,57 +1,21 @@
 # BUILDER
 # NOTE(review): this hunk interleaves removed (-), added (+) and context lines.
 # Several context lines below (the second FROM, COPY --from=builder, the wheel
 # build) belong to the multi-stage layout whose `as builder` stage is removed
 # here — verify them against the actual resulting file before relying on them.
-FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 as builder
+FROM ubuntu:22.04
 WORKDIR /builder
 ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
 ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
 ARG BUILD_REQUIREMENTS="${BUILD_REQUIREMENTS:-requirements.txt}"
 ARG APP_UID="${APP_UID:-6972}"
 ARG APP_GID="${APP_GID:-6972}"
 # create / update build env
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
    apt update && \
    apt install --no-install-recommends -y git vim build-essential python3-dev pip && \
    rm -rf /var/lib/apt/lists/*
 RUN --mount=type=cache,target=/root/.cache/pip,rw \
    pip3 install --global --upgrade pip wheel setuptools && \
    # make shared builder & runtime app user
    addgroup --gid $APP_GID app_grp && \
    useradd -m -u $APP_UID --gid app_grp app
 USER app:app_grp
 # build wheels for runtime
 WORKDIR /home/app/build
 COPY --chown=app:app_grp "$BUILD_REQUIREMENTS" /home/app/build/requirements.txt
 COPY --chown=app:app_grp extensions /home/app/build/extensions
 RUN --mount=type=cache,target=/root/.cache/pip,rw \
    # build all requirements files as wheel dists
    pip3 wheel -w wheels -r requirements.txt `echo "$BUILD_EXTENSIONS" | sed -r 's/([^,]+)\s*,?\s*/ -r \/home\/app\/build\/extensions\/\1\/requirements.txt/g'`
    # drop wheel and setuptools .whl to avoid install issues
 RUN rm wheels/setuptools*.whl
 # RUNTIME
 FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04
 ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6}"
 ARG APP_UID="${APP_UID:-6972}"
 ARG APP_GID="${APP_GID:-6972}"
 ENV CLI_ARGS=""
 # create / update runtime env
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
    apt update && \
-    apt install --no-install-recommends -y git python3 pip && \
+    apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
-    rm -rf /var/lib/apt/lists/* && \
+    rm -rf /var/lib/apt/lists/*
-    pip3 install --global --no-cache --upgrade pip wheel setuptools && \
+WORKDIR /home/app/
-    # make shared builder & runtime app user
+RUN git clone https://github.com/oobabooga/text-generation-webui.git 
    addgroup --gid $APP_GID app_grp && \
    useradd -m -u $APP_UID --gid app_grp app
 USER app:app_grp
 # install locally built wheels for app
 WORKDIR /home/app/wheels
 COPY --from=builder /home/app/build/wheels /home/app/wheels
 COPY --chown=app:app_grp . /home/app/text-generation-webui
 RUN umask 0002 && \
    chmod g+rwX /home/app/text-generation-webui && \
    pip3 install --global --no-build-isolation --no-cache --no-index ./*.whl && \
    rm -r /home/app/wheels
 WORKDIR /home/app/text-generation-webui
 # Run the installer non-interactively at build time, without launching the UI.
 # NOTE(review): GPU_CHOICE=A presumably selects NVIDIA in the installer — confirm.
 RUN GPU_CHOICE=A USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
 # Launch flags come from the CMD_FLAGS.txt found in the build context.
 COPY CMD_FLAGS.txt /home/app/text-generation-webui/
 EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
 WORKDIR /home/app/text-generation-webui
 # set umask to ensure group read / write at runtime
-CMD umask 0002 && export HOME=/home/app && python3 server.py ${CLI_ARGS}
+CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh
--- a/docker/nvidia/docker-compose.yml
+++ b/docker/nvidia/docker-compose.yml
@ -34,9 +34,11 @@ services:
    stdin_open: true
    tty: true
    volumes:
      - ./cache:/home/app/text-generation-webui/cache
      - ./characters:/home/app/text-generation-webui/characters
      - ./extensions:/home/app/text-generation-webui/extensions
      - ./loras:/home/app/text-generation-webui/loras
      - ./logs:/home/app/text-generation-webui/logs
      - ./models:/home/app/text-generation-webui/models
      - ./presets:/home/app/text-generation-webui/presets
      - ./prompts:/home/app/text-generation-webui/prompts