From 88620c6b397f2c24848d1bd5df58477fb82bdd7c Mon Sep 17 00:00:00 2001 From: Callum Date: Thu, 30 Nov 2023 05:20:23 +0000 Subject: [PATCH] feature/docker_improvements (#4768) --- docker/.dockerignore => .dockerignore | 0 .gitignore | 7 ++- README.md | 8 ++- docker/.env.example | 14 ++--- docker/Dockerfile | 77 --------------------------- docker/docker-compose.yml | 25 +++++---- docker/nvidia/Dockerfile | 56 +++++++++++++++++++ 7 files changed, 90 insertions(+), 97 deletions(-) rename docker/.dockerignore => .dockerignore (100%) delete mode 100644 docker/Dockerfile create mode 100644 docker/nvidia/Dockerfile diff --git a/docker/.dockerignore b/.dockerignore similarity index 100% rename from docker/.dockerignore rename to .dockerignore diff --git a/.gitignore b/.gitignore index 7008f536..cf47b628 100644 --- a/.gitignore +++ b/.gitignore @@ -26,7 +26,6 @@ .DS_Store .eslintrc.js .idea -.env .venv venv .envrc @@ -42,3 +41,9 @@ package.json package-lock.json Thumbs.db wandb + +# ignore user docker config and top level links to docker files +/docker-compose.yaml +/docker-compose.yml +/Dockerfile +.env diff --git a/README.md b/README.md index 1e22c18e..0b6bac72 100644 --- a/README.md +++ b/README.md @@ -163,14 +163,18 @@ The requirments*.txt above contain various precompiled wheels. If you wish to co ### Alternative: Docker ``` -ln -s docker/{Dockerfile,docker-compose.yml,.dockerignore} . +ln -s docker/{nvidia/Dockerfile,docker-compose.yml} . cp docker/.env.example .env -# Edit .env and set TORCH_CUDA_ARCH_LIST based on your GPU model +# Edit .env and set: +# TORCH_CUDA_ARCH_LIST based on your GPU model +# APP_RUNTIME_GID your host user's group id (run `id -g` in a terminal) +# BUILD_EXTENSIONS optionally add comma separated list of extensions to build docker compose up --build ``` * You need to have Docker Compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/text-generation-webui/wiki/09-%E2%80%90-Docker) for instructions. 
* For additional docker files, check out [this repository](https://github.com/Atinoda/text-generation-webui-docker). +* Currently breaks GPTQ-for-Llama ### Updating the requirements diff --git a/docker/.env.example b/docker/.env.example index 1ef45dc2..bc46e95e 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -2,19 +2,21 @@ # however for me to work i had to specify the exact version for my card ( 2060 ) it was 7.5 # https://developer.nvidia.com/cuda-gpus you can find the version for your card here TORCH_CUDA_ARCH_LIST=7.5 - # your command-line flags go here: CLI_ARGS=--listen - # the port the webui binds to on the host HOST_PORT=7860 # the port the webui binds to inside the container CONTAINER_PORT=7860 - # the port the api binds to on the host HOST_API_PORT=5000 # the port the api binds to inside the container CONTAINER_API_PORT=5000 - -# the version used to install text-generation-webui from -WEBUI_VERSION=HEAD +# Comma separated extensions to build +BUILD_EXTENSIONS="" +# Set APP_RUNTIME_GID to an appropriate host system group to enable access to mounted volumes +# You can find your current host user group id with the command `id -g` +APP_RUNTIME_GID=6972 +# override default app build permissions (handy for deploying to cloud) +#APP_GID=6972 +#APP_UID=6972 diff --git a/docker/Dockerfile b/docker/Dockerfile deleted file mode 100644 index 2161fb18..00000000 --- a/docker/Dockerfile +++ /dev/null @@ -1,77 +0,0 @@ -FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 as builder - -RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw apt-get update && \ - apt-get install --no-install-recommends -y git vim build-essential python3-dev python3-venv && \ - rm -rf /var/lib/apt/lists/* - -RUN git clone --depth=1 https://github.com/oobabooga/GPTQ-for-LLaMa /build - -WORKDIR /build - -RUN --mount=type=cache,target=/root/.cache/pip,rw \ - python3 -m venv /build/venv && \ - . 
/build/venv/bin/activate && \ - pip3 install --upgrade pip setuptools wheel ninja && \ - pip3 install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121 && \ - pip3 install -r requirements.txt - -# https://developer.nvidia.com/cuda-gpus -# for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5" -ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}" -RUN . /build/venv/bin/activate && \ - python3 setup_cuda.py bdist_wheel -d . - -FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 - -LABEL maintainer="Your Name " -LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI" - -RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw apt-get update && \ - apt-get install --no-install-recommends -y python3-dev libportaudio2 libasound-dev git python3 python3-pip make g++ ffmpeg && \ - rm -rf /var/lib/apt/lists/* - -RUN --mount=type=cache,target=/root/.cache/pip,rw pip3 install virtualenv - -RUN mkdir /app - -WORKDIR /app - -ARG WEBUI_VERSION -RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source" - -# Create virtualenv -RUN virtualenv /app/venv -RUN --mount=type=cache,target=/root/.cache/pip,rw \ - . /app/venv/bin/activate && \ - pip3 install --upgrade pip setuptools wheel ninja && \ - pip3 install torch xformers --index-url https://download.pytorch.org/whl/cu121 && \ - pip3 install torchvision torchaudio sentence-transformers - -# Copy and install GPTQ-for-LLaMa -COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa -RUN --mount=type=cache,target=/root/.cache/pip,rw \ - . /app/venv/bin/activate && \ - pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl - -# Install main requirements -COPY requirements.txt /app/requirements.txt -RUN --mount=type=cache,target=/root/.cache/pip,rw \ - . /app/venv/bin/activate && \ - pip3 install -r requirements.txt - -COPY . 
/app/ - -RUN cp /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda121.so /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so - -# Install extension requirements -RUN --mount=type=cache,target=/root/.cache/pip,rw \ - . /app/venv/bin/activate && \ - for ext in /app/extensions/*/requirements.txt; do \ - cd "$(dirname "$ext")"; \ - pip3 install -r requirements.txt; \ - done - -ENV CLI_ARGS="" - -EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} -CMD . /app/venv/bin/activate && python3 server.py ${CLI_ARGS} diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 29767d22..2aa6608e 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -5,28 +5,31 @@ services: context: . args: # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus - TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5} - WEBUI_VERSION: ${WEBUI_VERSION:-HEAD} + TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5} + BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-} + APP_GID: ${APP_GID:-6972} + APP_UID: ${APP_UID-6972} env_file: .env + user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}" ports: - "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}" - "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}" stdin_open: true tty: true volumes: - - ./characters:/app/characters - - ./extensions:/app/extensions - - ./loras:/app/loras - - ./models:/app/models - - ./presets:/app/presets - - ./prompts:/app/prompts - - ./softprompts:/app/softprompts - - ./training:/app/training + - ./characters:/home/app/text-generation-webui/characters + - ./extensions:/home/app/text-generation-webui/extensions + - ./loras:/home/app/text-generation-webui/loras + - ./models:/home/app/text-generation-webui/models + - ./presets:/home/app/text-generation-webui/presets + - ./prompts:/home/app/text-generation-webui/prompts + - ./softprompts:/home/app/text-generation-webui/softprompts + - 
./training:/home/app/text-generation-webui/training - ./cloudflared:/etc/cloudflared deploy: resources: reservations: devices: - driver: nvidia - device_ids: ['0'] + count: all capabilities: [gpu] diff --git a/docker/nvidia/Dockerfile b/docker/nvidia/Dockerfile new file mode 100644 index 00000000..8da55643 --- /dev/null +++ b/docker/nvidia/Dockerfile @@ -0,0 +1,56 @@ +# BUILDER +FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 as builder +WORKDIR /builder +ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}" +ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}" +ARG APP_UID="${APP_UID:-6972}" +ARG APP_GID="${APP_GID:-6972}" +# create / update build env +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \ + apt update && \ + apt install --no-install-recommends -y git vim build-essential python3-dev pip && \ + rm -rf /var/lib/apt/lists/* +RUN --mount=type=cache,target=/root/.cache/pip,rw \ + pip3 install --upgrade pip wheel setuptools && \ + # make shared builder & runtime app user + addgroup --gid $APP_GID app_grp && \ + useradd -m -u $APP_UID --gid app_grp app +USER app:app_grp +# build wheels for runtime +WORKDIR /home/app/build +COPY --chown=app:app_grp requirements.txt /home/app/build +COPY --chown=app:app_grp extensions /home/app/build/extensions +RUN --mount=type=cache,target=/root/.cache/pip,rw \ + # build all requirements files as wheel dists + pip3 wheel -w wheels -r requirements.txt `echo "$BUILD_EXTENSIONS" | sed -r 's/([^,]+)\s*,?\s*/ -r \/home\/app\/build\/extensions\/\1\/requirements.txt/g'` + # drop wheel and setuptools .whl to avoid install issues +RUN rm wheels/setuptools*.whl

# RUNTIME
FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04 +ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6}" +ARG APP_UID="${APP_UID:-6972}" +ARG APP_GID="${APP_GID:-6972}" +ENV CLI_ARGS="" +# create / update runtime env +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \ + apt update && 
\ apt install --no-install-recommends -y git python3 pip && \ rm -rf /var/lib/apt/lists/* && \ pip3 install --no-cache --upgrade pip wheel setuptools && \ # make shared builder & runtime app user addgroup --gid $APP_GID app_grp && \ useradd -m -u $APP_UID --gid app_grp app USER app:app_grp # install locally built wheels for app WORKDIR /home/app/wheels COPY --from=builder /home/app/build/wheels /home/app/wheels COPY --chown=app:app_grp . /home/app/text-generation-webui RUN umask 0002 && \ chmod g+rwX /home/app/text-generation-webui && \ pip3 install --no-build-isolation --no-cache --no-index ./*.whl && \ rm -r /home/app/wheels WORKDIR /home/app/text-generation-webui EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005} # set umask to ensure group read / write at runtime CMD umask 0002 && export HOME=/home/app && python3 server.py ${CLI_ARGS}