From 22a0769933448719aa682001748397da9b380625 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Wed, 17 May 2023 14:26:50 +0200 Subject: [PATCH] Update 3 docker files to use cu118 (#23406) * fix --------- Co-authored-by: ydshieh --- .github/workflows/build-docker-images.yml | 20 +++++++++++++++++++ docker/transformers-all-latest-gpu/Dockerfile | 2 +- .../Dockerfile | 6 ++++-- .../Dockerfile | 6 ++++-- docker/transformers-pytorch-gpu/Dockerfile | 4 ++-- 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml index 26c5e517d2..dfc407e69a 100644 --- a/.github/workflows/build-docker-images.yml +++ b/.github/workflows/build-docker-images.yml @@ -71,6 +71,16 @@ jobs: name: "Latest PyTorch + DeepSpeed" runs-on: ubuntu-latest steps: + - name: Cleanup disk + run: | + sudo ls -l /usr/local/lib/ + sudo ls -l /usr/share/ + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 @@ -98,6 +108,16 @@ jobs: name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)" runs-on: ubuntu-latest steps: + - name: Cleanup disk + run: | + sudo ls -l /usr/local/lib/ + sudo ls -l /usr/share/ + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile index 3ad87790d6..c5ada9209b 100644 --- a/docker/transformers-all-latest-gpu/Dockerfile +++ b/docker/transformers-all-latest-gpu/Dockerfile @@ -35,7 +35,7 @@ RUN [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION RUN python3 -m pip install --no-cache-dir -U tensorflow==2.12 protobuf==3.20.3 tensorflow_text tensorflow_probability RUN python3 -m pip uninstall -y flax jax -RUN python3 -m pip install --no-cache-dir intel_extension_for_pytorch==$INTEL_TORCH_EXT+cpu -f https://software.intel.com/ipex-whl-stable +RUN python3 -m pip install --no-cache-dir intel_extension_for_pytorch==$INTEL_TORCH_EXT+cpu -f https://developer.intel.com/ipex-whl-stable-cpu RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract RUN python3 -m pip install -U "itsdangerous<2.1.0" diff --git a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile index 6d54b971d2..b6e892a5e1 100644 --- a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile +++ b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile @@ -1,12 +1,12 @@ # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel_22-08.html#rel_22-08 -FROM nvcr.io/nvidia/pytorch:22.08-py3 +FROM nvcr.io/nvidia/pytorch:22.12-py3 LABEL maintainer="Hugging Face" ARG DEBIAN_FRONTEND=noninteractive ARG PYTORCH='2.0.1' # Example: `cu102`, `cu113`, etc. -ARG CUDA='cu117' +ARG CUDA='cu118' RUN apt -y update RUN apt install -y libaio-dev @@ -15,6 +15,8 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip ARG REF=main RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF +RUN python3 -m pip uninstall -y torch torchvision torchaudio + # Install latest release PyTorch # (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.) # (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops) diff --git a/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile index 1e69546a21..50efc08129 100644 --- a/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile +++ b/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile @@ -1,11 +1,11 @@ # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel_22-08.html#rel_22-08 -FROM nvcr.io/nvidia/pytorch:22.08-py3 +FROM nvcr.io/nvidia/pytorch:22.12-py3 LABEL maintainer="Hugging Face" ARG DEBIAN_FRONTEND=noninteractive # Example: `cu102`, `cu113`, etc. -ARG CUDA='cu117' +ARG CUDA='cu118' RUN apt -y update RUN apt install -y libaio-dev @@ -14,6 +14,8 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip ARG REF=main RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF +RUN python3 -m pip uninstall -y torch torchvision torchaudio + # Install **nightly** release PyTorch (flag `--pre`) # (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.) # (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops) diff --git a/docker/transformers-pytorch-gpu/Dockerfile b/docker/transformers-pytorch-gpu/Dockerfile index 9bf91674af..d06a523af0 100644 --- a/docker/transformers-pytorch-gpu/Dockerfile +++ b/docker/transformers-pytorch-gpu/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu20.04 +FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 LABEL maintainer="Hugging Face" ARG DEBIAN_FRONTEND=noninteractive @@ -16,7 +16,7 @@ ARG PYTORCH='2.0.1' ARG TORCH_VISION='' ARG TORCH_AUDIO='' # Example: `cu102`, `cu113`, etc. -ARG CUDA='cu117' +ARG CUDA='cu118' RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA RUN [ ${#TORCH_VISION} -gt 0 ] && VERSION='torchvision=='TORCH_VISION'.*' || VERSION='torchvision'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA