Enable PyTorch nightly build CI (#17335)
* nightly build pytorch CI * fix working dir * change time and event name Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -3,6 +3,9 @@ LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Use login shell to read variables from `~/.profile` (to pass dynamic created variables between RUN commands)
|
||||
SHELL ["sh", "-lc"]
|
||||
|
||||
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
|
||||
# to be used as arguments for docker build (so far).
|
||||
|
||||
@@ -21,11 +24,20 @@ ARG REF=main
|
||||
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
|
||||
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime]
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir -U torch==$PYTORCH torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
# TODO: Handle these in a python utility script
|
||||
RUN [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile
|
||||
RUN echo torch=$VERSION
|
||||
# `torchvision` and `torchaudio` should be installed along with `torch`, especially for nightly build.
|
||||
# Currently, let's just use their latest releases (when `torch` is installed with a release version)
|
||||
# TODO: We might need to specify proper versions that work with a specific torch version (especially for past CI).
|
||||
RUN [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir -U tensorflow
|
||||
RUN python3 -m pip uninstall -y flax jax
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$PYTORCH+$CUDA.html
|
||||
# Use installed torch version for `torch-scatter` to avid to deal with PYTORCH='pre'.
|
||||
# If torch is nightly version, the link is likely to be invalid, but the installation falls back to the latest torch-scatter
|
||||
RUN python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+$CUDA.html
|
||||
RUN python3 -m pip install --no-cache-dir intel_extension_for_pytorch==$INTEL_TORCH_EXT+cpu -f https://software.intel.com/ipex-whl-stable
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract https://github.com/kpu/kenlm/archive/master.zip
|
||||
|
||||
@@ -3,6 +3,9 @@ LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu113'
|
||||
|
||||
RUN apt -y update
|
||||
RUN apt install -y libaio-dev
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip
|
||||
@@ -13,13 +16,16 @@ RUN git clone https://github.com/huggingface/transformers && cd transformers &&
|
||||
# Install latest release PyTorch
|
||||
# (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
|
||||
# (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
|
||||
RUN python3 -m pip install --no-cache-dir -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
|
||||
RUN python3 -m pip install --no-cache-dir -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
|
||||
|
||||
# Pre-build DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
|
||||
# Pre-build **latest** DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
|
||||
RUN python3 -m pip uninstall -y deepspeed
|
||||
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
|
||||
# This has to be run (again) inside the GPU VMs running the tests.
|
||||
# The installation works here, but some tests fail, if we don't pre-build deepspeed again in the VMs running the tests.
|
||||
# TODO: Find out why test fail.
|
||||
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
|
||||
35
docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile
Normal file
35
docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile
Normal file
@@ -0,0 +1,35 @@
|
||||
FROM nvcr.io/nvidia/pytorch:21.03-py3
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu113'
|
||||
|
||||
RUN apt -y update
|
||||
RUN apt install -y libaio-dev
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip
|
||||
|
||||
ARG REF=main
|
||||
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
|
||||
|
||||
# Install **nightly** release PyTorch (flag `--pre`)
|
||||
# (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
|
||||
# (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
|
||||
RUN python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
|
||||
|
||||
# Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
|
||||
RUN python3 -m pip uninstall -y deepspeed
|
||||
# This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.)
|
||||
# Issue: https://github.com/microsoft/DeepSpeed/issues/2010
|
||||
# RUN git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build && \
|
||||
# DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
|
||||
|
||||
# When installing in editable mode, `transformers` is not recognized as a package.
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
|
||||
# Disable for now as deepspeed is not installed above. To be enabled once the issue is fixed.
|
||||
# RUN python3 -c "from deepspeed.launcher.runner import main"
|
||||
Reference in New Issue
Block a user