CI: update to ROCm 6.0.2 and test MI300 (#30266)
* update to ROCm 6.0.2 and test MI300
* add callers for mi300
* update dockerfile
* fix trainer tests
* remove apex
* style
* Update tests/trainer/test_trainer_seq2seq.py
* Update tests/trainer/test_trainer_seq2seq.py
* Update tests/trainer/test_trainer_seq2seq.py
* Update tests/trainer/test_trainer_seq2seq.py
* update to torch 2.3
* add workflow dispatch target
* we may need branches: mi300-ci after all
* nit
* fix docker build
* nit
* add check runner
* remove docker-gpu
* fix issues
* fix

---------

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -1,24 +1,19 @@
|
||||
FROM rocm/dev-ubuntu-20.04:5.6
|
||||
FROM rocm/dev-ubuntu-22.04:6.0.2
|
||||
# rocm/pytorch has no version with 2.1.0
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
ARG PYTORCH='2.1.0'
|
||||
ARG TORCH_VISION='0.16.0'
|
||||
ARG TORCH_AUDIO='2.1.0'
|
||||
ARG ROCM='5.6'
|
||||
|
||||
RUN apt update && \
|
||||
apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip ffmpeg && \
|
||||
apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip python3-dev ffmpeg && \
|
||||
apt clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip numpy
|
||||
|
||||
RUN python3 -m pip install torch==$PYTORCH torchvision==$TORCH_VISION torchaudio==$TORCH_AUDIO --index-url https://download.pytorch.org/whl/rocm$ROCM
|
||||
RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0"
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade importlib-metadata setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0"
|
||||
|
||||
ARG REF=main
|
||||
WORKDIR /
|
||||
@@ -35,5 +30,5 @@ RUN python3 -m pip uninstall -y tensorflow flax
|
||||
# this line must be added in order for python to be aware of transformers.
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
|
||||
# Remove nvml as it is not compatible with ROCm
|
||||
RUN python3 -m pip uninstall py3nvml pynvml -y
|
||||
# Remove nvml as it is not compatible with ROCm. apex is not tested on NVIDIA either.
|
||||
RUN python3 -m pip uninstall py3nvml pynvml apex -y
|
||||
|
||||
Reference in New Issue
Block a user