Fix AMD CI not showing GPU (#27555)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
22
.github/workflows/self-push-amd.yml
vendored
22
.github/workflows/self-push-amd.yml
vendored
@@ -38,14 +38,16 @@ jobs:
|
|||||||
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
|
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
|
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
|
||||||
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- name: ROCM-SMI
|
- name: ROCM-SMI
|
||||||
run: |
|
run: |
|
||||||
rocminfo | grep "Agent" -A 14
|
rocm-smi
|
||||||
- name: Show HIP environment
|
- name: ROCM-INFO
|
||||||
|
run: |
|
||||||
|
rocminfo | grep "Agent" -A 14
|
||||||
|
- name: Show ROCR environment
|
||||||
run: |
|
run: |
|
||||||
echo "HIP: $HIP_VISIBLE_DEVICES"
|
|
||||||
echo "ROCR: $ROCR_VISIBLE_DEVICES"
|
echo "ROCR: $ROCR_VISIBLE_DEVICES"
|
||||||
|
|
||||||
setup_gpu:
|
setup_gpu:
|
||||||
@@ -57,7 +59,7 @@ jobs:
|
|||||||
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
|
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
|
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
|
||||||
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
outputs:
|
outputs:
|
||||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||||
test_map: ${{ steps.set-matrix.outputs.test_map }}
|
test_map: ${{ steps.set-matrix.outputs.test_map }}
|
||||||
@@ -155,7 +157,7 @@ jobs:
|
|||||||
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
|
runs-on: [self-hosted, docker-gpu, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
|
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
|
||||||
options: --device /dev/kfd --device /dev/dri --env HIP_VISIBLE_DEVICES --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
|
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
|
||||||
# We also take into account the `push` event (we might want to test some changes in a branch)
|
# We also take into account the `push` event (we might want to test some changes in a branch)
|
||||||
@@ -207,10 +209,12 @@ jobs:
|
|||||||
|
|
||||||
- name: ROCM-SMI
|
- name: ROCM-SMI
|
||||||
run: |
|
run: |
|
||||||
rocminfo | grep "Agent" -A 14
|
rocm-smi
|
||||||
- name: Show HIP environment
|
- name: ROCM-INFO
|
||||||
|
run: |
|
||||||
|
rocminfo | grep "Agent" -A 14
|
||||||
|
- name: Show ROCR environment
|
||||||
run: |
|
run: |
|
||||||
echo "HIP: $HIP_VISIBLE_DEVICES"
|
|
||||||
echo "ROCR: $ROCR_VISIBLE_DEVICES"
|
echo "ROCR: $ROCR_VISIBLE_DEVICES"
|
||||||
|
|
||||||
- name: Environment
|
- name: Environment
|
||||||
|
|||||||
Reference in New Issue
Block a user