Print more library versions in CI (#17384)
* print more lib. versions and just befor test runs * update print_env_pt.py * rename to print_env * Disable warning + better job name * print python version Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
4
.github/workflows/doctests.yml
vendored
4
.github/workflows/doctests.yml
vendored
@@ -32,9 +32,7 @@ jobs:
|
|||||||
|
|
||||||
- name: GPU visibility
|
- name: GPU visibility
|
||||||
run: |
|
run: |
|
||||||
utils/print_env_pt.py
|
python3 utils/print_env.py
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
|
||||||
|
|
||||||
- name: Prepare files for doctests
|
- name: Prepare files for doctests
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
8
.github/workflows/self-nightly-scheduled.yml
vendored
8
.github/workflows/self-nightly-scheduled.yml
vendored
@@ -41,7 +41,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
run: |
|
||||||
utils/print_env_pt.py
|
utils/print_env.py
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
run: |
|
run: |
|
||||||
@@ -109,7 +109,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
run: |
|
||||||
utils/print_env_pt.py
|
utils/print_env.py
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
env:
|
env:
|
||||||
@@ -163,7 +163,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
run: |
|
||||||
utils/print_env_pt.py
|
utils/print_env.py
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
run: |
|
run: |
|
||||||
@@ -206,7 +206,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
run: |
|
||||||
utils/print_env_pt.py
|
utils/print_env.py
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
64
.github/workflows/self-push.yml
vendored
64
.github/workflows/self-push.yml
vendored
@@ -87,17 +87,6 @@ jobs:
|
|||||||
image: huggingface/transformers-all-latest-gpu
|
image: huggingface/transformers-all-latest-gpu
|
||||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- name: NVIDIA-SMI
|
|
||||||
run: |
|
|
||||||
nvidia-smi
|
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
|
||||||
working-directory: /transformers
|
|
||||||
run: |
|
|
||||||
utils/print_env_pt.py
|
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
|
||||||
|
|
||||||
- name: Echo folder ${{ matrix.folders }}
|
- name: Echo folder ${{ matrix.folders }}
|
||||||
shell: bash
|
shell: bash
|
||||||
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
|
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
|
||||||
@@ -114,6 +103,15 @@ jobs:
|
|||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: git fetch && git checkout ${{ github.sha }}
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
|
|
||||||
|
- name: NVIDIA-SMI
|
||||||
|
run: |
|
||||||
|
nvidia-smi
|
||||||
|
|
||||||
|
- name: Environment
|
||||||
|
working-directory: /transformers
|
||||||
|
run: |
|
||||||
|
python3 utils/print_env.py
|
||||||
|
|
||||||
- name: Run all non-slow selected tests on GPU
|
- name: Run all non-slow selected tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
@@ -146,17 +144,6 @@ jobs:
|
|||||||
image: huggingface/transformers-all-latest-gpu
|
image: huggingface/transformers-all-latest-gpu
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- name: NVIDIA-SMI
|
|
||||||
run: |
|
|
||||||
nvidia-smi
|
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
|
||||||
working-directory: /transformers
|
|
||||||
run: |
|
|
||||||
utils/print_env_pt.py
|
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
|
||||||
|
|
||||||
- name: Echo folder ${{ matrix.folders }}
|
- name: Echo folder ${{ matrix.folders }}
|
||||||
shell: bash
|
shell: bash
|
||||||
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
|
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
|
||||||
@@ -173,6 +160,15 @@ jobs:
|
|||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: git fetch && git checkout ${{ github.sha }}
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
|
|
||||||
|
- name: NVIDIA-SMI
|
||||||
|
run: |
|
||||||
|
nvidia-smi
|
||||||
|
|
||||||
|
- name: Environment
|
||||||
|
working-directory: /transformers
|
||||||
|
run: |
|
||||||
|
python3 utils/print_env.py
|
||||||
|
|
||||||
- name: Run all non-slow selected tests on GPU
|
- name: Run all non-slow selected tests on GPU
|
||||||
env:
|
env:
|
||||||
MKL_SERVICE_FORCE_INTEL: 1
|
MKL_SERVICE_FORCE_INTEL: 1
|
||||||
@@ -210,19 +206,19 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
fetch-depth: 2
|
fetch-depth: 2
|
||||||
|
|
||||||
- name: NVIDIA-SMI
|
|
||||||
run: |
|
|
||||||
nvidia-smi
|
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
apt -y update && apt install -y libaio-dev
|
apt -y update && apt install -y libaio-dev
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install .[deepspeed-testing]
|
pip install .[deepspeed-testing]
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: NVIDIA-SMI
|
||||||
run: |
|
run: |
|
||||||
utils/print_env_pt.py
|
nvidia-smi
|
||||||
|
|
||||||
|
- name: Environment
|
||||||
|
run: |
|
||||||
|
python utils/print_env.py
|
||||||
|
|
||||||
- name: Run all non-slow selected tests on GPU
|
- name: Run all non-slow selected tests on GPU
|
||||||
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
|
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
|
||||||
@@ -259,10 +255,6 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
fetch-depth: 2
|
fetch-depth: 2
|
||||||
|
|
||||||
- name: NVIDIA-SMI
|
|
||||||
run: |
|
|
||||||
nvidia-smi
|
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
apt -y update && apt install -y libaio-dev
|
apt -y update && apt install -y libaio-dev
|
||||||
@@ -270,9 +262,13 @@ jobs:
|
|||||||
rm -rf ~/.cache/torch_extensions/ # shared between conflicting builds
|
rm -rf ~/.cache/torch_extensions/ # shared between conflicting builds
|
||||||
pip install .[testing,deepspeed,fairscale]
|
pip install .[testing,deepspeed,fairscale]
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: NVIDIA-SMI
|
||||||
run: |
|
run: |
|
||||||
utils/print_env_pt.py
|
nvidia-smi
|
||||||
|
|
||||||
|
- name: Environment
|
||||||
|
run: |
|
||||||
|
python utils/print_env.py
|
||||||
|
|
||||||
- name: Run all non-slow selected tests on GPU
|
- name: Run all non-slow selected tests on GPU
|
||||||
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
|
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
|
||||||
|
|||||||
62
.github/workflows/self-scheduled.yml
vendored
62
.github/workflows/self-scheduled.yml
vendored
@@ -56,13 +56,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
nvidia-smi
|
nvidia-smi
|
||||||
|
|
||||||
- name: GPU visibility
|
|
||||||
working-directory: /transformers
|
|
||||||
run: |
|
|
||||||
utils/print_env_pt.py
|
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
|
||||||
|
|
||||||
run_tests_single_gpu:
|
run_tests_single_gpu:
|
||||||
name: Model tests
|
name: Model tests
|
||||||
strategy:
|
strategy:
|
||||||
@@ -91,6 +84,15 @@ jobs:
|
|||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: git fetch && git checkout ${{ github.sha }}
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
|
|
||||||
|
- name: NVIDIA-SMI
|
||||||
|
run: |
|
||||||
|
nvidia-smi
|
||||||
|
|
||||||
|
- name: Environment
|
||||||
|
working-directory: /transformers
|
||||||
|
run: |
|
||||||
|
python3 utils/print_env.py
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
|
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
|
||||||
@@ -135,6 +137,15 @@ jobs:
|
|||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: git fetch && git checkout ${{ github.sha }}
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
|
|
||||||
|
- name: NVIDIA-SMI
|
||||||
|
run: |
|
||||||
|
nvidia-smi
|
||||||
|
|
||||||
|
- name: Environment
|
||||||
|
working-directory: /transformers
|
||||||
|
run: |
|
||||||
|
python3 utils/print_env.py
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
|
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
|
||||||
@@ -163,6 +174,15 @@ jobs:
|
|||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: git fetch && git checkout ${{ github.sha }}
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
|
|
||||||
|
- name: NVIDIA-SMI
|
||||||
|
run: |
|
||||||
|
nvidia-smi
|
||||||
|
|
||||||
|
- name: Environment
|
||||||
|
working-directory: /transformers
|
||||||
|
run: |
|
||||||
|
python3 utils/print_env.py
|
||||||
|
|
||||||
- name: Run examples tests on GPU
|
- name: Run examples tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
@@ -197,6 +217,15 @@ jobs:
|
|||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: git fetch && git checkout ${{ github.sha }}
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
|
|
||||||
|
- name: NVIDIA-SMI
|
||||||
|
run: |
|
||||||
|
nvidia-smi
|
||||||
|
|
||||||
|
- name: Environment
|
||||||
|
working-directory: /transformers
|
||||||
|
run: |
|
||||||
|
python3 utils/print_env.py
|
||||||
|
|
||||||
- name: Run all pipeline tests on GPU
|
- name: Run all pipeline tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
env:
|
env:
|
||||||
@@ -233,6 +262,15 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
git fetch && git checkout ${{ github.sha }}
|
git fetch && git checkout ${{ github.sha }}
|
||||||
|
|
||||||
|
- name: NVIDIA-SMI
|
||||||
|
run: |
|
||||||
|
nvidia-smi
|
||||||
|
|
||||||
|
- name: Environment
|
||||||
|
working-directory: /transformers
|
||||||
|
run: |
|
||||||
|
python3 utils/print_env.py
|
||||||
|
|
||||||
- name: Run all pipeline tests on GPU
|
- name: Run all pipeline tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
env:
|
env:
|
||||||
@@ -276,6 +314,15 @@ jobs:
|
|||||||
git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build
|
git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build
|
||||||
DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install -e . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
|
DS_BUILD_CPU_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install -e . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
|
||||||
|
|
||||||
|
- name: NVIDIA-SMI
|
||||||
|
run: |
|
||||||
|
nvidia-smi
|
||||||
|
|
||||||
|
- name: Environment
|
||||||
|
working-directory: /workspace/transformers
|
||||||
|
run: |
|
||||||
|
python utils/print_env.py
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
working-directory: /workspace/transformers
|
working-directory: /workspace/transformers
|
||||||
run: |
|
run: |
|
||||||
@@ -293,7 +340,6 @@ jobs:
|
|||||||
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
||||||
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
|
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
|
||||||
|
|
||||||
|
|
||||||
send_results:
|
send_results:
|
||||||
name: Send results to webhook
|
name: Send results to webhook
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|||||||
57
utils/print_env.py
Normal file
57
utils/print_env.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# coding=utf-8
|
||||||
|
# Copyright 2020 The HuggingFace Inc. team.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# this script dumps information about the environment
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import transformers
|
||||||
|
|
||||||
|
|
||||||
|
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
||||||
|
|
||||||
|
print("Python version:", sys.version)
|
||||||
|
print("transformers version:", transformers.__version__)
|
||||||
|
|
||||||
|
try:
|
||||||
|
import torch
|
||||||
|
|
||||||
|
print("Torch version:", torch.__version__)
|
||||||
|
print("Cuda available:", torch.cuda.is_available())
|
||||||
|
print("Cuda version:", torch.version.cuda)
|
||||||
|
print("CuDNN version:", torch.backends.cudnn.version())
|
||||||
|
print("Number of GPUs available:", torch.cuda.device_count())
|
||||||
|
print("NCCL version:", torch.cuda.nccl.version())
|
||||||
|
except ImportError:
|
||||||
|
print("Torch version:", None)
|
||||||
|
|
||||||
|
try:
|
||||||
|
import deepspeed
|
||||||
|
|
||||||
|
print("DeepSpeed version:", deepspeed.__version__)
|
||||||
|
except ImportError:
|
||||||
|
print("DeepSpeed version:", None)
|
||||||
|
|
||||||
|
try:
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
print("TensorFlow version:", tf.__version__)
|
||||||
|
print("TF GPUs available:", bool(tf.config.list_physical_devices("GPU")))
|
||||||
|
print("Number of TF GPUs available:", len(tf.config.list_physical_devices("GPU")))
|
||||||
|
except ImportError:
|
||||||
|
print("TensorFlow version:", None)
|
||||||
@@ -1,28 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
# coding=utf-8
|
|
||||||
# Copyright 2020 The HuggingFace Inc. team.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# this script dumps information about the environment
|
|
||||||
|
|
||||||
import torch
|
|
||||||
|
|
||||||
|
|
||||||
print("Torch version:", torch.__version__)
|
|
||||||
print("Cuda available:", torch.cuda.is_available())
|
|
||||||
print("Cuda version:", torch.version.cuda)
|
|
||||||
print("CuDNN version:", torch.backends.cudnn.version())
|
|
||||||
print("Number of GPUs available:", torch.cuda.device_count())
|
|
||||||
print("NCCL version:", torch.cuda.nccl.version())
|
|
||||||
Reference in New Issue
Block a user