Refine BF16 test for deepspeed (#17734)

* Refine BF16 check in CPU/GPU
* Fixes
* Renames
This commit is contained in:
@@ -125,6 +125,8 @@ from .import_utils import (
|
|||||||
is_tokenizers_available,
|
is_tokenizers_available,
|
||||||
is_torch_available,
|
is_torch_available,
|
||||||
is_torch_bf16_available,
|
is_torch_bf16_available,
|
||||||
|
is_torch_bf16_cpu_available,
|
||||||
|
is_torch_bf16_gpu_available,
|
||||||
is_torch_cuda_available,
|
is_torch_cuda_available,
|
||||||
is_torch_fx_available,
|
is_torch_fx_available,
|
||||||
is_torch_fx_proxy,
|
is_torch_fx_proxy,
|
||||||
|
|||||||
@@ -272,7 +272,7 @@ def is_torch_cuda_available():
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def is_torch_bf16_available():
|
def is_torch_bf16_gpu_available():
|
||||||
if not is_torch_available():
|
if not is_torch_available():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -288,30 +288,42 @@ def is_torch_bf16_available():
|
|||||||
# 4. torch.autocast exists
|
# 4. torch.autocast exists
|
||||||
# XXX: one problem here is that it may give invalid results on mixed gpus setup, so it's
|
# XXX: one problem here is that it may give invalid results on mixed gpus setup, so it's
|
||||||
# really only correct for the 0th gpu (or currently set default device if different from 0)
|
# really only correct for the 0th gpu (or currently set default device if different from 0)
|
||||||
is_torch_gpu_bf16_available = True
|
|
||||||
is_torch_cpu_bf16_available = True
|
|
||||||
if version.parse(torch.__version__) < version.parse("1.10"):
|
if version.parse(torch.__version__) < version.parse("1.10"):
|
||||||
is_torch_gpu_bf16_available = False
|
return False
|
||||||
is_torch_cpu_bf16_available = False
|
|
||||||
|
|
||||||
if torch.cuda.is_available() and torch.version.cuda is not None:
|
if torch.cuda.is_available() and torch.version.cuda is not None:
|
||||||
if torch.cuda.get_device_properties(torch.cuda.current_device()).major < 8:
|
if torch.cuda.get_device_properties(torch.cuda.current_device()).major < 8:
|
||||||
is_torch_gpu_bf16_available = False
|
return False
|
||||||
if int(torch.version.cuda.split(".")[0]) < 11:
|
if int(torch.version.cuda.split(".")[0]) < 11:
|
||||||
is_torch_gpu_bf16_available = False
|
return False
|
||||||
if not hasattr(torch.cuda.amp, "autocast"):
|
if not hasattr(torch.cuda.amp, "autocast"):
|
||||||
is_torch_gpu_bf16_available = False
|
return False
|
||||||
else:
|
else:
|
||||||
is_torch_gpu_bf16_available = False
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def is_torch_bf16_cpu_available():
|
||||||
|
if not is_torch_available():
|
||||||
|
return False
|
||||||
|
|
||||||
|
import torch
|
||||||
|
|
||||||
|
if version.parse(torch.__version__) < version.parse("1.10"):
|
||||||
|
return False
|
||||||
|
|
||||||
# checking CPU
|
|
||||||
try:
|
try:
|
||||||
# multiple levels of AttributeError depending on the pytorch version so do them all in one check
|
# multiple levels of AttributeError depending on the pytorch version so do them all in one check
|
||||||
_ = torch.cpu.amp.autocast
|
_ = torch.cpu.amp.autocast
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
is_torch_cpu_bf16_available = False
|
return False
|
||||||
|
|
||||||
return is_torch_cpu_bf16_available or is_torch_gpu_bf16_available
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def is_torch_bf16_available():
|
||||||
|
return is_torch_bf16_cpu_available() or is_torch_bf16_gpu_available()
|
||||||
|
|
||||||
|
|
||||||
def is_torch_tf32_available():
|
def is_torch_tf32_available():
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ from transformers.testing_utils import (
|
|||||||
slow,
|
slow,
|
||||||
)
|
)
|
||||||
from transformers.trainer_utils import get_last_checkpoint, set_seed
|
from transformers.trainer_utils import get_last_checkpoint, set_seed
|
||||||
from transformers.utils import WEIGHTS_NAME, is_torch_bf16_available
|
from transformers.utils import WEIGHTS_NAME, is_torch_bf16_gpu_available
|
||||||
|
|
||||||
|
|
||||||
if is_torch_available():
|
if is_torch_available():
|
||||||
@@ -129,7 +129,7 @@ FP16 = "fp16"
|
|||||||
BF16 = "bf16"
|
BF16 = "bf16"
|
||||||
|
|
||||||
stages = [ZERO2, ZERO3]
|
stages = [ZERO2, ZERO3]
|
||||||
if is_torch_bf16_available():
|
if is_torch_bf16_gpu_available():
|
||||||
dtypes = [FP16, BF16]
|
dtypes = [FP16, BF16]
|
||||||
else:
|
else:
|
||||||
dtypes = [FP16]
|
dtypes = [FP16]
|
||||||
@@ -920,7 +920,7 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
|
|||||||
@require_torch_multi_gpu
|
@require_torch_multi_gpu
|
||||||
@parameterized.expand(["bf16", "fp16", "fp32"])
|
@parameterized.expand(["bf16", "fp16", "fp32"])
|
||||||
def test_inference(self, dtype):
|
def test_inference(self, dtype):
|
||||||
if dtype == "bf16" and not is_torch_bf16_available():
|
if dtype == "bf16" and not is_torch_bf16_gpu_available():
|
||||||
self.skipTest("test requires bfloat16 hardware support")
|
self.skipTest("test requires bfloat16 hardware support")
|
||||||
|
|
||||||
# this is just inference, so no optimizer should be loaded
|
# this is just inference, so no optimizer should be loaded
|
||||||
|
|||||||
Reference in New Issue
Block a user