Apply ruff flake8-comprehensions (#21694)
This commit is contained in:
@@ -157,9 +157,13 @@ class CoreIntegrationDeepSpeed(TestCasePlus, TrainerIntegrationCommon):
|
||||
super().setUp()
|
||||
|
||||
master_port = get_master_port(real_launcher=False)
|
||||
self.dist_env_1_gpu = dict(
|
||||
MASTER_ADDR="localhost", MASTER_PORT=master_port, RANK="0", LOCAL_RANK="0", WORLD_SIZE="1"
|
||||
)
|
||||
self.dist_env_1_gpu = {
|
||||
"MASTER_ADDR": "localhost",
|
||||
"MASTER_PORT": master_port,
|
||||
"RANK": "0",
|
||||
"LOCAL_RANK": "0",
|
||||
"WORLD_SIZE": "1",
|
||||
}
|
||||
|
||||
def tearDown(self):
|
||||
super().tearDown()
|
||||
@@ -212,14 +216,18 @@ class TrainerIntegrationDeepSpeedWithCustomConfig(TestCasePlus):
|
||||
self.batch_size = args.train_batch_size
|
||||
|
||||
master_port = get_master_port(real_launcher=False)
|
||||
self.dist_env_1_gpu = dict(
|
||||
MASTER_ADDR="localhost", MASTER_PORT=master_port, RANK="0", LOCAL_RANK="0", WORLD_SIZE="1"
|
||||
)
|
||||
self.dist_env_1_gpu = {
|
||||
"MASTER_ADDR": "localhost",
|
||||
"MASTER_PORT": master_port,
|
||||
"RANK": "0",
|
||||
"LOCAL_RANK": "0",
|
||||
"WORLD_SIZE": "1",
|
||||
}
|
||||
|
||||
self.ds_config_file = dict(
|
||||
zero2=f"{self.test_file_dir_str}/ds_config_zero2.json",
|
||||
zero3=f"{self.test_file_dir_str}/ds_config_zero3.json",
|
||||
)
|
||||
self.ds_config_file = {
|
||||
"zero2": f"{self.test_file_dir_str}/ds_config_zero2.json",
|
||||
"zero3": f"{self.test_file_dir_str}/ds_config_zero3.json",
|
||||
}
|
||||
|
||||
# use self.get_config_dict(stage) to use these to ensure the original is not modified
|
||||
with io.open(self.ds_config_file[ZERO2], "r", encoding="utf-8") as f:
|
||||
@@ -230,10 +238,10 @@ class TrainerIntegrationDeepSpeedWithCustomConfig(TestCasePlus):
|
||||
# It's in the file as a demo for users since we want everything to work out of the box even if slower.
|
||||
config_zero3["zero_optimization"]["stage3_gather_16bit_weights_on_model_save"] = False
|
||||
|
||||
self.ds_config_dict = dict(
|
||||
zero2=config_zero2,
|
||||
zero3=config_zero3,
|
||||
)
|
||||
self.ds_config_dict = {
|
||||
"zero2": config_zero2,
|
||||
"zero3": config_zero3,
|
||||
}
|
||||
|
||||
def tearDown(self):
|
||||
super().tearDown()
|
||||
@@ -370,7 +378,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
|
||||
# this actually doesn't have to be on NVMe, any storage will do since this test only
|
||||
# runs a simple check that we can use some directory as if it were NVMe
|
||||
nvme_path = self.get_auto_remove_tmp_dir()
|
||||
nvme_config = dict(device="nvme", nvme_path=nvme_path)
|
||||
nvme_config = {"device": "nvme", "nvme_path": nvme_path}
|
||||
ds_config_zero3_dict = self.get_config_dict(ZERO3)
|
||||
ds_config_zero3_dict["zero_optimization"]["offload_optimizer"] = nvme_config
|
||||
ds_config_zero3_dict["zero_optimization"]["offload_param"] = nvme_config
|
||||
@@ -415,7 +423,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
|
||||
# force cpu offload
|
||||
ds_config_dict["zero_optimization"]["offload_optimizer"]["device"] = "cpu"
|
||||
with mockenv_context(**self.dist_env_1_gpu):
|
||||
kwargs = dict(local_rank=0, deepspeed=ds_config_dict)
|
||||
kwargs = {"local_rank": 0, "deepspeed": ds_config_dict}
|
||||
kwargs[dtype] = True
|
||||
trainer = get_regression_trainer(**kwargs)
|
||||
with CaptureLogger(deepspeed_logger) as cl:
|
||||
@@ -431,7 +439,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
|
||||
# it's run not as a first test as `sys.stdout` will no longer be the same. So we either have
|
||||
# to reset `deepspeed_logger.handlers[0].setStream(sys.stdout)` or directly capture from the deepspeed_logger.
|
||||
with mockenv_context(**self.dist_env_1_gpu):
|
||||
kwargs = dict(local_rank=0, deepspeed=self.get_config_dict(stage))
|
||||
kwargs = {"local_rank": 0, "deepspeed": self.get_config_dict(stage)}
|
||||
kwargs[dtype] = True
|
||||
trainer = get_regression_trainer(**kwargs)
|
||||
|
||||
@@ -449,15 +457,15 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
|
||||
# `self.lr_scheduler.get_last_lr()` and originally it'd fail on the very first step.
|
||||
with mockenv_context(**self.dist_env_1_gpu):
|
||||
a = b = 0.0
|
||||
kwargs = dict(
|
||||
a=a,
|
||||
b=b,
|
||||
local_rank=0,
|
||||
train_len=8,
|
||||
deepspeed=self.get_config_dict(stage),
|
||||
per_device_train_batch_size=8,
|
||||
logging_steps=1,
|
||||
)
|
||||
kwargs = {
|
||||
"a": a,
|
||||
"b": b,
|
||||
"local_rank": 0,
|
||||
"train_len": 8,
|
||||
"deepspeed": self.get_config_dict(stage),
|
||||
"per_device_train_batch_size": 8,
|
||||
"logging_steps": 1,
|
||||
}
|
||||
kwargs[dtype] = True
|
||||
trainer = get_regression_trainer(**kwargs)
|
||||
|
||||
@@ -494,13 +502,13 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
|
||||
train_len = 64
|
||||
a = b = 0.0
|
||||
|
||||
kwargs = dict(
|
||||
a=a,
|
||||
b=b,
|
||||
local_rank=0,
|
||||
train_len=train_len,
|
||||
deepspeed=self.get_config_dict(stage),
|
||||
)
|
||||
kwargs = {
|
||||
"a": a,
|
||||
"b": b,
|
||||
"local_rank": 0,
|
||||
"train_len": train_len,
|
||||
"deepspeed": self.get_config_dict(stage),
|
||||
}
|
||||
kwargs[dtype] = True
|
||||
|
||||
with mockenv_context(**self.dist_env_1_gpu):
|
||||
@@ -583,11 +591,11 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
|
||||
|
||||
# save checkpoints
|
||||
with mockenv_context(**self.dist_env_1_gpu):
|
||||
kwargs = dict(
|
||||
output_dir=output_dir,
|
||||
save_steps=freq,
|
||||
deepspeed=ds_config_dict,
|
||||
)
|
||||
kwargs = {
|
||||
"output_dir": output_dir,
|
||||
"save_steps": freq,
|
||||
"deepspeed": ds_config_dict,
|
||||
}
|
||||
kwargs[dtype] = True
|
||||
trainer = get_regression_trainer(**kwargs)
|
||||
trainer.train()
|
||||
@@ -600,7 +608,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
|
||||
with mockenv_context(**self.dist_env_1_gpu):
|
||||
ds_config_dict = self.get_config_dict(stage)
|
||||
output_dir = self.get_auto_remove_tmp_dir()
|
||||
kwargs = dict(output_dir=output_dir, deepspeed=ds_config_dict)
|
||||
kwargs = {"output_dir": output_dir, "deepspeed": ds_config_dict}
|
||||
kwargs[dtype] = True
|
||||
trainer = get_regression_trainer(**kwargs)
|
||||
|
||||
@@ -632,7 +640,13 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
|
||||
if stage == ZERO3:
|
||||
ds_config_dict["zero_optimization"]["stage3_gather_16bit_weights_on_model_save"] = True
|
||||
|
||||
kwargs = dict(output_dir=output_dir, train_len=128, save_steps=5, learning_rate=0.1, deepspeed=ds_config_dict)
|
||||
kwargs = {
|
||||
"output_dir": output_dir,
|
||||
"train_len": 128,
|
||||
"save_steps": 5,
|
||||
"learning_rate": 0.1,
|
||||
"deepspeed": ds_config_dict,
|
||||
}
|
||||
kwargs[dtype] = True
|
||||
|
||||
with mockenv_context(**self.dist_env_1_gpu):
|
||||
@@ -679,16 +693,16 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
|
||||
|
||||
ds_config_dict = self.get_config_dict(stage)
|
||||
|
||||
kwargs = dict(
|
||||
output_dir=output_dir,
|
||||
train_len=4,
|
||||
per_device_train_batch_size=4,
|
||||
num_train_epochs=1,
|
||||
save_strategy="steps",
|
||||
save_steps=1,
|
||||
learning_rate=0.1,
|
||||
deepspeed=ds_config_dict,
|
||||
)
|
||||
kwargs = {
|
||||
"output_dir": output_dir,
|
||||
"train_len": 4,
|
||||
"per_device_train_batch_size": 4,
|
||||
"num_train_epochs": 1,
|
||||
"save_strategy": "steps",
|
||||
"save_steps": 1,
|
||||
"learning_rate": 0.1,
|
||||
"deepspeed": ds_config_dict,
|
||||
}
|
||||
kwargs[dtype] = True
|
||||
|
||||
with mockenv_context(**self.dist_env_1_gpu):
|
||||
@@ -710,7 +724,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
|
||||
# test that we can switch from zero2 to zero3 in the same process for example
|
||||
# test is_zero, etc.
|
||||
output_dir = self.get_auto_remove_tmp_dir()
|
||||
kwargs = dict(output_dir=output_dir, train_len=8, fp16=True)
|
||||
kwargs = {"output_dir": output_dir, "train_len": 8, "fp16": True}
|
||||
|
||||
ds_config_zero3_dict = self.get_config_dict(ZERO3)
|
||||
ds_config_zero2_dict = self.get_config_dict(ZERO2)
|
||||
@@ -808,7 +822,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
|
||||
|
||||
def get_dataset():
|
||||
data_file = str(self.tests_dir / "fixtures/tests_samples/SQUAD/sample.json")
|
||||
data_files = dict(train=data_file, validation=data_file)
|
||||
data_files = {"train": data_file, "validation": data_file}
|
||||
raw_datasets = datasets.load_dataset("json", data_files=data_files, field="data")
|
||||
train_dataset = raw_datasets["train"].map(_add_eos_to_examples).map(_convert_to_features, batched=True)
|
||||
valid_dataset = deepcopy(train_dataset)
|
||||
@@ -903,7 +917,14 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
|
||||
|
||||
do_train = True
|
||||
do_eval = False
|
||||
kwargs = dict(stage=stage, dtype=dtype, eval_steps=1, distributed=True, do_train=do_train, do_eval=do_eval)
|
||||
kwargs = {
|
||||
"stage": stage,
|
||||
"dtype": dtype,
|
||||
"eval_steps": 1,
|
||||
"distributed": True,
|
||||
"do_train": do_train,
|
||||
"do_eval": do_eval,
|
||||
}
|
||||
|
||||
# 1. normal training
|
||||
output_dir = self.run_and_check(**kwargs)
|
||||
|
||||
@@ -166,8 +166,8 @@ def make_task_cmds():
|
||||
# but need a tiny model for each
|
||||
#
|
||||
# should have "{model_type.upper()}_TINY" corresponding vars defined, e.g., T5_TINY, etc.
|
||||
tasks2models = dict(
|
||||
trans=[
|
||||
tasks2models = {
|
||||
"trans": [
|
||||
"bart",
|
||||
"fsmt",
|
||||
"m2m_100",
|
||||
@@ -177,10 +177,10 @@ def make_task_cmds():
|
||||
"t5_v1",
|
||||
# "mt5", missing model files
|
||||
],
|
||||
sum=[
|
||||
"sum": [
|
||||
"pegasus",
|
||||
],
|
||||
clm=[
|
||||
"clm": [
|
||||
"big_bird",
|
||||
"bigbird_pegasus",
|
||||
"blenderbot",
|
||||
@@ -192,7 +192,7 @@ def make_task_cmds():
|
||||
"prophetnet",
|
||||
# "camembert", missing model files
|
||||
],
|
||||
mlm=[
|
||||
"mlm": [
|
||||
"albert",
|
||||
"deberta",
|
||||
"deberta-v2",
|
||||
@@ -203,7 +203,7 @@ def make_task_cmds():
|
||||
"layoutlm",
|
||||
# "reformer", # multiple issues with either mlm/qa/clas
|
||||
],
|
||||
qa=[
|
||||
"qa": [
|
||||
"led",
|
||||
"longformer",
|
||||
"mobilebert",
|
||||
@@ -213,7 +213,7 @@ def make_task_cmds():
|
||||
# "convbert", # missing tokenizer files
|
||||
# "layoutlmv2", missing model files
|
||||
],
|
||||
clas=[
|
||||
"clas": [
|
||||
"bert",
|
||||
"xlnet",
|
||||
# "hubert", # missing tokenizer files
|
||||
@@ -223,54 +223,54 @@ def make_task_cmds():
|
||||
# "openai-gpt", missing model files
|
||||
# "tapas", multiple issues
|
||||
],
|
||||
img_clas=[
|
||||
"img_clas": [
|
||||
"vit",
|
||||
],
|
||||
)
|
||||
}
|
||||
|
||||
scripts_dir = f"{ROOT_DIRECTORY}/examples/pytorch"
|
||||
|
||||
tasks = dict(
|
||||
trans=f"""
|
||||
tasks = {
|
||||
"trans": f"""
|
||||
{scripts_dir}/translation/run_translation.py
|
||||
--train_file {data_dir_wmt}/train.json
|
||||
--source_lang en
|
||||
--target_lang ro
|
||||
""",
|
||||
sum=f"""
|
||||
"sum": f"""
|
||||
{scripts_dir}/summarization/run_summarization.py
|
||||
--train_file {data_dir_xsum}/sample.json
|
||||
--max_source_length 12
|
||||
--max_target_length 12
|
||||
--lang en
|
||||
""",
|
||||
clm=f"""
|
||||
"clm": f"""
|
||||
{scripts_dir}/language-modeling/run_clm.py
|
||||
--train_file {FIXTURE_DIRECTORY}/sample_text.txt
|
||||
--block_size 8
|
||||
""",
|
||||
mlm=f"""
|
||||
"mlm": f"""
|
||||
{scripts_dir}/language-modeling/run_mlm.py
|
||||
--train_file {FIXTURE_DIRECTORY}/sample_text.txt
|
||||
""",
|
||||
qa=f"""
|
||||
"qa": f"""
|
||||
{scripts_dir}/question-answering/run_qa.py
|
||||
--train_file {data_dir_samples}/SQUAD/sample.json
|
||||
""",
|
||||
clas=f"""
|
||||
"clas": f"""
|
||||
{scripts_dir}/text-classification/run_glue.py
|
||||
--train_file {data_dir_samples}/MRPC/train.csv
|
||||
--max_seq_length 12
|
||||
--task_name MRPC
|
||||
""",
|
||||
img_clas=f"""
|
||||
"img_clas": f"""
|
||||
{scripts_dir}/image-classification/run_image_classification.py
|
||||
--dataset_name hf-internal-testing/cats_vs_dogs_sample
|
||||
--remove_unused_columns False
|
||||
--max_steps 10
|
||||
--image_processor_name {DS_TESTS_DIRECTORY}/vit_feature_extractor.json
|
||||
""",
|
||||
)
|
||||
}
|
||||
|
||||
launcher = get_launcher(distributed=True)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user