Apply ruff flake8-comprehensions (#21694)

2023-02-22 03:14:54 -05:00
parent df06fb1f0b
commit 5e8c8eb5ba
230 changed files with 971 additions and 955 deletions
--- a/tests/deepspeed/test_deepspeed.py
+++ b/tests/deepspeed/test_deepspeed.py
@@ -157,9 +157,13 @@ class CoreIntegrationDeepSpeed(TestCasePlus, TrainerIntegrationCommon):
        super().setUp()

        master_port = get_master_port(real_launcher=False)
-        self.dist_env_1_gpu = dict(
-            MASTER_ADDR="localhost", MASTER_PORT=master_port, RANK="0", LOCAL_RANK="0", WORLD_SIZE="1"
-        )
+        self.dist_env_1_gpu = {
+            "MASTER_ADDR": "localhost",
+            "MASTER_PORT": master_port,
+            "RANK": "0",
+            "LOCAL_RANK": "0",
+            "WORLD_SIZE": "1",
+        }

    def tearDown(self):
        super().tearDown()
@@ -212,14 +216,18 @@ class TrainerIntegrationDeepSpeedWithCustomConfig(TestCasePlus):
        self.batch_size = args.train_batch_size

        master_port = get_master_port(real_launcher=False)
-        self.dist_env_1_gpu = dict(
-            MASTER_ADDR="localhost", MASTER_PORT=master_port, RANK="0", LOCAL_RANK="0", WORLD_SIZE="1"
-        )
+        self.dist_env_1_gpu = {
+            "MASTER_ADDR": "localhost",
+            "MASTER_PORT": master_port,
+            "RANK": "0",
+            "LOCAL_RANK": "0",
+            "WORLD_SIZE": "1",
+        }

-        self.ds_config_file = dict(
-            zero2=f"{self.test_file_dir_str}/ds_config_zero2.json",
-            zero3=f"{self.test_file_dir_str}/ds_config_zero3.json",
-        )
+        self.ds_config_file = {
+            "zero2": f"{self.test_file_dir_str}/ds_config_zero2.json",
+            "zero3": f"{self.test_file_dir_str}/ds_config_zero3.json",
+        }

        # use self.get_config_dict(stage) to use these to ensure the original is not modified
        with io.open(self.ds_config_file[ZERO2], "r", encoding="utf-8") as f:
@@ -230,10 +238,10 @@ class TrainerIntegrationDeepSpeedWithCustomConfig(TestCasePlus):
            # It's in the file as a demo for users since we want everything to work out of the box even if slower.
            config_zero3["zero_optimization"]["stage3_gather_16bit_weights_on_model_save"] = False

-        self.ds_config_dict = dict(
-            zero2=config_zero2,
-            zero3=config_zero3,
-        )
+        self.ds_config_dict = {
+            "zero2": config_zero2,
+            "zero3": config_zero3,
+        }

    def tearDown(self):
        super().tearDown()
@@ -370,7 +378,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
            # this actually doesn't have to be on NVMe, any storage will do since this test only
            # runs a simple check that we can use some directory as if it were NVMe
            nvme_path = self.get_auto_remove_tmp_dir()
-            nvme_config = dict(device="nvme", nvme_path=nvme_path)
+            nvme_config = {"device": "nvme", "nvme_path": nvme_path}
            ds_config_zero3_dict = self.get_config_dict(ZERO3)
            ds_config_zero3_dict["zero_optimization"]["offload_optimizer"] = nvme_config
            ds_config_zero3_dict["zero_optimization"]["offload_param"] = nvme_config
@@ -415,7 +423,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
        # force cpu offload
        ds_config_dict["zero_optimization"]["offload_optimizer"]["device"] = "cpu"
        with mockenv_context(**self.dist_env_1_gpu):
-            kwargs = dict(local_rank=0, deepspeed=ds_config_dict)
+            kwargs = {"local_rank": 0, "deepspeed": ds_config_dict}
            kwargs[dtype] = True
            trainer = get_regression_trainer(**kwargs)
            with CaptureLogger(deepspeed_logger) as cl:
@@ -431,7 +439,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
        # it's run not as a first test as `sys.stdout` will no longer be the same. So we either have
        # to reset `deepspeed_logger.handlers[0].setStream(sys.stdout)` or directly capture from the deepspeed_logger.
        with mockenv_context(**self.dist_env_1_gpu):
-            kwargs = dict(local_rank=0, deepspeed=self.get_config_dict(stage))
+            kwargs = {"local_rank": 0, "deepspeed": self.get_config_dict(stage)}
            kwargs[dtype] = True
            trainer = get_regression_trainer(**kwargs)

@@ -449,15 +457,15 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
        # `self.lr_scheduler.get_last_lr()` and originally it'd fail on the very first step.
        with mockenv_context(**self.dist_env_1_gpu):
            a = b = 0.0
-            kwargs = dict(
-                a=a,
-                b=b,
-                local_rank=0,
-                train_len=8,
-                deepspeed=self.get_config_dict(stage),
-                per_device_train_batch_size=8,
-                logging_steps=1,
-            )
+            kwargs = {
+                "a": a,
+                "b": b,
+                "local_rank": 0,
+                "train_len": 8,
+                "deepspeed": self.get_config_dict(stage),
+                "per_device_train_batch_size": 8,
+                "logging_steps": 1,
+            }
            kwargs[dtype] = True
            trainer = get_regression_trainer(**kwargs)

@@ -494,13 +502,13 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
        train_len = 64
        a = b = 0.0

-        kwargs = dict(
-            a=a,
-            b=b,
-            local_rank=0,
-            train_len=train_len,
-            deepspeed=self.get_config_dict(stage),
-        )
+        kwargs = {
+            "a": a,
+            "b": b,
+            "local_rank": 0,
+            "train_len": train_len,
+            "deepspeed": self.get_config_dict(stage),
+        }
        kwargs[dtype] = True

        with mockenv_context(**self.dist_env_1_gpu):
@@ -583,11 +591,11 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T

        # save checkpoints
        with mockenv_context(**self.dist_env_1_gpu):
-            kwargs = dict(
-                output_dir=output_dir,
-                save_steps=freq,
-                deepspeed=ds_config_dict,
-            )
+            kwargs = {
+                "output_dir": output_dir,
+                "save_steps": freq,
+                "deepspeed": ds_config_dict,
+            }
            kwargs[dtype] = True
            trainer = get_regression_trainer(**kwargs)
            trainer.train()
@@ -600,7 +608,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
        with mockenv_context(**self.dist_env_1_gpu):
            ds_config_dict = self.get_config_dict(stage)
            output_dir = self.get_auto_remove_tmp_dir()
-            kwargs = dict(output_dir=output_dir, deepspeed=ds_config_dict)
+            kwargs = {"output_dir": output_dir, "deepspeed": ds_config_dict}
            kwargs[dtype] = True
            trainer = get_regression_trainer(**kwargs)

@@ -632,7 +640,13 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
        if stage == ZERO3:
            ds_config_dict["zero_optimization"]["stage3_gather_16bit_weights_on_model_save"] = True

-        kwargs = dict(output_dir=output_dir, train_len=128, save_steps=5, learning_rate=0.1, deepspeed=ds_config_dict)
+        kwargs = {
+            "output_dir": output_dir,
+            "train_len": 128,
+            "save_steps": 5,
+            "learning_rate": 0.1,
+            "deepspeed": ds_config_dict,
+        }
        kwargs[dtype] = True

        with mockenv_context(**self.dist_env_1_gpu):
@@ -679,16 +693,16 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T

        ds_config_dict = self.get_config_dict(stage)

-        kwargs = dict(
-            output_dir=output_dir,
-            train_len=4,
-            per_device_train_batch_size=4,
-            num_train_epochs=1,
-            save_strategy="steps",
-            save_steps=1,
-            learning_rate=0.1,
-            deepspeed=ds_config_dict,
-        )
+        kwargs = {
+            "output_dir": output_dir,
+            "train_len": 4,
+            "per_device_train_batch_size": 4,
+            "num_train_epochs": 1,
+            "save_strategy": "steps",
+            "save_steps": 1,
+            "learning_rate": 0.1,
+            "deepspeed": ds_config_dict,
+        }
        kwargs[dtype] = True

        with mockenv_context(**self.dist_env_1_gpu):
@@ -710,7 +724,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
        # test that we can switch from zero2 to zero3 in the same process for example
        # test is_zero, etc.
        output_dir = self.get_auto_remove_tmp_dir()
-        kwargs = dict(output_dir=output_dir, train_len=8, fp16=True)
+        kwargs = {"output_dir": output_dir, "train_len": 8, "fp16": True}

        ds_config_zero3_dict = self.get_config_dict(ZERO3)
        ds_config_zero2_dict = self.get_config_dict(ZERO2)
@@ -808,7 +822,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T

            def get_dataset():
                data_file = str(self.tests_dir / "fixtures/tests_samples/SQUAD/sample.json")
-                data_files = dict(train=data_file, validation=data_file)
+                data_files = {"train": data_file, "validation": data_file}
                raw_datasets = datasets.load_dataset("json", data_files=data_files, field="data")
                train_dataset = raw_datasets["train"].map(_add_eos_to_examples).map(_convert_to_features, batched=True)
                valid_dataset = deepcopy(train_dataset)
@@ -903,7 +917,14 @@ class TestDeepSpeedWithLauncher(TestCasePlus):

        do_train = True
        do_eval = False
-        kwargs = dict(stage=stage, dtype=dtype, eval_steps=1, distributed=True, do_train=do_train, do_eval=do_eval)
+        kwargs = {
+            "stage": stage,
+            "dtype": dtype,
+            "eval_steps": 1,
+            "distributed": True,
+            "do_train": do_train,
+            "do_eval": do_eval,
+        }

        # 1. normal training
        output_dir = self.run_and_check(**kwargs)
--- a/tests/deepspeed/test_model_zoo.py
+++ b/tests/deepspeed/test_model_zoo.py
@@ -166,8 +166,8 @@ def make_task_cmds():
    # but need a tiny model for each
    #
    # should have "{model_type.upper()}_TINY" corresponding vars defined, e.g., T5_TINY, etc.
-    tasks2models = dict(
-        trans=[
+    tasks2models = {
+        "trans": [
            "bart",
            "fsmt",
            "m2m_100",
@@ -177,10 +177,10 @@ def make_task_cmds():
            "t5_v1",
            # "mt5", missing model files
        ],
-        sum=[
+        "sum": [
            "pegasus",
        ],
-        clm=[
+        "clm": [
            "big_bird",
            "bigbird_pegasus",
            "blenderbot",
@@ -192,7 +192,7 @@ def make_task_cmds():
            "prophetnet",
            # "camembert", missing model files
        ],
-        mlm=[
+        "mlm": [
            "albert",
            "deberta",
            "deberta-v2",
@@ -203,7 +203,7 @@ def make_task_cmds():
            "layoutlm",
            # "reformer", # multiple issues with either mlm/qa/clas
        ],
-        qa=[
+        "qa": [
            "led",
            "longformer",
            "mobilebert",
@@ -213,7 +213,7 @@ def make_task_cmds():
            # "convbert", # missing tokenizer files
            # "layoutlmv2", missing model files
        ],
-        clas=[
+        "clas": [
            "bert",
            "xlnet",
            # "hubert", # missing tokenizer files
@@ -223,54 +223,54 @@ def make_task_cmds():
            # "openai-gpt", missing model files
            # "tapas", multiple issues
        ],
-        img_clas=[
+        "img_clas": [
            "vit",
        ],
-    )
+    }

    scripts_dir = f"{ROOT_DIRECTORY}/examples/pytorch"

-    tasks = dict(
-        trans=f"""
+    tasks = {
+        "trans": f"""
        {scripts_dir}/translation/run_translation.py
        --train_file {data_dir_wmt}/train.json
        --source_lang en
        --target_lang ro
        """,
-        sum=f"""
+        "sum": f"""
        {scripts_dir}/summarization/run_summarization.py
        --train_file {data_dir_xsum}/sample.json
        --max_source_length 12
        --max_target_length 12
        --lang en
        """,
-        clm=f"""
+        "clm": f"""
        {scripts_dir}/language-modeling/run_clm.py
        --train_file {FIXTURE_DIRECTORY}/sample_text.txt
        --block_size 8
        """,
-        mlm=f"""
+        "mlm": f"""
        {scripts_dir}/language-modeling/run_mlm.py
        --train_file {FIXTURE_DIRECTORY}/sample_text.txt
        """,
-        qa=f"""
+        "qa": f"""
        {scripts_dir}/question-answering/run_qa.py
        --train_file {data_dir_samples}/SQUAD/sample.json
        """,
-        clas=f"""
+        "clas": f"""
        {scripts_dir}/text-classification/run_glue.py
        --train_file {data_dir_samples}/MRPC/train.csv
        --max_seq_length 12
        --task_name MRPC
        """,
-        img_clas=f"""
+        "img_clas": f"""
        {scripts_dir}/image-classification/run_image_classification.py
            --dataset_name hf-internal-testing/cats_vs_dogs_sample
            --remove_unused_columns False
            --max_steps 10
            --image_processor_name {DS_TESTS_DIRECTORY}/vit_feature_extractor.json
        """,
-    )
+    }

    launcher = get_launcher(distributed=True)