Make tiny model creation + pipeline testing more robust (#22500)
* Final Tiny things

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -18,6 +18,7 @@ import collections.abc
|
||||
import copy
|
||||
import inspect
|
||||
import json
|
||||
import multiprocessing
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
@@ -679,12 +680,22 @@ def convert_processors(processors, tiny_config, output_folder, result):
|
||||
|
||||
if hasattr(tiny_config, "max_position_embeddings") and tiny_config.max_position_embeddings > 0:
|
||||
if fast_tokenizer is not None:
|
||||
if fast_tokenizer.__class__.__name__ in ["RobertaTokenizerFast", "XLMRobertaTokenizerFast"]:
|
||||
if fast_tokenizer.__class__.__name__ in [
|
||||
"RobertaTokenizerFast",
|
||||
"XLMRobertaTokenizerFast",
|
||||
"LongformerTokenizerFast",
|
||||
"MPNetTokenizerFast",
|
||||
]:
|
||||
fast_tokenizer.model_max_length = tiny_config.max_position_embeddings - 2
|
||||
else:
|
||||
fast_tokenizer.model_max_length = tiny_config.max_position_embeddings
|
||||
if slow_tokenizer is not None:
|
||||
if slow_tokenizer.__class__.__name__ in ["RobertaTokenizer", "XLMRobertaTokenizer"]:
|
||||
if slow_tokenizer.__class__.__name__ in [
|
||||
"RobertaTokenizer",
|
||||
"XLMRobertaTokenizer",
|
||||
"LongformerTokenizer",
|
||||
"MPNetTokenizer",
|
||||
]:
|
||||
slow_tokenizer.model_max_length = tiny_config.max_position_embeddings - 2
|
||||
else:
|
||||
slow_tokenizer.model_max_length = tiny_config.max_position_embeddings
|
||||
@@ -1047,6 +1058,10 @@ def build(config_class, models_to_create, output_dir):
|
||||
The directory to save all the checkpoints. Each model architecture will be saved in a subdirectory under
|
||||
it. Models in different frameworks with the same architecture will be saved in the same subdirectory.
|
||||
"""
|
||||
if data["training_ds"] is None or data["testing_ds"] is None:
|
||||
ds = load_dataset("wikitext", "wikitext-2-raw-v1")
|
||||
data["training_ds"] = ds["train"]
|
||||
data["testing_ds"] = ds["test"]
|
||||
|
||||
if config_class.model_type in [
|
||||
"encoder-decoder",
|
||||
@@ -1323,6 +1338,7 @@ def create_tiny_models(
|
||||
upload,
|
||||
organization,
|
||||
token,
|
||||
num_workers=1,
|
||||
):
|
||||
clone_path = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
|
||||
if os.getcwd() != clone_path:
|
||||
@@ -1343,10 +1359,6 @@ def create_tiny_models(
|
||||
pytorch_arch_mappings = [getattr(transformers_module, x) for x in _pytorch_arch_mappings]
|
||||
tensorflow_arch_mappings = [getattr(transformers_module, x) for x in _tensorflow_arch_mappings]
|
||||
|
||||
ds = load_dataset("wikitext", "wikitext-2-raw-v1")
|
||||
data["training_ds"] = ds["train"]
|
||||
data["testing_ds"] = ds["test"]
|
||||
|
||||
config_classes = CONFIG_MAPPING.values()
|
||||
if not all:
|
||||
config_classes = [CONFIG_MAPPING[model_type] for model_type in model_types]
|
||||
@@ -1363,11 +1375,19 @@ def create_tiny_models(
|
||||
to_create[c] = {"processor": processors, "pytorch": models, "tensorflow": tf_models}
|
||||
|
||||
results = {}
|
||||
for c, models_to_create in list(to_create.items()):
|
||||
print(f"Create models for {c.__name__} ...")
|
||||
result = build(c, models_to_create, output_dir=os.path.join(output_path, c.model_type))
|
||||
results[c.__name__] = result
|
||||
print("=" * 40)
|
||||
if num_workers <= 1:
|
||||
for c, models_to_create in list(to_create.items()):
|
||||
print(f"Create models for {c.__name__} ...")
|
||||
result = build(c, models_to_create, output_dir=os.path.join(output_path, c.model_type))
|
||||
results[c.__name__] = result
|
||||
print("=" * 40)
|
||||
else:
|
||||
all_build_args = []
|
||||
for c, models_to_create in list(to_create.items()):
|
||||
all_build_args.append((c, models_to_create, os.path.join(output_path, c.model_type)))
|
||||
with multiprocessing.Pool() as pool:
|
||||
results = pool.starmap(build, all_build_args)
|
||||
results = {buid_args[0].__name__: result for buid_args, result in zip(all_build_args, results)}
|
||||
|
||||
if upload:
|
||||
if organization is None:
|
||||
@@ -1426,9 +1446,8 @@ def create_tiny_models(
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
ds = load_dataset("wikitext", "wikitext-2-raw-v1")
|
||||
training_ds = ds["train"]
|
||||
testing_ds = ds["test"]
|
||||
# This has to be `spawn` to avoid hanging forever!
|
||||
multiprocessing.set_start_method("spawn")
|
||||
|
||||
def list_str(values):
|
||||
return values.split(",")
|
||||
@@ -1465,6 +1484,7 @@ if __name__ == "__main__":
|
||||
"--token", default=None, type=str, help="A valid authentication token for HuggingFace Hub with write access."
|
||||
)
|
||||
parser.add_argument("output_path", type=Path, help="Path indicating where to store generated model.")
|
||||
parser.add_argument("--num_workers", default=1, type=int, help="The number of workers to run.")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -1480,4 +1500,5 @@ if __name__ == "__main__":
|
||||
args.upload,
|
||||
args.organization,
|
||||
args.token,
|
||||
args.num_workers,
|
||||
)
|
||||
|
||||
@@ -21,8 +21,10 @@ version of `tests/utils/tiny_model_summary.json`. That updated file should be me
|
||||
"""
|
||||
|
||||
|
||||
import argparse
|
||||
import copy
|
||||
import json
|
||||
import multiprocessing
|
||||
import os
|
||||
import time
|
||||
|
||||
@@ -197,6 +199,13 @@ def update_tiny_model_summary_file(report_path):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--num_workers", default=1, type=int, help="The number of workers to run.")
|
||||
args = parser.parse_args()
|
||||
|
||||
# This has to be `spawn` to avoid hanging forever!
|
||||
multiprocessing.set_start_method("spawn")
|
||||
|
||||
output_path = "tiny_models"
|
||||
all = True
|
||||
model_types = None
|
||||
@@ -214,6 +223,7 @@ if __name__ == "__main__":
|
||||
upload,
|
||||
organization,
|
||||
token=os.environ.get("TOKEN", None),
|
||||
num_workers=args.num_workers,
|
||||
)
|
||||
|
||||
update_tiny_model_summary_file(report_path=os.path.join(output_path, "reports"))
|
||||
|
||||
Reference in New Issue
Block a user