Auto modelcard (#11599)
* Autogenerate model cards from the Trainer * ModelCard deprecated * Fix test * Style * Apply suggestions from code review Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com> * Address review comments * Quality * With all metadata * Metadata * Post-merge conflict mess * Data args and all examples * Default license and languages when possible Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
This commit is contained in:
@@ -447,7 +447,16 @@ def main():
|
|||||||
trainer.save_metrics("eval", metrics)
|
trainer.save_metrics("eval", metrics)
|
||||||
|
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
trainer.push_to_hub()
|
kwargs = {"finetuned_from": model_args.model_name_or_path, "tags": "text-generation"}
|
||||||
|
if data_args.dataset_name is not None:
|
||||||
|
kwargs["dataset_tags"] = data_args.dataset_name
|
||||||
|
if data_args.dataset_config_name is not None:
|
||||||
|
kwargs["dataset_args"] = data_args.dataset_config_name
|
||||||
|
kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
|
||||||
|
else:
|
||||||
|
kwargs["dataset"] = data_args.dataset_name
|
||||||
|
|
||||||
|
trainer.push_to_hub(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
def _mp_fn(index):
|
def _mp_fn(index):
|
||||||
|
|||||||
@@ -476,7 +476,16 @@ def main():
|
|||||||
trainer.save_metrics("eval", metrics)
|
trainer.save_metrics("eval", metrics)
|
||||||
|
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
trainer.push_to_hub()
|
kwargs = {"finetuned_from": model_args.model_name_or_path, "tags": "fill-mask"}
|
||||||
|
if data_args.dataset_name is not None:
|
||||||
|
kwargs["dataset_tags"] = data_args.dataset_name
|
||||||
|
if data_args.dataset_config_name is not None:
|
||||||
|
kwargs["dataset_args"] = data_args.dataset_config_name
|
||||||
|
kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
|
||||||
|
else:
|
||||||
|
kwargs["dataset"] = data_args.dataset_name
|
||||||
|
|
||||||
|
trainer.push_to_hub(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
def _mp_fn(index):
|
def _mp_fn(index):
|
||||||
|
|||||||
@@ -452,7 +452,16 @@ def main():
|
|||||||
trainer.save_metrics("eval", metrics)
|
trainer.save_metrics("eval", metrics)
|
||||||
|
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
trainer.push_to_hub()
|
kwargs = {"finetuned_from": model_args.model_name_or_path, "tags": "language-modeling"}
|
||||||
|
if data_args.dataset_name is not None:
|
||||||
|
kwargs["dataset_tags"] = data_args.dataset_name
|
||||||
|
if data_args.dataset_config_name is not None:
|
||||||
|
kwargs["dataset_args"] = data_args.dataset_config_name
|
||||||
|
kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
|
||||||
|
else:
|
||||||
|
kwargs["dataset"] = data_args.dataset_name
|
||||||
|
|
||||||
|
trainer.push_to_hub(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
def _mp_fn(index):
|
def _mp_fn(index):
|
||||||
|
|||||||
@@ -428,7 +428,14 @@ def main():
|
|||||||
trainer.save_metrics("eval", metrics)
|
trainer.save_metrics("eval", metrics)
|
||||||
|
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
trainer.push_to_hub()
|
trainer.push_to_hub(
|
||||||
|
finetuned_from=model_args.model_name_or_path,
|
||||||
|
tags="multiple-choice",
|
||||||
|
dataset_tags="swag",
|
||||||
|
dataset_args="regular",
|
||||||
|
dataset="SWAG",
|
||||||
|
language="en",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _mp_fn(index):
|
def _mp_fn(index):
|
||||||
|
|||||||
@@ -601,7 +601,16 @@ def main():
|
|||||||
trainer.save_metrics("predict", metrics)
|
trainer.save_metrics("predict", metrics)
|
||||||
|
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
trainer.push_to_hub()
|
kwargs = {"finetuned_from": model_args.model_name_or_path, "tags": "question-answering"}
|
||||||
|
if data_args.dataset_name is not None:
|
||||||
|
kwargs["dataset_tags"] = data_args.dataset_name
|
||||||
|
if data_args.dataset_config_name is not None:
|
||||||
|
kwargs["dataset_args"] = data_args.dataset_config_name
|
||||||
|
kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
|
||||||
|
else:
|
||||||
|
kwargs["dataset"] = data_args.dataset_name
|
||||||
|
|
||||||
|
trainer.push_to_hub(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
def _mp_fn(index):
|
def _mp_fn(index):
|
||||||
|
|||||||
@@ -640,7 +640,16 @@ def main():
|
|||||||
trainer.save_metrics("predict", metrics)
|
trainer.save_metrics("predict", metrics)
|
||||||
|
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
trainer.push_to_hub()
|
kwargs = {"finetuned_from": model_args.model_name_or_path, "tags": "question-answering"}
|
||||||
|
if data_args.dataset_name is not None:
|
||||||
|
kwargs["dataset_tags"] = data_args.dataset_name
|
||||||
|
if data_args.dataset_config_name is not None:
|
||||||
|
kwargs["dataset_args"] = data_args.dataset_config_name
|
||||||
|
kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
|
||||||
|
else:
|
||||||
|
kwargs["dataset"] = data_args.dataset_name
|
||||||
|
|
||||||
|
trainer.push_to_hub(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
def _mp_fn(index):
|
def _mp_fn(index):
|
||||||
|
|||||||
@@ -583,7 +583,16 @@ def main():
|
|||||||
writer.write("\n".join(predictions))
|
writer.write("\n".join(predictions))
|
||||||
|
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
trainer.push_to_hub()
|
kwargs = {"finetuned_from": model_args.model_name_or_path, "tags": "summarization"}
|
||||||
|
if data_args.dataset_name is not None:
|
||||||
|
kwargs["dataset_tags"] = data_args.dataset_name
|
||||||
|
if data_args.dataset_config_name is not None:
|
||||||
|
kwargs["dataset_args"] = data_args.dataset_config_name
|
||||||
|
kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
|
||||||
|
else:
|
||||||
|
kwargs["dataset"] = data_args.dataset_name
|
||||||
|
|
||||||
|
trainer.push_to_hub(**kwargs)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|||||||
@@ -516,7 +516,14 @@ def main():
|
|||||||
writer.write(f"{index}\t{item}\n")
|
writer.write(f"{index}\t{item}\n")
|
||||||
|
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
trainer.push_to_hub()
|
kwargs = {"finetuned_from": model_args.model_name_or_path, "tags": "text-classification"}
|
||||||
|
if data_args.task_name is not None:
|
||||||
|
kwargs["language"] = "en"
|
||||||
|
kwargs["dataset_tags"] = "glue"
|
||||||
|
kwargs["dataset_args"] = data_args.task_name
|
||||||
|
kwargs["dataset"] = f"GLUE {data_args.task_name.upper()}"
|
||||||
|
|
||||||
|
trainer.push_to_hub(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
def _mp_fn(index):
|
def _mp_fn(index):
|
||||||
|
|||||||
@@ -491,7 +491,16 @@ def main():
|
|||||||
writer.write(" ".join(prediction) + "\n")
|
writer.write(" ".join(prediction) + "\n")
|
||||||
|
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
trainer.push_to_hub()
|
kwargs = {"finetuned_from": model_args.model_name_or_path, "tags": "token-classification"}
|
||||||
|
if data_args.dataset_name is not None:
|
||||||
|
kwargs["dataset_tags"] = data_args.dataset_name
|
||||||
|
if data_args.dataset_config_name is not None:
|
||||||
|
kwargs["dataset_args"] = data_args.dataset_config_name
|
||||||
|
kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
|
||||||
|
else:
|
||||||
|
kwargs["dataset"] = data_args.dataset_name
|
||||||
|
|
||||||
|
trainer.push_to_hub(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
def _mp_fn(index):
|
def _mp_fn(index):
|
||||||
|
|||||||
@@ -575,7 +575,20 @@ def main():
|
|||||||
writer.write("\n".join(predictions))
|
writer.write("\n".join(predictions))
|
||||||
|
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
trainer.push_to_hub()
|
kwargs = {"finetuned_from": model_args.model_name_or_path, "tags": "translation"}
|
||||||
|
if data_args.dataset_name is not None:
|
||||||
|
kwargs["dataset_tags"] = data_args.dataset_name
|
||||||
|
if data_args.dataset_config_name is not None:
|
||||||
|
kwargs["dataset_args"] = data_args.dataset_config_name
|
||||||
|
kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
|
||||||
|
else:
|
||||||
|
kwargs["dataset"] = data_args.dataset_name
|
||||||
|
|
||||||
|
languages = [l for l in [data_args.source_lang, data_args.target_lang] if l is not None]
|
||||||
|
if len(languages) > 0:
|
||||||
|
kwargs["language"] = languages
|
||||||
|
|
||||||
|
trainer.push_to_hub(**kwargs)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|||||||
@@ -18,7 +18,15 @@
|
|||||||
import copy
|
import copy
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import warnings
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from huggingface_hub import HfApi
|
||||||
|
|
||||||
|
from . import __version__
|
||||||
from .file_utils import (
|
from .file_utils import (
|
||||||
CONFIG_NAME,
|
CONFIG_NAME,
|
||||||
MODEL_CARD_NAME,
|
MODEL_CARD_NAME,
|
||||||
@@ -26,9 +34,14 @@ from .file_utils import (
|
|||||||
WEIGHTS_NAME,
|
WEIGHTS_NAME,
|
||||||
cached_path,
|
cached_path,
|
||||||
hf_bucket_url,
|
hf_bucket_url,
|
||||||
|
is_datasets_available,
|
||||||
|
is_offline_mode,
|
||||||
is_remote_url,
|
is_remote_url,
|
||||||
|
is_tokenizers_available,
|
||||||
|
is_torch_available,
|
||||||
)
|
)
|
||||||
from .models.auto.configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP
|
from .models.auto.configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP
|
||||||
|
from .training_args import ParallelMode
|
||||||
from .utils import logging
|
from .utils import logging
|
||||||
|
|
||||||
|
|
||||||
@@ -49,6 +62,9 @@ class ModelCard:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
|
warnings.warn(
|
||||||
|
"The class `ModelCard` is deprecated and will be removed in version 5 of Transformers", FutureWarning
|
||||||
|
)
|
||||||
# Recommended attributes from https://arxiv.org/abs/1810.03993 (see papers)
|
# Recommended attributes from https://arxiv.org/abs/1810.03993 (see papers)
|
||||||
self.model_details = kwargs.pop("model_details", {})
|
self.model_details = kwargs.pop("model_details", {})
|
||||||
self.intended_use = kwargs.pop("intended_use", {})
|
self.intended_use = kwargs.pop("intended_use", {})
|
||||||
@@ -218,3 +234,403 @@ class ModelCard:
|
|||||||
"""Save this instance to a json file."""
|
"""Save this instance to a json file."""
|
||||||
with open(json_file_path, "w", encoding="utf-8") as writer:
|
with open(json_file_path, "w", encoding="utf-8") as writer:
|
||||||
writer.write(self.to_json_string())
|
writer.write(self.to_json_string())
|
||||||
|
|
||||||
|
|
||||||
|
AUTOGENERATED_COMMENT = """
|
||||||
|
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||||
|
should probably proofread and complete it, then remove this comment. -->
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
TASK_TAG_TO_NAME_MAPPING = {
|
||||||
|
"fill-mask": "Masked Language Modeling",
|
||||||
|
"multiple-choice": "Multiple Choice",
|
||||||
|
"question-answering": "Question Answering",
|
||||||
|
"summarization": "Summarization",
|
||||||
|
"text-classification": "Text Classification",
|
||||||
|
"text-generation": "Causal Language Modeling",
|
||||||
|
"text2text-generation": "Sequence-to-sequence Language Modeling",
|
||||||
|
"token-classification": "Token Classification",
|
||||||
|
"translation": "Translation",
|
||||||
|
"zero-shot-classification": "Zero Shot Classification",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
METRIC_TAGS = [
|
||||||
|
"accuracy",
|
||||||
|
"bleu",
|
||||||
|
"f1",
|
||||||
|
"matthews_correlation",
|
||||||
|
"pearsonr",
|
||||||
|
"precision",
|
||||||
|
"recall",
|
||||||
|
"rouge",
|
||||||
|
"sacrebleu",
|
||||||
|
"spearmanr",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _listify(obj):
|
||||||
|
if obj is None:
|
||||||
|
return []
|
||||||
|
elif isinstance(obj, str):
|
||||||
|
return [obj]
|
||||||
|
else:
|
||||||
|
return obj
|
||||||
|
|
||||||
|
|
||||||
|
def _list_possibilities(name, tags):
|
||||||
|
if tags is None:
|
||||||
|
return ""
|
||||||
|
if isinstance(tags, str):
|
||||||
|
tags = [tags]
|
||||||
|
if len(tags) == 0:
|
||||||
|
return ""
|
||||||
|
name_tags = [f"- {tag}" for tag in tags]
|
||||||
|
return f"{name}:\n" + "\n".join(name_tags) + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
def infer_metric_tags_from_eval_results(eval_results):
    """
    Map known metric tags to the keys under which they appear in `eval_results`.

    A key matches when its lower-cased form, with spaces replaced by underscores, is listed in `METRIC_TAGS`; a key
    equal to "rouge1" (case-insensitive) is reported under the "rouge" tag. Returns an empty dict when
    `eval_results` is `None`.
    """
    if eval_results is None:
        return {}

    tag_to_name = {}
    for name in eval_results.keys():
        normalized = name.lower().replace(" ", "_")
        if normalized in METRIC_TAGS:
            tag_to_name[normalized] = name
        elif name.lower() == "rouge1":
            tag_to_name["rouge"] = name
    return tag_to_name
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TrainingSummary:
    """
    Everything needed to render an auto-generated model card (README with YAML front matter) for a training run.

    Fields typed `Optional[Union[str, List[str]]]` accept either a single value or a list of values.
    """

    model_name: str
    language: Optional[Union[str, List[str]]] = None
    license: Optional[str] = None
    tags: Optional[Union[str, List[str]]] = None
    finetuned_from: Optional[str] = None
    dataset: Optional[Union[str, List[str]]] = None
    dataset_tags: Optional[Union[str, List[str]]] = None
    dataset_args: Optional[Union[str, List[str]]] = None
    eval_results: Optional[Dict[str, float]] = None
    # Rows of the "Training results" table; handed as-is to `make_markdown_table`.
    eval_lines: Optional[List[str]] = None
    hyperparameters: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        # Infer default license from the checkpoint used, if possible.
        if self.license is None and not is_offline_mode() and self.finetuned_from is not None:
            try:
                model_info = HfApi().model_info(self.finetuned_from)
                for tag in model_info.tags:
                    if tag.startswith("license:"):
                        self.license = tag[8:]
            except requests.exceptions.RequestException:
                # Best effort only: any request failure (HTTP error status, connection problem, timeout) simply
                # leaves the license unset instead of aborting the creation of the model card.
                pass

    def create_model_index(self, metric_mapping):
        """
        Build the `model-index` section of the YAML metadata.

        `metric_mapping` maps a metric tag to the key of that metric in `self.eval_results`. One result entry is
        written per (task, dataset) combination, each listing every metric of `metric_mapping`.
        """
        model_index = f"model-index:\n- name: {self.model_name}\n"

        # Dataset mapping tag -> name
        dataset_names = _listify(self.dataset)
        dataset_tags = _listify(self.dataset_tags)
        dataset_args = _listify(self.dataset_args)
        if len(dataset_args) < len(dataset_tags):
            # Pad so that every dataset tag has an (optional) args entry.
            dataset_args = dataset_args + [None] * (len(dataset_tags) - len(dataset_args))
        dataset_mapping = dict(zip(dataset_tags, dataset_names))
        dataset_arg_mapping = dict(zip(dataset_tags, dataset_args))

        task_mapping = {
            tag: TASK_TAG_TO_NAME_MAPPING[tag] for tag in _listify(self.tags) if tag in TASK_TAG_TO_NAME_MAPPING
        }

        if len(task_mapping) == 0 and len(dataset_mapping) == 0:
            return model_index
        # Use a single `None` placeholder on the missing side so the product below still yields entries.
        if len(task_mapping) == 0:
            task_mapping = {None: None}
        if len(dataset_mapping) == 0:
            dataset_mapping = {None: None}
        all_possibilities = [(task_tag, ds_tag) for task_tag in task_mapping for ds_tag in dataset_mapping]

        # The leading spaces below are significant: they nest `results` under the `- name:` entry and each
        # task/dataset/metrics block under its result item, which is what makes the output valid YAML.
        model_index += "  results:\n"
        for task_tag, ds_tag in all_possibilities:
            result = ""
            if task_tag is not None:
                result += f"  - task:\n      name: {task_mapping[task_tag]}\n      type: {task_tag}\n"
            if ds_tag is not None:
                # The dataset opens the list item itself when there is no task block before it.
                prefix = "  - " if task_tag is None else "    "
                result += f"{prefix}dataset:\n      name: {dataset_mapping[ds_tag]}\n      type: {ds_tag}\n"
                if dataset_arg_mapping[ds_tag] is not None:
                    result += f"      args: {dataset_arg_mapping[ds_tag]}\n"
            if len(metric_mapping) > 0:
                result += "    metrics:\n"
                for metric_tag, metric_name in metric_mapping.items():
                    value = self.eval_results[metric_name]
                    result += f"      - name: {metric_name}\n        type: {metric_tag}\n        value: {value}\n"

            model_index += result

        return model_index

    def to_model_card(self):
        """
        Render the full model card (YAML metadata followed by the Markdown body) and return it as a string.
        """
        model_card = ""

        metric_mapping = infer_metric_tags_from_eval_results(self.eval_results)

        # Metadata
        metadata = ""
        metadata += _list_possibilities("language", self.language)
        if self.license is not None:
            metadata += f"license: {self.license}\n"
        metadata += _list_possibilities("tags", self.tags)
        metadata += _list_possibilities("datasets", self.dataset_tags)
        metadata += _list_possibilities("metrics", list(metric_mapping.keys()))
        metadata += "\n" + self.create_model_index(metric_mapping)
        if len(metadata) > 0:
            model_card = f"---\n{metadata}---\n"

        # Now the model card for realsies.
        model_card += AUTOGENERATED_COMMENT

        model_card += f"\n# {self.model_name}\n\n"

        if self.finetuned_from is None:
            model_card += "This model was trained from scratch on "
        else:
            model_card += f"This model is a fine-tuned version of [{self.finetuned_from}](https://huggingface.co/{self.finetuned_from}) on "

        # Accept a single name or a list of names; an empty list is treated like a missing dataset.
        if self.dataset is None:
            dataset_names = []
        elif isinstance(self.dataset, str):
            dataset_names = [self.dataset]
        else:
            dataset_names = list(self.dataset)

        if len(dataset_names) == 0:
            model_card += "an unknown dataset."
        elif len(dataset_names) == 1:
            model_card += f"the {dataset_names[0]} dataset."
        else:
            model_card += (
                ", ".join([f"the {ds}" for ds in dataset_names[:-1]]) + f" and the {dataset_names[-1]} datasets."
            )

        if self.eval_results is not None:
            model_card += "\nIt achieves the following results on the evaluation set:\n"
            model_card += "\n".join([f"- {name}: {_maybe_round(value)}" for name, value in self.eval_results.items()])
        model_card += "\n"

        model_card += "\n## Model description\n\nMore information needed\n"
        model_card += "\n## Intended uses & limitations\n\nMore information needed\n"
        model_card += "\n## Training and evaluation data\n\nMore information needed\n"

        model_card += "\n## Training procedure\n"
        model_card += "\n### Training hyperparameters\n"
        if self.hyperparameters is not None:
            model_card += "\nThe following hyperparameters were used during training:\n"
            model_card += "\n".join([f"- {name}: {value}" for name, value in self.hyperparameters.items()])
            model_card += "\n"
        else:
            model_card += "\nMore information needed\n"

        if self.eval_lines is not None:
            model_card += "\n### Training results\n\n"
            model_card += make_markdown_table(self.eval_lines)
            model_card += "\n"

        model_card += "\n### Framework versions\n\n"
        model_card += f"- Transformers {__version__}\n"
        if is_torch_available():
            import torch

            model_card += f"- Pytorch {torch.__version__}\n"
        if is_datasets_available():
            import datasets

            model_card += f"- Datasets {datasets.__version__}\n"
        if is_tokenizers_available():
            import tokenizers

            model_card += f"- Tokenizers {tokenizers.__version__}\n"

        return model_card

    @classmethod
    def from_trainer(
        cls,
        trainer,
        language=None,
        license=None,
        tags=None,
        model_name=None,
        finetuned_from=None,
        dataset_tags=None,
        dataset=None,
        dataset_args=None,
    ):
        """
        Build a summary from `trainer`: evaluation results come from `trainer.state.log_history`, hyperparameters
        from the trainer itself, and `model_name` defaults to the last component of `trainer.args.output_dir`.
        """
        # TODO (Sylvain) Add a default for `pipeline-tag` inferred from the model.
        if model_name is None:
            model_name = Path(trainer.args.output_dir).name

        _, eval_lines, eval_results = parse_log_history(trainer.state.log_history)
        hyperparameters = extract_hyperparameters_from_trainer(trainer)

        return cls(
            language=language,
            license=license,
            tags=tags,
            model_name=model_name,
            finetuned_from=finetuned_from,
            dataset_tags=dataset_tags,
            dataset=dataset,
            dataset_args=dataset_args,
            eval_results=eval_results,
            eval_lines=eval_lines,
            hyperparameters=hyperparameters,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def parse_log_history(log_history):
    """
    Parse the `log_history` of a Trainer to get the intermediate and final evaluation results.

    Returns a tuple `(train_log, lines, eval_results)`:

    - `train_log` is the entry containing "train_runtime", or `None` when there is no such entry;
    - `lines` is one dict per intermediate evaluation that precedes `train_log` (training loss, epoch, step and
      the evaluation metrics under display names), or `None` when there is no training entry;
    - `eval_results` is built from the last entry containing "eval_loss", or is `None` when there is none. When
      there is no training entry that evaluation entry is returned unmodified.
    """
    # Locate the entry logged at the end of training.
    idx = 0
    while idx < len(log_history) and "train_runtime" not in log_history[idx]:
        idx += 1

    # If there are no training logs
    if idx == len(log_history):
        idx -= 1
        while idx >= 0 and "eval_loss" not in log_history[idx]:
            idx -= 1

        # Index 0 is a valid hit; only -1 means that no evaluation entry was found.
        if idx >= 0:
            return None, None, log_history[idx]
        return None, None, None

    # From now on we can assume we have training logs:
    train_log = log_history[idx]
    lines = []
    training_loss = "No log"
    for entry in log_history[:idx]:
        if "loss" in entry:
            # Remember the latest training loss so it can be reported next to the following evaluation.
            training_loss = entry["loss"]
        if "eval_loss" in entry:
            metrics = entry.copy()
            metrics.pop("total_flos", None)
            epoch = metrics.pop("epoch", None)
            step = metrics.pop("step", None)
            metrics.pop("eval_runtime", None)
            metrics.pop("eval_samples_per_second", None)
            values = {"Training Loss": training_loss, "Epoch": epoch, "Step": step}
            for k, v in metrics.items():
                if k == "eval_loss":
                    values["Validation Loss"] = v
                else:
                    # Drop the first "_"-separated part of the key (e.g. "eval") and title-case the rest.
                    splits = k.split("_")
                    name = " ".join([part.capitalize() for part in splits[1:]])
                    values[name] = v
            lines.append(values)

    # The final evaluation results are the last evaluation entry of the whole history.
    idx = len(log_history) - 1
    while idx >= 0 and "eval_loss" not in log_history[idx]:
        idx -= 1

    if idx < 0:
        return train_log, lines, None

    eval_results = {}
    for key, value in log_history[idx].items():
        if key.startswith("eval_"):
            key = key[5:]
        if key not in ["runtime", "samples_per_second", "epoch", "step"]:
            camel_cased_key = " ".join([part.capitalize() for part in key.split("_")])
            eval_results[camel_cased_key] = value
    return train_log, lines, eval_results
|
||||||
|
|
||||||
|
|
||||||
|
def _maybe_round(v, decimals=4):
|
||||||
|
if isinstance(v, float) and len(str(v).split(".")) > 1 and len(str(v).split(".")[1]) > decimals:
|
||||||
|
return f"{v:.{decimals}f}"
|
||||||
|
return str(v)
|
||||||
|
|
||||||
|
|
||||||
|
def _regular_table_line(values, col_widths):
|
||||||
|
values_with_space = [f"| {v}" + " " * (w - len(v) + 1) for v, w in zip(values, col_widths)]
|
||||||
|
return "".join(values_with_space) + "|\n"
|
||||||
|
|
||||||
|
|
||||||
|
def _second_table_line(col_widths):
|
||||||
|
values = ["|:" + "-" * w + ":" for w in col_widths]
|
||||||
|
return "".join(values) + "|\n"
|
||||||
|
|
||||||
|
|
||||||
|
def make_markdown_table(lines):
    """
    Create a nice Markdown table from the results in `lines`.

    `lines` is a list of dicts; the keys of the first dict provide the header. Returns an empty string when `lines`
    is `None` or empty.
    """
    if lines is None or len(lines) == 0:
        return ""

    header = lines[0]
    # Each column is as wide as its longest cell, header included.
    col_widths = {key: len(str(key)) for key in header.keys()}
    for row in lines:
        for key, value in row.items():
            col_widths[key] = max(col_widths[key], len(_maybe_round(value)))
    widths = list(col_widths.values())

    rendered = [_regular_table_line(list(header.keys()), widths), _second_table_line(widths)]
    for row in lines:
        rendered.append(_regular_table_line([_maybe_round(v) for v in row.values()], widths))
    return "".join(rendered)
|
||||||
|
|
||||||
|
|
||||||
|
_TRAINING_ARGS_KEYS = [
|
||||||
|
"learning_rate",
|
||||||
|
"train_batch_size",
|
||||||
|
"eval_batch_size",
|
||||||
|
"seed",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def extract_hyperparameters_from_trainer(trainer):
    """
    Collect the hyperparameters reported in the model card from `trainer.args`.

    Returns a dict whose insertion order is the order in which the entries are listed. Entries that only matter in
    some setups (distributed training, gradient accumulation, warmup, mixed precision, label smoothing) are added
    only when the corresponding argument differs from its neutral value.
    """
    args = trainer.args
    hyperparameters = {key: getattr(args, key) for key in _TRAINING_ARGS_KEYS}

    single_process_modes = [ParallelMode.NOT_PARALLEL, ParallelMode.NOT_DISTRIBUTED]
    if args.parallel_mode not in single_process_modes:
        if args.parallel_mode == ParallelMode.DISTRIBUTED:
            distributed_type = "multi-GPU"
        else:
            distributed_type = args.parallel_mode.value
        hyperparameters["distributed_type"] = distributed_type
    if args.world_size > 1:
        hyperparameters["num_devices"] = args.world_size
    if args.gradient_accumulation_steps > 1:
        hyperparameters["gradient_accumulation_steps"] = args.gradient_accumulation_steps

    # Only report the total batch sizes when they differ from the batch sizes already listed.
    total_train_batch_size = args.train_batch_size * args.world_size * args.gradient_accumulation_steps
    if total_train_batch_size != hyperparameters["train_batch_size"]:
        hyperparameters["total_train_batch_size"] = total_train_batch_size
    total_eval_batch_size = args.eval_batch_size * args.world_size
    if total_eval_batch_size != hyperparameters["eval_batch_size"]:
        hyperparameters["total_eval_batch_size"] = total_eval_batch_size

    if args.adafactor:
        optimizer_description = "Adafactor"
    else:
        optimizer_description = (
            f"Adam with betas=({args.adam_beta1},{args.adam_beta2}) and epsilon={args.adam_epsilon}"
        )
    hyperparameters["optimizer"] = optimizer_description

    hyperparameters["lr_scheduler_type"] = args.lr_scheduler_type.value
    if args.warmup_ratio != 0.0:
        hyperparameters["lr_scheduler_warmup_ratio"] = args.warmup_ratio
    if args.warmup_steps != 0.0:
        hyperparameters["lr_scheduler_warmup_steps"] = args.warmup_steps

    if args.max_steps != -1:
        hyperparameters["training_steps"] = args.max_steps
    else:
        hyperparameters["num_epochs"] = args.num_train_epochs

    if args.fp16:
        # NOTE(review): `use_amp` is read as a public attribute but `_use_apex` as a private one - confirm both
        # names against the Trainer class.
        if trainer.use_amp:
            hyperparameters["mixed_precision_training"] = "Native AMP"
        elif trainer._use_apex:
            hyperparameters["mixed_precision_training"] = f"Apex, opt level {args.fp16_opt_level}"

    if args.label_smoothing_factor != 0.0:
        hyperparameters["label_smoothing_factor"] = args.label_smoothing_factor

    return hyperparameters
|
||||||
|
|||||||
@@ -22,7 +22,6 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
|
|||||||
from ..configuration_utils import PretrainedConfig
|
from ..configuration_utils import PretrainedConfig
|
||||||
from ..feature_extraction_utils import PreTrainedFeatureExtractor
|
from ..feature_extraction_utils import PreTrainedFeatureExtractor
|
||||||
from ..file_utils import is_tf_available, is_torch_available
|
from ..file_utils import is_tf_available, is_torch_available
|
||||||
from ..modelcard import ModelCard
|
|
||||||
from ..models.auto.configuration_auto import AutoConfig
|
from ..models.auto.configuration_auto import AutoConfig
|
||||||
from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
|
from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
|
||||||
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
|
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
|
||||||
@@ -384,12 +383,6 @@ def pipeline(
|
|||||||
model = get_default_model(targeted_task, framework, task_options)
|
model = get_default_model(targeted_task, framework, task_options)
|
||||||
|
|
||||||
model_name = model if isinstance(model, str) else None
|
model_name = model if isinstance(model, str) else None
|
||||||
modelcard = None
|
|
||||||
# Try to infer modelcard from model or config name (if provided as str)
|
|
||||||
if isinstance(model, str):
|
|
||||||
modelcard = model
|
|
||||||
elif isinstance(config, str):
|
|
||||||
modelcard = config
|
|
||||||
|
|
||||||
# Infer the framework form the model
|
# Infer the framework form the model
|
||||||
if framework is None:
|
if framework is None:
|
||||||
@@ -404,10 +397,6 @@ def pipeline(
|
|||||||
if isinstance(config, str):
|
if isinstance(config, str):
|
||||||
config = AutoConfig.from_pretrained(config, revision=revision, _from_pipeline=task, **model_kwargs)
|
config = AutoConfig.from_pretrained(config, revision=revision, _from_pipeline=task, **model_kwargs)
|
||||||
|
|
||||||
# Instantiate modelcard if needed
|
|
||||||
if isinstance(modelcard, str):
|
|
||||||
modelcard = ModelCard.from_pretrained(modelcard, revision=revision, _from_pipeline=task)
|
|
||||||
|
|
||||||
# Instantiate model if needed
|
# Instantiate model if needed
|
||||||
if isinstance(model, str):
|
if isinstance(model, str):
|
||||||
# Handle transparent TF/PT model conversion
|
# Handle transparent TF/PT model conversion
|
||||||
@@ -504,10 +493,4 @@ def pipeline(
|
|||||||
if feature_extractor is not None:
|
if feature_extractor is not None:
|
||||||
kwargs["feature_extractor"] = feature_extractor
|
kwargs["feature_extractor"] = feature_extractor
|
||||||
|
|
||||||
return task_class(
|
return task_class(model=model, framework=framework, task=task, **kwargs)
|
||||||
model=model,
|
|
||||||
modelcard=modelcard,
|
|
||||||
framework=framework,
|
|
||||||
task=task,
|
|
||||||
**kwargs,
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -74,6 +74,7 @@ from .file_utils import (
|
|||||||
is_torch_tpu_available,
|
is_torch_tpu_available,
|
||||||
is_training_run_on_sagemaker,
|
is_training_run_on_sagemaker,
|
||||||
)
|
)
|
||||||
|
from .modelcard import TrainingSummary
|
||||||
from .modeling_utils import PreTrainedModel, unwrap_model
|
from .modeling_utils import PreTrainedModel, unwrap_model
|
||||||
from .optimization import Adafactor, AdamW, get_scheduler
|
from .optimization import Adafactor, AdamW, get_scheduler
|
||||||
from .tokenization_utils_base import PreTrainedTokenizerBase
|
from .tokenization_utils_base import PreTrainedTokenizerBase
|
||||||
@@ -2381,25 +2382,49 @@ class Trainer:
|
|||||||
else:
|
else:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
def create_model_card(
    self,
    language: Optional[Union[str, List[str]]] = None,
    license: Optional[str] = None,
    tags: Optional[Union[str, List[str]]] = None,
    model_name: Optional[str] = None,
    finetuned_from: Optional[str] = None,
    dataset_tags: Optional[Union[str, List[str]]] = None,
    dataset: Optional[Union[str, List[str]]] = None,
    dataset_args: Optional[Union[str, List[str]]] = None,
):
    """
    Create a draft of a model card from the information available to the Trainer and write it to
    :obj:`README.md` inside :obj:`self.args.output_dir`.

    All arguments are forwarded unchanged to :obj:`TrainingSummary.from_trainer`.

    Parameters:
        language (:obj:`str` or :obj:`List[str]`, `optional`):
            The language(s) of the model.
        license (:obj:`str`, `optional`):
            The license of the model.
        tags (:obj:`str` or :obj:`List[str]`, `optional`):
            Tag(s) to include in the metadata of the model card.
        model_name (:obj:`str`, `optional`):
            The name of the model.
        finetuned_from (:obj:`str`, `optional`):
            The name of the model used to fine-tune this one.
        dataset_tags (:obj:`str` or :obj:`List[str]`, `optional`):
            Dataset tag(s) to include in the metadata of the model card.
        dataset (:obj:`str` or :obj:`List[str]`, `optional`):
            Dataset name(s) to include in the metadata and the text of the model card.
        dataset_args (:obj:`str` or :obj:`List[str]`, `optional`):
            Dataset argument(s) to include in the metadata of the model card.
    """
    training_summary = TrainingSummary.from_trainer(
        self,
        language=language,
        license=license,
        tags=tags,
        model_name=model_name,
        finetuned_from=finetuned_from,
        dataset_tags=dataset_tags,
        dataset=dataset,
        dataset_args=dataset_args,
    )
    model_card = training_summary.to_model_card()
    # Write with an explicit encoding: the card can contain non-ASCII text (model or dataset names, for
    # instance) and the platform default encoding is not guaranteed to be able to represent it.
    with open(os.path.join(self.args.output_dir, "README.md"), "w", encoding="utf-8") as f:
        f.write(model_card)
|
||||||
|
|
||||||
def push_to_hub(
|
def push_to_hub(
|
||||||
self,
|
self,
|
||||||
save_directory: Optional[str] = None,
|
|
||||||
repo_name: Optional[str] = None,
|
repo_name: Optional[str] = None,
|
||||||
repo_url: Optional[str] = None,
|
repo_url: Optional[str] = None,
|
||||||
commit_message: Optional[str] = "add model",
|
commit_message: Optional[str] = "add model",
|
||||||
organization: Optional[str] = None,
|
organization: Optional[str] = None,
|
||||||
private: bool = None,
|
private: bool = None,
|
||||||
use_auth_token: Optional[Union[bool, str]] = None,
|
use_auth_token: Optional[Union[bool, str]] = None,
|
||||||
|
**kwargs,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Upload `self.model` to the 🤗 model hub.
|
Upload `self.model` to the 🤗 model hub.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
save_directory (:obj:`str` or :obj:`os.PathLike`):
|
|
||||||
Folder containing the model weights and config. Will default to :obj:`self.args.output_dir`.
|
|
||||||
repo_name (:obj:`str`, `optional`):
|
repo_name (:obj:`str`, `optional`):
|
||||||
Repository name for your model or tokenizer in the hub. If not specified, the repository name will be
|
Repository name for your model or tokenizer in the hub. If not specified and :obj:`repo_url` is not
|
||||||
the stem of :obj:`save_directory`.
|
specified either, will default to the stem of :obj:`self.args.output_dir`.
|
||||||
repo_url (:obj:`str`, `optional`):
|
repo_url (:obj:`str`, `optional`):
|
||||||
Specify this in case you want to push to an existing repository in the hub. If unspecified, a new
|
Specify this in case you want to push to an existing repository in the hub. If unspecified, a new
|
||||||
repository will be created in your namespace (unless you specify an :obj:`organization`) with
|
repository will be created in your namespace (unless you specify an :obj:`organization`) with
|
||||||
@@ -2415,6 +2440,8 @@ class Trainer:
|
|||||||
The token to use as HTTP bearer authorization for remote files. If :obj:`True`, will use the token
|
The token to use as HTTP bearer authorization for remote files. If :obj:`True`, will use the token
|
||||||
generated when running :obj:`transformers-cli login` (stored in :obj:`~/.huggingface`). Will default to
|
generated when running :obj:`transformers-cli login` (stored in :obj:`~/.huggingface`). Will default to
|
||||||
:obj:`True` if :obj:`repo_url` is not specified.
|
:obj:`True` if :obj:`repo_url` is not specified.
|
||||||
|
kwargs:
|
||||||
|
Additional keyword arguments passed along to :meth:`~transformers.Trainer.create_model_card`.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The url of the commit of your model in the given repository.
|
The url of the commit of your model in the given repository.
|
||||||
@@ -2426,15 +2453,23 @@ class Trainer:
|
|||||||
raise ValueError(
|
raise ValueError(
|
||||||
"The `upload_model_to_hub` method only works for models that inherit from `PushToHubMixin` models."
|
"The `upload_model_to_hub` method only works for models that inherit from `PushToHubMixin` models."
|
||||||
)
|
)
|
||||||
if save_directory is None:
|
|
||||||
save_directory = self.args.output_dir
|
|
||||||
|
|
||||||
# To avoid pushing all checkpoints, we just copy all the files in save_directory in a tmp dir.
|
if repo_url is None and repo_name is None:
|
||||||
|
repo_name = Path(self.args.output_dir).name
|
||||||
|
|
||||||
|
if repo_name is not None:
|
||||||
|
model_name = repo_name
|
||||||
|
elif repo_url is not None:
|
||||||
|
model_name = repo_url.split("/")[-1]
|
||||||
|
else:
|
||||||
|
model_name = None
|
||||||
|
self.create_model_card(model_name=model_name, **kwargs)
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
for f in os.listdir(save_directory):
|
shutil.copy(os.path.join(self.args.output_dir, "README.md"), os.path.join(tmp_dir, "README.md"))
|
||||||
fname = os.path.join(save_directory, f)
|
unwrap_model(self.model).save_pretrained(tmp_dir)
|
||||||
if os.path.isfile(fname):
|
if self.tokenizer is not None:
|
||||||
shutil.copy(fname, os.path.join(tmp_dir, f))
|
self.tokenizer.save_pretrained(tmp_dir)
|
||||||
|
|
||||||
return unwrap_model(self.model)._push_to_hub(
|
return unwrap_model(self.model)._push_to_hub(
|
||||||
save_directory=tmp_dir,
|
save_directory=tmp_dir,
|
||||||
|
|||||||
@@ -1168,7 +1168,6 @@ class TrainerIntegrationWithHubTester(unittest.TestCase):
|
|||||||
def test_push_to_hub(self):
|
def test_push_to_hub(self):
|
||||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
trainer = get_regression_trainer(output_dir=tmp_dir)
|
trainer = get_regression_trainer(output_dir=tmp_dir)
|
||||||
trainer.save_model()
|
|
||||||
url = trainer.push_to_hub(repo_name="test-trainer", use_auth_token=self._token)
|
url = trainer.push_to_hub(repo_name="test-trainer", use_auth_token=self._token)
|
||||||
|
|
||||||
# Extract repo_name from the url
|
# Extract repo_name from the url
|
||||||
|
|||||||
Reference in New Issue
Block a user