From 0d0c392c45a0f9e0330ed750246d87bae8fc7bbc Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Wed, 22 Jun 2022 16:50:21 +0100 Subject: [PATCH] CLI: use hub's `create_commit` (#17755) * use create_commit * better commit message and description * touch setup.py to trigger cache update * add hub version gating --- .github/workflows/add-model-like.yml | 2 +- .github/workflows/model-templates.yml | 2 +- .github/workflows/update_metdata.yml | 2 +- src/transformers/commands/pt_to_tf.py | 81 +++++++++++++++++---------- 4 files changed, 55 insertions(+), 32 deletions(-) diff --git a/.github/workflows/add-model-like.yml b/.github/workflows/add-model-like.yml index b6e8126616..2d2ab5b2e1 100644 --- a/.github/workflows/add-model-like.yml +++ b/.github/workflows/add-model-like.yml @@ -27,7 +27,7 @@ jobs: id: cache with: path: ~/venv/ - key: v3-tests_model_like-${{ hashFiles('setup.py') }} + key: v4-tests_model_like-${{ hashFiles('setup.py') }} - name: Create virtual environment on cache miss if: steps.cache.outputs.cache-hit != 'true' diff --git a/.github/workflows/model-templates.yml b/.github/workflows/model-templates.yml index 6ade77a279..ad57d331c2 100644 --- a/.github/workflows/model-templates.yml +++ b/.github/workflows/model-templates.yml @@ -21,7 +21,7 @@ jobs: id: cache with: path: ~/venv/ - key: v3-tests_templates-${{ hashFiles('setup.py') }} + key: v4-tests_templates-${{ hashFiles('setup.py') }} - name: Create virtual environment on cache miss if: steps.cache.outputs.cache-hit != 'true' diff --git a/.github/workflows/update_metdata.yml b/.github/workflows/update_metdata.yml index dcd2ac5021..1fc71893aa 100644 --- a/.github/workflows/update_metdata.yml +++ b/.github/workflows/update_metdata.yml @@ -21,7 +21,7 @@ jobs: id: cache with: path: ~/venv/ - key: v2-metadata-${{ hashFiles('setup.py') }} + key: v3-metadata-${{ hashFiles('setup.py') }} - name: Create virtual environment on cache miss if: steps.cache.outputs.cache-hit != 'true' diff --git a/src/transformers/commands/pt_to_tf.py b/src/transformers/commands/pt_to_tf.py index 3a2465093c..b71ba4c18a 100644 --- a/src/transformers/commands/pt_to_tf.py +++ b/src/transformers/commands/pt_to_tf.py @@ -18,8 +18,9 @@ from importlib import import_module import numpy as np from datasets import load_dataset +from packaging import version -from huggingface_hub import Repository, upload_file +import huggingface_hub from .. import AutoConfig, AutoFeatureExtractor, AutoTokenizer, is_tf_available, is_torch_available from ..utils import logging @@ -45,7 +46,9 @@ def convert_command_factory(args: Namespace): Returns: ServeCommand """ - return PTtoTFCommand(args.model_name, args.local_dir, args.new_weights, args.no_pr, args.push) + return PTtoTFCommand( + args.model_name, args.local_dir, args.new_weights, args.no_pr, args.push, args.extra_commit_description + ) class PTtoTFCommand(BaseTransformersCLICommand): @@ -89,6 +92,12 @@ class PTtoTFCommand(BaseTransformersCLICommand): action="store_true", help="Optional flag to push the weights directly to `main` (requires permissions)", ) + train_parser.add_argument( + "--extra-commit-description", + type=str, + default="", + help="Optional additional commit description to use when opening a PR (e.g. to tag the owner).", + ) train_parser.set_defaults(func=convert_command_factory) @staticmethod @@ -134,13 +143,23 @@ class PTtoTFCommand(BaseTransformersCLICommand): return _find_pt_tf_differences(pt_outputs, tf_outputs, {}) - def __init__(self, model_name: str, local_dir: str, new_weights: bool, no_pr: bool, push: bool, *args): + def __init__( + self, + model_name: str, + local_dir: str, + new_weights: bool, + no_pr: bool, + push: bool, + extra_commit_description: str, + *args + ): self._logger = logging.get_logger("transformers-cli/pt_to_tf") self._model_name = model_name self._local_dir = local_dir if local_dir else os.path.join("/tmp", model_name) self._new_weights = new_weights self._no_pr = no_pr self._push = push + self._extra_commit_description = extra_commit_description def get_text_inputs(self): tokenizer = AutoTokenizer.from_pretrained(self._local_dir) @@ -170,10 +189,17 @@ class PTtoTFCommand(BaseTransformersCLICommand): return pt_input, tf_input def run(self): + if version.parse(huggingface_hub.__version__) < version.parse("0.8.1"): + raise ImportError( + "The huggingface_hub version must be >= 0.8.1 to use this command. Please update your huggingface_hub" + " installation." + ) + else: + from huggingface_hub import Repository, create_commit + from huggingface_hub._commit_api import CommitOperationAdd + # Fetch remote data - # TODO: implement a solution to pull a specific PR/commit, so we can use this CLI to validate pushes. repo = Repository(local_dir=self._local_dir, clone_from=self._model_name) - repo.git_pull() # in case the repo already exists locally, but with an older commit # Load config and get the appropriate architecture -- the latter is needed to convert the head's weights config = AutoConfig.from_pretrained(self._local_dir) @@ -240,32 +266,29 @@ class PTtoTFCommand(BaseTransformersCLICommand): ) ) + commit_message = "Update TF weights" if self._new_weights else "Add TF weights" if self._push: repo.git_add(auto_lfs_track=True) - repo.git_commit("Add TF weights") + repo.git_commit(commit_message) repo.git_push(blocking=True) # this prints a progress bar with the upload self._logger.warn(f"TF weights pushed into {self._model_name}") elif not self._no_pr: - # TODO: remove try/except when the upload to PR feature is released - # (https://github.com/huggingface/huggingface_hub/pull/884) - try: - self._logger.warn("Uploading the weights into a new PR...") - hub_pr_url = upload_file( - path_or_fileobj=tf_weights_path, - path_in_repo=TF_WEIGHTS_NAME, - repo_id=self._model_name, - create_pr=True, - pr_commit_summary="Add TF weights", - pr_commit_description=( - "Model converted by the `transformers`' `pt_to_tf` CLI -- all converted model outputs and" - " hidden layers were validated against its Pytorch counterpart. Maximum crossload output" - f" difference={max_crossload_diff:.3e}; Maximum converted output" - f" difference={max_conversion_diff:.3e}." - ), - ) - self._logger.warn(f"PR open in {hub_pr_url}") - except TypeError: - self._logger.warn( - f"You can now open a PR in https://huggingface.co/{self._model_name}/discussions, manually" - f" uploading the file in {tf_weights_path}" - ) + self._logger.warn("Uploading the weights into a new PR...") + commit_descrition = ( + "Model converted by the [`transformers`' `pt_to_tf`" + " CLI](https://github.com/huggingface/transformers/blob/main/src/transformers/commands/pt_to_tf.py)." + "\n\nAll converted model outputs and hidden layers were validated against its Pytorch counterpart." + f" Maximum crossload output difference={max_crossload_diff:.3e}; Maximum converted output" + f" difference={max_conversion_diff:.3e}." + ) + if self._extra_commit_description: + commit_descrition += "\n\n" + self._extra_commit_description + hub_pr_url = create_commit( + repo_id=self._model_name, + operations=[CommitOperationAdd(path_in_repo=TF_WEIGHTS_NAME, path_or_fileobj=tf_weights_path)], + commit_message=commit_message, + commit_description=commit_descrition, + repo_type="model", + create_pr=True, + ) + self._logger.warn(f"PR open in {hub_pr_url}")