add push_to_hub to pipeline (#29172)

* add `push_to_hub` to pipeline * fix docs * format with ruff * update save_pretrained * update save_pretrained * remove unnecessary comment * switch to push_to_hub method in DynamicPipelineTester * remove unused imports * update docs for add_new_pipeline * fix docs for add_new_pipeline * add comment * fix italien docs * changes to token retrieval for pipelines * Update src/transformers/pipelines/base.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
2024-04-16 15:34:04 +01:00
parent 60dea593ed
commit 0eaef0c709
7 changed files with 51 additions and 44 deletions
--- a/docs/source/de/add_new_pipeline.md
+++ b/docs/source/de/add_new_pipeline.md
@@ -208,14 +208,10 @@ from transformers import pipeline
 classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
 ```
-Dann können wir sie auf dem Hub mit der Methode `save_pretrained` in einem `Repository` freigeben:
+Dann können wir sie auf dem Hub mit der Methode `push_to_hub` freigeben:
 ```py
-from huggingface_hub import Repository
+classifier.push_to_hub("test-dynamic-pipeline")
 repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
 classifier.save_pretrained("test-dynamic-pipeline")
 repo.push_to_hub()
 ```
 Dadurch wird die Datei, in der Sie `PairClassificationPipeline` definiert haben, in den Ordner `"test-dynamic-pipeline"` kopiert,
--- a/docs/source/en/add_new_pipeline.md
+++ b/docs/source/en/add_new_pipeline.md
@@ -208,14 +208,10 @@ from transformers import pipeline
 classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
 ```
-Then we can share it on the Hub by using the `save_pretrained` method in a `Repository`:
+Then we can share it on the Hub by using the `push_to_hub` method:
 ```py
-from huggingface_hub import Repository
+classifier.push_to_hub("test-dynamic-pipeline")
 repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
 classifier.save_pretrained("test-dynamic-pipeline")
 repo.push_to_hub()
 ```
 This will copy the file where you defined `PairClassificationPipeline` inside the folder `"test-dynamic-pipeline"`,
--- a/docs/source/es/add_new_pipeline.md
+++ b/docs/source/es/add_new_pipeline.md
@@ -212,14 +212,10 @@ from transformers import pipeline
 classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
 ```
-Ahora podemos compartirlo en el Hub usando el método `save_pretrained` (guardar pre-entrenado) en un `Repository`:
+Ahora podemos compartirlo en el Hub usando el método `push_to_hub`:
 ```py
-from huggingface_hub import Repository
+classifier.push_to_hub("test-dynamic-pipeline")
 repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
 classifier.save_pretrained("test-dynamic-pipeline")
 repo.push_to_hub()
 ```
 Esto copiará el archivo donde definiste `PairClassificationPipeline` dentro de la carpeta `"test-dynamic-pipeline"`,
--- a/docs/source/it/add_new_pipeline.md
+++ b/docs/source/it/add_new_pipeline.md
@@ -202,14 +202,10 @@ from transformers import pipeline
 classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
 ```
-Successivamente possiamo condividerlo sull'Hub usando il metodo `save_pretrained` in un `Repository`:
+Successivamente possiamo condividerlo sull'Hub usando il metodo `push_to_hub`:
 ```py
-from huggingface_hub import Repository
+classifier.push_to_hub("test-dynamic-pipeline")
 repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
 classifier.save_pretrained("test-dynamic-pipeline")
 repo.push_to_hub()
 ```
 Questo codice copierà il file dove è stato definito `PairClassificationPipeline` all'interno della cartella `"test-dynamic-pipeline"`,
--- a/docs/source/ko/add_new_pipeline.md
+++ b/docs/source/ko/add_new_pipeline.md
@@ -203,14 +203,10 @@ from transformers import pipeline
 classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
 ```
-그런 다음 `Repository`의 `save_pretrained` 메소드를 사용하여 허브에 공유할 수 있습니다:
+그런 다음 `push_to_hub` 메소드를 사용하여 허브에 공유할 수 있습니다:
 ```py
-from huggingface_hub import Repository
+classifier.push_to_hub("test-dynamic-pipeline")
 repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
 classifier.save_pretrained("test-dynamic-pipeline")
 repo.push_to_hub()
 ```
 이렇게 하면 "test-dynamic-pipeline" 폴더 내에 `PairClassificationPipeline`을 정의한 파일이 복사되며, 파이프라인의 모델과 토크나이저도 저장한 후, `{your_username}/test-dynamic-pipeline` 저장소에 있는 모든 것을 푸시합니다.
--- a/src/transformers/pipelines/base.py
+++ b/src/transformers/pipelines/base.py
@@ -36,7 +36,9 @@ from ..models.auto.configuration_auto import AutoConfig
 from ..tokenization_utils import PreTrainedTokenizer
 from ..utils import (
    ModelOutput,
    PushToHubMixin,
    add_end_docstrings,
    copy_func,
    infer_framework,
    is_tf_available,
    is_torch_available,
@@ -781,7 +783,7 @@ if is_torch_available():
@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True, has_feature_extractor=True, has_image_processor=True))
-class Pipeline(_ScikitCompat):
+class Pipeline(_ScikitCompat, PushToHubMixin):
    """
    The Pipeline class is the class from which all pipelines inherit. Refer to this class for methods shared across
    different pipelines.
@@ -908,16 +910,36 @@ class Pipeline(_ScikitCompat):
                # then we should keep working
                self.image_processor = self.feature_extractor
-    def save_pretrained(self, save_directory: str, safe_serialization: bool = True):
+    def save_pretrained(
        self,
        save_directory: Union[str, os.PathLike],
        safe_serialization: bool = True,
        **kwargs,
    ):
        """
        Save the pipeline's model and tokenizer.
        Args:
-            save_directory (`str`):
+            save_directory (`str` or `os.PathLike`):
                A path to the directory where the pipeline will be saved. It will be created if it doesn't exist.
            safe_serialization (`bool`):
                Whether to save the model using `safetensors` or the traditional way for PyTorch or Tensorflow.
            kwargs (`Dict[str, Any]`, *optional*):
                Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
        """
        use_auth_token = kwargs.pop("use_auth_token", None)
        if use_auth_token is not None:
            warnings.warn(
                "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
                FutureWarning,
            )
            if kwargs.get("token", None) is not None:
                raise ValueError(
                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
                )
            kwargs["token"] = use_auth_token
        if os.path.isfile(save_directory):
            logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
            return
@@ -944,16 +966,17 @@ class Pipeline(_ScikitCompat):
            # Save the pipeline custom code
            custom_object_save(self, save_directory)
-        self.model.save_pretrained(save_directory, safe_serialization=safe_serialization)
+        kwargs["safe_serialization"] = safe_serialization
        self.model.save_pretrained(save_directory, **kwargs)
        if self.tokenizer is not None:
-            self.tokenizer.save_pretrained(save_directory)
+            self.tokenizer.save_pretrained(save_directory, **kwargs)
        if self.feature_extractor is not None:
-            self.feature_extractor.save_pretrained(save_directory)
+            self.feature_extractor.save_pretrained(save_directory, **kwargs)
        if self.image_processor is not None:
-            self.image_processor.save_pretrained(save_directory)
+            self.image_processor.save_pretrained(save_directory, **kwargs)
        if self.modelcard is not None:
            self.modelcard.save_pretrained(save_directory)
@@ -1234,6 +1257,13 @@ class Pipeline(_ScikitCompat):
            yield self.run_single(input_, preprocess_params, forward_params, postprocess_params)
 Pipeline.push_to_hub = copy_func(Pipeline.push_to_hub)
 if Pipeline.push_to_hub.__doc__ is not None:
    Pipeline.push_to_hub.__doc__ = Pipeline.push_to_hub.__doc__.format(
        object="pipe", object_class="pipeline", object_files="pipeline file"
    ).replace(".from_pretrained", "")
 class ChunkPipeline(Pipeline):
    def run_single(self, inputs, preprocess_params, forward_params, postprocess_params):
        all_outputs = []
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -22,7 +22,7 @@ from pathlib import Path
 import datasets
 import numpy as np
-from huggingface_hub import HfFolder, Repository, create_repo, delete_repo
+from huggingface_hub import HfFolder, delete_repo
 from requests.exceptions import HTTPError
 from transformers import (
@@ -846,9 +846,6 @@ class DynamicPipelineTester(unittest.TestCase):
        model = BertForSequenceClassification(config).eval()
        with tempfile.TemporaryDirectory() as tmp_dir:
            create_repo(f"{USER}/test-dynamic-pipeline", token=self._token)
            repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-pipeline", token=self._token)
            vocab_file = os.path.join(tmp_dir, "vocab.txt")
            with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
                vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
@@ -860,7 +857,7 @@ class DynamicPipelineTester(unittest.TestCase):
            del PIPELINE_REGISTRY.supported_tasks["pair-classification"]
            classifier.save_pretrained(tmp_dir)
-            # checks
+            # checks if the configuration has been added after calling the save_pretrained method
            self.assertDictEqual(
                classifier.model.config.custom_pipelines,
                {
@@ -871,8 +868,8 @@ class DynamicPipelineTester(unittest.TestCase):
                    }
                },
            )
-
+            # use push_to_hub method to push the pipeline
-            repo.push_to_hub()
+            classifier.push_to_hub(f"{USER}/test-dynamic-pipeline", token=self._token)
        # Fails if the user forgets to pass along `trust_remote_code=True`
        with self.assertRaises(ValueError):