add push_to_hub to pipeline (#29172)

* add `push_to_hub` to pipeline

* fix docs

* format with ruff

* update save_pretrained

* update save_pretrained

* remove unnecessary comment

* switch to push_to_hub method in DynamicPipelineTester

* remove unused imports

* update docs for add_new_pipeline

* fix docs for add_new_pipeline

* add comment

* fix Italian docs

* changes to token retrieval for pipelines

* Update src/transformers/pipelines/base.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
Hafedh
2024-04-16 15:34:04 +01:00
committed by GitHub
parent 60dea593ed
commit 0eaef0c709
7 changed files with 51 additions and 44 deletions

View File

@@ -208,14 +208,10 @@ from transformers import pipeline
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc") classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
``` ```
Dann können wir sie auf dem Hub mit der Methode `save_pretrained` in einem `Repository` freigeben: Dann können wir sie auf dem Hub mit der Methode `push_to_hub` freigeben:
```py ```py
from huggingface_hub import Repository classifier.push_to_hub("test-dynamic-pipeline")
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
classifier.save_pretrained("test-dynamic-pipeline")
repo.push_to_hub()
``` ```
Dadurch wird die Datei, in der Sie `PairClassificationPipeline` definiert haben, in den Ordner `"test-dynamic-pipeline"` kopiert, Dadurch wird die Datei, in der Sie `PairClassificationPipeline` definiert haben, in den Ordner `"test-dynamic-pipeline"` kopiert,

View File

@@ -208,14 +208,10 @@ from transformers import pipeline
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc") classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
``` ```
Then we can share it on the Hub by using the `save_pretrained` method in a `Repository`: Then we can share it on the Hub by using the `push_to_hub` method:
```py ```py
from huggingface_hub import Repository classifier.push_to_hub("test-dynamic-pipeline")
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
classifier.save_pretrained("test-dynamic-pipeline")
repo.push_to_hub()
``` ```
This will copy the file where you defined `PairClassificationPipeline` inside the folder `"test-dynamic-pipeline"`, This will copy the file where you defined `PairClassificationPipeline` inside the folder `"test-dynamic-pipeline"`,

View File

@@ -212,14 +212,10 @@ from transformers import pipeline
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc") classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
``` ```
Ahora podemos compartirlo en el Hub usando el método `save_pretrained` (guardar pre-entrenado) en un `Repository`: Ahora podemos compartirlo en el Hub usando el método `push_to_hub`:
```py ```py
from huggingface_hub import Repository classifier.push_to_hub("test-dynamic-pipeline")
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
classifier.save_pretrained("test-dynamic-pipeline")
repo.push_to_hub()
``` ```
Esto copiará el archivo donde definiste `PairClassificationPipeline` dentro de la carpeta `"test-dynamic-pipeline"`, Esto copiará el archivo donde definiste `PairClassificationPipeline` dentro de la carpeta `"test-dynamic-pipeline"`,

View File

@@ -202,14 +202,10 @@ from transformers import pipeline
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc") classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
``` ```
Successivamente possiamo condividerlo sull'Hub usando il metodo `save_pretrained` in un `Repository`: Successivamente possiamo condividerlo sull'Hub usando il metodo `push_to_hub`:
```py ```py
from huggingface_hub import Repository classifier.push_to_hub("test-dynamic-pipeline")
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
classifier.save_pretrained("test-dynamic-pipeline")
repo.push_to_hub()
``` ```
Questo codice copierà il file dove è stato definito `PairClassificationPipeline` all'interno della cartella `"test-dynamic-pipeline"`, Questo codice copierà il file dove è stato definito `PairClassificationPipeline` all'interno della cartella `"test-dynamic-pipeline"`,

View File

@@ -203,14 +203,10 @@ from transformers import pipeline
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc") classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
``` ```
그런 다음 `Repository`의 `save_pretrained` 메소드를 사용하여 허브에 공유할 수 있습니다: 그런 다음 `push_to_hub` 메소드를 사용하여 허브에 공유할 수 있습니다:
```py ```py
from huggingface_hub import Repository classifier.push_to_hub("test-dynamic-pipeline")
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
classifier.save_pretrained("test-dynamic-pipeline")
repo.push_to_hub()
``` ```
이렇게 하면 "test-dynamic-pipeline" 폴더 내에 `PairClassificationPipeline`을 정의한 파일이 복사되며, 파이프라인의 모델과 토크나이저도 저장한 후, `{your_username}/test-dynamic-pipeline` 저장소에 있는 모든 것을 푸시합니다. 이렇게 하면 "test-dynamic-pipeline" 폴더 내에 `PairClassificationPipeline`을 정의한 파일이 복사되며, 파이프라인의 모델과 토크나이저도 저장한 후, `{your_username}/test-dynamic-pipeline` 저장소에 있는 모든 것을 푸시합니다.

View File

@@ -36,7 +36,9 @@ from ..models.auto.configuration_auto import AutoConfig
from ..tokenization_utils import PreTrainedTokenizer from ..tokenization_utils import PreTrainedTokenizer
from ..utils import ( from ..utils import (
ModelOutput, ModelOutput,
PushToHubMixin,
add_end_docstrings, add_end_docstrings,
copy_func,
infer_framework, infer_framework,
is_tf_available, is_tf_available,
is_torch_available, is_torch_available,
@@ -781,7 +783,7 @@ if is_torch_available():
@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True, has_feature_extractor=True, has_image_processor=True)) @add_end_docstrings(build_pipeline_init_args(has_tokenizer=True, has_feature_extractor=True, has_image_processor=True))
class Pipeline(_ScikitCompat): class Pipeline(_ScikitCompat, PushToHubMixin):
""" """
The Pipeline class is the class from which all pipelines inherit. Refer to this class for methods shared across The Pipeline class is the class from which all pipelines inherit. Refer to this class for methods shared across
different pipelines. different pipelines.
@@ -908,16 +910,36 @@ class Pipeline(_ScikitCompat):
# then we should keep working # then we should keep working
self.image_processor = self.feature_extractor self.image_processor = self.feature_extractor
def save_pretrained(self, save_directory: str, safe_serialization: bool = True): def save_pretrained(
self,
save_directory: Union[str, os.PathLike],
safe_serialization: bool = True,
**kwargs,
):
""" """
Save the pipeline's model and tokenizer. Save the pipeline's model and tokenizer.
Args: Args:
save_directory (`str`): save_directory (`str` or `os.PathLike`):
A path to the directory where the pipeline will be saved. It will be created if it doesn't exist. A path to the directory where the pipeline will be saved. It will be created if it doesn't exist.
safe_serialization (`bool`): safe_serialization (`bool`):
Whether to save the model using `safetensors` or the traditional way for PyTorch or Tensorflow. Whether to save the model using `safetensors` or the traditional way for PyTorch or Tensorflow.
kwargs (`Dict[str, Any]`, *optional*):
Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
""" """
use_auth_token = kwargs.pop("use_auth_token", None)
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
FutureWarning,
)
if kwargs.get("token", None) is not None:
raise ValueError(
"`token` and `use_auth_token` are both specified. Please set only the argument `token`."
)
kwargs["token"] = use_auth_token
if os.path.isfile(save_directory): if os.path.isfile(save_directory):
logger.error(f"Provided path ({save_directory}) should be a directory, not a file") logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
return return
@@ -944,16 +966,17 @@ class Pipeline(_ScikitCompat):
# Save the pipeline custom code # Save the pipeline custom code
custom_object_save(self, save_directory) custom_object_save(self, save_directory)
self.model.save_pretrained(save_directory, safe_serialization=safe_serialization) kwargs["safe_serialization"] = safe_serialization
self.model.save_pretrained(save_directory, **kwargs)
if self.tokenizer is not None: if self.tokenizer is not None:
self.tokenizer.save_pretrained(save_directory) self.tokenizer.save_pretrained(save_directory, **kwargs)
if self.feature_extractor is not None: if self.feature_extractor is not None:
self.feature_extractor.save_pretrained(save_directory) self.feature_extractor.save_pretrained(save_directory, **kwargs)
if self.image_processor is not None: if self.image_processor is not None:
self.image_processor.save_pretrained(save_directory) self.image_processor.save_pretrained(save_directory, **kwargs)
if self.modelcard is not None: if self.modelcard is not None:
self.modelcard.save_pretrained(save_directory) self.modelcard.save_pretrained(save_directory)
@@ -1234,6 +1257,13 @@ class Pipeline(_ScikitCompat):
yield self.run_single(input_, preprocess_params, forward_params, postprocess_params) yield self.run_single(input_, preprocess_params, forward_params, postprocess_params)
Pipeline.push_to_hub = copy_func(Pipeline.push_to_hub)
if Pipeline.push_to_hub.__doc__ is not None:
Pipeline.push_to_hub.__doc__ = Pipeline.push_to_hub.__doc__.format(
object="pipe", object_class="pipeline", object_files="pipeline file"
).replace(".from_pretrained", "")
class ChunkPipeline(Pipeline): class ChunkPipeline(Pipeline):
def run_single(self, inputs, preprocess_params, forward_params, postprocess_params): def run_single(self, inputs, preprocess_params, forward_params, postprocess_params):
all_outputs = [] all_outputs = []

View File

@@ -22,7 +22,7 @@ from pathlib import Path
import datasets import datasets
import numpy as np import numpy as np
from huggingface_hub import HfFolder, Repository, create_repo, delete_repo from huggingface_hub import HfFolder, delete_repo
from requests.exceptions import HTTPError from requests.exceptions import HTTPError
from transformers import ( from transformers import (
@@ -846,9 +846,6 @@ class DynamicPipelineTester(unittest.TestCase):
model = BertForSequenceClassification(config).eval() model = BertForSequenceClassification(config).eval()
with tempfile.TemporaryDirectory() as tmp_dir: with tempfile.TemporaryDirectory() as tmp_dir:
create_repo(f"{USER}/test-dynamic-pipeline", token=self._token)
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-pipeline", token=self._token)
vocab_file = os.path.join(tmp_dir, "vocab.txt") vocab_file = os.path.join(tmp_dir, "vocab.txt")
with open(vocab_file, "w", encoding="utf-8") as vocab_writer: with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens])) vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
@@ -860,7 +857,7 @@ class DynamicPipelineTester(unittest.TestCase):
del PIPELINE_REGISTRY.supported_tasks["pair-classification"] del PIPELINE_REGISTRY.supported_tasks["pair-classification"]
classifier.save_pretrained(tmp_dir) classifier.save_pretrained(tmp_dir)
# checks # checks if the configuration has been added after calling the save_pretrained method
self.assertDictEqual( self.assertDictEqual(
classifier.model.config.custom_pipelines, classifier.model.config.custom_pipelines,
{ {
@@ -871,8 +868,8 @@ class DynamicPipelineTester(unittest.TestCase):
} }
}, },
) )
# use push_to_hub method to push the pipeline
repo.push_to_hub() classifier.push_to_hub(f"{USER}/test-dynamic-pipeline", token=self._token)
# Fails if the user forget to pass along `trust_remote_code=True` # Fails if the user forget to pass along `trust_remote_code=True`
with self.assertRaises(ValueError): with self.assertRaises(ValueError):