add push_to_hub to pipeline (#29172)
* add `push_to_hub` to pipeline * fix docs * format with ruff * update save_pretrained * update save_pretrained * remove unnecessary comment * switch to push_to_hub method in DynamicPipelineTester * remove unused imports * update docs for add_new_pipeline * fix docs for add_new_pipeline * add comment * fix italien docs * changes to token retrieval for pipelines * Update src/transformers/pipelines/base.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
@@ -208,14 +208,10 @@ from transformers import pipeline
|
|||||||
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
|
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
|
||||||
```
|
```
|
||||||
|
|
||||||
Dann können wir sie auf dem Hub mit der Methode `save_pretrained` in einem `Repository` freigeben:
|
Dann können wir sie auf dem Hub mit der Methode `push_to_hub` freigeben:
|
||||||
|
|
||||||
```py
|
```py
|
||||||
from huggingface_hub import Repository
|
classifier.push_to_hub("test-dynamic-pipeline")
|
||||||
|
|
||||||
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
|
|
||||||
classifier.save_pretrained("test-dynamic-pipeline")
|
|
||||||
repo.push_to_hub()
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Dadurch wird die Datei, in der Sie `PairClassificationPipeline` definiert haben, in den Ordner `"test-dynamic-pipeline"` kopiert,
|
Dadurch wird die Datei, in der Sie `PairClassificationPipeline` definiert haben, in den Ordner `"test-dynamic-pipeline"` kopiert,
|
||||||
|
|||||||
@@ -208,14 +208,10 @@ from transformers import pipeline
|
|||||||
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
|
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
|
||||||
```
|
```
|
||||||
|
|
||||||
Then we can share it on the Hub by using the `save_pretrained` method in a `Repository`:
|
Then we can share it on the Hub by using the `push_to_hub` method:
|
||||||
|
|
||||||
```py
|
```py
|
||||||
from huggingface_hub import Repository
|
classifier.push_to_hub("test-dynamic-pipeline")
|
||||||
|
|
||||||
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
|
|
||||||
classifier.save_pretrained("test-dynamic-pipeline")
|
|
||||||
repo.push_to_hub()
|
|
||||||
```
|
```
|
||||||
|
|
||||||
This will copy the file where you defined `PairClassificationPipeline` inside the folder `"test-dynamic-pipeline"`,
|
This will copy the file where you defined `PairClassificationPipeline` inside the folder `"test-dynamic-pipeline"`,
|
||||||
|
|||||||
@@ -212,14 +212,10 @@ from transformers import pipeline
|
|||||||
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
|
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
|
||||||
```
|
```
|
||||||
|
|
||||||
Ahora podemos compartirlo en el Hub usando el método `save_pretrained` (guardar pre-entrenado) en un `Repository`:
|
Ahora podemos compartirlo en el Hub usando el método `save_pretrained`:
|
||||||
|
|
||||||
```py
|
```py
|
||||||
from huggingface_hub import Repository
|
classifier.push_to_hub("test-dynamic-pipeline")
|
||||||
|
|
||||||
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
|
|
||||||
classifier.save_pretrained("test-dynamic-pipeline")
|
|
||||||
repo.push_to_hub()
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Esto copiará el archivo donde definiste `PairClassificationPipeline` dentro de la carpeta `"test-dynamic-pipeline"`,
|
Esto copiará el archivo donde definiste `PairClassificationPipeline` dentro de la carpeta `"test-dynamic-pipeline"`,
|
||||||
|
|||||||
@@ -202,14 +202,10 @@ from transformers import pipeline
|
|||||||
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
|
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
|
||||||
```
|
```
|
||||||
|
|
||||||
Successivamente possiamo condividerlo sull'Hub usando il metodo `save_pretrained` in un `Repository`:
|
Successivamente possiamo condividerlo sull'Hub usando il metodo `push_to_hub`
|
||||||
|
|
||||||
```py
|
```py
|
||||||
from huggingface_hub import Repository
|
classifier.push_to_hub("test-dynamic-pipeline")
|
||||||
|
|
||||||
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
|
|
||||||
classifier.save_pretrained("test-dynamic-pipeline")
|
|
||||||
repo.push_to_hub()
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Questo codice copierà il file dove è stato definitp `PairClassificationPipeline` all'interno della cartella `"test-dynamic-pipeline"`,
|
Questo codice copierà il file dove è stato definitp `PairClassificationPipeline` all'interno della cartella `"test-dynamic-pipeline"`,
|
||||||
|
|||||||
@@ -203,14 +203,10 @@ from transformers import pipeline
|
|||||||
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
|
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
|
||||||
```
|
```
|
||||||
|
|
||||||
그런 다음 `Repository`의 `save_pretrained` 메소드를 사용하여 허브에 공유할 수 있습니다:
|
그런 다음 `push_to_hub` 메소드를 사용하여 허브에 공유할 수 있습니다:
|
||||||
|
|
||||||
```py
|
```py
|
||||||
from huggingface_hub import Repository
|
classifier.push_to_hub("test-dynamic-pipeline")
|
||||||
|
|
||||||
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
|
|
||||||
classifier.save_pretrained("test-dynamic-pipeline")
|
|
||||||
repo.push_to_hub()
|
|
||||||
```
|
```
|
||||||
|
|
||||||
이렇게 하면 "test-dynamic-pipeline" 폴더 내에 `PairClassificationPipeline`을 정의한 파일이 복사되며, 파이프라인의 모델과 토크나이저도 저장한 후, `{your_username}/test-dynamic-pipeline` 저장소에 있는 모든 것을 푸시합니다.
|
이렇게 하면 "test-dynamic-pipeline" 폴더 내에 `PairClassificationPipeline`을 정의한 파일이 복사되며, 파이프라인의 모델과 토크나이저도 저장한 후, `{your_username}/test-dynamic-pipeline` 저장소에 있는 모든 것을 푸시합니다.
|
||||||
|
|||||||
@@ -36,7 +36,9 @@ from ..models.auto.configuration_auto import AutoConfig
|
|||||||
from ..tokenization_utils import PreTrainedTokenizer
|
from ..tokenization_utils import PreTrainedTokenizer
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ModelOutput,
|
ModelOutput,
|
||||||
|
PushToHubMixin,
|
||||||
add_end_docstrings,
|
add_end_docstrings,
|
||||||
|
copy_func,
|
||||||
infer_framework,
|
infer_framework,
|
||||||
is_tf_available,
|
is_tf_available,
|
||||||
is_torch_available,
|
is_torch_available,
|
||||||
@@ -781,7 +783,7 @@ if is_torch_available():
|
|||||||
|
|
||||||
|
|
||||||
@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True, has_feature_extractor=True, has_image_processor=True))
|
@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True, has_feature_extractor=True, has_image_processor=True))
|
||||||
class Pipeline(_ScikitCompat):
|
class Pipeline(_ScikitCompat, PushToHubMixin):
|
||||||
"""
|
"""
|
||||||
The Pipeline class is the class from which all pipelines inherit. Refer to this class for methods shared across
|
The Pipeline class is the class from which all pipelines inherit. Refer to this class for methods shared across
|
||||||
different pipelines.
|
different pipelines.
|
||||||
@@ -908,16 +910,36 @@ class Pipeline(_ScikitCompat):
|
|||||||
# then we should keep working
|
# then we should keep working
|
||||||
self.image_processor = self.feature_extractor
|
self.image_processor = self.feature_extractor
|
||||||
|
|
||||||
def save_pretrained(self, save_directory: str, safe_serialization: bool = True):
|
def save_pretrained(
|
||||||
|
self,
|
||||||
|
save_directory: Union[str, os.PathLike],
|
||||||
|
safe_serialization: bool = True,
|
||||||
|
**kwargs,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Save the pipeline's model and tokenizer.
|
Save the pipeline's model and tokenizer.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
save_directory (`str`):
|
save_directory (`str` or `os.PathLike`):
|
||||||
A path to the directory where to saved. It will be created if it doesn't exist.
|
A path to the directory where to saved. It will be created if it doesn't exist.
|
||||||
safe_serialization (`str`):
|
safe_serialization (`str`):
|
||||||
Whether to save the model using `safetensors` or the traditional way for PyTorch or Tensorflow.
|
Whether to save the model using `safetensors` or the traditional way for PyTorch or Tensorflow.
|
||||||
|
kwargs (`Dict[str, Any]`, *optional*):
|
||||||
|
Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
|
||||||
"""
|
"""
|
||||||
|
use_auth_token = kwargs.pop("use_auth_token", None)
|
||||||
|
|
||||||
|
if use_auth_token is not None:
|
||||||
|
warnings.warn(
|
||||||
|
"The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
|
||||||
|
FutureWarning,
|
||||||
|
)
|
||||||
|
if kwargs.get("token", None) is not None:
|
||||||
|
raise ValueError(
|
||||||
|
"`token` and `use_auth_token` are both specified. Please set only the argument `token`."
|
||||||
|
)
|
||||||
|
kwargs["token"] = use_auth_token
|
||||||
|
|
||||||
if os.path.isfile(save_directory):
|
if os.path.isfile(save_directory):
|
||||||
logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
|
logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
|
||||||
return
|
return
|
||||||
@@ -944,16 +966,17 @@ class Pipeline(_ScikitCompat):
|
|||||||
# Save the pipeline custom code
|
# Save the pipeline custom code
|
||||||
custom_object_save(self, save_directory)
|
custom_object_save(self, save_directory)
|
||||||
|
|
||||||
self.model.save_pretrained(save_directory, safe_serialization=safe_serialization)
|
kwargs["safe_serialization"] = safe_serialization
|
||||||
|
self.model.save_pretrained(save_directory, **kwargs)
|
||||||
|
|
||||||
if self.tokenizer is not None:
|
if self.tokenizer is not None:
|
||||||
self.tokenizer.save_pretrained(save_directory)
|
self.tokenizer.save_pretrained(save_directory, **kwargs)
|
||||||
|
|
||||||
if self.feature_extractor is not None:
|
if self.feature_extractor is not None:
|
||||||
self.feature_extractor.save_pretrained(save_directory)
|
self.feature_extractor.save_pretrained(save_directory, **kwargs)
|
||||||
|
|
||||||
if self.image_processor is not None:
|
if self.image_processor is not None:
|
||||||
self.image_processor.save_pretrained(save_directory)
|
self.image_processor.save_pretrained(save_directory, **kwargs)
|
||||||
|
|
||||||
if self.modelcard is not None:
|
if self.modelcard is not None:
|
||||||
self.modelcard.save_pretrained(save_directory)
|
self.modelcard.save_pretrained(save_directory)
|
||||||
@@ -1234,6 +1257,13 @@ class Pipeline(_ScikitCompat):
|
|||||||
yield self.run_single(input_, preprocess_params, forward_params, postprocess_params)
|
yield self.run_single(input_, preprocess_params, forward_params, postprocess_params)
|
||||||
|
|
||||||
|
|
||||||
|
Pipeline.push_to_hub = copy_func(Pipeline.push_to_hub)
|
||||||
|
if Pipeline.push_to_hub.__doc__ is not None:
|
||||||
|
Pipeline.push_to_hub.__doc__ = Pipeline.push_to_hub.__doc__.format(
|
||||||
|
object="pipe", object_class="pipeline", object_files="pipeline file"
|
||||||
|
).replace(".from_pretrained", "")
|
||||||
|
|
||||||
|
|
||||||
class ChunkPipeline(Pipeline):
|
class ChunkPipeline(Pipeline):
|
||||||
def run_single(self, inputs, preprocess_params, forward_params, postprocess_params):
|
def run_single(self, inputs, preprocess_params, forward_params, postprocess_params):
|
||||||
all_outputs = []
|
all_outputs = []
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
import datasets
|
import datasets
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from huggingface_hub import HfFolder, Repository, create_repo, delete_repo
|
from huggingface_hub import HfFolder, delete_repo
|
||||||
from requests.exceptions import HTTPError
|
from requests.exceptions import HTTPError
|
||||||
|
|
||||||
from transformers import (
|
from transformers import (
|
||||||
@@ -846,9 +846,6 @@ class DynamicPipelineTester(unittest.TestCase):
|
|||||||
model = BertForSequenceClassification(config).eval()
|
model = BertForSequenceClassification(config).eval()
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
create_repo(f"{USER}/test-dynamic-pipeline", token=self._token)
|
|
||||||
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-pipeline", token=self._token)
|
|
||||||
|
|
||||||
vocab_file = os.path.join(tmp_dir, "vocab.txt")
|
vocab_file = os.path.join(tmp_dir, "vocab.txt")
|
||||||
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
|
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
|
||||||
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
|
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
|
||||||
@@ -860,7 +857,7 @@ class DynamicPipelineTester(unittest.TestCase):
|
|||||||
del PIPELINE_REGISTRY.supported_tasks["pair-classification"]
|
del PIPELINE_REGISTRY.supported_tasks["pair-classification"]
|
||||||
|
|
||||||
classifier.save_pretrained(tmp_dir)
|
classifier.save_pretrained(tmp_dir)
|
||||||
# checks
|
# checks if the configuration has been added after calling the save_pretrained method
|
||||||
self.assertDictEqual(
|
self.assertDictEqual(
|
||||||
classifier.model.config.custom_pipelines,
|
classifier.model.config.custom_pipelines,
|
||||||
{
|
{
|
||||||
@@ -871,8 +868,8 @@ class DynamicPipelineTester(unittest.TestCase):
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
# use push_to_hub method to push the pipeline
|
||||||
repo.push_to_hub()
|
classifier.push_to_hub(f"{USER}/test-dynamic-pipeline", token=self._token)
|
||||||
|
|
||||||
# Fails if the user forget to pass along `trust_remote_code=True`
|
# Fails if the user forget to pass along `trust_remote_code=True`
|
||||||
with self.assertRaises(ValueError):
|
with self.assertRaises(ValueError):
|
||||||
|
|||||||
Reference in New Issue
Block a user