fix for custom pipeline configuration (#29004)
* fix for custom pipeline configuration * fix for custom pipelines * remove extra exception * added test for custom pipelines extra tag * format with ruff * limit extra tag for first time only * format with ruff * improve tests for custom pipelines
This commit is contained in:
@@ -32,6 +32,7 @@ from .utils import (
|
|||||||
CONFIG_NAME,
|
CONFIG_NAME,
|
||||||
PushToHubMixin,
|
PushToHubMixin,
|
||||||
add_model_info_to_auto_map,
|
add_model_info_to_auto_map,
|
||||||
|
add_model_info_to_custom_pipelines,
|
||||||
cached_file,
|
cached_file,
|
||||||
copy_func,
|
copy_func,
|
||||||
download_url,
|
download_url,
|
||||||
@@ -736,6 +737,10 @@ class PretrainedConfig(PushToHubMixin):
|
|||||||
config_dict["auto_map"] = add_model_info_to_auto_map(
|
config_dict["auto_map"] = add_model_info_to_auto_map(
|
||||||
config_dict["auto_map"], pretrained_model_name_or_path
|
config_dict["auto_map"], pretrained_model_name_or_path
|
||||||
)
|
)
|
||||||
|
if "custom_pipelines" in config_dict and not is_local:
|
||||||
|
config_dict["custom_pipelines"] = add_model_info_to_custom_pipelines(
|
||||||
|
config_dict["custom_pipelines"], pretrained_model_name_or_path
|
||||||
|
)
|
||||||
return config_dict, kwargs
|
return config_dict, kwargs
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ from .utils import (
|
|||||||
PushToHubMixin,
|
PushToHubMixin,
|
||||||
TensorType,
|
TensorType,
|
||||||
add_model_info_to_auto_map,
|
add_model_info_to_auto_map,
|
||||||
|
add_model_info_to_custom_pipelines,
|
||||||
cached_file,
|
cached_file,
|
||||||
copy_func,
|
copy_func,
|
||||||
download_url,
|
download_url,
|
||||||
@@ -539,10 +540,15 @@ class FeatureExtractionMixin(PushToHubMixin):
|
|||||||
f"loading configuration file {feature_extractor_file} from cache at {resolved_feature_extractor_file}"
|
f"loading configuration file {feature_extractor_file} from cache at {resolved_feature_extractor_file}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if "auto_map" in feature_extractor_dict and not is_local:
|
if not is_local:
|
||||||
feature_extractor_dict["auto_map"] = add_model_info_to_auto_map(
|
if "auto_map" in feature_extractor_dict:
|
||||||
feature_extractor_dict["auto_map"], pretrained_model_name_or_path
|
feature_extractor_dict["auto_map"] = add_model_info_to_auto_map(
|
||||||
)
|
feature_extractor_dict["auto_map"], pretrained_model_name_or_path
|
||||||
|
)
|
||||||
|
if "custom_pipelines" in feature_extractor_dict:
|
||||||
|
feature_extractor_dict["custom_pipelines"] = add_model_info_to_custom_pipelines(
|
||||||
|
feature_extractor_dict["custom_pipelines"], pretrained_model_name_or_path
|
||||||
|
)
|
||||||
|
|
||||||
return feature_extractor_dict, kwargs
|
return feature_extractor_dict, kwargs
|
||||||
|
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ from .utils import (
|
|||||||
IMAGE_PROCESSOR_NAME,
|
IMAGE_PROCESSOR_NAME,
|
||||||
PushToHubMixin,
|
PushToHubMixin,
|
||||||
add_model_info_to_auto_map,
|
add_model_info_to_auto_map,
|
||||||
|
add_model_info_to_custom_pipelines,
|
||||||
cached_file,
|
cached_file,
|
||||||
copy_func,
|
copy_func,
|
||||||
download_url,
|
download_url,
|
||||||
@@ -375,11 +376,15 @@ class ImageProcessingMixin(PushToHubMixin):
|
|||||||
f"loading configuration file {image_processor_file} from cache at {resolved_image_processor_file}"
|
f"loading configuration file {image_processor_file} from cache at {resolved_image_processor_file}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if "auto_map" in image_processor_dict and not is_local:
|
if not is_local:
|
||||||
image_processor_dict["auto_map"] = add_model_info_to_auto_map(
|
if "auto_map" in image_processor_dict:
|
||||||
image_processor_dict["auto_map"], pretrained_model_name_or_path
|
image_processor_dict["auto_map"] = add_model_info_to_auto_map(
|
||||||
)
|
image_processor_dict["auto_map"], pretrained_model_name_or_path
|
||||||
|
)
|
||||||
|
if "custom_pipelines" in image_processor_dict:
|
||||||
|
image_processor_dict["custom_pipelines"] = add_model_info_to_custom_pipelines(
|
||||||
|
image_processor_dict["custom_pipelines"], pretrained_model_name_or_path
|
||||||
|
)
|
||||||
return image_processor_dict, kwargs
|
return image_processor_dict, kwargs
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ from .utils import (
|
|||||||
PROCESSOR_NAME,
|
PROCESSOR_NAME,
|
||||||
PushToHubMixin,
|
PushToHubMixin,
|
||||||
add_model_info_to_auto_map,
|
add_model_info_to_auto_map,
|
||||||
|
add_model_info_to_custom_pipelines,
|
||||||
cached_file,
|
cached_file,
|
||||||
copy_func,
|
copy_func,
|
||||||
direct_transformers_import,
|
direct_transformers_import,
|
||||||
@@ -355,10 +356,15 @@ class ProcessorMixin(PushToHubMixin):
|
|||||||
else:
|
else:
|
||||||
logger.info(f"loading configuration file {processor_file} from cache at {resolved_processor_file}")
|
logger.info(f"loading configuration file {processor_file} from cache at {resolved_processor_file}")
|
||||||
|
|
||||||
if "auto_map" in processor_dict and not is_local:
|
if not is_local:
|
||||||
processor_dict["auto_map"] = add_model_info_to_auto_map(
|
if "auto_map" in processor_dict:
|
||||||
processor_dict["auto_map"], pretrained_model_name_or_path
|
processor_dict["auto_map"] = add_model_info_to_auto_map(
|
||||||
)
|
processor_dict["auto_map"], pretrained_model_name_or_path
|
||||||
|
)
|
||||||
|
if "custom_pipelines" in processor_dict:
|
||||||
|
processor_dict["custom_pipelines"] = add_model_info_to_custom_pipelines(
|
||||||
|
processor_dict["custom_pipelines"], pretrained_model_name_or_path
|
||||||
|
)
|
||||||
|
|
||||||
return processor_dict, kwargs
|
return processor_dict, kwargs
|
||||||
|
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ from .utils import (
|
|||||||
TensorType,
|
TensorType,
|
||||||
add_end_docstrings,
|
add_end_docstrings,
|
||||||
add_model_info_to_auto_map,
|
add_model_info_to_auto_map,
|
||||||
|
add_model_info_to_custom_pipelines,
|
||||||
cached_file,
|
cached_file,
|
||||||
copy_func,
|
copy_func,
|
||||||
download_url,
|
download_url,
|
||||||
@@ -2177,13 +2178,18 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||||||
config_tokenizer_class = None
|
config_tokenizer_class = None
|
||||||
init_kwargs = init_configuration
|
init_kwargs = init_configuration
|
||||||
|
|
||||||
if "auto_map" in init_kwargs and not _is_local:
|
if not _is_local:
|
||||||
# For backward compatibility with old format.
|
if "auto_map" in init_kwargs:
|
||||||
if isinstance(init_kwargs["auto_map"], (tuple, list)):
|
# For backward compatibility with old format.
|
||||||
init_kwargs["auto_map"] = {"AutoTokenizer": init_kwargs["auto_map"]}
|
if isinstance(init_kwargs["auto_map"], (tuple, list)):
|
||||||
init_kwargs["auto_map"] = add_model_info_to_auto_map(
|
init_kwargs["auto_map"] = {"AutoTokenizer": init_kwargs["auto_map"]}
|
||||||
init_kwargs["auto_map"], pretrained_model_name_or_path
|
init_kwargs["auto_map"] = add_model_info_to_auto_map(
|
||||||
)
|
init_kwargs["auto_map"], pretrained_model_name_or_path
|
||||||
|
)
|
||||||
|
if "custom_pipelines" in init_kwargs:
|
||||||
|
init_kwargs["custom_pipelines"] = add_model_info_to_custom_pipelines(
|
||||||
|
init_kwargs["custom_pipelines"], pretrained_model_name_or_path
|
||||||
|
)
|
||||||
|
|
||||||
if config_tokenizer_class is None:
|
if config_tokenizer_class is None:
|
||||||
# Matt: This entire block is only used to decide if the tokenizer class matches the class in the repo.
|
# Matt: This entire block is only used to decide if the tokenizer class matches the class in the repo.
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ from .generic import (
|
|||||||
PaddingStrategy,
|
PaddingStrategy,
|
||||||
TensorType,
|
TensorType,
|
||||||
add_model_info_to_auto_map,
|
add_model_info_to_auto_map,
|
||||||
|
add_model_info_to_custom_pipelines,
|
||||||
cached_property,
|
cached_property,
|
||||||
can_return_loss,
|
can_return_loss,
|
||||||
expand_dims,
|
expand_dims,
|
||||||
|
|||||||
@@ -721,6 +721,19 @@ def add_model_info_to_auto_map(auto_map, repo_id):
|
|||||||
return auto_map
|
return auto_map
|
||||||
|
|
||||||
|
|
||||||
|
def add_model_info_to_custom_pipelines(custom_pipeline, repo_id):
    """
    Adds the information of the repo_id to a given custom pipeline.

    Args:
        custom_pipeline (`dict`):
            Mapping of task name to task specification, i.e.
            `{task: {"impl": "path.to.task", ...}, ...}`. It is updated in place.
        repo_id (`str`):
            Repo identifier to prepend to every `"impl"` entry that is not already prefixed.

    Returns:
        `dict`: The same `custom_pipeline` object that was passed in, with each un-prefixed `"impl"` value
        rewritten to `f"{repo_id}--{impl}"`.
    """
    # {custom_pipelines : {task: {"impl": "path.to.task"},...} }
    for task_spec in custom_pipeline.values():
        # Task specs without an "impl" entry are left untouched.
        if "impl" not in task_spec:
            continue
        module = task_spec["impl"]
        # An "impl" that already contains "--" is treated as already carrying a repo prefix, so applying this
        # function more than once does not stack prefixes.
        if "--" not in module:
            task_spec["impl"] = f"{repo_id}--{module}"
    return custom_pipeline
|
||||||
|
|
||||||
|
|
||||||
def infer_framework(model_class):
|
def infer_framework(model_class):
|
||||||
"""
|
"""
|
||||||
Infers the framework of a given model without using isinstance(), because we cannot guarantee that the relevant
|
Infers the framework of a given model without using isinstance(), because we cannot guarantee that the relevant
|
||||||
|
|||||||
@@ -925,6 +925,24 @@ class DynamicPipelineTester(unittest.TestCase):
|
|||||||
# Can't make an isinstance check because the new_classifier is from the PairClassificationPipeline class of a
|
# Can't make an isinstance check because the new_classifier is from the PairClassificationPipeline class of a
|
||||||
# dynamic module
|
# dynamic module
|
||||||
self.assertEqual(new_classifier.__class__.__name__, "PairClassificationPipeline")
|
self.assertEqual(new_classifier.__class__.__name__, "PairClassificationPipeline")
|
||||||
|
# check for tag existence; the tag needs to be added when we are calling a custom pipeline from the hub
|
||||||
|
# useful for cases such as finetuning
|
||||||
|
self.assertDictEqual(
|
||||||
|
new_classifier.model.config.custom_pipelines,
|
||||||
|
{
|
||||||
|
"pair-classification": {
|
||||||
|
"impl": f"{USER}/test-dynamic-pipeline--custom_pipeline.PairClassificationPipeline",
|
||||||
|
"pt": ("AutoModelForSequenceClassification",),
|
||||||
|
"tf": (),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
# test if the pipeline still works after the model is finetuned
|
||||||
|
# (we are actually testing if the pipeline still works from the final repo)
|
||||||
|
# this is what the user/repo--module.class tag is used for
|
||||||
|
new_classifier.model.push_to_hub(repo_name=f"{USER}/test-pipeline-for-a-finetuned-model", token=self._token)
|
||||||
|
del new_classifier # free up memory
|
||||||
|
new_classifier = pipeline(model=f"{USER}/test-pipeline-for-a-finetuned-model", trust_remote_code=True)
|
||||||
|
|
||||||
results = classifier("I hate you", second_text="I love you")
|
results = classifier("I hate you", second_text="I love you")
|
||||||
new_results = new_classifier("I hate you", second_text="I love you")
|
new_results = new_classifier("I hate you", second_text="I love you")
|
||||||
|
|||||||
Reference in New Issue
Block a user