[generate] Run custom generation code from the Hub (#36405)

* mvp

* remove trust_remote_code

* generate_from_hub

* handle requirements; docs

* english

* doc PR suggestions

* Apply suggestions from code review

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* changed remote code path to generate/generate.py

* model repo has custom generate -> override base generate

* check for proper inheritance

* some doc updates (missing: tag-related docs)

* update docs to model repo

* nit

* nit

* nits

* Update src/transformers/dynamic_module_utils.py

* Apply suggestions from code review

* Update docs/source/en/generation_strategies.md

Co-authored-by: Pedro Cuenca <pedro@huggingface.co>

* trust remote code is required

* use new import utils for requirements version parsing

* use org examples

* add tests

* Apply suggestions from code review

Co-authored-by: Manuel de Prada Corral <6536835+manueldeprada@users.noreply.github.com>

* ascii file structure; tag instructions on readme.md

---------

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
Co-authored-by: Manuel de Prada Corral <6536835+manueldeprada@users.noreply.github.com>
This commit is contained in:
Joao Gante
2025-05-15 10:35:54 +01:00
committed by GitHub
parent 955e61b0da
commit 0e0e5c1044
6 changed files with 522 additions and 97 deletions

View File

@@ -4954,6 +4954,68 @@ class GenerationIntegrationTests(unittest.TestCase):
_ = model_cpu.generate(input_ids, **generate_kwargs)
self.assertFalse(hasattr(model_cpu, "_compiled_call"))
def test_custom_generate_from_argument_in_generate(self):
    """Checks that `generate` uses the custom code referenced by its `custom_generate` argument."""
    checkpoint = "hf-internal-testing/tiny-random-MistralForCausalLM"
    model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    encoded_prompt = tokenizer("Hello, world!", return_tensors="pt").to(model.device)

    # Note: the custom decoding method in `transformers-community/custom_generate_example` accepts a
    # `left_padding` argument (int) and prepends as many pad tokens.
    generated_ids = model.generate(
        **encoded_prompt,
        left_padding=5,
        max_new_tokens=5,
        custom_generate="transformers-community/custom_generate_example",
        trust_remote_code=True,
    )
    decoded_text = tokenizer.decode(generated_ids[0])

    # `<unk>` is the pad token, so the decoded text is expected to open with the 5 requested pad tokens
    self.assertTrue(decoded_text.startswith("<unk><unk><unk><unk><unk>"))
def test_custom_generate_from_model_repo_with_custom_generate_code(self):
    """
    Tests that models from model repos containing custom generation code override `generate` with the custom code
    """
    model = AutoModelForCausalLM.from_pretrained(
        "transformers-community/custom_generate_example", device_map="auto", trust_remote_code=True
    )
    generate_signature = inspect.signature(model.generate)
    # `left_padding` is a custom argument, doesn't exist in the base `generate` method. `assertIn` expresses the
    # membership check directly (instead of relying on the truthiness of the looked-up parameter object) and
    # reports the parameters that were actually found when the check fails.
    self.assertIn("left_padding", generate_signature.parameters)
def test_custom_generate_bad_requirements(self):
    """Checks that the `requirements.txt` file of a custom generation repo is verified."""
    checkpoint = "hf-internal-testing/tiny-random-MistralForCausalLM"
    model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    encoded_prompt = tokenizer("Hello, world!", return_tensors="pt").to(model.device)

    # Note: the `requirements.txt` in `transformers-community/custom_generate_bad_requirements` lists
    # impossible requirements, so the call is expected to fail with an `ImportError`
    bad_requirements_repo = "transformers-community/custom_generate_bad_requirements"
    with self.assertRaises(ImportError):
        model.generate(
            **encoded_prompt,
            custom_generate=bad_requirements_repo,
            trust_remote_code=True,
        )
def test_custom_generate_requires_trust_remote_code(self):
    """Checks that using `custom_generate` code without `trust_remote_code` raises a `ValueError`."""
    custom_generate_repo = "transformers-community/custom_generate_example"

    # Case 1: loading a model from a repo that contains custom generation code, without `trust_remote_code`
    with self.assertRaises(ValueError):
        AutoModelForCausalLM.from_pretrained(custom_generate_repo, device_map="auto")

    # Case 2: passing non-local code through the `custom_generate` argument of `generate`, without
    # `trust_remote_code`
    checkpoint = "hf-internal-testing/tiny-random-MistralForCausalLM"
    model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    encoded_prompt = tokenizer("Hello, world!", return_tensors="pt").to(model.device)
    with self.assertRaises(ValueError):
        model.generate(**encoded_prompt, custom_generate=custom_generate_repo)
@require_torch
class TokenHealingTestCase(unittest.TestCase):