fix: providing a tensor to cache_position in model.generate kwargs always crashes because of boolean test (#39300)

* fix: cache_position: RuntimeError: Boolean value of Tensor with more than one value is ambiguous

* test cache_position

* move test

* propagate changes

---------

Co-authored-by: Masataro Asai <guicho2.71828@gmail.com>
This commit is contained in:
Joao Gante
2025-07-30 18:30:28 +01:00
committed by GitHub
parent 9b3203f47b
commit 4f93cc9174
6 changed files with 150 additions and 49 deletions

View File

@@ -40,7 +40,7 @@ from transformers.utils import (
is_vision_available,
)
-from ...generation.test_utils import GenerationTesterMixin
+from ...generation.test_utils import GenerationTesterMixin, has_similar_generate_outputs
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import (
TEST_EAGER_MATCHES_SDPA_INFERENCE_PARAMETERIZATION,
@@ -650,7 +650,7 @@ class Kosmos2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
outputs_from_embeds = model.generate(
input_ids=input_ids, inputs_embeds=inputs_embeds, **generation_kwargs, **inputs_dict
)
-self._check_similar_generate_outputs(outputs_from_ids, outputs_from_embeds)
+self.assertTrue(has_similar_generate_outputs(outputs_from_ids, outputs_from_embeds))
# input_ids is not a required input on most models -- if we don't pass it, the newly generated tokens will
# be the same
@@ -658,7 +658,7 @@ class Kosmos2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
inputs_embeds=inputs_embeds, **generation_kwargs, **inputs_dict
)
outputs_from_embeds.sequences = outputs_from_embeds.sequences[:, inputs_embeds.shape[1] :]
-self._check_similar_generate_outputs(outputs_from_embeds_wo_ids, outputs_from_embeds)
+self.assertTrue(has_similar_generate_outputs(outputs_from_embeds_wo_ids, outputs_from_embeds))
# We will verify our results on an image of cute cats