Mamba / FalconMamba: Fix mamba left padding (#32677)
* fix mamba left padding * Apply suggestions from code review Co-authored-by: Pablo Montalvo <39954772+molbap@users.noreply.github.com> * fix copies * test with `inputs_embeds` * Update src/transformers/models/falcon_mamba/modeling_falcon_mamba.py Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com> * copies * clairfy * fix last comments * remove --------- Co-authored-by: Pablo Montalvo <39954772+molbap@users.noreply.github.com> Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
This commit is contained in:
@@ -101,6 +101,7 @@ class FalconMambaModelTester:
|
||||
self, gradient_checkpointing=False, scale_attn_by_inverse_layer_idx=False, reorder_and_upcast_attn=False
|
||||
):
|
||||
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
|
||||
attention_mask = ids_tensor([self.batch_size, self.seq_length], 1)
|
||||
|
||||
sequence_labels = None
|
||||
token_labels = None
|
||||
@@ -119,7 +120,7 @@ class FalconMambaModelTester:
|
||||
return (
|
||||
config,
|
||||
input_ids,
|
||||
None,
|
||||
attention_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
@@ -153,6 +154,7 @@ class FalconMambaModelTester:
|
||||
(
|
||||
config,
|
||||
input_ids,
|
||||
attention_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
@@ -161,6 +163,7 @@ class FalconMambaModelTester:
|
||||
return (
|
||||
config,
|
||||
input_ids,
|
||||
attention_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
@@ -253,12 +256,12 @@ class FalconMambaModelTester:
|
||||
(
|
||||
config,
|
||||
input_ids,
|
||||
_,
|
||||
attention_mask,
|
||||
sequence_labels,
|
||||
token_labels,
|
||||
choice_labels,
|
||||
) = self.prepare_config_and_inputs()
|
||||
inputs_dict = {"input_ids": input_ids}
|
||||
inputs_dict = {"input_ids": input_ids, "attention_mask": attention_mask}
|
||||
return config, inputs_dict
|
||||
|
||||
|
||||
@@ -491,3 +494,33 @@ class FalconMambaIntegrationTests(unittest.TestCase):
|
||||
self.tokenizer.batch_decode(out, skip_special_tokens=False)[0],
|
||||
"Hello today I am going to show you how to make a simple and easy to make paper plane.\nStep",
|
||||
)
|
||||
|
||||
def test_batched_generation(self):
|
||||
model_id = "tiiuae/falcon-mamba-7b"
|
||||
tok = AutoTokenizer.from_pretrained(model_id)
|
||||
tok.pad_token_id = tok.eos_token_id
|
||||
|
||||
texts = ["Hello today", "Hello my name is Younes and today"]
|
||||
|
||||
EXPECTED_OUTPUT = [
|
||||
"Hello today I'm going to show you how to make a 3D model of a house.\n",
|
||||
"Hello my name is Younes and today I will be talking about the topic of “The importance of the internet in our life”.\n",
|
||||
]
|
||||
|
||||
inputs = tok(texts, return_tensors="pt", padding=True, return_token_type_ids=False).to(torch_device)
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id, device_map=0, torch_dtype=torch.bfloat16)
|
||||
|
||||
out = model.generate(**inputs, max_new_tokens=20)
|
||||
out = tok.batch_decode(out, skip_special_tokens=True)
|
||||
|
||||
self.assertListEqual(out, EXPECTED_OUTPUT)
|
||||
|
||||
# We test the same generations with inputs_embeds
|
||||
with torch.no_grad():
|
||||
inputs_embeds = model.get_input_embeddings()(inputs.pop("input_ids"))
|
||||
|
||||
inputs["inputs_embeds"] = inputs_embeds
|
||||
out = model.generate(**inputs, max_new_tokens=20)
|
||||
out = tok.batch_decode(out, skip_special_tokens=True)
|
||||
|
||||
self.assertListEqual(out, EXPECTED_OUTPUT)
|
||||
|
||||
Reference in New Issue
Block a user