Llama 3.1: Fix incorrect inv_freq assignment (#32330)

fix 💩
This commit is contained in:
Joao Gante
2024-07-31 11:12:46 +01:00
committed by GitHub
parent 7f552e28e0
commit b75ad56620
2 changed files with 33 additions and 5 deletions

View File

@@ -22,7 +22,7 @@ import pytest
from packaging import version
from parameterized import parameterized
from transformers import LlamaConfig, StaticCache, is_torch_available, set_seed
from transformers import AutoTokenizer, LlamaConfig, StaticCache, is_torch_available, set_seed
from transformers.testing_utils import (
require_bitsandbytes,
require_flash_attn,
@@ -718,6 +718,34 @@ class LlamaIntegrationTest(unittest.TestCase):
# 8 is for A100 / A10 and 7 for T4
cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]
@slow
@require_read_token
def test_llama_3_1_hard(self):
"""
An integration test for llama 3.1. It tests against a long output to ensure the subtle numerical differences
from llama 3.1.'s RoPE can be detected
"""
EXPECTED_TEXT = (
"Tell me about the french revolution. The french revolution was a period of radical social and political "
"upheaval in France that lasted from 1789 until 1799. It was a time of great change and upheaval, marked "
"by the overthrow of the monarchy, the rise of the middle class, and the eventual establishment of the "
"First French Republic.\nThe revolution began in 1789 with the Estates-General, a representative "
"assembly that had not met since 1614. The Third Estate, which represented the common people, "
"demanded greater representation and eventually broke away to form the National Assembly. This marked "
"the beginning of the end of the absolute monarchy and the rise of the middle class.\n"
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
model = LlamaForCausalLM.from_pretrained(
"meta-llama/Meta-Llama-3.1-8B-Instruct", device_map="auto", torch_dtype=torch.bfloat16
)
input_text = ["Tell me about the french revolution."]
model_inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
generated_ids = model.generate(**model_inputs, max_new_tokens=128, do_sample=False)
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
self.assertEqual(generated_text, EXPECTED_TEXT)
@slow
@require_read_token
def test_model_7b_logits_bf16(self):