Support for Falcon2-11B (#30771)
* remove unrelated changes * remove unrelated changes on phi and stable LM * add: Test for Falcon 10B * fix: formatting * fix: loading the falcon 10B in 8 bit precision using bitsandbytes. * fix: device placement * fix: broken tests. * fix: backwards compatibility for falcon 1B architecture. * chore: updated test. * chore: test_modeling_falcon.py to use the 11B model. * chore: minor edit * chore: formatting. --------- Co-authored-by: Pablo Montalvo <39954772+molbap@users.noreply.github.com> Co-authored-by: ArthurZucker <arthur.zucker@gmail.com>
This commit is contained in:
committed by
GitHub
parent
f63d822242
commit
e52741f601
@@ -602,6 +602,25 @@ class FalconLanguageGenerationTest(unittest.TestCase):
|
||||
|
||||
self.assertEqual(output_str, EXPECTED_OUTPUT)
|
||||
|
||||
@slow
@require_bitsandbytes
def test_lm_generate_falcon_11b(self):
    """Check greedy generation of the Falcon 11B checkpoint loaded in 8-bit.

    Loads the "tiiuae/falcon-11B" tokenizer and model, generates 9 new
    tokens without sampling from a fixed prompt, and asserts that the
    first decoded sequence equals the expected string exactly.
    """
    checkpoint = "tiiuae/falcon-11B"
    prompt = "Two roads diverged in a yellow wood,"
    EXPECTED_OUTPUT = "Two roads diverged in a yellow wood,\nAnd sorry I could not travel both\n"

    tokenizer = AutoTokenizer.from_pretrained(checkpoint, padding_side="left")

    # The "" key presumably places the whole model on the test device
    # (verify against the `device_map` documentation).
    placement = {"": torch_device}
    model = FalconForCausalLM.from_pretrained(checkpoint, device_map=placement, load_in_8bit=True)
    model.eval()

    encoded = tokenizer(prompt, return_tensors="pt", return_token_type_ids=False)
    encoded = encoded.to(torch_device)

    # do_sample=False so the output can be compared against a fixed string.
    generated_ids = model.generate(**encoded, do_sample=False, max_new_tokens=9)
    decoded = tokenizer.batch_decode(generated_ids)

    self.assertEqual(decoded[0], EXPECTED_OUTPUT)
|
||||
|
||||
@slow
|
||||
def test_lm_generation_big_models(self):
|
||||
# The big models are way too big for the CI, so we use tiny random models that resemble their
|
||||
@@ -647,7 +666,7 @@ class FalconLanguageGenerationTest(unittest.TestCase):
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
"tiiuae/falcon-7b",
|
||||
device_map="auto",
|
||||
device_map={"": torch_device},
|
||||
load_in_4bit=True,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user