Update BLOOM parameter counts (#18531)
* Update BLOOM parameter counts * Update BLOOM parameter counts
This commit is contained in:
committed by
GitHub
parent
153d1361c7
commit
56ef0ba447
@@ -18,11 +18,11 @@ The BLOOM model has been proposed with its various versions through the [BigScie
|
|||||||
The architecture of BLOOM is essentially similar to GPT3 (auto-regressive model for next token prediction), but has been trained on 46 different languages and 13 programming languages.
|
The architecture of BLOOM is essentially similar to GPT3 (auto-regressive model for next token prediction), but has been trained on 46 different languages and 13 programming languages.
|
||||||
Several smaller versions of the models have been trained on the same dataset. BLOOM is available in the following versions:
|
Several smaller versions of the models have been trained on the same dataset. BLOOM is available in the following versions:
|
||||||
|
|
||||||
- [bloom-350m](https://huggingface.co/bigscience/bloom-350m)
|
- [bloom-560m](https://huggingface.co/bigscience/bloom-560m)
|
||||||
- [bloom-760m](https://huggingface.co/bigscience/bloom-760m)
|
- [bloom-1b1](https://huggingface.co/bigscience/bloom-1b1)
|
||||||
- [bloom-1b3](https://huggingface.co/bigscience/bloom-1b3)
|
- [bloom-1b7](https://huggingface.co/bigscience/bloom-1b7)
|
||||||
- [bloom-2b5](https://huggingface.co/bigscience/bloom-2b5)
|
- [bloom-3b](https://huggingface.co/bigscience/bloom-3b)
|
||||||
- [bloom-6b3](https://huggingface.co/bigscience/bloom-6b3)
|
- [bloom-7b1](https://huggingface.co/bigscience/bloom-7b1)
|
||||||
- [bloom](https://huggingface.co/bigscience/bloom) (176B parameters)
|
- [bloom](https://huggingface.co/bigscience/bloom) (176B parameters)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -31,11 +31,11 @@ logger = logging.get_logger(__name__)
|
|||||||
|
|
||||||
BLOOM_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
BLOOM_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
||||||
"bigscience/bloom": "https://huggingface.co/bigscience/bloom/resolve/main/config.json",
|
"bigscience/bloom": "https://huggingface.co/bigscience/bloom/resolve/main/config.json",
|
||||||
"bigscience/bloom-350m": "https://huggingface.co/bigscience/bloom-350m/blob/main/config.json",
|
"bigscience/bloom-560m": "https://huggingface.co/bigscience/bloom-560m/blob/main/config.json",
|
||||||
"bigscience/bloom-760m": "https://huggingface.co/bigscience/bloom-760m/blob/main/config.json",
|
"bigscience/bloom-1b1": "https://huggingface.co/bigscience/bloom-1b1/blob/main/config.json",
|
||||||
"bigscience/bloom-1b3": "https://huggingface.co/bigscience/bloom-1b3/blob/main/config.json",
|
"bigscience/bloom-1b7": "https://huggingface.co/bigscience/bloom-1b7/blob/main/config.json",
|
||||||
"bigscience/bloom-2b5": "https://huggingface.co/bigscience/bloom-2b5/blob/main/config.json",
|
"bigscience/bloom-3b": "https://huggingface.co/bigscience/bloom-3b/blob/main/config.json",
|
||||||
"bigscience/bloom-6b3": "https://huggingface.co/bigscience/bloom-6b3/blob/main/config.json",
|
"bigscience/bloom-7b1": "https://huggingface.co/bigscience/bloom-7b1/blob/main/config.json",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -38,17 +38,17 @@ from .configuration_bloom import BloomConfig
|
|||||||
|
|
||||||
logger = logging.get_logger(__name__)
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
_CHECKPOINT_FOR_DOC = "bigscience/bloom-350m"
|
_CHECKPOINT_FOR_DOC = "bigscience/bloom-560m"
|
||||||
_CONFIG_FOR_DOC = "BloomConfig"
|
_CONFIG_FOR_DOC = "BloomConfig"
|
||||||
_TOKENIZER_FOR_DOC = "BloomTokenizerFast"
|
_TOKENIZER_FOR_DOC = "BloomTokenizerFast"
|
||||||
|
|
||||||
BLOOM_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
BLOOM_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||||
"bigscience/bigscience-small-testing",
|
"bigscience/bigscience-small-testing",
|
||||||
"bigscience/bloom-350m",
|
"bigscience/bloom-560m",
|
||||||
"bigscience/bloom-760m",
|
"bigscience/bloom-1b1",
|
||||||
"bigscience/bloom-1b3",
|
"bigscience/bloom-1b7",
|
||||||
"bigscience/bloom-2b5",
|
"bigscience/bloom-3b",
|
||||||
"bigscience/bloom-6b3",
|
"bigscience/bloom-7b1",
|
||||||
"bigscience/bloom",
|
"bigscience/bloom",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -36,11 +36,11 @@ VOCAB_FILES_NAMES = {"tokenizer_file": "tokenizer.json"}
|
|||||||
PRETRAINED_VOCAB_FILES_MAP = {
|
PRETRAINED_VOCAB_FILES_MAP = {
|
||||||
"tokenizer_file": {
|
"tokenizer_file": {
|
||||||
"bigscience/tokenizer": "https://huggingface.co/bigscience/tokenizer/blob/main/tokenizer.json",
|
"bigscience/tokenizer": "https://huggingface.co/bigscience/tokenizer/blob/main/tokenizer.json",
|
||||||
"bigscience/bloom-350m": "https://huggingface.co/bigscience/bloom-350m/blob/main/tokenizer.json",
|
"bigscience/bloom-560m": "https://huggingface.co/bigscience/bloom-560m/blob/main/tokenizer.json",
|
||||||
"bigscience/bloom-760m": "https://huggingface.co/bigscience/bloom-760m/blob/main/tokenizer.json",
|
"bigscience/bloom-1b1": "https://huggingface.co/bigscience/bloom-1b1/blob/main/tokenizer.json",
|
||||||
"bigscience/bloom-1b3": "https://huggingface.co/bigscience/bloom-1b3/blob/main/tokenizer.json",
|
"bigscience/bloom-1b7": "https://huggingface.co/bigscience/bloom-1b7/blob/main/tokenizer.json",
|
||||||
"bigscience/bloom-2b5": "https://huggingface.co/bigscience/bloom-2b5/blob/main/tokenizer.json",
|
"bigscience/bloom-3b": "https://huggingface.co/bigscience/bloom-3b/blob/main/tokenizer.json",
|
||||||
"bigscience/bloom-6b3": "https://huggingface.co/bigscience/bloom-2b5/blob/main/tokenizer.json",
|
"bigscience/bloom-7b1": "https://huggingface.co/bigscience/bloom-7b1/blob/main/tokenizer.json",
|
||||||
"bigscience/bloom": "https://huggingface.co/bigscience/bloom/blob/main/tokenizer.json",
|
"bigscience/bloom": "https://huggingface.co/bigscience/bloom/blob/main/tokenizer.json",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -379,27 +379,27 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase)
|
|||||||
def test_simple_generation(self):
|
def test_simple_generation(self):
|
||||||
# This test is a bit flaky. For some GPU architectures, pytorch sets by default allow_fp16_reduced_precision_reduction = True and some operations
|
# This test is a bit flaky. For some GPU architectures, pytorch sets by default allow_fp16_reduced_precision_reduction = True and some operations
|
||||||
# do not give the same results under this configuration, especially torch.baddbmm and torch.bmm. https://pytorch.org/docs/stable/notes/numerical_accuracy.html#fp16-on-mi200
|
# do not give the same results under this configuration, especially torch.baddbmm and torch.bmm. https://pytorch.org/docs/stable/notes/numerical_accuracy.html#fp16-on-mi200
|
||||||
# As we leave the default value (True) for allow_fp16_reduced_precision_reduction, the tests fail when running in half-precision with smaller models (350m)
|
# As we leave the default value (True) for allow_fp16_reduced_precision_reduction, the tests fail when running in half-precision with smaller models (560m)
|
||||||
# Please see: https://pytorch.org/docs/stable/notes/cuda.html#reduced-precision-reduction-in-fp16-gemms
|
# Please see: https://pytorch.org/docs/stable/notes/cuda.html#reduced-precision-reduction-in-fp16-gemms
|
||||||
# This discrepancy is observed only when using small models and seems to be stable for larger models.
|
# This discrepancy is observed only when using small models and seems to be stable for larger models.
|
||||||
# Our conclusion is that these operations are flaky for small inputs but seem to be stable for larger inputs (for the functions `baddbmm` and `bmm`), and therefore for larger models.
|
# Our conclusion is that these operations are flaky for small inputs but seem to be stable for larger inputs (for the functions `baddbmm` and `bmm`), and therefore for larger models.
|
||||||
|
|
||||||
# Here is a summary of an ablation study of our observations
|
# Here is a summary of an ablation study of our observations
|
||||||
# EXPECTED_OUTPUT = "I enjoy walking with my cute dog, and I love to watch the kids play. I am a very active person, and I am a very good listener. I am a very good person, and I am a very good person. I am a"
|
# EXPECTED_OUTPUT = "I enjoy walking with my cute dog, and I love to watch the kids play. I am a very active person, and I am a very good listener. I am a very good person, and I am a very good person. I am a"
|
||||||
# 350m + allow_fp16_reduced_precision_reduction = False + torch.bmm ==> PASS
|
# 560m + allow_fp16_reduced_precision_reduction = False + torch.bmm ==> PASS
|
||||||
# 350m + allow_fp16_reduced_precision_reduction = False + torch.baddbmm ==> PASS
|
# 560m + allow_fp16_reduced_precision_reduction = False + torch.baddbmm ==> PASS
|
||||||
# 350m + allow_fp16_reduced_precision_reduction = True + torch.baddbmm ==> PASS
|
# 560m + allow_fp16_reduced_precision_reduction = True + torch.baddbmm ==> PASS
|
||||||
# 350m + allow_fp16_reduced_precision_reduction = True + torch.bmm ==> FAIL
|
# 560m + allow_fp16_reduced_precision_reduction = True + torch.bmm ==> FAIL
|
||||||
|
|
||||||
# EXPECTED_OUTPUT = "I enjoy walking with my cute dog, but I also enjoy hiking, biking, and swimming. I love to cook and bake. I love to cook and bake. I love to cook and bake. I love to cook and bake. I love"
|
# EXPECTED_OUTPUT = "I enjoy walking with my cute dog, but I also enjoy hiking, biking, and swimming. I love to cook and bake. I love to cook and bake. I love to cook and bake. I love to cook and bake. I love"
|
||||||
# >=760m + allow_fp16_reduced_precision_reduction = True + torch.baddbmm ==> PASS (for use_cache=True and use_cache=False)
|
# >=1b1 + allow_fp16_reduced_precision_reduction = True + torch.baddbmm ==> PASS (for use_cache=True and use_cache=False)
|
||||||
# >=760m + allow_fp16_reduced_precision_reduction = True + torch.bmm ==> PASS
|
# >=1b1 + allow_fp16_reduced_precision_reduction = True + torch.bmm ==> PASS
|
||||||
# >=760m + allow_fp16_reduced_precision_reduction = False + torch.bmm ==> PASS
|
# >=1b1 + allow_fp16_reduced_precision_reduction = False + torch.bmm ==> PASS
|
||||||
|
|
||||||
path_350m = "bigscience/bloom-350m"
|
path_560m = "bigscience/bloom-560m"
|
||||||
model = BloomForCausalLM.from_pretrained(path_350m, use_cache=True, revision="gs555750").cuda()
|
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").cuda()
|
||||||
model = model.eval()
|
model = model.eval()
|
||||||
tokenizer = BloomTokenizerFast.from_pretrained(path_350m)
|
tokenizer = BloomTokenizerFast.from_pretrained(path_560m)
|
||||||
|
|
||||||
input_sentence = "I enjoy walking with my cute dog"
|
input_sentence = "I enjoy walking with my cute dog"
|
||||||
# This output has been obtained using fp32 model on the huggingface DGX workstation - NVIDIA A100 GPU
|
# This output has been obtained using fp32 model on the huggingface DGX workstation - NVIDIA A100 GPU
|
||||||
@@ -416,10 +416,10 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase)
|
|||||||
@slow
|
@slow
|
||||||
@require_torch_gpu
|
@require_torch_gpu
|
||||||
def test_batch_generation(self):
|
def test_batch_generation(self):
|
||||||
path_350m = "bigscience/bloom-350m"
|
path_560m = "bigscience/bloom-560m"
|
||||||
model = BloomForCausalLM.from_pretrained(path_350m, use_cache=True, revision="gs555750").cuda()
|
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").cuda()
|
||||||
model = model.eval()
|
model = model.eval()
|
||||||
tokenizer = BloomTokenizerFast.from_pretrained(path_350m, padding_side="left")
|
tokenizer = BloomTokenizerFast.from_pretrained(path_560m, padding_side="left")
|
||||||
|
|
||||||
input_sentence = ["I enjoy walking with my cute dog", "I enjoy walking with my cute dog"]
|
input_sentence = ["I enjoy walking with my cute dog", "I enjoy walking with my cute dog"]
|
||||||
|
|
||||||
@@ -437,10 +437,10 @@ class BloomModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase)
|
|||||||
@require_torch_gpu
|
@require_torch_gpu
|
||||||
def test_batch_generation_padd(self):
|
def test_batch_generation_padd(self):
|
||||||
|
|
||||||
path_350m = "bigscience/bloom-350m"
|
path_560m = "bigscience/bloom-560m"
|
||||||
model = BloomForCausalLM.from_pretrained(path_350m, use_cache=True, revision="gs555750").cuda()
|
model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").cuda()
|
||||||
model = model.eval()
|
model = model.eval()
|
||||||
tokenizer = BloomTokenizerFast.from_pretrained(path_350m, padding_side="left")
|
tokenizer = BloomTokenizerFast.from_pretrained(path_560m, padding_side="left")
|
||||||
|
|
||||||
input_sentence = ["I enjoy walking with my cute dog", "Hello my name is"]
|
input_sentence = ["I enjoy walking with my cute dog", "Hello my name is"]
|
||||||
input_sentence_without_pad = "Hello my name is"
|
input_sentence_without_pad = "Hello my name is"
|
||||||
|
|||||||
@@ -215,7 +215,7 @@ PYTORCH_EXPORT_MODELS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
PYTORCH_EXPORT_WITH_PAST_MODELS = {
|
PYTORCH_EXPORT_WITH_PAST_MODELS = {
|
||||||
("bloom", "bigscience/bloom-350m"),
|
("bloom", "bigscience/bloom-560m"),
|
||||||
("gpt2", "gpt2"),
|
("gpt2", "gpt2"),
|
||||||
("gpt-neo", "EleutherAI/gpt-neo-125M"),
|
("gpt-neo", "EleutherAI/gpt-neo-125M"),
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user