MarianMTModel.from_pretrained('Helsinki-NLP/opus-marian-en-de') (#3908)
Co-Authored-By: Stefan Schweter <stefan@schweter.it>
This commit is contained in:
@@ -65,6 +65,9 @@ class BartConfig(PretrainedConfig):
|
||||
normalize_before=False,
|
||||
add_final_layer_norm=False,
|
||||
scale_embedding=False,
|
||||
normalize_embedding=True,
|
||||
static_position_embeddings=False,
|
||||
add_bias_logits=False,
|
||||
**common_kwargs
|
||||
):
|
||||
r"""
|
||||
@@ -73,6 +76,8 @@ class BartConfig(PretrainedConfig):
|
||||
config = BartConfig.from_pretrained('bart-large')
|
||||
model = BartModel(config)
|
||||
"""
|
||||
if "hidden_size" in common_kwargs:
|
||||
raise ValueError("hidden size is called d_model")
|
||||
super().__init__(
|
||||
num_labels=num_labels,
|
||||
pad_token_id=pad_token_id,
|
||||
@@ -94,12 +99,17 @@ class BartConfig(PretrainedConfig):
|
||||
self.max_position_embeddings = max_position_embeddings
|
||||
self.init_std = init_std # Normal(0, this parameter)
|
||||
self.activation_function = activation_function
|
||||
self.scale_embedding = scale_embedding # scale factor will be sqrt(d_model) if True
|
||||
|
||||
# True for mbart, False otherwise
|
||||
# Params introduced for Mbart
|
||||
self.scale_embedding = scale_embedding # scale factor will be sqrt(d_model) if True
|
||||
self.normalize_embedding = normalize_embedding # True for mbart, False otherwise
|
||||
self.normalize_before = normalize_before # combo of fairseq's encoder_ and decoder_normalize_before
|
||||
self.add_final_layer_norm = add_final_layer_norm
|
||||
|
||||
# Params introduced for Marian
|
||||
self.add_bias_logits = add_bias_logits
|
||||
self.static_position_embeddings = static_position_embeddings
|
||||
|
||||
# 3 Types of Dropout
|
||||
self.attention_dropout = attention_dropout
|
||||
self.activation_dropout = activation_dropout
|
||||
|
||||
Reference in New Issue
Block a user