MarianMTModel.from_pretrained('Helsinki-NLP/opus-marian-en-de') (#3908)

Co-Authored-By: Stefan Schweter <stefan@schweter.it>
This commit is contained in:
Sam Shleifer
2020-04-28 18:22:37 -04:00
committed by GitHub
parent d714dfeaa8
commit 847e7f3379
12 changed files with 887 additions and 26 deletions

View File

@@ -65,6 +65,9 @@ class BartConfig(PretrainedConfig):
normalize_before=False,
add_final_layer_norm=False,
scale_embedding=False,
normalize_embedding=True,
static_position_embeddings=False,
add_bias_logits=False,
**common_kwargs
):
r"""
@@ -73,6 +76,8 @@ class BartConfig(PretrainedConfig):
config = BartConfig.from_pretrained('bart-large')
model = BartModel(config)
"""
if "hidden_size" in common_kwargs:
raise ValueError("hidden size is called d_model")
super().__init__(
num_labels=num_labels,
pad_token_id=pad_token_id,
@@ -94,12 +99,17 @@ class BartConfig(PretrainedConfig):
self.max_position_embeddings = max_position_embeddings
self.init_std = init_std # Normal(0, this parameter)
self.activation_function = activation_function
self.scale_embedding = scale_embedding # scale factor will be sqrt(d_model) if True
# True for mbart, False otherwise
# Params introduced for Mbart
self.scale_embedding = scale_embedding # scale factor will be sqrt(d_model) if True
self.normalize_embedding = normalize_embedding # True for mbart, False otherwise
self.normalize_before = normalize_before # combo of fairseq's encoder_ and decoder_normalize_before
self.add_final_layer_norm = add_final_layer_norm
# Params introduced for Marian
self.add_bias_logits = add_bias_logits
self.static_position_embeddings = static_position_embeddings
# 3 Types of Dropout
self.attention_dropout = attention_dropout
self.activation_dropout = activation_dropout