From c14a22272f3fc17bb2eaeca62986c31a7d26bc85 Mon Sep 17 00:00:00 2001 From: Lysandre Date: Thu, 31 Oct 2019 14:04:10 +0000 Subject: [PATCH] ALBERT passes all tests --- transformers/configuration_albert.py | 4 +--- transformers/modeling_albert.py | 9 +++------ transformers/tests/tokenization_albert_test.py | 2 +- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/transformers/configuration_albert.py b/transformers/configuration_albert.py index 15437dbbea..04f9fa8d60 100644 --- a/transformers/configuration_albert.py +++ b/transformers/configuration_albert.py @@ -7,7 +7,7 @@ class AlbertConfig(PretrainedConfig): """ def __init__(self, - vocab_size_or_config_json_file, + vocab_size_or_config_json_file=30000, embedding_size=128, hidden_size=4096, num_hidden_layers=12, @@ -15,7 +15,6 @@ class AlbertConfig(PretrainedConfig): num_attention_heads=64, intermediate_size=16384, inner_group_num=1, - down_scale_factor=1, hidden_act="gelu_new", hidden_dropout_prob=0, attention_probs_dropout_prob=0, @@ -61,7 +60,6 @@ class AlbertConfig(PretrainedConfig): self.num_hidden_groups = num_hidden_groups self.num_attention_heads = num_attention_heads self.inner_group_num = inner_group_num - self.down_scale_factor = down_scale_factor self.hidden_act = hidden_act self.intermediate_size = intermediate_size self.hidden_dropout_prob = hidden_dropout_prob diff --git a/transformers/modeling_albert.py b/transformers/modeling_albert.py index f906352311..9bb38dead9 100644 --- a/transformers/modeling_albert.py +++ b/transformers/modeling_albert.py @@ -202,17 +202,14 @@ class AlbertLayerGroup(nn.Module): layer_attentions = () for albert_layer in self.albert_layers: - if self.output_hidden_states: - layer_hidden_states = layer_hidden_states + (hidden_states,) - layer_output = albert_layer(hidden_states, attention_mask, head_mask) hidden_states = layer_output[0] if self.output_attentions: layer_attentions = layer_attentions + (layer_output[1],) - if self.output_hidden_states: - layer_hidden_states = layer_hidden_states + (hidden_states,) + if self.output_hidden_states: + layer_hidden_states = layer_hidden_states + (hidden_states,) outputs = (hidden_states,) if self.output_hidden_states: @@ -247,7 +244,7 @@ class AlbertTransformer(nn.Module): hidden_states = layer_group_output[0] if self.output_attentions: - all_attentions = all_attentions + layer_group_output[1] + all_attentions = all_attentions + layer_group_output[-1] if self.output_hidden_states: all_hidden_states = all_hidden_states + (hidden_states,) diff --git a/transformers/tests/tokenization_albert_test.py b/transformers/tests/tokenization_albert_test.py index dd63f6756b..59eb3bceb0 100644 --- a/transformers/tests/tokenization_albert_test.py +++ b/transformers/tests/tokenization_albert_test.py @@ -22,7 +22,7 @@ from transformers.tokenization_albert import (AlbertTokenizer, SPIECE_UNDERLINE) from .tokenization_tests_commons import CommonTestCases SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), - 'fixtures/30k-clean.model') + 'fixtures/spiece.model') class AlbertTokenizationTest(CommonTestCases.CommonTokenizerTester):