diff --git a/pytorch_pretrained_bert/modeling.py b/pytorch_pretrained_bert/modeling.py index 473b7e7022..70a41d91e7 100644 --- a/pytorch_pretrained_bert/modeling.py +++ b/pytorch_pretrained_bert/modeling.py @@ -569,10 +569,10 @@ class BertModel(PreTrainedBertModel): # Already been converted into WordPiece token ids input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) - token_type_ids = torch.LongTensor([[0, 0, 1], [0, 2, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) - config = modeling.BertConfig(vocab_size=32000, hidden_size=512, - num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024) + config = modeling.BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) model = modeling.BertModel(config=config) all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask) @@ -658,10 +658,10 @@ class BertForPreTraining(PreTrainedBertModel): # Already been converted into WordPiece token ids input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) - token_type_ids = torch.LongTensor([[0, 0, 1], [0, 2, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) - config = BertConfig(vocab_size=32000, hidden_size=512, - num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024) + config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) model = BertForPreTraining(config) masked_lm_logits_scores, seq_relationship_logits = model(input_ids, token_type_ids, input_mask) @@ -721,10 +721,10 @@ class BertForMaskedLM(PreTrainedBertModel): # Already been converted into WordPiece token ids input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) - token_type_ids = torch.LongTensor([[0, 0, 1], [0, 2, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) - config = BertConfig(vocab_size=32000, hidden_size=512, - num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024) + config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) model = BertForMaskedLM(config) masked_lm_logits_scores = model(input_ids, token_type_ids, input_mask) @@ -785,8 +785,8 @@ class BertForNextSentencePrediction(PreTrainedBertModel): input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) - config = BertConfig(vocab_size=32000, hidden_size=512, - num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024) + config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) model = BertForNextSentencePrediction(config) seq_relationship_logits = model(input_ids, token_type_ids, input_mask) @@ -845,10 +845,10 @@ class BertForSequenceClassification(PreTrainedBertModel): # Already been converted into WordPiece token ids input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) - token_type_ids = torch.LongTensor([[0, 0, 1], [0, 2, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) - config = BertConfig(vocab_size=32000, hidden_size=512, - num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024) + config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) num_labels = 2 @@ -989,10 +989,10 @@ class BertForQuestionAnswering(PreTrainedBertModel): # Already been converted into WordPiece token ids input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) - token_type_ids = torch.LongTensor([[0, 0, 1], [0, 2, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]]) - config = BertConfig(vocab_size=32000, hidden_size=512, - num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024) + config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768, + num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072) model = BertForQuestionAnswering(config) start_logits, end_logits = model(input_ids, token_type_ids, input_mask)