From 2c731fd129b55a6bb437d637a7187579a1c3b7ab Mon Sep 17 00:00:00 2001 From: thomwolf Date: Fri, 2 Nov 2018 01:38:22 +0100 Subject: [PATCH] small tweaks --- modeling_pytorch.py | 32 ++++++++++++++++++++++---------- run_classifier_pytorch.py | 25 ++++++++++--------------- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/modeling_pytorch.py b/modeling_pytorch.py index 2ed222071e..c1f1c185d0 100644 --- a/modeling_pytorch.py +++ b/modeling_pytorch.py @@ -349,7 +349,6 @@ class BertModel(nn.Module): """BERT model ("Bidirectional Embedding Representations from a Transformer"). Example usage: - ```python # Already been converted into WordPiece token ids input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) @@ -359,16 +358,10 @@ class BertModel(nn.Module): config = modeling.BertConfig(vocab_size=32000, hidden_size=512, num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024) - model = modeling.BertModel(config=config, is_training=True, - input_ids=input_ids, input_mask=input_mask, token_type_ids=token_type_ids) - - label_embeddings = tf.get_variable(...) - pooled_output = model.get_pooled_output() - logits = tf.matmul(pooled_output, label_embeddings) - ... + model = modeling.BertModel(config=config) + all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask) ``` """ - def __init__(self, config: BertConfig): """Constructor for BertModel. @@ -400,7 +393,26 @@ class BertModel(nn.Module): return all_encoder_layers, pooled_output class BertForSequenceClassification(nn.Module): - def __init__(self, config, num_labels): + """BERT model for classification. + This module is composed of the BERT model with a linear layer on top of + the pooled output. 
+ + Example usage: + ```python + # Already been converted into WordPiece token ids + input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]]) + input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]]) + token_type_ids = torch.LongTensor([[0, 0, 1], [0, 2, 0]]) + + config = modeling.BertConfig(vocab_size=32000, hidden_size=512, + num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024) + + num_labels = 2 + + model = modeling.BertForSequenceClassification(config, num_labels) + logits = model(input_ids, token_type_ids, input_mask) + ``` + """ def __init__(self, config, num_labels): super(BertForSequenceClassification, self).__init__() self.bert = BertModel(config) self.dropout = nn.Dropout(config.hidden_dropout_prob) diff --git a/run_classifier_pytorch.py b/run_classifier_pytorch.py index 3ad28726a3..0fde0938df 100644 --- a/run_classifier_pytorch.py +++ b/run_classifier_pytorch.py @@ -115,16 +115,10 @@ parser.add_argument("--save_checkpoints_steps", default = 1000, type = int, help = "How often to save the model checkpoint.") -parser.add_argument("--iterations_per_loop", - default = 1000, - type = int, - help = "How many steps to make in each estimator call.") - parser.add_argument("--no_cuda", default = False, type = bool, help = "Whether not to use CUDA when available") - parser.add_argument("--local_rank", type=int, default=-1, @@ -518,16 +512,17 @@ def main(): model.train() global_step = 0 - for input_ids, input_mask, segment_ids, label_ids in train_dataloader: - input_ids = input_ids.to(device) - input_mask = input_mask.float().to(device) - segment_ids = segment_ids.to(device) - label_ids = label_ids.to(device) + for epoch in args.num_train_epochs: + for input_ids, input_mask, segment_ids, label_ids in train_dataloader: + input_ids = input_ids.to(device) + input_mask = input_mask.float().to(device) + segment_ids = segment_ids.to(device) + label_ids = label_ids.to(device) - loss, _ = model(input_ids, segment_ids, input_mask, label_ids) - loss.backward() - optimizer.step() - 
global_step += 1 + loss, _ = model(input_ids, segment_ids, input_mask, label_ids) + loss.backward() + optimizer.step() + global_step += 1 if args.do_eval: eval_examples = processor.get_dev_examples(args.data_dir)