From 822915142b2f201c0b01acd7cffe1b05994d2d82 Mon Sep 17 00:00:00 2001
From: thomwolf <thomwolf@gmail.com>
Date: Tue, 5 Feb 2019 16:34:32 +0100
Subject: [PATCH] fix docstring

---
 pytorch_pretrained_bert/modeling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_pretrained_bert/modeling.py b/pytorch_pretrained_bert/modeling.py
index d05ccd36bf..e4d6d9159f 100644
--- a/pytorch_pretrained_bert/modeling.py
+++ b/pytorch_pretrained_bert/modeling.py
@@ -985,7 +985,7 @@ class BertForTokenClassification(PreTrainedBertModel):
             selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max
             input sequence length in the current batch. It's the mask that we typically use for attention when
             a batch has varying length sentences.
-        `labels`: labels for the classification output: torch.LongTensor of shape [batch_size]
+        `labels`: labels for the classification output: torch.LongTensor of shape [batch_size, sequence_length]
             with indices selected in [0, ..., num_labels].
 
     Outputs: