From 4d1ad832368254310eae058dae4dc07e7ed57a6e Mon Sep 17 00:00:00 2001
From: John Hewitt <john.hewitt64@gmail.com>
Date: Wed, 27 Feb 2019 14:50:41 -0800
Subject: [PATCH] update docstring of BERT tokenizer to reflect
 do_wordpiece_only

---
 pytorch_pretrained_bert/tokenization.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/pytorch_pretrained_bert/tokenization.py b/pytorch_pretrained_bert/tokenization.py
index 9ee8be2039..4ea8de6f70 100644
--- a/pytorch_pretrained_bert/tokenization.py
+++ b/pytorch_pretrained_bert/tokenization.py
@@ -79,8 +79,16 @@ class BertTokenizer(object):
         """Constructs a BertTokenizer.
 
         Args:
-          do_lower_case: Whether to lower case the input.
-          do_wordpiece_only: Whether to do basic tokenization before wordpiece.
+          vocab_file: Path to a one-wordpiece-per-line vocabulary file
+          do_lower_case: Whether to lower case the input.
+                         Only has an effect when do_basic_tokenize=True.
+          do_basic_tokenize: Whether to do basic tokenization before wordpiece.
+          max_len: An artificial maximum length to truncate tokenized sequences to;
+                         Effective maximum length is always the minimum of this
+                         value (if specified) and the underlying BERT model's
+                         sequence length.
+          never_split: List of tokens which will never be split during tokenization.
+                         Only has an effect when do_basic_tokenize=True.
         """
         if not os.path.isfile(vocab_file):
             raise ValueError(