From 9b3093311f5f139a49629293c2227dc8094ec262 Mon Sep 17 00:00:00 2001 From: Funtowicz Morgan Date: Thu, 20 Feb 2020 17:53:32 +0100 Subject: [PATCH] Expose all constructor parameter for BertTokenizerFast (#2921) Signed-off-by: Morgan Funtowicz --- src/transformers/tokenization_bert.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/transformers/tokenization_bert.py b/src/transformers/tokenization_bert.py index 834a610bce..c76523d318 100644 --- a/src/transformers/tokenization_bert.py +++ b/src/transformers/tokenization_bert.py @@ -549,8 +549,11 @@ class BertTokenizerFast(PreTrainedTokenizerFast): pad_token="[PAD]", cls_token="[CLS]", mask_token="[MASK]", + clean_text=True, tokenize_chinese_chars=True, add_special_tokens=True, + strip_accents=True, + wordpieces_prefix="##", **kwargs ): super().__init__( @@ -560,8 +563,11 @@ class BertTokenizerFast(PreTrainedTokenizerFast): unk_token=unk_token, sep_token=sep_token, cls_token=cls_token, + clean_text=clean_text, handle_chinese_chars=tokenize_chinese_chars, + strip_accents=strip_accents, lowercase=do_lower_case, + wordpieces_prefix=wordpieces_prefix, ), unk_token=unk_token, sep_token=sep_token,