Expose all constructor parameters for BertTokenizerFast (#2921)

Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>
This commit is contained in:
Funtowicz Morgan
2020-02-20 17:53:32 +01:00
committed by GitHub
parent b662f0e625
commit 9b3093311f

View File

@@ -549,8 +549,11 @@ class BertTokenizerFast(PreTrainedTokenizerFast):
pad_token="[PAD]", pad_token="[PAD]",
cls_token="[CLS]", cls_token="[CLS]",
mask_token="[MASK]", mask_token="[MASK]",
clean_text=True,
tokenize_chinese_chars=True, tokenize_chinese_chars=True,
add_special_tokens=True, add_special_tokens=True,
strip_accents=True,
wordpieces_prefix="##",
**kwargs **kwargs
): ):
super().__init__( super().__init__(
@@ -560,8 +563,11 @@ class BertTokenizerFast(PreTrainedTokenizerFast):
unk_token=unk_token, unk_token=unk_token,
sep_token=sep_token, sep_token=sep_token,
cls_token=cls_token, cls_token=cls_token,
clean_text=clean_text,
handle_chinese_chars=tokenize_chinese_chars, handle_chinese_chars=tokenize_chinese_chars,
strip_accents=strip_accents,
lowercase=do_lower_case, lowercase=do_lower_case,
wordpieces_prefix=wordpieces_prefix,
), ),
unk_token=unk_token, unk_token=unk_token,
sep_token=sep_token, sep_token=sep_token,