Update for changes in tokenizers API

This commit is contained in:
Anthony MOI
2019-12-26 14:37:55 -05:00
parent 734d29b03d
commit 1f82a5d910
4 changed files with 20 additions and 16 deletions

View File

@@ -583,12 +583,14 @@ class BertTokenizerFast(FastPreTrainedTokenizer):
 )
 )
 if max_length is not None:
-self._tokenizer.with_truncation(max_length, stride, truncation_strategy)
+self._tokenizer.with_truncation(max_length,
+stride=stride,
+strategy=truncation_strategy)
 self._tokenizer.with_padding(
-max_length if pad_to_max_length else None,
-self.padding_side,
-self.pad_token_id,
-self.pad_token_type_id,
-self.pad_token,
+max_length=max_length if pad_to_max_length else None,
+direction=self.padding_side,
+pad_id=self.pad_token_id,
+pad_type_id=self.pad_token_type_id,
+pad_token=self.pad_token,
 )
 self._decoder = tk.decoders.WordPiece.new()