Sort unique_no_split_tokens to make it deterministic (#6461)
* Change the type of unique_no_split_tokens to set
* Use a sorted list instead of a set
* Style fixes
This commit is contained in:
@@ -207,10 +207,10 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
|
|||||||
|
|
||||||
# Make sure we don't split on any special tokens (even they were already in the vocab before e.g. for Albert)
|
# Make sure we don't split on any special tokens (even they were already in the vocab before e.g. for Albert)
|
||||||
if special_tokens:
|
if special_tokens:
|
||||||
self.unique_no_split_tokens = list(set(self.unique_no_split_tokens).union(set(new_tokens)))
|
self.unique_no_split_tokens = sorted(set(self.unique_no_split_tokens).union(set(new_tokens)))
|
||||||
else:
|
else:
|
||||||
# Or on the newly added tokens
|
# Or on the newly added tokens
|
||||||
self.unique_no_split_tokens = list(set(self.unique_no_split_tokens).union(set(tokens_to_add)))
|
self.unique_no_split_tokens = sorted(set(self.unique_no_split_tokens).union(set(tokens_to_add)))
|
||||||
|
|
||||||
return len(tokens_to_add)
|
return len(tokens_to_add)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user