Sort unique_no_split_tokens to make it deterministic (#6461)
* change unique_no_split_tokens's type to set
* use sorted list instead of set
* style
This commit is contained in:
@@ -207,10 +207,10 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
|
||||
|
||||
# Make sure we don't split on any special tokens (even if they were already in the vocab before, e.g. for Albert)
|
||||
if special_tokens:
|
||||
self.unique_no_split_tokens = list(set(self.unique_no_split_tokens).union(set(new_tokens)))
|
||||
self.unique_no_split_tokens = sorted(set(self.unique_no_split_tokens).union(set(new_tokens)))
|
||||
else:
|
||||
# Or on the newly added tokens
|
||||
self.unique_no_split_tokens = list(set(self.unique_no_split_tokens).union(set(tokens_to_add)))
|
||||
self.unique_no_split_tokens = sorted(set(self.unique_no_split_tokens).union(set(tokens_to_add)))
|
||||
|
||||
return len(tokens_to_add)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user