From b390a5672aea995e65d031b3759274d92188e553 Mon Sep 17 00:00:00 2001 From: Gong Linyuan Date: Tue, 4 Aug 2020 14:38:30 +0800 Subject: [PATCH] Make the order of additional special tokens deterministic (#5704) * Make the order of additional special tokens deterministic regardless of hash seeds * Fix --- src/transformers/tokenization_utils_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index f4cab85c71..267d72485f 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -23,7 +23,7 @@ import json import logging import os import warnings -from collections import UserDict +from collections import OrderedDict, UserDict from enum import Enum from typing import Any, Dict, List, NamedTuple, Optional, Sequence, Tuple, Union @@ -1071,7 +1071,7 @@ class SpecialTokensMixin: set_attr = self.special_tokens_map_extended for attr_value in set_attr.values(): all_toks = all_toks + (list(attr_value) if isinstance(attr_value, (list, tuple)) else [attr_value]) - all_toks = list(set(all_toks)) + all_toks = list(OrderedDict.fromkeys(all_toks)) return all_toks @property