From 217349016af379e9a107f293a77f998adb4d937e Mon Sep 17 00:00:00 2001
From: Lysandre
Date: Wed, 29 Jan 2020 16:15:39 -0500
Subject: [PATCH] Copy object instead of passing the reference

---
Review note: the leftover debug print() calls were dropped from the new test
and the hunk/diffstat counts adjusted to match. The post-image blob id on the
tests `index` line still refers to the pre-review revision; regenerate the
patch with `git format-patch` after applying if exact ids matter.

 src/transformers/tokenization_utils.py |  2 +-
 tests/test_tokenization_common.py      | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py
index b4545a2d44..a0208791d4 100644
--- a/src/transformers/tokenization_utils.py
+++ b/src/transformers/tokenization_utils.py
@@ -326,7 +326,7 @@ class PreTrainedTokenizer(object):
                 cls.pretrained_init_configuration
                 and pretrained_model_name_or_path in cls.pretrained_init_configuration
             ):
-                init_configuration = cls.pretrained_init_configuration[pretrained_model_name_or_path]
+                init_configuration = cls.pretrained_init_configuration[pretrained_model_name_or_path].copy()
         else:
             # Get the vocabulary from local files
             logger.info(
diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py
index cd84c88020..95bb06a0f0 100644
--- a/tests/test_tokenization_common.py
+++ b/tests/test_tokenization_common.py
@@ -495,3 +495,14 @@ class TokenizerTesterMixin:
         assert [token_type_padding_idx] * padding_size + token_type_ids == padded_token_type_ids
         assert [0] * padding_size + attention_mask == padded_attention_mask
         assert [1] * padding_size + special_tokens_mask == padded_special_tokens_mask
+
+    def test_separate_tokenizers(self):
+        # This tests that tokenizers don't impact others. Unfortunately the case where it fails is when
+        # we're loading an S3 configuration from a pre-trained identifier, and we have no way of testing those today.
+
+        tokenizer = self.get_tokenizer(random_argument=True)
+        assert tokenizer.init_kwargs['random_argument'] is True
+        new_tokenizer = self.get_tokenizer(random_argument=False)
+        # Building the second tokenizer must leave the first one's stored kwargs untouched.
+        assert tokenizer.init_kwargs['random_argument'] is True
+        assert new_tokenizer.init_kwargs['random_argument'] is False