From 10bd1ddb39235b2f58594e48867595e7d38cd619 Mon Sep 17 00:00:00 2001 From: VictorSanh Date: Mon, 25 Nov 2019 19:41:00 +0000 Subject: [PATCH] soft launch distilbert multilingual --- transformers/configuration_distilbert.py | 3 ++- transformers/modeling_distilbert.py | 3 ++- transformers/tokenization_distilbert.py | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/transformers/configuration_distilbert.py b/transformers/configuration_distilbert.py index 2a8a149acf..f929a9bc39 100644 --- a/transformers/configuration_distilbert.py +++ b/transformers/configuration_distilbert.py @@ -27,7 +27,8 @@ logger = logging.getLogger(__name__) DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { 'distilbert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-config.json", - 'distilbert-base-uncased-distilled-squad': "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-distilled-squad-config.json" + 'distilbert-base-uncased-distilled-squad': "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-distilled-squad-config.json", + 'distilbert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-multilingual-cased-config.json", } diff --git a/transformers/modeling_distilbert.py b/transformers/modeling_distilbert.py index d30f493c69..62c623ff6c 100644 --- a/transformers/modeling_distilbert.py +++ b/transformers/modeling_distilbert.py @@ -42,7 +42,8 @@ logger = logging.getLogger(__name__) DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP = { 'distilbert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-pytorch_model.bin", - 'distilbert-base-uncased-distilled-squad': "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-distilled-squad-pytorch_model.bin" + 'distilbert-base-uncased-distilled-squad': "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-distilled-squad-pytorch_model.bin", + 'distilbert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-multilingual-cased-pytorch_model.bin", } diff --git a/transformers/tokenization_distilbert.py b/transformers/tokenization_distilbert.py index dfa02926d8..832f0c3d0b 100644 --- a/transformers/tokenization_distilbert.py +++ b/transformers/tokenization_distilbert.py @@ -33,12 +33,14 @@ PRETRAINED_VOCAB_FILES_MAP = { { 'distilbert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", 'distilbert-base-uncased-distilled-squad': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt", + 'distilbert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt", } } PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { 'distilbert-base-uncased': 512, 'distilbert-base-uncased-distilled-squad': 512, + 'distilbert-base-multilingual-cased': 512, }