adding proxies options for the from_pretrained methods

2019-08-20 16:59:11 +02:00
parent 6d0aa73981
commit 43489756ad
4 changed files with 36 additions and 18 deletions
--- a/pytorch_transformers/tokenization_utils.py
+++ b/pytorch_transformers/tokenization_utils.py
@@ -196,6 +196,10 @@ class PreTrainedTokenizer(object):
            force_download: (`optional`) boolean, default False:
                Force to (re-)download the vocabulary files and override the cached versions if they exists.

+            proxies: (`optional`) dict, default None:
+                A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
+                The proxies are used on each request.
+
            inputs: (`optional`) positional arguments: will be passed to the Tokenizer ``__init__`` method.

            kwargs: (`optional`) keyword arguments: will be passed to the Tokenizer ``__init__`` method. Can be used to set special tokens like ``bos_token``, ``eos_token``, ``unk_token``, ``sep_token``, ``pad_token``, ``cls_token``, ``mask_token``, ``additional_special_tokens``. See parameters in the doc string of :class:`~pytorch_transformers.PreTrainedTokenizer` for details.
@@ -227,6 +231,7 @@ class PreTrainedTokenizer(object):
    def _from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
        cache_dir = kwargs.pop('cache_dir', None)
        force_download = kwargs.pop('force_download', False)
+        proxies = kwargs.pop('proxies', None)

        s3_models = list(cls.max_model_input_sizes.keys())
        vocab_files = {}
@@ -287,7 +292,7 @@ class PreTrainedTokenizer(object):
                if file_path is None:
                    resolved_vocab_files[file_id] = None
                else:
-                    resolved_vocab_files[file_id] = cached_path(file_path, cache_dir=cache_dir, force_download=force_download)
+                    resolved_vocab_files[file_id] = cached_path(file_path, cache_dir=cache_dir, force_download=force_download, proxies=proxies)
        except EnvironmentError:
            if pretrained_model_name_or_path in s3_models:
                logger.error("Couldn't reach server to download vocabulary.")