offline mode for firewalled envs (#10407)

* offline mode start

* add specific values

* fix fallback

* add test

* better values check and range

* test that actually works

* document the offline mode

* Apply suggestions from code review

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* more strict check

* cleaner test

* pt-only test

* style

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
Stas Bekman
2021-03-05 17:27:48 -08:00
committed by GitHub
parent 90ecc29656
commit 88a951e3cc
7 changed files with 111 additions and 5 deletions

View File

@@ -44,15 +44,22 @@ from transformers import (
default_data_collator,
set_seed,
)
from transformers.file_utils import is_offline_mode
from transformers.trainer_utils import get_last_checkpoint, is_main_process
with FileLock(".lock") as lock:
nltk.download("punkt", quiet=True)
logger = logging.getLogger(__name__)
# Look for the NLTK "punkt" tokenizer data locally before attempting any download.
try:
    nltk.data.find("tokenizers/punkt")
except LookupError:
    # The lookup failed, so the data would have to be downloaded. In offline
    # mode that is not an option: stop here with instructions for the user.
    if is_offline_mode():
        raise LookupError(
            "Offline mode: run this script without TRANSFORMERS_OFFLINE first to download nltk data files"
        )
    # Not offline: download the data while holding the ".lock" file lock
    # (presumably so concurrent runs do not download at the same time -- confirm).
    with FileLock(".lock") as lock:
        nltk.download("punkt", quiet=True)
@dataclass
class ModelArguments: