From 07f0bb691d733a93e5eefd104145649810c7ebb0 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Fri, 9 Apr 2021 11:39:12 -0700 Subject: [PATCH] [examples run_clm] fix _LazyModule hasher error (#11168) * fix _LazyModule hasher error * reword --- examples/language-modeling/run_clm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/language-modeling/run_clm.py b/examples/language-modeling/run_clm.py index 31221dffd5..505f8f68c4 100755 --- a/examples/language-modeling/run_clm.py +++ b/examples/language-modeling/run_clm.py @@ -317,8 +317,10 @@ def main(): column_names = datasets["validation"].column_names text_column_name = "text" if "text" in column_names else column_names[0] + # since this will be pickled, to avoid a _LazyModule error in Hasher, force logger loading before tokenize_function + tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base") + def tokenize_function(examples): - tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base") with CaptureLogger(tok_logger) as cl: output = tokenizer(examples[text_column_name]) # clm input could be much much longer than block_size