[examples run_clm] fix _LazyModule hasher error (#11168)

* fix _LazyModule hasher error

* reword
This commit is contained in:
Stas Bekman
2021-04-09 11:39:12 -07:00
committed by GitHub
parent c161dd56df
commit 07f0bb691d

View File

@@ -317,8 +317,10 @@ def main():
column_names = datasets["validation"].column_names
text_column_name = "text" if "text" in column_names else column_names[0]
def tokenize_function(examples):
# since this will be pickled, to avoid a _LazyModule error in Hasher, force logger loading before tokenize_function
tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base")
def tokenize_function(examples):
with CaptureLogger(tok_logger) as cl:
output = tokenizer(examples[text_column_name])
# clm input could be much much longer than block_size