[examples run_clm] fix _LazyModule hasher error (#11168)
* fix _LazyModule hasher error * reword
This commit is contained in:
@@ -317,8 +317,10 @@ def main():
|
|||||||
column_names = datasets["validation"].column_names
|
column_names = datasets["validation"].column_names
|
||||||
text_column_name = "text" if "text" in column_names else column_names[0]
|
text_column_name = "text" if "text" in column_names else column_names[0]
|
||||||
|
|
||||||
def tokenize_function(examples):
|
# since this will be pickled, to avoid a _LazyModule error in Hasher, force logger loading before tokenize_function
|
||||||
tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base")
|
tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base")
|
||||||
|
|
||||||
|
def tokenize_function(examples):
|
||||||
with CaptureLogger(tok_logger) as cl:
|
with CaptureLogger(tok_logger) as cl:
|
||||||
output = tokenizer(examples[text_column_name])
|
output = tokenizer(examples[text_column_name])
|
||||||
# clm input could be much much longer than block_size
|
# clm input could be much much longer than block_size
|
||||||
|
|||||||
Reference in New Issue
Block a user