From 2553363826944c0b808e50d1e06bcc15ce541d7e Mon Sep 17 00:00:00 2001
From: Steven Liu <59462357+stevhliu@users.noreply.github.com>
Date: Fri, 20 Jan 2023 07:38:15 -0800
Subject: [PATCH] Fix code example in training tutorial (#21201)

change text to sentence
---
 docs/source/en/training.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/en/training.mdx b/docs/source/en/training.mdx
index 336ce05b83..4d802db563 100644
--- a/docs/source/en/training.mdx
+++ b/docs/source/en/training.mdx
@@ -184,7 +184,7 @@ so we can just convert that directly to a NumPy array without tokenization!
 from transformers import AutoTokenizer
 
 tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
-tokenized_data = tokenizer(dataset["text"], return_tensors="np", padding=True)
+tokenized_data = tokenizer(dataset["sentence"], return_tensors="np", padding=True)
 # Tokenizer returns a BatchEncoding, but we convert that to a dict for Keras
 tokenized_data = dict(tokenized_data)
 