From c1b9a11dd4be8af32b3274be7c9774d5a917c56d Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 12 Dec 2022 16:14:04 +0000 Subject: [PATCH] Convert tokenizer outputs for Keras in doc example (#20732) * Convert tokenizer outputs for Keras in doc example * Das deutsche Beispiel auch korrigieren --- docs/source/de/training.mdx | 2 ++ docs/source/en/training.mdx | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/source/de/training.mdx b/docs/source/de/training.mdx index a4b762a34e..e38779ba55 100644 --- a/docs/source/de/training.mdx +++ b/docs/source/de/training.mdx @@ -185,6 +185,8 @@ from transformers import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained("bert-base-cased") tokenized_data = tokenizer(dataset["text"], return_tensors="np", padding=True) +# Tokenizer returns a BatchEncoding, but we convert that to a dict for Keras +tokenized_data = dict(tokenized_data) labels = np.array(dataset["label"]) # Label is already an array of 0 and 1 ``` diff --git a/docs/source/en/training.mdx b/docs/source/en/training.mdx index 45fabdec96..336ce05b83 100644 --- a/docs/source/en/training.mdx +++ b/docs/source/en/training.mdx @@ -185,6 +185,8 @@ from transformers import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained("bert-base-cased") tokenized_data = tokenizer(dataset["text"], return_tensors="np", padding=True) +# Tokenizer returns a BatchEncoding, but we convert that to a dict for Keras +tokenized_data = dict(tokenized_data) labels = np.array(dataset["label"]) # Label is already an array of 0 and 1 ```