Convert tokenizer outputs for Keras in doc example (#20732)
* Convert tokenizer outputs for Keras in doc example
* Das deutsche Beispiel auch korrigieren (also fix the German example)
This commit is contained in:
@@ -185,6 +185,8 @@ from transformers import AutoTokenizer
|
|||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
|
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
|
||||||
tokenized_data = tokenizer(dataset["text"], return_tensors="np", padding=True)
|
tokenized_data = tokenizer(dataset["text"], return_tensors="np", padding=True)
|
||||||
|
# Tokenizer returns a BatchEncoding, but we convert that to a dict for Keras
|
||||||
|
tokenized_data = dict(tokenized_data)
|
||||||
|
|
||||||
labels = np.array(dataset["label"]) # Label is already an array of 0 and 1
|
labels = np.array(dataset["label"]) # Label is already an array of 0 and 1
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -185,6 +185,8 @@ from transformers import AutoTokenizer
|
|||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
|
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
|
||||||
tokenized_data = tokenizer(dataset["text"], return_tensors="np", padding=True)
|
tokenized_data = tokenizer(dataset["text"], return_tensors="np", padding=True)
|
||||||
|
# Tokenizer returns a BatchEncoding, but we convert that to a dict for Keras
|
||||||
|
tokenized_data = dict(tokenized_data)
|
||||||
|
|
||||||
labels = np.array(dataset["label"]) # Label is already an array of 0 and 1
|
labels = np.array(dataset["label"]) # Label is already an array of 0 and 1
|
||||||
```
|
```
|
||||||
|
|||||||
Reference in New Issue
Block a user