Fixing the output of code examples in the preprocessing chapter (#17162)
This commit is contained in:
@@ -158,14 +158,17 @@ Set the `return_tensors` parameter to either `pt` for PyTorch, or `tf` for Tenso
|
|||||||
... "Don't think he knows about second breakfast, Pip.",
|
... "Don't think he knows about second breakfast, Pip.",
|
||||||
... "What about elevensies?",
|
... "What about elevensies?",
|
||||||
... ]
|
... ]
|
||||||
>>> encoded_input = tokenizer(batch, padding=True, truncation=True, return_tensors="pt")
|
>>> encoded_input = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors="pt")
|
||||||
>>> print(encoded_input)
|
>>> print(encoded_input)
|
||||||
{'input_ids': tensor([[ 101, 153, 7719, 21490, 1122, 1114, 9582, 1623, 102],
|
{'input_ids': tensor([[101, 1252, 1184, 1164, 1248, 6462, 136, 102, 0, 0, 0, 0, 0, 0, 0],
|
||||||
[ 101, 5226, 1122, 9649, 1199, 2610, 1236, 102, 0]]),
|
[101, 1790, 112, 189, 1341, 1119, 3520, 1164, 1248, 6462, 117, 21902, 1643, 119, 102],
|
||||||
'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0],
|
[101, 1327, 1164, 5450, 23434, 136, 102, 0, 0, 0, 0, 0, 0, 0, 0]]),
|
||||||
[0, 0, 0, 0, 0, 0, 0, 0, 0]]),
|
'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||||
'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1],
|
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||||
[1, 1, 1, 1, 1, 1, 1, 1, 0]])}
|
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]),
|
||||||
|
'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
|
||||||
|
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
|
||||||
|
[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}
|
||||||
```
|
```
|
||||||
</pt>
|
</pt>
|
||||||
<tf>
|
<tf>
|
||||||
@@ -178,15 +181,18 @@ Set the `return_tensors` parameter to either `pt` for PyTorch, or `tf` for Tenso
|
|||||||
>>> encoded_input = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors="tf")
|
>>> encoded_input = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors="tf")
|
||||||
>>> print(encoded_input)
|
>>> print(encoded_input)
|
||||||
{'input_ids': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
|
{'input_ids': <tf.Tensor: shape=(3, 15), dtype=int32, numpy=
|
||||||
array([[ 101, 153, 7719, 21490, 1122, 1114, 9582, 1623, 102],
|
array([[101, 1252, 1184, 1164, 1248, 6462, 136, 102, 0, 0, 0, 0, 0, 0, 0],
|
||||||
[ 101, 5226, 1122, 9649, 1199, 2610, 1236, 102, 0]],
|
[101, 1790, 112, 189, 1341, 1119, 3520, 1164, 1248, 6462, 117, 21902, 1643, 119, 102],
|
||||||
|
[101, 1327, 1164, 5450, 23434, 136, 102, 0, 0, 0, 0, 0, 0, 0, 0]],
|
||||||
dtype=int32)>,
|
dtype=int32)>,
|
||||||
'token_type_ids': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
|
'token_type_ids': <tf.Tensor: shape=(3, 15), dtype=int32, numpy=
|
||||||
array([[0, 0, 0, 0, 0, 0, 0, 0, 0],
|
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||||
[0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>,
|
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||||
|
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>,
|
||||||
'attention_mask': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
|
'attention_mask': <tf.Tensor: shape=(3, 15), dtype=int32, numpy=
|
||||||
array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
|
array([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
|
||||||
[1, 1, 1, 1, 1, 1, 1, 1, 0]], dtype=int32)>}
|
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
|
||||||
|
[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>}
|
||||||
```
|
```
|
||||||
</tf>
|
</tf>
|
||||||
</frameworkcontent>
|
</frameworkcontent>
|
||||||
|
|||||||
Reference in New Issue
Block a user