BatchEncoding.to with device with tests (#9584)
This commit is contained in:
@@ -65,6 +65,12 @@ def _is_torch(x):
|
|||||||
return isinstance(x, torch.Tensor)
|
return isinstance(x, torch.Tensor)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_torch_device(x):
|
||||||
|
import torch
|
||||||
|
|
||||||
|
return isinstance(x, torch.device)
|
||||||
|
|
||||||
|
|
||||||
def _is_tensorflow(x):
|
def _is_tensorflow(x):
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
@@ -801,7 +807,7 @@ class BatchEncoding(UserDict):
|
|||||||
# This check catches things like APEX blindly calling "to" on all inputs to a module
|
# This check catches things like APEX blindly calling "to" on all inputs to a module
|
||||||
# Otherwise it passes the casts down and casts the LongTensor containing the token idxs
|
# Otherwise it passes the casts down and casts the LongTensor containing the token idxs
|
||||||
# into a HalfTensor
|
# into a HalfTensor
|
||||||
if isinstance(device, str) or isinstance(device, torch.device) or isinstance(device, int):
|
if isinstance(device, str) or _is_torch_device(device) or isinstance(device, int):
|
||||||
self.data = {k: v.to(device=device) for k, v in self.data.items()}
|
self.data = {k: v.to(device=device) for k, v in self.data.items()}
|
||||||
else:
|
else:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
|
|||||||
@@ -1704,6 +1704,10 @@ class TokenizerTesterMixin:
|
|||||||
first_ten_tokens = list(tokenizer.get_vocab().keys())[:10]
|
first_ten_tokens = list(tokenizer.get_vocab().keys())[:10]
|
||||||
sequence = " ".join(first_ten_tokens)
|
sequence = " ".join(first_ten_tokens)
|
||||||
encoded_sequence = tokenizer.encode_plus(sequence, return_tensors="pt")
|
encoded_sequence = tokenizer.encode_plus(sequence, return_tensors="pt")
|
||||||
|
|
||||||
|
# Ensure that the BatchEncoding.to() method works.
|
||||||
|
encoded_sequence.to(model.device)
|
||||||
|
|
||||||
batch_encoded_sequence = tokenizer.batch_encode_plus([sequence, sequence], return_tensors="pt")
|
batch_encoded_sequence = tokenizer.batch_encode_plus([sequence, sequence], return_tensors="pt")
|
||||||
# This should not fail
|
# This should not fail
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user