From 9c18f156856a293492e0cda4eb83906ac703e575 Mon Sep 17 00:00:00 2001
From: Adam Pocock
Date: Tue, 1 Dec 2020 13:01:52 -0500
Subject: [PATCH] Prevent BatchEncoding from blindly passing casts down to the tensors it contains. Fixes #6582. (#8860)

Update src/transformers/tokenization_utils_base.py with review fix

Co-authored-by: Lysandre Debut

Co-authored-by: Lysandre Debut
---
 src/transformers/tokenization_utils_base.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index 01441fa704..9eb1a5ae40 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -776,7 +776,16 @@ class BatchEncoding(UserDict):
             :class:`~transformers.BatchEncoding`: The same instance of :class:`~transformers.BatchEncoding` after
             modification.
         """
-        self.data = {k: v.to(device) for k, v in self.data.items()}
+
+        # This check catches things like APEX blindly calling "to" on all inputs to a module
+        # Otherwise it passes the casts down and casts the LongTensor containing the token idxs
+        # into a HalfTensor
+        if isinstance(device, str) or isinstance(device, torch.device):
+            self.data = {k: v.to(device=device) for k, v in self.data.items()}
+        else:
+            logger.warning(
+                f"Attempting to cast a BatchEncoding to another type, {str(device)}. This is not supported."
+            )
         return self
 
 