Remove dependency on pytest for running tests (#2055)

* Switch to plain unittest for skipping slow tests. Add a RUN_SLOW environment variable for running them. * Switch to plain unittest for PyTorch dependency. * Switch to plain unittest for TensorFlow dependency. * Avoid leaking open files in the test suite. This prevents spurious warnings when running tests. * Fix unicode warning on Python 2 when running tests. The warning was: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal * Support running PyTorch tests on a GPU. Reverts 27e015bd. * Tests no longer require pytest. * Make tests pass on cuda
2019-12-06 19:57:38 +01:00
parent e4679cddce
commit 35401fe50f
50 changed files with 344 additions and 231 deletions
--- a/transformers/tokenization_albert.py
+++ b/transformers/tokenization_albert.py
@@ -141,7 +141,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
            pieces = self.sp_model.SampleEncodeAsPieces(text, 64, 0.1)
        new_pieces = []
        for piece in pieces:
-            if len(piece) > 1 and piece[-1] == ',' and piece[-2].isdigit():
+            if len(piece) > 1 and piece[-1] == str(',') and piece[-2].isdigit():
                cur_pieces = self.sp_model.EncodeAsPieces(
                    piece[:-1].replace(SPIECE_UNDERLINE, ''))
                if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE:
@@ -225,9 +225,9 @@ class AlbertTokenizer(PreTrainedTokenizer):
        """
        Creates a mask from the two sequences passed to be used in a sequence-pair classification task.
        An ALBERT sequence pair mask has the following format:
-        0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 
-        | first sequence    | second sequence     
-        
+        0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1
+        | first sequence    | second sequence
+
        if token_ids_1 is None, only returns the first portion of the mask (0's).
        """
        sep = [self.sep_token_id]