[RoFormer] Fix some issues (#12397)

* add RoFormerTokenizerFast into AutoTokenizer
* fix typo in roformer docs
* make onnx export happy
* update RoFormerConfig embedding_size
* use jieba not rjieba
* fix #12244 and make test_alignement pass
* update ARCHIVE_MAP
* make style & quality & fixup
* update
* make style & quality & fixup
* make style quality fixup
* update
* suggestion from LysandreJik

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>

* make style
* use rjieba

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
2021-07-06 15:31:57 +08:00
parent f5b0c1ecf0
commit 626a0a0147
11 changed files with 97 additions and 53 deletions
--- a/tests/test_tokenization_roformer.py
+++ b/tests/test_tokenization_roformer.py
@@ -13,29 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import importlib
 import unittest

 from transformers import RoFormerTokenizer, RoFormerTokenizerFast
-from transformers.testing_utils import require_tokenizers
+from transformers.testing_utils import require_rjieba, require_tokenizers

 from .test_tokenization_common import TokenizerTesterMixin


-def is_rjieba_available():
-    return importlib.util.find_spec("rjieba") is not None
-
-
-def require_rjieba(test_case):
-    """
-    Decorator marking a test that requires Jieba. These tests are skipped when Jieba isn't installed.
-    """
-    if not is_rjieba_available():
-        return unittest.skip("test requires rjieba")(test_case)
-    else:
-        return test_case
-
-
@require_rjieba
@require_tokenizers
 class RoFormerTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@@ -79,6 +64,10 @@ class RoFormerTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
        exp_tokens = [22943, 21332, 34431, 45904, 117, 306, 1231, 1231, 2653, 33994, 1266, 100]
        self.assertListEqual(tokenizer.convert_tokens_to_ids(input_tokens), exp_tokens)

-    # due to custom pre_tokenize , char_to_token may be error
-    def test_alignement_methods(self):
+    # can't train new_tokenizer via Tokenizers lib
+    def test_training_new_tokenizer(self):
+        pass
+
+    # can't train new_tokenizer via Tokenizers lib
+    def test_training_new_tokenizer_with_special_tokens_change(self):
        pass