Make OpenAIGPTTokenizer work with SpaCy 2.x and 3.x (#15019)
* Make OpenAIGPTTokenizer work with SpaCy 3.x

SpaCy 3.x introduced a change to the API for creating the tokenizer that breaks OpenAIGPTTokenizer. The old API for creating the tokenizer in SpaCy 2.x no longer works under SpaCy 3.x, but the new API for creating the tokenizer in SpaCy 3.x DOES work under SpaCy 2.x. Switching to the new API should allow OpenAIGPTTokenizer to work under both SpaCy 2.x and SpaCy 3.x versions.

* Add is_spacy_available and is_ftfy_available methods to file utils
* Add spacy and ftfy unittest decorator to testing utils
* Add tests for OpenAIGPTTokenizer that require spacy and ftfy
* Modify CircleCI config to run tests that require spacy and ftfy
* Remove unneeded unittest decorators and reuse test code
* Run make fixup
This commit is contained in:
@@ -20,13 +20,14 @@ import unittest
|
||||
|
||||
from transformers import OpenAIGPTTokenizer, OpenAIGPTTokenizerFast
|
||||
from transformers.models.openai.tokenization_openai import VOCAB_FILES_NAMES
|
||||
from transformers.testing_utils import require_tokenizers
|
||||
from transformers.testing_utils import require_ftfy, require_spacy, require_tokenizers
|
||||
|
||||
from .test_tokenization_common import TokenizerTesterMixin
|
||||
|
||||
|
||||
@require_tokenizers
|
||||
class OpenAIGPTTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
"""Tests OpenAIGPTTokenizer that uses BERT BasicTokenizer."""
|
||||
|
||||
tokenizer_class = OpenAIGPTTokenizer
|
||||
rust_tokenizer_class = OpenAIGPTTokenizerFast
|
||||
@@ -132,3 +133,12 @@ class OpenAIGPTTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
# The OpenAI GPT tokenizer has no padding token, so this padding test is
# deliberately a no-op (presumably overriding the same-named test from
# TokenizerTesterMixin -- confirm against the mixin).
def test_padding_different_model_input_name(self):
    pass
|
||||
|
||||
|
||||
@require_ftfy
@require_spacy
@require_tokenizers
class OpenAIGPTTokenizationTestWithSpacy(OpenAIGPTTokenizationTest):
    """Run the OpenAIGPTTokenizer test suite with SpaCy and ftfy present.

    Adds no tests of its own: every test is inherited from
    ``OpenAIGPTTokenizationTest``. The ``require_ftfy`` and ``require_spacy``
    decorators gate the class on those optional packages, so the inherited
    tests exercise the SpaCy/ftfy tokenization path instead of the BERT
    ``BasicTokenizer`` one.
    """
|
||||
|
||||
Reference in New Issue
Block a user