From 175da8d1821f108da7c0a61b32b6db861730f15f Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Wed, 20 Apr 2022 17:05:39 -0400 Subject: [PATCH] Fix custom init sorting script (#16864) --- src/transformers/__init__.py | 14 +++++++------- src/transformers/models/marian/__init__.py | 1 + utils/custom_init_isort.py | 17 +++++++++++++---- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index 88d6747787..5695ff57c5 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -446,20 +446,16 @@ else: # tokenizers-backed objects if is_tokenizers_available(): # Fast tokenizers - _import_structure["models.realm"].append("RealmTokenizerFast") - _import_structure["models.xglm"].append("XGLMTokenizerFast") - _import_structure["models.fnet"].append("FNetTokenizerFast") - _import_structure["models.roformer"].append("RoFormerTokenizerFast") - _import_structure["models.clip"].append("CLIPTokenizerFast") - _import_structure["models.convbert"].append("ConvBertTokenizerFast") - _import_structure["models.blenderbot_small"].append("BlenderbotSmallTokenizerFast") _import_structure["models.albert"].append("AlbertTokenizerFast") _import_structure["models.bart"].append("BartTokenizerFast") _import_structure["models.barthez"].append("BarthezTokenizerFast") _import_structure["models.bert"].append("BertTokenizerFast") _import_structure["models.big_bird"].append("BigBirdTokenizerFast") _import_structure["models.blenderbot"].append("BlenderbotTokenizerFast") + _import_structure["models.blenderbot_small"].append("BlenderbotSmallTokenizerFast") _import_structure["models.camembert"].append("CamembertTokenizerFast") + _import_structure["models.clip"].append("CLIPTokenizerFast") + _import_structure["models.convbert"].append("ConvBertTokenizerFast") _import_structure["models.deberta"].append("DebertaTokenizerFast") _import_structure["models.deberta_v2"].append("DebertaV2TokenizerFast") _import_structure["models.distilbert"].append("DistilBertTokenizerFast") @@ -467,6 +463,7 @@ if is_tokenizers_available(): ["DPRContextEncoderTokenizerFast", "DPRQuestionEncoderTokenizerFast", "DPRReaderTokenizerFast"] ) _import_structure["models.electra"].append("ElectraTokenizerFast") + _import_structure["models.fnet"].append("FNetTokenizerFast") _import_structure["models.funnel"].append("FunnelTokenizerFast") _import_structure["models.gpt2"].append("GPT2TokenizerFast") _import_structure["models.herbert"].append("HerbertTokenizerFast") @@ -483,13 +480,16 @@ if is_tokenizers_available(): _import_structure["models.mt5"].append("MT5TokenizerFast") _import_structure["models.openai"].append("OpenAIGPTTokenizerFast") _import_structure["models.pegasus"].append("PegasusTokenizerFast") + _import_structure["models.realm"].append("RealmTokenizerFast") _import_structure["models.reformer"].append("ReformerTokenizerFast") _import_structure["models.rembert"].append("RemBertTokenizerFast") _import_structure["models.retribert"].append("RetriBertTokenizerFast") _import_structure["models.roberta"].append("RobertaTokenizerFast") + _import_structure["models.roformer"].append("RoFormerTokenizerFast") _import_structure["models.splinter"].append("SplinterTokenizerFast") _import_structure["models.squeezebert"].append("SqueezeBertTokenizerFast") _import_structure["models.t5"].append("T5TokenizerFast") + _import_structure["models.xglm"].append("XGLMTokenizerFast") _import_structure["models.xlm_roberta"].append("XLMRobertaTokenizerFast") _import_structure["models.xlnet"].append("XLNetTokenizerFast") _import_structure["tokenization_utils_fast"] = ["PreTrainedTokenizerFast"] diff --git a/src/transformers/models/marian/__init__.py b/src/transformers/models/marian/__init__.py index b26355b484..5971d2d574 100644 --- a/src/transformers/models/marian/__init__.py +++ b/src/transformers/models/marian/__init__.py @@ -48,6 +48,7 @@ if is_tf_available(): if is_flax_available(): _import_structure["modeling_flax_marian"] = ["FlaxMarianModel", "FlaxMarianMTModel", "FlaxMarianPreTrainedModel"] + if TYPE_CHECKING: from .configuration_marian import MARIAN_PRETRAINED_CONFIG_ARCHIVE_MAP, MarianConfig, MarianOnnxConfig diff --git a/utils/custom_init_isort.py b/utils/custom_init_isort.py index 06a89b166a..456ff4aedc 100644 --- a/utils/custom_init_isort.py +++ b/utils/custom_init_isort.py @@ -183,11 +183,20 @@ def sort_imports(file, check_only=True): # Check if the block contains some `_import_structure`s thingy to sort. block = main_blocks[block_idx] block_lines = block.split("\n") - if len(block_lines) < 3 or "_import_structure" not in "".join(block_lines[:2]): + + # Get to the start of the imports. + line_idx = 0 + while line_idx < len(block_lines) and "_import_structure" not in block_lines[line_idx]: + # Skip dummy import blocks + if "import dummy" in block_lines[line_idx]: + line_idx = len(block_lines) + else: + line_idx += 1 + if line_idx >= len(block_lines): continue - # Ignore first and last line: they don't contain anything. - internal_block_code = "\n".join(block_lines[1:-1]) + # Ignore beginning and last line: they don't contain anything. + internal_block_code = "\n".join(block_lines[line_idx:-1]) indent = get_indent(block_lines[1]) # Slit the internal block into blocks of indent level 1. internal_blocks = split_code_in_indented_blocks(internal_block_code, indent_level=indent) @@ -211,7 +220,7 @@ def sort_imports(file, check_only=True): count += 1 # And we put our main block back together with its first and last line. - main_blocks[block_idx] = "\n".join([block_lines[0]] + reorderded_blocks + [block_lines[-1]]) + main_blocks[block_idx] = "\n".join(block_lines[:line_idx] + reorderded_blocks + [block_lines[-1]]) if code != "\n".join(main_blocks): if check_only: