More utils doc (#25457)

* Document and clean more utils.

* More documentation and fixes

* Switch to Lysandre's token

* Address review comments

* Actually put else
This commit is contained in:
Sylvain Gugger
2023-08-17 07:58:35 +02:00
committed by GitHub
parent 36f183ebab
commit 2defb6b048
9 changed files with 411 additions and 84 deletions

View File

@@ -12,7 +12,26 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Utility that checks the list of models in the tips in the task-specific pages of the doc is up to date and potentially
fixes it.
Use from the root of the repo with:
```bash
python utils/check_task_guides.py
```
for a check that will error in case of inconsistencies (used by `make repo-consistency`).
To auto-fix issues run:
```bash
python utils/check_task_guides.py --fix_and_overwrite
```
which is used by `make fix-copies`.
"""
import argparse
import os
@@ -25,10 +44,17 @@ TRANSFORMERS_PATH = "src/transformers"
PATH_TO_TASK_GUIDES = "docs/source/en/tasks"
def _find_text_in_file(filename, start_prompt, end_prompt):
def _find_text_in_file(filename: str, start_prompt: str, end_prompt: str) -> str:
"""
Find the text in `filename` between a line beginning with `start_prompt` and before `end_prompt`, removing empty
lines.
Find the text in filename between two prompts.
Args:
filename (`str`): The file to search into.
start_prompt (`str`): A string to look for at the start of the content searched.
end_prompt (`str`): A string that will mark the end of the content to look for.
Returns:
`str`: The content between the prompts.
"""
with open(filename, "r", encoding="utf-8", newline="\n") as f:
lines = f.readlines()
@@ -38,6 +64,7 @@ def _find_text_in_file(filename, start_prompt, end_prompt):
start_index += 1
start_index += 1
# Now go until the end prompt.
end_index = start_index
while not lines[end_index].startswith(end_prompt):
end_index += 1
@@ -54,6 +81,7 @@ def _find_text_in_file(filename, start_prompt, end_prompt):
# This is to make sure the transformers module imported is the one in the repo.
transformers_module = direct_transformers_import(TRANSFORMERS_PATH)
# Map between a task guide and the corresponding auto class.
TASK_GUIDE_TO_MODELS = {
"asr.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_CTC_MAPPING_NAMES,
"audio_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
@@ -81,9 +109,15 @@ SPECIAL_TASK_GUIDE_TO_MODEL_TYPES = {
}
def get_model_list_for_task(task_guide):
def get_model_list_for_task(task_guide: str) -> str:
"""
Return the list of models supporting given task.
Return the list of models supporting a given task.
Args:
task_guide (`str`): The name of the task guide to check.
Returns:
`str`: The list of models supporting this task, as links to their respective doc pages separated by commas.
"""
model_maping_names = TASK_GUIDE_TO_MODELS[task_guide]
special_model_types = SPECIAL_TASK_GUIDE_TO_MODEL_TYPES.get(task_guide, set())
@@ -95,9 +129,17 @@ def get_model_list_for_task(task_guide):
return ", ".join([f"[{name}](../model_doc/{code})" for code, name in model_names.items()]) + "\n"
def check_model_list_for_task(task_guide, overwrite=False):
"""For a given task guide, checks the model list in the generated tip for consistency with the state of the lib and overwrites if needed."""
def check_model_list_for_task(task_guide: str, overwrite: bool = False):
"""
For a given task guide, checks the model list in the generated tip for consistency with the state of the lib and
updates it if needed.
Args:
task_guide (`str`):
The name of the task guide to check.
overwrite (`bool`, *optional*, defaults to `False`):
Whether or not to overwrite the table when it's not up to date.
"""
current_list, start_index, end_index, lines = _find_text_in_file(
filename=os.path.join(PATH_TO_TASK_GUIDES, task_guide),
start_prompt="<!--This tip is automatically generated by `make fix-copies`, do not fill manually!-->",