[Styling] stylify using ruff (#27144)

* try to stylify using ruff

* might need to remove these changes?

* use ruf format andruff check

* use isinstance instead of type comparision

* use # fmt: skip

* use # fmt: skip

* nits

* soem styling changes

* update ci job

* nits isinstance

* more files update

* nits

* more nits

* small nits

* check and format

* revert wrong changes

* actually use formatter instead of checker

* nits

* well docbuilder is overwriting this commit

* revert notebook changes

* try to nuke docbuilder

* style

* fix feature exrtaction test

* remve `indent-width = 4`

* fixup

* more nits

* update the ruff version that we use

* style

* nuke docbuilder styling

* leve the print for detected changes

* nits

* Remove file I/O

Co-authored-by: charliermarsh
 <charlie.r.marsh@gmail.com>

* style

* nits

* revert notebook changes

* Add # fmt skip when possible

* Add # fmt skip when possible

* Fix

* More `  # fmt: skip` usage

* More `  # fmt: skip` usage

* More `  # fmt: skip` usage

* NIts

* more fixes

* fix tapas

* Another way to skip

* Recommended way

* Fix two more fiels

* Remove asynch
Remove asynch

---------

Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com>
This commit is contained in:
Arthur
2023-11-16 17:43:19 +01:00
committed by GitHub
parent acb5b4aff5
commit 651408a077
480 changed files with 867 additions and 1059 deletions

View File

@@ -24,18 +24,19 @@
#
# It will be used then as "stas/tiny-wmt19-en-ru"
from pathlib import Path
import json
import tempfile
from pathlib import Path
from transformers import FSMTTokenizer, FSMTConfig, FSMTForConditionalGeneration
from transformers import FSMTConfig, FSMTForConditionalGeneration, FSMTTokenizer
from transformers.models.fsmt.tokenization_fsmt import VOCAB_FILES_NAMES
mname_tiny = "tiny-wmt19-en-ru"
# Build
# borrowed from a test
# borrowed from a test
vocab = [ "l", "o", "w", "e", "r", "s", "t", "i", "d", "n", "w</w>", "r</w>", "t</w>", "lo", "low", "er</w>", "low</w>", "lowest</w>", "newer</w>", "wider</w>", "<unk>", ]
vocab_tokens = dict(zip(vocab, range(len(vocab))))
merges = ["l o 123", "lo w 1456", "e r</w> 1789", ""]
@@ -57,7 +58,7 @@ with tempfile.TemporaryDirectory() as tmpdirname:
tgt_vocab_file=tgt_vocab_file,
merges_file=merges_file,
)
config = FSMTConfig(
langs=['ru', 'en'],
src_vocab_size=1000, tgt_vocab_size=1000,

View File

@@ -27,16 +27,18 @@
# It will be used then as "stas/tiny-wmt19-en-de"
# Build
from transformers import FSMTTokenizer, FSMTConfig, FSMTForConditionalGeneration
from transformers import FSMTConfig, FSMTForConditionalGeneration, FSMTTokenizer
mname = "facebook/wmt19-en-de"
tokenizer = FSMTTokenizer.from_pretrained(mname)
# get the correct vocab sizes, etc. from the master model
config = FSMTConfig.from_pretrained(mname)
config.update(dict(
d_model=4,
encoder_layers=1, decoder_layers=1,
encoder_ffn_dim=4, decoder_ffn_dim=4,
encoder_attention_heads=1, decoder_attention_heads=1))
config.update({
"d_model": 4,
"encoder_layers": 1, "decoder_layers": 1,
"encoder_ffn_dim": 4, "decoder_ffn_dim": 4,
"encoder_attention_heads": 1, "decoder_attention_heads": 1})
tiny_model = FSMTForConditionalGeneration(config)
print(f"num of params {tiny_model.num_parameters()}")

View File

@@ -19,6 +19,7 @@
import os
from pathlib import Path
def write_model_card(model_card_dir, src_lang, tgt_lang, model_name):
texts = {

View File

@@ -19,6 +19,7 @@
import os
from pathlib import Path
def write_model_card(model_card_dir, src_lang, tgt_lang, model_name):
texts = {

View File

@@ -19,6 +19,7 @@
import os
from pathlib import Path
def write_model_card(model_card_dir, src_lang, tgt_lang):
texts = {
@@ -39,7 +40,7 @@ def write_model_card(model_card_dir, src_lang, tgt_lang):
readme = f"""
---
language:
language:
- {src_lang}
- {tgt_lang}
thumbnail: