Doc styler examples (#14953)
* Fix bad examples * Add black formatting to style_doc * Use first nonempty line * Put it at the right place * Don't add spaces to empty lines * Better templates * Deal with triple quotes in docstrings * Result of style_doc * Enable mdx treatment and fix code examples in MDXs * Result of doc styler on doc source files * Last fixes * Break copy from
This commit is contained in:
@@ -79,12 +79,13 @@ class MyCallback(TrainerCallback):
|
||||
def on_train_begin(self, args, state, control, **kwargs):
|
||||
print("Starting training")
|
||||
|
||||
|
||||
trainer = Trainer(
|
||||
model,
|
||||
args,
|
||||
train_dataset=train_dataset,
|
||||
eval_dataset=eval_dataset,
|
||||
callbacks=[MyCallback] # We can either pass the callback class this way or an instance of it (MyCallback())
|
||||
callbacks=[MyCallback], # We can either pass the callback class this way or an instance of it (MyCallback())
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
@@ -295,11 +295,12 @@ If you're using only 1 GPU, here is how you'd have to adjust your training code
|
||||
# DeepSpeed requires a distributed environment even when only one process is used.
|
||||
# This emulates a launcher in the notebook
|
||||
import os
|
||||
os.environ['MASTER_ADDR'] = 'localhost'
|
||||
os.environ['MASTER_PORT'] = '9994' # modify if RuntimeError: Address already in use
|
||||
os.environ['RANK'] = "0"
|
||||
os.environ['LOCAL_RANK'] = "0"
|
||||
os.environ['WORLD_SIZE'] = "1"
|
||||
|
||||
os.environ["MASTER_ADDR"] = "localhost"
|
||||
os.environ["MASTER_PORT"] = "9994" # modify if RuntimeError: Address already in use
|
||||
os.environ["RANK"] = "0"
|
||||
os.environ["LOCAL_RANK"] = "0"
|
||||
os.environ["WORLD_SIZE"] = "1"
|
||||
|
||||
# Now proceed as normal, plus pass the deepspeed config file
|
||||
training_args = TrainingArguments(..., deepspeed="ds_config_zero3.json")
|
||||
@@ -316,7 +317,7 @@ at the beginning of this section.
|
||||
If you want to create the config file on the fly in the notebook in the current directory, you could have a dedicated
|
||||
cell with:
|
||||
|
||||
```python
|
||||
```python no-style
|
||||
%%bash
|
||||
cat <<'EOT' > ds_config_zero3.json
|
||||
{
|
||||
@@ -382,14 +383,14 @@ EOT
|
||||
If the training script is in a normal file and not in the notebook cells, you can launch `deepspeed` normally via
|
||||
shell from a cell. For example, to use `run_translation.py` you would launch it with:
|
||||
|
||||
```python
|
||||
```python no-style
|
||||
!git clone https://github.com/huggingface/transformers
|
||||
!cd transformers; deepspeed examples/pytorch/translation/run_translation.py ...
|
||||
```
|
||||
|
||||
or with `%%bash` magic, where you can write multi-line code for the shell program to run:
|
||||
|
||||
```python
|
||||
```python no-style
|
||||
%%bash
|
||||
|
||||
git clone https://github.com/huggingface/transformers
|
||||
@@ -512,7 +513,7 @@ TrainingArguments(..., deepspeed="/path/to/ds_config.json")
|
||||
or:
|
||||
|
||||
```python
|
||||
ds_config_dict=dict(scheduler=scheduler_params, optimizer=optimizer_params)
|
||||
ds_config_dict = dict(scheduler=scheduler_params, optimizer=optimizer_params)
|
||||
TrainingArguments(..., deepspeed=ds_config_dict)
|
||||
```
|
||||
|
||||
@@ -1430,6 +1431,7 @@ If you have saved at least one checkpoint, and you want to use the latest one, y
|
||||
```python
|
||||
from transformers.trainer_utils import get_last_checkpoint
|
||||
from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint
|
||||
|
||||
checkpoint_dir = get_last_checkpoint(trainer.args.output_dir)
|
||||
fp32_model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)
|
||||
```
|
||||
@@ -1439,6 +1441,7 @@ checkpoint), then you can finish the training by first saving the final model ex
|
||||
|
||||
```python
|
||||
from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint
|
||||
|
||||
checkpoint_dir = os.path.join(trainer.args.output_dir, "checkpoint-final")
|
||||
trainer.deepspeed.save_checkpoint(checkpoint_dir)
|
||||
fp32_model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)
|
||||
@@ -1461,7 +1464,8 @@ these yourself as is shown in the following example:
|
||||
|
||||
```python
|
||||
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
|
||||
state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu
|
||||
|
||||
state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu
|
||||
model = model.cpu()
|
||||
model.load_state_dict(state_dict)
|
||||
```
|
||||
@@ -1529,9 +1533,10 @@ context manager (which is also a function decorator), like so:
|
||||
```python
|
||||
from transformers import T5ForConditionalGeneration, T5Config
|
||||
import deepspeed
|
||||
|
||||
with deepspeed.zero.Init():
|
||||
config = T5Config.from_pretrained("t5-small")
|
||||
model = T5ForConditionalGeneration(config)
|
||||
config = T5Config.from_pretrained("t5-small")
|
||||
model = T5ForConditionalGeneration(config)
|
||||
```
|
||||
|
||||
As you can see this gives you a randomly initialized model.
|
||||
@@ -1544,6 +1549,7 @@ section. Thus you must create the [`TrainingArguments`] object **before** callin
|
||||
|
||||
```python
|
||||
from transformers import AutoModel, Trainer, TrainingArguments
|
||||
|
||||
training_args = TrainingArguments(..., deepspeed=ds_config)
|
||||
model = AutoModel.from_pretrained("t5-small")
|
||||
trainer = Trainer(model=model, args=training_args, ...)
|
||||
@@ -1574,7 +1580,7 @@ limitations.
|
||||
Also under ZeRO-3, if you write your own code and run into a model parameter weight that looks like:
|
||||
|
||||
```python
|
||||
tensor([1.], device='cuda:0', dtype=torch.float16, requires_grad=True)
|
||||
tensor([1.0], device="cuda:0", dtype=torch.float16, requires_grad=True)
|
||||
```
|
||||
|
||||
stress on `tensor([1.])`, or if you get an error where it says the parameter is of size `1`, instead of some much
|
||||
@@ -1715,9 +1721,9 @@ For example for a pretrained model:
|
||||
from transformers.deepspeed import HfDeepSpeedConfig
|
||||
from transformers import AutoModel, deepspeed
|
||||
|
||||
ds_config = { ... } # deepspeed config object or path to the file
|
||||
ds_config = {...} # deepspeed config object or path to the file
|
||||
# must run before instantiating the model
|
||||
dschf = HfDeepSpeedConfig(ds_config) # keep this object alive
|
||||
dschf = HfDeepSpeedConfig(ds_config) # keep this object alive
|
||||
model = AutoModel.from_pretrained("gpt2")
|
||||
engine = deepspeed.initialize(model=model, config_params=ds_config, ...)
|
||||
```
|
||||
@@ -1728,9 +1734,9 @@ or for non-pretrained model:
|
||||
from transformers.deepspeed import HfDeepSpeedConfig
|
||||
from transformers import AutoModel, AutoConfig, deepspeed
|
||||
|
||||
ds_config = { ... } # deepspeed config object or path to the file
|
||||
ds_config = {...} # deepspeed config object or path to the file
|
||||
# must run before instantiating the model
|
||||
dschf = HfDeepSpeedConfig(ds_config) # keep this object alive
|
||||
dschf = HfDeepSpeedConfig(ds_config) # keep this object alive
|
||||
config = AutoConfig.from_pretrained("gpt2")
|
||||
model = AutoModel.from_config(config)
|
||||
engine = deepspeed.initialize(model=model, config_params=ds_config, ...)
|
||||
|
||||
@@ -21,6 +21,7 @@ to the INFO level.
|
||||
|
||||
```python
|
||||
import transformers
|
||||
|
||||
transformers.logging.set_verbosity_info()
|
||||
```
|
||||
|
||||
|
||||
@@ -22,8 +22,8 @@ Let's see of this looks on an example:
|
||||
from transformers import BertTokenizer, BertForSequenceClassification
|
||||
import torch
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
|
||||
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
|
||||
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
|
||||
|
||||
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||
|
||||
@@ -101,6 +101,7 @@ from transformers import pipeline
|
||||
|
||||
pipe = pipeline("text-classification")
|
||||
|
||||
|
||||
def data():
|
||||
while True:
|
||||
# This could come from a dataset, a database, a queue or HTTP request
|
||||
@@ -110,6 +111,7 @@ def data():
|
||||
# does the preprocessing while the main runs the big inference
|
||||
yield "This is a test"
|
||||
|
||||
|
||||
for out in pipe(data()):
|
||||
print(out)
|
||||
# {"text": "NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD NIGHT HUSBAND"}
|
||||
@@ -125,10 +127,10 @@ All pipelines can use batching. This will work
|
||||
whenever the pipeline uses its streaming ability (so when passing lists or `Dataset` or `generator`).
|
||||
|
||||
```python
|
||||
from transformers import pipeline
|
||||
from transformers import pipeline
|
||||
from transformers.pipelines.base import KeyDataset
|
||||
import datasets
|
||||
import tqdm
|
||||
import tqdm
|
||||
|
||||
dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised")
|
||||
pipe = pipeline("text-classification", device=0)
|
||||
@@ -149,28 +151,28 @@ Example where it's mostly a speedup:
|
||||
</Tip>
|
||||
|
||||
```python
|
||||
from transformers import pipeline
|
||||
from torch.utils.data import Dataset
|
||||
import tqdm
|
||||
from transformers import pipeline
|
||||
from torch.utils.data import Dataset
|
||||
import tqdm
|
||||
|
||||
|
||||
pipe = pipeline("text-classification", device=0)
|
||||
pipe = pipeline("text-classification", device=0)
|
||||
|
||||
|
||||
class MyDataset(Dataset):
|
||||
def __len__(self):
|
||||
return 5000
|
||||
class MyDataset(Dataset):
|
||||
def __len__(self):
|
||||
return 5000
|
||||
|
||||
def __getitem__(self, i):
|
||||
return "This is a test"
|
||||
def __getitem__(self, i):
|
||||
return "This is a test"
|
||||
|
||||
|
||||
dataset = MyDataset()
|
||||
dataset = MyDataset()
|
||||
|
||||
for batch_size in [1, 8, 64, 256]:
|
||||
print("-" * 30)
|
||||
print(f"Streaming batch_size={batch_size}")
|
||||
for out in tqdm.tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):
|
||||
print("-" * 30)
|
||||
print(f"Streaming batch_size={batch_size}")
|
||||
for out in tqdm.tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):
|
||||
pass
|
||||
```
|
||||
|
||||
@@ -194,15 +196,15 @@ Streaming batch_size=256
|
||||
Example where it's mostly a slowdown:
|
||||
|
||||
```python
|
||||
class MyDataset(Dataset):
|
||||
def __len__(self):
|
||||
return 5000
|
||||
class MyDataset(Dataset):
|
||||
def __len__(self):
|
||||
return 5000
|
||||
|
||||
def __getitem__(self, i):
|
||||
if i % 64 == 0:
|
||||
n = 100
|
||||
else:
|
||||
n = 1
|
||||
def __getitem__(self, i):
|
||||
if i % 64 == 0:
|
||||
n = 100
|
||||
else:
|
||||
n = 1
|
||||
return "This is a test" * n
|
||||
```
|
||||
|
||||
@@ -298,10 +300,11 @@ If you want to try simply you can:
|
||||
|
||||
```python
|
||||
class MyPipeline(TextClassificationPipeline):
|
||||
def postprocess(...):
|
||||
...
|
||||
def postprocess():
|
||||
# Your code goes here
|
||||
scores = scores * 100
|
||||
...
|
||||
# And here
|
||||
|
||||
|
||||
my_pipeline = MyPipeline(model=model, tokenizer=tokenizer, ...)
|
||||
# or if you use *pipeline* function, then:
|
||||
|
||||
@@ -122,7 +122,7 @@ examples = processor.get_dev_examples(squad_v2_data_dir)
|
||||
processor = SquadV1Processor()
|
||||
examples = processor.get_dev_examples(squad_v1_data_dir)
|
||||
|
||||
features = squad_convert_examples_to_features(
|
||||
features = squad_convert_examples_to_features(
|
||||
examples=examples,
|
||||
tokenizer=tokenizer,
|
||||
max_seq_length=max_seq_length,
|
||||
@@ -139,7 +139,7 @@ Using *tensorflow_datasets* is as easy as using a data file:
|
||||
tfds_examples = tfds.load("squad")
|
||||
examples = SquadV1Processor().get_examples_from_dataset(tfds_examples, evaluate=evaluate)
|
||||
|
||||
features = squad_convert_examples_to_features(
|
||||
features = squad_convert_examples_to_features(
|
||||
examples=examples,
|
||||
tokenizer=tokenizer,
|
||||
max_seq_length=max_seq_length,
|
||||
|
||||
@@ -53,14 +53,16 @@ Here is an example of how to customize [`Trainer`] using a custom loss function
|
||||
from torch import nn
|
||||
from transformers import Trainer
|
||||
|
||||
|
||||
class MultilabelTrainer(Trainer):
|
||||
def compute_loss(self, model, inputs, return_outputs=False):
|
||||
labels = inputs.get("labels")
|
||||
outputs = model(**inputs)
|
||||
logits = outputs.get('logits')
|
||||
logits = outputs.get("logits")
|
||||
loss_fct = nn.BCEWithLogitsLoss()
|
||||
loss = loss_fct(logits.view(-1, self.model.config.num_labels),
|
||||
labels.float().view(-1, self.model.config.num_labels))
|
||||
loss = loss_fct(
|
||||
logits.view(-1, self.model.config.num_labels), labels.float().view(-1, self.model.config.num_labels)
|
||||
)
|
||||
return (loss, outputs) if return_outputs else loss
|
||||
```
|
||||
|
||||
|
||||
Reference in New Issue
Block a user