Doc styler examples (#14953)
* Fix bad examples
* Add black formatting to style_doc
* Use first nonempty line
* Put it at the right place
* Don't add spaces to empty lines
* Better templates
* Deal with triple quotes in docstrings
* Result of style_doc
* Enable mdx treatment and fix code examples in MDXs
* Result of doc styler on doc source files
* Last fixes
* Break copy from
This commit is contained in:
@@ -209,7 +209,7 @@ Here is a `pytorch-pretrained-bert` to 🤗 Transformers conversion example for
|
||||
|
||||
```python
|
||||
# Let's load our model
|
||||
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
|
||||
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
|
||||
|
||||
# If you used to have this line in pytorch-pretrained-bert:
|
||||
loss = model(input_ids, labels=labels)
|
||||
@@ -222,7 +222,7 @@ loss = outputs[0]
|
||||
loss, logits = outputs[:2]
|
||||
|
||||
# And even the attention weights if you configure the model to output them (and other outputs too, see the docstrings and documentation)
|
||||
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', output_attentions=True)
|
||||
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", output_attentions=True)
|
||||
outputs = model(input_ids, labels=labels)
|
||||
loss, logits, attentions = outputs
|
||||
```
|
||||
@@ -241,23 +241,23 @@ Here is an example:
|
||||
|
||||
```python
|
||||
### Let's load a model and tokenizer
|
||||
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
|
||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
||||
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
|
||||
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
|
||||
|
||||
### Do some stuff to our model and tokenizer
|
||||
# Ex: add new tokens to the vocabulary and embeddings of our model
|
||||
tokenizer.add_tokens(['[SPECIAL_TOKEN_1]', '[SPECIAL_TOKEN_2]'])
|
||||
tokenizer.add_tokens(["[SPECIAL_TOKEN_1]", "[SPECIAL_TOKEN_2]"])
|
||||
model.resize_token_embeddings(len(tokenizer))
|
||||
# Train our model
|
||||
train(model)
|
||||
|
||||
### Now let's save our model and tokenizer to a directory
|
||||
model.save_pretrained('./my_saved_model_directory/')
|
||||
tokenizer.save_pretrained('./my_saved_model_directory/')
|
||||
model.save_pretrained("./my_saved_model_directory/")
|
||||
tokenizer.save_pretrained("./my_saved_model_directory/")
|
||||
|
||||
### Reload the model and the tokenizer
|
||||
model = BertForSequenceClassification.from_pretrained('./my_saved_model_directory/')
|
||||
tokenizer = BertTokenizer.from_pretrained('./my_saved_model_directory/')
|
||||
model = BertForSequenceClassification.from_pretrained("./my_saved_model_directory/")
|
||||
tokenizer = BertTokenizer.from_pretrained("./my_saved_model_directory/")
|
||||
```
|
||||
|
||||
### Optimizers: BertAdam & OpenAIAdam are now AdamW, schedules are standard PyTorch schedules
|
||||
@@ -283,7 +283,13 @@ num_warmup_steps = 100
|
||||
warmup_proportion = float(num_warmup_steps) / float(num_training_steps) # 0.1
|
||||
|
||||
### Previously BertAdam optimizer was instantiated like this:
|
||||
optimizer = BertAdam(model.parameters(), lr=lr, schedule='warmup_linear', warmup=warmup_proportion, num_training_steps=num_training_steps)
|
||||
optimizer = BertAdam(
|
||||
    model.parameters(),
|
||||
    lr=lr,
|
||||
    schedule="warmup_linear",
|
||||
    warmup=warmup_proportion,
|
||||
    num_training_steps=num_training_steps,
|
||||
)
|
||||
### and used like this:
|
||||
for batch in train_data:
|
||||
    loss = model(batch)
|
||||
@@ -291,13 +297,19 @@ for batch in train_data:
|
||||
    optimizer.step()
|
||||
|
||||
### In 🤗 Transformers, optimizer and schedules are split and instantiated like this:
|
||||
optimizer = AdamW(model.parameters(), lr=lr, correct_bias=False) # To reproduce BertAdam specific behavior set correct_bias=False
|
||||
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps) # PyTorch scheduler
|
||||
optimizer = AdamW(
|
||||
    model.parameters(), lr=lr, correct_bias=False
|
||||
)  # To reproduce BertAdam specific behavior set correct_bias=False
|
||||
scheduler = get_linear_schedule_with_warmup(
|
||||
    optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps
|
||||
)  # PyTorch scheduler
|
||||
### and used like this:
|
||||
for batch in train_data:
|
||||
    loss = model(batch)
|
||||
    loss.backward()
|
||||
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)  # Gradient clipping is not in AdamW anymore (so you can use amp without issue)
|
||||
    torch.nn.utils.clip_grad_norm_(
|
||||
        model.parameters(), max_grad_norm
|
||||
    )  # Gradient clipping is not in AdamW anymore (so you can use amp without issue)
|
||||
    optimizer.step()
|
||||
    scheduler.step()
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user