Doc styler examples (#14953)

* Fix bad examples
* Add black formatting to style_doc
* Use first nonempty line
* Put it at the right place
* Don't add spaces to empty lines
* Better templates
* Deal with triple quotes in docstrings
* Result of style_doc
* Enable mdx treatment and fix code examples in MDXs
* Result of doc styler on doc source files
* Last fixes
* Break copy from
2021-12-27 19:07:46 -05:00
parent e13f72fbff
commit b5e2b183af
211 changed files with 2738 additions and 1711 deletions
--- a/docs/source/model_doc/t5.mdx
+++ b/docs/source/model_doc/t5.mdx
@@ -98,8 +98,8 @@ language modeling head on top of the decoder.
  tokenizer = T5Tokenizer.from_pretrained("t5-small")
  model = T5ForConditionalGeneration.from_pretrained("t5-small")

-  input_ids = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='pt').input_ids
-  labels = tokenizer('<extra_id_0> cute dog <extra_id_1> the <extra_id_2>', return_tensors='pt').input_ids
+  input_ids = tokenizer("The <extra_id_0> walks in <extra_id_1> park", return_tensors="pt").input_ids
+  labels = tokenizer("<extra_id_0> cute dog <extra_id_1> the <extra_id_2>", return_tensors="pt").input_ids
  # the forward function automatically creates the correct decoder_input_ids
  loss = model(input_ids=input_ids, labels=labels).loss
  ```
@@ -120,8 +120,8 @@ language modeling head on top of the decoder.
  tokenizer = T5Tokenizer.from_pretrained("t5-small")
  model = T5ForConditionalGeneration.from_pretrained("t5-small")

-  input_ids = tokenizer('translate English to German: The house is wonderful.', return_tensors='pt').input_ids
-  labels = tokenizer('Das Haus ist wunderbar.', return_tensors='pt').input_ids
+  input_ids = tokenizer("translate English to German: The house is wonderful.", return_tensors="pt").input_ids
+  labels = tokenizer("Das Haus ist wunderbar.", return_tensors="pt").input_ids
  # the forward function automatically creates the correct decoder_input_ids
  loss = model(input_ids=input_ids, labels=labels).loss
  ```
@@ -148,7 +148,7 @@ language modeling head on top of the decoder.
  ignored. The code example below illustrates all of this.

  ```python
-  from transformers import T5Tokenizer, T5ForConditionalGeneration 
+  from transformers import T5Tokenizer, T5ForConditionalGeneration
  import torch

  tokenizer = T5Tokenizer.from_pretrained("t5-small")
@@ -168,18 +168,19 @@ language modeling head on top of the decoder.
  # encode the inputs
  task_prefix = "translate English to French: "
  input_sequences = [input_sequence_1, input_sequence_2]
-  encoding = tokenizer([task_prefix + sequence for sequence in input_sequences], 
-                      padding='longest', 
-                      max_length=max_source_length, 
-                      truncation=True, 
-                      return_tensors="pt")
+  encoding = tokenizer(
+      [task_prefix + sequence for sequence in input_sequences],
+      padding="longest",
+      max_length=max_source_length,
+      truncation=True,
+      return_tensors="pt",
+  )
  input_ids, attention_mask = encoding.input_ids, encoding.attention_mask

  # encode the targets
-  target_encoding = tokenizer([output_sequence_1, output_sequence_2], 
-                              padding='longest', 
-                              max_length=max_target_length, 
-                              truncation=True)
+  target_encoding = tokenizer(
+      [output_sequence_1, output_sequence_2], padding="longest", max_length=max_target_length, truncation=True
+  )
  labels = target_encoding.input_ids

  # replace padding token id's of the labels by -100
@@ -218,12 +219,12 @@ There's also [this blog post](https://huggingface.co/blog/encoder-decoder#encode
 generation works in general in encoder-decoder models.

 ```python
-from transformers import T5Tokenizer, T5ForConditionalGeneration 
+from transformers import T5Tokenizer, T5ForConditionalGeneration

 tokenizer = T5Tokenizer.from_pretrained("t5-small")
 model = T5ForConditionalGeneration.from_pretrained("t5-small")

-input_ids = tokenizer('translate English to German: The house is wonderful.', return_tensors='pt').input_ids
+input_ids = tokenizer("translate English to German: The house is wonderful.", return_tensors="pt").input_ids
 outputs = model.generate(input_ids)
 print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 # Das Haus ist wunderbar.
@@ -242,17 +243,17 @@ model = T5ForConditionalGeneration.from_pretrained("t5-small")

 # when generating, we will use the logits of right-most token to predict the next token
 # so the padding should be on the left
-tokenizer.padding_side = "left" 
-tokenizer.pad_token = tokenizer.eos_token # to avoid an error
+tokenizer.padding_side = "left"
+tokenizer.pad_token = tokenizer.eos_token  # to avoid an error

-task_prefix = 'translate English to German: '
-sentences = ['The house is wonderful.', 'I like to work in NYC.'] # use different length sentences to test batching
+task_prefix = "translate English to German: "
+sentences = ["The house is wonderful.", "I like to work in NYC."]  # use different length sentences to test batching
 inputs = tokenizer([task_prefix + sentence for sentence in sentences], return_tensors="pt", padding=True)

 output_sequences = model.generate(
-    input_ids=inputs['input_ids'],
-    attention_mask=inputs['attention_mask'],
-    do_sample=False, # disable sampling to test if batching affects output
+    input_ids=inputs["input_ids"],
+    attention_mask=inputs["attention_mask"],
+    do_sample=False,  # disable sampling to test if batching affects output
 )

 print(tokenizer.batch_decode(output_sequences, skip_special_tokens=True))