Indent code block in the documentation (#11233)

* Indent code block * Indent code blocks version 2 * Quality
2021-04-13 15:36:36 -04:00
parent 9d8e8a8703
commit f38cd4373f
16 changed files with 382 additions and 357 deletions
--- a/docs/source/model_doc/bert_japanese.rst
+++ b/docs/source/model_doc/bert_japanese.rst
@@ -33,38 +33,38 @@ Example of using a model with MeCab and WordPiece tokenization:

 .. code-block::

-  >>> import torch
-  >>> from transformers import AutoModel, AutoTokenizer 
+    >>> import torch
+    >>> from transformers import AutoModel, AutoTokenizer 

-  >>> bertjapanese = AutoModel.from_pretrained("cl-tohoku/bert-base-japanese")
-  >>> tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese")
+    >>> bertjapanese = AutoModel.from_pretrained("cl-tohoku/bert-base-japanese")
+    >>> tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese")

-  >>> ## Input Japanese Text
-  >>> line = "吾輩は猫である。"
+    >>> ## Input Japanese Text
+    >>> line = "吾輩は猫である。"

-  >>> inputs = tokenizer(line, return_tensors="pt")
+    >>> inputs = tokenizer(line, return_tensors="pt")

-  >>> print(tokenizer.decode(inputs['input_ids'][0]))
-  [CLS] 吾輩 は 猫 で ある 。 [SEP]
+    >>> print(tokenizer.decode(inputs['input_ids'][0]))
+    [CLS] 吾輩 は 猫 で ある 。 [SEP]

-  >>> outputs = bertjapanese(**inputs)
+    >>> outputs = bertjapanese(**inputs)

 Example of using a model with Character tokenization:

 .. code-block::

-  >>> bertjapanese = AutoModel.from_pretrained("cl-tohoku/bert-base-japanese-char")
-  >>> tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese-char")
+    >>> bertjapanese = AutoModel.from_pretrained("cl-tohoku/bert-base-japanese-char")
+    >>> tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese-char")

-  >>> ## Input Japanese Text
-  >>> line = "吾輩は猫である。"
+    >>> ## Input Japanese Text
+    >>> line = "吾輩は猫である。"

-  >>> inputs = tokenizer(line, return_tensors="pt")
+    >>> inputs = tokenizer(line, return_tensors="pt")

-  >>> print(tokenizer.decode(inputs['input_ids'][0]))
-  [CLS] 吾 輩 は 猫 で あ る 。 [SEP]
+    >>> print(tokenizer.decode(inputs['input_ids'][0]))
+    [CLS] 吾 輩 は 猫 で あ る 。 [SEP]

-  >>> outputs = bertjapanese(**inputs)
+    >>> outputs = bertjapanese(**inputs)

 Tips:

--- a/docs/source/model_doc/bertgeneration.rst
+++ b/docs/source/model_doc/bertgeneration.rst
@@ -38,22 +38,22 @@ Usage:

 .. code-block::

-  # leverage checkpoints for Bert2Bert model...
-  # use BERT's cls token as BOS token and sep token as EOS token
-  encoder = BertGenerationEncoder.from_pretrained("bert-large-uncased", bos_token_id=101, eos_token_id=102)
-  # add cross attention layers and use BERT's cls token as BOS token and sep token as EOS token
-  decoder = BertGenerationDecoder.from_pretrained("bert-large-uncased", add_cross_attention=True, is_decoder=True, bos_token_id=101, eos_token_id=102)
-  bert2bert = EncoderDecoderModel(encoder=encoder, decoder=decoder)
+    # leverage checkpoints for Bert2Bert model...
+    # use BERT's cls token as BOS token and sep token as EOS token
+    encoder = BertGenerationEncoder.from_pretrained("bert-large-uncased", bos_token_id=101, eos_token_id=102)
+    # add cross attention layers and use BERT's cls token as BOS token and sep token as EOS token
+    decoder = BertGenerationDecoder.from_pretrained("bert-large-uncased", add_cross_attention=True, is_decoder=True, bos_token_id=101, eos_token_id=102)
+    bert2bert = EncoderDecoderModel(encoder=encoder, decoder=decoder)

-  # create tokenizer...
-  tokenizer = BertTokenizer.from_pretrained("bert-large-uncased")
+    # create tokenizer...
+    tokenizer = BertTokenizer.from_pretrained("bert-large-uncased")

-  input_ids = tokenizer('This is a long article to summarize', add_special_tokens=False, return_tensors="pt").input_ids
-  labels = tokenizer('This is a short summary', return_tensors="pt").input_ids
+    input_ids = tokenizer('This is a long article to summarize', add_special_tokens=False, return_tensors="pt").input_ids
+    labels = tokenizer('This is a short summary', return_tensors="pt").input_ids

-  # train...
-  loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels).loss
-  loss.backward()
+    # train...
+    loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels).loss
+    loss.backward()


 - Pretrained :class:`~transformers.EncoderDecoderModel` are also directly available in the model hub, e.g.,
@@ -61,15 +61,15 @@ Usage:

 .. code-block::

-  # instantiate sentence fusion model
-  sentence_fuser = EncoderDecoderModel.from_pretrained("google/roberta2roberta_L-24_discofuse")
-  tokenizer = AutoTokenizer.from_pretrained("google/roberta2roberta_L-24_discofuse")
+    # instantiate sentence fusion model
+    sentence_fuser = EncoderDecoderModel.from_pretrained("google/roberta2roberta_L-24_discofuse")
+    tokenizer = AutoTokenizer.from_pretrained("google/roberta2roberta_L-24_discofuse")

-  input_ids = tokenizer('This is the first sentence. This is the second sentence.', add_special_tokens=False, return_tensors="pt").input_ids
+    input_ids = tokenizer('This is the first sentence. This is the second sentence.', add_special_tokens=False, return_tensors="pt").input_ids

-  outputs = sentence_fuser.generate(input_ids)
+    outputs = sentence_fuser.generate(input_ids)

-  print(tokenizer.decode(outputs[0]))
+    print(tokenizer.decode(outputs[0]))


 Tips:
--- a/docs/source/model_doc/bertweet.rst
+++ b/docs/source/model_doc/bertweet.rst
@@ -31,28 +31,28 @@ Example of use:

 .. code-block::

-  import torch
-  from transformers import AutoModel, AutoTokenizer 
+    import torch
+    from transformers import AutoModel, AutoTokenizer 

-  bertweet = AutoModel.from_pretrained("vinai/bertweet-base")
+    bertweet = AutoModel.from_pretrained("vinai/bertweet-base")

-  # For transformers v4.x+: 
-  tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False)
+    # For transformers v4.x+: 
+    tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False)

-  # For transformers v3.x: 
-  # tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")
+    # For transformers v3.x: 
+    # tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")

-  # INPUT TWEET IS ALREADY NORMALIZED!
-  line = "SC has first two presumptive cases of coronavirus , DHEC confirms HTTPURL via @USER :cry:"
+    # INPUT TWEET IS ALREADY NORMALIZED!
+    line = "SC has first two presumptive cases of coronavirus , DHEC confirms HTTPURL via @USER :cry:"

-  input_ids = torch.tensor([tokenizer.encode(line)])
+    input_ids = torch.tensor([tokenizer.encode(line)])

-  with torch.no_grad():
-      features = bertweet(input_ids)  # Models outputs are now tuples
+    with torch.no_grad():
+        features = bertweet(input_ids)  # Models outputs are now tuples

-  ## With TensorFlow 2.0+:
-  # from transformers import TFAutoModel
-  # bertweet = TFAutoModel.from_pretrained("vinai/bertweet-base")
+    ## With TensorFlow 2.0+:
+    # from transformers import TFAutoModel
+    # bertweet = TFAutoModel.from_pretrained("vinai/bertweet-base")


 The original code can be found `here <https://github.com/VinAIResearch/BERTweet>`__.
--- a/docs/source/model_doc/herbert.rst
+++ b/docs/source/model_doc/herbert.rst
@@ -40,20 +40,20 @@ Examples of use:

 .. code-block::

-  from transformers import HerbertTokenizer, RobertaModel
+    from transformers import HerbertTokenizer, RobertaModel

-  tokenizer = HerbertTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
-  model = RobertaModel.from_pretrained("allegro/herbert-klej-cased-v1")
+    tokenizer = HerbertTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
+    model = RobertaModel.from_pretrained("allegro/herbert-klej-cased-v1")

-  encoded_input = tokenizer.encode("Kto ma lepszą sztukę, ma lepszy rząd – to jasne.", return_tensors='pt')
-  outputs = model(encoded_input)
+    encoded_input = tokenizer.encode("Kto ma lepszą sztukę, ma lepszy rząd – to jasne.", return_tensors='pt')
+    outputs = model(encoded_input)

-  # HerBERT can also be loaded using AutoTokenizer and AutoModel:
-  import torch
-  from transformers import AutoModel, AutoTokenizer
+    # HerBERT can also be loaded using AutoTokenizer and AutoModel:
+    import torch
+    from transformers import AutoModel, AutoTokenizer

-  tokenizer = AutoTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
-  model = AutoModel.from_pretrained("allegro/herbert-klej-cased-v1")
+    tokenizer = AutoTokenizer.from_pretrained("allegro/herbert-klej-cased-tokenizer-v1")
+    model = AutoModel.from_pretrained("allegro/herbert-klej-cased-v1")


 The original code can be found `here <https://github.com/allegro/HerBERT>`__.
--- a/docs/source/model_doc/layoutlm.rst
+++ b/docs/source/model_doc/layoutlm.rst
@@ -56,24 +56,24 @@ Tips:

 .. code-block::

-   def normalize_bbox(bbox, width, height):
-        return [
-            int(1000 * (bbox[0] / width)),
-            int(1000 * (bbox[1] / height)),
-            int(1000 * (bbox[2] / width)),
-            int(1000 * (bbox[3] / height)),
-        ]
+    def normalize_bbox(bbox, width, height):
+        return [
+            int(1000 * (bbox[0] / width)),
+            int(1000 * (bbox[1] / height)),
+            int(1000 * (bbox[2] / width)),
+            int(1000 * (bbox[3] / height)),
+        ]

 Here, :obj:`width` and :obj:`height` correspond to the width and height of the original document in which the token
 occurs. Those can be obtained using the Python Image Library (PIL) library for example, as follows:

 .. code-block::

-   from PIL import Image
+    from PIL import Image

-   image = Image.open("name_of_your_document - can be a png file, pdf, etc.")
+    image = Image.open("name_of_your_document - can be a png file, pdf, etc.")

-   width, height = image.size
+    width, height = image.size

 - For a demo which shows how to fine-tune :class:`LayoutLMForTokenClassification` on the `FUNSD dataset
  <https://guillaumejaume.github.io/FUNSD/>`__ (a collection of annotated forms), see `this notebook
--- a/docs/source/model_doc/megatron_bert.rst
+++ b/docs/source/model_doc/megatron_bert.rst
@@ -53,15 +53,15 @@ BERT-345M-uncased::

 .. code-block:: bash

-  wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/megatron_bert_345m/versions/v0.1_uncased/zip
-  -O megatron_bert_345m_v0_1_uncased.zip
+    wget --content-disposition -O megatron_bert_345m_v0_1_uncased.zip \
+        https://api.ngc.nvidia.com/v2/models/nvidia/megatron_bert_345m/versions/v0.1_uncased/zip

 BERT-345M-cased::

 .. code-block:: bash

-  wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/megatron_bert_345m/versions/v0.1_cased/zip -O
-  megatron_bert_345m_v0_1_cased.zip
+    wget --content-disposition -O megatron_bert_345m_v0_1_cased.zip \
+        https://api.ngc.nvidia.com/v2/models/nvidia/megatron_bert_345m/versions/v0.1_cased/zip

 Once you have obtained the checkpoints from NVIDIA GPU Cloud (NGC), you have to convert them to a format that will
 easily be loaded by Hugging Face Transformers and our port of the BERT code.
@@ -71,11 +71,11 @@ The following commands allow you to do the conversion. We assume that the folder

 .. code-block:: bash

-  python3 $PATH_TO_TRANSFORMERS/models/megatron_bert/convert_megatron_bert_checkpoint.py megatron_bert_345m_v0_1_uncased.zip 
+    python3 $PATH_TO_TRANSFORMERS/models/megatron_bert/convert_megatron_bert_checkpoint.py megatron_bert_345m_v0_1_uncased.zip 

 .. code-block:: bash

-  python3 $PATH_TO_TRANSFORMERS/models/megatron_bert/convert_megatron_bert_checkpoint.py megatron_bert_345m_v0_1_cased.zip
+    python3 $PATH_TO_TRANSFORMERS/models/megatron_bert/convert_megatron_bert_checkpoint.py megatron_bert_345m_v0_1_cased.zip

 The original code can be found `here <https://github.com/NVIDIA/Megatron-LM>`__. That repository contains a multi-GPU
 and multi-node implementation of the Megatron Language models. In particular, it contains a hybrid model parallel
--- a/docs/source/model_doc/megatron_gpt2.rst
+++ b/docs/source/model_doc/megatron_gpt2.rst
@@ -51,8 +51,8 @@ Alternatively, you can directly download the checkpoints using::

 .. code-block:: bash

-  wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/megatron_lm_345m/versions/v0.0/zip -O
-  megatron_gpt2_345m_v0_0.zip
+    wget --content-disposition -O megatron_gpt2_345m_v0_0.zip \
+        https://api.ngc.nvidia.com/v2/models/nvidia/megatron_lm_345m/versions/v0.0/zip

 Once you have obtained the checkpoint from NVIDIA GPU Cloud (NGC), you have to convert it to a format that will easily
 be loaded by Hugging Face Transformers GPT2 implementation.
@@ -62,7 +62,7 @@ The following command allows you to do the conversion. We assume that the folder

 .. code-block:: bash

-  python3 $PATH_TO_TRANSFORMERS/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py megatron_gpt2_345m_v0_0.zip
+    python3 $PATH_TO_TRANSFORMERS/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py megatron_gpt2_345m_v0_0.zip

 The original code can be found `here <https://github.com/NVIDIA/Megatron-LM>`__. That repository contains a multi-GPU
 and multi-node implementation of the Megatron Language models. In particular, it contains a hybrid model parallel
--- a/docs/source/model_doc/phobert.rst
+++ b/docs/source/model_doc/phobert.rst
@@ -31,23 +31,23 @@ Example of use:

 .. code-block::

-  import torch
-  from transformers import AutoModel, AutoTokenizer
+    import torch
+    from transformers import AutoModel, AutoTokenizer

-  phobert = AutoModel.from_pretrained("vinai/phobert-base")
-  tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
+    phobert = AutoModel.from_pretrained("vinai/phobert-base")
+    tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")

-  # INPUT TEXT MUST BE ALREADY WORD-SEGMENTED!
-  line = "Tôi là sinh_viên trường đại_học Công_nghệ ."
+    # INPUT TEXT MUST BE ALREADY WORD-SEGMENTED!
+    line = "Tôi là sinh_viên trường đại_học Công_nghệ ."

-  input_ids = torch.tensor([tokenizer.encode(line)])
+    input_ids = torch.tensor([tokenizer.encode(line)])

-  with torch.no_grad():
-      features = phobert(input_ids)  # Models outputs are now tuples
+    with torch.no_grad():
+        features = phobert(input_ids)  # Models outputs are now tuples

-  ## With TensorFlow 2.0+:
-  # from transformers import TFAutoModel
-  # phobert = TFAutoModel.from_pretrained("vinai/phobert-base")
+    ## With TensorFlow 2.0+:
+    # from transformers import TFAutoModel
+    # phobert = TFAutoModel.from_pretrained("vinai/phobert-base")


 The original code can be found `here <https://github.com/VinAIResearch/PhoBERT>`__.
--- a/docs/source/model_doc/reformer.rst
+++ b/docs/source/model_doc/reformer.rst
@@ -145,8 +145,8 @@ For training, the :class:`~transformers.ReformerModelWithLMHead` should be used

 .. code-block::

-  input_ids = tokenizer.encode('This is a sentence from the training data', return_tensors='pt')
-  loss = model(input_ids, labels=input_ids)[0]
+    input_ids = tokenizer.encode('This is a sentence from the training data', return_tensors='pt')
+    loss = model(input_ids, labels=input_ids)[0]


 ReformerConfig
--- a/docs/source/model_doc/t5.rst
+++ b/docs/source/model_doc/t5.rst
@@ -73,10 +73,10 @@ token. T5 can be trained / fine-tuned both in a supervised and unsupervised fash

 .. code-block::

-  input_ids = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='pt').input_ids
-  labels = tokenizer('<extra_id_0> cute dog <extra_id_1> the <extra_id_2>', return_tensors='pt').input_ids
-  # the forward function automatically creates the correct decoder_input_ids
-  loss = model(input_ids=input_ids, labels=labels).loss
+    input_ids = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='pt').input_ids
+    labels = tokenizer('<extra_id_0> cute dog <extra_id_1> the <extra_id_2>', return_tensors='pt').input_ids
+    # the forward function automatically creates the correct decoder_input_ids
+    loss = model(input_ids=input_ids, labels=labels).loss

 - Supervised training

@@ -86,10 +86,10 @@ token. T5 can be trained / fine-tuned both in a supervised and unsupervised fash

 .. code-block::

-  input_ids = tokenizer('translate English to German: The house is wonderful.', return_tensors='pt').input_ids
-  labels = tokenizer('Das Haus ist wunderbar.', return_tensors='pt').input_ids
-  # the forward function automatically creates the correct decoder_input_ids
-  loss = model(input_ids=input_ids, labels=labels).loss
+    input_ids = tokenizer('translate English to German: The house is wonderful.', return_tensors='pt').input_ids
+    labels = tokenizer('Das Haus ist wunderbar.', return_tensors='pt').input_ids
+    # the forward function automatically creates the correct decoder_input_ids
+    loss = model(input_ids=input_ids, labels=labels).loss


 T5Config