From 02f48b9bfc9d7a1f04cce20b2df4c4b478971e6b Mon Sep 17 00:00:00 2001
From: Lysandre Debut <lysandre@huggingface.co>
Date: Mon, 23 Nov 2020 20:14:48 -0500
Subject: [PATCH] Model parallel documentation (#8741)

* Add the parallelize and deparallelize methods to the autoclass member lists in the GPT-2 and T5 .rst files

* Correct the reST format of the Example:: literal blocks in the GPT-2 and T5 parallelize/deparallelize docstrings
---
 docs/source/model_doc/gpt2.rst                | 4 ++--
 docs/source/model_doc/t5.rst                  | 4 ++--
 src/transformers/models/gpt2/modeling_gpt2.py | 6 ++++--
 src/transformers/models/t5/modeling_t5.py     | 6 ++++--
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/docs/source/model_doc/gpt2.rst b/docs/source/model_doc/gpt2.rst
index 5572e08784..feedffe62c 100644
--- a/docs/source/model_doc/gpt2.rst
+++ b/docs/source/model_doc/gpt2.rst
@@ -71,14 +71,14 @@ GPT2Model
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. autoclass:: transformers.GPT2Model
-    :members: forward
+    :members: forward, parallelize, deparallelize
 
 
 GPT2LMHeadModel
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. autoclass:: transformers.GPT2LMHeadModel
-    :members: forward
+    :members: forward, parallelize, deparallelize
 
 
 GPT2DoubleHeadsModel
diff --git a/docs/source/model_doc/t5.rst b/docs/source/model_doc/t5.rst
index e065daf1b4..2799028d72 100644
--- a/docs/source/model_doc/t5.rst
+++ b/docs/source/model_doc/t5.rst
@@ -99,14 +99,14 @@ T5Model
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. autoclass:: transformers.T5Model
-    :members: forward
+    :members: forward, parallelize, deparallelize
 
 
 T5ForConditionalGeneration
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. autoclass:: transformers.T5ForConditionalGeneration
-    :members: forward
+    :members: forward, parallelize, deparallelize
 
 
 TFT5Model
diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py
index 12c9d14369..1d03c98b61 100644
--- a/src/transformers/models/gpt2/modeling_gpt2.py
+++ b/src/transformers/models/gpt2/modeling_gpt2.py
@@ -492,7 +492,8 @@ PARALLELIZE_DOCSTRING = r"""
                 - gpt2-xl: 48
 
     Example::
-        Here is an example of a device map on a machine with 4 GPUs using gpt2-xl, which has a total of 48 attention modules:
+
+            # Here is an example of a device map on a machine with 4 GPUs using gpt2-xl, which has a total of 48 attention modules:
             model = GPT2LMHeadModel.from_pretrained('gpt2-xl')
             device_map = {0: [0, 1, 2, 3, 4, 5, 6, 7, 8],
 
@@ -505,7 +506,8 @@ DEPARALLELIZE_DOCSTRING = r"""
     Moves the model to cpu from a model parallel state.
 
     Example::
-        On a 4 GPU machine with gpt2-large:
+
+        # On a 4 GPU machine with gpt2-large:
         model = GPT2LMHeadModel.from_pretrained('gpt2-large')
         device_map = {0: [0, 1, 2, 3, 4, 5, 6, 7],
 
diff --git a/src/transformers/models/t5/modeling_t5.py b/src/transformers/models/t5/modeling_t5.py
index adba0b79fc..c35439372e 100644
--- a/src/transformers/models/t5/modeling_t5.py
+++ b/src/transformers/models/t5/modeling_t5.py
@@ -196,7 +196,8 @@ PARALLELIZE_DOCSTRING = r"""
                 - t5-11b: 24
 
     Example::
-        Here is an example of a device map on a machine with 4 GPUs using t5-3b, which has a total of 24 attention modules:
+
+            # Here is an example of a device map on a machine with 4 GPUs using t5-3b, which has a total of 24 attention modules:
             model = T5ForConditionalGeneration.from_pretrained('t5-3b')
             device_map = {0: [0, 1, 2],
 
@@ -209,7 +210,8 @@ DEPARALLELIZE_DOCSTRING = r"""
     Moves the model to cpu from a model parallel state.
 
     Example::
-        On a 4 GPU machine with t5-3b:
+
+        # On a 4 GPU machine with t5-3b:
         model = T5ForConditionalGeneration.from_pretrained('t5-3b')
         device_map = {0: [0, 1, 2],