Doc styler examples (#14953)

* Fix bad examples
* Add black formatting to style_doc
* Use first nonempty line
* Put it at the right place
* Don't add spaces to empty lines
* Better templates
* Deal with triple quotes in docstrings
* Result of style_doc
* Enable mdx treatment and fix code examples in MDXs
* Result of doc styler on doc source files
* Last fixes
* Break copy from
2021-12-27 19:07:46 -05:00
parent e13f72fbff
commit b5e2b183af
211 changed files with 2738 additions and 1711 deletions
--- a/docs/source/model_doc/layoutlmv2.mdx
+++ b/docs/source/model_doc/layoutlmv2.mdx
@@ -70,12 +70,12 @@ Tips:

 ```python
 def normalize_bbox(bbox, width, height):
-     return [
-         int(1000 * (bbox[0] / width)),
-         int(1000 * (bbox[1] / height)),
-         int(1000 * (bbox[2] / width)),
-         int(1000 * (bbox[3] / height)),
-     ]
+    return [
+        int(1000 * (bbox[0] / width)),
+        int(1000 * (bbox[1] / height)),
+        int(1000 * (bbox[2] / width)),
+        int(1000 * (bbox[3] / height)),
+    ]
 ```

 Here, `width` and `height` correspond to the width and height of the original document in which the token
@@ -123,7 +123,7 @@ modality.
 ```python
 from transformers import LayoutLMv2FeatureExtractor, LayoutLMv2TokenizerFast, LayoutLMv2Processor

-feature_extractor = LayoutLMv2FeatureExtractor() # apply_ocr is set to True by default
+feature_extractor = LayoutLMv2FeatureExtractor()  # apply_ocr is set to True by default
 tokenizer = LayoutLMv2TokenizerFast.from_pretrained("microsoft/layoutlmv2-base-uncased")
 processor = LayoutLMv2Processor(feature_extractor, tokenizer)
 ```
@@ -158,7 +158,9 @@ from PIL import Image
 processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")

 image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
-encoding = processor(image, return_tensors="pt") # you can also add all tokenizer parameters here such as padding, truncation
+encoding = processor(
+    image, return_tensors="pt"
+)  # you can also add all tokenizer parameters here such as padding, truncation
 print(encoding.keys())
 # dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'bbox', 'image'])
 ```
@@ -177,7 +179,7 @@ processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncas

 image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
 words = ["hello", "world"]
-boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes
+boxes = [[1, 2, 3, 4], [5, 6, 7, 8]]  # make sure to normalize your bounding boxes
 encoding = processor(image, words, boxes=boxes, return_tensors="pt")
 print(encoding.keys())
 # dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'bbox', 'image'])
@@ -199,7 +201,7 @@ processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncas

 image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
 words = ["hello", "world"]
-boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes
+boxes = [[1, 2, 3, 4], [5, 6, 7, 8]]  # make sure to normalize your bounding boxes
 word_labels = [1, 2]
 encoding = processor(image, words, boxes=boxes, word_labels=word_labels, return_tensors="pt")
 print(encoding.keys())
@@ -219,7 +221,7 @@ processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncas

 image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
 question = "What's his name?"
-encoding = processor(image, question, return_tensors="pt") 
+encoding = processor(image, question, return_tensors="pt")
 print(encoding.keys())
 # dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'bbox', 'image'])
 ```
@@ -238,8 +240,8 @@ processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncas
 image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
 question = "What's his name?"
 words = ["hello", "world"]
-boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes
-encoding = processor(image, question, words, boxes=boxes, return_tensors="pt")  
+boxes = [[1, 2, 3, 4], [5, 6, 7, 8]]  # make sure to normalize your bounding boxes
+encoding = processor(image, question, words, boxes=boxes, return_tensors="pt")
 print(encoding.keys())
 # dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'bbox', 'image'])
 ```