Doc styler examples (#14953)
* Fix bad examples * Add black formatting to style_doc * Use first nonempty line * Put it at the right place * Don't add spaces to empty lines * Better templates * Deal with triple quotes in docstrings * Result of style_doc * Enable mdx treatment and fix code examples in MDXs * Result of doc styler on doc source files * Last fixes * Break copy from
This commit is contained in:
@@ -70,12 +70,12 @@ Tips:
|
||||
|
||||
```python
|
||||
def normalize_bbox(bbox, width, height):
|
||||
return [
|
||||
int(1000 * (bbox[0] / width)),
|
||||
int(1000 * (bbox[1] / height)),
|
||||
int(1000 * (bbox[2] / width)),
|
||||
int(1000 * (bbox[3] / height)),
|
||||
]
|
||||
return [
|
||||
int(1000 * (bbox[0] / width)),
|
||||
int(1000 * (bbox[1] / height)),
|
||||
int(1000 * (bbox[2] / width)),
|
||||
int(1000 * (bbox[3] / height)),
|
||||
]
|
||||
```
|
||||
|
||||
Here, `width` and `height` correspond to the width and height of the original document in which the token
|
||||
@@ -123,7 +123,7 @@ modality.
|
||||
```python
|
||||
from transformers import LayoutLMv2FeatureExtractor, LayoutLMv2TokenizerFast, LayoutLMv2Processor
|
||||
|
||||
feature_extractor = LayoutLMv2FeatureExtractor() # apply_ocr is set to True by default
|
||||
feature_extractor = LayoutLMv2FeatureExtractor() # apply_ocr is set to True by default
|
||||
tokenizer = LayoutLMv2TokenizerFast.from_pretrained("microsoft/layoutlmv2-base-uncased")
|
||||
processor = LayoutLMv2Processor(feature_extractor, tokenizer)
|
||||
```
|
||||
@@ -158,7 +158,9 @@ from PIL import Image
|
||||
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")
|
||||
|
||||
image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
|
||||
encoding = processor(image, return_tensors="pt") # you can also add all tokenizer parameters here such as padding, truncation
|
||||
encoding = processor(
|
||||
image, return_tensors="pt"
|
||||
) # you can also add all tokenizer parameters here such as padding, truncation
|
||||
print(encoding.keys())
|
||||
# dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'bbox', 'image'])
|
||||
```
|
||||
@@ -177,7 +179,7 @@ processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncas
|
||||
|
||||
image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
|
||||
words = ["hello", "world"]
|
||||
boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes
|
||||
boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes
|
||||
encoding = processor(image, words, boxes=boxes, return_tensors="pt")
|
||||
print(encoding.keys())
|
||||
# dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'bbox', 'image'])
|
||||
@@ -199,7 +201,7 @@ processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncas
|
||||
|
||||
image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
|
||||
words = ["hello", "world"]
|
||||
boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes
|
||||
boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes
|
||||
word_labels = [1, 2]
|
||||
encoding = processor(image, words, boxes=boxes, word_labels=word_labels, return_tensors="pt")
|
||||
print(encoding.keys())
|
||||
@@ -219,7 +221,7 @@ processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncas
|
||||
|
||||
image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
|
||||
question = "What's his name?"
|
||||
encoding = processor(image, question, return_tensors="pt")
|
||||
encoding = processor(image, question, return_tensors="pt")
|
||||
print(encoding.keys())
|
||||
# dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'bbox', 'image'])
|
||||
```
|
||||
@@ -238,8 +240,8 @@ processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncas
|
||||
image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB")
|
||||
question = "What's his name?"
|
||||
words = ["hello", "world"]
|
||||
boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes
|
||||
encoding = processor(image, question, words, boxes=boxes, return_tensors="pt")
|
||||
boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes
|
||||
encoding = processor(image, question, words, boxes=boxes, return_tensors="pt")
|
||||
print(encoding.keys())
|
||||
# dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'bbox', 'image'])
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user