From e02f95b2298997016cd01fdb182442093b34e8d2 Mon Sep 17 00:00:00 2001 From: Nathan Glenn Date: Mon, 28 Mar 2022 02:00:29 -0700 Subject: [PATCH] remove references to PDF reading via PIL (#15293) * fix confusing PIL instructions As stated in the documentation [here](https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html?highlight=pdf#write-only-formats), PIL can only write PDFs, not read them. Remove references to reading PDFs via PIL from this page to avoid confusion. * mention PDF in doc examples using PIL Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com> * Be explicit: PDFs must be converted to images * fix formatting Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com> --- docs/source/model_doc/layoutlmv2.mdx | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/docs/source/model_doc/layoutlmv2.mdx b/docs/source/model_doc/layoutlmv2.mdx index b1db86e2a2..374cbcb775 100644 --- a/docs/source/model_doc/layoutlmv2.mdx +++ b/docs/source/model_doc/layoutlmv2.mdx @@ -85,7 +85,9 @@ follows: ```python from PIL import Image -image = Image.open("name_of_your_document - can be a png file, pdf, etc.") +image = Image.open( + "name_of_your_document - can be a png, jpg, etc. of your documents (PDFs must be converted to images)." +) width, height = image.size ``` @@ -157,7 +159,9 @@ from PIL import Image processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased") -image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB") +image = Image.open( + "name_of_your_document - can be a png, jpg, etc. of your documents (PDFs must be converted to images)." 
+).convert("RGB") encoding = processor( image, return_tensors="pt" ) # you can also add all tokenizer parameters here such as padding, truncation @@ -177,7 +181,9 @@ from PIL import Image processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr") -image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB") +image = Image.open( + "name_of_your_document - can be a png, jpg, etc. of your documents (PDFs must be converted to images)." +).convert("RGB") words = ["hello", "world"] boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes encoding = processor(image, words, boxes=boxes, return_tensors="pt") @@ -199,7 +205,9 @@ from PIL import Image processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr") -image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB") +image = Image.open( + "name_of_your_document - can be a png, jpg, etc. of your documents (PDFs must be converted to images)." +).convert("RGB") words = ["hello", "world"] boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes word_labels = [1, 2] @@ -219,7 +227,9 @@ from PIL import Image processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased") -image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB") +image = Image.open( + "name_of_your_document - can be a png, jpg, etc. of your documents (PDFs must be converted to images)." +).convert("RGB") question = "What's his name?" 
encoding = processor(image, question, return_tensors="pt") print(encoding.keys()) @@ -237,7 +247,9 @@ from PIL import Image processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr") -image = Image.open("name_of_your_document - can be a png file, pdf, etc.").convert("RGB") +image = Image.open( + "name_of_your_document - can be a png, jpg, etc. of your documents (PDFs must be converted to images)." +).convert("RGB") question = "What's his name?" words = ["hello", "world"] boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] # make sure to normalize your bounding boxes