Doc styler examples (#14953)

* Fix bad examples
* Add black formatting to style_doc
* Use first nonempty line
* Put it at the right place
* Don't add spaces to empty lines
* Better templates
* Deal with triple quotes in docstrings
* Result of style_doc
* Enable mdx treatment and fix code examples in MDXs
* Result of doc styler on doc source files
* Last fixes
* Break copy from
2021-12-27 19:07:46 -05:00
parent e13f72fbff
commit b5e2b183af
211 changed files with 2738 additions and 1711 deletions
--- a/docs/source/main_classes/pipelines.mdx
+++ b/docs/source/main_classes/pipelines.mdx
@@ -101,6 +101,7 @@ from transformers import pipeline

 pipe = pipeline("text-classification")

+
 def data():
    while True:
        # This could come from a dataset, a database, a queue or HTTP request
@@ -110,6 +111,7 @@ def data():
        # does the preprocessing while the main runs the big inference
        yield "This is a test"

+
 for out in pipe(data()):
    print(out)
    # {"text": "NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD NIGHT HUSBAND"}
@@ -125,10 +127,10 @@ All pipelines can use batching. This will work
 whenever the pipeline uses its streaming ability (so when passing lists or `Dataset` or `generator`).

 ```python
-from transformers import pipeline                                                   
+from transformers import pipeline
 from transformers.pipelines.base import KeyDataset
 import datasets
-import tqdm                                                                         
+import tqdm

 dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised")
 pipe = pipeline("text-classification", device=0)
@@ -149,28 +151,28 @@ Example where it's mostly a speedup:
 </Tip>

 ```python
-from transformers import pipeline                                                   
-from torch.utils.data import Dataset                                                
-import tqdm                                                                         
+from transformers import pipeline
+from torch.utils.data import Dataset
+import tqdm


-pipe = pipeline("text-classification", device=0)                                    
+pipe = pipeline("text-classification", device=0)


-class MyDataset(Dataset):                                                           
-    def __len__(self):                                                              
-        return 5000                                                                 
+class MyDataset(Dataset):
+    def __len__(self):
+        return 5000

-    def __getitem__(self, i):                                                       
-        return "This is a test"                                                     
+    def __getitem__(self, i):
+        return "This is a test"


-dataset = MyDataset()   
+dataset = MyDataset()

 for batch_size in [1, 8, 64, 256]:
-    print("-" * 30)                                                                     
-    print(f"Streaming batch_size={batch_size}")    
-    for out in tqdm.tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):              
+    print("-" * 30)
+    print(f"Streaming batch_size={batch_size}")
+    for out in tqdm.tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):
        pass
 ```

@@ -194,15 +196,15 @@ Streaming batch_size=256
 Example where it's mostly a slowdown:

 ```python
-class MyDataset(Dataset):                                                           
-    def __len__(self):                                                              
-        return 5000                                                                 
+class MyDataset(Dataset):
+    def __len__(self):
+        return 5000

-    def __getitem__(self, i):                                                       
-        if i % 64 == 0:                                                          
-            n = 100                                                              
-        else:                                                                    
-            n = 1                                                                
+    def __getitem__(self, i):
+        if i % 64 == 0:
+            n = 100
+        else:
+            n = 1
        return "This is a test" * n
 ```

@@ -298,10 +300,11 @@ If you want to try simply you can:

 ```python
 class MyPipeline(TextClassificationPipeline):
-    def postprocess(...):
-        ...
+    def postprocess():
+        # Your code goes here
        scores = scores * 100
-        ...
+        # And here
+

 my_pipeline = MyPipeline(model=model, tokenizer=tokenizer, ...)
 # or if you use *pipeline* function, then: