From be02176a4b70570049a00427f61132c8e897da19 Mon Sep 17 00:00:00 2001
From: Morgan Funtowicz <morgan@huggingface.co>
Date: Thu, 5 Mar 2020 16:00:38 +0100
Subject: [PATCH] Fixing sentiment pipeline in 03-pipelines notebook.

Signed-off-by: Morgan Funtowicz <morgan@huggingface.co>
---
 notebooks/03-pipelines.ipynb | 239 +++++++++++------------------------
 1 file changed, 71 insertions(+), 168 deletions(-)

diff --git a/notebooks/03-pipelines.ipynb b/notebooks/03-pipelines.ipynb
index ddaffcee06..483fbe758f 100644
--- a/notebooks/03-pipelines.ipynb
+++ b/notebooks/03-pipelines.ipynb
@@ -67,27 +67,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 6,
    "metadata": {
     "pycharm": {
      "is_executing": false,
      "name": "#%% code \n"
     }
    },
-   "outputs": [
-    {
-     "ename": "SyntaxError",
-     "evalue": "from __future__ imports must occur at the beginning of the file (<ipython-input-29-c3a037bd4c55>, line 5)",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;36m  File \u001b[0;32m\"<ipython-input-29-c3a037bd4c55>\"\u001b[0;36m, line \u001b[0;32m5\u001b[0m\n\u001b[0;31m    from transformers import pipeline\u001b[0m\n\u001b[0m           ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m from __future__ imports must occur at the beginning of the file\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "import numpy as np\n",
     "from __future__ import print_function\n",
-    "from ipywidgets import interact, interactive, fixed, interact_manual\n",
     "import ipywidgets as widgets\n",
     "from transformers import pipeline"
    ]
@@ -105,7 +94,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "metadata": {
     "pycharm": {
      "is_executing": false,
@@ -115,40 +104,35 @@
    "outputs": [
     {
      "data": {
+      "text/plain": "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=230.0, style=ProgressStyle(description_…",
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6aeccfdf51994149bdd1f3d3533e380f",
        "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=230.0, style=ProgressStyle(description_…"
-      ]
+       "version_minor": 0,
+       "model_id": "c9db53f30b9446c0af03268633a966c0"
+      }
      },
      "metadata": {},
      "output_type": "display_data"
     },
     {
      "name": "stdout",
-     "output_type": "stream",
      "text": [
       "\n"
-     ]
+     ],
+     "output_type": "stream"
     },
     {
      "data": {
-      "text/plain": [
-       "[{'label': 'POSITIVE', 'score': 0.800251},\n",
-       " {'label': 'NEGATIVE', 'score': 1.2489903}]"
-      ]
+      "text/plain": "[{'label': 'POSITIVE', 'score': 0.9997656}]"
      },
-     "execution_count": 6,
      "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "execution_count": 8
     }
    ],
    "source": [
     "nlp_sentence_classif = pipeline('sentiment-analysis')\n",
-    "nlp_sentence_classif(['Such a nice weather outside !', 'This movie was kind of boring.'])"
+    "nlp_sentence_classif('Such a nice weather outside !')"
    ]
   },
   {
@@ -164,7 +148,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 9,
    "metadata": {
     "pycharm": {
      "is_executing": false,
@@ -174,40 +158,30 @@
    "outputs": [
     {
      "data": {
+      "text/plain": "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=230.0, style=ProgressStyle(description_…",
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b5549c53c27346a899af553c977f00bc",
        "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=230.0, style=ProgressStyle(description_…"
-      ]
+       "version_minor": 0,
+       "model_id": "1e300789e22644f1aed66a5ed60e75c4"
+      }
      },
      "metadata": {},
      "output_type": "display_data"
     },
     {
      "name": "stdout",
-     "output_type": "stream",
      "text": [
       "\n"
-     ]
+     ],
+     "output_type": "stream"
     },
     {
      "data": {
-      "text/plain": [
-       "[{'word': 'Hu', 'score': 0.9970937967300415, 'entity': 'I-ORG'},\n",
-       " {'word': '##gging', 'score': 0.9345750212669373, 'entity': 'I-ORG'},\n",
-       " {'word': 'Face', 'score': 0.9787060022354126, 'entity': 'I-ORG'},\n",
-       " {'word': 'French', 'score': 0.9981995820999146, 'entity': 'I-MISC'},\n",
-       " {'word': 'New', 'score': 0.9983047246932983, 'entity': 'I-LOC'},\n",
-       " {'word': '-', 'score': 0.8913455009460449, 'entity': 'I-LOC'},\n",
-       " {'word': 'York', 'score': 0.9979523420333862, 'entity': 'I-LOC'}]"
-      ]
+      "text/plain": "[{'word': 'Hu', 'score': 0.9970937967300415, 'entity': 'I-ORG'},\n {'word': '##gging', 'score': 0.9345750212669373, 'entity': 'I-ORG'},\n {'word': 'Face', 'score': 0.9787060022354126, 'entity': 'I-ORG'},\n {'word': 'French', 'score': 0.9981995820999146, 'entity': 'I-MISC'},\n {'word': 'New', 'score': 0.9983047246932983, 'entity': 'I-LOC'},\n {'word': '-', 'score': 0.8913455009460449, 'entity': 'I-LOC'},\n {'word': 'York', 'score': 0.9979523420333862, 'entity': 'I-LOC'}]"
      },
-     "execution_count": 16,
      "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "execution_count": 9
     }
    ],
    "source": [
@@ -224,7 +198,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 10,
    "metadata": {
     "pycharm": {
      "is_executing": false,
@@ -234,42 +208,38 @@
    "outputs": [
     {
      "data": {
+      "text/plain": "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=230.0, style=ProgressStyle(description_…",
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6e56a8edcef44ec2ae838711ecd22d3a",
        "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=230.0, style=ProgressStyle(description_…"
-      ]
+       "version_minor": 0,
+       "model_id": "82aca58f1ea24b4cb37f16402e8a5923"
+      }
      },
      "metadata": {},
      "output_type": "display_data"
     },
     {
      "name": "stdout",
-     "output_type": "stream",
      "text": [
       "\n"
-     ]
+     ],
+     "output_type": "stream"
     },
     {
      "name": "stderr",
-     "output_type": "stream",
      "text": [
-      "convert squad examples to features: 100%|██████████| 1/1 [00:00<00:00, 53.05it/s]\n",
-      "add example index and unique id: 100%|██████████| 1/1 [00:00<00:00, 2673.23it/s]\n"
-     ]
+      "convert squad examples to features: 100%|██████████| 1/1 [00:00<00:00, 225.51it/s]\n",
+      "add example index and unique id: 100%|██████████| 1/1 [00:00<00:00, 2158.67it/s]\n"
+     ],
+     "output_type": "stream"
     },
     {
      "data": {
-      "text/plain": [
-       "{'score': 0.9632966867654424, 'start': 42, 'end': 50, 'answer': 'New-York.'}"
-      ]
+      "text/plain": "{'score': 0.9632966867654424, 'start': 42, 'end': 50, 'answer': 'New-York.'}"
      },
-     "execution_count": 18,
      "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "execution_count": 10
     }
    ],
    "source": [
@@ -286,7 +256,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 11,
    "metadata": {
     "pycharm": {
      "is_executing": false,
@@ -296,48 +266,30 @@
    "outputs": [
     {
      "data": {
+      "text/plain": "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=230.0, style=ProgressStyle(description_…",
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "1930695ea2d24ca98c6d7c13842d377f",
        "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=230.0, style=ProgressStyle(description_…"
-      ]
+       "version_minor": 0,
+       "model_id": "49df2227b4fa4eb28dcdcfc3d9261d0f"
+      }
      },
      "metadata": {},
      "output_type": "display_data"
     },
     {
      "name": "stdout",
-     "output_type": "stream",
      "text": [
       "\n"
-     ]
+     ],
+     "output_type": "stream"
     },
     {
      "data": {
-      "text/plain": [
-       "[{'sequence': '<s> Hugging Face is a French company based in Paris</s>',\n",
-       "  'score': 0.25288480520248413,\n",
-       "  'token': 2201},\n",
-       " {'sequence': '<s> Hugging Face is a French company based in Lyon</s>',\n",
-       "  'score': 0.07639515399932861,\n",
-       "  'token': 12790},\n",
-       " {'sequence': '<s> Hugging Face is a French company based in Brussels</s>',\n",
-       "  'score': 0.055500105023384094,\n",
-       "  'token': 6497},\n",
-       " {'sequence': '<s> Hugging Face is a French company based in Geneva</s>',\n",
-       "  'score': 0.04264815151691437,\n",
-       "  'token': 11559},\n",
-       " {'sequence': '<s> Hugging Face is a French company based in France</s>',\n",
-       "  'score': 0.03868963569402695,\n",
-       "  'token': 1470}]"
-      ]
+      "text/plain": "[{'sequence': '<s> Hugging Face is a French company based in Paris</s>',\n  'score': 0.23106691241264343,\n  'token': 2201},\n {'sequence': '<s> Hugging Face is a French company based in Lyon</s>',\n  'score': 0.0819825753569603,\n  'token': 12790},\n {'sequence': '<s> Hugging Face is a French company based in Geneva</s>',\n  'score': 0.04769463092088699,\n  'token': 11559},\n {'sequence': '<s> Hugging Face is a French company based in Brussels</s>',\n  'score': 0.047622501850128174,\n  'token': 6497},\n {'sequence': '<s> Hugging Face is a French company based in France</s>',\n  'score': 0.04130595177412033,\n  'token': 1470}]"
      },
-     "execution_count": 20,
      "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "execution_count": 11
     }
    ],
    "source": [
@@ -354,7 +306,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 12,
    "metadata": {
     "pycharm": {
      "is_executing": false,
@@ -364,34 +316,30 @@
    "outputs": [
     {
      "data": {
+      "text/plain": "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=230.0, style=ProgressStyle(description_…",
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "92fa4d67290f49a3943dc0abd7529892",
        "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=230.0, style=ProgressStyle(description_…"
-      ]
+       "version_minor": 0,
+       "model_id": "2af4cfb19e3243dda014d0f56b48f4b2"
+      }
      },
      "metadata": {},
      "output_type": "display_data"
     },
     {
      "name": "stdout",
-     "output_type": "stream",
      "text": [
       "\n"
-     ]
+     ],
+     "output_type": "stream"
     },
     {
      "data": {
-      "text/plain": [
-       "(1, 12, 768)"
-      ]
+      "text/plain": "(1, 12, 768)"
      },
-     "execution_count": 32,
      "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "execute_result",
+     "execution_count": 12
     }
    ],
    "source": [
@@ -417,7 +365,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 13,
    "metadata": {
     "pycharm": {
      "is_executing": false,
@@ -427,41 +375,27 @@
    "outputs": [
     {
      "data": {
+      "text/plain": "Dropdown(description='Task:', index=1, options=('sentiment-analysis', 'ner', 'fill_mask'), value='ner')",
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "261ae9fa30e84d1d84a3b0d9682ac477",
        "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Dropdown(description='Task:', index=1, options=('sentiment-analysis', 'ner', 'fill_mask'), value='ner')"
-      ]
+       "version_minor": 0,
+       "model_id": "10bac065d46f4e4d9a8498dcc8104ecd"
+      }
      },
      "metadata": {},
      "output_type": "display_data"
     },
     {
      "data": {
+      "text/plain": "Text(value='', description='Your input:', placeholder='Enter something')",
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ddc51b71c6eb40e5ab60998664e6a857",
        "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Text(value='', description='Your input:', placeholder='Enter something')"
-      ]
+       "version_minor": 0,
+       "model_id": "2c5f1411f7a94714bc00f01b0e3b27b2"
+      }
      },
      "metadata": {},
      "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[{'word': 'Paris', 'score': 0.9991844296455383, 'entity': 'I-LOC'}]\n",
-      "[{'sequence': '<s> I\\'m from Paris.\"</s>', 'score': 0.224044069647789, 'token': 72}, {'sequence': \"<s> I'm from Paris.)</s>\", 'score': 0.16959427297115326, 'token': 1592}, {'sequence': \"<s> I'm from Paris.]</s>\", 'score': 0.10994981974363327, 'token': 21838}, {'sequence': '<s> I\\'m from Paris!\"</s>', 'score': 0.0706234946846962, 'token': 2901}, {'sequence': \"<s> I'm from Paris.</s>\", 'score': 0.0698278620839119, 'token': 4}]\n",
-      "[{'sequence': \"<s> I'm from Paris and London</s>\", 'score': 0.12238534539937973, 'token': 928}, {'sequence': \"<s> I'm from Paris and Brussels</s>\", 'score': 0.07107886672019958, 'token': 6497}, {'sequence': \"<s> I'm from Paris and Belgium</s>\", 'score': 0.040912602096796036, 'token': 7320}, {'sequence': \"<s> I'm from Paris and Berlin</s>\", 'score': 0.039884064346551895, 'token': 5459}, {'sequence': \"<s> I'm from Paris and Melbourne</s>\", 'score': 0.038133684545755386, 'token': 5703}]\n",
-      "[{'sequence': '<s> I like go to sleep</s>', 'score': 0.08942786604166031, 'token': 3581}, {'sequence': '<s> I like go to bed</s>', 'score': 0.07789064943790436, 'token': 3267}, {'sequence': '<s> I like go to concerts</s>', 'score': 0.06356740742921829, 'token': 12858}, {'sequence': '<s> I like go to school</s>', 'score': 0.03660670667886734, 'token': 334}, {'sequence': '<s> I like go to dinner</s>', 'score': 0.032155368477106094, 'token': 3630}]\n"
-     ]
     }
    ],
    "source": [
@@ -498,7 +432,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 14,
    "metadata": {
     "pycharm": {
      "is_executing": false,
@@ -508,46 +442,15 @@
    "outputs": [
     {
      "data": {
+      "text/plain": "Textarea(value='Einstein is famous for the general theory of relativity', description='Context:', placeholder=…",
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5ae68677bd8a41f990355aa43840d3f8",
        "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Textarea(value='Einstein is famous for the general theory of relativity', description='Context:', placeholder=…"
-      ]
+       "version_minor": 0,
+       "model_id": "019fde2343634e94b6f32d04f6350ec1"
+      }
      },
      "metadata": {},
      "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "14bcfd9a2c5a47e6b1383989ab7632c8",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Text(value='Why is Einstein famous for ?', description='Question:', placeholder='Enter something')"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "convert squad examples to features: 100%|██████████| 1/1 [00:00<00:00, 168.83it/s]\n",
-      "add example index and unique id: 100%|██████████| 1/1 [00:00<00:00, 1919.59it/s]\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'score': 0.40340670623875496, 'start': 27, 'end': 54, 'answer': 'general theory of relativity'}\n"
-     ]
     }
    ],
    "source": [