[Docs, Notebook] Include generation pipeline (#4295)

* add first text for generation

* add generation pipeline to usage

* Created using Colaboratory

* correct docstring

* finish
This commit is contained in:
Patrick von Platen
2020-05-13 20:24:08 +02:00
committed by GitHub
parent 2d184cb553
commit 839bfaedb2
2 changed files with 515 additions and 58 deletions

View File

@@ -30,7 +30,8 @@
},
"colab": {
"name": "03-pipelines.ipynb",
"provenance": []
"provenance": [],
"include_colab_link": true
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
@@ -1504,6 +1505,251 @@
"left": null
}
},
"3c86415352574190b71e1fe5a15d36f1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_dd2c9dd935754cf2802233053554c21c",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_8ae3be32d9c845e59fdb1c47884d48aa",
"IPY_MODEL_4dea0031f3554752ad5aad01fe516a60"
]
}
},
"dd2c9dd935754cf2802233053554c21c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"8ae3be32d9c845e59fdb1c47884d48aa": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_1efb96d931a446de92f1930b973ae846",
"_dom_classes": [],
"description": "Downloading: 100%",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 230,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 230,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_6a4f5aab5ba949fd860b5a35bba7db9c"
}
},
"4dea0031f3554752ad5aad01fe516a60": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_4b02b2e964ad49af9f7ce7023131ceb8",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 230/230 [00:00<00:00, 8.69kB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_0ae8a68c3668401da8d8a6d5ec9cac8f"
}
},
"1efb96d931a446de92f1930b973ae846": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "initial",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"6a4f5aab5ba949fd860b5a35bba7db9c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"4b02b2e964ad49af9f7ce7023131ceb8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"0ae8a68c3668401da8d8a6d5ec9cac8f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"fd44cf6ab17e4b768b2e1d5cb8ce5af9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
@@ -2105,6 +2351,16 @@
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/huggingface/transformers/blob/master/notebooks/03-pipelines.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
@@ -2170,13 +2426,29 @@
},
"id": "4maAknWNrl_N",
"colab_type": "code",
"colab": {}
"colab": {
"base_uri": "https://localhost:8080/",
"height": 102
},
"outputId": "467e3cc8-a069-47da-8029-86e4142c7dde"
},
"source": [
"!pip install -q transformers"
],
"execution_count": 0,
"outputs": []
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": [
"\u001b[K |████████████████████████████████| 645kB 4.4MB/s \n",
"\u001b[K |████████████████████████████████| 3.8MB 11.7MB/s \n",
"\u001b[K |████████████████████████████████| 890kB 51.5MB/s \n",
"\u001b[K |████████████████████████████████| 1.0MB 46.0MB/s \n",
"\u001b[?25h Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
@@ -2219,6 +2491,7 @@
},
"id": "AMRXHQw9rl_d",
"colab_type": "code",
"outputId": "a7a10851-b71e-4553-9afc-04066120410d",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 83,
@@ -2232,14 +2505,13 @@
"ad84da685cf44abb90d17d9d2e023b48",
"a246f9eea2d7440cb979e728741d2e32"
]
},
"outputId": "a7a10851-b71e-4553-9afc-04066120410d"
}
},
"source": [
"nlp_sentence_classif = pipeline('sentiment-analysis')\n",
"nlp_sentence_classif('Such a nice weather outside !')"
],
"execution_count": 3,
"execution_count": 0,
"outputs": [
{
"output_type": "display_data",
@@ -2300,6 +2572,7 @@
},
"id": "B3BDRX_Krl_n",
"colab_type": "code",
"outputId": "a6b90b11-a272-4ecb-960d-4c682551b399",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 185,
@@ -2313,14 +2586,13 @@
"405afa5bb8b840d8bc0850e02f593ce4",
"78c718e3d5fa4cb892217260bea6d540"
]
},
"outputId": "a6b90b11-a272-4ecb-960d-4c682551b399"
}
},
"source": [
"nlp_token_class = pipeline('ner')\n",
"nlp_token_class('Hugging Face is a French company based in New-York.')"
],
"execution_count": 4,
"execution_count": 0,
"outputs": [
{
"output_type": "display_data",
@@ -2384,6 +2656,7 @@
},
"id": "ND_8LzQKrl_u",
"colab_type": "code",
"outputId": "c59ae695-c465-4de6-fa6e-181d8f1a3992",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 117,
@@ -2397,14 +2670,13 @@
"cd64e3f20b23483daa79712bde6622ea",
"67cbaa1f55d24e62ad6b022af36bca56"
]
},
"outputId": "c59ae695-c465-4de6-fa6e-181d8f1a3992"
}
},
"source": [
"nlp_qa = pipeline('question-answering')\n",
"nlp_qa(context='Hugging Face is a French company based in New-York.', question='Where is based Hugging Face ?')"
],
"execution_count": 5,
"execution_count": 0,
"outputs": [
{
"output_type": "display_data",
@@ -2470,6 +2742,7 @@
},
"id": "zpJQ2HXNrl_4",
"colab_type": "code",
"outputId": "3fb62e7a-25a6-4b06-ced8-51eb8aa6bf33",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 321,
@@ -2483,14 +2756,13 @@
"a35703cc8ff44e93a8c0eb413caddc40",
"9df7014c99b343f3b178fa020ff56010"
]
},
"outputId": "3fb62e7a-25a6-4b06-ced8-51eb8aa6bf33"
}
},
"source": [
"nlp_fill = pipeline('fill-mask')\n",
"nlp_fill('Hugging Face is a French company based in ' + nlp_fill.tokenizer.mask_token)"
],
"execution_count": 6,
"execution_count": 0,
"outputs": [
{
"output_type": "display_data",
@@ -2560,11 +2832,11 @@
"metadata": {
"id": "8BaOgzi1u1Yc",
"colab_type": "code",
"outputId": "2168e437-cfba-4247-a38c-07f02f555c6e",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 88
},
"outputId": "2168e437-cfba-4247-a38c-07f02f555c6e"
}
},
"source": [
"TEXT_TO_SUMMARIZE = \"\"\" \n",
@@ -2590,7 +2862,7 @@
"summarizer = pipeline('summarization')\n",
"summarizer(TEXT_TO_SUMMARIZE)"
],
"execution_count": 7,
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
@@ -2631,6 +2903,7 @@
"metadata": {
"id": "8FwayP4nwV3Z",
"colab_type": "code",
"outputId": "66956816-c924-4718-fe58-cabef7d51974",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 83,
@@ -2644,15 +2917,14 @@
"ad78042ee71a41fd989e4b4ce9d2e3c1",
"40c8d2617f3d4c84b923b140456fa5da"
]
},
"outputId": "66956816-c924-4718-fe58-cabef7d51974"
}
},
"source": [
"# English to French\n",
"translator = pipeline('translation_en_to_fr')\n",
"translator(\"HuggingFace is a French company that is based in New York City. HuggingFace's mission is to solve NLP one commit at a time\")"
],
"execution_count": 8,
"execution_count": 0,
"outputs": [
{
"output_type": "display_data",
@@ -2696,6 +2968,7 @@
"metadata": {
"colab_type": "code",
"id": "ra0-WfznwoIW",
"outputId": "278a3d5f-cc42-40bc-a9db-c92ec5a3a2f0",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 83,
@@ -2709,15 +2982,14 @@
"4486f8a2efc34b9aab3864eb5ad2ba48",
"d6228324f3444aa6bd1323d65ae4ff75"
]
},
"outputId": "278a3d5f-cc42-40bc-a9db-c92ec5a3a2f0"
}
},
"source": [
"# English to German\n",
"translator = pipeline('translation_en_to_de')\n",
"translator(\"The history of natural language processing (NLP) generally started in the 1950s, although work can be found from earlier periods.\")"
],
"execution_count": 9,
"execution_count": 0,
"outputs": [
{
"output_type": "display_data",
@@ -2756,6 +3028,89 @@
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "qPUpg0M8hCtB",
"colab_type": "text"
},
"source": [
"## 7. Text Generation\n",
"\n",
"Text generation is currently supported by GPT-2, OpenAI GPT, Transformer-XL, XLNet, CTRL and Reformer."
]
},
{
"cell_type": "code",
"metadata": {
"id": "5pKfxTxohXuZ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 120,
"referenced_widgets": [
"3c86415352574190b71e1fe5a15d36f1",
"dd2c9dd935754cf2802233053554c21c",
"8ae3be32d9c845e59fdb1c47884d48aa",
"4dea0031f3554752ad5aad01fe516a60",
"1efb96d931a446de92f1930b973ae846",
"6a4f5aab5ba949fd860b5a35bba7db9c",
"4b02b2e964ad49af9f7ce7023131ceb8",
"0ae8a68c3668401da8d8a6d5ec9cac8f"
]
},
"outputId": "8705f6b4-2413-4ac6-f72d-e5ecce160662"
},
"source": [
"text_generator = pipeline(\"text-generation\")\n",
"text_generator(\"Today is a beautiful day and I will\")"
],
"execution_count": 5,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3c86415352574190b71e1fe5a15d36f1",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=230.0, style=ProgressStyle(description_…"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence\n"
],
"name": "stderr"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[{'generated_text': 'Today is a beautiful day and I will celebrate my birthday!\"\\n\\nThe mother told CNN the two had planned their meal together. After dinner, she added that she and I walked down the street and stopped at a diner near her home. \"He'}]"
]
},
"metadata": {
"tags": []
},
"execution_count": 5
}
]
},
{
"cell_type": "markdown",
"metadata": {
@@ -2763,7 +3118,7 @@
"colab_type": "text"
},
"source": [
"## 7. Projection - Features Extraction "
"## 8. Projection - Features Extraction "
]
},
{
@@ -2775,6 +3130,7 @@
},
"id": "O4SjR1QQrl__",
"colab_type": "code",
"outputId": "2ce966d5-7a89-4488-d48f-626d1c2a8222",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 83,
@@ -2788,8 +3144,7 @@
"31d97ecf78fa412c99e6659196d82828",
"c6be5d48ec3c4c799d1445607e5f1ac6"
]
},
"outputId": "2ce966d5-7a89-4488-d48f-626d1c2a8222"
}
},
"source": [
"import numpy as np\n",
@@ -2797,7 +3152,7 @@
"output = nlp_features('Hugging Face is a French company based in Paris')\n",
"np.array(output).shape # (Samples, Tokens, Vector Size)\n"
],
"execution_count": 10,
"execution_count": 0,
"outputs": [
{
"output_type": "display_data",
@@ -2861,6 +3216,7 @@
},
"id": "yFlBPQHtrmAH",
"colab_type": "code",
"outputId": "03cc3207-a7e8-49fd-904a-63a7a1d0eb7a",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 116,
@@ -2872,8 +3228,7 @@
"62b10ca525cc4ac68f3a006434eb7416",
"211109537fbe4e60b89a238c89db1346"
]
},
"outputId": "03cc3207-a7e8-49fd-904a-63a7a1d0eb7a"
}
},
"source": [
"task = widgets.Dropdown(\n",
@@ -2906,7 +3261,7 @@
"input.on_submit(forward)\n",
"display(task, input)"
],
"execution_count": 11,
"execution_count": 0,
"outputs": [
{
"output_type": "display_data",
@@ -2958,6 +3313,7 @@
},
"id": "GCoKbBTYrmAN",
"colab_type": "code",
"outputId": "57c3a647-160a-4b3a-e852-e7a1daf1294a",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 143,
@@ -2969,8 +3325,7 @@
"d305ba1662e3466c93ab5cca7ebf8f33",
"879f7a3747ad455d810c7a29918648ee"
]
},
"outputId": "57c3a647-160a-4b3a-e852-e7a1daf1294a"
}
},
"source": [
"context = widgets.Textarea(\n",
@@ -2995,7 +3350,7 @@
"query.on_submit(forward)\n",
"display(context, query)"
],
"execution_count": 12,
"execution_count": 0,
"outputs": [
{
"output_type": "display_data",