From 4c3be2e71809f606b42b8af4486e6277d855c012 Mon Sep 17 00:00:00 2001
From: Julien Plu <plu.julien@gmail.com>
Date: Wed, 6 May 2020 16:40:55 +0200
Subject: [PATCH] Add model card for the NER model (#4162)

---
 .../jplu/tf-xlm-r-ner-40-lang/README.md       | 556 ++++++++++++++++++
 1 file changed, 556 insertions(+)
 create mode 100644 model_cards/jplu/tf-xlm-r-ner-40-lang/README.md

diff --git a/model_cards/jplu/tf-xlm-r-ner-40-lang/README.md b/model_cards/jplu/tf-xlm-r-ner-40-lang/README.md
new file mode 100644
index 0000000000..bec69f52eb
--- /dev/null
+++ b/model_cards/jplu/tf-xlm-r-ner-40-lang/README.md
@@ -0,0 +1,556 @@
+# XLM-R + NER
+
+This model is an [XLM-Roberta-base](https://arxiv.org/abs/1911.02116) model fine-tuned on the 40 languages proposed in [XTREME](https://github.com/google-research/xtreme) from [Wikiann](https://aclweb.org/anthology/P17-1178). This is still ongoing work, and the results will be updated every time an improvement is reached.
+
+The covered labels are:
+```
+LOC
+ORG
+PER
+O
+```
+
+## Metrics on evaluation set:
+### Average over the 40 languages
+```
+           precision    recall  f1-score   support
+
+      ORG       0.81      0.81      0.81    102452
+      PER       0.90      0.91      0.91    108978
+      LOC       0.86      0.89      0.87    121868
+
+micro avg       0.86      0.87      0.87    333298
+macro avg       0.86      0.87      0.87    333298
+```
+
+### Afrikaans
+```
+           precision    recall  f1-score   support
+
+      ORG       0.89      0.88      0.88       582
+      PER       0.89      0.97      0.93       369
+      LOC       0.84      0.90      0.86       518
+
+micro avg       0.87      0.91      0.89      1469
+macro avg       0.87      0.91      0.89      1469
+``` 
+
+### Arabic
+```
+           precision    recall  f1-score   support
+
+      ORG       0.83      0.84      0.84      3507
+      PER       0.90      0.91      0.91      3643
+      LOC       0.88      0.89      0.88      3604
+
+micro avg       0.87      0.88      0.88     10754
+macro avg       0.87      0.88      0.88     10754
+```
+
+### Basque
+```
+           precision    recall  f1-score   support
+
+      LOC       0.88      0.93      0.91      5228
+      ORG       0.86      0.81      0.83      3654
+      PER       0.91      0.91      0.91      4072
+
+micro avg       0.89      0.89      0.89     12954
+macro avg       0.89      0.89      0.89     12954
+```
+
+### Bengali
+```
+           precision    recall  f1-score   support
+
+      ORG       0.86      0.89      0.87       325
+      LOC       0.91      0.91      0.91       406
+      PER       0.96      0.95      0.95       364
+
+micro avg       0.91      0.92      0.91      1095
+macro avg       0.91      0.92      0.91      1095
+```
+
+### Bulgarian
+```
+           precision    recall  f1-score   support
+
+      ORG       0.86      0.83      0.84      3661
+      PER       0.92      0.95      0.94      4006
+      LOC       0.92      0.95      0.94      6449
+
+micro avg       0.91      0.92      0.91     14116
+macro avg       0.91      0.92      0.91     14116
+```
+
+### Burmese
+```
+           precision    recall  f1-score   support
+
+      LOC       0.60      0.86      0.71        37
+      ORG       0.68      0.63      0.66        30
+      PER       0.44      0.44      0.44        36
+
+micro avg       0.57      0.65      0.61       103
+macro avg       0.57      0.65      0.60       103
+```
+
+### Chinese
+```
+           precision    recall  f1-score   support
+
+      ORG       0.70      0.69      0.70      4022
+      LOC       0.76      0.81      0.78      3830
+      PER       0.84      0.84      0.84      3706
+
+micro avg       0.76      0.78      0.77     11558
+macro avg       0.76      0.78      0.77     11558
+```
+
+### Dutch
+```
+           precision    recall  f1-score   support
+
+      ORG       0.87      0.87      0.87      3930
+      PER       0.95      0.95      0.95      4377
+      LOC       0.91      0.92      0.91      4813
+
+micro avg       0.91      0.92      0.91     13120
+macro avg       0.91      0.92      0.91     13120
+```
+
+### English
+```
+           precision    recall  f1-score   support
+
+      LOC       0.83      0.84      0.84      4781
+      PER       0.89      0.90      0.89      4559
+      ORG       0.75      0.75      0.75      4633
+
+micro avg       0.82      0.83      0.83     13973
+macro avg       0.82      0.83      0.83     13973
+```
+
+### Estonian
+```
+           precision    recall  f1-score   support
+
+      LOC       0.89      0.92      0.91      5654
+      ORG       0.85      0.85      0.85      3878
+      PER       0.94      0.94      0.94      4026
+
+micro avg       0.90      0.91      0.90     13558
+macro avg       0.90      0.91      0.90     13558
+```
+
+### Finnish
+```
+           precision    recall  f1-score   support
+
+      ORG       0.84      0.83      0.84      4104
+      LOC       0.88      0.90      0.89      5307
+      PER       0.95      0.94      0.94      4519
+
+micro avg       0.89      0.89      0.89     13930
+macro avg       0.89      0.89      0.89     13930
+```
+
+### French
+```
+           precision    recall  f1-score   support
+
+      LOC       0.90      0.89      0.89      4808
+      ORG       0.84      0.87      0.85      3876
+      PER       0.94      0.93      0.94      4249
+
+micro avg       0.89      0.90      0.90     12933
+macro avg       0.89      0.90      0.90     12933
+```
+
+### Georgian
+```
+           precision    recall  f1-score   support
+
+      PER       0.90      0.91      0.90      3964
+      ORG       0.83      0.77      0.80      3757
+      LOC       0.82      0.88      0.85      4894
+
+micro avg       0.84      0.86      0.85     12615
+macro avg       0.84      0.86      0.85     12615
+```
+
+### German
+```
+           precision    recall  f1-score   support
+
+      LOC       0.85      0.90      0.87      4939
+      PER       0.94      0.91      0.92      4452
+      ORG       0.79      0.78      0.79      4247
+
+micro avg       0.86      0.86      0.86     13638
+macro avg       0.86      0.86      0.86     13638
+```
+
+### Greek
+```
+           precision    recall  f1-score   support
+
+      ORG       0.86      0.85      0.85      3771
+      LOC       0.88      0.91      0.90      4436
+      PER       0.91      0.93      0.92      3894
+
+micro avg       0.88      0.90      0.89     12101
+macro avg       0.88      0.90      0.89     12101
+```
+
+### Hebrew
+```
+           precision    recall  f1-score   support
+
+      PER       0.87      0.88      0.87      4206
+      ORG       0.76      0.75      0.76      4190
+      LOC       0.85      0.85      0.85      4538
+
+micro avg       0.83      0.83      0.83     12934
+macro avg       0.82      0.83      0.83     12934
+```
+
+### Hindi
+```
+           precision    recall  f1-score   support
+
+      ORG       0.78      0.81      0.79       362
+      LOC       0.83      0.85      0.84       422
+      PER       0.90      0.95      0.92       427
+
+micro avg       0.84      0.87      0.85      1211
+macro avg       0.84      0.87      0.85      1211
+```
+
+### Hungarian
+```
+           precision    recall  f1-score   support
+
+      PER       0.95      0.95      0.95      4347
+      ORG       0.87      0.88      0.87      3988
+      LOC       0.90      0.92      0.91      5544
+
+micro avg       0.91      0.92      0.91     13879
+macro avg       0.91      0.92      0.91     13879
+```
+
+### Indonesian
+```
+           precision    recall  f1-score   support
+
+      ORG       0.88      0.89      0.88      3735
+      LOC       0.93      0.95      0.94      3694
+      PER       0.93      0.93      0.93      3947
+
+micro avg       0.91      0.92      0.92     11376
+macro avg       0.91      0.92      0.92     11376
+```
+
+### Italian
+```
+           precision    recall  f1-score   support
+
+      LOC       0.88      0.88      0.88      4592
+      ORG       0.86      0.86      0.86      4088
+      PER       0.96      0.96      0.96      4732
+
+micro avg       0.90      0.90      0.90     13412
+macro avg       0.90      0.90      0.90     13412
+```
+
+### Japanese
+```
+           precision    recall  f1-score   support
+
+      ORG       0.62      0.61      0.62      4184
+      PER       0.76      0.81      0.78      3812
+      LOC       0.68      0.74      0.71      4281
+
+micro avg       0.69      0.72      0.70     12277
+macro avg       0.69      0.72      0.70     12277
+```
+
+### Javanese
+```
+           precision    recall  f1-score   support
+
+      ORG       0.79      0.80      0.80        46
+      PER       0.81      0.96      0.88        26
+      LOC       0.75      0.75      0.75        40
+
+micro avg       0.78      0.82      0.80       112
+macro avg       0.78      0.82      0.80       112
+```
+
+### Kazakh
+```
+           precision    recall  f1-score   support
+
+      ORG       0.76      0.61      0.68       307
+      LOC       0.78      0.90      0.84       461
+      PER       0.87      0.91      0.89       367
+
+micro avg       0.81      0.83      0.82      1135
+macro avg       0.81      0.83      0.81      1135
+```
+
+### Korean
+```
+           precision    recall  f1-score   support
+
+      LOC       0.86      0.89      0.88      5097
+      ORG       0.79      0.74      0.77      4218
+      PER       0.83      0.86      0.84      4014
+
+micro avg       0.83      0.83      0.83     13329
+macro avg       0.83      0.83      0.83     13329
+```
+
+### Malay
+```
+           precision    recall  f1-score   support
+
+      ORG       0.87      0.89      0.88       368
+      PER       0.92      0.91      0.91       366
+      LOC       0.94      0.95      0.95       354
+
+micro avg       0.91      0.92      0.91      1088
+macro avg       0.91      0.92      0.91      1088
+```
+
+### Malayalam
+```
+           precision    recall  f1-score   support
+
+      ORG       0.75      0.74      0.75       347
+      PER       0.84      0.89      0.86       417
+      LOC       0.74      0.75      0.75       391
+
+micro avg       0.78      0.80      0.79      1155
+macro avg       0.78      0.80      0.79      1155
+```
+
+### Marathi
+```
+           precision    recall  f1-score   support
+
+      PER       0.89      0.94      0.92       394
+      LOC       0.82      0.84      0.83       457
+      ORG       0.84      0.78      0.81       339
+
+micro avg       0.85      0.86      0.85      1190
+macro avg       0.85      0.86      0.85      1190
+```
+
+### Persian
+```
+           precision    recall  f1-score   support
+
+      PER       0.93      0.92      0.93      3540
+      LOC       0.93      0.93      0.93      3584
+      ORG       0.89      0.92      0.90      3370
+
+micro avg       0.92      0.92      0.92     10494
+macro avg       0.92      0.92      0.92     10494
+```
+
+### Portuguese
+```
+           precision    recall  f1-score   support
+
+      LOC       0.90      0.91      0.91      4819
+      PER       0.94      0.92      0.93      4184
+      ORG       0.84      0.88      0.86      3670
+
+micro avg       0.89      0.91      0.90     12673
+macro avg       0.90      0.91      0.90     12673
+```
+
+### Russian
+```
+           precision    recall  f1-score   support
+
+      PER       0.93      0.96      0.95      3574
+      LOC       0.87      0.89      0.88      4619
+      ORG       0.82      0.80      0.81      3858
+
+micro avg       0.87      0.88      0.88     12051
+macro avg       0.87      0.88      0.88     12051
+```
+
+### Spanish
+```
+           precision    recall  f1-score   support
+
+      PER       0.95      0.93      0.94      3891
+      ORG       0.86      0.88      0.87      3709
+      LOC       0.89      0.91      0.90      4553
+
+micro avg       0.90      0.91      0.90     12153
+macro avg       0.90      0.91      0.90     12153
+```
+
+### Swahili
+```
+           precision    recall  f1-score   support
+
+      ORG       0.82      0.85      0.83       349
+      PER       0.95      0.92      0.94       403
+      LOC       0.86      0.89      0.88       450
+
+micro avg       0.88      0.89      0.88      1202
+macro avg       0.88      0.89      0.88      1202
+```
+
+### Tagalog
+```
+           precision    recall  f1-score   support
+
+      LOC       0.90      0.91      0.90       338
+      ORG       0.83      0.91      0.87       339
+      PER       0.96      0.93      0.95       350
+
+micro avg       0.90      0.92      0.91      1027
+macro avg       0.90      0.92      0.91      1027
+```
+
+### Tamil
+```
+           precision    recall  f1-score   support
+
+      PER       0.90      0.92      0.91       392
+      ORG       0.77      0.76      0.76       370
+      LOC       0.78      0.81      0.79       421
+
+micro avg       0.82      0.83      0.82      1183
+macro avg       0.82      0.83      0.82      1183
+```
+
+### Telugu
+```
+           precision    recall  f1-score   support
+
+      ORG       0.67      0.55      0.61       347
+      LOC       0.78      0.87      0.82       453
+      PER       0.73      0.86      0.79       393
+
+micro avg       0.74      0.77      0.76      1193
+macro avg       0.73      0.77      0.75      1193
+```
+
+### Thai
+```
+           precision    recall  f1-score   support
+
+      LOC       0.63      0.76      0.69      3928
+      PER       0.78      0.83      0.80      6537
+      ORG       0.59      0.59      0.59      4257
+
+micro avg       0.68      0.74      0.71     14722
+macro avg       0.68      0.74      0.71     14722
+```
+
+### Turkish
+```
+           precision    recall  f1-score   support
+
+      PER       0.94      0.94      0.94      4337
+      ORG       0.88      0.89      0.88      4094
+      LOC       0.90      0.92      0.91      4929
+
+micro avg       0.90      0.92      0.91     13360
+macro avg       0.91      0.92      0.91     13360
+```
+
+### Urdu
+```
+           precision    recall  f1-score   support
+
+      LOC       0.90      0.95      0.93       352
+      PER       0.96      0.96      0.96       333
+      ORG       0.91      0.90      0.90       326
+
+micro avg       0.92      0.94      0.93      1011
+macro avg       0.92      0.94      0.93      1011
+```
+
+### Vietnamese
+```
+           precision    recall  f1-score   support
+
+      ORG       0.86      0.87      0.86      3579
+      LOC       0.88      0.91      0.90      3811
+      PER       0.92      0.93      0.93      3717
+
+micro avg       0.89      0.90      0.90     11107
+macro avg       0.89      0.90      0.90     11107
+```
+
+### Yoruba
+```
+           precision    recall  f1-score   support
+
+      LOC       0.54      0.72      0.62        36
+      ORG       0.58      0.31      0.41        35
+      PER       0.77      1.00      0.87        36
+
+micro avg       0.64      0.68      0.66       107
+macro avg       0.63      0.68      0.63       107
+```
+
+## Reproduce the results
+Download and prepare the dataset from the [XTREME repo](https://github.com/google-research/xtreme#download-the-data). Next, from the root of the transformers repo run:
+```
+cd examples/ner
+python run_tf_ner.py \
+--data_dir . \
+--labels ./labels.txt \
+--model_name_or_path jplu/tf-xlm-roberta-base \
+--output_dir model \
+--max_seq_length 128 \
+--num_train_epochs 2 \
+--per_gpu_train_batch_size 16 \
+--per_gpu_eval_batch_size 32 \
+--do_train \
+--do_eval \
+--logging_dir logs \
+--mode token-classification \
+--evaluate_during_training \
+--optimizer_name adamw
+```
+
+## Usage with pipelines
+```python
+from transformers import pipeline
+
+nlp_ner = pipeline(
+    "ner",
+    model="jplu/tf-xlm-r-ner-40-lang",
+    tokenizer=(
+        'jplu/tf-xlm-r-ner-40-lang',  
+        {"use_fast": True}
+))
+
+text_fr = "Barack Obama est né à Hawaï."
+text_en = "Barack Obama was born in Hawaii."
+text_es = "Barack Obama nació en Hawai."
+text_zh = "巴拉克·奧巴馬（Barack Obama）出生於夏威夷。"
+text_ar = "ولد باراك أوباما في هاواي."
+
+nlp_ner(text_fr)
+#Output: [{'word': '▁Barack', 'score': 0.9894659519195557, 'entity': 'PER'}, {'word': '▁Obama', 'score': 0.9888848662376404, 'entity': 'PER'}, {'word': '▁Hawa', 'score': 0.998701810836792, 'entity': 'LOC'}, {'word': 'ï', 'score': 0.9987035989761353, 'entity': 'LOC'}]
+nlp_ner(text_en)
+#Output: [{'word': '▁Barack', 'score': 0.9929141998291016, 'entity': 'PER'}, {'word': '▁Obama', 'score': 0.9930834174156189, 'entity': 'PER'}, {'word': '▁Hawaii', 'score': 0.9986202120780945, 'entity': 'LOC'}]
+nlp_ner(text_es)
+#Output: [{'word': '▁Barack', 'score': 0.9944776296615601, 'entity': 'PER'}, {'word': '▁Obama', 'score': 0.9949177503585815, 'entity': 'PER'}, {'word': '▁Hawa', 'score': 0.9987911581993103, 'entity': 'LOC'}, {'word': 'i', 'score': 0.9984861612319946, 'entity': 'LOC'}]
+nlp_ner(text_zh)
+#Output: [{'word': '夏威夷', 'score': 0.9988449215888977, 'entity': 'LOC'}]
+nlp_ner(text_ar)
+#Output: [{'word': '▁با', 'score': 0.9903655648231506, 'entity': 'PER'}, {'word': 'راك', 'score': 0.9850614666938782, 'entity': 'PER'}, {'word': '▁أوباما', 'score': 0.9850308299064636, 'entity': 'PER'}, {'word': '▁ها', 'score': 0.9477543234825134, 'entity': 'LOC'}, {'word': 'وا', 'score': 0.9428229928016663, 'entity': 'LOC'}, {'word': 'ي', 'score': 0.9319471716880798, 'entity': 'LOC'}]
+
+```