From 20c3b8cab4cddf4d90351a32654da60a49f0aa19 Mon Sep 17 00:00:00 2001 From: Manuel Romero Date: Mon, 27 Apr 2020 10:26:59 +0200 Subject: [PATCH] Create model card --- .../README.md | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 model_cards/mrm8488/distilbert-base-multi-cased-finetuned-typo-detection/README.md diff --git a/model_cards/mrm8488/distilbert-base-multi-cased-finetuned-typo-detection/README.md b/model_cards/mrm8488/distilbert-base-multi-cased-finetuned-typo-detection/README.md new file mode 100644 index 0000000000..5daec9d867 --- /dev/null +++ b/model_cards/mrm8488/distilbert-base-multi-cased-finetuned-typo-detection/README.md @@ -0,0 +1,53 @@ +--- +language: multilingual +thumbnail: +--- + +# DISTILBERT 🌎 + Typo Detection ✍❌✍✔ + +[distilbert-base-multilingual-cased](https://huggingface.co/distilbert-base-multilingual-cased) fine-tuned on [GitHub Typo Corpus](https://github.com/mhagiwara/github-typo-corpus) for **typo detection** (using *NER* style) + +## Details of the downstream task (Typo detection as NER) + +- Dataset: [GitHub Typo Corpus](https://github.com/mhagiwara/github-typo-corpus) 📚 for 15 languages + +- [Fine-tune script on NER dataset provided by Huggingface](https://github.com/huggingface/transformers/blob/master/examples/run_ner.py) 🏋️‍♂️ + +## Metrics on test set 📋 + +| Metric | # score | +| :-------: | :-------: | +| F1 | **93.51** | +| Precision | **96.08** | +| Recall | **91.06** | + +## Model in action 🔨 + +Fast usage with **pipelines** 🧪 + +```python +from transformers import pipeline + +typo_checker = pipeline( + "ner", + model="mrm8488/distilbert-base-multi-cased-finetuned-typo-detection", + tokenizer="mrm8488/distilbert-base-multi-cased-finetuned-typo-detection" +) + +result = typo_checker("Adddd validation midelware") +result[1:-1] + +# Output: +[{'entity': 'ok', 'score': 0.7128152847290039, 'word': 'add'}, + {'entity': 'typo', 'score': 0.5388424396514893, 'word': 
'##dd'}, + {'entity': 'ok', 'score': 0.94792640209198, 'word': 'validation'}, + {'entity': 'typo', 'score': 0.5839331746101379, 'word': 'mid'}, + {'entity': 'ok', 'score': 0.5195121765136719, 'word': '##el'}, + {'entity': 'ok', 'score': 0.7222476601600647, 'word': '##ware'}] +``` +It works 🎉! We misspelled two words on purpose: `Adddd` instead of `Add`, and `midelware` instead of `middleware`. + + +> Created by [Manuel Romero/@mrm8488](https://twitter.com/mrm8488) + +> Made with ♥ in Spain