From beaf60e589a38a7e5963c106069ed3b583a9b6dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sava=C5=9F=20Y=C4=B1ld=C4=B1r=C4=B1m?= <savasy@gmail.com>
Date: Tue, 7 Jul 2020 13:43:09 +0300
Subject: [PATCH] bert-turkish-text-classification model card (#5493)

---
 .../README.md                                 | 102 ++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 model_cards/savasy/bert-turkish-text-classification/README.md

diff --git a/model_cards/savasy/bert-turkish-text-classification/README.md b/model_cards/savasy/bert-turkish-text-classification/README.md
new file mode 100644
index 0000000000..a22c2ccf9e
--- /dev/null
+++ b/model_cards/savasy/bert-turkish-text-classification/README.md
@@ -0,0 +1,102 @@
+---
+language: turkish
+---
+
+# Turkish Text Classification
+
+This model is a fine-tune model of https://github.com/stefan-it/turkish-bert by using text classification data where there are 7 categories as follows
+
+```
+code_to_label={
+ 'LABEL_0': 'dunya ',
+ 'LABEL_1': 'ekonomi ',
+ 'LABEL_2': 'kultur ',
+ 'LABEL_3': 'saglik ',
+ 'LABEL_4': 'siyaset ',
+ 'LABEL_5': 'spor ',
+ 'LABEL_6': 'teknoloji '}
+ 
+ ```
+
+
+## Data 
+The following Turkish benchmark dataset is used for fine-tuning
+
+https://www.kaggle.com/savasy/ttc4900
+
+## Quick Start
+
+Bewgin with installing transformers as follows
+> pip install transformers
+
+```
+# Code:
+# import libraries
+from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer, AutoModelForSequenceClassification
+tokenizer= AutoTokenizer.from_pretrained("savasy/bert-turkish-text-classification")
+
+# build and load model, it take time depending on your internet connection
+model= AutoModelForSequenceClassification.from_pretrained("savasy/bert-turkish-text-classification")
+
+# make pipeline
+nlp=pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
+
+# apply model
+nlp("bla bla")
+# [{'label': 'LABEL_2', 'score': 0.4753005802631378}]
+
+code_to_label={
+ 'LABEL_0': 'dunya ',
+ 'LABEL_1': 'ekonomi ',
+ 'LABEL_2': 'kultur ',
+ 'LABEL_3': 'saglik ',
+ 'LABEL_4': 'siyaset ',
+ 'LABEL_5': 'spor ',
+ 'LABEL_6': 'teknoloji '}
+ 
+code_to_label[nlp("bla bla")[0]['label']]
+# > 'kultur '
+```
+
+## How the model was trained
+
+```
+
+## loading data for Turkish text classification
+import pandas as pd
+# https://www.kaggle.com/savasy/ttc4900
+df=pd.read_csv("7allV03.csv")
+df.columns=["labels","text"]
+df.labels=pd.Categorical(df.labels)
+
+traind_df=...
+eval_df=...
+
+# model
+from simpletransformers.classification import ClassificationModel
+import torch,sklearn
+
+model_args = {
+    "use_early_stopping": True,
+    "early_stopping_delta": 0.01,
+    "early_stopping_metric": "mcc",
+    "early_stopping_metric_minimize": False,
+    "early_stopping_patience": 5,
+    "evaluate_during_training_steps": 1000,
+    "fp16": False,
+    "num_train_epochs":3
+}
+
+model = ClassificationModel(
+    "bert", 
+    "dbmdz/bert-base-turkish-cased",
+     use_cuda=cuda_available, 
+     args=model_args, 
+     num_labels=7
+)
+model.train_model(train_df, acc=sklearn.metrics.accuracy_score)
+```
+For other training models please check https://simpletransformers.ai/
+
+
+For the detailed usage of Turkish Text Classification please check [python notebook](https://github.com/savasy/TurkishTextClassification/blob/master/Bert_base_Text_Classification_for_Turkish.ipynb)