diff --git a/examples/distillation/lm_seqs_dataset.py b/examples/distillation/lm_seqs_dataset.py
index 691e010cf2..a29e9efb28 100644
--- a/examples/distillation/lm_seqs_dataset.py
+++ b/examples/distillation/lm_seqs_dataset.py
@@ -42,6 +42,7 @@ class LmSeqsDataset(Dataset):
         self.check()
         self.remove_long_sequences()
         self.remove_empty_sequences()
+        self.remove_unknown_sequences()
         self.check()
         self.print_statistics()
 
@@ -109,6 +110,22 @@ class LmSeqsDataset(Dataset):
         new_size = len(self)
         logger.info(f"Remove {init_size - new_size} too short (<=11 tokens) sequences.")
 
+    def remove_unknown_sequences(self):
+        """
+        Drop sequences in which at least half of the tokens are unknown; no-op without an 'unk_token' id.
+        """
+        special_ids = self.params.special_tok_ids
+        if 'unk_token' not in special_ids:
+            return
+        unk_id = special_ids['unk_token']
+        size_before = len(self)
+        unk_counts = np.array([np.count_nonzero(seq == unk_id) for seq in self.token_ids])
+        keep = (unk_counts / self.lengths) < 0.5  # True where fewer than half the tokens are unknown
+        self.token_ids = self.token_ids[keep]
+        self.lengths = self.lengths[keep]
+        removed = size_before - len(self)
+        logger.info(f'Remove {removed} sequences with a high level of unknown tokens (50%).')
+
     def print_statistics(self):
         """
         Print some statistics on the corpus. Only the master process.
diff --git a/examples/distillation/training_configs/distilbert-base-multilingual-cased.json b/examples/distillation/training_configs/distilbert-base-multilingual-cased.json
new file mode 100644
index 0000000000..f76e7febcb
--- /dev/null
+++ b/examples/distillation/training_configs/distilbert-base-multilingual-cased.json
@@ -0,0 +1,15 @@
+{
+	"activation": "gelu",
+	"attention_dropout": 0.1,
+	"dim": 768,
+	"dropout": 0.1,
+	"hidden_dim": 3072,
+	"initializer_range": 0.02,
+	"max_position_embeddings": 512,
+	"n_heads": 12,
+	"n_layers": 6,
+	"sinusoidal_pos_embds": true,
+	"tie_weights_": true,
+	"vocab_size": 119547
+  }
+  
\ No newline at end of file
diff --git a/examples/distillation/training_configs/distilroberta-base.json b/examples/distillation/training_configs/distilroberta-base.json
new file mode 100644
index 0000000000..2d90ef6380
--- /dev/null
+++ b/examples/distillation/training_configs/distilroberta-base.json
@@ -0,0 +1,14 @@
+{
+    "vocab_size": 50265,
+    "hidden_size": 768,
+    "num_hidden_layers": 6,
+    "num_attention_heads": 12,
+    "intermediate_size": 3072,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.1,
+    "attention_probs_dropout_prob": 0.1,
+    "max_position_embeddings": 514,
+    "type_vocab_size": 1,
+    "initializer_range": 0.02,
+    "layer_norm_eps": 0.00001
+}
\ No newline at end of file