From fd85734e0efe281887020f51d26ae6cd1df493e9 Mon Sep 17 00:00:00 2001
From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Date: Wed, 28 Jul 2021 09:38:12 -0400
Subject: [PATCH] Add option to set max_seq_length in run_ner (#12929)

---
 examples/pytorch/token-classification/run_ner.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/examples/pytorch/token-classification/run_ner.py b/examples/pytorch/token-classification/run_ner.py
index c9cf2b0890..824f98574e 100755
--- a/examples/pytorch/token-classification/run_ner.py
+++ b/examples/pytorch/token-classification/run_ner.py
@@ -123,6 +123,13 @@ class DataTrainingArguments:
         default=None,
         metadata={"help": "The number of processes to use for the preprocessing."},
     )
+    max_seq_length: int = field(
+        default=None,
+        metadata={
+            "help": "The maximum total input sequence length after tokenization. If set, sequences longer "
+            "than this will be truncated, sequences shorter will be padded."
+        },
+    )
     pad_to_max_length: bool = field(
         default=False,
         metadata={
@@ -358,6 +365,7 @@ def main():
             examples[text_column_name],
             padding=padding,
             truncation=True,
+            max_length=data_args.max_seq_length,
             # We use this argument because the texts in our dataset are lists of words (with a label for each word).
             is_split_into_words=True,
         )