From 06b05d4575732489e6a1252578d37a2ebdb40273 Mon Sep 17 00:00:00 2001 From: Luc CAILLIAU <74506016+luccailliau@users.noreply.github.com> Date: Tue, 11 Apr 2023 15:06:54 +0200 Subject: [PATCH] Clarify stride option (#22684) * Clarify stride option * formatting --- src/transformers/pipelines/token_classification.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/transformers/pipelines/token_classification.py b/src/transformers/pipelines/token_classification.py index 5d0244328c..1a2a96b398 100644 --- a/src/transformers/pipelines/token_classification.py +++ b/src/transformers/pipelines/token_classification.py @@ -68,7 +68,8 @@ class AggregationStrategy(ExplicitEnum): same entity together in the predictions or not. stride (`int`, *optional*): If stride is provided, the pipeline is applied on all the text. The text is split into chunks of size - model_max_length. Works only with fast tokenizers and `aggregation_strategy` different from `NONE`. + model_max_length. Works only with fast tokenizers and `aggregation_strategy` different from `NONE`. The + value of this argument defines the number of overlapping tokens between chunks. aggregation_strategy (`str`, *optional*, defaults to `"none"`): The strategy to fuse (or not) tokens based on the model prediction.