Incorrect setting for num_beams in translation and summarization examples (#27519)
* Remove the torch main_process_first context manager from TF examples
* Correctly set num_beams=1 in our examples, and add a guard in GenerationConfig.validate()
* Update src/transformers/generation/configuration_utils.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
@@ -312,7 +312,7 @@ class DataTrainingArguments:
|
|||||||
default=False, metadata={"help": "Whether to use generate to calculate generative metrics (ROUGE, BLEU)."}
|
default=False, metadata={"help": "Whether to use generate to calculate generative metrics (ROUGE, BLEU)."}
|
||||||
)
|
)
|
||||||
num_beams: Optional[int] = field(
|
num_beams: Optional[int] = field(
|
||||||
default=None,
|
default=1,
|
||||||
metadata={
|
metadata={
|
||||||
"help": (
|
"help": (
|
||||||
"Number of beams to use for evaluation. This argument will be passed to `model.generate`, "
|
"Number of beams to use for evaluation. This argument will be passed to `model.generate`, "
|
||||||
|
|||||||
@@ -249,7 +249,7 @@ class DataTrainingArguments:
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
num_beams: Optional[int] = field(
|
num_beams: Optional[int] = field(
|
||||||
default=None,
|
default=1,
|
||||||
metadata={
|
metadata={
|
||||||
"help": (
|
"help": (
|
||||||
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
|
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
|
||||||
|
|||||||
@@ -217,7 +217,7 @@ class DataTrainingArguments:
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
num_beams: Optional[int] = field(
|
num_beams: Optional[int] = field(
|
||||||
default=None,
|
default=1,
|
||||||
metadata={
|
metadata={
|
||||||
"help": (
|
"help": (
|
||||||
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
|
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
|
||||||
|
|||||||
@@ -415,7 +415,6 @@ def main():
|
|||||||
if data_args.max_train_samples is not None:
|
if data_args.max_train_samples is not None:
|
||||||
max_train_samples = min(len(train_dataset), data_args.max_train_samples)
|
max_train_samples = min(len(train_dataset), data_args.max_train_samples)
|
||||||
train_dataset = train_dataset.select(range(max_train_samples))
|
train_dataset = train_dataset.select(range(max_train_samples))
|
||||||
with training_args.main_process_first(desc="train dataset map pre-processing"):
|
|
||||||
train_dataset = train_dataset.map(
|
train_dataset = train_dataset.map(
|
||||||
preprocess_function,
|
preprocess_function,
|
||||||
batched=True,
|
batched=True,
|
||||||
@@ -430,7 +429,6 @@ def main():
|
|||||||
if data_args.max_eval_samples is not None:
|
if data_args.max_eval_samples is not None:
|
||||||
max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
|
max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
|
||||||
eval_dataset = eval_dataset.select(range(max_eval_samples))
|
eval_dataset = eval_dataset.select(range(max_eval_samples))
|
||||||
with training_args.main_process_first(desc="validation dataset map pre-processing"):
|
|
||||||
eval_dataset = eval_dataset.map(
|
eval_dataset = eval_dataset.map(
|
||||||
preprocess_function,
|
preprocess_function,
|
||||||
batched=True,
|
batched=True,
|
||||||
|
|||||||
@@ -238,7 +238,7 @@ class DataTrainingArguments:
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
num_beams: Optional[int] = field(
|
num_beams: Optional[int] = field(
|
||||||
default=None,
|
default=1,
|
||||||
metadata={
|
metadata={
|
||||||
"help": (
|
"help": (
|
||||||
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
|
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
|
||||||
@@ -488,7 +488,6 @@ def main():
|
|||||||
if data_args.max_train_samples is not None:
|
if data_args.max_train_samples is not None:
|
||||||
max_train_samples = min(len(train_dataset), data_args.max_train_samples)
|
max_train_samples = min(len(train_dataset), data_args.max_train_samples)
|
||||||
train_dataset = train_dataset.select(range(max_train_samples))
|
train_dataset = train_dataset.select(range(max_train_samples))
|
||||||
with training_args.main_process_first(desc="train dataset map pre-processing"):
|
|
||||||
train_dataset = train_dataset.map(
|
train_dataset = train_dataset.map(
|
||||||
preprocess_function,
|
preprocess_function,
|
||||||
batched=True,
|
batched=True,
|
||||||
@@ -508,7 +507,6 @@ def main():
|
|||||||
if data_args.max_eval_samples is not None:
|
if data_args.max_eval_samples is not None:
|
||||||
max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
|
max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
|
||||||
eval_dataset = eval_dataset.select(range(max_eval_samples))
|
eval_dataset = eval_dataset.select(range(max_eval_samples))
|
||||||
with training_args.main_process_first(desc="validation dataset map pre-processing"):
|
|
||||||
eval_dataset = eval_dataset.map(
|
eval_dataset = eval_dataset.map(
|
||||||
preprocess_function,
|
preprocess_function,
|
||||||
batched=True,
|
batched=True,
|
||||||
|
|||||||
@@ -226,7 +226,7 @@ class DataTrainingArguments:
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
num_beams: Optional[int] = field(
|
num_beams: Optional[int] = field(
|
||||||
default=None,
|
default=1,
|
||||||
metadata={
|
metadata={
|
||||||
"help": (
|
"help": (
|
||||||
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
|
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
|
||||||
@@ -454,7 +454,6 @@ def main():
|
|||||||
if data_args.max_train_samples is not None:
|
if data_args.max_train_samples is not None:
|
||||||
max_train_samples = min(len(train_dataset), data_args.max_train_samples)
|
max_train_samples = min(len(train_dataset), data_args.max_train_samples)
|
||||||
train_dataset = train_dataset.select(range(max_train_samples))
|
train_dataset = train_dataset.select(range(max_train_samples))
|
||||||
with training_args.main_process_first(desc="train dataset map pre-processing"):
|
|
||||||
train_dataset = train_dataset.map(
|
train_dataset = train_dataset.map(
|
||||||
preprocess_function,
|
preprocess_function,
|
||||||
batched=True,
|
batched=True,
|
||||||
@@ -474,7 +473,6 @@ def main():
|
|||||||
if data_args.max_eval_samples is not None:
|
if data_args.max_eval_samples is not None:
|
||||||
max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
|
max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
|
||||||
eval_dataset = eval_dataset.select(range(max_eval_samples))
|
eval_dataset = eval_dataset.select(range(max_eval_samples))
|
||||||
with training_args.main_process_first(desc="validation dataset map pre-processing"):
|
|
||||||
eval_dataset = eval_dataset.map(
|
eval_dataset = eval_dataset.map(
|
||||||
preprocess_function,
|
preprocess_function,
|
||||||
batched=True,
|
batched=True,
|
||||||
|
|||||||
@@ -409,6 +409,10 @@ class GenerationConfig(PushToHubMixin):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# 2. detect beam-only parameterization when not in beam mode
|
# 2. detect beam-only parameterization when not in beam mode
|
||||||
|
if self.num_beams is None:
|
||||||
|
logging.warning("`num_beams` is set to None - defaulting to 1.", UserWarning)
|
||||||
|
self.num_beams = 1
|
||||||
|
|
||||||
if self.num_beams == 1:
|
if self.num_beams == 1:
|
||||||
single_beam_wrong_parameter_msg = (
|
single_beam_wrong_parameter_msg = (
|
||||||
"`num_beams` is set to 1. However, `{flag_name}` is set to `{flag_value}` -- this flag is only used "
|
"`num_beams` is set to 1. However, `{flag_name}` is set to `{flag_value}` -- this flag is only used "
|
||||||
|
|||||||
Reference in New Issue
Block a user