Fix typo (#25966)
* Update feature_extraction_clap.py * changed all lenght to length
This commit is contained in:
@@ -31,7 +31,7 @@ class Seq2SeqTrainingArguments(TrainingArguments):
|
||||
label_smoothing (:obj:`float`, `optional`, defaults to 0):
|
||||
The label smoothing epsilon to apply (if not zero).
|
||||
sortish_sampler (:obj:`bool`, `optional`, defaults to :obj:`False`):
|
||||
Whether to SortishSamler or not. It sorts the inputs according to lenghts in-order to minimizing the padding size.
|
||||
Whether to use a SortishSampler or not. It sorts the inputs according to lengths in order to minimize the padding size.
|
||||
predict_with_generate (:obj:`bool`, `optional`, defaults to :obj:`False`):
|
||||
Whether to use generate to calculate generative metrics (ROUGE, BLEU).
|
||||
"""
|
||||
|
||||
@@ -311,7 +311,7 @@ class DataCollatorCTCWithPadding:
|
||||
pad_to_multiple_of_labels: Optional[int] = None
|
||||
|
||||
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
|
||||
# split inputs and labels since they have to be of different lenghts and need
|
||||
# split inputs and labels since they have to be of different lengths and need
|
||||
# different padding methods
|
||||
input_features = [{"input_values": feature["input_values"]} for feature in features]
|
||||
label_features = [{"input_ids": feature["labels"]} for feature in features]
|
||||
|
||||
@@ -307,7 +307,7 @@ class DataCollatorCTCWithPadding:
|
||||
pad_to_multiple_of_labels: Optional[int] = None
|
||||
|
||||
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
|
||||
# split inputs and labels since they have to be of different lenghts and need
|
||||
# split inputs and labels since they have to be of different lengths and need
|
||||
# different padding methods
|
||||
input_features = [{"input_values": feature["input_values"]} for feature in features]
|
||||
label_features = [{"input_ids": feature["labels"]} for feature in features]
|
||||
|
||||
@@ -292,7 +292,7 @@ class DataCollatorCTCWithPadding:
|
||||
pad_to_multiple_of_labels: Optional[int] = None
|
||||
|
||||
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
|
||||
# split inputs and labels since they have to be of different lenghts and need
|
||||
# split inputs and labels since they have to be of different lengths and need
|
||||
# different padding methods
|
||||
input_features = [{"input_values": feature["input_values"]} for feature in features]
|
||||
label_features = [{"input_ids": feature["labels"]} for feature in features]
|
||||
|
||||
@@ -284,7 +284,7 @@ class DataCollatorCTCWithPadding:
|
||||
pad_to_multiple_of_labels: Optional[int] = None
|
||||
|
||||
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
|
||||
# split inputs and labels since they have to be of different lenghts and need
|
||||
# split inputs and labels since they have to be of different lengths and need
|
||||
# different padding methods
|
||||
input_features = []
|
||||
label_features = []
|
||||
|
||||
@@ -254,7 +254,7 @@ class DataCollatorCTCWithPadding:
|
||||
pad_to_multiple_of_labels: Optional[int] = None
|
||||
|
||||
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
|
||||
# split inputs and labels since they have to be of different lenghts and need
|
||||
# split inputs and labels since they have to be of different lengths and need
|
||||
# different padding methods
|
||||
input_features = [{"input_values": feature["input_values"]} for feature in features]
|
||||
label_features = [{"input_ids": feature["labels"]} for feature in features]
|
||||
|
||||
@@ -173,7 +173,7 @@ class DataCollatorCTCWithPadding:
|
||||
pad_to_multiple_of_labels: Optional[int] = None
|
||||
|
||||
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
|
||||
# split inputs and labels since they have to be of different lenghts and need
|
||||
# split inputs and labels since they have to be of different lengths and need
|
||||
# different padding methods
|
||||
input_features = [{"input_values": feature["input_values"]} for feature in features]
|
||||
label_features = [{"input_ids": feature["labels"]} for feature in features]
|
||||
|
||||
@@ -335,7 +335,7 @@ class SpeechDataCollatorWithPadding:
|
||||
pad_to_multiple_of_labels: Optional[int] = None
|
||||
|
||||
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
|
||||
# split inputs and labels since they have to be of different lenghts and need
|
||||
# split inputs and labels since they have to be of different lengths and need
|
||||
# different padding methods
|
||||
input_features = [{"input_values": feature["input_values"]} for feature in features]
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ class ClapFeatureExtractor(SequenceFeatureExtractor):
|
||||
Length of the overlapping windows for the STFT used to obtain the Mel Spectrogram. The audio will be split
|
||||
in smaller `frames` with a step of `hop_length` between each frame.
|
||||
max_length_s (`int`, defaults to 10):
|
||||
The maximum input lenght of the model in seconds. This is used to pad the audio.
|
||||
The maximum input length of the model in seconds. This is used to pad the audio.
|
||||
fft_window_size (`int`, defaults to 1024):
|
||||
Size of the window (in samples) on which the Fourier transform is applied. This controls the frequency
|
||||
resolution of the spectrogram. 400 means that the Fourier transform is computed on windows of 400 samples.
|
||||
|
||||
@@ -283,8 +283,8 @@ class Data2VecAudioModelTester:
|
||||
input_values[i, input_lengths[i] :] = 0.0
|
||||
|
||||
if max_length_labels[i] < labels.shape[-1]:
|
||||
# it's important that we make sure that target lenghts are at least
|
||||
# one shorter than logit lenghts to prevent -inf
|
||||
# it's important that we make sure that target lengths are at least
|
||||
# one shorter than logit lengths to prevent -inf
|
||||
labels[i, max_length_labels[i] - 1 :] = -100
|
||||
|
||||
loss = model(input_values, labels=labels).loss
|
||||
|
||||
@@ -252,8 +252,8 @@ class HubertModelTester:
|
||||
input_values[i, input_lengths[i] :] = 0.0
|
||||
|
||||
if max_length_labels[i] < labels.shape[-1]:
|
||||
# it's important that we make sure that target lenghts are at least
|
||||
# one shorter than logit lenghts to prevent -inf
|
||||
# it's important that we make sure that target lengths are at least
|
||||
# one shorter than logit lengths to prevent -inf
|
||||
labels[i, max_length_labels[i] - 1 :] = -100
|
||||
|
||||
loss = model(input_values, labels=labels).loss
|
||||
|
||||
@@ -222,8 +222,8 @@ class SEWModelTester:
|
||||
input_values[i, input_lengths[i] :] = 0.0
|
||||
|
||||
if max_length_labels[i] < labels.shape[-1]:
|
||||
# it's important that we make sure that target lenghts are at least
|
||||
# one shorter than logit lenghts to prevent -inf
|
||||
# it's important that we make sure that target lengths are at least
|
||||
# one shorter than logit lengths to prevent -inf
|
||||
labels[i, max_length_labels[i] - 1 :] = -100
|
||||
|
||||
loss = model(input_values, labels=labels).loss
|
||||
|
||||
@@ -243,8 +243,8 @@ class SEWDModelTester:
|
||||
input_values[i, input_lengths[i] :] = 0.0
|
||||
|
||||
if max_length_labels[i] < labels.shape[-1]:
|
||||
# it's important that we make sure that target lenghts are at least
|
||||
# one shorter than logit lenghts to prevent -inf
|
||||
# it's important that we make sure that target lengths are at least
|
||||
# one shorter than logit lengths to prevent -inf
|
||||
labels[i, max_length_labels[i] - 1 :] = -100
|
||||
|
||||
loss = model(input_values, labels=labels).loss
|
||||
|
||||
@@ -340,7 +340,7 @@ class SpeechT5FeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest
|
||||
feat_dict["return_attention_mask"] = True
|
||||
feat_extract = self.feature_extraction_class(**feat_dict)
|
||||
speech_inputs = self.feat_extract_tester.prepare_inputs_for_target()
|
||||
input_lenghts = [len(x) for x in speech_inputs]
|
||||
input_lengths = [len(x) for x in speech_inputs]
|
||||
input_name = feat_extract.model_input_names[0]
|
||||
|
||||
processed = BatchFeature({input_name: speech_inputs})
|
||||
@@ -350,18 +350,18 @@ class SpeechT5FeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest
|
||||
processed = feat_extract.pad(processed, padding="longest", return_tensors="np")
|
||||
self.assertIn("attention_mask", processed)
|
||||
self.assertListEqual(list(processed.attention_mask.shape), list(processed[input_name].shape[:2]))
|
||||
self.assertListEqual(processed.attention_mask.sum(-1).tolist(), input_lenghts)
|
||||
self.assertListEqual(processed.attention_mask.sum(-1).tolist(), input_lengths)
|
||||
|
||||
def test_attention_mask_with_truncation_target(self):
|
||||
feat_dict = self.feat_extract_dict
|
||||
feat_dict["return_attention_mask"] = True
|
||||
feat_extract = self.feature_extraction_class(**feat_dict)
|
||||
speech_inputs = self.feat_extract_tester.prepare_inputs_for_target()
|
||||
input_lenghts = [len(x) for x in speech_inputs]
|
||||
input_lengths = [len(x) for x in speech_inputs]
|
||||
input_name = feat_extract.model_input_names[0]
|
||||
|
||||
processed = BatchFeature({input_name: speech_inputs})
|
||||
max_length = min(input_lenghts)
|
||||
max_length = min(input_lengths)
|
||||
|
||||
feat_extract.feature_size = feat_extract.num_mel_bins # hack!
|
||||
|
||||
|
||||
@@ -245,8 +245,8 @@ class UniSpeechModelTester:
|
||||
input_values[i, input_lengths[i] :] = 0.0
|
||||
|
||||
if max_length_labels[i] < labels.shape[-1]:
|
||||
# it's important that we make sure that target lenghts are at least
|
||||
# one shorter than logit lenghts to prevent -inf
|
||||
# it's important that we make sure that target lengths are at least
|
||||
# one shorter than logit lengths to prevent -inf
|
||||
labels[i, max_length_labels[i] - 1 :] = -100
|
||||
|
||||
loss = model(input_values, labels=labels).loss
|
||||
|
||||
@@ -265,8 +265,8 @@ class UniSpeechSatModelTester:
|
||||
input_values[i, input_lengths[i] :] = 0.0
|
||||
|
||||
if max_length_labels[i] < labels.shape[-1]:
|
||||
# it's important that we make sure that target lenghts are at least
|
||||
# one shorter than logit lenghts to prevent -inf
|
||||
# it's important that we make sure that target lengths are at least
|
||||
# one shorter than logit lengths to prevent -inf
|
||||
labels[i, max_length_labels[i] - 1 :] = -100
|
||||
|
||||
loss = model(input_values, labels=labels).loss
|
||||
|
||||
@@ -404,8 +404,8 @@ class Wav2Vec2ModelTester:
|
||||
input_values[i, input_lengths[i] :] = 0.0
|
||||
|
||||
if max_length_labels[i] < labels.shape[-1]:
|
||||
# it's important that we make sure that target lenghts are at least
|
||||
# one shorter than logit lenghts to prevent -inf
|
||||
# it's important that we make sure that target lengths are at least
|
||||
# one shorter than logit lengths to prevent -inf
|
||||
labels[i, max_length_labels[i] - 1 :] = -100
|
||||
|
||||
loss = model(input_values, labels=labels).loss
|
||||
|
||||
@@ -314,8 +314,8 @@ class Wav2Vec2ConformerModelTester:
|
||||
input_values[i, input_lengths[i] :] = 0.0
|
||||
|
||||
if max_length_labels[i] < labels.shape[-1]:
|
||||
# it's important that we make sure that target lenghts are at least
|
||||
# one shorter than logit lenghts to prevent -inf
|
||||
# it's important that we make sure that target lengths are at least
|
||||
# one shorter than logit lengths to prevent -inf
|
||||
labels[i, max_length_labels[i] - 1 :] = -100
|
||||
|
||||
loss = model(input_values, labels=labels).loss
|
||||
|
||||
@@ -256,8 +256,8 @@ class WavLMModelTester:
|
||||
input_values[i, input_lengths[i] :] = 0.0
|
||||
|
||||
if max_length_labels[i] < labels.shape[-1]:
|
||||
# it's important that we make sure that target lenghts are at least
|
||||
# one shorter than logit lenghts to prevent -inf
|
||||
# it's important that we make sure that target lengths are at least
|
||||
# one shorter than logit lengths to prevent -inf
|
||||
labels[i, max_length_labels[i] - 1 :] = -100
|
||||
|
||||
loss = model(input_values, labels=labels).loss
|
||||
|
||||
@@ -391,7 +391,7 @@ class SequenceFeatureExtractionTestMixin(FeatureExtractionSavingTestMixin):
|
||||
feat_dict["return_attention_mask"] = True
|
||||
feat_extract = self.feature_extraction_class(**feat_dict)
|
||||
speech_inputs = self.feat_extract_tester.prepare_inputs_for_common()
|
||||
input_lenghts = [len(x) for x in speech_inputs]
|
||||
input_lengths = [len(x) for x in speech_inputs]
|
||||
input_name = feat_extract.model_input_names[0]
|
||||
|
||||
processed = BatchFeature({input_name: speech_inputs})
|
||||
@@ -399,18 +399,18 @@ class SequenceFeatureExtractionTestMixin(FeatureExtractionSavingTestMixin):
|
||||
processed = feat_extract.pad(processed, padding="longest", return_tensors="np")
|
||||
self.assertIn("attention_mask", processed)
|
||||
self.assertListEqual(list(processed.attention_mask.shape), list(processed[input_name].shape[:2]))
|
||||
self.assertListEqual(processed.attention_mask.sum(-1).tolist(), input_lenghts)
|
||||
self.assertListEqual(processed.attention_mask.sum(-1).tolist(), input_lengths)
|
||||
|
||||
def test_attention_mask_with_truncation(self):
|
||||
feat_dict = self.feat_extract_dict
|
||||
feat_dict["return_attention_mask"] = True
|
||||
feat_extract = self.feature_extraction_class(**feat_dict)
|
||||
speech_inputs = self.feat_extract_tester.prepare_inputs_for_common()
|
||||
input_lenghts = [len(x) for x in speech_inputs]
|
||||
input_lengths = [len(x) for x in speech_inputs]
|
||||
input_name = feat_extract.model_input_names[0]
|
||||
|
||||
processed = BatchFeature({input_name: speech_inputs})
|
||||
max_length = min(input_lenghts)
|
||||
max_length = min(input_lengths)
|
||||
|
||||
processed_pad = feat_extract.pad(
|
||||
processed, padding="max_length", max_length=max_length, truncation=True, return_tensors="np"
|
||||
|
||||
Reference in New Issue
Block a user