Remove trust_remote_code when loading Libri Dummy (#31748)
* [whisper integration] use parquet dataset for testing
* propagate to others
* more propagation
* last one
This commit is contained in:
@@ -1835,9 +1835,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
|
||||
return WhisperProcessor.from_pretrained("openai/whisper-base")
|
||||
|
||||
def _load_datasamples(self, num_samples):
|
||||
- ds = load_dataset(
-     "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
- )
+ ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
# automatic decoding with librispeech
|
||||
speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
|
||||
|
||||
@@ -2718,9 +2716,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
|
||||
)
|
||||
assistant_model.to(torch_device)
|
||||
|
||||
- dataset = load_dataset(
-     "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
- )
+ dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
sample = dataset[0]["audio"]
|
||||
|
||||
input_features = processor(sample["array"], return_tensors="pt", sampling_rate=16_000).input_features
|
||||
@@ -2769,9 +2765,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
|
||||
)
|
||||
assistant_model.to(torch_device)
|
||||
|
||||
- dataset = load_dataset(
-     "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
- )
+ dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
sample = dataset[0]["audio"]
|
||||
|
||||
input_features = processor(sample["array"], return_tensors="pt", sampling_rate=16_000).input_features
|
||||
@@ -2812,7 +2806,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
|
||||
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
|
||||
model = model.to(torch_device)
|
||||
|
||||
- ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+ ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
|
||||
one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
|
||||
|
||||
input_features = processor(
|
||||
@@ -2848,9 +2842,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
|
||||
prompt = "Mr. Kilter, Brionno." # let's force Quilter -> Kilter, Brion -> Brionno
|
||||
prompt_ids = processor.get_prompt_ids(prompt, return_tensors="pt").to(torch_device)
|
||||
|
||||
- ds = load_dataset(
-     "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:-1]", trust_remote_code=True
- )
+ ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:-1]")
|
||||
one_audio = np.concatenate([x["array"] for x in ds["audio"]], dtype=np.float32)
|
||||
|
||||
first_text = ds[0]["text"].lower()
|
||||
@@ -2901,7 +2893,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
|
||||
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
|
||||
model = model.to(torch_device)
|
||||
|
||||
- ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+ ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
|
||||
one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
|
||||
|
||||
input_features = processor(
|
||||
@@ -2983,7 +2975,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
|
||||
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
|
||||
model = model.to(torch_device)
|
||||
|
||||
- ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+ ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
|
||||
one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
|
||||
|
||||
input_features = processor(
|
||||
@@ -3025,7 +3017,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
|
||||
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
|
||||
model = model.to(torch_device)
|
||||
|
||||
- ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+ ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
|
||||
one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
|
||||
audios = []
|
||||
audios.append(one_audio[110000:])
|
||||
@@ -3079,7 +3071,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
|
||||
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
|
||||
model = model.to(torch_device)
|
||||
|
||||
- ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
+ ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
|
||||
one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
|
||||
audios = []
|
||||
audios.append(one_audio[110000:])
|
||||
|
||||
Reference in New Issue
Block a user