Remove trust_remote_code when loading Libri Dummy (#31748)

* [whisper integration] use parquet dataset for testing

* propagate to others

* more propagation

* last one
This commit is contained in:
Sanchit Gandhi
2024-07-23 14:54:38 +08:00
committed by GitHub
parent 3aefb4ec7f
commit f83c6f1d02
56 changed files with 110 additions and 254 deletions

View File

@@ -1835,9 +1835,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
return WhisperProcessor.from_pretrained("openai/whisper-base")
def _load_datasamples(self, num_samples):
ds = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
# automatic decoding with librispeech
speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
@@ -2718,9 +2716,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
)
assistant_model.to(torch_device)
dataset = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
sample = dataset[0]["audio"]
input_features = processor(sample["array"], return_tensors="pt", sampling_rate=16_000).input_features
@@ -2769,9 +2765,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
)
assistant_model.to(torch_device)
dataset = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
sample = dataset[0]["audio"]
input_features = processor(sample["array"], return_tensors="pt", sampling_rate=16_000).input_features
@@ -2812,7 +2806,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
model = model.to(torch_device)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
input_features = processor(
@@ -2848,9 +2842,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
prompt = "Mr. Kilter, Brionno." # let's force Quilter -> Kilter, Brion -> Brionno
prompt_ids = processor.get_prompt_ids(prompt, return_tensors="pt").to(torch_device)
ds = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:-1]", trust_remote_code=True
)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:-1]")
one_audio = np.concatenate([x["array"] for x in ds["audio"]], dtype=np.float32)
first_text = ds[0]["text"].lower()
@@ -2901,7 +2893,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
model = model.to(torch_device)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
input_features = processor(
@@ -2983,7 +2975,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
model = model.to(torch_device)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
input_features = processor(
@@ -3025,7 +3017,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
model = model.to(torch_device)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
audios = []
audios.append(one_audio[110000:])
@@ -3079,7 +3071,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
model = model.to(torch_device)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean")
one_audio = np.concatenate([x["array"] for x in ds["validation"]["audio"]], dtype=np.float32)
audios = []
audios.append(one_audio[110000:])