Remove script datasets in tests (#38940)
* remove trust_remote_code
* again
* Revert "Skip some tests for now (#38931)"
This reverts commit 31d30b7224.
* again
* style
* again
* again
* style
* fix integration test
* fix tests
* style
* fix
* fix
* fix the last ones
* style
* last one
* fix last
* fix
---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -179,7 +179,7 @@ class AudioClassificationPipelineTests(unittest.TestCase):
 model = "superb/wav2vec2-base-superb-ks"
 
 audio_classifier = pipeline("audio-classification", model=model)
-dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test", trust_remote_code=True)
+dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test")
 
 audio = np.array(dataset[3]["speech"], dtype=np.float32)
 output = audio_classifier(audio, top_k=4)
|
||||
@@ -265,9 +265,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
 @require_torch
 @require_pyctcdecode
 def test_large_model_pt_with_lm(self):
-dataset = load_dataset("Narsil/asr_dummy", streaming=True, trust_remote_code=True)
-third_item = next(iter(dataset["test"].skip(3)))
-filename = third_item["file"]
+filename = hf_hub_download("Narsil/asr_dummy", filename="4.flac", repo_type="dataset")
 
 speech_recognizer = pipeline(
 task="automatic-speech-recognition",
@@ -388,7 +386,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
 chunk_length_s=8,
 stride_length_s=1,
 )
-data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
+data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
 sample = next(iter(data))
 
 res = pipe(sample["audio"]["array"])
@@ -434,7 +432,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
 stride_length_s=1,
 return_language=True,
 )
-data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
+data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
 sample = next(iter(data))
 
 res = pipe(sample["audio"]["array"])
@@ -489,7 +487,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
 task="automatic-speech-recognition",
 model="openai/whisper-tiny.en",
 )
-data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
+data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
 samples = [next(iter(data)) for _ in range(8)]
 audio = np.concatenate([sample["audio"]["array"] for sample in samples])
 
@@ -1125,9 +1123,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
 @slow
 def test_speculative_decoding_whisper_non_distil(self):
 # Load data:
-dataset = load_dataset(
-"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
-)
+dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
 sample = dataset[0]["audio"]
 
 # Load model:
@@ -1169,9 +1165,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
 @slow
 def test_speculative_decoding_whisper_distil(self):
 # Load data:
-dataset = load_dataset(
-"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
-)
+dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
 sample = dataset[0]["audio"]
 
 # Load model:
|
||||
@@ -601,9 +601,9 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
 
 image_segmenter = pipeline("image-segmentation", model=model, image_processor=image_processor)
 
-image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-file = image[0]["file"]
-outputs = image_segmenter(file, threshold=threshold)
+ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+image = ds[0]["image"].convert("RGB")
+outputs = image_segmenter(image, threshold=threshold)
 
 # Shortening by hashing
 for o in outputs:
@@ -655,9 +655,9 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
 def test_oneformer(self):
 image_segmenter = pipeline(model="shi-labs/oneformer_ade20k_swin_tiny")
 
-image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
-file = image[0]["file"]
-outputs = image_segmenter(file, threshold=0.99)
+ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+image = ds[0]["image"].convert("RGB")
+outputs = image_segmenter(image, threshold=0.99)
 # Shortening by hashing
 for o in outputs:
 o["mask"] = mask_to_test_readable(o["mask"])
@@ -679,7 +679,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
 )
 
 # Different task
-outputs = image_segmenter(file, threshold=0.99, subtask="instance")
+outputs = image_segmenter(image, threshold=0.99, subtask="instance")
 # Shortening by hashing
 for o in outputs:
 o["mask"] = mask_to_test_readable(o["mask"])
@@ -701,7 +701,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
 )
 
 # Different task
-outputs = image_segmenter(file, subtask="semantic")
+outputs = image_segmenter(image, subtask="semantic")
 # Shortening by hashing
 for o in outputs:
 o["mask"] = mask_to_test_readable(o["mask"])
|
||||
Reference in New Issue
Block a user