diff --git a/docs/source/en/quicktour.mdx b/docs/source/en/quicktour.mdx
index c1b23995aa..7c7a39d568 100644
--- a/docs/source/en/quicktour.mdx
+++ b/docs/source/en/quicktour.mdx
@@ -134,8 +134,7 @@ Audio files are automatically loaded and resampled when calling the `"audio"` co
 Let's extract the raw waveform arrays of the first 4 samples and pass it as a list to the pipeline:
 
 ```py
->>> raw_audio_waveforms = [d["array"] for d in dataset[:4]["audio"]]
->>> speech_recognizer(raw_audio_waveforms)
+>>> speech_recognizer(dataset[:4]["audio"])
 [{'text': 'I WOULD LIKE TO SET UP A JOINT ACCOUNT WITH MY PARTNER HOW DO I PROCEED WITH DOING THAT'},
  {'text': "FONDERING HOW I'D SET UP A JOIN TO HET WITH MY WIFE AND WHERE THE AP MIGHT BE"}, 
  {'text': "I I'D LIKE TOY SET UP A JOINT ACCOUNT WITH MY PARTNER I'M NOT SEEING THE OPTION TO DO IT ON THE APSO I CALLED IN TO GET SOME HELP CAN I JUST DO IT OVER THE PHONE WITH YOU AND GIVE YOU THE INFORMATION OR SHOULD I DO IT IN THE AP AND I'M MISSING SOMETHING UQUETTE HAD PREFERRED TO JUST DO IT OVER THE PHONE OF POSSIBLE THINGS"},
diff --git a/docs/source/es/quicktour.mdx b/docs/source/es/quicktour.mdx
index 16a2eacd9e..8410b6cf50 100644
--- a/docs/source/es/quicktour.mdx
+++ b/docs/source/es/quicktour.mdx
@@ -133,8 +133,7 @@ Los archivos de audio se cargan y remuestrean automáticamente cuando se llama a
 Extraigamos las matrices de forma de onda cruda de las primeras 4 muestras y pasémosla como una lista al pipeline:
 
 ```py
->>> raw_audio_waveforms = [d["array"] for d in dataset[:4]["audio"]]
->>> speech_recognizer(raw_audio_waveforms)
+>>> speech_recognizer(dataset[:4]["audio"])
 [{'text': 'I WOULD LIKE TO SET UP A JOINT ACCOUNT WITH MY PARTNER HOW DO I PROCEED WITH DOING THAT'},
  {'text': "FONDERING HOW I'D SET UP A JOIN TO HET WITH MY WIFE AND WHERE THE AP MIGHT BE"}, 
  {'text': "I I'D LIKE TOY SET UP A JOINT ACCOUNT WITH MY PARTNER I'M NOT SEEING THE OPTION TO DO IT ON THE APSO I CALLED IN TO GET SOME HELP CAN I JUST DO IT OVER THE PHONE WITH YOU AND GIVE YOU THE INFORMATION OR SHOULD I DO IT IN THE AP AND I'M MISSING SOMETHING UQUETTE HAD PREFERRED TO JUST DO IT OVER THE PHONE OF POSSIBLE THINGS"},
diff --git a/src/transformers/pipelines/automatic_speech_recognition.py b/src/transformers/pipelines/automatic_speech_recognition.py
index 79e4e8b757..e10dc1208e 100644
--- a/src/transformers/pipelines/automatic_speech_recognition.py
+++ b/src/transformers/pipelines/automatic_speech_recognition.py
@@ -209,7 +209,18 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
         extra = {}
         if isinstance(inputs, dict):
             stride = inputs.pop("stride", None)
-            _inputs = inputs.pop("raw")
+            # Accept `"array"` as an alias for `"raw"`: it is the key used by `datasets`
+            # audio samples, so dataset rows can be passed to the pipeline directly.
+            if not ("sampling_rate" in inputs and ("raw" in inputs or "array" in inputs)):
+                raise ValueError(
+                    "When passing a dictionary to AutomaticSpeechRecognitionPipeline, the dict needs to contain a "
+                    '"raw" (or "array") key containing the numpy array representing the audio and a "sampling_rate" '
+                    "key containing the sampling_rate associated with that array"
+                )
+
+            _inputs = inputs.pop("raw", None)
+            if _inputs is None:
+                _inputs = inputs.pop("array", None)
             in_sampling_rate = inputs.pop("sampling_rate")
             extra = inputs
             inputs = _inputs