Add MusicGen Melody (#28819)
* first modeling code * make repository * still WIP * update model * add tests * add latest change * clean docstrings and copied from * update docstrings md and readme * correct chroma function * correct copied from and remove unrelated test * add doc to toctree * correct imports * add convert script to notdoctested * Add suggestion from Sanchit Co-authored-by: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com> * correct get_unconditional_inputs docstrings * modify README according to SANCHIT feedback * add chroma to audio utils * clean librosa and torchaudio hard dependencies * fix FE * refactor audio decoder -> audio encoder for consistency with previous musicgen * refactor conditional -> encoder * modify sampling rate logics * modify license at the beginning * refactor all_self_attns->all_attentions * remove ignore copy from causallm generate * add copied from for from_sub_models * fix make copies * add warning if audio is truncated * add copied from where relevant * remove artefact * fix convert script * fix torchaudio and FE * modify chroma method according to feedback-> better naming * refactor input_values->input_features * refactor input_values->input_features and fix import fe * add input_features to docstrings * correct inputs_embeds logics * remove dtype conversion * refactor _prepare_conditional_hidden_states_kwargs_for_generation ->_prepare_encoder_hidden_states_kwargs_for_generation * change warning for chroma length * Update src/transformers/models/musicgen_melody/convert_musicgen_melody_transformers.py Co-authored-by: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com> * change way to save wav, using soundfile * correct docs and change to soundfile * fix import * fix init proj layers * remove line breaks from md * fix issue with docstrings * add FE suggestions * improve is in logics and remove useless imports * remove custom from_pretrained * simplify docstring code * add suggestions for modeling tests * make style * 
update converting script with sanity check * remove encoder attention mask from conditional generation * replace musicgen melody checkpoints with official orga * rename ylacombe->facebook in checkpoints * fix copies * remove unnecessary warning * add shape in code docstrings * add files to slow doc tests * fix md bug and add md to not_tested * make fix-copies * fix hidden states test and batching --------- Co-authored-by: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
This commit is contained in:
@@ -20,6 +20,7 @@ import pytest
|
||||
|
||||
from transformers.audio_utils import (
|
||||
amplitude_to_db,
|
||||
chroma_filter_bank,
|
||||
hertz_to_mel,
|
||||
mel_filter_bank,
|
||||
mel_to_hertz,
|
||||
@@ -27,6 +28,11 @@ from transformers.audio_utils import (
|
||||
spectrogram,
|
||||
window_function,
|
||||
)
|
||||
from transformers.testing_utils import is_librosa_available, require_librosa
|
||||
|
||||
|
||||
if is_librosa_available():
|
||||
from librosa.filters import chroma
|
||||
|
||||
|
||||
class AudioUtilsFunctionTester(unittest.TestCase):
|
||||
@@ -755,3 +761,57 @@ class AudioUtilsFunctionTester(unittest.TestCase):
|
||||
amplitude_to_db(spectrogram, min_value=0.0)
|
||||
with pytest.raises(ValueError):
|
||||
amplitude_to_db(spectrogram, db_range=-80)
|
||||
|
||||
@require_librosa
def test_chroma_equivalence(self):
    """Check that `chroma_filter_bank` matches `librosa.filters.chroma`.

    Both functions are called with the same sampling rate, number of chroma
    and number of frequency bins (passed to librosa as `n_fft`). Each case
    adds a pair of keyword-argument sets, one per function, that are
    expected to select the same behaviour, and the two resulting filter
    banks are compared with `np.allclose`.

    Every case runs in its own `subTest`, so a failure names the parameter
    set that diverged and does not prevent the remaining cases from running.
    """
    num_frequency_bins = 25
    num_chroma = 6
    sampling_rate = 24000

    # Arguments shared by every case, spelled in each library's own naming.
    librosa_common = {"sr": sampling_rate, "n_chroma": num_chroma, "n_fft": num_frequency_bins}
    utils_common = {
        "num_frequency_bins": num_frequency_bins,
        "num_chroma": num_chroma,
        "sampling_rate": sampling_rate,
    }

    # (case label, extra librosa kwargs, extra chroma_filter_bank kwargs)
    cases = [
        ("default parameters", {}, {}),
        ("no weighting_parameters", {"octwidth": None}, {"weighting_parameters": None}),
        ("L1 norm", {"norm": 1.0}, {"power": 1.0}),
        (
            "starting at 'A' chroma, power = None, tuning = 0, different weighting_parameters",
            {"norm": None, "base_c": None, "octwidth": 1.0, "ctroct": 4.0},
            {"power": None, "start_at_c_chroma": False, "weighting_parameters": (4.0, 1.0)},
        ),
    ]

    for label, librosa_extra, utils_extra in cases:
        with self.subTest(case=label):
            original_chroma = chroma(**librosa_common, **librosa_extra)
            utils_chroma = chroma_filter_bank(**utils_common, **utils_extra)

            self.assertTrue(
                np.allclose(original_chroma, utils_chroma),
                msg=f"chroma_filter_bank differs from librosa.filters.chroma for case: {label}",
            )
|
||||
|
||||
Reference in New Issue
Block a user