[CSM] infer codec model with no_grad + audio eos label (#38215)

* infer codec model with no_grad

* codec_model eval

* training labels: add audio eos token
This commit is contained in:
eustlb
2025-05-27 16:10:17 +02:00
committed by GitHub
parent 10ae443ec0
commit 3142bd8592
4 changed files with 40 additions and 31 deletions

View File

@@ -315,6 +315,7 @@ device = "cuda"
# Load the processor and the CSM model for `model_id`; the model is placed on `device`.
processor = AutoProcessor.from_pretrained(model_id)
model = CsmForConditionalGeneration.from_pretrained(model_id, device_map=device)
# Put the whole model in training mode first, then switch only the codec
# sub-model back to eval mode. Order matters: assuming standard
# torch.nn.Module semantics, `train()` sets the mode on every sub-module,
# so calling it after `codec_model.eval()` would undo the eval setting.
# NOTE(review): presumably the codec model is only used for inference
# (no gradient updates) during training — confirm against the model code.
model.train()
model.codec_model.eval()
# Load the "train" split of the dummy DailyTalk dataset.
ds = load_dataset("hf-internal-testing/dailytalk-dummy", split="train")
# ensure the audio is 24kHz