Update Phi4 converter (#37594)

* fix converter

* Update phi4_multimodal.md
This commit is contained in:
Cyril Vallez
2025-04-17 23:08:24 +02:00
committed by GitHub
parent 40cba20e87
commit 4acf692ace
2 changed files with 15 additions and 3 deletions

View File

@@ -64,7 +64,7 @@ inputs = processor.apply_chat_template(
tokenize=True, tokenize=True,
return_dict=True, return_dict=True,
return_tensors="pt", return_tensors="pt",
).to(device, torch.float16) ).to(device)
# Generate response # Generate response
generate_ids = model.generate( generate_ids = model.generate(
@@ -98,8 +98,7 @@ inputs = processor.apply_chat_template(
tokenize=True, tokenize=True,
return_dict=True, return_dict=True,
return_tensors="pt", return_tensors="pt",
sample_rate=sample_rate, ).to(device)
).to(device, torch.float16)
generate_ids = model.generate( generate_ids = model.generate(
**inputs, **inputs,

View File

@@ -170,12 +170,25 @@ def convert_and_save_processor(input_dir: str, output_dir: str):
"""Convert the processor.""" """Convert the processor."""
original_processor = AutoProcessor.from_pretrained(input_dir, trust_remote_code=True) original_processor = AutoProcessor.from_pretrained(input_dir, trust_remote_code=True)
original_processor.tokenizer.extra_special_tokens = {"image_token": "<|image|>", "audio_token": "<|audio|>"} original_processor.tokenizer.extra_special_tokens = {"image_token": "<|image|>", "audio_token": "<|audio|>"}
# We need to add those temporarily to instantiate the processor
original_processor.tokenizer.image_token = "<|image|>"
original_processor.tokenizer.audio_token = "<|audio|>"
original_processor.tokenizer.image_token_id = 200010
original_processor.tokenizer.audio_token_id = 200011
converted_processor = Phi4MultimodalProcessor( converted_processor = Phi4MultimodalProcessor(
tokenizer=original_processor.tokenizer, tokenizer=original_processor.tokenizer,
image_processor=Phi4MultimodalImageProcessorFast(), image_processor=Phi4MultimodalImageProcessorFast(),
audio_processor=Phi4MultimodalFeatureExtractor(), audio_processor=Phi4MultimodalFeatureExtractor(),
chat_template=CHAT_TEMPLATE, chat_template=CHAT_TEMPLATE,
) )
# We remove them before saving so the temporary attributes do not pollute the saved tokenizer
del converted_processor.tokenizer.image_token
del converted_processor.tokenizer.image_token_id
del converted_processor.tokenizer.audio_token
del converted_processor.tokenizer.audio_token_id
# Save the processor
converted_processor.save_pretrained(output_dir) converted_processor.save_pretrained(output_dir)
# we need to rename a few tokens but tokenizers doesn't allow doing that programmatically # we need to rename a few tokens but tokenizers doesn't allow doing that programmatically