Update Phi4 converter (#37594)
* fix converter
* Update phi4_multimodal.md
This commit is contained in:
@@ -64,7 +64,7 @@ inputs = processor.apply_chat_template(
|
||||
tokenize=True,
|
||||
return_dict=True,
|
||||
return_tensors="pt",
|
||||
).to(device, torch.float16)
|
||||
).to(device)
|
||||
|
||||
# Generate response
|
||||
generate_ids = model.generate(
|
||||
@@ -98,8 +98,7 @@ inputs = processor.apply_chat_template(
|
||||
tokenize=True,
|
||||
return_dict=True,
|
||||
return_tensors="pt",
|
||||
sample_rate=sample_rate,
|
||||
).to(device, torch.float16)
|
||||
).to(device)
|
||||
|
||||
generate_ids = model.generate(
|
||||
**inputs,
|
||||
|
||||
@@ -170,12 +170,25 @@ def convert_and_save_processor(input_dir: str, output_dir: str):
|
||||
"""Convert the processor."""
|
||||
original_processor = AutoProcessor.from_pretrained(input_dir, trust_remote_code=True)
|
||||
original_processor.tokenizer.extra_special_tokens = {"image_token": "<|image|>", "audio_token": "<|audio|>"}
|
||||
# We need to add those temporarily to instantiate the processor
|
||||
original_processor.tokenizer.image_token = "<|image|>"
|
||||
original_processor.tokenizer.audio_token = "<|audio|>"
|
||||
original_processor.tokenizer.image_token_id = 200010
|
||||
original_processor.tokenizer.audio_token_id = 200011
|
||||
|
||||
converted_processor = Phi4MultimodalProcessor(
|
||||
tokenizer=original_processor.tokenizer,
|
||||
image_processor=Phi4MultimodalImageProcessorFast(),
|
||||
audio_processor=Phi4MultimodalFeatureExtractor(),
|
||||
chat_template=CHAT_TEMPLATE,
|
||||
)
|
||||
# We remove the temporary attributes before saving so they do not pollute the saved processor
|
||||
del converted_processor.tokenizer.image_token
|
||||
del converted_processor.tokenizer.image_token_id
|
||||
del converted_processor.tokenizer.audio_token
|
||||
del converted_processor.tokenizer.audio_token_id
|
||||
|
||||
# Save the processor
|
||||
converted_processor.save_pretrained(output_dir)
|
||||
|
||||
# we need to rename a few tokens but tokenizers doesn't allow doing that programmatically
|
||||
|
||||
Reference in New Issue
Block a user