Update Phi4 converter (#37594)
* fix converter
* Update phi4_multimodal.md
This commit is contained in:
@@ -64,7 +64,7 @@ inputs = processor.apply_chat_template(
|
|||||||
tokenize=True,
|
tokenize=True,
|
||||||
return_dict=True,
|
return_dict=True,
|
||||||
return_tensors="pt",
|
return_tensors="pt",
|
||||||
).to(device, torch.float16)
|
).to(device)
|
||||||
|
|
||||||
# Generate response
|
# Generate response
|
||||||
generate_ids = model.generate(
|
generate_ids = model.generate(
|
||||||
@@ -98,8 +98,7 @@ inputs = processor.apply_chat_template(
|
|||||||
tokenize=True,
|
tokenize=True,
|
||||||
return_dict=True,
|
return_dict=True,
|
||||||
return_tensors="pt",
|
return_tensors="pt",
|
||||||
sample_rate=sample_rate,
|
).to(device)
|
||||||
).to(device, torch.float16)
|
|
||||||
|
|
||||||
generate_ids = model.generate(
|
generate_ids = model.generate(
|
||||||
**inputs,
|
**inputs,
|
||||||
|
|||||||
@@ -170,12 +170,25 @@ def convert_and_save_processor(input_dir: str, output_dir: str):
|
|||||||
"""Convert the processor."""
|
"""Convert the processor."""
|
||||||
original_processor = AutoProcessor.from_pretrained(input_dir, trust_remote_code=True)
|
original_processor = AutoProcessor.from_pretrained(input_dir, trust_remote_code=True)
|
||||||
original_processor.tokenizer.extra_special_tokens = {"image_token": "<|image|>", "audio_token": "<|audio|>"}
|
original_processor.tokenizer.extra_special_tokens = {"image_token": "<|image|>", "audio_token": "<|audio|>"}
|
||||||
|
# We need to add those temporarily to instantiate the processor
|
||||||
|
original_processor.tokenizer.image_token = "<|image|>"
|
||||||
|
original_processor.tokenizer.audio_token = "<|audio|>"
|
||||||
|
original_processor.tokenizer.image_token_id = 200010
|
||||||
|
original_processor.tokenizer.audio_token_id = 200011
|
||||||
|
|
||||||
converted_processor = Phi4MultimodalProcessor(
|
converted_processor = Phi4MultimodalProcessor(
|
||||||
tokenizer=original_processor.tokenizer,
|
tokenizer=original_processor.tokenizer,
|
||||||
image_processor=Phi4MultimodalImageProcessorFast(),
|
image_processor=Phi4MultimodalImageProcessorFast(),
|
||||||
audio_processor=Phi4MultimodalFeatureExtractor(),
|
audio_processor=Phi4MultimodalFeatureExtractor(),
|
||||||
chat_template=CHAT_TEMPLATE,
|
chat_template=CHAT_TEMPLATE,
|
||||||
)
|
)
|
||||||
|
# We remove them before saving to avoid polluting somehow
|
||||||
|
del converted_processor.tokenizer.image_token
|
||||||
|
del converted_processor.tokenizer.image_token_id
|
||||||
|
del converted_processor.tokenizer.audio_token
|
||||||
|
del converted_processor.tokenizer.audio_token_id
|
||||||
|
|
||||||
|
# Save the processor
|
||||||
converted_processor.save_pretrained(output_dir)
|
converted_processor.save_pretrained(output_dir)
|
||||||
|
|
||||||
# we need to rename a few tokens but tokenizers doesn't allow doing that programmatically
|
# we need to rename a few tokens but tokenizers doesn't allow doing that programmatically
|
||||||
|
|||||||
Reference in New Issue
Block a user