Open
Description
# Launch LoRA fine-tuning of the LLaVA model through the DeepSpeed launcher.
# Every line except the last ends with a backslash so the shell reads the
# whole invocation as ONE command; without them only `deepspeed run.py`
# would run and each `--flag` line would be executed as a separate command.
deepspeed run.py \
    --deepspeed ds_zero2_no_offload.json \
    --model_name_or_path /home/wangfeiyu/feiyu/train_llava/show_model/model001 \
    --train_type use_lora \
    --data_path /home/wangfeiyu/feiyu/LLaVA-CC3M-Pretrain-595K \
    --remove_unused_columns false \
    --bf16 true \
    --fp16 false \
    --dataloader_pin_memory True \
    --dataloader_num_workers 5 \
    --dataloader_persistent_workers True \
    --output_dir output_model_user_lora_0705 \
    --num_train_epochs 1 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --evaluation_strategy "no" \
    --save_strategy "epoch" \
    --save_total_limit 3 \
    --report_to "tensorboard" \
    --learning_rate 4e-4 \
    --logging_steps 10
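训练完成以后，用下面的代码加载模型进行推理时发生报错（After training finished, loading the model for inference with the code below raises an error）：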

# Base (pre-LoRA) model checkpoint and the directory holding the trained adapter.
raw_model_name_or_path = "/home/wangfeiyu/feiyu/train_llava/show_model/model001"
peft_model_name_or_path = "/home/wangfeiyu/feiyu/train_llava/output_model_user_lora_0705"

# Load the base LLaVA weights in bfloat16 onto the second GPU.
model = LlavaForConditionalGeneration.from_pretrained(
    raw_model_name_or_path,
    device_map="cuda:1",
    torch_dtype=torch.bfloat16,
)

# Wrap the base model with the adapter stored in the training output
# directory, registering it under the adapter name "peft_v1".
model = PeftModel.from_pretrained(
    model,
    peft_model_name_or_path,
    adapter_name="peft_v1",
)

# The processor is loaded from the base checkpoint, not the adapter directory.
processor = AutoProcessor.from_pretrained(raw_model_name_or_path)

# Switch to evaluation mode before inference, then report that loading finished.
model.eval()
print("ok")
Metadata
Metadata
Assignees
Labels
No labels