Recommended options for multimodal model training
Recommended VRAM [source]
Full LLM training: 8x 32G/40G
With LoRA: 2x 32G/40G
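A minimal sketch, assuming a PyTorch environment, for checking whether the current node meets the recommended VRAM above before launching a run; the helper name and thresholds are illustrative and not part of the training stack.

import torch

# Recommended setups from the recommendation above (illustrative constants).
REQUIRED_GPUS = {"full": 8, "lora": 2}   # full LLM training vs. LoRA
MIN_VRAM_GB = 32                         # 32G/40G-class cards

def meets_vram_recommendation(mode: str = "lora") -> bool:
    """Return True if the node has enough GPUs with sufficient memory."""
    if not torch.cuda.is_available():
        return False
    count = torch.cuda.device_count()
    if count < REQUIRED_GPUS[mode]:
        return False
    smallest_gb = min(
        torch.cuda.get_device_properties(i).total_memory / 1024**3
        for i in range(count)
    )
    return smallest_gb >= MIN_VRAM_GB

print(meets_vram_recommendation("lora"))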
Recommended hyperparameters
{
  "max_seq_length": 1024,
  "num_train_epochs": 1,
  "per_device_train_batch_size": 1,
  "learning_rate": 0.00004,
  "log_level": "warning",
  "logging_dir": "./logs",
  "logging_strategy": "no",
  "logging_first_step": 1,
  "logging_steps": 10,
  "fp16": 0,
  "bf16": 1,
  "seed": 42,
  "conv_style": "Hermes-2",
  "force_image_size": 448,
  "max_dynamic_patch": 6,
  "down_sample_ratio": 0.5,
  "drop_path_rate": 0,
  "freeze_llm": true,
  "freeze_mlp": true,
  "freeze_backbone": false,
  "use_llm_lora": 16,
  "vision_select_layer": -1,
  "dataloader_num_workers": 4,
  "save_total_limit": 1,
  "weight_decay": 0.05,
  "warmup_ratio": 0.03,
  "lr_scheduler_type": "cosine",
  "do_train": true,
  "grad_checkpoint": true,
  "group_by_length": true,
  "use_thumbnail": true,
  "ps_version": "v2",
  "eval_ratio": 0.1
}
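A minimal sketch of how these hyperparameters could be consumed, assuming they are saved to a file named hyperparams.json (illustrative name) and used with a Hugging Face Trainer-based fine-tuning script. The split below between Trainer arguments and model-specific flags is an assumption: keys such as conv_style, force_image_size, and use_llm_lora are handled by the fine-tuning script itself rather than by TrainingArguments.

import json
from transformers import TrainingArguments

with open("hyperparams.json") as f:
    params = json.load(f)

# Keys that map directly onto TrainingArguments fields.
trainer_keys = {
    "num_train_epochs", "per_device_train_batch_size", "learning_rate",
    "log_level", "logging_dir", "logging_strategy", "logging_first_step",
    "logging_steps", "seed", "dataloader_num_workers", "save_total_limit",
    "weight_decay", "warmup_ratio", "lr_scheduler_type", "do_train",
    "group_by_length",
}

trainer_kwargs = {k: v for k, v in params.items() if k in trainer_keys}
# fp16/bf16/logging_first_step are given as 0/1 above; TrainingArguments expects booleans.
trainer_kwargs["fp16"] = bool(params.get("fp16", 0))
trainer_kwargs["bf16"] = bool(params.get("bf16", 0))
trainer_kwargs["logging_first_step"] = bool(params.get("logging_first_step", 0))

args = TrainingArguments(output_dir="./output", **trainer_kwargs)

# Everything else (max_seq_length, conv_style, force_image_size,
# max_dynamic_patch, freeze_* flags, use_llm_lora, ...) is consumed by the
# model-specific fine-tuning script, not by TrainingArguments.
model_kwargs = {k: v for k, v in params.items()
                if k not in trainer_keys and k not in {"fp16", "bf16"}}
print(model_kwargs)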
Recommended training configuration
{
  "tensorboard": {
    "enabled": true,
    "output_path": "/opt/output/experiment/",
    "job_name": "test-train"
  },
  "zero_optimization": {
    "stage": 1,
    "allgather_partitions": true,
    "allgather_bucket_size": 1000000000,
    "overlap_comm": true,
    "reduce_scatter": true,
    "reduce_bucket_size": 1000000000,
    "contiguous_gradients": true
  },
  "fp16": {
    "enabled": "auto",
    "auto_cast": true,
    "loss_scale": 0,
    "initial_scale_power": 32,
    "loss_scale_window": 1000,
    "hysteresis": 2,
    "min_loss_scale": 1
  },
  "bf16": {
    "enabled": "auto"
  },
  "optimizer": {
    "type": "AdamW",
    "params": {
      "lr": "auto",
      "betas": [0.9, 0.999],
      "eps": 1e-8,
      "weight_decay": "auto"
    }
  },
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": "auto",
  "steps_per_print": 2000,
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "wall_clock_breakdown": true,
  "column_map": {
    "input": "question",
    "image": "image_base64",
    "output": "answer"
  }
}
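A minimal sketch of how this configuration is typically wired in, assuming it is saved as ds_config.json (illustrative name) and handed to a Hugging Face Trainer; DeepSpeed then resolves the "auto" fields from the Trainer's own arguments. Note that the column_map block is not a DeepSpeed option; it appears to map the pipeline's input, image, and output fields onto the dataset columns question, image_base64, and answer.

from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="./output",            # illustrative output path
    per_device_train_batch_size=1,    # feeds train_micro_batch_size_per_gpu: "auto"
    gradient_accumulation_steps=4,    # feeds gradient_accumulation_steps: "auto"
    bf16=True,                        # resolves bf16.enabled: "auto"
    deepspeed="ds_config.json",       # the JSON block above, saved to disk
)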