# NOTE(review): the three lines below are Hugging Face file-viewer residue
# (uploader caption, commit message, commit hash) — not part of the config.
# Commented out so the file parses as YAML.
# Zaynes's picture
# Upload folder using huggingface_hub
# 2203975 verified
---
# Instantiation target (Hydra-style `_target_`); null = plain config, nothing
# is instantiated — TODO confirm against the consuming loader.
_target_: null

# Job identity and launch mode.
job:
  name: testing__pvv2_lora
  mode: local        # run on the local machine (not a cluster submission)
  dry_run: false
  work_dir: null

# SLURM submission options (all unset/default — unused while mode is local).
# NOTE(review): nesting reconstructed from flattened source — `slurm` may
# instead belong under `job`; verify against the loader's schema.
slurm:
  time_limit: null
  constraint: null
  memory: null
  cpus_per_task: 16
  partition: null
  mail_user: null
# Hardware layout for the run.
# NOTE(review): nesting reconstructed from flattened source — `execution` may
# instead belong under `job`; verify against the loader's schema.
execution:
  nodes: 1
  gpus_per_node: 2
  num_gpus: null     # null — presumably derived from nodes * gpus_per_node; confirm
  hostfile: null

# Env file holding credentials; loaded at runtime, values not stored here.
secrets_file: ./secrets.env
# Base model and adaptation method.
model:
  name_or_path: Qwen/Qwen2.5-1.5B-Instruct
  finetuning_type: lora
# Dataset selection and preprocessing. Key names match LLaMA-Factory's
# `dataset_info` schema (sharegpt formatting) — TODO confirm consumer.
dataset:
  name: TAUR_dev_D_SFT_C_ours_cd3arg_10responses_reflections10_formats_C_full
  dir: null
  info_json: null
  template: qwen                 # chat template applied during tokenization
  cutoff_len: 16192              # token cutoff length per example
  val_size: 0.0                  # no validation split carved from this dataset
  tokenized_path: /scratch/zrs2020/.cache/llamafactory/tokenized/TAUR_dev_D_SFT_C_ours_cd3arg_10responses_reflections10_formats_C_full
  hf_hub_url: TAUR-dev/D-SFT_C-ours_cd3arg_10responses_reflections10_formats-C_full
  formatting: sharegpt           # conversations stored in ShareGPT layout
  ranking: false
  subset: null
  split: train
  folder: null
  num_samples: null
  # Column mapping: only `messages` is remapped (the dataset stores each
  # conversation under the `conversations` column); unused columns stay null.
  columns:
    prompt: null
    query: null
    response: null
    history: null
    messages: conversations
    system: null
    tools: null
    images: null
    videos: null
    audios: null
    chosen: null
    rejected: null
    kto_tag: null
  # Tag names inside each conversation turn (role/content field names and
  # the role values that mark user/assistant turns).
  tags:
    role: role
    content: content
    user: user
    assistant: assistant
    observation: null
    function: null
    system: null
# Where artifacts go and how the run is reported/uploaded.
output:
  experiment_dir: ./experiments
  # Weights & Biases logging (project/entity unset — presumably disabled; confirm).
  wandb:
    project: null
    run_name: testing__pvv2_lora
    entity: null
  # Hugging Face Hub upload target.
  hf:
    repo_id: TAUR-dev/testing__pvv2_lora
    private: false
    upload_artifacts: true
  # Post-run deletion toggles (keep both checkpoints and merged weights).
  cleanup:
    checkpoints: false
    merged: false
# Trainer hyperparameters for the SFT stage.
training:
  stage: sft
  do_train: true
  max_samples: 100000
  do_eval: false
  save_strategy: steps
  save_steps: 5                  # NOTE(review): very frequent; with max_steps: 10 this checkpoints twice
  logging_steps: 10
  fp16: false
  bf16: true                     # bf16 on, fp16 off — pick exactly one precision flag
  adam_beta1: 0.9
  adam_beta2: 0.95
  overwrite_output_dir: true
  per_device_train_batch_size: 1
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 1.0e-06
  lr_scheduler_type: cosine
  num_train_epochs: 2
  warmup_ratio: 0.05
  weight_decay: 0.0001
  template: qwen                 # duplicated from dataset.template — keep the two in sync
  max_steps: 10                  # caps the run at 10 optimizer steps (typically overrides num_train_epochs)
  preprocessing_num_workers: 16
  overwrite_cache: true
# Default LoRA fine-tuning recipe. The nested `training` map mirrors many keys
# of the top-level `training` section; NOTE(review): which one wins is decided
# by the consumer's merge order — confirm before editing either copy.
finetuning:
  training:
    stage: sft
    do_train: true
    finetuning_type: lora
    lora_rank: 8
    lora_alpha: 16
    lora_dropout: 0.05
    lora_target: all             # "all" — presumably all linear modules (LLaMA-Factory shorthand); confirm
    overwrite_cache: true
    preprocessing_num_workers: 16
    dataloader_num_workers: 4
    logging_steps: 10
    save_steps: 500
    plot_loss: true
    overwrite_output_dir: true
    save_only_model: false
    report_to: none
    per_device_train_batch_size: 1
    gradient_accumulation_steps: 8
    learning_rate: 0.0001
    num_train_epochs: 3.0
    lr_scheduler_type: cosine
    warmup_ratio: 0.1
    bf16: true
    ddp_timeout: 180000000
    resume_from_checkpoint: null
    val_size: 0.1
    per_device_eval_batch_size: 1
    eval_strategy: steps
    eval_steps: 500
    do_eval: true
# LoRA merge/export settings (export_dir null — presumably export disabled or
# defaulted by the consumer; confirm).
merge:
  export_dir: null
  export_size: 5                 # shard size for exported weights — TODO confirm units (GB)
  export_device: cpu
  export_legacy_format: false