# /// script
# dependencies = [
#     "trl>=0.12.0",
#     "peft>=0.7.0",
#     "trackio",
#     "bitsandbytes>=0.43.0",
#     "accelerate>=0.30.0",
#     "datasets",
#     "transformers>=4.57.0",
#     "torch",
# ]
# ///
"""
Fine-tune GLM-4.7-Flash on Smart Contract Security Audit Findings

Uses QLoRA for memory-efficient training on a 31B MoE model.

Pipeline, in order:
  1. Load the train (and optional validation) split of the dataset.
  2. Load the base model in 4-bit NF4 with a GPU/CPU memory budget.
  3. Attach LoRA adapters and run supervised fine-tuning with TRL.
  4. Save the result locally and push it to the Hugging Face Hub.
"""

import dataclasses
import os

import torch
from datasets import load_dataset
from peft import LoraConfig, TaskType, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
from trl import SFTConfig, SFTTrainer

# Configuration
MODEL_ID = "zai-org/GLM-4.7-Flash"
DATASET_ID = "SkywardNomad92/smart-contract-audit-findings"
OUTPUT_MODEL = "SkywardNomad92/glm-4.7-flash-security-auditor"
OFFLOAD_DIR = "offload"  # Scratch folder handed to from_pretrained for offloaded weights
MAX_SEQ_LENGTH = 2048  # Maximum tokenized sequence length used for training

# The sequence-length field of SFTConfig was renamed from `max_seq_length` to
# `max_length` within the TRL range allowed by the header (`trl>=0.12.0`).
# Pick whichever name the installed release actually defines so the script
# works on both sides of the rename.
_SFT_CONFIG_FIELDS = {field.name for field in dataclasses.fields(SFTConfig)}
_MAX_LENGTH_KWARG = (
    "max_length" if "max_length" in _SFT_CONFIG_FIELDS else "max_seq_length"
)

print(f"Loading dataset from {DATASET_ID}...")
dataset = load_dataset(DATASET_ID)

train_dataset = dataset["train"]
# The validation split is optional; evaluation is disabled below without it.
eval_dataset = dataset["validation"] if "validation" in dataset else None

print(f"Train examples: {len(train_dataset)}")
if eval_dataset:
    print(f"Eval examples: {len(eval_dataset)}")

# 4-bit quantization config for memory efficiency
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

print(f"Loading model {MODEL_ID} with 4-bit quantization and CPU offloading...")

# Create offload folder
os.makedirs(OFFLOAD_DIR, exist_ok=True)

# NOTE(review): if the "auto" device map actually places modules on the CPU,
# bitsandbytes-quantized loading may require
# `llm_int8_enable_fp32_cpu_offload=True` in the quantization config --
# confirm against the transformers quantization docs on the target hardware.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    attn_implementation="sdpa",
    max_memory={0: "40GiB", "cpu": "80GiB"},  # Allow CPU offloading
    offload_folder=OFFLOAD_DIR,
    offload_state_dict=True,
)

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
)
# Only fall back to EOS when the tokenizer ships without a pad token;
# an existing dedicated pad token is left untouched.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Prepare model for QLoRA training
model = prepare_model_for_kbit_training(model)

# LoRA configuration - targeting attention and MLP projection layers
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=[
        # Attention projections
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        # MLP projections
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    task_type=TaskType.CAUSAL_LM,
    bias="none",
)

# Training configuration
training_args = SFTConfig(
    output_dir="./glm-flash-security-auditor",
    # Training params
    num_train_epochs=1,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=16,  # Effective batch size = 16 per device
    # Learning rate
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    # Memory optimization
    gradient_checkpointing=True,
    optim="paged_adamw_8bit",
    bf16=True,
    # Evaluation (only when a validation split was found)
    eval_strategy="steps" if eval_dataset else "no",
    eval_steps=200,
    # Saving
    save_strategy="steps",
    save_steps=200,
    save_total_limit=3,
    # Hub
    push_to_hub=True,
    hub_model_id=OUTPUT_MODEL,
    hub_strategy="every_save",
    # Logging
    logging_steps=10,
    report_to="trackio",
    run_name="glm-flash-security-auditor",
    # Other
    dataset_text_field=None,  # Using chat template
    **{_MAX_LENGTH_KWARG: MAX_SEQ_LENGTH},
)

print("Starting training...")
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=lora_config,
    # `processing_class` is the replacement for the deprecated `tokenizer`
    # keyword and is accepted from TRL 0.12 onwards.
    processing_class=tokenizer,
)

trainer.train()

print("Saving final model...")
trainer.save_model()
trainer.push_to_hub()

print(f"✅ Training complete! Model saved to {OUTPUT_MODEL}")