| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import os |
| from typing import TYPE_CHECKING |
|
|
| import fire |
| from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
|
|
| if TYPE_CHECKING: |
| from transformers import PreTrainedModel |
|
|
|
|
def quantize_loftq(
    model_name_or_path: str,
    output_dir: str,
    loftq_bits: int = 4,
    loftq_iter: int = 4,
    lora_alpha: int = None,
    lora_rank: int = 16,
    lora_dropout: float = 0,
    lora_target: tuple = ("q_proj", "v_proj"),
    save_safetensors: bool = True,
):
    r"""
    Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ)
    Usage: python loftq_init.py --model_name_or_path path_to_model --output_dir output_dir

    The tokenizer and causal LM are loaded from ``model_name_or_path``, wrapped
    in a LoRA adapter whose weights are initialised with ``"loftq"``, and then
    written out in two pieces: the adapter under ``<output_dir>/loftq_init`` and
    the unloaded base model plus tokenizer under ``output_dir``.

    Args:
        model_name_or_path: Source passed to ``from_pretrained`` for both the
            tokenizer and the model.
        output_dir: Directory receiving the base model and tokenizer; the
            adapter goes into its ``loftq_init`` sub-directory.
        loftq_bits: Forwarded to ``LoftQConfig(loftq_bits=...)``.
        loftq_iter: Forwarded to ``LoftQConfig(loftq_iter=...)``.
        lora_alpha: LoRA alpha; when ``None`` it falls back to ``lora_rank * 2``.
        lora_rank: LoRA rank (``r``).
        lora_dropout: LoRA dropout.
        lora_target: Target module names; a comma-separated string is split
            into a list of stripped names.
        save_safetensors: Forwarded as ``safe_serialization`` when saving the
            adapter and the base model.
    """
    # A comma-separated string (e.g. from the command line) becomes a list of names.
    if isinstance(lora_target, str):
        lora_target = list(map(str.strip, lora_target.split(",")))

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        trust_remote_code=True,
        torch_dtype="auto",
    )

    # Default alpha is twice the rank unless the caller supplied one.
    effective_alpha = lora_alpha
    if effective_alpha is None:
        effective_alpha = lora_rank * 2

    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=True,
        r=lora_rank,
        lora_alpha=effective_alpha,
        lora_dropout=lora_dropout,
        target_modules=lora_target,
        init_lora_weights="loftq",
        loftq_config=LoftQConfig(loftq_bits=loftq_bits, loftq_iter=loftq_iter),
    )

    # Build the LoftQ-initialised PEFT model.
    print("Initializing LoftQ weights, it may be take several minutes, wait patiently.")
    peft_model = get_peft_model(model, lora_config)
    loftq_dir = os.path.join(output_dir, "loftq_init")

    # Save the adapter. The stored config points at the exported base model and
    # has ``init_lora_weights`` reset to True before it is written out.
    # NOTE(review): presumably this keeps a later load of the adapter from
    # running LoftQ initialisation a second time -- confirm against PEFT.
    adapter_config = peft_model.peft_config["default"]
    adapter_config.base_model_name_or_path = os.path.abspath(output_dir)
    adapter_config.init_lora_weights = True
    peft_model.save_pretrained(loftq_dir, safe_serialization=save_safetensors)
    print(f"Adapter weights saved in {loftq_dir}")

    # Save the base model (after unloading the adapter) together with the tokenizer.
    base_model: "PreTrainedModel" = peft_model.unload()
    base_model.save_pretrained(output_dir, safe_serialization=save_safetensors)
    tokenizer.save_pretrained(output_dir)
    print(f"Model weights saved in {output_dir}")

    # Tell the user which settings to use for the follow-up fine-tuning run.
    usage_hints = (
        "- Fine-tune this model with:",
        f"model_name_or_path: {output_dir}",
        f"adapter_name_or_path: {loftq_dir}",
        "finetuning_type: lora",
        f"quantization_bit: {loftq_bits}",
    )
    for hint in usage_hints:
        print(hint)
|
|
|
|
# Script entry point: expose quantize_loftq's parameters as command-line flags.
if __name__ == "__main__":
    fire.Fire(component=quantize_loftq)
|
|