| dataset_path: /global/cfs/cdirs/m4717/azton/galaxy-foundations/object_foundation/utils/supermock_dataset_11.2-14.json |
| mask_token: 0 |
| masked_generation: false |
| masking_prob: |
| - 0.2 |
| - 0.2 |
| - 0.2 |
| - 0.2 |
| - 0.5 |
| - 0.5 |
| - 0.5 |
| modalities: |
| - SFH |
| - SED |
| - mag_{band}_lsst |
| - mag_{band}_spherex |
| - redshift |
| - halo_mass |
| - stellar_mass |
| model_config: |
| attention_probs_dropout_prob: 0.1 |
| classifier_dropout: 0.0 |
| contrastive_temperature: 0.05 |
| hidden_dropout_prob: 0.1 |
| hidden_size: 768 |
| intermediate_size: 3072 |
| loss_weights: |
| contrastive: |
| rounds: 0 |
| w0T: |
| - 0 |
| - 0 |
| masked: |
| rounds: 0 |
| w0T: |
| - 0.8 |
| - 3 |
| smooth: |
| rounds: 0 |
| w0T: |
| - 0 |
| - 0.3 |
| unmasked: |
| rounds: 0 |
| w0T: |
| - 0.2 |
| - 0.3 |
| max_position_embeddings: 1149 |
| num_attention_heads: 12 |
| num_hidden_layers: 18 |
| pad_token_id: -1 |
| transform_numeric: false |
| use_contrastive_loss: false |
| use_mlm_loss: true |
| use_regression_loss: false |
| use_xval_loss: false |
| vocab_size: 2048 |
| model_name_or_path: galaxybert |
| tokenizer_name_or_path: Salesforce/SFR-Embedding-Mistral |
| training_args: |
| _n_gpu: 1 |
| accelerator_config: |
| dispatch_batches: null |
| even_batches: true |
| gradient_accumulation_kwargs: null |
| non_blocking: false |
| split_batches: false |
| use_configured_state: false |
| use_seedable_sampler: true |
| adafactor: false |
| adam_beta1: 0.9 |
| adam_beta2: 0.999 |
| adam_epsilon: 1.0e-08 |
| auto_find_batch_size: false |
| average_tokens_across_devices: false |
| batch_eval_metrics: false |
| bf16: true |
| bf16_full_eval: false |
| data_seed: null |
| dataloader_drop_last: false |
| dataloader_num_workers: 16 |
| dataloader_persistent_workers: false |
| dataloader_pin_memory: true |
| dataloader_prefetch_factor: 8 |
| ddp_backend: null |
| ddp_broadcast_buffers: null |
| ddp_bucket_cap_mb: null |
| ddp_find_unused_parameters: null |
| ddp_timeout: 1800 |
| debug: [] |
| deepspeed: null |
| disable_tqdm: false |
| dispatch_batches: null |
| do_eval: true |
| do_predict: false |
| do_train: false |
| eval_accumulation_steps: 5 |
| eval_delay: 0 |
| eval_do_concat_batches: true |
| eval_on_start: false |
| eval_steps: 20 |
| eval_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy |
| - steps |
| eval_use_gather_object: false |
| evaluation_strategy: null |
| fp16: false |
| fp16_backend: auto |
| fp16_full_eval: false |
| fp16_opt_level: O1 |
| fsdp: [] |
| fsdp_config: |
| min_num_params: 0 |
| xla: false |
| xla_fsdp_grad_ckpt: false |
| xla_fsdp_v2: false |
| fsdp_min_num_params: 0 |
| fsdp_transformer_layer_cls_to_wrap: null |
| full_determinism: false |
| gradient_accumulation_steps: 5 |
| gradient_checkpointing: false |
| gradient_checkpointing_kwargs: null |
| greater_is_better: null |
| group_by_length: false |
| half_precision_backend: auto |
| hub_always_push: false |
| hub_model_id: null |
| hub_private_repo: false |
| hub_strategy: !!python/object/apply:transformers.trainer_utils.HubStrategy |
| - every_save |
| hub_token: null |
| ignore_data_skip: false |
| include_for_metrics: [] |
| include_inputs_for_metrics: false |
| include_num_input_tokens_seen: false |
| include_tokens_per_second: false |
| jit_mode_eval: false |
| label_names: null |
| label_smoothing_factor: 0.0 |
| learning_rate: 0.0001 |
| length_column_name: length |
| load_best_model_at_end: false |
| local_rank: 0 |
| log_level: passive |
| log_level_replica: warning |
| log_on_each_node: true |
| logging_dir: sm_foundation_lg_gmm_nomasklab |
| logging_first_step: true |
| logging_nan_inf_filter: true |
| logging_steps: 1 |
| logging_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy |
| - steps |
| lr_scheduler_kwargs: {} |
| lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType |
| - cosine |
| max_grad_norm: 1.0 |
| max_steps: -1 |
| metric_for_best_model: null |
| mp_parameters: '' |
| neftune_noise_alpha: null |
| no_cuda: false |
| num_train_epochs: 60 |
| optim: !!python/object/apply:transformers.training_args.OptimizerNames |
| - adamw_torch |
| optim_args: null |
| optim_target_modules: null |
| output_dir: supermock_te60_ |
| overwrite_output_dir: true |
| past_index: -1 |
| per_device_eval_batch_size: 100 |
| per_device_train_batch_size: 100 |
| per_gpu_eval_batch_size: null |
| per_gpu_train_batch_size: null |
| prediction_loss_only: false |
| push_to_hub: false |
| push_to_hub_model_id: null |
| push_to_hub_organization: null |
| push_to_hub_token: null |
| ray_scope: last |
| remove_unused_columns: false |
| report_to: |
| - wandb |
| restore_callback_states_from_checkpoint: false |
| resume_from_checkpoint: null |
| run_name: NO_SHARD_b50 |
| save_on_each_node: false |
| save_only_model: false |
| save_safetensors: true |
| save_steps: 30 |
| save_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy |
| - steps |
| save_total_limit: 360 |
| seed: 42 |
| skip_memory_metrics: true |
| split_batches: null |
| tf32: null |
| torch_compile: false |
| torch_compile_backend: null |
| torch_compile_mode: null |
| torch_empty_cache_steps: null |
| torchdynamo: null |
| tpu_metrics_debug: false |
| tpu_num_cores: null |
| use_cpu: false |
| use_ipex: false |
| use_legacy_prediction_loop: false |
| use_liger_kernel: false |
| use_mps_device: false |
| warmup_ratio: 0.0 |
| warmup_steps: 0 |
| weight_decay: 0.1 |
| transform_numeric: false |
| wandb_project: supermock-foundation-perl |
| wandb_run_name: '' |
|
|