| encoder: SenseVoiceEncoderSmall | |
| encoder_conf: | |
| output_size: 512 | |
| attention_heads: 4 | |
| linear_units: 2048 | |
| num_blocks: 50 | |
| tp_blocks: 20 | |
| dropout_rate: 0.1 | |
| positional_dropout_rate: 0.1 | |
| attention_dropout_rate: 0.1 | |
| input_layer: pe | |
| pos_enc_class: SinusoidalPositionEncoder | |
| normalize_before: true | |
| kernel_size: 11 | |
| sanm_shfit: 0 | |
| selfattention_layer_type: sanm | |
| model: SenseVoiceSmall | |
| model_conf: | |
| length_normalized_loss: true | |
| sos: 1 | |
| eos: 2 | |
| ignore_id: -1 | |
| tokenizer: SentencepiecesTokenizer | |
| tokenizer_conf: | |
| bpemodel: /root/.cache/modelscope/hub/iic/SenseVoiceSmall/chn_jpn_yue_eng_ko_spectok.bpe.model | |
| unk_symbol: <unk> | |
| split_with_space: true | |
| frontend: WavFrontend | |
| frontend_conf: | |
| fs: 16000 | |
| window: hamming | |
| n_mels: 80 | |
| frame_length: 25 | |
| frame_shift: 10 | |
| lfr_m: 7 | |
| lfr_n: 6 | |
| cmvn_file: /root/.cache/modelscope/hub/iic/SenseVoiceSmall/am.mvn | |
| dataset: SenseVoiceCTCDataset | |
| dataset_conf: | |
| index_ds: IndexDSJsonl | |
| batch_sampler: BatchSampler | |
| data_split_num: 1 | |
| batch_type: token | |
| batch_size: 25000 | |
| max_token_length: 2000 | |
| min_token_length: 60 | |
| max_source_length: 2000 | |
| min_source_length: 60 | |
| max_target_length: 200 | |
| min_target_length: 0 | |
| shuffle: true | |
| num_workers: 4 | |
| sos: 1 | |
| eos: 2 | |
| IndexDSJsonl: IndexDSJsonl | |
| retry: 20 | |
| sort_size: 1024 | |
| train_conf: | |
| accum_grad: 1 | |
| grad_clip: 5 | |
| max_epoch: 100 | |
| keep_nbest_models: 10 | |
| avg_nbest_model: 10 | |
| log_interval: 1 | |
| resume: true | |
| validate_interval: 2000 | |
| save_checkpoint_interval: 2000 | |
| use_deepspeed: false | |
| deepspeed_config: /root/autodl-tmp/SenseVoice/deepspeed_conf/ds_stage1.json | |
| optim: adamw | |
| optim_conf: | |
| lr: 0.0002 | |
| scheduler: warmuplr | |
| scheduler_conf: | |
| warmup_steps: 25000 | |
| specaug: SpecAugLFR | |
| specaug_conf: | |
| apply_time_warp: false | |
| time_warp_window: 5 | |
| time_warp_mode: bicubic | |
| apply_freq_mask: true | |
| freq_mask_width_range: | |
| - 0 | |
| - 30 | |
| lfr_rate: 6 | |
| num_freq_mask: 1 | |
| apply_time_mask: true | |
| time_mask_width_range: | |
| - 0 | |
| - 12 | |
| num_time_mask: 1 | |
| init_param: /root/.cache/modelscope/hub/iic/SenseVoiceSmall/model.pt | |
| config: /root/.cache/modelscope/hub/iic/SenseVoiceSmall/config.yaml | |
| is_training: true | |
| trust_remote_code: true | |
| train_data_set_list: /root/autodl-tmp/train/train.jsonl | |
| valid_data_set_list: /root/autodl-tmp/val/val.jsonl | |
| output_dir: ./outputs | |
| model_path: /root/.cache/modelscope/hub/iic/SenseVoiceSmall | |
| device: cpu | |