Smarter, Better, Faster, Longer: A Modern Bidirectional Encoder for Fast, Memory Efficient, and Long Context Finetuning and Inference
Paper • 2412.13663 • Published • 163
This is a sentence-transformers model finetuned from answerdotai/ModernBERT-base on the code_search_net dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
SentenceTransformer(
(0): Transformer({'max_seq_length': 4096, 'do_lower_case': False}) with Transformer model: ModernBertModel
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
# Download from the 🤗 Hub
model = SentenceTransformer("juanwisz/modernbert-python-code-retrieval")
# Run inference
sentences = [
'Validates control dictionary for the experiment context',
'def __validateExperimentControl(self, control):\n """ Validates control dictionary for the experiment context"""\n # Validate task list\n taskList = control.get(\'tasks\', None)\n if taskList is not None:\n taskLabelsList = []\n\n for task in taskList:\n validateOpfJsonValue(task, "opfTaskSchema.json")\n validateOpfJsonValue(task[\'taskControl\'], "opfTaskControlSchema.json")\n\n taskLabel = task[\'taskLabel\']\n\n assert isinstance(taskLabel, types.StringTypes), \\\n "taskLabel type: %r" % type(taskLabel)\n assert len(taskLabel) > 0, "empty string taskLabel not is allowed"\n\n taskLabelsList.append(taskLabel.lower())\n\n taskLabelDuplicates = filter(lambda x: taskLabelsList.count(x) > 1,\n taskLabelsList)\n assert len(taskLabelDuplicates) == 0, \\\n "Duplcate task labels are not allowed: %s" % taskLabelDuplicates\n\n return',
'def load_file_list(path=None, regx=\'\\.jpg\', printable=True, keep_prefix=False):\n r"""Return a file list in a folder by given a path and regular expression.\n\n Parameters\n ----------\n path : str or None\n A folder path, if `None`, use the current directory.\n regx : str\n The regx of file name.\n printable : boolean\n Whether to print the files infomation.\n keep_prefix : boolean\n Whether to keep path in the file name.\n\n Examples\n ----------\n >>> file_list = tl.files.load_file_list(path=None, regx=\'w1pre_[0-9]+\\.(npz)\')\n\n """\n if path is None:\n path = os.getcwd()\n file_list = os.listdir(path)\n return_list = []\n for _, f in enumerate(file_list):\n if re.search(regx, f):\n return_list.append(f)\n # return_list.sort()\n if keep_prefix:\n for i, f in enumerate(return_list):\n return_list[i] = os.path.join(path, f)\n\n if printable:\n logging.info(\'Match file list = %s\' % return_list)\n logging.info(\'Number of files = %d\' % len(return_list))\n return return_list',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]
# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
#### query and positive

| | query | positive |
|---|---|---|
| type | string | string |
| details | short docstring summary | full function source |
|
|
| query | positive |
|---|---|
| Extracts the list of arguments that start with any of the specified prefix values | `def findArgs(args, prefixes):` |
| Removes any arguments in the supplied list that are contained in the specified blacklist | `def stripArgs(args, blacklist):` |
| Executes a child process and captures its output | `def capture(command, input=None, cwd=None, shell=False, raiseOnError=False):` |
MultipleNegativesRankingLoss with these parameters: {
"scale": 20.0,
"similarity_fct": "cos_sim"
}
#### query and positive

| | query | positive |
|---|---|---|
| type | string | string |
| details | short docstring summary | full function source |
|
|
| query | positive |
|---|---|
| Train a deepq model. | `def learn(env,` |
| Save model to a pickle located at | `def save_act(self, path=None):` |
| CNN from Nature paper. | `def nature_cnn(unscaled_images, **conv_kwargs):` |
MultipleNegativesRankingLoss with these parameters: {
"scale": 20.0,
"similarity_fct": "cos_sim"
}
eval_strategy: epochper_device_train_batch_size: 4gradient_accumulation_steps: 4learning_rate: 2e-05num_train_epochs: 10warmup_steps: 1000fp16: Trueoverwrite_output_dir: Falsedo_predict: Falseeval_strategy: epochprediction_loss_only: Trueper_device_train_batch_size: 4per_device_eval_batch_size: 8per_gpu_train_batch_size: Noneper_gpu_eval_batch_size: Nonegradient_accumulation_steps: 4eval_accumulation_steps: Nonetorch_empty_cache_steps: Nonelearning_rate: 2e-05weight_decay: 0.0adam_beta1: 0.9adam_beta2: 0.999adam_epsilon: 1e-08max_grad_norm: 1.0num_train_epochs: 10max_steps: -1lr_scheduler_type: linearlr_scheduler_kwargs: {}warmup_ratio: 0.0warmup_steps: 1000log_level: passivelog_level_replica: warninglog_on_each_node: Truelogging_nan_inf_filter: Truesave_safetensors: Truesave_on_each_node: Falsesave_only_model: Falserestore_callback_states_from_checkpoint: Falseno_cuda: Falseuse_cpu: Falseuse_mps_device: Falseseed: 42data_seed: Nonejit_mode_eval: Falseuse_ipex: Falsebf16: Falsefp16: Truefp16_opt_level: O1half_precision_backend: autobf16_full_eval: Falsefp16_full_eval: Falsetf32: Nonelocal_rank: 0ddp_backend: Nonetpu_num_cores: Nonetpu_metrics_debug: Falsedebug: []dataloader_drop_last: Falsedataloader_num_workers: 0dataloader_prefetch_factor: Nonepast_index: -1disable_tqdm: Falseremove_unused_columns: Truelabel_names: Noneload_best_model_at_end: Falseignore_data_skip: Falsefsdp: []fsdp_min_num_params: 0fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}fsdp_transformer_layer_cls_to_wrap: Noneaccelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}deepspeed: Nonelabel_smoothing_factor: 0.0optim: adamw_torchoptim_args: Noneadafactor: Falsegroup_by_length: Falselength_column_name: lengthddp_find_unused_parameters: Noneddp_bucket_cap_mb: Noneddp_broadcast_buffers: Falsedataloader_pin_memory: 
Truedataloader_persistent_workers: Falseskip_memory_metrics: Trueuse_legacy_prediction_loop: Falsepush_to_hub: Falseresume_from_checkpoint: Nonehub_model_id: Nonehub_strategy: every_savehub_private_repo: Nonehub_always_push: Falsegradient_checkpointing: Falsegradient_checkpointing_kwargs: Noneinclude_inputs_for_metrics: Falseinclude_for_metrics: []eval_do_concat_batches: Truefp16_backend: autopush_to_hub_model_id: Nonepush_to_hub_organization: Nonemp_parameters: auto_find_batch_size: Falsefull_determinism: Falsetorchdynamo: Noneray_scope: lastddp_timeout: 1800torch_compile: Falsetorch_compile_backend: Nonetorch_compile_mode: Nonedispatch_batches: Nonesplit_batches: Noneinclude_tokens_per_second: Falseinclude_num_input_tokens_seen: Falseneftune_noise_alpha: Noneoptim_target_modules: Nonebatch_eval_metrics: Falseeval_on_start: Falseuse_liger_kernel: Falseeval_use_gather_object: Falseaverage_tokens_across_devices: Falseprompts: Nonebatch_sampler: batch_samplermulti_dataset_batch_sampler: proportional| Epoch | Step | Training Loss | Validation Loss |
|---|---|---|---|
| 0.0078 | 200 | 0.634 | - |
| 0.0155 | 400 | 0.0046 | - |
| 0.0233 | 600 | 0.0009 | - |
| 0.0311 | 800 | 0.0004 | - |
| 0.0388 | 1000 | 0.0001 | - |
| 0.0466 | 1200 | 0.0002 | - |
| 0.0543 | 1400 | 0.0001 | - |
| 0.0621 | 1600 | 0.0001 | - |
| 0.0699 | 1800 | 0.0001 | - |
| 0.0776 | 2000 | 0.0 | - |
| 0.0854 | 2200 | 0.0 | - |
| 0.0932 | 2400 | 0.0 | - |
| 0.1009 | 2600 | 0.0 | - |
| 0.1087 | 2800 | 0.0005 | - |
| 0.1165 | 3000 | 0.0005 | - |
| 0.1242 | 3200 | 0.0002 | - |
| 0.1320 | 3400 | 0.0 | - |
| 0.1397 | 3600 | 0.0 | - |
| 0.1475 | 3800 | 0.0 | - |
| 0.1553 | 4000 | 0.0001 | - |
| 0.1630 | 4200 | 0.0 | - |
| 0.1708 | 4400 | 0.0001 | - |
| 0.1786 | 4600 | 0.0001 | - |
| 0.1863 | 4800 | 0.0 | - |
| 0.1941 | 5000 | 0.0 | - |
| 0.2019 | 5200 | 0.0 | - |
| 0.2096 | 5400 | 0.0 | - |
| 0.2174 | 5600 | 0.0 | - |
| 0.2251 | 5800 | 0.0 | - |
| 0.2329 | 6000 | 0.0004 | - |
| 0.2407 | 6200 | 0.0 | - |
| 0.2484 | 6400 | 0.0001 | - |
| 0.2562 | 6600 | 0.0 | - |
| 0.2640 | 6800 | 0.0 | - |
| 0.2717 | 7000 | 0.0 | - |
| 0.2795 | 7200 | 0.0 | - |
| 0.2873 | 7400 | 0.0 | - |
| 0.2950 | 7600 | 0.0 | - |
| 0.3028 | 7800 | 0.0 | - |
| 0.3105 | 8000 | 0.0 | - |
| 0.3183 | 8200 | 0.0 | - |
| 0.3261 | 8400 | 0.0004 | - |
| 0.3338 | 8600 | 0.0 | - |
| 0.3416 | 8800 | 0.0 | - |
| 0.3494 | 9000 | 0.0 | - |
| 0.3571 | 9200 | 0.0 | - |
| 0.3649 | 9400 | 0.0 | - |
| 0.3727 | 9600 | 0.0 | - |
| 0.3804 | 9800 | 0.0 | - |
| 0.3882 | 10000 | 0.0 | - |
| 0.3959 | 10200 | 0.0 | - |
| 0.4037 | 10400 | 0.0 | - |
| 0.4115 | 10600 | 0.0 | - |
| 0.4192 | 10800 | 0.0 | - |
| 0.4270 | 11000 | 0.0 | - |
| 0.4348 | 11200 | 0.0 | - |
| 0.4425 | 11400 | 0.0 | - |
| 0.4503 | 11600 | 0.0 | - |
| 0.4581 | 11800 | 0.0 | - |
| 0.4658 | 12000 | 0.0 | - |
| 0.4736 | 12200 | 0.0 | - |
| 0.4813 | 12400 | 0.0 | - |
| 0.4891 | 12600 | 0.0005 | - |
| 0.4969 | 12800 | 0.0 | - |
| 0.5046 | 13000 | 0.0 | - |
| 0.5124 | 13200 | 0.0001 | - |
| 0.5202 | 13400 | 0.0 | - |
| 0.5279 | 13600 | 0.0 | - |
| 0.5357 | 13800 | 0.0 | - |
| 0.5435 | 14000 | 0.0 | - |
| 0.5512 | 14200 | 0.0 | - |
| 0.5590 | 14400 | 0.0004 | - |
| 0.5667 | 14600 | 0.0 | - |
| 0.5745 | 14800 | 0.0 | - |
| 0.5823 | 15000 | 0.0 | - |
| 0.5900 | 15200 | 0.0 | - |
| 0.5978 | 15400 | 0.0 | - |
| 0.6056 | 15600 | 0.0 | - |
| 0.6133 | 15800 | 0.0 | - |
| 0.6211 | 16000 | 0.0 | - |
| 0.6289 | 16200 | 0.0 | - |
| 0.6366 | 16400 | 0.0006 | - |
| 0.6444 | 16600 | 0.0 | - |
| 0.6521 | 16800 | 0.0005 | - |
| 0.6599 | 17000 | 0.0 | - |
| 0.6677 | 17200 | 0.0 | - |
| 0.6754 | 17400 | 0.0 | - |
| 0.6832 | 17600 | 0.0 | - |
| 0.6910 | 17800 | 0.0 | - |
| 0.6987 | 18000 | 0.0005 | - |
| 0.7065 | 18200 | 0.0001 | - |
| 0.7143 | 18400 | 0.0 | - |
| 0.7220 | 18600 | 0.0 | - |
| 0.7298 | 18800 | 0.0 | - |
| 0.7375 | 19000 | 0.0 | - |
| 0.7453 | 19200 | 0.0 | - |
| 0.7531 | 19400 | 0.0 | - |
| 0.7608 | 19600 | 0.0 | - |
| 0.7686 | 19800 | 0.0001 | - |
| 0.7764 | 20000 | 0.0 | - |
| 0.7841 | 20200 | 0.0 | - |
| 0.7919 | 20400 | 0.0 | - |
| 0.7997 | 20600 | 0.0004 | - |
| 0.8074 | 20800 | 0.0 | - |
| 0.8152 | 21000 | 0.0 | - |
| 0.8229 | 21200 | 0.0 | - |
| 0.8307 | 21400 | 0.0009 | - |
| 0.8385 | 21600 | 0.0 | - |
| 0.8462 | 21800 | 0.0 | - |
| 0.8540 | 22000 | 0.0 | - |
| 0.8618 | 22200 | 0.0 | - |
| 0.8695 | 22400 | 0.0002 | - |
| 0.8773 | 22600 | 0.0 | - |
| 0.8851 | 22800 | 0.0 | - |
| 0.8928 | 23000 | 0.0001 | - |
| 0.9006 | 23200 | 0.0 | - |
| 0.9083 | 23400 | 0.0 | - |
| 0.9161 | 23600 | 0.0 | - |
| 0.9239 | 23800 | 0.0 | - |
| 0.9316 | 24000 | 0.0 | - |
| 0.9394 | 24200 | 0.0 | - |
| 0.9472 | 24400 | 0.0 | - |
| 0.9549 | 24600 | 0.0 | - |
| 0.9627 | 24800 | 0.0 | - |
| 0.9704 | 25000 | 0.0 | - |
| 0.9782 | 25200 | 0.0 | - |
| 0.9860 | 25400 | 0.0 | - |
| 0.9937 | 25600 | 0.0 | - |
| 1.0 | 25762 | - | 0.0001 |
| 1.0015 | 25800 | 0.0005 | - |
| 1.0092 | 26000 | 0.0 | - |
| 1.0170 | 26200 | 0.0 | - |
| 1.0248 | 26400 | 0.0 | - |
| 1.0325 | 26600 | 0.0 | - |
| 1.0403 | 26800 | 0.0 | - |
| 1.0481 | 27000 | 0.0 | - |
| 1.0558 | 27200 | 0.0 | - |
| 1.0636 | 27400 | 0.0 | - |
| 1.0713 | 27600 | 0.0 | - |
| 1.0791 | 27800 | 0.0 | - |
| 1.0869 | 28000 | 0.0 | - |
| 1.0946 | 28200 | 0.0 | - |
| 1.1024 | 28400 | 0.0 | - |
| 1.1102 | 28600 | 0.0 | - |
| 1.1179 | 28800 | 0.0 | - |
| 1.1257 | 29000 | 0.0 | - |
| 1.1335 | 29200 | 0.0 | - |
| 1.1412 | 29400 | 0.0 | - |
| 1.1490 | 29600 | 0.0 | - |
| 1.1567 | 29800 | 0.0 | - |
| 1.1645 | 30000 | 0.0 | - |
| 1.1723 | 30200 | 0.0 | - |
| 1.1800 | 30400 | 0.0 | - |
| 1.1878 | 30600 | 0.0 | - |
| 1.1956 | 30800 | 0.0 | - |
| 1.2033 | 31000 | 0.0 | - |
| 1.2111 | 31200 | 0.0 | - |
| 1.2189 | 31400 | 0.0 | - |
| 1.2266 | 31600 | 0.0004 | - |
| 1.2344 | 31800 | 0.0004 | - |
| 1.2421 | 32000 | 0.0 | - |
| 1.2499 | 32200 | 0.0 | - |
| 1.2577 | 32400 | 0.0 | - |
| 1.2654 | 32600 | 0.0 | - |
| 1.2732 | 32800 | 0.0 | - |
| 1.2810 | 33000 | 0.0 | - |
| 1.2887 | 33200 | 0.0 | - |
| 1.2965 | 33400 | 0.0 | - |
| 1.3043 | 33600 | 0.0 | - |
| 1.3120 | 33800 | 0.0 | - |
| 1.3198 | 34000 | 0.0 | - |
| 1.3275 | 34200 | 0.0 | - |
| 1.3353 | 34400 | 0.0 | - |
| 1.3431 | 34600 | 0.0 | - |
| 1.3508 | 34800 | 0.0004 | - |
| 1.3586 | 35000 | 0.0005 | - |
| 1.3664 | 35200 | 0.0004 | - |
| 1.3741 | 35400 | 0.0011 | - |
| 1.3819 | 35600 | 0.0 | - |
| 1.3897 | 35800 | 0.0 | - |
| 1.3974 | 36000 | 0.0 | - |
| 1.4052 | 36200 | 0.0 | - |
| 1.4129 | 36400 | 0.0 | - |
| 1.4207 | 36600 | 0.0 | - |
| 1.4285 | 36800 | 0.0 | - |
| 1.4362 | 37000 | 0.0 | - |
| 1.4440 | 37200 | 0.0001 | - |
| 1.4518 | 37400 | 0.0 | - |
| 1.4595 | 37600 | 0.0 | - |
| 1.4673 | 37800 | 0.0 | - |
| 1.4751 | 38000 | 0.0 | - |
| 1.4828 | 38200 | 0.0004 | - |
| 1.4906 | 38400 | 0.0003 | - |
| 1.4983 | 38600 | 0.0 | - |
| 1.5061 | 38800 | 0.0 | - |
| 1.5139 | 39000 | 0.0 | - |
| 1.5216 | 39200 | 0.0 | - |
| 1.5294 | 39400 | 0.0004 | - |
| 1.5372 | 39600 | 0.0004 | - |
| 1.5449 | 39800 | 0.0 | - |
| 1.5527 | 40000 | 0.0 | - |
| 1.5605 | 40200 | 0.0 | - |
| 1.5682 | 40400 | 0.0 | - |
| 1.5760 | 40600 | 0.0009 | - |
| 1.5837 | 40800 | 0.0 | - |
| 1.5915 | 41000 | 0.0009 | - |
| 1.5993 | 41200 | 0.0 | - |
| 1.6070 | 41400 | 0.0 | - |
| 1.6148 | 41600 | 0.0 | - |
| 1.6226 | 41800 | 0.0 | - |
| 1.6303 | 42000 | 0.0 | - |
| 1.6381 | 42200 | 0.0 | - |
| 1.6459 | 42400 | 0.0 | - |
| 1.6536 | 42600 | 0.0 | - |
| 1.6614 | 42800 | 0.0 | - |
| 1.6691 | 43000 | 0.0 | - |
| 1.6769 | 43200 | 0.0 | - |
| 1.6847 | 43400 | 0.0 | - |
| 1.6924 | 43600 | 0.0 | - |
| 1.7002 | 43800 | 0.0 | - |
| 1.7080 | 44000 | 0.0 | - |
| 1.7157 | 44200 | 0.0 | - |
| 1.7235 | 44400 | 0.0 | - |
| 1.7313 | 44600 | 0.0 | - |
| 1.7390 | 44800 | 0.0 | - |
| 1.7468 | 45000 | 0.0 | - |
| 1.7545 | 45200 | 0.0 | - |
| 1.7623 | 45400 | 0.0 | - |
| 1.7701 | 45600 | 0.0 | - |
| 1.7778 | 45800 | 0.0 | - |
| 1.7856 | 46000 | 0.0 | - |
| 1.7934 | 46200 | 0.0 | - |
| 1.8011 | 46400 | 0.0 | - |
| 1.8089 | 46600 | 0.0 | - |
| 1.8167 | 46800 | 0.0 | - |
| 1.8244 | 47000 | 0.0 | - |
| 1.8322 | 47200 | 0.0 | - |
| 1.8399 | 47400 | 0.0 | - |
| 1.8477 | 47600 | 0.0 | - |
| 1.8555 | 47800 | 0.0004 | - |
| 1.8632 | 48000 | 0.0 | - |
| 1.8710 | 48200 | 0.0 | - |
| 1.8788 | 48400 | 0.0 | - |
| 1.8865 | 48600 | 0.0 | - |
| 1.8943 | 48800 | 0.0 | - |
| 1.9021 | 49000 | 0.0004 | - |
| 1.9098 | 49200 | 0.0 | - |
| 1.9176 | 49400 | 0.0 | - |
| 1.9253 | 49600 | 0.0004 | - |
| 1.9331 | 49800 | 0.0 | - |
| 1.9409 | 50000 | 0.0 | - |
| 1.9486 | 50200 | 0.0 | - |
| 1.9564 | 50400 | 0.0 | - |
| 1.9642 | 50600 | 0.0004 | - |
| 1.9719 | 50800 | 0.0 | - |
| 1.9797 | 51000 | 0.0 | - |
| 1.9875 | 51200 | 0.0 | - |
| 1.9952 | 51400 | 0.0004 | - |
| 2.0 | 51524 | - | 0.0001 |
| 2.0030 | 51600 | 0.0 | - |
| 2.0107 | 51800 | 0.0 | - |
| 2.0185 | 52000 | 0.0 | - |
| 2.0262 | 52200 | 0.0 | - |
| 2.0340 | 52400 | 0.0004 | - |
| 2.0418 | 52600 | 0.0004 | - |
| 2.0495 | 52800 | 0.0 | - |
| 2.0573 | 53000 | 0.0008 | - |
| 2.0651 | 53200 | 0.0 | - |
| 2.0728 | 53400 | 0.0 | - |
| 2.0806 | 53600 | 0.0 | - |
| 2.0883 | 53800 | 0.0 | - |
| 2.0961 | 54000 | 0.0 | - |
| 2.1039 | 54200 | 0.0 | - |
| 2.1116 | 54400 | 0.0 | - |
| 2.1194 | 54600 | 0.0 | - |
| 2.1272 | 54800 | 0.0 | - |
| 2.1349 | 55000 | 0.0 | - |
| 2.1427 | 55200 | 0.0 | - |
| 2.1505 | 55400 | 0.0 | - |
| 2.1582 | 55600 | 0.0 | - |
| 2.1660 | 55800 | 0.0 | - |
| 2.1737 | 56000 | 0.0 | - |
| 2.1815 | 56200 | 0.0 | - |
| 2.1893 | 56400 | 0.0 | - |
| 2.1970 | 56600 | 0.0 | - |
| 2.2048 | 56800 | 0.0 | - |
| 2.2126 | 57000 | 0.0 | - |
| 2.2203 | 57200 | 0.0 | - |
| 2.2281 | 57400 | 0.0 | - |
| 2.2359 | 57600 | 0.0 | - |
| 2.2436 | 57800 | 0.0 | - |
| 2.2514 | 58000 | 0.0004 | - |
| 2.2591 | 58200 | 0.0 | - |
| 2.2669 | 58400 | 0.0004 | - |
| 2.2747 | 58600 | 0.0 | - |
| 2.2824 | 58800 | 0.0 | - |
| 2.2902 | 59000 | 0.0 | - |
| 2.2980 | 59200 | 0.0 | - |
| 2.3057 | 59400 | 0.0 | - |
| 2.3135 | 59600 | 0.0 | - |
| 2.3213 | 59800 | 0.0004 | - |
| 2.3290 | 60000 | 0.0 | - |
| 2.3368 | 60200 | 0.0004 | - |
| 2.3445 | 60400 | 0.0 | - |
| 2.3523 | 60600 | 0.0 | - |
| 2.3601 | 60800 | 0.0 | - |
| 2.3678 | 61000 | 0.0 | - |
| 2.3756 | 61200 | 0.0 | - |
| 2.3834 | 61400 | 0.0 | - |
| 2.3911 | 61600 | 0.0 | - |
| 2.3989 | 61800 | 0.0 | - |
| 2.4067 | 62000 | 0.0005 | - |
| 2.4144 | 62200 | 0.0 | - |
| 2.4222 | 62400 | 0.0 | - |
| 2.4299 | 62600 | 0.0 | - |
| 2.4377 | 62800 | 0.0 | - |
| 2.4455 | 63000 | 0.0 | - |
| 2.4532 | 63200 | 0.0 | - |
| 2.4610 | 63400 | 0.0 | - |
| 2.4688 | 63600 | 0.0 | - |
| 2.4765 | 63800 | 0.0 | - |
| 2.4843 | 64000 | 0.0 | - |
| 2.4921 | 64200 | 0.0 | - |
| 2.4998 | 64400 | 0.0 | - |
| 2.5076 | 64600 | 0.0 | - |
| 2.5153 | 64800 | 0.0 | - |
| 2.5231 | 65000 | 0.0 | - |
| 2.5309 | 65200 | 0.0 | - |
| 2.5386 | 65400 | 0.0 | - |
| 2.5464 | 65600 | 0.0004 | - |
| 2.5542 | 65800 | 0.0 | - |
| 2.5619 | 66000 | 0.0 | - |
| 2.5697 | 66200 | 0.0 | - |
| 2.5775 | 66400 | 0.0 | - |
| 2.5852 | 66600 | 0.0 | - |
| 2.5930 | 66800 | 0.0 | - |
| 2.6007 | 67000 | 0.0 | - |
| 2.6085 | 67200 | 0.0 | - |
| 2.6163 | 67400 | 0.0 | - |
| 2.6240 | 67600 | 0.0 | - |
| 2.6318 | 67800 | 0.0 | - |
| 2.6396 | 68000 | 0.0 | - |
| 2.6473 | 68200 | 0.0 | - |
| 2.6551 | 68400 | 0.0 | - |
| 2.6629 | 68600 | 0.0 | - |
| 2.6706 | 68800 | 0.0004 | - |
| 2.6784 | 69000 | 0.0 | - |
| 2.6861 | 69200 | 0.0 | - |
| 2.6939 | 69400 | 0.0 | - |
| 2.7017 | 69600 | 0.0004 | - |
| 2.7094 | 69800 | 0.0004 | - |
| 2.7172 | 70000 | 0.0 | - |
| 2.7250 | 70200 | 0.0 | - |
| 2.7327 | 70400 | 0.0 | - |
| 2.7405 | 70600 | 0.0 | - |
| 2.7483 | 70800 | 0.0 | - |
| 2.7560 | 71000 | 0.0004 | - |
| 2.7638 | 71200 | 0.0 | - |
| 2.7715 | 71400 | 0.0 | - |
| 2.7793 | 71600 | 0.0 | - |
| 2.7871 | 71800 | 0.0 | - |
| 2.7948 | 72000 | 0.0 | - |
| 2.8026 | 72200 | 0.0 | - |
| 2.8104 | 72400 | 0.0 | - |
| 2.8181 | 72600 | 0.0 | - |
| 2.8259 | 72800 | 0.0 | - |
| 2.8337 | 73000 | 0.0004 | - |
| 2.8414 | 73200 | 0.0 | - |
| 2.8492 | 73400 | 0.0 | - |
| 2.8569 | 73600 | 0.0 | - |
| 2.8647 | 73800 | 0.0004 | - |
| 2.8725 | 74000 | 0.0 | - |
| 2.8802 | 74200 | 0.0 | - |
| 2.8880 | 74400 | 0.0 | - |
| 2.8958 | 74600 | 0.0 | - |
| 2.9035 | 74800 | 0.0 | - |
| 2.9113 | 75000 | 0.0 | - |
| 2.9191 | 75200 | 0.0 | - |
| 2.9268 | 75400 | 0.0004 | - |
| 2.9346 | 75600 | 0.0 | - |
| 2.9423 | 75800 | 0.0 | - |
| 2.9501 | 76000 | 0.0 | - |
| 2.9579 | 76200 | 0.0 | - |
| 2.9656 | 76400 | 0.0 | - |
| 2.9734 | 76600 | 0.0004 | - |
| 2.9812 | 76800 | 0.0 | - |
| 2.9889 | 77000 | 0.0 | - |
| 2.9967 | 77200 | 0.0 | - |
| 3.0 | 77286 | - | 0.0000 |
@misc{warner2024smarterbetterfasterlonger,
title={Smarter, Better, Faster, Longer: A Modern Bidirectional Encoder for Fast, Memory Efficient, and Long Context Finetuning and Inference},
author={Benjamin Warner and Antoine Chaffin and Benjamin Clavié and Orion Weller and Oskar Hallström and Said Taghadouini and Alexis Gallagher and Raja Biswas and Faisal Ladhak and Tom Aarsen and Nathan Cooper and Griffin Adams and Jeremy Howard and Iacopo Poli},
year={2024},
eprint={2412.13663},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2412.13663},
}
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
@misc{henderson2017efficient,
title={Efficient Natural Language Response Suggestion for Smart Reply},
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
year={2017},
eprint={1705.00652},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
Base model
answerdotai/ModernBERT-base
from sentence_transformers import SentenceTransformer model = SentenceTransformer("juanwisz/modernbert-python-code-retrieval") sentences = [ "Clip off all parts from all bounding boxes that are outside of the image.\n\n Returns\n -------\n imgaug.BoundingBoxesOnImage\n Bounding boxes, clipped to fall within the image dimensions.", "def model_best(y1, y2, samples=1000, progressbar=True):\n \"\"\"\n Bayesian Estimation Supersedes the T-Test\n\n This model runs a Bayesian hypothesis comparing if y1 and y2 come\n from the same distribution. Returns are assumed to be T-distributed.\n\n In addition, computes annual volatility and Sharpe of in and\n out-of-sample periods.\n\n This model replicates the example used in:\n Kruschke, John. (2012) Bayesian estimation supersedes the t\n test. Journal of Experimental Psychology: General.\n\n Parameters\n ----------\n y1 : array-like\n Array of returns (e.g. in-sample)\n y2 : array-like\n Array of returns (e.g. out-of-sample)\n samples : int, optional\n Number of posterior samples to draw.\n\n Returns\n -------\n model : pymc.Model object\n PyMC3 model containing all random variables.\n trace : pymc3.sampling.BaseTrace object\n A PyMC3 trace object that contains samples for each parameter\n of the posterior.\n\n See Also\n --------\n plot_stoch_vol : plotting of tochastic volatility model\n \"\"\"\n\n y = np.concatenate((y1, y2))\n\n mu_m = np.mean(y)\n mu_p = 0.000001 * 1 / np.std(y)**2\n\n sigma_low = np.std(y) / 1000\n sigma_high = np.std(y) * 1000\n with pm.Model() as model:\n group1_mean = pm.Normal('group1_mean', mu=mu_m, tau=mu_p,\n testval=y1.mean())\n group2_mean = pm.Normal('group2_mean', mu=mu_m, tau=mu_p,\n testval=y2.mean())\n group1_std = pm.Uniform('group1_std', lower=sigma_low,\n upper=sigma_high, testval=y1.std())\n group2_std = pm.Uniform('group2_std', lower=sigma_low,\n upper=sigma_high, testval=y2.std())\n nu = pm.Exponential('nu_minus_two', 1 / 29., testval=4.) 
+ 2.\n\n returns_group1 = pm.StudentT('group1', nu=nu, mu=group1_mean,\n lam=group1_std**-2, observed=y1)\n returns_group2 = pm.StudentT('group2', nu=nu, mu=group2_mean,\n lam=group2_std**-2, observed=y2)\n\n diff_of_means = pm.Deterministic('difference of means',\n group2_mean - group1_mean)\n pm.Deterministic('difference of stds',\n group2_std - group1_std)\n pm.Deterministic('effect size', diff_of_means /\n pm.math.sqrt((group1_std**2 +\n group2_std**2) / 2))\n\n pm.Deterministic('group1_annual_volatility',\n returns_group1.distribution.variance**.5 *\n np.sqrt(252))\n pm.Deterministic('group2_annual_volatility',\n returns_group2.distribution.variance**.5 *\n np.sqrt(252))\n\n pm.Deterministic('group1_sharpe', returns_group1.distribution.mean /\n returns_group1.distribution.variance**.5 *\n np.sqrt(252))\n pm.Deterministic('group2_sharpe', returns_group2.distribution.mean /\n returns_group2.distribution.variance**.5 *\n np.sqrt(252))\n\n trace = pm.sample(samples, progressbar=progressbar)\n return model, trace", "def clip_out_of_image(self):\n \"\"\"\n Clip off all parts from all bounding boxes that are outside of the image.\n\n Returns\n -------\n imgaug.BoundingBoxesOnImage\n Bounding boxes, clipped to fall within the image dimensions.\n\n \"\"\"\n bbs_cut = [bb.clip_out_of_image(self.shape)\n for bb in self.bounding_boxes if bb.is_partly_within_image(self.shape)]\n return BoundingBoxesOnImage(bbs_cut, shape=self.shape)", "def _initPermanence(self, potential, connectedPct):\n \"\"\"\n Initializes the permanences of a column. 
The method\n returns a 1-D array the size of the input, where each entry in the\n array represents the initial permanence value between the input bit\n at the particular index in the array, and the column represented by\n the 'index' parameter.\n\n Parameters:\n ----------------------------\n :param potential: A numpy array specifying the potential pool of the column.\n Permanence values will only be generated for input bits\n corresponding to indices for which the mask value is 1.\n :param connectedPct: A value between 0 or 1 governing the chance, for each\n permanence, that the initial permanence value will\n be a value that is considered connected.\n \"\"\"\n # Determine which inputs bits will start out as connected\n # to the inputs. Initially a subset of the input bits in a\n # column's potential pool will be connected. This number is\n # given by the parameter \"connectedPct\"\n perm = numpy.zeros(self._numInputs, dtype=realDType)\n for i in xrange(self._numInputs):\n if (potential[i] < 1):\n continue\n\n if (self._random.getReal64() <= connectedPct):\n perm[i] = self._initPermConnected()\n else:\n perm[i] = self._initPermNonConnected()\n\n # Clip off low values. Since we use a sparse representation\n # to store the permanence values this helps reduce memory\n # requirements.\n perm[perm < self._synPermTrimThreshold] = 0\n\n return perm" ] embeddings = model.encode(sentences) similarities = model.similarity(embeddings, embeddings) print(similarities.shape) # [4, 4]