Text Generation
Transformers
Safetensors
llama
text-generation-inference
8-bit precision
bitsandbytes
Instructions to use Chat-Error/7B-XXL with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Chat-Error/7B-XXL with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("text-generation", model="Chat-Error/7B-XXL")
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
tokenizer = AutoTokenizer.from_pretrained("Chat-Error/7B-XXL")
model = AutoModelForCausalLM.from_pretrained("Chat-Error/7B-XXL")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use Chat-Error/7B-XXL with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "Chat-Error/7B-XXL"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Chat-Error/7B-XXL",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker
docker model run hf.co/Chat-Error/7B-XXL
- SGLang
How to use Chat-Error/7B-XXL with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "Chat-Error/7B-XXL" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Chat-Error/7B-XXL",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "Chat-Error/7B-XXL" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Chat-Error/7B-XXL",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
- Docker Model Runner
How to use Chat-Error/7B-XXL with Docker Model Runner:
docker model run hf.co/Chat-Error/7B-XXL
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.2503401360544218, | |
| "eval_steps": 500, | |
| "global_step": 1058, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-06, | |
| "loss": 2.2147, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1e-05, | |
| "loss": 2.4802, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.5e-05, | |
| "loss": 2.0475, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.6983, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.6212, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3e-05, | |
| "loss": 2.0843, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.5e-05, | |
| "loss": 1.8442, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4e-05, | |
| "loss": 1.3359, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.5e-05, | |
| "loss": 2.3761, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6338, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.999999826891235e-05, | |
| "loss": 1.7487, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.999999307564964e-05, | |
| "loss": 2.0834, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9999984420212596e-05, | |
| "loss": 1.5794, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.999997230260242e-05, | |
| "loss": 1.7222, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9999956722820765e-05, | |
| "loss": 1.4002, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.999993768086981e-05, | |
| "loss": 1.7314, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.999991517675219e-05, | |
| "loss": 1.7053, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.999988921047102e-05, | |
| "loss": 1.9546, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.99998597820299e-05, | |
| "loss": 1.6122, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9999826891432896e-05, | |
| "loss": 1.7275, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.999979053868456e-05, | |
| "loss": 1.8279, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9999750723789936e-05, | |
| "loss": 1.6764, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9999707446754546e-05, | |
| "loss": 1.7501, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999966070758437e-05, | |
| "loss": 1.6349, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999961050628588e-05, | |
| "loss": 1.6796, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9999556842866034e-05, | |
| "loss": 1.6249, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999949971733225e-05, | |
| "loss": 2.0866, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999943912969247e-05, | |
| "loss": 1.6658, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999937507995506e-05, | |
| "loss": 1.7794, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999930756812889e-05, | |
| "loss": 1.743, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999923659422332e-05, | |
| "loss": 2.1359, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999916215824817e-05, | |
| "loss": 1.7393, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999908426021375e-05, | |
| "loss": 1.6387, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999900290013085e-05, | |
| "loss": 1.974, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999891807801075e-05, | |
| "loss": 1.7787, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9998829793865176e-05, | |
| "loss": 2.1738, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999873804770636e-05, | |
| "loss": 1.73, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999864283954701e-05, | |
| "loss": 1.9363, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999854416940032e-05, | |
| "loss": 1.248, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999844203727993e-05, | |
| "loss": 2.0461, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999833644320002e-05, | |
| "loss": 2.1294, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999822738717518e-05, | |
| "loss": 1.3963, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9998114869220525e-05, | |
| "loss": 1.7942, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999799888935164e-05, | |
| "loss": 1.4804, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.99978794475846e-05, | |
| "loss": 1.4482, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999775654393591e-05, | |
| "loss": 1.0604, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9997630178422624e-05, | |
| "loss": 1.6902, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999750035106222e-05, | |
| "loss": 1.4982, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9997367061872694e-05, | |
| "loss": 2.0546, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.99972303108725e-05, | |
| "loss": 1.581, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999709009808057e-05, | |
| "loss": 1.4683, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9996946423516326e-05, | |
| "loss": 1.9496, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9996799287199666e-05, | |
| "loss": 1.3316, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9996648689150966e-05, | |
| "loss": 2.0274, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9996494629391076e-05, | |
| "loss": 2.1621, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999633710794135e-05, | |
| "loss": 1.6314, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999617612482358e-05, | |
| "loss": 1.8074, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9996011680060064e-05, | |
| "loss": 1.8467, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999584377367359e-05, | |
| "loss": 1.2585, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.99956724056874e-05, | |
| "loss": 1.8106, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9995497576125225e-05, | |
| "loss": 1.7955, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999531928501128e-05, | |
| "loss": 1.9608, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.999513753237025e-05, | |
| "loss": 2.1124, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999495231822732e-05, | |
| "loss": 1.8793, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999476364260812e-05, | |
| "loss": 1.6794, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.99945715055388e-05, | |
| "loss": 1.7991, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999437590704595e-05, | |
| "loss": 2.0194, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999417684715668e-05, | |
| "loss": 1.4938, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999397432589853e-05, | |
| "loss": 1.9159, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999376834329956e-05, | |
| "loss": 1.3969, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9993558899388305e-05, | |
| "loss": 1.727, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999334599419375e-05, | |
| "loss": 1.8063, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.99931296277454e-05, | |
| "loss": 1.9437, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999290980007321e-05, | |
| "loss": 1.5978, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9992686511207614e-05, | |
| "loss": 1.9653, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9992459761179545e-05, | |
| "loss": 2.2229, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999222955002041e-05, | |
| "loss": 1.7991, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9991995877762074e-05, | |
| "loss": 2.0293, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999175874443692e-05, | |
| "loss": 1.5131, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9991518150077765e-05, | |
| "loss": 1.41, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999127409471794e-05, | |
| "loss": 2.0329, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9991026578391245e-05, | |
| "loss": 1.7566, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999077560113196e-05, | |
| "loss": 2.1238, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9990521162974824e-05, | |
| "loss": 1.4365, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999026326395509e-05, | |
| "loss": 1.7559, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999000190410848e-05, | |
| "loss": 2.3886, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998973708347116e-05, | |
| "loss": 1.5783, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998946880207983e-05, | |
| "loss": 2.0597, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998919705997164e-05, | |
| "loss": 1.684, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998892185718422e-05, | |
| "loss": 1.5005, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998864319375568e-05, | |
| "loss": 1.8982, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998836106972461e-05, | |
| "loss": 2.4997, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998807548513008e-05, | |
| "loss": 1.9038, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998778644001165e-05, | |
| "loss": 1.4326, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998749393440933e-05, | |
| "loss": 1.9809, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998719796836366e-05, | |
| "loss": 1.727, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9986898541915595e-05, | |
| "loss": 1.5198, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998659565510662e-05, | |
| "loss": 1.7004, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998628930797866e-05, | |
| "loss": 1.7584, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9985979500574166e-05, | |
| "loss": 1.7247, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998566623293603e-05, | |
| "loss": 1.7967, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998534950510764e-05, | |
| "loss": 1.282, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9985029317132845e-05, | |
| "loss": 1.7519, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998470566905601e-05, | |
| "loss": 1.8924, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9984378560921937e-05, | |
| "loss": 1.411, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9984047992775926e-05, | |
| "loss": 1.7251, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9983713964663776e-05, | |
| "loss": 1.6784, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.998337647663173e-05, | |
| "loss": 1.9045, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.998303552872652e-05, | |
| "loss": 1.1327, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.998269112099538e-05, | |
| "loss": 1.6382, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.998234325348599e-05, | |
| "loss": 1.8228, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.998199192624654e-05, | |
| "loss": 1.9773, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.998163713932567e-05, | |
| "loss": 1.6035, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.998127889277252e-05, | |
| "loss": 2.3095, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.998091718663671e-05, | |
| "loss": 1.4727, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.998055202096832e-05, | |
| "loss": 1.8166, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.998018339581792e-05, | |
| "loss": 1.2842, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997981131123657e-05, | |
| "loss": 1.8485, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997943576727579e-05, | |
| "loss": 1.1914, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997905676398759e-05, | |
| "loss": 1.9409, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997867430142446e-05, | |
| "loss": 2.3757, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997828837963937e-05, | |
| "loss": 1.6337, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997789899868575e-05, | |
| "loss": 1.5789, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9977506158617535e-05, | |
| "loss": 1.61, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9977109859489133e-05, | |
| "loss": 1.3976, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997671010135542e-05, | |
| "loss": 1.3944, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997630688427176e-05, | |
| "loss": 1.5922, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9975900208293984e-05, | |
| "loss": 2.0075, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997549007347842e-05, | |
| "loss": 1.4363, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9975076479881864e-05, | |
| "loss": 1.9972, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.99746594275616e-05, | |
| "loss": 1.8052, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997423891657538e-05, | |
| "loss": 1.9088, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997381494698143e-05, | |
| "loss": 1.7322, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9973387518838474e-05, | |
| "loss": 1.6391, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9972956632205704e-05, | |
| "loss": 1.8346, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997252228714279e-05, | |
| "loss": 1.9651, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997208448370988e-05, | |
| "loss": 1.5544, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997164322196762e-05, | |
| "loss": 1.4443, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.99711985019771e-05, | |
| "loss": 2.1679, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997075032379992e-05, | |
| "loss": 1.6557, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997029868749813e-05, | |
| "loss": 1.5759, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.99698435931343e-05, | |
| "loss": 1.8057, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9969385040771445e-05, | |
| "loss": 1.1004, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.996892303047306e-05, | |
| "loss": 1.6915, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.996845756230314e-05, | |
| "loss": 1.7905, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9967988636326136e-05, | |
| "loss": 2.0294, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.996751625260699e-05, | |
| "loss": 1.9933, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.996704041121113e-05, | |
| "loss": 1.5048, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.996656111220443e-05, | |
| "loss": 2.0881, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.996607835565331e-05, | |
| "loss": 1.3425, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9965592141624586e-05, | |
| "loss": 1.3072, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9965102470185596e-05, | |
| "loss": 2.5566, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9964609341404175e-05, | |
| "loss": 1.3695, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.99641127553486e-05, | |
| "loss": 2.0205, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9963612712087646e-05, | |
| "loss": 2.1791, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.996310921169056e-05, | |
| "loss": 1.6322, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.996260225422707e-05, | |
| "loss": 1.8695, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9962091839767386e-05, | |
| "loss": 2.1406, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.99615779683822e-05, | |
| "loss": 1.7668, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.996106064014265e-05, | |
| "loss": 1.6439, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9960539855120415e-05, | |
| "loss": 1.5069, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.996001561338759e-05, | |
| "loss": 1.9099, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.995948791501679e-05, | |
| "loss": 1.5446, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9958956760081085e-05, | |
| "loss": 1.8427, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9958422148654045e-05, | |
| "loss": 1.4109, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.995788408080969e-05, | |
| "loss": 1.7544, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9957342556622557e-05, | |
| "loss": 1.2727, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.995679757616762e-05, | |
| "loss": 1.4266, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.995624913952036e-05, | |
| "loss": 1.3209, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.995569724675673e-05, | |
| "loss": 1.5993, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.995514189795316e-05, | |
| "loss": 1.8296, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9954583093186544e-05, | |
| "loss": 1.5827, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9954020832534296e-05, | |
| "loss": 1.6316, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9953455116074263e-05, | |
| "loss": 1.7216, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9952885943884795e-05, | |
| "loss": 1.7878, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.995231331604472e-05, | |
| "loss": 1.7959, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.995173723263332e-05, | |
| "loss": 1.4636, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.99511576937304e-05, | |
| "loss": 1.365, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.995057469941621e-05, | |
| "loss": 1.9666, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.994998824977147e-05, | |
| "loss": 1.4686, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.994939834487742e-05, | |
| "loss": 1.4651, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.994880498481575e-05, | |
| "loss": 1.8047, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9948208169668606e-05, | |
| "loss": 1.779, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9947607899518675e-05, | |
| "loss": 1.172, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.994700417444908e-05, | |
| "loss": 1.6413, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9946396994543405e-05, | |
| "loss": 2.0329, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.994578635988576e-05, | |
| "loss": 1.8365, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9945172270560695e-05, | |
| "loss": 1.8724, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.994455472665327e-05, | |
| "loss": 1.7607, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9943933728248996e-05, | |
| "loss": 1.6619, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.994330927543387e-05, | |
| "loss": 1.9951, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.994268136829438e-05, | |
| "loss": 1.5133, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.994205000691747e-05, | |
| "loss": 2.238, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.994141519139058e-05, | |
| "loss": 1.6153, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.994077692180164e-05, | |
| "loss": 1.2494, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9940135198239024e-05, | |
| "loss": 1.7389, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9939490020791614e-05, | |
| "loss": 1.2335, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.993884138954875e-05, | |
| "loss": 1.4512, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.993818930460026e-05, | |
| "loss": 1.8298, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.993753376603645e-05, | |
| "loss": 1.333, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9936874773948094e-05, | |
| "loss": 1.8395, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.993621232842648e-05, | |
| "loss": 1.7438, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.993554642956332e-05, | |
| "loss": 1.2777, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.993487707745086e-05, | |
| "loss": 1.5628, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9934204272181764e-05, | |
| "loss": 1.6458, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9933528013849235e-05, | |
| "loss": 1.3878, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.993284830254691e-05, | |
| "loss": 1.6489, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.993216513836893e-05, | |
| "loss": 1.4492, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9931478521409895e-05, | |
| "loss": 1.598, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.99307884517649e-05, | |
| "loss": 1.9357, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9930094929529506e-05, | |
| "loss": 1.4987, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.992939795479976e-05, | |
| "loss": 1.6394, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.992869752767218e-05, | |
| "loss": 2.1398, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.992799364824377e-05, | |
| "loss": 1.4962, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.992728631661201e-05, | |
| "loss": 1.1668, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9926575532874847e-05, | |
| "loss": 1.3898, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9925861297130724e-05, | |
| "loss": 1.828, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.992514360947855e-05, | |
| "loss": 1.4453, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9924422470017715e-05, | |
| "loss": 1.774, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.992369787884809e-05, | |
| "loss": 2.0532, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.992296983607002e-05, | |
| "loss": 1.881, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.992223834178433e-05, | |
| "loss": 1.7959, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.992150339609232e-05, | |
| "loss": 2.1218, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.992076499909578e-05, | |
| "loss": 1.9065, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.992002315089695e-05, | |
| "loss": 1.783, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9919277851598575e-05, | |
| "loss": 1.5766, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.991852910130388e-05, | |
| "loss": 1.2045, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9917776900116556e-05, | |
| "loss": 1.7663, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.991702124814075e-05, | |
| "loss": 1.6417, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.991626214548113e-05, | |
| "loss": 1.6406, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.991549959224282e-05, | |
| "loss": 1.9329, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.991473358853142e-05, | |
| "loss": 2.0136, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9913964134453014e-05, | |
| "loss": 1.4504, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9913191230114156e-05, | |
| "loss": 1.9222, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.991241487562189e-05, | |
| "loss": 1.9192, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.991163507108373e-05, | |
| "loss": 1.5397, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.991085181660766e-05, | |
| "loss": 2.1056, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9910065112302175e-05, | |
| "loss": 1.8552, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9909274958276185e-05, | |
| "loss": 1.5675, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.990848135463915e-05, | |
| "loss": 1.7246, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.990768430150096e-05, | |
| "loss": 1.6873, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9906883798971995e-05, | |
| "loss": 2.0968, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9906079847163115e-05, | |
| "loss": 1.662, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.990527244618566e-05, | |
| "loss": 2.1308, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.990446159615144e-05, | |
| "loss": 1.6716, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9903647297172764e-05, | |
| "loss": 1.833, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.990282954936237e-05, | |
| "loss": 2.0275, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.990200835283353e-05, | |
| "loss": 1.7828, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.990118370769997e-05, | |
| "loss": 1.8333, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.990035561407588e-05, | |
| "loss": 1.5876, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.989952407207594e-05, | |
| "loss": 1.6541, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.989868908181532e-05, | |
| "loss": 1.9533, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.989785064340965e-05, | |
| "loss": 2.1998, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.989700875697503e-05, | |
| "loss": 1.2615, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9896163422628076e-05, | |
| "loss": 2.2554, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9895314640485835e-05, | |
| "loss": 1.6735, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9894462410665856e-05, | |
| "loss": 1.6169, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.989360673328617e-05, | |
| "loss": 0.9541, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.989274760846527e-05, | |
| "loss": 1.4256, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.989188503632213e-05, | |
| "loss": 1.576, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.989101901697621e-05, | |
| "loss": 1.315, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9890149550547454e-05, | |
| "loss": 1.6165, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.988927663715626e-05, | |
| "loss": 1.7303, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9888400276923505e-05, | |
| "loss": 1.389, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9887520469970574e-05, | |
| "loss": 1.6816, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9886637216419295e-05, | |
| "loss": 1.7488, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.988575051639199e-05, | |
| "loss": 1.3843, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9884860370011453e-05, | |
| "loss": 2.1209, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.988396677740097e-05, | |
| "loss": 1.7382, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9883069738684286e-05, | |
| "loss": 1.3885, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.988216925398562e-05, | |
| "loss": 1.8881, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.988126532342968e-05, | |
| "loss": 2.1026, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9880357947141664e-05, | |
| "loss": 1.7111, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9879447125247215e-05, | |
| "loss": 1.3994, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9878532857872476e-05, | |
| "loss": 1.9436, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9877615145144055e-05, | |
| "loss": 1.2814, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9876693987189054e-05, | |
| "loss": 1.5255, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.987576938413504e-05, | |
| "loss": 1.7781, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9874841336110044e-05, | |
| "loss": 1.834, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.987390984324261e-05, | |
| "loss": 1.0663, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9872974905661726e-05, | |
| "loss": 1.8486, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9872036523496866e-05, | |
| "loss": 1.4602, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9871094696877995e-05, | |
| "loss": 2.1106, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.987014942593553e-05, | |
| "loss": 1.1824, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.986920071080039e-05, | |
| "loss": 1.5234, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9868248551603945e-05, | |
| "loss": 1.2072, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.986729294847807e-05, | |
| "loss": 1.961, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.986633390155511e-05, | |
| "loss": 0.9717, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.986537141096786e-05, | |
| "loss": 1.7839, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.986440547684963e-05, | |
| "loss": 2.0075, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.986343609933418e-05, | |
| "loss": 1.4374, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.986246327855576e-05, | |
| "loss": 1.3924, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.986148701464909e-05, | |
| "loss": 1.5884, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.986050730774937e-05, | |
| "loss": 1.6415, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.985952415799228e-05, | |
| "loss": 1.8095, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9858537565513976e-05, | |
| "loss": 1.9536, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.985754753045107e-05, | |
| "loss": 1.1484, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9856554052940705e-05, | |
| "loss": 1.6013, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9855557133120436e-05, | |
| "loss": 1.5254, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.985455677112832e-05, | |
| "loss": 1.9474, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.985355296710291e-05, | |
| "loss": 1.816, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.985254572118321e-05, | |
| "loss": 1.451, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.985153503350872e-05, | |
| "loss": 1.353, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.98505209042194e-05, | |
| "loss": 1.6463, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.98495033334557e-05, | |
| "loss": 1.8436, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.984848232135853e-05, | |
| "loss": 1.5973, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.984745786806929e-05, | |
| "loss": 1.6524, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.984642997372987e-05, | |
| "loss": 1.499, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.984539863848259e-05, | |
| "loss": 1.8387, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.98443638624703e-05, | |
| "loss": 2.1306, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.98433256458363e-05, | |
| "loss": 1.9113, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.984228398872436e-05, | |
| "loss": 1.7941, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.984123889127874e-05, | |
| "loss": 1.957, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.984019035364418e-05, | |
| "loss": 1.5927, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.983913837596588e-05, | |
| "loss": 1.6275, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.983808295838953e-05, | |
| "loss": 1.5985, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.983702410106128e-05, | |
| "loss": 1.5638, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.983596180412778e-05, | |
| "loss": 1.431, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.983489606773615e-05, | |
| "loss": 1.6035, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9833826892033966e-05, | |
| "loss": 2.0482, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.98327542771693e-05, | |
| "loss": 1.7167, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9831678223290705e-05, | |
| "loss": 1.2944, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9830598730547185e-05, | |
| "loss": 1.6265, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.982951579908824e-05, | |
| "loss": 1.6205, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.982842942906386e-05, | |
| "loss": 2.1644, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9827339620624466e-05, | |
| "loss": 1.4672, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9826246373920994e-05, | |
| "loss": 1.6029, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9825149689104846e-05, | |
| "loss": 1.5438, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.98240495663279e-05, | |
| "loss": 1.9061, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9822946005742497e-05, | |
| "loss": 1.8342, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9821839007501484e-05, | |
| "loss": 1.8016, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9820728571758155e-05, | |
| "loss": 1.6384, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9819614698666295e-05, | |
| "loss": 1.4692, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9818497388380154e-05, | |
| "loss": 1.4886, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9817376641054466e-05, | |
| "loss": 1.4903, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.981625245684445e-05, | |
| "loss": 2.093, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.981512483590578e-05, | |
| "loss": 1.9255, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.981399377839463e-05, | |
| "loss": 1.6346, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.981285928446762e-05, | |
| "loss": 1.6704, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.981172135428188e-05, | |
| "loss": 0.981, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9810579987994974e-05, | |
| "loss": 1.7046, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9809435185765e-05, | |
| "loss": 1.6879, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.980828694775046e-05, | |
| "loss": 2.1196, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.980713527411041e-05, | |
| "loss": 1.6099, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9805980165004304e-05, | |
| "loss": 1.6977, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.980482162059213e-05, | |
| "loss": 1.8916, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.980365964103434e-05, | |
| "loss": 2.0914, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.980249422649183e-05, | |
| "loss": 1.268, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.980132537712602e-05, | |
| "loss": 1.7568, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9800153093098756e-05, | |
| "loss": 1.3604, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9798977374572395e-05, | |
| "loss": 1.5185, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.979779822170977e-05, | |
| "loss": 1.8898, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9796615634674155e-05, | |
| "loss": 2.0664, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.979542961362934e-05, | |
| "loss": 1.4639, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9794240158739566e-05, | |
| "loss": 1.2679, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9793047270169566e-05, | |
| "loss": 1.6697, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.979185094808453e-05, | |
| "loss": 1.3827, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.979065119265013e-05, | |
| "loss": 1.471, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9789448004032533e-05, | |
| "loss": 2.0245, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.978824138239835e-05, | |
| "loss": 1.6898, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.978703132791469e-05, | |
| "loss": 1.8281, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.978581784074913e-05, | |
| "loss": 1.8089, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.978460092106971e-05, | |
| "loss": 1.6343, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9783380569044974e-05, | |
| "loss": 2.2785, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9782156784843916e-05, | |
| "loss": 1.9567, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9780929568636015e-05, | |
| "loss": 1.3461, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.977969892059123e-05, | |
| "loss": 1.4954, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.977846484087998e-05, | |
| "loss": 1.305, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9777227329673174e-05, | |
| "loss": 1.8285, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.977598638714219e-05, | |
| "loss": 1.4243, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.97747420134589e-05, | |
| "loss": 1.7667, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9773494208795604e-05, | |
| "loss": 1.9451, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9772242973325125e-05, | |
| "loss": 1.5445, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9770988307220736e-05, | |
| "loss": 1.6328, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.976973021065619e-05, | |
| "loss": 1.6148, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.976846868380572e-05, | |
| "loss": 1.3625, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.976720372684404e-05, | |
| "loss": 1.4694, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9765935339946326e-05, | |
| "loss": 1.5375, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.976466352328822e-05, | |
| "loss": 1.805, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.976338827704586e-05, | |
| "loss": 1.8427, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.976210960139586e-05, | |
| "loss": 1.9649, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.976082749651529e-05, | |
| "loss": 1.4552, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9759541962581715e-05, | |
| "loss": 1.7111, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.975825299977315e-05, | |
| "loss": 2.0492, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9756960608268104e-05, | |
| "loss": 2.4812, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.975566478824556e-05, | |
| "loss": 1.3533, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.975436553988498e-05, | |
| "loss": 1.7609, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9753062863366276e-05, | |
| "loss": 1.3683, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.975175675886986e-05, | |
| "loss": 1.5215, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9750447226576617e-05, | |
| "loss": 1.2101, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9749134266667894e-05, | |
| "loss": 1.5135, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9747817879325514e-05, | |
| "loss": 1.9498, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9746498064731786e-05, | |
| "loss": 1.9672, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9745174823069486e-05, | |
| "loss": 2.1517, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9743848154521863e-05, | |
| "loss": 1.411, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.974251805927266e-05, | |
| "loss": 1.3972, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.974118453750605e-05, | |
| "loss": 1.6883, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.973984758940672e-05, | |
| "loss": 1.4422, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.973850721515983e-05, | |
| "loss": 1.5046, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.973716341495099e-05, | |
| "loss": 1.9012, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.973581618896631e-05, | |
| "loss": 1.6414, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.973446553739236e-05, | |
| "loss": 1.4029, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.973311146041619e-05, | |
| "loss": 1.3923, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.9731753958225316e-05, | |
| "loss": 0.9104, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.973039303100773e-05, | |
| "loss": 1.4314, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.972902867895191e-05, | |
| "loss": 1.5725, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.972766090224681e-05, | |
| "loss": 1.8915, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.972628970108183e-05, | |
| "loss": 2.0525, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.972491507564688e-05, | |
| "loss": 1.4409, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.9723537026132315e-05, | |
| "loss": 1.4878, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.972215555272899e-05, | |
| "loss": 1.6305, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.972077065562821e-05, | |
| "loss": 1.6004, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.971938233502178e-05, | |
| "loss": 1.6837, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.971799059110195e-05, | |
| "loss": 1.6939, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.971659542406145e-05, | |
| "loss": 1.6326, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.971519683409352e-05, | |
| "loss": 2.0437, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.9713794821391825e-05, | |
| "loss": 1.8919, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.9712389386150535e-05, | |
| "loss": 1.4444, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.971098052856428e-05, | |
| "loss": 1.7376, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.970956824882816e-05, | |
| "loss": 1.2145, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.970815254713779e-05, | |
| "loss": 1.7657, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.970673342368919e-05, | |
| "loss": 1.4632, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.97053108786789e-05, | |
| "loss": 2.0926, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.970388491230393e-05, | |
| "loss": 1.6105, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.9702455524761764e-05, | |
| "loss": 0.8295, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.9701022716250346e-05, | |
| "loss": 1.5312, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.9699586486968094e-05, | |
| "loss": 1.2171, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.969814683711391e-05, | |
| "loss": 1.6475, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.9696703766887174e-05, | |
| "loss": 1.3856, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.9695257276487736e-05, | |
| "loss": 1.413, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.9693807366115905e-05, | |
| "loss": 1.2724, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.969235403597248e-05, | |
| "loss": 2.0263, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.969089728625873e-05, | |
| "loss": 1.6963, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.968943711717638e-05, | |
| "loss": 1.6354, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.968797352892768e-05, | |
| "loss": 1.7197, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.968650652171528e-05, | |
| "loss": 1.7091, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.9685036095742365e-05, | |
| "loss": 2.0653, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.968356225121256e-05, | |
| "loss": 1.8299, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.968208498832997e-05, | |
| "loss": 2.0181, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.968060430729918e-05, | |
| "loss": 1.7343, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.967912020832526e-05, | |
| "loss": 1.9446, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.967763269161372e-05, | |
| "loss": 1.4359, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.967614175737056e-05, | |
| "loss": 2.211, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.967464740580227e-05, | |
| "loss": 1.5243, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.967314963711579e-05, | |
| "loss": 1.0646, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.967164845151855e-05, | |
| "loss": 1.9905, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.967014384921842e-05, | |
| "loss": 1.9194, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.96686358304238e-05, | |
| "loss": 1.5094, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.966712439534351e-05, | |
| "loss": 1.8683, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.9665609544186867e-05, | |
| "loss": 1.422, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.966409127716367e-05, | |
| "loss": 1.8794, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.966256959448416e-05, | |
| "loss": 1.8767, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.966104449635909e-05, | |
| "loss": 1.4035, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.965951598299965e-05, | |
| "loss": 1.9975, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.9657984054617526e-05, | |
| "loss": 2.1903, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.9656448711424876e-05, | |
| "loss": 1.2803, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.9654909953634316e-05, | |
| "loss": 1.4093, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.965336778145895e-05, | |
| "loss": 1.5228, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.965182219511234e-05, | |
| "loss": 1.8209, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.9650273194808546e-05, | |
| "loss": 1.7129, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.964872078076207e-05, | |
| "loss": 1.9, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.96471649531879e-05, | |
| "loss": 1.5979, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.964560571230151e-05, | |
| "loss": 1.6498, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.964404305831883e-05, | |
| "loss": 2.0043, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.964247699145626e-05, | |
| "loss": 1.7263, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.964090751193069e-05, | |
| "loss": 2.1116, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.9639334619959464e-05, | |
| "loss": 2.2474, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.963775831576041e-05, | |
| "loss": 1.6965, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.963617859955183e-05, | |
| "loss": 1.7008, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.963459547155249e-05, | |
| "loss": 1.7393, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.963300893198164e-05, | |
| "loss": 1.9297, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.9631418981058974e-05, | |
| "loss": 1.6991, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.9629825619004704e-05, | |
| "loss": 1.2928, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.962822884603948e-05, | |
| "loss": 1.7254, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.9626628662384434e-05, | |
| "loss": 1.9312, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.962502506826117e-05, | |
| "loss": 1.5214, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.962341806389176e-05, | |
| "loss": 1.3667, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.9621807649498764e-05, | |
| "loss": 1.7049, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.962019382530521e-05, | |
| "loss": 1.7479, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.961857659153456e-05, | |
| "loss": 1.5101, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.961695594841082e-05, | |
| "loss": 1.3799, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.961533189615839e-05, | |
| "loss": 2.4575, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.961370443500221e-05, | |
| "loss": 1.5559, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.961207356516765e-05, | |
| "loss": 1.5502, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.961043928688056e-05, | |
| "loss": 1.3477, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.960880160036728e-05, | |
| "loss": 1.7157, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.960716050585459e-05, | |
| "loss": 1.4801, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.960551600356977e-05, | |
| "loss": 1.3484, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.960386809374057e-05, | |
| "loss": 1.8751, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.960221677659519e-05, | |
| "loss": 1.6072, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9600562052362333e-05, | |
| "loss": 2.4259, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9598903921271135e-05, | |
| "loss": 1.9676, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.959724238355123e-05, | |
| "loss": 2.193, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.959557743943274e-05, | |
| "loss": 1.8709, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9593909089146224e-05, | |
| "loss": 1.5639, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.959223733292272e-05, | |
| "loss": 1.5942, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9590562170993755e-05, | |
| "loss": 2.474, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.958888360359131e-05, | |
| "loss": 1.8667, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.958720163094786e-05, | |
| "loss": 1.9014, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.958551625329631e-05, | |
| "loss": 1.3199, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.958382747087008e-05, | |
| "loss": 1.4193, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.958213528390305e-05, | |
| "loss": 1.6693, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.958043969262955e-05, | |
| "loss": 1.3332, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.957874069728441e-05, | |
| "loss": 1.3463, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.957703829810292e-05, | |
| "loss": 1.3705, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9575332495320826e-05, | |
| "loss": 2.1182, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.957362328917437e-05, | |
| "loss": 1.7346, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9571910679900255e-05, | |
| "loss": 1.3646, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.957019466773565e-05, | |
| "loss": 1.9329, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.95684752529182e-05, | |
| "loss": 1.8554, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9566752435686036e-05, | |
| "loss": 1.9678, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.956502621627773e-05, | |
| "loss": 1.9655, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.956329659493234e-05, | |
| "loss": 1.5136, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.95615635718894e-05, | |
| "loss": 1.5812, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.955982714738892e-05, | |
| "loss": 2.4143, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9558087321671374e-05, | |
| "loss": 2.0219, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.955634409497768e-05, | |
| "loss": 1.3464, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.955459746754928e-05, | |
| "loss": 2.011, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.955284743962804e-05, | |
| "loss": 1.1818, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9551094011456335e-05, | |
| "loss": 1.9715, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.954933718327697e-05, | |
| "loss": 2.2617, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.954757695533326e-05, | |
| "loss": 2.0038, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.954581332786896e-05, | |
| "loss": 1.2001, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.954404630112833e-05, | |
| "loss": 1.1356, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.954227587535606e-05, | |
| "loss": 1.6941, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9540502050797335e-05, | |
| "loss": 1.3234, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9538724827697814e-05, | |
| "loss": 2.1369, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.953694420630361e-05, | |
| "loss": 2.0145, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.953516018686133e-05, | |
| "loss": 1.5505, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.953337276961803e-05, | |
| "loss": 1.5479, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.953158195482124e-05, | |
| "loss": 2.4011, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.952978774271897e-05, | |
| "loss": 1.5701, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9527990133559684e-05, | |
| "loss": 1.576, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.952618912759235e-05, | |
| "loss": 1.7477, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9524384725066355e-05, | |
| "loss": 1.9253, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.952257692623161e-05, | |
| "loss": 1.5152, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.952076573133846e-05, | |
| "loss": 1.5489, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.951895114063773e-05, | |
| "loss": 1.792, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9517133154380724e-05, | |
| "loss": 1.4957, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9515311772819214e-05, | |
| "loss": 1.9255, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.951348699620542e-05, | |
| "loss": 1.5982, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9511658824792065e-05, | |
| "loss": 1.4355, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.950982725883232e-05, | |
| "loss": 1.4889, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9507992298579845e-05, | |
| "loss": 1.4814, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.950615394428874e-05, | |
| "loss": 1.7735, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9504312196213596e-05, | |
| "loss": 1.5049, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.950246705460949e-05, | |
| "loss": 1.516, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.950061851973193e-05, | |
| "loss": 1.805, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.949876659183692e-05, | |
| "loss": 1.3621, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.949691127118093e-05, | |
| "loss": 1.8475, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9495052558020896e-05, | |
| "loss": 2.3199, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.949319045261423e-05, | |
| "loss": 1.3384, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.949132495521881e-05, | |
| "loss": 1.4563, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.948945606609297e-05, | |
| "loss": 1.6799, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.948758378549554e-05, | |
| "loss": 1.7253, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.94857081136858e-05, | |
| "loss": 1.0717, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9483829050923514e-05, | |
| "loss": 1.8967, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9481946597468896e-05, | |
| "loss": 1.4919, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.948006075358266e-05, | |
| "loss": 1.4128, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.947817151952595e-05, | |
| "loss": 0.9676, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9476278895560406e-05, | |
| "loss": 2.0732, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9474382881948146e-05, | |
| "loss": 1.3808, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.947248347895172e-05, | |
| "loss": 1.4687, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.947058068683419e-05, | |
| "loss": 1.4168, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.946867450585906e-05, | |
| "loss": 1.352, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.946676493629031e-05, | |
| "loss": 1.8284, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9464851978392396e-05, | |
| "loss": 1.2706, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.946293563243023e-05, | |
| "loss": 1.4926, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.946101589866921e-05, | |
| "loss": 1.3684, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.945909277737519e-05, | |
| "loss": 1.4088, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.945716626881449e-05, | |
| "loss": 1.5532, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9455236373253924e-05, | |
| "loss": 1.7944, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9453303090960745e-05, | |
| "loss": 1.4292, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9451366422202684e-05, | |
| "loss": 2.0021, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.944942636724795e-05, | |
| "loss": 1.7356, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.944748292636522e-05, | |
| "loss": 1.6986, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.944553609982363e-05, | |
| "loss": 2.0781, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.944358588789279e-05, | |
| "loss": 2.2571, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.944163229084278e-05, | |
| "loss": 1.5924, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.943967530894415e-05, | |
| "loss": 2.0977, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.943771494246791e-05, | |
| "loss": 1.8041, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.943575119168555e-05, | |
| "loss": 1.6522, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9433784056869023e-05, | |
| "loss": 1.4012, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.943181353829076e-05, | |
| "loss": 1.3196, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9429839636223644e-05, | |
| "loss": 1.7751, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.942786235094103e-05, | |
| "loss": 1.3212, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9425881682716747e-05, | |
| "loss": 1.7491, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.94238976318251e-05, | |
| "loss": 1.7227, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.942191019854085e-05, | |
| "loss": 1.4114, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.941991938313924e-05, | |
| "loss": 2.0666, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.941792518589596e-05, | |
| "loss": 1.7138, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.941592760708718e-05, | |
| "loss": 1.7506, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9413926646989545e-05, | |
| "loss": 1.8036, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.941192230588015e-05, | |
| "loss": 1.357, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.94099145840366e-05, | |
| "loss": 1.5612, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9407903481736904e-05, | |
| "loss": 1.1086, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.940588899925959e-05, | |
| "loss": 1.217, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.940387113688363e-05, | |
| "loss": 1.6993, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9401849894888485e-05, | |
| "loss": 1.684, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.939982527355407e-05, | |
| "loss": 1.8285, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.939779727316075e-05, | |
| "loss": 1.28, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.939576589398939e-05, | |
| "loss": 1.6247, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9393731136321306e-05, | |
| "loss": 1.7047, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9391693000438286e-05, | |
| "loss": 1.67, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.93896514866226e-05, | |
| "loss": 2.0157, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9387606595156944e-05, | |
| "loss": 1.5195, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9385558326324534e-05, | |
| "loss": 1.9139, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9383506680409006e-05, | |
| "loss": 1.5393, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.938145165769451e-05, | |
| "loss": 2.1366, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9379393258465623e-05, | |
| "loss": 1.835, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.93773314830074e-05, | |
| "loss": 1.4716, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.93752663316054e-05, | |
| "loss": 1.8467, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.937319780454559e-05, | |
| "loss": 1.5434, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.937112590211445e-05, | |
| "loss": 1.5179, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.936905062459891e-05, | |
| "loss": 1.5824, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9366971972286356e-05, | |
| "loss": 1.8289, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.936488994546467e-05, | |
| "loss": 1.7236, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.936280454442218e-05, | |
| "loss": 2.3392, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.936071576944769e-05, | |
| "loss": 1.6376, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9358623620830456e-05, | |
| "loss": 1.7252, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9356528098860235e-05, | |
| "loss": 1.4184, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.93544292038272e-05, | |
| "loss": 1.4501, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9352326936022055e-05, | |
| "loss": 1.4233, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.935022129573591e-05, | |
| "loss": 1.6631, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9348112283260376e-05, | |
| "loss": 1.155, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.934599989888753e-05, | |
| "loss": 1.2512, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.934388414290991e-05, | |
| "loss": 1.3304, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9341765015620514e-05, | |
| "loss": 1.5858, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.933964251731281e-05, | |
| "loss": 1.3061, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9337516648280756e-05, | |
| "loss": 1.782, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.933538740881874e-05, | |
| "loss": 1.8033, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.933325479922163e-05, | |
| "loss": 1.8176, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9331118819784773e-05, | |
| "loss": 1.8735, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.932897947080398e-05, | |
| "loss": 1.59, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9326836752575514e-05, | |
| "loss": 2.2281, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.932469066539612e-05, | |
| "loss": 2.0652, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9322541209563e-05, | |
| "loss": 1.9974, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9320388385373825e-05, | |
| "loss": 1.5013, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9318232193126737e-05, | |
| "loss": 1.5165, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.931607263312032e-05, | |
| "loss": 1.4775, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.931390970565368e-05, | |
| "loss": 1.8116, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.931174341102634e-05, | |
| "loss": 1.731, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9309573749538295e-05, | |
| "loss": 1.5851, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9307400721490015e-05, | |
| "loss": 1.6169, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.930522432718245e-05, | |
| "loss": 1.6315, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.930304456691699e-05, | |
| "loss": 1.6771, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.930086144099551e-05, | |
| "loss": 1.083, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9298674949720335e-05, | |
| "loss": 1.9102, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9296485093394283e-05, | |
| "loss": 1.7891, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.92942918723206e-05, | |
| "loss": 1.2337, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.929209528680304e-05, | |
| "loss": 1.8288, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.928989533714579e-05, | |
| "loss": 1.8799, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.92876920236535e-05, | |
| "loss": 2.0931, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9285485346631334e-05, | |
| "loss": 1.0637, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9283275306384856e-05, | |
| "loss": 2.1745, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.928106190322015e-05, | |
| "loss": 2.3641, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.927884513744373e-05, | |
| "loss": 1.5699, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.92766250093626e-05, | |
| "loss": 0.9195, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.927440151928421e-05, | |
| "loss": 1.6488, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.927217466751648e-05, | |
| "loss": 1.3872, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.9269944454367813e-05, | |
| "loss": 2.0154, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.926771088014707e-05, | |
| "loss": 1.8308, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.926547394516354e-05, | |
| "loss": 1.6353, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.926323364972705e-05, | |
| "loss": 1.57, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.926098999414782e-05, | |
| "loss": 1.9442, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.925874297873659e-05, | |
| "loss": 1.8304, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.9256492603804526e-05, | |
| "loss": 1.399, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.925423886966328e-05, | |
| "loss": 1.5874, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.925198177662497e-05, | |
| "loss": 1.8739, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.924972132500217e-05, | |
| "loss": 1.6697, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.924745751510792e-05, | |
| "loss": 1.3031, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.9245190347255746e-05, | |
| "loss": 1.5218, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.924291982175959e-05, | |
| "loss": 2.1374, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.924064593893392e-05, | |
| "loss": 1.1152, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.923836869909362e-05, | |
| "loss": 1.8031, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.9236088102554075e-05, | |
| "loss": 2.0702, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.92338041496311e-05, | |
| "loss": 1.6122, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.9231516840641e-05, | |
| "loss": 1.8458, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.9229226175900544e-05, | |
| "loss": 1.5888, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.922693215572695e-05, | |
| "loss": 1.5632, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.922463478043792e-05, | |
| "loss": 1.7268, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.922233405035159e-05, | |
| "loss": 1.6798, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.922002996578661e-05, | |
| "loss": 1.2235, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.921772252706205e-05, | |
| "loss": 1.5721, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.9215411734497464e-05, | |
| "loss": 1.8032, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.921309758841285e-05, | |
| "loss": 1.4632, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.9210780089128715e-05, | |
| "loss": 1.6855, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.920845923696599e-05, | |
| "loss": 1.6799, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.920613503224608e-05, | |
| "loss": 2.0956, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.920380747529085e-05, | |
| "loss": 1.4309, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.9201476566422656e-05, | |
| "loss": 1.1498, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.919914230596429e-05, | |
| "loss": 1.988, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.9196804694239006e-05, | |
| "loss": 1.8407, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.919446373157055e-05, | |
| "loss": 1.7266, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.91921194182831e-05, | |
| "loss": 1.7592, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.918977175470133e-05, | |
| "loss": 1.1552, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.918742074115035e-05, | |
| "loss": 1.5499, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.9185066377955735e-05, | |
| "loss": 1.2127, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.918270866544356e-05, | |
| "loss": 1.4216, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.918034760394031e-05, | |
| "loss": 1.2214, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.917798319377297e-05, | |
| "loss": 1.1203, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.917561543526899e-05, | |
| "loss": 1.6326, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.917324432875627e-05, | |
| "loss": 1.8997, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.917086987456317e-05, | |
| "loss": 2.0038, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.916849207301852e-05, | |
| "loss": 1.6171, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.9166110924451634e-05, | |
| "loss": 1.6013, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.916372642919225e-05, | |
| "loss": 2.0288, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.916133858757059e-05, | |
| "loss": 1.8568, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.915894739991735e-05, | |
| "loss": 2.1949, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.915655286656368e-05, | |
| "loss": 1.4406, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.915415498784118e-05, | |
| "loss": 1.338, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.915175376408193e-05, | |
| "loss": 1.5971, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.914934919561848e-05, | |
| "loss": 1.8418, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.914694128278381e-05, | |
| "loss": 1.2456, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.914453002591139e-05, | |
| "loss": 1.3663, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.9142115425335154e-05, | |
| "loss": 1.226, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.91396974813895e-05, | |
| "loss": 1.6677, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.913727619440926e-05, | |
| "loss": 2.4861, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.913485156472978e-05, | |
| "loss": 1.4411, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.9132423592686816e-05, | |
| "loss": 1.5114, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.912999227861662e-05, | |
| "loss": 1.9969, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.912755762285589e-05, | |
| "loss": 1.7201, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.912511962574181e-05, | |
| "loss": 1.9378, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.912267828761199e-05, | |
| "loss": 1.5512, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.9120233608804546e-05, | |
| "loss": 1.6215, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.9117785589658014e-05, | |
| "loss": 1.6503, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.911533423051143e-05, | |
| "loss": 1.5132, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.9112879531704256e-05, | |
| "loss": 2.3205, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.911042149357646e-05, | |
| "loss": 1.644, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.910796011646843e-05, | |
| "loss": 1.7747, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.9105495400721034e-05, | |
| "loss": 1.9071, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.910302734667562e-05, | |
| "loss": 1.2772, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.910055595467396e-05, | |
| "loss": 1.2807, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.9098081225058326e-05, | |
| "loss": 1.6285, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.9095603158171436e-05, | |
| "loss": 1.7838, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.9093121754356454e-05, | |
| "loss": 1.7611, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.909063701395704e-05, | |
| "loss": 1.8441, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.908814893731728e-05, | |
| "loss": 1.4508, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.908565752478176e-05, | |
| "loss": 1.8418, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.90831627766955e-05, | |
| "loss": 1.1092, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.908066469340399e-05, | |
| "loss": 2.0333, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.907816327525318e-05, | |
| "loss": 1.6242, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.9075658522589486e-05, | |
| "loss": 1.5041, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.9073150435759786e-05, | |
| "loss": 1.5954, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.907063901511141e-05, | |
| "loss": 1.4664, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.906812426099217e-05, | |
| "loss": 1.4326, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.90656061737503e-05, | |
| "loss": 1.6302, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.9063084753734556e-05, | |
| "loss": 2.325, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.906056000129411e-05, | |
| "loss": 1.2344, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.90580319167786e-05, | |
| "loss": 1.5865, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.905550050053813e-05, | |
| "loss": 1.4518, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.905296575292329e-05, | |
| "loss": 1.8988, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.9050427674285085e-05, | |
| "loss": 1.625, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.9047886264975016e-05, | |
| "loss": 1.6413, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.9045341525345035e-05, | |
| "loss": 1.9323, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.904279345574756e-05, | |
| "loss": 1.6475, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.9040242056535465e-05, | |
| "loss": 1.6182, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.903768732806208e-05, | |
| "loss": 1.8794, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.9035129270681196e-05, | |
| "loss": 2.3609, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.903256788474708e-05, | |
| "loss": 1.6079, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.9030003170614456e-05, | |
| "loss": 1.6279, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.9027435128638494e-05, | |
| "loss": 1.4993, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.902486375917483e-05, | |
| "loss": 1.8698, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.902228906257958e-05, | |
| "loss": 1.6502, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.90197110392093e-05, | |
| "loss": 1.2136, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.9017129689421e-05, | |
| "loss": 1.8118, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.901454501357219e-05, | |
| "loss": 1.9347, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.9011957012020793e-05, | |
| "loss": 1.9713, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.900936568512522e-05, | |
| "loss": 2.036, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.900677103324433e-05, | |
| "loss": 1.5848, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.900417305673746e-05, | |
| "loss": 1.1603, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.900157175596438e-05, | |
| "loss": 0.9986, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.8998967131285356e-05, | |
| "loss": 1.431, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.899635918306108e-05, | |
| "loss": 1.6065, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.899374791165273e-05, | |
| "loss": 1.7656, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.899113331742192e-05, | |
| "loss": 1.4621, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.898851540073075e-05, | |
| "loss": 2.0439, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.898589416194176e-05, | |
| "loss": 1.8089, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.898326960141796e-05, | |
| "loss": 1.5008, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.898064171952281e-05, | |
| "loss": 1.4781, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.897801051662025e-05, | |
| "loss": 1.5729, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8975375993074665e-05, | |
| "loss": 2.0593, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.89727381492509e-05, | |
| "loss": 1.5069, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.897009698551426e-05, | |
| "loss": 1.4593, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.896745250223052e-05, | |
| "loss": 1.535, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.896480469976589e-05, | |
| "loss": 1.4929, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8962153578487055e-05, | |
| "loss": 1.2304, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.895949913876119e-05, | |
| "loss": 1.9493, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.895684138095588e-05, | |
| "loss": 1.4758, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.895418030543919e-05, | |
| "loss": 1.8714, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8951515912579645e-05, | |
| "loss": 1.3302, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.894884820274623e-05, | |
| "loss": 2.0195, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8946177176308396e-05, | |
| "loss": 1.5886, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8943502833636026e-05, | |
| "loss": 1.7283, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8940825175099504e-05, | |
| "loss": 1.3368, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8938144201069635e-05, | |
| "loss": 1.2978, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8935459911917704e-05, | |
| "loss": 1.4301, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.893277230801546e-05, | |
| "loss": 1.726, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.893008138973508e-05, | |
| "loss": 1.7124, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.892738715744924e-05, | |
| "loss": 1.1582, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.892468961153105e-05, | |
| "loss": 1.6847, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.892198875235409e-05, | |
| "loss": 1.9052, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.891928458029238e-05, | |
| "loss": 1.7829, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8916577095720424e-05, | |
| "loss": 1.5794, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.891386629901317e-05, | |
| "loss": 2.0447, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.891115219054603e-05, | |
| "loss": 2.0359, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.890843477069487e-05, | |
| "loss": 2.1139, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.890571403983603e-05, | |
| "loss": 1.3969, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.890298999834627e-05, | |
| "loss": 1.7877, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8900262646602854e-05, | |
| "loss": 1.5858, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.889753198498348e-05, | |
| "loss": 1.6977, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8894798013866315e-05, | |
| "loss": 1.1872, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8892060733629963e-05, | |
| "loss": 2.2305, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.888932014465352e-05, | |
| "loss": 1.778, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.888657624731651e-05, | |
| "loss": 1.5463, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.888382904199893e-05, | |
| "loss": 1.4502, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8881078529081236e-05, | |
| "loss": 1.4547, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.887832470894433e-05, | |
| "loss": 1.4357, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.887556758196959e-05, | |
| "loss": 1.8349, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8872807148538845e-05, | |
| "loss": 1.543, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.887004340903436e-05, | |
| "loss": 2.1176, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8867276363838896e-05, | |
| "loss": 1.6262, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.886450601333564e-05, | |
| "loss": 1.5648, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8861732357908264e-05, | |
| "loss": 1.3948, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.885895539794087e-05, | |
| "loss": 1.3463, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.885617513381803e-05, | |
| "loss": 1.8717, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.885339156592479e-05, | |
| "loss": 1.6983, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.885060469464662e-05, | |
| "loss": 1.734, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8847814520369475e-05, | |
| "loss": 1.7681, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.884502104347976e-05, | |
| "loss": 1.2692, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.884222426436433e-05, | |
| "loss": 1.4418, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.883942418341051e-05, | |
| "loss": 1.1686, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.883662080100606e-05, | |
| "loss": 1.7017, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.883381411753922e-05, | |
| "loss": 1.4877, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.883100413339869e-05, | |
| "loss": 1.5458, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8828190848973595e-05, | |
| "loss": 1.5484, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8825374264653565e-05, | |
| "loss": 1.834, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.882255438082863e-05, | |
| "loss": 2.2466, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8819731197889334e-05, | |
| "loss": 1.459, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.881690471622663e-05, | |
| "loss": 1.9088, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.881407493623197e-05, | |
| "loss": 1.6538, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.881124185829723e-05, | |
| "loss": 2.1155, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.880840548281475e-05, | |
| "loss": 2.1894, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8805565810177334e-05, | |
| "loss": 2.033, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8802722840778246e-05, | |
| "loss": 1.9662, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.87998765750112e-05, | |
| "loss": 1.6538, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.879702701327035e-05, | |
| "loss": 1.259, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.879417415595034e-05, | |
| "loss": 1.6262, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.879131800344626e-05, | |
| "loss": 1.6802, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8788458556153635e-05, | |
| "loss": 1.336, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.878559581446846e-05, | |
| "loss": 1.8981, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8782729778787214e-05, | |
| "loss": 1.6329, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.877986044950677e-05, | |
| "loss": 1.1941, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.877698782702451e-05, | |
| "loss": 1.3871, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8774111911738256e-05, | |
| "loss": 1.829, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8771232704046283e-05, | |
| "loss": 1.4151, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8768350204347324e-05, | |
| "loss": 1.7855, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.876546441304056e-05, | |
| "loss": 1.3968, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.876257533052565e-05, | |
| "loss": 1.8227, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8759682957202685e-05, | |
| "loss": 1.4529, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.875678729347223e-05, | |
| "loss": 1.1851, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8753888339735274e-05, | |
| "loss": 1.2907, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8750986096393315e-05, | |
| "loss": 1.7967, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.874808056384825e-05, | |
| "loss": 1.4848, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8745171742502484e-05, | |
| "loss": 1.6839, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.874225963275882e-05, | |
| "loss": 1.8314, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.873934423502058e-05, | |
| "loss": 2.0282, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8736425549691475e-05, | |
| "loss": 1.341, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.873350357717573e-05, | |
| "loss": 1.4107, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8730578317878e-05, | |
| "loss": 1.4731, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.872764977220338e-05, | |
| "loss": 1.9332, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.872471794055744e-05, | |
| "loss": 1.8369, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.872178282334621e-05, | |
| "loss": 1.3453, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.871884442097616e-05, | |
| "loss": 1.7878, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.871590273385421e-05, | |
| "loss": 1.9997, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.871295776238777e-05, | |
| "loss": 1.9064, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8710009506984656e-05, | |
| "loss": 1.8554, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8707057968053175e-05, | |
| "loss": 2.1312, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8704103146002075e-05, | |
| "loss": 1.972, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.870114504124057e-05, | |
| "loss": 2.2831, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.86981836541783e-05, | |
| "loss": 1.4848, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.869521898522539e-05, | |
| "loss": 1.9652, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.86922510347924e-05, | |
| "loss": 1.4183, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.868927980329037e-05, | |
| "loss": 2.0386, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.868630529113075e-05, | |
| "loss": 1.7273, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8683327498725494e-05, | |
| "loss": 1.3664, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.868034642648699e-05, | |
| "loss": 1.2684, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.867736207482806e-05, | |
| "loss": 1.5481, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8674374444162005e-05, | |
| "loss": 1.2832, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.867138353490258e-05, | |
| "loss": 2.0168, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8668389347463973e-05, | |
| "loss": 1.2822, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8665391882260856e-05, | |
| "loss": 1.6619, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.866239113970833e-05, | |
| "loss": 2.1701, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8659387120221956e-05, | |
| "loss": 2.0322, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.865637982421776e-05, | |
| "loss": 1.8849, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8653369252112214e-05, | |
| "loss": 1.4536, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8650355404322234e-05, | |
| "loss": 1.791, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.86473382812652e-05, | |
| "loss": 1.9112, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8644317883358956e-05, | |
| "loss": 1.5631, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8641294211021774e-05, | |
| "loss": 1.4402, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.86382672646724e-05, | |
| "loss": 1.6691, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.863523704473002e-05, | |
| "loss": 1.4176, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.863220355161429e-05, | |
| "loss": 1.6321, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.862916678574531e-05, | |
| "loss": 1.6517, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.862612674754362e-05, | |
| "loss": 1.6625, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.862308343743024e-05, | |
| "loss": 1.2245, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8620036855826624e-05, | |
| "loss": 1.1567, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.861698700315468e-05, | |
| "loss": 1.4538, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.861393387983677e-05, | |
| "loss": 1.5213, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8610877486295726e-05, | |
| "loss": 1.8746, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.86078178229548e-05, | |
| "loss": 1.8911, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.860475489023773e-05, | |
| "loss": 1.3414, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8601688688568695e-05, | |
| "loss": 1.3054, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8598619218372315e-05, | |
| "loss": 1.9671, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8595546480073675e-05, | |
| "loss": 1.9155, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8592470474098304e-05, | |
| "loss": 1.7364, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.858939120087219e-05, | |
| "loss": 1.8287, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.858630866082179e-05, | |
| "loss": 1.2718, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8583222854373964e-05, | |
| "loss": 1.9186, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8580133781956086e-05, | |
| "loss": 1.441, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.857704144399594e-05, | |
| "loss": 1.5738, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.857394584092177e-05, | |
| "loss": 1.4612, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8570846973162284e-05, | |
| "loss": 1.5269, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.856774484114663e-05, | |
| "loss": 1.5464, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.856463944530442e-05, | |
| "loss": 1.8616, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.85615307860657e-05, | |
| "loss": 1.652, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8558418863860986e-05, | |
| "loss": 1.8906, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.855530367912124e-05, | |
| "loss": 1.7692, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.855218523227787e-05, | |
| "loss": 1.5755, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.854906352376275e-05, | |
| "loss": 1.8406, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8545938554008184e-05, | |
| "loss": 1.4997, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.854281032344695e-05, | |
| "loss": 1.7373, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.853967883251226e-05, | |
| "loss": 1.8977, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8536544081637787e-05, | |
| "loss": 1.7549, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8533406071257644e-05, | |
| "loss": 1.2084, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.853026480180643e-05, | |
| "loss": 1.6226, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8527120273719144e-05, | |
| "loss": 1.7877, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.852397248743128e-05, | |
| "loss": 1.3214, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.852082144337875e-05, | |
| "loss": 1.4208, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8517667141997946e-05, | |
| "loss": 1.0795, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8514509583725684e-05, | |
| "loss": 1.7681, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.851134876899927e-05, | |
| "loss": 1.5075, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.85081846982564e-05, | |
| "loss": 1.4082, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.850501737193529e-05, | |
| "loss": 1.6105, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.850184679047455e-05, | |
| "loss": 1.1266, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8498672954313275e-05, | |
| "loss": 1.4528, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.849549586389101e-05, | |
| "loss": 1.7253, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.849231551964771e-05, | |
| "loss": 1.5444, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8489131922023845e-05, | |
| "loss": 2.2302, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.848594507146028e-05, | |
| "loss": 1.4719, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8482754968398366e-05, | |
| "loss": 1.6143, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8479561613279876e-05, | |
| "loss": 2.1498, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.847636500654707e-05, | |
| "loss": 1.9538, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.847316514864262e-05, | |
| "loss": 2.097, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.846996204000967e-05, | |
| "loss": 1.7786, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.846675568109179e-05, | |
| "loss": 1.7149, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8463546072333056e-05, | |
| "loss": 1.6515, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.846033321417793e-05, | |
| "loss": 1.1366, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.845711710707136e-05, | |
| "loss": 1.5233, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.845389775145873e-05, | |
| "loss": 1.7639, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.845067514778589e-05, | |
| "loss": 1.4838, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.844744929649911e-05, | |
| "loss": 1.806, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8444220198045154e-05, | |
| "loss": 2.4917, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.844098785287119e-05, | |
| "loss": 1.6103, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.843775226142486e-05, | |
| "loss": 0.8902, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.843451342415426e-05, | |
| "loss": 1.7303, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.843127134150791e-05, | |
| "loss": 1.5742, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.842802601393482e-05, | |
| "loss": 2.2771, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8424777441884405e-05, | |
| "loss": 2.0366, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8421525625806555e-05, | |
| "loss": 1.2802, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8418270566151606e-05, | |
| "loss": 1.2683, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.841501226337035e-05, | |
| "loss": 1.4906, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.841175071791401e-05, | |
| "loss": 1.508, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.840848593023426e-05, | |
| "loss": 1.7541, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.840521790078325e-05, | |
| "loss": 1.6975, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.840194663001354e-05, | |
| "loss": 1.7784, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.839867211837817e-05, | |
| "loss": 1.4971, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.839539436633063e-05, | |
| "loss": 2.3331, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.839211337432482e-05, | |
| "loss": 2.0467, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.838882914281513e-05, | |
| "loss": 1.4363, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.838554167225637e-05, | |
| "loss": 1.3551, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.838225096310384e-05, | |
| "loss": 1.8533, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8378957015813225e-05, | |
| "loss": 1.4613, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.837565983084072e-05, | |
| "loss": 1.5865, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.837235940864293e-05, | |
| "loss": 1.4011, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8369055749676926e-05, | |
| "loss": 1.3844, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8365748854400224e-05, | |
| "loss": 1.8292, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8362438723270775e-05, | |
| "loss": 1.777, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8359125356747005e-05, | |
| "loss": 1.8838, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.835580875528776e-05, | |
| "loss": 1.2637, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8352488919352345e-05, | |
| "loss": 1.7713, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.834916584940052e-05, | |
| "loss": 1.477, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.834583954589249e-05, | |
| "loss": 2.2866, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8342510009288896e-05, | |
| "loss": 1.8303, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8339177240050835e-05, | |
| "loss": 1.6103, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.833584123863987e-05, | |
| "loss": 1.3783, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8332502005517976e-05, | |
| "loss": 1.7061, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.83291595411476e-05, | |
| "loss": 1.7425, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.832581384599163e-05, | |
| "loss": 1.4885, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.83224649205134e-05, | |
| "loss": 1.697, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.831911276517669e-05, | |
| "loss": 2.0936, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8315757380445734e-05, | |
| "loss": 1.7644, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8312398766785205e-05, | |
| "loss": 1.2963, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.830903692466024e-05, | |
| "loss": 1.9974, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.830567185453638e-05, | |
| "loss": 2.0313, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.830230355687968e-05, | |
| "loss": 1.603, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.829893203215659e-05, | |
| "loss": 1.5684, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.829555728083402e-05, | |
| "loss": 1.6818, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.829217930337933e-05, | |
| "loss": 1.8331, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.828879810026033e-05, | |
| "loss": 1.4052, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8285413671945266e-05, | |
| "loss": 2.1854, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.828202601890285e-05, | |
| "loss": 1.8066, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8278635141602216e-05, | |
| "loss": 1.9945, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8275241040512956e-05, | |
| "loss": 1.8455, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.827184371610511e-05, | |
| "loss": 1.7102, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.826844316884917e-05, | |
| "loss": 1.2846, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.826503939921606e-05, | |
| "loss": 0.9758, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8261632407677174e-05, | |
| "loss": 1.5587, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8258222194704304e-05, | |
| "loss": 1.1849, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8254808760769756e-05, | |
| "loss": 1.6938, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.825139210634623e-05, | |
| "loss": 1.2542, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.824797223190688e-05, | |
| "loss": 0.9688, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.824454913792532e-05, | |
| "loss": 1.7337, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.824112282487562e-05, | |
| "loss": 2.0502, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.823769329323226e-05, | |
| "loss": 1.8759, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.823426054347019e-05, | |
| "loss": 1.444, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.82308245760648e-05, | |
| "loss": 1.5999, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.822738539149194e-05, | |
| "loss": 1.3271, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.822394299022788e-05, | |
| "loss": 1.4029, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.822049737274934e-05, | |
| "loss": 1.7134, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.821704853953351e-05, | |
| "loss": 1.9922, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.821359649105801e-05, | |
| "loss": 1.5071, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8210141227800885e-05, | |
| "loss": 1.8292, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.820668275024066e-05, | |
| "loss": 1.3623, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8203221058856285e-05, | |
| "loss": 1.5124, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.8199756154127164e-05, | |
| "loss": 1.8369, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.819628803653313e-05, | |
| "loss": 1.8629, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.819281670655448e-05, | |
| "loss": 1.5195, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.8189342164671944e-05, | |
| "loss": 2.5293, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.818586441136671e-05, | |
| "loss": 1.8663, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.818238344712038e-05, | |
| "loss": 1.9068, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.817889927241506e-05, | |
| "loss": 2.135, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.817541188773322e-05, | |
| "loss": 1.6423, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.817192129355785e-05, | |
| "loss": 1.3553, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.8168427490372335e-05, | |
| "loss": 1.2737, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.816493047866053e-05, | |
| "loss": 2.1974, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.816143025890672e-05, | |
| "loss": 1.5832, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.8157926831595636e-05, | |
| "loss": 2.2052, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.815442019721247e-05, | |
| "loss": 1.6406, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.8150910356242834e-05, | |
| "loss": 1.8293, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.8147397309172803e-05, | |
| "loss": 1.3065, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.8143881056488884e-05, | |
| "loss": 1.7817, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.814036159867803e-05, | |
| "loss": 1.3977, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.8136838936227645e-05, | |
| "loss": 1.4343, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.813331306962558e-05, | |
| "loss": 1.4871, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.81297839993601e-05, | |
| "loss": 1.6122, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.8126251725919954e-05, | |
| "loss": 1.2465, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.812271624979431e-05, | |
| "loss": 1.4962, | |
| "step": 1058 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 8452, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 529, | |
| "total_flos": 6.01330610601984e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |