{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a409dd5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From d:\\Anaconda\\Lib\\site-packages\\tf_keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
      "\n",
      "Menggunakan perangkat: cuda\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[I 2025-07-18 06:26:20,055] A new study created in memory with name: no-name-50af0249-7af4-476f-988c-7342adeab58c\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memulai hyperparameter tuning dengan Optuna...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of BertForTokenClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
      "C:\\Users\\BUDI\\AppData\\Local\\Temp\\ipykernel_6152\\2584540621.py:147: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
      "  trainer = Trainer(\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='836' max='836' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [836/836 03:00, Epoch 4/4]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Epoch</th>\n",
       "      <th>Training Loss</th>\n",
       "      <th>Validation Loss</th>\n",
       "      <th>Precision</th>\n",
       "      <th>Recall</th>\n",
       "      <th>F1</th>\n",
       "      <th>Accuracy</th>\n",
       "      <th>Per Entity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.124700</td>\n",
       "      <td>0.166868</td>\n",
       "      <td>0.748068</td>\n",
       "      <td>0.731118</td>\n",
       "      <td>0.739496</td>\n",
       "      <td>0.945582</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.103800</td>\n",
       "      <td>0.157893</td>\n",
       "      <td>0.750355</td>\n",
       "      <td>0.799094</td>\n",
       "      <td>0.773958</td>\n",
       "      <td>0.952456</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.096100</td>\n",
       "      <td>0.171932</td>\n",
       "      <td>0.800613</td>\n",
       "      <td>0.788520</td>\n",
       "      <td>0.794521</td>\n",
       "      <td>0.955606</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.032800</td>\n",
       "      <td>0.178615</td>\n",
       "      <td>0.750704</td>\n",
       "      <td>0.805136</td>\n",
       "      <td>0.776968</td>\n",
       "      <td>0.954031</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='27' max='27' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [27/27 00:01]\n",
       "    </div>\n",
       "    "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "[I 2025-07-18 06:29:29,091] Trial 0 finished with value: 0.7945205479452055 and parameters: {'learning_rate': 2.3555847899573657e-05, 'batch_size': 8, 'num_epochs': 4}. Best is trial 0 with value: 0.7945205479452055.\n",
      "Some weights of BertForTokenClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
      "C:\\Users\\BUDI\\AppData\\Local\\Temp\\ipykernel_6152\\2584540621.py:147: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
      "  trainer = Trainer(\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='1045' max='1045' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [1045/1045 04:05, Epoch 5/5]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Epoch</th>\n",
       "      <th>Training Loss</th>\n",
       "      <th>Validation Loss</th>\n",
       "      <th>Precision</th>\n",
       "      <th>Recall</th>\n",
       "      <th>F1</th>\n",
       "      <th>Accuracy</th>\n",
       "      <th>Per Entity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.123500</td>\n",
       "      <td>0.163488</td>\n",
       "      <td>0.728788</td>\n",
       "      <td>0.726586</td>\n",
       "      <td>0.727685</td>\n",
       "      <td>0.945009</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.108800</td>\n",
       "      <td>0.155614</td>\n",
       "      <td>0.737346</td>\n",
       "      <td>0.814199</td>\n",
       "      <td>0.773869</td>\n",
       "      <td>0.953745</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.110300</td>\n",
       "      <td>0.170470</td>\n",
       "      <td>0.763314</td>\n",
       "      <td>0.779456</td>\n",
       "      <td>0.771300</td>\n",
       "      <td>0.953172</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.045800</td>\n",
       "      <td>0.182373</td>\n",
       "      <td>0.765557</td>\n",
       "      <td>0.799094</td>\n",
       "      <td>0.781966</td>\n",
       "      <td>0.954031</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.022400</td>\n",
       "      <td>0.191159</td>\n",
       "      <td>0.758571</td>\n",
       "      <td>0.802115</td>\n",
       "      <td>0.779736</td>\n",
       "      <td>0.953315</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='27' max='27' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [27/27 00:01]\n",
       "    </div>\n",
       "    "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "[I 2025-07-18 06:33:40,086] Trial 1 finished with value: 0.7819660014781965 and parameters: {'learning_rate': 1.7904807706862636e-05, 'batch_size': 8, 'num_epochs': 5}. Best is trial 0 with value: 0.7945205479452055.\n",
      "Some weights of BertForTokenClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
      "C:\\Users\\BUDI\\AppData\\Local\\Temp\\ipykernel_6152\\2584540621.py:147: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
      "  trainer = Trainer(\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='420' max='420' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [420/420 05:47, Epoch 4/4]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Epoch</th>\n",
       "      <th>Training Loss</th>\n",
       "      <th>Validation Loss</th>\n",
       "      <th>Precision</th>\n",
       "      <th>Recall</th>\n",
       "      <th>F1</th>\n",
       "      <th>Accuracy</th>\n",
       "      <th>Per Entity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.138600</td>\n",
       "      <td>0.185550</td>\n",
       "      <td>0.738769</td>\n",
       "      <td>0.670695</td>\n",
       "      <td>0.703088</td>\n",
       "      <td>0.942432</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.109800</td>\n",
       "      <td>0.154619</td>\n",
       "      <td>0.781899</td>\n",
       "      <td>0.796073</td>\n",
       "      <td>0.788922</td>\n",
       "      <td>0.955463</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.069800</td>\n",
       "      <td>0.155078</td>\n",
       "      <td>0.807750</td>\n",
       "      <td>0.818731</td>\n",
       "      <td>0.813203</td>\n",
       "      <td>0.960332</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.027200</td>\n",
       "      <td>0.174292</td>\n",
       "      <td>0.765292</td>\n",
       "      <td>0.812689</td>\n",
       "      <td>0.788278</td>\n",
       "      <td>0.954747</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='14' max='14' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [14/14 00:00]\n",
       "    </div>\n",
       "    "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "[I 2025-07-18 06:39:32,835] Trial 2 finished with value: 0.8132033008252062 and parameters: {'learning_rate': 3.672145523121866e-05, 'batch_size': 16, 'num_epochs': 4}. Best is trial 2 with value: 0.8132033008252062.\n",
      "Some weights of BertForTokenClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
      "C:\\Users\\BUDI\\AppData\\Local\\Temp\\ipykernel_6152\\2584540621.py:147: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
      "  trainer = Trainer(\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='525' max='525' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [525/525 07:42, Epoch 5/5]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Epoch</th>\n",
       "      <th>Training Loss</th>\n",
       "      <th>Validation Loss</th>\n",
       "      <th>Precision</th>\n",
       "      <th>Recall</th>\n",
       "      <th>F1</th>\n",
       "      <th>Accuracy</th>\n",
       "      <th>Per Entity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.143200</td>\n",
       "      <td>0.170970</td>\n",
       "      <td>0.745514</td>\n",
       "      <td>0.690332</td>\n",
       "      <td>0.716863</td>\n",
       "      <td>0.945869</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.107300</td>\n",
       "      <td>0.154406</td>\n",
       "      <td>0.766141</td>\n",
       "      <td>0.806647</td>\n",
       "      <td>0.785872</td>\n",
       "      <td>0.953029</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.075100</td>\n",
       "      <td>0.158503</td>\n",
       "      <td>0.795420</td>\n",
       "      <td>0.787009</td>\n",
       "      <td>0.791192</td>\n",
       "      <td>0.956895</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.025800</td>\n",
       "      <td>0.179348</td>\n",
       "      <td>0.764791</td>\n",
       "      <td>0.800604</td>\n",
       "      <td>0.782288</td>\n",
       "      <td>0.954461</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.013400</td>\n",
       "      <td>0.185257</td>\n",
       "      <td>0.766049</td>\n",
       "      <td>0.811178</td>\n",
       "      <td>0.787968</td>\n",
       "      <td>0.953888</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='14' max='14' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [14/14 00:01]\n",
       "    </div>\n",
       "    "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "[I 2025-07-18 06:47:22,280] Trial 3 finished with value: 0.7911921032649962 and parameters: {'learning_rate': 3.713773945286763e-05, 'batch_size': 16, 'num_epochs': 5}. Best is trial 2 with value: 0.8132033008252062.\n",
      "Some weights of BertForTokenClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
      "C:\\Users\\BUDI\\AppData\\Local\\Temp\\ipykernel_6152\\2584540621.py:147: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
      "  trainer = Trainer(\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='1045' max='1045' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [1045/1045 04:30, Epoch 5/5]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Epoch</th>\n",
       "      <th>Training Loss</th>\n",
       "      <th>Validation Loss</th>\n",
       "      <th>Precision</th>\n",
       "      <th>Recall</th>\n",
       "      <th>F1</th>\n",
       "      <th>Accuracy</th>\n",
       "      <th>Per Entity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.132700</td>\n",
       "      <td>0.169205</td>\n",
       "      <td>0.715361</td>\n",
       "      <td>0.717523</td>\n",
       "      <td>0.716440</td>\n",
       "      <td>0.944007</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.120000</td>\n",
       "      <td>0.155390</td>\n",
       "      <td>0.750700</td>\n",
       "      <td>0.809668</td>\n",
       "      <td>0.779070</td>\n",
       "      <td>0.953458</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.136600</td>\n",
       "      <td>0.163555</td>\n",
       "      <td>0.761974</td>\n",
       "      <td>0.793051</td>\n",
       "      <td>0.777202</td>\n",
       "      <td>0.954174</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.067900</td>\n",
       "      <td>0.172124</td>\n",
       "      <td>0.766476</td>\n",
       "      <td>0.808157</td>\n",
       "      <td>0.786765</td>\n",
       "      <td>0.953888</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.035200</td>\n",
       "      <td>0.180249</td>\n",
       "      <td>0.759943</td>\n",
       "      <td>0.808157</td>\n",
       "      <td>0.783309</td>\n",
       "      <td>0.953745</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='27' max='27' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [27/27 00:01]\n",
       "    </div>\n",
       "    "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "[I 2025-07-18 06:51:59,633] Trial 4 finished with value: 0.7867647058823529 and parameters: {'learning_rate': 1.1923156920458335e-05, 'batch_size': 8, 'num_epochs': 5}. Best is trial 2 with value: 0.8132033008252062.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Hyperparameter terbaik:\n",
      "{'learning_rate': 3.672145523121866e-05, 'batch_size': 16, 'num_epochs': 4}\n",
      "F1-Score terbaik: 0.8132\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of BertForTokenClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
      "C:\\Users\\BUDI\\AppData\\Local\\Temp\\ipykernel_6152\\2584540621.py:195: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
      "  trainer = Trainer(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Memulai pelatihan dengan hyperparameter terbaik...\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='420' max='420' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [420/420 07:01, Epoch 4/4]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Epoch</th>\n",
       "      <th>Training Loss</th>\n",
       "      <th>Validation Loss</th>\n",
       "      <th>Precision</th>\n",
       "      <th>Recall</th>\n",
       "      <th>F1</th>\n",
       "      <th>Accuracy</th>\n",
       "      <th>Per Entity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.138600</td>\n",
       "      <td>0.185550</td>\n",
       "      <td>0.738769</td>\n",
       "      <td>0.670695</td>\n",
       "      <td>0.703088</td>\n",
       "      <td>0.942432</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.109800</td>\n",
       "      <td>0.154619</td>\n",
       "      <td>0.781899</td>\n",
       "      <td>0.796073</td>\n",
       "      <td>0.788922</td>\n",
       "      <td>0.955463</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.069800</td>\n",
       "      <td>0.155078</td>\n",
       "      <td>0.807750</td>\n",
       "      <td>0.818731</td>\n",
       "      <td>0.813203</td>\n",
       "      <td>0.960332</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.027200</td>\n",
       "      <td>0.174292</td>\n",
       "      <td>0.765292</td>\n",
       "      <td>0.812689</td>\n",
       "      <td>0.788278</td>\n",
       "      <td>0.954747</td>\n",
       "      <td>{}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Mengevaluasi model pada data test...\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='14' max='14' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [14/14 00:05]\n",
       "    </div>\n",
       "    "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval_per_entity\" as a metric. MLflow's log_metric() only accepts float and int types so we dropped this attribute.\n",
      "Trainer is attempting to log a value of \"{}\" of type <class 'dict'> for key \"eval/per_entity\" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Hasil Evaluasi:\n",
      "Precision: 0.7528\n",
      "Recall: 0.7878\n",
      "F1-Score: 0.7699\n",
      "Accuracy: 0.9497\n",
      "\n",
      "Metrik per Entitas:\n",
      "\n",
      "Model dan tokenizer telah disimpan ke './ner_model'\n",
      "\n",
      "Contoh Prediksi pada Data Test (5 Sampel):\n",
      "\n",
      "Sampel 1:\n",
      "Tokens: [CLS] joe ##tat ##a hadi ##hard ##aja dan dihadiri oleh rektor undip prof . [SEP]\n",
      "True Labels: ['B-PERSON', 'I-PERSON', 'O', 'O', 'O', 'O', 'B-ORGANISATION', 'O', 'O']\n",
      "Predicted Labels: ['B-PERSON', 'I-PERSON', 'O', 'O', 'O', 'O', 'B-PLACE', 'O', 'O']\n",
      "\n",
      "Sampel 2:\n",
      "Tokens: [CLS] sejak masih duduk di bangku sekolah tk kevin sudah belajar alat musik piano secara formal dan ketika ia menginjak sekolah smp pemilik nama asli kevin april ##io sum ##aat ##maj ##a ini , mulai belajar menulis lagu sendiri . [SEP]\n",
      "True Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'I-PERSON', 'I-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']\n",
      "Predicted Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'I-PERSON', 'I-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']\n",
      "\n",
      "Sampel 3:\n",
      "Tokens: [CLS] pada tanggal 6 februari 1976 , wakil ketua lock ##he ##ed corporation memberitahu subk ##omi ##te senat as bahwa tana ##ka selaku pm telah dibayar ( dis ##ogo ##k ) sebagai ganjaran pembelian pesawat lock ##he ##ed l - 1011 . [SEP]\n",
      "True Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORGANISATION', 'I-ORGANISATION', 'O', 'O', 'O', 'B-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORGANISATION', 'O', 'O', 'O', 'O']\n",
      "Predicted Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORGANISATION', 'I-ORGANISATION', 'O', 'O', 'O', 'B-PLACE', 'O', 'B-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORGANISATION', 'O', 'O', 'O', 'O']\n",
      "\n",
      "Sampel 4:\n",
      "Tokens: [CLS] dengan kondisi alam yang sejuk dan curah hujan yang tinggi maka didaerah tersebut banyak didapati bermacam jenis flora dan fauna seperti : gajah yang di kenal dengan legenda poc ##ut me ##urah ##nya , rusa , harimau , beruang , kancil , babi hutan , tengg ##iling , landak dan ular , juga terdapat berbagai macam jenis burung yang selalu menghiasi kawasan ini . [SEP]\n",
      "True Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']\n",
      "Predicted Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']\n",
      "\n",
      "Sampel 5:\n",
      "Tokens: [CLS] awak pesawat yang terdiri atas pilot ard ##y ted ##jo , kopi ##lot h ribuan dan dua awak lainnya perry reh ##ata dan mei ##nas ##ta segera membuka pintu pesawat dan menurunkan penumpang dengan selamat . tanggal 14 juni 2009 , hari minggu , pukul 09 . 20 , pesawat terbang express air jenis dor ##nie ##r d ##32 ##8 - 100 bernomor badan pk - tx ##n , mengalami kecelakaan saat mendarat . [SEP]\n",
      "True Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'I-PERSON', 'O', 'O', 'B-PERSON', 'I-PERSON', 'O', 'O', 'O', 'O', 'B-PERSON', 'I-PERSON', 'O', 'B-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORGANISATION', 'I-ORGANISATION', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']\n",
      "Predicted Labels: ['O', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'I-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'I-PERSON', 'O', 'B-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']\n",
      "\n",
      "Analisis Pola Error (Tanggal diprediksi sebagai Lokasi):\n",
      "Tidak ditemukan contoh tanggal yang diprediksi sebagai lokasi dalam 100 sampel.\n",
      "\n",
      "Pertimbangan Keamanan Data, Privasi, dan Etika:\n",
      "- Dataset bersumber dari berita publik, tidak mengandung informasi sensitif seperti alamat atau nomor identitas.\n",
      "- Nama orang dalam dataset berasal dari media publik, aman untuk digunakan.\n",
      "- Dataset mencakup berbagai topik berita, mengurangi risiko bias terhadap entitas tertentu.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import sys\n",
    "import subprocess\n",
    "import numpy as np\n",
    "from datasets import load_dataset\n",
    "from transformers import AutoTokenizer, AutoModelForTokenClassification, DataCollatorForTokenClassification, Trainer, TrainingArguments\n",
    "import evaluate\n",
    "import torch\n",
    "import optuna\n",
    "\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "print(f\"Menggunakan perangkat: {device}\")\n",
    "\n",
    "# Load dataset\n",
    "try:\n",
    "    dataset = load_dataset(\"indonlp/indonlu\", \"nergrit\", trust_remote_code=True)\n",
    "except Exception as e:\n",
    "    print(f\"Gagal memuat dataset: {e}\")\n",
    "    sys.exit(1)\n",
    "\n",
    "# Verify dataset structure\n",
    "if \"train\" not in dataset or \"validation\" not in dataset or \"test\" not in dataset:\n",
    "    print(\"Dataset tidak memiliki split train/validation/test yang diharapkan.\")\n",
    "    sys.exit(1)\n",
    "if \"tokens\" not in dataset[\"train\"].column_names or \"ner_tags\" not in dataset[\"train\"].column_names:\n",
    "    print(\"Dataset tidak memiliki kolom 'tokens' atau 'ner_tags'.\")\n",
    "    sys.exit(1)\n",
    "\n",
    "# Define label list\n",
    "try:\n",
    "    label_list = dataset[\"train\"].features[\"ner_tags\"].feature.names\n",
    "    label2id = {label: i for i, label in enumerate(label_list)}\n",
    "    id2label = {i: label for i, label in enumerate(label_list)}\n",
    "except Exception as e:\n",
    "    print(f\"Gagal mendapatkan label: {e}\")\n",
    "    sys.exit(1)\n",
    "\n",
    "# Load tokenizer\n",
    "try:\n",
    "    tokenizer = AutoTokenizer.from_pretrained(\"indobenchmark/indobert-base-p1\")\n",
    "except Exception as e:\n",
    "    print(f\"Gagal memuat tokenizer: {e}\")\n",
    "    sys.exit(1)\n",
    "\n",
    "# Tokenize and align labels\n",
    "def tokenize_and_align_labels(examples):\n",
    "    tokenized_inputs = tokenizer(examples[\"tokens\"], truncation=True, is_split_into_words=True)\n",
    "    labels = []\n",
    "    for i, label in enumerate(examples[\"ner_tags\"]):\n",
    "        word_ids = tokenized_inputs.word_ids(batch_index=i)\n",
    "        previous_word_idx = None\n",
    "        label_ids = []\n",
    "        for word_idx in word_ids:\n",
    "            if word_idx is None:\n",
    "                label_ids.append(-100)\n",
    "            elif word_idx != previous_word_idx:\n",
    "                label_ids.append(label[word_idx])\n",
    "            else:\n",
    "                label_ids.append(-100)\n",
    "            previous_word_idx = word_idx\n",
    "        labels.append(label_ids)\n",
    "    tokenized_inputs[\"labels\"] = labels\n",
    "    return tokenized_inputs\n",
    "\n",
    "# Tokenize dataset\n",
    "try:\n",
    "    tokenized_dataset = dataset.map(tokenize_and_align_labels, batched=True)\n",
    "except Exception as e:\n",
    "    print(f\"Gagal menokenisasi dataset: {e}\")\n",
    "    sys.exit(1)\n",
    "\n",
    "# Data collator\n",
    "data_collator = DataCollatorForTokenClassification(tokenizer)\n",
    "\n",
    "# Load evaluation metric\n",
    "metric = evaluate.load(\"seqeval\")\n",
    "\n",
    "# Compute metrics\n",
    "def compute_metrics(p):\n",
    "    predictions, labels = p\n",
    "    predictions = np.argmax(predictions, axis=2)\n",
    "    true_labels = [[id2label[l] for l in label if l != -100] for label in labels]\n",
    "    pred_labels = [[id2label[p] for p, l in zip(prediction, label) if l != -100] for prediction, label in zip(predictions, labels)]\n",
    "    results = metric.compute(predictions=pred_labels, references=true_labels)\n",
    "    per_entity = {}\n",
    "    for entity in [\"PERSON\", \"ORGANISATION\", \"PLACE\", \"DATE\"]:\n",
    "        if entity.lower() in results:\n",
    "            per_entity[entity] = {\n",
    "                \"precision\": results[entity.lower()][\"precision\"],\n",
    "                \"recall\": results[entity.lower()][\"recall\"],\n",
    "                \"f1\": results[entity.lower()][\"f1\"],\n",
    "            }\n",
    "    return {\n",
    "        \"precision\": results[\"overall_precision\"],\n",
    "        \"recall\": results[\"overall_recall\"],\n",
    "        \"f1\": results[\"overall_f1\"],\n",
    "        \"accuracy\": results[\"overall_accuracy\"],\n",
    "        \"per_entity\": per_entity,\n",
    "    }\n",
    "\n",
    "# Define objective function for Optuna\n",
    "def objective(trial):\n",
    "    # Define hyperparameter search space\n",
    "    learning_rate = trial.suggest_float(\"learning_rate\", 1e-5, 5e-5, log=True)\n",
    "    batch_size = trial.suggest_categorical(\"batch_size\", [8, 16, 32])\n",
    "    num_epochs = trial.suggest_int(\"num_epochs\", 3, 5)\n",
    "\n",
    "    # Load model for each trial\n",
    "    model = AutoModelForTokenClassification.from_pretrained(\n",
    "        \"indobenchmark/indobert-base-p1\",\n",
    "        num_labels=len(label_list),\n",
    "        id2label=id2label,\n",
    "        label2id=label2id\n",
    "    )\n",
    "    model.to(device)\n",
    "\n",
    "    # Set training arguments\n",
    "    training_args = TrainingArguments(\n",
    "        output_dir=f\"./results_trial_{trial.number}\",\n",
    "        eval_strategy=\"epoch\",\n",
    "        learning_rate=learning_rate,\n",
    "        per_device_train_batch_size=batch_size,\n",
    "        per_device_eval_batch_size=batch_size,\n",
    "        num_train_epochs=num_epochs,\n",
    "        weight_decay=0.01,\n",
    "        logging_dir=f\"./logs_trial_{trial.number}\",\n",
    "        logging_steps=10,\n",
    "        save_strategy=\"epoch\",\n",
    "        load_best_model_at_end=True,\n",
    "        metric_for_best_model=\"f1\",\n",
    "    )\n",
    "\n",
    "    # Initialize Trainer\n",
    "    trainer = Trainer(\n",
    "        model=model,\n",
    "        args=training_args,\n",
    "        train_dataset=tokenized_dataset[\"train\"],\n",
    "        eval_dataset=tokenized_dataset[\"validation\"],\n",
    "        tokenizer=tokenizer,\n",
    "        data_collator=data_collator,\n",
    "        compute_metrics=compute_metrics,\n",
    "    )\n",
    "\n",
    "    # Train and evaluate\n",
    "    trainer.train()\n",
    "    eval_results = trainer.evaluate()\n",
    "    return eval_results[\"eval_f1\"]\n",
    "\n",
    "# Run Optuna optimization\n",
    "print(\"Memulai hyperparameter tuning dengan Optuna...\")\n",
    "study = optuna.create_study(direction=\"maximize\")\n",
    "study.optimize(objective, n_trials=5)  # Adjust n_trials as needed\n",
    "print(\"\\nHyperparameter terbaik:\")\n",
    "print(study.best_params)\n",
    "print(f\"F1-Score terbaik: {study.best_value:.4f}\")\n",
    "\n",
    "# Train final model with best hyperparameters\n",
    "best_params = study.best_params\n",
    "model = AutoModelForTokenClassification.from_pretrained(\n",
    "    \"indobenchmark/indobert-base-p1\",\n",
    "    num_labels=len(label_list),\n",
    "    id2label=id2label,\n",
    "    label2id=label2id\n",
    ")\n",
    "model.to(device)\n",
    "\n",
    "training_args = TrainingArguments(\n",
    "    output_dir=\"./results\",\n",
    "    eval_strategy=\"epoch\",\n",
    "    learning_rate=best_params[\"learning_rate\"],\n",
    "    per_device_train_batch_size=best_params[\"batch_size\"],\n",
    "    per_device_eval_batch_size=best_params[\"batch_size\"],\n",
    "    num_train_epochs=best_params[\"num_epochs\"],\n",
    "    weight_decay=0.01,\n",
    "    logging_dir=\"./logs\",\n",
    "    logging_steps=10,\n",
    "    save_strategy=\"epoch\",\n",
    "    load_best_model_at_end=True,\n",
    "    metric_for_best_model=\"f1\",\n",
    ")\n",
    "\n",
    "trainer = Trainer(\n",
    "    model=model,\n",
    "    args=training_args,\n",
    "    train_dataset=tokenized_dataset[\"train\"],\n",
    "    eval_dataset=tokenized_dataset[\"validation\"],\n",
    "    tokenizer=tokenizer,\n",
    "    data_collator=data_collator,\n",
    "    compute_metrics=compute_metrics,\n",
    ")\n",
    "\n",
    "# Train the model\n",
    "print(\"\\nMemulai pelatihan dengan hyperparameter terbaik...\")\n",
    "try:\n",
    "    trainer.train()\n",
    "except Exception as e:\n",
    "    print(f\"Gagal melatih model: {e}\")\n",
    "    sys.exit(1)\n",
    "\n",
    "# Evaluate on test set\n",
    "print(\"\\nMengevaluasi model pada data test...\")\n",
    "try:\n",
    "    results = trainer.evaluate(tokenized_dataset[\"test\"])\n",
    "except Exception as e:\n",
    "    print(f\"Gagal mengevaluasi model: {e}\")\n",
    "    sys.exit(1)\n",
    "\n",
    "# Print evaluation results\n",
    "print(\"\\nHasil Evaluasi:\")\n",
    "print(f\"Precision: {results['eval_precision']:.4f}\")\n",
    "print(f\"Recall: {results['eval_recall']:.4f}\")\n",
    "print(f\"F1-Score: {results['eval_f1']:.4f}\")\n",
    "print(f\"Accuracy: {results['eval_accuracy']:.4f}\")\n",
    "print(\"\\nMetrik per Entitas:\")\n",
    "for entity, metrics in results.get(\"eval_per_entity\", {}).items():\n",
    "    print(f\"{entity}:\")\n",
    "    print(f\"  Precision: {metrics['precision']:.4f}\")\n",
    "    print(f\"  Recall: {metrics['recall']:.4f}\")\n",
    "    print(f\"  F1-Score: {metrics['f1']:.4f}\")\n",
    "\n",
    "# Save the model\n",
    "try:\n",
    "    model.save_pretrained(\"./ner_model\")\n",
    "    tokenizer.save_pretrained(\"./ner_model\")\n",
    "    print(\"\\nModel dan tokenizer telah disimpan ke './ner_model'\")\n",
    "except Exception as e:\n",
    "    print(f\"Gagal menyimpan model: {e}\")\n",
    "    sys.exit(1)\n",
    "\n",
    "# Example inference on test samples\n",
    "print(\"\\nContoh Prediksi pada Data Test (5 Sampel):\")\n",
    "try:\n",
    "    for i in range(min(5, len(tokenized_dataset[\"test\"]))):\n",
    "        sample = tokenized_dataset[\"test\"][i]\n",
    "        input_ids = torch.tensor([sample[\"input_ids\"]], device=device)\n",
    "        attention_mask = torch.tensor([sample[\"attention_mask\"]], device=device)\n",
    "        model.eval()\n",
    "        with torch.no_grad():\n",
    "            outputs = model(input_ids, attention_mask=attention_mask)\n",
    "        predictions = outputs.logits.argmax(dim=2)[0].cpu().numpy()\n",
    "        tokens = tokenizer.convert_ids_to_tokens(sample[\"input_ids\"])\n",
    "        labels = [id2label[pred] for pred, label in zip(predictions, sample[\"labels\"]) if label != -100]\n",
    "        true_labels = [id2label[label] for label in sample[\"labels\"] if label != -100]\n",
    "        print(f\"\\nSampel {i+1}:\")\n",
    "        print(f\"Tokens: {' '.join(tokens)}\")\n",
    "        print(f\"True Labels: {true_labels}\")\n",
    "        print(f\"Predicted Labels: {labels}\")\n",
    "except Exception as e:\n",
    "    print(f\"Gagal melakukan inferensi: {e}\")\n",
    "    sys.exit(1)\n",
    "\n",
    "# Analyze error patterns (DATE predicted as LOC)\n",
    "print(\"\\nAnalisis Pola Error (Tanggal diprediksi sebagai Lokasi):\")\n",
    "found_error = False\n",
    "for i in range(min(100, len(tokenized_dataset[\"test\"]))):\n",
    "    sample = tokenized_dataset[\"test\"][i]\n",
    "    input_ids = torch.tensor([sample[\"input_ids\"]], device=device)\n",
    "    attention_mask = torch.tensor([sample[\"attention_mask\"]], device=device)\n",
    "    with torch.no_grad():\n",
    "        outputs = model(input_ids, attention_mask=attention_mask)\n",
    "    predictions = outputs.logits.argmax(dim=2)[0].cpu().numpy()\n",
    "    true_labels = [id2label[label] for label in sample[\"labels\"] if label != -100]\n",
    "    pred_labels = [id2label[pred] for pred, label in zip(predictions, sample[\"labels\"]) if label != -100]\n",
    "    for j, (true, pred) in enumerate(zip(true_labels, pred_labels)):\n",
    "        if true.startswith(\"B-DATE\") and pred.startswith(\"B-LOC\"):\n",
    "            tokens = tokenizer.convert_ids_to_tokens(sample[\"input_ids\"])\n",
    "            print(f\"\\nSampel dengan Error (DATE diprediksi sebagai LOC):\")\n",
    "            print(f\"Tokens: {' '.join(tokens)}\")\n",
    "            print(f\"True Labels: {true_labels}\")\n",
    "            print(f\"Predicted Labels: {pred_labels}\")\n",
    "            found_error = True\n",
    "            break\n",
    "    if found_error:\n",
    "        break\n",
    "if not found_error:\n",
    "    print(\"Tidak ditemukan contoh tanggal yang diprediksi sebagai lokasi dalam 100 sampel.\")\n",
    "\n",
    "# Data Security, Privacy, and Ethics\n",
    "print(\"\\nPertimbangan Keamanan Data, Privasi, dan Etika:\")\n",
    "print(\"- Dataset bersumber dari berita publik, tidak mengandung informasi sensitif seperti alamat atau nomor identitas.\")\n",
    "print(\"- Nama orang dalam dataset berasal dari media publik, aman untuk digunakan.\")\n",
    "print(\"- Dataset mencakup berbagai topik berita, mengurangi risiko bias terhadap entitas tertentu.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "714cfb72",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "93508875",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}