Sound / run_eval_flex.sh
Wendy-Fly's picture
Upload run_eval_flex.sh with huggingface_hub
a3bcb12 verified
#!/bin/bash
#
# AIPF flexible evaluation script: choose the number of comparison rounds and
# whether to run an embedding warm-start before ranking.
# See README_eval_flex.sh for details.
#
# Options:
#   --rounds N             number of ranking rounds (non-negative integer, default 8)
#   --warmstart MODE       none | top5 | top100 (default none)
#   --scenario NAME        yss | nsa | all (default yss)
#   --limit VALUE          forwarded as --limit to the embedding matcher
#   --emb-batch-size N     forwarded as --batch-size to the embedding matcher
#
# Environment overrides: DATE, RUN_ID. An existing PYTHONPATH is kept and
# appended after the script's own entries.
#
set -euo pipefail
SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"

# Print an error on stderr and abort the script.
die() {
  echo "[ERROR] $*" >&2
  exit 1
}

# ========== Argument parsing ==========
ROUNDS=8
WARMSTART="none"
SCENARIO="yss"
LIMIT=""
EMB_BATCH_SIZE=4

#######################################
# Parse command-line options into the global settings.
# Globals:   ROUNDS, WARMSTART, SCENARIO, LIMIT, EMB_BATCH_SIZE (written)
# Arguments: the script's command-line arguments
# Returns:   0 on success; exits 1 on an unknown option, a missing option
#            value, or a --rounds value that is not a non-negative integer
#######################################
parse_args() {
  # ROUNDS is later substituted into generated Python code, so it must be a
  # plain integer literal (no sign, no leading zeros).
  local int_re='^(0|[1-9][0-9]*)$'

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --rounds | --warmstart | --scenario | --limit | --emb-batch-size)
        # Without this check a trailing option dies with a bare
        # "unbound variable" message from `set -u`.
        [[ $# -ge 2 ]] || die "参数 $1 缺少取值"
        ;;
      *)
        die "未知参数: $1"
        ;;
    esac

    case "$1" in
      --rounds) ROUNDS="$2" ;;
      --warmstart) WARMSTART="$2" ;;
      --scenario) SCENARIO="$2" ;;
      --limit) LIMIT="$2" ;;
      --emb-batch-size) EMB_BATCH_SIZE="$2" ;;
    esac
    shift 2
  done

  [[ "${ROUNDS}" =~ ${int_re} ]] || die "--rounds 必须是非负整数: ${ROUNDS}"
}

parse_args "$@"

# Take a single timestamp so that DATE and the date part of RUN_ID cannot
# disagree when the script starts right around midnight.
_now="$(date +%Y%m%d_%H%M%S)"
DATE="${DATE:-${_now%%_*}}"
RUN_ID="${RUN_ID:-${_now}}"
unset _now

export PYTHONPATH="${SCRIPT_DIR}:${SCRIPT_DIR}/vendor/ranking_moderation/src:${PYTHONPATH:-}"
# ========== Scenario definitions ==========
# Key:   short scenario name (the value accepted by --scenario).
# Value: "<example sub-directory>|<cg name>|<golden csv file name>"
declare -A SCENARIO_MAP=(
  [yss]="yss_ruler_eval|youth_sexual_and_physical_abuse|aipf_golden_set.csv"
  [nsa]="nsa_ruler_eval|ansa|nsa_golden_set.csv"
)

# ========== Run tag ==========
# The tag encodes the round count, plus the warm-start mode when one is used.
case "${WARMSTART}" in
  none) TAG="heuristic_r${ROUNDS}" ;;
  *) TAG="heuristic_r${ROUNDS}_warm_${WARMSTART}" ;;
esac

# Print the effective configuration, one line per setting.
printf '%s\n' \
  "======================================================" \
  " 配置:" \
  " rounds = ${ROUNDS}" \
  " warmstart = ${WARMSTART}" \
  " scenario = ${SCENARIO}" \
  " date = ${DATE}" \
  " run_id = ${RUN_ID}" \
  " tag = ${TAG}" \
  "======================================================"
# ========== 主流程 ==========
#######################################
# Run the evaluation pipeline for one scenario: optional embedding
# warm-start, runtime pipeline.yaml generation, evaluation-data preparation,
# find_positions config generation, find_positions itself, and evaluation.
# Globals (read):
#   SCRIPT_DIR, DATE, RUN_ID, TAG, WARMSTART, ROUNDS, LIMIT, EMB_BATCH_SIZE
# Arguments:
#   $1 - short scenario name (only used in log output)
#   $2 - example sub-directory under ${SCRIPT_DIR}/aipf_example
#   $3 - cg name (passed to the pipeline scripts, used in output file names)
#   $4 - golden-set csv file name under <example dir>/data
# Outputs:
#   Progress on stdout, error messages on stderr; all artefacts are written
#   below <example dir>/runs/${DATE}/${RUN_ID}_${TAG}.
# Returns:
#   1 when pipeline.yaml or the golden csv is missing, or when WARMSTART is
#   not none/top5/top100. Failures of the python3 steps abort the script via
#   the file-level `set -e`.
#######################################
run_scenario() {
  local short_name="$1"
  local eval_dir="$2"
  local cg_name="$3"
  local golden_csv="$4"

  local example_root="${SCRIPT_DIR}/aipf_example/${eval_dir}"
  local base_pipeline_yaml="${example_root}/pipeline.yaml"
  local input_csv="${example_root}/data/${golden_csv}"
  local workspace="${example_root}/runs/${DATE}/${RUN_ID}_${TAG}"

  # Paths shared by several steps below.
  local eval_input_jsonl="${workspace}/intermediate/evr_${DATE}_local_eval_input.jsonl"
  local pos_config_yaml="${workspace}/configs/pos_config/find_positions_${cg_name}_${DATE}.yaml"
  local cases_jsonl="${workspace}/outputs/${cg_name}_case_results_${DATE}.jsonl"
  local metrics_json="${workspace}/outputs/${cg_name}_metrics_${DATE}.json"

  echo ""
  echo "======================================================"
  echo " 场景: ${short_name} (${cg_name})"
  echo " Pipeline(base): ${base_pipeline_yaml}"
  echo " Golden set: ${input_csv}"
  echo " Workspace: ${workspace}"
  echo "======================================================"

  if [[ ! -f "${base_pipeline_yaml}" ]]; then
    echo "[ERROR] pipeline.yaml 不存在: ${base_pipeline_yaml}" >&2
    return 1
  fi
  if [[ ! -f "${input_csv}" ]]; then
    echo "[ERROR] golden set 不存在: ${input_csv}" >&2
    return 1
  fi

  mkdir -p "${workspace}/intermediate" "${workspace}/configs/pos_config" "${workspace}/outputs"

  # ---------- Step 0: embedding warm-start (optional) ----------
  local actual_csv="${input_csv}"
  if [[ "${WARMSTART}" != "none" ]]; then
    echo ""
    echo "[Step 0] Embedding warm-start (${WARMSTART}) ..."

    local topk emb_script emb_output
    case "${WARMSTART}" in
      top5)
        topk=5
        emb_script="${SCRIPT_DIR}/batch_top5_match.py"
        emb_output="${workspace}/intermediate/emb_top5.jsonl"
        ;;
      top100)
        topk=100
        emb_script="${SCRIPT_DIR}/batch_top100_match.py"
        emb_output="${workspace}/intermediate/emb_top100.jsonl"
        ;;
      *)
        echo "[ERROR] 未知 warmstart 模式: ${WARMSTART} (可选: top5 / top100 / none)" >&2
        return 1
        ;;
    esac

    # Build the optional arguments as an array so the value is passed as one
    # word and is never subject to word-splitting or globbing.
    local -a limit_args=()
    if [[ -n "${LIMIT}" ]]; then
      limit_args=(--limit "${LIMIT}")
    fi

    # 0a) run the embedding matcher
    # The ${arr[@]+...} form keeps an empty array from tripping `set -u` on
    # older bash versions.
    python3 "${emb_script}" \
      --csv "${input_csv}" \
      --output "${emb_output}" \
      --top-k "${topk}" \
      --batch-size "${EMB_BATCH_SIZE}" \
      --cache-dir "${SCRIPT_DIR}/cache_emb" \
      ${limit_args[@]+"${limit_args[@]}"}

    # 0b) write estimated_position into a copy of the csv so the original
    #     golden set is left untouched
    actual_csv="${workspace}/intermediate/golden_with_warmstart.csv"
    cp "${input_csv}" "${actual_csv}"
    python3 "${SCRIPT_DIR}/add_estimated_position.py" \
      --csv "${actual_csv}" \
      --jsonl "${emb_output}" \
      --output "${actual_csv}" \
      --k "${topk}"
    echo "[Step 0] done. warm-start csv: ${actual_csv}"
  fi

  # ---------- Step 1: generate a runtime pipeline.yaml (overrides rounds) ----------
  local runtime_pipeline_yaml="${workspace}/configs/pipeline_runtime.yaml"
  # Paths and the round count are handed over as argv instead of being
  # spliced into the Python source: a quote in a path or a non-integer
  # ROUNDS can then neither break the snippet nor inject code.
  python3 - "${base_pipeline_yaml}" "${runtime_pipeline_yaml}" "${ROUNDS}" <<'PY'
import sys

import yaml

src_path, dst_path = sys.argv[1], sys.argv[2]
num_rounds = int(sys.argv[3])

with open(src_path, "r") as f:
    cfg = yaml.safe_load(f)
cfg["find_positions"]["ranking"]["num_rounds"] = num_rounds
cfg["find_positions"]["ranking"]["search_method"] = "heuristic_search"
with open(dst_path, "w") as f:
    yaml.safe_dump(cfg, f, allow_unicode=True, sort_keys=False)
print(f" num_rounds = {num_rounds}, search_method = heuristic_search")
PY
  echo "[Step 1] 生成运行时 pipeline.yaml: ${runtime_pipeline_yaml}"

  # ---------- Step 2: prepare evaluation data ----------
  echo ""
  echo "[Step 2] 准备本地评估数据 ..."
  python3 "${SCRIPT_DIR}/pipeline/prepare_local_eval_data.py" \
    --input_csv "${actual_csv}" \
    --output_jsonl "${eval_input_jsonl}"

  # ---------- Step 3: generate the find_positions config ----------
  echo ""
  echo "[Step 3] 生成 find_positions 配置 ..."
  python3 "${SCRIPT_DIR}/pipeline/gen_find_positions_cfg.py" \
    --date "${DATE}" \
    --run_root "${workspace}" \
    --cg "${cg_name}" \
    --pipeline_yaml "${runtime_pipeline_yaml}" \
    --input_jsonl "${eval_input_jsonl}" \
    --output_yaml "${pos_config_yaml}"

  # ---------- Step 4: run find_positions ----------
  echo ""
  echo "[Step 4] 运行 find_positions (heuristic_search, ${ROUNDS} rounds) ..."
  python3 "${SCRIPT_DIR}/vendor/ranking_moderation/scripts/find_positions.py" \
    --config "${pos_config_yaml}"

  # ---------- Step 5: evaluate the results ----------
  echo ""
  echo "[Step 5] 评估结果 ..."
  python3 "${SCRIPT_DIR}/pipeline/evaluate_local_ruler_results.py" \
    --input_jsonl "${eval_input_jsonl}" \
    --results_dir "${workspace}/outputs/find_positions/${cg_name}_${DATE}" \
    --pipeline_yaml "${runtime_pipeline_yaml}" \
    --cg "${cg_name}" \
    --output_cases_jsonl "${cases_jsonl}" \
    --output_metrics_json "${metrics_json}"

  echo ""
  echo "[DONE] ${short_name} (${TAG}) 完成!"
  echo " 结果: ${cases_jsonl}"
  echo " 指标: ${metrics_json}"
  # The metrics file is only dumped when the evaluation step produced it.
  if [[ -f "${metrics_json}" ]]; then
    echo ""
    echo "--- Metrics ---"
    cat "${metrics_json}"
    echo ""
  fi
}
# ========== Run ==========
# "all" expands to the fixed list yss, nsa (in that order); any other value
# is treated as a single scenario name.
case "${SCENARIO}" in
  all) scenarios_to_run=(yss nsa) ;;
  *) scenarios_to_run=("${SCENARIO}") ;;
esac

for s in "${scenarios_to_run[@]}"; do
  # ${...+x} expands to "x" only when the key exists in SCENARIO_MAP.
  [[ -n "${SCENARIO_MAP[$s]+x}" ]] || {
    echo "[ERROR] 未知场景: ${s} (可选: yss / nsa / all)"
    exit 1
  }

  # Split "<example dir>|<cg name>|<golden csv>" into its three fields.
  IFS='|' read -r eval_dir cg_name golden_csv <<< "${SCENARIO_MAP[$s]}"
  run_scenario "${s}" "${eval_dir}" "${cg_name}" "${golden_csv}"
done

printf '\n%s\n' "========== 全部完成 (${TAG}) =========="