Spaces:
Sleeping
Sleeping
| """Dashboard diagnostic tests. | |
| Covers two layers: | |
| 1. Unit tests — data-loading functions work independently of Streamlit | |
| 2. Selenium UI tests — the rendered dashboard shows the expected elements | |
| Usage: | |
| pytest tests/test_dashboard.py -v | |
| pytest tests/test_dashboard.py -v -k unit # unit tests only | |
| pytest tests/test_dashboard.py -v -k selenium # UI tests only (needs running dashboard) | |
| The Selenium tests expect the Streamlit dashboard at DASHBOARD_URL (default | |
| http://localhost:8501). Start it first with: | |
| .venv/Scripts/python run.py --no-sim | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import os | |
| import time | |
| from pathlib import Path | |
| import pandas as pd | |
| import pytest | |
| ROOT = Path(__file__).resolve().parent.parent | |
| DASHBOARD_URL = os.environ.get("DASHBOARD_URL", "http://localhost:8501") | |
| LOG_PATHS = { | |
| "performance": ROOT / "data" / "logs" / "performance.jsonl", | |
| "drift": ROOT / "data" / "logs" / "drift_reports.jsonl", | |
| "retrain": ROOT / "data" / "logs" / "retraining.jsonl", | |
| "predictions": ROOT / "data" / "logs" / "predictions.jsonl", | |
| } | |
| # --------------------------------------------------------------------------- | |
| # Helpers (mirror dashboard logic without Streamlit dependency) | |
| # --------------------------------------------------------------------------- | |
| def _load_jsonl(path: Path, limit: int = 2000) -> pd.DataFrame: | |
| if not path.exists(): | |
| return pd.DataFrame() | |
| lines = path.read_text(encoding="utf-8").splitlines()[-limit:] | |
| records = [] | |
| for line in lines: | |
| try: | |
| records.append(json.loads(line)) | |
| except json.JSONDecodeError: | |
| pass | |
| return pd.DataFrame(records) if records else pd.DataFrame() | |
| # --------------------------------------------------------------------------- | |
| # Unit tests | |
| # --------------------------------------------------------------------------- | |
| class TestDataLoading: | |
| """Verify that log files exist and load correctly.""" | |
| def test_performance_file_exists(self): | |
| assert LOG_PATHS["performance"].exists(), ( | |
| "performance.jsonl not found — run the simulation first: " | |
| "python scripts/simulate_drift.py" | |
| ) | |
| def test_performance_file_has_rmse_column(self): | |
| df = _load_jsonl(LOG_PATHS["performance"]) | |
| assert not df.empty, "performance.jsonl is empty" | |
| assert "rmse" in df.columns, f"Expected 'rmse' column; got {list(df.columns)}" | |
| def test_performance_rmse_values_are_positive(self): | |
| df = _load_jsonl(LOG_PATHS["performance"]) | |
| if df.empty: | |
| pytest.skip("No performance data yet") | |
| assert (df["rmse"] > 0).all(), "RMSE values must be positive" | |
| def test_performance_file_has_required_columns(self): | |
| df = _load_jsonl(LOG_PATHS["performance"]) | |
| if df.empty: | |
| pytest.skip("No performance data yet") | |
| required = {"rmse", "mae", "r2", "n_samples", "timestamp"} | |
| missing = required - set(df.columns) | |
| assert not missing, f"Missing columns in performance log: {missing}" | |
| def test_predictions_file_exists_and_has_data(self): | |
| assert LOG_PATHS["predictions"].exists(), "predictions.jsonl not found" | |
| df = _load_jsonl(LOG_PATHS["predictions"]) | |
| assert not df.empty, "predictions.jsonl is empty" | |
| def test_drift_file_structure(self): | |
| if not LOG_PATHS["drift"].exists(): | |
| pytest.skip("No drift reports yet") | |
| df = _load_jsonl(LOG_PATHS["drift"]) | |
| assert not df.empty | |
| assert "drift_detected" in df.columns, ( | |
| f"Expected 'drift_detected'; got {list(df.columns)}" | |
| ) | |
| def test_path_resolution_is_correct(self): | |
| """PROJECT_ROOT computed from dashboard/app.py must point to repo root.""" | |
| dashboard_file = ROOT / "dashboard" / "app.py" | |
| resolved_root = dashboard_file.resolve().parent.parent | |
| assert resolved_root == ROOT.resolve(), ( | |
| f"Path mismatch: dashboard resolves to {resolved_root}, " | |
| f"expected {ROOT.resolve()}" | |
| ) | |
| def test_load_jsonl_returns_dataframe_not_empty_when_file_has_data(self): | |
| path = LOG_PATHS["performance"] | |
| if not path.exists(): | |
| pytest.skip("No performance data yet") | |
| df = _load_jsonl(path) | |
| assert isinstance(df, pd.DataFrame) | |
| assert not df.empty | |
| assert len(df) > 0 | |
| def test_load_jsonl_handles_missing_file_gracefully(self): | |
| df = _load_jsonl(ROOT / "data" / "logs" / "nonexistent.jsonl") | |
| assert isinstance(df, pd.DataFrame) | |
| assert df.empty | |
| def test_load_jsonl_handles_corrupted_lines_gracefully(self): | |
| import tempfile | |
| with tempfile.NamedTemporaryFile( | |
| mode="w", suffix=".jsonl", delete=False, encoding="utf-8" | |
| ) as f: | |
| f.write('{"rmse": 1.5, "mae": 1.2}\n') | |
| f.write("NOT JSON\n") | |
| f.write('{"rmse": 1.6, "mae": 1.3}\n') | |
| tmp = Path(f.name) | |
| try: | |
| df = _load_jsonl(tmp) | |
| assert len(df) == 2, "Should skip corrupted lines and keep valid ones" | |
| assert list(df["rmse"]) == [1.5, 1.6] | |
| finally: | |
| tmp.unlink() | |
| # --------------------------------------------------------------------------- | |
| # Selenium UI tests | |
| # --------------------------------------------------------------------------- | |
| def _get_driver(): | |
| """Return a headless Chrome driver via webdriver-manager.""" | |
| from selenium import webdriver | |
| from selenium.webdriver.chrome.options import Options | |
| from selenium.webdriver.chrome.service import Service | |
| from webdriver_manager.chrome import ChromeDriverManager | |
| opts = Options() | |
| opts.add_argument("--headless=new") | |
| opts.add_argument("--no-sandbox") | |
| opts.add_argument("--disable-dev-shm-usage") | |
| opts.add_argument("--window-size=1600,900") | |
| service = Service(ChromeDriverManager().install()) | |
| return webdriver.Chrome(service=service, options=opts) | |
| def _dashboard_reachable() -> bool: | |
| try: | |
| import urllib.request | |
| urllib.request.urlopen(DASHBOARD_URL, timeout=3) | |
| return True | |
| except Exception: | |
| return False | |
| def driver(): | |
| if not _dashboard_reachable(): | |
| pytest.skip(f"Dashboard not running at {DASHBOARD_URL}") | |
| drv = _get_driver() | |
| drv.get(DASHBOARD_URL) | |
| time.sleep(6) | |
| yield drv | |
| drv.quit() | |
| class TestDashboardUI: | |
| """Selenium tests against the live Streamlit dashboard.""" | |
| def test_page_title_is_argus(self, driver): | |
| assert "Argus" in driver.title, ( | |
| f"Expected 'Argus' in page title, got: {driver.title!r}" | |
| ) | |
| def test_sidebar_is_visible(self, driver): | |
| from selenium.webdriver.common.by import By | |
| sidebar = driver.find_elements(By.CSS_SELECTOR, "[data-testid='stSidebar']") | |
| assert sidebar, "Sidebar element not found" | |
| def test_api_status_shown_in_sidebar(self, driver): | |
| from selenium.webdriver.common.by import By | |
| body_text = driver.find_element(By.TAG_NAME, "body").text | |
| assert any(kw in body_text for kw in ("API Online", "API Offline")), ( | |
| "Expected API status badge in sidebar" | |
| ) | |
| def test_navigation_pages_present(self, driver): | |
| from selenium.webdriver.common.by import By | |
| body_text = driver.find_element(By.TAG_NAME, "body").text | |
| for page in ("Overview", "Drift Analysis", "Feature Insights", | |
| "Retraining Log", "Live Demo"): | |
| assert page in body_text, f"Navigation option '{page}' not found" | |
| def test_overview_metrics_rendered(self, driver): | |
| from selenium.webdriver.common.by import By | |
| body_text = driver.find_element(By.TAG_NAME, "body").text | |
| for label in ("Rolling RMSE", "Baseline RMSE", "Labeled Samples"): | |
| assert label in body_text, f"Metric '{label}' not visible on Overview" | |
| def test_no_python_traceback_on_page(self, driver): | |
| from selenium.webdriver.common.by import By | |
| body_text = driver.find_element(By.TAG_NAME, "body").text | |
| assert "Traceback (most recent call last)" not in body_text, ( | |
| "Python traceback found on dashboard page" | |
| ) | |
| def test_chart_renders_when_data_present(self, driver): | |
| """If performance data exists, the RMSE chart must be visible (not 'No data').""" | |
| if not LOG_PATHS["performance"].exists(): | |
| pytest.skip("No performance data — chart absence is expected") | |
| df = _load_jsonl(LOG_PATHS["performance"]) | |
| if df.empty: | |
| pytest.skip("performance.jsonl is empty — chart absence is expected") | |
| from selenium.webdriver.common.by import By | |
| body_text = driver.find_element(By.TAG_NAME, "body").text | |
| no_data_msg = "No performance data yet" | |
| assert no_data_msg not in body_text, ( | |
| f"Dashboard shows '{no_data_msg}' but performance.jsonl has " | |
| f"{len(df)} rows. Root cause: auto-refresh clears the cache " | |
| "BEFORE chart code runs, causing an infinite blank loop." | |
| ) | |
| def test_refresh_now_button_exists(self, driver): | |
| from selenium.webdriver.common.by import By | |
| buttons = driver.find_elements(By.TAG_NAME, "button") | |
| labels = [b.text.strip() for b in buttons] | |
| assert "Refresh Now" in labels, ( | |
| f"'Refresh Now' button not found. Available buttons: {labels}" | |
| ) | |
| def test_clicking_refresh_loads_chart(self, driver): | |
| """Click Refresh Now and verify the chart appears within 10 seconds.""" | |
| if not LOG_PATHS["performance"].exists(): | |
| pytest.skip("No performance data") | |
| df = _load_jsonl(LOG_PATHS["performance"]) | |
| if df.empty: | |
| pytest.skip("performance.jsonl is empty") | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.support.ui import WebDriverWait | |
| from selenium.webdriver.support import expected_conditions as EC | |
| buttons = driver.find_elements(By.TAG_NAME, "button") | |
| for btn in buttons: | |
| if btn.text.strip() == "Refresh Now": | |
| btn.click() | |
| break | |
| time.sleep(8) | |
| body_text = driver.find_element(By.TAG_NAME, "body").text | |
| no_data_msg = "No performance data yet" | |
| assert no_data_msg not in body_text, ( | |
| "Chart still absent after clicking Refresh Now" | |
| ) | |
| def test_screenshot_on_failure(self, driver, request): | |
| """Save a screenshot to assets/test_screenshot.png for inspection.""" | |
| screenshot_path = ROOT / "assets" / "test_screenshot.png" | |
| driver.save_screenshot(str(screenshot_path)) | |
| # --------------------------------------------------------------------------- | |
| # Unit tests: fix #1 — baseline RMSE must not use iloc[0] from the log | |
| # --------------------------------------------------------------------------- | |
| class TestBaselineRmseLogic: | |
| """ | |
| Verify that the baseline hline calculation uses api_metrics baseline_rmse | |
| rather than the first row of the performance log. | |
| Before the fix: bsl = perf_df["rmse"].iloc[0] | |
| After the fix: bsl = baseline or perf_df["rmse"].min() | |
| If the log starts mid-drift (high RMSE), iloc[0] would have been wrong. | |
| """ | |
| def _bsl(self, api_baseline, perf_rmse_values: list) -> float: | |
| """Replicate the fixed dashboard bsl calculation.""" | |
| import numpy as np | |
| df = pd.DataFrame({"rmse": perf_rmse_values}) | |
| baseline = api_baseline | |
| return baseline if baseline else float(df["rmse"].min()) | |
| def test_uses_api_baseline_when_available(self): | |
| # Log starts at a high value (simulating mid-drift start) | |
| rmse_series = [10.5, 10.8, 11.2, 11.0, 10.9] | |
| bsl = self._bsl(api_baseline=2.1, perf_rmse_values=rmse_series) | |
| assert bsl == 2.1, ( | |
| f"Expected api baseline 2.1, got {bsl}. " | |
| "Fix is not applied: bsl must come from api_metrics, not iloc[0]." | |
| ) | |
| def test_falls_back_to_min_when_api_unavailable(self): | |
| rmse_series = [1.8, 2.1, 5.3, 9.0, 3.2] | |
| bsl = self._bsl(api_baseline=None, perf_rmse_values=rmse_series) | |
| assert bsl == 1.8, ( | |
| f"Fallback should be min(rmse)=1.8, got {bsl}." | |
| ) | |
| def test_old_iloc0_would_have_failed_mid_drift(self): | |
| """Demonstrate the old bug: iloc[0] on a mid-drift log gives wrong baseline.""" | |
| rmse_series = [10.5, 10.8, 11.2, 11.0, 10.9] | |
| df = pd.DataFrame({"rmse": rmse_series}) | |
| old_bsl = df["rmse"].iloc[0] # old (broken) logic | |
| assert old_bsl == 10.5, "Setup check: old logic picks high value" | |
| # The old bsl would set the baseline hline at 10.5 instead of ~2.1, | |
| # causing the chart to look flat (everything near or above "baseline") | |
| assert old_bsl > 5.0, ( | |
| "Old baseline would have been unreasonably high — confirms the bug." | |
| ) | |
| def test_alert_threshold_is_correct_fraction_of_bsl(self): | |
| """Alert hline must be 15% above baseline.""" | |
| bsl = 2.131 | |
| alert = bsl * 1.15 | |
| assert abs(alert - 2.451) < 0.01, f"Alert threshold wrong: {alert:.3f}" | |
| # --------------------------------------------------------------------------- | |
| # Unit tests: fix #2 — R² y-axis must accommodate negative values | |
| # --------------------------------------------------------------------------- | |
| class TestR2AxisScaling: | |
| """ | |
| Verify that the R² chart y-axis lower bound scales to include negative R² | |
| instead of clipping at 0. | |
| Before the fix: range=[0, 1.05] (negative values invisible) | |
| After the fix: range=[r2_floor, 1.05] where r2_floor < 0 when data dips negative | |
| """ | |
| def _r2_floor(self, r2_values: list) -> float: | |
| """Replicate the fixed dashboard r2_floor calculation.""" | |
| r2_min = min(r2_values) | |
| return min(r2_min - 0.05, -0.1) if r2_min < 0 else -0.05 | |
| def test_negative_r2_produces_negative_floor(self): | |
| r2_series = [0.91, 0.60, -0.49, -1.22, 0.83] | |
| floor = self._r2_floor(r2_series) | |
| assert floor < 0, f"r2_floor must be negative when data goes below 0, got {floor}" | |
| assert floor <= -1.22 - 0.05, ( | |
| f"Floor {floor} is not low enough to show min r2=-1.22 " | |
| "(should be min - 0.05 = -1.27)" | |
| ) | |
| def test_all_positive_r2_uses_small_negative_floor(self): | |
| r2_series = [0.91, 0.88, 0.93, 0.85] | |
| floor = self._r2_floor(r2_series) | |
| assert floor == -0.05, ( | |
| f"When all R² > 0, floor should be -0.05 for breathing room, got {floor}" | |
| ) | |
| def test_floor_is_below_min_r2(self): | |
| """Floor must always be below the minimum R² value so no data is clipped.""" | |
| for min_r2 in [-0.05, -0.5, -1.0, -1.22]: | |
| r2_series = [0.9, min_r2] | |
| floor = self._r2_floor(r2_series) | |
| assert floor <= min_r2, ( | |
| f"At min_r2={min_r2}, floor={floor} clips data (must be <= min_r2)" | |
| ) | |
| def test_old_hardcoded_range_clipped_negative_r2(self): | |
| """Show that the old range=[0, 1.05] would have hidden the negative data.""" | |
| old_range_min = 0 | |
| r2_min_in_data = -1.22 | |
| assert r2_min_in_data < old_range_min, ( | |
| "Confirms bug: min R² in data is below old y-axis floor of 0" | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # Selenium: verify chart renders correctly with fixed logic | |
| # --------------------------------------------------------------------------- | |
| class TestChartFixes: | |
| """End-to-end Selenium tests verifying the two chart fixes in production.""" | |
| def test_overview_chart_section_visible(self, driver): | |
| from selenium.webdriver.common.by import By | |
| body = driver.find_element(By.TAG_NAME, "body").text | |
| assert "Prediction Error Over Time" in body, ( | |
| "RMSE chart section heading not visible on Overview" | |
| ) | |
| def test_baseline_annotation_present_in_chart(self, driver): | |
| """ | |
| The 'Baseline' hline annotation must appear in the rendered SVG. | |
| If bsl was computed from a high iloc[0], the annotation would still | |
| appear but at the wrong Y level — this confirms it's rendered at all. | |
| """ | |
| from selenium.webdriver.common.by import By | |
| page_source = driver.page_source | |
| assert "Baseline" in page_source, ( | |
| "Baseline annotation not found in rendered page source. " | |
| "Chart may not have rendered." | |
| ) | |
| def test_alert_annotation_present_in_chart(self, driver): | |
| from selenium.webdriver.common.by import By | |
| page_source = driver.page_source | |
| assert "Alert" in page_source or "+15%" in page_source, ( | |
| "Alert +15% annotation not found in rendered chart." | |
| ) | |
| def test_r2_chart_section_visible(self, driver): | |
| from selenium.webdriver.common.by import By | |
| page_source = driver.page_source | |
| # R² label should appear as an axis title in the SVG | |
| assert "R²" in page_source or "R\u00b2" in page_source, ( | |
| "R² chart axis label not found — chart may not have rendered." | |
| ) | |
| def test_no_traceback_on_overview(self, driver): | |
| from selenium.webdriver.common.by import By | |
| assert "Traceback (most recent call last)" not in \ | |
| driver.find_element(By.TAG_NAME, "body").text | |
| def test_overview_screenshot_with_fixes(self, driver): | |
| """Save a screenshot showing the fixed chart for visual verification.""" | |
| screenshot_path = ROOT / "assets" / "overview_chart_fixed.png" | |
| driver.save_screenshot(str(screenshot_path)) | |