#!/usr/bin/env python3
"""
Test Report Aggregator
======================
Reads all JSON test reports from testing/reports/, aggregates them into a
summary dashboard, and outputs JSON + formatted tables (or Markdown).

Usage:
    python testing/report_aggregator.py                    # all reports, table output
    python testing/report_aggregator.py --product sunaiva  # filter by product
    python testing/report_aggregator.py --competitors-only # competitor intel only
    python testing/report_aggregator.py --failures-only    # failed runs only
    python testing/report_aggregator.py --format md        # markdown output
    python testing/report_aggregator.py --no-write         # skip writing summary JSON
"""

from __future__ import annotations

import argparse
import json
import os
import re
import sys
from collections import Counter, defaultdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Optional

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------

# Directory that contains this script; reports/screenshots live alongside it.
TESTING_DIR = Path(__file__).parent
REPORTS_DIR = TESTING_DIR / "reports"  # input run reports + SUMMARY_*.json output
SCREENSHOTS_DIR = TESTING_DIR / "screenshots"  # referenced by screenshot links in reports


# ---------------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------------

def load_reports(
    reports_dir: Path,
    product_filter: Optional[str] = None,
    competitors_only: bool = False,
    failures_only: bool = False,
) -> list[dict]:
    """
    Load and filter all JSON reports from *reports_dir*.

    Each loaded report gets a ``_file`` key with its source filename.
    SUMMARY_*.json files (this tool's own output) are skipped, as are
    files that cannot be parsed (a warning goes to stderr).
    """
    needle = product_filter.lower() if product_filter else None
    selected: list[dict] = []

    for path in sorted(reports_dir.glob("*.json")):
        # Never re-ingest our own aggregated output.
        if path.name.startswith("SUMMARY_"):
            continue

        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError) as exc:
            print(f"[WARN] Skipping {path.name}: {exc}", file=sys.stderr)
            continue

        data["_file"] = path.name

        # Mode classification: "product" or "competitor".
        mode = data.get("mode", "product")
        if competitors_only and mode != "competitor":
            continue

        if needle is not None:
            # Match on product name, run_id prefix, or base_url substring.
            haystacks = (
                str(data.get("product", "")),
                str(data.get("run_id", "")),
                str(data.get("base_url", "")),
            )
            if not any(needle in h.lower() for h in haystacks):
                continue

        # failures_only keeps runs with failures (or runs that passed nothing).
        if failures_only and data.get("failed", 0) == 0 and data.get("passed", 0) > 0:
            continue

        selected.append(data)

    return selected


# ---------------------------------------------------------------------------
# Per-report normalisation
# ---------------------------------------------------------------------------

def _duration_ms(report: dict) -> int:
    """Total run duration in milliseconds from started_at / finished_at."""
    try:
        fmt = "%Y-%m-%dT%H:%M:%S.%f%z"
        start = datetime.fromisoformat(report["started_at"])
        end = datetime.fromisoformat(report["finished_at"])
        return int((end - start).total_seconds() * 1000)
    except Exception:
        # Sum scenario durations as fallback
        scenarios = report.get("scenarios", [])
        return sum(s.get("duration_ms", 0) for s in scenarios)


def _screenshot_links(report: dict) -> list[str]:
    """Collect non-null screenshot paths from all scenarios."""
    links = []
    for s in report.get("scenarios", []):
        shot = s.get("screenshot")
        if shot:
            links.append(shot)
    return links


def _failed_steps(report: dict) -> list[dict]:
    """Return list of {name, error} for failed scenarios."""
    return [
        {"step": s["name"], "error": (s.get("error") or "").strip()}
        for s in report.get("scenarios", [])
        if not s.get("passed", True)
    ]


def normalise(report: dict) -> dict:
    """Return a cleaned, enriched view of a single raw report dict."""
    passed = report.get("passed", 0)
    failed = report.get("failed", 0)
    total = report.get("total", passed + failed)
    rate = 0.0 if not total else round(passed / total * 100, 1)

    # Pass-through fields with their defaults, in display order.
    view = {
        key: report.get(key, default)
        for key, default in (
            ("product", "unknown"),
            ("run_id", ""),
            ("mode", "product"),
            ("base_url", ""),
            ("started_at", ""),
            ("finished_at", ""),
            ("email_used", ""),
        )
    }
    # Derived / computed fields.
    view.update(
        passed=passed,
        failed=failed,
        total=total,
        success_rate=rate,
        duration_ms=_duration_ms(report),
        failed_steps=_failed_steps(report),
        screenshot_links=_screenshot_links(report),
        scenarios=report.get("scenarios", []),
        _file=report.get("_file", ""),
    )
    return view


# ---------------------------------------------------------------------------
# Per-product aggregation
# ---------------------------------------------------------------------------

def aggregate_by_product(normalised: list[dict]) -> dict[str, dict]:
    """
    Group normalised runs by product name and compute per-product statistics.

    Returns a dict keyed by product name, iterated in sorted-name order.
    """
    buckets: dict[str, list[dict]] = defaultdict(list)
    for run in normalised:
        buckets[run["product"]].append(run)

    aggregated: dict[str, dict] = {}
    for product in sorted(buckets):
        runs = buckets[product]
        passed_sum = sum(run["passed"] for run in runs)
        failed_sum = sum(run["failed"] for run in runs)
        scenario_sum = sum(run["total"] for run in runs)
        rate = round(passed_sum / scenario_sum * 100, 1) if scenario_sum else 0.0

        # Tally failure signatures (step + first error line, capped at 80 chars)
        # across every run, so repeated breakage surfaces at the top.
        failure_counts: Counter = Counter()
        for run in runs:
            for step in run["failed_steps"]:
                head = (step["error"] or "").split("\n")[0].strip()
                signature = f"{step['step']}: {head[:80]}" if head else step["step"]
                failure_counts[signature] += 1

        screenshots = [link for run in runs for link in run["screenshot_links"]]

        # "Latest" by lexicographic ISO timestamp comparison.
        newest = max(runs, key=lambda run: run["started_at"])

        aggregated[product] = {
            "product": product,
            "mode": runs[0]["mode"],
            "base_url": runs[0]["base_url"],
            "run_count": len(runs),
            "total_passed": passed_sum,
            "total_failed": failed_sum,
            "total_scenarios": scenario_sum,
            "success_rate": rate,
            "avg_duration_ms": int(sum(run["duration_ms"] for run in runs) / len(runs)),
            "common_failures": [
                {"pattern": signature, "occurrences": count}
                for signature, count in failure_counts.most_common(5)
            ],
            "screenshot_links": screenshots,
            "latest_run_id": newest["run_id"],
            "latest_run_at": newest["started_at"],
            "runs": runs,
        }

    return aggregated


# ---------------------------------------------------------------------------
# Cross-product comparison
# ---------------------------------------------------------------------------

def cross_product_comparison(by_product: dict[str, dict]) -> dict:
    """
    Rank products by success rate (descending) and merge each product's
    common failure patterns into one global top-10 list.
    """
    entries = list(by_product.values())
    if not entries:
        return {"ranked": [], "common_failure_patterns": []}

    ordered = sorted(entries, key=lambda e: e["success_rate"], reverse=True)

    ranked_rows = []
    for position, entry in enumerate(ordered, start=1):
        ranked_rows.append({
            "rank": position,
            "product": entry["product"],
            "success_rate": entry["success_rate"],
            "total_scenarios": entry["total_scenarios"],
            "avg_duration_ms": entry["avg_duration_ms"],
            "run_count": entry["run_count"],
        })

    # Global tally: identical patterns from different products accumulate.
    tally: Counter = Counter()
    for entry in entries:
        for failure in entry["common_failures"]:
            tally[failure["pattern"]] += failure["occurrences"]

    return {
        "ranked": ranked_rows,
        "common_failure_patterns": [
            {"pattern": pattern, "occurrences": count}
            for pattern, count in tally.most_common(10)
        ],
    }


# ---------------------------------------------------------------------------
# Competitive intel summary
# ---------------------------------------------------------------------------

def competitive_intel_summary(by_product: dict[str, dict]) -> dict:
    """
    Summarise competitor-mode runs only.

    Per competitor: funnel depth (mean scenario count per run), time-to-value
    (mean run duration), and a friction score (mean failed steps per run).
    Both output lists are ordered by ascending friction.
    """
    rivals = [entry for entry in by_product.values() if entry["mode"] == "competitor"]
    if not rivals:
        return {"competitors": [], "ranked_by_friction": []}

    rows = []
    for rival in rivals:
        run_count = rival["run_count"]
        if run_count:
            # Funnel depth: mean number of scenarios per run.
            funnel = round(
                sum(len(run["scenarios"]) for run in rival["runs"]) / run_count, 1
            )
            # Friction: mean failed steps per run (higher = rougher UX).
            friction = round(
                sum(run["failed"] for run in rival["runs"]) / run_count, 2
            )
        else:
            funnel = 0
            friction = 0
        rows.append({
            "product": rival["product"],
            "base_url": rival["base_url"],
            "funnel_steps": funnel,
            "avg_time_to_value_ms": rival["avg_duration_ms"],
            "friction_score": friction,
            "run_count": run_count,
            "screenshot_links": rival["screenshot_links"],
        })

    # Lower friction ranks first (rank 1 = smoothest competitor funnel).
    by_friction = sorted(rows, key=lambda row: row["friction_score"])
    for position, row in enumerate(by_friction, start=1):
        row["friction_rank"] = position

    # Faster time-to-value also ranks first.
    by_ttv = sorted(rows, key=lambda row: row["avg_time_to_value_ms"])
    ttv_ranks = {row["product"]: pos for pos, row in enumerate(by_ttv, start=1)}
    for row in by_friction:
        row["ttv_rank"] = ttv_ranks[row["product"]]

    return {
        "competitors": by_friction,
        "ranked_by_friction": [
            {
                "rank": row["friction_rank"],
                "product": row["product"],
                "friction_score": row["friction_score"],
                "funnel_steps": row["funnel_steps"],
                "avg_time_to_value_ms": row["avg_time_to_value_ms"],
            }
            for row in by_friction
        ],
    }


# ---------------------------------------------------------------------------
# Summary assembly
# ---------------------------------------------------------------------------

def build_summary(
    reports: list[dict],
    by_product: dict[str, dict],
    comparison: dict,
    intel: dict,
) -> dict:
    """Assemble the final SUMMARY JSON object from all aggregation stages."""
    passed_total = sum(r["passed"] for r in reports)
    failed_total = sum(r["failed"] for r in reports)
    scenario_total = sum(r["total"] for r in reports)
    overall = (
        round(passed_total / scenario_total * 100, 1) if scenario_total else 0.0
    )

    # Per-product projection: the stable display fields only (drops the
    # bulky "runs" list carried by aggregate_by_product()).
    product_fields = (
        "product", "mode", "base_url", "run_count", "success_rate",
        "total_passed", "total_failed", "total_scenarios", "avg_duration_ms",
        "common_failures", "screenshot_links", "latest_run_id", "latest_run_at",
    )

    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "meta": {
            "total_runs": len(reports),
            "total_scenarios": scenario_total,
            "total_passed": passed_total,
            "total_failed": failed_total,
            "overall_success_rate": overall,
            "products_tested": len(by_product),
        },
        "per_product": {
            name: {field: info[field] for field in product_fields}
            for name, info in by_product.items()
        },
        "cross_product_comparison": comparison,
        "competitive_intel": intel,
    }


# ---------------------------------------------------------------------------
# Output formatters
# ---------------------------------------------------------------------------

def _bar(rate: float, width: int = 20) -> str:
    """ASCII progress bar for a 0-100 rate."""
    filled = round(rate / 100 * width)
    return "[" + "#" * filled + "-" * (width - filled) + "]"


def _ms_to_human(ms: int) -> str:
    if ms < 1000:
        return f"{ms}ms"
    if ms < 60_000:
        return f"{ms / 1000:.1f}s"
    return f"{ms / 60_000:.1f}m"


def format_markdown(summary: dict) -> str:
    """Render the summary dict as a Markdown report string.

    Sections, in order: overview metrics table; per-product results with
    common-failure and screenshot sub-sections; cross-product success-rate
    ranking; competitive-intel table. Sections with no data are omitted.
    """
    meta = summary["meta"]
    gen = summary["generated_at"]
    lines: list[str] = []

    lines.append("# Genesis E2E Test Report Summary")
    lines.append(f"**Generated**: {gen}")
    lines.append("")
    lines.append("## Overview")
    lines.append("")
    # Constant header rows need no f-prefix (was flagged as extraneous f-strings).
    lines.append("| Metric | Value |")
    lines.append("|--------|-------|")
    lines.append(f"| Total runs | {meta['total_runs']} |")
    lines.append(f"| Total scenarios | {meta['total_scenarios']} |")
    lines.append(f"| Passed | {meta['total_passed']} |")
    lines.append(f"| Failed | {meta['total_failed']} |")
    lines.append(f"| Overall success rate | {meta['overall_success_rate']}% |")
    lines.append(f"| Products tested | {meta['products_tested']} |")
    lines.append("")

    # Per-product results, best success rate first.
    per_product = summary["per_product"]
    if per_product:
        lines.append("## Per-Product Results")
        lines.append("")
        lines.append("| Product | Success Rate | Pass/Total | Avg Duration | Runs |")
        lines.append("|---------|-------------|------------|--------------|------|")
        for name, p in sorted(
            per_product.items(), key=lambda x: x[1]["success_rate"], reverse=True
        ):
            lines.append(
                f"| {name} | {p['success_rate']}% | "
                f"{p['total_passed']}/{p['total_scenarios']} | "
                f"{_ms_to_human(p['avg_duration_ms'])} | "
                f"{p['run_count']} |"
            )
        lines.append("")

        # Failure/screenshot detail sub-sections per product.
        for name, p in per_product.items():
            if p["common_failures"]:
                lines.append(f"### {name} — Common Failures")
                lines.append("")
                for cf in p["common_failures"]:
                    lines.append(f"- **x{cf['occurrences']}** — `{cf['pattern']}`")
                lines.append("")
            if p["screenshot_links"]:
                lines.append(f"### {name} — Screenshots")
                lines.append("")
                for link in p["screenshot_links"]:
                    lines.append(f"- `{link}`")
                lines.append("")

    # Cross-product ranking (already ordered by rank when built).
    comparison = summary["cross_product_comparison"]
    ranked = comparison.get("ranked", [])
    if ranked:
        lines.append("## Success Rate Ranking")
        lines.append("")
        lines.append("| Rank | Product | Success Rate | Avg Duration | Runs |")
        lines.append("|------|---------|-------------|--------------|------|")
        for row in ranked:
            lines.append(
                f"| #{row['rank']} | {row['product']} | {row['success_rate']}% | "
                f"{_ms_to_human(row['avg_duration_ms'])} | {row['run_count']} |"
            )
        lines.append("")

        patterns = comparison.get("common_failure_patterns", [])
        if patterns:
            lines.append("### Common Failure Patterns (cross-product)")
            lines.append("")
            for p in patterns:
                lines.append(f"- **x{p['occurrences']}** — `{p['pattern']}`")
            lines.append("")

    # Competitive intel, ordered by friction rank (lower = smoother funnel).
    intel = summary["competitive_intel"]
    competitors = intel.get("competitors", [])
    if competitors:
        lines.append("## Competitive Intelligence")
        lines.append("")
        lines.append(
            "| Friction Rank | Competitor | Friction Score | Funnel Steps | "
            "Time-to-Value | TTV Rank |"
        )
        lines.append("|--------------|-----------|---------------|-------------|"
                     "-------------|----------|")
        for comp in sorted(competitors, key=lambda c: c["friction_rank"]):
            lines.append(
                f"| #{comp['friction_rank']} | {comp['product']} | "
                f"{comp['friction_score']} | {comp['funnel_steps']} | "
                f"{_ms_to_human(comp['avg_time_to_value_ms'])} | "
                f"#{comp['ttv_rank']} |"
            )
        lines.append("")

    return "\n".join(lines)


def _format_product_row(name: str, p: dict) -> list[str]:
    """Render one product's aligned table row plus up to two common-failure lines."""
    rate = p["success_rate"]
    rendered = [
        f"  {name:<22} {rate:>6.1f}%  {_bar(rate, 12)}  "
        f"{p['total_passed']}/{p['total_scenarios']}  "
        f"{_ms_to_human(p['avg_duration_ms']):>8}  "
        f"{p['run_count']:>4}"
    ]
    # At most two failure patterns, each truncated to 60 chars.
    rendered.extend(
        f"    ! {failure['pattern'][:60]} (x{failure['occurrences']})"
        for failure in p["common_failures"][:2]
    )
    return rendered


def format_table(summary: dict) -> str:
    """Render a human-readable table report to a string.

    Sections mirror format_markdown(): header/overview, per-product results,
    cross-product ranking, and competitive intel. Empty sections are omitted.
    """
    lines: list[str] = []
    meta = summary["meta"]
    gen = summary["generated_at"]

    # Header banner with generation timestamp and overall totals.
    lines.append("")
    lines.append("=" * 70)
    lines.append("  Genesis E2E Test Report Summary")
    lines.append(f"  Generated: {gen}")
    lines.append("=" * 70)
    lines.append("")
    lines.append(f"  Total runs:       {meta['total_runs']}")
    lines.append(f"  Total scenarios:  {meta['total_scenarios']}")
    lines.append(
        f"  Overall:          {meta['total_passed']}/{meta['total_scenarios']} passed "
        f"({meta['overall_success_rate']}%)"
    )
    lines.append(f"  Products tested:  {meta['products_tested']}")
    lines.append("")

    # Per-product table, best success rate first; row rendering (including
    # the per-product failure lines) is delegated to _format_product_row().
    per_product = summary["per_product"]
    if per_product:
        lines.append("-" * 70)
        lines.append("  PER-PRODUCT RESULTS")
        lines.append("-" * 70)
        lines.append(
            f"  {'Product':<22} {'Rate':>8}  {'Progress':>14}  "
            f"{'Pass/Total':<12} {'Avg Time':>8}  {'Runs':>4}"
        )
        lines.append("  " + "-" * 66)
        for name, p in sorted(
            per_product.items(), key=lambda x: x[1]["success_rate"], reverse=True
        ):
            for row in _format_product_row(name, p):
                lines.append(row)
        lines.append("")

    # Cross-product ranking ("ranked" is already ordered by rank when built).
    comparison = summary["cross_product_comparison"]
    ranked = comparison.get("ranked", [])
    if ranked:
        lines.append("-" * 70)
        lines.append("  SUCCESS RATE RANKING")
        lines.append("-" * 70)
        for row in ranked:
            lines.append(
                f"  #{row['rank']:>2}  {row['product']:<22}  "
                f"{row['success_rate']:>6.1f}%  "
                f"{_ms_to_human(row['avg_duration_ms']):>8}  "
                f"{row['run_count']} run(s)"
            )
        lines.append("")

        # Top five global failure patterns, truncated to fit the table width.
        patterns = comparison.get("common_failure_patterns", [])
        if patterns:
            lines.append("  TOP FAILURE PATTERNS (cross-product)")
            lines.append("  " + "-" * 52)
            for p in patterns[:5]:
                lines.append(f"  x{p['occurrences']:>2}  {p['pattern'][:60]}")
            lines.append("")

    # Competitive intel, ordered by friction rank (lower = smoother funnel).
    intel = summary["competitive_intel"]
    competitors = intel.get("competitors", [])
    if competitors:
        lines.append("-" * 70)
        lines.append("  COMPETITIVE INTELLIGENCE")
        lines.append("-" * 70)
        lines.append(
            f"  {'Competitor':<22} {'Friction':>8}  {'Steps':>6}  "
            f"{'Time-to-Val':>12}  {'Rank':>4}"
        )
        lines.append("  " + "-" * 56)
        for comp in sorted(competitors, key=lambda c: c["friction_rank"]):
            lines.append(
                f"  {comp['product']:<22} {comp['friction_score']:>8.2f}  "
                f"{comp['funnel_steps']:>6.1f}  "
                f"{_ms_to_human(comp['avg_time_to_value_ms']):>12}  "
                f"#{comp['friction_rank']:>3}"
            )
        lines.append("")

    lines.append("=" * 70)
    lines.append("")
    return "\n".join(lines)


# ---------------------------------------------------------------------------
# Summary writer
# ---------------------------------------------------------------------------

def write_summary(summary: dict, reports_dir: Path) -> Path:
    """Write *summary* to reports_dir/SUMMARY_<timestamp>.json and return the path."""
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target = reports_dir / f"SUMMARY_{stamp}.json"
    target.write_text(
        json.dumps(summary, indent=2, ensure_ascii=False), encoding="utf-8"
    )
    return target


# ---------------------------------------------------------------------------
# Self-test
# ---------------------------------------------------------------------------

def _run_self_test() -> None:
    """Internal self-test using synthetic report data.

    Exercises normalise(), aggregate_by_product(), cross_product_comparison(),
    competitive_intel_summary(), build_summary(), and the formatters against
    three hand-crafted reports. Prints PASS/FAIL per check and exits with
    status 1 if any check fails.

    Fix: all checks now flow through one ``check()`` helper. Previously some
    checks were bare ``assert`` statements, which (a) bypassed the pass/fail
    counters, (b) aborted the whole suite on the first failure, and (c) were
    stripped under ``python -O`` while still printing PASS.
    """
    print("\n=== Report Aggregator Self-Test ===\n")
    passed = 0
    failed = 0

    def ok(label: str) -> None:
        nonlocal passed
        print(f"  PASS  {label}")
        passed += 1

    def fail(label: str, reason: str) -> None:
        nonlocal failed
        print(f"  FAIL  {label}: {reason}")
        failed += 1

    def check(condition: bool, label: str, reason: str = "") -> None:
        # Single funnel for every test so the counters stay accurate and the
        # suite always runs to completion.
        if condition:
            ok(label)
        else:
            fail(label, reason or "condition was false")

    # --- Synthetic reports ---
    sample_reports_raw = [
        {
            "product": "sunaiva",
            "run_id": "sunaiva_001",
            "started_at": "2026-02-26T10:00:00.000000+00:00",
            "finished_at": "2026-02-26T10:00:05.000000+00:00",
            "email_used": "test@example.com",
            "base_url": "https://sunaiva.com",
            "mode": "product",
            "passed": 3,
            "failed": 1,
            "total": 4,
            "scenarios": [
                {"name": "signup", "passed": True, "duration_ms": 1200, "screenshot": "s1.png", "error": None},
                {"name": "email_verify", "passed": True, "duration_ms": 800, "screenshot": "s2.png", "error": None},
                {"name": "plan_select", "passed": True, "duration_ms": 600, "screenshot": None, "error": None},
                {"name": "checkout", "passed": False, "duration_ms": 400, "screenshot": None, "error": "TimeoutError: waiting for selector\nStack trace..."},
            ],
            "summary": "3/4 scenarios passed",
            "_file": "sunaiva_001.json",
        },
        {
            "product": "sunaiva",
            "run_id": "sunaiva_002",
            "started_at": "2026-02-27T08:00:00.000000+00:00",
            "finished_at": "2026-02-27T08:00:08.000000+00:00",
            "email_used": "test2@example.com",
            "base_url": "https://sunaiva.com",
            "mode": "product",
            "passed": 4,
            "failed": 0,
            "total": 4,
            "scenarios": [
                {"name": "signup", "passed": True, "duration_ms": 1100, "screenshot": "s3.png", "error": None},
                {"name": "email_verify", "passed": True, "duration_ms": 900, "screenshot": "s4.png", "error": None},
                {"name": "plan_select", "passed": True, "duration_ms": 700, "screenshot": None, "error": None},
                {"name": "checkout", "passed": True, "duration_ms": 500, "screenshot": "s5.png", "error": None},
            ],
            "summary": "4/4 scenarios passed",
            "_file": "sunaiva_002.json",
        },
        {
            "product": "competitor_acme",
            "run_id": "competitor_acme_001",
            "started_at": "2026-02-27T09:00:00.000000+00:00",
            "finished_at": "2026-02-27T09:00:20.000000+00:00",
            "email_used": "spy@example.com",
            "base_url": "https://acme.io",
            "mode": "competitor",
            "passed": 2,
            "failed": 3,
            "total": 5,
            "scenarios": [
                {"name": "visit", "passed": True, "duration_ms": 2000, "screenshot": "c1.png", "error": None},
                {"name": "signup", "passed": True, "duration_ms": 3000, "screenshot": "c2.png", "error": None},
                {"name": "email_verify", "passed": False, "duration_ms": 1000, "screenshot": None, "error": "Email not received"},
                {"name": "plan_select", "passed": False, "duration_ms": 500, "screenshot": None, "error": "Selector not found"},
                {"name": "checkout", "passed": False, "duration_ms": 200, "screenshot": None, "error": "Page crashed"},
            ],
            "summary": "2/5 scenarios passed",
            "_file": "competitor_acme_001.json",
        },
    ]

    # Test: normalise
    normed = [normalise(r) for r in sample_reports_raw]
    check(
        len(normed) == 3,
        "normalise() returns 3 records",
        f"expected 3, got {len(normed)}",
    )

    sunaiva_normed = [n for n in normed if n["product"] == "sunaiva"]
    check(
        sunaiva_normed[0]["success_rate"] == 75.0,
        "normalise() success_rate 75%",
        str(sunaiva_normed[0]["success_rate"]),
    )

    # Test: duration from timestamps
    check(
        normed[0]["duration_ms"] == 5000,
        "normalise() duration_ms from timestamps",
        f"expected 5000ms, got {normed[0]['duration_ms']}",
    )

    # Test: failed_steps extraction
    check(
        len(normed[0]["failed_steps"]) == 1
        and normed[0]["failed_steps"][0]["step"] == "checkout",
        "normalise() failed_steps extracted",
        str(normed[0]["failed_steps"]),
    )

    # Test: screenshot_links
    check(
        "s1.png" in normed[0]["screenshot_links"]
        and len(normed[0]["screenshot_links"]) == 2,
        "normalise() screenshot_links correct",
        str(normed[0]["screenshot_links"]),
    )

    # Test: aggregate_by_product
    by_product = aggregate_by_product(normed)
    check(
        "sunaiva" in by_product and "competitor_acme" in by_product,
        "aggregate_by_product() keys correct",
        str(list(by_product.keys())),
    )

    sunaiva_agg = by_product["sunaiva"]
    check(
        sunaiva_agg["run_count"] == 2,
        "aggregate_by_product() run_count=2",
        str(sunaiva_agg["run_count"]),
    )

    check(
        sunaiva_agg["total_passed"] == 7 and sunaiva_agg["total_failed"] == 1,
        "aggregate_by_product() pass/fail totals",
        f"{sunaiva_agg['total_passed']}/{sunaiva_agg['total_failed']}",
    )

    check(
        sunaiva_agg["success_rate"] == 87.5,
        "aggregate_by_product() success_rate=87.5%",
        str(sunaiva_agg["success_rate"]),
    )

    # Common failures: checkout should appear once across sunaiva runs
    check(
        bool(sunaiva_agg["common_failures"]),
        "aggregate_by_product() common_failures populated",
        "empty",
    )

    # Test: cross_product_comparison
    comparison = cross_product_comparison(by_product)
    ranked = comparison["ranked"]
    check(
        ranked[0]["product"] == "sunaiva",
        "cross_product_comparison() sunaiva ranked first (87.5% > 40%)",
        str([r["product"] for r in ranked]),
    )

    check(
        bool(comparison["common_failure_patterns"]),
        "cross_product_comparison() failure patterns non-empty",
        "empty",
    )

    # Test: competitive_intel_summary
    intel = competitive_intel_summary(by_product)
    check(
        bool(intel["competitors"])
        and intel["competitors"][0]["product"] == "competitor_acme",
        "competitive_intel_summary() competitor found",
        str(intel),
    )

    acme = intel["competitors"][0]
    check(
        acme["friction_score"] == 3.0,
        "competitive_intel_summary() friction_score=3.0",
        str(acme["friction_score"]),
    )

    check(
        acme["funnel_steps"] == 5.0,
        "competitive_intel_summary() funnel_steps=5",
        str(acme["funnel_steps"]),
    )

    # Test: build_summary
    summary = build_summary(normed, by_product, comparison, intel)
    check(
        summary["meta"]["total_runs"] == 3,
        "build_summary() total_runs=3",
        str(summary["meta"]["total_runs"]),
    )

    check(
        summary["meta"]["overall_success_rate"] == round(9 / 13 * 100, 1),
        "build_summary() overall_success_rate correct",
        str(summary["meta"]["overall_success_rate"]),
    )

    # Test: format_table produces non-empty string
    table_out = format_table(summary)
    check(
        "sunaiva" in table_out and "Genesis E2E" in table_out,
        "format_table() output contains expected strings",
        "missing expected content",
    )

    # Test: format_markdown produces markdown
    md_out = format_markdown(summary)
    check(
        "# Genesis E2E" in md_out and "| Product |" in md_out,
        "format_markdown() output is valid markdown",
        "missing expected content",
    )

    # Test: _ms_to_human
    check(
        _ms_to_human(500) == "500ms"
        and _ms_to_human(2500) == "2.5s"
        and _ms_to_human(90000) == "1.5m",
        "_ms_to_human() all cases",
        f"{_ms_to_human(500)}/{_ms_to_human(2500)}/{_ms_to_human(90000)}",
    )

    # Test: filter by product (product_filter logic)
    all_normed = [normalise(r) for r in sample_reports_raw]
    product_filtered = [n for n in all_normed if "sunaiva" in n["product"].lower()]
    check(
        len(product_filtered) == 2,
        "product filter selects 2 sunaiva runs",
        str(len(product_filtered)),
    )

    # Test: competitors_only filter
    competitor_filtered = [n for n in all_normed if n["mode"] == "competitor"]
    check(
        len(competitor_filtered) == 1,
        "competitors_only filter selects 1 competitor run",
        str(len(competitor_filtered)),
    )

    # Test: failures_only filter
    failures_filtered = [n for n in all_normed if n["failed"] > 0]
    check(
        len(failures_filtered) == 2,
        "failures_only filter selects 2 runs with failures",
        str(len(failures_filtered)),
    )

    # Summary — the closing banner now prints on the failure path too.
    print("")
    print("=" * 40)
    print(f"Results: {passed} passed, {failed} failed")
    if failed:
        print("SOME TESTS FAILED")
        print("=" * 40 + "\n")
        sys.exit(1)
    print("ALL TESTS PASSED")
    print("=" * 40 + "\n")


# ---------------------------------------------------------------------------
# CLI entry point
# ---------------------------------------------------------------------------

def main() -> None:
    """CLI entry point.

    Parses arguments, loads and filters reports, runs the aggregation
    pipeline, optionally writes SUMMARY_*.json, and prints the summary in
    the requested format. Exits 1 on a missing reports directory, 0 (with a
    warning) when no reports match the filters.
    """
    parser = argparse.ArgumentParser(
        description="Aggregate E2E test reports from testing/reports/",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,  # module docstring doubles as usage examples
    )
    parser.add_argument(
        "--product",
        metavar="NAME",
        help="Filter to runs matching this product name / URL substring",
    )
    parser.add_argument(
        "--competitors-only",
        action="store_true",
        help="Include only competitor intel runs (mode=competitor)",
    )
    parser.add_argument(
        "--failures-only",
        action="store_true",
        help="Include only runs that have at least one failure",
    )
    parser.add_argument(
        "--format",
        choices=["table", "md", "json"],
        default="table",
        help="Output format: table (default), md (markdown), json (raw summary)",
    )
    parser.add_argument(
        "--no-write",
        action="store_true",
        help="Skip writing SUMMARY_*.json to disk",
    )
    parser.add_argument(
        "--reports-dir",
        metavar="PATH",
        default=str(REPORTS_DIR),
        help=f"Path to reports directory (default: {REPORTS_DIR})",
    )
    parser.add_argument(
        "--self-test",
        action="store_true",
        help="Run internal self-test suite and exit",
    )
    args = parser.parse_args()

    # Self-test short-circuits the whole pipeline.
    if args.self_test:
        _run_self_test()
        return

    reports_dir = Path(args.reports_dir)
    if not reports_dir.exists():
        print(f"[ERROR] Reports directory not found: {reports_dir}", file=sys.stderr)
        sys.exit(1)

    reports_raw = load_reports(
        reports_dir,
        product_filter=args.product,
        competitors_only=args.competitors_only,
        failures_only=args.failures_only,
    )

    # No matches is not an error: warn and exit cleanly (status 0).
    if not reports_raw:
        print(
            "[WARN] No reports found matching the given filters.",
            file=sys.stderr,
        )
        print("       Run the test runner first: python testing/runner.py --product sunaiva")
        sys.exit(0)

    # Aggregation pipeline: normalise -> group -> compare -> intel -> summary.
    normed = [normalise(r) for r in reports_raw]
    by_product = aggregate_by_product(normed)
    comparison = cross_product_comparison(by_product)
    intel = competitive_intel_summary(by_product)
    summary = build_summary(normed, by_product, comparison, intel)

    # Write summary JSON (info message goes to stderr so stdout stays parseable).
    if not args.no_write:
        out_path = write_summary(summary, reports_dir)
        print(f"[INFO] Summary written to: {out_path}", file=sys.stderr)

    # Print output in the requested format.
    if args.format == "json":
        print(json.dumps(summary, indent=2))
    elif args.format == "md":
        print(format_markdown(summary))
    else:
        print(format_table(summary))


if __name__ == "__main__":
    # Run self-test when no CLI args given and REPORTS_DIR has no reports to process,
    # otherwise run normally. Allows `python testing/report_aggregator.py` to self-test.
    if len(sys.argv) == 1:
        # Check if there are real reports to process; any non-SUMMARY_*.json
        # file in the reports directory counts as a real report.
        has_reports = REPORTS_DIR.exists() and any(
            f for f in REPORTS_DIR.glob("*.json") if not f.name.startswith("SUMMARY_")
        )
        if has_reports:
            main()
        else:
            # No real reports — run self-test as demo
            _run_self_test()
    else:
        # Any explicit argument goes through the normal argparse CLI
        # (including --self-test, which main() dispatches itself).
        main()
