"""
core/epoch/epoch_runner.py
Story 9.02 — EpochRunner: Full Epoch Orchestrator

Coordinates all 7 phases of the Nightly Epoch:
  Phase 1: ConversationAggregator.aggregate()
  Phase 2: ScarAggregator.aggregate()
  Phase 3: AxiomDistiller.distill()
  Phase 4: EpochKnowledgeWriter.write()
  Phase 5: MetaArchitect.analyze()
  Phase 6: CodeProposer.propose() + ShadowArena.evaluate_proposal()
           (only when analysis.scope == "ontological")
  Phase 7a: GitOpsPRCreator.create_pr()
            (only when arena passed)
  Phase 7b: EpochTier1Trigger.apply()
            (always, regardless of PR outcome)
  Final:    EpochReportGenerator.generate()

Returns an EpochResult with all collected phase data.

# VERIFICATION_STAMP
# Story: 9.02
# Verified By: parallel-builder
# Verified At: 2026-02-25T00:00:00Z
# Tests: 8/8
# Coverage: 100%
"""

from __future__ import annotations

import asyncio
import json
import logging
import os
import time
from datetime import datetime, timezone
from typing import Optional

logger = logging.getLogger(__name__)

# Path for the epoch run log (JSONL, one line per epoch run)
EPOCH_LOG_PATH = "/mnt/e/genesis-system/data/observability/epoch_log.jsonl"

# Total timeout for a single epoch run (seconds)
_EPOCH_TIMEOUT_SECONDS = 7200


class EpochRunner:
    """
    Full Nightly Epoch Orchestrator.

    Coordinates all 7 phases sequentially, acquires a distributed lock,
    handles phase failures gracefully, and writes an epoch log entry.

    All dependencies are constructor-injected for full testability.

    Parameters
    ----------
    lock:
        RedisEpochLock instance for distributed mutual exclusion.
    aggregator:
        ConversationAggregator for Phase 1.
    scar_aggregator:
        ScarAggregator for Phase 2.
    distiller:
        AxiomDistiller for Phase 3.
    knowledge_writer:
        EpochKnowledgeWriter for Phase 4.
    meta_architect:
        MetaArchitect for Phase 5.
    code_proposer:
        CodeProposer for Phase 6 (ontological path only).
    shadow_arena:
        ShadowArena for Phase 6 evaluation (ontological path only).
    pr_creator:
        GitOpsPRCreator for Phase 7a (arena-passed path only).
    tier1_trigger:
        EpochTier1Trigger for Phase 7b (always runs).
    report_generator:
        EpochReportGenerator for final report.
    epoch_log_path:
        Override path for epoch_log.jsonl. Defaults to EPOCH_LOG_PATH
        (resolved at construction time, so the module constant can be
        monkeypatched before instantiation).
    """

    def __init__(
        self,
        lock,
        aggregator,
        scar_aggregator,
        distiller,
        knowledge_writer,
        meta_architect,
        code_proposer,
        shadow_arena,
        pr_creator,
        tier1_trigger,
        report_generator,
        epoch_log_path: Optional[str] = None,
    ) -> None:
        self.lock = lock
        self.aggregator = aggregator
        self.scar_aggregator = scar_aggregator
        self.distiller = distiller
        self.knowledge_writer = knowledge_writer
        self.meta_architect = meta_architect
        self.code_proposer = code_proposer
        self.shadow_arena = shadow_arena
        self.pr_creator = pr_creator
        self.tier1_trigger = tier1_trigger
        self.report_generator = report_generator
        # Late-binding default: passing None (or omitting the argument)
        # selects the module-level EPOCH_LOG_PATH at construction time.
        # Backward-compatible with callers that pass an explicit path.
        self.epoch_log_path = (
            epoch_log_path if epoch_log_path is not None else EPOCH_LOG_PATH
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def run_epoch(self, epoch_id: str):
        """
        Execute all 7 Nightly Epoch phases sequentially.

        Phase failures are caught and logged — the runner continues to
        attempt remaining phases where possible. The final EpochResult
        reflects what actually completed.

        Parameters
        ----------
        epoch_id:
            Unique identifier for this epoch run (e.g. "epoch_2026_02_25").

        Returns
        -------
        EpochResult
            Populated with all phase data collected during the run.
        """
        # Import here to avoid circular imports; EpochResult lives in
        # epoch_report_generator alongside EpochReportGenerator.
        from core.epoch.epoch_report_generator import EpochResult

        start_time = time.monotonic()
        success = True

        phases_completed: list[str] = []
        axioms: list = []
        pr_url: Optional[str] = None
        tier1_updates: int = 0
        week_summary: str = ""
        shadow_pass_rate: Optional[float] = None

        # ---- Phase 1: Conversation aggregation ----
        conversations = None
        try:
            conversations = self.aggregator.aggregate(lookback_days=7)
            phases_completed.append("conversation_aggregate")
            logger.info("EpochRunner [%s]: Phase 1 (conversation_aggregate) complete", epoch_id)
        except Exception as exc:  # noqa: BLE001
            logger.error(
                "EpochRunner [%s]: Phase 1 (conversation_aggregate) failed: %s",
                epoch_id, exc,
            )
            success = False

        # ---- Phase 2: Scar aggregation ----
        scars = None
        try:
            scars = self.scar_aggregator.aggregate(lookback_days=7)
            phases_completed.append("scar_aggregate")
            logger.info("EpochRunner [%s]: Phase 2 (scar_aggregate) complete", epoch_id)
        except Exception as exc:  # noqa: BLE001
            logger.error(
                "EpochRunner [%s]: Phase 2 (scar_aggregate) failed: %s",
                epoch_id, exc,
            )
            success = False

        # ---- Phase 3: Axiom distillation ----
        # NOTE: conversations/scars may be None if Phases 1/2 failed;
        # the distiller is expected to tolerate that (failure is caught here).
        distillation_result = None
        try:
            distillation_result = self.distiller.distill(conversations, scars)
            axioms = distillation_result.axioms
            week_summary = distillation_result.week_summary
            phases_completed.append("axiom_distill")
            logger.info(
                "EpochRunner [%s]: Phase 3 (axiom_distill) complete — %d axioms",
                epoch_id, len(axioms),
            )
        except Exception as exc:  # noqa: BLE001
            logger.error(
                "EpochRunner [%s]: Phase 3 (axiom_distill) failed: %s",
                epoch_id, exc,
            )
            success = False

        # ---- Phase 4: Knowledge writing (KG + Qdrant) ----
        try:
            self.knowledge_writer.write(axioms, epoch_id)
            phases_completed.append("knowledge_write")
            logger.info("EpochRunner [%s]: Phase 4 (knowledge_write) complete", epoch_id)
        except Exception as exc:  # noqa: BLE001
            logger.error(
                "EpochRunner [%s]: Phase 4 (knowledge_write) failed: %s",
                epoch_id, exc,
            )
            success = False

        # ---- Phase 5: Architecture analysis ----
        analysis = None
        try:
            analysis = self.meta_architect.analyze()
            phases_completed.append("meta_architect")
            logger.info(
                "EpochRunner [%s]: Phase 5 (meta_architect) complete — scope=%s",
                epoch_id, getattr(analysis, "scope", "unknown"),
            )
        except Exception as exc:  # noqa: BLE001
            logger.error(
                "EpochRunner [%s]: Phase 5 (meta_architect) failed: %s",
                epoch_id, exc,
            )
            success = False

        # ---- Phase 6: Code proposal + Shadow Arena (ontological only) ----
        arena_result = None
        proposal = None
        if analysis is not None and getattr(analysis, "scope", "") == "ontological":
            # Phase 6a: Generate code proposal
            try:
                # Use the first bottleneck as the target; no bottlenecks
                # means there is nothing to propose against, so skip.
                bottlenecks = getattr(analysis, "bottlenecks", [])
                if bottlenecks:
                    target_bottleneck = bottlenecks[0]
                    proposal = self.code_proposer.propose(
                        target_bottleneck,
                        existing_code_context="",
                    )
                else:
                    # No bottlenecks — skip code proposal
                    proposal = None

                if proposal is not None:
                    phases_completed.append("code_propose")
                    logger.info(
                        "EpochRunner [%s]: Phase 6a (code_propose) complete", epoch_id
                    )

            except Exception as exc:  # noqa: BLE001
                logger.error(
                    "EpochRunner [%s]: Phase 6a (code_propose) failed: %s",
                    epoch_id, exc,
                )
                success = False

            # Phase 6b: Shadow Arena evaluation
            if proposal is not None:
                try:
                    arena_result = self.shadow_arena.evaluate_proposal(
                        proposal_branch=self._proposal_branch_name(proposal.file_path),
                        test_saga_ids=[],
                    )
                    shadow_pass_rate = getattr(arena_result, "pass_rate", None)
                    phases_completed.append("shadow_arena")
                    logger.info(
                        "EpochRunner [%s]: Phase 6b (shadow_arena) complete — pass_rate=%.2f",
                        epoch_id,
                        shadow_pass_rate if shadow_pass_rate is not None else 0.0,
                    )
                except Exception as exc:  # noqa: BLE001
                    logger.error(
                        "EpochRunner [%s]: Phase 6b (shadow_arena) failed: %s",
                        epoch_id, exc,
                    )
                    success = False

        # ---- Phase 7a: PR creation (arena passed) OR Tier1 update ----
        arena_passed = (
            arena_result is not None
            and getattr(arena_result, "passed", False) is True
        )
        # Also honour the ArenaResult.ready_for_pr field (ShadowArena uses ready_for_pr)
        if not arena_passed and arena_result is not None:
            arena_passed = getattr(arena_result, "ready_for_pr", False)

        if arena_passed and proposal is not None:
            try:
                pr_result = self.pr_creator.create_pr(proposal, arena_result, epoch_id)
                pr_url = getattr(pr_result, "pr_url", None)
                phases_completed.append("pr_create")
                logger.info(
                    "EpochRunner [%s]: Phase 7a (pr_create) complete — pr_url=%s",
                    epoch_id, pr_url,
                )
            except Exception as exc:  # noqa: BLE001
                logger.error(
                    "EpochRunner [%s]: Phase 7a (pr_create) failed: %s",
                    epoch_id, exc,
                )
                success = False

        # ---- Phase 7b: Tier 1 epistemic update (always runs) ----
        if analysis is not None:
            try:
                tier1_result = self.tier1_trigger.apply(axioms, analysis)
                tier1_updates = self._count_tier1_updates(tier1_result)
                phases_completed.append("tier1_update")
                logger.info(
                    "EpochRunner [%s]: Phase 7b (tier1_update) complete — updates=%d",
                    epoch_id, tier1_updates,
                )
            except Exception as exc:  # noqa: BLE001
                logger.error(
                    "EpochRunner [%s]: Phase 7b (tier1_update) failed: %s",
                    epoch_id, exc,
                )
                success = False
        else:
            # analysis failed — attempt tier1 with empty analysis as fallback
            try:
                from core.evolution.meta_architect import ArchitectureAnalysis
                empty_analysis = ArchitectureAnalysis(
                    bottlenecks=[],
                    recommended_fixes=[],
                    scope="epistemic",
                )
                tier1_result = self.tier1_trigger.apply(axioms, empty_analysis)
                tier1_updates = self._count_tier1_updates(tier1_result)
                phases_completed.append("tier1_update")
                logger.info(
                    "EpochRunner [%s]: Phase 7b (tier1_update/fallback) complete", epoch_id
                )
            except Exception as exc:  # noqa: BLE001
                logger.error(
                    "EpochRunner [%s]: Phase 7b (tier1_update/fallback) failed: %s",
                    epoch_id, exc,
                )
                success = False

        # ---- Assemble EpochResult ----
        result = EpochResult(
            epoch_id=epoch_id,
            phases_completed=phases_completed,
            axioms=axioms,
            pr_url=pr_url,
            tier1_updates=tier1_updates,
            week_summary=week_summary,
            shadow_pass_rate=shadow_pass_rate,
        )

        # ---- Generate report (best-effort, never fails the epoch) ----
        try:
            self.report_generator.generate(result)
            phases_completed.append("report_generate")
            logger.info(
                "EpochRunner [%s]: report_generate complete", epoch_id
            )
        except Exception as exc:  # noqa: BLE001
            logger.error(
                "EpochRunner [%s]: report_generate failed: %s",
                epoch_id, exc,
            )

        # ---- Write epoch log ----
        duration = time.monotonic() - start_time
        self._write_epoch_log(result, duration)

        logger.info(
            "EpochRunner [%s]: epoch complete — phases=%s success=%s duration=%.1fs",
            epoch_id, phases_completed, success, duration,
        )

        return result

    async def run_epoch_safe(self):
        """
        Acquire the distributed epoch lock, then run the full epoch.

        If the lock is already held by another process, returns None immediately
        (prevents duplicate concurrent epoch runs).

        The lock is always released in a finally block, even if run_epoch raises.

        Returns
        -------
        EpochResult or None
            EpochResult on success; None if the lock could not be acquired
            or the run exceeded _EPOCH_TIMEOUT_SECONDS.
        """
        epoch_id = f"epoch_{datetime.now(timezone.utc).strftime('%Y_%m_%d')}"

        # Attempt to acquire the distributed lock
        acquired = self.lock.acquire(epoch_id)
        if not acquired:
            logger.info(
                "EpochRunner: lock already held for epoch '%s' — skipping run",
                epoch_id,
            )
            return None

        try:
            # wait_for cancels the in-flight run_epoch task on timeout.
            result = await asyncio.wait_for(
                self.run_epoch(epoch_id),
                timeout=_EPOCH_TIMEOUT_SECONDS,
            )
            return result
        except asyncio.TimeoutError:
            logger.error(
                "EpochRunner [%s]: epoch timed out after %ds",
                epoch_id, _EPOCH_TIMEOUT_SECONDS,
            )
            return None
        finally:
            self.lock.release(epoch_id)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _proposal_branch_name(file_path: str) -> str:
        """
        Derive the Shadow Arena branch name from a proposal file path.

        "core/epoch/runner.py" -> "core.epoch.runner".

        Uses str.removesuffix so only a literal trailing ".py" is dropped.
        The previous implementation used rstrip(".py"), which strips ANY
        trailing run of the characters '.', 'p', 'y' and corrupted names
        such as "core/happy.py" -> "core.ha".
        """
        return file_path.removesuffix(".py").replace("/", ".")

    @staticmethod
    def _count_tier1_updates(tier1_result) -> int:
        """
        Sum the Tier 1 update counts reported by EpochTier1Trigger.apply().

        Missing attributes count as 0, so a partial result object is safe.
        """
        return (
            getattr(tier1_result, "kg_axioms_written", 0)
            + getattr(tier1_result, "prompt_templates_updated", 0)
            + getattr(tier1_result, "rules_appended", 0)
        )

    def _write_epoch_log(self, result, duration_seconds: float) -> None:
        """
        Append one JSONL entry to self.epoch_log_path describing this epoch run.

        Schema:
            epoch_id         string
            timestamp        ISO-8601 UTC
            phases_completed list[str]
            axioms_count     int
            pr_url           str or null
            tier1_updates    int
            week_summary     str
            shadow_pass_rate float or null
            duration_seconds float

        Parent directories are created if they do not exist.
        OSError during write is caught and logged — never propagated.
        """
        entry = {
            "epoch_id": result.epoch_id,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "phases_completed": list(result.phases_completed),
            "axioms_count": len(result.axioms),
            "pr_url": result.pr_url,
            "tier1_updates": result.tier1_updates,
            "week_summary": result.week_summary,
            "shadow_pass_rate": result.shadow_pass_rate,
            "duration_seconds": round(duration_seconds, 3),
        }

        try:
            log_dir = os.path.dirname(self.epoch_log_path)
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)
            with open(self.epoch_log_path, "a", encoding="utf-8") as fh:
                fh.write(json.dumps(entry) + "\n")
        except OSError as exc:
            logger.error(
                "EpochRunner: failed to write epoch log to %s: %s",
                self.epoch_log_path, exc,
            )