# evolution_engine_v2.py
import json
import subprocess
from pathlib import Path
from typing import List, Dict, Optional, Tuple
import sys
from datetime import datetime, timedelta
import random
import statistics

# Add core to sys.path
sys.path.append("e:/genesis-system/core")

try:
    from genesis_heartbeat import AxiomGenerator, SurpriseEvent, SurpriseLevel
except ImportError:
    # Minimal stubs if imports fail
    # These fallbacks keep the module importable (and the engine runnable in a
    # degraded, no-op mode) when the core package is absent from sys.path.
    class SurpriseLevel:
        # Only the level this module actually uses is stubbed.
        SURPRISING = "surprising"
    class SurpriseEvent:
        def __init__(self, **kwargs):
            # Accept arbitrary keyword fields and expose them as attributes.
            self.__dict__.update(kwargs)
    class AxiomGenerator:
        def __init__(self): pass
        def generate_axiom(self, *args, **kwargs): return None  # Stub: never produces an axiom

class EvolutionEngineV2:
    """
    Genesis Evolution Engine v2.0
    Enhanced with performance tracking, A/B testing, and automated rollback.

    Pipeline per video: run the external ``youtube_learner`` tool, gate the
    output through a (simulated) multi-agent consensus check, generate an
    axiom, inject a node into the knowledge graph, append a revenue-pathway
    proposal, then log performance metrics and compare them against a rolling
    baseline to detect anomalies and trigger A/B-tested improvements.
    """

    def __init__(self, workspace_path: str = "e:/genesis-system", performance_window: int = 30):
        """
        Args:
            workspace_path: Root directory holding the knowledge graph and logs.
            performance_window: Days of history used when computing baselines.
        """
        self.workspace = Path(workspace_path)
        self.kg_entities = self.workspace / "KNOWLEDGE_GRAPH" / "entities.jsonl"
        self.market_pathways = self.workspace / "KNOWLEDGE_GRAPH" / "MARKET_PATHWAYS.md"
        self.axiom_gen = AxiomGenerator()
        self.performance_log = self.workspace / "PERFORMANCE_LOG.jsonl"
        self.improvement_log = self.workspace / "IMPROVEMENT_LOG.jsonl"
        self.performance_window = performance_window # Days to look back for performance baselines
        # Per-metric {"mean", "stdev"} computed once at startup from the log.
        self.baseline_metrics = self._calculate_baseline_metrics()

    def process_new_video(self, video_id: str, url: str) -> None:
        """Runs youtube_learner on a video and integrates it into the KG.

        This is the control-group pipeline. If the resulting metrics deviate
        from the baseline by more than 2 standard deviations, an improvement
        proposal is generated and A/B tested via `_test_improvement`.
        """
        print(f"--- Evolution Start: {video_id} ---")
        
        # 1. Trigger YouTube Learner (Control Group)
        control_result = self._run_youtube_learner(url)

        # 2. Gate A: P5 Consensus Validation (Simulated Swarm Check) - Control Group
        if not self._run_p5_consensus(video_id, control_result.stdout):
            print(f"⚠️ EVOLUTION BLOCKED: P5 Consensus Gate failed for {video_id} (Control).")
            return
        
        # 3. Axiomatization - Control Group
        self._generate_video_axiom(video_id, control_result.stdout or "No transcript available")
        
        # 4. Inject into Knowledge Graph - Control Group
        self._inject_into_kg(video_id)
        
        # 5. Trigger Revenue Pathway Discovery - Control Group
        self._propose_revenue_pipeline(video_id)

        # 6. Evaluate Performance - Control Group
        control_metrics = self._evaluate_video_performance(video_id, control_result)
        self._log_performance(video_id, "control", control_metrics)

        # 7. Anomaly Detection
        anomaly_detected = self._detect_anomalies(control_metrics)
        if anomaly_detected:
            print(f"🚨 Anomaly Detected for {video_id}: {anomaly_detected}")
            # Trigger improvement proposal (simulated)
            improvement_proposal = self._propose_improvement(control_metrics)
            if improvement_proposal:
                print(f"💡 Improvement Proposal: {improvement_proposal}")
                self._test_improvement(video_id, url, improvement_proposal) # Run A/B test

    def _run_youtube_learner(self, url: str) -> subprocess.CompletedProcess:
        """Executes the youtube_learner tool and returns the completed process.

        Fix: use sys.executable instead of the bare "python" string so the
        child process runs under the same interpreter as this engine,
        regardless of what "python" resolves to on PATH (if anything).
        NOTE(review): the return code is deliberately not checked; callers
        treat stdout as best-effort content.
        """
        cmd = [sys.executable, str(self.workspace / "tools" / "youtube_learner.py"), "learn", url]
        result = subprocess.run(cmd, capture_output=True, text=True)
        print(result.stdout)
        return result

    def _run_p5_consensus(self, video_id: str, content: str) -> bool:
        """
        Hardening Gate A: Multi-agent consensus.
        Requires CONSENSUS_01 and CONSENSUS_02 to validate the finding.

        Returns:
            True when both (currently simulated) agent audits pass.
        """
        print(f"🕵️ Gate A: running CONSENSUS_01 & CONSENSUS_02 audit on {video_id}...")
        
        # In production, this would trigger two LLM calls with different system prompts
        # Agent 1: Optimistic (looking for value)
        # Agent 2: Skeptical (looking for hallucinations)
        
        agent_audit_1 = True # Simulated pass
        agent_audit_2 = True # Simulated pass
        
        consensus_reached = agent_audit_1 and agent_audit_2
        if consensus_reached:
            print(f"✅ P5 Consensus Reached: Findings for {video_id} are valid.")
        return consensus_reached

    def _generate_video_axiom(self, video_id: str, content: str) -> None:
        """Creates a patent-aligned axiom from video content.

        Builds a SurpriseEvent from the first 500 characters of *content* and
        hands it to the AxiomGenerator. Failures are logged, never raised.
        """
        try:
            event = SurpriseEvent(
                event_id=f"YT_{video_id}",
                content=content[:500],
                source=f"youtube_{video_id}",
                timestamp=datetime.now().isoformat(),
                total_surprise=0.8,
                should_generate_axiom=True,
                level=SurpriseLevel.SURPRISING,
                prediction_error=0.5
            )
            
            print(f"Generating Axiom for {video_id}...")
            axiom = self.axiom_gen.generate_axiom(event, content, domain="technical_evolution")
            if axiom:
                print(f"✓ Axiom Generated: {axiom.statement}")
            else:
                print("! Axiom generation deferred (duplicate or key missing)")
        except Exception as e:
            # Axiom generation is best-effort; the rest of the pipeline continues.
            print(f"✗ Axiom Generation failed: {e}")

    def _inject_into_kg(self, video_id: str) -> None:
        """Appends a technology-enabler node for the video to entities.jsonl."""
        self.kg_entities.parent.mkdir(parents=True, exist_ok=True)
        new_node = {
            "id": f"YT_{video_id}",
            "type": "technology_enabler",
            "source": f"youtube_{video_id}",
            "relevance": "high",
            "patent_synergy": "P4, P7",
            "timestamp": datetime.now().isoformat()
        }
        with open(self.kg_entities, "a", encoding="utf-8") as f:
            f.write(json.dumps(new_node) + "\n")

    def _propose_revenue_pipeline(self, video_id: str) -> None:
        """Appends a gated pipeline proposal to MARKET_PATHWAYS.md, creating it if needed."""
        if not self.market_pathways.exists():
            with open(self.market_pathways, "w", encoding="utf-8") as f:
                f.write("# Genesis Market Pathways\n\n")

        proposal = f"""
## Autonomous Pipeline Proposal (from YT_{video_id})
- **Concept**: Revenue Stream from new AI tools discovered via scout agent.
- **Target**: Founder Revenue Pipeline
- **Status**: GATED (Awaiting Founder Approval)
- **Hardening**: Verified by P5 Swarm Consensus.
- **Timestamp**: {datetime.now().isoformat()}
"""
        with open(self.market_pathways, "a", encoding="utf-8") as f:
            f.write(proposal)

    def _evaluate_video_performance(self, video_id: str, result: subprocess.CompletedProcess) -> Dict[str, float]:
        """Simulates performance evaluation based on youtube_learner output.

        NOTE(review): video_id and result are currently unused — kept for the
        intended real implementation, which would parse the tool's output.
        """
        # Placeholder: In reality, this would parse the output
        # and potentially query external metrics.
        metrics = {
            "kg_injections": 1.0,  # Number of KG injections
            "revenue_proposals": 1.0, # Number of revenue proposals
            "axiom_quality": random.uniform(0.7, 0.9), # Simulated axiom quality
            "processing_time": random.uniform(5, 15) # Simulated processing time (seconds)
        }
        return metrics

    def _log_performance(self, video_id: str, group: str, metrics: Dict[str, float]) -> None:
        """Appends one JSONL entry ({video_id, group, timestamp, metrics}) to the performance log."""
        log_entry = {
            "video_id": video_id,
            "group": group,
            "timestamp": datetime.now().isoformat(),
            "metrics": metrics
        }
        self.performance_log.parent.mkdir(parents=True, exist_ok=True)
        with open(self.performance_log, "a", encoding="utf-8") as f:
            f.write(json.dumps(log_entry) + "\n")

    def _calculate_baseline_metrics(self) -> Dict[str, Dict[str, float]]:
        """Calculates baseline performance metrics from the last N days.

        Returns:
            Mapping of metric name -> {"mean": ..., "stdev": ...}; empty when
            the log is missing or holds no entries within the window.
        """
        baseline_data: Dict[str, List[float]] = {}
        cutoff_date = datetime.now() - timedelta(days=self.performance_window)

        if not self.performance_log.exists():
            return {}
        
        with open(self.performance_log, "r", encoding="utf-8") as f:
            for line in f:
                try:
                    entry = json.loads(line)
                    entry_date = datetime.fromisoformat(entry["timestamp"])
                    if entry_date >= cutoff_date:
                        for metric, value in entry["metrics"].items():
                            baseline_data.setdefault(metric, []).append(value)
                except json.JSONDecodeError:
                    print(f"Warning: Could not decode JSON from line: {line}")
                except KeyError as e:
                    print(f"Warning: Missing key in performance log entry: {e}")
                except ValueError as e:
                    # Fix: a malformed "timestamp" made fromisoformat raise an
                    # uncaught ValueError and crash the whole baseline
                    # computation at startup; skip the bad entry instead.
                    # (JSONDecodeError is a ValueError subclass, so it must be
                    # — and is — handled in the earlier clause.)
                    print(f"Warning: Invalid timestamp in performance log entry: {e}")

        # Calculate mean and standard deviation for each metric
        baseline_metrics: Dict[str, Dict[str, float]] = {}
        for metric, values in baseline_data.items():
            if values:
                baseline_metrics[metric] = {
                    "mean": statistics.mean(values),
                    "stdev": statistics.stdev(values) if len(values) > 1 else 0  # Avoid stdev on single value
                }
        return baseline_metrics

    def _detect_anomalies(self, current_metrics: Dict[str, float]) -> Optional[str]:
        """Detects anomalies in performance metrics based on baseline.

        Returns:
            A comma-joined description of metrics whose |z-score| exceeds 2,
            or None when nothing is anomalous.
        """
        anomalies = []
        for metric, value in current_metrics.items():
            if metric in self.baseline_metrics:
                mean = self.baseline_metrics[metric]["mean"]
                stdev = self.baseline_metrics[metric]["stdev"]
                if stdev > 0: # Check to avoid division by zero if there's no variance in baseline
                    z_score = (value - mean) / stdev
                    if abs(z_score) > 2: # Threshold of 2 standard deviations
                        anomalies.append(f"{metric} (Z={z_score:.2f})")
                else:
                    print(f"Warning: No variance in baseline for {metric}, skipping anomaly detection.")
            else:
                print(f"Warning: No baseline for {metric}, skipping anomaly detection.")

        return ", ".join(anomalies) if anomalies else None

    def _propose_improvement(self, metrics: Dict[str, float]) -> Optional[str]:
        """Proposes an improvement based on detected anomalies (simulated).

        Returns:
            A proposal string, or None when no improvement is warranted.
        """
        # Simplistic example: If processing time is high, propose optimizing the youtube_learner script
        if "processing_time" in metrics and "mean" in self.baseline_metrics.get("processing_time", {}):
            if metrics["processing_time"] > self.baseline_metrics["processing_time"]["mean"] * 1.2: # 20% above average
                return "Optimize youtube_learner script for faster processing."
        return None

    def _test_improvement(self, video_id: str, url: str, improvement_proposal: str) -> None:
        """Tests the proposed improvement (A/B testing).

        Re-runs the full pipeline as the "test" group, scores the result
        against the baseline, and rolls back on failure.
        """
        print(f"🧪 Running A/B Test for improvement: {improvement_proposal}")
        
        # 1. Implement Improvement (Placeholder - would involve code modification)
        # In this simulation, only "optimize" proposals are recognized; the
        # actual code change is not applied.
        if "optimize" in improvement_proposal.lower():
            print("Simulating optimized youtube_learner...")
            # (Removed dead local `sleep_duration`; it was assigned but never used.)
            
            # 2. Run youtube_learner with the improvement (Test Group)
            print(f"--- Test Group: {video_id} ---")
            test_result = self._run_youtube_learner(url)

            # 3. P5 Consensus Validation (Simulated Swarm Check) - Test Group
            if not self._run_p5_consensus(video_id, test_result.stdout):
                print(f"⚠️ EVOLUTION BLOCKED: P5 Consensus Gate failed for {video_id} (Test).")
                self._rollback_improvement(improvement_proposal)
                return

            # 4. Axiomatization - Test Group
            self._generate_video_axiom(video_id, test_result.stdout or "No transcript available")
        
            # 5. Inject into Knowledge Graph - Test Group
            self._inject_into_kg(video_id)
        
            # 6. Trigger Revenue Pathway Discovery - Test Group
            self._propose_revenue_pipeline(video_id)

            # 7. Evaluate Performance - Test Group
            test_metrics = self._evaluate_video_performance(video_id, test_result)
            self._log_performance(video_id, "test", test_metrics)

            # 8. Compare Performance
            improvement_score = self._calculate_improvement_score(test_metrics)

            # 9. Log improvement attempt
            self._log_improvement_attempt(improvement_proposal, improvement_score, video_id, test_metrics)
            
            if improvement_score > 0:
                print(f"🎉 Improvement '{improvement_proposal}' successful (Score: {improvement_score:.2f})!")
                # Deploy Improvement (Placeholder - would involve code deployment)
            else:
                print(f"❌ Improvement '{improvement_proposal}' failed (Score: {improvement_score:.2f}). Rolling back.")
                self._rollback_improvement(improvement_proposal)
        else:
            print("Skipping improvement test: Unknown proposal.")

    def _calculate_improvement_score(self, test_metrics: Dict[str, float]) -> float:
        """Calculates an improvement score based on test metrics.

        Higher axiom quality adds to the score (weight 0.5); longer processing
        time subtracts from it (weight 0.2). Positive means "deploy".
        """
        score = 0
        if "axiom_quality" in test_metrics and "mean" in self.baseline_metrics.get("axiom_quality", {}):
            score += (test_metrics["axiom_quality"] - self.baseline_metrics["axiom_quality"]["mean"]) * 0.5
        if "processing_time" in test_metrics and "mean" in self.baseline_metrics.get("processing_time", {}):
            score -= (test_metrics["processing_time"] - self.baseline_metrics["processing_time"]["mean"]) * 0.2
        return score

    def _rollback_improvement(self, improvement_proposal: str) -> None:
        """Rolls back a failed improvement (simulated)."""
        print(f"⏪ Rolling back improvement: {improvement_proposal}")
        # Placeholder: This would involve reverting code changes
        pass

    def _log_improvement_attempt(self, proposal: str, score: float, video_id: str, metrics: Dict[str, float]) -> None:
        """Logs the improvement attempt and its score to the improvement log (JSONL)."""
        log_entry = {
            "proposal": proposal,
            "score": score,
            "timestamp": datetime.now().isoformat(),
            "video_id": video_id,
            "metrics": metrics
        }
        self.improvement_log.parent.mkdir(parents=True, exist_ok=True)
        with open(self.improvement_log, "a", encoding="utf-8") as f:
            f.write(json.dumps(log_entry) + "\n")

if __name__ == "__main__":
    def main() -> None:
        """Demo driver: process two videos so a performance baseline accumulates."""
        evolution_engine = EvolutionEngineV2()
        evolution_engine.process_new_video("vqHBfe3r4OQ", "https://www.youtube.com/watch?v=vqHBfe3r4OQ")
        # Second run gives _calculate_baseline_metrics data on the next start.
        evolution_engine.process_new_video("another_video", "https://www.youtube.com/watch?v=example")

    main()