# evolution_engine_v2.py
import json
import subprocess
from pathlib import Path
from typing import List, Dict, Optional, Tuple
import sys
from datetime import datetime
import time
import random

# Add core to sys.path
sys.path.append("e:/genesis-system/core")

try:
    from genesis_heartbeat import AxiomGenerator, SurpriseEvent, SurpriseLevel
except ImportError:
    # genesis_heartbeat is unavailable (e.g. running outside the Genesis
    # workspace); define inert stand-ins so the module still imports and
    # the engine degrades gracefully instead of crashing at import time.
    class SurpriseLevel:
        # Only the level the engine actually references is mirrored here.
        SURPRISING = "surprising"

    class SurpriseEvent:
        # Bag-of-attributes stand-in: every keyword becomes an attribute.
        def __init__(self, **attrs):
            for key, val in attrs.items():
                setattr(self, key, val)

    class AxiomGenerator:
        # No-op generator: stub mode never produces an axiom.
        def __init__(self):
            pass

        def generate_axiom(self, *args, **kwargs):
            return None

# Assuming blackboard singleton is available via swarm interface
# In a real scenario, this would import the active singleton
try:
    from swarms.blackboard import get_blackboard, EntryType
except ImportError:
    # Fallback/Mock for testing.
    # Bug fix: the original fallback only defined get_blackboard, so any
    # later reference to EntryType would raise NameError in mock mode.
    # A placeholder class keeps the fallback's exports in sync with the
    # real module's.
    class EntryType:
        """Placeholder for swarms.blackboard.EntryType in mock mode."""
        pass

    def get_blackboard():
        print("Warning: Blackboard not found.  Running in Mock mode.")
        return None


class EvolutionEngineV2:
    """
    Genesis Evolution Engine v2.0

    Tracks performance, identifies improvements, tests, and rolls back if
    needed. Metrics are persisted as JSON lists keyed by metric name;
    anomaly detection uses a simple z-score over each metric's history,
    and anomalies trigger a propose -> A/B-test -> deploy/rollback loop.
    """

    # Metric series the engine always tracks.
    _DEFAULT_METRICS = (
        "video_processing_time",
        "consensus_success_rate",
        "axiom_generation_success_rate",
    )

    def __init__(self, workspace_path: str = "e:/genesis-system", metrics_file: str = "evolution_metrics.json"):
        """
        Args:
            workspace_path: Root of the Genesis workspace; the knowledge
                graph and tools directories live beneath it.
            metrics_file: Path of the JSON file used to persist metrics.
        """
        self.workspace = Path(workspace_path)
        self.kg_entities = self.workspace / "KNOWLEDGE_GRAPH" / "entities.jsonl"
        self.market_pathways = self.workspace / "KNOWLEDGE_GRAPH" / "MARKET_PATHWAYS.md"
        self.axiom_gen = AxiomGenerator()
        self.metrics_file = Path(metrics_file)
        self.metrics = self._load_metrics()
        self.anomaly_threshold = 2.0  # z-score cutoff, in standard deviations
        self.improvement_log = []  # chronological record of improvement proposals

    def _load_metrics(self) -> Dict:
        """Loads metrics from file, guaranteeing every default series exists.

        Robustness fix: a metrics file written by an older version (or
        truncated by hand) may lack some series; seeding the defaults
        prevents a later KeyError in _update_metric / _is_anomaly.
        """
        if self.metrics_file.exists():
            with open(self.metrics_file, "r") as f:
                metrics = json.load(f)
        else:
            metrics = {}
        for name in self._DEFAULT_METRICS:
            metrics.setdefault(name, [])
        return metrics

    def _save_metrics(self):
        """Saves performance metrics to file (indented for inspectability)."""
        with open(self.metrics_file, "w") as f:
            json.dump(self.metrics, f, indent=2)

    def process_new_video(self, video_id: str, url: str):
        """Processes a new video, tracking metrics and attempting improvements.

        Pipeline: youtube_learner scrape -> P5 consensus gate ->
        axiomatization -> knowledge-graph injection -> revenue proposal ->
        metric persistence and anomaly-driven improvement checks.

        Args:
            video_id: Identifier used for KG nodes and log messages.
            url: Full YouTube URL handed to the learner tool.
        """
        start_time = time.time()
        print(f"--- Evolution Start: {video_id} ---")

        # 1. Trigger YouTube Learner. sys.executable guarantees the same
        #    interpreter/venv as this process; a bare "python" may resolve
        #    to a different installation on PATH.
        cmd = [sys.executable, str(self.workspace / "tools" / "youtube_learner.py"), "learn", url]
        result = subprocess.run(cmd, capture_output=True, text=True)
        youtube_learner_output = result.stdout
        print(youtube_learner_output)
        processing_time = time.time() - start_time

        # 2. Gate A: P5 Consensus Validation (Simulated Swarm Check)
        consensus_result = self._run_p5_consensus(video_id, youtube_learner_output)
        if not consensus_result:
            print(f"⚠️ EVOLUTION BLOCKED: P5 Consensus Gate failed for {video_id}.")
            self._update_metric("consensus_success_rate", 0)
            # Bug fix: persist the failure sample too. The original early
            # return skipped _save_metrics(), so failed-gate runs were
            # silently dropped and the success-rate history was biased.
            self._save_metrics()
            return

        self._update_metric("consensus_success_rate", 1)

        # 3. Axiomatization
        axiom_result = self._generate_video_axiom(video_id, youtube_learner_output or "No transcript available")
        self._update_metric("axiom_generation_success_rate", 1 if axiom_result else 0)

        # 4. Inject into Knowledge Graph
        self._inject_into_kg(video_id)

        # 5. Trigger Revenue Pathway Discovery
        self._propose_revenue_pipeline(video_id)

        # 6. Record timing, persist, then look for improvement opportunities.
        self._update_metric("video_processing_time", processing_time)
        self._save_metrics()
        self._check_for_improvement_opportunities()

    def _run_p5_consensus(self, video_id: str, content: str) -> bool:
        """
        Hardening Gate A: Multi-agent consensus.
        Requires CONSENSUS_01 and CONSENSUS_02 to validate the finding.

        NOTE: currently a simulation — each agent "passes" with 90%
        probability via random.random(); in production this would trigger
        two LLM calls with different system prompts (optimistic vs.
        skeptical).
        """
        print(f"🕵️ Gate A: running CONSENSUS_01 & CONSENSUS_02 audit on {video_id}...")

        agent_audit_1 = random.random() > 0.1  # Simulate some failures
        agent_audit_2 = random.random() > 0.1  # Simulate some failures

        # Both auditors must agree for the finding to pass the gate.
        consensus_reached = agent_audit_1 and agent_audit_2
        if consensus_reached:
            print(f"✅ P5 Consensus Reached: Findings for {video_id} are valid.")
        else:
            print(f"❌ P5 Consensus Failed for {video_id}.")
        return consensus_reached

    def _generate_video_axiom(self, video_id: str, content: str) -> bool:
        """Creates a patent-aligned axiom from video content.

        Returns:
            True when an axiom was generated, False when generation was
            deferred (e.g. duplicate/missing key) or raised an exception.
        """
        try:
            event = SurpriseEvent(
                event_id=f"YT_{video_id}",
                content=content[:500],  # cap payload size passed to the event
                source=f"youtube_{video_id}",
                timestamp=datetime.now().isoformat(),
                total_surprise=0.8,
                should_generate_axiom=True,
                level=SurpriseLevel.SURPRISING,
                prediction_error=0.5
            )

            print(f"Generating Axiom for {video_id}...")
            axiom = self.axiom_gen.generate_axiom(event, content, domain="technical_evolution")
            if axiom:
                print(f"✓ Axiom Generated: {axiom.statement}")
                return True
            else:
                print("! Axiom generation deferred (duplicate or key missing)")
                return False
        except Exception as e:
            # Axiom generation is best-effort: log and continue the pipeline.
            print(f"✗ Axiom Generation failed: {e}")
            return False

    def _inject_into_kg(self, video_id: str):
        """Appends a technology-enabler node for the video to entities.jsonl."""
        self.kg_entities.parent.mkdir(parents=True, exist_ok=True)
        new_node = {
            "id": f"YT_{video_id}",
            "type": "technology_enabler",
            "source": f"youtube_{video_id}",
            "relevance": "high",
            "patent_synergy": "P4, P7",
            "timestamp": datetime.now().isoformat()
        }
        # JSONL append: one JSON object per line.
        with open(self.kg_entities, "a", encoding="utf-8") as f:
            f.write(json.dumps(new_node) + "\n")

    def _propose_revenue_pipeline(self, video_id: str):
        """Appends a gated revenue-pipeline proposal to MARKET_PATHWAYS.md."""
        if not self.market_pathways.exists():
            # Seed the markdown file with its header on first use.
            with open(self.market_pathways, "w", encoding="utf-8") as f:
                f.write("# Genesis Market Pathways\n\n")

        proposal = f"""
## Autonomous Pipeline Proposal (from YT_{video_id})
- **Concept**: Revenue Stream from new AI tools discovered via scout agent.
- **Target**: Founder Revenue Pipeline
- **Status**: GATED (Awaiting Founder Approval)
- **Hardening**: Verified by P5 Swarm Consensus.
- **Timestamp**: {datetime.now().isoformat()}
"""
        with open(self.market_pathways, "a", encoding="utf-8") as f:
            f.write(proposal)

    def _update_metric(self, metric_name: str, value: float):
        """Appends a sample; creates the series if it doesn't exist yet."""
        self.metrics.setdefault(metric_name, []).append(value)

    def _is_anomaly(self, metric_name: str) -> bool:
        """Checks whether the latest sample of *metric_name* is an anomaly.

        Anomaly = |latest - mean| / std_dev > self.anomaly_threshold.
        NOTE: the latest sample is included in the mean/std computation,
        which slightly dampens its own z-score; acceptable for this
        heuristic threshold.
        """
        values = self.metrics.get(metric_name, [])
        if len(values) < 3:  # Need at least 3 data points to calculate std dev
            return False

        mean = sum(values) / len(values)
        # Population standard deviation over the full history.
        std_dev = (sum((x - mean) ** 2 for x in values) / len(values)) ** 0.5

        if std_dev == 0:  # Constant series: avoid division by zero
            return False

        z_score = abs(values[-1] - mean) / std_dev
        return z_score > self.anomaly_threshold

    def _check_for_improvement_opportunities(self):
        """Identifies areas for improvement based on performance metrics."""
        # (metric key, human-readable label, improvement area) triples.
        checks = [
            ("video_processing_time", "video processing time", "Optimize video processing pipeline"),
            ("consensus_success_rate", "consensus success rate", "Improve consensus mechanism"),
            ("axiom_generation_success_rate", "axiom generation success rate", "Improve axiom generation logic"),
        ]
        for metric_name, label, area in checks:
            if self._is_anomaly(metric_name):
                print(f"⚠️ Anomaly detected in {label}. Generating improvement proposal...")
                self._generate_improvement_proposal(area)

    def _generate_improvement_proposal(self, area: str):
        """Generates a proposal for improving a specific area and tests it."""
        proposal_id = len(self.improvement_log) + 1
        proposal = {
            "id": proposal_id,
            "area": area,
            "description": f"Investigate and implement improvements to {area}.",
            "status": "proposed",
            "score": self._calculate_improvement_score(area)  # estimated impact
        }
        self.improvement_log.append(proposal)
        print(f"💡 Improvement Proposal Generated: {proposal['description']} (Score: {proposal['score']})")
        self._test_improvement(proposal)

    def _calculate_improvement_score(self, area: str) -> float:
        """Calculates a heuristic impact score for an improvement area.

        Bug fix: the original matched "video processing time", which never
        occurs in the actual proposal text ("Optimize video processing
        pipeline"), so video-pipeline proposals always fell through to the
        default 0.5. Matching "video processing" restores the intended 0.7.
        """
        area_lc = area.lower()
        if "video processing" in area_lc:
            return 0.7
        elif "consensus" in area_lc:
            return 0.8
        elif "axiom" in area_lc:
            return 0.6
        else:
            return 0.5

    def _test_improvement(self, proposal: Dict):
        """Tests an improvement using (simulated) A/B testing."""
        print(f"🧪 Starting A/B test for proposal {proposal['id']}: {proposal['description']}")
        # Simulate A/B testing with a 70% chance of success.
        success = random.random() > 0.3
        if success:
            print(f"✅ A/B test passed for proposal {proposal['id']}. Deploying improvement...")
            self._deploy_improvement(proposal)
        else:
            print(f"❌ A/B test failed for proposal {proposal['id']}. Discarding improvement...")
            proposal["status"] = "failed"
            self._learn_from_failure(proposal)  # Record failure details

    def _deploy_improvement(self, proposal: Dict):
        """Deploys an improvement, rolling back if the post-check fails."""
        print(f"🚀 Deploying improvement: {proposal['description']}")
        # Simulate deployment (in reality: code changes and redeployment).
        proposal["status"] = "deployed"
        # Simulate a post-deployment check that might fail (10% chance).
        post_deployment_success = random.random() > 0.1
        if not post_deployment_success:
            print("🚨 Post-deployment check failed. Rolling back...")
            self._rollback_improvement(proposal)
        else:
            self._learn_from_success(proposal)

    def _rollback_improvement(self, proposal: Dict):
        """Rolls back a failed improvement."""
        print(f"⏪ Rolling back improvement: {proposal['description']}")
        proposal["status"] = "rolled_back"
        # Simulate rollback (in reality: reverting code changes).
        self._learn_from_failure(proposal)

    def _learn_from_success(self, proposal: Dict):
        """Learns from a successful improvement."""
        print(f"🧠 Learning from successful improvement: {proposal['description']}")
        # Record the outcome on the proposal for future reference.
        proposal["learning"] = "Improvement successfully deployed and validated."
        # Bias future proposals in this area upward.
        self._adjust_future_scores(proposal["area"], 0.1)  # Increase score by 10%

    def _learn_from_failure(self, proposal: Dict):
        """Learns from a failed improvement."""
        print(f"🧠 Learning from failed improvement: {proposal['description']}")
        # Record the outcome on the proposal for future reference.
        proposal["learning"] = "Improvement failed during A/B testing or post-deployment check."
        # Bias future proposals in this area downward.
        self._adjust_future_scores(proposal["area"], -0.05)  # Decrease score by 5%

    def _adjust_future_scores(self, area: str, adjustment: float):
        """Adjusts logged proposal scores for *area*, clamped to [0.1, 1.0]."""
        # Placeholder heuristic; a real system would use a learned model.
        for proposal in self.improvement_log:
            if area.lower() in proposal["area"].lower():
                proposal["score"] = max(0.1, min(1.0, proposal["score"] + adjustment))


if __name__ == "__main__":
    # Demo driver: run the full evolution pipeline over a few sample videos.
    sample_videos = [
        ("vqHBfe3r4OQ", "https://www.youtube.com/watch?v=vqHBfe3r4OQ"),
        ("another_video", "https://www.youtube.com/watch?v=dummy_video"),
        ("yet_another", "https://www.youtube.com/watch?v=fake_video"),
    ]
    engine = EvolutionEngineV2()
    for vid, link in sample_videos:
        engine.process_new_video(vid, link)