"""
Genesis Outcome Memory System

Tier 5 of the Five-Tier Cognitive Memory Architecture.
Tracks action → result with attribution for self-optimization.

Key Features:
- Revenue attribution: Which actions led to conversions
- Pattern detection: Success/failure patterns
- Self-optimization: Feed learnings back to trigger memory

Usage:
    from outcome_memory import OutcomeMemory

    om = OutcomeMemory()

    # Log an action
    action_id = om.log_action(
        action_type="email_sent",
        workflow_id="lead_nurture",
        input_data={"lead_id": "123", "template": "welcome_v2"}
    )

    # Update with outcome
    om.record_outcome(
        action_id=action_id,
        result={"opened": True, "clicked": True, "converted": True},
        revenue=97.00
    )

    # Get insights
    insights = om.get_attribution_insights()
"""

"""
RULE 7 PARTIAL MIGRATION: Schema and initialization migrated to PostgreSQL.

TODO: Complete migration of remaining methods (record_outcome, get_*, etc.)
Pattern established in _init_db() and log_action() - follow same approach.
Remaining sqlite3.connect() calls need conversion to:
    with connection() as conn:
        cursor = conn.cursor()
        cursor.execute(query, params)  # Use %s not ?
"""
import json
import logging
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, asdict
from pathlib import Path
from collections import defaultdict
import sys

# RULE 7: Use PostgreSQL via genesis_db (no sqlite3)
sys.path.insert(0, '/mnt/e/genesis-system')
from core.genesis_db import connection, ensure_table

logger = logging.getLogger(__name__)


@dataclass
class Action:
    """Represents an action taken by Genesis.

    Persisted in the `outcome_actions` table; `status` is flipped from
    'pending' once an outcome is recorded for the action.
    """
    id: str                           # unique ID (generated by OutcomeMemory._generate_id)
    action_type: str                  # e.g. "email_sent", "webhook_called"
    workflow_id: Optional[str]        # n8n workflow that initiated this, if any
    trigger_id: Optional[str]         # trigger that caused this, if any
    input_data: Dict[str, Any]        # action parameters/context (stored as JSONB)
    timestamp: str                    # ISO-8601 UTC creation time
    status: str = "pending"  # pending, completed, failed


@dataclass
class Outcome:
    """Represents the result of an action.

    Optional fields default to None and are normalized in __post_init__
    so every instance gets its own timestamp/dict/list objects.
    """
    action_id: str                                 # ID of the Action this outcome belongs to
    success: bool                                  # whether the action achieved its goal
    result_data: Dict[str, Any]                    # raw result payload (opened/clicked/converted/...)
    revenue: float = 0.0                           # revenue attributed to this outcome
    timestamp: Optional[str] = None                # ISO-8601 UTC; filled in __post_init__
    attribution: Optional[Dict[str, Any]] = None   # attribution dimensions; {} if None
    learnings: Optional[List[str]] = None          # generated learnings; [] if None

    def __post_init__(self):
        # Replace None defaults with per-instance values (avoids the shared
        # mutable-default pitfall for the dict/list fields).
        if self.timestamp is None:
            self.timestamp = datetime.now(timezone.utc).isoformat()
        if self.attribution is None:
            self.attribution = {}
        if self.learnings is None:
            self.learnings = []

class OutcomeMemory:
    """
    Manages Outcome Memory (Tier 5) - action → result tracking.

    Features:
    - PostgreSQL storage via genesis_db (RULE 7)
    - Revenue attribution
    - Pattern detection
    - Self-optimization insights

    RULE 7 migration complete: every query goes through genesis_db's
    connection() context manager with %s placeholders, against the
    outcome_* tables created by _init_db().
    """

    def __init__(self, db_path: str = None):
        """
        Initialize Outcome Memory.

        Args:
            db_path: Ignored - uses PostgreSQL via genesis_db (RULE 7)
        """
        # RULE 7: Uses PostgreSQL via genesis_db
        self._init_db()

    def _init_db(self):
        """Initialize PostgreSQL database schema (RULE 7)."""
        # Actions table: one row per action Genesis takes.
        ensure_table('outcome_actions', '''
            id TEXT PRIMARY KEY,
            action_type TEXT NOT NULL,
            workflow_id TEXT,
            trigger_id TEXT,
            input_data JSONB,
            timestamp TIMESTAMPTZ NOT NULL,
            status TEXT DEFAULT 'pending'
        ''')

        # Outcomes table: results linked back to actions.
        ensure_table('outcome_results', '''
            id SERIAL PRIMARY KEY,
            action_id TEXT NOT NULL REFERENCES outcome_actions(id),
            success BOOLEAN NOT NULL,
            result_data JSONB,
            revenue REAL DEFAULT 0.0,
            timestamp TIMESTAMPTZ NOT NULL,
            attribution JSONB,
            learnings JSONB
        ''')

        # Attribution summary table: rolling aggregates per (dimension, value).
        ensure_table('outcome_attribution', '''
            dimension TEXT NOT NULL,
            value TEXT NOT NULL,
            total_actions INTEGER DEFAULT 0,
            successful_actions INTEGER DEFAULT 0,
            total_revenue REAL DEFAULT 0.0,
            avg_revenue REAL DEFAULT 0.0,
            last_updated TIMESTAMPTZ,
            PRIMARY KEY (dimension, value)
        ''')

        # Learnings table: validated insights fed back to trigger memory.
        ensure_table('outcome_learnings', '''
            id SERIAL PRIMARY KEY,
            pattern TEXT NOT NULL,
            insight TEXT NOT NULL,
            confidence REAL DEFAULT 0.5,
            sample_size INTEGER DEFAULT 0,
            created_at TIMESTAMPTZ,
            validated BOOLEAN DEFAULT FALSE
        ''')

        # Create indexes (best-effort; failure is non-fatal).
        try:
            with connection() as conn:
                cursor = conn.cursor()
                cursor.execute("CREATE INDEX IF NOT EXISTS idx_outcome_actions_type ON outcome_actions(action_type)")
                cursor.execute("CREATE INDEX IF NOT EXISTS idx_outcome_actions_timestamp ON outcome_actions(timestamp)")
                cursor.execute("CREATE INDEX IF NOT EXISTS idx_outcome_results_action ON outcome_results(action_id)")
                cursor.execute("CREATE INDEX IF NOT EXISTS idx_outcome_results_revenue ON outcome_results(revenue)")
        except Exception as e:
            logger.warning(f"Index creation warning: {e}")

    # ========== Internal helpers ==========

    @staticmethod
    def _load_json(value: Any) -> Any:
        """Decode a JSONB column value.

        psycopg2 typically returns JSONB columns already decoded to
        dict/list; other configurations return a JSON string. Accept both.
        """
        if value is None:
            return None
        if isinstance(value, (dict, list)):
            return value
        return json.loads(value)

    @staticmethod
    def _ts_to_datetime(value: Any) -> datetime:
        """Coerce a TIMESTAMPTZ column value (datetime or ISO string) to datetime."""
        if isinstance(value, datetime):
            return value
        return datetime.fromisoformat(str(value).replace('Z', '+00:00'))

    @staticmethod
    def _ts_to_str(value: Any) -> Optional[str]:
        """Coerce a TIMESTAMPTZ column value to an ISO-8601 string (or None)."""
        if value is None:
            return None
        if isinstance(value, datetime):
            return value.isoformat()
        return str(value)

    def _generate_id(self, prefix: str = "action") -> str:
        """Generate a unique ID of the form '<prefix>_<12 hex chars>'.

        Mixes a UUID into the hashed base so two IDs generated within the
        same microsecond cannot collide (a timestamp alone is not unique).
        """
        import hashlib
        import uuid
        base = f"{prefix}_{datetime.now(timezone.utc).isoformat()}_{uuid.uuid4().hex}"
        return f"{prefix}_{hashlib.md5(base.encode()).hexdigest()[:12]}"

    def _fetch_action_row(self, action_id: str) -> Optional[Tuple]:
        """Fetch a single action row with a fixed column order, or None.

        Column order: id, action_type, workflow_id, trigger_id,
        input_data, timestamp, status.
        """
        try:
            with connection() as conn:
                cursor = conn.cursor()
                cursor.execute("""
                    SELECT id, action_type, workflow_id, trigger_id, input_data, timestamp, status
                    FROM outcome_actions
                    WHERE id = %s
                """, (action_id,))
                return cursor.fetchone()
        except Exception as e:
            logger.warning(f"Failed to fetch action {action_id}: {e}")
            return None

    # ========== Action Logging ==========

    def log_action(
        self,
        action_type: str,
        workflow_id: str = None,
        trigger_id: str = None,
        input_data: Dict[str, Any] = None
    ) -> str:
        """
        Log an action taken by Genesis.

        Args:
            action_type: Type of action (email_sent, webhook_called, etc.)
            workflow_id: Optional n8n workflow that initiated this
            trigger_id: Optional trigger that caused this
            input_data: Action parameters/context

        Returns:
            Action ID for tracking (returned even if the insert fails,
            so callers are never blocked by storage errors)
        """
        action_id = self._generate_id("action")
        timestamp = datetime.now(timezone.utc).isoformat()

        try:
            with connection() as conn:
                cursor = conn.cursor()
                cursor.execute("""
                    INSERT INTO outcome_actions (id, action_type, workflow_id, trigger_id, input_data, timestamp, status)
                    VALUES (%s, %s, %s, %s, %s, %s, 'pending')
                """, (
                    action_id,
                    action_type,
                    workflow_id,
                    trigger_id,
                    json.dumps(input_data or {}),
                    timestamp
                ))
        except Exception as e:
            logger.warning(f"Failed to log action: {e}")

        return action_id

    def record_outcome(
        self,
        action_id: str,
        result: Dict[str, Any],
        success: bool = None,
        revenue: float = 0.0,
        attribution: Dict[str, Any] = None
    ) -> bool:
        """
        Record the outcome of an action.

        Args:
            action_id: ID of the action
            result: Result data (opened, clicked, converted, etc.)
            success: Whether action was successful (auto-detected if not provided)
            revenue: Revenue generated (if any)
            attribution: Attribution data (template, time, source, etc.)

        Returns:
            True if recorded successfully, False on storage failure
        """
        # Auto-detect success if not provided.
        if success is None:
            success = result.get("success", result.get("converted", False))
        success = bool(success)  # BOOLEAN column: ensure a real bool

        # Build attribution if not provided.
        if attribution is None:
            attribution = self._build_attribution(action_id, result)

        # Generate learnings.
        learnings = self._generate_learnings(action_id, result, success, revenue)

        timestamp = datetime.now(timezone.utc).isoformat()

        try:
            with connection() as conn:
                cursor = conn.cursor()

                # Insert outcome.
                cursor.execute("""
                    INSERT INTO outcome_results (action_id, success, result_data, revenue, timestamp, attribution, learnings)
                    VALUES (%s, %s, %s, %s, %s, %s, %s)
                """, (
                    action_id,
                    success,
                    json.dumps(result),
                    revenue,
                    timestamp,
                    json.dumps(attribution),
                    json.dumps(learnings)
                ))

                # Update action status.
                cursor.execute("""
                    UPDATE outcome_actions SET status = %s WHERE id = %s
                """, ("completed" if success else "failed", action_id))
        except Exception as e:
            logger.warning(f"Failed to record outcome for {action_id}: {e}")
            return False

        # Update attribution summary.
        self._update_attribution_summary(attribution, success, revenue)

        return True

    def _build_attribution(self, action_id: str, result: Dict) -> Dict[str, Any]:
        """Build attribution data from the stored action and its result."""
        row = self._fetch_action_row(action_id)
        if not row:
            return {}

        input_data = self._load_json(row[4]) or {}

        attribution = {
            "action_type": row[1],
            "workflow_id": row[2],
            "trigger_id": row[3],
        }

        # Extract common attribution dimensions from the action's input.
        for key in ["template", "source", "channel", "campaign", "segment"]:
            if key in input_data:
                attribution[key] = input_data[key]

        # Time-based attribution (action creation time).
        timestamp = self._ts_to_datetime(row[5])
        attribution["hour"] = timestamp.hour
        attribution["day_of_week"] = timestamp.strftime("%A")
        attribution["time_of_day"] = self._get_time_of_day(timestamp.hour)

        return attribution

    def _get_time_of_day(self, hour: int) -> str:
        """Categorize hour (0-23) into morning/afternoon/evening/night."""
        if 5 <= hour < 12:
            return "morning"
        elif 12 <= hour < 17:
            return "afternoon"
        elif 17 <= hour < 21:
            return "evening"
        else:
            return "night"

    def _generate_learnings(
        self,
        action_id: str,
        result: Dict,
        success: bool,
        revenue: float
    ) -> List[str]:
        """Generate human-readable learnings from an outcome."""
        learnings = []

        # Get action context.
        row = self._fetch_action_row(action_id)
        if not row:
            return learnings

        input_data = self._load_json(row[4]) or {}
        action_type = row[1]

        # Generate contextual learnings.
        if success and revenue > 0:
            if "template" in input_data:
                learnings.append(f"{input_data['template']} template led to ${revenue:.2f} revenue")

        if result.get("opened") and result.get("clicked"):
            learnings.append(f"{action_type} had high engagement (opened + clicked)")

        if not success and result.get("error"):
            learnings.append(f"{action_type} failed: {result['error']}")

        return learnings

    def _update_attribution_summary(
        self,
        attribution: Dict[str, Any],
        success: bool,
        revenue: float
    ):
        """Upsert rolling per-(dimension, value) aggregates.

        Uses PostgreSQL `ON CONFLICT ... DO UPDATE` with EXCLUDED.* for the
        incoming values. The first insert also sets avg_revenue (previously
        it was left at the 0.0 column default until the second action).
        """
        timestamp = datetime.now(timezone.utc).isoformat()
        success_increment = 1 if success else 0

        try:
            with connection() as conn:
                cursor = conn.cursor()
                for dimension, value in attribution.items():
                    if value is None:
                        continue

                    cursor.execute("""
                        INSERT INTO outcome_attribution
                            (dimension, value, total_actions, successful_actions, total_revenue, avg_revenue, last_updated)
                        VALUES (%s, %s, 1, %s, %s, %s, %s)
                        ON CONFLICT (dimension, value) DO UPDATE SET
                            total_actions = outcome_attribution.total_actions + 1,
                            successful_actions = outcome_attribution.successful_actions + EXCLUDED.successful_actions,
                            total_revenue = outcome_attribution.total_revenue + EXCLUDED.total_revenue,
                            avg_revenue = (outcome_attribution.total_revenue + EXCLUDED.total_revenue)
                                          / (outcome_attribution.total_actions + 1),
                            last_updated = EXCLUDED.last_updated
                    """, (dimension, str(value), success_increment, revenue, revenue, timestamp))
        except Exception as e:
            logger.warning(f"Failed to update attribution summary: {e}")

    # ========== Querying ==========

    def get_action(self, action_id: str) -> Optional[Dict]:
        """Get action by ID, joined with its outcome (if recorded)."""
        try:
            with connection() as conn:
                cursor = conn.cursor()
                cursor.execute("""
                    SELECT a.id, a.action_type, a.workflow_id, a.trigger_id, a.input_data,
                           a.timestamp, a.status,
                           o.success, o.result_data, o.revenue, o.attribution
                    FROM outcome_actions a
                    LEFT JOIN outcome_results o ON a.id = o.action_id
                    WHERE a.id = %s
                """, (action_id,))
                row = cursor.fetchone()
        except Exception as e:
            logger.warning(f"Failed to get action {action_id}: {e}")
            return None

        if not row:
            return None

        return {
            "id": row[0],
            "action_type": row[1],
            "workflow_id": row[2],
            "trigger_id": row[3],
            "input_data": self._load_json(row[4]) or {},
            "timestamp": self._ts_to_str(row[5]),
            "status": row[6],
            "success": bool(row[7]) if row[7] is not None else None,
            "result": self._load_json(row[8]),
            "revenue": row[9],
            "attribution": self._load_json(row[10])
        }

    def get_outcomes_by_action_type(
        self,
        action_type: str,
        days: int = 30
    ) -> List[Dict]:
        """Get outcomes for a specific action type within the last `days` days."""
        cutoff = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()

        try:
            with connection() as conn:
                cursor = conn.cursor()
                cursor.execute("""
                    SELECT a.id, a.action_type, a.timestamp, o.success, o.revenue
                    FROM outcome_actions a
                    JOIN outcome_results o ON a.id = o.action_id
                    WHERE a.action_type = %s AND a.timestamp > %s
                    ORDER BY a.timestamp DESC
                """, (action_type, cutoff))
                rows = cursor.fetchall()
        except Exception as e:
            logger.warning(f"Failed to get outcomes for {action_type}: {e}")
            return []

        return [{
            "id": row[0],
            "action_type": row[1],
            "timestamp": self._ts_to_str(row[2]),
            "success": bool(row[3]),
            "revenue": row[4]
        } for row in rows]

    # ========== Attribution Insights ==========

    def get_attribution_insights(self) -> Dict[str, Any]:
        """Get attribution insights for optimization."""
        insights = {
            "by_dimension": {},
            "top_performers": [],
            "opportunities": [],
            "recommendations": []
        }

        # Get summary by dimension.
        try:
            with connection() as conn:
                cursor = conn.cursor()
                cursor.execute("""
                    SELECT dimension, value, total_actions, successful_actions, total_revenue, avg_revenue
                    FROM outcome_attribution
                    ORDER BY total_revenue DESC
                """)
                rows = cursor.fetchall()
        except Exception as e:
            logger.warning(f"Failed to load attribution insights: {e}")
            rows = []

        for row in rows:
            dimension = row[0]
            if dimension not in insights["by_dimension"]:
                insights["by_dimension"][dimension] = []

            success_rate = row[3] / row[2] if row[2] > 0 else 0

            insights["by_dimension"][dimension].append({
                "value": row[1],
                "total_actions": row[2],
                "success_rate": round(success_rate * 100, 1),
                "total_revenue": round(row[4], 2),
                "avg_revenue": round(row[5], 2)
            })

        # Generate recommendations (mutates insights["recommendations"]).
        self._generate_recommendations(insights)

        return insights

    def _generate_recommendations(self, insights: Dict):
        """Generate actionable recommendations from insights (in place)."""
        recommendations = []

        for dimension, values in insights["by_dimension"].items():
            # Need at least two values to compare within a dimension.
            if len(values) < 2:
                continue

            # Best vs worst by success rate; flag when best beats worst by >1.5x.
            sorted_values = sorted(values, key=lambda x: x["success_rate"], reverse=True)

            if sorted_values[0]["success_rate"] > sorted_values[-1]["success_rate"] * 1.5:
                best = sorted_values[0]
                worst = sorted_values[-1]
                recommendations.append({
                    "dimension": dimension,
                    "insight": f"{best['value']} outperforms {worst['value']} by {best['success_rate'] - worst['success_rate']:.1f}%",
                    "action": f"Increase usage of {best['value']}, reduce {worst['value']}"
                })

            # Revenue-based recommendations.
            sorted_by_revenue = sorted(values, key=lambda x: x["avg_revenue"], reverse=True)
            if sorted_by_revenue[0]["avg_revenue"] > 0:
                top = sorted_by_revenue[0]
                recommendations.append({
                    "dimension": dimension,
                    "insight": f"{top['value']} generates ${top['avg_revenue']:.2f} avg revenue",
                    "action": f"Prioritize {top['value']} for high-value actions"
                })

        insights["recommendations"] = recommendations[:10]  # Top 10 recommendations

    def get_revenue_attribution(self, days: int = 30) -> Dict[str, float]:
        """Get revenue broken down by attribution dimension."""
        cutoff = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()

        try:
            with connection() as conn:
                cursor = conn.cursor()
                cursor.execute("""
                    SELECT o.attribution, o.revenue
                    FROM outcome_results o
                    JOIN outcome_actions a ON o.action_id = a.id
                    WHERE a.timestamp > %s AND o.revenue > 0
                """, (cutoff,))
                rows = cursor.fetchall()
        except Exception as e:
            logger.warning(f"Failed to load revenue attribution: {e}")
            rows = []

        revenue_by_dimension = defaultdict(lambda: defaultdict(float))

        for raw_attribution, revenue in rows:
            attribution = self._load_json(raw_attribution) or {}
            for dim, value in attribution.items():
                if value is not None:
                    revenue_by_dimension[dim][str(value)] += revenue

        # Return plain dicts (safer for callers that serialize the result).
        return {dim: dict(values) for dim, values in revenue_by_dimension.items()}

    # ========== Pattern Detection ==========

    def detect_patterns(self, min_sample_size: int = 10) -> List[Dict]:
        """Detect success/failure patterns.

        Currently detects two families: time-of-day performance and
        per-template performance, both aggregated from outcome_results.
        JSONB `->>` extracts the attribution field as text; BOOLEAN
        `success` is counted via SUM(CASE ...).

        Args:
            min_sample_size: Minimum outcome count for a group to count

        Returns:
            Patterns sorted by impact (success_rate * sample_size), descending
        """
        patterns = []

        try:
            with connection() as conn:
                cursor = conn.cursor()

                # Pattern: time-of-day performance.
                cursor.execute("""
                    SELECT o.attribution->>'time_of_day',
                           COUNT(*),
                           SUM(CASE WHEN o.success THEN 1 ELSE 0 END),
                           AVG(o.revenue)
                    FROM outcome_results o
                    GROUP BY o.attribution->>'time_of_day'
                    HAVING COUNT(*) >= %s
                """, (min_sample_size,))

                for row in cursor.fetchall():
                    if row[0]:
                        success_rate = row[2] / row[1] if row[1] > 0 else 0
                        patterns.append({
                            "type": "time_of_day",
                            "value": row[0],
                            "sample_size": row[1],
                            "success_rate": round(success_rate * 100, 1),
                            "avg_revenue": round(row[3] or 0, 2)
                        })

                # Pattern: template performance.
                cursor.execute("""
                    SELECT o.attribution->>'template',
                           COUNT(*),
                           SUM(CASE WHEN o.success THEN 1 ELSE 0 END),
                           SUM(o.revenue)
                    FROM outcome_results o
                    WHERE o.attribution->>'template' IS NOT NULL
                    GROUP BY o.attribution->>'template'
                    HAVING COUNT(*) >= %s
                """, (min_sample_size,))

                for row in cursor.fetchall():
                    if row[0]:
                        success_rate = row[2] / row[1] if row[1] > 0 else 0
                        patterns.append({
                            "type": "template",
                            "value": row[0],
                            "sample_size": row[1],
                            "success_rate": round(success_rate * 100, 1),
                            "total_revenue": round(row[3] or 0, 2)
                        })
        except Exception as e:
            logger.warning(f"Failed to detect patterns: {e}")

        # Sort by impact (success_rate * sample_size).
        patterns.sort(key=lambda x: x["success_rate"] * x["sample_size"], reverse=True)

        return patterns

    def store_learning(self, pattern: str, insight: str, confidence: float = 0.5):
        """Store a validated learning."""
        try:
            with connection() as conn:
                cursor = conn.cursor()
                cursor.execute("""
                    INSERT INTO outcome_learnings (pattern, insight, confidence, created_at)
                    VALUES (%s, %s, %s, %s)
                """, (pattern, insight, confidence, datetime.now(timezone.utc).isoformat()))
        except Exception as e:
            logger.warning(f"Failed to store learning: {e}")

    def get_learnings(self, min_confidence: float = 0.5) -> List[Dict]:
        """Get stored learnings at or above the given confidence."""
        try:
            with connection() as conn:
                cursor = conn.cursor()
                cursor.execute("""
                    SELECT pattern, insight, confidence, sample_size, created_at
                    FROM outcome_learnings
                    WHERE confidence >= %s
                    ORDER BY confidence DESC, created_at DESC
                """, (min_confidence,))
                rows = cursor.fetchall()
        except Exception as e:
            logger.warning(f"Failed to get learnings: {e}")
            return []

        return [{
            "pattern": row[0],
            "insight": row[1],
            "confidence": row[2],
            "sample_size": row[3],
            "created_at": self._ts_to_str(row[4])
        } for row in rows]

    # ========== Statistics ==========

    def get_stats(self, days: int = 30) -> Dict[str, Any]:
        """Get outcome memory statistics for the last `days` days.

        Returns a dict with total_actions, outcomes_recorded,
        successful_outcomes, total_revenue, success_rate (percent),
        pending_actions, attribution_dimensions, total_learnings.
        Returns whatever was gathered before a storage error, if any.
        """
        cutoff = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()
        stats = {}

        try:
            with connection() as conn:
                cursor = conn.cursor()

                # Total actions.
                cursor.execute("SELECT COUNT(*) FROM outcome_actions WHERE timestamp > %s", (cutoff,))
                stats["total_actions"] = cursor.fetchone()[0]

                # Outcomes recorded (success is BOOLEAN, so count via CASE).
                cursor.execute("""
                    SELECT COUNT(*),
                           SUM(CASE WHEN o.success THEN 1 ELSE 0 END),
                           SUM(o.revenue)
                    FROM outcome_results o
                    JOIN outcome_actions a ON o.action_id = a.id
                    WHERE a.timestamp > %s
                """, (cutoff,))

                row = cursor.fetchone()
                stats["outcomes_recorded"] = row[0]
                stats["successful_outcomes"] = row[1] or 0
                stats["total_revenue"] = round(row[2] or 0, 2)
                stats["success_rate"] = round(
                    (row[1] / row[0] * 100) if row[0] > 0 else 0, 1
                )

                # Pending actions (logged but no outcome recorded yet).
                cursor.execute("""
                    SELECT COUNT(*) FROM outcome_actions
                    WHERE status = 'pending' AND timestamp > %s
                """, (cutoff,))
                stats["pending_actions"] = cursor.fetchone()[0]

                # Attribution coverage.
                cursor.execute("SELECT COUNT(DISTINCT dimension) FROM outcome_attribution")
                stats["attribution_dimensions"] = cursor.fetchone()[0]

                # Learnings count.
                cursor.execute("SELECT COUNT(*) FROM outcome_learnings")
                stats["total_learnings"] = cursor.fetchone()[0]
        except Exception as e:
            logger.warning(f"Failed to compute stats: {e}")

        return stats


if __name__ == "__main__":
    # End-to-end smoke test of the outcome memory system.
    print("Testing Outcome Memory System...")

    memory = OutcomeMemory()

    # --- Step 1: log a pair of email actions ---
    print("\n1. Logging actions...")

    winning_id = memory.log_action(
        action_type="email_sent",
        workflow_id="lead_nurture",
        input_data={
            "lead_id": "lead_123",
            "template": "welcome_v2",
            "source": "organic_search",
        },
    )
    print(f"   Logged: {winning_id}")

    losing_id = memory.log_action(
        action_type="email_sent",
        workflow_id="lead_nurture",
        input_data={
            "lead_id": "lead_456",
            "template": "welcome_v1",
            "source": "paid_ads",
        },
    )
    print(f"   Logged: {losing_id}")

    # --- Step 2: attach one converting and one non-converting outcome ---
    print("\n2. Recording outcomes...")

    memory.record_outcome(
        action_id=winning_id,
        result={"opened": True, "clicked": True, "converted": True},
        success=True,
        revenue=97.00,
    )
    print(f"   Recorded outcome for {winning_id}: SUCCESS, $97.00")

    memory.record_outcome(
        action_id=losing_id,
        result={"opened": True, "clicked": False, "converted": False},
        success=False,
        revenue=0,
    )
    print(f"   Recorded outcome for {losing_id}: FAILED, $0")

    # --- Step 3: read the converting action back ---
    print("\n3. Action details...")
    details = memory.get_action(winning_id)
    print(f"   Action: {details['action_type']}")
    print(f"   Status: {details['status']}")
    print(f"   Revenue: ${details['revenue']}")

    # --- Step 4: attribution rollup ---
    print("\n4. Attribution insights...")
    report = memory.get_attribution_insights()
    for dim, entries in report["by_dimension"].items():
        if not entries:
            continue
        print(f"   {dim}:")
        for entry in entries[:3]:
            print(f"      - {entry['value']}: {entry['success_rate']}% success, ${entry['total_revenue']} revenue")

    # --- Step 5: headline numbers ---
    print("\n5. Statistics...")
    for name, figure in memory.get_stats().items():
        print(f"   {name}: {figure}")

    print("\nOutcome Memory System test complete!")
