"""
AIVA Mentorship Extractor
==========================

Extracts Kinan's decision patterns from existing data sources and injects
them as seed training data into AIVA's outcome tracker.

Part of the Non-Human Autonomy Maturity Protocol (NAMP).

Data Sources:
  1. Git commit history - Kinan's actual decisions in code
  2. Knowledge Graph axioms - Documented decision principles
  3. AIVA autonomy rules - Category-based baseline decisions

CRITICAL: Uses PostgreSQL via Elestio config (NO SQLite)

VERIFICATION_STAMP
Component: AIVA Mentorship Extractor (NAMP seed training)
Verified By: parallel-builder
Verified At: 2026-02-11
Tests: Pending (black box + white box tests required)
Coverage: Pending
Storage: PostgreSQL via Elestio config (NO SQLite)
Compliance: GLOBAL_GENESIS_RULES.md Rule 7
"""

import sys
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass
import json
import logging
import subprocess
import re
from datetime import datetime

# Add genesis root for imports
sys.path.insert(0, '/mnt/e/genesis-system')

# Elestio config path
GENESIS_ROOT = Path('/mnt/e/genesis-system')
sys.path.insert(0, str(GENESIS_ROOT / "data" / "genesis-memory"))

from elestio_config import PostgresConfig
import psycopg2

logger = logging.getLogger("AIVA.MentorshipExtractor")


@dataclass
class MentorshipPattern:
    """A decision pattern extracted from Kinan's historical data.

    One record per mined decision. Instances are produced by the
    MentorshipExtractor.extract_* methods and later replayed into the
    outcome tracker as seed training data by inject_mentorship_data().
    """
    source: str                    # 'git', 'axiom', 'autonomy_rule'
    source_id: str                 # Commit hash, axiom ID, or rule name
    task_category: str             # Task type this pattern applies to
    decision_type: str             # What kind of decision (monitoring, feature, refactor, etc)
    expected_outcome: Dict[str, Any]  # What success looks like
    confidence: float              # 0.0-1.0 confidence in this pattern
    metadata: Dict[str, Any]       # Additional context
    extracted_at: datetime         # When this pattern was mined (naive local time)


@dataclass
class ExtractionReport:
    """Summary of mentorship extraction.

    Produced by MentorshipExtractor.get_extraction_report() after the
    extract_* methods (and optionally injection) have run.
    """
    git_patterns: int              # Patterns mined from git commit history
    axiom_patterns: int            # Patterns mined from Knowledge Graph axioms
    rule_patterns: int             # Patterns mined from autonomy rules
    total_injected: int            # NOTE(review): populated from the extracted pattern count, not confirmed tracker writes — verify
    categories_covered: List[str]  # Sorted unique task categories across all patterns
    extraction_duration_seconds: float  # Wall-clock duration of extract_all(), 0.0 if not run
    timestamp: datetime            # When this report was generated


class MentorshipExtractor:
    """
    Extracts Kinan's decision patterns from historical data and injects
    them as training data for AIVA's confidence scoring.

    Usage:
        extractor = MentorshipExtractor()
        from AIVA.autonomy.outcome_tracker import OutcomeTracker
        tracker = OutcomeTracker()
        count = extractor.inject_mentorship_data(tracker)
        report = extractor.get_extraction_report()
    """

    # Commit message patterns mapped to task categories
    COMMIT_PATTERNS = {
        'fix:': 'monitoring_fix',
        'feat:': 'feature_development',
        'refactor:': 'code_quality',
        'docs:': 'documentation',
        'test:': 'testing',
        'chore:': 'maintenance',
        'perf:': 'performance',
        'security:': 'security',
        'build:': 'infrastructure',
    }

    # Axiom type to task category mapping
    AXIOM_CATEGORIES = {
        'revenue': 'revenue_decision',
        'growth': 'growth_strategy',
        'agent_architecture': 'agent_design',
        'observability': 'monitoring',
        'execution': 'task_execution',
        'memory': 'knowledge_management',
        'critical_failure': 'error_handling',
        'meta_cognition': 'self_improvement',
    }

    def __init__(self, genesis_root: Optional[Path] = None):
        """
        Initialize the mentorship extractor.

        Args:
            genesis_root: Path to genesis-system root (defaults to /mnt/e/genesis-system)
        """
        self.genesis_root = genesis_root or GENESIS_ROOT
        self.patterns: List[MentorshipPattern] = []
        self.extraction_start = None
        self.extraction_end = None
        logger.info(f"MentorshipExtractor initialized at {self.genesis_root}")

    # =========================================================================
    # GIT COMMIT EXTRACTION
    # =========================================================================

    def extract_from_git(self, max_commits: int = 500) -> int:
        """
        Extract decision patterns from git commit history.

        Analyzes commit messages for:
          - Conventional commit types (fix:, feat:, refactor:, etc)
          - Confidence based on message clarity and scope
          - Decision outcomes based on commit changes

        Args:
            max_commits: Maximum number of commits to analyze

        Returns:
            Number of patterns extracted
        """
        logger.info(f"Extracting patterns from git history (max {max_commits} commits)")

        try:
            # Use subprocess to call git log
            result = subprocess.run(
                [
                    'git', 'log',
                    f'--max-count={max_commits}',
                    '--pretty=format:%H|%s|%an|%at',  # Hash|Subject|Author|Timestamp
                    '--no-merges',
                ],
                cwd=str(self.genesis_root),
                capture_output=True,
                text=True,
                timeout=30
            )

            if result.returncode != 0:
                logger.error(f"Git log failed: {result.stderr}")
                return 0

            lines = result.stdout.strip().split('\n')
            extracted_count = 0

            for line in lines:
                if not line:
                    continue

                parts = line.split('|')
                if len(parts) < 4:
                    continue

                commit_hash, subject, author, timestamp = parts[0], parts[1], parts[2], parts[3]

                # Parse commit type and extract pattern
                pattern = self._parse_commit_message(commit_hash, subject, author, timestamp)
                if pattern:
                    self.patterns.append(pattern)
                    extracted_count += 1

            logger.info(f"Extracted {extracted_count} patterns from git history")
            return extracted_count

        except subprocess.TimeoutExpired:
            logger.error("Git log timed out after 30 seconds")
            return 0
        except Exception as e:
            logger.error(f"Git extraction failed: {e}")
            return 0

    def _parse_commit_message(
        self, commit_hash: str, subject: str, author: str, timestamp: str
    ) -> Optional[MentorshipPattern]:
        """
        Parse a commit message and extract decision pattern.

        Returns:
            MentorshipPattern or None if commit doesn't match known patterns
        """
        subject_lower = subject.lower()

        # Match conventional commit pattern
        matched_category = None
        for prefix, category in self.COMMIT_PATTERNS.items():
            if subject_lower.startswith(prefix):
                matched_category = category
                break

        if not matched_category:
            # Try fuzzy match for common keywords
            if 'fix' in subject_lower or 'bug' in subject_lower:
                matched_category = 'monitoring_fix'
            elif 'add' in subject_lower or 'implement' in subject_lower:
                matched_category = 'feature_development'
            elif 'update' in subject_lower or 'improve' in subject_lower:
                matched_category = 'code_quality'
            else:
                return None  # Skip commits without clear category

        # Calculate confidence based on commit message quality
        confidence = self._score_commit_clarity(subject)

        # Build expected outcome
        expected_outcome = {
            'commit_type': matched_category,
            'message': subject,
            'author': author,
            'success': True,  # Commits that made it to main are successes
        }

        return MentorshipPattern(
            source='git',
            source_id=commit_hash[:8],
            task_category=matched_category,
            decision_type='code_change',
            expected_outcome=expected_outcome,
            confidence=confidence,
            metadata={
                'full_message': subject,
                'author': author,
                'timestamp': timestamp,
            },
            extracted_at=datetime.now()
        )

    def _score_commit_clarity(self, message: str) -> float:
        """
        Score commit message clarity to derive confidence.

        Higher score for:
          - Concise messages (20-100 chars optimal)
          - Specific technical terms
          - Clear action verbs

        Returns:
            Confidence score 0.0-1.0
        """
        length = len(message)
        score = 0.5  # Baseline

        # Length scoring (sweet spot 20-100 chars)
        if 20 <= length <= 100:
            score += 0.2
        elif length < 20:
            score -= 0.1  # Too terse
        elif length > 150:
            score -= 0.1  # Too verbose

        # Specific indicators
        if ':' in message:
            score += 0.1  # Structured format
        if any(word in message.lower() for word in ['implement', 'fix', 'add', 'update']):
            score += 0.1  # Clear action verb
        if re.search(r'\b[A-Z]+-\d+\b', message):
            score += 0.1  # References ticket/issue

        return max(0.0, min(1.0, score))

    # =========================================================================
    # KNOWLEDGE GRAPH AXIOM EXTRACTION
    # =========================================================================

    def extract_from_axioms(self) -> int:
        """
        Extract decision patterns from Knowledge Graph axioms.

        Reads JSONL axiom files and maps them to task categories
        based on axiom type and content.

        Returns:
            Number of patterns extracted
        """
        logger.info("Extracting patterns from Knowledge Graph axioms")

        axiom_dir = self.genesis_root / "KNOWLEDGE_GRAPH" / "axioms"
        if not axiom_dir.exists():
            logger.warning(f"Axiom directory not found: {axiom_dir}")
            return 0

        extracted_count = 0

        try:
            # Find all JSONL axiom files
            axiom_files = list(axiom_dir.glob("*.jsonl"))
            logger.info(f"Found {len(axiom_files)} axiom files")

            for axiom_file in axiom_files:
                count = self._extract_from_axiom_file(axiom_file)
                extracted_count += count

            logger.info(f"Extracted {extracted_count} patterns from axioms")
            return extracted_count

        except Exception as e:
            logger.error(f"Axiom extraction failed: {e}")
            return 0

    def _extract_from_axiom_file(self, filepath: Path) -> int:
        """Extract patterns from a single axiom JSONL file."""
        extracted = 0

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                for line_num, line in enumerate(f, 1):
                    line = line.strip()
                    if not line:
                        continue

                    try:
                        axiom = json.loads(line)
                        pattern = self._parse_axiom(axiom, filepath.stem)
                        if pattern:
                            self.patterns.append(pattern)
                            extracted += 1
                    except json.JSONDecodeError as e:
                        logger.warning(f"Invalid JSON in {filepath.name}:{line_num}: {e}")
                        continue

            return extracted

        except Exception as e:
            logger.error(f"Failed to read {filepath.name}: {e}")
            return 0

    def _parse_axiom(self, axiom: Dict[str, Any], file_stem: str) -> Optional[MentorshipPattern]:
        """
        Parse an axiom and extract decision pattern.

        Returns:
            MentorshipPattern or None if axiom doesn't contain useful pattern
        """
        axiom_id = axiom.get('id', 'unknown')
        axiom_type = axiom.get('type', axiom.get('category', 'general'))

        # Map axiom type to task category
        task_category = self.AXIOM_CATEGORIES.get(axiom_type)
        if not task_category:
            # Try fuzzy matching on file name
            for key, category in self.AXIOM_CATEGORIES.items():
                if key in file_stem.lower():
                    task_category = category
                    break

        if not task_category:
            task_category = 'general_decision'

        # Extract decision pattern from axiom content
        title = axiom.get('title', axiom.get('principle', ''))
        principle = axiom.get('principle', axiom.get('learning', ''))

        if not title and not principle:
            return None  # No useful content

        # Build expected outcome
        expected_outcome = {
            'axiom_type': axiom_type,
            'principle': principle,
            'success': True,  # Axioms represent successful patterns
        }

        # Confidence based on axiom specificity
        confidence = self._score_axiom_confidence(axiom)

        return MentorshipPattern(
            source='axiom',
            source_id=axiom_id,
            task_category=task_category,
            decision_type=axiom_type,
            expected_outcome=expected_outcome,
            confidence=confidence,
            metadata={
                'title': title,
                'file': file_stem,
                'full_axiom': axiom,
            },
            extracted_at=datetime.now()
        )

    def _score_axiom_confidence(self, axiom: Dict[str, Any]) -> float:
        """
        Score axiom confidence based on specificity and evidence.

        Returns:
            Confidence score 0.0-1.0
        """
        score = 0.6  # Baseline (axioms are curated knowledge)

        # Bonus for concrete examples
        if 'example' in axiom or 'observation' in axiom:
            score += 0.15

        # Bonus for explicit principles
        if 'principle' in axiom and len(str(axiom['principle'])) > 30:
            score += 0.1

        # Bonus for learning/correction mechanisms
        if 'learning' in axiom or 'correction_trigger' in axiom:
            score += 0.1

        # Penalty for vague categories
        if axiom.get('type') in ['general', 'misc', 'other']:
            score -= 0.1

        return max(0.0, min(1.0, score))

    # =========================================================================
    # AUTONOMY RULE EXTRACTION
    # =========================================================================

    def extract_from_autonomy_rules(self) -> int:
        """
        Extract baseline decision patterns from AIVA's autonomy rules.

        Uses TASK_CATEGORY_RULES from autonomy_engine.py to generate
        "known good" seed decisions for each category.

        Returns:
            Number of patterns extracted
        """
        logger.info("Extracting patterns from autonomy rules")

        try:
            # Import TASK_CATEGORY_RULES from autonomy_engine
            from AIVA.autonomy.autonomy_engine import TASK_CATEGORY_RULES, AutonomyLevel

            extracted_count = 0

            for autonomy_level, categories in TASK_CATEGORY_RULES.items():
                for category_name, keywords in categories.items():
                    if category_name == 'description' or not isinstance(keywords, list):
                        continue

                    # Generate pattern for this category
                    pattern = self._create_rule_pattern(
                        autonomy_level, category_name, keywords
                    )
                    if pattern:
                        self.patterns.append(pattern)
                        extracted_count += 1

            logger.info(f"Extracted {extracted_count} patterns from autonomy rules")
            return extracted_count

        except Exception as e:
            logger.error(f"Autonomy rule extraction failed: {e}")
            return 0

    def _create_rule_pattern(
        self, autonomy_level, category_name: str, keywords: List[str]
    ) -> Optional[MentorshipPattern]:
        """
        Create a mentorship pattern from an autonomy rule category.

        Returns:
            MentorshipPattern with baseline confidence for this category
        """
        # Map autonomy level to confidence
        # Level 0 (FULL_AUTONOMOUS) = high confidence
        # Level 3 (ADVISORY_ONLY) = low confidence
        confidence_map = {
            0: 0.85,  # FULL_AUTONOMOUS - very safe
            1: 0.70,  # NOTIFY - moderate risk
            2: 0.50,  # CONFIRM_FIRST - needs confirmation
            3: 0.20,  # ADVISORY_ONLY - high risk
        }

        level_value = autonomy_level.value if hasattr(autonomy_level, 'value') else autonomy_level
        confidence = confidence_map.get(level_value, 0.5)

        # Build expected outcome
        expected_outcome = {
            'category': category_name,
            'autonomy_level': level_value,
            'keywords': keywords[:5],  # Sample of keywords
            'success': True,
        }

        return MentorshipPattern(
            source='autonomy_rule',
            source_id=f"{autonomy_level.name}_{category_name}",
            task_category=category_name,
            decision_type='autonomy_classification',
            expected_outcome=expected_outcome,
            confidence=confidence,
            metadata={
                'autonomy_level': autonomy_level.name,
                'keyword_count': len(keywords),
            },
            extracted_at=datetime.now()
        )

    # =========================================================================
    # INJECTION INTO OUTCOME TRACKER
    # =========================================================================

    def inject_mentorship_data(self, outcome_tracker) -> int:
        """
        Inject extracted patterns into the outcome tracker.

        Args:
            outcome_tracker: OutcomeTracker instance

        Returns:
            Number of patterns injected
        """
        if not self.patterns:
            logger.warning("No patterns to inject. Run extract_* methods first.")
            return 0

        logger.info(f"Injecting {len(self.patterns)} mentorship patterns")

        injected_count = 0

        for pattern in self.patterns:
            try:
                # Generate decision ID
                decision_id = f"mentorship_{pattern.source}_{pattern.source_id}_{pattern.task_category}"

                # Record prediction
                success = outcome_tracker.record_prediction(
                    decision_id=decision_id,
                    task_type=pattern.task_category,
                    expected_outcome=pattern.expected_outcome,
                    confidence_score=pattern.confidence,
                    metadata={
                        'source': 'mentorship_extraction',
                        'source_type': pattern.source,
                        'source_id': pattern.source_id,
                        'decision_type': pattern.decision_type,
                        'extracted_at': pattern.extracted_at.isoformat(),
                        **pattern.metadata
                    }
                )

                if not success:
                    continue

                # Record actual outcome (mark as success since these are historical patterns)
                success = outcome_tracker.record_actual(
                    decision_id=decision_id,
                    actual_outcome=pattern.expected_outcome,
                    success=True
                )

                if success:
                    injected_count += 1

            except Exception as e:
                logger.warning(f"Failed to inject pattern {pattern.source_id}: {e}")
                continue

        logger.info(f"Successfully injected {injected_count}/{len(self.patterns)} patterns")
        return injected_count

    # =========================================================================
    # EXTRACTION REPORT
    # =========================================================================

    def get_extraction_report(self) -> ExtractionReport:
        """
        Get summary report of extraction process.

        Returns:
            ExtractionReport with counts and statistics
        """
        git_count = sum(1 for p in self.patterns if p.source == 'git')
        axiom_count = sum(1 for p in self.patterns if p.source == 'axiom')
        rule_count = sum(1 for p in self.patterns if p.source == 'autonomy_rule')

        # Get unique categories
        categories = list(set(p.task_category for p in self.patterns))

        # Calculate duration
        if self.extraction_start and self.extraction_end:
            duration = (self.extraction_end - self.extraction_start).total_seconds()
        else:
            duration = 0.0

        return ExtractionReport(
            git_patterns=git_count,
            axiom_patterns=axiom_count,
            rule_patterns=rule_count,
            total_injected=len(self.patterns),
            categories_covered=sorted(categories),
            extraction_duration_seconds=duration,
            timestamp=datetime.now()
        )

    # =========================================================================
    # ORCHESTRATION
    # =========================================================================

    def extract_all(
        self,
        max_git_commits: int = 500,
        include_axioms: bool = True,
        include_rules: bool = True
    ) -> int:
        """
        Run all extraction methods.

        Args:
            max_git_commits: Max commits to analyze from git
            include_axioms: Whether to extract from axioms
            include_rules: Whether to extract from autonomy rules

        Returns:
            Total number of patterns extracted
        """
        self.extraction_start = datetime.now()
        total = 0

        # Git extraction
        total += self.extract_from_git(max_commits=max_git_commits)

        # Axiom extraction
        if include_axioms:
            total += self.extract_from_axioms()

        # Rule extraction
        if include_rules:
            total += self.extract_from_autonomy_rules()

        self.extraction_end = datetime.now()

        logger.info(f"Total extraction complete: {total} patterns from all sources")
        return total


# =============================================================================
# SINGLETON ACCESSOR
# =============================================================================

# Module-level cache backing the singleton accessor below.
_extractor_instance: Optional[MentorshipExtractor] = None


def get_mentorship_extractor(genesis_root: Optional[Path] = None) -> MentorshipExtractor:
    """
    Return the process-wide MentorshipExtractor, creating it on first use.

    Args:
        genesis_root: Optional path to genesis-system root. NOTE: only
            honored on the very first call; later calls return the cached
            instance and ignore this argument.

    Returns:
        MentorshipExtractor instance
    """
    global _extractor_instance
    if _extractor_instance is not None:
        return _extractor_instance
    _extractor_instance = MentorshipExtractor(genesis_root=genesis_root)
    return _extractor_instance


# =============================================================================
# CONVENIENCE FUNCTION
# =============================================================================

def run_mentorship_extraction(
    max_git_commits: int = 500,
    include_axioms: bool = True,
    include_rules: bool = True,
    auto_inject: bool = True
) -> ExtractionReport:
    """
    Convenience function to run full mentorship extraction and injection.

    Args:
        max_git_commits: Max commits to analyze
        include_axioms: Extract from axioms
        include_rules: Extract from autonomy rules
        auto_inject: Automatically inject into outcome tracker

    Returns:
        ExtractionReport with results

    Example:
        report = run_mentorship_extraction()
        print(f"Injected {report.total_injected} patterns")
        print(f"Categories: {', '.join(report.categories_covered)}")
    """
    extractor = get_mentorship_extractor()

    # The extractor is a process-wide singleton whose pattern list persists
    # between calls; clear stale patterns so a repeated run does not
    # double-count (and re-inject) everything extracted previously.
    extractor.patterns = []

    # Extract from all sources
    extractor.extract_all(
        max_git_commits=max_git_commits,
        include_axioms=include_axioms,
        include_rules=include_rules
    )

    # Auto-inject if requested (deferred import keeps this module loadable
    # without the AIVA package on the path)
    if auto_inject:
        from AIVA.autonomy.outcome_tracker import OutcomeTracker
        tracker = OutcomeTracker()
        extractor.inject_mentorship_data(tracker)

    return extractor.get_extraction_report()


if __name__ == "__main__":
    # CLI usage: run the full extraction pipeline and print a summary.
    import argparse

    parser = argparse.ArgumentParser(description="AIVA Mentorship Extractor")
    parser.add_argument('--git-commits', type=int, default=500, help="Max git commits to analyze")
    parser.add_argument('--no-axioms', action='store_true', help="Skip axiom extraction")
    parser.add_argument('--no-rules', action='store_true', help="Skip rule extraction")
    parser.add_argument('--no-inject', action='store_true', help="Skip auto-injection")
    parser.add_argument('--verbose', action='store_true', help="Verbose logging")

    args = parser.parse_args()

    # Without --verbose, logging falls back to Python's default handling
    if args.verbose:
        logging.basicConfig(level=logging.INFO)

    print("AIVA Mentorship Extractor")
    print("=" * 60)

    # CLI flags are negative ("--no-x"); invert them for the positive API
    report = run_mentorship_extraction(
        max_git_commits=args.git_commits,
        include_axioms=not args.no_axioms,
        include_rules=not args.no_rules,
        auto_inject=not args.no_inject
    )

    # Plain literal (was an f-string with no placeholders)
    print("\nExtraction Complete!")
    print(f"  Git patterns:   {report.git_patterns}")
    print(f"  Axiom patterns: {report.axiom_patterns}")
    print(f"  Rule patterns:  {report.rule_patterns}")
    print(f"  Total injected: {report.total_injected}")
    print(f"  Duration:       {report.extraction_duration_seconds:.2f}s")
    print(f"\nCategories covered ({len(report.categories_covered)}):")
    for category in report.categories_covered:
        print(f"    - {category}")
