#!/usr/bin/env python3
"""
GENESIS PROGRESS ANALYZER
=========================
Analyzes progress.txt and task history to extract insights.

Features:
- Task completion patterns
- Time-of-day analysis
- Success/failure trends
- Agent performance metrics
- Recommendations

Usage:
    python progress_analyzer.py           # Full analysis
    python progress_analyzer.py --summary # Quick summary
    python progress_analyzer.py --trends  # Trend analysis
"""

import json
import re
from collections import Counter, defaultdict
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Any, Optional


class ProgressAnalyzer:
    """Analyzes Genesis progress and extracts insights.

    Reads three on-disk artifacts relative to the Genesis root:

    - ``loop/progress.txt``          timestamped free-text log lines
    - ``loop/tasks.json``            task/story history
    - ``data/hyperdrive_log.jsonl``  per-execution JSON records

    Every ``analyze_*`` method re-reads the files on each call, so results
    always reflect the current on-disk state.
    """

    # "[YYYY-MM-DD HH:MM:SS] message" — compiled once instead of re-parsed
    # for every log line.
    _ENTRY_RE = re.compile(r'\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\] (.+)')

    def __init__(self, genesis_root: Optional[Path] = None):
        """Initialize file paths.

        Args:
            genesis_root: Root of the Genesis tree; defaults to the
                grandparent directory of this file.
        """
        self.root = genesis_root or Path(__file__).parent.parent
        self.progress_path = self.root / "loop" / "progress.txt"
        self.tasks_path = self.root / "loop" / "tasks.json"
        self.budget_path = self.root / "data" / "hyperdrive_budget.json"
        self.log_path = self.root / "data" / "hyperdrive_log.jsonl"

    def load_progress_log(self) -> List[Dict]:
        """Load and parse progress.txt entries.

        Returns:
            A list of dicts with ``timestamp`` (datetime), ``message`` (str)
            and ``type`` (classification string). Lines that do not match the
            expected ``[YYYY-MM-DD HH:MM:SS] message`` shape, or whose
            timestamp is not a real date, are skipped silently.
        """
        if not self.progress_path.exists():
            return []

        entries = []
        with open(self.progress_path, encoding="utf-8") as f:
            for line in f:
                match = self._ENTRY_RE.match(line.strip())
                if not match:
                    continue
                timestamp_str, message = match.groups()
                try:
                    timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
                except ValueError:
                    # Matched the shape but not a real date (e.g. month 13).
                    continue
                entries.append({
                    "timestamp": timestamp,
                    "message": message,
                    "type": self._classify_entry(message),
                })

        return entries

    def _classify_entry(self, message: str) -> str:
        """Classify a progress entry type by keyword.

        Checks run in priority order: success > failure > start > error.
        NOTE(review): these are plain substring checks and can false-positive
        (e.g. "bypass" contains "pass") — acceptable for heuristic reporting.
        """
        message_lower = message.lower()

        if "completed" in message_lower or "pass" in message_lower:
            return "success"
        if "fail" in message_lower:  # covers both "fail" and "failed"
            return "failure"
        if "starting" in message_lower or "begin" in message_lower:
            return "start"
        if "error" in message_lower or "exception" in message_lower:
            return "error"
        return "info"

    def load_task_history(self) -> Dict:
        """Load task history from tasks.json (``{"stories": []}`` if absent)."""
        if not self.tasks_path.exists():
            return {"stories": []}

        with open(self.tasks_path, encoding="utf-8") as f:
            return json.load(f)

    def load_execution_log(self) -> List[Dict]:
        """Load execution records from hyperdrive_log.jsonl.

        Malformed lines are skipped: the log is append-only and a truncated
        final line is expected after a crash.
        """
        if not self.log_path.exists():
            return []

        entries = []
        with open(self.log_path, encoding="utf-8") as f:
            for line in f:
                try:
                    entries.append(json.loads(line))
                except json.JSONDecodeError:
                    pass

        return entries

    def analyze_completion_rate(self) -> Dict:
        """Analyze task completion rate.

        Always returns the full key set (total/completed/pending/rate/
        rate_pct). The previous empty-case early return omitted ``pending``
        and ``rate_pct``, which made ``display_summary`` raise KeyError on an
        empty task list.
        """
        stories = self.load_task_history().get("stories", [])

        total = len(stories)
        completed = sum(1 for s in stories if s.get("passes"))

        return {
            "total": total,
            "completed": completed,
            "pending": total - completed,
            "rate": completed / total if total > 0 else 0.0,
            "rate_pct": f"{(completed / total * 100):.1f}%" if total > 0 else "0%",
        }

    def analyze_complexity_distribution(self) -> Dict:
        """Analyze task complexity distribution and per-level success rates."""
        stories = self.load_task_history().get("stories", [])

        # One Counter suffices for totals; the original kept two identical
        # tallies (complexity_counts and complexity_total).
        totals = Counter()
        successes = Counter()

        for story in stories:
            complexity = story.get("complexity", "unknown")
            totals[complexity] += 1
            if story.get("passes"):
                successes[complexity] += 1

        success_rates = {
            complexity: {
                "total": total,
                "completed": successes[complexity],
                "rate": successes[complexity] / total if total > 0 else 0.0,
            }
            for complexity, total in totals.items()
        }

        return {
            "distribution": dict(totals),
            "success_rates": success_rates,
        }

    def analyze_time_patterns(self) -> Dict:
        """Analyze which hours of the day see activity and successes.

        The empty case now returns the same key set as the populated case,
        so callers never need to special-case missing keys.
        """
        entries = self.load_progress_log()

        if not entries:
            return {
                "by_hour": {},
                "successes_by_hour": {},
                "most_productive_hour": None,
                "most_active_hour": None,
            }

        hour_counts = Counter()
        hour_successes = Counter()

        for entry in entries:
            hour = entry["timestamp"].hour
            hour_counts[hour] += 1
            if entry["type"] == "success":
                hour_successes[hour] += 1

        return {
            "by_hour": dict(hour_counts),
            "successes_by_hour": dict(hour_successes),
            # Hour with the most "success" entries (ties: first counted wins).
            "most_productive_hour": hour_successes.most_common(1)[0][0] if hour_successes else None,
            "most_active_hour": hour_counts.most_common(1)[0][0] if hour_counts else None,
        }

    def analyze_agent_performance(self) -> Dict:
        """Aggregate per-agent task counts, successes, and spend.

        Task/success counts come from story recommendations; cost comes from
        the execution log — so an agent may show cost with zero tasks (its
        success_rate is then 0.0).
        """
        stories = self.load_task_history().get("stories", [])
        exec_log = self.load_execution_log()

        agent_stats = defaultdict(lambda: {"tasks": 0, "successes": 0, "cost": 0.0})

        for story in stories:
            agent = story.get("recommended_agent", "gemini-2.0-flash")
            agent_stats[agent]["tasks"] += 1
            if story.get("passes"):
                agent_stats[agent]["successes"] += 1

        for entry in exec_log:
            agent_stats[entry.get("agent", "unknown")]["cost"] += entry.get("cost", 0)

        return {
            agent: {
                **stats,
                "success_rate": stats["successes"] / stats["tasks"] if stats["tasks"] > 0 else 0.0,
            }
            for agent, stats in agent_stats.items()
        }

    def analyze_failure_patterns(self) -> Dict:
        """Summarize failure/error log entries and failed tasks."""
        entries = self.load_progress_log()
        stories = self.load_task_history().get("stories", [])

        failures = [e for e in entries if e["type"] in ("failure", "error")]
        # The filter already guarantees last_failure is truthy; no need to
        # re-check inside the loop as the original did.
        failed_tasks = [s for s in stories if s.get("last_failure")]
        failure_reasons = [
            task["last_failure"].get("reason", "unknown") for task in failed_tasks
        ]

        return {
            "total_failures": len(failures),
            "failed_tasks": len(failed_tasks),
            "common_reasons": Counter(failure_reasons).most_common(5),
            "recent_failures": [e["message"] for e in failures[-5:]],
        }

    def generate_recommendations(self) -> List[str]:
        """Generate human-readable recommendations from the analyses."""
        recommendations = []

        completion = self.analyze_completion_rate()
        complexity = self.analyze_complexity_distribution()
        time_patterns = self.analyze_time_patterns()
        failures = self.analyze_failure_patterns()

        # Completion rate recommendations
        if completion["rate"] < 0.5:
            recommendations.append(
                "Completion rate is below 50%. Consider breaking tasks into smaller chunks."
            )

        # Complexity recommendations — require >= 3 samples before judging.
        for level, stats in complexity.get("success_rates", {}).items():
            if stats["rate"] < 0.5 and stats["total"] >= 3:
                recommendations.append(
                    f"{level.title()} tasks have low success rate ({stats['rate']*100:.0f}%). "
                    f"Consider using more capable agents for these."
                )

        # Time-based recommendations
        productive_hour = time_patterns.get("most_productive_hour")
        if productive_hour is not None:
            recommendations.append(
                f"Most successful completions happen at hour {productive_hour}. "
                f"Consider scheduling complex tasks around this time."
            )

        # Failure recommendations
        if failures["total_failures"] > 10:
            recommendations.append(
                f"High failure count ({failures['total_failures']}). "
                "Review recent failures and add better acceptance criteria."
            )

        if not recommendations:
            recommendations.append(
                "System performing well. Continue current approach."
            )

        return recommendations

    def get_full_analysis(self) -> Dict:
        """Build the complete analysis report.

        NOTE: generate_recommendations() re-runs the analyses internally, so
        each source file is parsed twice per report — acceptable for the small
        logs this tool targets.
        """
        return {
            "timestamp": datetime.now().isoformat(),
            "completion": self.analyze_completion_rate(),
            "complexity": self.analyze_complexity_distribution(),
            "time_patterns": self.analyze_time_patterns(),
            "agent_performance": self.analyze_agent_performance(),
            "failures": self.analyze_failure_patterns(),
            "recommendations": self.generate_recommendations(),
        }

    def display_summary(self):
        """Print a short completion/failure summary to stdout."""
        completion = self.analyze_completion_rate()
        failures = self.analyze_failure_patterns()

        print("\n" + "=" * 50)
        print("GENESIS PROGRESS SUMMARY")
        print("=" * 50)
        print(f"Tasks: {completion['completed']}/{completion['total']} ({completion['rate_pct']})")
        print(f"Failures: {failures['total_failures']}")
        print(f"Pending: {completion['pending']}")
        print("=" * 50)

    def display_full(self):
        """Print the full analysis report to stdout."""
        analysis = self.get_full_analysis()

        print("\n" + "=" * 60)
        print("GENESIS PROGRESS ANALYSIS")
        print("=" * 60)

        print("\n--- COMPLETION RATE ---")
        comp = analysis["completion"]
        print(f"  Total: {comp['total']}")
        print(f"  Completed: {comp['completed']}")
        print(f"  Rate: {comp['rate_pct']}")

        print("\n--- COMPLEXITY DISTRIBUTION ---")
        for level, count in analysis["complexity"]["distribution"].items():
            rate = analysis["complexity"]["success_rates"].get(level, {}).get("rate", 0)
            print(f"  {level}: {count} tasks ({rate*100:.0f}% success)")

        print("\n--- AGENT PERFORMANCE ---")
        for agent, stats in analysis["agent_performance"].items():
            print(f"  {agent}: {stats['tasks']} tasks, "
                  f"{stats['success_rate']*100:.0f}% success, ${stats['cost']:.4f} spent")

        print("\n--- RECOMMENDATIONS ---")
        for i, rec in enumerate(analysis["recommendations"], 1):
            print(f"  {i}. {rec}")

        print("\n" + "=" * 60)


def main():
    """Command-line entry point.

    Flags (mutually exclusive in practice; first match wins):
      --json     dump the full analysis as JSON
      --summary  short completion/failure summary
      --trends   hour-by-hour activity breakdown
      (default)  full human-readable report
    """
    import argparse
    parser = argparse.ArgumentParser(description="Genesis Progress Analyzer")
    parser.add_argument("--summary", action="store_true", help="Quick summary")
    parser.add_argument("--trends", action="store_true", help="Time trends")
    parser.add_argument("--json", action="store_true", help="JSON output")
    args = parser.parse_args()

    analyzer = ProgressAnalyzer()

    if args.json:
        # default=str stringifies datetime objects in the report.
        # (The redundant local `import json` was removed; json is imported
        # at module level.)
        print(json.dumps(analyzer.get_full_analysis(), indent=2, default=str))
    elif args.summary:
        analyzer.display_summary()
    elif args.trends:
        # Fix: --trends was parsed and advertised in the module docstring
        # but previously ignored (it fell through to the full report).
        patterns = analyzer.analyze_time_patterns()
        print("\n--- TIME TRENDS ---")
        for hour in sorted(patterns.get("by_hour", {})):
            total = patterns["by_hour"][hour]
            wins = patterns.get("successes_by_hour", {}).get(hour, 0)
            print(f"  {hour:02d}:00  {total} entries, {wins} successes")
        print(f"  Most productive hour: {patterns.get('most_productive_hour')}")
    else:
        analyzer.display_full()


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
