#!/usr/bin/env python3
"""
GENESIS TASK AUTO-GENERATOR
============================
Analyzes codebase and generates improvement tasks automatically.

Analysis Methods:
    1. Code Quality: Find areas needing refactoring
    2. Test Coverage: Identify untested code
    3. Documentation: Find undocumented functions
    4. Security: Scan for vulnerabilities
    5. Performance: Detect potential bottlenecks
    6. TODOs: Extract TODO/FIXME comments

Usage:
    generator = TaskGenerator()
    tasks = generator.analyze_codebase("/path/to/code")
"""

import ast
import hashlib
import json
import os
import re
from collections import Counter
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Set


@dataclass
class GeneratedTask:
    """A task generated from code analysis.

    Fields mirror one entry of the tasks.json schema produced by
    ``TaskGenerator.generate_tasks_json``.
    """
    title: str
    description: str
    task_type: str   # e.g. "bug_fix", "documentation", "security", "testing"
    complexity: str  # "simple" | "moderate" | "complex"
    priority: int  # 1-10, higher is more urgent
    file_path: Optional[str] = None
    line_number: Optional[int] = None
    evidence: Optional[str] = None  # offending source line, when available

    def to_dict(self) -> Dict:
        """Serialize to a JSON-ready dict with a stable derived id.

        The id is derived from an MD5 digest of the title.  The builtin
        ``hash()`` must not be used here: string hashing is randomized per
        process (PYTHONHASHSEED), so ids would differ on every run and
        re-generated task files could never be diffed or deduplicated.
        """
        digest = hashlib.md5(self.title.encode("utf-8")).hexdigest()
        return {
            "id": f"auto-{int(digest, 16) % 10000:04d}",
            "title": self.title,
            "description": self.description,
            "task_type": self.task_type,
            "complexity": self.complexity,
            "priority": self.priority,
            "file_path": self.file_path,
            "line_number": self.line_number,
            "evidence": self.evidence,
            "auto_generated": True,
            "generated_at": datetime.now().isoformat()
        }


class CodeAnalyzer:
    """Analyzes Python source files for improvement opportunities.

    Three passes per file:
      * marker comments (TODO/FIXME/HACK/XXX/BUG),
      * AST inspection (docstrings, long functions, type hints, bare except),
      * regex patterns (security risks, sync sleep in async code).
    """

    def __init__(self):
        # Marker-comment patterns; group(1) captures the message text.
        self.todo_patterns = [
            r'#\s*TODO[:\s](.+)',
            r'#\s*FIXME[:\s](.+)',
            r'#\s*HACK[:\s](.+)',
            r'#\s*XXX[:\s](.+)',
            r'#\s*BUG[:\s](.+)',
        ]

    def analyze_file(self, file_path: Path) -> List[GeneratedTask]:
        """Run all analyses on a single Python file.

        Unreadable or non-text files are skipped (best-effort, returns []).
        A file that fails to parse yields one high-priority "fix syntax
        error" task instead of AST-derived tasks.
        """
        tasks: List[GeneratedTask] = []

        try:
            content = file_path.read_text()
            lines = content.split('\n')
        except (OSError, UnicodeDecodeError):
            # Unreadable/binary file: nothing we can analyze.
            return tasks

        # Marker-comment extraction
        tasks.extend(self._extract_todos(content, str(file_path)))

        # AST analysis
        try:
            tree = ast.parse(content)
            tasks.extend(self._analyze_ast(tree, str(file_path), lines))
        except SyntaxError:
            tasks.append(GeneratedTask(
                title=f"Fix syntax error in {file_path.name}",
                description=f"File {file_path} has syntax errors preventing parsing",
                task_type="bug_fix",
                complexity="moderate",
                priority=8,
                file_path=str(file_path)
            ))

        # Pattern-based analysis
        tasks.extend(self._pattern_analysis(content, str(file_path)))

        return tasks

    def _extract_todos(self, content: str, file_path: str) -> List[GeneratedTask]:
        """Turn TODO/FIXME/HACK/XXX/BUG comments into tasks."""
        tasks = []

        for i, line in enumerate(content.split('\n'), 1):
            for pattern in self.todo_patterns:
                match = re.search(pattern, line, re.IGNORECASE)
                if match:
                    todo_text = match.group(1).strip()
                    # TODOs read as feature requests; FIXME/HACK/XXX/BUG
                    # markers indicate defects.  (Was a fragile and/or chain.)
                    task_type = "feature" if "TODO" in pattern else "bug_fix"

                    tasks.append(GeneratedTask(
                        title=f"Address: {todo_text[:50]}{'...' if len(todo_text) > 50 else ''}",
                        description=f"Found in code: {todo_text}",
                        task_type=task_type,
                        complexity="moderate",
                        priority=5,
                        file_path=file_path,
                        line_number=i,
                        evidence=line.strip()
                    ))
                    # At most one task per line even if several markers match.
                    break

        return tasks

    def _analyze_ast(self, tree: ast.AST, file_path: str, lines: List[str]) -> List[GeneratedTask]:
        """Walk the AST and flag documentation/quality issues.

        Checks: missing docstrings on public functions and classes, overly
        long functions, missing return annotations, and bare ``except``.
        (*lines* is currently unused; kept for interface stability.)
        """
        tasks = []

        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                # Public functions should carry a docstring.
                if not ast.get_docstring(node) and not node.name.startswith('_'):
                    tasks.append(GeneratedTask(
                        title=f"Add docstring to {node.name}()",
                        description=f"Function '{node.name}' in {Path(file_path).name} lacks documentation",
                        task_type="documentation",
                        complexity="simple",
                        priority=3,
                        file_path=file_path,
                        line_number=node.lineno
                    ))

                # Functions over 50 lines are refactoring candidates.
                # end_lineno may be absent on older ASTs; assume ~20 lines then.
                end_line = getattr(node, 'end_lineno', node.lineno + 20)
                if end_line - node.lineno > 50:
                    tasks.append(GeneratedTask(
                        title=f"Refactor long function {node.name}()",
                        description=f"Function is {end_line - node.lineno} lines. Consider breaking into smaller functions.",
                        task_type="refactoring",
                        complexity="complex",
                        priority=4,
                        file_path=file_path,
                        line_number=node.lineno
                    ))

                # Public functions should annotate their return type.
                if not node.returns and not node.name.startswith('_'):
                    tasks.append(GeneratedTask(
                        title=f"Add type hints to {node.name}()",
                        description="Function lacks return type annotation",
                        task_type="code_quality",
                        complexity="simple",
                        priority=2,
                        file_path=file_path,
                        line_number=node.lineno
                    ))

            # Classes (public or private) should carry a docstring.
            if isinstance(node, ast.ClassDef):
                if not ast.get_docstring(node):
                    tasks.append(GeneratedTask(
                        title=f"Add docstring to class {node.name}",
                        description=f"Class '{node.name}' lacks documentation",
                        task_type="documentation",
                        complexity="simple",
                        priority=3,
                        file_path=file_path,
                        line_number=node.lineno
                    ))

            # `except:` with no exception type swallows everything.
            if isinstance(node, ast.ExceptHandler) and node.type is None:
                tasks.append(GeneratedTask(
                    title=f"Fix bare except clause in {Path(file_path).name}",
                    description="Bare except catches all exceptions. Be more specific.",
                    task_type="code_quality",
                    complexity="simple",
                    priority=6,
                    file_path=file_path,
                    line_number=node.lineno
                ))

        return tasks

    def _pattern_analysis(self, content: str, file_path: str) -> List[GeneratedTask]:
        """Regex scan for security risks and async-performance smells."""
        tasks = []
        # Split once, not once per pattern as before.
        lines = content.split('\n')

        # (regex, task title, task type, priority 1-10)
        security_patterns = [
            (r'eval\s*\(', "Remove use of eval()", "security", 9),
            (r'exec\s*\(', "Remove use of exec()", "security", 9),
            (r'pickle\.loads?\s*\(', "Replace pickle with safer serialization", "security", 8),
            (r'shell\s*=\s*True', "Avoid shell=True in subprocess", "security", 7),
        ]

        for pattern, title, task_type, priority in security_patterns:
            regex = re.compile(pattern)  # hoist compilation out of the line loop
            for i, line in enumerate(lines, 1):
                if regex.search(line):
                    tasks.append(GeneratedTask(
                        title=title,
                        description=f"Security concern found at line {i}",
                        task_type=task_type,
                        complexity="moderate",
                        priority=priority,
                        file_path=file_path,
                        line_number=i,
                        evidence=line.strip()
                    ))

        # Blocking sleep in (apparently) async code stalls the event loop.
        if 'time.sleep' in content and 'async' in content:
            tasks.append(GeneratedTask(
                title="Replace time.sleep with asyncio.sleep",
                description="Using sync sleep in async code blocks the event loop",
                task_type="performance",
                complexity="simple",
                priority=6,
                file_path=file_path
            ))

        return tasks


class TestCoverageAnalyzer:
    """Analyzes test coverage."""

    def analyze(self, source_dir: Path, test_dir: Path) -> List[GeneratedTask]:
        """Find untested modules."""
        tasks = []

        source_files = set(p.stem for p in source_dir.glob("*.py") if not p.stem.startswith('_'))
        test_files = set(p.stem.replace('test_', '') for p in test_dir.glob("test_*.py"))

        untested = source_files - test_files
        for module in untested:
            tasks.append(GeneratedTask(
                title=f"Add tests for {module}.py",
                description=f"Module {module} has no corresponding test file",
                task_type="testing",
                complexity="moderate",
                priority=5,
                file_path=str(source_dir / f"{module}.py")
            ))

        return tasks


class DependencyAnalyzer:
    """Analyzes dependencies for issues."""

    def analyze(self, requirements_path: Path) -> List[GeneratedTask]:
        """Analyze requirements.txt for issues."""
        tasks = []

        if not requirements_path.exists():
            return tasks

        content = requirements_path.read_text()
        lines = content.strip().split('\n')

        unpinned = []
        for line in lines:
            line = line.strip()
            if line and not line.startswith('#'):
                if '==' not in line and '>=' not in line and '<=' not in line:
                    unpinned.append(line)

        if unpinned:
            tasks.append(GeneratedTask(
                title="Pin dependency versions in requirements.txt",
                description=f"Unpinned dependencies: {', '.join(unpinned[:5])}{'...' if len(unpinned) > 5 else ''}",
                task_type="dependencies",
                complexity="simple",
                priority=4,
                file_path=str(requirements_path)
            ))

        return tasks


class TaskGenerator:
    """
    Main task generator that coordinates all analyzers.
    """

    def __init__(self):
        self.code_analyzer = CodeAnalyzer()
        self.test_analyzer = TestCoverageAnalyzer()
        self.dep_analyzer = DependencyAnalyzer()

    def analyze_codebase(
        self,
        root_path: Path,
        include_tests: bool = False,
        max_tasks: int = 50
    ) -> List[GeneratedTask]:
        """
        Analyze entire codebase and generate tasks.

        Args:
            root_path: Root directory to analyze
            include_tests: Whether to analyze test files
            max_tasks: Maximum number of tasks to return

        Returns:
            List of generated tasks
        """
        root_path = Path(root_path)
        all_tasks = []

        # Find Python files
        patterns = ["**/*.py"]
        exclude = ["__pycache__", ".git", "venv", "node_modules"]

        for pattern in patterns:
            for file_path in root_path.glob(pattern):
                # Skip excluded directories
                if any(ex in str(file_path) for ex in exclude):
                    continue

                # Skip test files unless requested
                if not include_tests and 'test' in file_path.name.lower():
                    continue

                tasks = self.code_analyzer.analyze_file(file_path)
                all_tasks.extend(tasks)

        # Test coverage analysis
        core_dir = root_path / "core"
        test_dir = root_path / "tests"
        if core_dir.exists() and test_dir.exists():
            all_tasks.extend(self.test_analyzer.analyze(core_dir, test_dir))

        # Dependency analysis
        req_path = root_path / "requirements.txt"
        if req_path.exists():
            all_tasks.extend(self.dep_analyzer.analyze(req_path))

        # Sort by priority and limit
        all_tasks.sort(key=lambda t: -t.priority)
        return all_tasks[:max_tasks]

    def generate_tasks_json(self, tasks: List[GeneratedTask]) -> Dict:
        """Generate tasks.json format from tasks."""
        return {
            "project": "Genesis Auto-Generated Tasks",
            "created_at": datetime.now().isoformat(),
            "auto_generated": True,
            "stories": [task.to_dict() for task in tasks]
        }

    def get_summary(self, tasks: List[GeneratedTask]) -> Dict:
        """Get summary of generated tasks."""
        by_type = Counter(t.task_type for t in tasks)
        by_priority = Counter(t.priority for t in tasks)
        by_complexity = Counter(t.complexity for t in tasks)

        return {
            "total": len(tasks),
            "by_type": dict(by_type),
            "by_priority": dict(by_priority),
            "by_complexity": dict(by_complexity),
            "high_priority": len([t for t in tasks if t.priority >= 7]),
            "files_affected": len(set(t.file_path for t in tasks if t.file_path))
        }


def main():
    """CLI for task generator: analyze a path and print/emit tasks."""
    import argparse

    parser = argparse.ArgumentParser(description="Genesis Task Generator")
    parser.add_argument("path", nargs="?", default=".", help="Path to analyze")
    parser.add_argument("--max", type=int, default=20, help="Max tasks")
    parser.add_argument("--output", help="Output JSON file")
    parser.add_argument("--include-tests", action="store_true")
    parser.add_argument("--summary", action="store_true", help="Show summary only")
    opts = parser.parse_args()

    generator = TaskGenerator()
    generated = generator.analyze_codebase(
        Path(opts.path),
        include_tests=opts.include_tests,
        max_tasks=opts.max,
    )

    if opts.summary:
        # Aggregate view only — no per-task listing.
        print("Task Generation Summary")
        print("=" * 40)
        stats = generator.get_summary(generated)
        print(f"Total tasks: {stats['total']}")
        print(f"High priority: {stats['high_priority']}")
        print(f"Files affected: {stats['files_affected']}")
        print("\nBy type:")
        for task_type, count in stats['by_type'].items():
            print(f"  {task_type}: {count}")
    elif opts.output:
        # Persist as a tasks.json document.
        payload = generator.generate_tasks_json(generated)
        Path(opts.output).write_text(json.dumps(payload, indent=2))
        print(f"Generated {len(generated)} tasks to {opts.output}")
    else:
        # Default: human-readable listing on stdout.
        print(f"Generated {len(generated)} tasks:\n")
        for item in generated:
            print(f"[P{item.priority}] [{item.task_type}] {item.title}")
            if item.file_path:
                location = f"{item.file_path}:{item.line_number}" if item.line_number else f"{item.file_path}"
                print(f"    Location: {location}")
            print()

# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
