"""
GENESIS GEMINI EXECUTOR
=======================
Direct Gemini API execution for Hyperdrive mode.
Uses $300 Google AI Studio credits.

No Antigravity proxy needed - direct API calls.

Models (see GeminiExecutor.MODELS for the authoritative mapping):
- gemini-3-flash-preview: Primary workhorse (fast, low cost)
- gemini-2.5-flash: Stable fallback (gemini-2.0-flash retires 2026-03-03)
- gemini-3-pro-preview: Complex reasoning (highest capability)
- gemini-2.0-flash-lite: High throughput simple tasks

Integrated with GeminiRateMaximizer for intelligent rate limit management.

Usage:
    executor = GeminiExecutor()
    result = executor.execute("Implement feature X")
    print(result.response)

    # With rate limit awareness
    executor = GeminiExecutor(use_rate_maximizer=True)
    result = executor.execute_optimized("Analyze this code", task_type="code_review")
"""

import os
import json
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, Optional, Any, List
from dataclasses import dataclass

# Auto-load .env so GEMINI_API_KEY is always available without manual export
try:
    from dotenv import load_dotenv
    _env_path = Path(__file__).parent.parent / '.env'
    load_dotenv(_env_path)
except ImportError:
    pass  # dotenv not installed — rely on shell env

# Try google.generativeai first, fall back to REST API
try:
    import google.generativeai as genai
    GENAI_AVAILABLE = True
except ImportError:
    GENAI_AVAILABLE = False
    import urllib.request
    import urllib.error

# Try to import rate maximizer
try:
    from core.gemini_rate_maximizer import GeminiRateMaximizer, TaskType
    RATE_MAXIMIZER_AVAILABLE = True
except ImportError:
    RATE_MAXIMIZER_AVAILABLE = False
    TaskType = None


@dataclass
class GeminiResponse:
    """Response from Gemini execution.

    Returned by GeminiExecutor.execute() and its wrappers; on failure,
    ``success`` is False, ``response`` is empty and ``error`` holds the reason.
    """
    success: bool          # True when the API call returned without raising
    response: str          # Raw model output text ("" on failure)
    model: str             # Full model name used (e.g. "gemini-3-flash-preview")
    tokens_used: int       # Rough word-count-based estimate, not an API-reported figure
    cost_estimate: float   # USD estimate derived from tokens_used and COSTS table
    execution_time: float  # Wall-clock seconds for the call
    task_complete: bool    # True when the response contains the "TASK_COMPLETE" marker
    error: Optional[str] = None  # Exception text or diagnostic when success is False


class GeminiExecutor:
    """
    Direct Gemini API executor for Genesis Hyperdrive.

    Uses $300 in Google AI Studio credits.
    No proxy needed - direct API calls.

    Prefers the google-generativeai SDK when installed; otherwise falls back
    to the public REST endpoint via urllib. Optionally consults a
    GeminiRateMaximizer for model selection and rate-limit-aware scheduling.
    """

    # API key sources (in order of preference).
    # First two entries are environment variable names; the third is a
    # fallback key file that may contain either a bare key or "KEY=value".
    API_KEY_SOURCES = [
        "GEMINI_API_KEY",
        "GOOGLE_API_KEY",
        "E:/genesis-system/Credentials/GoogleAIStudio-Gemini-AgileAdapt-API-KEY.txt"
    ]

    # Model configurations (January 2026 - Updated with rate limits).
    # Short aliases accepted by execute(model=...) mapped to full model names.
    MODELS = {
        "flash": "gemini-3-flash-preview",    # Primary - Ultra Fast
        "flash2": "gemini-2.5-flash",           # Stable fallback (2.0-flash retires 2026-03-03)
        "pro": "gemini-3-pro-preview",        # Complex - Highest reasoning
        "lite": "gemini-2.0-flash-lite",      # High throughput
        "exp": "gemini-2.0-flash-exp",        # Experimental
    }

    # Cost per million tokens (output)
    COSTS = {
        "gemini-2.0-flash": 0.40,
        "gemini-3-flash-preview": 0.15,
        "gemini-3-pro-preview": 5.00,
        "gemini-2.5-flash": 0.60,
        "gemini-2.5-pro": 5.00,
        "gemini-2.0-flash-lite": 0.30,
        "gemini-2.0-flash-exp": 0.40,
    }

    # Task type to model mapping (used when the rate maximizer is unavailable)
    TASK_ROUTING = {
        "research": "gemini-3-flash-preview",
        "code_generation": "gemini-3-flash-preview",
        "code_review": "gemini-3-pro-preview",
        "architecture": "gemini-3-pro-preview",
        "simple_extraction": "gemini-2.0-flash-lite",
        "classification": "gemini-2.0-flash-lite",
        "summarization": "gemini-3-flash-preview",
        "agentic_vision": "gemini-3-flash-preview", # Fast loop
        "deep_vision": "gemini-3-pro-preview",      # Complex analysis
    }

    def __init__(self, api_key: Optional[str] = None, default_model: str = "flash",
                 use_rate_maximizer: bool = True):
        """Initialize the executor.

        Args:
            api_key: Explicit API key; when None, resolved via API_KEY_SOURCES.
            default_model: MODELS alias or full model name used when execute()
                is called without a model.
            use_rate_maximizer: Attempt to construct a GeminiRateMaximizer for
                rate-limit-aware routing (silently skipped when unavailable).
        """
        self.api_key = api_key or self._load_api_key()
        # Resolve alias -> full name; unknown values pass through unchanged
        self.default_model = self.MODELS.get(default_model, default_model)
        self.usage_log_path = Path("E:/genesis-system/data/gemini_usage.jsonl")
        self.total_spent = 0.0  # in-process running total; persistent total lives in the log

        # Initialize rate maximizer if available and requested
        self.rate_maximizer = None
        if use_rate_maximizer and RATE_MAXIMIZER_AVAILABLE:
            try:
                self.rate_maximizer = GeminiRateMaximizer()
            except Exception as e:
                print(f"Warning: Could not initialize rate maximizer: {e}")

        # Configure the SDK once up front so per-call code can assume it
        if GENAI_AVAILABLE and self.api_key:
            genai.configure(api_key=self.api_key)

    def _load_api_key(self) -> Optional[str]:
        """Load API key from environment or file.

        Returns:
            The key string, or None when no source yields one.
        """
        # Check environment variables (first two API_KEY_SOURCES entries)
        for env_var in self.API_KEY_SOURCES[:2]:
            key = os.environ.get(env_var)
            if key:
                return key

        # Check the fallback key file
        key_file = Path(self.API_KEY_SOURCES[2])
        if key_file.exists():
            content = key_file.read_text().strip()
            # Parse "KEY=value" format
            if "=" in content:
                return content.split("=", 1)[1].strip()
            return content

        return None

    def execute(
        self,
        prompt: str,
        model: Optional[str] = None,
        system_prompt: Optional[str] = None,
        max_tokens: int = 8192,
        temperature: float = 0.7,
        cached_content_name: Optional[str] = None
    ) -> GeminiResponse:
        """
        Execute a prompt with Gemini.

        Args:
            prompt: The task/prompt to execute
            model: Model to use (flash, pro, exp, or full model name)
            system_prompt: Optional system instruction
            max_tokens: Max output tokens
            temperature: Sampling temperature
            cached_content_name: Optional Titan Memory name to use

        Returns:
            GeminiResponse with results; never raises — failures are reported
            via success=False and the error field.
        """
        model_name = self.MODELS.get(model, model) if model else self.default_model
        start_time = time.time()

        if not self.api_key:
            return GeminiResponse(
                success=False,
                response="",
                model=model_name,
                tokens_used=0,
                cost_estimate=0,
                execution_time=0,
                task_complete=False,
                error="No API key configured"
            )

        try:
            if GENAI_AVAILABLE:
                response = self._execute_with_sdk(
                    prompt, model_name, system_prompt, max_tokens, temperature, cached_content_name
                )
            else:
                # REST API fallback doesn't support caching easily yet in this implementation
                response = self._execute_with_rest(
                    prompt, model_name, system_prompt, max_tokens, temperature
                )

            execution_time = time.time() - start_time

            # Estimate tokens and cost (if cached, input cost is lower, but we estimate simply here)
            # NOTE: ~1.3 tokens per whitespace-separated word is a heuristic,
            # not an API-reported count.
            tokens_estimate = len(response.split()) * 1.3  # Rough estimate
            cost = (tokens_estimate / 1_000_000) * self.COSTS.get(model_name, 0.50)

            # Check for task completion marker
            task_complete = "TASK_COMPLETE" in response

            result = GeminiResponse(
                success=True,
                response=response,
                model=model_name,
                tokens_used=int(tokens_estimate),
                cost_estimate=cost,
                execution_time=execution_time,
                task_complete=task_complete
            )

            # Log usage
            self._log_usage(result, prompt[:100])
            self.total_spent += cost

            # Record with rate maximizer if available
            if self.rate_maximizer:
                input_tokens = len(prompt.split()) * 1.3  # Estimate
                if cached_content_name:
                    input_tokens = 0 # Cached inputs don't count towards rate limit the same way, or are negligible

                self.rate_maximizer.record_usage(
                    model_name,
                    input_tokens=int(input_tokens),
                    output_tokens=int(tokens_estimate),
                    success=True
                )

            return result

        except Exception as e:
            # Top-level boundary: convert any failure into a structured response
            return GeminiResponse(
                success=False,
                response="",
                model=model_name,
                tokens_used=0,
                cost_estimate=0,
                execution_time=time.time() - start_time,
                task_complete=False,
                error=str(e)
            )

    def execute_optimized(
        self,
        prompt: str,
        task_type: str = "general",
        system_prompt: Optional[str] = None,
        max_tokens: int = 8192,
        temperature: float = 0.7,
        priority: int = 5,
        cached_content_name: Optional[str] = None
    ) -> GeminiResponse:
        """
        Execute with intelligent model selection and rate limit awareness.

        Uses the GeminiRateMaximizer to select the best available model
        based on current utilization and task type.

        Args:
            prompt: The task/prompt to execute
            task_type: Type of task for routing (research, code_generation, etc.)
            system_prompt: Optional system instruction
            max_tokens: Max output tokens
            temperature: Sampling temperature
            priority: Request priority (1-10, lower = higher)
            cached_content_name: Optional Titan Memory name to use

        Returns:
            GeminiResponse with results
        """
        # Estimate tokens for model selection (input estimate + output budget)
        token_estimate = int(len(prompt.split()) * 1.3) + max_tokens

        # Select model based on rate limits and task type
        if self.rate_maximizer and RATE_MAXIMIZER_AVAILABLE:
            # Get task type enum if available; unknown strings fall back to GENERAL
            task_type_enum = None
            if TaskType:
                try:
                    task_type_enum = TaskType(task_type)
                except ValueError:
                    task_type_enum = TaskType.GENERAL

            # Schedule request with rate maximizer
            scheduled = self.rate_maximizer.schedule_request(
                token_estimate=token_estimate,
                task_type=task_type_enum or TaskType.GENERAL,
                priority=priority
            )

            model = scheduled.model

            # Apply delay if needed (rate limit approaching)
            if scheduled.delay_seconds > 0:
                time.sleep(scheduled.delay_seconds)
        else:
            # Fallback to task routing without rate awareness
            model = self.TASK_ROUTING.get(task_type, self.default_model)

        # Execute with selected model
        return self.execute(
            prompt=prompt,
            model=model,
            system_prompt=system_prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            cached_content_name=cached_content_name
        )

    def get_utilization_report(self) -> Optional[Dict]:
        """
        Get current rate limit utilization report.

        Returns:
            Utilization report dict or None if rate maximizer unavailable
        """
        if self.rate_maximizer:
            report = self.rate_maximizer.get_utilization_report()
            return {
                "timestamp": report.timestamp,
                "best_model": report.best_model,
                "total_capacity_used": report.total_capacity_used,
                "models": report.models,
                "recommendations": report.recommendations
            }
        return None

    def get_best_model(self, token_estimate: int = 1000, task_type: str = "general") -> str:
        """
        Get best available model for a request.

        Args:
            token_estimate: Estimated tokens needed
            task_type: Type of task

        Returns:
            Model name (full model string, from the maximizer or TASK_ROUTING)
        """
        if self.rate_maximizer and RATE_MAXIMIZER_AVAILABLE and TaskType:
            try:
                task_type_enum = TaskType(task_type)
            except ValueError:
                task_type_enum = TaskType.GENERAL
            return self.rate_maximizer.get_best_model(token_estimate, task_type_enum)

        return self.TASK_ROUTING.get(task_type, self.default_model)

    def _execute_with_sdk(
        self,
        prompt: str,
        model: str,
        system_prompt: Optional[str],
        max_tokens: int,
        temperature: float,
        cached_content_name: Optional[str] = None
    ) -> str:
        """Execute using google-generativeai SDK.

        Returns the response text; exceptions propagate to execute()'s handler.
        """

        # Handle cached content initialization
        if cached_content_name:
            try:
                # Retrieve the cache object by name to pass to model
                cache = genai.caching.CachedContent.get(cached_content_name)
                # When using cache, model must match cache's model
                model_obj = genai.GenerativeModel.from_cached_content(cached_content=cache)
            except Exception as e:
                # Fallback if cache fails: proceed without the cache
                print(f"Titan Memory Warning: Failed to use cache {cached_content_name}: {e}")
                model_obj = genai.GenerativeModel(
                    model_name=model,
                    system_instruction=system_prompt
                )
        else:
            model_obj = genai.GenerativeModel(
                model_name=model,
                system_instruction=system_prompt
            )

        generation_config = genai.GenerationConfig(
            max_output_tokens=max_tokens,
            temperature=temperature
        )

        response = model_obj.generate_content(
            prompt,
            generation_config=generation_config
        )

        return response.text

    def _execute_with_rest(
        self,
        prompt: str,
        model: str,
        system_prompt: Optional[str],
        max_tokens: int,
        temperature: float
    ) -> str:
        """Execute using REST API directly (fallback when the SDK is absent).

        Returns the first candidate's first text part, or "" when the
        response carries no candidates/parts.
        """
        url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={self.api_key}"

        payload = {
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": {
                "maxOutputTokens": max_tokens,
                "temperature": temperature
            }
        }

        if system_prompt:
            payload["systemInstruction"] = {"parts": [{"text": system_prompt}]}

        data = json.dumps(payload).encode()
        req = urllib.request.Request(
            url,
            data=data,
            headers={"Content-Type": "application/json"}
        )

        with urllib.request.urlopen(req, timeout=120) as resp:
            result = json.loads(resp.read().decode())

        # Extract text from response
        candidates = result.get("candidates", [])
        if candidates:
            parts = candidates[0].get("content", {}).get("parts", [])
            if parts:
                return parts[0].get("text", "")

        return ""

    def _log_usage(self, result: GeminiResponse, prompt_preview: str) -> None:
        """Append one JSONL entry per call for budget tracking."""
        self.usage_log_path.parent.mkdir(parents=True, exist_ok=True)

        entry = {
            "timestamp": datetime.now().isoformat(),
            "model": result.model,
            "tokens": result.tokens_used,
            "cost": result.cost_estimate,
            "execution_time": result.execution_time,
            "success": result.success,
            "task_complete": result.task_complete,
            "prompt_preview": prompt_preview
        }

        with open(self.usage_log_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(entry) + "\n")

    def execute_task(
        self,
        task: Dict,
        working_dir: str = "E:/genesis-system"
    ) -> GeminiResponse:
        """
        Execute a task from tasks.json format.

        Builds a proper prompt with acceptance criteria
        and verification instructions.

        Args:
            task: Dict with optional "title", "description",
                "acceptance_criteria" (list of strings or dicts with a
                "description" key) and "context" entries.
            working_dir: Directory path embedded in the prompt for the agent.
        """
        title = task.get("title", "Unknown Task")
        description = task.get("description", "")
        criteria = task.get("acceptance_criteria", [])
        context = task.get("context", "")

        # Criteria may be plain strings or {"description": ...} dicts
        criteria_text = "\n".join([
            f"- {c.get('description', c) if isinstance(c, dict) else c}"
            for c in criteria
        ])

        prompt = f"""# GENESIS HYPERDRIVE TASK

## Task: {title}

## Description
{description}

## Acceptance Criteria
{criteria_text}

## Context
{context}

## Working Directory
{working_dir}

## Instructions
1. Analyze this task carefully
2. Implement the solution
3. Verify against EACH acceptance criterion
4. Report PASS or FAIL for each criterion
5. If ALL criteria pass, include: TASK_COMPLETE
6. If ANY fail, explain what needs to be fixed

## Response Format
```
CRITERION 1: [PASS/FAIL] - [explanation]
CRITERION 2: [PASS/FAIL] - [explanation]
...

IMPLEMENTATION:
[Your implementation or explanation]

STATUS: [TASK_COMPLETE or TASK_INCOMPLETE]
```

Execute now."""

        system_prompt = """You are a Genesis Hyperdrive agent executing autonomous tasks.
You have full authority to implement solutions.
Be precise, verify your work, and report accurately.
Never claim completion without actual verification."""

        return self.execute(prompt, system_prompt=system_prompt)


    def get_budget_status(self) -> Dict:
        """Get current budget usage status.

        Recomputes totals from the persistent usage log (not total_spent),
        so it reflects spend across all processes.
        """
        total_budget = 300.0  # $300 Gemini credits

        # Read from usage log
        spent = 0.0
        iterations = 0

        if self.usage_log_path.exists():
            with open(self.usage_log_path, encoding="utf-8") as f:
                for line in f:
                    try:
                        entry = json.loads(line)
                        spent += entry.get("cost", 0)
                        iterations += 1
                    except (json.JSONDecodeError, TypeError):
                        # Skip corrupt/partial lines; accounting is best-effort
                        continue

        return {
            "total_budget": total_budget,
            "spent": spent,
            "remaining": total_budget - spent,
            "iterations": iterations,
            "avg_cost_per_iteration": spent / iterations if iterations > 0 else 0.50,
            "estimated_iterations_left": int((total_budget - spent) / 0.50)
        }


@dataclass
class TitanMemory:
    """
    Represents a 'Titan' persistent memory block (Context Cache).

    This holds the reference to the cached content on Google's servers.
    Instances are built from genai.caching.CachedContent objects by
    TitanMemoryManager; no API calls happen here.
    """
    name: str  # The cache resource name (e.g., 'cachedContents/12345')
    display_name: str  # Human-readable label given at creation time
    model: str  # Model the cache was created for (must match at use time)
    expire_time: str  # Server-side expiry, stringified from the cache object
    token_count: int  # Total tokens stored in the cache (usage_metadata)
    ttl_seconds: int = 3600  # Requested TTL; default 1 hour


class TitanMemoryManager:
    """
    Manages the 'Titan' memory layer (Context Caching).

    Wraps genai file upload + CachedContent create/get/list. All methods are
    best-effort: failures are reported via None/[] rather than exceptions.
    Requires the google-generativeai SDK (GENAI_AVAILABLE).
    """

    # File-suffix -> mime-type mapping for uploads; anything else is text/plain
    _MIME_TYPES = {
        ".py": "text/x-python",
        ".md": "text/markdown",
        ".json": "application/json",
        ".html": "text/html",
    }

    def __init__(self, api_key: str):
        """Store the key and configure the SDK when it is installed."""
        self.api_key = api_key
        if GENAI_AVAILABLE:
            genai.configure(api_key=self.api_key)

    def create_memory(
        self,
        files: List[Path],
        display_name: str = None,
        model: str = "gemini-2.0-flash-001",  # Cache only works on 1.5/2.0+ Flash/Pro
        ttl_minutes: int = 60
    ) -> Optional[TitanMemory]:
        """
        Create a new Titan Memory block from a list of files.

        Args:
            files: Paths to upload; missing files are silently skipped.
            display_name: Optional label; defaults to a timestamped name.
            model: Model the cache binds to (must be cache-capable).
            ttl_minutes: Cache lifetime in minutes.

        Returns:
            TitanMemory descriptor, or None on any failure.
        """
        if not GENAI_AVAILABLE:
            print("Error: Titan Memory requires google-generativeai SDK")
            return None

        try:
            display_name = display_name or f"genesis_memory_{int(time.time())}"
            uploaded_files = []

            # 1. Upload files
            print(f"Titan Memory: Uploading {len(files)} files...")
            for file_path in files:
                if not file_path.exists():
                    continue
                # Determine mime type from suffix (simplified mapping)
                mime_type = self._MIME_TYPES.get(file_path.suffix, "text/plain")

                pf = genai.upload_file(path=file_path, mime_type=mime_type, display_name=file_path.name)
                uploaded_files.append(pf)

            # Wait for processing
            print("Titan Memory: Waiting for file processing...")
            # Naive wait (SDK handles this mostly, but good to be safe)
            for f in uploaded_files:
                while f.state.name == "PROCESSING":
                    time.sleep(1)
                    f = genai.get_file(f.name)

            # 2. Create Cache
            print(f"Titan Memory: creating cache '{display_name}'...")
            cache = genai.caching.CachedContent.create(
                model=model,
                display_name=display_name,
                system_instruction="You are the Genesis System. You have full recursive knowledge of your own source code provided in this memory block.",
                contents=uploaded_files,
                ttl=timedelta(minutes=ttl_minutes),
            )

            return TitanMemory(
                name=cache.name,
                display_name=cache.display_name,
                model=cache.model,
                expire_time=str(cache.expire_time),
                token_count=cache.usage_metadata.total_token_count,
                ttl_seconds=ttl_minutes * 60
            )

        except Exception as e:
            # Best-effort: report and return None rather than propagate
            print(f"Titan Memory Error: {e}")
            return None

    def get_memory(self, name: str) -> Optional[TitanMemory]:
        """Retrieve an existing memory block by resource name, or None."""
        if not GENAI_AVAILABLE: return None
        try:
            cache = genai.caching.CachedContent.get(name)
            return TitanMemory(
                name=cache.name,
                display_name=cache.display_name,
                model=cache.model,
                expire_time=str(cache.expire_time),
                token_count=cache.usage_metadata.total_token_count
            )
        except Exception:
            # Not found / expired / API error all collapse to None
            return None

    def list_memories(self) -> List[TitanMemory]:
        """List all active memories; returns [] when the SDK or API fails."""
        if not GENAI_AVAILABLE: return []
        memories = []
        try:
            for cache in genai.caching.CachedContent.list():
                memories.append(TitanMemory(
                    name=cache.name,
                    display_name=cache.display_name,
                    model=cache.model,
                    expire_time=str(cache.expire_time),
                    token_count=cache.usage_metadata.total_token_count
                ))
        except Exception:
            pass  # best-effort listing
        return memories


def test_gemini():
    """Quick smoke test of Gemini execution.

    Checks API key resolution, runs one cheap prompt, probes Titan Memory
    availability, and prints budget status. Makes one real API call.
    """
    executor = GeminiExecutor()

    # BUG FIX: these headers previously used "\\n", printing a literal
    # backslash-n instead of a blank line.
    print("=== GEMINI EXECUTOR TEST ===\n")

    # Check API key
    if not executor.api_key:
        print("ERROR: No API key found!")
        return

    print(f"API Key: {executor.api_key[:10]}...{executor.api_key[-4:]}")
    print(f"Default Model: {executor.default_model}")

    # Test simple execution
    print("\n--- Testing Simple Prompt ---")
    result = executor.execute(
        "What is 2 + 2? Reply with just the number.",
        model="flash"
    )

    print(f"Success: {result.success}")
    print(f"Response: {result.response[:100]}")
    print(f"Tokens: {result.tokens_used}")
    print(f"Cost: ${result.cost_estimate:.4f}")
    print(f"Time: {result.execution_time:.2f}s")

    # Test Titan Memory if available
    if GENAI_AVAILABLE:
        print("\n--- Testing Titan Memory Capability ---")
        titan = TitanMemoryManager(executor.api_key)
        print("Titan Manager initialized.")
        # Note: We don't create a real cache in this simple test to avoid costs/delays
        # unless specifically requested.

    if result.error:
        print(f"Error: {result.error}")

    # Budget status
    print("\n--- Budget Status ---")
    status = executor.get_budget_status()
    for key, value in status.items():
        print(f"{key}: {value}")


# Script entry point: run the smoke test when executed directly.
if __name__ == "__main__":
    test_gemini()
