import time
import logging
import traceback
import threading
import queue
import concurrent.futures
import os
import signal
from typing import Callable, Any, Dict, List, Optional, Tuple, Union
import json
import uuid
import functools
from collections import deque

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


class SkillExecutionError(Exception):
    """Common base for all errors raised while executing a skill."""


class InputValidationError(SkillExecutionError):
    """Signals that a skill's input data failed validation."""


class OutputValidationError(SkillExecutionError):
    """Signals that a skill's output data failed validation."""


class SkillExecutionTimeout(SkillExecutionError):
    """Signals that a skill did not finish within its allotted timeout."""


class SkillInvocation:
    """A single request to run a named skill against some input data."""

    def __init__(self, skill_name: str, input_data: Dict[str, Any], invocation_id: Optional[str] = None):
        self.skill_name = skill_name
        self.input_data = input_data
        # Auto-assign a fresh UUID when the caller did not supply an id.
        if invocation_id:
            self.invocation_id = invocation_id
        else:
            self.invocation_id = str(uuid.uuid4())

    def __repr__(self):
        return ("SkillInvocation("
                f"skill_name='{self.skill_name}', "
                f"input_data={self.input_data}, "
                f"invocation_id='{self.invocation_id}')")


class SkillResult:
    """Outcome of one skill execution: output, error, timing, and logs."""

    def __init__(self, invocation_id: str, skill_name: str, output_data: Optional[Dict[str, Any]] = None,
                 error: Optional[Exception] = None, execution_time: float = 0.0, logs: Optional[List[str]] = None):
        self.invocation_id = invocation_id
        self.skill_name = skill_name
        self.output_data = output_data
        self.error = error
        self.execution_time = execution_time
        # A missing/empty log list becomes a fresh list (never shared).
        self.logs = logs if logs else []

    def __repr__(self):
        return ("SkillResult("
                f"invocation_id='{self.invocation_id}', "
                f"skill_name='{self.skill_name}', "
                f"output_data={self.output_data}, "
                f"error={self.error}, "
                f"execution_time={self.execution_time})")


class SkillRegistry:
    """Keeps track of registered skills, their validators, and dependencies."""

    def __init__(self):
        # skill name -> callable implementing the skill
        self.skills: Dict[str, Callable] = {}
        # skill name -> predicate validating the input mapping
        self.input_validators: Dict[str, Callable[[Dict[str, Any]], bool]] = {}
        # skill name -> predicate validating the output mapping
        self.output_validators: Dict[str, Callable[[Dict[str, Any]], bool]] = {}
        # skill name -> names of skills it depends on
        self.skill_dependencies: Dict[str, List[str]] = {}

    def register_skill(self, skill_name: str, skill_function: Callable,
                       input_validator: Optional[Callable[[Dict[str, Any]], bool]] = None,
                       output_validator: Optional[Callable[[Dict[str, Any]], bool]] = None,
                       dependencies: Optional[List[str]] = None):
        """Adds a skill, with optional validators and dependencies, to the registry.

        Raises:
            ValueError: if a skill with the same name is already registered.
        """
        if skill_name in self.skills:
            raise ValueError(f"Skill '{skill_name}' already registered.")
        self.skills[skill_name] = skill_function
        if input_validator:
            self.input_validators[skill_name] = input_validator
        if output_validator:
            self.output_validators[skill_name] = output_validator
        self.skill_dependencies[skill_name] = dependencies or []

    def get_skill(self, skill_name: str) -> Callable:
        """Returns the callable for *skill_name*; raises ValueError if unknown."""
        if skill_name in self.skills:
            return self.skills[skill_name]
        raise ValueError(f"Skill '{skill_name}' not found.")

    def get_input_validator(self, skill_name: str) -> Optional[Callable[[Dict[str, Any]], bool]]:
        """Returns the input validator for a skill, or None if none was registered."""
        return self.input_validators.get(skill_name)

    def get_output_validator(self, skill_name: str) -> Optional[Callable[[Dict[str, Any]], bool]]:
        """Returns the output validator for a skill, or None if none was registered."""
        return self.output_validators.get(skill_name)

    def get_dependencies(self, skill_name: str) -> List[str]:
        """Returns the dependency list for a skill (empty for unknown skills)."""
        return self.skill_dependencies.get(skill_name, [])


class SkillExecutor:
    """Executes skills with validation, caching, retries, timeouts, fallbacks,
    tracing, and performance metrics.

    Thread-safety: ``execute_skills_parallel`` runs invocations concurrently,
    so the result cache, metrics, and trace stores are guarded by locks.
    """

    def __init__(self, skill_registry: 'SkillRegistry', max_retries: int = 3, retry_delay: float = 1.0,
                 timeout: float = 10.0, resource_limits: Optional[Dict[str, Any]] = None,
                 fallback_skills: Optional[Dict[str, str]] = None, enable_cache: bool = True):
        """Initializes the SkillExecutor.

        Args:
            skill_registry: Registry used to look up skills and validators.
            max_retries: Retries allowed after the first failed attempt.
            retry_delay: Base delay in seconds for exponential backoff.
            timeout: Per-attempt execution timeout in seconds.
            resource_limits: Advisory resource limits (currently an unused
                placeholder hook — TODO confirm intended enforcement).
            fallback_skills: Maps a skill name to a fallback skill executed
                when the primary skill (including all retries) fails.
            enable_cache: When True, successful results are memoized keyed by
                (skill_name, canonical JSON of the input).
        """
        self.skill_registry = skill_registry
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.timeout = timeout
        self.resource_limits = resource_limits or {}
        self.fallback_skills = fallback_skills or {}
        self.execution_traces: Dict[str, List[Dict[str, Any]]] = {}  # invocation_id -> trace events
        self.metrics: Dict[str, Dict[str, Union[int, float]]] = {}   # skill_name -> metric values
        self.skill_cache: Dict[Tuple[str, str], 'SkillResult'] = {}  # (skill_name, input_key) -> result
        self.enable_cache = enable_cache
        self.cache_lock = threading.Lock()
        # Metrics/traces are mutated from worker threads in
        # execute_skills_parallel; dict read-modify-write is not atomic.
        self._stats_lock = threading.Lock()

    def _validate_inputs(self, skill_name: str, input_data: Dict[str, Any]) -> None:
        """Runs the registered input validator; raises InputValidationError on failure."""
        validator = self.skill_registry.get_input_validator(skill_name)
        if validator is None:
            return
        try:
            ok = validator(input_data)
        except Exception as e:
            # A crashing validator counts as a validation failure.
            raise InputValidationError(f"Input validation failed for skill '{skill_name}': {e}") from e
        if not ok:
            # Raised outside the try block so it is not re-caught and
            # re-wrapped (the old code nested its own message twice).
            raise InputValidationError(f"Input validation failed for skill '{skill_name}'.")

    def _validate_outputs(self, skill_name: str, output_data: Dict[str, Any]) -> None:
        """Runs the registered output validator; raises OutputValidationError on failure."""
        validator = self.skill_registry.get_output_validator(skill_name)
        if validator is None:
            return
        try:
            ok = validator(output_data)
        except Exception as e:
            raise OutputValidationError(f"Output validation failed for skill '{skill_name}': {e}") from e
        if not ok:
            raise OutputValidationError(f"Output validation failed for skill '{skill_name}'.")

    def _execute_skill_with_timeout(self, skill_name: str, input_data: Dict[str, Any]) -> Any:
        """Runs the skill in a worker thread and enforces ``self.timeout``.

        Raises:
            SkillExecutionTimeout: when the skill does not finish in time.

        NOTE: a timed-out worker thread cannot be forcibly killed; the pool is
        shut down with ``wait=False`` so the caller returns immediately while
        the runaway worker finishes in the background.  Use a process pool for
        hard isolation or real resource limits (``self.resource_limits`` is
        currently only an advisory placeholder).
        """
        skill_function = self.skill_registry.get_skill(skill_name)

        def run_skill() -> Any:
            try:
                # The input mapping is expanded to keyword arguments.
                return skill_function(**input_data)
            except Exception as e:
                logging.error(f"Skill '{skill_name}' execution failed: {e}")
                raise

        pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        try:
            future = pool.submit(run_skill)
            try:
                return future.result(timeout=self.timeout)
            except concurrent.futures.TimeoutError:
                logging.warning(f"Skill '{skill_name}' execution timed out after {self.timeout} seconds.")
                raise SkillExecutionTimeout(
                    f"Skill '{skill_name}' execution timed out after {self.timeout} seconds.")
        finally:
            # BUG FIX: the old `with` block called shutdown(wait=True) here,
            # blocking on the runaway worker and defeating the timeout.
            pool.shutdown(wait=False)

    def execute_skill(self, invocation: 'SkillInvocation') -> 'SkillResult':
        """Executes a skill with caching, retries (exponential backoff) and fallback.

        Args:
            invocation: The skill invocation to execute.

        Returns:
            A SkillResult; ``result.error`` is set only when every attempt and
            any configured fallback failed.
        """
        skill_name = invocation.skill_name
        input_data = invocation.input_data
        invocation_id = invocation.invocation_id

        # Canonical JSON serves directly as the cache key.  The old code ran
        # it through the builtin hash(), which is salted per process and
        # collision-prone.  Unserializable inputs are simply never cached
        # (previously json.dumps would crash the whole call).
        try:
            input_key: Optional[str] = json.dumps(input_data, sort_keys=True)
        except (TypeError, ValueError):
            input_key = None

        # 1. Check cache.
        if self.enable_cache and input_key is not None:
            with self.cache_lock:
                cached_result = self.skill_cache.get((skill_name, input_key))
            if cached_result is not None:
                logging.info(f"Skill '{skill_name}' found in cache (invocation_id: {invocation_id}).")
                self._add_trace_event(invocation_id, "Skill execution retrieved from cache",
                                      {"skill_name": skill_name, "input_data": input_data})
                return cached_result

        self._add_trace_event(invocation_id, "Skill execution started",
                              {"skill_name": skill_name, "input_data": input_data})

        logs: List[str] = []
        start_time = time.time()
        output_data: Optional[Dict[str, Any]] = None
        error: Optional[Exception] = None

        for attempt in range(self.max_retries + 1):
            # BUG FIX: the old code never cleared a previous attempt's error,
            # so a successful retry still reported failure (and needlessly
            # triggered the fallback skill).
            error = None
            try:
                self._validate_inputs(skill_name, input_data)
                self._add_trace_event(invocation_id, "Input validation succeeded")

                output_data = self._execute_skill_with_timeout(skill_name, input_data)
                self._add_trace_event(invocation_id, "Skill execution succeeded", {"output_data": output_data})

                self._validate_outputs(skill_name, output_data)
                self._add_trace_event(invocation_id, "Output validation succeeded")
                break  # Success — exit retry loop.

            except InputValidationError as e:
                # Input validation is deterministic for a fixed input; a retry
                # cannot succeed, so fail fast instead of burning attempts.
                error = e
                logs.append(f"Attempt {attempt + 1} failed: {e}")
                self._add_trace_event(invocation_id, f"Attempt {attempt + 1} failed", {"error": str(e)})
                logging.error(f"Skill '{skill_name}' input validation failed: {e}")
                break

            except SkillExecutionError as e:
                # Covers timeouts and output-validation failures.  (The old
                # code redundantly listed SkillExecutionTimeout, a subclass.)
                error = e
                logs.append(f"Attempt {attempt + 1} failed: {e}")
                self._add_trace_event(invocation_id, f"Attempt {attempt + 1} failed", {"error": str(e)})

                if attempt < self.max_retries:
                    time.sleep(self.retry_delay * (2 ** attempt))  # Exponential backoff.
                    logging.warning(f"Retrying skill '{skill_name}' (attempt {attempt + 2}/{self.max_retries + 1})")
                else:
                    logging.error(f"Skill '{skill_name}' failed after {self.max_retries + 1} attempts: {e}")

            except Exception as e:
                # Unexpected (likely programming) errors are never retried.
                error = e
                logs.append(f"Unexpected error during skill execution: {e}")
                self._add_trace_event(invocation_id, f"Unexpected error during skill execution: {e}")
                logging.exception(f"Unexpected error during skill execution of '{skill_name}': {e}")
                break

        execution_time = time.time() - start_time

        if error is not None and skill_name in self.fallback_skills:
            fallback_skill_name = self.fallback_skills[skill_name]
            logging.info(f"Falling back to skill '{fallback_skill_name}' for invocation {invocation_id}")
            self._add_trace_event(invocation_id, f"Falling back to skill '{fallback_skill_name}'")

            fallback_invocation = SkillInvocation(fallback_skill_name, input_data, invocation_id=invocation_id)
            fallback_result = self.execute_skill(fallback_invocation)  # Recursive fallback execution.

            # Combine logs; adopt the fallback's outcome.
            logs.extend(fallback_result.logs)
            if fallback_result.error:
                error = fallback_result.error
            else:
                # BUG FIX: the old code kept the primary error even when the
                # fallback succeeded, so the result still reported failure.
                error = None
                output_data = fallback_result.output_data

        result = SkillResult(invocation_id, skill_name, output_data, error, execution_time, logs)
        self._add_trace_event(invocation_id, "Skill execution finished", {"result": str(result)})
        self._update_metrics(skill_name, execution_time, error)

        # 2. Store successful, serializable results in the cache.
        if self.enable_cache and error is None and input_key is not None:
            with self.cache_lock:
                self.skill_cache[(skill_name, input_key)] = result
            logging.info(f"Skill '{skill_name}' result cached (invocation_id: {invocation_id}).")

        return result

    def execute_skills_parallel(self, invocations: List['SkillInvocation']) -> List['SkillResult']:
        """Executes multiple skills concurrently; results are in completion order."""
        with concurrent.futures.ThreadPoolExecutor() as pool:
            futures = [pool.submit(self.execute_skill, invocation) for invocation in invocations]
            return [future.result() for future in concurrent.futures.as_completed(futures)]

    def _add_trace_event(self, invocation_id: str, event_name: str,
                         event_data: Optional[Dict[str, Any]] = None) -> None:
        """Appends a timestamped event to the invocation's execution trace (thread-safe)."""
        with self._stats_lock:
            self.execution_traces.setdefault(invocation_id, []).append(
                {"timestamp": time.time(), "event": event_name, "data": event_data})

    def get_trace(self, invocation_id: str) -> Optional[List[Dict[str, Any]]]:
        """Returns the execution trace for an invocation id, or None if unknown."""
        return self.execution_traces.get(invocation_id)

    def _update_metrics(self, skill_name: str, execution_time: float, error: Optional[Exception]) -> None:
        """Updates aggregate performance metrics for the skill (thread-safe)."""
        with self._stats_lock:
            stats = self.metrics.setdefault(skill_name, {
                "execution_count": 0,
                "total_execution_time": 0.0,
                "average_execution_time": 0.0,
                "error_count": 0,
            })
            stats["execution_count"] += 1
            stats["total_execution_time"] += execution_time
            stats["average_execution_time"] = stats["total_execution_time"] / stats["execution_count"]
            if error is not None:
                stats["error_count"] += 1

    def get_metrics(self, skill_name: Optional[str] = None) -> Union[Dict[str, Dict[str, Union[int, float]]], Dict[str, Union[int, float]], None]:
        """Returns metrics for one skill (None if unknown) or all metrics."""
        if skill_name:
            return self.metrics.get(skill_name)
        return self.metrics


class SkillOrchestrator:
    """Orchestrates skill execution driven by a declarative workflow DSL.

    A workflow maps step names to step definitions::

        {
            "add_step": {"skill": "add", "input": {"x": 5, "y": 3}},
            "mul_step": {"skill": "multiply",
                         "input": {"x": 2, "y": "$add_step.result"}},
        }

    Input values beginning with "$" are references:

    * ``"$name.field"`` — a field of an earlier step's output (*name* is tried
      first as a workflow step name, then as a skill name);
    * ``"$name"``       — a key of the initial input mapping.
    """

    def __init__(self, skill_registry: 'SkillRegistry', skill_executor: 'SkillExecutor'):
        self.skill_registry = skill_registry
        self.skill_executor = skill_executor
        # The workflow currently in use; execute_workflow keeps this in sync.
        self._workflow_definition: Dict[str, Any] = {}

    def execute_workflow(self, workflow_definition: Dict[str, Any], initial_input: Dict[str, Any]) -> Dict[str, Any]:
        """Executes a workflow and returns {output_field: output_data}.

        Steps run once all of their dependencies (template references plus
        registry-declared skill dependencies) have produced results.  Inputs
        are resolved lazily, just before a step runs — the old code resolved
        every template up front, before anything had executed, so any
        "$step.field" reference failed immediately.

        Args:
            workflow_definition: Mapping of step name -> step definition.
                Each step definition gains an "invocation_id" entry as a side
                effect.  (The old code generated ids but never stored them, so
                dependency lookups always returned None and dependent steps
                were re-queued forever.)
            initial_input: Values available to "$name" references.

        Returns:
            Mapping of output_field (default: step name) to output data.

        Raises:
            ValueError: for unresolvable references or circular/unsatisfiable
                dependencies.
        """
        self._workflow_definition = workflow_definition
        results: Dict[str, 'SkillResult'] = {}
        final_output: Dict[str, Any] = {}

        # 1. Assign every step an invocation id and queue it for execution.
        pending: deque = deque()
        for step_name, step_definition in workflow_definition.items():
            step_definition["invocation_id"] = str(uuid.uuid4())
            pending.append(step_name)

        # 2. Execute steps as their dependencies become available.
        stalls = 0  # Consecutive re-queues without progress; detects deadlock.
        while pending:
            step_name = pending.popleft()
            step_definition = workflow_definition[step_name]
            skill_name = step_definition["skill"]
            invocation_id = step_definition["invocation_id"]

            if not self._step_ready(step_definition, results):
                pending.append(step_name)
                stalls += 1
                if stalls > len(pending):
                    # A full pass over the queue executed nothing: the
                    # remaining steps can never run.
                    raise ValueError(
                        f"Workflow has circular or unsatisfiable dependencies (stuck at step '{step_name}').")
                logging.info(f"Skill '{skill_name}' dependencies not met (invocation_id: {invocation_id}). Re-queueing.")
                continue
            stalls = 0

            try:
                input_data = self._resolve_input_template(step_definition["input"], initial_input, results)
            except Exception as e:
                raise ValueError(f"Error resolving input template for skill '{skill_name}': {e}")

            logging.info(f"Executing skill '{skill_name}' (invocation_id: {invocation_id})")
            skill_result = self.skill_executor.execute_skill(
                SkillInvocation(skill_name, input_data, invocation_id=invocation_id))
            results[invocation_id] = skill_result

            if skill_result.error:
                # The executor already applied retries/fallbacks; log and
                # continue so independent branches still run.
                logging.error(f"Skill '{skill_name}' failed (invocation_id: {invocation_id}): {skill_result.error}")

        # 3. Collect outputs of every executed step.
        for step_name, step_definition in workflow_definition.items():
            invocation_id = step_definition["invocation_id"]
            if invocation_id in results:
                output_field = step_definition.get("output_field", step_name)  # Default field is the step name.
                final_output[output_field] = results[invocation_id].output_data

        return final_output

    def _step_ready(self, step_definition: Dict[str, Any], results: Dict[str, 'SkillResult']) -> bool:
        """True when every dependency of the step already has a result."""
        # a) "$name.field" template references must point at completed steps.
        #    Unknown references do not block here; resolution raises a clear
        #    ValueError instead.
        for value in step_definition["input"].values():
            if isinstance(value, str) and value.startswith("$"):
                parts = value[1:].split(".")
                if len(parts) == 2:
                    ref_id = self._lookup_invocation_id(parts[0])
                    if ref_id is not None and ref_id not in results:
                        return False
        # b) Registry-declared dependencies (skill names) must each have a
        #    completed step; a dependency absent from the workflow keeps the
        #    step blocked and the deadlock guard then raises.
        for dep_skill_name in self.skill_registry.get_dependencies(step_definition["skill"]):
            dep_id = self._find_invocation_id_by_skill_name(self._workflow_definition, dep_skill_name)
            if dep_id is None or dep_id not in results:
                return False
        return True

    def _lookup_invocation_id(self, name: str) -> Optional[str]:
        """Invocation id for *name*, tried first as a step name, then as a skill name."""
        step_definition = self._workflow_definition.get(name)
        if step_definition is not None:
            return step_definition.get("invocation_id")
        return self._find_invocation_id_by_skill_name(self._workflow_definition, name)

    def _resolve_input_template(self, input_template: Dict[str, Any], initial_input: Dict[str, Any],
                                results: Dict[str, 'SkillResult']) -> Dict[str, Any]:
        """Substitutes "$" references with values from skill results or initial input.

        Raises:
            ValueError: for any reference that cannot be resolved.
        """
        resolved_input: Dict[str, Any] = {}
        for key, value in input_template.items():
            if not (isinstance(value, str) and value.startswith("$")):
                resolved_input[key] = value  # Literal value.
                continue
            reference = value[1:]  # Strip the "$".
            parts = reference.split(".")
            if len(parts) == 2:
                # "$name.field": pull a field out of a prior step's output.
                # The old code looked the name up only by skill name, so the
                # documented "$step_name.field" form never resolved.
                invocation_id = self._lookup_invocation_id(parts[0])
                result = results.get(invocation_id) if invocation_id else None
                if result is not None and result.output_data and parts[1] in result.output_data:
                    resolved_input[key] = result.output_data[parts[1]]
                else:
                    raise ValueError(f"Could not resolve skill result reference: {reference}")
            elif reference in initial_input:
                resolved_input[key] = initial_input[reference]
            else:
                raise ValueError(f"Could not resolve initial input reference: {reference}")
        return resolved_input

    def _find_invocation_id_by_skill_name(self, workflow_definition: Dict[str, Any], skill_name: str) -> Optional[str]:
        """Finds the invocation id of the first step using *skill_name*, if any."""
        for step_definition in workflow_definition.values():
            if step_definition.get("skill") == skill_name:
                return step_definition.get("invocation_id")
        return None

    def get_workflow_definition(self) -> Dict[str, Any]:
        """Returns the workflow definition currently in use."""
        return self._workflow_definition

    def set_workflow_definition(self, workflow_definition: Dict[str, Any]) -> None:
        """Sets the workflow definition for the orchestrator."""
        self._workflow_definition = workflow_definition

# Example Usage
if __name__ == '__main__':

    # Build the registry that will hold every available skill.
    registry = SkillRegistry()

    # Define a few demo skills; each returns its result inside a dict.
    def add(x: int, y: int) -> Dict[str, int]:
        """A simple addition skill."""
        time.sleep(0.1)  # Simulate some work
        return {"result": x + y}

    def multiply(x: int, y: int) -> Dict[str, int]:
        """A simple multiplication skill."""
        time.sleep(0.2)
        return {"result": x * y}

    def square(x: int) -> Dict[str, int]:
        """A simple squaring skill."""
        time.sleep(0.15)
        return {"result": x * x}

    # Optional validators for the "add" skill.
    def validate_add_input(input_data: Dict[str, Any]) -> bool:
        """Accepts only integer addends."""
        return isinstance(input_data.get("x"), int) and isinstance(input_data.get("y"), int)

    def validate_add_output(output_data: Dict[str, Any]) -> bool:
        """Accepts only an integer sum."""
        return isinstance(output_data.get("result"), int)

    # Register the demo skills.
    registry.register_skill("add", add, input_validator=validate_add_input, output_validator=validate_add_output)
    registry.register_skill("multiply", multiply, dependencies=["add"])  # Multiply depends on add
    registry.register_skill("square", square)

    # Wire an executor and an orchestrator to the registry.
    skill_executor = SkillExecutor(registry, max_retries=2, retry_delay=0.5, timeout=1.0, enable_cache=True)
    orchestrator = SkillOrchestrator(registry, skill_executor)

    # A three-step workflow chained via "$step.field" references.
    workflow_definition = {
        "add_step": {
            "skill": "add",
            "input": {"x": 5, "y": 3}
        },
        "multiply_step": {
            "skill": "multiply",
            "input": {"x": 2, "y": "$add_step.result"}  # Multiply depends on the result of add
        },
        "square_step": {
            "skill": "square",
            "input": {"x": "$multiply_step.result"} # Square depends on the result of multiply
        }
    }

    orchestrator.set_workflow_definition(workflow_definition)

    # Run the workflow (no initial input needed here).
    try:
        demo_output = orchestrator.execute_workflow(workflow_definition, {})
        print(f"Final Output: {demo_output}")
    except Exception as e:
        print(f"Workflow execution failed: {e}")

    # A second workflow that pulls "$initial_x"/"$initial_y" from the caller.
    workflow_definition_with_initial_input = {
        "add_step": {
            "skill": "add",
            "input": {"x": "$initial_x", "y": "$initial_y"}
        },
        "multiply_step": {
            "skill": "multiply",
            "input": {"x": 2, "y": "$add_step.result"}  # Multiply depends on the result of add
        }
    }

    orchestrator.set_workflow_definition(workflow_definition_with_initial_input)

    try:
        seeded_output = orchestrator.execute_workflow(
            workflow_definition_with_initial_input, {"initial_x": 10, "initial_y": 5})
        print(f"Final Output with initial input: {seeded_output}")
    except Exception as e:
        print(f"Workflow execution failed: {e}")

    # Inspect execution traces and aggregate metrics.
    add_invocation_id = orchestrator._find_invocation_id_by_skill_name(workflow_definition, "add")
    if add_invocation_id:
        trace = skill_executor.get_trace(add_invocation_id)
        print(f"Execution trace for 'add': {trace}")

    metrics = skill_executor.get_metrics()
    print(f"All metrics: {metrics}")
