import json
import yaml
import os
import logging
from typing import List, Dict, Tuple, Optional

# Configure module-wide logging: timestamped, level-tagged messages at INFO and above.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

class TaskExtractor:
    """
    Extracts task definitions from JSON/YAML files, catalogs them,
    builds a dependency graph, and identifies orphaned/blocked tasks.

    A task is a dict that must carry a unique 'task_id' key and may carry
    a 'dependencies' key listing the task_ids it depends on.
    """

    def __init__(self):
        """
        Initializes the TaskExtractor with an empty catalog and graph.
        """
        # task_id -> full task definition as read from file
        self.tasks: Dict[str, Dict] = {}
        # task_id -> list of task_ids this task depends on
        self.dependency_graph: Dict[str, List[str]] = {}

    def load_tasks_from_file(self, filepath: str) -> None:
        """
        Loads task definitions from a JSON or YAML file.

        The file must contain a top-level list of task dicts; any other
        shape (including an empty YAML document, which parses to None)
        is logged as an error and skipped. Individual tasks are added
        via add_task, which enforces task_id presence and uniqueness.

        Args:
            filepath: The path to the task file.
        """
        try:
            with open(filepath, 'r') as f:
                if filepath.endswith('.json'):
                    tasks = json.load(f)
                elif filepath.endswith(('.yaml', '.yml')):
                    # safe_load: never construct arbitrary Python objects from YAML
                    tasks = yaml.safe_load(f)
                else:
                    logging.error(f"Unsupported file type: {filepath}")
                    return

            if not isinstance(tasks, list):
                logging.error(f"Expected a list of tasks in {filepath}, but found {type(tasks)}")
                return

            for task in tasks:
                self.add_task(task)

        except FileNotFoundError:
            logging.error(f"File not found: {filepath}")
        except json.JSONDecodeError:
            logging.error(f"Failed to decode JSON in: {filepath}")
        except yaml.YAMLError as e:
            logging.error(f"Failed to decode YAML in: {filepath} - {e}")
        except Exception as e:
            logging.exception(f"An unexpected error occurred while loading tasks from {filepath}: {e}")

    def add_task(self, task: Dict) -> None:
        """
        Adds a task to the task catalog and updates the dependency graph.

        Tasks missing a 'task_id' and duplicate task_ids are logged as
        warnings and ignored; the first definition of a task_id wins.

        Args:
            task: A dictionary representing the task definition.
        """
        try:
            task_id = task.get('task_id')
            if not task_id:
                logging.warning(f"Task has no task_id: {task}")
                return

            if task_id in self.tasks:
                logging.warning(f"Duplicate task_id: {task_id}")
                return

            self.tasks[task_id] = task
            # 'dependencies' may be absent or explicitly null in the file;
            # normalize both to an empty list.
            self.dependency_graph[task_id] = task.get('dependencies', []) or []

        except Exception as e:
            # Defensive: a malformed entry (e.g. a non-dict) must not abort
            # loading of the remaining tasks in the file.
            logging.exception(f"Error adding task {task}: {e}")

    def build_dependency_graph(self) -> None:
        """
        Builds a task dependency graph based on task dependencies.

        No-op: the graph is maintained incrementally by add_task. Kept
        for interface compatibility with existing callers.
        """
        pass

    def identify_orphaned_tasks(self) -> List[str]:
        """
        Identifies tasks that have no dependencies and are not depended on by other tasks.

        Returns:
            A list of task IDs that are orphaned, in catalog insertion order.
        """
        # Every task_id that appears as some other task's dependency.
        # A set makes each membership test O(1) instead of O(total edges).
        referenced = {dep for deps in self.dependency_graph.values() for dep in deps}

        # Orphaned == isolated node: no outgoing edges (own dependency list
        # is empty) AND no incoming edges (nobody depends on it).
        return [
            task_id for task_id in self.tasks
            if not self.dependency_graph.get(task_id) and task_id not in referenced
        ]

    def identify_blocked_tasks(self) -> List[str]:
        """
        Identifies tasks that have dependencies that do not exist.

        Returns:
            A list of task IDs that are blocked; each appears at most once
            even if several of its dependencies are missing.
        """
        return [
            task_id
            for task_id, dependencies in self.dependency_graph.items()
            if any(dependency not in self.tasks for dependency in dependencies)
        ]

    def extract_task_details(self, task_id: str) -> Optional[Dict]:
        """
        Extracts details for a specific task.

        Args:
            task_id: The ID of the task to extract.

        Returns:
            A dictionary containing the task details, or None if the task is not found.
        """
        return self.tasks.get(task_id)

    def process_directory(self, directory: str) -> None:
        """
        Processes all JSON and YAML files in a directory (non-recursive).

        Args:
            directory: The directory to process.
        """
        try:
            for filename in os.listdir(directory):
                if filename.endswith(('.json', '.yaml', '.yml')):
                    self.load_tasks_from_file(os.path.join(directory, filename))
        except FileNotFoundError:
            logging.error(f"Directory not found: {directory}")
        except Exception as e:
            logging.exception(f"Error processing directory {directory}: {e}")

    def get_all_tasks(self) -> Dict[str, Dict]:
        """
        Returns all loaded tasks.

        Returns:
            A dictionary of all tasks, keyed by task_id.
        """
        return self.tasks

    def get_dependency_graph(self) -> Dict[str, List[str]]:
        """
        Returns the dependency graph.

        Returns:
            A dictionary mapping each task_id to its list of dependencies.
        """
        return self.dependency_graph


if __name__ == '__main__':
    # Example Usage:
    extractor = TaskExtractor()
    example_dir = '/mnt/e/genesis-system/example_tasks'  # Create this directory and add some task files

    # Seed the example directory with sample task files on first run.
    if not os.path.exists(example_dir):
        os.makedirs(example_dir)
        json_tasks = [
            {'task_id': 'task1', 'description': 'First task'},
            {'task_id': 'task2', 'description': 'Second Task', 'dependencies': ['task1']},
        ]
        yaml_tasks = [
            {'task_id': 'task3', 'description': 'Third task', 'dependencies': ['task2']},
            {'task_id': 'task4', 'description': 'Fourth Task'},
        ]
        with open(os.path.join(example_dir, 'task1.json'), 'w') as f:
            json.dump(json_tasks, f)
        with open(os.path.join(example_dir, 'task2.yaml'), 'w') as f:
            yaml.dump(yaml_tasks, f)

    extractor.process_directory(example_dir)

    # Report on everything that was loaded.
    print("All Tasks:", extractor.get_all_tasks())
    print("Dependency Graph:", extractor.get_dependency_graph())
    print("Orphaned Tasks:", extractor.identify_orphaned_tasks())
    print("Blocked Tasks:", extractor.identify_blocked_tasks())
    print("Task Details for task1:", extractor.extract_task_details('task1'))
