"""
Generates a dependency graph of Python modules within a specified directory.

This script analyzes Python files to identify import statements, including
relative imports and external dependencies. It constructs a graph
representing these dependencies and can detect circular dependencies
and missing imports. The graph can be output in Graphviz dot format.
"""

import ast
import logging
import os
import re
import sys
from typing import List, Dict, Tuple, Set

# Configure the root logger when this module is loaded: INFO and above are
# emitted, each record prefixed with its timestamp and level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


class DependencyGraphGenerator:
    """
    Builds a module-level import graph for the Python files under a directory.

    Typical use: construct with a root directory, call
    ``build_dependency_graph()``, then query the result with
    ``find_circular_dependencies()``, ``find_missing_imports()`` or
    ``generate_dot_file()``.
    """

    # Names the original hand-written heuristic treated as standard library.
    # Kept so that interpreters without ``sys.stdlib_module_names`` (< 3.10),
    # and modules since removed from the stdlib, are still recognised.
    _STDLIB_FALLBACK = frozenset({
        "os", "sys", "math", "datetime", "json", "re", "typing", "logging", "ast", "unittest",
        "collections", "io", "random", "threading", "multiprocessing", "socket", "http", "urllib",
        "venv", "asyncio", "concurrent", "functools", "itertools", "contextlib", "enum", "dataclasses",
        "pathlib", "shutil", "tempfile", "subprocess", "signal", "select", "fcntl", "termios", "pwd",
        "grp", "spwd", "resource", "time", "calendar", "locale", "gettext", "codecs", "encodings",
        "bz2", "gzip", "zipfile", "tarfile", "csv", "configparser", "email", "mimetypes", "base64",
        "hashlib", "hmac", "secrets", "uuid", "ipaddress", "socketserver", "ssl", "asyncore", "cgitb",
        "cgi", "wsgiref", "xml", "html", "webbrowser", "turtledemo", "tkinter", "pdb",
        "profile", "pstats", "trace", "doctest", "ensurepip", "zipapp", "compileall", "dis",
        "pickle", "copyreg", "shelve", "marshal", "dbm", "sqlite3", "zlib", "readline", "rlcompleter",
        "nntplib", "ftplib", "poplib", "imaplib", "smtplib", "telnetlib", "abc", "types",
        "weakref", "operator", "copy", "pprint", "reprlib", "textwrap", "difflib", "platform",
        "warnings", "gc", "inspect", "imp", "symtable", "traceback", "atexit", "argparse", "getopt",
        "errno", "mmap",
    })

    def __init__(self, root_directory: str):
        """
        Initializes the DependencyGraphGenerator.

        Args:
            root_directory: The root directory to scan for Python files.
        """
        self.root_directory = root_directory
        # Scanned module name -> ordered, de-duplicated list of imported names.
        self.dependencies: Dict[str, List[str]] = {}
        # Every name seen: scanned modules plus everything they import.
        self.all_modules: Set[str] = set()

    def find_python_files(self) -> List[str]:
        """
        Finds all Python files within the root directory.

        Returns:
            Paths of every ``*.py`` file below the root, each prefixed with
            ``root_directory`` (so absolute only if the root is absolute),
            in a deterministic, sorted traversal order.
        """
        python_files: List[str] = []
        for root, dirs, files in os.walk(self.root_directory):
            # Sorting in place makes os.walk descend in a stable order.
            dirs.sort()
            python_files.extend(
                os.path.join(root, name) for name in sorted(files) if name.endswith(".py")
            )
        return python_files

    def extract_imports(self, file_path: str) -> List[str]:
        """
        Extracts import statements from a Python file.

        Relative imports keep their leading dots (``from .a import x`` yields
        ``".a"``, ``from .. import b`` yields ``"..b"``) so that
        ``normalize_module_name`` can resolve them against the file location.

        Args:
            file_path: The path to the Python file.

        Returns:
            A list of imported module names, in source order as visited by
            ``ast.walk``. Empty if the file cannot be read or parsed (the
            failure is logged, not raised).
        """
        imports: List[str] = []
        try:
            with open(file_path, "r", encoding="utf-8") as file:
                tree = ast.parse(file.read(), filename=file_path)
        except FileNotFoundError:
            logging.error(f"File not found: {file_path}")
            return imports
        except SyntaxError as e:
            logging.error(f"Syntax error in {file_path}: {e}")
            return imports
        except Exception as e:
            # Best-effort scan: one unreadable file must not abort the run.
            logging.error(f"Error processing {file_path}: {e}")
            return imports

        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                imports.extend(alias.name for alias in node.names)
            elif isinstance(node, ast.ImportFrom):
                dots = "." * node.level
                if node.module:
                    imports.append(dots + node.module)
                elif node.level:
                    # ``from . import x``: assume each imported name is a
                    # sibling module; it may also be an attribute of the
                    # package, which cannot be told apart statically.
                    imports.extend(
                        dots + alias.name for alias in node.names if alias.name != "*"
                    )
        return imports

    def normalize_module_name(self, module_name: str, file_path: str) -> str:
        """
        Resolves a relative import to a dotted name relative to the root.

        The result uses the same naming scheme as ``get_module_name`` so that
        relative and absolute references to one module produce the same node.

        Args:
            module_name: The imported module name; leading dots mark a
                relative import, one dot per package level.
            file_path: The path of the file containing the import.

        Returns:
            The dotted module name for a resolvable relative import. Absolute
            names are returned unchanged, as are relative names that resolve
            to the root itself or outside it (a warning is logged).
        """
        if not module_name.startswith("."):
            return module_name

        remainder = module_name.lstrip(".")
        level = len(module_name) - len(remainder)

        # One dot means "the package containing this file"; each further dot
        # climbs one directory.
        package_dir = os.path.dirname(os.path.abspath(file_path))
        for _ in range(level - 1):
            package_dir = os.path.dirname(package_dir)
        target = os.path.join(package_dir, *remainder.split(".")) if remainder else package_dir

        try:
            relative = os.path.relpath(target, os.path.abspath(self.root_directory))
        except ValueError:
            # os.path.relpath raises on Windows when the paths are on different drives.
            relative = os.pardir

        escapes_root = (
            relative in (os.curdir, os.pardir)
            or relative.startswith(os.pardir + os.sep)
        )
        if escapes_root:
            logging.warning(f"Could not normalize relative import {module_name} from {file_path}.  Keeping relative path.")
            return module_name

        return relative.replace(os.sep, ".")

    def build_dependency_graph(self) -> None:
        """
        Builds the dependency graph by analyzing Python files.

        Populates ``dependencies`` with one entry per scanned file and
        ``all_modules`` with every scanned or imported name.
        """
        for file_path in self.find_python_files():
            module_name = self.get_module_name(file_path)
            resolved = [
                self.normalize_module_name(imp, file_path)
                for imp in self.extract_imports(file_path)
            ]
            # Merge with any earlier entry of the same name (e.g. pkg.py next
            # to pkg/__init__.py) and drop repeats while keeping first-seen order.
            previous = self.dependencies.get(module_name, [])
            unique_imports = list(dict.fromkeys([*previous, *resolved]))
            self.dependencies[module_name] = unique_imports
            self.all_modules.add(module_name)
            self.all_modules.update(unique_imports)

    def get_module_name(self, file_path: str) -> str:
        """
        Gets the module name from the file path.

        A package's ``__init__.py`` is named after the package itself
        (``pkg/__init__.py`` -> ``pkg``), matching how it is imported. An
        ``__init__.py`` directly in the root keeps the name ``__init__``
        because the root has no dotted name of its own.

        Args:
            file_path: The path to the Python file.

        Returns:
            The module name (e.g., core.knowledge.dependency_graph).
        """
        relative_path = os.path.relpath(file_path, self.root_directory)
        parts = os.path.splitext(relative_path)[0].split(os.sep)
        if len(parts) > 1 and parts[-1] == "__init__":
            parts.pop()
        return ".".join(parts)

    def find_circular_dependencies(self) -> List[List[str]]:
        """
        Finds circular dependencies in the dependency graph.

        Uses an iterative depth-first search so deep import chains cannot hit
        the interpreter recursion limit. Start nodes are visited in sorted
        order, which makes the output deterministic.

        Returns:
            A list of cycles. Each cycle lists the modules on the loop in
            import order, without repeating the first module at the end
            (``a -> b -> a`` is reported as ``["a", "b"]``).
        """
        visited: Set[str] = set()
        cycles: List[List[str]] = []

        for start in sorted(self.all_modules | set(self.dependencies)):
            if start in visited:
                continue
            visited.add(start)
            path: List[str] = [start]
            # Position of each module on the current path: O(1) "is it on the
            # stack" checks and O(1) cycle slicing.
            depth_of: Dict[str, int] = {start: 0}
            pending = [iter(dict.fromkeys(self.dependencies.get(start, [])))]

            while pending:
                neighbor = next(pending[-1], None)
                if neighbor is None:
                    # All imports of the module on top of the path are done.
                    pending.pop()
                    del depth_of[path.pop()]
                elif neighbor in depth_of:
                    cycles.append(path[depth_of[neighbor]:])
                elif neighbor not in visited:
                    visited.add(neighbor)
                    depth_of[neighbor] = len(path)
                    path.append(neighbor)
                    pending.append(iter(dict.fromkeys(self.dependencies.get(neighbor, []))))

        return cycles

    def find_missing_imports(self) -> List[Tuple[str, str]]:
        """
        Finds imports that are neither project modules nor standard library.

        A name counts as a project module when it is a scanned module (a key
        of ``dependencies``) or a parent package of one. ``all_modules`` is
        deliberately not used here: it also contains every imported name, so
        checking against it can never report anything.

        Returns:
            A list of tuples containing the module and the missing import.
        """
        project_names: Set[str] = set()
        for module in self.dependencies:
            parts = module.split(".")
            project_names.update(".".join(parts[:end]) for end in range(1, len(parts) + 1))

        missing_imports: List[Tuple[str, str]] = []
        for module, imports in self.dependencies.items():
            for imp in imports:
                if imp not in project_names and not self.is_stdlib(imp):
                    missing_imports.append((module, imp))
        return missing_imports

    def is_stdlib(self, module_name: str) -> bool:
        """
        Checks if a module is part of the Python standard library.

        Only the top-level package is compared, so ``os.path`` and
        ``xml.etree.ElementTree`` are recognised through ``os`` and ``xml``.

        Args:
            module_name: The module name to check.

        Returns:
            True if the top-level package is listed in
            ``sys.stdlib_module_names`` (when the interpreter provides it) or
            in the built-in fallback list, False otherwise.
        """
        top_level = module_name.split(".", 1)[0]
        interpreter_names = getattr(sys, "stdlib_module_names", frozenset())
        return top_level in interpreter_names or top_level in self._STDLIB_FALLBACK

    @staticmethod
    def _dot_quote(name: str) -> str:
        """Returns ``name`` as a double-quoted DOT identifier."""
        # Inside a quoted DOT ID only the double quote needs escaping.
        return '"' + name.replace('"', '\\"') + '"'

    def generate_dot_file(self, output_path: str) -> None:
        """
        Generates a Graphviz dot file representing the dependency graph.

        Modules without imports are written as stand-alone nodes so they
        still appear in the rendered graph. I/O failures are logged, not
        raised.

        Args:
            output_path: The path to save the dot file.
        """
        lines = ["digraph DependencyGraph {\n"]
        for module, imports in self.dependencies.items():
            source = self._dot_quote(module)
            if not imports:
                lines.append(f"    {source};\n")
            lines.extend(f"    {source} -> {self._dot_quote(imp)};\n" for imp in imports)
        lines.append("}\n")

        try:
            with open(output_path, "w", encoding="utf-8") as f:
                f.writelines(lines)
            logging.info(f"Dependency graph saved to {output_path}")
        except OSError as e:
            logging.error(f"Error generating dot file: {e}")


if __name__ == "__main__":
    # Usage: python <this script> [root_directory] [output_dot_path]
    # Both arguments are optional; the defaults reproduce the previous
    # hard-coded behaviour.
    cli_args = sys.argv[1:]
    root_directory = cli_args[0] if len(cli_args) > 0 else "/mnt/e/genesis-system"
    output_path = cli_args[1] if len(cli_args) > 1 else "dependency_graph.dot"

    generator = DependencyGraphGenerator(root_directory)
    generator.build_dependency_graph()

    circular_dependencies = generator.find_circular_dependencies()
    if circular_dependencies:
        logging.warning(f"Circular dependencies found: {circular_dependencies}")
    else:
        logging.info("No circular dependencies found.")

    missing_imports = generator.find_missing_imports()
    if missing_imports:
        logging.warning(f"Missing imports found: {missing_imports}")
    else:
        logging.info("No missing imports found.")

    generator.generate_dot_file(output_path)
