import os
import re
import json
import yaml
import toml
import dotenv
import logging
import argparse
from typing import Dict, List, Set, Any

# Root-logger setup: emit INFO and above, each record prefixed with a
# timestamp and its severity level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

class ConfigMapper:
    """
    Maps configuration files and their relationships to code files.

    The mapper walks a directory tree, collects the top-level keys of every
    configuration file it finds, and then searches the code files for textual
    occurrences of those keys.

    Attributes:
        root_dir: Directory that is scanned recursively.
        config_files: ``{filepath: filetype}`` for every config file found.
        code_files: Paths of every code file found.
        config_usage: ``{config_filepath: [code_filepaths]}``.
        config_variables: ``{config_filepath: {variable names}}``.
        variable_usage: ``{variable: [code_filepaths]}``.
        undefined_variables: ``{code_filepath: [names]}`` filled in by
            ``validate_config_completeness``.
    """

    # File-name suffixes recognised by scan_files().
    CONFIG_EXTENSIONS = ('.env', '.json', '.yaml', '.yml', '.toml')
    CODE_EXTENSIONS = ('.py', '.js', '.java', '.go', '.c', '.cpp', '.cs')  # Add more code file extensions if needed

    def __init__(self, root_dir: str):
        """
        Initializes the ConfigMapper with a root directory to scan.

        Args:
            root_dir: The root directory to scan for configuration and code files.
        """
        self.root_dir = root_dir
        self.config_files: Dict[str, str] = {}  # {filepath: filetype}
        self.code_files: List[str] = []
        self.config_usage: Dict[str, List[str]] = {}  # {config_filepath: [code_filepaths]}
        self.config_variables: Dict[str, Set[str]] = {}  # {config_filepath: {variables}}
        self.variable_usage: Dict[str, List[str]] = {}  # {variable: [code_filepaths]}
        self.undefined_variables: Dict[str, List[str]] = {}  # {code_filepath: [variables]}

    def scan_files(self) -> None:
        """
        Scans the root directory for configuration and code files.

        Files are classified purely by file-name suffix; matches are recorded
        in ``config_files`` and ``code_files``.
        """
        logging.info("Scanning files...")
        for root, _, files in os.walk(self.root_dir):
            for file in files:
                filepath = os.path.join(root, file)
                if file.endswith(self.CONFIG_EXTENSIONS):
                    # A bare ".env" has no stem, so take the text after the
                    # last dot rather than relying on os.path.splitext.
                    self.config_files[filepath] = file.rsplit('.', 1)[-1]
                elif file.endswith(self.CODE_EXTENSIONS):
                    self.code_files.append(filepath)
        logging.info(
            "Found %d config files and %d code files.",
            len(self.config_files),
            len(self.code_files),
        )

    def parse_config_files(self) -> None:
        """
        Parses the configuration files and extracts variables.

        Every file that has a known parser ends up with an entry in
        ``config_variables``; the entry is an empty set when parsing fails.
        """
        logging.info("Parsing config files...")
        parsers = {
            'env': self._parse_env,
            'json': self._parse_json,
            'yaml': self._parse_yaml,
            'yml': self._parse_yaml,
            'toml': self._parse_toml,
        }
        for filepath, filetype in self.config_files.items():
            parser = parsers.get(filetype)
            if parser is None:
                continue
            try:
                self.config_variables[filepath] = parser(filepath)
            except Exception as e:
                logging.error("Error parsing config file %s: %s", filepath, e)
                self.config_variables[filepath] = set()  # Ensure entry exists even on failure

    def _parse_env(self, filepath: str) -> Set[str]:
        """Parses .env files and returns the set of variable names they define."""
        try:
            # dotenv_values() only reads the file. load_dotenv() would inject
            # the values into os.environ, and returning os.environ's keys
            # would report every process variable (PATH, HOME, ...) as if it
            # were defined in this file.
            return set(dotenv.dotenv_values(dotenv_path=filepath))
        except Exception as e:
            logging.error("Error parsing .env file %s: %s", filepath, e)
            return set()

    def _parse_json(self, filepath: str) -> Set[str]:
        """Parses .json files and returns a set of variable names (top-level keys)."""
        try:
            # utf-8-sig reads plain UTF-8 and also strips a leading BOM,
            # which json.load() otherwise rejects.
            with open(filepath, 'r', encoding='utf-8-sig') as f:
                data = json.load(f)
        except Exception as e:
            logging.error("Error parsing JSON file %s: %s", filepath, e)
            return set()
        if isinstance(data, dict):
            return set(data)
        # Arrays and scalars are valid JSON but carry no named keys.
        logging.warning("JSON file %s does not contain an object at the root.", filepath)
        return set()

    def _parse_yaml(self, filepath: str) -> Set[str]:
        """Parses .yaml files and returns a set of variable names (top-level keys)."""
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)
        except Exception as e:
            logging.error("Error parsing YAML file %s: %s", filepath, e)
            return set()
        if isinstance(data, dict):
            # YAML keys may be ints, bools, etc.; normalise to str so the
            # names can be used in regex searches later on.
            return {str(key) for key in data}
        logging.warning(f"YAML file {filepath} does not contain a dictionary at the root.")
        return set()

    def _parse_toml(self, filepath: str) -> Set[str]:
        """Parses .toml files and returns a set of variable names (top-level keys)."""
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                data = toml.load(f)
            return set(data)
        except Exception as e:
            logging.error("Error parsing TOML file %s: %s", filepath, e)
            return set()

    def map_config_usage(self) -> None:
        """
        Maps which code files use which configuration files and variables.

        ``config_usage`` and ``variable_usage`` are rebuilt from scratch on
        every call, and each code file is listed at most once per entry.
        A variable counts as "used" when its name appears in the file as a
        whole token (not embedded in a longer identifier).
        """
        logging.info("Mapping config usage...")
        self.config_usage = {}
        self.variable_usage = {}

        # Compile one pattern per distinct variable, once, instead of once
        # per (variable, code file) pair. Lookarounds are used rather than
        # \b so that names starting or ending with a non-word character
        # (e.g. "$schema") can still match. Empty names are skipped because
        # they would match everywhere.
        names = set()
        for variables in self.config_variables.values():
            names.update(str(variable) for variable in variables)
        patterns = {
            name: re.compile(r'(?<!\w)' + re.escape(name) + r'(?!\w)')
            for name in sorted(names)
            if name
        }

        # dict.fromkeys() drops duplicate paths while keeping scan order.
        for code_file in dict.fromkeys(self.code_files):
            try:
                with open(code_file, 'r', encoding='utf-8') as f:
                    code = f.read()
            except (OSError, ValueError) as e:  # ValueError covers decode errors
                logging.error("Error processing code file %s: %s", code_file, e)
                continue

            used = [name for name, pattern in patterns.items() if pattern.search(code)]
            if not used:
                continue

            for name in used:
                self.variable_usage.setdefault(name, []).append(code_file)

            used_names = set(used)
            for config_file, variables in self.config_variables.items():
                if any(str(variable) in used_names for variable in variables):
                    self.config_usage.setdefault(config_file, []).append(code_file)

    def validate_config_completeness(self) -> None:
        """
        Validates configuration completeness by checking if all used variables are defined.

        Every quoted UPPER_SNAKE_CASE string literal in a code file is treated
        as a potential configuration variable. Names that are not defined in
        any parsed config file are logged and stored, per code file and
        without duplicates, in ``undefined_variables``.
        """
        logging.info("Validating config completeness...")

        # Union of all defined names, built once for O(1) membership tests.
        defined = set()
        for variables in self.config_variables.values():
            defined.update(variables)

        # Quoted uppercase identifiers, e.g. os.getenv("MY_VAR") or
        # config['S3_BUCKET']; digits are allowed after the first character.
        candidate = re.compile(r'[\'"]([A-Z_][A-Z0-9_]*)[\'"]')

        undefined_variables: Dict[str, List[str]] = {}
        for code_file in dict.fromkeys(self.code_files):
            try:
                with open(code_file, 'r', encoding='utf-8') as f:
                    code = f.read()
            except (OSError, ValueError) as e:  # ValueError covers decode errors
                logging.error("Error processing code file %s: %s", code_file, e)
                continue

            # dict.fromkeys() de-duplicates while keeping first-seen order.
            missing = [
                name
                for name in dict.fromkeys(candidate.findall(code))
                if name not in defined
            ]
            if missing:
                undefined_variables[code_file] = missing

        self.undefined_variables = undefined_variables

        if undefined_variables:
            logging.warning("Potential undefined variables found:")
            for code_file, variables in undefined_variables.items():
                logging.warning("  %s: %s", code_file, variables)
        else:
            logging.info("No potential undefined variables found.")

    def generate_report(self) -> Dict[str, Any]:
        """
        Generates a report of the config mapping.

        The report is a snapshot made of plain dicts, lists and strings so it
        can be passed directly to ``json.dumps``; variable sets are emitted as
        sorted lists.

        Returns:
            A dictionary containing the config mapping report.
        """
        report = {
            "config_files": dict(self.config_files),
            "code_files": list(self.code_files),
            "config_usage": {
                config_file: list(code_files)
                for config_file, code_files in self.config_usage.items()
            },
            # Sets are not JSON-serializable; sorting also makes the output stable.
            "config_variables": {
                config_file: sorted(variables, key=str)
                for config_file, variables in self.config_variables.items()
            },
            "variable_usage": {
                variable: list(code_files)
                for variable, code_files in self.variable_usage.items()
            },
        }
        return report

    def run(self) -> None:
        """
        Runs the config mapper.

        Executes the scan, parse, usage-mapping and validation steps in order.
        """
        self.scan_files()
        self.parse_config_files()
        self.map_config_usage()
        self.validate_config_completeness()
        logging.info("Config mapper run complete.")


def _report_json_default(obj):
    """JSON fallback encoder: emit sets as sorted lists, reject anything else."""
    if isinstance(obj, (set, frozenset)):
        return sorted(obj, key=str)
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


def main(argv=None) -> int:
    """
    Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` means use ``sys.argv[1:]``.

    Returns:
        Process exit status (0 on success). Invalid arguments terminate the
        process through argparse's usual error exit.
    """
    parser = argparse.ArgumentParser(description='Map configuration files and their relationships to code files.')
    parser.add_argument('root_dir', help='The root directory to scan.')
    parser.add_argument(
        '-o', '--output',
        default=None,
        help='Optional path of a file to write the JSON report to.',
    )
    args = parser.parse_args(argv)

    # os.walk() silently yields nothing for a missing path, which would make
    # a mistyped directory look like a successful, empty scan.
    if not os.path.isdir(args.root_dir):
        parser.error(f"root_dir is not a directory: {args.root_dir}")

    mapper = ConfigMapper(args.root_dir)
    mapper.run()

    report = mapper.generate_report()
    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            json.dump(report, f, indent=4, default=_report_json_default)
        logging.info("Report written to %s", args.output)
    else:
        logging.info("Report generation complete.  Check logs for details and consider printing report to JSON.")
    return 0


if __name__ == '__main__':
    raise SystemExit(main())
