import os
import re
import logging
import argparse
from typing import List, Tuple, Dict

# Root-logger setup: emit INFO and above, each record prefixed with a
# timestamp and its severity level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

class CredentialFinder:
    """
    Find and catalog credential references within a codebase.

    Files are read line by line as UTF-8 text and every line is matched
    against three families of compiled regular expressions (API keys,
    passwords and connection strings). Each hit becomes a dictionary record
    that can be written out with generate_security_audit_report().
    """

    def __init__(self):
        """
        Initializes the CredentialFinder with regex patterns for identifying
        API keys, password variables, and connection strings.

        Patterns normally expose two groups: group 1 is the variable name and
        group 2 is the assigned value. A pattern without groups (such as the
        bare "sk-..." token pattern) is treated as matching the secret itself.
        """
        self.api_key_patterns = [
            re.compile(r"(API_KEY|APIKEY|api_key|apikey)\s*=\s*[\"']?([A-Za-z0-9_-]+)[\"']?"),
            re.compile(r"sk-[a-zA-Z0-9]{48}"),  # OpenAI-like API Keys
        ]
        self.password_patterns = [
            re.compile(r"(PASSWORD|Password|password)\s*=\s*[\"']?([A-Za-z0-9_-]+)[\"']?"),
            re.compile(r"(PASS|Pass|pass)\s*=\s*[\"']?([A-Za-z0-9_-]+)[\"']?"),
        ]
        # The value group runs up to the next quote or the end of the line.
        # A lazy "(.*?)" followed only by an optional quote would always
        # capture the empty string, losing the connection string entirely.
        self.connection_string_patterns = [
            re.compile(r"(DATABASE_URL|DATABASEURL|database_url|databaseurl)\s*=\s*[\"']?([^\"'\r\n]*)[\"']?"),
            re.compile(r"(DB_CONNECTION_STRING|DBCONNECTIONSTRING|db_connection_string|dbconnectionstring)\s*=\s*[\"']?([^\"'\r\n]*)[\"']?"),
            re.compile(r"(REDIS_URL|REDISURL|redis_url|redisurl)\s*=\s*[\"']?([^\"'\r\n]*)[\"']?"),
            re.compile(r"(QDRANT_URL|QDRANTURL|qdrant_url|qdranturl)\s*=\s*[\"']?([^\"'\r\n]*)[\"']?"),
        ]

    def _pattern_groups(self):
        """Return (record type, log label, patterns) triples in reporting order."""
        # Built on demand so that pattern lists modified after construction
        # are still honoured.
        return (
            ("API_KEY", "API Key", self.api_key_patterns),
            ("PASSWORD", "Password", self.password_patterns),
            ("CONNECTION_STRING", "Connection String", self.connection_string_patterns),
        )

    @staticmethod
    def _extract(match):
        """Return the (env_var_name, value) pair described by a regex match."""
        groups = match.groups()
        if not groups:
            # Bare-token pattern: there is no variable name and the whole
            # match is the secret. Calling match.group(1) here would raise
            # IndexError and the finding would be lost.
            return "", match.group(0).strip()
        env_var_name = (groups[0] or "").strip()
        value = (groups[1] or "").strip() if len(groups) > 1 else ""
        return env_var_name, value

    def _scan_line(self, filepath, line_number, line, pattern_groups):
        """Return the credential records found on a single line of text."""
        found = []
        for cred_type, label, patterns in pattern_groups:
            seen_values = []
            for pattern in patterns:
                match = pattern.search(line)
                if not match:
                    continue
                env_var_name, value = self._extract(match)
                # A bare token that is already part of a named assignment on
                # this line (e.g. API_KEY=sk-...) would only duplicate it.
                if not env_var_name and any(value in seen for seen in seen_values):
                    continue
                seen_values.append(value)
                found.append({
                    "source_file": filepath,
                    "line_number": str(line_number),
                    "env_var_name": env_var_name,
                    "type": cred_type,
                    "value": value,
                })
                logging.info("Found %s in %s:%s", label, filepath, line_number)
        return found

    def find_credential_references(self, filepath: str) -> List[Dict[str, str]]:
        """
        Finds credential references within a given file.

        The file is read as UTF-8 text. Problems opening or decoding it are
        logged and never raised; records collected before the problem
        occurred are still returned.

        Args:
            filepath: The path to the file to analyze.

        Returns:
            A list of dictionaries, where each dictionary represents a credential
            reference found in the file. Each dictionary contains the following keys:
            - "source_file": The path to the file where the reference was found.
            - "line_number": The 1-based line number, as a string.
            - "env_var_name": The matched variable name, or "" when the
              pattern matched a bare secret with no assignment.
            - "type": The type of credential ("API_KEY", "PASSWORD" or
              "CONNECTION_STRING").
            - "value": The matched value, or "" when none was captured.
        """
        results: List[Dict[str, str]] = []
        pattern_groups = self._pattern_groups()

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                for line_number, line in enumerate(f, 1):
                    results.extend(
                        self._scan_line(filepath, line_number, line, pattern_groups)
                    )
        except FileNotFoundError:
            logging.error("File not found: %s", filepath)
        except UnicodeDecodeError:
            # Binary or differently-encoded files are expected when walking a
            # whole tree; report them separately from real I/O failures.
            logging.warning("Stopped reading %s: not valid UTF-8 text", filepath)
        except Exception as e:
            # Deliberate best effort: one unreadable file must not abort a scan.
            logging.error("Error processing file %s: %s", filepath, e)

        return results

    def scan_directory(self, directory: str) -> List[Dict[str, str]]:
        """
        Scans a directory recursively for credential references.

        Sub-directories and files are visited in sorted order so that repeated
        scans of the same tree produce records in the same order.

        Args:
            directory: The path to the directory to scan.

        Returns:
            A list of dictionaries, where each dictionary represents a credential
            reference found in the directory. The list is empty (and an error
            is logged) when the path is not an existing directory.
        """
        if not os.path.isdir(directory):
            # os.walk() yields nothing for a missing path, which would
            # otherwise look exactly like a clean scan.
            logging.error("Directory not found: %s", directory)
            return []

        all_results: List[Dict[str, str]] = []
        for root, dirs, files in os.walk(directory):
            dirs.sort()  # in-place sort steers the order os.walk descends in
            for filename in sorted(files):
                filepath = os.path.join(root, filename)
                all_results.extend(self.find_credential_references(filepath))
        return all_results

    def generate_security_audit_report(self, results: List[Dict[str, str]], output_file: str = "security_audit_report.txt") -> None:
        """
        Generates a security audit report from the found credential references.

        The whole report is assembled in memory before the output file is
        opened, so a malformed record cannot leave a truncated report behind.
        Failures are logged and never raised.

        Args:
            results: A list of dictionaries, where each dictionary represents a credential
                reference found in the codebase.
            output_file: The name of the file to write the report to.
        """
        # NOTE(review): captured secret values are written to the report in
        # plaintext; treat the output file as sensitive.
        try:
            lines = ["SECURITY AUDIT REPORT\n", "----------------------\n\n"]
            for result in results:
                lines.append(f"Source File: {result['source_file']}\n")
                lines.append(f"Line Number: {result['line_number']}\n")
                lines.append(f"Type: {result['type']}\n")
                lines.append(f"Environment Variable Name: {result['env_var_name']}\n")
                if result.get('value'):
                    lines.append(f"Value: {result['value']}\n")
                lines.append("\n")
            with open(output_file, 'w', encoding='utf-8') as f:
                f.writelines(lines)
        except Exception as e:
            # Deliberate best effort: reporting problems are logged, not raised.
            logging.error("Error generating security audit report: %s", e)
            return
        logging.info("Security audit report generated at %s", output_file)

if __name__ == '__main__':
    # Command-line entry point: scan the given directory and write every
    # credential reference found to the report file.
    cli = argparse.ArgumentParser(description="Credential Finder Tool")
    cli.add_argument(
        "directory",
        help="The directory to scan for credential references.",
    )
    cli.add_argument(
        "--output",
        default="security_audit_report.txt",
        help="The output file for the security audit report.",
    )
    options = cli.parse_args()

    credential_finder = CredentialFinder()
    credential_finder.generate_security_audit_report(
        credential_finder.scan_directory(options.directory),
        options.output,
    )
