#!/usr/bin/env python3
"""
Module: secrets_scanner.py

This module provides functionality for scanning files and directories for potential exposed secrets.
It uses regular expressions to identify common secret patterns and can be configured to ignore specific files or directories.
"""

import os
import re
import logging
import argparse
from typing import List, Optional, Tuple, Pattern


# Root-logger setup: INFO level, one "timestamp - LEVEL - message" line per record.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)


class SecretsScanner:
    """
    Scans files and directory trees for strings that look like exposed secrets.

    Every configured regular expression is applied to each line of a file, and
    every match is reported together with the name of the pattern that produced
    it and the 1-based line number it was found on.
    """

    # NOTE: "API Key" and "AWS Secret Access Key" are very broad patterns (any
    # 32-45 / 40 character run of the listed characters), so their findings are
    # candidates to review rather than confirmed secrets.
    DEFAULT_PATTERNS = {
        "API Key": r"[a-zA-Z0-9_-]{32,45}",
        "AWS Access Key ID": r"AKIA[0-9A-Z]{16}",
        "AWS Secret Access Key": r"[\w/=+-]{40}",
        # A storage account key is 64 bytes, i.e. 88 base64 characters *including*
        # the trailing "==" padding, hence 86 data characters followed by "==".
        "Azure Storage Account Key": (
            r"DefaultEndpointsProtocol=https;AccountName=[a-z0-9]+;"
            r"AccountKey=[a-zA-Z0-9/+]{86}==;EndpointSuffix=core\.windows\.net"
        ),
        "Google Cloud API Key": r"AIza[0-9A-Za-z_-]{35}",
        "Slack Token": r"xoxb-\d+-\d+-\w+",
        "GitHub Token": r"(?:ghp|gho|ghu|ghs)_[a-zA-Z0-9]{36,255}",
    }

    def __init__(self, patterns: Optional[dict] = None, ignore_paths: Optional[List[str]] = None):
        """
        Initializes the SecretsScanner with custom patterns and ignore paths.

        Args:
            patterns (dict, optional): Maps a description of the secret type to a regular
                                       expression string. Defaults to DEFAULT_PATTERNS.
            ignore_paths (List[str], optional): File or directory paths to skip while scanning
                                                directories. A path is skipped when the ignore
                                                entry matches a run of whole path components
                                                (e.g. "node_modules" or "src/fixtures").
                                                Defaults to None (nothing is ignored).
        """
        # Copy so that editing one scanner's patterns can never modify the shared
        # class-level DEFAULT_PATTERNS (or the caller's own dictionary).
        self.patterns = dict(patterns if patterns is not None else self.DEFAULT_PATTERNS)
        self.ignore_paths = ignore_paths if ignore_paths is not None else []
        self.compiled_patterns = {
            name: re.compile(pattern) for name, pattern in self.patterns.items()
        }

    @staticmethod
    def _path_parts(path: str) -> List[str]:
        """Returns the components of the normalized form of ``path``."""
        return os.path.normpath(path).split(os.sep)

    @staticmethod
    def _matches_ignored(path_parts: List[str], ignored: List[List[str]]) -> bool:
        """Returns True if any ignore entry occurs in ``path_parts`` as a run of whole components."""
        for entry in ignored:
            span = len(entry)
            for start in range(len(path_parts) - span + 1):
                if path_parts[start:start + span] == entry:
                    return True
        return False

    def scan_file(self, file_path: str) -> List[Tuple[str, str, int]]:
        """
        Scans a single file for potential exposed secrets.

        Args:
            file_path (str): The path to the file to scan.

        Returns:
            List[Tuple[str, str, int]]: One tuple per match containing the secret type, the
                                        matched text and the 1-based line number. All matches
                                        on a line are reported, not just the first one.
                                        Returns an empty list if no secrets are found or if
                                        the file cannot be read (the error is logged).
        """
        findings = []
        try:
            # errors='replace' keeps binary or non-UTF-8 files from raising
            # UnicodeDecodeError, which would otherwise abort a whole directory scan.
            with open(file_path, 'r', encoding='utf-8', errors='replace') as handle:
                # Stream the file instead of loading every line into memory first.
                for line_number, line in enumerate(handle, start=1):
                    for name, pattern in self.compiled_patterns.items():
                        for match in pattern.finditer(line):
                            findings.append((name, match.group(0), line_number))
        except FileNotFoundError:
            logging.error("File not found: %s", file_path)
            return []
        except OSError as e:
            logging.error("Error reading file %s: %s", file_path, e)
            return []
        return findings

    def scan_directory(self, directory_path: str) -> List[Tuple[str, str, str, int]]:
        """
        Scans a directory recursively for potential exposed secrets.

        Args:
            directory_path (str): The path to the directory to scan.

        Returns:
            List[Tuple[str, str, str, int]]: One tuple per match containing the file path, the
                                             secret type, the matched text and the line number.
                                             Returns an empty list if no secrets are found.
        """
        # Compare whole path components rather than raw substrings, so that ignoring
        # "test" does not also skip "latest/" or "contest.py".
        ignored = [self._path_parts(path) for path in self.ignore_paths]

        findings = []
        for root, dirs, files in os.walk(directory_path):
            # Prune ignored directories in place so os.walk never descends into them.
            kept_dirs = []
            for name in dirs:
                dir_path = os.path.join(root, name)
                if self._matches_ignored(self._path_parts(dir_path), ignored):
                    logging.debug("Skipping ignored path: %s", dir_path)
                else:
                    kept_dirs.append(name)
            dirs[:] = kept_dirs

            for file in files:
                file_path = os.path.join(root, file)

                if self._matches_ignored(self._path_parts(file_path), ignored):
                    logging.debug("Skipping ignored path: %s", file_path)
                    continue

                for secret_type, secret, line_number in self.scan_file(file_path):
                    findings.append((file_path, secret_type, secret, line_number))
        return findings

    def scan_path(self, path: str) -> List[Tuple[str, str, str, int]]:
        """
        Scans a file or directory for potential exposed secrets.

        Args:
            path (str): The path to the file or directory to scan.

        Returns:
            List[Tuple[str, str, str, int]]: One tuple per match containing the path of the file
                                             the match was found in, the secret type, the matched
                                             text and the line number. Returns an empty list if no
                                             secrets are found or if ``path`` is neither a file
                                             nor a directory (an error is logged in that case).
        """
        if os.path.isfile(path):
            # An explicitly requested file is always scanned; ignore_paths only
            # filters entries discovered while walking a directory.
            return [
                (path, secret_type, secret, line_number)
                for secret_type, secret, line_number in self.scan_file(path)
            ]
        if os.path.isdir(path):
            return self.scan_directory(path)

        logging.error("Invalid path: %s", path)
        return []

def main():
    """
    Command-line entry point: parses the arguments, scans the requested path and prints a report.
    """
    cli = argparse.ArgumentParser(description='Scan files or directories for potential exposed secrets.')
    cli.add_argument('path', help='The path to the file or directory to scan.')
    cli.add_argument(
        '-i', '--ignore',
        nargs='+',
        default=[],
        help='List of paths to ignore (files or directories).',
    )
    cli.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Enable verbose logging (debug level).',
    )
    options = cli.parse_args()

    # --verbose lowers the root logger threshold so debug messages are shown.
    if options.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    results = SecretsScanner(ignore_paths=options.ignore).scan_path(options.path)

    if not results:
        print("No secrets found.")
        return

    print("Potential secrets found:")
    separator = "-" * 20
    for location, kind, value, line_no in results:
        print(f"  File: {location}")
        print(f"  Type: {kind}")
        print(f"  Secret: {value}")
        print(f"  Line: {line_no}")
        print(separator)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()