import json
import logging
from typing import Any, Dict, List, Optional, Union

# Configure logging when this module is loaded: INFO threshold and a
# "timestamp - level - message" line format for emitted records.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


class JSONSchemaAnalyzer:
    """
    Analyzes JSON files and extracts schema information, inferring field types,
    nested structures, and array patterns. Generates a JSON Schema definition
    and identifies common patterns across files.
    """

    def __init__(self):
        """
        Initializes the JSONSchemaAnalyzer.
        """
        self.logger = logging.getLogger(__name__)

    def analyze_json_file(self, file_path: str) -> Optional[Dict[str, Any]]:
        """
        Analyzes a single JSON file and returns its schema.

        Errors are logged rather than raised, so callers only need to check
        the return value for None.

        Args:
            file_path: The path to the JSON file.

        Returns:
            A dictionary representing the JSON schema, or None if an error occurred.
        """
        try:
            # JSON text is UTF-8 by specification; do not rely on the
            # platform's default encoding.
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return self.infer_schema(data)
        except FileNotFoundError:
            self.logger.error("File not found: %s", file_path)
            return None
        except json.JSONDecodeError as e:
            self.logger.error("JSON decode error in %s: %s", file_path, e)
            return None
        except Exception as e:
            # Boundary handler: record the traceback and report failure as None.
            self.logger.exception("Error analyzing %s: %s", file_path, e)
            return None

    def infer_schema(self, data: Any) -> Dict[str, Any]:
        """
        Infers the JSON schema from the given data.

        Objects are described recursively through their properties. Arrays
        are described by the schema of their first element only; an empty
        array gets an empty ``items`` schema.

        Args:
            data: The JSON data to analyze.

        Returns:
            A dictionary representing the JSON schema.
        """
        if isinstance(data, dict):
            return {
                "type": "object",
                "properties": {
                    key: self.infer_schema(value) for key, value in data.items()
                },
            }
        if isinstance(data, list):
            # Only the first element is sampled; later elements are assumed
            # to share its shape.
            return {
                "type": "array",
                "items": self.infer_schema(data[0]) if data else {},
            }
        if isinstance(data, str):
            return {"type": "string"}
        # bool must be tested before int: bool is a subclass of int, so
        # True/False would otherwise be reported as "integer".
        if isinstance(data, bool):
            return {"type": "boolean"}
        if isinstance(data, int):
            return {"type": "integer"}
        if isinstance(data, float):
            return {"type": "number"}
        if data is None:
            return {"type": "null"}
        return {"type": "unknown"}  # Handle other types as needed

    def analyze_multiple_files(self, file_paths: List[str]) -> List[Dict[str, Any]]:
        """
        Analyzes multiple JSON files and returns a list of their schemas.

        Files that could not be analyzed are skipped, so the result may be
        shorter than ``file_paths``.

        Args:
            file_paths: A list of paths to JSON files.

        Returns:
            A list of dictionaries, each representing the JSON schema of a file.
        """
        schemas = []
        for file_path in file_paths:
            schema = self.analyze_json_file(file_path)
            # None is the failure signal from analyze_json_file.
            if schema is not None:
                schemas.append(schema)
        return schemas

    def identify_common_patterns(self, schemas: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Identifies common patterns across multiple JSON schemas.

        Args:
            schemas: A list of JSON schemas.

        Returns:
            A dictionary representing the common schema patterns. Currently
            this is the first schema in the list, or an empty dictionary when
            the list is empty.
        """
        # This is a placeholder for more sophisticated pattern identification logic.
        # In a real-world scenario, this would involve comparing schemas and
        # identifying common fields, types, and structures.
        # For now, it simply returns the first schema.
        return schemas[0] if schemas else {}

    def generate_json_schema_definition(self, schema: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generates a JSON Schema definition from the inferred schema.

        Args:
            schema: The inferred schema.

        Returns:
            A new dictionary containing the draft-07 ``$schema`` marker
            followed by the entries of ``schema``; the input is not modified.
        """
        # Add JSON Schema draft version
        return {"$schema": "http://json-schema.org/draft-07/schema#", **schema}


if __name__ == '__main__':
    # Example usage: write a sample document to a temporary file, infer its
    # schema, wrap it as a JSON Schema definition, then run the multi-file
    # and common-pattern helpers on the same file.
    import os
    import tempfile

    analyzer = JSONSchemaAnalyzer()

    # Sample JSON document for the demonstration
    dummy_json_data = {
        "name": "John Doe",
        "age": 30,
        "is_active": True,
        "address": {
            "street": "123 Main St",
            "city": "Anytown"
        },
        "phone_numbers": ["555-1234", "555-5678"],
        "interests": ["reading", "hiking", "coding"]
    }

    # A temporary directory keeps the demo portable (no hard-coded /tmp) and
    # removes the sample file once the demo finishes.
    with tempfile.TemporaryDirectory() as tmp_dir:
        dummy_file_path = os.path.join(tmp_dir, "dummy.json")
        with open(dummy_file_path, 'w', encoding='utf-8') as f:
            json.dump(dummy_json_data, f, indent=4)

        # Analyze the dummy file
        schema = analyzer.analyze_json_file(dummy_file_path)
        if schema:
            print("Inferred Schema:")
            print(json.dumps(schema, indent=4))

            # Generate JSON Schema definition
            json_schema_definition = analyzer.generate_json_schema_definition(schema)
            print("\nJSON Schema Definition:")
            print(json.dumps(json_schema_definition, indent=4))

        # Analyze multiple files
        schemas = analyzer.analyze_multiple_files([dummy_file_path, dummy_file_path])
        if schemas:
            print("\nSchemas from multiple files:")
            for s in schemas:
                print(json.dumps(s, indent=4))

            # Identify common patterns
            common_patterns = analyzer.identify_common_patterns(schemas)
            print("\nCommon patterns:")
            print(json.dumps(common_patterns, indent=4))
