import os
import json
import datetime
import uuid

class PRDArchiveSystem:
    """
    Archive completed Product Requirement Documents (PRDs), extract basic
    learnings from them, and track recurring goals and acceptance criteria.

    On-disk layout under ``base_archive_path``:

    * ``prds/<prd_id>.json`` -- one archive entry per processed PRD.
    * ``learnings_patterns_db.json`` -- a JSON object with two keys:
      ``"learnings"`` (list, one record per processed PRD) and
      ``"patterns"`` (dict, recomputed from all archived PRDs on every
      call to :meth:`process_completed_prd`).

    All files are written via a temporary file that is atomically renamed
    into place, so a failed or interrupted write never leaves a truncated
    JSON file behind.
    """

    def __init__(self, base_archive_path="data/prd_archives"):
        """
        Create the archive directories and an empty database if missing.

        Args:
            base_archive_path (str): Root directory for all archive data.
        """
        self.base_archive_path = base_archive_path
        self.prd_data_path = os.path.join(base_archive_path, "prds")
        self.learnings_patterns_db_path = os.path.join(base_archive_path, "learnings_patterns_db.json")

        # Creating the nested "prds" directory also creates the base directory.
        os.makedirs(self.prd_data_path, exist_ok=True)

        # Never overwrite an existing database; only seed a fresh one.
        if not os.path.exists(self.learnings_patterns_db_path):
            self._write_json_atomic(
                self.learnings_patterns_db_path,
                {"learnings": [], "patterns": {}},
            )

    @staticmethod
    def _write_json_atomic(path: str, payload) -> None:
        """
        Serialise ``payload`` as JSON to ``path`` without exposing partial data.

        The data is first written to ``<path>.tmp`` and then moved over the
        target with ``os.replace``. If serialisation or writing fails, the
        temporary file is removed, the target is left untouched, and the
        original exception propagates.
        """
        tmp_path = f"{path}.tmp"
        try:
            with open(tmp_path, 'w', encoding="utf-8") as f:
                json.dump(payload, f, indent=4)
            os.replace(tmp_path, path)
        finally:
            # After a successful replace the temp file is gone; it only still
            # exists here when something above raised.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    @staticmethod
    def _criteria_list(raw) -> list:
        """
        Normalise an ``acceptance_criteria`` value to a list of criteria.

        ``None`` becomes an empty list, lists/tuples are copied, and any other
        single value (for example a bare string) is treated as one criterion
        rather than being iterated element by element.
        """
        if raw is None:
            return []
        if isinstance(raw, (list, tuple)):
            return list(raw)
        return [raw]

    @staticmethod
    def _pattern_key(value):
        """
        Return a hashable key for counting ``value`` in a frequency table.

        Dicts and lists (which JSON can contain but Python cannot hash) are
        mapped to their canonical JSON text; every other value is used as-is.
        """
        if isinstance(value, (dict, list)):
            return json.dumps(value, sort_keys=True)
        return value

    def _load_learnings_patterns_db(self) -> dict:
        """
        Load the learnings/patterns database.

        Returns:
            dict: Always a dict with a list under ``"learnings"`` and a dict
            under ``"patterns"``. A missing, unreadable-as-JSON, or wrongly
            shaped file yields the empty default for the affected part.
        """
        try:
            with open(self.learnings_patterns_db_path, 'r', encoding="utf-8") as f:
                db_data = json.load(f)
        except FileNotFoundError:
            return {"learnings": [], "patterns": {}}
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            print("Warning: Could not decode learnings/patterns database. Using empty defaults.")
            return {"learnings": [], "patterns": {}}

        if not isinstance(db_data, dict):
            return {"learnings": [], "patterns": {}}
        # Callers append to "learnings" and assign "patterns"; guarantee both
        # exist with the expected container types.
        if not isinstance(db_data.get("learnings"), list):
            db_data["learnings"] = []
        if not isinstance(db_data.get("patterns"), dict):
            db_data["patterns"] = {}
        return db_data

    def _save_learnings_patterns_db(self, db_data: dict):
        """
        Persist the learnings/patterns database atomically.

        Args:
            db_data (dict): The full database content to write.
        """
        self._write_json_atomic(self.learnings_patterns_db_path, db_data)

    def archive_prd(self, prd_data: dict) -> str:
        """
        Archive a single completed PRD as ``prds/<prd_id>.json``.

        Args:
            prd_data (dict): The complete PRD content. Must be JSON-serialisable.

        Returns:
            str: The unique ID (UUID4) assigned to the archived PRD.

        Raises:
            TypeError: If ``prd_data`` contains values ``json`` cannot
                serialise. No archive file is left behind in that case.
        """
        prd_id = str(uuid.uuid4())
        archive_entry = {
            "prd_id": prd_id,
            "timestamp": datetime.datetime.now().isoformat(),
            "prd_title": prd_data.get("title", "Untitled PRD"),
            "goal": prd_data.get("goal", "No Goal Specified"),
            "acceptance_criteria": prd_data.get("acceptance_criteria", []),
            # The full PRD is kept so later analysis is not limited to the
            # summary fields above.
            "full_content": prd_data,
        }
        prd_file_path = os.path.join(self.prd_data_path, f"{prd_id}.json")
        self._write_json_atomic(prd_file_path, archive_entry)
        return prd_id

    def _extract_learnings(self, prd_data: dict) -> dict:
        """
        Extract summary learnings from a single PRD.

        Args:
            prd_data (dict): The content of the PRD.

        Returns:
            dict: ``prd_title``, ``goal_stated``, ``key_criteria_defined`` (the
            raw ``acceptance_criteria`` value) and ``initial_insights`` (three
            human-readable summary strings).
        """
        title = prd_data.get('title', 'N/A')
        goal = prd_data.get('goal', 'N/A')
        # Normalise so a None or single-string value is counted sensibly
        # instead of raising or counting characters.
        criteria_count = len(self._criteria_list(prd_data.get('acceptance_criteria')))

        return {
            "prd_title": prd_data.get("title", "Untitled PRD"),
            "goal_stated": prd_data.get("goal", "N/A"),
            "key_criteria_defined": prd_data.get("acceptance_criteria", []),
            "initial_insights": [
                f"PRD '{title}' successfully completed.",
                f"Goal identified as: '{goal}'",
                f"Defined {criteria_count} acceptance criteria.",
            ],
        }

    def _identify_patterns(self, top_n: int = 5) -> dict:
        """
        Count recurring goals and acceptance criteria across all archived PRDs.

        Args:
            top_n (int): Maximum number of entries kept per frequency table.

        Returns:
            dict: ``frequent_goals`` and ``frequent_acceptance_criteria`` (each
            mapping a value to its occurrence count, most frequent first) and
            an empty ``emerging_strategies`` placeholder.
        """
        common_goals = {}
        common_criteria_phrases = {}

        for prd_content in self.get_archived_prds().values():
            # A hand-edited or foreign file may hold valid JSON that is not an
            # archive entry; ignore it rather than fail the whole update.
            if not isinstance(prd_content, dict):
                continue

            goal = prd_content.get("goal")
            if goal:
                goal_key = self._pattern_key(goal)
                common_goals[goal_key] = common_goals.get(goal_key, 0) + 1

            for criterion in self._criteria_list(prd_content.get("acceptance_criteria")):
                criterion_key = self._pattern_key(criterion)
                common_criteria_phrases[criterion_key] = common_criteria_phrases.get(criterion_key, 0) + 1

        def most_frequent(counts: dict) -> dict:
            # sorted() is stable, so equally frequent entries keep the
            # (filename-sorted) order in which they were first seen.
            ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
            return dict(ranked[:top_n])

        return {
            "frequent_goals": most_frequent(common_goals),
            "frequent_acceptance_criteria": most_frequent(common_criteria_phrases),
            "emerging_strategies": {},  # Placeholder; nothing populates this yet.
        }

    def process_completed_prd(self, prd_data: dict) -> dict:
        """
        Archive a completed PRD, record its learnings, and refresh patterns.

        Args:
            prd_data (dict): The complete, finalised PRD content.

        Returns:
            dict: ``prd_id``, a status ``message``, the ``extracted_learnings``
            for this PRD, and the ``current_patterns`` across all archived PRDs.
        """
        # 1. Archive first so the pattern pass below includes this PRD.
        prd_id = self.archive_prd(prd_data)

        # 2. Extract learnings and append them to the historical record.
        extracted_learnings = self._extract_learnings(prd_data)
        db_data = self._load_learnings_patterns_db()
        db_data["learnings"].append({
            "prd_id": prd_id,
            "timestamp": datetime.datetime.now().isoformat(),
            "details": extracted_learnings
        })

        # 3. Patterns are recomputed from the full corpus, not updated
        #    incrementally, so they always reflect what is on disk.
        db_data["patterns"] = self._identify_patterns()

        self._save_learnings_patterns_db(db_data)

        return {
            "prd_id": prd_id,
            "message": "PRD archived, learnings extracted, patterns updated for AIVA's growth.",
            "extracted_learnings": extracted_learnings,
            "current_patterns": db_data["patterns"]
        }

    def get_archived_prds(self) -> dict:
        """
        Load every archived PRD from storage.

        Returns:
            dict: Maps PRD ID (the filename without its ``.json`` suffix) to
            the parsed archive entry. Files that cannot be read or decoded are
            skipped with a printed warning.
        """
        all_prds = {}
        # Sort for a deterministic order; os.listdir order is arbitrary.
        for filename in sorted(os.listdir(self.prd_data_path)):
            if not filename.endswith(".json"):
                continue
            # Strip only the trailing extension, not every ".json" occurrence.
            prd_id = os.path.splitext(filename)[0]
            try:
                with open(os.path.join(self.prd_data_path, filename), 'r', encoding="utf-8") as f:
                    all_prds[prd_id] = json.load(f)
            except ValueError:
                # Covers json.JSONDecodeError and UnicodeDecodeError.
                print(f"Warning: Could not decode JSON for PRD ID {prd_id}. Skipping.")
            except OSError:
                # e.g. a directory named "*.json" or a permission problem.
                print(f"Warning: Could not read archive file for PRD ID {prd_id}. Skipping.")
        return all_prds

    def get_learnings_and_patterns(self) -> dict:
        """Return the current aggregated learnings and identified patterns."""
        return self._load_learnings_patterns_db()

