#!/usr/bin/env python3
"""
Test script to verify SWARM_MISSIONS.md parsing without making API calls.
"""

import re
from collections import Counter
from dataclasses import dataclass
from typing import List, Optional

# The parsing logic below mirrors openrouter_swarm but is redefined inline,
# so this test never imports the swarm module (and never touches its API client).

@dataclass
class Story:
    """Represents an atomic story from SWARM_MISSIONS.md."""
    story_id: str
    title: str
    role: str
    need: str
    benefit: str
    acceptance_criteria: List[str]
    black_box_tests: str
    white_box_tests: str
    estimated_tokens: int
    model: str
    prd_name: str


def parse_single_story(story_text: str, prd_name: str) -> Optional[Story]:
    """Parse a single story from text."""
    try:
        # Extract story ID and title
        title_match = re.search(r'####\s+([\w-]+):\s+(.+)', story_text)
        if not title_match:
            return None

        story_id = title_match.group(1)
        title = title_match.group(2).strip()

        # Extract user story components
        role_match = re.search(r'\*\*As a\*\*\s+(.+?),', story_text)
        need_match = re.search(r'\*\*I need\*\*\s+(.+?),\s+\*\*so that\*\*', story_text)
        benefit_match = re.search(r'\*\*so that\*\*\s+(.+?)\.', story_text)
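        # Note the strict punctuation these patterns depend on: a comma after
        # the role, ", **so that**" following the need, and a period ending
        # the benefit. A story written "**As an**" instead of "**As a**"
        # would fail to match and be dropped.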

        if not (role_match and need_match and benefit_match):
            return None

        role = role_match.group(1).strip()
        need = need_match.group(1).strip()
        benefit = benefit_match.group(1).strip()

        # Extract acceptance criteria
        criteria_match = re.search(r'\*\*Acceptance Criteria\*\*:\n(.*?)\*\*Black Box Tests\*\*:', story_text, re.DOTALL)
        acceptance_criteria = []
        if criteria_match:
            criteria_text = criteria_match.group(1)
            # Extract bullet points
            for line in criteria_text.split('\n'):
                line = line.strip()
                if line.startswith('-'):
                    acceptance_criteria.append(line[1:].strip())

        # Extract tests
        black_box_match = re.search(r'\*\*Black Box Tests\*\*:\s+(.+?)\n\*\*White Box Tests\*\*:', story_text, re.DOTALL)
        white_box_match = re.search(r'\*\*White Box Tests\*\*:\s+(.+?)\n\*\*Estimated Tokens\*\*:', story_text, re.DOTALL)

        black_box_tests = black_box_match.group(1).strip() if black_box_match else ""
        white_box_tests = white_box_match.group(1).strip() if white_box_match else ""

        # Extract estimated tokens; thousands separators ("5,000") are
        # stripped, and a missing field falls back to a default of 5000
        tokens_match = re.search(r'\*\*Estimated Tokens\*\*:\s+([\d,]+)', story_text)
        estimated_tokens = int(tokens_match.group(1).replace(',', '')) if tokens_match else 5000

        # Extract model; \w+ stops at punctuation, so a hyphenated name would
        # be truncated to its first word. Defaults to "Kimi" when absent.
        model_match = re.search(r'\*\*Model\*\*:\s+(\w+)', story_text)
        model = model_match.group(1) if model_match else "Kimi"

        return Story(
            story_id=story_id,
            title=title,
            role=role,
            need=need,
            benefit=benefit,
            acceptance_criteria=acceptance_criteria,
            black_box_tests=black_box_tests,
            white_box_tests=white_box_tests,
            estimated_tokens=estimated_tokens,
            model=model,
            prd_name=prd_name
        )

    except Exception as e:
        print(f"WARNING: Failed to parse story: {e}")
        return None


def parse_missions_file(file_path: str) -> List[Story]:
    """Parse SWARM_MISSIONS.md and extract all atomic stories."""
    stories = []

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Find current PRD context
    # Format: # PRD 1: SUNAIVA MEMORY VAULT (Ship in 6hrs)
    prd_pattern = re.compile(r'^# PRD \d+:\s+([^(]+)')

    # Split content by PRDs to track context
    current_prd = "Unknown PRD"
    lines = content.split('\n')
    current_story_text = []
    in_story = False

    for i, line in enumerate(lines):
        # Track PRD context
        prd_match = prd_pattern.match(line)
        if prd_match:
            current_prd = prd_match.group(1).strip()  # drop the space left before "(Ship in ...)"

        # Check if we're starting a story
        if line.startswith('####'):
            if in_story and current_story_text:
                # Process previous story
                story_text = '\n'.join(current_story_text)
                parsed_story = parse_single_story(story_text, current_prd)
                if parsed_story:
                    stories.append(parsed_story)

            # Start new story
            current_story_text = [line]
            in_story = True
        elif in_story:
            current_story_text.append(line)

            # A story ends at a '---' rule immediately after the '**Model**:' line
            if line.strip().startswith('---') and i > 0 and lines[i-1].strip().startswith('**Model**:'):
                story_text = '\n'.join(current_story_text)
                parsed_story = parse_single_story(story_text, current_prd)
                if parsed_story:
                    stories.append(parsed_story)
                current_story_text = []
                in_story = False

    # Process last story if any
    if in_story and current_story_text:
        story_text = '\n'.join(current_story_text)
        parsed_story = parse_single_story(story_text, current_prd)
        if parsed_story:
            stories.append(parsed_story)

    return stories


if __name__ == "__main__":
    missions_file = "/mnt/e/genesis-system/hive/SWARM_MISSIONS.md"

    print("=" * 80)
    print("SWARM MISSIONS PARSER TEST")
    print("=" * 80)
    print(f"Parsing: {missions_file}\n")

    stories = parse_missions_file(missions_file)
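
    # Fail fast when nothing parses (wrong path, reformatted file, ...) so the
    # breakdowns below don't silently print empty results.
    if not stories:
        raise SystemExit(f"ERROR: no stories parsed from {missions_file}")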

    print(f"✅ Successfully parsed {len(stories)} stories\n")

    # Group by model (anything that is neither Kimi nor MiniMax counts as "Other")
    kimi_stories = [s for s in stories if s.model.lower() == "kimi"]
    minimax_stories = [s for s in stories if s.model.lower() == "minimax"]
    other_count = len(stories) - len(kimi_stories) - len(minimax_stories)

    print("Model breakdown:")
    print(f"  Kimi: {len(kimi_stories)}")
    print(f"  MiniMax: {len(minimax_stories)}")
    print(f"  Other: {other_count}")
    print(f"  Total: {len(stories)}\n")

    # Group by PRD
    prd_breakdown = Counter(story.prd_name for story in stories)

    print("PRD breakdown:")
    for prd_name, count in sorted(prd_breakdown.items()):
        print(f"  {prd_name}: {count} stories")
    print()

    # Show first 5 stories
    print("First 5 stories:")
    for i, story in enumerate(stories[:5], 1):
        print(f"\n{i}. {story.story_id}: {story.title}")
        print(f"   PRD: {story.prd_name}")
        print(f"   Model: {story.model}")
        print(f"   Role: {story.role}")
        print(f"   Acceptance Criteria: {len(story.acceptance_criteria)} items")
        print(f"   Estimated Tokens: {story.estimated_tokens:,}")

    # Calculate total estimated tokens
    total_tokens = sum(s.estimated_tokens for s in stories)
    avg_tokens = total_tokens / len(stories) if stories else 0

    print(f"\n{'=' * 80}")
    print("TOKEN ESTIMATES")
    print("=" * 80)
    print(f"Total estimated tokens: {total_tokens:,}")
    print(f"Average per story: {avg_tokens:,.0f}")
    print(f"Estimated cost @ $1/MTok: ${total_tokens / 1_000_000:.2f}")
    print("=" * 80)
