#!/usr/bin/env python3
"""
Extract Voice+Memory MVP (ClawdTalk) artifacts from swarm JSONL results.
Filters for V-* story IDs and writes artifacts to Sunaiva/voice-memory/swarm_generated/
"""

import json
import os
import re
from pathlib import Path
from typing import Dict, List, Tuple

# Filesystem layout: swarm-result inputs and the artifact output root.
REPO_ROOT = Path("/mnt/e/genesis-system")
KIMI_RESULTS = REPO_ROOT / "hive/swarm_results/kimi_results.jsonl"
MINIMAX_RESULTS = REPO_ROOT / "hive/swarm_results/minimax_results.jsonl"
OUTPUT_BASE = REPO_ROOT / "Sunaiva/voice-memory/swarm_generated"

# Ensure every output subdirectory exists up front (idempotent).
for _subdir in ("server", "marketing", "tests", "deploy", "docs"):
    (OUTPUT_BASE / _subdir).mkdir(parents=True, exist_ok=True)


def extract_code_blocks(text: str) -> List[Tuple[str, str, str]]:
    """
    Extract fenced code blocks from markdown text.

    Args:
        text: Markdown document (may be None or empty).

    Returns:
        List of (language, filename_hint, code) tuples; the hint and code
        are stripped of surrounding whitespace.
    """
    if not text:
        return []

    # Pattern: ```language [filename]\ncode\n```
    # The optional filename hint must stay on the SAME line as the language
    # tag: using [ \t]+ (not \s+) prevents the hint group from matching the
    # newline and swallowing the first line of code as a bogus "filename".
    pattern = r'```(\w+)(?:[ \t]+([^\n]+))?\n(.*?)```'
    matches = re.findall(pattern, text, re.DOTALL)

    return [(lang, hint.strip(), code.strip()) for lang, hint, code in matches]


def determine_file_path(story_id: str, story_title: str, lang: str, filename_hint: str) -> Path:
    """
    Map a story's metadata to an output path under OUTPUT_BASE.

    A path-free filename hint from the code fence wins outright; otherwise
    a slug is derived from the story title and an extension is inferred
    from the code language. The subdirectory (tests/marketing/deploy/
    server/docs) is chosen from keywords in the title, falling back on
    whether the language looks like code.
    """
    title_lower = story_title.lower()
    code_langs = ('python', 'py', 'javascript', 'js', 'typescript', 'ts')

    if filename_hint and '/' not in filename_hint:
        # Trust an explicit same-directory filename from the code fence.
        base_name = filename_hint
    elif lang in ('dockerfile', 'docker'):
        # Dockerfiles carry a fixed conventional name.
        base_name = 'Dockerfile'
    else:
        # Slugify the title, then append a language-derived extension.
        slug = story_title.replace(" ", "_").replace("-", "_").lower()
        slug = re.sub(r'[^\w_.]', '', slug)
        extension_by_lang = {
            'python': '.py', 'py': '.py',
            'javascript': '.js', 'js': '.js', 'typescript': '.js', 'ts': '.js',
            'sql': '.sql',
            'yaml': '.yml', 'yml': '.yml',
            'json': '.json',
            'markdown': '.md', 'md': '.md',
            'html': '.html',
            'css': '.css',
        }
        base_name = slug + extension_by_lang.get(lang, f'.{lang}')

    # Route to a subdirectory by title keyword, most specific first.
    if 'test' in title_lower or 'e2e' in title_lower:
        subdir = "tests"
    elif any(word in title_lower for word in ('marketing', 'landing', 'pricing', 'copy', 'email')):
        subdir = "marketing"
    elif any(word in title_lower for word in ('deploy', 'docker', 'config', 'nginx')):
        subdir = "deploy"
    elif any(word in title_lower for word in ('schema', 'database', 'migration')):
        subdir = "server"
    elif lang in code_langs:
        # Default: code goes to server, everything else to docs.
        subdir = "server"
    else:
        subdir = "docs"

    return OUTPUT_BASE / subdir / base_name


def process_jsonl_file(filepath: Path, team: str) -> List[Dict]:
    """
    Parse a swarm-results JSONL file and collect successful V-* stories.

    Args:
        filepath: Path to the JSONL results file.
        team: Label (e.g. "KIMI"/"MINIMAX") attached to each record.

    Returns:
        List of artifact dicts with keys: story_id, story_title, team,
        response, status. Empty when the file is missing.
    """
    if not filepath.exists():
        print(f"⚠️  File not found: {filepath}")
        return []

    collected: List[Dict] = []
    with open(filepath, 'r', encoding='utf-8') as handle:
        for line_num, raw in enumerate(handle, 1):
            raw = raw.strip()
            if not raw:
                continue

            try:
                record = json.loads(raw)
            except json.JSONDecodeError as e:
                print(f"⚠️  JSON decode error at line {line_num} in {filepath.name}: {e}")
                continue

            # Only Voice+Memory (V-*) stories belong to this extraction.
            story_id = record.get('story_id', '')
            if not story_id.startswith('V-'):
                continue

            # Skip stories that failed or produced no response text.
            if record.get('status') == 'fail' or not record.get('response'):
                print(f"⚠️  Skipping {story_id} ({record.get('story_title', 'Unknown')}): {record.get('error', 'No response')}")
                continue

            collected.append({
                'story_id': story_id,
                'story_title': record.get('story_title', 'Unknown'),
                'team': team,
                'response': record.get('response', ''),
                'status': record.get('status', 'unknown'),
            })

    return collected


def save_artifact(story_id: str, story_title: str, response: str) -> List[str]:
    """
    Extract code blocks from a story response and save them to disk.

    Responses with no fenced code blocks are saved verbatim as a markdown
    doc. Each code block gets a provenance header written in the block's
    own comment syntax (unknown languages get no header).

    Args:
        story_id: Swarm story identifier (e.g. "V-3").
        story_title: Human-readable story title.
        response: Raw model response (markdown).

    Returns:
        List of created file paths, relative to REPO_ROOT.
    """
    created_files = []

    code_blocks = extract_code_blocks(response)

    if not code_blocks:
        # No code blocks: save the raw response as markdown.
        # Sanitize the title so characters like "/" cannot escape the docs
        # directory or yield a path whose parent doesn't exist (this branch
        # never mkdirs, unlike the code-block branch below).
        safe_title = re.sub(r'[^\w.-]', '_', story_title)
        file_path = OUTPUT_BASE / "docs" / f"{story_id}_{safe_title}.md"
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(f"# {story_title}\n\n**Story ID**: {story_id}\n\n{response}\n")
        created_files.append(str(file_path.relative_to(REPO_ROOT)))
        return created_files

    # Save each code block.
    for idx, (lang, filename_hint, code) in enumerate(code_blocks, 1):
        # The first block, or any block with an explicit filename, keeps the
        # main story-derived name; later anonymous blocks get an index suffix
        # so they don't overwrite each other.
        if idx == 1 or filename_hint:
            file_path = determine_file_path(story_id, story_title, lang, filename_hint)
        else:
            file_path = determine_file_path(story_id, f"{story_title}_{idx}", lang, filename_hint)

        file_path.parent.mkdir(parents=True, exist_ok=True)
        with open(file_path, 'w', encoding='utf-8') as f:
            # Provenance header in the target language's comment syntax.
            header = f"# Generated from swarm story {story_id}: {story_title}\n"
            if lang in ['python', 'py']:
                f.write(f'"""{header}"""\n\n')
            elif lang in ['javascript', 'js', 'typescript', 'ts']:
                f.write(f'/* {header} */\n\n')
            elif lang in ['sql']:
                f.write(f'-- {header}\n\n')
            elif lang in ['html', 'markdown', 'md']:
                f.write(f'<!-- {header} -->\n\n')

            f.write(code)
            f.write('\n')

        created_files.append(str(file_path.relative_to(REPO_ROOT)))

    return created_files


def _write_index(index_path: Path, index_entries: List[Dict], artifact_count: int, total_files: int) -> None:
    """Write INDEX.md summarizing extracted artifacts, grouped by team."""
    with open(index_path, 'w', encoding='utf-8') as f:
        f.write("# Voice+Memory MVP (ClawdTalk) - Swarm Generated Artifacts\n\n")
        f.write(f"**Generated**: {Path(__file__).name}\n")
        f.write("**Source**: hive/swarm_results/{kimi,minimax}_results.jsonl\n")
        f.write("**Filter**: Story IDs starting with V-*\n")
        f.write(f"**Total Artifacts**: {artifact_count}\n")
        f.write(f"**Total Files**: {total_files}\n\n")

        f.write("---\n\n")

        # One section per team, preserving KIMI-then-MINIMAX order.
        for team, label in (("KIMI", "Code"), ("MINIMAX", "Content")):
            f.write(f"## {team} Artifacts ({label})\n\n")
            for entry in index_entries:
                if entry['team'] != team:
                    continue
                f.write(f"### {entry['story_id']}: {entry['story_title']}\n\n")
                for file_path in entry['files']:
                    f.write(f"- `{file_path}`\n")
                f.write("\n")


def main():
    """Extract V-* story artifacts from both swarm result files and index them."""
    print("🔍 Extracting Voice+Memory MVP artifacts from swarm results...\n")

    # Process both JSONL files.
    kimi_artifacts = process_jsonl_file(KIMI_RESULTS, "KIMI")
    minimax_artifacts = process_jsonl_file(MINIMAX_RESULTS, "MINIMAX")
    all_artifacts = kimi_artifacts + minimax_artifacts

    print(f"✅ Found {len(all_artifacts)} successful V-* stories:")
    print(f"   - KIMI (code): {len(kimi_artifacts)}")
    print(f"   - MINIMAX (content): {len(minimax_artifacts)}\n")

    if not all_artifacts:
        print("⚠️  No artifacts to extract. Exiting.")
        return

    # Extract and save artifacts, accumulating index entries as we go.
    index_entries = []
    total_files = 0
    for artifact in all_artifacts:
        story_id = artifact['story_id']
        story_title = artifact['story_title']

        print(f"📦 Processing {story_id}: {story_title} ({artifact['team']})")

        created_files = save_artifact(story_id, story_title, artifact['response'])
        total_files += len(created_files)

        index_entries.append({
            'story_id': story_id,
            'story_title': story_title,
            'team': artifact['team'],
            'files': created_files,
        })

        print(f"   ✓ Created {len(created_files)} file(s)")

    # Write INDEX.md.
    print("\n📝 Writing INDEX.md...")
    index_path = OUTPUT_BASE / "INDEX.md"
    _write_index(index_path, index_entries, len(all_artifacts), total_files)
    print(f"✅ INDEX.md written to {index_path.relative_to(REPO_ROOT)}")

    # Summary banner.
    separator = '=' * 60
    print(f"\n{separator}")
    print("✅ EXTRACTION COMPLETE")
    print(separator)
    print(f"📊 Total V-* stories processed: {len(all_artifacts)}")
    print(f"📁 Total files created: {total_files}")
    print(f"📍 Output directory: {OUTPUT_BASE.relative_to(REPO_ROOT)}")
    print(f"📄 Index: {index_path.relative_to(REPO_ROOT)}")
    print(f"{separator}\n")


if __name__ == '__main__':
    # Run the extraction only when executed as a script (not on import).
    main()
