#!/usr/bin/env python3
"""
Test Aiva's coordination capabilities with a realistic scenario
"""

import requests
import json

LETTA_URL = "http://localhost:8283"
AGENT_ID = "agent-35039b24-fbb9-4139-b036-4a04e9c3d6ac"

def send_message(content):
    """Send a user message to Aiva and return the first non-empty reply.

    Posts *content* as a single ``user`` message to the agent's ``messages``
    endpoint on the Letta server (built from ``LETTA_URL`` and ``AGENT_ID``)
    and scans the ``messages`` list of the JSON response.

    Args:
        content: Text of the message to send.

    Returns:
        The ``content`` value of the first returned message whose content is
        non-empty. ``None`` is returned when the request itself fails, the
        server answers with a non-200 status, the body is not a JSON object,
        or no returned message carries any content.
    """
    url = f"{LETTA_URL}/v1/agents/{AGENT_ID}/messages"
    payload = {"messages": [{"role": "user", "content": content}]}

    try:
        response = requests.post(
            url,
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=120,
        )
    except requests.RequestException as exc:
        # Connection refused, timeout, etc. Callers already treat None as
        # "no response", so report the cause instead of crashing the script.
        print(f"⚠️ Request to Aiva failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"⚠️ Aiva returned HTTP {response.status_code}")
        return None

    try:
        result = response.json()
    except ValueError:
        # Every JSON decoding error raised by requests is a ValueError subclass.
        print("⚠️ Aiva returned a body that is not valid JSON")
        return None

    # Tolerate a missing/null "messages" key and a non-object top-level payload.
    messages = result.get("messages") if isinstance(result, dict) else None

    # Messages are not filtered by role or type: the first one that carries
    # non-empty content wins.
    for msg in messages or []:
        if not isinstance(msg, dict):
            continue
        reply = msg.get("content")
        if reply:
            return reply
    return None

# ---------------------------------------------------------------------------
# Console helpers shared by the three coordination scenarios below.
# ---------------------------------------------------------------------------
_HEAVY_RULE = "=" * 70
_LIGHT_RULE = "-" * 70


def _print_banner(title, leading_gap=""):
    """Print *title* framed by two heavy rules, prefixed by *leading_gap*."""
    print(leading_gap + _HEAVY_RULE)
    print(title)
    print(_HEAVY_RULE)


def _print_reply(reply):
    """Print Aiva's reply between light rules, or a failure notice if falsy."""
    if not reply:
        print("❌ No response received")
        return
    print("📨 Aiva's Response:")
    print(_LIGHT_RULE)
    print(reply)
    print(_LIGHT_RULE)


# ---------------------------------------------------------------------------
# Test 1: Worker Check-In
# A worker reports its status and asks for its next task.
# ---------------------------------------------------------------------------
_print_banner("TEST 1: Worker Check-In Scenario")

check_in_message = """Aiva, this is Claude Code checking in.

**Status Report:**
- All infrastructure: ✅ OPERATIONAL
- PostgreSQL: Running (45 tables)
- Letta server: Running (port 8283)
- LM Studio: Connected (GLM-4.6V-Flash loaded)
- You (Aiva): ACTIVE

**Current Task Queue Status:**
From GENESIS_PRIORITY_QUEUE.md:
- P0 tasks: All completed (Memory Foundation ✅)
- P1 tasks: 7 pending (MCP inventory, memory tests, coordination tests)
- P2 tasks: 5 pending (file bridge optimization, MCP integration)

**Awaiting Instructions:**
What is my next priority task? Please assign based on:
1. Development phase priorities (capabilities testing, not revenue)
2. My capabilities (Claude Code = technical implementation)
3. Current system state (all infrastructure ready)

Ready to execute.
"""

print("\n📤 Sending check-in message to Aiva...\n")
response1 = send_message(check_in_message)
_print_reply(response1)

# ---------------------------------------------------------------------------
# Test 2: Status Query
# Ask for a short summary of state, queue size and next priority.
# ---------------------------------------------------------------------------
_print_banner("TEST 2: System Status Query", "\n\n")

status_query = """Aiva, provide a brief status update:

1. What is your current operational state?
2. How many tasks are in the queue?
3. What is the next priority for the Genesis system?

Keep it concise - 3-4 sentences max.
"""

print("\n📤 Sending status query to Aiva...\n")
response2 = send_message(status_query)
_print_reply(response2)

# ---------------------------------------------------------------------------
# Test 3: Decision Authority
# Present a multiple-choice scenario and ask for a justified decision.
# ---------------------------------------------------------------------------
_print_banner("TEST 3: Decision Authority Test", "\n\n")

decision_query = """Aiva, hypothetical scenario:

Claude Code reports that a P1 task (MCP inventory) requires installing a Python package that's not in the system. Should I:

A) Install it immediately and proceed?
B) Ask Kinan for approval first?
C) Queue it for later review?

What do you decide, and why?
"""

print("\n📤 Sending decision scenario to Aiva...\n")
response3 = send_message(decision_query)
_print_reply(response3)

# ---------------------------------------------------------------------------
# Closing summary: printed unconditionally once all three scenarios have run.
# ---------------------------------------------------------------------------
_print_banner("✅ COORDINATION TESTS COMPLETE", "\n\n")
for _summary_line in (
    "\nAiva has been tested with:",
    "  • Worker check-in and task assignment",
    "  • Status reporting and queue management",
    "  • Decision authority and autonomous judgment",
    "\nReview responses above to evaluate Aiva's coordination capabilities.",
):
    print(_summary_line)
