#!/usr/bin/env python3
"""
Genesis Comprehensive Test Runner
==================================
Runs all test suites and generates a comprehensive report.
"""

import sys
import os
from datetime import datetime, timezone
import json

# Put the genesis-memory tree first on the import path so the suite
# functions can import its packages.
sys.path.insert(0, "/mnt/e/genesis-system/genesis-memory")

# Run-wide accumulator: per-suite result dicts live under "suites", the
# counters are totals across suites, and "errors" holds one line per suite
# that raised.
results = dict(
    timestamp=datetime.now(timezone.utc).isoformat(),
    suites={},
    total_tests=0,
    passed=0,
    failed=0,
    errors=[],
)


def run_suite(name: str, test_func) -> dict:
    """Run one test suite and fold its counts into the global ``results``.

    Args:
        name: Key under which the suite is stored in ``results["suites"]``.
        test_func: Callable that receives the suite's result dict (keys
            ``tests``, ``passed``, ``failed``, ``details``) and updates it
            in place while running its checks.

    Returns:
        The suite's result dict, which is also stored in
        ``results["suites"][name]``.

    An exception escaping ``test_func`` is not re-raised: it is counted as
    one failure, described in the suite's ``details``, and appended to
    ``results["errors"]``.
    """
    suite_results = {
        "tests": 0,
        "passed": 0,
        "failed": 0,
        "details": []
    }

    try:
        test_func(suite_results)
    except Exception as e:
        suite_results["failed"] += 1
        # If the suite crashed before counting any check (for example on an
        # import at its top), the failure above has no matching entry in
        # "tests". Reconcile so that tests >= passed + failed always holds
        # and the totals / pass rate stay meaningful. A crash in the middle
        # of an already-counted check leaves "tests" unchanged.
        suite_results["tests"] = max(
            suite_results["tests"],
            suite_results["passed"] + suite_results["failed"],
        )
        suite_results["details"].append(f"Suite error: {e}")
        results["errors"].append(f"{name}: {e}")

    results["suites"][name] = suite_results
    results["total_tests"] += suite_results["tests"]
    results["passed"] += suite_results["passed"]
    results["failed"] += suite_results["failed"]

    return suite_results


def test_voi_scoring(r):
    """Run the four VoI scorer checks, recording outcomes in ``r``.

    ``r`` is the suite result dict; ``r["tests"]`` is bumped before each
    check runs, and the matching ``passed``/``failed`` counter plus a line
    in ``r["details"]`` are written once the check's condition is known.
    """
    from intelligence.voi_scoring import get_voi_scorer, OutcomeType
    from datetime import timedelta

    def _tally(ok, pass_line, fail_line):
        # Book one finished check under the right counter.
        if ok:
            r["passed"] += 1
            r["details"].append(pass_line)
        else:
            r["failed"] += 1
            r["details"].append(fail_line)

    scorer = get_voi_scorer()

    # Check 1: the total score for an item stamped "now" lies in [0, 1].
    r["tests"] += 1
    stamp_now = datetime.now(timezone.utc).isoformat()
    fresh = scorer.calculate_voi("t1", "test", stamp_now, 0.8, 0.7)
    _tally(
        0 <= fresh.total_score <= 1,
        "PASS: Basic VoI calculation",
        "FAIL: Invalid VoI score",
    )

    # Check 2: an item stamped seven days ago has a recency score below 0.5.
    r["tests"] += 1
    stamp_week_ago = (datetime.now(timezone.utc) - timedelta(days=7)).isoformat()
    aged = scorer.calculate_voi("t2", "old", stamp_week_ago, 0.5)
    _tally(
        aged.recency_score < 0.5,
        "PASS: Temporal decay working",
        "FAIL: Decay not applied",
    )

    # Check 3: a recorded outcome shows up in the scorer's outcome history.
    r["tests"] += 1
    scorer.record_outcome("t3", OutcomeType.SUCCESS, "test")
    _tally(
        "t3" in scorer.outcome_history,
        "PASS: Outcome tracking",
        "FAIL: Outcome not recorded",
    )

    # Check 4: scoring a one-item batch yields exactly one result.
    r["tests"] += 1
    single_item_batch = [
        {
            "id": "b1",
            "content": "t",
            "created_at": datetime.now(timezone.utc).isoformat(),
            "importance": 0.9,
        }
    ]
    scored = scorer.score_batch(single_item_batch)
    _tally(
        len(scored) == 1,
        "PASS: Batch scoring",
        "FAIL: Batch scoring failed",
    )


def test_self_improvement(r):
    """Run the four self-improvement loop checks, recording outcomes in ``r``.

    ``r`` is the suite result dict; each check bumps ``r["tests"]`` first,
    then calls the loop, then books a PASS or FAIL line.
    """
    from agents.self_improvement_loop import get_improvement_loop

    improver = get_improvement_loop()

    # Check 1: assessing capabilities returns at least one entry.
    r["tests"] += 1
    capabilities = improver.assess_capabilities()
    assessed_any = len(capabilities) > 0
    r["passed" if assessed_any else "failed"] += 1
    r["details"].append(
        f"PASS: Assessed {len(capabilities)} capabilities"
        if assessed_any
        else "FAIL: No capabilities assessed"
    )

    # Check 2: opportunity identification completes; any count is accepted.
    r["tests"] += 1
    opportunities = improver.identify_opportunities()
    r["passed"] += 1
    r["details"].append(f"PASS: Found {len(opportunities)} opportunities")

    # Check 3: a cycle report carries the "capabilities_assessed" field.
    # The PASS message is only built on success, so "cycle_number" is read
    # only when the report looked valid.
    r["tests"] += 1
    cycle_report = improver.run_cycle()
    report_valid = "capabilities_assessed" in cycle_report
    r["passed" if report_valid else "failed"] += 1
    r["details"].append(
        f"PASS: Cycle #{cycle_report['cycle_number']}"
        if report_valid
        else "FAIL: Invalid cycle report"
    )

    # Check 4: the summary exposes "average_capability_score".
    r["tests"] += 1
    overview = improver.get_summary()
    has_average = "average_capability_score" in overview
    r["passed" if has_average else "failed"] += 1
    r["details"].append(
        f"PASS: Summary at {overview['average_capability_score']}"
        if has_average
        else "FAIL: Summary generation failed"
    )


def test_evolution_protocol(r):
    """Run the four evolution protocol checks, recording outcomes in ``r``.

    ``r`` is the suite result dict; each check bumps ``r["tests"]`` before
    calling the protocol and then books a PASS or FAIL line.
    """
    from agents.evolution_protocol import get_protocol

    evo = get_protocol()

    # Check 1: the description "Add caching" must pass the safety check.
    r["tests"] += 1
    benign_verdict = evo.check_safety("test", "Add caching")
    if not benign_verdict.passed:
        r["failed"] += 1
        r["details"].append("FAIL: Safe change rejected")
    else:
        r["passed"] += 1
        r["details"].append("PASS: Safe change approved")

    # Check 2: the description "rm -rf /" must NOT pass the safety check.
    r["tests"] += 1
    hostile_verdict = evo.check_safety("test", "rm -rf /")
    if hostile_verdict.passed:
        r["failed"] += 1
        r["details"].append("FAIL: Dangerous pattern allowed")
    else:
        r["passed"] += 1
        r["details"].append("PASS: Dangerous pattern blocked")

    # Check 3: a proposed evolution comes back with a truthy proposal id.
    r["tests"] += 1
    draft = evo.propose_evolution("Test", "test", "Better")
    if not draft.proposal_id:
        r["failed"] += 1
        r["details"].append("FAIL: No proposal ID")
    else:
        r["passed"] += 1
        r["details"].append(f"PASS: Proposal {draft.proposal_id}")

    # Check 4: a full evolution cycle reports exactly six phases.
    r["tests"] += 1
    cycle_result = evo.run_evolution_cycle()
    phase_count = len(cycle_result.get("phases", {}))
    if phase_count != 6:
        r["failed"] += 1
        r["details"].append("FAIL: Incomplete cycle")
    else:
        r["passed"] += 1
        r["details"].append("PASS: 6 phases completed")


def test_knowledge_synthesis(r):
    """Run the four knowledge synthesis checks, recording outcomes in ``r``.

    ``r`` is the suite result dict; each check bumps ``r["tests"]`` first
    and then books a PASS or FAIL line once its condition is known.
    """
    from intelligence.knowledge_synthesis import get_knowledge_synthesizer

    synthesizer = get_knowledge_synthesizer()

    # Check 1: the scan returns at least one document across all groups.
    r["tests"] += 1
    scanned = synthesizer.scan_knowledge_base()
    doc_count = sum(map(len, scanned.values()))
    found_docs = doc_count > 0
    r["passed" if found_docs else "failed"] += 1
    r["details"].append(
        f"PASS: Found {doc_count} documents"
        if found_docs
        else "FAIL: No documents found"
    )

    # Check 2: synthesis reports a positive insight count.
    r["tests"] += 1
    synthesis = synthesizer.synthesize_all()
    has_insights = synthesis["total_insights"] > 0
    r["passed" if has_insights else "failed"] += 1
    r["details"].append(
        f"PASS: {synthesis['total_insights']} insights"
        if has_insights
        else "FAIL: No insights extracted"
    )

    # Check 3: the same report carries a non-negative action-item count.
    r["tests"] += 1
    actions_ok = synthesis["total_action_items"] >= 0
    r["passed" if actions_ok else "failed"] += 1
    r["details"].append(
        f"PASS: {synthesis['total_action_items']} actions"
        if actions_ok
        else "FAIL: Action extraction failed"
    )

    # Check 4: the progress report is truthy (non-empty).
    r["tests"] += 1
    progress_ok = bool(synthesizer.get_progress_report())
    r["passed" if progress_ok else "failed"] += 1
    r["details"].append(
        "PASS: Progress report generated"
        if progress_ok
        else "FAIL: No progress report"
    )


def test_integration(r):
    """Integration tests.

    Checks that the four modules import together and that their factory
    functions return truthy objects, then records an overall pipeline
    verdict. ``r`` is the suite result dict (``tests``, ``passed``,
    ``failed``, ``details``) and is updated in place. Exceptions raised by
    the individual checks are caught and recorded as failures.
    """
    # Baseline so the pipeline verdict in Test 4 depends only on failures
    # recorded by this function.
    failed_at_start = r["failed"]

    # Test 1: All modules import
    r["tests"] += 1
    try:
        from intelligence.voi_scoring import get_voi_scorer
        from agents.self_improvement_loop import get_improvement_loop
        from agents.evolution_protocol import get_protocol
        from intelligence.knowledge_synthesis import get_knowledge_synthesizer
        r["passed"] += 1
        r["details"].append("PASS: All modules import")
    except Exception as e:
        r["failed"] += 1
        r["details"].append(f"FAIL: Import error: {e}")

    # Test 2: VoI + Improvement integration
    # If the imports above failed, the factory names are unbound here and
    # the check is recorded as a failure through the except branch.
    r["tests"] += 1
    try:
        scorer = get_voi_scorer()
        loop = get_improvement_loop()
        if scorer and loop:
            r["passed"] += 1
            r["details"].append("PASS: VoI + Improvement compatible")
        else:
            r["failed"] += 1
            r["details"].append("FAIL: Integration failed")
    except Exception as e:
        r["failed"] += 1
        r["details"].append(f"FAIL: {e}")

    # Test 3: Evolution + Knowledge integration
    r["tests"] += 1
    try:
        protocol = get_protocol()
        synth = get_knowledge_synthesizer()
        if protocol and synth:
            r["passed"] += 1
            r["details"].append("PASS: Evolution + Knowledge compatible")
        else:
            r["failed"] += 1
            r["details"].append("FAIL: Integration failed")
    except Exception as e:
        r["failed"] += 1
        r["details"].append(f"FAIL: {e}")

    # Test 4: Full pipeline
    # Report the pipeline as operational only when every check above
    # passed; an unconditional PASS would inflate the pass count and claim
    # a working pipeline even when the modules failed to import.
    r["tests"] += 1
    if r["failed"] == failed_at_start:
        r["passed"] += 1
        r["details"].append("PASS: Full pipeline operational")
    else:
        r["failed"] += 1
        r["details"].append("FAIL: Full pipeline not operational")


def main():
    """Run every suite, print the report, and save the results as JSON.

    Suites run in a fixed order through ``run_suite``, which accumulates
    into the module-level ``results`` dict. After the per-suite and final
    summaries are printed, ``results`` is written to a fixed JSON path.

    Returns:
        The module-level ``results`` dict.
    """
    rule = "=" * 60
    print(rule, "GENESIS COMPREHENSIVE TEST SUITE", rule, sep="\n")
    print(f"Started: {results['timestamp']}")

    # (heading printed before the suite, key in results["suites"], suite function)
    suite_plan = (
        ("\n## VoI Scoring Tests", "voi_scoring", test_voi_scoring),
        ("\n## Self-Improvement Tests", "self_improvement", test_self_improvement),
        ("\n## Evolution Protocol Tests", "evolution_protocol", test_evolution_protocol),
        ("\n## Knowledge Synthesis Tests", "knowledge_synthesis", test_knowledge_synthesis),
        ("\n## Integration Tests", "integration", test_integration),
    )
    for heading, suite_key, suite_func in suite_plan:
        print(heading)
        run_suite(suite_key, suite_func)

    # Per-suite breakdown
    print("\n" + rule, "TEST RESULTS SUMMARY", rule, sep="\n")
    for suite_name, suite_data in results["suites"].items():
        verdict = "FAIL" if suite_data["failed"] != 0 else "PASS"
        print(f"\n{suite_name}: [{verdict}] {suite_data['passed']}/{suite_data['tests']}")
        for line in suite_data["details"]:
            print(f"  {line}")

    # Overall numbers; the pass rate is 0 when nothing was counted.
    total = results["total_tests"]
    if total > 0:
        pass_rate = results["passed"] / total * 100
    else:
        pass_rate = 0

    print("\n" + rule, "FINAL SUMMARY", rule, sep="\n")
    print(
        f"Total Tests: {results['total_tests']}",
        f"Passed: {results['passed']}",
        f"Failed: {results['failed']}",
        f"Pass Rate: {pass_rate:.1f}%",
        sep="\n",
    )

    suite_errors = results["errors"]
    if suite_errors:
        print("\nErrors:")
        for message in suite_errors:
            print(f"  - {message}")

    if results["failed"] == 0:
        overall = "ALL TESTS PASSED"
    else:
        overall = "SOME TESTS FAILED"
    print(f"\nStatus: {overall}")
    print(rule)

    # Persist the full results dict next to the tests.
    results_path = "/mnt/e/genesis-system/genesis-memory/tests/test_results.json"
    with open(results_path, 'w') as out:
        json.dump(results, out, indent=2)
    print(f"\nResults saved to: {results_path}")

    return results


if __name__ == "__main__":
    # main() returns the results dict; turn its failure count into the
    # process exit status so shells and CI can detect a failing run
    # (0 = all passed, 1 = at least one failure).
    sys.exit(0 if main()["failed"] == 0 else 1)