#!/usr/bin/env python3
"""
COMPREHENSIVE TEST SUITE: Context Synthesizer
==============================================
Tests for core/knowledge/context_synthesizer.py

Test Coverage:
- Black Box Tests: External behavior without internal knowledge
- White Box Tests: Internal implementation paths and branches

Story: KG-006
Author: Genesis System
Version: 1.0.0
"""

import sys
import pytest
import json
import tempfile
import shutil
from pathlib import Path
from datetime import datetime, timedelta
from unittest.mock import Mock, patch, MagicMock

# Add Genesis to path
sys.path.insert(0, '/mnt/e/genesis-system')

from core.knowledge.context_synthesizer import (
    ContextSynthesizer,
    ContextPackage,
    Entity,
    Learning,
    Relationship
)


# ============================================================================
# FIXTURES
# ============================================================================

@pytest.fixture
def temp_workspace():
    """Yield a throwaway workspace directory, removed after the test.

    Uses TemporaryDirectory as a context manager instead of a manual
    mkdtemp/rmtree pair, so the directory is cleaned up even if removal
    raises mid-teardown; pytest runs the post-yield code on test failure too.
    """
    with tempfile.TemporaryDirectory(prefix='genesis_test_') as temp_dir:
        yield Path(temp_dir)


@pytest.fixture
def mock_workspace(temp_workspace):
    """Populate a temporary Genesis workspace with sample KG, learning and task data."""
    now = datetime.now()

    kg_dir = temp_workspace / "KNOWLEDGE_GRAPH"
    data_dir = temp_workspace / "data"
    loop_dir = temp_workspace / "loop"
    for directory in (kg_dir, data_dir, loop_dir):
        directory.mkdir()

    # Three entities spanning recent/medium/old timestamps and confidences.
    sample_entities = [
        {
            "id": "ENTITY_001",
            "type": "strategy_node",
            "title": "Test Strategy",
            "timestamp": now.isoformat(),
            "confidence": 0.9,
            "relevance": "high",
        },
        {
            "id": "ENTITY_002",
            "type": "protocol",
            "title": "Test Protocol",
            "timestamp": (now - timedelta(days=10)).isoformat(),
            "confidence": 0.7,
            "relevance": "medium",
        },
        {
            "id": "ENTITY_003",
            "type": "technology_enabler",
            "title": "Old Tech",
            "timestamp": (now - timedelta(days=100)).isoformat(),
            "confidence": 0.5,
            "relevance": "low",
        },
    ]
    (kg_dir / "entities.jsonl").write_text(
        "".join(json.dumps(entity) + "\n" for entity in sample_entities)
    )

    # A single dependency edge between the two newest entities.
    sample_relationships = [
        {"from": "ENTITY_001", "to": "ENTITY_002", "type": "depends_on"},
    ]
    (kg_dir / "relationships.jsonl").write_text(
        "".join(json.dumps(rel) + "\n" for rel in sample_relationships)
    )

    # One strong actionable learning and one weak non-actionable one.
    sample_learnings = {
        "learning_001": {
            "learning_id": "learning_001",
            "category": "optimization",
            "insight": "High-confidence optimization insight",
            "confidence": 0.95,
            "source_events": 5,
            "created_at": now.isoformat(),
            "last_updated": now.isoformat(),
            "actionable": True,
        },
        "learning_002": {
            "learning_id": "learning_002",
            "category": "testing",
            "insight": "Low-confidence testing insight",
            "confidence": 0.4,
            "source_events": 1,
            "created_at": (now - timedelta(days=5)).isoformat(),
            "last_updated": (now - timedelta(days=5)).isoformat(),
            "actionable": False,
        },
    }
    (data_dir / "titan_learnings.json").write_text(json.dumps(sample_learnings))

    # Minimal task board; the acceptance criterion references ENTITY_001.
    sample_tasks = {
        "project": "Test Project",
        "stories": [
            {
                "id": "STORY-001",
                "title": "Test Story",
                "acceptance_criteria": [
                    {"description": "Test ENTITY_001 integration"}
                ],
            }
        ],
    }
    (loop_dir / "tasks.json").write_text(json.dumps(sample_tasks))

    return temp_workspace


@pytest.fixture
def synthesizer(mock_workspace):
    """Return a ContextSynthesizer wired to the populated mock workspace."""
    return ContextSynthesizer(workspace_path=str(mock_workspace), token_budget=5000)


# ============================================================================
# BLACK BOX TESTS - Testing External Behavior
# ============================================================================

class TestBlackBox:
    """Black box tests - exercise the public API without internal knowledge."""

    def test_initialization_with_default_params(self):
        """Default construction uses the standard workspace and budget."""
        synth = ContextSynthesizer()
        assert synth is not None
        assert synth.workspace == Path("/mnt/e/genesis-system")
        assert synth.token_budget == 5000

    def test_initialization_with_custom_params(self, temp_workspace):
        """Constructor arguments override the defaults."""
        synth = ContextSynthesizer(
            workspace_path=str(temp_workspace),
            token_budget=10000,
        )
        assert synth.workspace == temp_workspace
        assert synth.token_budget == 10000

    def test_generate_context_returns_package(self, synthesizer):
        """generate_context produces a fully-typed ContextPackage."""
        package = synthesizer.generate_context()

        assert isinstance(package, ContextPackage)
        expected_types = {
            'active_learnings': list,
            'recent_entities': list,
            'key_relationships': list,
            'recommended_focus': list,
            'memory_stats': dict,
            'token_count': int,
        }
        for field, field_type in expected_types.items():
            assert isinstance(getattr(package, field), field_type)

    def test_generate_context_respects_token_budget(self, synthesizer):
        """Generated context is non-empty yet fits inside the budget."""
        package = synthesizer.generate_context()
        assert 0 < package.token_count <= synthesizer.token_budget

    def test_generate_context_with_empty_knowledge_graph(self, temp_workspace):
        """An empty knowledge graph yields an empty-but-valid package."""
        kg_dir = temp_workspace / "KNOWLEDGE_GRAPH"
        kg_dir.mkdir()
        for filename in ("entities.jsonl", "relationships.jsonl"):
            (kg_dir / filename).touch()

        data_dir = temp_workspace / "data"
        data_dir.mkdir()
        (data_dir / "titan_learnings.json").write_text(json.dumps({}))

        synth = ContextSynthesizer(workspace_path=str(temp_workspace))
        package = synth.generate_context()

        assert isinstance(package, ContextPackage)
        assert not package.active_learnings
        assert not package.recent_entities

    def test_json_output_is_valid_json(self, synthesizer):
        """JSON output parses back into a dict with the expected keys."""
        package = synthesizer.generate_context()
        parsed = json.loads(synthesizer.to_json(package))

        assert isinstance(parsed, dict)
        for key in ('active_learnings', 'recent_entities'):
            assert key in parsed

    def test_markdown_output_contains_required_sections(self, synthesizer):
        """Markdown output carries every required briefing section."""
        md_str = synthesizer.to_markdown(synthesizer.generate_context())

        required_sections = (
            "# Genesis Context Briefing",
            "## Memory Statistics",
            "## Recommended Focus",
            "## Active Learnings",
            "## Recent High-Priority Entities",
            "## Key Relationships",
        )
        for section in required_sections:
            assert section in md_str

    def test_save_to_file_creates_files(self, synthesizer, temp_workspace):
        """Saving writes non-empty JSON and Markdown files to disk."""
        json_path = temp_workspace / "test_output.json"
        md_path = temp_workspace / "test_output.md"

        package = synthesizer.generate_context()
        synthesizer.to_json(package, str(json_path))
        synthesizer.to_markdown(package, str(md_path))

        for path in (json_path, md_path):
            assert path.exists()
            assert path.stat().st_size > 0

    def test_generate_and_save_convenience_method(self, synthesizer, temp_workspace):
        """generate_and_save returns a package and writes both files."""
        json_path = temp_workspace / "output.json"
        md_path = temp_workspace / "output.md"

        package = synthesizer.generate_and_save(
            json_path=str(json_path),
            md_path=str(md_path),
        )

        assert isinstance(package, ContextPackage)
        assert json_path.exists()
        assert md_path.exists()

    def test_prioritization_favors_recent_entities(self, mock_workspace):
        """Recent entities outrank old ones in the generated package."""
        synth = ContextSynthesizer(workspace_path=str(mock_workspace))
        package = synth.generate_context()

        if len(package.recent_entities) > 1:
            # ENTITY_001 carries the newest timestamp in the fixture data.
            assert package.recent_entities[0].id == "ENTITY_001"

    def test_prioritization_favors_high_confidence_learnings(self, mock_workspace):
        """High-confidence learnings come first in the package."""
        synth = ContextSynthesizer(workspace_path=str(mock_workspace))
        package = synth.generate_context()

        if package.active_learnings:
            assert package.active_learnings[0].confidence >= 0.9


# ============================================================================
# WHITE BOX TESTS - Testing Internal Implementation
# ============================================================================

class TestWhiteBox:
    """White box tests - test with knowledge of internal implementation."""

    def test_load_entities_populates_list(self, synthesizer):
        """_load_entities turns all three fixture rows into Entity objects."""
        synthesizer._load_entities()

        assert len(synthesizer.entities) == 3
        for entity in synthesizer.entities:
            assert isinstance(entity, Entity)

    def test_load_entities_adds_to_graph(self, synthesizer):
        """Each loaded entity becomes a node in the NetworkX graph."""
        synthesizer._load_entities()

        assert synthesizer.graph.number_of_nodes() == 3
        assert "ENTITY_001" in synthesizer.graph.nodes

    def test_load_relationships_populates_list(self, synthesizer):
        """_load_relationships builds Relationship objects from the JSONL file."""
        # Entities must be loaded before relationships can reference them.
        synthesizer._load_entities()
        synthesizer._load_relationships()

        assert len(synthesizer.relationships) >= 1
        for rel in synthesizer.relationships:
            assert isinstance(rel, Relationship)

    def test_load_relationships_adds_edges_to_graph(self, synthesizer):
        """Loaded relationships appear as graph edges."""
        synthesizer._load_entities()
        synthesizer._load_relationships()

        assert synthesizer.graph.number_of_edges() >= 1

    def test_load_learnings_populates_list(self, synthesizer):
        """Both fixture learnings load as Learning objects."""
        synthesizer._load_learnings()

        assert len(synthesizer.learnings) == 2
        for learning in synthesizer.learnings:
            assert isinstance(learning, Learning)

    def test_load_active_tasks_extracts_story_ids(self, synthesizer):
        """Story IDs from tasks.json show up in active_tasks."""
        synthesizer._load_active_tasks()

        assert synthesizer.active_tasks
        assert any("STORY-001" in task for task in synthesizer.active_tasks)

    def test_calculate_recency_score_recent_is_high(self, synthesizer):
        """A just-now timestamp scores close to 1.0."""
        score = synthesizer._calculate_recency_score(datetime.now().isoformat())
        assert score > 0.9

    def test_calculate_recency_score_old_is_low(self, synthesizer):
        """A year-old timestamp scores close to 0.0."""
        year_ago = (datetime.now() - timedelta(days=365)).isoformat()
        assert synthesizer._calculate_recency_score(year_ago) < 0.1

    def test_calculate_recency_score_handles_invalid_timestamp(self, synthesizer):
        """Unparseable timestamps fall back to a 0.0 score."""
        for bad_value in ("invalid", ""):
            assert synthesizer._calculate_recency_score(bad_value) == 0.0

    def test_calculate_entity_relevance_components(self, synthesizer):
        """Relevance blends confidence, recency and task match into [0, 1]."""
        synthesizer.load_all()

        entity = synthesizer.entities[0]
        relevance = synthesizer._calculate_entity_relevance(entity)

        assert 0.0 <= relevance <= 1.0

        if entity.id == "ENTITY_001":
            # Fresh + high-confidence + referenced by an active task.
            assert relevance > 0.5

    def test_calculate_task_match_finds_matches(self, synthesizer):
        """Entities mentioning an active story ID produce a positive match."""
        synthesizer._load_active_tasks()

        candidate = Entity(
            id="ENTITY_001",
            type="test",
            data={"title": "STORY-001 implementation"}
        )

        assert synthesizer._calculate_task_match(candidate) > 0.0

    def test_calculate_task_match_no_tasks_returns_zero(self, synthesizer):
        """With no active tasks there is nothing to match against."""
        synthesizer.active_tasks = []

        orphan = Entity(id="TEST", type="test", data={})

        assert synthesizer._calculate_task_match(orphan) == 0.0

    def test_prioritize_entities_returns_sorted_list(self, synthesizer):
        """_prioritize_entities honors the limit and sorts by relevance."""
        synthesizer.load_all()

        ranked = synthesizer._prioritize_entities(limit=2)

        assert len(ranked) <= 2
        if len(ranked) == 2:
            # Descending order by relevance score.
            assert ranked[0].relevance_score >= ranked[1].relevance_score

    def test_prioritize_learnings_returns_sorted_list(self, synthesizer):
        """_prioritize_learnings ranks by confidence/recency/actionability."""
        synthesizer._load_learnings()

        ranked = synthesizer._prioritize_learnings(limit=5)

        assert len(ranked) <= 5
        if len(ranked) >= 2:
            # learning_001 is the high-confidence actionable fixture entry.
            assert ranked[0].learning_id == "learning_001"

    def test_get_key_relationships_filters_by_entities(self, synthesizer):
        """Only relationships touching the supplied entities are returned."""
        synthesizer.load_all()

        focus = [Entity(id="ENTITY_001", type="test", data={})]

        for rel in synthesizer._get_key_relationships(focus):
            assert "ENTITY_001" in (rel.from_entity, rel.to_entity)

    def test_estimate_tokens_rough_accuracy(self, synthesizer):
        """Token estimation stays within the test's accepted band."""
        # NOTE(review): "This is a test" * 100 is 1400 chars, not ~400 as the
        # original comment claimed; the 80-120 band implies the estimator is
        # not a plain chars/4 heuristic — confirm against the implementation.
        sample = "This is a test" * 100
        assert 80 <= synthesizer._estimate_tokens(sample) <= 120

    def test_truncate_to_budget_reduces_content(self, synthesizer):
        """Over-budget inputs come back shortened on every axis."""
        now_iso = datetime.now().isoformat()

        big_learnings = [
            Learning(
                learning_id=f"L{i}",
                category="test",
                insight="x" * 1000,  # deliberately oversized payload
                confidence=0.9,
                source_events=1,
                created_at=now_iso,
                last_updated=now_iso,
                actionable=True
            )
            for i in range(100)
        ]

        big_entities = [
            Entity(id=f"E{i}", type="test", data={"desc": "x" * 1000})
            for i in range(100)
        ]

        big_relationships = [
            Relationship(from_entity=f"E{i}", to_entity=f"E{i+1}", relationship_type="test")
            for i in range(100)
        ]

        # Force heavy truncation with a tiny budget.
        synthesizer.token_budget = 1000

        kept_l, kept_e, kept_r = synthesizer._truncate_to_budget(
            big_learnings,
            big_entities,
            big_relationships
        )

        assert len(kept_l) < len(big_learnings)
        assert len(kept_e) < len(big_entities)
        assert len(kept_r) < len(big_relationships)

    def test_generate_recommendations_from_entities(self, synthesizer):
        """_generate_recommendations returns a short list of strings."""
        synthesizer.load_all()

        recommendations = synthesizer._generate_recommendations(
            synthesizer._prioritize_entities(limit=5),
            synthesizer._prioritize_learnings(limit=5),
        )

        assert isinstance(recommendations, list)
        assert len(recommendations) <= 5
        for recommendation in recommendations:
            assert isinstance(recommendation, str)

    def test_load_all_loads_everything(self, synthesizer):
        """load_all populates at least entities and learnings."""
        synthesizer.load_all()

        assert synthesizer.entities
        assert synthesizer.learnings
        # Relationships and active tasks may legitimately be empty.


# ============================================================================
# INTEGRATION TESTS
# ============================================================================

class TestIntegration:
    """Integration tests - test complete workflows."""

    def test_end_to_end_context_generation(self, synthesizer, temp_workspace):
        """Test complete end-to-end context generation workflow."""
        # Generate context; the max_* arguments are upper bounds.
        package = synthesizer.generate_context(
            max_learnings=5,
            max_entities=10,
            max_relationships=10
        )

        # Verify package structure and that the caps were honored.
        assert isinstance(package, ContextPackage)
        assert len(package.active_learnings) <= 5
        assert len(package.recent_entities) <= 10
        assert len(package.key_relationships) <= 10

        # Save to files
        json_path = temp_workspace / "context.json"
        md_path = temp_workspace / "context.md"

        synthesizer.to_json(package, str(json_path))
        synthesizer.to_markdown(package, str(md_path))

        # Verify files
        assert json_path.exists()
        assert md_path.exists()

        # Verify JSON round-trips through the parser
        with open(json_path) as f:
            data = json.load(f)
        assert 'active_learnings' in data

        # Verify Markdown is readable
        with open(md_path) as f:
            content = f.read()
        assert "Genesis Context Briefing" in content

    def test_cli_execution(self, mock_workspace, temp_workspace):
        """Test CLI execution path with a temporarily patched argv."""
        from core.knowledge.context_synthesizer import main

        json_out = temp_workspace / "cli_output.json"
        md_out = temp_workspace / "cli_output.md"

        argv = [
            'context_synthesizer.py',
            '--workspace', str(mock_workspace),
            '--budget', '3000',
            '--json', str(json_out),
            '--markdown', str(md_out)
        ]

        # BUG FIX: the original assigned sys.argv directly and never restored
        # it, leaking the fake argv into every subsequently run test.
        # patch.object restores the real argv even if main() raises.
        with patch.object(sys, 'argv', argv):
            main()

        # Verify outputs
        assert json_out.exists()
        assert md_out.exists()


# ============================================================================
# EDGE CASES AND ERROR HANDLING
# ============================================================================

class TestEdgeCases:
    """Test edge cases and error conditions."""

    def test_missing_knowledge_graph_directory(self, temp_workspace):
        """A workspace without KNOWLEDGE_GRAPH/ loads zero entities, no crash."""
        synth = ContextSynthesizer(workspace_path=str(temp_workspace))
        synth._load_entities()

        assert not synth.entities

    def test_missing_titan_learnings_file(self, temp_workspace):
        """An absent titan_learnings.json yields zero learnings, no crash."""
        synth = ContextSynthesizer(workspace_path=str(temp_workspace))
        synth._load_learnings()

        assert not synth.learnings

    def test_corrupted_json_in_entities(self, mock_workspace):
        """A malformed JSONL line must not abort loading of the valid lines."""
        entities_path = mock_workspace / "KNOWLEDGE_GRAPH" / "entities.jsonl"
        with open(entities_path, 'a') as f:
            f.write("{ this is not valid json\n")

        synth = ContextSynthesizer(workspace_path=str(mock_workspace))
        synth._load_entities()

        # The three valid fixture entries should still be present.
        assert len(synth.entities) >= 3

    def test_zero_token_budget(self, mock_workspace):
        """A zero budget should degrade gracefully rather than crash."""
        synth = ContextSynthesizer(
            workspace_path=str(mock_workspace),
            token_budget=0
        )

        assert isinstance(synth.generate_context(), ContextPackage)

    def test_entity_without_timestamp(self, synthesizer):
        """Relevance must stay in [0, 1] even without a timestamp field."""
        bare_entity = Entity(id="TEST", type="test", data={})

        score = synthesizer._calculate_entity_relevance(bare_entity)

        assert 0.0 <= score <= 1.0

    def test_learning_without_confidence(self):
        """Constructing a Learning without its required fields should fail."""
        with pytest.raises((KeyError, TypeError)):
            Learning(
                learning_id="test",
                category="test"
            )


# ============================================================================
# PERFORMANCE TESTS
# ============================================================================

class TestPerformance:
    """Test performance characteristics."""

    def test_large_entity_set_performance(self, temp_workspace):
        """A 1000-entity graph must synthesize in under five seconds."""
        import time

        # Create large entity file
        kg_dir = temp_workspace / "KNOWLEDGE_GRAPH"
        kg_dir.mkdir()
        entities_path = kg_dir / "entities.jsonl"

        # Write 1000 entities; the timestamp is invariant, so compute it once.
        timestamp = datetime.now().isoformat()
        with open(entities_path, 'w') as f:
            for i in range(1000):
                entity = {
                    "id": f"ENTITY_{i}",
                    "type": "test",
                    "title": f"Test {i}",
                    "timestamp": timestamp,
                    "confidence": 0.5
                }
                f.write(json.dumps(entity) + '\n')

        synth = ContextSynthesizer(workspace_path=str(temp_workspace))

        # FIX: use the monotonic perf_counter instead of time.time(); the
        # wall clock can jump under NTP adjustment and flake the assertion.
        start = time.perf_counter()
        package = synth.generate_context()
        elapsed = time.perf_counter() - start

        # Should complete in reasonable time (< 5 seconds)
        assert elapsed < 5.0
        assert isinstance(package, ContextPackage)

    def test_token_estimation_speed(self, synthesizer):
        """100 estimations over 50k characters must finish in < 0.1s."""
        import time

        large_text = "test " * 10000  # 50k characters

        # Monotonic clock for the same reason as above.
        start = time.perf_counter()
        for _ in range(100):
            synthesizer._estimate_tokens(large_text)
        elapsed = time.perf_counter() - start

        # Should be very fast (< 0.1s for 100 iterations)
        assert elapsed < 0.1


# ============================================================================
# RUN TESTS
# ============================================================================

if __name__ == '__main__':
    # Propagate pytest's exit status so CI/scripts see test failures;
    # the original discarded the return value and always exited 0.
    raise SystemExit(pytest.main([__file__, '-v', '--tb=short']))
