#!/usr/bin/env python3
"""
Research Agent Skill for Genesis System

This skill provides autonomous research capabilities including web search,
content analysis, and synthesis. It stores findings in a persistent memory
system for future reference and builds knowledge over time.

Usage:
    python research_agent.py "What are the latest developments in quantum computing?"

    Or import and use programmatically:
    from research_agent import ResearchAgent
    agent = ResearchAgent()
    results = agent.research("topic to research")
"""

import hashlib
import json
import logging
import os
import re
import sys
import time
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass, asdict, field
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
from urllib.parse import urlparse, quote_plus, parse_qs

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Optional imports
try:
    import requests
    REQUESTS_AVAILABLE = True
except ImportError:
    REQUESTS_AVAILABLE = False
    logger.warning("requests not installed. Install with: pip install requests")

try:
    from bs4 import BeautifulSoup
    BS4_AVAILABLE = True
except ImportError:
    BS4_AVAILABLE = False
    logger.warning("beautifulsoup4 not installed. Install with: pip install beautifulsoup4")


@dataclass
class SearchResult:
    """A single search hit returned by a SearchProvider implementation."""
    title: str    # Result title text as shown by the search engine
    url: str      # Target URL (unwrapped from redirect wrappers where possible)
    snippet: str  # Short excerpt/summary text; may be empty
    source: str  # 'google', 'duckduckgo', etc.
    rank: int    # 1-based position within the result list
    # ISO-8601 creation time; default captures when the result object was built
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())


@dataclass
class WebContent:
    """Text and metadata extracted from a single webpage by ContentExtractor."""
    url: str             # Source page URL
    title: str           # Page <title> text; may be empty
    content: str         # Cleaned body text (truncated by the extractor)
    word_count: int      # Whitespace-split word count of the full cleaned text
    headings: List[str]  # h1-h3 heading texts (extractor caps the list)
    links: List[str]     # Absolute http(s) links found on the page (capped)
    extracted_at: str    # ISO-8601 timestamp of extraction


@dataclass
class ResearchFinding:
    """A single piece of evidence extracted from a source for a query."""
    topic: str         # The research query this finding answers
    content: str       # Finding text (analyzer truncates long paragraphs)
    source_url: str    # URL the finding was extracted from; may be empty
    confidence: float  # 0-1 scale
    category: str  # 'fact', 'opinion', 'claim', 'statistic', etc.
    # Additional URLs that corroborate the finding (unused by the analyzer today)
    supporting_sources: List[str] = field(default_factory=list)


@dataclass
class ResearchReport:
    """Complete research report produced by one ResearchAgent.research() run."""
    query: str
    timestamp: str
    executive_summary: str
    key_findings: List[ResearchFinding]
    sources: List[SearchResult]
    analyzed_pages: int
    total_sources_found: int
    confidence_score: float
    follow_up_questions: List[str]
    metadata: Dict[str, Any]

    def to_dict(self) -> Dict:
        """Convert to a JSON-serializable dictionary."""
        # Nested dataclass lists are flattened to plain dicts up front so the
        # returned mapping contains only JSON-friendly values.
        finding_dicts = [asdict(finding) for finding in self.key_findings]
        source_dicts = [asdict(source) for source in self.sources]
        return {
            "query": self.query,
            "timestamp": self.timestamp,
            "executive_summary": self.executive_summary,
            "key_findings": finding_dicts,
            "sources": source_dicts,
            "analyzed_pages": self.analyzed_pages,
            "total_sources_found": self.total_sources_found,
            "confidence_score": self.confidence_score,
            "follow_up_questions": self.follow_up_questions,
            "metadata": self.metadata
        }


@dataclass
class MemoryEntry:
    """Entry in the research memory system (persisted to JSON on disk)."""
    id: str                    # Short md5-derived identifier (see ResearchMemory.store)
    topic: str                 # Research query/topic this entry answers
    content: str               # Stored finding text (e.g. an executive summary)
    sources: List[str]         # Source URLs backing the content
    created_at: str            # ISO-8601 creation time
    accessed_at: str           # ISO-8601 time of the most recent search hit
    access_count: int          # Number of times returned from memory search
    importance: float          # 0-1 weight used to boost search ranking
    related_topics: List[str]  # Topics of other entries sharing words with this one
    tags: List[str]            # Free-form tags for categorization


class SearchProvider(ABC):
    """Abstract base class for search providers (DuckDuckGo, mock, ...)."""

    @abstractmethod
    def search(self, query: str, num_results: int = 10) -> List[SearchResult]:
        """Perform a search and return up to num_results SearchResult objects.

        Implementations should return an empty list on failure rather than raise.
        """
        pass


class DuckDuckGoSearch(SearchProvider):
    """
    DuckDuckGo search provider.

    Uses the DuckDuckGo HTML interface (no API key required); results are
    scraped from the returned page rather than fetched via an API.
    """

    def __init__(self):
        # html.duckduckgo.com serves a plain-HTML results page suitable for parsing.
        self.base_url = "https://html.duckduckgo.com/html/"
        self.session = requests.Session() if REQUESTS_AVAILABLE else None
        if self.session:
            # A browser-like User-Agent reduces the chance of a blocked response.
            self.session.headers.update({
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
            })

    def search(self, query: str, num_results: int = 10) -> List[SearchResult]:
        """
        Perform a DuckDuckGo search.

        Args:
            query: Search query string.
            num_results: Maximum number of results to return.

        Returns:
            List of SearchResult objects; empty on failure or missing deps.
        """
        if not REQUESTS_AVAILABLE or not BS4_AVAILABLE:
            logger.warning("Required libraries not available for search")
            return []

        results: List[SearchResult] = []

        try:
            response = self.session.post(
                self.base_url,
                data={"q": query},
                timeout=15
            )
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # Parse result entries; stop once num_results have been seen.
            for rank, result in enumerate(soup.select('.result'), 1):
                if rank > num_results:
                    break

                title_elem = result.select_one('.result__title a')
                snippet_elem = result.select_one('.result__snippet')

                if title_elem:
                    title = title_elem.get_text(strip=True)
                    url = title_elem.get('href', '')

                    # DuckDuckGo wraps outbound URLs in a redirect whose real
                    # target is carried in the 'uddg' query parameter.
                    # (parse_qs/urlparse are imported at module level; the
                    # previous version re-imported urllib.parse per result.)
                    if 'uddg=' in url:
                        parsed = parse_qs(urlparse(url).query)
                        url = parsed.get('uddg', [url])[0]

                    snippet = snippet_elem.get_text(strip=True) if snippet_elem else ""

                    results.append(SearchResult(
                        title=title,
                        url=url,
                        snippet=snippet,
                        source="duckduckgo",
                        rank=rank
                    ))

        except Exception as e:
            # Network/parse failures are non-fatal: log and return what we have.
            logger.error(f"DuckDuckGo search failed: {e}")

        return results


class MockSearchProvider(SearchProvider):
    """
    Mock search provider for testing or when no real search is available.

    Generates deterministic placeholder results based on the query.
    """

    # Fixed (title prefix, domain, snippet prefix) templates for mock results.
    _MOCK_SOURCES = [
        ("Wikipedia", "wikipedia.org", "The free encyclopedia"),
        ("Research Paper", "arxiv.org", "Academic research"),
        ("News Article", "news.com", "Latest developments"),
        ("Blog Post", "medium.com", "Expert insights"),
        ("Documentation", "docs.example.com", "Technical reference"),
    ]

    def search(self, query: str, num_results: int = 10) -> List[SearchResult]:
        """
        Generate mock search results.

        Args:
            query: Search query (echoed into titles, URLs, and snippets).
            num_results: Maximum results to return (capped by template count).

        Returns:
            List of SearchResult objects with source="mock".
        """
        # NOTE: the previous version computed an unused query_words list;
        # the query is only interpolated into the templates below.
        results = []

        for rank, (title_prefix, domain, snippet_prefix) in enumerate(self._MOCK_SOURCES[:num_results], 1):
            results.append(SearchResult(
                title=f"{title_prefix}: {query.title()}",
                url=f"https://{domain}/{quote_plus(query)}",
                snippet=f"{snippet_prefix} about {query}. This is a mock result for testing.",
                source="mock",
                rank=rank
            ))

        return results


class ContentExtractor:
    """Extracts and processes textual content from web pages."""

    def __init__(self):
        self.session = requests.Session() if REQUESTS_AVAILABLE else None
        if self.session:
            self.session.headers.update({
                "User-Agent": "Mozilla/5.0 (compatible; GenesisResearchBot/1.0)"
            })

    def extract(self, url: str, timeout: int = 15) -> Optional[WebContent]:
        """
        Extract content from a URL.

        Args:
            url: The URL to extract content from
            timeout: Request timeout in seconds

        Returns:
            WebContent object or None if extraction fails
        """
        if not REQUESTS_AVAILABLE or not BS4_AVAILABLE:
            return None

        try:
            response = self.session.get(url, timeout=timeout)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # Drop non-content elements before extracting any text.
            for element in soup(['script', 'style', 'nav', 'footer', 'header', 'aside', 'form']):
                element.decompose()

            # Extract title. BUGFIX: soup.title.string is None for empty or
            # nested <title> tags; coerce so WebContent.title is always a str.
            title = (soup.title.string or "") if soup.title else ""

            # Collect h1-h3 headings in document order.
            headings = []
            for level in range(1, 4):
                for heading in soup.find_all(f'h{level}'):
                    text = heading.get_text(strip=True)
                    if text:
                        headings.append(text)

            # Collect absolute http(s) links only.
            links = []
            for link in soup.find_all('a', href=True):
                href = link.get('href', '')
                if href.startswith('http'):
                    links.append(href)

            # Prefer common main-content containers; fall back to <body>.
            content_selectors = ['article', 'main', '.content', '.post', '#content', '.entry']
            content = ""

            for selector in content_selectors:
                container = soup.select_one(selector)
                if container:
                    content = container.get_text(separator='\n', strip=True)
                    break

            if not content:
                # Fallback to body
                body = soup.find('body')
                if body:
                    content = body.get_text(separator='\n', strip=True)

            # Clean content
            content = self._clean_content(content)

            return WebContent(
                url=url,
                title=title,
                content=content[:50000],  # Limit content length
                word_count=len(content.split()),
                headings=headings[:20],
                links=links[:50],
                extracted_at=datetime.now().isoformat()
            )

        except Exception as e:
            logger.error(f"Failed to extract content from {url}: {e}")
            return None

    def _clean_content(self, content: str) -> str:
        """Collapse whitespace and drop short lines (likely navigation/UI)."""
        # Collapse runs of blank lines and repeated spaces.
        content = re.sub(r'\n\s*\n', '\n\n', content)
        content = re.sub(r' +', ' ', content)

        # Remove very short non-empty lines; keep blank lines as separators.
        lines = content.split('\n')
        lines = [line for line in lines if len(line.strip()) > 20 or not line.strip()]

        return '\n'.join(lines).strip()


class ResearchMemory:
    """
    Persistent memory system for research findings.

    Stores and retrieves research findings with semantic search capabilities.
    Each entry is persisted twice: once individually as ``<id>.json`` and
    once inside a shared ``index.json`` used for loading and keyword search.
    """

    def __init__(self, memory_path: Optional[str] = None):
        """
        Initialize the research memory.

        Args:
            memory_path: Path to store memory data. Defaults to
                ``../knowledge_base/research_memory`` relative to this file.
        """
        if memory_path is None:
            memory_path = os.path.join(
                os.path.dirname(__file__), "..", "knowledge_base", "research_memory"
            )
        self.memory_path = Path(memory_path)
        self.memory_path.mkdir(parents=True, exist_ok=True)

        self.index_path = self.memory_path / "index.json"
        # In-memory index of all entries, keyed by entry id.
        self.entries: Dict[str, MemoryEntry] = {}

        self._load_index()

    def _load_index(self):
        """Load the memory index from disk; a corrupt index resets to empty."""
        if self.index_path.exists():
            try:
                with open(self.index_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                    for entry_data in data.get('entries', []):
                        entry = MemoryEntry(**entry_data)
                        self.entries[entry.id] = entry
                logger.info(f"Loaded {len(self.entries)} memory entries")
            except Exception as e:
                # Discard a broken index rather than crashing startup.
                logger.error(f"Failed to load memory index: {e}")
                self.entries = {}

    def _save_index(self):
        """Save the memory index to disk (best-effort; errors are logged only)."""
        try:
            data = {
                'entries': [asdict(e) for e in self.entries.values()],
                'updated_at': datetime.now().isoformat()
            }
            with open(self.index_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
        except Exception as e:
            logger.error(f"Failed to save memory index: {e}")

    def store(self, topic: str, content: str, sources: List[str],
             importance: float = 0.5, tags: Optional[List[str]] = None) -> str:
        """
        Store a research finding in memory.

        Args:
            topic: The research topic
            content: The finding content
            sources: List of source URLs
            importance: Importance score (0-1)
            tags: Optional tags for categorization

        Returns:
            The ID of the stored entry
        """
        # md5 is used as a cheap fingerprint for an id, not for security;
        # the timestamp in the hashed text makes collisions unlikely.
        entry_id = hashlib.md5(
            f"{topic}:{content[:100]}:{datetime.now().isoformat()}".encode()
        ).hexdigest()[:12]

        entry = MemoryEntry(
            id=entry_id,
            topic=topic,
            content=content,
            sources=sources,
            created_at=datetime.now().isoformat(),
            accessed_at=datetime.now().isoformat(),
            access_count=0,
            importance=importance,
            related_topics=self._find_related_topics(topic),
            tags=tags or []
        )

        self.entries[entry_id] = entry

        # Save full entry content as its own file alongside the index.
        # NOTE(review): unlike _save_index, this write is not wrapped in
        # try/except, so a disk error here propagates to the caller.
        entry_path = self.memory_path / f"{entry_id}.json"
        with open(entry_path, 'w', encoding='utf-8') as f:
            json.dump(asdict(entry), f, indent=2, ensure_ascii=False)

        self._save_index()
        logger.info(f"Stored research memory: {entry_id}")

        return entry_id

    def search(self, query: str, limit: int = 10) -> List[MemoryEntry]:
        """
        Search memory for relevant entries.

        Args:
            query: Search query
            limit: Maximum results to return

        Returns:
            List of matching memory entries
        """
        query_words = set(query.lower().split())
        scores = []

        for entry in self.entries.values():
            # Simple keyword matching (in production, use embeddings).
            # Only the first ~100 content words participate in matching.
            entry_words = set(entry.topic.lower().split())
            entry_words.update(entry.content.lower().split()[:100])
            entry_words.update(tag.lower() for tag in entry.tags)

            overlap = len(query_words & entry_words)
            if overlap > 0:
                # Boost by importance (recency is not currently factored in).
                score = overlap * (1 + entry.importance)
                scores.append((entry, score))

        # Sort by score and return top results
        scores.sort(key=lambda x: x[1], reverse=True)
        results = [entry for entry, score in scores[:limit]]

        # Update access bookkeeping on every hit, then persist it.
        for entry in results:
            entry.accessed_at = datetime.now().isoformat()
            entry.access_count += 1

        self._save_index()

        return results

    def _find_related_topics(self, topic: str) -> List[str]:
        """Return topics of existing entries sharing at least one word with topic."""
        related = []
        topic_words = set(topic.lower().split())

        for entry in self.entries.values():
            entry_words = set(entry.topic.lower().split())
            if topic_words & entry_words:
                related.append(entry.topic)

        # set() dedupes but makes the order (and thus the 5 kept) arbitrary.
        return list(set(related))[:5]

    def get_statistics(self) -> Dict[str, Any]:
        """Get summary statistics over all stored memory entries."""
        return {
            "total_entries": len(self.entries),
            # set() dedupes topics; the sample of 20 is in arbitrary order.
            "topics": list(set(e.topic for e in self.entries.values()))[:20],
            "total_sources": sum(len(e.sources) for e in self.entries.values()),
            # max(..., 1) guards division by zero for an empty memory.
            "avg_importance": sum(e.importance for e in self.entries.values()) / max(len(self.entries), 1)
        }


class ContentAnalyzer:
    """
    Analyzes content to extract insights and findings.

    In production, this would use an LLM. Currently uses rule-based extraction.
    """

    def analyze(self, content: WebContent, query: str) -> List[ResearchFinding]:
        """
        Analyze content and extract findings relevant to the query.

        Args:
            content: Extracted web content
            query: Research query

        Returns:
            List of research findings, highest confidence first (max 10)
        """
        query_terms = set(query.lower().split())
        extracted: List[ResearchFinding] = []

        for paragraph in content.content.split('\n\n'):
            # Skip fragments too short to carry a meaningful finding.
            if len(paragraph) < 50:
                continue

            # Relevance = fraction of query terms present in the paragraph.
            shared = query_terms & set(paragraph.lower().split())
            relevance = len(shared) / max(len(query_terms), 1)
            if relevance <= 0.3:
                continue

            extracted.append(ResearchFinding(
                topic=query,
                content=paragraph[:500],
                source_url=content.url,
                confidence=min(0.9, relevance + 0.3),
                category=self._classify_finding(paragraph)
            ))

        # Drop near-duplicates, then rank best-first and cap per source.
        deduped = self._deduplicate_findings(extracted)
        return sorted(deduped, key=lambda f: f.confidence, reverse=True)[:10]

    def _classify_finding(self, text: str) -> str:
        """Assign a category label based on simple textual cues."""
        lowered = text.lower()

        # First matching rule wins; order encodes priority.
        rules = [
            (r'\d+%|\d+\s*(million|billion|thousand)', "statistic"),
            (r'according to|study|research|found that', "claim"),
            (r'(i think|in my opinion|i believe)', "opinion"),
            (r'(is|are|was|were)\s+(?:the|a|an)', "fact"),
        ]
        for pattern, label in rules:
            if re.search(pattern, lowered):
                return label

        return "insight"

    def _deduplicate_findings(self, findings: List[ResearchFinding]) -> List[ResearchFinding]:
        """Keep only the first finding for each distinct content prefix."""
        first_by_prefix: Dict[str, ResearchFinding] = {}
        for finding in findings:
            prefix = finding.content[:100].lower()
            # setdefault keeps the earliest finding with this prefix.
            first_by_prefix.setdefault(prefix, finding)
        return list(first_by_prefix.values())


class ResearchSynthesizer:
    """
    Synthesizes findings into a coherent research report.
    """

    # (category, section header, max findings shown) — order defines the
    # summary layout; headers embed the blank line before each later section.
    _SECTIONS = [
        ('fact', "Key Facts:", 3),
        ('statistic', "\nStatistics:", 3),
        ('claim', "\nNotable Claims:", 2),
        ('insight', "\nInsights:", 2),
    ]

    def synthesize(self, query: str, findings: List[ResearchFinding],
                  sources: List[SearchResult]) -> Tuple[str, List[str]]:
        """
        Synthesize findings into a summary.

        Args:
            query: Original research query
            findings: List of research findings
            sources: List of search results used

        Returns:
            Tuple of (executive_summary, follow_up_questions)
        """
        # Group findings by category so each section pulls its own items.
        by_category = defaultdict(list)
        for finding in findings:
            by_category[finding.category].append(finding)

        summary_parts = [f"Research on: {query}\n"]

        for category, header, limit in self._SECTIONS:
            if by_category.get(category):
                summary_parts.append(header)
                for f in by_category[category][:limit]:
                    summary_parts.append(f"  - {f.content[:200]}")

        summary_parts.append(f"\nBased on {len(sources)} sources.")

        executive_summary = '\n'.join(summary_parts)

        # Generate follow-up questions
        follow_up = self._generate_follow_ups(query, findings)

        return executive_summary, follow_up

    def _generate_follow_ups(self, query: str, findings: List[ResearchFinding]) -> List[str]:
        """Generate up to four follow-up questions not already implied by the query.

        The findings parameter is kept for interface stability; the previous
        version built a word set from all finding text but never used it.
        """
        questions = []
        query_lower = query.lower()

        # Suggest an angle only when the query does not already contain it.
        if 'how' not in query_lower:
            questions.append(f"How does {query} work in practice?")
        if 'why' not in query_lower:
            questions.append(f"Why is {query} important?")
        if 'future' not in query_lower:
            questions.append(f"What is the future of {query}?")
        if 'compare' not in query_lower:
            questions.append(f"How does {query} compare to alternatives?")

        return questions[:4]


class ResearchAgent:
    """
    Autonomous research agent that searches, analyzes, and synthesizes information.

    This agent orchestrates the entire research process:
    1. Search for relevant sources
    2. Extract content from sources
    3. Analyze content for findings
    4. Synthesize findings into a report
    5. Store findings in memory for future reference
    """

    def __init__(self,
                 search_provider: Optional[SearchProvider] = None,
                 memory_path: Optional[str] = None,
                 max_sources: int = 5):
        """
        Initialize the research agent.

        Args:
            search_provider: Search provider to use (defaults to DuckDuckGo
                when requests/bs4 are available, otherwise the mock provider)
            memory_path: Path for research memory
            max_sources: Maximum sources to analyze per query
        """
        self.search_provider = search_provider or self._get_default_provider()
        self.extractor = ContentExtractor()
        self.analyzer = ContentAnalyzer()
        self.synthesizer = ResearchSynthesizer()
        self.memory = ResearchMemory(memory_path)
        self.max_sources = max_sources

    def _get_default_provider(self) -> SearchProvider:
        """Get the default search provider based on installed libraries."""
        if REQUESTS_AVAILABLE and BS4_AVAILABLE:
            return DuckDuckGoSearch()
        return MockSearchProvider()

    def research(self, query: str, use_memory: bool = True) -> ResearchReport:
        """
        Perform autonomous research on a query.

        Args:
            query: The research query
            use_memory: Whether to check memory for existing findings

        Returns:
            ResearchReport with findings
        """
        start_time = time.time()
        logger.info(f"Starting research: {query}")

        # Check memory for existing findings before hitting the network.
        existing_findings = []
        if use_memory:
            memory_results = self.memory.search(query, limit=5)
            if memory_results:
                logger.info(f"Found {len(memory_results)} relevant memory entries")
                for entry in memory_results:
                    existing_findings.append(ResearchFinding(
                        topic=entry.topic,
                        content=entry.content[:300],
                        source_url=entry.sources[0] if entry.sources else "",
                        confidence=0.8,
                        category="memory"
                    ))

        # Search for new sources
        search_results = self.search_provider.search(query, num_results=10)
        logger.info(f"Found {len(search_results)} search results")

        # Analyze the top sources, counting only successful extractions.
        all_findings = list(existing_findings)
        analyzed_count = 0

        for result in search_results[:self.max_sources]:
            logger.info(f"Analyzing: {result.url}")

            content = self.extractor.extract(result.url)
            if content:
                findings = self.analyzer.analyze(content, query)
                all_findings.extend(findings)
                analyzed_count += 1

            # Be polite to servers
            time.sleep(0.5)

        # Synthesize findings into a summary plus follow-up questions.
        executive_summary, follow_ups = self.synthesizer.synthesize(
            query, all_findings, search_results
        )

        # Calculate confidence
        confidence = self._calculate_confidence(all_findings, analyzed_count)

        # Store the synthesized result so future queries can reuse it.
        if all_findings:
            self.memory.store(
                topic=query,
                content=executive_summary,
                sources=[f.source_url for f in all_findings if f.source_url],
                importance=confidence,
                # BUGFIX: query.split()[:1] instead of query.split()[0] —
                # the latter raised IndexError on empty/whitespace queries.
                tags=["research"] + query.split()[:1]
            )

        # Build report
        report = ResearchReport(
            query=query,
            timestamp=datetime.now().isoformat(),
            executive_summary=executive_summary,
            key_findings=all_findings[:20],
            sources=search_results,
            analyzed_pages=analyzed_count,
            total_sources_found=len(search_results),
            confidence_score=confidence,
            follow_up_questions=follow_ups,
            metadata={
                "research_time_seconds": time.time() - start_time,
                "memory_hits": len(existing_findings),
                "findings_by_category": self._count_by_category(all_findings)
            }
        )

        # Store report
        self._store_report(report)

        return report

    def _calculate_confidence(self, findings: List[ResearchFinding], sources_analyzed: int) -> float:
        """Calculate overall confidence (0-1) in the research results."""
        if not findings:
            return 0.0

        # Average finding confidence
        avg_confidence = sum(f.confidence for f in findings) / len(findings)

        # Boost for more sources (capped at +0.2)
        source_boost = min(0.2, sources_analyzed * 0.05)

        # Boost for finding variety (capped at +0.1)
        categories = set(f.category for f in findings)
        variety_boost = min(0.1, len(categories) * 0.025)

        return min(1.0, avg_confidence + source_boost + variety_boost)

    def _count_by_category(self, findings: List[ResearchFinding]) -> Dict[str, int]:
        """Count findings by category."""
        counts = defaultdict(int)
        for finding in findings:
            counts[finding.category] += 1
        return dict(counts)

    def _store_report(self, report: ResearchReport):
        """Persist the research report as JSON under <memory_path>/reports."""
        reports_path = self.memory.memory_path / "reports"
        reports_path.mkdir(exist_ok=True)

        # Filename combines a sanitized query slug with a timestamp.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        query_slug = re.sub(r'[^\w\s-]', '', report.query)[:30].strip().replace(' ', '_')
        filename = f"report_{query_slug}_{timestamp}.json"
        report_path = reports_path / filename

        with open(report_path, 'w', encoding='utf-8') as f:
            json.dump(report.to_dict(), f, indent=2, ensure_ascii=False)

        # BUGFIX: previously logged the literal placeholder "(unknown)".
        logger.info(f"Report stored: {report_path}")

    def quick_search(self, query: str) -> List[SearchResult]:
        """Perform a quick search without full analysis."""
        return self.search_provider.search(query, num_results=10)

    def get_memory_stats(self) -> Dict[str, Any]:
        """Get research memory statistics."""
        return self.memory.get_statistics()


def _print_memory_stats(stats: Dict[str, Any]) -> None:
    """Print research-memory statistics as a simple key/value listing."""
    print("\n" + "="*60)
    print("RESEARCH MEMORY STATISTICS")
    print("="*60)
    for key, value in stats.items():
        print(f"{key}: {value}")


def _run_quick_search(agent: "ResearchAgent", query: str) -> None:
    """Run a quick search (no analysis) and print the result list."""
    print(f"\nQuick search: {query}")
    results = agent.quick_search(query)
    print(f"\nFound {len(results)} results:\n")
    for result in results:
        print(f"{result.rank}. {result.title}")
        print(f"   {result.url}")
        print(f"   {result.snippet[:100]}...")
        print()


def _print_report(report: "ResearchReport") -> None:
    """Print a full research report section by section."""
    print("="*60)
    print("RESEARCH REPORT")
    print("="*60)
    print(f"\nQuery: {report.query}")
    print(f"Timestamp: {report.timestamp}")
    print(f"Confidence: {report.confidence_score:.1%}")
    print(f"Sources Analyzed: {report.analyzed_pages}/{report.total_sources_found}")

    print(f"\n--- Executive Summary ---")
    print(report.executive_summary)

    print(f"\n--- Key Findings ({len(report.key_findings)}) ---")
    for i, finding in enumerate(report.key_findings[:10], 1):
        print(f"\n{i}. [{finding.category.upper()}] (confidence: {finding.confidence:.1%})")
        print(f"   {finding.content[:200]}...")
        print(f"   Source: {finding.source_url}")

    print(f"\n--- Sources Used ---")
    for source in report.sources[:5]:
        print(f"  - {source.title}")
        print(f"    {source.url}")

    print(f"\n--- Follow-up Questions ---")
    for question in report.follow_up_questions:
        print(f"  ? {question}")

    print(f"\n--- Metadata ---")
    print(f"Research time: {report.metadata.get('research_time_seconds', 0):.2f}s")
    print(f"Memory hits: {report.metadata.get('memory_hits', 0)}")
    print(f"Findings by category: {report.metadata.get('findings_by_category', {})}")


def main():
    """Main entry point for the research agent skill.

    Parses CLI arguments and dispatches to one of four modes:
    memory stats, usage help, quick search, or full research.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Autonomous research agent"
    )
    parser.add_argument("query", nargs="?", help="Research query")
    parser.add_argument("--quick", "-q", action="store_true",
                       help="Quick search only (no analysis)")
    parser.add_argument("--no-memory", action="store_true",
                       help="Don't use research memory")
    parser.add_argument("--max-sources", "-m", type=int, default=5,
                       help="Maximum sources to analyze")
    parser.add_argument("--memory-stats", action="store_true",
                       help="Show memory statistics")

    args = parser.parse_args()

    agent = ResearchAgent(max_sources=args.max_sources)

    if args.memory_stats:
        _print_memory_stats(agent.get_memory_stats())
        return

    if not args.query:
        print(__doc__)
        print("\nUsage: python research_agent.py \"your research query\"")
        print("\nExample: python research_agent.py \"latest AI developments 2024\"")
        sys.exit(1)

    if args.quick:
        _run_quick_search(agent, args.query)
        return

    # Full research
    print(f"\nResearching: {args.query}")
    print("This may take a moment...\n")

    report = agent.research(args.query, use_memory=not args.no_memory)
    _print_report(report)

    return report


if __name__ == "__main__":
    # Run the CLI entry point only when executed as a script, not on import.
    main()
