
"""
HyDE (Hypothetical Document Embeddings) RAG Implementation.
Generates a hypothetical answer first, then uses its embedding for retrieval.
ADAPTED: Uses the Anthropic Messages API to generate the hypothetical document.
"""

from typing import List, Dict
from config import config
from naive_rag import NaiveRAG

class HydeRAG(NaiveRAG):
    """RAG pipeline that retrieves via Hypothetical Document Embeddings (HyDE).

    Instead of embedding the user's question directly, the pipeline first asks
    the LLM to write a short passage answering it, embeds that passage, and
    searches the vector store with the resulting vector.

    This class uses ``self.anthropic``, ``self.qdrant``, ``self.collection``,
    ``self.embed_query`` and ``self.generate``; none of them are defined here,
    so they are presumably provided by ``NaiveRAG`` (confirm against the base
    class).
    """

    # Token budget for the generated hypothetical passage. Kept as a class
    # attribute so subclasses can tune it without overriding the method.
    HYPOTHETICAL_MAX_TOKENS = 500

    def generate_hypothetical_document(self, query: str) -> str:
        """Generate a hypothetical answer to the query.

        Args:
            query: The user's question.

        Returns:
            The concatenated text of the model's reply, or an empty string
            when the reply contains no text.
        """
        prompt = f"""
        Please write a short passage that answers the following question. 
        It doesn't have to be factually accurate, but it should be relevant to the topic and keywords.
        
        Question: {query}
        """

        response = self.anthropic.messages.create(
            model=config.llm.model,
            max_tokens=self.HYPOTHETICAL_MAX_TOKENS,
            messages=[
                {"role": "user", "content": prompt}
            ],
            system="You are a helpful assistant.",
        )

        # The reply is a list of content blocks. It may be empty, and not every
        # block type carries a ``text`` attribute, so collect text defensively
        # rather than indexing ``content[0]``.
        pieces = (getattr(block, "text", None) for block in (response.content or []))
        return "".join(piece for piece in pieces if isinstance(piece, str))

    @staticmethod
    def _format_hit(hit) -> Dict:
        """Convert one scored search hit into the result dict used by this class."""
        # A point stored without a payload comes back with ``payload=None``.
        payload = hit.payload or {}
        return {
            "score": hit.score,
            # Prefer the "text" field, then "content"; fall back to the
            # stringified payload so the result always has some content.
            "content": payload.get("text") or payload.get("content", str(payload)),
            "metadata": payload,
            "strategy": "hyde",
        }

    def retrieve(self, query: str, limit: int = 5) -> List[Dict]:
        """Retrieve using HyDE strategy.

        Args:
            query: The user's question.
            limit: Maximum number of hits to request from the vector store.

        Returns:
            One dict per hit with the keys ``score``, ``content``,
            ``metadata`` and ``strategy``.
        """
        # 1. Generate hypothetical document
        hypothetical_doc = self.generate_hypothetical_document(query)

        # 2. Embed the hypothetical document (instead of the raw query).
        #    If the model produced no usable text, embed the real query so
        #    retrieval still has a meaningful vector.
        has_text = bool(hypothetical_doc and hypothetical_doc.strip())
        vector = self.embed_query(hypothetical_doc if has_text else query)

        # 3. Retrieve real documents using that vector
        hits = self.qdrant.query_points(
            collection_name=self.collection,
            query=vector,
            limit=limit,
            with_payload=True,
        ).points

        return [self._format_hit(hit) for hit in hits]

    def query(self, query: str, limit: int = 5) -> Dict:
        """End-to-end HyDE pipeline.

        Args:
            query: The user's question.
            limit: Maximum number of documents to retrieve as context.

        Returns:
            A dict with the original ``query``, the generated ``answer``, the
            retrieved ``context`` and the ``method`` label ``"HyDE"``.
        """
        context = self.retrieve(query, limit=limit)
        answer = self.generate(query, context)

        return {
            "query": query,
            "answer": answer,
            "context": context,
            "method": "HyDE",
        }

if __name__ == "__main__":
    # When run as a script, only construct the pipeline; no query is issued
    # and the instance is not used further here.
    rag = HydeRAG()
