import os
import sys
import re
from pathlib import Path
from typing import List, Dict
from bs4 import BeautifulSoup
import glob

# Make modules that live next to this script importable: the directory that
# contains this file is appended to sys.path before the imports below run.
sys.path.append(str(Path(__file__).parent))
try:
    # Both imports must succeed; MemoryItemInput is imported here but is not
    # referenced in the visible portion of this file.
    from genesis_memory_cortex import MemoryCortex, MemoryTier
    from memory_schemas import MemoryItemInput
except ImportError:
    # Without the cortex nothing can be ingested, so report and exit with
    # status 1 instead of continuing.
    print("FATAL: Cortex not found.")
    sys.exit(1)

# Shared cortex instance used by every ingest function in this file. It is
# constructed at import time with enable_vectors=True.
CORTEX = MemoryCortex(enable_vectors=True)

# Finds a patent label such as "Patent #1 Cryptographic Validation" anywhere in
# a node's text. Group 1 is the id ("#1"); group 2 is the rest of the label.
# Searching (rather than splitting on the first two spaces) keeps the id correct
# even when the node text carries a prefix, e.g. "US Patent #1 ...".
_PATENT_LABEL_RE = re.compile(r"Patent (#\S+)\s+(\S.*)", re.DOTALL)


def ingest_patent_visuals(html_path: str) -> None:
    """Ingest the Patent Visualization Boards HTML into the cortex.

    Two kinds of items are stored, both forced into the SEMANTIC tier:

    1. One item per unique patent id found in ``.patent-node`` elements
       (domain ``intellectual_property``). The first name seen for an id wins.
    2. One item per ``.board`` element (domain ``strategy``), built from the
       board's ``<h2>`` title, first ``<p>`` description and up to 5000
       characters of the board's full text.

    Storage is best-effort: a failure to store one item is reported and the
    remaining items are still processed.

    Args:
        html_path: Path to the UTF-8 encoded HTML file to parse.

    Raises:
        OSError: If ``html_path`` cannot be opened.
    """
    print(f"📄 Processing Patents: {html_path}")

    with open(html_path, "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f.read(), "html.parser")

    # 1. Patent Analysis
    # The HTML structure is .patent-node for individual patents
    patent_nodes = soup.find_all(class_="patent-node")

    print(f"  Found {len(patent_nodes)} patent references (visual nodes).")

    # Extract unique patents, keyed by id ("#1"), keeping the first name seen.
    patents = {}
    for node in patent_nodes:
        text = node.get_text(separator=" ", strip=True)
        label = _PATENT_LABEL_RE.search(text)
        if label is None:
            # Not a "Patent #N <name>" node (or the name is missing): ignore.
            continue
        patents.setdefault(label.group(1), label.group(2).strip())

    print(f"  Unique Patents Identified: {patents}")

    # Store each Patent as a Knowledge Item
    for p_id, p_name in patents.items():
        content = f"GENESIS PATENT {p_id}: {p_name}"
        print(f"  🧠 Remembering: {content}")
        try:
            CORTEX.remember(
                content=content,
                source="patent_ingest",
                domain="intellectual_property",
                force_tier=MemoryTier.SEMANTIC,
                metadata={"type": "patent", "patent_id": p_id}
            )
        except Exception as e:
            print(f"  ❌ Failed to store {p_id}: {e}")

    # 2. Extract Board Summaries (The "Lens")
    for board in soup.find_all(class_="board"):
        # find() returns None when the tag is absent; fall back instead of
        # crashing so one malformed board does not abort the whole run.
        heading = board.find("h2")
        blurb = board.find("p")
        title = heading.text.strip() if heading is not None else "Untitled Board"
        desc = blurb.text.strip() if blurb is not None else ""
        full_text = board.get_text(separator="\n", strip=True)

        content = f"PATENT STRATEGY - {title}: {desc}\n\n{full_text[:5000]}"  # Cap size
        print(f"  🧠 Remembering Board: {title}")
        # Same best-effort policy as the patent loop above.
        try:
            CORTEX.remember(
                content=content,
                source="patent_ingest",
                domain="strategy",
                force_tier=MemoryTier.SEMANTIC,
                metadata={"type": "strategy_board"}
            )
        except Exception as e:
            print(f"  ❌ Failed to store board {title}: {e}")

def ingest_markdown_docs(docs_dir: str, *, max_chars: int = 50000) -> None:
    """Ingest every Markdown file found under ``docs_dir`` into the cortex.

    The directory is searched recursively for ``*.md`` files. Files that sit
    inside a ``node_modules``, ``venv`` or ``.venv`` directory are skipped; the
    check is made on whole path components, so a document that merely has
    "venv" in its name (e.g. ``venv_setup.md``) is still ingested. The printed
    count reflects only the files that will actually be ingested.

    Each document is stored as a single SEMANTIC item in the
    ``knowledge_base`` domain. Processing is best-effort: a file that cannot
    be read or stored is reported and the remaining files are still processed.

    Args:
        docs_dir: Root directory to search.
        max_chars: Maximum number of characters of a document to keep; longer
            documents are cut at this length and marked as truncated.
    """
    excluded_dirs = {"node_modules", "venv", ".venv"}
    files = sorted(
        path
        for path in glob.glob(f"{docs_dir}/**/*.md", recursive=True)
        if excluded_dirs.isdisjoint(Path(path).parts)
    )
    print(f"📄 Processing {len(files)} Markdown Docs...")

    for file_path in files:
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()

            filename = Path(file_path).name
            print(f"  🧠 Remembering Doc: {filename}")

            # Naive size cap: no chunking, just keep the first max_chars
            # characters of oversized documents.
            if len(content) > max_chars:
                content = content[:max_chars] + "...(truncated)"

            CORTEX.remember(
                content=f"DOCUMENT: {filename}\n{content}",
                source="doc_ingest",
                domain="knowledge_base",
                force_tier=MemoryTier.SEMANTIC,
                metadata={"filename": filename, "path": file_path}
            )
        except Exception as e:
            print(f"  ❌ Failed to process {file_path}: {e}")

if __name__ == "__main__":
    # Step 1: the patent visualization boards (a single HTML file).
    patent_board_html = "E:/genesis-system/docs/GENESIS PATENTS/VISUALS MAPS/patent_visualization_boards.html"
    if not os.path.exists(patent_board_html):
        print("⚠️ Patent Visualization Board not found.")
    else:
        ingest_patent_visuals(patent_board_html)

    # Step 2: Markdown documents, research first and then plans.
    for docs_root in (
        "E:/genesis-system/docs/research",
        "E:/genesis-system/docs/plans",
    ):
        ingest_markdown_docs(docs_root)