#!/usr/bin/env python3
"""
Extract Gemini chat 8e2a6293b063b7fe - Full conversation extraction
Login: kinan@agileadapt.com / Systema55
Target: https://gemini.google.com/app/8e2a6293b063b7fe
"""

import os
import time
import sys
import re
from pathlib import Path
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout

# Directory that receives the step-by-step PNG screenshots taken by screenshot().
SCREENSHOT_DIR = "/mnt/e/genesis-system/hive/progress"
# Directory that receives the saved page HTML and the *_RAW.md extraction.
OUTPUT_DIR = "/mnt/e/genesis-system/Conversations"
# Identifier of the Gemini conversation; appended to https://gemini.google.com/app/
# to build the target URL and embedded in the output file names.
CHAT_ID = "8e2a6293b063b7fe"


def screenshot(page, name):
    """Capture the current page as a PNG under SCREENSHOT_DIR and log its path.

    Args:
        page: Object exposing ``screenshot(path=...)`` (a Playwright page here).
        name: Step label inserted into the file name ``gemini_8e2a_<name>.png``.
    """
    target = "{}/gemini_8e2a_{}.png".format(SCREENSHOT_DIR, name)
    page.screenshot(path=target)
    print("  [screenshot] " + target)


def _login(page, email, password):
    """Fill in the Google sign-in form; failures are logged and screenshotted, not raised."""
    print("[STEP 1] Navigating to Google login...")
    page.goto("https://accounts.google.com/signin/v2/identifier", wait_until="domcontentloaded")
    time.sleep(3)
    screenshot(page, "01_login_start")

    print("[STEP 1a] Entering email...")
    try:
        email_input = page.locator('input[type="email"]')
        email_input.wait_for(timeout=10000)
        email_input.fill(email)
        screenshot(page, "02_email_filled")
        page.keyboard.press("Enter")
        time.sleep(3)
    except Exception as e:
        # Best effort: the later redirect check decides whether login worked.
        print(f"  Email input error: {e}")
        screenshot(page, "02_email_error")

    print("[STEP 1b] Entering password...")
    try:
        page.wait_for_selector('input[type="password"]', timeout=15000)
        time.sleep(1)
        page.locator('input[type="password"]').fill(password)
        screenshot(page, "03_password_filled")
        page.keyboard.press("Enter")
        time.sleep(4)
    except Exception as e:
        # Best effort: the later redirect check decides whether login worked.
        print(f"  Password input error: {e}")
        screenshot(page, "03_password_error")


def _scroll_full_conversation(page):
    """Scroll to the bottom, back to the top, then slowly down again to load all content."""
    print("[STEP 3] Scrolling to load all conversation...")

    # First jump to the bottom repeatedly.
    for _ in range(30):
        page.keyboard.press("End")
        page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(0.4)

    screenshot(page, "06_scrolled_to_bottom")
    time.sleep(3)

    page.evaluate("window.scrollTo(0, 0)")
    time.sleep(1)

    # Slow pass in 600px steps; the target height is re-read after every step
    # so the loop keeps going if the document grows while scrolling.
    print("[STEP 3a] Full slow scroll to trigger lazy content loading...")
    scroll_height = page.evaluate("document.body.scrollHeight")
    scroll_pos = 0
    while scroll_pos < scroll_height:
        page.evaluate(f"window.scrollTo(0, {scroll_pos})")
        time.sleep(0.25)
        scroll_pos += 600
        new_height = page.evaluate("document.body.scrollHeight")
        if new_height > scroll_height:
            print(f"  Page grew: {scroll_height} -> {new_height}")
            scroll_height = new_height

    # Final scroll to bottom to catch any stragglers.
    page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
    time.sleep(3)
    screenshot(page, "07_full_scroll_done")


def _collect_selector_text(page):
    """Return ``{selector: [inner_text, ...]}`` for each candidate selector that yielded text."""
    selectors_to_try = (
        'model-response',
        'user-query',
        'message-content',
        '.conversation-container',
        '[data-message-author-role]',
        '.response-container',
        '.message',
        'infinite-scroller',
        'conversation-turn',
    )

    found_content = {}
    for sel in selectors_to_try:
        try:
            elements = page.locator(sel).all()
        except Exception as e:
            print(f"  Selector '{sel}' lookup failed: {e}")
            continue

        texts = []
        for el in elements:
            try:
                text = el.inner_text()
            except Exception as e:
                # One unreadable element must not discard the rest of the matches.
                print(f"  Selector '{sel}': could not read an element: {e}")
                continue
            # Texts of 10 characters or fewer are ignored.
            if text and len(text) > 10:
                texts.append(text)

        if texts:
            found_content[sel] = texts
            print(f"  Selector '{sel}': {len(texts)} elements, total chars: {sum(len(t) for t in texts)}")
    return found_content


def _build_structured_text(found_content, body_text, target_url):
    """Assemble the Markdown report from the selector matches and the full body text."""
    from datetime import date  # local import: only needed for the header stamp

    parts = [
        f"# Gemini Chat Extraction: {CHAT_ID}\n",
        f"# Extracted: {date.today().isoformat()}\n",
        f"# URL: {target_url}\n\n",
    ]

    if found_content:
        parts.append("## CONVERSATION TURNS (by selector)\n\n")
        for sel, texts in found_content.items():
            parts.append(f"### [{sel}]\n")
            parts.extend(
                f"\n--- Turn {number} ---\n{text}\n"
                for number, text in enumerate(texts, start=1)
            )

    if body_text:
        parts.append(f"\n\n## FULL PAGE TEXT\n\n{body_text}\n")

    return "".join(parts)


def extract_gemini_chat():
    """Log in to Google, open the Gemini chat CHAT_ID and extract its text.

    Credentials are read from the environment variables ``GEMINI_EMAIL`` and
    ``GEMINI_PASSWORD`` so that no secret is stored in the source file.

    Side effects: writes screenshots via ``screenshot()`` and, when the page
    HTML could be read, ``gemini_chat_<CHAT_ID>.html`` into OUTPUT_DIR.

    Returns:
        A ``(text, status)`` tuple. ``text`` is the Markdown report on
        success and ``None`` otherwise. ``status`` is one of ``"SUCCESS"``,
        ``"MISSING_CREDENTIALS"``, ``"2FA_REQUIRED"``, ``"NOT_AUTHENTICATED"``
        or ``"NO_CONTENT"``.
    """
    email = os.environ.get("GEMINI_EMAIL")
    password = os.environ.get("GEMINI_PASSWORD")
    if not email or not password:
        print("  [!] Set GEMINI_EMAIL and GEMINI_PASSWORD in the environment before running.")
        return None, "MISSING_CREDENTIALS"

    # Make the function usable on its own, not only when called from main().
    Path(SCREENSHOT_DIR).mkdir(parents=True, exist_ok=True)
    Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            args=[
                '--no-sandbox',
                '--disable-blink-features=AutomationControlled',
                '--disable-infobars',
                '--disable-extensions',
                '--disable-dev-shm-usage',
            ]
        )
        # try/finally guarantees the browser is closed on every exit path,
        # including unexpected exceptions raised while driving the page.
        try:
            context = browser.new_context(
                viewport={"width": 1280, "height": 900},
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
            )
            page = context.new_page()

            _login(page, email, password)

            # Check for 2FA or captcha
            time.sleep(3)
            screenshot(page, "04_after_login_attempt")
            current_url = page.url
            print(f"  Current URL after login: {current_url}")

            if "challenge" in current_url:
                print("  [!] 2FA/Challenge detected!")
                screenshot(page, "04_2fa_challenge")
                print("  [!] 2FA required - cannot bypass. Reporting failure.")
                return None, "2FA_REQUIRED"

            print(f"[STEP 2] Navigating to Gemini chat {CHAT_ID}...")
            target_url = f"https://gemini.google.com/app/{CHAT_ID}"
            page.goto(target_url, wait_until="domcontentloaded")
            time.sleep(6)
            screenshot(page, "05_gemini_chat_load")

            current_url = page.url
            print(f"  Gemini URL: {current_url}")

            # Being bounced back to the sign-in pages means the login did not stick.
            if "accounts.google.com" in current_url or "signin" in current_url:
                print("  [!] Not authenticated - redirected to login")
                screenshot(page, "05_auth_redirect")
                return None, "NOT_AUTHENTICATED"

            _scroll_full_conversation(page)

            print("[STEP 4] Extracting conversation text...")
            found_content = _collect_selector_text(page)

            body_text = ""
            try:
                body_text = page.inner_text('body')
                print(f"  Full body text: {len(body_text)} chars")
            except Exception as e:
                print(f"  Body text error: {e}")

            html_content = ""
            try:
                html_content = page.content()
                print(f"  Full HTML: {len(html_content)} chars")
            except Exception as e:
                print(f"  HTML error: {e}")

            screenshot(page, "08_extraction_done")

            # Save HTML for offline parsing (skipped when nothing was captured,
            # so a failed read does not leave an empty file behind).
            if html_content:
                html_path = f"{OUTPUT_DIR}/gemini_chat_{CHAT_ID}.html"
                with open(html_path, 'w', encoding='utf-8') as f:
                    f.write(html_content)
                print(f"  Saved HTML: {html_path}")
            else:
                print("  No HTML captured - skipping HTML save")

            # A header-only report is not a successful extraction.
            if not found_content and not body_text:
                print("  [!] Neither selector matches nor body text were captured")
                return None, "NO_CONTENT"

            return _build_structured_text(found_content, body_text, target_url), "SUCCESS"
        finally:
            browser.close()


def parse_conversation_from_body(body_text):
    """Return the raw page body text unchanged, or ``""`` when it is empty.

    No USER/GEMINI turn splitting is performed: the text is passed through
    as-is. Structured, per-turn output comes from the selector-based
    extraction instead.

    Args:
        body_text: Full text of the page body; may be ``None`` or empty.

    Returns:
        ``body_text`` itself when it is non-empty, otherwise ``""``.
    """
    if not body_text:
        return ""
    return body_text


def main():
    """Run the extraction end to end and write the result to the RAW markdown file.

    Creates the screenshot and output directories, calls
    ``extract_gemini_chat()``, exits with status 1 on any failure, and
    otherwise saves the text to ``gemini_chat_<CHAT_ID>_RAW.md`` in OUTPUT_DIR.

    Returns:
        The extracted text that was written to disk.
    """
    banner = "=" * 60
    print(banner)
    print(f"GEMINI CHAT EXTRACTION: {CHAT_ID}")
    print(banner)

    # Both target directories must exist before anything is written.
    for directory in (SCREENSHOT_DIR, OUTPUT_DIR):
        Path(directory).mkdir(parents=True, exist_ok=True)

    full_path = f"{OUTPUT_DIR}/gemini_chat_{CHAT_ID}_RAW.md"

    content, status = extract_gemini_chat()

    # Known failure statuses mapped to the lines reported for them.
    failure_reports = {
        "2FA_REQUIRED": (
            "\n[FAILURE] 2FA required - cannot proceed without physical device",
            "Screenshot saved to hive/progress/",
        ),
        "NOT_AUTHENTICATED": (
            "\n[FAILURE] Login failed - not authenticated",
        ),
    }
    report = failure_reports.get(status)
    if report is None and not content:
        report = ("\n[FAILURE] No content extracted",)
    if report is not None:
        for line in report:
            print(line)
        sys.exit(1)

    char_count = len(content)
    print(f"\n[STEP 5] Saving extraction ({char_count} chars)...")

    with open(full_path, 'w', encoding='utf-8') as out_file:
        out_file.write(content)
    print(f"  Saved: {full_path}")

    word_count = len(content.split())
    turn_count = content.count('--- Turn ')
    print(f"\n[SUCCESS] Extracted {word_count} words ({char_count} chars), ~{turn_count} selector turns")
    print(f"Output: {full_path}")
    return content


# Run the extraction only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
