#!/usr/bin/env python3
"""
Extract Gemini chat 7c9994350df7b8c4 - Full conversation extraction
Login: kinan@agileadapt.com (password redacted - never record credentials in source)
Target: https://gemini.google.com/app/7c9994350df7b8c4
"""

import os
import time
import sys
from pathlib import Path
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout

# Directory that receives the step-by-step debug screenshots (see screenshot()).
SCREENSHOT_DIR = "/mnt/e/genesis-system/hive/progress"
# Directory where main() writes the extracted conversation markdown files.
OUTPUT_DIR = "/mnt/e/genesis-system/Conversations"
# Identifier of the Gemini conversation; used in the target URL and output file names.
CHAT_ID = "7c9994350df7b8c4"

def screenshot(page, name):
    """Capture ``page`` as ``gemini_<name>.png`` under SCREENSHOT_DIR and log the path.

    Args:
        page: Playwright page object exposing ``screenshot(path=...)``.
        name: Short label inserted into the PNG file name.
    """
    target = "{}/gemini_{}.png".format(SCREENSHOT_DIR, name)
    page.screenshot(path=target)
    print("  [screenshot] " + target)

def _login_to_google(page, email, password):
    """Drive the Google sign-in form and return the URL the browser ends up on.

    Both form steps are best-effort: a failure is logged and screenshotted but
    does not abort, because the caller decides from the resulting URL whether
    the session is usable.
    """
    print("[STEP 1] Navigating to Google login...")
    page.goto("https://accounts.google.com/signin/v2/identifier", wait_until="domcontentloaded")
    time.sleep(2)
    screenshot(page, "01_login_start")

    print("[STEP 1a] Entering email...")
    try:
        email_input = page.locator('input[type="email"]')
        email_input.wait_for(timeout=10000)
        email_input.fill(email)
        screenshot(page, "02_email_filled")
        page.keyboard.press("Enter")
        time.sleep(2)
    except Exception as e:
        print(f"  Email input error: {e}")
        screenshot(page, "02_email_error")

    print("[STEP 1b] Entering password...")
    try:
        page.wait_for_selector('input[type="password"]', timeout=15000)
        time.sleep(1)
        page.locator('input[type="password"]').fill(password)
        screenshot(page, "03_password_filled")
        page.keyboard.press("Enter")
        time.sleep(3)
    except Exception as e:
        print(f"  Password input error: {e}")
        screenshot(page, "03_password_error")

    # Give any post-login redirect (2FA, captcha, account page) time to settle.
    time.sleep(3)
    screenshot(page, "04_after_login_attempt")
    current_url = page.url
    print(f"  Current URL after login: {current_url}")
    return current_url


def _blocked_by_challenge(page, current_url):
    """Return True when a sign-in challenge is shown and could not be skipped."""
    if "challenge" not in current_url:
        return False

    print("  [!] 2FA/Challenge detected! Taking screenshot...")
    screenshot(page, "04_2fa_challenge")

    # Best-effort attempt to switch to an alternative verification method.
    try:
        skip_btn = page.locator('text=Try another way')
        if skip_btn.is_visible():
            skip_btn.click()
            time.sleep(2)
            screenshot(page, "04_2fa_skip_attempt")
    except Exception as e:
        print(f"  'Try another way' attempt failed: {e}")

    return "challenge" in page.url


def _scroll_through_conversation(page):
    """Scroll the window end-to-end so lazily rendered content gets loaded."""
    print("[STEP 3] Scrolling to load all conversation...")
    for _ in range(20):
        page.keyboard.press("End")
        page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(0.5)

    screenshot(page, "06_scrolled_to_bottom")

    # Wait for lazy-loaded content
    time.sleep(3)

    page.evaluate("window.scrollTo(0, 0)")
    time.sleep(1)

    print("[STEP 3a] Full scroll to load all lazy content...")
    scroll_height = page.evaluate("document.body.scrollHeight")
    scroll_pos = 0
    # The page height is re-read every step because it can grow while loading;
    # the step cap keeps an endlessly growing page from looping forever.
    max_steps = 5000
    for _ in range(max_steps):
        if scroll_pos >= scroll_height:
            break
        page.evaluate(f"window.scrollTo(0, {scroll_pos})")
        time.sleep(0.3)
        scroll_pos += 800
        scroll_height = page.evaluate("document.body.scrollHeight")

    page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
    time.sleep(2)
    screenshot(page, "07_full_scroll_done")


def _collect_selector_texts(page, selectors):
    """Return ``{selector: [text, ...]}`` for selectors that yield text longer than 10 chars."""
    found = {}
    for sel in selectors:
        try:
            elements = page.locator(sel).all()
        except Exception as e:
            print(f"  Selector '{sel}' error: {e}")
            continue

        texts = []
        for el in elements:
            try:
                t = el.inner_text()
            except Exception:
                # Reading a single element failed; skip it and keep the rest.
                continue
            if t and len(t) > 10:
                texts.append(t)

        if texts:
            found[sel] = texts
            print(f"  Selector '{sel}': {len(texts)} elements")
    return found


def _build_report(target_url, found_content, full_text):
    """Assemble the markdown report from selector matches and the page body text."""
    parts = [
        f"# Gemini Chat Extraction: {CHAT_ID}\n",
        # Stamp the real extraction date instead of a fixed literal.
        f"# Extracted: {time.strftime('%Y-%m-%d')}\n",
        f"# URL: {target_url}\n\n",
    ]

    if found_content:
        parts.append("## CONVERSATION TURNS (by selector)\n\n")
        for sel, texts in found_content.items():
            parts.append(f"### [{sel}]\n")
            for i, t in enumerate(texts, start=1):
                parts.append(f"\n--- Turn {i} ---\n{t}\n")

    if full_text:
        parts.append(f"\n\n## FULL PAGE TEXT\n\n{full_text}\n")

    return "".join(parts)


def extract_gemini_chat():
    """Log in to Google, open the Gemini chat ``CHAT_ID`` and extract its text.

    Credentials are read from the ``GEMINI_LOGIN_EMAIL`` and
    ``GEMINI_LOGIN_PASSWORD`` environment variables so that no secret lives in
    the source file.

    Returns:
        A ``(content, status)`` tuple. ``content`` is the markdown report on
        success and ``None`` otherwise. ``status`` is one of ``"SUCCESS"``,
        ``"2FA_REQUIRED"``, ``"NOT_AUTHENTICATED"`` (missing credentials or a
        redirect back to the login page) or ``"NO_CONTENT"`` (page loaded but
        nothing could be read from it).
    """
    email = os.environ.get("GEMINI_LOGIN_EMAIL")
    password = os.environ.get("GEMINI_LOGIN_PASSWORD")
    if not email or not password:
        print("  [!] Set GEMINI_LOGIN_EMAIL and GEMINI_LOGIN_PASSWORD in the environment")
        return None, "NOT_AUTHENTICATED"

    with sync_playwright() as p:
        # Launch with non-headless for better Google auth compatibility
        browser = p.chromium.launch(
            headless=False,
            args=[
                '--no-sandbox',
                '--disable-blink-features=AutomationControlled',
                '--disable-infobars',
                '--disable-extensions',
            ]
        )
        # try/finally guarantees the browser is closed on every exit path,
        # including exceptions raised by navigation or extraction.
        try:
            context = browser.new_context(
                viewport={"width": 1280, "height": 900},
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
            )
            page = context.new_page()

            current_url = _login_to_google(page, email, password)

            if _blocked_by_challenge(page, current_url):
                print("  [!] 2FA required - cannot bypass. Reporting failure.")
                return None, "2FA_REQUIRED"

            current_url = page.url
            if "myaccount.google.com" in current_url or "accounts.google.com" in current_url:
                # Still on an accounts page; the Gemini navigation below handles it.
                print("  Navigating to Gemini directly...")

            print(f"[STEP 2] Navigating to Gemini chat {CHAT_ID}...")
            target_url = f"https://gemini.google.com/app/{CHAT_ID}"
            page.goto(target_url, wait_until="domcontentloaded")
            time.sleep(5)
            screenshot(page, "05_gemini_chat_load")

            current_url = page.url
            print(f"  Gemini URL: {current_url}")

            # Being bounced back to the sign-in flow means the login did not stick.
            if "accounts.google.com" in current_url or "signin" in current_url:
                print("  [!] Not authenticated - redirected to login")
                screenshot(page, "05_auth_redirect")
                return None, "NOT_AUTHENTICATED"

            _scroll_through_conversation(page)

            print("[STEP 4] Extracting conversation text...")

            # Method 1: candidate selectors for conversation turns.
            found_content = _collect_selector_texts(page, [
                'model-response',
                'user-query',
                'message-content',
                '.conversation-container',
                '[data-message-author-role]',
                '.response-container',
                '.message',
                'infinite-scroller',
            ])

            # Method 2: full page body text as a catch-all.
            full_text = ""
            try:
                full_text = page.inner_text('body')
                print(f"  Full body text: {len(full_text)} chars")
            except Exception as e:
                print(f"  Body text error: {e}")

            # Method 3: HTML size, logged for diagnostics only.
            try:
                print(f"  Full HTML: {len(page.content())} chars")
            except Exception as e:
                print(f"  HTML error: {e}")

            screenshot(page, "08_extraction_done")

            # A header-only report is not a successful extraction.
            if not found_content and not full_text:
                return None, "NO_CONTENT"

            return _build_report(target_url, found_content, full_text), "SUCCESS"
        finally:
            browser.close()


def main():
    """Run the extraction and write the ``_FULL`` and ``_NEW`` markdown files.

    Creates the screenshot and output directories, reads any previous
    ``_FULL`` file, calls ``extract_gemini_chat()`` and exits with status 1
    when it reports a failure or returns no content.

    Returns:
        The extracted markdown text.
    """
    print("=" * 60)
    print(f"GEMINI CHAT EXTRACTION: {CHAT_ID}")
    print("=" * 60)

    # Ensure output dirs exist
    screenshot_dir = Path(SCREENSHOT_DIR)
    output_dir = Path(OUTPUT_DIR)
    screenshot_dir.mkdir(parents=True, exist_ok=True)
    output_dir.mkdir(parents=True, exist_ok=True)

    full_path = output_dir / f"gemini_chat_{CHAT_ID}_FULL.md"
    new_path = output_dir / f"gemini_chat_{CHAT_ID}_NEW.md"

    # Read the previous extraction before it gets overwritten below.
    existing_content = ""
    if full_path.exists():
        existing_content = full_path.read_text(encoding='utf-8')
        print(f"Found existing file: {len(existing_content)} chars")
    else:
        print("No existing file found - this is a fresh extraction")

    content, status = extract_gemini_chat()

    if status == "2FA_REQUIRED":
        print("\n[FAILURE] 2FA required - cannot proceed without physical device")
        print(f"Screenshot saved to {screenshot_dir / 'gemini_04_2fa_challenge.png'}")
        sys.exit(1)

    if status == "NOT_AUTHENTICATED":
        print("\n[FAILURE] Login failed - not authenticated")
        sys.exit(1)

    if not content:
        print("\n[FAILURE] No content extracted")
        sys.exit(1)

    print(f"\n[STEP 5] Saving extraction ({len(content)} chars)...")

    full_path.write_text(content, encoding='utf-8')
    print(f"  Saved: {full_path}")

    if existing_content and len(content) > len(existing_content):
        # NOTE(review): this is a rough heuristic, not a real diff - it keeps
        # everything after half the previous length so the overlap region is
        # included rather than risking a cut that drops new text.
        new_content = (
            f"# NEW CONTENT (extracted {time.strftime('%Y-%m-%d')})\n"
            f"# Previous size: {len(existing_content)} chars\n"
            f"# New size: {len(content)} chars\n\n"
        )
        new_content += content[len(existing_content) // 2:]
        new_path.write_text(new_content, encoding='utf-8')
        print(f"  Saved new content: {new_path}")
    else:
        if existing_content:
            print("  Content not longer than existing - saving full as new anyway")
        new_path.write_text(content, encoding='utf-8')

    word_count = len(content.split())
    print(f"\n[SUCCESS] Extracted {word_count} words ({len(content)} chars)")
    return content


# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
