#!/usr/bin/env python3
"""
Tradie Website Audit Pipeline (V2)
Iterates through tradie leads, visits website, runs audit (Vision + PageSpeed), generates report.

Usage:
    python3 scripts/tradie_audit_pipeline.py --limit 10 --offset 0
"""

import os
import sys
import csv
import json
import time
import requests
import re
import argparse
from pathlib import Path
from datetime import datetime

# Add scripts dir to path to import browser_agent
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from browser_agent import BrowserAgent

# Configuration
# CSV of enriched tradie leads read by main(); the pipeline looks up the
# 'Website', 'Email', 'Company' and 'Industry' columns of each row.
LEADS_FILE = "/mnt/e/genesis-system/data/LEADS/Manus Email enriched Tradie Leads/ZOHO_SIMPLE_IMPORT_ENRICHED.csv"
# Root folder for audit output; main() writes one sub-folder per company here.
# NOTE: the folder is created as a side effect of importing this module.
OUTPUT_DIR = Path("/mnt/e/genesis-system/data/AUDITS")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# PageSpeed Insights API key, taken from the GOOGLE_API_KEY environment
# variable; get_pagespeed_score() returns None when it is unset or empty.
PAGESPEED_API_KEY = os.environ.get("GOOGLE_API_KEY")

def clean_filename(s):
    """Turn an arbitrary value (URL, company name, ...) into a safe file name.

    The value is stringified and stripped of surrounding whitespace, every
    occurrence of "http://", "https://" and "www." is removed, and each
    remaining character that is not a word character, hyphen, underscore or
    dot is replaced by an underscore.
    """
    text = str(s).strip()
    for fragment in ("http://", "https://", "www."):
        text = text.replace(fragment, "")
    return re.sub(r'[^\w\-_.]', '_', text)

def get_pagespeed_score(url, strategy="mobile"):
    """Fetch PageSpeed Insights scores for a URL (best-effort).

    Args:
        url: Address of the page to analyse.
        strategy: Value sent as the ``strategy`` query parameter
            (defaults to "mobile").

    Returns:
        A dict with the keys "performance" and "seo" (the ``score`` of the
        matching Lighthouse category as returned by the API, or None when
        absent) and "loading_experience" (the ``overall_category`` of the
        ``loadingExperience`` section, or None when absent).
        Returns None when no API key is configured, when the API answers
        with a non-200 status, or when the request or decoding fails.
    """
    if not PAGESPEED_API_KEY:
        return None

    # Let requests build the query string so the audited URL is
    # percent-encoded. Interpolating it raw lets any "&", "#" or "?" in a
    # lead's URL truncate it or override the other parameters.
    params = [
        ("url", url),
        ("strategy", strategy),
        ("category", "PERFORMANCE"),
        ("category", "SEO"),
        ("key", PAGESPEED_API_KEY),
    ]
    try:
        response = requests.get(
            "https://www.googleapis.com/pagespeedonline/v5/runPagespeed",
            params=params,
            timeout=30,
        )
        if response.status_code != 200:
            print(
                f"  PageSpeed unavailable (HTTP {response.status_code})",
                file=sys.stderr,
            )
            return None

        data = response.json()
        # "or {}" guards against sections that are present but null.
        categories = (data.get("lighthouseResult") or {}).get("categories") or {}
        return {
            "performance": (categories.get("performance") or {}).get("score"),
            "seo": (categories.get("seo") or {}).get("score"),
            "loading_experience": (data.get("loadingExperience") or {}).get("overall_category"),
        }
    except Exception as exc:
        # Deliberately broad: a failed lookup must never abort an audit.
        # Only the exception type is reported because requests' error
        # messages can embed the full request URL, including the API key.
        print(f"  PageSpeed lookup failed ({type(exc).__name__})", file=sys.stderr)
        return None

def generate_audit_prompt(business_name, industry):
    """Build the vision-model prompt requesting a Markdown website audit.

    Both arguments are interpolated verbatim into the prompt text:
    ``business_name`` appears in the instructions and in the report title,
    ``industry`` appears in parentheses after the business name.
    """
    prompt = f"""
    You are an expert Digital Marketing Auditor specializing in Australian Trade Businesses.
    Analyze this website screenshot for '{business_name}' ({industry}).
    
    Provide a professional, constructive audit report in Markdown format.
    
    Focus on:
    1. **First Impressions (3-second test)**: Does it look trustworthy? Is the service clear?
    2. **Conversion Elements**: Phone number visibility, 'Get Quote' buttons, contact forms.
    3. **Trust Signals**: Reviews, licenses, insurance, 'About Us', real photos vs stock.
    4. **Mobile Experience**: (Infer from layout) Is text readable? Are buttons clickable?
    5. **SEO Basics**: Headlines, content relevance, local keywords.
    
    **Output Format:**
    
    # Website Audit for {business_name}
    
    ## 🚦 Executive Summary
    [1-2 sentences on overall health]
    
    ## ✅ What You're Doing Well
    *   [Point 1]
    *   [Point 2]
    
    ## ⚠️ High-Priority Fixes (The "Money Leaks")
    *   [Critical Issue 1]
    *   [Critical Issue 2]
    *   [Critical Issue 3]
    
    ## 💡 Growth Opportunities
    [Strategic advice]
    
    ## 📱 Mobile & Trust Score
    *   **Mobile-Friendly:** [Yes/No/Needs Improvement]
    *   **Trust Factor:** [High/Medium/Low]
    
    Tone: Professional, helpful, direct (no fluff). Speak to a busy tradie.
    """
    return prompt

def _score_badge(raw_score):
    """Return ``(label, css_class)`` for a 0-1 Lighthouse category score."""
    if raw_score is None:
        return "N/A", "gray"
    # round(), not int(): 0.57 * 100 is 56.99999999999999 in floating point
    # and would otherwise be displayed as 56.
    value = round(raw_score * 100)
    if value >= 90:
        return value, "green"
    if value >= 50:
        return value, "orange"
    return value, "red"


def _markdown_to_html(markdown_text):
    """Convert the small Markdown subset used in audit reports to HTML."""
    import html

    text = "" if markdown_text is None else str(markdown_text)
    # Escape first so markup in the (model-generated) audit text cannot
    # inject HTML; every tag present afterwards is one added below.
    text = html.escape(text, quote=False)
    # Headers
    text = re.sub(r'^# (.*)', r'<h1>\1</h1>', text, flags=re.MULTILINE)
    text = re.sub(r'^## (.*)', r'<h2>\1</h2>', text, flags=re.MULTILINE)
    text = re.sub(r'^### (.*)', r'<h3>\1</h3>', text, flags=re.MULTILINE)
    # List items ("* item" or "- item"); [ \t]+ keeps the match on one line.
    text = re.sub(r'^[*-][ \t]+(.*)', r'<li>\1</li>', text, flags=re.MULTILINE)
    # Bold
    text = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', text)
    # Wrap each run of consecutive <li> lines in a single <ul>.
    text = re.sub(
        r'(?:^<li>.*</li>$\n?)+',
        lambda m: "<ul>\n" + m.group(0).rstrip("\n") + "\n</ul>\n",
        text,
        flags=re.MULTILINE,
    )
    return text


def generate_html_report(lead, audit_md, pagespeed_data, screenshot_path):
    """Generate the standalone HTML audit report for one lead.

    Args:
        lead: Mapping for the lead; only ``lead['Company']`` is read
            (a missing key raises KeyError).
        audit_md: Audit text in Markdown. Only ``#``/``##``/``###`` headers,
            ``*``/``-`` bullet lines and ``**bold**`` are converted; None is
            treated as empty text.
        pagespeed_data: Dict with optional "performance" and "seo" scores in
            the 0-1 range, or a falsy value when no PageSpeed data exists.
        screenshot_path: Currently unused; the report always references
            "screenshot.png" relative to its own location.

    Returns:
        The complete HTML document as a string.
    """
    import html

    if pagespeed_data:
        # "is None" semantics inside _score_badge: a real score of 0 is
        # shown as 0 (red) instead of being mistaken for missing data.
        perf, perf_class = _score_badge(pagespeed_data.get('performance'))
        seo, seo_class = _score_badge(pagespeed_data.get('seo'))
        ps_html = f"""
        <div class="metrics-grid">
            <div class="metric-card">
                <h3>Performance</h3>
                <div class="score {perf_class}">{perf}</div>
            </div>
            <div class="metric-card">
                <h3>SEO</h3>
                <div class="score {seo_class}">{seo}</div>
            </div>
        </div>
        """
    else:
        ps_html = """
        <div class="metrics-grid">
            <div class="metric-card">
                <h3>Performance</h3>
                <div class="score gray">N/A</div>
                <small>Requires PageSpeed API Key</small>
            </div>
            <div class="metric-card">
                <h3>SEO</h3>
                <div class="score gray">N/A</div>
            </div>
        </div>
        """

    html_content = _markdown_to_html(audit_md)
    # The company name comes straight from the leads CSV; escape it before
    # placing it in the <title> and the header.
    company = html.escape(str(lead['Company']))
    report_date = datetime.now().strftime('%d %B %Y')

    template = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Sunaiva Audit: {company}</title>
        <style>
            :root {{
                --primary: #2c3e50;
                --accent: #e67e22;
                --text: #333;
                --bg: #f5f7fa;
                --card-bg: #ffffff;
            }}
            body {{
                font-family: 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
                line-height: 1.6;
                color: var(--text);
                background-color: var(--bg);
                margin: 0;
                padding: 0;
            }}
            .container {{
                max-width: 900px;
                margin: 40px auto;
                background: var(--card-bg);
                padding: 40px;
                border-radius: 12px;
                box-shadow: 0 10px 30px rgba(0,0,0,0.05);
            }}
            .header {{
                text-align: center;
                border-bottom: 2px solid #eee;
                padding-bottom: 30px;
                margin-bottom: 40px;
            }}
            .header h1 {{
                margin: 0;
                color: var(--primary);
                font-size: 2.5em;
            }}
            .header p {{
                color: #7f8c8d;
                font-size: 1.1em;
                margin-top: 10px;
            }}
            .metrics-grid {{
                display: flex;
                gap: 20px;
                margin-bottom: 40px;
            }}
            .metric-card {{
                flex: 1;
                background: #f8f9fa;
                padding: 20px;
                border-radius: 8px;
                text-align: center;
                border: 1px solid #e9ecef;
            }}
            .score {{
                font-size: 3em;
                font-weight: 700;
                margin: 10px 0;
            }}
            .green {{ color: #27ae60; }}
            .orange {{ color: #f39c12; }}
            .red {{ color: #c0392b; }}
            .gray {{ color: #95a5a6; }}
            
            h2 {{ color: var(--primary); margin-top: 40px; border-left: 4px solid var(--accent); padding-left: 15px; }}
            h3 {{ color: #34495e; margin-top: 25px; }}
            
            .screenshot-container {{
                text-align: center;
                margin: 40px 0;
                background: #2c3e50;
                padding: 10px;
                border-radius: 8px;
            }}
            .screenshot {{
                max-width: 100%;
                border-radius: 4px;
                display: block;
            }}
            
            .footer {{
                text-align: center;
                margin-top: 60px;
                padding-top: 20px;
                border-top: 1px solid #eee;
                color: #95a5a6;
                font-size: 0.9em;
            }}
            
            /* Markdown styles */
            li {{ margin-bottom: 8px; }}
            strong {{ color: var(--primary); }}
        </style>
    </head>
    <body>
        <div class="container">
            <div class="header">
                <h1>Sunaiva Digital Audit</h1>
                <p>Prepared for <strong>{company}</strong> on {report_date}</p>
            </div>
            
            {ps_html}
            
            <div class="screenshot-container">
                <img src="screenshot.png" class="screenshot" alt="Website Screenshot">
            </div>
            
            <div class="content">
                {html_content}
            </div>
            
            <div class="footer">
                <p>Generated by Sunaiva Digital Audit System</p>
                <p>www.sunaivadigital.com</p>
            </div>
        </div>
    </body>
    </html>
    """
    return template

def _audit_lead(agent, row, company, url, company_dir):
    """Audit one lead's website and write its report files; return True on success."""
    # PageSpeed (best-effort; None when unavailable)
    ps_data = get_pagespeed_score(url)

    # Browser
    success = agent.navigate(url)
    if not success and "www." not in url:
        # Insert "www." after the scheme, whichever scheme the lead used.
        url = url.replace("://", "://www.", 1)
        print(f"  Retrying with {url}...")
        success = agent.navigate(url)

    if not success:
        print(f"  ❌ Failed: {url}")
        return False

    # Create the folder only once the site is reachable so failed leads do
    # not leave empty directories behind.
    company_dir.mkdir(exist_ok=True)
    screenshot_path = agent.screenshot(str(company_dir / "screenshot.png"))

    industry = row.get('Industry') or "Trade business"
    analysis = agent.analyze_page(generate_audit_prompt(company, industry), screenshot_path)
    if not analysis:
        # NOTE(review): assumes analyze_page returns the audit text and a
        # falsy value means no audit was produced - confirm against BrowserAgent.
        print(f"  ❌ Failed: no analysis returned for {url}")
        return False

    lead = {**row, 'Company': company}
    report_html = generate_html_report(lead, analysis, ps_data, screenshot_path)
    # The report declares <meta charset="UTF-8"> and the audit text contains
    # emoji, so never rely on the platform's default encoding.
    (company_dir / "report.html").write_text(report_html, encoding="utf-8")
    (company_dir / "audit.md").write_text(analysis, encoding="utf-8")
    (company_dir / "lead_data.json").write_text(json.dumps(row, indent=2), encoding="utf-8")

    print(f"  ✅ Complete: {company_dir}/report.html")
    return True


def main():
    """Run the audit pipeline over the leads CSV.

    Command-line options:
        --limit N   Maximum number of successful audits (0 or less = no limit).
        --offset N  Number of leading CSV rows to skip.

    Rows without both a website and an email are ignored, and leads whose
    output folder already contains report.html are counted as skipped.
    A failure while auditing one lead is reported and the run continues
    with the next lead.
    """
    parser = argparse.ArgumentParser(description="Tradie Website Audit Pipeline")
    parser.add_argument("--limit", type=int, default=5, help="Max audits to run")
    parser.add_argument("--offset", type=int, default=0, help="Skip first N leads")
    args = parser.parse_args()

    print(f"Starting Tradie Audit Pipeline (Limit: {args.limit}, Offset: {args.offset})")

    if not os.path.exists(LEADS_FILE):
        print(f"Error: Leads file not found at {LEADS_FILE}")
        return

    agent = BrowserAgent(headless=True)
    agent.start()

    count = 0
    skipped = 0

    try:
        # Read everything up front so the file is not held open while browsing.
        with open(LEADS_FILE, 'r', encoding='utf-8-sig') as f:
            rows = list(csv.DictReader(f))
        print(f"Found {len(rows)} leads in CSV.")

        for row in rows[max(args.offset, 0):]:
            if args.limit > 0 and count >= args.limit:
                break

            website = (row.get('Website') or '').strip()
            email = (row.get('Email') or '').strip()
            if not website or not email:
                continue

            if website.lower().startswith(('http://', 'https://')):
                url = website
            else:
                url = f"https://{website}"

            # A blank company name would make the slug empty and the output
            # folder OUTPUT_DIR itself, so fall back to the website.
            company = (row.get('Company') or '').strip() or website
            company_dir = OUTPUT_DIR / clean_filename(company)
            if (company_dir / "report.html").exists():
                skipped += 1
                continue

            print(f"\n[{count+1}/{args.limit}] Processing: {company} ({url})")

            try:
                if _audit_lead(agent, row, company, url, company_dir):
                    count += 1
            except Exception as e:
                # Isolate failures: one broken site must not end the run.
                print(f"  ❌ Error auditing {company}: {e}")

    except KeyboardInterrupt:
        print("Stopped by user.")
    except Exception as e:
        print(f"Pipeline Error: {e}")
    finally:
        agent.stop()
        print(f"\nPipeline finished. Processed {count} audits (Skipped {skipped} existing).")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()