"""
Genesis Gold Pipeline — Prospect Audit Pipeline
=================================================
The revenue weapon: scrape prospect's website → score it → generate a
superior version with their logo → deploy as live demo → generate spore URL.

Flow:
  1. Scrape prospect URL via Jina API
  2. Extract business name, services, phone, location
  3. Score their current site (mobile, speed, SEO, conversion)
  4. Generate a superior landing page via Gemini
  5. Inject voice widget (their business context)
  6. Deploy to Netlify as a live demo
  7. Generate spore URL for outreach
  8. Create PDF audit report

Usage:
    python prospect_audit.py --url https://example-plumber.com.au
    python prospect_audit.py --url https://example-plumber.com.au --deploy
"""

import argparse
import base64
import json
import logging
import re
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

import httpx

sys.path.insert(0, str(Path(__file__).parent))
sys.path.insert(0, "/mnt/e/genesis-system/scripts/clone_pipeline")

logger = logging.getLogger("gold_pipeline.prospect_audit")

OUTPUT_DIR = Path("/mnt/e/genesis-system/scripts/gold_pipeline/audits")


# ---------------------------------------------------------------------------
# Step 1: Scrape prospect URL
# ---------------------------------------------------------------------------

def scrape_prospect(url: str) -> dict:
    """
    Scrape a prospect's website via the Jina Reader API.

    Falls back to a direct HTTP GET (browser-ish headers) when Jina returns
    a 4xx/5xx, and to a business name derived from the URL hostname when the
    scrape fails entirely.

    Args:
        url: The prospect's website URL.

    Returns:
        Dict with keys: url, content (first 15k chars), business_name,
        phone, services (list of strings), location, raw_length.
    """
    logger.info(f"Scraping: {url}")
    result = {
        "url": url,
        "content": "",
        "business_name": "",
        "phone": "",
        "services": [],
        "location": "",
        "raw_length": 0,
    }

    try:
        # httpx is imported at module level; the previous redundant
        # function-local `import httpx` has been removed.
        with httpx.Client(timeout=30.0) as client:
            # Jina Reader API with headers for better extraction
            resp = client.get(
                f"https://r.jina.ai/{url}",
                headers={
                    "Accept": "text/plain",
                    "X-With-Generated-Alt": "true",
                    "X-No-Cache": "true",
                },
            )

            # If Jina fails (400/422), try direct HTTP scrape as fallback
            if resp.status_code >= 400:
                logger.warning(f"Jina returned {resp.status_code}, trying direct scrape...")
                direct = client.get(
                    url,
                    headers={
                        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
                        "Accept": "text/html,application/xhtml+xml",
                    },
                    follow_redirects=True,
                )
                raw = direct.text
            else:
                raw = resp.text
            result["raw_length"] = len(raw)
            # Cap stored content so downstream prompts stay bounded.
            result["content"] = raw[:15000]

            # Extract business name from the page title or first heading.
            m = re.search(r"^(?:Title:|#\s+)(.+)", raw, re.MULTILINE)
            if m:
                result["business_name"] = m.group(1).strip()[:80]

            # Extract phone numbers (AU formats: +61/0x landline-mobile,
            # 1300/1800 numbers, 0x xxxx xxxx landlines).
            phones = re.findall(
                r"(?:(?:\+61|0)\s*[2-9]\s*(?:\d\s*){7,8}|\b1[38]00\s*\d{3}\s*\d{3}\b|\b(?:0[2-9])\s*\d{4}\s*\d{4}\b)",
                raw
            )
            if phones:
                result["phone"] = phones[0].strip()

            # Detect location from common AU city names (first match wins).
            for city in ["Sydney", "Melbourne", "Brisbane", "Perth", "Adelaide",
                         "Gold Coast", "Cairns", "Darwin", "Townsville", "Sunshine Coast",
                         "Newcastle", "Wollongong", "Canberra", "Hobart"]:
                if city.lower() in raw.lower():
                    result["location"] = city
                    break

            # Try to extract services (look for "services:", "we offer", etc.)
            service_matches = re.findall(
                r"(?:services?|we\s+(?:offer|provide|specialise)|our\s+services)[:\s]+(.+?)(?:\n|$)",
                raw, re.IGNORECASE
            )
            if service_matches:
                services_text = service_matches[0]
                # Split on common delimiters/bullets, keep up to 8 entries.
                result["services"] = [
                    s.strip().strip("•-*·")
                    for s in re.split(r"[,\n•\-\*·|]", services_text)
                    if s.strip() and len(s.strip()) > 2
                ][:8]

    except Exception as e:
        logger.error(f"Scrape failed: {e}")
        # Fallback: derive a readable business name from the hostname.
        try:
            from urllib.parse import urlparse
            host = urlparse(url).hostname or ""
            result["business_name"] = host.replace("www.", "").split(".")[0].replace("-", " ").title()
        except Exception:
            result["business_name"] = "Business"

    logger.info(f"Scraped: {result['business_name']} ({result['raw_length']} chars, phone={result['phone']}, location={result['location']})")
    return result


# ---------------------------------------------------------------------------
# Step 2: Score the prospect's current site
# ---------------------------------------------------------------------------

def score_site(url: str, content: str) -> dict:
    """
    Rate a prospect's current website across conversion/SEO dimensions.

    Every dimension is scored 0-100 using cheap text heuristics over the
    scraped content; "overall" is the weighted mean of the dimensions.

    Args:
        url: The prospect's URL (used only for the SSL check).
        content: Scraped page text/markup to inspect.

    Returns:
        Dict mapping dimension name -> score (0-100), plus "overall".
    """
    lowered = content.lower()

    def present(*needles: str) -> int:
        # 100 when any needle occurs in the page text, else 0.
        return 100 if any(needle in lowered for needle in needles) else 0

    cta_phrases = ("call now", "get a quote", "book now", "contact us", "free quote",
                   "request a quote", "get started", "book online", "enquire now")

    scores = {
        # Responsive sites declare a viewport meta tag (heuristic).
        "mobile_friendly": 100 if "viewport" in lowered else 30,
        # Count AU phone prefixes; 4+ mentions maxes the score.
        "has_phone_prominent": min(100, 25 * len(re.findall(r"(?:\+61|0)\s*[2-9]", content))),
        # 20 points per distinct CTA phrase found, capped at 100.
        "has_cta": min(100, 20 * sum(phrase in lowered for phrase in cta_phrases)),
        "has_ssl": 100 if url.startswith("https") else 0,
        "has_voice_agent": present("voice agent", "ai receptionist", "telnyx", "voice widget"),
        "has_chat_widget": present("chat widget", "live chat", "chatbot", "intercom", "drift"),
        "has_reviews": present("review", "testimonial", "star", "rating", "google review"),
        "has_schema": present("schema.org", "json-ld"),
        # Partial credit (25) when no meta-description hints exist at all.
        "has_meta_description": 100 if ("meta" in lowered and "description" in lowered) else 25,
        # 1 point per 50 chars of content, capped at 100.
        "content_length": min(100, len(content) // 50),
        "overall": 0,
    }

    # Weighted average across all dimensions (weights sum to 100).
    weight_map = {
        "mobile_friendly": 15,
        "has_phone_prominent": 15,
        "has_cta": 15,
        "has_ssl": 10,
        "has_voice_agent": 10,
        "has_chat_widget": 5,
        "has_reviews": 10,
        "has_schema": 5,
        "has_meta_description": 5,
        "content_length": 10,
    }
    weighted_sum = sum(scores[name] * weight for name, weight in weight_map.items())
    scores["overall"] = round(weighted_sum / sum(weight_map.values()))

    return scores


# ---------------------------------------------------------------------------
# Step 3: Generate superior version
# ---------------------------------------------------------------------------

def generate_superior_site(
    scrape_data: dict,
    model: str = "gemini-2.5-flash",
) -> Optional[str]:
    """
    Generate a superior version of the prospect's website.

    Builds a detailed prompt from the scraped business details (name, phone,
    location, services, raw page content) and asks Gemini to produce a full
    replacement landing page as a single HTML document.

    Args:
        scrape_data: Output of scrape_prospect() — reads "business_name",
            "phone", "location", "services", "url", and "content".
        model: Gemini model name, passed through to _call_gemini.

    Returns:
        Cleaned HTML string, or None when the model call returns nothing.
    """
    # Project-local helpers, resolved via the sys.path entries at module top.
    from generate_site import _call_gemini, _clean_html

    biz = scrape_data["business_name"]
    phone = scrape_data["phone"]
    # Fall back to country-level targeting when no city was detected.
    location = scrape_data["location"] or "Australia"
    services = ", ".join(scrape_data["services"]) if scrape_data["services"] else "See content"

    # NOTE: the text below is sent verbatim to the model — it is runtime
    # behaviour, not documentation; edit with care.
    prompt = f"""You are rebuilding a better version of "{biz}"'s website.
The current website is at: {scrape_data['url']}

Here is their ACTUAL content (scraped from their website):
{scrape_data['content'][:12000]}

BUSINESS DETAILS (extracted):
- Name: {biz}
- Phone: {phone or 'See content above'}
- Location: {location}
- Services: {services}

YOUR TASK: Create a DRAMATICALLY BETTER version of their website. This is being
used as a live demo to show the business owner what their site COULD look like.

REQUIREMENTS:
1. Use their ACTUAL business name, phone, services, and location
2. Make it mobile-first, fast-loading, and conversion-optimised
3. Use Tailwind CSS CDN + Google Fonts Inter
4. Include proper SEO meta tags and LocalBusiness schema
5. Add a sticky header with their phone number
6. Add clear CTAs ("Call Now", "Get a Free Quote")
7. Include a "Services" grid using their actual services
8. Add placeholder testimonials in their industry style
9. Include a contact section with their phone and a lead capture form
10. Use a professional colour scheme appropriate for their industry

CRITICAL: This must look like THEIR website, upgraded. Use their business name
prominently. This is a sales demo — it needs to be impressive enough that the
business owner says "I want THIS as my website."

DESIGN:
- Professional, modern, clean
- Mobile-first responsive
- Australian English spelling
- Click-to-call phone links
- Subtle CSS animations (fade-in hero, hover effects)
- Trust signals (years in business, licensed, insured — if mentioned in content)

VOICE WIDGET: Include a floating button (bottom-right) with this HTML:
<div id="voice-demo" style="position:fixed;bottom:24px;right:24px;z-index:9999">
  <a href="https://aiva.agileadapt.com/s/demo" target="_blank"
    style="display:flex;align-items:center;gap:8px;background:linear-gradient(135deg,#4f46e5,#7c3aed);
    color:white;text-decoration:none;padding:12px 20px;border-radius:50px;font-weight:600;
    box-shadow:0 4px 20px rgba(79,70,229,0.4);transition:all 0.3s;"
    onmouseover="this.style.transform='scale(1.05)'"
    onmouseout="this.style.transform='scale(1)'">
    🎙️ Talk to Your AI Receptionist
  </a>
</div>

FOOTER: Small text "Website demo powered by Sunaiva AI | agileadapt.com"

OUTPUT: Return ONLY the complete HTML starting with <!DOCTYPE html>. No markdown, no code blocks."""

    raw = _call_gemini(prompt, model=model)
    if raw:
        return _clean_html(raw)
    return None


# ---------------------------------------------------------------------------
# Step 4: Generate spore URL
# ---------------------------------------------------------------------------

def generate_spore_url(prospect_url: str, tier: int = 1, referral_code: str = "") -> str:
    """
    Build a compact, shareable "spore" link for the prospect.

    The payload (url, tier, optional referral code) is serialised as
    minified JSON, URL-safe base64 encoded with padding stripped, and
    appended to the voice-demo base URL.

    Args:
        prospect_url: The prospect's website URL to embed.
        tier: Service tier embedded in the payload.
        referral_code: Optional agency referral code; omitted when empty.

    Returns:
        Fully-qualified spore URL string.
    """
    payload: dict = {"u": prospect_url, "t": tier}
    if referral_code:
        payload["r"] = referral_code

    blob = json.dumps(payload, separators=(",", ":")).encode()
    token = base64.urlsafe_b64encode(blob).decode().rstrip("=")

    return f"https://aiva.agileadapt.com/s/{token}"


# ---------------------------------------------------------------------------
# Step 5: Generate audit report
# ---------------------------------------------------------------------------

def generate_audit_report(
    scrape_data: dict,
    scores: dict,
    demo_url: str = "",
    spore_url: str = "",
) -> str:
    """
    Generate a markdown audit report for the prospect.

    Args:
        scrape_data: Output of scrape_prospect() — reads "business_name" and "url".
        scores: Output of score_site() — 0-100 per dimension plus "overall".
        demo_url: Optional live demo URL to link in the report.
        spore_url: Optional spore/voice-demo URL to link in the report.

    Returns:
        The complete report as a markdown string.
    """
    biz = scrape_data["business_name"]
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    def bar(score):
        # Render a 10-segment progress bar with a traffic-light emoji.
        filled = score // 10
        empty = 10 - filled
        colour = "🟢" if score >= 70 else "🟡" if score >= 40 else "🔴"
        return f"{colour} {'█' * filled}{'░' * empty} {score}/100"

    # FIX: the table previously declared three columns (Metric/Score/Status)
    # while every row supplied only two cells, producing malformed markdown.
    # The header now matches the two cells actually emitted per row.
    report = f"""# Website Audit Report: {biz}
## Date: {now}
## URL: {scrape_data['url']}

---

### Overall Score: {scores['overall']}/100

{bar(scores['overall'])}

---

### Detailed Scores

| Metric | Score |
|--------|-------|
| Mobile Friendly | {bar(scores['mobile_friendly'])} |
| Phone Visibility | {bar(scores['has_phone_prominent'])} |
| Call-to-Action | {bar(scores['has_cta'])} |
| SSL Certificate | {bar(scores['has_ssl'])} |
| AI Voice Agent | {bar(scores['has_voice_agent'])} |
| Chat Widget | {bar(scores['has_chat_widget'])} |
| Customer Reviews | {bar(scores['has_reviews'])} |
| Schema Markup | {bar(scores['has_schema'])} |
| SEO Meta Tags | {bar(scores['has_meta_description'])} |
| Content Depth | {bar(scores['content_length'])} |

---

### Key Findings

"""
    # Append a finding paragraph for each weak dimension.
    if scores["has_voice_agent"] == 0:
        report += "**No AI Voice Agent** — You're missing calls after hours and when you're on the job. An AI voice receptionist answers every call 24/7, capturing leads you're currently losing.\n\n"
    if scores["has_phone_prominent"] < 50:
        report += "**Phone Number Not Prominent** — Visitors can't easily find how to call you. Your phone number should be in the header, hero, and footer — always one tap away.\n\n"
    if scores["has_cta"] < 40:
        report += "**Weak Calls-to-Action** — Your site doesn't clearly tell visitors what to do next. Strong CTAs like 'Get a Free Quote' and 'Call Now' dramatically increase conversions.\n\n"
    if scores["mobile_friendly"] < 70:
        report += "**Not Mobile-Optimised** — Over 60% of your visitors are on phones. A non-responsive site is losing you jobs every day.\n\n"
    if scores["has_reviews"] == 0:
        report += "**No Visible Reviews** — Customer testimonials and Google reviews build trust instantly. Businesses with visible reviews convert 270% more visitors.\n\n"
    if scores["has_schema"] == 0:
        report += "**No Schema Markup** — Google can't properly understand your business. LocalBusiness schema markup helps you appear in local search results and Google Maps.\n\n"

    # Plain string here — the previous f-string had no placeholders.
    report += """---

### What Your Website Could Look Like

"""
    if demo_url:
        report += f"**See your upgraded website live:** [{demo_url}]({demo_url})\n\n"
    if spore_url:
        report += f"**Talk to your AI receptionist:** [{spore_url}]({spore_url})\n\n"

    report += f"""---

### Recommended Action

Your current website scores **{scores['overall']}/100**. With our AI-powered upgrade, your site would score **90+/100** and include:

- ✅ 24/7 AI voice receptionist (answers every call)
- ✅ Mobile-first, lightning-fast design
- ✅ SEO-optimised with schema markup
- ✅ Clear calls-to-action that convert visitors to leads
- ✅ Professional design that builds instant trust
- ✅ Australian Privacy Act 2026 compliant

**Pricing starts at $397/month** — less than $13/day, and a fraction of what a human receptionist costs.

---

### About AgileAdapt

AgileAdapt provides AI-powered voice agents and websites for Australian businesses. Our patent-protected technology ensures every AI decision is auditable and compliant with the upcoming Privacy Act 2026 ADM reforms.

**Contact:** hello@agileadapt.com | [agileadapt.com](https://agileadapt.com)

*This audit was generated automatically by Genesis AI.*
"""
    return report


# ---------------------------------------------------------------------------
# Full pipeline
# ---------------------------------------------------------------------------

def run_audit(
    url: str,
    model: str = "gemini-2.5-flash",
    deploy: bool = False,
    referral_code: str = "",
) -> dict:
    """
    Run the full prospect audit pipeline.

    Steps: scrape -> score -> generate superior site -> (optional) Netlify
    deploy -> spore URL -> markdown audit report.  Artefacts are written
    under OUTPUT_DIR/<slug>/.

    Args:
        url: Prospect website URL.
        model: Gemini model name used for site generation.
        deploy: When True, attempt to deploy the generated demo to Netlify.
        referral_code: Optional agency referral code embedded in the spore URL.

    Returns dict with: scrape_data, scores, html, html_chars, demo_url,
    spore_url, report, report_path, elapsed_secs, status.  status is
    "scrape_failed" when no content could be fetched, else "complete".
    """
    start = time.time()
    result = {
        "url": url,
        "scrape_data": None,
        "scores": None,
        "html": None,
        "html_chars": 0,
        "demo_url": "",
        "spore_url": "",
        "report": "",
        "report_path": "",
        "elapsed_secs": 0,
        "status": "pending",
    }

    # Step 1: Scrape
    scrape_data = scrape_prospect(url)
    result["scrape_data"] = scrape_data

    if not scrape_data["content"]:
        # Nothing to score or rebuild — bail early with a failure status.
        result["status"] = "scrape_failed"
        result["elapsed_secs"] = round(time.time() - start, 2)
        return result

    # Step 2: Score
    scores = score_site(url, scrape_data["content"])
    result["scores"] = scores
    logger.info(f"Site score: {scores['overall']}/100")

    # Output-folder slug, derived once from the business name (it was
    # previously recomputed identically for the report path).
    slug = re.sub(r"[^a-z0-9]+", "-", scrape_data["business_name"].lower()).strip("-") or "prospect"

    # Step 3: Generate superior version
    html = generate_superior_site(scrape_data, model=model)
    if html:
        result["html"] = html
        result["html_chars"] = len(html)

        # Save locally
        audit_dir = OUTPUT_DIR / slug
        audit_dir.mkdir(parents=True, exist_ok=True)
        (audit_dir / "index.html").write_text(html, encoding="utf-8")
        logger.info(f"Saved superior site: {audit_dir / 'index.html'} ({len(html)} chars)")

        # Step 4: Deploy if requested (best-effort — failure is logged, not fatal)
        if deploy:
            try:
                from deployer import deploy_to_netlify
                demo_url = deploy_to_netlify(html, site_name=f"demo-{slug}")
                if demo_url:
                    result["demo_url"] = demo_url
                    logger.info(f"Demo deployed: {demo_url}")
            except Exception as e:
                logger.warning(f"Deploy failed: {e}")

    # Step 5: Generate spore URL
    spore_url = generate_spore_url(url, tier=1, referral_code=referral_code)
    result["spore_url"] = spore_url

    # Step 6: Generate audit report
    report = generate_audit_report(
        scrape_data=scrape_data,
        scores=scores,
        demo_url=result["demo_url"],
        spore_url=spore_url,
    )
    result["report"] = report

    # Save report alongside the generated site.
    report_path = OUTPUT_DIR / slug / "audit_report.md"
    report_path.parent.mkdir(parents=True, exist_ok=True)
    report_path.write_text(report, encoding="utf-8")
    result["report_path"] = str(report_path)
    logger.info(f"Audit report saved: {report_path}")

    result["status"] = "complete"
    result["elapsed_secs"] = round(time.time() - start, 2)

    return result


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    # Console logging for CLI runs; the module stays quiet when imported.
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s", datefmt="%H:%M:%S")

    parser = argparse.ArgumentParser(description="Genesis Prospect Audit Pipeline")
    parser.add_argument("--url", required=True, help="Prospect website URL")
    parser.add_argument("--model", default="gemini-2.5-flash", help="Gemini model")
    parser.add_argument("--deploy", action="store_true", help="Deploy demo to Netlify")
    parser.add_argument("--referral", default="", help="Agency referral code")
    args = parser.parse_args()

    result = run_audit(
        url=args.url,
        model=args.model,
        deploy=args.deploy,
        referral_code=args.referral,
    )

    # FIX: when the scrape fails, run_audit returns scores=None and the
    # summary below crashed with a TypeError. Exit cleanly instead.
    if result["scores"] is None:
        print(f"\n=== AUDIT INCOMPLETE ({result['status']}): {result['url']} ===")
        sys.exit(1)

    print(f"\n=== AUDIT COMPLETE: {result['scrape_data']['business_name']} ===")
    print(f"URL: {result['url']}")
    print(f"Score: {result['scores']['overall']}/100")
    print(f"Superior site: {result['html_chars']:,} chars")
    print(f"Spore URL: {result['spore_url']}")
    if result["demo_url"]:
        print(f"Live demo: {result['demo_url']}")
    print(f"Report: {result['report_path']}")
    print(f"Time: {result['elapsed_secs']}s")