"""
Website Factory Pipeline
========================
Full end-to-end orchestrator for the zero-build-until-engagement pipeline.

Flow:
  1. Pull targets from PostgreSQL tradie_leads table
  2. Audit their current web presence (tradie_website_auditor)
  3. Generate demo sites for top N leads per run (demo_site_generator)
  4. Upload demo to static hosting (Netlify or local serve path)
  5. Send outreach email via GHL / SMTP
  6. Track all actions back to the tradie_leads table in PostgreSQL

Database schema required (run ONCE):
    See: scripts/db_migrations/website_factory_schema.sql

Usage:
    # Full daily pipeline run
    python website_factory_pipeline.py run --limit 10

    # Only generate demos (no outreach)
    python website_factory_pipeline.py generate --limit 10

    # Only send outreach for demos already generated
    python website_factory_pipeline.py outreach --limit 20

    # Import a new CSV batch of leads
    python website_factory_pipeline.py import --file leads.csv

    # Check pipeline status
    python website_factory_pipeline.py status
"""

import argparse
import csv
import json
import os
import sys
import time
import smtplib
import re
from datetime import datetime, timezone
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from pathlib import Path
from typing import Optional

# ---- Genesis imports ----
sys.path.insert(0, str(Path(__file__).parent.parent / "data" / "genesis-memory"))
try:
    from elestio_config import PostgresConfig
    import psycopg2
    import psycopg2.extras
    POSTGRES_AVAILABLE = True
except ImportError:
    POSTGRES_AVAILABLE = False
    print("[WARN] PostgreSQL not available — running in CSV-only mode.", file=sys.stderr)

# ---- Local pipeline imports ----
SCRIPTS_DIR = Path(__file__).parent
sys.path.insert(0, str(SCRIPTS_DIR))
from tradie_website_auditor import audit_business
from demo_site_generator import generate_demo_site, slugify

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
BASE_DIR = Path(__file__).parent.parent
DEMOS_DIR = BASE_DIR / "Sunaiva" / "demos"
DEMOS_DIR.mkdir(parents=True, exist_ok=True)

DEMO_BASE_URL = os.environ.get(
    "DEMO_BASE_URL",
    "https://demos.receptionistai.com.au"
)

# Email config — reads from secrets.env or environment
SMTP_HOST     = os.environ.get("SMTP_HOST", "smtp.gmail.com")
SMTP_PORT     = int(os.environ.get("SMTP_PORT", "587"))
SMTP_USER     = os.environ.get("SMTP_USER", "outreach@receptionistai.com.au")
SMTP_PASSWORD = os.environ.get("SMTP_PASSWORD", "")
FROM_NAME     = "ReceptionistAI"
FROM_EMAIL    = SMTP_USER
REPLY_TO      = "outreach@receptionistai.com.au"

# Load secrets from env file if SMTP_PASSWORD not set
if not SMTP_PASSWORD:
    secrets_path = BASE_DIR / "config" / "secrets.env"
    if secrets_path.exists():
        for line in secrets_path.read_text().splitlines():
            if "=" in line and not line.startswith("#"):
                k, v = line.split("=", 1)
                k = k.strip()
                if k == "SMTP_PASSWORD":
                    SMTP_PASSWORD = v.strip().strip('"\'')

PIPELINE_DAILY_LIMIT = 10   # max demos generated per run
OUTREACH_DAILY_LIMIT = 20   # max outreach emails per run

# ---------------------------------------------------------------------------
# PostgreSQL helpers
# ---------------------------------------------------------------------------
def get_pg_conn():
    if not POSTGRES_AVAILABLE:
        raise RuntimeError("PostgreSQL unavailable. Install psycopg2 and configure elestio_config.")
    params = PostgresConfig.get_connection_params()
    return psycopg2.connect(**params)


def ensure_schema():
    """Create the tradie_leads table if it doesn't exist."""
    ddl = """
    CREATE TABLE IF NOT EXISTS tradie_leads (
        id                  SERIAL PRIMARY KEY,
        business_name       TEXT NOT NULL,
        first_name          TEXT,
        phone               TEXT,
        email               TEXT,
        suburb              TEXT,
        trade_type          TEXT,
        has_website         BOOLEAN,
        website_url         TEXT,
        website_score       INTEGER DEFAULT 0,
        priority_tier       TEXT,
        priority_score      INTEGER DEFAULT 0,
        is_mobile_friendly  BOOLEAN,
        has_contact_form    BOOLEAN,
        demo_generated      BOOLEAN DEFAULT FALSE,
        demo_slug           TEXT,
        demo_url            TEXT,
        demo_generated_at   TIMESTAMPTZ,
        outreach_sent       BOOLEAN DEFAULT FALSE,
        outreach_sent_at    TIMESTAMPTZ,
        outreach_template   TEXT,
        ghl_contact_id      TEXT,
        status              TEXT DEFAULT 'new',
        notes               TEXT,
        created_at          TIMESTAMPTZ DEFAULT NOW(),
        updated_at          TIMESTAMPTZ DEFAULT NOW()
    );
    CREATE INDEX IF NOT EXISTS idx_tradie_leads_priority ON tradie_leads(priority_score DESC);
    CREATE INDEX IF NOT EXISTS idx_tradie_leads_status   ON tradie_leads(status);
    """
    conn = get_pg_conn()
    try:
        with conn.cursor() as cur:
            cur.execute(ddl)
        conn.commit()
        print("[SCHEMA] tradie_leads table ready.")
    finally:
        conn.close()


def upsert_lead(lead: dict) -> int:
    """Insert or update a lead row. Returns the row id."""
    conn = get_pg_conn()
    try:
        with conn.cursor() as cur:
            cur.execute("""
                INSERT INTO tradie_leads
                    (business_name, first_name, phone, email, suburb, trade_type)
                VALUES (%s, %s, %s, %s, %s, %s)
                ON CONFLICT DO NOTHING
                RETURNING id
            """, (
                lead.get("business_name"),
                lead.get("first_name"),
                lead.get("phone"),
                lead.get("email"),
                lead.get("suburb"),
                lead.get("trade_type", "tradie"),
            ))
            row = cur.fetchone()
            if row:
                conn.commit()
                return row[0]
            # If no row returned (conflict), fetch existing id
            cur.execute(
                "SELECT id FROM tradie_leads WHERE business_name=%s AND suburb=%s LIMIT 1",
                (lead.get("business_name"), lead.get("suburb"))
            )
            existing = cur.fetchone()
            conn.commit()
            return existing[0] if existing else -1
    finally:
        conn.close()


def update_lead(lead_id: int, updates: dict):
    """Update specific fields for a lead row."""
    if not updates:
        return
    updates["updated_at"] = datetime.now(timezone.utc)
    cols = ", ".join(f"{k} = %s" for k in updates)
    vals = list(updates.values()) + [lead_id]
    conn = get_pg_conn()
    try:
        with conn.cursor() as cur:
            cur.execute(f"UPDATE tradie_leads SET {cols} WHERE id = %s", vals)
        conn.commit()
    finally:
        conn.close()


def fetch_pending_audit(limit: int = 50) -> list:
    """Leads that haven't been audited yet."""
    conn = get_pg_conn()
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute("""
                SELECT * FROM tradie_leads
                WHERE priority_tier IS NULL
                ORDER BY created_at ASC
                LIMIT %s
            """, (limit,))
            return [dict(r) for r in cur.fetchall()]
    finally:
        conn.close()


def fetch_pending_demo(limit: int = PIPELINE_DAILY_LIMIT) -> list:
    """High/PRIME leads that haven't had a demo site generated."""
    conn = get_pg_conn()
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute("""
                SELECT * FROM tradie_leads
                WHERE demo_generated = FALSE
                  AND priority_tier IN ('PRIME', 'HIGH')
                ORDER BY priority_score DESC, created_at ASC
                LIMIT %s
            """, (limit,))
            return [dict(r) for r in cur.fetchall()]
    finally:
        conn.close()


def fetch_pending_outreach(limit: int = OUTREACH_DAILY_LIMIT) -> list:
    """Leads with demo site ready but outreach not yet sent."""
    conn = get_pg_conn()
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute("""
                SELECT * FROM tradie_leads
                WHERE demo_generated = TRUE
                  AND outreach_sent = FALSE
                  AND (phone IS NOT NULL OR email IS NOT NULL)
                ORDER BY priority_score DESC
                LIMIT %s
            """, (limit,))
            return [dict(r) for r in cur.fetchall()]
    finally:
        conn.close()


# ---------------------------------------------------------------------------
# Email sending
# ---------------------------------------------------------------------------
EMAIL_TEMPLATE_NO_WEBSITE = """Hi {first_name},

My name is Alex from ReceptionistAI — we help {suburb} trade businesses get more calls and bookings online.

I noticed {business_name} doesn't have a website, so we built one for you. It took our system about 60 seconds.

You can see it here: {demo_url}

It includes your business details, a full {trade_display} services page, an online quote form, and an AI receptionist that handles calls when you're busy.

If you'd like to use it, it's free for 14 days — no credit card, no lock-in.

Worth a look?

Alex
ReceptionistAI
https://receptionistai.com.au

---
To stop receiving emails from us, reply with "unsubscribe."
"""

EMAIL_TEMPLATE_BAD_WEBSITE = """Hi {first_name},

I came across {business_name} online. You have a website — but I noticed it's not displaying well on mobile and is missing a few features that would help convert visitors into enquiries.

We built you an upgraded version. It's free to look at:

{demo_url}

The new site is mobile-first, has a live quote form, and includes an AI receptionist that handles calls after hours.

If you'd like to switch, we can have it live today — free for 14 days.

Alex
ReceptionistAI
https://receptionistai.com.au

---
To unsubscribe, reply "unsubscribe."
"""


def select_email_template(lead: dict) -> tuple[str, str]:
    """Returns (subject, body) for the appropriate template."""
    first_name = lead.get("first_name") or lead.get("business_name", "there").split()[0]
    business_name = lead.get("business_name", "your business")
    suburb = lead.get("suburb", "your area")
    trade_type = lead.get("trade_type", "tradie")
    trade_display = trade_type.title()
    demo_url = lead.get("demo_url", "#")

    if not lead.get("has_website"):
        subject = f"We built {business_name} a website — want to see it?"
        body = EMAIL_TEMPLATE_NO_WEBSITE.format(
            first_name=first_name,
            business_name=business_name,
            suburb=suburb,
            trade_display=trade_display,
            demo_url=demo_url,
        )
        template_name = "no_website_v1"
    else:
        subject = f"{business_name}'s website — we've built you something better"
        body = EMAIL_TEMPLATE_BAD_WEBSITE.format(
            first_name=first_name,
            business_name=business_name,
            suburb=suburb,
            trade_display=trade_display,
            demo_url=demo_url,
        )
        template_name = "bad_website_v1"

    return subject, body, template_name


def send_email(to_email: str, subject: str, body: str) -> bool:
    """Send a plain-text email. Returns True on success."""
    if not SMTP_PASSWORD:
        print(f"  [EMAIL] SMTP not configured — would send to {to_email}: {subject}")
        return True  # Dry-run mode

    try:
        msg = MIMEMultipart("alternative")
        msg["Subject"] = subject
        msg["From"]    = f"{FROM_NAME} <{FROM_EMAIL}>"
        msg["To"]      = to_email
        msg["Reply-To"] = REPLY_TO
        msg.attach(MIMEText(body, "plain"))

        with smtplib.SMTP(SMTP_HOST, SMTP_PORT, timeout=15) as server:
            server.ehlo()
            server.starttls()
            server.login(SMTP_USER, SMTP_PASSWORD)
            server.sendmail(FROM_EMAIL, [to_email], msg.as_string())
        return True
    except Exception as exc:
        print(f"  [EMAIL ERROR] {to_email}: {exc}", file=sys.stderr)
        return False


# ---------------------------------------------------------------------------
# Pipeline stages
# ---------------------------------------------------------------------------
def stage_audit(leads: list) -> list:
    """Audit web presence for a list of lead dicts. Updates DB in-place."""
    print(f"\n[STAGE: AUDIT] Auditing {len(leads)} leads...")
    results = []
    for lead in leads:
        audit = audit_business(
            business_name=lead["business_name"],
            phone=lead.get("phone", ""),
            suburb=lead.get("suburb", ""),
            trade_type=lead.get("trade_type", "tradie"),
        )
        merged = {**lead, **audit}
        results.append(merged)
        if POSTGRES_AVAILABLE:
            update_lead(lead["id"], {
                "has_website":      audit["has_website"],
                "website_url":      audit.get("website_url"),
                "website_score":    audit.get("website_score", 0),
                "priority_tier":    audit["priority_tier"],
                "priority_score":   audit["priority_score"],
                "is_mobile_friendly": audit.get("is_mobile_friendly", False),
                "has_contact_form": audit.get("has_contact_form", False),
            })
        time.sleep(0.5)
    print(f"[STAGE: AUDIT] Complete. {len(results)} leads audited.")
    return results


def stage_generate(leads: list) -> list:
    """Generate demo sites for the given leads."""
    print(f"\n[STAGE: GENERATE] Generating demo sites for {len(leads)} leads...")
    generated = []
    for lead in leads:
        business_name = lead["business_name"]
        trade_type    = lead.get("trade_type", "tradie")
        suburb        = lead.get("suburb", "")
        phone         = lead.get("phone", "0400 000 000")
        email_addr    = lead.get("email")

        try:
            demo_path = generate_demo_site(
                business_name=business_name,
                trade_type=trade_type,
                suburb=suburb,
                phone=phone,
                email=email_addr,
            )
            slug = slugify(f"{business_name}-{suburb}")
            demo_url = f"{DEMO_BASE_URL}/{slug}"
            lead["demo_slug"] = slug
            lead["demo_url"]  = demo_url
            generated.append(lead)

            if POSTGRES_AVAILABLE and lead.get("id"):
                update_lead(lead["id"], {
                    "demo_generated":    True,
                    "demo_slug":         slug,
                    "demo_url":          demo_url,
                    "demo_generated_at": datetime.now(timezone.utc),
                })
            print(f"  [GENERATED] {business_name} → {demo_url}")
        except Exception as exc:
            print(f"  [ERROR] Failed to generate demo for {business_name}: {exc}", file=sys.stderr)

    print(f"[STAGE: GENERATE] {len(generated)}/{len(leads)} demo sites created.")
    return generated


def stage_outreach(leads: list) -> list:
    """Send outreach emails for leads with demo sites ready."""
    print(f"\n[STAGE: OUTREACH] Sending outreach for {len(leads)} leads...")
    sent = []
    for lead in leads:
        email_addr = lead.get("email")
        if not email_addr:
            print(f"  [SKIP] {lead['business_name']} — no email address")
            continue

        subject, body, template_name = select_email_template(lead)
        success = send_email(email_addr, subject, body)
        if success:
            sent.append(lead)
            if POSTGRES_AVAILABLE and lead.get("id"):
                update_lead(lead["id"], {
                    "outreach_sent":    True,
                    "outreach_sent_at": datetime.now(timezone.utc),
                    "outreach_template": template_name,
                    "status":           "outreach_sent",
                })
            print(f"  [SENT] {lead['business_name']} → {email_addr}")
        time.sleep(2)  # polite send rate

    print(f"[STAGE: OUTREACH] {len(sent)}/{len(leads)} emails sent.")
    return sent


# ---------------------------------------------------------------------------
# Import CSV
# ---------------------------------------------------------------------------
def cmd_import(file_path: str):
    """Import leads from CSV into the database."""
    path = Path(file_path)
    if not path.exists():
        print(f"[ERROR] File not found: {file_path}", file=sys.stderr)
        sys.exit(1)

    imported = 0
    skipped  = 0
    with open(path, newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            lead = {
                "business_name": row.get("business_name", "").strip(),
                "first_name":    row.get("first_name", "").strip() or None,
                "phone":         row.get("phone", "").strip() or None,
                "email":         row.get("email", "").strip() or None,
                "suburb":        row.get("suburb", "").strip(),
                "trade_type":    row.get("trade_type", "tradie").strip().lower(),
            }
            if not lead["business_name"]:
                skipped += 1
                continue

            if POSTGRES_AVAILABLE:
                upsert_lead(lead)
            imported += 1

    print(f"[IMPORT] {imported} leads imported, {skipped} skipped.")


# ---------------------------------------------------------------------------
# Status report
# ---------------------------------------------------------------------------
def cmd_status():
    if not POSTGRES_AVAILABLE:
        print("[STATUS] PostgreSQL not available. Cannot show status.")
        return

    conn = get_pg_conn()
    try:
        with conn.cursor() as cur:
            cur.execute("SELECT COUNT(*) FROM tradie_leads")
            total = cur.fetchone()[0]

            cur.execute("SELECT COUNT(*) FROM tradie_leads WHERE priority_tier IS NULL")
            unaudited = cur.fetchone()[0]

            cur.execute("SELECT COUNT(*) FROM tradie_leads WHERE priority_tier IN ('PRIME','HIGH')")
            high_value = cur.fetchone()[0]

            cur.execute("SELECT COUNT(*) FROM tradie_leads WHERE demo_generated = TRUE")
            demos_done = cur.fetchone()[0]

            cur.execute("SELECT COUNT(*) FROM tradie_leads WHERE outreach_sent = TRUE")
            outreach_done = cur.fetchone()[0]

        print("\n=== Website Factory Pipeline Status ===")
        print(f"  Total leads in DB:         {total}")
        print(f"  Unaudited:                 {unaudited}")
        print(f"  High-value (PRIME/HIGH):   {high_value}")
        print(f"  Demo sites generated:      {demos_done}")
        print(f"  Outreach sent:             {outreach_done}")
        print("=======================================\n")
    finally:
        conn.close()


# ---------------------------------------------------------------------------
# Main pipeline run
# ---------------------------------------------------------------------------
def cmd_run(limit: int = PIPELINE_DAILY_LIMIT):
    """Full daily pipeline: audit → generate → outreach."""
    print(f"\n{'='*60}")
    print(f"  WEBSITE FACTORY PIPELINE — {datetime.now().strftime('%Y-%m-%d %H:%M')}")
    print(f"  Daily limit: {limit} demos")
    print(f"{'='*60}\n")

    if POSTGRES_AVAILABLE:
        ensure_schema()

        # Stage 1: Audit any unaudited leads
        pending_audit = fetch_pending_audit(limit=50)
        if pending_audit:
            stage_audit(pending_audit)

        # Stage 2: Generate demos for top leads
        pending_demo = fetch_pending_demo(limit=limit)
        if pending_demo:
            generated = stage_generate(pending_demo)
        else:
            print("[PIPELINE] No leads pending demo generation.")
            generated = []

        # Stage 3: Send outreach for ready demos
        pending_outreach = fetch_pending_outreach(limit=OUTREACH_DAILY_LIMIT)
        if pending_outreach:
            stage_outreach(pending_outreach)
        else:
            print("[PIPELINE] No leads pending outreach.")

        cmd_status()
    else:
        print("[PIPELINE] Running in offline demo mode (no PostgreSQL).")
        print("[PIPELINE] Use 'import' command to load leads when DB is available.")


def cmd_generate_only(limit: int = PIPELINE_DAILY_LIMIT):
    """Only generate demo sites, no outreach."""
    ensure_schema()
    pending = fetch_pending_demo(limit=limit)
    if not pending:
        print("[GENERATE] No pending leads.")
        return
    stage_generate(pending)


def cmd_outreach_only(limit: int = OUTREACH_DAILY_LIMIT):
    """Only send outreach for demos already generated."""
    ensure_schema()
    pending = fetch_pending_outreach(limit=limit)
    if not pending:
        print("[OUTREACH] No pending outreach.")
        return
    stage_outreach(pending)


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main():
    parser = argparse.ArgumentParser(
        description="Website Factory Pipeline — end-to-end tradie lead generation."
    )
    sub = parser.add_subparsers(dest="cmd", required=True)

    p_run = sub.add_parser("run", help="Full daily pipeline run")
    p_run.add_argument("--limit", type=int, default=PIPELINE_DAILY_LIMIT,
                       help="Max demo sites to generate per run")

    p_gen = sub.add_parser("generate", help="Generate demo sites only (no outreach)")
    p_gen.add_argument("--limit", type=int, default=PIPELINE_DAILY_LIMIT)

    p_out = sub.add_parser("outreach", help="Send outreach only (demos must exist)")
    p_out.add_argument("--limit", type=int, default=OUTREACH_DAILY_LIMIT)

    p_imp = sub.add_parser("import", help="Import leads from CSV")
    p_imp.add_argument("--file", required=True, help="Path to leads CSV")

    sub.add_parser("status", help="Show pipeline status from DB")

    args = parser.parse_args()

    if args.cmd == "run":
        cmd_run(limit=args.limit)
    elif args.cmd == "generate":
        cmd_generate_only(limit=args.limit)
    elif args.cmd == "outreach":
        cmd_outreach_only(limit=args.limit)
    elif args.cmd == "import":
        cmd_import(args.file)
    elif args.cmd == "status":
        cmd_status()


if __name__ == "__main__":
    main()
