#!/usr/bin/env python3
"""
LEAD PRIORITY QUEUE BUILDER — Genesis ReceptionistAI Pipeline
Pulls top-N highest priority leads from PostgreSQL tradie_leads.
Formats them as an outreach queue for cold calling / email campaigns.
Outputs: E:\genesis-system\data\outreach_queue.json

Priority order:
  1. Highest priority_score (no website + many reviews)
  2. Not yet contacted (outreach_status = 'not_contacted')
  3. Has a phone number (for voice outreach)

Usage:
    python lead_priority_queue.py [--top N] [--trade plumbers] [--state QLD]
"""

import sys
import json
import argparse
from datetime import datetime
from pathlib import Path

# Make the shared genesis-memory package importable (provides elestio_config,
# imported lazily inside get_db_conn).
sys.path.append('E:/genesis-system/data/genesis-memory')

# Output artefacts: the full queue as JSON, plus a flat CSV export.
OUTPUT_FILE = Path('E:/genesis-system/data/outreach_queue.json')
OUTPUT_CSV  = Path('E:/genesis-system/data/LEADS/outreach_queue_top50.csv')

# Per-lead demo link; placeholders are filled with URL-encoded lead fields
# by build_demo_url().
DEMO_URL_TEMPLATE = "https://demo.receptionistai.com.au/?business={name}&trade={trade}&suburb={suburb}"

def get_db_conn():
    """Open and return a new PostgreSQL connection to the leads database.

    Imports are deferred to call time so the module can be loaded (e.g.
    for --help) even when psycopg2 or elestio_config is unavailable.
    """
    import psycopg2
    from elestio_config import PostgresConfig

    connection_params = PostgresConfig.get_connection_params()
    return psycopg2.connect(**connection_params)

def build_demo_url(lead: dict) -> str:
    """Return the personalised demo URL for a lead.

    The business name is truncated to 40 characters before URL-encoding
    to keep the resulting link short; missing fields become empty params.
    """
    from urllib.parse import quote

    return DEMO_URL_TEMPLATE.format(
        name=quote(lead.get('business_name', '')[:40]),
        trade=quote(lead.get('trade_category', '')),
        suburb=quote(lead.get('suburb', '')),
    )

def generate_outreach_snippet(lead: dict) -> str:
    """Generate a personalised first-line outreach message.

    Leads without a website get a pitch that leans on their review
    reputation; leads with a website get a missed-call-capture pitch.
    Missing fields fall back to generic wording so the message never
    renders 'None'.
    """
    name = lead.get('business_name', 'there')
    trade = lead.get('trade_category', 'business')
    suburb = lead.get('suburb', '')
    # NOTE: review_count was previously read here but never used — removed.

    if not lead.get('has_website'):
        return (f"Hi {name} — noticed you don't have a website yet but you've got great reviews. "
                f"We help {trade}s in {suburb} get more calls with AI voice assistants. "
                f"Takes 5 mins to set up. Worth a chat?")
    else:
        return (f"Hi {name} — you've built a strong reputation in {suburb}. "
                f"We help {trade}s capture every missed call with an AI receptionist. "
                f"Would you like a free demo?")

def build_priority_queue(top_n: int = 50, trade_filter: str = None,
                         state_filter: str = None) -> list:
    """Fetch the top-N priority leads and shape them into outreach queue items.

    Only leads with a phone number that have not yet been contacted are
    considered; `trade_filter` / `state_filter` narrow the pool further.
    Ordering: priority_score (NULL treated as 0) descending, then numeric
    review count descending, then website-less leads first — so unenriched
    leads (NULL priority_score) still surface, ranked by reviews, when few
    enriched leads exist.

    Returns a list of dicts ready for JSON/CSV export, each with a 1-based
    "rank", a personalised demo URL and an outreach snippet.
    """
    cols = ['id', 'business_name', 'trade_category', 'phone', 'email', 'website',
            'suburb', 'state', 'postcode', 'rating', 'review_count',
            'has_website', 'website_score', 'priority_score',
            'estimated_revenue_band', 'outreach_status', 'source_url']

    # Mandatory filters: voice outreach needs a phone number, and we never
    # re-queue leads that were already contacted.
    where_clauses = ["phone IS NOT NULL", "phone != ''",
                     "outreach_status = 'not_contacted'"]
    params = []

    # Optional filters are parameterized (%s) — never interpolated — to
    # keep the query injection-safe.
    if trade_filter:
        where_clauses.append("trade_category = %s")
        params.append(trade_filter)
    if state_filter:
        where_clauses.append("UPPER(state) = %s")
        params.append(state_filter.upper())

    where_sql = " AND ".join(where_clauses)

    # NOTE(review): the previous version issued a second "fallback" query
    # when this one returned no rows, but it used the identical WHERE
    # clause, so it could never return rows either — removed as dead code.
    # COALESCE already ranks unenriched leads (NULL score -> 0) by review
    # count, which covers the intended fallback behaviour.
    query = f"""
        SELECT {', '.join(cols)}
        FROM tradie_leads
        WHERE {where_sql}
        ORDER BY
            COALESCE(priority_score, 0) DESC,
            CASE WHEN review_count ~ '^[0-9]+$'
                 THEN CAST(review_count AS INTEGER) ELSE 0 END DESC,
            CASE WHEN has_website = FALSE THEN 1 ELSE 0 END DESC
        LIMIT %s
    """
    params.append(top_n)

    conn = get_db_conn()
    try:
        cur = conn.cursor()
        cur.execute(query, params)
        rows = cur.fetchall()
    finally:
        # Always release the connection, even if the query raises.
        conn.close()

    queue = []
    for rank, row in enumerate(rows, 1):
        lead = dict(zip(cols, row))
        queue.append({
            "rank":             rank,
            "id":               lead['id'],
            "business_name":    lead['business_name'],
            "trade_category":   lead['trade_category'],
            "phone":            lead['phone'],
            "email":            lead['email'] or '',
            "website":          lead['website'] or '',
            "suburb":           lead['suburb'] or '',
            "state":            lead['state'] or '',
            "rating":           lead['rating'] or '',
            "review_count":     lead['review_count'] or '',
            "has_website":      lead['has_website'],
            "website_score":    lead['website_score'],
            "priority_score":   lead['priority_score'],
            "revenue_band":     lead['estimated_revenue_band'] or 'UNKNOWN',
            "outreach_status":  lead['outreach_status'] or 'not_contacted',
            "demo_url":         build_demo_url(lead),
            "outreach_snippet": generate_outreach_snippet(lead),
            "source_url":       lead['source_url'] or '',
        })

    return queue

def save_queue(queue: list):
    """Persist the queue as JSON (full payload) and CSV (flat export).

    The JSON file wraps the leads with a generation timestamp and a count;
    the CSV mirrors the queue items one row per lead. The CSV is only
    written when the queue is non-empty (DictWriter needs field names from
    the first item).
    """
    output = {
        "generated_at": datetime.now().isoformat(),
        "total_leads":  len(queue),
        "leads":        queue,
    }

    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        # default=str covers non-JSON types (dates, Decimals) from the DB.
        json.dump(output, f, indent=2, default=str)
    print(f"Saved JSON: {OUTPUT_FILE}")

    # CSV export
    import csv
    if queue:
        # BUG FIX: the CSV lives in a different directory (LEADS/) that was
        # never created, so the export crashed on a fresh checkout.
        OUTPUT_CSV.parent.mkdir(parents=True, exist_ok=True)
        keys = list(queue[0].keys())
        with open(OUTPUT_CSV, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=keys)
            writer.writeheader()
            writer.writerows(queue)
        print(f"Saved CSV:  {OUTPUT_CSV}")

def print_queue_summary(queue: list):
    """Print a human-readable overview of the first 10 queue entries."""
    bar = '=' * 60
    print(f"\n{bar}")
    print(f"OUTREACH QUEUE — TOP {len(queue)} LEADS")
    print(bar)

    for item in queue[:10]:
        # Describe website status once, instead of inline in the f-string.
        if item['has_website']:
            website_desc = 'YES (score:' + str(item['website_score']) + ')'
        else:
            website_desc = 'NO WEBSITE'

        print(f"\n#{item['rank']} {item['business_name']}")
        print(f"   Trade:    {item['trade_category']} | {item['suburb']}, {item['state']}")
        print(f"   Phone:    {item['phone']}")
        print(f"   Reviews:  {item['review_count']} | Rating: {item['rating']}")
        print(f"   Website:  {website_desc}")
        print(f"   Priority: {item['priority_score']} | Revenue: {item['revenue_band']}")
        print(f"   Snippet:  {item['outreach_snippet'][:80]}...")

    remaining = len(queue) - 10
    if remaining > 0:
        print(f"\n  ... and {remaining} more leads in the queue file.")
    print(f"\n{bar}")


if __name__ == "__main__":
    # CLI entry point: parse filters, build the queue, persist and summarise.
    parser = argparse.ArgumentParser(description='Build priority outreach queue')
    parser.add_argument('--top', type=int, default=50, help='Number of top leads')
    parser.add_argument('--trade', type=str, default=None, help='Filter by trade')
    parser.add_argument('--state', type=str, default=None, help='Filter by state (QLD, NSW, etc)')
    args = parser.parse_args()

    print(f"Building priority queue (top {args.top})...")
    queue = build_priority_queue(top_n=args.top,
                                 trade_filter=args.trade,
                                 state_filter=args.state)

    if queue:
        save_queue(queue)
        print_queue_summary(queue)
        print(f"\nFull queue saved to: {OUTPUT_FILE}")
    else:
        print("No leads found in database. Run the scraper first, then re-run this.")