"""
Import existing localsearch_tradies_DEDUPED.csv into tradie_leads PostgreSQL table.
Maps CSV columns to the current table schema.
"""
import sys
import csv
import hashlib

sys.path.insert(0, '/mnt/e/genesis-system/data/genesis-memory')
from elestio_config import PostgresConfig
import psycopg2

CSV_PATH = '/mnt/e/genesis-system/data/LEADS/localsearch_tradies_DEDUPED.csv'
LIMIT = None  # Set to None to import all

def compute_lead_hash(business_name, phone):
    key = '{}|{}'.format(business_name.lower().strip(), phone.strip())
    return hashlib.sha256(key.encode()).hexdigest()[:64]

def compute_priority_score(rating, review_count, has_email):
    score = 0
    try:
        r = float(rating) if rating else None
        rc = int(review_count) if review_count else 0
    except:
        r, rc = None, 0

    if r is not None:
        if r >= 4.5: score += 30
        elif r >= 4.0: score += 20
        elif r >= 3.5: score += 10
    if rc >= 50: score += 30
    elif rc >= 20: score += 20
    elif rc >= 10: score += 10
    elif rc >= 5: score += 5
    if has_email: score += 20
    if rc >= 100: score += 20
    return min(score, 100)

conn = psycopg2.connect(**PostgresConfig.get_connection_params())
cur = conn.cursor()

INSERT_SQL = """
INSERT INTO tradie_leads (
    lead_hash, business_name, trade, city, state, suburb,
    phone, email, website, rating, review_count,
    listing_url, scrape_source, outreach_status, priority_score
) VALUES (
    %s, %s, %s, %s, %s, %s,
    %s, %s, %s, %s, %s,
    %s, %s, %s, %s
)
ON CONFLICT (lead_hash) DO UPDATE SET
    updated_at = NOW(),
    outreach_status = EXCLUDED.outreach_status
"""

inserted = 0
skipped = 0
errors = 0

with open(CSV_PATH, 'r', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    for i, row in enumerate(reader):
        if LIMIT and i >= LIMIT:
            break

        business_name = row.get('business_name', '').strip()
        phone = row.get('phone', '').strip()
        if not business_name or not phone:
            skipped += 1
            continue

        lead_hash = compute_lead_hash(business_name, phone)
        trade = row.get('trade_category', '').strip().rstrip('s')  # plumbers -> plumber
        website = row.get('website', '').strip()
        email = row.get('email', '').strip()
        has_email = bool(email)

        # Determine city from source_url or suburb
        source_url = row.get('source_url', '')
        city = 'brisbane'
        if 'sydney' in source_url.lower(): city = 'sydney'
        elif 'melbourne' in source_url.lower(): city = 'melbourne'
        elif 'gold-coast' in source_url.lower(): city = 'gold coast'
        elif 'cairns' in source_url.lower(): city = 'cairns'
        elif 'perth' in source_url.lower(): city = 'perth'
        elif 'adelaide' in source_url.lower(): city = 'adelaide'

        try:
            rating = float(row.get('rating', '') or 0) or None
        except:
            rating = None
        try:
            review_count = int(row.get('review_count', '') or 0)
        except:
            review_count = 0

        priority = compute_priority_score(rating, review_count, has_email)

        try:
            cur.execute(INSERT_SQL, (
                lead_hash,
                business_name,
                trade,
                city,
                row.get('state', 'QLD').strip(),
                row.get('suburb', '').strip(),
                phone,
                email or None,
                website or None,
                rating,
                review_count,
                row.get('source_url', '').strip() or None,
                'localsearch.com.au',
                'not_contacted',
                priority
            ))
            inserted += 1
        except Exception as e:
            errors += 1
            if errors <= 3:
                print('  Error on row {}: {}'.format(i, e))

conn.commit()
print('Import complete: {} inserted, {} skipped, {} errors'.format(inserted, skipped, errors))

# Show sample of what was inserted
cur.execute("""
    SELECT business_name, trade, city, suburb, phone, priority_score, outreach_status
    FROM tradie_leads ORDER BY priority_score DESC LIMIT 10
""")
rows = cur.fetchall()
print('\nTop 10 leads by priority:')
print('{:<40} {:<15} {:<12} {:<20} {:<15} {:>5} {}'.format(
    'Business', 'Trade', 'City', 'Suburb', 'Phone', 'Score', 'Status'))
print('-' * 115)
for r in rows:
    print('{:<40} {:<15} {:<12} {:<20} {:<15} {:>5} {}'.format(
        str(r[0])[:39], str(r[1])[:14], str(r[2])[:11], str(r[3])[:19],
        str(r[4])[:14], r[5], r[6]))

cur.execute("SELECT COUNT(*) FROM tradie_leads")
total = cur.fetchone()[0]
print('\nTotal rows in tradie_leads: {}'.format(total))

conn.close()
