"""
Genesis Gold Pipeline — Batch Prospect Auditor
===============================================
Process multiple prospects in sequence: scrape → score → generate → deploy → report.

Usage:
    # Audit from URL list file (one URL per line)
    python3 batch_audit.py --file prospects.txt --deploy

    # Audit a comma-separated list
    python3 batch_audit.py --urls "https://site1.com.au,https://site2.com.au"

    # Score only (no Gemini generation, fast)
    python3 batch_audit.py --file prospects.txt --score-only

    # With agency referral code
    python3 batch_audit.py --file prospects.txt --deploy --referral AGENCY-XK92F1
"""

import argparse
import csv
import json
import logging
import os
import re
import sys
import time
from datetime import datetime, timezone
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent))
sys.path.insert(0, "/mnt/e/genesis-system/scripts/clone_pipeline")

from prospect_audit import scrape_prospect, score_site, generate_spore_url, generate_audit_report, run_audit

# Module logger; handlers and level are set up by the script entry point.
logger = logging.getLogger("gold_pipeline.batch_audit")

# Root directory under which per-prospect report folders and batch reports are written.
OUTPUT_DIR = Path("/mnt/e/genesis-system/scripts/gold_pipeline/audits")
# JSON Lines log that receives one appended record per audited prospect.
BATCH_LOG = OUTPUT_DIR / "batch_log.jsonl"


def _row_url(row: dict) -> str:
    """Return the prospect URL held in one CSV row, or ``""`` if there is none.

    Header names are matched case-insensitively and ignoring surrounding
    whitespace; a ``url`` column takes priority over a ``website`` column.
    """
    by_header = {}
    for header, value in row.items():
        # DictReader uses a None key for surplus cells and None values for
        # missing ones; neither can hold a URL.
        if isinstance(header, str) and isinstance(value, str) and value.strip():
            by_header.setdefault(header.strip().lower(), value.strip())
    return by_header.get("url") or by_header.get("website") or ""


def load_prospects(file_path: str = None, urls: str = None) -> list:
    """Load prospect URLs from a file and/or a comma-separated string.

    Args:
        file_path: Optional path to a prospect list. A ``.csv`` file (any
            letter case) is read with ``csv.DictReader`` and the URL is taken
            from its ``url`` or ``website`` column. Any other file is read as
            text with one URL per line; blank lines and lines starting with
            ``#`` are skipped.
        urls: Optional comma-separated URLs, appended after the file entries.

    Returns:
        The URLs in input order. Entries that do not already start with
        ``http://`` or ``https://`` (case-insensitive) get ``https://``
        prepended.
    """
    prospects = []

    if file_path:
        p = Path(file_path)
        if p.suffix.lower() == ".csv":
            # utf-8-sig drops a leading BOM (common in spreadsheet exports),
            # which would otherwise become part of the first header name.
            # newline="" is what the csv module expects for its input files.
            with open(p, encoding="utf-8-sig", newline="") as f:
                for row in csv.DictReader(f):
                    url = _row_url(row)
                    if url:
                        prospects.append(url)
        else:
            for line in p.read_text(encoding="utf-8-sig").splitlines():
                line = line.strip()
                if line and not line.startswith("#"):
                    prospects.append(line)

    if urls:
        prospects.extend(u.strip() for u in urls.split(",") if u.strip())

    # Test for a real scheme rather than the bare prefix "http", so hosts such
    # as "httpbin.org" still get a scheme and "HTTP://..." is left untouched.
    return [
        url if url.lower().startswith(("http://", "https://")) else "https://" + url
        for url in prospects
    ]


# Result statuses counted as a success in the progress log and final summary.
_SUCCESS_STATUSES = ("complete", "scored")

# Any run of characters other than lowercase letters/digits becomes one hyphen.
_SLUG_SEPARATORS = re.compile(r"[^a-z0-9]+")


def _report_slug(business_name) -> str:
    """Return a directory-safe slug for *business_name*, or ``"prospect"`` if empty/None."""
    return _SLUG_SEPARATORS.sub("-", (business_name or "").lower()).strip("-") or "prospect"


def _score_prospect(url: str, referral_code: str) -> dict:
    """Scrape and score one prospect without site generation; write its audit report."""
    scrape_data = scrape_prospect(url)
    if not scrape_data["content"]:
        return {"url": url, "status": "scrape_failed", "score": 0}

    scores = score_site(url, scrape_data["content"])
    spore = generate_spore_url(url, tier=1, referral_code=referral_code)
    report = generate_audit_report(scrape_data, scores, spore_url=spore)

    report_dir = OUTPUT_DIR / _report_slug(scrape_data["business_name"])
    report_dir.mkdir(parents=True, exist_ok=True)
    report_path = report_dir / "audit_report.md"
    report_path.write_text(report, encoding="utf-8")

    return {
        "url": url,
        "business_name": scrape_data["business_name"],
        "phone": scrape_data["phone"],
        "location": scrape_data["location"],
        "score": scores["overall"],
        "spore_url": spore,
        "status": "scored",
        "report_path": str(report_path),
    }


def _audit_prospect(url: str, model: str, deploy: bool, referral_code: str) -> dict:
    """Run the full ``run_audit`` pipeline for one prospect and flatten its result."""
    audit = run_audit(url, model=model, deploy=deploy, referral_code=referral_code)
    # scrape_data / scores may be falsy in the audit result; fall back to
    # empty fields so every record has the same keys.
    scrape_data = audit["scrape_data"]
    scores = audit["scores"]
    return {
        "url": url,
        "business_name": scrape_data["business_name"] if scrape_data else "",
        "phone": scrape_data["phone"] if scrape_data else "",
        "location": scrape_data["location"] if scrape_data else "",
        "score": scores["overall"] if scores else 0,
        "html_chars": audit["html_chars"],
        "demo_url": audit["demo_url"],
        "spore_url": audit["spore_url"],
        "report_path": audit["report_path"],
        "status": audit["status"],
        "elapsed_secs": audit["elapsed_secs"],
    }


def _append_batch_log(result: dict) -> None:
    """Append *result*, stamped with the current UTC time, to ``BATCH_LOG``."""
    BATCH_LOG.parent.mkdir(parents=True, exist_ok=True)
    record = {"timestamp": datetime.now(timezone.utc).isoformat(), **result}
    with open(BATCH_LOG, "a", encoding="utf-8") as f:
        f.write(json.dumps(record) + "\n")


def run_batch(
    prospects: list,
    model: str = "gemini-2.5-flash",
    deploy: bool = False,
    score_only: bool = False,
    referral_code: str = "",
    delay: float = 3.0,
) -> list:
    """Run the audit pipeline on each prospect in turn.

    Args:
        prospects: URLs to audit, processed in order.
        model: Model name forwarded to ``run_audit`` (full mode only).
        deploy: Forwarded to ``run_audit`` (full mode only).
        score_only: When true, only scrape and score each site and write an
            ``audit_report.md`` under ``OUTPUT_DIR``; ``run_audit`` is not called.
        referral_code: Referral code forwarded to the spore URL / audit.
        delay: Seconds to sleep between prospects in full mode. Score-only
            mode always sleeps a fixed 1.0 second instead.

    Returns:
        One result dict per prospect, in input order. Every dict has ``url``,
        ``status`` and ``score``; the remaining keys depend on the mode and on
        whether the scrape succeeded.

    Side effects:
        Appends one JSON line per prospect to ``BATCH_LOG`` and logs progress.
    """
    results = []
    total = len(prospects)
    # Monotonic clock: the elapsed time must not be affected by wall-clock changes.
    batch_start = time.monotonic()
    pause = 1.0 if score_only else delay

    logger.info(f"=== BATCH AUDIT: {total} prospects ===")

    for i, url in enumerate(prospects, 1):
        logger.info(f"[{i}/{total}] Auditing: {url}")

        if score_only:
            result = _score_prospect(url, referral_code)
        else:
            result = _audit_prospect(url, model, deploy, referral_code)

        results.append(result)
        # Log each prospect as soon as it finishes so an interrupted batch
        # still leaves a record of the work already done.
        _append_batch_log(result)

        status_emoji = "✓" if result["status"] in _SUCCESS_STATUSES else "✗"
        logger.info(f"[{i}/{total}] {status_emoji} {result.get('business_name', '?')} — score={result.get('score', '?')}")

        # No pause after the final prospect.
        if i < total:
            time.sleep(pause)

    batch_elapsed = time.monotonic() - batch_start

    succeeded = sum(1 for r in results if r["status"] in _SUCCESS_STATUSES)
    failed = len(results) - succeeded

    logger.info(f"=== BATCH COMPLETE: {succeeded} success, {failed} failed, {batch_elapsed:.1f}s ===")
    return results


def _md_cell(value) -> str:
    """Render *value* as text that cannot break out of a Markdown table cell."""
    return str(value).replace("|", "\\|").replace("\r", " ").replace("\n", " ")


def generate_batch_report(results: list, referral_code: str = "") -> str:
    """Generate a Markdown summary report for a batch of audit results.

    Args:
        results: Result dicts as produced by ``run_batch``. Only entries whose
            ``score`` is greater than zero are treated as successfully scored;
            a missing or ``None`` score counts as zero.
        referral_code: Currently unused; accepted for interface compatibility.

    Returns:
        The report text: a summary table, a ranking table ordered from lowest
        to highest score, the hot leads (score below 50), and an outreach
        e-mail template.
    """
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    # Sort once; both the ranking table and the hot-leads list use this order.
    scored = sorted(
        (r for r in results if (r.get("score") or 0) > 0),
        key=lambda r: r["score"],
    )
    # max(..., 1) avoids dividing by zero when nothing was scored.
    avg_score = sum(r["score"] for r in scored) / max(len(scored), 1)
    hot_leads = [r for r in scored if r["score"] < 50]
    critical_count = sum(1 for r in scored if r["score"] < 30)

    header = f"""# Gold Pipeline — Batch Audit Report
## Generated: {now}

### Summary
| Metric | Value |
|--------|-------|
| Prospects audited | {len(results)} |
| Successfully scored | {len(scored)} |
| Average score | {avg_score:.0f}/100 |
| Below 50 (hot leads) | {len(hot_leads)} |
| Below 30 (critical) | {critical_count} |

### Prospect Rankings (lowest score = biggest opportunity)

| Rank | Business | Score | Phone | Location | Spore URL |
|------|----------|-------|-------|----------|-----------|
"""

    ranking_rows = []
    for rank, r in enumerate(scored, 1):
        # Truncate before escaping so an escape sequence is never cut in half.
        biz = _md_cell((r.get("business_name") or "?")[:30])
        phone = _md_cell(r.get("phone") or "")
        loc = _md_cell(r.get("location") or "")
        spore = r.get("spore_url") or ""
        # Leave the cell empty rather than emitting a link with no target.
        link = f"[Spore]({spore})" if spore else ""
        ranking_rows.append(
            f"| {rank} | {biz} | {r['score']}/100 | {phone} | {loc} | {link} |\n"
        )

    hot_header = """
### Hot Leads (Score < 50)

These businesses have the most to gain from an upgrade:

"""
    hot_rows = [
        f"- **{r.get('business_name') or '?'}** ({r['score']}/100) — {r['url']}\n"
        for r in hot_leads
    ]

    # NOTE: the template quotes the batch average, not an individual
    # prospect's score.
    outreach = f"""
### Outreach Template

Subject: Your website is scoring {avg_score:.0f}/100 — here's how to fix it

Hi [First Name],

I ran a quick audit on your website and wanted to share the results.

Your site scored {avg_score:.0f}/100 across key metrics like mobile responsiveness, SEO, call-to-action visibility, and lead capture.

The biggest gaps:
- No AI voice receptionist (you're missing after-hours calls)
- [Personalised finding from audit]
- [Personalised finding from audit]

I've put together a free demo showing what your website could look like with an AI upgrade — complete with a voice receptionist that answers calls 24/7.

**See it live:** [Demo URL]
**Try the voice agent:** [Spore URL]

This isn't a generic template — it's built using YOUR business details, YOUR services, and YOUR phone number.

Happy to chat if you'd like to learn more. No pressure.

Cheers,
[Name]
AgileAdapt | AI-Powered Business Intelligence
"""
    return "".join([header, *ranking_rows, hot_header, *hot_rows, outreach])


def main(argv=None) -> None:
    """Command-line entry point: load prospects, run the batch, save and print a summary.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read ``sys.argv``.

    Exits with status 1 when no prospect URLs are found, and via
    ``parser.error`` when neither ``--file`` nor ``--urls`` is given.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    parser = argparse.ArgumentParser(description="Genesis Gold Pipeline — Batch Prospect Auditor")
    parser.add_argument("--file", help="File with prospect URLs (txt: one per line, csv: 'url' column)")
    parser.add_argument("--urls", help="Comma-separated list of URLs")
    parser.add_argument("--model", default="gemini-2.5-flash", help="Gemini model")
    parser.add_argument("--deploy", action="store_true", help="Deploy demo sites to Netlify")
    parser.add_argument("--score-only", action="store_true", help="Score only (no Gemini generation)")
    parser.add_argument("--referral", default="", help="Agency referral code")
    parser.add_argument("--delay", type=float, default=3.0, help="Seconds between audits")
    args = parser.parse_args(argv)

    if not args.file and not args.urls:
        parser.error("Provide --file or --urls")

    prospects = load_prospects(args.file, args.urls)
    if not prospects:
        print("No prospects found")
        sys.exit(1)

    print(f"\nAuditing {len(prospects)} prospects:")
    for prospect in prospects:
        print(f"  {prospect}")

    results = run_batch(
        prospects=prospects,
        model=args.model,
        deploy=args.deploy,
        score_only=args.score_only,
        referral_code=args.referral,
        delay=args.delay,
    )

    # Generate and save report. The file name carries a local-time timestamp
    # with minute resolution.
    report = generate_batch_report(results, args.referral)
    report_path = OUTPUT_DIR / f"batch_report_{datetime.now().strftime('%Y%m%d_%H%M')}.md"
    report_path.parent.mkdir(parents=True, exist_ok=True)
    report_path.write_text(report, encoding="utf-8")
    print(f"\nReport saved: {report_path}")

    # Summary table, lowest score first. `or` fallbacks keep a None score or
    # business name from breaking the sort or the column formatting.
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"{'Business':<30} {'Score':>5}  {'Status':<10}")
    print(rule)
    for result in sorted(results, key=lambda r: r.get("score") or 0):
        biz = (result.get("business_name") or "?")[:28]
        score = result.get("score") or 0
        status = result.get("status") or "?"
        print(f"{biz:<30} {score:>5}  {status:<10}")
    print(rule)


if __name__ == "__main__":
    main()