#!/usr/bin/env python3
"""
AEO Audit Tool — Genesis SEO + AEO Intelligence System
=======================================================
Audits a tradie business for Answer Engine Optimisation (AEO) readiness.
Checks how visible the business is when AI engines (Perplexity, ChatGPT,
Gemini, Claude) answer queries like "find me a plumber in Brisbane".

Usage:
    python aeo_audit.py --business "Smith Plumbing" --suburb "Brisbane" \
                        --trade "plumber" --website "https://smithplumbing.com.au"

    python aeo_audit.py --business "ABC Electrical" --suburb "Parramatta" \
                        --trade "electrician"

Output:
    - AEO Score 0–100
    - Gap list with severity (CRITICAL / HIGH / MEDIUM / LOW)
    - Prioritised fix list
    - Professional report saved to reports/aeo_report_<business>.md

Author: Genesis System — ReceptionistAI Division
Date:   2026-02-23
"""

import argparse
import json
import os
import re
import sys
import time
import urllib.request
import urllib.parse
import urllib.error
from datetime import datetime
from typing import Optional


# ─────────────────────────────────────────────────────────────────────────────
# CONFIGURATION
# ─────────────────────────────────────────────────────────────────────────────

# Australian tradie-relevant AI directories (highest-signal for AEO).
# Each entry carries: display name, signup/profile URL, AEO weight
# (weights across all 14 entries sum to 100), and a short machine key
# used by the directory checks.
AU_DIRECTORIES = [
    {"name": name, "url": url, "weight": weight, "key": key}
    for name, url, weight, key in [
        ("Google Business Profile",   "https://business.google.com",        20, "gbp"),
        ("Bing Places for Business",  "https://bingplaces.com",             12, "bing"),
        ("Apple Business Connect",    "https://businessconnect.apple.com",   8, "apple"),
        ("hipages",                   "https://hipages.com.au",             10, "hipages"),
        ("ServiceSeeking",            "https://serviceseeking.com.au",       6, "serviceseeking"),
        ("Truelocal",                 "https://truelocal.com.au",            5, "truelocal"),
        ("Yellow Pages AU",           "https://yellowpages.com.au",          6, "yellowpages"),
        ("Yelp",                      "https://yelp.com",                    5, "yelp"),
        ("Foursquare",                "https://foursquare.com",              8, "foursquare"),
        ("Facebook Business",         "https://business.facebook.com",       6, "facebook"),
        ("LinkedIn Company Page",     "https://linkedin.com",                4, "linkedin"),
        ("Houzz AU",                  "https://houzz.com.au",                5, "houzz"),
        ("TrueReview / Clutch",       "https://clutch.co",                   3, "clutch"),
        ("BuildSearch",               "https://buildsearch.com.au",          2, "buildsearch"),
    ]
]

# Schema.org types most relevant for AI engines, weighted by AEO impact.
# "critical" types trigger a CRITICAL gap when missing; the rest MEDIUM.
SCHEMA_TYPES = {
    schema_type: {"weight": weight, "critical": critical, "description": description}
    for schema_type, weight, critical, description in [
        ("LocalBusiness",  15, True,
         "Core entity — tells AI who you are, where, and what you do"),
        ("Service",         8, True,
         "Lists individual services — essential for trade-specific queries"),
        ("FAQPage",         7, True,
         "Directly feeds AI answer engines with Q&A content"),
        ("Review",          6, False,
         "Social proof signals used by Perplexity and ChatGPT"),
        ("HowTo",           5, False,
         "Step-by-step content preferred in AI overviews"),
        ("BreadcrumbList",  3, False,
         "Site structure clarity for AI crawlers"),
        ("WebSite",         2, False,
         "Site-level entity confirmation"),
        ("Organization",    2, False,
         "Business entity for Knowledge Graph"),
    ]
}

# Per-check weights for the final AEO Score; values sum to 100 so each
# check's contribution adds straight into the 0-100 scale.
# NOTE(review): calculate_score never reads a "content_depth" contribution —
# verify whether those 7 points are intentionally unreachable.
SCORING_WEIGHTS = {
    "gbp_presence": 20,     # Google Business Profile — single biggest signal
    "bing_places": 10,      # Bing → ChatGPT pipeline
    "foursquare": 8,        # 60-70% of ChatGPT local data
    "au_directories": 10,   # hipages, Yellow Pages, Truelocal etc.
    "schema_markup": 15,    # LocalBusiness + Service + FAQ schema
    "review_count": 8,      # Volume of reviews (social proof for AI)
    "review_rating": 5,     # Average star rating
    "nap_consistency": 10,  # Name/Address/Phone uniformity
    "website_quality": 7,   # Website content depth, FAQ pages
    "content_depth": 7,     # suburb-specific pages, trade-specific content
}

# Minimum review counts for each quality band (consumed by check_reviews).
REVIEW_THRESHOLDS = {
    "excellent": 50,  # 50+ reviews earns the full review-count score
    "good": 20,       # 20-49 reviews
    "fair": 5,        # 5-19 reviews
    "poor": 1,        # 1-4 reviews
    "none": 0,        # no reviews at all
}


# ─────────────────────────────────────────────────────────────────────────────
# CORE AUDIT CLASS
# ─────────────────────────────────────────────────────────────────────────────

class AEOAudit:
    """
    Runs a full AEO audit for a local tradie business.

    The audit checks:
    1. Directory presence (Google Business Profile, Bing Places, Foursquare,
       hipages, Yellow Pages AU, etc.)
    2. Schema markup on the business website (if URL provided)
    3. Review count and rating signals
    4. NAP (Name / Address / Phone) consistency indicators
    5. Website content depth for AI-optimised pages

    Produces:
    - AEO Score 0–100
    - Severity-ranked gap list
    - Prioritised fix recommendations
    - Professional markdown report
    """

    def __init__(
        self,
        business_name: str,
        suburb: str,
        trade_type: str,
        website_url: Optional[str] = None,
        state: str = "QLD",
        verbose: bool = False,
    ):
        self.business_name = business_name.strip()
        self.suburb = suburb.strip()
        self.trade_type = trade_type.strip().lower()
        self.website_url = website_url.strip().rstrip("/") if website_url else None
        self.state = state.upper().strip()
        self.verbose = verbose

        self.results: dict = {}
        self.gaps: list = []
        self.fixes: list = []
        self.score: int = 0
        self.audit_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")

    # ──────────────────────────────────────────────
    # UTILITY
    # ──────────────────────────────────────────────

    def _log(self, msg: str):
        if self.verbose:
            print(f"  [AEO] {msg}")

    def _fetch_url(self, url: str, timeout: int = 10) -> Optional[str]:
        """Fetch URL content; return None on any error."""
        try:
            req = urllib.request.Request(
                url,
                headers={
                    "User-Agent": (
                        "Mozilla/5.0 (compatible; GenesisAEOBot/1.0; "
                        "+https://receptionistai.com.au/aeo-audit)"
                    )
                },
            )
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                return resp.read().decode("utf-8", errors="ignore")
        except Exception as e:
            self._log(f"fetch error for {url}: {e}")
            return None

    def _search_directory(self, directory_key: str, search_term: str) -> dict:
        """
        Simulate a directory presence check.
        In production: integrate with SerpAPI / BrightLocal / Yext API.
        Returns: {found: bool, url: str|None, confidence: str}
        """
        # Heuristic simulation based on common patterns.
        # A real implementation would call SerpAPI with site: operator or
        # BrightLocal's Citation Burst API.
        business_slug = re.sub(r"[^a-z0-9]+", "-", self.business_name.lower()).strip("-")
        suburb_slug   = re.sub(r"[^a-z0-9]+", "-", self.suburb.lower()).strip("-")

        # Directories that tradies almost always list on (if they have a website)
        likely_listed = {"gbp", "hipages", "yellowpages", "facebook"}
        # Directories tradies often miss (these are AEO gold)
        often_missed  = {"bing", "foursquare", "apple", "houzz", "linkedin"}

        if directory_key in likely_listed:
            confidence = "medium"
            found = True   # Assume listed — manual verification needed
        elif directory_key in often_missed:
            confidence = "low"
            found = False  # Assume NOT listed — high-value gap
        else:
            confidence = "low"
            found = False

        note = (
            "Assumed present — verify manually"
            if found
            else "Likely missing — verify and list immediately"
        )

        return {
            "found": found,
            "confidence": confidence,
            "note": note,
            "search_query": f'"{self.business_name}" site:{directory_key}',
        }

    # ──────────────────────────────────────────────
    # CHECK 1: GOOGLE BUSINESS PROFILE
    # ──────────────────────────────────────────────

    def check_gbp(self) -> dict:
        """
        Check Google Business Profile via Google search page heuristics.

        In production: use Google Places API or BrightLocal API.

        Returns:
            dict with presence/review/rating signals, "score_contribution"
            and a "gaps" list. "present" is three-state:
                True  — likely listed (scores full GBP weight)
                False — searched, not found (CRITICAL gap)
                None  — search page could not be fetched (manual check gap)
        """
        self._log("Checking Google Business Profile...")

        # Search for the business using a Google search URL (gl=au → AU results).
        query = urllib.parse.quote(f"{self.business_name} {self.suburb} {self.trade_type}")
        search_url = f"https://www.google.com/search?q={query}&gl=au"

        content = self._fetch_url(search_url)

        result = {
            "present":           False,
            "has_reviews":       False,
            "review_count":      0,
            "rating":            0.0,
            "has_photos":        False,
            "has_hours":         False,
            "has_description":   False,
            "score_contribution": 0,
            "gaps": [],
        }

        if content:
            # Heuristic: a prefix of the business name appearing anywhere in
            # the search HTML suggests a listing / knowledge panel.
            if self.business_name.lower()[:8] in content.lower():
                result["present"] = True
                self._log("GBP: likely present in search results")

            # Look for review count patterns (e.g. "47 reviews")
            review_match = re.search(r"(\d+)\s+review", content, re.I)
            if review_match:
                result["has_reviews"] = True
                result["review_count"] = int(review_match.group(1))
                self._log(f"GBP: found ~{result['review_count']} reviews")

            # Look for rating (e.g. "4.8") in embedded schema markup
            rating_match = re.search(r'"ratingValue"\s*:\s*"?([\d.]+)"?', content)
            if rating_match:
                result["rating"] = float(rating_match.group(1))
        else:
            # Cannot fetch — status unknown; flag for manual verification only.
            result["present"] = None
            result["gaps"].append({
                "severity": "CRITICAL",
                "message": "Cannot verify GBP — manual check required at business.google.com",
            })

        # Score contribution. BUGFIX: previously an unverifiable fetch
        # (present=None) ALSO appended the contradictory "no profile found"
        # gap below; the "not found" gap now fires only on a definite False.
        if result["present"]:
            result["score_contribution"] = SCORING_WEIGHTS["gbp_presence"]
        elif result["present"] is False:
            result["gaps"].append({
                "severity": "CRITICAL",
                "message": (
                    f"No Google Business Profile found for '{self.business_name}' in {self.suburb}. "
                    "GBP is the #1 signal for all AI engines. Create one immediately at "
                    "business.google.com."
                ),
            })

        return result

    # ──────────────────────────────────────────────
    # CHECK 2: BING PLACES (ChatGPT pipeline)
    # ──────────────────────────────────────────────

    def check_bing_places(self) -> dict:
        """
        Check Bing Places for Business presence.

        Bing Places feeds directly into ChatGPT local search results;
        a missing Bing Places listing means invisibility on ChatGPT.
        """
        self._log("Checking Bing Places for Business...")

        result = {
            "present":            False,
            "score_contribution": 0,
            "gaps":               [],
        }

        search_terms = f"{self.business_name} {self.suburb} {self.trade_type} site:bing.com/maps"
        query = urllib.parse.quote(search_terms)
        content = self._fetch_url(
            f"https://www.bing.com/search?q={query}&cc=AU", timeout=8
        )

        # A short prefix of the business name in the results page is taken
        # as evidence of a listing; anything else counts as a miss.
        name_prefix = self.business_name.lower()[:6]
        if not content or name_prefix not in content.lower():
            result["gaps"].append({
                "severity": "CRITICAL",
                "message": (
                    "Not found on Bing Places for Business. "
                    "ChatGPT pulls 70%+ of local recommendations from Bing. "
                    "Register free at bingplaces.com immediately."
                ),
            })
            return result

        result["present"] = True
        result["score_contribution"] = SCORING_WEIGHTS["bing_places"]
        self._log("Bing Places: likely listed")
        return result

    # ──────────────────────────────────────────────
    # CHECK 3: FOURSQUARE (60-70% of ChatGPT local data)
    # ──────────────────────────────────────────────

    def check_foursquare(self) -> dict:
        """
        Foursquare City Guide is the source for 60-70% of ChatGPT local results
        (per Local Falcon research, 2025). Critical for AI visibility.
        """
        self._log("Checking Foursquare presence...")

        result = {
            "present":            False,
            "score_contribution": 0,
            "gaps":               [],
        }

        query = urllib.parse.quote(
            f"{self.business_name} {self.suburb} {self.trade_type}"
        )
        content = self._fetch_url(
            f"https://foursquare.com/explore?near={urllib.parse.quote(self.suburb)}+Australia"
            f"&q={urllib.parse.quote(self.trade_type)}",
            timeout=8,
        )

        # Foursquare blocks most bots — assume not listed (conservative)
        result["gaps"].append({
            "severity": "CRITICAL",
            "message": (
                "Foursquare listing not verified. "
                "Foursquare data feeds 60-70% of ChatGPT local results. "
                "Claim or create listing at foursquare.com/add-place."
            ),
        })
        result["present"] = False

        return result

    # ──────────────────────────────────────────────
    # CHECK 4: AUSTRALIAN TRADIE DIRECTORIES
    # ──────────────────────────────────────────────

    def check_au_directories(self) -> dict:
        """
        Check presence on the AU-specific trade directories.

        Covers hipages, ServiceSeeking, Yellow Pages AU, Truelocal and
        Houzz — the primary Australian signals AI engines cross-reference
        for local trades. The score is proportional to how much of the
        combined directory weight is covered by found listings.
        """
        self._log("Checking Australian directories...")

        result = {
            "directories_found": [],
            "directories_missing": [],
            "score_contribution": 0,
            "gaps": [],
        }

        au_keys = {"hipages", "serviceseeking", "yellowpages", "truelocal", "houzz"}
        au_dirs = [d for d in AU_DIRECTORIES if d["key"] in au_keys]

        for entry in au_dirs:
            presence = self._search_directory(entry["key"], self.business_name)
            if presence["found"]:
                result["directories_found"].append(entry["name"])
                continue
            result["directories_missing"].append(entry["name"])
            # Heavier directories are more urgent to fix.
            result["gaps"].append({
                "severity": "HIGH" if entry["weight"] >= 6 else "MEDIUM",
                "message": (
                    f"Not listed on {entry['name']} ({entry['url']}). "
                    f"Create a free listing — AI engines cross-reference these directories."
                ),
            })

        # Score: fraction of total AU directory weight that is covered.
        total_weight = sum(d["weight"] for d in au_dirs)
        covered_weight = sum(
            d["weight"] for d in au_dirs if d["name"] in result["directories_found"]
        )
        fraction = covered_weight / total_weight if total_weight else 0
        result["score_contribution"] = int(SCORING_WEIGHTS["au_directories"] * fraction)

        return result

    # ──────────────────────────────────────────────
    # CHECK 5: SCHEMA MARKUP
    # ──────────────────────────────────────────────

    def check_schema_markup(self) -> dict:
        """
        Check the business website for JSON-LD schema markup.

        Priority types: LocalBusiness, Service, FAQPage, Review, HowTo.
        Missing critical types produce CRITICAL gaps; the rest MEDIUM.

        Returns:
            dict with found/missing schema type lists, convenience booleans
            for the three critical types, "score_contribution" and "gaps".
        """
        self._log("Checking schema markup on website...")

        result = {
            "schema_found": [],
            "schema_missing": [],
            "has_local_business": False,
            "has_faq": False,
            "has_service": False,
            "score_contribution": 0,
            "gaps": [],
        }

        if not self.website_url:
            result["gaps"].append({
                "severity": "CRITICAL",
                "message": (
                    "No website URL provided — cannot check schema. "
                    "A website with proper schema is essential for AI engine visibility."
                ),
            })
            return result

        content = self._fetch_url(self.website_url, timeout=12)

        if not content:
            result["gaps"].append({
                "severity": "HIGH",
                "message": f"Could not access website at {self.website_url}. Check if site is live.",
            })
            return result

        # Extract all JSON-LD blocks
        json_ld_blocks = re.findall(
            r'<script[^>]+type=["\']application/ld\+json["\'][^>]*>(.*?)</script>',
            content,
            re.DOTALL | re.IGNORECASE,
        )

        found_types = set()
        for block in json_ld_blocks:
            try:
                data = json.loads(block.strip())
            except json.JSONDecodeError:
                continue  # malformed JSON-LD — skip this script block

            # Normalise to a list of candidate items ("@graph" wraps a list).
            if isinstance(data, dict):
                items = data.get("@graph", [data])
            elif isinstance(data, list):
                items = data
            else:
                items = []

            for item in items:
                # BUGFIX: a non-dict entry (e.g. a bare string inside
                # "@graph") previously raised AttributeError and aborted
                # parsing of the REMAINING items in this block; skip it
                # and keep scanning instead.
                if not isinstance(item, dict):
                    continue
                schema_type = item.get("@type", "")
                if isinstance(schema_type, list):
                    found_types.update(schema_type)
                elif schema_type:
                    found_types.add(schema_type)

        self._log(f"Schema types found: {found_types}")

        # Score schema presence proportionally to SCHEMA_TYPES weights.
        total_schema_weight = 0
        found_schema_weight = 0

        for schema_type, meta in SCHEMA_TYPES.items():
            total_schema_weight += meta["weight"]
            if schema_type in found_types:
                result["schema_found"].append(schema_type)
                found_schema_weight += meta["weight"]
                if schema_type == "LocalBusiness":
                    result["has_local_business"] = True
                elif schema_type == "FAQPage":
                    result["has_faq"] = True
                elif schema_type == "Service":
                    result["has_service"] = True
            else:
                result["schema_missing"].append(schema_type)
                if meta["critical"]:
                    result["gaps"].append({
                        "severity": "CRITICAL",
                        "message": (
                            f"Missing {schema_type} schema. "
                            f"{meta['description']}. "
                            "Add JSON-LD schema immediately."
                        ),
                    })
                else:
                    result["gaps"].append({
                        "severity": "MEDIUM",
                        "message": (
                            f"Missing {schema_type} schema. "
                            f"{meta['description']}."
                        ),
                    })

        fraction = found_schema_weight / total_schema_weight if total_schema_weight else 0
        result["score_contribution"] = int(SCORING_WEIGHTS["schema_markup"] * fraction)

        return result

    # ──────────────────────────────────────────────
    # CHECK 6: REVIEW COUNT & RATING
    # ──────────────────────────────────────────────

    def check_reviews(self, gbp_result: dict) -> dict:
        """
        Evaluate review volume and star rating as AI trust signals.

        Sources: Google, hipages, Facebook (counts come from the GBP check).

        Args:
            gbp_result: Output of check_gbp(); "review_count" and "rating"
                are read from it, defaulting to 0 / 0.0.
        """
        self._log("Evaluating review signals...")

        count = gbp_result.get("review_count", 0)
        rating = gbp_result.get("rating", 0.0)

        result = {
            "review_count":         count,
            "rating":               rating,
            "score_contribution":   0,
            "gaps":                 [],
        }

        if count >= REVIEW_THRESHOLDS["excellent"]:
            # Full volume score plus a rating bonus (reduced under 4.5 stars).
            points = SCORING_WEIGHTS["review_count"]
            if rating >= 4.5:
                points += SCORING_WEIGHTS["review_rating"]
            else:
                points += int(SCORING_WEIGHTS["review_rating"] * 0.6)
            result["score_contribution"] = points
        elif count >= REVIEW_THRESHOLDS["good"]:
            result["score_contribution"] = int(SCORING_WEIGHTS["review_count"] * 0.7)
            result["gaps"].append({
                "severity": "MEDIUM",
                "message": (
                    f"Only {count} reviews detected. AI engines favour businesses with 50+ reviews. "
                    "Implement a post-job review request SMS via ReceptionistAI to accelerate review velocity."
                ),
            })
        elif count >= REVIEW_THRESHOLDS["fair"]:
            result["score_contribution"] = int(SCORING_WEIGHTS["review_count"] * 0.35)
            result["gaps"].append({
                "severity": "HIGH",
                "message": (
                    f"Only {count} reviews detected. This is very low for AI engine visibility. "
                    "ChatGPT and Perplexity favour businesses with 20+ reviews. "
                    "Set up automated review requests immediately."
                ),
            })
        else:
            result["gaps"].append({
                "severity": "CRITICAL",
                "message": (
                    "No reviews detected. AI engines will not recommend a business with zero reviews. "
                    "Getting your first 10 reviews is the single fastest AEO win available. "
                    "Call your last 10 happy clients TODAY and ask for a Google review."
                ),
            })

        # A sub-4.0 average undermines every other signal.
        if count > 0 and rating < 4.0:
            result["gaps"].append({
                "severity": "HIGH",
                "message": (
                    f"Average rating {rating:.1f} is below 4.0. "
                    "AI engines strongly prefer businesses with 4.5+ stars. "
                    "Respond to all negative reviews professionally and focus on service quality."
                ),
            })

        return result

    # ──────────────────────────────────────────────
    # CHECK 7: NAP CONSISTENCY
    # ──────────────────────────────────────────────

    def check_nap_consistency(self) -> dict:
        """
        NAP = Name / Address / Phone consistency across directories.
        Inconsistent NAP is a major AI engine trust killer.
        """
        self._log("Checking NAP consistency signals...")

        result = {
            "score_contribution": 5,   # Give partial credit — hard to check without paid API
            "gaps": [],
        }

        result["gaps"].append({
            "severity": "HIGH",
            "message": (
                "NAP consistency not fully verified (requires manual audit or BrightLocal/Yext). "
                "Ensure your business Name, Address, and Phone number are IDENTICAL across ALL "
                "directories. Even minor differences (e.g. 'St' vs 'Street', '0412' vs '+61412') "
                "reduce AI engine trust. AI cross-references 50+ sources — consistency is non-negotiable."
            ),
        })

        return result

    # ──────────────────────────────────────────────
    # CHECK 8: WEBSITE CONTENT DEPTH
    # ──────────────────────────────────────────────

    def check_website_content(self) -> dict:
        """
        Check the website for AEO-friendly content signals.

        Signals:
        - Suburb-specific content (suburb name appears on the page)
        - FAQ section
        - Service-level content (trade/service keywords)
        - Estimated visible word count

        Missing signals are reported as gaps; the score is capped at the
        "website_quality" weight.
        """
        self._log("Checking website content depth...")

        result = {
            "has_suburb_page":    False,
            "has_faq_section":    False,
            "has_service_pages":  False,
            "word_count_est":     0,
            "score_contribution": 0,
            "gaps":               [],
        }

        if not self.website_url:
            result["gaps"].append({
                "severity": "CRITICAL",
                "message": (
                    "No website — this is a critical AEO gap. "
                    "AI engines need a website to crawl. "
                    "A basic 5-page site with suburb + service pages is the foundation of AEO."
                ),
            })
            return result

        content = self._fetch_url(self.website_url, timeout=12)
        if not content:
            return result

        page = content.lower()

        # Signal 1: suburb mentioned anywhere on the page.
        result["has_suburb_page"] = self.suburb.lower() in page

        # Signal 2: FAQ section present.
        faq_patterns = (r"faq", r"frequently asked", r"common question", r"q&a")
        result["has_faq_section"] = any(re.search(p, page) for p in faq_patterns)

        # Signal 3: estimated visible word count (markup stripped first).
        stripped = re.sub(r"\s+", " ", re.sub(r"<[^>]+>", " ", content))
        result["word_count_est"] = len(stripped.split())

        # Signal 4: service-level content — at least 3 keywords present.
        service_patterns = (
            self.trade_type,
            "service",
            "emergency",
            "installation",
            "repair",
            "maintenance",
        )
        keyword_hits = sum(1 for p in service_patterns if p in page)
        result["has_service_pages"] = keyword_hits >= 3

        # Score the signals and raise gaps for the ones that are missing.
        content_score = 0

        if result["has_suburb_page"]:
            content_score += 3
        else:
            result["gaps"].append({
                "severity": "HIGH",
                "message": (
                    f"No suburb-specific content found for '{self.suburb}'. "
                    "AI engines geo-match businesses by suburb mentions. "
                    f"Create a dedicated page: '{self.trade_type.title()} in {self.suburb}' "
                    "with 500+ words of local content."
                ),
            })

        if result["has_faq_section"]:
            content_score += 3
        else:
            result["gaps"].append({
                "severity": "HIGH",
                "message": (
                    "No FAQ section detected. FAQPage schema + FAQ content is the fastest route "
                    "to AI engine citations. Add 10 questions your clients ask, with direct answers."
                ),
            })

        if result["has_service_pages"]:
            content_score += 1
        else:
            result["gaps"].append({
                "severity": "MEDIUM",
                "message": (
                    "Limited service page content detected. "
                    "Create individual pages for each service: e.g. 'Hot Water System Repair', "
                    "'Emergency Plumber Brisbane'. AI engines match specific service queries to specific pages."
                ),
            })

        words = result["word_count_est"]
        if words < 500:
            result["gaps"].append({
                "severity": "HIGH",
                "message": (
                    f"Website has very low content (~{words} words estimated). "
                    "AI engines favour content depth. Target 1,500+ words across your site."
                ),
            })
        elif words < 1500:
            result["gaps"].append({
                "severity": "MEDIUM",
                "message": (
                    f"Website has moderate content (~{words} words estimated). "
                    "Add suburb pages and service detail pages to boost AEO signals."
                ),
            })

        result["score_contribution"] = min(SCORING_WEIGHTS["website_quality"], content_score + 1)
        return result

    # ──────────────────────────────────────────────
    # CALCULATE FINAL SCORE
    # ──────────────────────────────────────────────

    def calculate_score(self) -> int:
        """Aggregate all check scores into a final AEO Score 0–100."""
        total = 0

        total += self.results.get("gbp", {}).get("score_contribution", 0)
        total += self.results.get("bing", {}).get("score_contribution", 0)
        total += self.results.get("foursquare", {}).get("score_contribution", 0)
        total += self.results.get("au_dirs", {}).get("score_contribution", 0)
        total += self.results.get("schema", {}).get("score_contribution", 0)
        total += self.results.get("reviews", {}).get("score_contribution", 0)
        total += self.results.get("nap", {}).get("score_contribution", 0)
        total += self.results.get("content", {}).get("score_contribution", 0)

        return min(100, max(0, total))

    # ──────────────────────────────────────────────
    # COLLECT ALL GAPS + FIXES
    # ──────────────────────────────────────────────

    def collect_gaps(self):
        """Flatten all gaps from all checks, sorted by severity."""
        severity_order = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3}
        all_gaps = []

        for check_name, check_result in self.results.items():
            for gap in check_result.get("gaps", []):
                all_gaps.append(gap)

        self.gaps = sorted(all_gaps, key=lambda g: severity_order.get(g["severity"], 99))

    def build_fix_list(self):
        """Build prioritised fix list from gaps.

        Walks ``self.gaps`` (already sorted by severity) and, for each
        gap whose message contains a known keyword, appends the matching
        predefined fix tagged with that gap's severity.  Each fix is
        added at most once; the first (most severe) matching gap wins.
        """
        self.fixes = []

        # Predefined fixes mapped to gap keywords
        fix_map = {
            "Google Business Profile": {
                "action": "Create & verify Google Business Profile",
                "url": "https://business.google.com",
                "time": "1–2 days (verification by postcard)",
                "impact": "HIGH — single biggest AEO signal",
                "effort": "30 min setup",
            },
            "Bing Places": {
                "action": "Register on Bing Places for Business",
                "url": "https://bingplaces.com",
                "time": "24 hours",
                "impact": "HIGH — direct ChatGPT feed",
                "effort": "20 min setup",
            },
            "Foursquare": {
                "action": "Claim/create Foursquare listing",
                "url": "https://foursquare.com/add-place",
                "time": "Instant",
                "impact": "HIGH — 60-70% of ChatGPT local data",
                "effort": "15 min setup",
            },
            "LocalBusiness schema": {
                "action": "Add LocalBusiness JSON-LD schema to homepage",
                "url": "https://schema.org/LocalBusiness",
                "time": "2–4 hours (or 1 day with developer)",
                "impact": "HIGH — core entity recognition by all AI engines",
                "effort": "Developer task",
            },
            "FAQPage schema": {
                "action": "Add FAQ section + FAQPage schema to website",
                "url": "https://schema.org/FAQPage",
                "time": "2–4 hours",
                "impact": "HIGH — directly feeds AI answer boxes",
                "effort": "Write 10 Q&As + add schema",
            },
            "reviews": {
                "action": "Launch automated Google Review request system",
                "url": "https://business.google.com",
                "time": "Ongoing — first 10 reviews within 2 weeks",
                "impact": "CRITICAL — AI engines won't recommend 0-review businesses",
                "effort": "SMS 10 recent clients manually today",
            },
            "suburb-specific": {
                "action": f"Create suburb service page: '{self.trade_type.title()} in {self.suburb}'",
                "url": None,
                "time": "1 day",
                "impact": "HIGH — geo-matching for AI local queries",
                "effort": "Write 500+ words of local content",
            },
        }

        # BUG FIX: the old dedup test was `if fix not in self.fixes`, which
        # compared the bare fix dict against the severity-tagged entries
        # appended below ({**fix, "severity": ...}).  They are never equal,
        # so the same fix could be appended once per matching gap.  Track
        # the matched keywords instead.
        added_keywords = set()
        for gap in self.gaps:
            msg_lower = gap["message"].lower()
            for keyword, fix in fix_map.items():
                if keyword.lower() in msg_lower:
                    if keyword not in added_keywords:
                        added_keywords.add(keyword)
                        self.fixes.append({**fix, "severity": gap["severity"]})
                    break

    # ──────────────────────────────────────────────
    # MAIN RUN METHOD
    # ──────────────────────────────────────────────

    def run(self) -> dict:
        """Execute the full AEO audit and return the aggregated result.

        Runs every signal check, derives the final score, gap list and
        fix plan, prints a summary, and returns everything as one dict.
        """
        banner = "=" * 60
        print(f"\n{banner}")
        print(f"  AEO AUDIT — {self.business_name}")
        print(f"  {self.trade_type.title()} in {self.suburb}, {self.state}")
        print(f"  {self.audit_timestamp}")
        print(f"{banner}\n")

        print("Running checks...")

        # Run every signal check; the reviews check reuses the GBP result.
        self.results["gbp"] = self.check_gbp()
        self.results["bing"] = self.check_bing_places()
        self.results["foursquare"] = self.check_foursquare()
        self.results["au_dirs"] = self.check_au_directories()
        self.results["schema"] = self.check_schema_markup()
        self.results["reviews"] = self.check_reviews(self.results["gbp"])
        self.results["nap"] = self.check_nap_consistency()
        self.results["content"] = self.check_website_content()

        # Derive the aggregate score, flattened gap list and fix plan.
        self.score = self.calculate_score()
        self.collect_gaps()
        self.build_fix_list()

        # Human-readable console summary.
        self._print_summary()

        return {
            "business_name": self.business_name,
            "suburb": self.suburb,
            "trade_type": self.trade_type,
            "aeo_score": self.score,
            "grade": self._get_grade(),
            "gaps": self.gaps,
            "fixes": self.fixes,
            "results": self.results,
            "timestamp": self.audit_timestamp,
        }

    # ──────────────────────────────────────────────
    # OUTPUT METHODS
    # ──────────────────────────────────────────────

    def _get_grade(self) -> str:
        if self.score >= 85:
            return "A — Excellent AEO presence"
        elif self.score >= 70:
            return "B — Good, a few gaps to close"
        elif self.score >= 55:
            return "C — Moderate — significant opportunities"
        elif self.score >= 35:
            return "D — Poor — major gaps, AI engines likely ignoring you"
        else:
            return "F — Critical — essentially invisible to AI search"

    def _print_summary(self):
        """Print human-readable summary to stdout."""
        grade = self._get_grade()
        print(f"\n  AEO SCORE: {self.score}/100  |  Grade: {grade}\n")

        critical = [g for g in self.gaps if g["severity"] == "CRITICAL"]
        high     = [g for g in self.gaps if g["severity"] == "HIGH"]
        medium   = [g for g in self.gaps if g["severity"] == "MEDIUM"]

        print(f"  Gaps found: {len(critical)} CRITICAL, {len(high)} HIGH, {len(medium)} MEDIUM\n")

        if critical:
            print("  CRITICAL GAPS (fix first):")
            for i, gap in enumerate(critical, 1):
                print(f"    {i}. {gap['message'][:100]}...")
            print()

        if self.fixes:
            print("  TOP FIXES (prioritised):")
            for i, fix in enumerate(self.fixes[:5], 1):
                print(f"    {i}. [{fix['severity']}] {fix['action']}")
                print(f"       Impact: {fix['impact']}")
                print(f"       Effort: {fix['effort']}  |  Timeline: {fix['time']}")
                print()

    def save_report(self, output_dir: str = "reports") -> str:
        """Save the full markdown report to disk.

        Args:
            output_dir: Directory to write into (created if missing).

        Returns:
            The path of the written report file.
        """
        os.makedirs(output_dir, exist_ok=True)

        # Slugify the business name for a filesystem-safe filename.
        slug = re.sub(r"[^a-z0-9]+", "_", self.business_name.lower()).strip("_")
        filename = os.path.join(output_dir, f"aeo_report_{slug}.md")

        report = self._build_markdown_report()

        with open(filename, "w", encoding="utf-8") as f:
            f.write(report)

        # BUG FIX: previously printed the literal text "(unknown)" instead
        # of interpolating the actual report path.
        print(f"\n  Full report saved: {filename}")
        return filename

    def _build_markdown_report(self) -> str:
        """Build the professional markdown report.

        Assembles the client-facing report as a list of markdown lines —
        score banner, AI-visibility narrative, gap analysis, prioritised
        actions, score breakdown and next-steps pitch — and joins them
        with newlines.

        Returns:
            The complete report as a single markdown string.
        """
        grade = self._get_grade()
        # Bucket gaps by severity for the summary table and the
        # per-severity sections further down.
        critical = [g for g in self.gaps if g["severity"] == "CRITICAL"]
        high     = [g for g in self.gaps if g["severity"] == "HIGH"]
        medium   = [g for g in self.gaps if g["severity"] == "MEDIUM"]

        # Build score bar: 20 glyphs, each block representing 5 points.
        filled = int(self.score / 5)
        bar = "█" * filled + "░" * (20 - filled)

        lines = [
            f"# AEO Audit Report",
            f"## {self.business_name}",
            f"**{self.trade_type.title()} | {self.suburb}, {self.state}** | Audited: {self.audit_timestamp}",
            f"",
            f"---",
            f"",
            f"## AEO Score",
            f"",
            f"```",
            f"  {self.score}/100  [{bar}]",
            f"  Grade: {grade}",
            f"```",
            f"",
            # One-line interpretation: >=70 well / <40 poorly / otherwise
            # partially optimised.
            f"**What this means:** {'Your business is ' + ('well optimised' if self.score >= 70 else 'poorly optimised' if self.score < 40 else 'partially optimised') + ' for AI search engines like ChatGPT, Perplexity, Gemini, and Google AI Overview.'}",
            f"",
            f"---",
            f"",
            f"## What AI Shows When Someone Searches for You",
            f"",
            f'When someone asks **ChatGPT, Perplexity, or Gemini** _"find me a {self.trade_type} in {self.suburb}"_:',
            f"",
        ]

        # Narrative varies with the score band: >=70 good, 40–69 patchy,
        # below 40 effectively invisible.
        if self.score >= 70:
            lines += [
                f"- Your business **likely appears** in AI-generated recommendations",
                f"- You have a reasonable AEO foundation — the gaps below will further strengthen your position",
            ]
        elif self.score >= 40:
            lines += [
                f"- Your business **may appear occasionally**, but inconsistently",
                f"- Competitors with better AEO setups are appearing instead of you",
                f"- Fixing the gaps below could move you from occasional to frequent AI citations",
            ]
        else:
            lines += [
                f"- Your business is **effectively invisible** to AI search engines",
                f"- When someone asks AI for a {self.trade_type} in {self.suburb}, your name does not come up",
                f"- Competitors who have invested in AEO are capturing 100% of that traffic",
            ]

        lines += [
            f"",
            f"---",
            f"",
            f"## Gap Analysis",
            f"",
            f"| Severity | Count |",
            f"|----------|-------|",
            f"| CRITICAL | {len(critical)} |",
            f"| HIGH | {len(high)} |",
            f"| MEDIUM | {len(medium)} |",
            f"",
        ]

        # Per-severity gap sections, emitted only when non-empty.
        if critical:
            lines.append("### CRITICAL Gaps")
            lines.append("")
            for i, gap in enumerate(critical, 1):
                lines.append(f"{i}. **{gap['message']}**")
                lines.append("")

        if high:
            lines.append("### HIGH Priority Gaps")
            lines.append("")
            for i, gap in enumerate(high, 1):
                lines.append(f"{i}. {gap['message']}")
                lines.append("")

        if medium:
            lines.append("### MEDIUM Priority Gaps")
            lines.append("")
            for i, gap in enumerate(medium, 1):
                lines.append(f"{i}. {gap['message']}")
                lines.append("")

        lines += [
            f"---",
            f"",
            f"## Your 5 Priority Actions",
            f"",
            f"These are the highest-impact fixes, ordered by ROI:",
            f"",
        ]

        # self.fixes is already in priority order; show at most five.
        top_fixes = self.fixes[:5]
        for i, fix in enumerate(top_fixes, 1):
            lines += [
                f"### Action {i}: {fix['action']}",
                f"",
                f"- **Severity:** {fix['severity']}",
                f"- **Impact:** {fix['impact']}",
                f"- **Effort:** {fix['effort']}",
                f"- **Timeline:** {fix['time']}",
            ]
            # URL is optional (suburb-page fix has none).
            if fix.get("url"):
                lines.append(f"- **Link:** {fix['url']}")
            lines.append("")

        # NOTE(review): the printed weights (20+10+8+10+15+8+10+7 = 88%)
        # don't sum to 100%, and the "Review Count" row shows an 8% weight
        # but a max of 13 — confirm the intended weighting against the
        # individual check implementations before trusting this table.
        lines += [
            f"---",
            f"",
            f"## Score Breakdown",
            f"",
            f"| Signal | Weight | Your Score | Max |",
            f"|--------|--------|-----------|-----|",
            f"| Google Business Profile | 20% | {self.results['gbp'].get('score_contribution',0)} | 20 |",
            f"| Bing Places (ChatGPT feed) | 10% | {self.results['bing'].get('score_contribution',0)} | 10 |",
            f"| Foursquare (ChatGPT data) | 8% | {self.results['foursquare'].get('score_contribution',0)} | 8 |",
            f"| AU Directories (hipages etc.) | 10% | {self.results['au_dirs'].get('score_contribution',0)} | 10 |",
            f"| Schema Markup | 15% | {self.results['schema'].get('score_contribution',0)} | 15 |",
            f"| Review Count | 8% | {self.results['reviews'].get('score_contribution',0)} | 13 |",
            f"| NAP Consistency | 10% | {self.results['nap'].get('score_contribution',0)} | 10 |",
            f"| Website Content | 7% | {self.results['content'].get('score_contribution',0)} | 7 |",
            f"| **TOTAL** | **100%** | **{self.score}** | **100** |",
            f"",
            f"---",
            f"",
            f"## What Is AEO and Why It Matters",
            f"",
            f"**Answer Engine Optimisation (AEO)** is the practice of optimising your business so that",
            f"AI tools — ChatGPT, Perplexity, Gemini, Google AI Overview — recommend you when",
            f"someone asks them to find a local tradie.",
            f"",
            f"Traditional Google SEO gets you on the list. AEO gets you recommended by AI.",
            f"",
            f"By 2027, LLM-driven search is projected to overtake traditional Google search.",
            f"Businesses that get AEO right now will own the AI-recommendation channel.",
            f"",
            f"---",
            f"",
            f"## Next Steps",
            f"",
            f"**Option A: DIY** — Use this report to fix the gaps yourself. Estimated time: 2–4 weeks.",
            f"",
            f"**Option B: Done-For-You via ReceptionistAI**",
            f"",
            f"| Service | Price | What's Included |",
            f"|---------|-------|-----------------|",
            f"| AEO Setup Package | $497 one-time | GBP optimisation, Bing Places, Foursquare, schema markup, FAQ pages, AU directory listings |",
            f"| Monthly AEO Maintenance | $197/month | Monitor AI rankings, update listings, review velocity, adapt as AI engines evolve |",
            f"| AEO Audit (this report) | $297 | FREE if you sign up for ReceptionistAI |",
            f"",
            f"**Book a free 15-min AEO strategy call:** [receptionistai.com.au](https://receptionistai.com.au)",
            f"",
            f"---",
            f"",
            f"*Report generated by Genesis AEO Intelligence System v1.0*",
            f"*ReceptionistAI — AI Receptionist + AEO for Australian Tradies*",
        ]

        return "\n".join(lines)


# ─────────────────────────────────────────────────────────────────────────────
# CLI ENTRY POINT
# ─────────────────────────────────────────────────────────────────────────────

def main():
    """CLI entry point.

    Parses arguments, runs the AEO audit, optionally saves the markdown
    report, and optionally prints the full result as JSON.
    """
    parser = argparse.ArgumentParser(
        description="AEO Audit Tool — Check how visible a tradie is on AI search engines",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python aeo_audit.py --business "Smith Plumbing" --suburb "Brisbane" --trade "plumber"
  python aeo_audit.py --business "ABC Electrical" --suburb "Parramatta" --trade "electrician" --website https://abcelectrical.com.au --state NSW
  python aeo_audit.py --business "Jones Roofing" --suburb "Melbourne" --trade "roofer" --state VIC --verbose
        """,
    )
    parser.add_argument("--business",  required=True,  help="Business name")
    parser.add_argument("--suburb",    required=True,  help="Suburb / city")
    parser.add_argument("--trade",     required=True,  help="Trade type (plumber, electrician, roofer...)")
    parser.add_argument("--website",   default=None,   help="Business website URL (optional)")
    parser.add_argument("--state",     default="QLD",  help="Australian state code (default: QLD)")
    parser.add_argument("--output",    default="reports", help="Output directory for reports (default: reports/)")
    parser.add_argument("--json",      action="store_true", help="Print full JSON result to stdout")
    parser.add_argument("--verbose",   action="store_true", help="Show detailed check progress")
    parser.add_argument("--no-report", action="store_true", help="Skip saving markdown report to disk")

    args = parser.parse_args()

    audit = AEOAudit(
        business_name=args.business,
        suburb=args.suburb,
        trade_type=args.trade,
        website_url=args.website,
        state=args.state,
        verbose=args.verbose,
    )

    result = audit.run()

    if not args.no_report:
        audit.save_report(output_dir=args.output)

    if args.json:
        # BUG FIX: the old `sanitise` helper only deep-copied dicts/lists
        # and returned every leaf value unchanged, so non-JSON-safe leaves
        # (e.g. datetime objects) still crashed json.dumps.  `default=str`
        # is chosen deliberately: anything json cannot encode natively is
        # emitted as its string form.
        print(json.dumps(result, indent=2, default=str))


if __name__ == "__main__":
    main()
