import csv
import sys
import time
import json
from pathlib import Path

# Add genesis-system to python path
sys.path.append(str(Path(__file__).resolve().parent.parent))

from scripts.browser_agent import BrowserAgent


# --- CONFIGURATION ---
# Directory that receives both the scraped CSV and the error log.
OUTPUT_DIR = Path("/mnt/e/genesis-system/data/LEADS")
OUTPUT_FILE = OUTPUT_DIR.joinpath("localsearch_tradies.csv")
ERROR_LOG = OUTPUT_DIR.joinpath("localsearch_errors.log")

# Trade category slugs; each one is interpolated into the search URL path.
TRADES = [
    "plumbers",
    "electricians",
    "air-conditioning",
    "builders",
    "concreters",
    "roofing-contractors",
    "landscapers",
    "painters",
    "tilers",
    "fencing-contractors",
    "carpenters",
]

# Queensland location slugs; each one is interpolated into the search URL path.
QLD_CITIES = [
    "brisbane-qld",
    "gold-coast-qld",
    "sunshine-coast-qld",
    "cairns-qld",
    "townsville-qld",
    "toowoomba-qld",
    "mackay-qld",
    "rockhampton-qld",
    "bundaberg-qld",
    "hervey-bay-qld",
    "gladstone-qld",
    "mount-isa-qld",
]

def create_csv_file():
    """Ensure the output directory exists and seed the CSV with its header.

    The header row is written only when the output file is absent, so an
    existing file (and any rows already in it) is left untouched.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    if OUTPUT_FILE.exists():
        # Nothing to do: never truncate a file from an earlier run.
        return

    header = (
        "business_name", "trade_category", "phone", "website",
        "street_address", "suburb", "state", "postcode", "email",
        "rating", "review_count", "source_url", "scraped_date",
    )
    with open(OUTPUT_FILE, 'w', newline='') as handle:
        csv.writer(handle).writerow(header)
    print(f"Created CSV file: {OUTPUT_FILE}")

def save_to_csv(data: dict):
    """Append one business record to the output CSV.

    The file is opened as UTF-8 explicitly: the rows contain scraped
    business names and addresses, which may hold characters the platform's
    default encoding cannot represent.

    Args:
        data: Mapping of column name to cell value. Columns missing from
            ``data`` are written as empty cells; keys that are not CSV
            columns make ``csv.DictWriter`` raise ``ValueError``.
    """
    fieldnames = [
        "business_name", "trade_category", "phone", "website",
        "street_address", "suburb", "state", "postcode", "email",
        "rating", "review_count", "source_url", "scraped_date"
    ]
    with open(OUTPUT_FILE, 'a', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        # In append mode the position starts at end-of-file, so 0 means the
        # file is empty and still needs its header row.
        if f.tell() == 0:
            writer.writeheader()
        writer.writerow(data)

def save_single_item(item, trade, source_url):
    """Flatten one business record into a CSV row and append it.

    Args:
        item: Business mapping as parsed from a JSON-LD script tag. Its
            ``address`` and ``aggregateRating`` entries are only read when
            they are dicts; otherwise the derived columns are ``None``.
        trade: Trade category stored in the ``trade_category`` column.
        source_url: URL stored in the ``source_url`` column.
    """
    # Normalise the nested sections once so every lookup below is a plain .get().
    address = item.get("address")
    if not isinstance(address, dict):
        address = {}
    rating = item.get("aggregateRating")
    if not isinstance(rating, dict):
        rating = {}

    row = {
        "business_name": item.get("name"),
        "trade_category": trade,
        "phone": item.get("telephone"),
        "website": item.get("url"),
        "street_address": address.get("streetAddress"),
        "suburb": address.get("addressLocality"),
        "state": address.get("addressRegion"),
        "postcode": address.get("postalCode"),
        "email": item.get("email"),
        "rating": rating.get("ratingValue"),
        "review_count": rating.get("reviewCount"),
        "source_url": source_url,
        "scraped_date": time.strftime("%Y-%m-%d"),
    }
    save_to_csv(row)

# User-agent strings handed to the browser agent when it is constructed.
USER_AGENTS = [
    # Chrome 109
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
    # Chrome 108
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
    # Safari 16.1
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15",
]

# JSON-LD ``@type`` values that mark a node as a directly described business.
_BUSINESS_TYPES = frozenset({
    "LocalBusiness", "PlumbingBusiness", "ProfessionalService",
    "Electrician", "HVACBusiness",
})


def _type_names(node):
    """Return a node's ``@type`` as a set of strings (JSON-LD allows a string or an array)."""
    raw = node.get("@type")
    if isinstance(raw, str):
        return {raw}
    if isinstance(raw, list):
        return {name for name in raw if isinstance(name, str)}
    return set()


def _iter_businesses(json_data):
    """Yield every named business dict found in one parsed JSON-LD payload.

    Nodes that are not dicts, or whose shape does not match what is expected,
    are skipped individually so one malformed entry cannot discard the
    remaining businesses of the same payload.
    """
    nodes = json_data if isinstance(json_data, list) else [json_data]
    for node in nodes:
        if not isinstance(node, dict):
            continue
        types = _type_names(node)
        if types & _BUSINESS_TYPES:
            if node.get("name"):
                yield node
        elif "ItemList" in types:
            elements = node.get("itemListElement")
            if isinstance(elements, dict):
                # A single list element may appear without its enclosing array.
                elements = [elements]
            elif not isinstance(elements, list):
                elements = []
            for element in elements:
                biz = element.get("item") if isinstance(element, dict) else None
                if isinstance(biz, dict) and biz.get("name"):
                    yield biz


def main():
    """Scrape business listings for every trade/city pair into the CSV.

    For each combination of ``TRADES`` and ``QLD_CITIES`` the result pages
    are visited in order, the ``application/ld+json`` script tags of each
    page are parsed, and every named business found is appended to the CSV
    via ``save_single_item``.

    Pagination for a pair stops when a page fails to load, contains no
    JSON-LD script tags, yields no businesses (for pages after the first),
    or raises an error; load failures and errors are appended to
    ``ERROR_LOG``. Any other exception is printed and ends the run. The
    browser agent is stopped in every case.
    """
    create_csv_file()
    
    agent = BrowserAgent(headless=True, user_agents=USER_AGENTS)
    running_total = 0

    try:
        agent.start()
        agent.new_page()

        for trade in TRADES:
            for city in QLD_CITIES:
                print(f"--- Processing: {trade} in {city} ---")
                page_num = 1
                listings_in_city = 0
                # NOTE(review): there is no upper bound on page_num; the loop
                # relies on the site eventually serving a page without listings.
                while True:
                    # Built outside the try so the error handler below can
                    # always reference it.
                    url = f"https://www.localsearch.com.au/find/{trade}/{city}?page={page_num}"
                    try:
                        if not agent.navigate(url):
                            print(f"Failed to load {url}. Skipping.")
                            with open(ERROR_LOG, 'a', encoding='utf-8') as f:
                                f.write(f"Failed to load: {url}\n")
                            break

                        # Wait for dynamic content
                        time.sleep(3)
                        
                        # Use evaluate to get script contents
                        script_contents = agent.page.evaluate("""() => {
                            return Array.from(document.querySelectorAll('script[type="application/ld+json"]'))
                                        .map(s => s.innerText);
                        }""")
                        
                        count = len(script_contents)
                        print(f"Found {count} script tags on page {page_num}.")

                        if count == 0:
                            break

                        page_had_items = False
                        for content in script_contents:
                            # Best effort per script tag: a tag that cannot be
                            # parsed or saved is reported and the next one is tried.
                            try:
                                if not content.strip():
                                    continue
                                json_data = json.loads(content)
                                for biz in _iter_businesses(json_data):
                                    save_single_item(biz, trade, agent.page.url)
                                    listings_in_city += 1
                                    page_had_items = True
                            except Exception as e:
                                print(f"Error parsing script tag: {e}")

                        if not page_had_items and page_num > 1:
                            # If page 1 had items but page 2 didn't, we might be done
                            break
                            
                        page_num += 1
                        # Brief pause between consecutive page requests.
                        time.sleep(2)

                    except Exception as e:
                        print(f"An error occurred while processing {url}: {e}")
                        with open(ERROR_LOG, 'a', encoding='utf-8') as f:
                            f.write(f"Error on {url}: {e}\n")
                        break
                
                running_total += listings_in_city
                print(f"PROGRESS: {trade} / {city} -- {listings_in_city} listings extracted. Total so far: {running_total}")

    except Exception as e:
        print(f"A critical error occurred: {e}")
    finally:
        # Always release the browser, even after a critical error.
        agent.stop()

# Run the scraper only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
