import csv
import sys
import time
import json
import os
from pathlib import Path

# Add genesis-system to python path
sys.path.append(str(Path(__file__).resolve().parent.parent))

from scripts.browser_agent import BrowserAgent

# --- CONFIGURATION ---
# Directory and files for scraper output; the error log path is reserved
# for failure records (not written by the code visible in this file).
OUTPUT_DIR = Path("/mnt/e/genesis-system/data/LEADS")
OUTPUT_FILE = OUTPUT_DIR / "localsearch_brisbane_plumbers.csv"
ERROR_LOG = OUTPUT_DIR / "localsearch_errors_test.log"

# Trade category and city slug as they appear in localsearch.com.au URLs.
TRADE = "plumbers"
CITY = "brisbane-qld"

def create_csv_file():
    """Ensure the output directory exists and write the CSV header row once.

    Does nothing (beyond the mkdir) if the output file is already present,
    so repeated runs append to the existing file instead of truncating it.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    if OUTPUT_FILE.exists():
        return
    header = [
        "business_name", "trade_category", "phone", "website",
        "street_address", "suburb", "state", "postcode", "email",
        "rating", "review_count", "source_url", "scraped_date",
    ]
    with OUTPUT_FILE.open('w', newline='') as handle:
        csv.writer(handle).writerow(header)
    print(f"Created CSV file: {OUTPUT_FILE}")

def save_to_csv(data: dict):
    """Append one business record as a row to the output CSV file.

    The field order must match the header written by create_csv_file().
    """
    columns = [
        "business_name", "trade_category", "phone", "website",
        "street_address", "suburb", "state", "postcode", "email",
        "rating", "review_count", "source_url", "scraped_date",
    ]
    with OUTPUT_FILE.open('a', newline='') as handle:
        csv.DictWriter(handle, fieldnames=columns).writerow(data)

def main():
    """Run the LocalSearch scraper for Brisbane plumbers (2-page test run).

    Creates the output CSV, drives a headless BrowserAgent through the
    paginated listing URLs, extracts businesses from embedded JSON-LD
    <script> tags, and appends each one to the CSV via process_item().
    """
    create_csv_file()

    agent = BrowserAgent(headless=True)

    try:
        agent.start()
        agent.new_page()

        print(f"--- Processing: {TRADE} in {CITY} ---")
        page_num = 1
        listings_in_city = 0

        # Limit to 2 pages for the test
        while page_num <= 2:
            try:
                url = f"https://www.localsearch.com.au/find/{TRADE}/{CITY}?page={page_num}"
                if not agent.navigate(url):
                    print(f"Failed to load {url}. Skipping.")
                    break

                # Save HTML for debug if needed
                if page_num == 1:
                    with open("localsearch_debug.html", "w", encoding="utf-8") as f:
                        f.write(agent.page.content())

                # Wait a bit for dynamic content
                time.sleep(3)

                # Collect the raw text of every JSON-LD script tag on the page.
                script_contents = agent.page.evaluate("""() => {
                    return Array.from(document.querySelectorAll('script[type="application/ld+json"]'))
                                .map(s => s.innerText);
                }""")

                count = len(script_contents)
                print(f"Found {count} script tags on page {page_num}.")

                if count == 0:
                    print("No listings found via JSON-LD. Checking page content...")
                    break

                # BUGFIX: the original error message referenced an undefined
                # name `i`, raising NameError on any parse failure. enumerate()
                # now supplies the script-tag index for the log line.
                for idx, content in enumerate(script_contents):
                    try:
                        if not content.strip():
                            continue

                        json_data = json.loads(content)

                        # A JSON-LD payload may be a single object or a list.
                        items = json_data if isinstance(json_data, list) else [json_data]

                        for item in items:
                            if item.get("@type") not in ["LocalBusiness", "PlumbingBusiness", "ProfessionalService"]:
                                # Skip BreadcrumbList etc., but unpack ItemList
                                # wrappers, which hold the actual businesses.
                                if item.get("@type") == "ItemList":
                                    for sub_item in item.get("itemListElement", []):
                                        biz = sub_item.get("item", {})
                                        if biz:
                                            process_item(biz, TRADE, agent.page.url)
                                            listings_in_city += 1
                                continue

                            process_item(item, TRADE, agent.page.url)
                            listings_in_city += 1

                    except Exception as e:
                        print(f"Error parsing script tag {idx}: {e}")

                page_num += 1
                time.sleep(2)

            except Exception as e:
                print(f"An error occurred while processing {url}: {e}")
                break

        print(f"PROGRESS: {TRADE} / {CITY} -- {listings_in_city} listings extracted.")

    except Exception as e:
        print(f"A critical error occurred: {e}")
    finally:
        agent.stop()

def process_item(item, trade, source_url):
    """Flatten a JSON-LD business record into a CSV row and persist it.

    Nested "address" and "aggregateRating" objects are only consulted when
    they are dicts; otherwise their fields are recorded as None. Records
    without a business name are discarded.
    """
    # Hoist the nested objects once so each field lookup is a plain .get().
    address = item.get("address")
    if not isinstance(address, dict):
        address = {}
    rating_info = item.get("aggregateRating")
    if not isinstance(rating_info, dict):
        rating_info = {}

    data = {
        "business_name": item.get("name"),
        "trade_category": trade,
        "phone": item.get("telephone"),
        "website": item.get("url"),
        "street_address": address.get("streetAddress"),
        "suburb": address.get("addressLocality"),
        "state": address.get("addressRegion"),
        "postcode": address.get("postalCode"),
        "email": item.get("email"),
        "rating": rating_info.get("ratingValue"),
        "review_count": rating_info.get("reviewCount"),
        "source_url": source_url,
        "scraped_date": time.strftime("%Y-%m-%d"),
    }
    if data["business_name"]:
        save_to_csv(data)

# Script entry point: run the scraper only when executed directly.
if __name__ == "__main__":
    main()
