import random
import csv
import sys
import time
import json
from pathlib import Path

# Add genesis-system to python path
sys.path.append(str(Path(__file__).resolve().parent.parent))

from scripts.browser_agent import BrowserAgent


# --- CONFIGURATION ---
# All output (CSV + error log) lands under this directory.
OUTPUT_DIR = Path("/mnt/e/genesis-system/data/LEADS")
OUTPUT_FILE = OUTPUT_DIR / "localsearch_brisbane_roofers_painters.csv"
# Plain-text log of failed page loads / per-URL errors, appended across runs.
ERROR_LOG = OUTPUT_DIR / "localsearch_brisbane_roofers_painters_errors.log"

# Trade category slugs as they appear in localsearch.com.au URLs.
TRADES = [
    "roofing-contractors", "painters"
]

# City slugs as they appear in localsearch.com.au URLs.
QLD_CITIES = [
    "brisbane-qld"
]

def create_csv_file():
    """Ensure the output directory exists and OUTPUT_FILE has a header row.

    If the CSV file already exists it is left untouched, so repeated runs
    append to it rather than overwriting previous results.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    if OUTPUT_FILE.exists():
        return

    header = [
        "business_name", "trade_category", "phone", "website",
        "street_address", "suburb", "state", "postcode", "email",
        "rating", "review_count", "source_url", "scraped_date",
    ]
    with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as out:
        csv.writer(out).writerow(header)
    print(f"Created CSV file: {OUTPUT_FILE}")

def save_to_csv(data: dict) -> None:
    """Append a single listing record to OUTPUT_FILE.

    The column order here must stay in sync with the header written by
    create_csv_file().
    """
    columns = [
        "business_name", "trade_category", "phone", "website",
        "street_address", "suburb", "state", "postcode", "email",
        "rating", "review_count", "source_url", "scraped_date",
    ]
    with open(OUTPUT_FILE, 'a', newline='', encoding='utf-8') as out:
        csv.DictWriter(out, fieldnames=columns).writerow(data)

def main():
    """Scrape localsearch.com.au business listings for each trade/city pair.

    Walks the paginated search results, extracts business data from the
    JSON-LD <script> tags embedded in each page, and appends one CSV row
    per listing. Page-load failures and per-listing parse errors are
    appended to ERROR_LOG; a per-trade summary is printed on exit.
    """
    create_csv_file()

    agent = BrowserAgent(headless=True)
    category_counts = {trade: 0 for trade in TRADES}  # per-trade totals for the summary

    try:
        agent.start()
        agent.new_page()

        for trade in TRADES:
            for city in QLD_CITIES:
                print(f"--- Processing: {trade} in {city} ---")
                page_num = 1

                while True:
                    # Build the URL before the try so the except handler can
                    # always reference it safely.
                    url = f"https://www.localsearch.com.au/find/{trade}/{city}?page={page_num}"
                    try:
                        # Random delay between requests to stay polite / avoid rate limiting.
                        time.sleep(random.randint(5, 10))

                        if not agent.navigate(url):
                            print(f"Failed to load {url}. Skipping.")
                            with open(ERROR_LOG, 'a') as f:
                                f.write(f"Failed to load: {url}\n")
                            break

                        # Each listing embeds its data as JSON-LD.
                        # NOTE(review): assumes every ld+json tag on the page is a
                        # business listing — confirm against the live markup.
                        script_tags = agent.page.locator('script[type="application/ld+json"]')
                        count = script_tags.count()

                        if count == 0:
                            print(f"No more listings found for {trade} in {city} on page {page_num}.")
                            break

                        print(f"Found {count} listings on page {page_num} for {trade} in {city}.")

                        for i in range(count):
                            # Parse each listing independently so one malformed
                            # JSON-LD blob skips that listing instead of aborting
                            # the whole page (the old code broke out of the page).
                            try:
                                json_data = json.loads(script_tags.nth(i).inner_text())
                            except ValueError as e:  # json.JSONDecodeError subclasses ValueError
                                with open(ERROR_LOG, 'a') as f:
                                    f.write(f"Bad JSON-LD on {url} (item {i}): {e}\n")
                                continue
                            if not isinstance(json_data, dict):
                                continue  # skip non-object JSON-LD (e.g. top-level lists)

                            # "address"/"aggregateRating" may be missing OR explicitly
                            # null; `or {}` guards both cases, whereas .get(key, {})
                            # only covers the missing-key case and crashed on null.
                            address = json_data.get("address") or {}
                            rating = json_data.get("aggregateRating") or {}

                            data = {
                                "business_name": json_data.get("name"),
                                "trade_category": trade,
                                "phone": json_data.get("telephone"),
                                "website": json_data.get("url"),
                                "street_address": address.get("streetAddress"),
                                "suburb": address.get("addressLocality"),
                                "state": address.get("addressRegion"),
                                "postcode": address.get("postalCode"),
                                "email": json_data.get("email"),
                                "rating": rating.get("ratingValue"),
                                "review_count": rating.get("reviewCount"),
                                "source_url": agent.page.url,
                                "scraped_date": time.strftime("%Y-%m-%d"),
                            }
                            save_to_csv(data)
                            category_counts[trade] += 1  # count towards this trade's summary

                        page_num += 1

                    except Exception as e:
                        # Unexpected failure (navigation, locator, file I/O):
                        # log it and give up on this trade/city pair.
                        print(f"An error occurred while processing {url}: {e}")
                        with open(ERROR_LOG, 'a') as f:
                            f.write(f"Error on {url}: {e}\n")
                        break

                print(f"--- Finished {trade} in {city}. Total listings for {trade}: {category_counts[trade]} ---")

    except Exception as e:
        print(f"A critical error occurred: {e}")
    finally:
        # Always shut the browser down and print the summary, even on failure.
        agent.stop()
        print("\n--- Scraping Summary ---")
        for trade, count in category_counts.items():
            print(f"{trade.replace('-', ' ').title()}: {count} listings")
        print(f"Results saved to: {OUTPUT_FILE}")

# Run the scraper only when executed directly, not when imported.
if __name__ == "__main__":
    main()
