import csv
import sys
import time
import json
import requests
from pathlib import Path
from bs4 import BeautifulSoup

# Add genesis-system to python path
# NOTE(review): appends the grandparent directory of this file; presumably the
# genesis-system repo root — confirm against the repo layout.
sys.path.append(str(Path(__file__).resolve().parent.parent))

# --- CONFIGURATION ---
# Hard-coded output location (WSL-style mount of drive E:).
OUTPUT_DIR = Path("/mnt/e/genesis-system/data/LEADS")
# Scraped leads accumulate here, one row per business (see save_to_csv).
OUTPUT_FILE = OUTPUT_DIR / "localsearch_tradies_google.csv"
# Plain-text log of fetch/parse failures, appended to on each error.
ERROR_LOG = OUTPUT_DIR / "localsearch_single_errors.log"

def create_csv_file():
    """Create the output CSV with a header row if it doesn't already exist.

    Ensures OUTPUT_DIR exists first. Idempotent: an existing file (and any
    rows it already contains) is left untouched. The header must stay in
    sync with the fieldnames used by save_to_csv().
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    if OUTPUT_FILE.exists():
        return
    # encoding pinned explicitly: the default is locale-dependent (notably
    # on Windows), and save_to_csv() appends to this same file — header and
    # data rows must use one consistent encoding.
    with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow([
            "business_name", "trade_category", "phone", "website",
            "street_address", "suburb", "state", "postcode", "email",
            "rating", "review_count", "source_url", "scraped_date"
        ])
    print(f"Created CSV file: {OUTPUT_FILE}")

def save_to_csv(data: dict) -> None:
    """Append one scraped business as a row to OUTPUT_FILE.

    Keys of *data* should match the header written by create_csv_file();
    missing keys are written as the DictWriter default (empty), while
    unexpected extra keys raise ValueError.
    """
    fieldnames = [
        "business_name", "trade_category", "phone", "website",
        "street_address", "suburb", "state", "postcode", "email",
        "rating", "review_count", "source_url", "scraped_date"
    ]
    # utf-8 to match the encoding the header was written with; newline=''
    # per the csv module docs to avoid blank interleaved rows on Windows.
    with open(OUTPUT_FILE, 'a', newline='', encoding='utf-8') as f:
        csv.DictWriter(f, fieldnames=fieldnames).writerow(data)

def main(url: str, trade: str):
    """Scrape a single LocalSearch business page and append it to the CSV.

    Fetches *url*, extracts the page's JSON-LD structured-data block,
    flattens the business fields into a flat dict, and appends one row via
    save_to_csv(). Fetch failures and missing JSON-LD are reported on
    stdout and appended to ERROR_LOG; the function never raises for those.

    Args:
        url: Business-detail page to fetch.
        trade: Trade category label recorded verbatim in the CSV.
    """
    create_csv_file()

    try:
        response = requests.get(
            url,
            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'},
            # Without a timeout, a dead host would hang the script forever.
            timeout=30,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred while fetching {url}: {e}")
        with open(ERROR_LOG, 'a') as f:
            # Fixed: previous code wrote a literal backslash-n ("\\n"),
            # producing a single run-on line in the error log.
            f.write(f"Error on {url}: {e}\n")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    script_tag = soup.find('script', type='application/ld+json')

    # .string is None when the tag has nested children; treat that the same
    # as a missing tag rather than crashing json.loads on None.
    if script_tag is None or not script_tag.string:
        print(f"No JSON-LD script tag found on {url}")
        with open(ERROR_LOG, 'a') as f:
            f.write(f"No JSON-LD script tag found on {url}\n")
        return

    json_data = json.loads(script_tag.string)

    # Some pages nest the business object inside an @graph array; pick the
    # first entry whose @type looks like a business listing.
    if '@graph' in json_data:
        for item in json_data['@graph']:
            if item.get('@type') in ["LocalBusiness", "PlumbingBusiness", "ProfessionalService", "Electrician", "HVACBusiness"]:
                json_data = item
                break

    # `or {}` (not a .get default) so an explicit JSON null for these keys
    # doesn't blow up the chained .get calls with an AttributeError.
    address = json_data.get("address") or {}
    rating = json_data.get("aggregateRating") or {}

    data = {
        "business_name": json_data.get("name"),
        "trade_category": trade,
        "phone": json_data.get("telephone"),
        "website": json_data.get("url"),
        "street_address": address.get("streetAddress"),
        "suburb": address.get("addressLocality"),
        "state": address.get("addressRegion"),
        "postcode": address.get("postalCode"),
        "email": json_data.get("email"),
        "rating": rating.get("ratingValue"),
        "review_count": rating.get("reviewCount"),
        "source_url": url,
        "scraped_date": time.strftime("%Y-%m-%d")
    }
    save_to_csv(data)
    print(f"Successfully scraped: {url}")

if __name__ == "__main__":
    # Expect exactly two CLI arguments: the page URL and its trade category.
    args = sys.argv[1:]
    if len(args) != 2:
        print("Usage: python localsearch_single_scraper.py <url> <trade_category>")
        sys.exit(1)

    main(args[0], args[1])
