import pandas as pd
import os
import json
import re

# Anything outside this character set is replaced with "_" so a value can never
# inject a path separator (or other unsafe character) into a file name.
_SLUG_UNSAFE_RE = re.compile(r'[^a-zA-Z0-9_\-]')


def _slugify(value):
    """Return a lowercase, filename-safe slug for *value*."""
    return _SLUG_UNSAFE_RE.sub('_', str(value).lower())


def _extract_metrics(desc):
    """Parse ``(rating, reviews)`` from a pipe-delimited description.

    Example description:
        WEBSITE-SALE-TARGET | Rating: 4.9 | Reviews: 66

    Returns ``(0.0, 0)`` when a value is present but cannot be parsed;
    a field that is simply absent keeps its default of 0.
    """
    rating = 0.0
    reviews = 0
    try:
        for part in str(desc).split('|'):
            if 'Rating:' in part:
                rating = float(part.split(':')[1].strip())
            if 'Reviews:' in part:
                reviews = int(part.split(':')[1].strip())
    except (ValueError, IndexError):
        # Malformed number: treat the whole row as unrated (best effort),
        # without swallowing unrelated exceptions such as KeyboardInterrupt.
        return 0.0, 0
    return rating, reviews


def _write_segments(df, column, output_dir):
    """Write one ``tradies_<slug>.csv`` per distinct slug of *column*.

    Rows are grouped by the sanitized slug rather than by the raw value, so
    values that differ only in case or punctuation (e.g. "Sydney" and
    "sydney") end up in one file instead of overwriting each other.
    Rows whose *column* value is missing are skipped.
    """
    slugs = df[column].dropna().map(_slugify)
    # unique() keeps first-appearance order, matching the order files were
    # previously created in.
    for slug in slugs.unique():
        segment_df = df.loc[slugs[slugs == slug].index]
        segment_file = os.path.join(output_dir, f"tradies_{slug}.csv")
        segment_df.to_csv(segment_file, index=False)
        print(f"Created {segment_file} with {len(segment_df)} leads.")


def segment_tradie_leads(input_file, output_dir):
    """Split a tradie leads CSV into per-city, per-industry and priority files.

    Args:
        input_file: Path of the CSV to read. It must contain the columns
            ``City``, ``Industry`` and ``Description``.
        output_dir: Directory the segment files are written to; it is
            created if it does not exist.

    Files written to *output_dir*:
        * ``tradies_<city>.csv`` for each city slug,
        * ``tradies_<industry>.csv`` for each industry slug,
        * ``tradies_high_value_priority.csv`` with the leads whose parsed
          rating is >= 4.5 and review count is >= 20. This file also carries
          the derived ``Rating`` and ``Reviews`` columns.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    print(f"Reading tradie leads from {input_file}...")
    df = pd.read_csv(input_file)

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # NOTE: city and industry files share the "tradies_" prefix, so a city and
    # an industry with the same slug would still write to the same file name.
    _write_segments(df, 'City', output_dir)
    _write_segments(df, 'Industry', output_dir)

    # Segment by Priority (High Rating + Reviews).
    # The metric columns are added only after the city/industry files have
    # been written, so those files keep the original column set.
    metrics = [_extract_metrics(desc) for desc in df['Description']]
    # Build each column with an explicit dtype: this also works for an empty
    # frame and keeps the review count an integer.
    df['Rating'] = pd.Series([m[0] for m in metrics], index=df.index, dtype='float64')
    df['Reviews'] = pd.Series([m[1] for m in metrics], index=df.index, dtype='int64')

    high_value_df = df[(df['Rating'] >= 4.5) & (df['Reviews'] >= 20)]
    high_value_file = os.path.join(output_dir, "tradies_high_value_priority.csv")
    high_value_df.to_csv(high_value_file, index=False)
    print(f"Created {high_value_file} with {len(high_value_df)} leads.")

if __name__ == "__main__":
    # Script entry point: segment the Zoho import file into the SEGMENTED
    # folder. Both locations are fixed paths on the e: drive.
    leads_csv = r"e:\genesis-system\data\LEADS\TRADIE LEADS AUSTRALIA\ZOHO_SIMPLE_IMPORT.csv"
    segments_dir = r"e:\genesis-system\data\LEADS\SEGMENTED"
    segment_tradie_leads(leads_csv, segments_dir)
