import pandas as pd
import os
import re

def _follower_tier(value):
    """Return the tier label for a raw follower count.

    Commas (thousands separators) are stripped before parsing. Counts below
    10,000 are 'Nano', below 100,000 are 'Micro', anything else is 'Mid'.
    Values that are missing or cannot be parsed as a number are 'Unknown'.
    """
    try:
        count = float(str(value).replace(',', ''))
    except ValueError:
        return 'Unknown'
    # A blank cell parses to NaN, which fails every '<' comparison; without
    # this check it would fall through and be mislabelled as 'Mid'.
    if pd.isna(count):
        return 'Unknown'
    if count < 10000:
        return 'Nano'
    if count < 100000:
        return 'Micro'
    return 'Mid'


def _niche_slug(value):
    """Return a lower-cased, filename-safe slug for a niche value (None if missing)."""
    if pd.isna(value):
        return None
    return re.sub(r'[^a-zA-Z0-9_\-]', '_', str(value).lower())


def segment_influencer_leads(input_file, output_dir):
    """Split a leads table into per-tier and per-niche CSV files.

    For every column whose header contains 'Followers', a 'Tier' column is
    added to the table and one ``influencers_<tier>.csv`` is written per tier.
    For every column whose header contains 'Niche' or 'Category', one
    ``influencers_<slug>.csv`` is written per distinct slug of that column.
    Rows with a missing niche value are not written to any niche file.

    If several columns match, each one rewrites the files of the same name,
    so the last matching column wins.

    Args:
        input_file: Path to the leads file. A ``.csv`` path is read with
            ``pd.read_csv``; anything else is read with ``pd.read_excel``.
        output_dir: Directory for the generated CSV files; created if missing.

    Returns:
        None. Results are the files written to ``output_dir``.
    """
    print(f"Reading influencer leads from {input_file}...")
    if str(input_file).lower().endswith('.csv'):
        df = pd.read_csv(input_file)
    else:
        # Expected to be an XLSX workbook, which pandas reads through openpyxl.
        df = pd.read_excel(input_file)

    os.makedirs(output_dir, exist_ok=True)

    # Show the headers so the operator can see which columns drive the split.
    print(f"Columns: {df.columns.tolist()}")

    # Iterate over a snapshot: the loop adds a 'Tier' column to df.
    for col in list(df.columns):
        # Spreadsheet headers are not guaranteed to be strings.
        col_name = str(col)

        if 'Followers' in col_name:
            df['Tier'] = df[col].apply(_follower_tier)
            for tier in df['Tier'].unique():
                tier_df = df[df['Tier'] == tier]
                tier_file = os.path.join(output_dir, f"influencers_{tier.lower()}.csv")
                tier_df.to_csv(tier_file, index=False)
                print(f"Created {tier_file} with {len(tier_df)} leads.")

        if 'Niche' in col_name or 'Category' in col_name:
            # Group by slug rather than by raw value: values that differ only
            # in case or punctuation share a file name, and writing them one
            # at a time would overwrite the earlier rows.
            slugs = df[col].map(_niche_slug)
            for niche_slug in slugs.dropna().unique():
                niche_df = df[slugs == niche_slug]
                niche_file = os.path.join(output_dir, f"influencers_{niche_slug}.csv")
                niche_df.to_csv(niche_file, index=False)
                print(f"Created {niche_file} with {len(niche_df)} leads.")

if __name__ == "__main__":
    # Hard-coded locations of the source folder and the output folder.
    influencer_base = r"e:\genesis-system\data\LEADS\SUNAIVA_5000_Micro_Influencer_Empire(1)"
    output_path = r"e:\genesis-system\data\LEADS\SEGMENTED_INFLUENCERS"

    # Process the master file first
    master_file = os.path.join(influencer_base, "SUNAIVA_Micro_Influencer_Database.xlsx")
    # isfile rather than exists: a directory at this path cannot be read.
    if os.path.isfile(master_file):
        segment_influencer_leads(master_file, output_path)
    else:
        # Report the skip so a wrong path is not mistaken for a successful run.
        print(f"Master file not found, nothing to segment: {master_file}")