import os
import sys
import pandas as pd
from apify_client import ApifyClient
from pathlib import Path
from datetime import datetime, timedelta

# Add core to path
sys.path.append(str(Path(__file__).parent.parent / "core"))

class ViralSiphonScraper:
    """Checks YouTube creators for Shorts inactivity via the Apify YouTube Scraper.

    Leads are read from a CSV (expected columns: 'Platform', 'Handle'),
    filtered to YouTube, and annotated with whether the channel appears to
    have stopped posting Shorts within a look-back window.
    """

    def __init__(self, api_token=None):
        """Create a scraper.

        Args:
            api_token: Apify API token. Falls back to the APIFY_API_TOKEN
                environment variable. If neither is set, ``self.client`` is
                left as None and inactivity checks report an error tuple
                instead of calling the API.
        """
        # SECURITY: a hard-coded fallback API token was previously committed
        # here; it has been removed. Credentials must come from the caller or
        # the environment — never from source control.
        self.api_token = api_token or os.environ.get("APIFY_API_TOKEN")
        self.client = ApifyClient(self.api_token) if self.api_token else None

    def check_youtube_shorts_inactivity(self, youtube_url, days=14):
        """Check whether a channel has posted a Short in the last *days* days.

        Args:
            youtube_url: Channel URL (or handle) forwarded to the actor's
                search keywords.
            days: Look-back window in days.

        Returns:
            Tuple ``(inactive, reason)``. ``True`` means no Shorts were found
            in the window (channel looks inactive). ``False`` means Shorts
            were found, the client is unconfigured, or the API call failed —
            the reason string distinguishes these cases.
        """
        if not self.client:
            return False, "No Apify client"

        # Actor: apify/youtube-scraper — restricted to Shorts posted within
        # the look-back window, newest first, capped at 5 results.
        run_input = {
            "downloadSubtitles": False,
            "hasQuotes": False,
            "hasSubtitles": False,
            "isShort": True,
            "maxResults": 5,
            "postsFromAfter": (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d"),
            "searchKeywords": youtube_url,
            "sort": "newest"
        }

        try:
            run = self.client.actor("apify/youtube-scraper").call(run_input=run_input)
            results = list(self.client.dataset(run["defaultDatasetId"]).iterate_items())
        except Exception as e:
            # API/network failure: report not-inactive so the lead is not
            # falsely flagged; the reason carries the error text.
            return False, str(e)

        # Any result inside the window means the channel HAS posted a Short.
        if results:
            return False, f"Posted {len(results)} shorts in last {days} days"
        return True, f"No shorts found in last {days} days"

    def process_lead_file(self, input_csv, output_csv, limit=5, days=14):
        """Annotate YouTube leads from *input_csv* with Shorts inactivity.

        Args:
            input_csv: Path to a CSV with at least 'Platform' and 'Handle'
                columns.
            output_csv: Path where annotated rows are written (only reached
                when at least one YouTube lead exists).
            limit: Maximum number of YouTube leads to check.
            days: Look-back window forwarded to the inactivity check.
        """
        df = pd.read_csv(input_csv)
        # Only YouTube creators are relevant; cap the batch to `limit`.
        yt_leads = df[df['Platform'] == 'YouTube'].head(limit)

        if yt_leads.empty:
            print("[ViralSiphon] No YouTube leads found in file.")
            return

        results = []
        for _, row in yt_leads.iterrows():
            # Coerce to str so non-string handles (e.g. NaN) can't crash
            # .startswith with an AttributeError.
            handle = str(row['Handle'])
            # Accept either a full URL or a bare handle like "@creator".
            yt_url = handle if handle.startswith('http') else f"https://www.youtube.com/{handle}"

            print(f"[ViralSiphon] Checking {yt_url}...")
            inactive, reason = self.check_youtube_shorts_inactivity(yt_url, days=days)

            lead_data = row.to_dict()
            lead_data['Shorts_Inactive'] = inactive
            lead_data['Inactivity_Reason'] = reason
            results.append(lead_data)

        # Persist the annotated leads.
        out_df = pd.DataFrame(results)
        out_df.to_csv(output_csv, index=False)
        print(f"[ViralSiphon] Processed {len(results)} leads. Results saved to {output_csv}")

def _main():
    """Script entry point: validate a sample of business-coaching leads."""
    scraper = ViralSiphonScraper()
    # Test with business coaching leads
    lead_file = "e:/genesis-system/data/LEADS/SEGMENTED_INFLUENCERS/influencers_business_coaching.csv"
    output_file = "e:/genesis-system/data/LEADS/SEGMENTED_INFLUENCERS/viralsiphon_validated_leads.csv"

    # `--check-config` short-circuits before any file processing.
    if len(sys.argv) > 1 and sys.argv[1] == "--check-config":
        print("Config Check: OK")
        return

    scraper.process_lead_file(lead_file, output_file, limit=3)


if __name__ == "__main__":
    _main()
