
import glob
import json
import os
import time

from swarms.config import swarm_config

# Initialize Flash Model: shared module-level handle to the project's "fast"
# tier model (presumably a Gemini Flash wrapper exposing .generate_content —
# TODO confirm against swarms.config).
model = swarm_config.get_model("fast") 

def _read_batch_content(paths):
    """Concatenate the contents of *paths*, each tagged with its basename.

    Each file is capped at 5,000 characters to save tokens/speed. Unreadable
    files are skipped with a console notice rather than aborting the batch.
    """
    parts = []
    for tf in paths:
        try:
            with open(tf, "r", encoding="utf-8") as f:
                # Cap at 5k chars per file to save tokens/speed
                parts.append(f"\n--- FILE: {os.path.basename(tf)} ---\n" + f.read()[:5000])
        except (OSError, UnicodeDecodeError) as e:
            print(f"Skipping {tf}: {e}")
    return "".join(parts)


def analyze_transcripts_batch(
    transcript_dir=r"E:\genesis-system\transcripts",
    output_path=r"E:\genesis-system\research\competitive_intelligence_raw.json",
    chunk_size=5,
    max_retries=3,
):
    """Extract offer hooks and automation triggers from transcripts via the LLM.

    Globs every ``*.txt`` under *transcript_dir*, sends them to the model in
    batches of *chunk_size* (context hygiene), parses the JSON list the model
    returns, and writes the aggregated findings to *output_path*.

    Args:
        transcript_dir: Folder containing the ``.txt`` transcript files.
        output_path: Destination JSON file for the aggregated findings.
        chunk_size: Number of transcripts bundled into one model request.
        max_retries: Attempts per batch when the API rate-limits (HTTP 429).
    """
    transcript_files = glob.glob(os.path.join(transcript_dir, "*.txt"))
    print(f"Found {len(transcript_files)} transcripts.")

    findings = []

    for i in range(0, len(transcript_files), chunk_size):
        batch = transcript_files[i:i + chunk_size]
        print(f"Processing Batch {i//chunk_size + 1}...")

        batch_content = _read_batch_content(batch)

        prompt = f"""
        Analyze these YouTube transcripts from AI Automation Agencies.
        EXTRACT the following into a JSON list:
        1. "Offer Hook": Specific guarantees or value props (e.g. "10 calls or refund").
        2. "Automation Trigger": Technical logic mentioned (e.g. "If lead says 'price', send loom video").
        
        Return ONLY valid JSON: [{{ "source": "filename", "hook": "...", "trigger": "..." }}]
        
        TRANSCRIPTS:
        {batch_content}
        """

        # Retry loop: only 429 (rate-limit) errors are retried; anything else
        # (including malformed JSON from the model) drops the batch and moves on.
        for attempt in range(max_retries):
            try:
                response = model.generate_content(prompt)
                # Strip the markdown code fences the model sometimes wraps
                # around its JSON payload.
                text = response.text.replace("```json", "").replace("```", "").strip()
                batch_data = json.loads(text)
                if not isinstance(batch_data, list):
                    # Defensive: extending with a bare dict would add its keys,
                    # not the record — wrap a lone object into a list instead.
                    batch_data = [batch_data]
                findings.extend(batch_data)
                print(f"  > Extracted {len(batch_data)} intelligence points.")

                # Throttle between batches only — no pointless sleep after the
                # final batch. (Paid tier barrier protection.)
                if i + chunk_size < len(transcript_files):
                    time.sleep(20)
                break
            except Exception as e:
                if "429" in str(e):
                    print(f"  > Hit Rate Limit. Sleeping 60s... (Attempt {attempt+1}/{max_retries})")
                    time.sleep(60)
                else:
                    print(f"  > Batch failed: {e}")
                    break

    # Persist results; create the destination folder if it doesn't exist yet.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps any non-ASCII transcript text readable.
        json.dump(findings, f, indent=2, ensure_ascii=False)

    print("Analysis Complete.")

# Script entry point: run the full transcript analysis when executed directly.
if __name__ == "__main__":
    analyze_transcripts_batch()
