
    
i2                     F   d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
m
Z
 ddlmZ ddlmZ ddlZ e ee      j%                         j&                  d         Zeej*                  vrej*                  j-                  de       ddlmZ ddlmZmZ dd	lmZ  ej<                  e      Z d
edefdZ!de"dee"   fdZ#	 	 dde"dejH                  dededeejJ                     de"fdZ&	 	 	 ddedede'dedee'   de"fdZ(de)de"fdZ*d Z+edk(  r e+        yy)u  
batch_runner.py — Process leads in parallel with async concurrency control.

Usage:
    python -m scripts.heatmap_audit.batch_runner \
        --csv data/LEADS/leads.csv \
        --output data/heatmap_reports/ \
        --concurrency 10

Supports the Zoho CSV format with columns:
    Last Name, Company, Email, Phone, Website, Street, City, State, ...

Also supports a simpler format with columns:
    business_name, email, phone, website_url, industry
    N)datetime)Path)Optional   )
scrape_all)analyzecalculate_overall_score)generate_pdfnamereturnc                     t        j                  dd| j                               }t        j                  dd|      }|dd }|j                         S )z'Convert business name to safe filename.z[^\w\s-] z[\s]+_NP   )resubstriplower)r   safes     ;/mnt/e/genesis-system/scripts/heatmap_audit/batch_runner.py_sanitize_filenamer   ,   sC    66+r4::<0D66(C&D9D::<    rowc                    | j                         D ci c]E  \  }}|s	|j                         j                         j                  dd      |j                         G }}}|j	                  d      xs; |j	                  d      xs( |j	                  d      xs |j	                  d      xs d}|j	                  d      xs( |j	                  d	      xs |j	                  d
      xs d}|j	                  dd      }|j	                  dd      }|j	                  dd      }|j	                  dd      }	|j	                  dd      }
|	s|
r|	 d|
 j                  d      nd}|sy||||||xs ddS c c}}w )zi
    Parse a CSV row into a standardized lead dict.
    Supports both Zoho format and simple format.
     r   companybusiness_name	last_namer   r   websitewebsite_urlurlemailphoneindustrycitystatez, 	AustraliaN)r   r   r"   r#   r$   location)itemsr   r   replaceget)r   kv
normalizedr   r   r"   r#   r$   r%   r&   r(   s               r   _parse_csv_rowr/   4   s~    NQYY[^TQ\]!'')//#++C5qwwy@^J^ 	y! 	>>/*	>>+&	 >>&!	   	y! 	>>-(	>>% 	 	  NN7B'ENN7B'E~~j"-H>>&"%DNN7B'E26%$r%!''-kH '+ ; _s
   
E0?E0leadsession
output_dir	phone_cta	semaphorec           
        K   |xs t        j                  d      }| j                  dd      }| j                  dd      }|4 d{    ||| j                  dd      ddddd	}	 |sN|d| j                  d
d      i i ddddd}		 ddlm}
  |
||| j                  d
d             d{   }|r||	d<   n't        ||| j                  d
d      |       d{   }	t        |	      \  }}|j                  di       j                  dd      }t        |      }| d}t        j                  j                  ||      }t        ||||||       d|d<   ||d<   ||d<   |cddd      d{    S 7 *7 # t
        $ r Y w xY w7 # t
        $ r4}d|d<   t        |      |d<   t        j                  d||       Y d}~ad}~ww xY w7 [# 1 d{  7  sw Y   yxY ww)zd
    Process a single lead: scrape, analyze, generate PDF.
    Returns result dict with status.
       r   Unknownr   r   Nr"   pending)r   r   r"   statusoverall_scorepdf_patherrorr(   r'   Fr   )is_reachablehas_sslstatus_code)r   r!   r(   	pagespeedbraver   )fetch_brave_searchrA   )r!   r   r(   r1   overallscorez_ai_score.pdf)r   scoresrecommendationsoutput_pathr3   r    successr9   r:   r;   r<   zError processing %s: %s)asyncio	Semaphorer+   scripts.heatmap_audit.scraperrB   	Exceptionr   r   r   ospathjoinr
   strloggerr<   )r0   r1   r2   r3   r4   semr   r   resultscrapedrB   
brave_datarE   rF   r:   	safe_namepdf_filenamer;   es                      r   process_leadrY   a   s/     
+w((+CHH_i8Mhhy"%G G G*XXgr*!
:	F &3 $[ A!#05%XYZP'9[0Q( "J "+5( !+"/!XXj+>#	!  '.g&6#FO"JJy"599'1EM +=9I'[6Lww||J=H+ /$##  )F8&3F?#!)F: OG G G6"
 ! 
:  	F&F8!!fF7OLL2M1EE	FEG G G Gs   AGE0G
G&F#E5)E3*E55#FFBFGG*G+G3E55	F>F FF	G*F>9G>GGGGGGGcsv_pathconcurrencylimitc                 	  K   t        j                  |d       g }t        | dd      5 }t        j                  |      }|D ]!  }t        |      }	|	s|j                  |	       # 	 ddd       |r|d| }t        |      }
t        j                  d|
|        |
dk(  rddddg d	S t        j                  |      }t        j                  d
|dz        }t        j                  d      }g }d}d}t        j                         }t         j                   j#                  |d      }t         j                   j#                  |d      }t        j$                  ||      4 d{   }|dz  }t'        d|
|      D ]  }t)        ||z   |
      }||| }|D cg c]  }t+        |||||       }}t        j,                  |ddi d{   }|D ]v  }t/        |t0              r#|dz  }|j                  dt3        |      d       6|j5                  d      dk(  r|dz  }|j                  |       a|dz  }|j                  |       x |}t        j                         |z
  }|dkD  r||z  nd}t7        d| d|
 d| d| d|dd d!        ddd      d{    t        j                         |z
  } |D cg c]  }|j5                  d      dk(  s| }!}|!rjt        |d"      5 }|!D ]9  }"|j9                  |"j5                  d#d$       d%|"j5                  dd&       d'       ; 	 ddd       t        j                  d(|       t;        j<                         j?                         | ||
||tA        | d      | dkD  rtA        |
| z  d      ndtC        |      |D cg c]E  }|j5                  d#      |j5                  d)      |j5                  d*      |j5                  d      d+G c}d,
}#t        |d"      5 }tE        jF                  |#|d-       ddd       t        j                  d.|       t7        d'd/        t7        d0       t7        d1|
 d2| d3|        t7        d4| dd5|#d6    d7       t7        d8|        t7        d9|        |!rt7        d:|        t7        d/        |#S # 1 sw Y   >xY w7 Qc c}w 7 7 '# 1 d{  7  sw Y   8xY wc c}w # 1 sw Y   xY wc c}w # 1 sw Y   xY ww);am  
    Process all leads from CSV in parallel.

    Args:
        csv_path: Path to leads CSV file
        output_dir: Directory for output PDFs
        concurrency: Max concurrent scraping tasks
        phone_cta: Phone number for CTA in PDFs
        limit: Max number of leads to process (None = all)

    Returns:
        Summary dict with counts and results.
    T)exist_okrz	utf-8-sig)encodingNzLoaded %d leads from %sr   )totalrH   r<   skippedresultsFr   )sslr\   <   )ra   z
errors.logzbatch_summary.json)	connectortimeout   )r0   r1   r2   r3   r4   return_exceptionsr6   r<   )r9   r<   r9   rH   z  [/z] z
 success, z	 errors (z.1fz leads/sec))flushwr   unknownz: zunknown error
zError log written to %sr:   r;   )r   r:   r;   r9   )
	timestamprZ   r2   total_leadsrH   errorselapsed_secondsrate_per_secondscore_distributionrc   )indentzBatch summary written to %s<============================================================z  BATCH COMPLETEz	  Total: z | Success: z | Errors: z  Time: z
s | Rate: rs   z
 leads/secz  PDFs: z  Summary: z
  Errors: )$rM   makedirsopencsv
DictReaderr/   appendlenrQ   inforI   rJ   aiohttpTCPConnectorClientTimeouttimerN   rO   ClientSessionrangeminrY   gather
isinstancerL   rP   r+   printwriter   now	isoformatround_score_distributionjsondump)$rZ   r2   r[   r3   r\   leadsfreaderr   parsedra   r4   rf   rg   rc   success_counterror_count
start_timeerror_log_pathsummary_pathr1   
batch_sizebatch_start	batch_endbatch_leadsr0   tasksbatch_resultsr_   	processedelapsedrateelapsed_totalrq   errsummarys$                                       r   	run_batchr      s    ( KK
T* E	hk	2 %a" 	%C#C(FV$	%% fuJE
KK)5(;zq1rRR !!+.I$$kAoFI##"-GGMKJWW\\*l;N77<<
,@AL$$y'J + +g 1_
 E:6 (	KK*4e<II6K (	  #)''	E 	 #*..%"P4"PPM" &a+1$KNN")!$Q$  UU8_	1!Q&MNN1%1$KNN1%& "IiikJ.G*1A+9w&1Di[% /K= 9:[* 	G(	+ +Z IIK*,M !?AAEE(Ow$>a?F?.#& 	! ww	:;2cggg>_=``bc	
 	-~> \\^--/   2>Ka>O5!6:UV1': 
  "#!7!"!7EE*-%%/	
G* 
lC	  (A		'1Q'(
KK-|<	Bvh-		IeWL{;-
PQ	H]3'z':K2L1MZ
XY	HZL
!"	K~
&'
>*+,	VHNu% %:+	 Q%+ + + +` @	 	$
( (s   (S7(R#R#'C8S7R0 S7#/R>R3)R>R8CR>	S7R;S74SSS7"?S"A2S7A
S&S7/S+BS7#R-(	S73R>;S7>SSSS7S#S7+S40S7rc   c                     dddddd}| D ]o  }|j                  d      }||dk  r|dxx   dz  cc<   *|dk  r|dxx   dz  cc<   =|d	k  r|d
xx   dz  cc<   P|dk  r|dxx   dz  cc<   c|dxx   dz  cc<   q |S )z)Calculate score distribution for summary.r   )critical_0_20
poor_21_40average_41_60
good_61_80excellent_81_100r:      r   r6   (   r   re   r   r   r   r   )r+   )rc   distr_   rD   s       r   r   r   O  s    a!STjklD *o&=B;!Q&!b[!#b[!Q&!b[!##$)$* Kr   c            	         t        j                  d      } | j                  ddd       | j                  ddd	
       | j                  dt        dd       | j                  ddd
       | j                  dt        d d       | j                  dddd       | j	                         }|j
                  rt        j                  nt        j                  }t        j                  |dd       t        j                  j                  |j                        s-t        d|j                          t        j                   d       t        d       t        d       t        d         t        d!|j                          t        d"|j"                          t        d#|j$                          |j&                  rt        d$|j&                          |j(                  rt        d%|j(                   d&       t        d  d'       t+        j,                  t/        |j                  |j"                  |j$                  |j&                  |j(                  (             y ))NuB   Heatmap Audit Generator — Batch process leads into AI Score PDFs)descriptionz--csvTzPath to leads CSV file)requiredhelpz--outputzdata/heatmap_reports/z:Output directory for PDFs (default: data/heatmap_reports/))defaultr   z--concurrency
   z+Max concurrent scraping tasks (default: 10))typer   r   z--phoner   zPhone number for CTA in PDFsz--limitz-Max number of leads to process (default: all)z	--verbosez-v
store_truezEnable verbose logging)actionr   z1%(asctime)s [%(levelname)s] %(name)s: %(message)sz%H:%M:%S)levelformatdatefmtzERROR: CSV file not found: r6   z
Heatmap Audit Generator v1.0zPowered by Sunaiva Digitalrv   z  CSV: z
  Output: z  Concurrency: z  CTA Phone: z	  Limit: z leadsrn   )rZ   r2   r[   r3   r\   )argparseArgumentParseradd_argumentint
parse_argsverboseloggingDEBUGINFObasicConfigrM   rN   existsry   r   sysexitoutputr[   r#   r\   rI   runr   )parserargsr   s      r   mainr   c  s   $$XF $%   3I   c2:   2+   T<   T,%  
 D "\\GMMw||EB 77>>$((#+DHH:67	*,	&(	VH	GDHH:
	Jt{{m
$%	OD,,-
./zzdjj\*+zz	$**V,-	VHB-KKXX{{((jj**	
r   __main__)r   N)r   r   N),__doc__r   rI   ry   r   r   rM   r   r   r   r   pathlibr   typingr   r~   rP   __file__resolveparentsPROJECT_ROOTrN   insertrK   r   scripts.heatmap_audit.analyzerr   r	   #scripts.heatmap_audit.pdf_generatorr
   	getLogger__name__rQ   r   dictr/   r   rJ   rY   r   r   listr   r    r   r   <module>r      s      
   	 	 
      4>))+33A67sxxHHOOA|$ 4 K <			8	$S S * *$ *b -1V
V""V V 	V
 ))*V 
Vx RRR R 	R
 C=R 
Rj $ (?D zF r   