
    qiQ                     B   d Z ddlZddlZddlZddlmZ ddlmZmZ ddl	m	Z	m
Z
 ddlmZ ddlmZmZmZmZmZ ddlmZ  G d	 d
e      Ze G d d             Ze G d d             Ze G d d             Ze G d d             Z G d d      Zd Zedk(  r e        yy)a  
GENESIS GEMINI RATE MAXIMIZER
==============================
Intelligent rate limit tracking and request scheduling to maximize
utilization of Gemini API credits while staying just under limits.

Features:
- Multi-model load balancing based on current utilization
- Sliding window tracking for RPM/TPM/RPD
- Predictive scheduling to hit 90-95% of limits
- Automatic failover when models hit limits
- Burst detection and proactive throttling

Usage:
    maximizer = GeminiRateMaximizer()

    # Get best available model
    model = maximizer.get_best_model(token_estimate=1000)

    # Record usage after request
    maximizer.record_usage(model, input_tokens=500, output_tokens=1000)

    # Get utilization report
    report = maximizer.get_utilization_report()
    Ndeque)	dataclassfield)datetime	timedelta)Path)DictListOptionalAnyTuple)Enumc                   0    e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zy
)TaskTypez#Task types for intelligent routing.researchcode_generationcode_reviewarchitecturesimple_extractionclassificationsummarizationgeneralN)__name__
__module____qualname____doc__RESEARCHCODE_GENERATIONCODE_REVIEWARCHITECTURESIMPLE_EXTRACTIONCLASSIFICATIONSUMMARIZATIONGENERAL     3/mnt/e/genesis-system/core/gemini_rate_maximizer.pyr   r   '   s.    -H'OK!L+%N#MGr'   r   c                   p    e Zd ZU dZeed<   eed<   eed<   eed<   eed<   eed<    ee      Z	e
e   ed	<   y
)ModelLimitszRate limits for a single model.rpmtpmrpdprioritycost_per_million_inputcost_per_million_outputdefault_factory	use_casesN)r   r   r   r   int__annotations__floatr   listr3   r   strr&   r'   r(   r*   r*   3   s8    )	H	H	HM!!"" 6ItCy6r'   r*   c                       e Zd ZU dZ ed       Zeed<    ed       Zeed<   dZ	e
ed<   dZe
ed	<    eej                        Zeed
<   d Zd Zde
fdZde
fdZde
fdZy)UsageWindowz"Sliding window for tracking usage.c                      t        d      S N'  )maxlenr   r&   r'   r(   <lambda>zUsageWindow.<lambda>B   s    E4G r'   r1   requestsc                      t        d      S r<   r   r&   r'   r(   r?   zUsageWindow.<lambda>C   s    %u2E r'   tokensr   daily_requestsdaily_tokenslast_daily_resetc                    t        j                          dz
  }| j                  rQ| j                  d   d   |k  r<| j                  j                          | j                  r| j                  d   d   |k  r<| j                  rT| j                  d   d   |k  r>| j                  j                          | j                  r| j                  d   d   |k  r<yyyy)z#Remove entries older than 1 minute.<   r   N)timer@   popleftrB   )selfcutoffs     r(   cleanup_minute_windowz!UsageWindow.cleanup_minute_windowH   s    r!mma 0 3f <MM!!# mma 0 3f <kkdkk!nQ/&8KK! kkdkk!nQ/&8k8kr'   c                 |    t        j                          }|| j                  z
  dkD  rd| _        d| _        || _        yy)z)Reset daily counters at midnight Pacific.iQ r   N)rH   rE   rC   rD   )rJ   nows     r(   check_daily_resetzUsageWindow.check_daily_resetP   s=    iik&&&."#D !D$'D! /r'   returnc                 L    | j                          t        | j                        S )z Get current requests per minute.)rL   lenr@   rJ   s    r(   get_rpmzUsageWindow.get_rpmY   s    ""$4==!!r'   c                 Z    | j                          t        d | j                  D              S )zGet current tokens per minute.c              3   &   K   | ]	  }|d      yw)   Nr&   ).0ts     r(   	<genexpr>z&UsageWindow.get_tpm.<locals>.<genexpr>a   s     -A1Q4-s   )rL   sumrB   rS   s    r(   get_tpmzUsageWindow.get_tpm^   s#    ""$----r'   c                    t        j                          }| j                  j                  |f       | j                  j                  ||f       | xj                  dz  c_        | xj
                  |z  c_        | j                          y)z"Record a request with token count.rW   N)rH   r@   appendrB   rC   rD   rO   )rJ   rB   rN   s      r(   recordzUsageWindow.recordc   se    iikcV$C=)q V# r'   N)r   r   r   r   r   r@   r   r5   rB   rC   r4   rD   rH   rE   r6   rL   rO   rT   r\   r_   r&   r'   r(   r:   r:   ?   s{    ,,GHHeH*EFFEFNCL##DII>e>"(" "
. .
!S !r'   r:   c                   :    e Zd ZU dZeed<   eed<   eed<   eed<   y)ScheduledRequestz"A request scheduled for execution.modeldelay_secondsreasonutilizationN)r   r   r   r   r8   r5   r6   r&   r'   r(   ra   ra   m   s    ,JKr'   ra   c                   ^    e Zd ZU dZeed<   eeeeef   f   ed<   eed<   eed<   e	e   ed<   y)UtilizationReportz"Utilization report for all models.	timestampmodels
best_modeltotal_capacity_usedrecommendationsN)
r   r   r   r   r8   r5   r
   r   r6   r   r&   r'   r(   rg   rg   v   s7    ,Nd38n$%%O#Yr'   rg   c            	       ^   e Zd ZdZ ed      Z ed      ZddefdZdefdZ	defd	Z
d
 Zdedeeef   fdZdedeeef   fdZdej$                  fdededefdZ	 ddedededefdZdedededefdZdej$                  dfdedededefdZdefdZdeeef   fdZdeeef   fdZy)GeminiRateMaximizerz
    Intelligent Gemini API rate limit maximizer.

    Tracks usage across all models and routes requests to maximize
    throughput while staying within limits.
    z0E:/genesis-system/config/gemini_rate_limits.jsonz1E:/genesis-system/data/rate_maximizer_usage.jsonlNconfig_pathc                     |xs | j                   | _        | j                         | _        i | _        i | _        t        j                         | _        | j                          y N)
CONFIG_PATHro   _load_configconfigri   usage_windows	threadingRLock_lock_initialize_models)rJ   ro   s     r(   __init__zGeminiRateMaximizer.__init__   sN    &:$*:*:'').057__&
!r'   rP   c                     | j                   j                         r4t        | j                         5 }t        j                  |      cddd       S | j                         S # 1 sw Y   | j                         S xY w)z"Load configuration from JSON file.N)ro   existsopenjsonload_default_config)rJ   fs     r(   rs   z GeminiRateMaximizer._load_config   sb    ""$d&&' $1yy|$ $##%%$##%%s   AA7c                 (    dddddddddd	d
dddS )z(Default configuration if file not found.?g?   i@B i  rW   )r+   r,   r-   r.           )gemini-2.5-flashzgemini-2.5-pro)target_utilizationsafety_marginri   r&   rS   s    r(   r   z#GeminiRateMaximizer._default_config   s3     #'!,/Z[$\*-gdXY"Z
 	
r'   c                    | j                   j                  di       j                         D ]  \  }}t        |j                  dd      |j                  dd      |j                  dd      |j                  dd	      |j                  d
d      |j                  dd      |j                  ddg            | j                  |<   t               | j                  |<    y)z$Initialize model limits from config.ri   r+   d   r,   i r-   r.   
   r/   g?r0   g?r3   r   )r+   r,   r-   r.   r/   r0   r3   N)rt   getitemsr*   ri   r:   ru   )rJ   
model_namemodel_configs      r(   ry   z&GeminiRateMaximizer._initialize_models   s    (,"(E(K(K(M 
	;$J&1 $$UC0 $$UF3 $$UB/%))*b9'3'7'78PRV'W(4(8(89RTX(Y&**;D'DKK
# .9]Dz*
	;r'   rb   c           	      B   | j                   5  || j                  vrdddddcddd       S | j                  |   }| j                  |   }|j                  dkD  r|j	                         |j                  z  nd}|j
                  dkD  r|j                         |j
                  z  nd}|j                  dkD  r|j                  |j                  z  }nd}|||t        |||      |j	                         |j                         |j                  dcddd       S # 1 sw Y   yxY w)z
        Get current utilization percentages for a model.

        Returns:
            Dict with rpm_util, tpm_util, rpd_util percentages
        r   )rpm_utiltpm_utilrpd_utilmax_utilN)r   r   r   r   current_rpmcurrent_tpmrC   )
rx   ri   ru   r+   rT   r,   r\   r-   rC   max)rJ   rb   limitswindowr   r   r   s          r(   get_model_utilizationz)GeminiRateMaximizer.get_model_utilization   s    ZZ 	DKK'$%1!QRS	 	 [['F''.F8>

Qv~~'&**4AH8>

Qv~~'&**4AHzzA~!006::= %$$(H=%~~/%~~/"("7"7	 	 	s   DCDDc           	         | j                   5  || j                  vrddddcddd       S | j                  |   }| j                  |   }| j                  j	                  dd      }t        |j                  |z        |j                         z
  }t        |j                  |z        |j                         z
  }|j                  dkD  r&t        |j                  |z        |j                  z
  }nt        d      }t        d|      t        d|      |t        d      k7  rt        dt        |            nddcddd       S # 1 sw Y   yxY w)zy
        Get available capacity for a model.

        Returns remaining RPM, TPM, and RPD before hitting limits.
        r   )rpm_availabletpm_availablerpd_availableNr   r   infr   )rx   ri   ru   rt   r   r4   r+   rT   r,   r\   r-   rC   r6   r   )rJ   rb   r   r   targetr   r   r   s           r(   get_available_capacityz*GeminiRateMaximizer.get_available_capacity   s!    ZZ 	DKK')*QQRS	 	 [['F''.F[[__%94@F

V 34v~~7GGM

V 34v~~7GGMzzA~ #FJJ$7 86;P;P P %e "%Q!6!$Q!6?LPUV[P\?\QM(:!;bd!	 	 	s   D=DD==Er   token_estimate	task_typec                    | j                   5  | j                  j                  di       j                  di       }|j                  |v r;||j                     }| j	                  |      }|d   dkD  r|d   |k\  r|cddd       S d}d}t        | j                  j                         d 	      }|D ]y  \  }	}
| j	                  |	      }|d   dk  r |d   |k  r)|d
   dk(  r2d|
j                  z
  dz  }|d   dz  |d   dz  dz  z   |d
   dkD  r|d
   nddz  z   }||z   }||kD  sv|}|	}{ |,| j                  j                  di       j                  dd      }|cddd       S # 1 sw Y   yxY w)a  
        Select the best available model based on current utilization.

        Args:
            token_estimate: Estimated tokens for the request
            task_type: Type of task for intelligent routing

        Returns:
            Model name with most available capacity
        routing_rulestask_routingr   r   r   Nr   c                      | d   j                   S )NrW   )r.   )xs    r(   r?   z4GeminiRateMaximizer.get_best_model.<locals>.<lambda>  s    admm r'   )keyr   r   r         ?r   333333?r=   皙?default_modelr   )	rx   rt   r   valuer   sortedri   r   r.   )rJ   r   r   r   	preferredcapacityrj   
best_scoresorted_modelsr   r   priority_bonuscapacity_scorescores                 r(   get_best_modelz"GeminiRateMaximizer.get_best_model   s    ZZ 3	;;???B?CCNTVWL,.(9	66yAO,q0Xo5NR`5`$3	 3	 JJ #!!#+M
 '4 ,"
F66zB O,1O,~=O,1 #%v"6#!=_-3_-4s:;2:?2Ka2OXo.UZ^aab  '7:%!&J!+J1,6 !![[___bAEE#%7
 g3	 3	 3	s   A,E$B!E$%5E$$E-input_tokensoutput_tokenssuccessc                     | j                   5  || j                  vrt               | j                  |<   ||z   }| j                  |   j                  |       | j	                  ||||       ddd       y# 1 sw Y   yxY w)z
        Record API usage for a model.

        Args:
            model: Model name
            input_tokens: Number of input tokens
            output_tokens: Number of output tokens
            success: Whether the request succeeded
        N)rx   ru   r:   r_   
_log_usage)rJ   rb   r   r   r   total_tokenss         r(   record_usagez GeminiRateMaximizer.record_usage4  s{      ZZ 	ID...,7M""5)'-7Lu%,,\: OOE<H	I 	I 	Is   AA33A<c           	      n   | j                   j                  j                  dd       t        j                         j                         |||||z   || j                  |      d}t        | j                   d      5 }|j                  t        j                  |      dz          ddd       y# 1 sw Y   yxY w)zLog usage to JSONL file.T)parentsexist_ok)rh   rb   r   r   r   r   re   a
N)USAGE_LOG_PATHparentmkdirr   rN   	isoformatr   r}   writer~   dumps)rJ   rb   r   r   r   entryr   s          r(   r   zGeminiRateMaximizer._log_usageN  s     	""(((E "113(*(=855e<
 $%%s+ 	.qGGDJJu%,-	. 	. 	.s   :(B++B4   r.   c                    | j                   5  | j                  ||      }| j                  |      }| j                  |      }d}d}|d   dk  rd| j                  |   j
                  z  }d}nV|d   |k  r(d||d   z
  z  | j                  |   j                  z  }d}n&|d	   | j                  j                  d
d      kD  rd}d}t        ||||d	         cddd       S # 1 sw Y   yxY w)aQ  
        Schedule a request with optimal timing and model selection.

        Args:
            token_estimate: Estimated tokens for the request
            task_type: Type of task
            priority: Request priority (1-10, lower = higher priority)

        Returns:
            ScheduledRequest with model, delay, and reason
        g        	immediater   r   g      N@rpm_throttler   tpm_throttler   burst_detection_thresholdg333333?r   burst_prevention)rb   rc   rd   re   N)
rx   r   r   r   ri   r+   r,   rt   r   ra   )	rJ   r   r   r.   rj   r   re   delayrd   s	            r(   schedule_requestz$GeminiRateMaximizer.schedule_requeste  s   " ZZ 	,,^YGJ22:>H44Z@K E F(A-t{{:6:::'/*^;/1J JKdkkZdNeNiNii'Z(4;;??;VX\+]]+# #'
3	-	 	 	s   CCC'c           	      J   | j                   5  i }d}d}| j                  D ]  }| j                  |      }| j                  |      }| j                  |   }|||j                  |j
                  |j                  d|j                  d||<   ||j                  z  }||d   z  } g }|j                         D ]0  \  }}	|	d   d   dk  s|j                  d| d	|	d   d   d
d       2 |j                         D ]0  \  }}	|	d   d   dkD  s|j                  d| d|	d   d   d
d       2 |dkD  r||z  nd}
|
dk  r|j                  d|
d
d       t        t        j                         j                         || j                         |
|      cddd       S # 1 sw Y   yxY w)z
        Generate comprehensive utilization report.

        Returns:
            UtilizationReport with all model stats and recommendations
        r   )r+   r,   r-   )re   r   r   r.   r   re   r   r   zModel z is underutilized (.1%z'). Consider routing more requests here.gffffff?z is near capacity (z+). Consider load balancing to other models.zOverall utilization is low (z/). Enable research tasks to fill idle capacity.)rh   ri   rj   rk   rl   N)rx   ri   r   r   r+   r,   r-   r.   r   r^   rg   r   rN   r   r   )rJ   models_reporttotal_capacity
total_usedr   utilr   r   rl   reportoverall_utils              r(   get_utilization_reportz*GeminiRateMaximizer.get_utilization_report  s    ZZ :	MNJ"kk 2
11*=66zBZ0 $( (%zz%zz%zz
 !'	-j) &**,d=11
%2* !O '4&9&9&; "
F-(4s:#** ,?}@UV`@abe?f g? @ '4&9&9&; "
F-(4t;#** ,?}@UV`@abe?f gC D ;I1:L:6RSLc!&&2<2D EC D
 %",,.224$..0$0 /i:	 :	 :	s   B9FAF
BFF"c                     | j                   j                  di       }|j                  dd      sy|j                  dd      }| j                         }| j                  |      }d|d   z
  }||k\  rdd	|d
fS dd|d
fS )zs
        Check if there's capacity for research tasks.

        Returns:
            (can_execute, reason)
        research_configenabledT)Fresearch_disabledmin_capacity_for_researchr   g      ?r   capacity_available_r   Finsufficient_capacity_)rt   r   r   r   )rJ   r   min_capacityrj   r   	availables         r(   can_execute_researchz(GeminiRateMaximizer.can_execute_research  s     ++//*;R@""9d3-&**+FM ((*
))*5$z**	$.yo>>>29S/BBBr'   c                    | j                   j                  di       }|j                  dd      }| j                  t        j                        }| j                  |      }|t        |d   |z        t        |d   |z        dS )z
        Calculate how many research requests can be made.

        Returns:
            Dict with requests_available and tokens_available
        r   max_research_percentager   )r   r   r   )rb   requests_availabletokens_available)rt   r   r   r   r   r   r4   )rJ   r   max_percentagerj   r   s        r(   get_research_budgetz'GeminiRateMaximizer.get_research_budget  s     ++//*;R@(,,-FM((83D3D(E
..z:  "%h&?.&P"Q #H_$=$N O
 	
r'   rq   )T)r   r   r   r   r	   rr   r   rz   r
   rs   r   ry   r8   r6   r   r4   r   r   r%   r   boolr   r   ra   r   rg   r   r   r   r   r&   r'   r(   rn   rn      s    IJKMNN"D "&d &	
 	
;3 4U
3C @C DcN < #&..BB B 
	BR II I 	I
 I4.. . 	.
 .2 #&..	,, , 	,
 
,\A(9 AFCeD#I&6 C0
T#s(^ 
r'   rn   c                  (   ddl } | j                  d      }|j                  dg d       |j                  dt        d	d
       |j                  dt        dd       |j                         }t               }|j                  dk(  rm|j                         }t        dd        t        d|j                          t        d d       t        d|j                          t        d|j                  dd       t        d       t        d       |j                  j                         D ]  \  }}|d   }t        d| d       t        d|d    d|d   d    d |d!   dd"       t        d#|d$    d|d   d%    d |d&   dd"       t        d'|d(    d|d   d)           t                 |j                  r6t        d*       t        d       |j                  D ]  }t        d+|         yy|j                  d,k(  r|j                   dk7  rt#        |j                         nt"        j$                  }	|j'                  |j(                  |	      }|j+                  |      }
t        d-|j(                   d.|j                    d/       t        d0|        t        d1|
d2           t        d3|
d4           y|j                  d5k(  rt|j-                         \  }}|j/                         }t        d6       t        d7| d | d"       t        d0|d8           t        d9|d:           t        d;|d<           y|j                  d=k(  rt        d>       t        d?       t1        d@      D ]l  }|j3                  dAB      }t        dC|dDz    dE|j4                   dF|j6                  dGdH|j8                   d"	       |j;                  |j4                  dIdJK       n t        dL       |j                         }t        dM|j                  d       yy)NzCLI for rate maximizer.r   NzGemini Rate Maximizer)descriptioncommand)statusbestr   demo)choicesz--tokensr   zToken estimate)typedefaulthelpz--taskr   z	Task typer   r   z<============================================================zGEMINI RATE MAXIMIZER STATUS - zBest Model: zTotal Capacity Used: r   zModel Utilization:z(----------------------------------------re   z  :z	    RPM: r   /r   r+   z (r   )z	    TPM: r   r,   r   z	    RPD: rC   r-   zRecommendations:z  - r   z
Best Model for z	 tokens (z):z	  Model: z  Available RPM: r   z  Available TPM: r   r   z
Research Capacity:z  Can Execute: rb   z  Requests Available: r   z  Tokens Available: r   r   z
=== RATE MAXIMIZER DEMO ===
zSimulating 10 requests...r   i  )r   z
  Request rW   z: z	 (delay: z.2fzs, reason: r   i,  )r   r   z)
========================================zFinal utilization: )argparseArgumentParseradd_argumentr4   r8   
parse_argsrn   r   r   printrh   rj   rk   ri   r   rl   taskr   r%   r   rB   r   r   r   ranger   rb   rc   rd   r   )r   parserargs	maximizerr   rb   datar   recr   r   can_executerd   budgeti	scheduleds                   r(   mainr    s1   $$1H$IF
	+QR

dAQR
sIKPD#%I||x1136(m/0@0@/ABCmV../01%f&@&@%ERHI"#h!==..0 	KE4&DBugQ- Id=12!DN54I3J"TR\M]^aLbbcdeId=12!DN54I3J"TR\M]^aLbbcdeId#345QtH~e7L6MNOG	 !!$%(O-- $SEl#$ " 
	+/99	+AHTYY'xGWGW	((i@33E:!$++i		{"EF	%!"!(?";!<=>!(?";!<=>		#'<<>V..0$&}Bvha89	&/*+,&v.B'C&DEF$V,>%?$@AB		/0 	)*r 	YA!22#2FIJqse2ioo%6i	@W@WX[?\\ghqhxhxgyyz{| ""9??TW"X	Y 	m113#F$>$>s#CDE 
 r'   __main__)r   r~   rH   rv   collectionsr   dataclassesr   r   r   r   pathlibr	   typingr
   r   r   r   r   enumr   r   r*   r:   ra   rg   rn   r  r   r&   r'   r(   <module>r     s   4     ( (  3 3 	t 	 7 7 7 *! *! *!Z      
 
DGFT zF r'   