
    did!                         d Z ddlZddlZddlZddlZddlZddlmZmZ ddlm	Z	 ddl
mZmZ ddlmZmZmZmZ  ej$                  e      ZdZdZd	Zd
Z G d d      ZddZy)u  
GCCWatchdog — Heartbeat Monitor and Auto-Respawn for All 5 Command Centres
===========================================================================
Runs as a separate daemon (in its own tmux session or background process).
Polls heartbeat files every 30 seconds. Respawns any centre whose heartbeat
is stale (older than STALE_THRESHOLD_SECONDS = 120 s).

Author: Genesis Parallel Builder
Created: 2026-02-26
    N)datetimetimezone)Path)AnyOptional)CENTRES_CONFIGGCC_HEARTBEATS_DIRSTALE_THRESHOLD_SECONDS
launch_one   zgcc-watchdog
   <   c                       e Zd ZdZeefdededdfdZdededdfd	Z	dd
Z
dedefdZdedee   fdZdeddfdZdeddfdZdefdZy)GCCWatchdogz
    Monitors all 5 command centres. Respawns if heartbeat stale > 2 min.

    Tracks per-centre respawn counts and enforces a cooldown between
    successive respawns to avoid thrashing.
    poll_intervalstale_thresholdreturnNc                 n   || _         || _        d| _        t        D ci c]  }|d   d
 c}| _        t        D ci c]  }|d   d 
 c}| _        t        j                  t        j                  | j                         t        j                  t        j                  | j                         y c c}w c c}w )NTnamer   )
r   r   runningr   _respawn_counts_last_respawnsignalSIGTERM_signal_handlerSIGINT)selfr   r   cs       =/mnt/e/genesis-system/core/gemini_command_centres/watchdog.py__init__zGCCWatchdog.__init__.   s    
 +. GU/U&	1/U%3:
 !AfItO:
 	fnnd&:&:;fmmT%9%9: 0V:
s
   B-B2signumframec                 >    t         j                  d|       d| _        y )Nu+   GCCWatchdog received signal %d — stoppingF)loggerinfor   )r   r!   r"   s      r   r   zGCCWatchdog._signal_handler@   s    A6J    c                    t         j                  dt        t              | j                  | j
                         | j                  rt        D ]  }|d   }t        | dz  }|j                         s(t         j                  d|       | j                  |       L| j                  |      r>| j                  |      }t         j                  d||xs d       | j                  |       t         j                  d|        t        j                  | j                         | j                  rt         j                  d       y	)
u   
        Continuous monitoring loop.

        Every WATCHDOG_POLL_INTERVAL seconds:
        1. Check each centre's heartbeat file.
        2. If stale → respawn (subject to cooldown and max attempts).
        3. Log summary.
        uB   GCCWatchdog started — monitoring %d centres, poll=%ds, stale=%dsr   z.jsonu/   GCCWatchdog: no heartbeat for %r — respawningu:   GCCWatchdog: %r heartbeat stale (age=%.0fs) — respawningzGCCWatchdog: %r OKzGCCWatchdog stoppedN)r$   r%   lenr   r   r   r   r	   existswarning_maybe_respawn	_is_stale_heartbeat_agedebugtimesleep)r   centrer   hb_fileages        r   runzGCCWatchdog.runF   s    	P  		
 ll( =f~,$u~=~~'NNI4 ''/>>'*--g6CNNT	r
 ''/LL!5t<)=, JJt))*/ ll2 	)*r&   r3   c                 H    | j                  |      }|y|| j                  kD  S )z
        Return True if the heartbeat file's last-written timestamp is older
        than stale_threshold seconds.

        Args:
            hb_file: Path to the heartbeat JSON file.

        Returns:
            True if stale, False if fresh.
        T)r.   r   )r   r3   r4   s      r   r-   zGCCWatchdog._is_stales   s-     !!'*;T))))r&   c                    	 t        |dd      5 }t        j                  |      }ddd       j                  dd      }|rIt	        j
                  |      }t	        j                  t        j                        |z
  j                         S 	 	 |j                         j                  }t        j                         |z
  S # 1 sw Y   xY w# t        j                  t        t        f$ r Y ^w xY w# t        $ r Y yw xY w)a  
        Return the age in seconds of the timestamp recorded inside the heartbeat file.

        Falls back to file mtime if JSON parse fails.

        Args:
            hb_file: Path to heartbeat JSON.

        Returns:
            Age in seconds, or None on error.
        rzutf-8)encodingN	timestamp )openjsonloadgetr   fromisoformatnowr   utctotal_secondsJSONDecodeError
ValueErrorOSErrorstatst_mtimer0   )r   r3   fhdatats_strlast_tsmtimes          r   r.   zGCCWatchdog._heartbeat_age   s    	gsW5 %yy}%XXk2.F"008 X\\2W<KKMM 	LLN++E99;&&% % $$j': 		  		s:   C
 B>A$C
 0C. >CC
 
C+*C+.	C:9C:r2   c                 p   |d   }t        j                         }| j                  j                  |      }|.||z
  t        k  r"t
        j                  d|t        ||z
  z
         y| j                  j                  |d      }|t        k\  rt
        j                  d|t               y| j                  |       y)z
        Respawn a centre if not in cooldown and under max attempts.

        Args:
            centre: Centre config dict from CENTRES_CONFIG.
        r   Nz5GCCWatchdog: %r in respawn cooldown (%.0fs remaining)r   uI   GCCWatchdog: %r has exceeded max respawn attempts (%d) — NOT respawning)r0   	monotonicr   r?   RESPAWN_COOLDOWNr$   r%   r   MAX_RESPAWN_ATTEMPTSerror_respawn)r   r2   r   rA   lastcounts         r   r,   zGCCWatchdog._maybe_respawn   s     f~nn !!%%d+t/? ?KKG C$J/
  $$((q1((LL[$
 fr&   c                 F   |d   }t        |d      }| j                  j                  |d      dz   | j                  |<   t        j                         | j
                  |<   |r*t        j                  d|| j                  |   t               yt        j                  d|       y)	zs
        Kill old tmux session and relaunch daemon.

        Args:
            centre: Centre config dict.
        r   F)r   verboser      z)GCCWatchdog: respawned %r (attempt %d/%d)z!GCCWatchdog: failed to respawn %rN)
r   r   r?   r0   rO   r   r$   r%   rQ   rR   )r   r2   r   successs       r   rS   zGCCWatchdog._respawn   s     f~$6%)%9%9%=%=dA%F%JT"#'>>#34 KK;$$T*$	 LL3Tr&   c                    | j                   rdnd| j                  | j                  t        D cg c]  }|d   	 c}D ci c];  }|| j                  j                  |d      | j                  j                  |      d= c}dS c c}w c c}w )z
        Return current watchdog status including per-centre respawn counts.

        Returns:
            Status dict.
        r   stoppedr   r   )respawn_countlast_respawn)watchdogr   r   centres)r   r   r   r   r   r?   r   )r   r   r   s      r   
get_statuszGCCWatchdog.get_status   s     &*\\	y!//#33 1??1QvY?
 	 %)%9%9%=%=dA%F$($6$6$:$:4$@ 	
 	
 @s   B
A B	
r   N)__name__
__module____qualname____doc__WATCHDOG_POLL_INTERVALr
   intr    r   r   r5   r   boolr-   r   floatr.   dictr,   rS   r`    r&   r   r   r   &   s     46;; ; 
	;$c # $ )+Z* *$ * d x >T d @t  6
D 
r&   r   c                      t        j                  t         j                  dt        j                         t               } | j                          y)z-Entry point to start the GCCWatchdog process.z1%(asctime)s [%(levelname)s] %(name)s: %(message)s)levelformatstreamN)loggingbasicConfigINFOsysstdoutr   r5   )r^   s    r   run_watchdogru      s3    llBzz
 }HLLNr&   ra   )re   r=   rp   r   rs   r0   r   r   pathlibr   typingr   r   $core.gemini_command_centres.launcherr   r	   r
   r   	getLoggerrb   r$   rf   WATCHDOG_TMUX_SESSIONrQ   rP   r   ru   rk   r&   r   <module>r{      sn   	    
  '     
		8	$  &   I
 I
Xr&   