
    :uiT                         d Z ddlZddlmZmZ ddlmZ ddlmZ ddl	m	Z	 ddl
mZ  eh d      Zd	Zd
ZdefdZdefdZddedefdZdefdZ G d d      Z e       ZdefdZy)z
Genesis V2 MCP Validation Module
=================================
Pydantic models and validators for MCP tool inputs.
H-11, H-19, H-20: Input validation, sanitization, URL validation.
    N)OptionalAny)urlparse)defaultdict)time)Lock>   filehttphttpsz^https?://(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|localhost|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(?::\d+)?(?:/?|[/?]\S+)$)zjavascript:zdata:z<scriptz</scriptonerroronloadonclickzeval\(urlc                 :   | rt        | t              sy| j                         } t        |       dkD  ry	 t	        |       }|j
                  t        vrdd|j
                   dt         fS |j                  syy# t        $ r}dd	t        |       fcY d
}~S d
}~ww xY w)z
    Validate URL for browser navigation.

    Args:
        url: URL to validate

    Returns:
        Tuple of (is_valid, error_message)
    )FzURL must be a non-empty stringi   )FzURL too long (max 2048 chars)FzInvalid URL scheme: z. Allowed: )FzURL must have a valid hostTNzURL parsing failed: N)	
isinstancestrstriplenr   schemeALLOWED_URL_SCHEMESnetloc	Exception)r   parsedes      2/mnt/e/genesis-system/genesis_v2/mcp/validation.pyvalidate_urlr   "   s     jc*8
))+C
3x$78#== 331&--L_K`abb}}8 8-c!fX6778s#   4A7 )A7 7	B BBBselectorc                     | rt        | t              sy| j                         } t        |       dkD  ry| j	                         }t
        D ]0  }t        j                  ||t        j                        s)dd| fc S  | dfS )z
    Sanitize CSS selector for browser click operations.

    Args:
        selector: CSS selector to sanitize

    Returns:
        Tuple of (sanitized_selector, warning_message)
    ) z#Selector must be a non-empty stringi  )r    z!Selector too long (max 500 chars)r    z(Dangerous pattern detected in selector: N)	r   r   r   r   lowerDANGEROUS_SELECTOR_PATTERNSresearch
IGNORECASE)r   selector_lowerpatterns      r   sanitize_selectorr(   ?   s     :h4:~~H
8}s8^^%N. N99Wnbmm<B7)LMMN d    querylimitc                 ~    | rt        | t              syt        |       dkD  ryt        |t              r
|dk  s|dkD  ryy)zValidate kg_search tool inputs.)Fz Query must be a non-empty stringi  )FzQuery too long (max 1000 chars)   d   )Fz*Limit must be an integer between 1 and 100r   )r   r   r   int)r*   r+   s     r   validate_kg_search_inputr0   Y   s<    
5#.:
5zD9eS!UQY%#+Dr)   entity_jsonc                     ddl }| rt        | t              syt        |       dkD  ry	 |j	                  |       }t        |t              syd	|vry
d|vrydd|fS # |j
                  $ r}dd| dfcY d}~S d}~ww xY w)zs
    Validate entity JSON for kg_ingest.

    Returns:
        Tuple of (is_valid, error_message, parsed_dict)
    r   N)Fz&Entity JSON must be a non-empty stringNiP  )Fz Entity JSON too large (max 50KB)NFzInvalid JSON: )FzEntity must be a JSON objectNid)FzEntity must have 'id' fieldNtype)FzEntity must have 'type' fieldNT)jsonr   r   r   loadsJSONDecodeErrordict)r1   r5   datar   s       r   validate_entity_jsonr:   g   s     jc:F
;%@3zz+& dD!<4;T=$  3s+T223s   A A:(A5/A:5A:c                   0    e Zd ZdZddedefdZd	defdZy)
RateLimiterz,Simple in-memory rate limiter for MCP tools.	max_callswindow_secondsc                 f    || _         || _        t        t              | _        t               | _        y )N)r=   r>   r   listcallsr   _lock)selfr=   r>   s      r   __init__zRateLimiter.__init__   s&    ", &
V
r)   keyc                    t               }|| j                  z
  }| j                  5  | j                  |   D cg c]
  }||kD  s	| c}| j                  |<   t	        | j                  |         | j
                  k\  r
	 ddd       y| j                  |   j                  |       | j
                  t	        | j                  |         z
  }d|fcddd       S c c}w # 1 sw Y   yxY w)z
        Check if a call is allowed under rate limit.

        Returns:
            Tuple of (is_allowed, remaining_calls)
        N)Fr   T)r   r>   rB   rA   r   r=   append)rC   rE   nowwindow_startt	remainings         r   
is_allowedzRateLimiter.is_allowed   s     fT000ZZ 		%*.**S/NQQ=MqNDJJsO4::c?#t~~5!		% 		% JJsO""3'TZZ_)==I)$		% 		%N		% 		%s)   C
CC6CACCC$N)r.   <   )default)__name__
__module____qualname____doc__r/   rD   r   rL    r)   r   r<   r<      s$    6# S %c %r)   r<   	tool_namec                 H    t         j                  |       \  }}|sdd|  dfS y)z!Check rate limit for a tool call.FzRate limit exceeded for z. Try again later.r   )_rate_limiterrL   )rT   allowedrK   s      r   check_rate_limitrX      s3    &11)<GY1)<NOPPr)   )
   )rR   r#   typingr   r   urllib.parser   collectionsr   r   	threadingr   	frozensetr   URL_PATTERNr"   r   r   r(   r/   r0   r:   r<   rV   rX   rS   r)   r   <module>r`      s    
   ! #     9:  ]	 8c 8: 4C  c @% %@  r)   