
    Ei                     N   d Z ddlmZmZmZmZmZ ddlmZm	Z	 e G d d             Z
e G d d             Ze G d d	             Ze G d
 d             Ze G d d             Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Zy)z
Component Interface Contracts
==============================
Shared dataclasses and Protocol classes for the KB ingestion pipeline.
Every module MUST use these types for inter-module communication.
    )ProtocolListDictAnyOptional)	dataclassfieldc                   X    e Zd ZU dZeed<   eed<   eed<   eed<   eeef   ed<   eed<   y)	FetchedPagezOutput of the Fetcher module.urlhtmlstatus_codecontent_typeheaders
fetched_atN)__name__
__module____qualname____doc__str__annotations__intr        */mnt/e/genesis-system/core/kb/contracts.pyr   r      s.    '	H
I#s(^Or   r   c                       e Zd ZU dZeed<   eed<   eed<   ee   ed<   ee   ed<   ee   ed<    ee      Z	e
eef   ed	<   y
)ExtractedContentzOutput of the Extractor module.r   titletextheadingscode_blockstablesdefault_factorymetadataN)r   r   r   r   r   r   r   r	   dictr%   r   r   r   r   r   r   r      sI    )	HJ
I3icI$T:Hd38n:r   r   c                       e Zd ZU dZeed<   eed<   eed<   ee   ed<   eed<   eed<   eed<   eed	<   eed
<    ee	      Z
eeef   ed<   y)ChunkzOutput of the Chunker module.chunk_id
source_urlplatformcustomer_idr   r   heading_contextchunk_indextotal_chunksr#   r%   N)r   r   r   r   r   r   r   r   r	   r&   r%   r   r   r   r   r   r(   r(   (   sQ    'MOM#J
I$T:Hd38n:r   r(   c                   6    e Zd ZU dZeed<   ee   ed<   eed<   y)EmbeddedChunkzOutput of the Embedder module.chunkvectorembedding_modelN)	r   r   r   r   r(   r   r   floatr   r   r   r   r1   r1   7   s    (LKr   r1   c                      e Zd ZU dZeed<   eed<   eed<   dZee   ed<    ed       Z	e
e   ed	<    ee      Ze
e   ed
<   dZeed<    ee      Zeeef   ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   y)PlatformConfigzPlatform registry entry.namedisplay_namedocs_base_urlNsitemap_urlc                      dgS )N*r   r   r   r   <lambda>zPlatformConfig.<lambda>F   s    SE r   r#   url_patternsexclude_patternsnone	auth_typeauth_configi  
chunk_size   chunk_overlapi  	max_pages   refresh_hoursFuse_browserless)r   r   r   r   r   r   r;   r   r	   r?   r   listr@   rB   r&   rC   r   rD   r   rF   rG   rI   rJ   boolr   r   r   r7   r7   ?   s    "
I!%K#%#MBL$s)B"'"=d3i=Is"'"=Kc3h=JM3IsM3!OT!r   r7   c            
       \    e Zd ZdededefdZdedee   fdZd
dee   dede	dee   fdZ
y	)IFetcherr   configreturnc                    K   y wNr   )selfr   rO   s      r   
fetch_pagezIFetcher.fetch_pageV           c                    K   y wrR   r   )rS   r   s     r   fetch_sitemapzIFetcher.fetch_sitemapW   rU   rV   urlsconcurrencyc                    K   y wrR   r   )rS   rY   rO   rZ   s       r   fetch_pageszIFetcher.fetch_pagesX   rU   rV   N)   )r   r   r   r   r7   r   rT   r   rX   r   r\   r   r   r   rN   rN   U   sH    TCTTKT=s=tCy=xd3ixxVYxbfgrbsxr   rN   c                   8    e Zd ZdedefdZdee   dee   fdZy)
IExtractorpagerP   c                      y rR   r   )rS   r`   s     r   extractzIExtractor.extract\       r   pagesc                      y rR   r   )rS   rd   s     r   extract_batchzIExtractor.extract_batch]   rc   r   N)r   r   r   r   r   rb   r   rf   r   r   r   r_   r_   [   s(    AKA,<AT4#4T>N9OTr   r_   c                   F    e Zd Zdededee   fdZdee   dedee   fdZy)IChunkercontentrO   rP   c                      y rR   r   )rS   ri   rO   s      r   r2   zIChunker.chunka   rc   r   contentsc                      y rR   r   )rS   rk   rO   s      r   chunk_batchzIChunker.chunk_batchb   rc   r   N)	r   r   r   r   r7   r   r(   r2   rm   r   r   r   rh   rh   `   s8    Z-Z~Z$u+ZgD)9$:gNgW[\aWbgr   rh   c                   D    e Zd Zdedee   fdZddee   dedee	   fdZ
y)		IEmbedderr   rP   c                      y rR   r   )rS   r   s     r   embedzIEmbedder.embedf   rc   r   chunks
batch_sizec                      y rR   r   )rS   rr   rs   s      r   embed_batchzIEmbedder.embed_batchg   rc   r   N)2   )r   r   r   r   r   r5   rq   r(   r   r1   ru   r   r   r   ro   ro   e   s.    2#2$u+2`$u+`3`]H[`r   ro   c                   T    e Zd Zdee   defdZdee   defdZd
de	de
e	   defd	Zy)IStoreembedded_chunksrP   c                      y rR   r   )rS   ry   s     r   upsert_vectorszIStore.upsert_vectorsk   rc   r   rr   c                      y rR   r   )rS   rr   s     r   upsert_metadatazIStore.upsert_metadatal   rc   r   Nr+   r,   c                      y rR   r   rS   r+   r,   s      r   delete_platformzIStore.delete_platformm   rc   r   rR   )r   r   r   r   r1   r   r{   r(   r}   r   r   r   r   r   r   rx   rx   j   s<    Nd=.ANcN>d5k>c>[[(3-[SV[r   rx   c                   v    e Zd Zd	dedee   deeef   fdZd	dededee   deeef   fdZdedeeef   fdZ	y)
IOrchestratorNr+   r,   rP   c                    K   y wrR   r   r   s      r   ingest_platformzIOrchestrator.ingest_platformq   rU   rV   r   c                    K   y wrR   r   )rS   r   r+   r,   s       r   
ingest_urlzIOrchestrator.ingest_urlr   rU   rV   c                      y rR   r   )rS   r+   s     r   
get_statuszIOrchestrator.get_statuss   rc   r   rR   )
r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   p   sb    lcllY]^acf^fYglqCq3qXc]q^bcfhkck^lq>3>4S>>r   r   N)r   typingr   r   r   r   r   dataclassesr   r	   r   r   r(   r1   r7   rN   r_   rh   ro   rx   r   r   r   r   <module>r      s    7 6 (    ; ; ; ; ; ;    " " "*yx yU U
hx h
a a
\X \?H ?r   