
    diy$                        d Z ddlZddlZddlZddlZddlmZ ddlmZ  ed      Z	e	dz  dz  Z
e
j                  dd	       e	dz  e	d
z  e	dz  e	dz  e	dz  e	dz  e	dz  e	dz  e	dz  dz  e	dz  dz  e	dz  dz  e	dz  dz  e	dz  e	dz  e	dz  e	dz  dz  e	dz  dz  dz  e	dz  e	dz  dz  e	d z  gZh d!Zh d"Zd3d#ed$efd%Zd&ed$efd'Zd( Zd) Zd*ed$efd+Zd4d,ed-ed$efd.Zd/ Zd0 Zd1 Zed2k(  r e        yy)5a/  
LightRAG Corpus Intelligence Layer
====================================
Indexes all Genesis deep_think files, KG axioms, and strategic documents.
Enables global synthesis queries over the full Genesis knowledge corpus.

Usage:
    # Test index (5 files):
    python3 core/lightrag_corpus.py --test

    # Full index (runs everything):
    python3 core/lightrag_corpus.py --full

    # Query:
    python3 core/lightrag_corpus.py --query "What are the dominant strategic patterns?"

    # List indexable files:
    python3 core/lightrag_corpus.py --list-files
    N)Path)partialz/mnt/e/genesis-systemKNOWLEDGE_GRAPHlightrag_indexT)parentsexist_okplans
deep_thinkdeep_think_resultscoreSunaivadocsConversationsRECEPTIONISTAIstrategyresearch	verticalsswarm_generatedTRADIESpromptsn8nhiveswarm_resultsdataagentsforkszmcp-serversloopzMission Briefs>   .git.venvdistvenvbuild__pycache__node_modulesr   >   .md.py.txt.json	max_filesreturnc                 l   g t               dt        ffd}t        D ]X  }|j                         st        D ]<  }|j                  d|       D ]#  } ||       | st              | k\  sc c c S  > Z t        j                  d      D ]  } ||       | st              | k\  sc S  S )zMGather all indexable corpus files from CORPUS_DIRS plus root-level .md files.fpc                    dt        |       v ryt        d | j                  D              ryt        d | j                  D              ryt        | j                               }|v ryj	                  |       j                  |        y)z0Add a file to the list if it passes all filters.r   Nc              3   >   K   | ]  }|j                  d         yw).N)
startswith.0parts     -/mnt/e/genesis-system/core/lightrag_corpus.py	<genexpr>z1get_corpus_files.<locals>._add.<locals>.<genexpr>M   s     9ts#9s   c              3   ,   K   | ]  }|t         v   y wN)	SKIP_DIRSr1   s     r4   r5   z1get_corpus_files.<locals>._add.<locals>.<genexpr>O   s     6Tty 6s   )stranypartsresolveaddappend)r,   abs_pathfilesseens     r4   _addzget_corpus_files.<locals>._addI   so    s2w&9996RXX66rzz|$tR    *z*.md)	setr   CORPUS_DIRSexistsINCLUDE_EXTENSIONSrgloblenGENESIS_ROOTglob)r)   rB   dextr,   r@   rA   s        @@r4   get_corpus_filesrO   D   s    E5D   !xxz% 	!Cgg#i( !RUy!8 L!	!! ' RUy0L
 LrC   r,   c                 H    	 | j                  dd      S # t        $ r Y yw xY w)z6Read a file safely, returning empty string on failure.zutf-8ignore)encodingerrors )	read_text	Exception)r,   s    r4   read_file_saferW   j   s,    ||WX|>> s    	!!c                    	
 	 ddl m} m} ddlm	m
 t        j                  j                  d      xs@ t        j                  j                  d	      xs t        j                  j                  d
      st        d       d	fd	}ddlm} 
fd} |dd|      }  t        t               ||      }|fS # t        $ r8}t        d|        t        d       t        j                  d       Y d}~d}~ww xY w)zDBuild and return a LightRAG instance pointed at our index directory.r   )LightRAG
QueryParam)gemini_complete_if_cachegemini_embedz$ERROR: lightrag-hku not importable: zFInstall with: /mnt/e/genesis-system/.venv/bin/pip install lightrag-hku   NGEMINI_API_KEY_NEWGEMINI_API_KEYGOOGLE_API_KEYzKWARNING: No Gemini API key found. Set GEMINI_API_KEY_NEW or GEMINI_API_KEY.c           	      D   K    dd| ||xs g d| d {   S 7 w)Nzgemini-2.0-flash)modelpromptsystem_prompthistory_messagesapi_key rg   )rc   rd   re   kwargsrf   r[   s       r4   llm_model_funcz!build_rag.<locals>.llm_model_func   sA     - 
$'-3
 
 
 	
 
s     )EmbeddingFuncc                 F   K   j                  |        d {   S 7 w)N)rf   )func)textsrf   r\   s    r4   embedding_func_with_keyz*build_rag.<locals>.embedding_func_with_key   s#     !&&ug&>>>>s   !!i   i   )embedding_dimmax_token_sizerl   )working_dirri   embedding_func)NN)lightragrY   rZ   lightrag.llm.geminir[   r\   ImportErrorprintsysexitosenvirongetlightrag.utilsrj   r9   LIGHTRAG_INDEX_DIR)rY   rZ   eri   rj   rn   
embed_funcragrf   r[   r\   s           @@@r4   	build_ragr   r   s    1N 	

+, 	,::>>*+	,::>>*+ 
 [\
 -? $J *+%!C
 
?U  4QC89VWs   B= =	C>.C99C>c                 B   K   | j                          d{    | S 7 w)z2Initialize LightRAG storages (required for v1.4+).N)initialize_storages)r   s    r4   init_ragr      s"     

!
!
###J $s   r@   c           	        K   |j                          d{    ddg d}| D ]  }t        |      }t        |j                               dk  r|dxx   dz  cc<   8	 d| d| }|j	                  |       d{    |d	xx   dz  cc<   t        d
|j                   dt        |      dd        |S 7 7 =# t        $ rH}|d   j                  |j                   d|        t        d|j                   d|        Y d}~d}~ww xY ww)z0Insert files into LightRAG index. Returns stats.Nr   )indexedskippedrS   2   r   r]   z[FILE: z]

r   z  Indexed: z (,z chars)rS   z: z  ERROR indexing )	r   rW   rJ   stripainsertrv   namerV   r>   )r@   r   statsr,   contentdocr~   s          r4   index_filesr      s!     
!
!
###a26E 6 $w}}"$)!	6B4uWI.C++c""")!Ky3w<*:'BC6 L $ #  	6(O""bggYb#45%bggYb455	6sL   DB4ADB88B697B80D6B88	D	>D?DD		Dquerymodec                    K   t               \  }}|j                          d{    |j                  |  ||             d{   }|S 7 (7 w)a  
    Run a synthesis query over the indexed corpus.

    Modes:
        global  - Synthesizes across entire corpus (best for patterns/strategy)
        local   - Focuses on most relevant entities
        hybrid  - Combines both
        naive   - Simple similarity search
    Nr   param)r   r   aquery)r   r   r   rZ   results        r4   query_corpusr      sM       kOC

!
!
###::e:4+@:AAFM $As!   !AA!AAAAc                    K   t        d       t        d      } t        dt        |        d       | D ]  }t        d|         t               \  }}t	        | |       d{   }t        dt        j                  |d	
              |d   dkD  rIt        d       t        d       	 |j                  d |d             d{   }t        d|        |S |S 7 z7 # t        $ r}t        d|        Y d}~|S d}~ww xY ww)z0Index first 5 corpus files and run a test query.z%=== LightRAG Test Index (5 files) ===   )r)   z	Selected z files:z  - Nz
Index stats:    indentr   r   z
=== Test Query ===z2Query: What is Genesis System and what does it do?z+What is Genesis System and what does it do?naiver   r   zResult:
zQuery error: )	rv   rO   rJ   r   r   jsondumpsr   rV   )r@   fr   rZ   r   r   r~   s          r4   run_test_indexr      s    	
12q)E	Ic%j\
)* QCj  kOCeS))E	ODJJuQ78
9:Y!$%BC	'::= g. &  F IfX&' L5L *
  	'M!%&&L	'sO   A!D#C$AD*C" C C" D C" "	D+C?9D?DDc                    K   t        d       t               } t        dt        |               t               \  }}t	        | |       d{   }t        |       |d   |d   |d   t        t              d}t        dz  }|j                  t        j                  |d	
             t        d|        t        dt        j                  |d	
              |S 7 w)zIndex all corpus files.z"=== LightRAG Full Corpus Index ===zTotal files to index: Nr   r   rS   )total_filesr   r   rS   	index_dirzindex_manifest.jsonr   r   z
Manifest saved: zFinal stats: )
rv   rO   rJ   r   r   r9   r}   
write_textr   r   )r@   r   rZ   r   manifestmanifest_paths         r4   run_full_indexr      s     	
./E	"3u:,
/0kOCeS))E 5z##/+,H ')>>MTZZ;<	}o
./	M$**U156
78L *s   A	CCBCc                  ~   dd l } | j                  d      }|j                  ddd       |j                  ddd	       |j                  d
t        d       |j                  dt        dg dd       |j                  ddd       |j	                         }|j
                  r7t               }t        dt        |              |D ]  }t        d|         y |j                  rt        j                  t                      y |j                  rt        j                  t                      y |j                  rtt        j                  t!        |j                  |j"                              }t        d|j"                  j%                          d|j                   d       t        |       y |j'                          y )Nr   z"LightRAG Corpus Intelligence Layer)descriptionz--test
store_truez Index 5 files and run test query)actionhelpz--fullzFull corpus indexz--queryzRun a synthesis query)typer   z--modeglobal)r   localhybridr   zQuery mode (default: global))r   defaultchoicesr   z--list-fileszList all corpus fileszTotal corpus files: z  r   z
[z	 QUERY]: 
)argparseArgumentParseradd_argumentr9   
parse_args
list_filesrO   rv   rJ   testasynciorunr   fullr   r   r   r   upper
print_help)r   parserargsr@   r   r   s         r4   mainr     sl   $$1U$VF
<^_
<OP
	2IJ
sH F;  = |BYZD "$SZL12 	ABqc(O	yyN$%	N$%	\$**499EFDIIOO%&i

|2>?frC   __main__r7   )r   )__doc__ry   rw   r   r   pathlibr   	functoolsr   rK   r}   mkdirrF   r8   rH   intlistrO   r9   rW   r   r   dictr   r   r   r   r   __name__rg   rC   r4   <module>r      s  ( 
 
     +,!$558HH      5 $$7<''696?"##j0##j0##k1##&779956O+6H$w.= 6H$##)0	 5 # #t #Lt  /dT 4 *c  C  6.> zF rC   