
    iz                      d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z	d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZ d dl Z!d dl"m#Z# d d	l$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,  ejZ                  d
      Z. G d de	j^                        Z0 e	jb                  d      Z2de2_3        e2ji                  e	jj                         e2jl                  sV e0       Z7e7ji                  e	jj                          e	jp                  d      Z9e7ju                  e9       e2jw                  e7        e	jb                  d      ji                  e	jx                         djdZ= e=        	 d dl>Z>dZ?	 	 	 	 dk	 	 	 	 	 	 	 	 	 	 	 	 	 dldZBeCdf	 	 	 	 	 	 	 	 	 dmdZDer
d dlEmFZFmGZGmHZH  e#dd        ej                  dd      j                         dk(  aKdndZLdodZMd d d dZN G d d e	j                        ZP	 	 	 	 dp	 	 	 	 	 	 	 	 	 dqd!ZQ G d" d#      ZRe G d$ d%             ZSe G d& d'             ZTdrd(ZUdsdtd)ZVdud*ZWdvd+ZX G d, d-eY      ZZ G d. d/eY      Z[ G d0 d1eY      Z\	 	 	 	 	 	 dw	 	 	 	 	 	 	 	 	 	 	 	 	 dxd2Z]d3 Z^d4 Z_dyd5Z` G d6 d7ej                        Zbd8 Zc G d9 d:e      Zd G d; d<      Ze G d= d>ee      Zfdzd?Zgd{d@Zhd|dAZi	 	 	 	 	 	 	 	 	 	 d}dBZjdC Zk	 	 d~	 ddDZle G dE dF             ZmddGZndH ZoddIZpddJZq	 	 d	 	 	 	 	 	 	 ddKZr	 	 d	 	 	 	 	 	 	 ddLZsddMZt	 d	 	 	 	 	 	 	 	 	 ddNZudydOZv	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddPZwdddQZx	 d	 	 	 ddRZydddSZzdsddTZ{ddUZ|	 d	 	 	 	 	 	 	 ddVZ}	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddWZ~ G dX dY      Z	 	 d	 	 	 	 	 	 	 	 	 	 	 ddZZ	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 dd[Zdd\Z	 	 	 	 	 	 dd]Zdd^	 	 	 	 	 	 	 	 	 dd_Z	 	 	 	 	 	 	 	 dd`Z	 	 	 	 	 	 ddaZddbZddcZddddZdydeZ	 	 	 	 	 	 	 	 ddfZddgZ	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddhZ	 	 	 	 ddiZy# e@$ r dZ>dZ?e2j                  d       Y w xY w)    )annotationsN)	dataclass)datetime)wraps)md5)	AnyProtocolCallableTYPE_CHECKINGListOptionalIterableSequence
Collection)load_dotenv)DEFAULT_LOG_MAX_BYTESDEFAULT_LOG_BACKUP_COUNTDEFAULT_LOG_FILENAMEGRAPH_FIELD_SEPDEFAULT_MAX_TOTAL_TOKENSDEFAULT_SOURCE_IDS_LIMIT_METHODVALID_SOURCE_IDS_LIMIT_METHODSSOURCE_IDS_LIMIT_METHOD_FIFOz[\uD800-\uDFFF\uFFFE\uFFFF]c                  ,     e Zd ZdZ fdZ fdZ xZS )SafeStreamHandlera  StreamHandler that gracefully handles closed streams during shutdown.

    This handler prevents "ValueError: I/O operation on closed file" errors
    that can occur when pytest or other test frameworks close stdout/stderr
    before Python's logging cleanup runs.
    c                N    	 t         |           y# t        t        f$ r Y yw xY w)z:Flush the stream, ignoring errors if the stream is closed.N)superflush
ValueErrorOSErrorself	__class__s    J/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/lightrag/utils.pyr   zSafeStreamHandler.flush:   (    	GMOG$ 		    $$c                N    	 t         |           y# t        t        f$ r Y yw xY w)zCClose the handler, ignoring errors if the stream is already closed.N)r   closer   r    r!   s    r$   r(   zSafeStreamHandler.closeB   r%   r&   )__name__
__module____qualname____doc__r   r(   __classcell__r#   s   @r$   r   r   2   s         r   lightragF%(levelname)s: %(message)shttpxc                     	 ddl m}  t        | dd      ry| j                  dfd}|| _        d| _        y# t        $ r Y yw xY w)	zHPrevent ascii_colors from printing flush errors during interpreter exit.r   )ConsoleHandlerN_lightrag_patchedFc                    t        j                         \  }}}|t        t        fv rd|j	                         v ry  | |       y )Nr(   )sysexc_infor   r    lower)r"   messageexc_type_original_handle_errors       r$   _safe_handle_errorz?_patch_ascii_colors_console_handler.<locals>._safe_handle_errori   s;    !Q
G,,GMMO1KdG,r/   T)r:   strreturnNone)ascii_colorsr4   ImportErrorgetattrhandle_errorr5   )r4   r>   r=   s     @r$   #_patch_ascii_colors_console_handlerrF   \   sT    / ~2E:*77- #5N'+N$  s   8 	AATzQpypinyin is not installed. Chinese pinyin sorting will use simple string sorting.c                p  K   |xs t         j                  }t        |      D ]  }	  |         d{     y y7 # t        $ ro}||dz
  k\  r"d| d| d| d| }	 ||	       t        |	      | |d| d|dz    d| d| d		       |d
kD  rt	        j
                  |       d{  7   Y d}~d}~ww xY ww)a  
    Safely execute vector database operations with retry mechanism and exception handling.

    This function ensures that VDB operations are executed with proper error handling
    and retry logic. If all retries fail, it raises an exception to maintain data consistency.

    Args:
        operation: The async operation to execute
        operation_name: Operation name for logging purposes
        entity_name: Entity name for logging purposes
        max_retries: Maximum number of retry attempts
        retry_delay: Delay between retries in seconds
        logger_func: Logger function to use for error messages

    Raises:
        Exception: When operation fails after all retry attempts
    N   zVDB z failed for  after z attempts: z	 attempt : z, retrying...r   )loggerwarningrange	Exceptionasynciosleep)
	operationoperation_nameentity_namemax_retriesretry_delaylogger_funclog_funcattempte	error_msgs
             r$   !safe_vdb_operation_with_exceptionr[      s     2 ,fnnH% 5	5+5 
	5+/)">"2,{m7S^R__jkljmn	#	*1>*)GaK=[MY[\][^^kl ?!--444
	5sD   #B6
;9;B6;	B3AB.#B&$B.)B6.B33B6c                   t        j                  |       }||S |r|dk(  ry|t        u r|j                         dv S |t        u rD	 ddl} |j                  |      }t        |t              r|S t        j                  d|  d       |S 	  ||      S # j                  t        f$ r(}t        j                  d|  d| d	       |cY d}~S d}~ww xY w# t        t        f$ r |cY S w xY w)
a  
    Get value from environment variable with type conversion

    Args:
        env_key (str): Environment variable key
        default (any): Default value if env variable is not set
        value_type (type): Type to convert the value to
        special_none (bool): If True, return None when value is "None"

    Returns:
        any: Converted value from environment or default
    NrA   )true1yestonr   zEnvironment variable z( is not a valid JSON list, using defaultzFailed to parse z as JSON list: z, using default)osgetenvboolr9   listjsonloads
isinstancerK   rL   JSONDecodeErrorr   	TypeError)env_keydefault
value_typespecial_nonevaluerf   parsed_valuerY   s           r$   get_env_valuerq      s    IIgE} T{{} ??? T	%4::e,L,-##+G94\] %   $$j1 	NN"7)?1#_M N		 	" s6   'B -B 	C C&C	C	CC%$C%)BaseKVStorageBaseVectorStorage
QueryParamz.env)dotenv_pathoverrideVERBOSEfalser]   c                    t         rt        j                  | g|i | y|r| |z  }n| }t        |      dkD  r|dd dz   n|}t	        j
                  dd|      }t        j                  |fi | y)a`  Function for outputting detailed debug information.
    When VERBOSE_DEBUG=True, outputs the complete message.
    When VERBOSE_DEBUG=False, outputs only the first 50 characters.

    Args:
        msg: The message format string
        *args: Arguments to be formatted into the message
        **kwargs: Keyword arguments passed to logger.debug()
       N...z\n+
)VERBOSE_DEBUGrK   debuglenresub)msgargskwargsformatted_msgtruncated_msgs        r$   verbose_debugr      sy     S*4*6* $JMM ,/}+=+CM$3%' 	 vt];]-f-r/   c                    | a y)z&Enable or disable verbose debug outputN)r}   )enableds    r$   set_verbose_debugr     s	     Mr/   )llm_call	llm_cache
embed_callc                  (     e Zd ZdZ fdZd Z xZS )LightragPathFilterzBFilter for lightrag logger to filter out frequent path access logsc                4    t         |           g d| _        y )N)z
/documentsz/documents/paginatedz/healthz/webui/z/documents/pipeline_status)r   __init__filtered_pathsr!   s    r$   r   zLightragPathFilter.__init__  s    
r/   c                D   	 t        |d      rt        |j                  t              syt	        |j                        dk  ry|j                  d   }|j                  d   }|j                  d   }|dk(  s|dk(  r|d	k(  s|d
k(  r|| j
                  v ryy# t        $ r Y yw xY w)Nr   T   rH         GETPOST   i0  F)hasattrrh   r   tupler   r   rN   )r"   recordmethodpathstatuss        r$   filterzLightragPathFilter.filter#  s    	66**V[[%2P6;;!# [[^F;;q>D[[^F 5Ff$4s]fmD/// 		s   &B B AB 	BB)r)   r*   r+   r,   r   r   r-   r.   s   @r$   r   r     s    L	
r/   r   c           	     Z   t        j                  d      }t        j                  d      }t        j                  |       }|j                  |       g |_        d|_        t               }|j                  |       |j                  |       |j                  |       |r |it        j                  dt        j                               }	t        j                  j                  t        j                  j                  |	t                    }t        j                   t        j                  j#                  |      d       t%        dt&        t(              }
t%        d	t*        t(              }	 t         j                  j-                  ||
|d
      }|j                  |       |j                  |       |j                  |       |rt7               }|j9                  |       yy# t.        $ rC}t0        j3                  d| dt5        |              t0        j3                  d       Y d}~fd}~ww xY w)a  Set up a logger with console and optionally file handlers

    Args:
        logger_name: Name of the logger to set up
        level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
        add_filter: Whether to add LightragPathFilter to the logger
        log_file_path: Path to the log file. If None and file logging is enabled, defaults to lightrag.log in LOG_DIR or cwd
        enable_file_logging: Whether to enable logging to a file (defaults to True)
    z4%(asctime)s - %(name)s - %(levelname)s - %(message)sr1   FNLOG_DIRT)exist_okLOG_MAX_BYTESLOG_BACKUP_COUNTutf-8)filenamemaxBytesbackupCountencodingzCould not create log file at rJ   z$Continuing with console logging only)logging	Formatter	getLoggersetLevelhandlers	propagater   setFormatter
addHandlerrb   rc   getcwdr   abspathjoinr   makedirsdirnamerq   r   intr   RotatingFileHandlerPermissionErrorrK   rL   r?   r   	addFilter)logger_namelevel
add_filterlog_file_pathenable_file_loggingdetailed_formattersimple_formatterlogger_instanceconsole_handlerlog_dirlog_max_byteslog_backup_countfile_handlerrY   path_filters                  r$   setup_loggerr   >  s   " !**> (()EF''4OU#!O %O ()O  !12U#/  ii	299;7GGGOOBGGLLBV,WXM 	BGGOOM2TB &o7LcR( 8#
	C"++??&&, 	 @ L %%&89!!%(&&|4 (*!!+.   	CNN:=/CPQF8TUNNABB	Cs   )AG 	H*'9H%%H*c                      e Zd ZdZd Zd Zy)UnlimitedSemaphorez/A context manager that allows unlimited access.c                   K   y wN r"   s    r$   
__aenter__zUnlimitedSemaphore.__aenter__  	        c                   K   y wr   r   )r"   r;   exctbs       r$   	__aexit__zUnlimitedSemaphore.__aexit__  r   r   N)r)   r*   r+   r,   r   r   r   r/   r$   r   r     s    9r/   r   c                  ^    e Zd ZU dZded<   ded<   dZded<   dZd	ed
<   dZd	ed<   dZd	ed<   y)	TaskStatez1Task state tracking for priority queue managementzasyncio.Futurefuturefloat
start_timeNexecution_start_timeFrd   worker_startedcancellation_requestedcleanup_done)	r)   r*   r+   r,   __annotations__r   r   r   r   r   r/   r$   r   r     s;    ;"&%& ND #(D(L$r/   r   c                  ^    e Zd ZU dZded<   ded<   dZded<   d	Zd
ed<   dZded<   d ZddZ	y)EmbeddingFuncuw  Embedding function wrapper with dimension validation

    This class wraps an embedding function to ensure that the output embeddings have the correct dimension.
    If wrapped multiple times, the inner wrappers will be automatically unwrapped to prevent
    configuration conflicts where inner wrapper settings would override outer wrapper settings.

    Using functools.partial for parameter binding:
        A common pattern is to use functools.partial to pre-bind model and host parameters
        to an embedding function. When the base embedding function is already decorated with
        @wrap_embedding_func_with_attrs (e.g., ollama_embed), use `.func` to access the
        original unwrapped function to avoid double wrapping:

        Example:
            from functools import partial

            # ❌ Wrong - causes double wrapping (inner EmbeddingFunc still executes)
            func=partial(ollama_embed, embed_model="bge-m3:latest", host="http://localhost:11434")

            # ✅ Correct - access the unwrapped function via .func
            func=partial(ollama_embed.func, embed_model="bge-m3:latest", host="http://localhost:11434")

    Args:
        embedding_dim: Expected dimension of the embeddings(For dimension checking and workspace data isolation in vector DB)
        func: The actual embedding function to wrap
        max_token_size: Enable embedding token limit checking for description summarization(Set embedding_token_limit in LightRAG)
        send_dimensions: Whether to inject embedding_dim argument to underlying function
        model_name: Model name for implementing workspace data isolation in vector DB
    r   embedding_dimcallablefuncNz
int | Nonemax_token_sizeFrd   send_dimensions
str | None
model_namec                   d}d}t        | j                  t              rO|dz  }||kD  rt        d| d      | j                  j                  | _        t        | j                  t              rO|dkD  rt        j                  d| d       yy)	ax  Unwrap nested EmbeddingFunc to prevent double wrapping issues.

        When an EmbeddingFunc wraps another EmbeddingFunc, the inner wrapper's
        __call__ preprocessing would override the outer wrapper's settings.
        This method detects and unwraps nested EmbeddingFunc instances to ensure
        that only the outermost wrapper's configuration is applied.
           r   rH   z$EmbeddingFunc unwrap depth exceeded z'. Possible circular reference detected.z/Detected nested EmbeddingFunc wrapping (depth: zu), auto-unwrapped to prevent configuration conflicts. Consider using .func to access the unwrapped function directly.N)rh   r   r   r   rK   rL   )r"   max_unwrap_depthunwrap_counts      r$   __post_init__zEmbeddingFunc.__post_init__  s     M2AL.. :;K:L M< < 
 		DI M2 !NNA, PR R r/   c                  K   | j                   rOd|v r<|d   }|5|| j                  k7  r&t        j                  d| d| j                   d       | j                  |d<   | j                  @d|vr<t        j                  | j                        }d|j                  v r| j                  |d<    | j                  |i | d {   }|j                  }| j                  }||z  dk7  rt        d| d| d	      ||z  }|r>t        |d   t        t        f      r%t        |d         }	||	k7  rt        d
|	 d| d      |S 7 }w)Nr   z%Ignoring user-provided embedding_dim=z, using declared embedding_dim=z from decoratorr   r   z7Embedding dimension mismatch detected: total elements (z2) cannot be evenly divided by expected dimension (z). z Vector count mismatch: expected z vectors but got z! vectors (from embedding result).)r   r   rK   rL   r   inspect	signaturer   
parameterssizer   rh   re   r   r   )
r"   r   r   user_provided_dimsigresulttotal_elementsexpected_dimactual_vectorsexpected_vectorss
             r$   __call__zEmbeddingFunc.__call__  s    &($*?$;! &1)T-?-??NN?@Q?R S88<8J8J7K?\ '+&8&8F?# */?v/M##DII.C3>>1+/+>+>'( !tyy$1&11  )) L(A-##1"2 3''3nC9  (<7JtAwu6"47|!11   011B>BRRsu 
 1 2s   B=D??D= A>D?)r@   z
np.ndarray)
r)   r*   r+   r,   r   r   r   r   r   r   r   r/   r$   r   r     sA    : 
N!%NJ%!OT! 
 82r/   r   c                 $   dj                  | D cg c]  }t        |       c}      }	 t        |j                  d            j	                         S c c}w # t
        $ r/ |j                  dd      }t        |      j	                         cY S w xY w)zCompute a hash for the given arguments with safe Unicode handling.

    Args:
        *args: Arguments to hash
    Returns:
        str: Hash string
     r   replace)errors)r   r?   r   encode	hexdigestUnicodeEncodeError)r   argargs_str
safe_bytess       r$   compute_args_hashr    s~     wwD1SC12H+8??7+,6688 2  +__WY_?
:((**+s   A'A 5BBc                    |t        |       z   S )z
    Compute a unique ID for a given content string.

    The ID is a combination of the given prefix and the MD5 hash of the content string.
    )r  )contentprefixs     r$   compute_mdhash_idr  '  s     %g...r/   c                    |  d| d| S )aC  Generate a flattened cache key in the format {mode}:{cache_type}:{hash}

    Args:
        mode: Cache mode (e.g., 'default', 'local', 'global')
        cache_type: Type of cache (e.g., 'extract', 'query', 'keywords')
        hash_value: Hash value from compute_args_hash

    Returns:
        str: Flattened cache key
    :r   )mode
cache_type
hash_values      r$   generate_cache_keyr  0  s     V1ZL*..r/   c                `    | j                  dd      }t        |      dk(  r|d   |d   |d   fS y)zParse a flattened cache key back into its components

    Args:
        cache_key: Flattened cache key in format {mode}:{cache_type}:{hash}

    Returns:
        tuple[str, str, str] | None: (mode, cache_type, hash) or None if invalid format
    r  r   r   r   rH   N)splitr   )	cache_keypartss     r$   parse_cache_keyr  >  s;     OOC#E
5zQQxq58++r/   c                      e Zd ZdZy)QueueFullErrorz4Raised when the queue is full and the wait times outN)r)   r*   r+   r,   r   r/   r$   r  r  N  s    >r/   r  c                  &     e Zd ZdZdd fdZ xZS )WorkerTimeoutErrorz@Worker-level timeout exception with specific timeout informationc                N    || _         || _        t        |   d| d| d       y )NzWorker z timeout after s)timeout_valuetimeout_typer   r   )r"   r  r  r#   s      r$   r   zWorkerTimeoutError.__init__W  s/    *(7<.aPQr/   )	execution)r  r   r  r?   r)   r*   r+   r,   r   r-   r.   s   @r$   r  r  T  s    JR Rr/   r  c                  $     e Zd ZdZd fdZ xZS )HealthCheckTimeoutErrorz$Health Check-level timeout exceptionc                P    || _         || _        t        |   d| d|dd       y )Nz6Task forcefully terminated due to execution timeout (>zs, actual: .1fzs))r  execution_durationr   r   )r"   r  r%  r#   s      r$   r   z HealthCheckTimeoutError.__init__`  s9    *"4D]OS^_qru^vvxy	
r/   )r  r   r%  r   r   r.   s   @r$   r"  r"  ]  s    .
 
r/   r"  c                *      fd}|S )aS  
    Enhanced priority-limited asynchronous function call decorator with robust timeout handling

    This decorator provides a comprehensive solution for managing concurrent LLM requests with:
    - Multi-layer timeout protection (LLM -> Worker -> Health Check -> User)
    - Task state tracking to prevent race conditions
    - Enhanced health check system with stuck task detection
    - Proper resource cleanup and error recovery

    Args:
        max_size: Maximum number of concurrent calls
        max_queue_size: Maximum queue capacity to prevent memory overflow
        llm_timeout: LLM provider timeout (from global config), used to calculate other timeouts
        max_execution_timeout: Maximum time for worker to execute function (defaults to llm_timeout + 30s)
        max_task_duration: Maximum time before health check intervenes (defaults to llm_timeout + 60s)
        cleanup_timeout: Maximum time to wait for cleanup operations (defaults to 2.0s)
        queue_name: Optional queue name for logging identification (defaults to "limit_async")

    Returns:
        Decorator function
    c                :   	
 t               st        dt                      dz  dz  dz   t        j                        	t               t        j                         dt        j                         dd i t        j                         t        j                         d
 	fdf	d
fd		fd
}t               dd d d	f	d
       }||_        |S )Nz Expected a callable object, got r      )maxsizer   Fc            
     	  K   	 j                         s	 	 t        j                  
j                         d       d{   \  } }}}}4 d{    |vr"
j                          	 ddd      d{    w|   }d|_        t        j                         j                         |_	        ddd      d{    j                  s|j                  j                         r?4 d{    j                  |d       ddd      d{    
j                          	 	&t        j                   |i |	       d{   }n |i | d{   }|j                  j                         s|j                  j                  |       4 d{    j                  |d       ddd      d{    
j                          	 j                         st         j-                   d       y7 # t        j                  $ r Y w xY w7 7 7 d# 1 d{  7  sw Y   uxY w7 L7 -# 1 d{  7  sw Y   >xY w7 7 # t        j                  $ ra t         j#                   d| d	 d       |j                  j                         s%|j                  j%                  t'        	d             Y 9t        j(                  $ rS |j                  j                         s|j                  j+                          t         j-                   d	| d
       Y t.        $ rf}t         j1                   d| dt3        |              |j                  j                         s|j                  j%                  |       Y d}~d}~ww xY w7 7 # 1 d{  7  sw Y   xY w# 4 d{  7   j                  |d       ddd      d{  7   n# 1 d{  7  sw Y   nxY w
j                          w xY w# t.        $ rL}t         j1                   dt3        |              t        j4                  d       d{  7   Y d}~d}~ww xY w# t         j-                   d       w xY ww)zMEnhanced worker that processes tasks with proper timeout and state management      ?timeoutNTz: Worker timeout for task rI   r  r  z: Task z cancelled during executionz': Error in decorated function for task rJ   z: Critical error in worker: 皙?z: Worker exiting)is_setrO   wait_forgetTimeoutError	task_doner   get_event_looptimer   r   r   	cancelledpopdone
set_resultrK   rL   set_exceptionr  CancelledErrorcancelr~   rN   errorr?   rP   )prioritycounttask_idr   r   
task_stater   rY   r   max_execution_timeoutqueue
queue_nameshutdown_eventtask_statestask_states_locks           r$   workerzCpriority_limit_async_func_call.<locals>.final_decro.<locals>.worker  s    X>(//1T1	% '.&6&6uyy{C&P P ( % ' $ & $4 	 	&k9 % 1 (	 	 	 *5W)=J8<J5 !( 6 6 8 = = ? ';	 	 '==)00::<'7 ? ? + >? ?!OO-$*.4@/6/?/?$($$9&$9CX0" *" 04T/DV/D)D $.#4#4#9#9#; * 1 1 < <V D: (8 ? ? + >? ?!OO-] )//1n 
|+;<=[ !Q&33 %$%	 	 	 	 	 	 ? ? ? ? ?*" *E  '33 
""NN#-,.H	QXYnXoop q $.#4#4#9#9#; * 1 1 ? ?$6(={%&!"
  '55 #-#4#4#9#9#; * 1 1 8 8 :"LL#-,ggY>Y Z  ) C"LL#-,.UV]U^^`adefag`h i $.#4#4#9#9#; * 1 1 ? ? BC? ? ? ? ?'7 ? ? + >? ? ? ? ?!OO-$ 1)l*Fs1vhO &mmC0001 
|+;<=s.  RQ. (G>  G;
G> P HP H!+P 6H7P ;Q. <3H!/P :H;0P +H7,P /H=P H:P "Q. %"I II I9I P N P N&/P :N#;P Q. "R;G> >HP Q. HP P P !H4'H*(H4/	P :P =IIIP I I A0NN< A!N,N< /N7ANN< NN<  P #P &N9,N/-N94P <PO
PO-P&O)'P-O?3O64O?;PP 	Q+;Q&QQ& Q. &Q++Q. .RRc            	     J  	K   	 j                         sZt        j                  d       d{    t        j                         j	                         } )g }4 d{    t        j                               D ]R  \  }}|j                  s|j                   | |j                  z
  kD  s3|j                  || |j                  z
  f       T ddd      d{    |D ]  \  }}t        j                   d| d|dd       4 d{    |v rV|   }|j                  j                         s%|j                  j                  t        |             j!                  |d       ddd      d{     t#              }|D ch c]  }|j                         s| }}j%                  |       t'              }|z
  }	|	dkD  rt        j)                   d|	 d	       t#               }
t+        |	      D ]H  }t        j,                                }|
j/                  |       |j1                  j2                         J j5                  |
       j                         sZt        j=                   d       dy7 ]7 .7 # 1 d{  7  sw Y   xY w7 7 3# 1 d{  7  sw Y   xY wc c}w # t6        $ r-}t        j9                   d
t;        |              Y d}~d}~ww xY w# t        j=                   d       dw xY ww)z<Enhanced health check with stuck task detection and recoveryr   Nz: Detected stuck task z (execution time: r$  zs), forcing cleanupr   z: Creating z new workersz": Error in enhanced health check: z$: Enhanced health check task exitingF)r/  rO   rP   r4  r5  re   itemsr   r   appendrK   rL   r   r8  r:  r"  r7  setdifference_updater   inforM   create_taskaddadd_done_callbackdiscardupdaterN   r=  r?   r~   )current_timestuck_tasksr@  rA  r%  current_tasksr`   
done_tasksactive_tasks_countworkers_needed	new_tasksr<   taskrY   initializedmax_sizemax_task_durationrD  rE  rF  rG  tasksrH  s                 r$   enhanced_health_checkzRpriority_limit_async_func_call.<locals>.final_decro.<locals>.enhanced_health_check  s1    A$(//1!--***#*#9#9#;#@#@#BL )4&(#3 & &7;K<M<M<O7P & 3 %/$=$=(2(G(G(S(4z7V7V(V&7)8 %0$6$6,3,8.8.M.M-N)*%&&& &$ <G C7G%7"NN#-,.DWIM_`rsv_w  xK  !L (8 	C 	C#*k#91<W1EJ+5+<+<+A+A+C(2(9(9(G(G,C0ACU-.)*
 %0OOGT$B	C 	C 	C	C  %(JM-:!Gaffh!!GJ!G++J7),U&%-0B%BN%))l+n5E\R %(E	!&~!6 BA#*#6#6vx#@D%MM$/ 225==AB Y/u )//1~ 
|+OPQ# +& & & & &,	C 	C 	C 	C 	C "H"  X
|+McRSfXVWWX 
|+OPQ#s   L#)K J1K  J!K $,JJJ1"JK J1K J*K AJ0/K :J-;K K&K*CK 0L#K K K J'JJ'"	K -K 0K	6J97K	>K 	L#K<7L <LL L  L#c                   K   ry
4 d{    r	 ddd      d{    ydkD  r!dz  t         j                   d d       ndt              } | D ch c]  }|j                         s| }}j	                  |       t              }|dkD  r dkD  rt         j                   d| d       |z
  }t        |      D ]H  }t        j                                }j                  |       |j                  j                         J t        j                   	             dg }|j                  d	 d
       |j                  d d
       |j                  d d
       |rddj                  |       dnd}t         j                   d| d|        ddd      d{    y7 7 c c}w 7 # 1 d{  7  sw Y   yxY ww)z@Ensure worker system is initialized with enhanced error handlingNr   rH   z : Reinitializing system (count: )rJ   z, tasks still running during reinitializationTzFunc: r  zWorker: zHealth Check: z(Timeouts: , r   z new workers initialized )rK   rL   rL  r8  rM  r   rM   rO   rO  rP  rQ  rR  rK  r   rN  )rV  r`   rW  rX  rY  r<   r[  timeout_infotimeout_strr`  initialization_lockr\  llm_timeoutrB  r]  r^  rD  reinit_countr_  rH  worker_health_check_tasks            r$   ensure_workerszKpriority_limit_async_func_call.<locals>.final_decro.<locals>.ensure_workersH  s     * 0 00 0 0  !# A%LNN%,&F|nTUV $%L !$E
)6CA!&&(aC
C''
3%(Z"%)lQ.>NN%,b);(<<hi
 "*,>!>~. :A"..vx8DIIdO**5==9: ,3+>+>?T?V+W("!* ''&Q(?@(4 ''(3H2I(KL$0 ''.9J8K1(MN AMk$))L"9!:!<RT  !l"^$44Mk][]0 0 0 0 D0 0 0 0sw   
G!F?G!GG!GG!7GG3G7D7G.G!9G
:G!G!G
G!GGGG!c                   K   t         j                   d       j                          t              D ]#  } | j	                         r| j                          % 	4 d{    t        j                               D ]:  \  }}|j                  j	                         r!|j                  j                          < j                          ddd      d{    	 t        j                  j                         d       d{    t        
      D ]#  }|j	                         r|j                          % 
rt        j                  
ddi d{    r+j	                         sj                          	  d{    t         j                   d       y7 ?7 # 1 d{  7  sw Y   xY w7 # t        j                  $ r t         j                   d       Y w xY w7 7 l# t        j                  $ r Y w xY ww)	z6Gracefully shut down all workers and cleanup resourcesz&: Shutting down priority queue workersNg      @r,  z4: Timeout waiting for queue to empty during shutdownreturn_exceptionsTz*: Priority queue workers shutdown complete)rK   rN  rL  re   r8  r<  rJ  r   clearrO   r0  r   r2  rL   gatherr;  )r   r@  rA  r[  active_futuresrC  rD  rE  rF  rG  r_  ri  s       r$   shutdownzEpriority_limit_async_func_call.<locals>.final_decro.<locals>.shutdown  s    KK:,&LMN  ~. ${{}MMO$
 ( $ $+/0A0A0C+D 3'GZ%,,113"))0023 !!#	$ $&&uzz|SAAA U "yy{KKM"
 nneDtDDD (0H0M0M0O(//1222 KK:,&PQR?$ $ $ $ $ B'' !l"VW E 3-- s   AHH#F"$H':F'",F'HF%H(F> F<F> H+,HG/&H?G3 G1G3 	H%H'F9-F0.F95H<F> >+G,)H+G,,H1G3 3H	HH		H
   )	_priority_timeout_queue_timeoutc           	       	K            d{    t        t        j                                dt        j                         j	                          }t        j
                         }t        |t        j                         j	                               }	 4 d{    ||<   ddd      d{    j                  |       4 d{    }dz  ddd      d{    	 |4t        j                  j                  | |||f      |       d{    nj                  | |||f       d{    	 |\t        j                  ||       d{   j                  |       4 d{    j!                  |d       ddd      d{    S | d{   j                  |       4 d{    j!                  |d       ddd      d{    S 7 7 L7 :# 1 d{  7  sw Y   KxY w7 77 ## 1 d{  7  sw Y   4xY w7 7 # t        j                  $ r t         d| d      t        $ r'}	|j                         s|j                  |	        d}	~	ww xY w7 !7 	7 # 1 d{  7  sw Y   S xY w7 7 7 # 1 d{  7  sw Y   S xY w# t        j                  $ r 4 d{  7   |v r
d|   _        ddd      d{  7   n# 1 d{  7  sw Y   nxY w|j                         s|j%                          t        j                         j	                         }
|v rst        j                         j	                         |
z
  k  rKt        j&                  d	       d{  7   |v r)t        j                         j	                         |
z
  k  rKt         d
| d      t(        $ r}	t         dt+        |	             d}	~	wt,        $ r}	t         dt+        |	             d}	~	ww xY w# j                  |       4 d{  7   j!                  |d       ddd      d{  7   w # 1 d{  7  sw Y   w xY wxY ww)ag  
            Execute function with enhanced priority-based concurrency control and timeout handling

            Args:
                *args: Positional arguments passed to the function
                _priority: Call priority (lower values have higher priority)
                _timeout: Maximum time to wait for completion (in seconds, none means determinded by max_execution_timeout of the queue)
                _queue_timeout: Maximum time to wait for entering the queue (in seconds)
                **kwargs: Keyword arguments passed to the function

            Returns:
                The result of the function call

            Raises:
                TimeoutError: If the function call times out at any level
                QueueFullError: If the queue is full and waiting times out
                Any exception raised by the decorated function
            Nr<   )r   r   rH   r,  z: Queue full, timeout after z secondsTr.  z: User timeout after rJ   )idrO   current_taskr4  r5  Futurer   rP  r0  putr2  r  rN   r8  r:  rR  r7  r   r<  rP   r  r?   r"  )rr  rs  rt  r   r   r@  r   rA  current_countrY   cleanup_startro  cleanup_timeoutcounterrj  rf  rC  rD  rF  rG  s              r$   	wait_funczFpriority_limit_async_func_call.<locals>.final_decro.<locals>.wait_func  sk    , !""" G00234Ag6L6L6N6S6S6U5VWG^^%F #'*@*@*B*G*G*IJL3+ 6 6+5K(6 6 ""6* / ! !$+MqLG! !
%1%..!II!*M7D& Q %3	   $ii&wfM  !B+%,%5%5fh%GGF &&v.+ 3 3OOGT23 3 3E &,|B &&v.+ 3 3OOGT23 3 3m #6 6 6 6 6! ! ! ! ! ++ (%,&B>BRRZ[  ! !;;=,,Q/	  HH3 3 3 3 3E  ,D3 3 3 3 3C ++   0 O O"k1JNK0GO O O O O
 ";;= %,$:$:$<$A$A$CM;.#22499;mK)* &mmC000	  ;.#22499;mK)*
 '%,&;H:XN  * B&*RAx'@AA. B&*RAx'@AAB &&v.+ 3 3OOGT23 3 3 3 3si  Q;G&BQ;P! G)P! !G/'P! 2G,3P! HP! HP! %H&P! +0H& H!H& :H$;H&  J4 I<J4 Q;6I?7Q;:JQ;JQ;J4 #J$J4 'Q;>J?Q;JQ; J!Q;)P! ,P! /H5G86H=	P! P! HHHP! !H& $H& &,I9"I44I99P! <J4 ?Q;Q;J	
JJ	Q;J4 Q;Q;J1	%J(&J1	-Q;4PKPK4"P-K0.P4L	:K=;L	BPN1PPO44P PPP! !Q89P<
:Q8>Q#Q8QQ8#Q5)Q,*Q51Q88Q;)r   rj   typerO   PriorityQueuerL  LockEventweakrefWeakSetr   rp  )r   rp  r~  ro  r}  r`  rj  rf  r\  rC  rh  rE  rF  rG  r_  rH  ri  r|  rg  rB  max_queue_sizer]  r^  rD  s   `  @@@@@@@@@@@@@@r$   final_decroz3priority_limit_async_func_call.<locals>.final_decro  s#   ~>tDzlKLL "$,!O & !(!Ob( " %%n=%lln #'  "<<> *Z	> Z	>xD	$ D	$L7	 7	r+	S +	SZ 
t$tl	3 l	3 
l	3^ &	r/   r   )r]  rg  rB  r^  r  r|  rD  r  s   ``````` r$   priority_limit_async_func_callr  h  s    >Y Yv r/   c                      d fd}|S )uc  Decorator to add embedding dimension and token limit attributes to embedding functions.

    This decorator wraps an async embedding function and returns an EmbeddingFunc instance
    that automatically handles dimension parameter injection and attribute management.

    WARNING: DO NOT apply this decorator to wrapper functions that call other
    decorated embedding functions. This will cause double decoration and parameter
    injection conflicts.

    Correct usage patterns:

    1. Direct decoration:
        ```python
        @wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192, model_name="my_embedding_model")
        async def my_embed(texts, embedding_dim=None):
            # Direct implementation
            return embeddings
        ```
    2. Double decoration:
        ```python
        @wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192, model_name="my_embedding_model")
        @retry(...)
        async def my_embed(texts, ...):
            # Base implementation
            pass

        @wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=4096, model_name="another_embedding_model")
        # Note: No @retry here!
        async def my_new_embed(texts, ...):
            # CRITICAL: Call .func to access unwrapped function
            return await my_embed.func(texts, ...)  # ✅ Correct
            # return await my_embed(texts, ...)     # ❌ Wrong - double decoration!
        ```

    The decorated function becomes an EmbeddingFunc instance with:
    - embedding_dim: The embedding dimension
    - max_token_size: Maximum token limit (optional)
    - model_name: Model name (optional)
    - func: The original unwrapped function (access via .func)
    - __call__: Wrapper that injects embedding_dim parameter

    Args:
        embedding_dim: The dimension of embedding vectors
        max_token_size: Maximum number of tokens (optional)
        send_dimensions: Whether to pass embedding_dim as a keyword argument (for models with configurable embedding dimensions).

    Returns:
        A decorator that wraps the function as an EmbeddingFunc instance
    c                &    t        di d| i}|S )Nr   r   )r   )r   new_funcr   s     r$   r  z3wrap_embedding_func_with_attrs.<locals>.final_decroX  s     5655r/   )r@   r   r   )r   r  s   ` r$   wrap_embedding_func_with_attrsr  %  s    f r/   c                    t         j                  j                  |       sy t        | d      5 }t	        j
                  |      cd d d        S # 1 sw Y   y xY w)Nz	utf-8-sigr   )rb   r   existsopenrf   load)	file_namefs     r$   	load_jsonr  _  sA    77>>)$	i+	. !yy|  s   AAc                d    | s| S t         j                  |       s| S t         j                  d|       S )a  Remove characters that cannot be encoded in UTF-8 for JSON serialization.

    Uses regex for optimal performance with zero-copy optimization for clean strings.
    Fast detection path for clean strings (99% of cases) with efficient removal for dirty strings.

    Args:
        text: String to sanitize

    Returns:
        Original string if clean (zero-copy), sanitized string if dirty
    r   )_SURROGATE_PATTERNsearchr   texts    r$   _sanitize_string_for_jsonr  f  s6      $$T* !!"d++r/   c                  4     e Zd ZdZ fdZd fd	Zd Z xZS )SanitizingJSONEncoderz
    Custom JSON encoder that sanitizes data during serialization.

    This encoder cleans strings during the encoding process without creating
    a full copy of the data structure, making it memory-efficient for large datasets.
    c                    t        |t              r(t        j                  j	                  t        |            S t        |   |      S )z4Override encode method to handle simple string cases)rh   r?   rf   encoderencode_basestringr  r   r  )r"   or#   s     r$   r  zSanitizingJSONEncoder.encode  s7    a<<112KA2NOOw~a  r/   c              #  b   K   | j                  |      }t        | 	  ||      D ]  }|  yw)z
        Override iterencode to sanitize strings during serialization.
        This is the core method that handles complex nested structures.
        N)_sanitize_for_encodingr   
iterencode)r"   r  	_one_shot	sanitizedchunkr#   s        r$   r  z SanitizingJSONEncoder.iterencode  s;      //2	 W'	9= 	EK	s   ,/c                   t        |t              rt        |      S t        |t              rOi }|j	                         D ]8  \  }}t        |t              rt        |      n|}| j                  |      }|||<   : |S t        |t        t        f      rA|D cg c]  }| j                  |       }}t        |t              r t        |      |      S |S |S c c}w )z
        Recursively sanitize strings in an object.
        Creates new objects only when necessary to avoid deep copies.

        Args:
            obj: Object to sanitize

        Returns:
            Sanitized object with cleaned strings
        )	rh   r?   r  dictrJ  r  re   r   r  )	r"   objnew_dictkvclean_kclean_vitemcleaneds	            r$   r  z,SanitizingJSONEncoder._sanitize_for_encoding  s     c3,S11T"H		 ,1:DQ:L3A6RS55a8$+!, OdE]+EHITt2248IGI)3C)?949W%LWL J Js   CF)r)   r*   r+   r,   r  r  r  r-   r.   s   @r$   r  r  }  s    !
r/   r  c                   	 t        |dd      5 }t        j                  | |dd       ddd       y# 1 sw Y   yxY w# t        t        f$ r"}t
        j                  d|        Y d}~nd}~ww xY wt        |dd      5 }t        j                  | |ddt        	       ddd       n# 1 sw Y   nxY wt
        j                  d
|        y)a  
    Write JSON data to file with optimized sanitization strategy.

    This function uses a two-stage approach:
    1. Fast path: Try direct serialization (works for clean data ~99% of time)
    2. Slow path: Use custom encoder that sanitizes during serialization

    The custom encoder approach avoids creating a deep copy of the data,
    making it memory-efficient. When sanitization occurs, the caller should
    reload the cleaned data from the file to update shared memory.

    Args:
        json_obj: Object to serialize (may be a shallow copy from shared memory)
        file_name: Output file path

    Returns:
        bool: True if sanitization was applied (caller should reload data),
              False if direct write succeeded (no reload needed)
    wr   r  r   F)indentensure_asciiNz4Direct JSON write failed, using sanitizing encoder: )r  r  clsz(JSON sanitization applied during write: T)	r  rf   dumpr  UnicodeDecodeErrorrK   r~   r  rN  )json_objr  r  rY   s       r$   
write_jsonr    s    (Q)S73 	AqIIh!%@	A	A 23 QKA3OPPQ 
iw	/ X1		(AaeAVWX X X KK:9+FGs5   ? 3? <? ? A0A++A0B))B2c                       e Zd ZdZddZddZy)TokenizerInterfacezU
    Defines the interface for a tokenizer, requiring encode and decode methods.
    c                     y)z'Encodes a string into a list of tokens.Nr   r"   r	  s     r$   r  zTokenizerInterface.encode      r/   c                     y)z'Decodes a list of tokens into a string.Nr   r"   tokenss     r$   decodezTokenizerInterface.decode  r  r/   Nr	  r?   r@   	List[int]r  r  r@   r?   )r)   r*   r+   r,   r  r  r   r/   r$   r  r    s    r/   r  c                  (    e Zd ZdZddZddZddZy)		Tokenizerzc
    A wrapper around a tokenizer to provide a consistent interface for encoding and decoding.
    c                     || _         || _        y)a  
        Initializes the Tokenizer with a tokenizer model name and a tokenizer instance.

        Args:
            model_name: The associated model name for the tokenizer.
            tokenizer: An instance of a class implementing the TokenizerInterface.
        Nr   	tokenizer)r"   r   r  s      r$   r   zTokenizer.__init__  s      *-6r/   c                8    | j                   j                  |      S )z
        Encodes a string into a list of tokens using the underlying tokenizer.

        Args:
            content: The string to encode.

        Returns:
            A list of integer tokens.
        )r  r  r  s     r$   r  zTokenizer.encode  s     ~~$$W--r/   c                8    | j                   j                  |      S )z
        Decodes a list of tokens into a string using the underlying tokenizer.

        Args:
            tokens: A list of integer tokens to decode.

        Returns:
            The decoded string.
        )r  r  r  s     r$   r  zTokenizer.decode  s     ~~$$V,,r/   N)r   r?   r  r  r  r  )r)   r*   r+   r,   r   r  r  r   r/   r$   r  r    s    	7
.
-r/   r  c                  &     e Zd ZdZdd fdZ xZS )TiktokenTokenizerz@
    A Tokenizer implementation using the tiktoken library.
    c                    	 ddl }	 |j                  |      }t        |   ||       y# t        $ r t        d      w xY w# t
        $ r t        d| d      w xY w)aC  
        Initializes the TiktokenTokenizer with a specified model name.

        Args:
            model_name: The model name for the tiktoken tokenizer to use.  Defaults to "gpt-4o-mini".

        Raises:
            ImportError: If tiktoken is not installed.
            ValueError: If the model_name is invalid.
        r   Nzktiktoken is not installed. Please install it with `pip install tiktoken` or define custom `tokenizer_func`.r  zInvalid model_name: .)tiktokenrC   encoding_for_modelr   r   KeyErrorr   )r"   r   r  r  r#   s       r$   r   zTiktokenTokenizer.__init__  s{    		C 33J?IG
iH  	} 	  	C3J<qABB	Cs   + "A A A)zgpt-4o-mini)r   r?   r   r.   s   @r$   r  r    s    C Cr/   r  c                 d    ddg}t        |       D cg c]  \  }}||dz     |d c}}S c c}}w )Nuser	assistantr   )roler	  )	enumerate)r   rolesir	  s       r$    pack_user_ass_to_openai_messagesr  2  s>    [!EENt_7Aq'q1u'2  s   ,c                    |s| gS | | nd} t        j                  dj                  d |D              |       }|D cg c]#  }|j                         s|j                         % c}S c c}w )z"Split a string by multiple markersr   |c              3  F   K   | ]  }t        j                  |        y wr   )r   escape).0markers     r$   	<genexpr>z0split_string_by_multi_markers.<locals>.<genexpr>>  s     Hf		& 1Hs   !)r   r  r   strip)r	  markersresultsrs       r$   split_string_by_multi_markersr  9  sZ    y ,g"GhhsxxHHH'RG&4!!'')AGGI444s   A'A'c                @    t        t        j                  d|             S )Nz^[-+]?[0-9]*\.?[0-9]+$)rd   r   match)ro   s    r$   is_float_regexr  B  s    2E:;;r/   c           	         |dk  rg S d}t        |       D ]4  \  }}|t        |j                   ||                  z  }||kD  s/| d| c S  | S )z%Truncate a list of data by token sizer   N)r  r   r  )	list_datakeyr   r  r  r  datas          r$   truncate_list_by_token_sizer  F  sh     	FY' !4#i&&s4y122N"Ra= ! r/   c                    t        j                  | |      }t         j                  j                  |       }t         j                  j                  |      }|||z  z  S )z/Calculate cosine similarity between two vectors)npdotlinalgnorm)v1v2dot_productnorm1norm2s        r$   cosine_similarityr  W  sD    &&R.KIINN2EIINN2E%%-((r/   c                  K   | y|dk7  r| j                   j                  d      sy| j                   j                  d      syt        |||      }| j                  |       d{   }|r4t        j                  d| d       |d   }|j                  dd	      }||fS t        j                  d
| d| d       y7 Ww)zGeneric cache handling function with flattened cache keys

    Returns:
        tuple[str, int] | None: (content, create_time) if cache hit, None if cache miss
    Nrl   enable_llm_cache#enable_llm_cache_for_entity_extractzFlattened cache hit(key:rb  r@   create_timer   zCache missed(mode:z type:)global_configr1  r  	get_by_idrK   r~   )	
hashing_kv	args_hashpromptr  r  flattened_keycache_entryr	  	timestamps	            r$   handle_cacher  _  s      y''++,>?''++,QR 'tZCM",,];;K/a@Ah'OOM15		!!
LL%dV6*Q?@ <s   A"B>$B<%AB>c                  d    e Zd ZU ded<   ded<   ded<   dZded<   dZded<   d	Zd
ed<   d	Zded<   y	)	CacheDatar?   r  r	  r  rl   r  queryr  Nr   chunk_idzdict | None
queryparam)r)   r*   r+   r   r  r  r  r  r   r/   r$   r	  r	    s9    NLKD#JHj"J"r/   r	  c                  K   | |j                   syt        |j                   d      rt        j                  d       yt	        |j
                  |j                  |j                        }| j                  |       d{   }|r:|j                  d      }||j                   k(  rt        j                  d| d       y|j                   |j                  |j                  |j                  nd|j                  |j                  |j                  ndd}t        j                  d|        | j                  ||i       d{    y7 7 w)	zSave data to cache using flattened key structure.

    Args:
        hashing_kv: The key-value storage for caching
        cache_data: The cache data to save
    N	__aiter__z+Streaming response detected, skipping cacher@   zCache duplication detected for z, skipping update)r@   r  r  original_promptr  z == LLM cache == saving: )r	  r   rK   r~   r  r  r  r  r   r1  rL   r  r  r  rN  upsert)r  
cache_datar  existing_cacheexisting_contentr  s         r$   save_to_cacher    sD     !3!3 z!!;/BC '..
0D0DM
 &//>>N)--h7z111NN1-@QR  $$ +++5+>+>+JJ''PT%,,  , !++K KK+M?;< 

]K8
999/ ?. :s%   A;E=E>CEE
E
Ec                x    t        j                  d      }d }|j                  || j                  d            }|S )Nz\\u([0-9a-fA-F]{4})c                J    t        t        | j                  d      d            S )NrH      )chrr   group)r  s    r$   replace_unicode_escapez3safe_unicode_decode.<locals>.replace_unicode_escape  s    3u{{1~r*++r/   r   )r   compiler   r  )r	  unicode_escape_patternr  decoded_contents       r$   safe_unicode_decoder    s?    ZZ(>?,
 -00w 7O r/   c                2    t        t        | |d            ryy)ztCheck if a function exists in an object or not.
    :param obj:
    :param func_name:
    :return: True / False
    NTF)r   rD   )r  	func_names     r$   exists_funcr!    s     Y-.r/   c                    	 t        j                         } | j                         rt        d      | S # t        $ rC t        j                  d       t        j                         }t        j                  |       |cY S w xY w)aN  
    Ensure that there is always an event loop available.

    This function tries to get the current event loop. If the current event loop is closed or does not exist,
    it creates a new event loop and sets it as the current event loop.

    Returns:
        asyncio.AbstractEventLoop: The current or newly created event loop.
    zEvent loop is closed.z)Creating a new event loop in main thread.)rO   r4  	is_closedRuntimeErrorrK   rN  new_event_loopset_event_loop)current_loopnew_loops     r$   always_get_an_event_loopr)    sp    --/!!#677 ?@))+x(s   03 A	A?>A?c                  %(K   g }g }g }| j                          d{   }	|	D ]  }
| j                  |
       d{   }|r|j                  d      nd}||d}|r+t        |
d      }|j	                  |       d{   }||d<   |
|t        |d         d}|rd|v rt        |d         |d<   |j                  |        |	D ]  }|	D ]  }||k(  r	| j                  ||       d{   }|s&| j                  ||       d{   }|r|j                  d      nd}||d}|r.t        ||z   d	      }|j	                  |       d{   }||d<   |||d   t        |d         d
}|rd|v rt        |d         |d<   |j                  |         |j                   d{   }|d   D ]"  }|j                  |d   t        |      d       $ |dk(  rPt        |ddd      5 }|rk|j                  d       t        j                  ||d   j                               }|j                          |j!                  |       |j                  d       |rk|j                  d       t        j                  ||d   j                               }|j                          |j!                  |       |j                  d       |rZ|j                  d       t        j                  ||d   j                               }|j                          |j!                  |       ddd       ne|dk(  rddl}|r|j%                  |      n|j%                         }|r|j%                  |      n|j%                         }|r|j%                  |      n|j%                         }|j'                  |d      5 }|j(                  s|j+                  |dd       |j(                  s|j+                  |dd       |j(                  s|j+                  |d d       ddd       nu|d!k(  rt        |dd"      5 } | j                  d#       | j                  d$       |r| j                  d%d&j-                  |d   j                               z   d'z          | j                  d%d&j-                  d(gt/        |d   j                               z        z   d'z          |D ]=  }!| j                  d%d&j-                  d) |!j1                         D              z   d'z          ? | j                  d       n| j                  d*       | j                  d+       |r| j                  d%d&j-                  |d   j                               z   d'z          | j                  d%d&j-                  d(gt/        |d   j                               z        z   d'z          |D ]=  }"| j                  d%d&j-                  d, |"j1                         D              z   d'z          ? | j                  d       n| j                  d-       | j                  d.       |r| j                  d%d&j-                  |d   j                               z   d'z          | j                  d%d&j-                  d(gt/        |d   j                               z        z   d'z          |D ]=  }#| j                  d%d&j-                  d/ |#j1                         D              z   d'z          ? n| j                  d0       ddd       n}|d1k(  rht        |dd"      5 }$|$j                  d2       |$j                  d3       |$j                  d4       |$j                  d5       |r|d   D %%ci c]*  %%t3        t/        %      t3        %fd6|D                    , c}%(d7j-                  (fd8|d   D              }&|$j                  |&d9z          |$j                  d:t/        |&      z  d9z          |D ]>  }!d7j-                  (fd;|!j5                         D              }'|$j                  |'d9z          @ |$j                  d       n|$j                  d<       |$j                  d=       |$j                  d5       |r|d   D %%ci c]*  %%t3        t/        %      t3        %fd>|D                    , c}%(d7j-                  (fd?|d   D              }&|$j                  |&d9z          |$j                  d:t/        |&      z  d9z          |D ]>  }"d7j-                  (fd@|"j5                         D              }'|$j                  |'d9z          @ |$j                  d       n|$j                  dA       |$j                  dB       |$j                  d5       |r|d   D %%ci c]*  %%t3        t/        %      t3        %fdC|D                    , c}%(d7j-                  (fdD|d   D              }&|$j                  |&d9z          |$j                  d:t/        |&      z  d9z          |D ]>  }#d7j-                  (fdE|#j5                         D              }'|$j                  |'d9z          @ n|$j                  dF       ddd       nt7        dG| dH      |t9        dI| dJ|        yt9        dK       y7 
7 
w7 
87 	7 	7 	t7 	# 1 sw Y   >xY w# 1 sw Y   JxY w# 1 sw Y   VxY wc c}%w c c}%w c c}%w # 1 sw Y   qxY ww)La  
    Asynchronously exports all entities, relations, and relationships to various formats.

    Args:
        chunk_entity_relation_graph: Graph storage instance for entities and relations
        entities_vdb: Vector database storage for entities
        relationships_vdb: Vector database storage for relationships
        output_path: The path to the output file (including extension).
        file_format: Output format - "csv", "excel", "md", "txt".
            - csv: Comma-separated values file
            - excel: Microsoft Excel file with multiple sheets
            - md: Markdown tables
            - txt: Plain text formatted output
        include_vector_data: Whether to include data from the vector database.
    N	source_id)
graph_datar+  zent-)r
  vector_datar,  )rS   r+  r,  zrel-)
src_entity
tgt_entityr+  r,  r  __id__)relationship_idr  csvr  r   r   )newliner   z# ENTITIES
r   )
fieldnamesz

z# RELATIONS
z# RELATIONSHIPS
excel
xlsxwriter)engineEntitiesF)
sheet_nameindex	RelationsRelationshipsmdr  z# LightRAG Data Export

z## Entities

z| z | z |
z---c              3  2   K   | ]  }t        |        y wr   r?   r  r  s     r$   r  zaexport_data.<locals>.<genexpr>  s     )JQ#a&)J   z*No entity data available*

z## Relations

c              3  2   K   | ]  }t        |        y wr   r?  r@  s     r$   r  zaexport_data.<locals>.<genexpr>  s     )LQ#a&)LrA  z*No relation data available*

z## Relationships

c              3  2   K   | ]  }t        |        y wr   r?  r@  s     r$   r  zaexport_data.<locals>.<genexpr>  s     $KSV$KrA  z"*No relationship data available*

txtzLIGHTRAG DATA EXPORT
zR================================================================================

z	ENTITIES
zQ--------------------------------------------------------------------------------
c              3  L   K   | ]  }t        t        |                 y wr   r   r?   )r  rY   r  s     r$   r  zaexport_data.<locals>.<genexpr>  s     &M!s3qt9~&M   !$z  c              3  F   K   | ]  }|j                  |           y wr   ljustr  r  
col_widthss     r$   r  zaexport_data.<locals>.<genexpr>  s     "Ta177:a=#9"T   !r|   -c              3  ^   K   | ]$  \  }}t        |      j                  |          & y wr   r?   rJ  r  r  r  rL  s      r$   r  zaexport_data.<locals>.<genexpr>  +      $8<1AZ]3$   *-zNo entity data available

z
RELATIONS
c              3  L   K   | ]  }t        t        |                 y wr   rF  r  r  r  s     r$   r  zaexport_data.<locals>.<genexpr>  s     &N!s3qt9~&NrG  c              3  F   K   | ]  }|j                  |           y wr   rI  rK  s     r$   r  zaexport_data.<locals>.<genexpr>  s     "Ua177:a=#9"UrM  c              3  ^   K   | ]$  \  }}t        |      j                  |          & y wr   rP  rQ  s      r$   r  zaexport_data.<locals>.<genexpr>  rR  rS  zNo relation data available

zRELATIONSHIPS
c              3  L   K   | ]  }t        t        |                 y wr   rF  rU  s     r$   r  zaexport_data.<locals>.<genexpr>  s     &R!s3qt9~&RrG  c              3  F   K   | ]  }|j                  |           y wr   rI  rK  s     r$   r  zaexport_data.<locals>.<genexpr>	  s#      #/0AGGJqM*#rM  c              3  ^   K   | ]$  \  }}t        |      j                  |          & y wr   rP  rQ  s      r$   r  zaexport_data.<locals>.<genexpr>  rR  rS  z No relationship data available

zUnsupported file format: z". Choose from: csv, excel, md, txtzData exported to: z with format: zData displayed as table format)get_all_labelsget_noder1  r  r   r?   rK  has_edgeget_edgeclient_storager  writer2  
DictWriterkeyswriteheader	writerowspandas	DataFrameExcelWriteremptyto_excelr   r   valuesmaxrJ  r   print))chunk_entity_relation_graphentities_vdbrelationships_vdboutput_pathfile_formatinclude_vector_dataentities_datarelations_datarelationships_dataall_entitiesrS   	node_datar+  entity_info	entity_idr-  
entity_rowr.  r/  edge_exists	edge_datarelation_inforel_idrelation_rowall_relationshipsrelcsvfilewriterpdentities_dfrelations_dfrelationships_dfmdfileentityrelationrelationshiptxtfiler  headerrowrL  s)                                        `  @r$   aexport_datar    s
    0 MN 5CCEEL# )5>>{KK	2;IMM+.	 $"
 )+fEI , 6 6y AAK)4K& '"L)

 =K#?(+K,F(GJ}%Z(3)8 # "4
& !	4JZ' ; D DJ! K "="F"F
# 	 ;DIMM+6	 #,!*! '.zJ/FvVF(9(C(CF(K"KK3>M-0 #-",!.{!;"%mL&A"B	  '=M+I25mM6R2SL/%%l3C!	4"4J 0>>> ( 
!!#&x=C	

 e+sBA 	5Wn-M!<L<Q<Q<ST""$  /f% o.N1<M<R<R<TU""$  0f% "12(:1(=(B(B(D ""$  !341	5 	54 
	5Bbll=1,:BLL( 	 1CBLL+, 	 ^^K^= 	$$$$V
%$P%%%%fE%R#)) ))e * 	 	 
	+sW5 :	ELL56 LL*+TEJJ}Q/?/D/D/F$GG&PQ5::ugM!4D4I4I4K0L&LMMPVV
 , FLLuzz)J&--/)JJJVS V$=> LL+,TEJJ~a/@/E/E/G$HH6QR5::ugN14E4J4J4L0M&MNNQWW
 !/ HLLuzz)L(//:K)LLLvU V$?@ LL/0!TEJJ/A!/D/I/I/K$LLvUVjj%3/A!/D/I/I/K+L!LMN %7 LLL**$K\5H5H5J$KKL ! CDu:	E :	Ex 
	+sW5 H	DMM23MM+, MM,'MM/* +1- s3q63&M}&M#MNN
 "T=QRCS"TTftm,cCK/$67 , .F)) $@F$ C MM#*-	.
 f%<= MM-(MM/* ,A. s3q63&N~&N#NOO
 "U>RSCT"UUftm,cCK/$67 !/ .H)) $@H@P$ C MM#*-	.
 f%>? MM+,MM/*! 02 s3q63&R?Q&R#RSS
  #4Fq4I#  ftm,cCK/$67 %7 .L)) $@L@R@R@T$ C MM#*-	. BCQH	D H	DV '}4VW
 	
 ";-~k]KL./_ F L B(
 #L ?	5 	5L	 	:	E :	EN..oH	D H	Ds  lj0lj3Al>j6?A*l)j9*l1lj<Alj?Al&k'Al*D7k!Bl0A!kl/Kk	l'Ak86/k)%C&k8/k.:C&k8 /k3B)k889l3l6l9l<l?llk
lklk&"l)k88l=lc           
         	 t        j                         }|j                  t        | |||||             y# t        $ r, t        j                         }t        j                  |       Y Tw xY w)a  
    Synchronously exports all entities, relations, and relationships to various formats.

    Args:
        chunk_entity_relation_graph: Graph storage instance for entities and relations
        entities_vdb: Vector database storage for entities
        relationships_vdb: Vector database storage for relationships
        output_path: The path to the output file (including extension).
        file_format: Output format - "csv", "excel", "md", "txt".
            - csv: Comma-separated values file
            - excel: Microsoft Excel file with multiple sheets
            - md: Markdown tables
            - txt: Plain text formatted output
        include_vector_data: Whether to include data from the vector database.
    N)rO   r4  r$  r%  r&  run_until_completer  )rm  rn  ro  rp  rq  rr  loops          r$   export_datar  "  sl    .%%%'
 	'	
		  %%%'t$%s   6 2A+*A+c                     ddl } |j                         j                  } |j                  |      }|r|j                  ndd fd}|S )zQLazily import a class from an external module based on the package of the caller.r   Nc                 Z    dd l }|j                        }t        |      } || i |S )Nr   )package)	importlibimport_modulerD   )r   r   r  moduler  
class_namemodule_namer  s        r$   import_classz*lazy_external_import.<locals>.import_classT  s7    ((g(Ffj)D#F##r/   )r   r   r   r   )r   currentframef_back	getmodule__package__)r  r  r   caller_framer  r  r  s   ``    @r$   lazy_external_importr  K  sL     '7'')00LW|,F$*f  G$ r/   c           	       K   |sy	 |j                  |        d{   }|rd|vrg |d<   t        |d         }|D cg c]	  }||vs| }}|rX|d   j                  |       |j                  | |i       d{    t        j                  d|  dt        |       d| d       yyy7 c c}w 7 6# t        $ r(}t        j                  d|  d| d	|        Y d}~yd}~ww xY ww)
a$  Update chunk's llm_cache_list with the given cache keys

    Args:
        chunk_id: Chunk identifier
        text_chunks_storage: Text chunks storage instance
        cache_keys: List of cache keys to add to the list
        cache_scenario: Description of the cache scenario for logging
    Nllm_cache_listzUpdated chunk z with z cache keys (rb  zFailed to update chunk z with cache references on rJ   )	r   rL  extendr  rK   r~   r   rN   rL   )	r  text_chunks_storage
cache_keyscache_scenario
chunk_dataexisting_keysr  new_keysrY   s	            r$   update_chunk_cache_listr  ^  s     
.88BB
z1/1
+,  
+; <=M'1NS5MNHN+,33H= *00(J1GHHH$XJfS]O=Q_P``ab   C O I  
%hZ/I.IYY[\][^_	
 	

sa   C&B2 B)!B2 	B+B+.B2 9B0:,B2 &C&)B2 +B2 2	C#;CC&C##C&c                l    t        j                  dd| t         j                        j                         S )zcRemove <think>...</think> tags from the text
    Remove  orphon ...</think> tags from the text alsoz ^(<think>.*?</think>|.*</think>)r   flags)r   r   DOTALLr  r  s    r$   remove_think_tagsr    s)     66+RRYYegr/   c	           
       K   t        |       }	|rt        |      nd}
d}|rcg }t        |      D ];  \  }}|j                         }d|v rt        |d         |d<   |j                  |       = t	        j
                  |d      }nd}|rsg }|	r|j                  |	       |
r|j                  |
       |r|j                  |       dj                  |      }t        |      }t        d||      }t        |||d|       d{   }|rE|\  }}t        j                  d|        t        d	xx   d
z  cc<   ||j                  |       ||fS t        dxx   d
z  cc<   i }|r||d<   |||d<    ||	fd|
i| d{   }t        |      }t        t        j                               }|j                   j#                  d      r5t%        |t'        |||||             d{    ||j                  |       ||fS i }|r||d<   |||d<   	  ||	fd|
i| d{   }t        t        j                               }t        |      |fS 7 J7 7 n7 5# t(        $ r%}dt+        |       } t-        |      |      |d}~ww xY ww)a	  Call LLM function with cache support and text sanitization

    If cache is available and enabled (determined by handle_cache based on mode),
    retrieve result from cache; otherwise call LLM function and save result to cache.

    This function applies text sanitization to prevent UTF-8 encoding errors for all LLM providers.

    Args:
        input_text: Input text to send to LLM
        use_llm_func: LLM function with higher priority
        llm_response_cache: Cache storage instance
        max_tokens: Maximum tokens for generation
        history_messages: History messages list
        cache_type: Type of cache
        chunk_id: Chunk identifier to store in cache
        text_chunks_storage: Text chunks storage to update llm_cache_list
        cache_keys_collector: Optional list to collect cache keys for batch processing

    Returns:
        tuple[str, int]: (LLM response text, timestamp)
            - For cache hits: (content, cache_create_time)
            - For cache misses: (content, current_timestamp)
    Nr	  Fr  r|   rl   )r  zFound cache for r   rH   r   history_messages
max_tokenssystem_promptr  )r  r	  r  r  r  z[LLM func] )sanitize_text_for_encodingr  copyrK  rf   dumpsr   r  r  r  rK   r~   statistic_datar  r   r5  r  r1  r  r	  rN   r?   r  )user_promptuse_llm_funcllm_response_cacher  r  r  r  r  cache_keys_collectorsafe_user_promptsafe_system_promptsafe_history_messagesr  r   safe_msghistoryprompt_parts_promptarg_hashr  cached_resultr	  r  r   rescurrent_timestamprY   rZ   s                               r$   use_llm_func_with_cacher    s    F 2+>5B"=1 
 ! " 01 	3FAsxxzHH$&@)AT&U#!((2		3
 **2G 01 23())L)$W-&y*hG	*!
 
 !.GYLL+H:67;'1,' $/$++I6I%%z"a'"  )>F%&!#-F< %
,>
BH
 
  $  		,++//0UV"&")%	 	 	 $/$++I6%%% F%:!")|( 
,>
BH
 
 DIIK(S!#444Q
4
	2
  (!#a&*	d1gi a'	(sn   C>J I
A:J;I<A$J I!+JI II  +JJJI 	J I<<JJc                R    | j                         } t        |       |k  r| S | d| dz   S )zGet summary of document content

    Args:
        content: Original document content
        max_length: Maximum length of summary

    Returns:
        Truncated content with ellipsis if needed
    Nr{   )r  r   )r	  
max_lengths     r$   get_content_summaryr    s2     mmoG
7|z!;J%''r/   c                <    t        |       }|rt        ||      }|S y)zSantitize and normalize extracted text
    Args:
        input_text: text string to be processed
        is_name: whether the input text is a entity or relation name

    Returns:
        Santitized and normalized text string
    )remove_inner_quotesr   )r  normalize_extracted_info)
input_textr  safe_input_textnormalized_texts       r$   %sanitize_and_normalize_extracted_textr  .  s,     1<O21D
 r/   c                   t        j                  dd| t         j                        } t        j                  dd| t         j                        } | j                  t        j                  dd            } | j                  t        j                  dd            } | j                  d	d
      } | j                  dd      } | j                  dd      } | j                  dd      } | j                  dd      j                  dd      } | j                  dd
      j                  d	d
      } | j                  dd      } t        j                  dd|       } t        j                  dd|       } t        j                  dd|       } t        |       dk\  r| j                  d      r| j                  d      r| dd }d|vr|} | j                  d      r| j                  d      r| dd }d|vr|} | j                  d       r | j                  d!      r| dd }d |vrd!|vr|} | j                  d"      r | j                  d#      r| dd }d"|vrd#|vr|} | j                  d$      r | j                  d%      r| dd }d$|vrd%|vr|} |r| j                  d d      j                  d!d      j                  d"d      j                  d#d      } t        j                  d&d|       } t        j                  d'd|       } | j                  d(d      } t        j                  d)d|       } | j                         } t        |       d*k  rt        j                  d+|       ryd, }t        |       d-k  r	 ||       ry| S ).ae  Normalize entity/relation names and description with the following rules:
    - Clean HTML tags (paragraph and line break tags)
    - Convert Chinese symbols to English symbols
    - Remove spaces between Chinese characters
    - Remove spaces between Chinese characters and English letters/numbers
    - Preserve spaces within English text and numbers
    - Replace Chinese parentheses with English parentheses
    - Replace Chinese dash with English dash
    - Remove English quotation marks from the beginning and end of the text
    - Remove English quotation marks in and around chinese
    - Remove Chinese quotation marks
    - Filter out short numeric-only text (length < 3 and only digits/dots)
    - remove_inner_quotes = True
        remove Chinese quotes
        remove English quotes in and around chinese
        Convert non-breaking spaces to regular spaces
        Convert narrow non-breaking spaces after non-digits to regular spaces

    Args:
        name: Entity name to normalize
        is_entity: Whether this is an entity name (affects quote handling)

    Returns:
        Normalized entity name
    z</p\s*>|<p\s*>|<p/>r   r  z</br\s*>|<br\s*>|<br/>u   ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚ4ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzu   ０１２３４５６７８９
0123456789u   －rN  u   ＋+u   ／/u   ＊*u   （(u   ）rb  u   —u   　 z*(?<=[\u4e00-\u9fa5])\s+(?=[\u4e00-\u9fa5])z;(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])z;(?<=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])\s+(?=[\u4e00-\u9fa5])r   "rH   'u   “u   ”u   ‘u   ’u   《u   》z['\"]+(?=[\u4e00-\u9fa5])z(?<=[\u4e00-\u9fa5])['\"]+    z(?<=[^\d])\u202Fr   z^[0-9]+$c                2    t        d | D              xr d| v S )z
        Check if the string consists only of dots and digits, with at least one dot
        Filter cases include: 1.2.3, 12.3, .123, 123., 12.3., .1.23 etc.
        c              3  J   K   | ]  }|j                         xs |d k(    yw)r  N)isdigit)r  cs     r$   r  zJnormalize_extracted_info.<locals>.should_filter_by_dots.<locals>.<genexpr>  s"     9q199;*!s(*9   !#r  )allr  s    r$   should_filter_by_dotsz7normalize_extracted_info.<locals>.should_filter_by_dots  s    
 9D99IcTkIr/      )r   r   
IGNORECASE	translater?   	maketransr   r   
startswithendswithr  r  )namer  inner_contentr  s       r$   r  r  B  s3   6 66("d"--HD66+RR]]KD >> kB	
D >>#--(H,WXD <<s#D<<s#D<<s#D<<s#D <<s#++E37D <<s#++E37D <<s#D 66?TJD 66FDD 66FDD
 4yA~??3DMM#$6 2JM-'$ ??3DMM#$6 2JM-'$ ??5!dmmE&: 2JMM)e=.H$??5!dmmE&: 2JMM)e=.H$ ??5!dmmE&: 2JMM)e=.H$||E2&..ub9AA%LTTUZ\^_vv2B=vv3R>||Hc*vv)35 ::<D 4y1}+t4J 4y1}.t4 Kr/   c                   | s| S 	 | j                         } | s| S | j                  d       d}| D ]6  }t        |      }d|cxk  rdk  r	n n||z  }"|dk(  s|dk(  r||z  }2||z  }8 t        j                  d||      }|j                  d       t        j                  |      }t        j                  dd|      }|j                         S # t        $ r:}d	t        |      d
d  }t        j                  d|        t        |      |d
}~wt        $ rd}t        j                  dt        |              	 | j                  d       | cY d
}~S # t        $ r t        dt        |             |w xY wd
}~ww xY w)a  Sanitize text to ensure safe UTF-8 encoding by removing or replacing problematic characters.

    This function handles:
    - Surrogate characters (the main cause of encoding errors)
    - Other invalid Unicode sequences
    - Control characters that might cause issues
    - Unescape HTML escapes
    - Remove control characters
    - Whitespace trimming

    Args:
        text: Input text to sanitize
        replacement_char: Character to use for replacing invalid sequences

    Returns:
        Sanitized text that can be safely encoded as UTF-8

    Raises:
        ValueError: When text contains uncleanable encoding issues that cannot be safely processed
    r   r   i   i  i  i  z [\x00-\x08\x0B\x0C\x0E-\x1F\x7F]z%[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]z1Text contains uncleanable UTF-8 encoding issues: Nd   zText sanitization failed: z%Text sanitization: Unexpected error: z0Text sanitization failed with unexpected error: )r  r  ordr   r   htmlunescaper  r?   rK   r=  r   rN   )r  replacement_charr  char
code_pointrY   rZ   s          r$   r  r    s   * =zz| K 	G 	 	"DTJ-v---	v%v)=--	T!		"" FF/1A9
	
 	! MM),	 FFCRS	   +GAtPS~V	1)=>#* 	<SVHEF	KK K! 	B3q6(K		sA   C B1C 	E:5D

E:!E58E
E:"E22E55E:c                    ddl m} |j                  | g       }|D cg c]  }|t        j                  vs| }}|r t        d|  ddj                  |             yc c}w )zCheck if all required environment variables for storage implementation exist

    Args:
        storage_name: Storage implementation name

    Raises:
        ValueError: If required environment variables are missing
    r   )STORAGE_ENV_REQUIREMENTSzStorage implementation 'z0' requires the following environment variables: rc  N)lightrag.kgr  r1  rb   environr   r   )storage_namer  required_varsvarmissing_varss        r$   check_storage_env_varsr   !	  ss     5,00rBM#0JCCrzz4ICJLJ&|n 5&&*ii&=%>@
 	
  Ks
   AAc                   | sg S t        |       }|dk(  r| d   j                  dg       }|d| S g }t        |      D ]?  }|dkD  r||dz
  z  nd}||||z
  z  z
  }|j                  t	        t        |                   A g }	g }
d}t        |       D ]f  \  }}|j                  dg       }||   }t        |t        |            }|	j                  |d|        |
j                  |       ||z
  }|dkD  sb||z  }h t        |      D ]g  }d}t        |       D ]P  \  }}|j                  dg       }|
|   t        |      k  s*|	j                  ||
|             |
|xx   dz  cc<   d} n |rf |	S  |	S )aO  
    Linear gradient weighted polling algorithm for text chunk selection.

    This algorithm ensures that entities/relations with higher importance get more text chunks,
    forming a linear decreasing allocation pattern.

    Args:
        entities_or_relations: List of entities or relations sorted by importance (high to low)
        max_related_chunks: Expected number of text chunks for the highest importance entity/relation
        min_related_chunks: Expected number of text chunks for the lowest importance entity/relation

    Returns:
        List of selected text chunk IDs
    rH   r   sorted_chunksNFT)	r   r1  rM   rK  r   roundr  minr  )entities_or_relationsmax_related_chunksmin_related_chunksnentity_chunksexpected_countsr  ratioexpectedselected_chunksused_countstotal_remaining
entity_relactual	remainingr<   	allocateds                    r$   pick_by_weighted_pollingr  6	  s   & !	!"AAv-a044_bI0011 O1X 5 1uQU!%!33)
 
 	s5?345 OKO"#89 ):";"1% Xs=12}Wf566" v%	q=y(O) ?# 	 ''<= 		MAz&NN?B?M 1~M 22&&}[^'DEA!# 			 '& r/   c                  K   t         j                  d| d|rt        |      nd        |r|dk  rg S t               }t	        |      D ](  \  }}	|	j                  dg       }
|j                  |
       * |st         j                  d       g S t         j                  dt        |       d       t        |      }	 |, || g       d{   }|d   }t         j                  d	       nt         j                  d
       |j                  |       d{   }t         j                  dt        |       d       |rt        |      t        |      k7  rH|st         j                  d       g S t         j                  dt        |       dt        |              g S g }d}|D ]I  }||v r+||   }	 t        ||      }|j                  ||f       |dz  }2t         j                  d|        K |j                  d d       |d| D cg c]  \  }}|	 }}}t         j                  dt        |       dt        |       d       |S 7 ~7 8# t        $ r%}t         j                  d| d|        Y d}~d}~ww xY wc c}}w # t        $ rf}t         j                  d|        ddl}t         j                  d|j!                                 t         j                  d       |d| cY d}~S d}~ww xY ww)a  
    Vector similarity-based text chunk selection algorithm.

    This algorithm selects text chunks based on cosine similarity between
    the query embedding and text chunk embeddings.

    Args:
        query: User's original query string
        text_chunks_storage: Text chunks storage instance
        chunks_vdb: Vector database storage for chunks
        num_of_chunks: Number of chunks to select
        entity_info: List of entity information containing chunk IDs
        embedding_func: Embedding function to compute query embedding

    Returns:
        List of selected text chunk IDs sorted by similarity (highest first)
    z1Vector similarity chunk selection: num_of_chunks=z, entity_info_count=r   r  zEVector similarity chunk selection:  no chunk IDs found in entity_infoz#Vector similarity chunk selection: z unique chunk IDs collectedNz>Computed query embedding for vector similarity chunk selectionzHUsing pre-computed query embedding for vector similarity chunk selectionz chunk vectors RetrievedzGVector similarity chunk selection: no vectors retrieved from chunks_vdbz)Vector similarity chunk selection: found z but expecting rH   zLVector similarity chunk selection: failed to calculate similarity for chunk rJ   z>Vector similarity chunk selection:  no vector found for chunk c                    | d   S )NrH   r   xs    r$   <lambda>z+pick_by_vector_similarity.<locals>.<lambda>	  s
    ! r/   T)r  reverse chunks from z candidatesz8[VECTOR_SIMILARITY] Error in vector similarity sorting: z[VECTOR_SIMILARITY] Traceback: z5[VECTOR_SIMILARITY] Falling back to simple truncation)rK   r~   r   rL  r  r1  rS  rL   re   get_vectors_by_idsr  rK  rN   sortr=  	traceback
format_exc)r
  r  
chunks_vdbnum_of_chunksrx  embedding_funcquery_embeddingall_chunk_idsr  r  	chunk_idschunk_vectorssimilaritiesvalid_vectorsr  chunk_embedding
similarityrY   r<   r  r  s                        r$   pick_by_vector_similarityr+  	  s=    4 LL
;M?J^s~_bcn_o  EF  _G  	H -1,	 EM{+ (	6JJ3	Y'( S	
 	
LL
-c-.@-AA\] 'ME-"$2E7$;;O-O LLP LLZ
 );;MJJ1#m2D1EE]^	
 M 2c-6H H ] I ?M@R?SSbcfgtcubvw I % 	H=("/"9!2?O!TJ '':(>?!Q&M TU]T^_	$ 	nd;7CN]7ST!8TT1#o2F1G}UXYfUgThhst	
 s < K6 ! NNfgofpprstruv  U  -OPQsST6y7K7K7M6NOPLM^m,,-s   B8K6;J 	I
AJ I
AJ *K6+.J K6J -$I7J I>2J K6J 
J 	I;I61J 6I;;	J 	K3AK.(K3)K6.K33K6c                  :    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
y	)
TokenTrackerz Track token usage for LLM calls.c                $    | j                          y r   resetr   s    r$   r   zTokenTracker.__init__
  s    

r/   c                &    | j                          | S r   r/  r   s    r$   	__enter__zTokenTracker.__enter__
  s    

r/   c                    t        |        y r   )rl  )r"   r;   exc_valexc_tbs       r$   __exit__zTokenTracker.__exit__
  s	    dr/   c                <    d| _         d| _        d| _        d| _        y )Nr   prompt_tokenscompletion_tokenstotal_tokens
call_countr   s    r$   r0  zTokenTracker.reset
  s!    !"r/   c                l   | xj                   |j                  dd      z  c_         | xj                  |j                  dd      z  c_        d|v r| xj                  |d   z  c_        n8| xj                  |j                  dd      |j                  dd      z   z  c_        | xj                  dz  c_        y)zAdd token usage from one LLM call.

        Args:
            token_counts: A dictionary containing prompt_tokens, completion_tokens, total_tokens
        r9  r   r:  r;  rH   N)r9  r1  r:  r;  r<  )r"   token_countss     r$   	add_usagezTokenTracker.add_usage
  s     	l..BB,"2"23F"JJ \)n!==!1!1"  !4a8"9 9 	1r/   c                `    | j                   | j                  | j                  | j                  dS )zGet current usage statistics.r8  r8  r   s    r$   	get_usagezTokenTracker.get_usage'
  s0     "//!%!7!7 --//	
 	
r/   c           	     V    | j                         }d|d    d|d    d|d    d|d    S )	NzLLM call count: r<  z, Prompt tokens: r9  z, Completion tokens: r:  z, Total tokens: r;  )rA  )r"   usages     r$   __str__zTokenTracker.__str__0
  sV     u\23 4#O45 6""'(;"<!= >">235	
r/   N)r)   r*   r+   r,   r   r2  r6  r0  r?  rA  rD  r   r/   r$   r-  r-  
  s(    *&

r/   r-  c                  K   |r|s|S |j                  d      }|st        j                  d       |S 	 g }|D ]j  }|j                  d      xsD |j                  d      xs1 |j                  d      xs |j                  d      xs t        |      }|j	                  |       l  || ||       d{   }	|	rt        |	      d	kD  rt        |	d	   t              rd
|	d	   v rg }
|	D ]M  }|d
   }|d   }d	|cxk  rt        |      k  s"n %||   j                         }||d<   |
j	                  |       O t        j                  dt        |
       dt        |       d       |
S t        j                  dt        |	       d       |r|	d| S |	S t        j                  d       |S 7 # t        $ r%}t        j                  d| d       |cY d}~S d}~ww xY ww)a  
    Apply reranking to retrieved documents if rerank is enabled.

    Args:
        query: The search query
        retrieved_docs: List of retrieved documents
        global_config: Global configuration containing rerank settings
        enable_rerank: Whether to enable reranking from query parameter
        top_n: Number of top documents to return after reranking

    Returns:
        Reranked documents if rerank is enabled, otherwise original documents
    rerank_model_funczRerank is enabled but no rerank model is configured. Please set up a rerank model or set enable_rerank=False in query parameters.r	  r  chunk_contentdocument)r
  	documentstop_nNr   r:  relevance_scorererank_scorezSuccessfully reranked: r  z original chunkszUsing legacy rerank format:  chunksz4Rerank returned empty results, using original chunkszError during reranking: z, using original chunks)r1  rK   rL   r?   rK  r   rh   r  r  rN  rN   r=  )r
  retrieved_docsr  enable_rerankrJ  rerank_funcdocument_textsdocr	  rerank_resultsreranked_docsr   r:  rK  rY   s                  r$   apply_rerank_if_enabledrU  :
  s    ( ##$78K P	
 3! 		+C 	" 776?77?+ 77:& s8  !!'*		+  +$ 
 
 c.1A5.+T2w.QRBS7S ", 2F"7OE&,->&?O E7C$77,U388:.=N+%,,S12 -c-.@-AsSaObNccst %$ :3~;N:OwWX16~fu-JNJNNQR!!A
D  /s2IJKss   1GA?F* 3F(4AF* AF* %G&(F* GF* GF* 'G(F* *	G3GGGGGc                T  K   |sg S t        |      }|j                  r?| r=|r;|j                  xs t        |      }t        | |||j                  |       d{   }|j                  r|r|j	                  dd      }|dkD  r{t        |      }	g }
|D ]+  }|j	                  dd      }||k\  s|
j                  |       - |
}|	t        |      z
  }|dkD  r%t        j                  d	t        |       d
| d       |sg S |j                  [|j                  dkD  rLt        |      |j                  kD  r|d|j                   }t        j                  dt        |       d| d       |j	                  d      }|rk|ri|!t        |d|j	                  dt                    }t        |      }	t        |d ||      }t        j                  dt        |       d|	 d| d| d	       g }t        |      D ]1  \  }}|j                         }d|dz    |d<   |j                  |       3 |S 7 ͭw)a;  
    Unified processing for text chunks: deduplication, chunk_top_k limiting, reranking, and token truncation.

    Args:
        query: Search query for reranking
        chunks: List of text chunks to process
        query_param: Query parameters containing configuration
        global_config: Global configuration dictionary
        source_type: Source type for logging ("vector", "entity", "relationship", "mixed")
        chunk_token_limit: Dynamic token limit for chunks (if None, uses default)

    Returns:
        Processed and filtered list of text chunks
    )r
  rN  r  rO  rJ  Nmin_rerank_scoreg      ?g        rL  r+  r   zRerank filtering: z$ chunks remained (min rerank score: rb  zKept chunk_top-k: z  chunks (deduplicated original: r  max_total_tokensMAX_TOTAL_TOKENSc                4    dj                  d | fD              S )Nr|   c              3  J   K   | ]  }t        j                  |d         yw)Fr  N)rf   r  )r  r  s     r$   r  z;process_chunks_unified.<locals>.<lambda>.<locals>.<genexpr>
  s#      $9=

4e44$r  )r   r  s    r$   r  z(process_chunks_unified.<locals>.<lambda>
  s    $)) $BC$  r/   )r  r   r  zToken truncation: r  z (chunk available tokens: z
, source: DCrH   rv  )r   rO  chunk_top_krU  r1  rK  rK   rN  r~   rD   r   r  r  r  )r
  unique_chunksquery_paramr  source_typechunk_token_limitorigin_countrerank_top_krW  original_countfiltered_chunksr  rL  filtered_countr  final_chunksr  chunk_with_ids                     r$   process_chunks_unifiedri  
  s    , 	}%L   U}"..D#m2D5('%33
 
   ](,,-?Ec! /N !O& 2$yy"C   #33#**512 ,M+c-.@@N!(]);(<<`aq`rrst !	 *{/F/F/J} 7 77)*CK,C,CDM ]!3 44TUaTbbcd	

 !!+.I]$ '"!!"46NO! ]+3 -
 	 ]!3 4M.AQ R((9':*[MQRT	
 Lm, +5

 "1q5'ldM*+
 Y
s   AH(H%AH(,E:H(c                    | st         S | j                         }|t        vr!t        j	                  d| t                t         S |S )zPNormalize the source ID limiting strategy and fall back to default when invalid.z8Unknown SOURCE_IDS_LIMIT_METHOD '%s', falling back to %s)r   upperr   rK   rL   )r   
normalizeds     r$   !normalize_source_ids_limit_methodrm  
  sD     ..J77F+	

 /.r/   c                    g }t               }| |fD ]6  }|s|D ],  }|s||vs|j                  |       |j                  |       . 8 |S )zQMerge two iterables of source IDs while preserving order and removing duplicates.)rL  rP  rK  )existing_idsnew_idsmergedseensequencer+  s         r$   merge_source_idsrt    sf    
 FUD!7+ )! 	)I$#i(	)) Mr/   )
identifierc          	        |dk  rg S t        |       }t        |      |k  r|S t        |      }|t        k(  r|| d }n|d| }|rBt        |      t        |      k  r+t        j                  d||t        |      t        |             |S )z3Apply a limit strategy to a sequence of source IDs.r   Nz5Source_id truncated: %s | %s keeping %s of %s entries)re   r   rm  r   rK   r~   )
source_idslimitr   ru  source_ids_listnormalized_method	truncateds          r$   apply_source_ids_limitr|  "  s     z	:&O
?u$9&A88#UFG,	#FU+	c)ns?';;C	N 	
 r/   c                    t        |      t        |      z
  }t        |      t        |      z
  }| D cg c]	  }||vs| }}|D ]  }||v s||vs|j                  |        |S c c}w )a_  
    Compute incrementally updated chunk IDs based on changes.

    This function applies delta changes (additions and removals) to an existing
    list of chunk IDs while maintaining order and ensuring deduplication.
    Delta additions from new_chunk_ids are placed at the end.

    Args:
        existing_full_chunk_ids: Complete list of existing chunk IDs from storage
        old_chunk_ids: Previous chunk IDs from source_id (chunks being replaced)
        new_chunk_ids: New chunk IDs from updated source_id (chunks being added)

    Returns:
        Updated list of chunk IDs with deduplication

    Example:
        >>> existing = ['chunk-1', 'chunk-2', 'chunk-3']
        >>> old = ['chunk-1', 'chunk-2']
        >>> new = ['chunk-2', 'chunk-4']
        >>> compute_incremental_chunk_ids(existing, old, new)
        ['chunk-3', 'chunk-2', 'chunk-4']
    )rL  rK  )existing_full_chunk_idsold_chunk_idsnew_chunk_idschunks_to_removechunks_to_addcidupdated_chunk_idss          r$   compute_incremental_chunk_idsr  E  s    8 =)C,>>&]);;M
 /#=M2M   *-C/@$@$$S)* s
   	A'A'c                    t        |      }|s| D cg c]  }|s|	 c}S | D cg c]
  }|r||vr| c}S c c}w c c}w )zKRemove a collection of IDs from an ordered iterable while preserving order.)rL  )rw  ids_to_removeremoval_setr+  s       r$   subtract_source_idsr  s  sV     m$K+5Ci	CC $+5 	  Ds   77<c                B    t        j                  t        | |f            S )z?Create a deterministic storage key for relation chunk tracking.)r   r   sorted)srctgts     r$   make_relation_chunk_keyr    s     Sz 233r/   c                x    | j                  t              }t        |      dk7  rt        d|        |d   |d   fS )z=Parse a relation chunk storage key back into its entity pair.r   zInvalid relation chunk key: r   rH   )r  r   r   r   )r  r  s     r$   parse_relation_chunk_keyr    sC     IIo&E
5zQ7u=>>8U1Xr/   c                    t        j                         j                  d      }t        t	        j
                               dd }|  d| d| S )zGenerate a unique tracking ID with timestamp and UUID

    Args:
        prefix: Prefix for the track ID (e.g., 'upload', 'insert')

    Returns:
        str: Unique tracking ID in format: {prefix}_{timestamp}_{uuid}
    z%Y%m%d_%H%M%SN   r<   )r   nowstrftimer?   uuiduuid4)r
  r  	unique_ids      r$   generate_track_idr    sH     ''8IDJJL!"1%IXQyk9+..r/   c                   | syt         rO	 t        j                  | t        j                  j                        }dj                  |      j                         S | j                         S # t        $ r | j                         cY S w xY w)aN  Generate sort key for Chinese pinyin sorting

    This function uses pypinyin for true Chinese pinyin sorting.
    If pypinyin is not available, it falls back to simple lowercase string sorting.

    Args:
        text: Text to generate sort key for

    Returns:
        str: Sort key that can be used for comparison and sorting
    r   )style)_PYPINYIN_AVAILABLEpypinyinlazy_pinyinStyleNORMALr   r9   rN   )r  pinyin_lists     r$   get_pinyin_sort_keyr    sq     	 "..t8>>;P;PQK77;'--// zz|  	 ::<	 s   AA) )BBc                   | r|r|s| S t        j                  |      }t        j                  d| d| d||       } t        j                  d| d||       } t        j                  d||       } t        j                  d| d||       } t        j                  d| d	||       } t        j                  d
| d| d||       } t        j                  d| d||       } t        j                  d| d||       } t        j                  d| d||       } t        j                  d||       } t        j                  d| d||       } t        j                  d| d||       } t        j                  d| d||       } | S )a  
    Fix various forms of tuple_delimiter corruption from LLM output.

    This function handles missing or replaced characters around the core delimiter.
    It fixes common corruption patterns where the LLM output doesn't match the expected
    tuple_delimiter format.

    Args:
        record: The text record to fix
        delimiter_core: The core delimiter (e.g., "S" from "<|#|>")
        tuple_delimiter: The complete tuple delimiter (e.g., "<|#|>")

    Returns:
        The corrected record with proper tuple_delimiter format
    z<\|z\|*?z\|>z<\|\\z<\|+>z<.?\|z\|.?>z<\|?z\|?>z<[^|]z\|>|<\|z[^|]>z\|+(?!>)z:(?!>)z<\|+>z
<\|\|(?!>)z(?<!<)\|z\|>\|z\|\|)r   r  r   )r   delimiter_coretuple_delimiterescaped_delimiter_cores       r$   fix_tuple_delimiter_corruptionr    s   $   YY~6 VV%&d+A*B#FF VV'(,F VVF VV'(.F VV&'t,F VV'(0F/GuMF VV%&h/F VV%&f-F VV&'q)F VVF VV*+3/F VV%&e,F VV&'t,F Mr/   c                   	 t        | d      rq| j                  ret        | j                        }d}t        |      D ]#  \  }}t	        |t
              s| d| ||<   d} n |s| d|d    |d<    t        |       | S  t        |       | dt        |              S # t        t        t        f$ r=}t        | dt        |       j                   dt        |        d| d      cY d}~S d}~ww xY w)	a  
    Safely create a prefixed exception that adapts to all error types.

    Args:
        original_exception: The original exception.
        prefix: The prefix to add.

    Returns:
        A new exception with the prefix, maintaining the original exception type if possible.
    r   FrJ   Tr   z1 (Original exception could not be reconstructed: rb  N)r   r   re   r  rh   r?   r  rj   r   AttributeErrorr$  r)   )original_exceptionr
  r   	found_strr  r  construct_errors          r$   create_prefixed_exceptionr  3  s&   
%v.3E3J3J*//0D I#D/ 3c3'!'3%0DG $I	 #HBtAwi0Q+4*+T22 ,4*+vhb=O9P8Q,RSSz>2 
 hb01::;2cBT>U=V W??N>OqR
 	
	
s*   AB +B ?B C/22C*$C/*C/c                (   g }| D ]  }|j                  dd      }	d}
|r	|	|v r||	   }
|
ry|j                  |
j                  d|	      |
j                  dd      |
j                  dd      |
j                  dd      |
j                  d	d
      |
j                  dd      d       |j                  |	|j                  dd      |j                  dd      |j                  dd      |j                  d	d
      |j                  dd      d        g }|D ]O  }|j                  dd      }|j                  dd      }||f}d}|r	||v r||   }|r|j                  |j                  d|      |j                  d|      |j                  dd      |j                  dd      |j                  dd      |j                  dd      |j                  d	d
      |j                  dd      d       |j                  |||j                  dd      |j                  dd      |j                  dd      |j                  dd      |j                  d	d
      |j                  dd      d       R g }t        |      D ]]  \  }}|j                  dd      |j                  dd      |j                  d	d
      |j                  dd      d}|j                  |       _ t        j	                  dt        |       dt        |       d       |g g dd}dd ||||d!|d"S )#zQConvert internal data format to user-friendly format using original database datar  r   NrS   entity_typeUNKNOWNdescriptionr+  	file_pathunknown_source
created_at)rS   r  r  r+  r  r  r  entity1entity2src_idtgt_idkeywordsweightr+  )r  r  r  r  r  r+  r  r  reference_idr	  r  )r  r	  r  r  z#[convert_to_user_format] Formatted r  rM  )
high_level	low_level)
query_moder  successzQuery processed successfully)entitiesrelationshipschunks
references)r   r:   r  metadata)r1  rK  r  rK   r~   r   )entities_contextrelations_contextr  r  r  entity_id_to_originalrelation_id_to_originalformatted_entitiesr  rS   original_entityformatted_relationshipsr  r  r  relation_keyoriginal_relationformatted_chunksr  r  r  r  s                         r$   convert_to_user_formatr  ]  s^    " jj2.  [4I%I3K@O%%#2#6#6}k#R#2#6#6}i#P#2#6#6}b#I!0!4!4["!E!0!4!4[BR!S"1"5"5lB"G	 %%#.#)::fi#@#)::mR#@!'K!<!'K9I!J"(**\2">	-D !% %,,y"-,,y"-) !"|7N'N 7 E#**/33HgF/33HgF#4#8#8#K 1 5 5j" E/33HcB!2!6!6{B!G!2!6!6{DT!U"3"7"7b"I	 $**%%#+<<r#B (Z <&ll8S9!)k2!>!)k;K!L"*,,|R"@	5%P f% ,5!IInb9yyB/;0@A		*b1	

 	
+, LL
-c2B.C-DAc&k]RYZ !
H 1*4&$	
 
 
r/   c                   | sg g fS i }| D ]5  }|j                  dd      }|s|dk7  s|j                  |d      dz   ||<   7 g }t               }t        |       D ]M  \  }}|j                  dd      }|s|dk7  s!||vs&|j                  |||   |f       |j	                  |       O t        |d       }|D cg c]  }|d   	 }	}i }
t        |	      D ]  \  }}t        |dz         |
|<    g }| D ]J  }|j                         }|j                  dd      }|r|dk7  r	|
|   |d<   nd|d<   |j                  |       L g }t        |	      D ]%  \  }}|j                  t        |dz         |d	       ' ||fS c c}w )
al  
    Generate reference list from chunks, prioritizing by occurrence frequency.

    This function extracts file_paths from chunks, counts their occurrences,
    sorts by frequency and first appearance order, creates reference_id mappings,
    and builds a reference_list structure.

    Args:
        chunks: List of chunk dictionaries with file_path information

    Returns:
        tuple: (reference_list, updated_chunks_with_reference_ids)
            - reference_list: List of dicts with reference_id and file_path
            - updated_chunks_with_reference_ids: Original chunks with reference_id field added
    r  r   r  r   rH   c                    | d    | d   fS )NrH   r   r   r  s    r$   r  z5generate_reference_list_from_chunks.<locals>.<lambda>   s    qteQqT] r/   )r  r  )r  r  )r1  rL  r  rK  rP  r  r?   r  )r  file_path_countsr  r  file_path_with_indices
seen_pathsr  sorted_file_pathsr  unique_file_pathsfile_path_to_ref_idupdated_chunks
chunk_copyreference_lists                 r$   #generate_reference_list_from_chunksr    s   $ 2v  QIIk2.	&66*:*>*>y!*Lq*PY'Q  Jf% &5IIk2.	&669J;V"))96Fy6QST*UVNN9%	& 5;RS->?Ta?? !"34 49),QUI&4 N *ZZ\
NN;3	&66)<Y)GJ~&)+J~&j)* N!"34 T9s1q5z	RST >))/ @s   <E:)r@   rA   )r   r   g?N)rQ   r
   rR   r?   rS   r?   rT   r   rU   r   rV   zOptional[Callable]r@   rA   )
rk   r?   rl   anyrm   r  rn   rd   r@   r  )r   r?   )r   rd   )INFOFNT)
r   r?   r   r?   r   rd   r   r   r   rd   )r   r   r@   r?   )r   )r	  r?   r
  r?   r@   r?   )r  r?   r  r?   r  r?   r@   r?   )r  r?   r@   ztuple[str, str, str] | None)NNNi  g       @limit_async)r]  r   rg  r   rB  r   r^  r   r  r   r|  r   rD  r?   )r  r?   r@   r?   )r   r?   )r	  r?   r  	list[str]r@   r  )ro   r?   r@   rd   )
r  z	list[Any]r  zCallable[[Any], str]r   r   r  r  r@   z	list[int])rl   unknown)r@   ztuple[str, int] | None)r  r	  )r   r?   r@   rd   )r@   zasyncio.AbstractEventLoop)r2  F)rp  r?   rq  r?   rr  rd   r@   rA   )r  r?   r  r?   r@   zCallable[..., Any])batch_update)
r  r?   r  'BaseKVStorage'r  r  r  r?   r@   rA   )NNNNextractNN)r  r?   r  r   r  z'BaseKVStorage | None'r  r   r  r   r  zlist[dict[str, str]]r  r?   r  r   r  re   r@   ztuple[str, int])   )r	  r?   r  r   r@   r?   r  )r  r?   r@   r?   )r  r?   r@   r?   )r  r?   r  r?   r@   r?   )r  r?   r@   rA   )rH   )r  
list[dict]r  r   r  r   r@   r  r   )r
  r?   r  r  r   z'BaseVectorStorage'r!  r   rx  zlist[dict[str, Any]]r"  r   r@   r  )TN)r
  r?   rN  r  r  r  rO  rd   rJ  r   r@   r  )mixedN)r
  r?   r^  r  r_  z'QueryParam'r  r  r`  r?   ra  r   r@   r  )r   r   r@   r?   )ro  Iterable[str] | Nonerp  r  r@   r  )
rw  zSequence[str]rx  r   r   r?   ru  r   r@   r  )r~  r  r  r  r  r  r@   r  )rw  zIterable[str]r  zCollection[str]r@   r  )r  r?   r  r?   r@   r?   )r  r?   r@   ztuple[str, str])upload)r
  r?   r@   r?   )r   r?   r  r?   r  r?   r@   r?   )r  rN   r
  r?   r@   rN   )NN)r  r  r  r  r  r  r  r  r  r?   r  r  r  r  r@   zdict[str, Any])r  r  r@   ztuple[list[dict], list[dict]])
__future__r   r  r7   rO   r  r2  r   rf   r   logging.handlersrb   r   r5  r  dataclassesr   r   	functoolsr   hashlibr   typingr   r	   r
   r   r   r   r   r   r   numpyr  dotenvr   lightrag.constantsr   r   r   r   r   r   r   r   r  r  StreamHandlerr   r   rK   r   r   r  r   r   r   	formatterr   r   WARNINGrF   r  r  rC   rL   r[   r?   rq   lightrag.baserr   rs   rt   rc   r9   r}   r   r   r  Filterr   r   r   r   r   r  r  r  r  rN   r  r  r"  r  r  r  r  JSONEncoderr  r  r  r  r  r  r  r  r  r  r  r	  r  r  r!  r)  r  r  r  r  r  r  r  r  r  r  r   r  r+  r-  rU  ri  rm  rt  r|  r  r  r  r  r  r  r  r  r  r  r   r/   r$   <module>r     sb   "  
   
     	 	   !   
 
 
  	 	 	  RZZ >? -- 4 
		:	&    ')OW\\*!!!">?I  +
o&   '  # #GOO 4,. $ %
 &*)5)5)5 )5 	)5
 )5 $)5 
)5Z 47U111,01FJ11j JJ
  /		)W-335?.6  aqA' 'X  $ $C/C/C/ C/ 	C/
 C/L     t t tn+*// 	Y 	R R
i 
 #'# #zzz !z 	z
 z z zz7t,.8D,, 8v"J &- &-RC	 C<5<	  	
 ") 
   F # # #-:`"	<  %l0 	l0
 l0 l0 
l0h	  %& 	&
 & & 
&R. )	'
'
('
 '
 	'

 
'
T 26 $-1!%K5K5K5 /K5 	K5
 K5 +K5 K5 K5 K5 K5\(" */(DNUp
0  M%MM M 	Mn x-x-(x- $x- 	x-
 &x- x- x-v6
 6
z QQQ Q 	Q
 Q Qr !jjj j 	j
 j j jZ$&1E4 "     
    F+&++ + 	+\" "4/8ss!$s7:ssl'
` #'$(y y!y y 	y
 y  y "y yx?*?*"?*{b  H
NN[s   ;P? ?QQ