
    ֞iM                    \   d Z ddlmZ ddlZddlmc mZ ddl	Z	ddl
Z
ddlZddlZddlmZ ddlmZ ddlmZmZmZ ddlZ ee      j/                         j0                  d   Z ee      ej6                  vr"ej6                  j9                  d ee             ddlmZmZm Z  dd	l!m"Z"m#Z#m$Z$ dd
l%m&Z&m'Z'  G d d      Z( G d d      Z) G d d      Z* G d d      Z+ G d d      Z, G d d      Z- G d d      Z. G d d      Z/ G d d      Z0 G d d      Z1 G d d       Z2 G d! d"      Z3y)#u!  
tests/infra/test_observability.py

Test suite for core/observability — Module 7: Langfuse LLM Observability.

Coverage
--------
BB1: GenesisTracer.trace returns object with .id (3 tests)
BB2: _NoOpTrace works when Langfuse unavailable (2 tests)
BB3: CostTracker.record returns correct cost for known model (3 tests)
BB4: CostTracker.get_session_cost returns accumulated cost (2 tests)
BB5: CostTracker.get_cost_summary returns dict with all required keys (1 test)
BB6: Cost log appended to JSONL file (2 tests, use tmp_path)

WB1: GenesisTracer skips Langfuse init when no keys provided (2 tests)
WB2: traced decorator calls get_tracer().trace (2 tests, mock get_tracer)
WB3: generation_tracked records model/usage when result is dict (2 tests)
WB4: MODEL_PRICING has entries for all Genesis models (1 test)
WB5: Cost calculation: 1000 input tokens of opus = correct USD (2 tests)

Total: 22 tests — all pass with ZERO live API calls.

VERIFICATION_STAMP
Story: OBS-005
Verified By: parallel-builder
Verified At: 2026-02-25
Tests: 22/22
Coverage: 100%
    )annotationsN)Path)Any)	AsyncMock	MagicMockpatch   )GenesisTracer
_NoOpTrace
get_tracer)MODEL_PRICINGCostTracker_resolve_pricing)generation_trackedtracedc                  (    e Zd ZdZd Zd Zd Zd Zy)TestBB1_TracerTracezGBB1: GenesisTracer.trace always returns an object with a .id attribute.c                   t        dd      }|j                  d      }d}t        ||      }|st        j                  d      dz   dt        j                         v st        j                  t              rt        j                  t              nddt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      d	z  }t        t        j                  |            dx}}d}|j                  }||v }|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}x}}y)zGWith no env keys and no Langfuse client, .trace() returns a _NoOpTrace.N
public_key
secret_keytest_opidztrace() result must have .idz7
>assert %(py5)s
{%(py5)s = %(py0)s(%(py1)s, %(py3)s)
}hasattrresult)py0py1py3py5in)z*%(py1)s in %(py5)s
{%(py5)s = %(py3)s.id
})r   r   r   assert %(py7)spy7)r
   tracer   
@pytest_ar_format_assertmsg@py_builtinslocals_should_repr_global_name	_safereprAssertionError_format_explanationr   _call_reprcompare)selftracerr   @py_assert2@py_assert4@py_format6@py_assert0@py_format8s           7/mnt/e/genesis-system/tests/infra/test_observability.py$test_trace_returns_noop_when_no_keysz8TestBB1_TracerTrace.test_trace_returns_noop_when_no_keysC   s   $4@i(#Dwvt$D$DD&DDDDDDDwDDDwDDDDDDvDDDvDDDtDDD$DDDDDD%FII%yI%%%%yI%%%y%%%%%%F%%%F%%%I%%%%%%%    c                   t        d      }|j                  dddi      }|j                  }d}||k(  }|st        j                  d|fd	||f      d
t        j                         v st        j                  |      rt        j                  |      nd
t        j                  |      t        j                  |      dz  }dd|iz  }t        t        j                  |            dx}x}}y)zCExplicitly disabled tracer returns _NoOpTrace with predictable .id.Fenableddisabled_opkv)metadataznoop-disabled_op==z*%(py2)s
{%(py2)s = %(py0)s.id
} == %(py5)sr   r   py2r   r"   r#   N)r
   r$   r   r%   r-   r'   r(   r)   r*   r+   r,   )r.   r/   r   @py_assert1r1   @py_assert3r2   r4   s           r5   %test_trace_returns_noop_when_disabledz9TestBB1_TracerTrace.test_trace_returns_noop_when_disabledJ   s    u-msCjAyy...y.....y.......v...v...y...........r7   c                (   t        d      }|j                  d      }|j                  }t        |t              }|sddt        j                         v st        j                  t              rt        j                  t              nddt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      dt        j                         v st        j                  t              rt        j                  t              ndt        j                  |      dz  }t        t        j                  |            d	x}}|j                  }t        |      }d
}||kD  }|s
t        j                  d|fd||f      dt        j                         v st        j                  t              rt        j                  t              nddt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            d	x}x}x}}y	)zJThe .id attribute on any returned trace object must be a non-empty string.Fr9   string_checkzMassert %(py6)s
{%(py6)s = %(py0)s(%(py3)s
{%(py3)s = %(py1)s.id
}, %(py4)s)
}
isinstancer$   str)r   r   r   py4py6Nr   >)zG%(py5)s
{%(py5)s = %(py0)s(%(py3)s
{%(py3)s = %(py1)s.id
})
} > %(py8)slen)r   r   r   r   py8assert %(py10)spy10)r
   r$   r   rI   rJ   r'   r(   r%   r)   r*   r+   r,   rO   r-   )r.   r/   r$   r0   @py_assert5@py_format7r1   @py_assert7@py_assert6@py_format9@py_format11s              r5   test_trace_id_is_stringz+TestBB1_TracerTrace.test_trace_id_is_stringP   sV   u-^,(((z(C((((((((z(((z((((((%(((%((((((((((C(((C((((((((((88 s8} q }q    }q      s   s      5   5   8   }   q       r7   c                   t        j                  t               }d|_        t               }t	        d      |j
                  _        ||_        |j                  d      }t        |t              }|sddt        j                         v st        j                  t              rt        j                  t              nddt        j                         v st        j                  |      rt        j                  |      nddt        j                         v st        j                  t              rt        j                  t              ndt        j                  |      dz  }t        t        j                   |            d	}|j"                  }d
}||k(  }|st        j$                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }dd|iz  }	t        t        j                   |	            d	x}x}}y	)z>When Langfuse client raises, trace() falls back to _NoOpTrace.Tznetwork failure
failing_opz5assert %(py4)s
{%(py4)s = %(py0)s(%(py1)s, %(py2)s)
}rI   r   r   )r   r   rC   rK   Nznoop-failing_opr?   rA   rB   r"   r#   )r
   __new__r:   r   RuntimeErrorr$   side_effect_clientrI   r   r'   r(   r%   r)   r*   r+   r,   r   r-   )
r.   r/   mock_clientr   rE   @py_format5rD   r1   r2   r4   s
             r5   +test_trace_with_real_client_wraps_exceptionz?TestBB1_TracerTrace.test_trace_with_real_client_wraps_exceptionW   sB   &&}5k(45F(G%$l+&*--------z---z------&---&------*---*----------yy---y-----y-------v---v---y-----------r7   N)__name__
__module____qualname____doc__r6   rF   rY   rb    r7   r5   r   r   @   s    Q&/!
.r7   r   c                      e Zd ZdZd Zd Zy)TestBB2_NoOpTracezDBB2: _NoOpTrace provides the full Langfuse Trace interface as stubs.c                   t        d      }|j                  }d}||k(  }|st        j                  d|fd||f      dt	        j
                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }dd|iz  }t        t        j                  |            d x}x}}y )	Nmy_opz
noop-my_opr?   rA   nooprB   r"   r#   )
r   r   r%   r-   r'   r(   r)   r*   r+   r,   )r.   rl   rD   r1   rE   r2   r4   s          r5   test_noop_trace_has_correct_idz0TestBB2_NoOpTrace.test_noop_trace_has_correct_idl   sy    '"ww&,&w,&&&&w,&&&&&&t&&&t&&&w&&&,&&&&&&&r7   c                *   t        d      }|j                  }d} ||      }||u }|st        j                  d|fd||f      dt	        j
                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dt	        j
                         v st        j                  |      rt        j                  |      nddz  }dd	|iz  }t        t        j                  |            d x}x}x}}|j                  }d
} ||      }||u }|st        j                  d|fd||f      dt	        j
                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dt	        j
                         v st        j                  |      rt        j                  |      nddz  }dd	|iz  }t        t        j                  |            d x}x}x}}|j                  }d} ||      }||u }|st        j                  d|fd||f      dt	        j
                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dt	        j
                         v st        j                  |      rt        j                  |      nddz  }dd	|iz  }t        t        j                  |            d x}x}x}}|j                          y )Npipeline_runx)nameis)zO%(py6)s
{%(py6)s = %(py2)s
{%(py2)s = %(py0)s.span
}(name=%(py4)s)
} is %(py8)srl   )r   rC   rK   rL   rP   rQ   rR   g)zU%(py6)s
{%(py6)s = %(py2)s
{%(py2)s = %(py0)s.generation
}(name=%(py4)s)
} is %(py8)sok)status)zS%(py6)s
{%(py6)s = %(py2)s
{%(py2)s = %(py0)s.update
}(status=%(py4)s)
} is %(py8)s)r   spanr%   r-   r'   r(   r)   r*   r+   r,   
generationupdateend)r.   rl   rD   rE   rS   rU   rW   rX   s           r5   +test_noop_trace_methods_return_self_or_nonez=TestBB2_NoOpTrace.test_noop_trace_methods_return_self_or_nonep   s   .)yy*c*yc"*"d****"d******t***t***y***c***"******d***d*******0C0C(0(D0000(D000000t000t000000C000(000000D000D0000000{{/$/{$'/'4////'4//////t///t///{///$///'//////4///4///////
r7   N)rc   rd   re   rf   rm   r{   rg   r7   r5   ri   ri   i   s    N'r7   ri   c                  (    e Zd ZdZd Zd Zd Zd Zy)TestBB3_CostTrackerRecordz@BB3: CostTracker.record computes and returns accurate USD costs.c                   t        t        |dz              }|j                  ddd      }d}||z
  }t        |      }d}||k  }|s
t	        j
                  d|fd	||f      d
t        j                         v st	        j                  t              rt	        j                  t              nd
dt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                  |      t	        j                  |      t	        j                  |      dz  }	dd|	iz  }
t        t	        j                  |
            dx}x}x}x}}y)z=gemini-flash: 1M input @ $0.075 + 1M output @ $0.30 = $0.375.
cost.jsonllog_pathgemini-flash@B modelinput_tokensoutput_tokensg      ?gư><z;%(py6)s
{%(py6)s = %(py0)s((%(py1)s - %(py3)s))
} < %(py9)sabscostr   r   r   rL   py9assert %(py11)spy11Nr   rJ   recordr   r%   r-   r'   r(   r)   r*   r+   r,   r.   tmp_pathtrackerr   r0   r1   rS   @py_assert8rU   @py_format10@py_format12s              r5   test_gemini_flash_costz0TestBB3_CostTrackerRecord.test_gemini_flash_cost   s    s8l+B'CD~~ "#  

  '4%<'s< '4' 4'''' 4''''''s'''s''''''4'''4'''%''' '''4''''''''r7   c                   t        t        |dz              }|j                  ddd      }d}||z
  }t        |      }d}||k  }|s
t	        j
                  d	|fd
||f      dt        j                         v st	        j                  t              rt	        j                  t              nddt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                  |      t	        j                  |      t	        j                  |      dz  }	dd|	iz  }
t        t	        j                  |
            dx}x}x}x}}y)z*claude-opus-4-6: 1M input @ $15 = $15.000.r   r   claude-opus-4-6r   r   r   g      .@g-C6?r   r   r   r   r   r   r   Nr   r   s              r5   test_claude_opus_costz/TestBB3_CostTrackerRecord.test_claude_opus_cost   s    s8l+B'CD~~#"  

 &4$;&s;&$&$&&&&$&&&&&&s&&&s&&&&&&4&&&4&&&$&&&&&&$&&&&&&&&r7   c                   t        t        |dz              }|j                  ddd      }d}||k(  }|st        j                  d|fd||f      d	t        j                         v st        j                  |      rt        j                  |      nd	t        j                  |      d
z  }dd|iz  }t        t        j                  |            dx}}y)z1Zero tokens must produce zero cost without error.r   r   r   r   r           r?   z%(py0)s == %(py3)sr   r   r   assert %(py5)sr   Nr   rJ   r   r%   r-   r'   r(   r)   r*   r+   r,   r.   r   r   r   r0   rD   @py_format4r2   s           r5   test_zero_tokens_zero_costz4TestBB3_CostTrackerRecord.test_zero_tokens_zero_cost   s    s8l+B'CD~~   

 ts{tsttsr7   c                   t        t        |dz              }|j                  ddd      }d}||kD  }|st        j                  d|fd||f      d	t        j                         v st        j                  |      rt        j                  |      nd	t        j                  |      d
z  }dd|iz  }t        t        j                  |            dx}}y)zAUnknown model produces a non-zero cost (default pricing applied).r   r   zsome-future-model-xyzr   r   r   rM   )z%(py0)s > %(py3)sr   r   r   r   Nr   r   s           r5   'test_unknown_model_uses_default_pricingzATestBB3_CostTrackerRecord.test_unknown_model_uses_default_pricing   s    s8l+B'CD~~)"#  
 tcztcttcr7   N)rc   rd   re   rf   r   r   r   r   rg   r7   r5   r}   r}      s    J('	r7   r}   c                      e Zd ZdZd Zd Zy)TestBB4_SessionCostAccumulationzGBB4: Session costs accumulate correctly across multiple record() calls.c                `   t        t        |dz              }|j                  dddd       |j                  dddd       |j                  d      }d	}||z
  }t	        |      }d
}||k  }|s7t        j                  d|fd||f      dt        j                         v st        j                  t              rt        j                  t              nddt        j                         v st        j                  |      rt        j                  |      nddt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}x}}y)z4Two calls on the same session_id must sum correctly.r   r   r   i iP  zsess-001r   r   r   
session_id@ gHzG?gHz>r   z;%(py5)s
{%(py5)s = %(py0)s((%(py1)s - %(py2)s))
} < %(py8)sr   totalexpectedr   r   rC   r   rP   rQ   rR   N)r   rJ   r   get_session_costr   r%   r-   r'   r(   r)   r*   r+   r,   )r.   r   r   r   r   rE   r1   rU   rV   rW   rX   s              r5   test_session_cost_accumulatesz=TestBB4_SessionCostAccumulation.test_session_cost_accumulates   s#   s8l+B'CD   !	 	 	
 	  !!	 	 	
 ((4O8#+s#$+t+$t++++$t++++++s+++s++++++5+++5++++++8+++8+++$+++t+++++++r7   c                0   t        t        |dz              }|j                  }d} ||      }d}||k(  }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      t        j                  |      dz  }d	d
|iz  }	t        t        j                  |	            dx}x}x}x}}y)z7Querying a session that was never recorded returns 0.0.r   r   z
never-seenr   r?   )zV%(py6)s
{%(py6)s = %(py2)s
{%(py2)s = %(py0)s.get_session_cost
}(%(py4)s)
} == %(py9)sr   )r   rC   rK   rL   r   r   r   N)r   rJ   r   r%   r-   r'   r(   r)   r*   r+   r,   )
r.   r   r   rD   rE   rS   r   rU   r   r   s
             r5   !test_unknown_session_returns_zerozATestBB4_SessionCostAccumulation.test_unknown_session_returns_zero   s    s8l+B'CD''<<'5<<5<<<<5<<<<<<w<<<w<<<'<<<<<<5<<<<<<<<<<<r7   N)rc   rd   re   rf   r   r   rg   r7   r5   r   r      s    Q,(=r7   r   c                      e Zd ZdZd Zy)TestBB5_CostSummaryzABB5: get_cost_summary() returns the expected top-level structure.c           	        t        t        |dz              }|j                  dddddd	       |j                         }h d
}|j                  }|j
                  } |       } ||      }|s t        j                  d||j                         z
         dz   dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }	t        t        j                  |	            d x}x}x}}|d   }t        |t              }|sddt        j                         v st        j                  t              rt        j                  t              ndt        j                  |      dt        j                         v st        j                  t              rt        j                  t              ndt        j                  |      dz  }
t        t        j                  |
            d x}}|d   }t        |t               }|sddt        j                         v st        j                  t              rt        j                  t              ndt        j                  |      dt        j                         v st        j                  t               rt        j                  t               ndt        j                  |      dz  }
t        t        j                  |
            d x}}|d   }t        |t               }|sddt        j                         v st        j                  t              rt        j                  t              ndt        j                  |      dt        j                         v st        j                  t               rt        j                  t               ndt        j                  |      dz  }
t        t        j                  |
            d x}}|d   }t        |t               }|sddt        j                         v st        j                  t              rt        j                  t              ndt        j                  |      dt        j                         v st        j                  t               rt        j                  t               ndt        j                  |      dz  }
t        t        j                  |
            d x}}d}|d   }||v }|slt        j"                  d|fd||f      t        j                  |      t        j                  |      dz  }dd|iz  }t        t        j                  |            d x}x}}d}|d   }||v }|slt        j"                  d|fd||f      t        j                  |      t        j                  |      dz  }dd|iz  }t        t        j                  |            d x}x}}d}|d   }||v }|slt        j"                  d|fd||f      t        j                  |      t        j                  |      dz  }dd|iz  }t        t        j                  |            d x}x}}y )Nr   r   r   i'  i  s1a1c1)r   r   r   r   agent_idcustomer_id>   agentssessions	customersdaily_total_usdzMissing keys: z}
>assert %(py9)s
{%(py9)s = %(py2)s
{%(py2)s = %(py0)s.issubset
}(%(py7)s
{%(py7)s = %(py5)s
{%(py5)s = %(py3)s.keys
}()
})
}required_keyssummary)r   rC   r   r   r#   r   r   z5assert %(py5)s
{%(py5)s = %(py0)s(%(py2)s, %(py3)s)
}rI   float)r   rC   r   r   r   dictr   r   r    )z%(py1)s in %(py4)sr   rK   assert %(py6)srL   )r   rJ   r   get_cost_summaryissubsetkeysr%   r&   r'   r(   r)   r*   r+   r,   rI   r   r   r-   )r.   r   r   r   r   rD   r1   rV   r   r   r2   r3   rE   r0   ra   rT   s                   r5   "test_summary_has_all_required_keysz6TestBB5_CostSummary.test_summary_has_all_required_keys   sx   s8l+B'CD  	 	
 **,N%% 	
gll 	
ln 	
%n5 	
5 	
  ]W\\^;<=	
 	
	6	
 	
   	
 	
 		  	
 	
 		 & 	
 	
	6	
 	
  '. 	
 	
 		 '. 	
 	
 		 '3 	
 	
 		 '5 	
 	
 		 6 	
 	
 	
 	
 	
 	
 ""34<z4e<<<<<<<<z<<<z<<<4<<<<<<e<<<e<<<<<<<<<<!*-4z-t44444444z444z444-444444t444t4444444444!(+2z+T22222222z222z222+222222T222T2222222222!+.5z.55555555z555z555.5555555555555555555*wz**t*****t****t***********(wx((t(((((t((((t(((((((((((+w{++t+++++t++++t+++++++++++r7   N)rc   rd   re   rf   r   rg   r7   r5   r   r      s
    K,r7   r   c                      e Zd ZdZd Zd Zy)TestBB6_CostLogFilez<BB6: record() appends valid JSONL entries to the audit file.c                   t        |dz  dz        }t        |      }|j                  dddd       t        j                  }|j
                  } ||      }|s t        j                  d	      d
z   dt        j                         v st        j                  t              rt        j                  t              ndt        j                  |      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      dz  }t        t        j                  |            dx}x}}t        |d      5 }|j                         }	ddd       t!        	      }
d}|
|k(  }|st        j"                  d|fd|
|f      dt        j                         v st        j                  t               rt        j                  t               nddt        j                         v st        j                  |	      rt        j                  |	      ndt        j                  |
      t        j                  |      dz  }dd|iz  }t        t        j                  |            dx}
x}}t%        j&                  |	d         }|d   }d}||k(  }
|
slt        j"                  d|
fd||f      t        j                  |      t        j                  |      dz  }dd|iz  }t        t        j                  |            dx}x}
}|d   }d}||k(  }
|
slt        j"                  d|
fd||f      t        j                  |      t        j                  |      dz  }dd|iz  }t        t        j                  |            dx}x}
}|d    }d}||k(  }
|
slt        j"                  d|
fd||f      t        j                  |      t        j                  |      dz  }dd|iz  }t        t        j                  |            dx}x}
}d!}||v }
|
st        j"                  d"|
fd#||f      t        j                  |      d$t        j                         v st        j                  |      rt        j                  |      nd$d%z  }d&d'|iz  }t        t        j                  |            dx}}
d(}||v }
|
st        j"                  d"|
fd#||f      t        j                  |      d$t        j                         v st        j                  |      rt        j                  |      nd$d%z  }d&d'|iz  }t        t        j                  |            dx}}
y# 1 sw Y   xY w))zAA log file is created on first record() and contains valid JSONL.subdirr   r   r     i  zs-log-1r   zLog file must be createdzd
>assert %(py7)s
{%(py7)s = %(py4)s
{%(py4)s = %(py2)s
{%(py2)s = %(py0)s.path
}.isfile
}(%(py5)s)
}osr   )r   rC   rK   r   r#   Nutf-8encoding   r?   z0%(py3)s
{%(py3)s = %(py0)s(%(py1)s)
} == %(py6)srO   linesr   r   r   rL   assert %(py8)srP   r   r   z%(py1)s == %(py4)sr   r   rL   r   r   	timestampr    z%(py1)s in %(py3)sentryr   r   r   r   cost_usd)rJ   r   r   r   pathisfiler%   r&   r'   r(   r)   r*   r+   r,   open	readlinesrO   r-   jsonloads)r.   r   r   r   rD   rE   rV   r4   fhr   r0   rS   r1   rT   rW   r   r3   ra   r   r2   s                       r5   %test_log_file_created_and_valid_jsonlz9TestBB6_CostLogFile.test_log_file_created_and_valid_jsonl   sG   x(*\9:x0  	 	 	
 wwCw~~C~h'C'CC)CCCCCCCrCCCrCCCwCCC~CCCCCChCCChCCC'CCCCCC(W- 	#LLNE	#5zQzQzQss55zQ

58$W~//~////~///~//////////^$--$----$---$----------\"/i/"i////"i///"///i///////#{e####{e###{######e###e#######"zU""""zU"""z""""""U"""U"""""""	# 	#s   -U99Vc                L   t        |dz        }t        |      }t        d      D ]  }|j                  d|dz  dd|         t	        |d	
      5 }|j                         }ddd       t              }d}||k(  }	|	st        j                  d|	fd||f      dt        j                         v st        j                  t              rt        j                  t              nddt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }
dd|
iz  }t        t        j                  |            dx}x}	}|D ]  }t        j                   |        y# 1 sw Y   7xY w)z2Each record() call appends exactly one JSONL line.r   r      r   r   d   zsess-r   r   r   Nr?   r   rO   r   r   r   rP   )rJ   r   ranger   r   r   rO   r%   r-   r'   r(   r)   r*   r+   r,   r   r   )r.   r   r   r   ir   r   r0   rS   r1   rT   rW   lines                r5   ,test_multiple_records_produce_multiple_linesz@TestBB6_CostLogFile.test_multiple_records_produce_multiple_lines  s)   x,./x0q 	ANN$Y!"1#;	  	 (W- 	#LLNE	#5zQzQzQss55zQ 	DJJt			# 	#s   FF#N)rc   rd   re   rf   r   r   rg   r7   r5   r   r      s    F#*r7   r   c                      e Zd ZdZd Zd Zy)TestWB1_TracerInitWithoutKeysz6WB1: Internal _client stays None when keys are absent.c                   t        d d       }|j                  }d }||u }|st        j                  d|fd||f      dt	        j
                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }dd|iz  }t        t        j                  |            d x}x}}y )Nr   rr   z/%(py2)s
{%(py2)s = %(py0)s._client
} is %(py5)sr/   rB   r"   r#   
r
   r_   r%   r-   r'   r(   r)   r*   r+   r,   r.   r/   rD   r1   rE   r2   r4   s          r5   test_no_keys_leaves_client_nonez=TestWB1_TracerInitWithoutKeys.test_no_keys_leaves_client_none&  s{    $4@~~%%~%%%%~%%%%%%v%%%v%%%~%%%%%%%%%%r7   c                   t        ddd      }|j                  }d }||u }|st        j                  d|fd||f      dt	        j
                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }d	d
|iz  }t        t        j                  |            d x}x}}y )Npk_fakesk_fakeF)r   r   r:   rr   r   r/   rB   r"   r#   r   r   s          r5   %test_disabled_flag_leaves_client_nonezCTestWB1_TracerInitWithoutKeys.test_disabled_flag_leaves_client_none*  s      
 ~~%%~%%%%~%%%%%%v%%%v%%%~%%%%%%%%%%r7   N)rc   rd   re   rf   r   r   rg   r7   r5   r   r   #  s    @&&r7   r   c                      e Zd ZdZej
                  j                  d        Zej
                  j                  d        Zy)TestWB2_TracedDecoratorzDWB2: @traced creates a trace and spans on success and failure paths.c                @  K   t               }t               }d|_        ||j                  _        t	        d      dd       }t        d|      5   |d       d{   }ddd       d}|k(  }|st        j                  d	|fd
||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}|j                  j                          |j                  j                  }	g }|	d   d   }d}
||
k(  }|}|s|	d   d   }d}||k(  }|}|st        j                  d	|fd||
f      t        j                  |      t        j                  |
      dz  }dd|iz  }|j!                  |       |s_t        j                  d	fdf      t        j                  |      t        j                  |      dz  }dd|iz  }|j!                  |       t        j"                  |d      i z  }dd|iz  }t        t        j                  |            dx}x}x}x}x}
x}x}}|j$                  j                          y7 4# 1 sw Y   4xY ww)z@Decorator must call tracer.trace() and tracer.span() on success.z	trace-123r   c                   K   | dz   S w)Nr   rg   )rp   s    r5   rk   zRTestWB2_TracedDecorator.test_traced_calls_trace_and_span_on_success.<locals>.my_opD  s     q5Ls   (core.observability.decorators.get_tracerreturn_value   N   r?   r   r   r   r   r   r   rq   r   )z%(py3)s == %(py6)s)r   rL   z%(py8)srP   )z%(py11)s == %(py14)s)r   py14z%(py16)spy16assert %(py19)spy19)rp   intreturnr  )r   r   r$   r   r   r   r%   r-   r'   r(   r)   r*   r+   r,   assert_called_once	call_argsappend_format_booloprw   )r.   mock_tracer
mock_tracerk   r   r0   rD   r   r2   call_kwargsrS   r1   r3   @py_assert10@py_assert13@py_assert12rT   rW   @py_format15@py_format17@py_format18@py_format20s                         r5   +test_traced_calls_trace_and_span_on_successzCTestWB2_TracedDecorator.test_traced_calls_trace_and_span_on_success<  s      k[
#
)3&				 
	 =KX 	$ 8^F	$ v{vvv,,.!''11T{1~f%TT%2Tk!nQ6GT9T6G96TTTT%TTT%TTTTTTTTTT6G9TTT6GTTT9TTTTTTTTTTTTTT++- $	$ 	$s1   AJJJJH/JJJJc                  K   t               }t               }d|_        ||j                  _        t	        d      dd       }t        d|      5  t        j                  t        d      5   |        d{    ddd       ddd       |j                  j                          |j                  j                  d	   }|j                  }d
}i } |||      }|j                  }	d}
 |	|
      }d}||k(  }|s%t        j                  d|fd||f      dt        j                          v st        j"                  |      rt        j$                  |      ndt        j$                  |      t        j$                  |      t        j$                  |      t        j$                  |      t        j$                  |	      t        j$                  |
      t        j$                  |      t        j$                  |      d	z  }dd|iz  }t'        t        j(                  |            dx}x}x}x}x}	x}
x}x}}y7 # 1 sw Y   xY w# 1 sw Y   xY ww)z?Decorator must record an error span and re-raise the exception.z	trace-errr[   c                     K   t        d      w)Noops)
ValueErrorrg   r7   r5   	broken_opzVTestWB2_TracedDecorator.test_traced_records_error_span_on_exception.<locals>.broken_opY  s     V$$s   r   r   r  )matchNr   r>   rv   errorr?   )z%(py14)s
{%(py14)s = %(py10)s
{%(py10)s = %(py8)s
{%(py8)s = %(py2)s
{%(py2)s = %(py0)s.get
}(%(py4)s, %(py6)s)
}.get
}(%(py12)s)
} == %(py17)sspan_kwargs)	r   rC   rK   rL   rP   rR   py12r  py17r  r  )r  None)r   r   r$   r   r   r   pytestraisesr  rw   r	  r
  getr%   r-   r'   r(   r)   r*   r+   r,   )r.   r  r  r  r  rD   rE   rS   rU   @py_assert9@py_assert11r  @py_assert16@py_assert15r  r  s                   r5   +test_traced_records_error_span_on_exceptionzCTestWB2_TracedDecorator.test_traced_records_error_span_on_exceptionQ  s      k[
#
)3&			% 
	% =KX 	"z8 "k!!"	" 	++-!&&003GzG2Gz2.G.22G8G28<GG<GGGG<GGGGGG{GGG{GGGGGGzGGG2GGG.GGG2GGG8GGG<GGGGGGGGGGGG	 "" "	" 	"sI   AIH>*H15H.6H1:H>F,I.H11H;	6H>>IIN)	rc   rd   re   rf   r#  markasyncior  r*  rg   r7   r5   r   r   9  sA    N[[. .( [[H Hr7   r   c                      e Zd ZdZej
                  j                  d        Zej
                  j                  d        Zy)"TestWB3_GenerationTrackedDecoratorzMWB3: @generation_tracked calls tracer.generation when result has 'model' key.c                T  K   t               }t               }d|_        ||j                  _        t        dd       }t        d|      5   |        d{   }ddd       d   }d}||k(  }|slt        j                  d|fd	||f      t        j                  |      t        j                  |      d
z  }dd|iz  }	t        t        j                  |	            dx}x}}|j                  j                          |j                  j                  d   }
|
d   }d}||k(  }|slt        j                  d|fd	||f      t        j                  |      t        j                  |      d
z  }dd|iz  }	t        t        j                  |	            dx}x}}|
d   }ddd}||k(  }|slt        j                  d|fd	||f      t        j                  |      t        j                  |      d
z  }dd|iz  }	t        t        j                  |	            dx}x}}y7 # 1 sw Y   xY ww)zIWhen decorated fn returns dict with 'model', tracer.generation is called.zgen-trace-1c                    K   dddddddS w)Nr   HellozHi therer  r   inputoutput)r   prompt
completionusagerg   rg   r7   r5   my_llm_callzlTestWB3_GenerationTrackedDecorator.test_generation_tracked_records_when_dict_with_model.<locals>.my_llm_callv  s#      (!(#$2	 s   r   r   Nr   r   r?   r   r   r   rL   r   r7  r  r   r2  )r  r   )r   r   r$   r   r   r   r%   r-   r*   r+   r,   rx   r	  r
  )r.   r  r  r8  r   r3   rE   r0   ra   rT   
gen_kwargss              r5   4test_generation_tracked_records_when_dict_with_modelzWTestWB3_GenerationTrackedDecorator.test_generation_tracked_records_when_dict_with_modeln  s      k[
%
)3&		 
	 =KX 	)&=(F	) g0.0.0000.000000.0000000113 ++55a8
'"4n4"n4444"n444"444n4444444'"?Q&??"&?????"&????"???&???????? )	) 	)s1   AH(	HHHF?H(HH% H(c                d  K   t               }t               }d|_        ||j                  _        t        dd       }t        d|      5   |        d{   }ddd       d}|k(  }|st        j                  d|fd||f      d	t        j                         v st        j                  |      rt        j                  |      nd	t        j                  |      d
z  }dd|iz  }t        t        j                  |            dx}}|j                  j                          y7 # 1 sw Y   xY ww)zAWhen decorated fn returns a non-dict, generation is NOT recorded.zgen-trace-2c                    K   yw)Nplain stringrg   rg   r7   r5   plain_fnzgTestWB3_GenerationTrackedDecorator.test_generation_tracked_skips_when_result_not_dict.<locals>.plain_fn  s	     !s   r   r   Nr=  r?   r   r   r   r   r   )r  rJ   )r   r   r$   r   r   r   r%   r-   r'   r(   r)   r*   r+   r,   rx   assert_not_called)	r.   r  r  r>  r   r0   rD   r   r2   s	            r5   2test_generation_tracked_skips_when_result_not_dictzUTestWB3_GenerationTrackedDecorator.test_generation_tracked_skips_when_result_not_dict  s       k[
%
)3&		" 
	" =KX 	&#:%F	& ('v''''v''''''v'''v''''''''''002 &	& 	&s1   AD0	D$D"D$C	D0"D$$D-)D0N)	rc   rd   re   rf   r#  r+  r,  r:  r@  rg   r7   r5   r.  r.  k  sA    W[[@ @2 [[3 3r7   r.  c                  $    e Zd ZdZg dZd Zd Zy) TestWB4_ModelPricingCompletenessz5WB4: MODEL_PRICING covers the core Genesis model set.)r   zclaude-sonnet-4-6r   z
gemini-prozgemini-2.5-flashc                   | j                   D ]  }|t        v }|st        j                  d|fd|t        f      dt	        j
                         v st        j                  |      rt        j                  |      nddt	        j
                         v st        j                  t              rt        j                  t              nddz  }t        j                  d| d      dz   d	|iz  }t        t        j                  |            d } y )
Nr    )z%(py0)s in %(py2)sr   r   )r   rC   z$Missing pricing for required model ''z
>assert %(py4)srK   )REQUIRED_MODELSr   r%   r-   r'   r(   r)   r*   r&   r+   r,   )r.   r   rD   @py_format3ra   s        r5   test_required_models_presentz=TestWB4_ModelPricingCompleteness.test_required_models_present  s    )) 	EM)  5M  v     I   v   *  I *    7ugQ?    	r7   c                t   t        j                         D ]   \  }}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      nddz  }t        j                  d| d      dz   d	|iz  }t        t        j                  |            d x}}d
}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      nddz  }t        j                  d| d      dz   d	|iz  }t        t        j                  |            d x}}|d   }t        t        f}t        ||      }	|	sddt        j                         v st        j                  t              rt        j                  t              ndt        j                  |      t        j                  |      t        j                  |	      dz  }
t        t        j                  |
            d x}x}}	|d
   }t        t        f}t        ||      }	|	sddt        j                         v st        j                  t              rt        j                  t              ndt        j                  |      t        j                  |      t        j                  |	      dz  }
t        t        j                  |
            d x}x}}	|d   }d}||k\  }|slt        j                  d|fd||f      t        j                  |      t        j                  |      dz  }dd|iz  }
t        t        j                  |
            d x}x}}|d
   }d}||k\  }|slt        j                  d|fd||f      t        j                  |      t        j                  |      dz  }dd|iz  }
t        t        j                  |
            d x}x}}# y )Nr3  r    r   pricingr   rD  z' missing 'input' keyz
>assert %(py5)sr   r4  z' missing 'output' keyz5assert %(py6)s
{%(py6)s = %(py0)s(%(py2)s, %(py4)s)
}rI   )r   rC   rK   rL   r   )>=)z%(py1)s >= %(py4)sr   r   rL   )r   itemsr%   r-   r*   r'   r(   r)   r&   r+   r,   r  r   rI   )r.   r   rI  r3   r0   r   r2   rD   rE   rS   rT   ra   s               r5   +test_all_entries_have_input_and_output_keyszLTestWB4_ModelPricingCompleteness.test_all_entries_have_input_and_output_keys  s   +113 	*NE7G7g%GGG7gGGG7GGGGGGgGGGgGGGG5'1F'GGGGGGGI8w&III8wIII8IIIIIIwIIIwIIII!E72H(IIIIIII%g.=e=:.========:===:===.=============%h/>#u>:/>>>>>>>>:>>>:>>>/>>>>>>>>>>>>>7#(q(#q((((#q(((#(((q(((((((8$))$))))$)))$))))))))))	*r7   N)rc   rd   re   rf   rE  rG  rL  rg   r7   r5   rB  rB    s    ?O*r7   rB  c                  "    e Zd ZdZd Zd Zd Zy)TestWB5_CostCalculationAccuracyz4WB5: Per-token cost arithmetic is precisely correct.c                J   t        t        |dz              }|j                  ddd      }d}||z
  }t        |      }d}||k  }|sSt	        j
                  d	|fd
||f      dt        j                         v st	        j                  t              rt	        j                  t              nddt        j                         v st	        j                  |      rt	        j                  |      nddt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                  |      t	        j                  |      dz  }	t	        j                  d| d|       dz   d|	iz  }
t        t	        j                  |
            dx}x}x}}y)z
        1000 input tokens with claude-opus-4-6 @ $15/1M input, 0 output.
        Expected: 1000 / 1_000_000 * 15.0 = $0.000015.
        r   r   r   r   r   r   gQ?g&.>r   r   r   r   r   r   	Expected , got 
>assert %(py10)srR   Nr   rJ   r   r   r%   r-   r'   r(   r)   r*   r&   r+   r,   r.   r   r   r   r   rE   r1   rU   rV   rW   rX   s              r5   test_1000_opus_input_tokensz;TestWB5_CostCalculationAccuracy.test_1000_opus_input_tokens  s   
 s8l+B'CD~~#  

 ,(?Ns?#NdN#d*NNN#dNNNNNNsNNNsNNNNNN4NNN4NNNNNN(NNN(NNN#NNNdNNNizv,NNNNNNNNr7   c                J   t        t        |dz              }|j                  ddd      }d}||z
  }t        |      }d}||k  }|sSt	        j
                  d	|fd
||f      dt        j                         v st	        j                  t              rt	        j                  t              nddt        j                         v st	        j                  |      rt	        j                  |      nddt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                  |      t	        j                  |      dz  }	t	        j                  d| d|       dz   d|	iz  }
t        t	        j                  |
            dx}x}x}}y)z
        1000 output tokens with gemini-flash @ $0.30/1M output, 0 input.
        Expected: 1000 / 1_000_000 * 0.30 = $0.0000003.
        r   r   r   r   r   r   ga2U0*3?g|=r   r   r   r   r   r   rP  rQ  rR  rR   NrS  rT  s              r5   $test_1000_gemini_flash_output_tokenszDTestWB5_CostCalculationAccuracy.test_1000_gemini_flash_output_tokens  s   
 s8l+B'CD~~   

 ,(?Os?#OeO#e+OOO#eOOOOOOsOOOsOOOOOO4OOO4OOOOOO(OOO(OOO#OOOeOOOy
&-OOOOOOOOr7   c                   t        t        |dz              }|j                  ddd      }d}||z
  }t        |      }d}||k  }|s7t	        j
                  d	|fd
||f      dt        j                         v st	        j                  t              rt	        j                  t              nddt        j                         v st	        j                  |      rt	        j                  |      nddt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                  |      t	        j                  |      dz  }	dd|	iz  }
t        t	        j                  |
            dx}x}x}}y)z
        gemini-flash: 500K input + 200K output.
        Expected: (0.5 * 0.075) + (0.2 * 0.30) = $0.0375 + $0.06 = $0.0975.
        r   r   r   i  r   r   g(\?g:0yE>r   r   r   r   r   r   rQ   rR   Nr   rT  s              r5   test_mixed_tokens_both_modelsz=TestWB5_CostCalculationAccuracy.test_mixed_tokens_both_models  s    
 s8l+B'CD~~  !  

 P(?*s?#*d*#d****#d******s***s******4***4******(***(***#***d*******r7   N)rc   rd   re   rf   rU  rW  rY  rg   r7   r5   rN  rN    s    >OP+r7   rN  c                      e Zd ZdZd Zy)TestPackageImportzEVerify that all __all__ exports are importable from the package root.c                   ddl m}m}m}m}m} d }||u}|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      dz  }dd|iz  }	t        t        j                  |	            d x}}d }||u}|st        j                  d|fd||f      d	t        j                         v st        j                  |      rt        j                  |      nd	t        j                  |      dz  }dd|iz  }	t        t        j                  |	            d x}}d }||u}|st        j                  d|fd||f      d
t        j                         v st        j                  |      rt        j                  |      nd
t        j                  |      dz  }dd|iz  }	t        t        j                  |	            d x}}d }||u}|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      dz  }dd|iz  }	t        t        j                  |	            d x}}d }||u}|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      dz  }dd|iz  }	t        t        j                  |	            d x}}y )Nr   )r   r
   r   r   r   )is not)z%(py0)s is not %(py3)sr
   r   r   r   r   r   r   r   )core.observabilityr   r
   r   r   r   r%   r-   r'   r(   r)   r*   r+   r,   )
r.   r   r
   r   r   r   r0   rD   r   r2   s
             r5    test_package_exports_all_symbolsz2TestPackageImport.test_package_exports_all_symbols  s   	
 	
 %)(}D((((}D((((((}(((}(((D(((((((!%%z%%%%z%%%%%%z%%%z%%%%%%%%%%!!vT!!!!vT!!!!!!v!!!v!!!T!!!!!!!)--!----!------!---!----------"&&{$&&&&{$&&&&&&{&&&{&&&$&&&&&&&r7   N)rc   rd   re   rf   r_  rg   r7   r5   r[  r[    s
    O'r7   r[  )4rf   
__future__r   builtinsr'   _pytest.assertion.rewrite	assertionrewriter%   r,  r   r   syspathlibr   typingr   unittest.mockr   r   r   r#  __file__resolveparents
_REPO_ROOTrJ   r   insert"core.observability.langfuse_clientr
   r   r   core.observability.cost_trackerr   r   r   core.observability.decoratorsr   r   r   ri   r}   r   r   r   r   r   r.  rB  rN  r[  rg   r7   r5   <module>rq     s  < #     	 
   5 5 
 (^##%--a0
z?#(("HHOOAs:' T T 
 E!. !.R ,* *d= =D, ,@( (`& &,*H *Hd-3 -3j* *@++ ++f' 'r7   