@@ -167,36 +167,34 @@ def test_ragas_faithfulness_submits_evaluation_on_span_with_custom_keys(ragas, L
167
167
168
168
169
169
@pytest.mark.vcr_logs
def test_ragas_faithfulness_emits_traces(ragas, llmobs, llmobs_events):
    """Check that one faithfulness evaluation emits the expected ragas trace.

    Exactly seven ``dd-ragas.*`` spans are expected: one root span plus six
    descendants (input extraction, create statements and its prompt, create
    verdicts and its prompt, and create score).

    ``llmobs_events`` is filtered by name prefix so that only the evaluator's
    own spans are inspected.
    NOTE(review): presumably the fixture also captures non-ragas events (e.g.
    the LLM span under evaluation) -- confirm against the fixture definition.
    """
    rf_evaluator = RagasFaithfulnessEvaluator(llmobs)
    rf_evaluator.evaluate(_llm_span_with_expected_ragas_inputs_in_prompt())

    # Order by start time so the positional assertions below are stable;
    # the root span is expected to sort first.
    ragas_spans = sorted(
        (event for event in llmobs_events if event["name"].startswith("dd-ragas.")),
        key=lambda event: event["start_ns"],
    )
    assert len(ragas_spans) == 7

    # check name, io, span kinds match
    assert ragas_spans == _expected_ragas_spans()

    # verify the trace structure
    root_span = ragas_spans[0]
    root_span_id = root_span["span_id"]
    assert root_span["parent_id"] == "undefined"
    assert root_span["meta"] is not None
    assert root_span["meta"]["metadata"] is not None
    assert isinstance(root_span["meta"]["metadata"]["faithfulness_list"], list)
    assert isinstance(root_span["meta"]["metadata"]["statements"], list)

    # every other span must belong to the root span's trace
    root_span_trace_id = root_span["trace_id"]
    for child_span in ragas_spans[1:]:
        assert child_span["trace_id"] == root_span_trace_id

    # direct children of the root span
    assert ragas_spans[1]["parent_id"] == root_span_id  # input extraction (task)
    assert ragas_spans[2]["parent_id"] == root_span_id  # create statements (workflow)
    assert ragas_spans[4]["parent_id"] == root_span_id  # create verdicts (workflow)
    assert ragas_spans[6]["parent_id"] == root_span_id  # create score (task)

    # prompt tasks are nested under their respective workflow spans
    assert ragas_spans[3]["parent_id"] == ragas_spans[2]["span_id"]  # create statements prompt (task)
    assert ragas_spans[5]["parent_id"] == ragas_spans[4]["span_id"]  # create verdicts prompt (task)
200
198
201
199
202
200
def test_llmobs_with_faithfulness_emits_traces_and_evals_on_exit (mock_writer_logs , run_python_code_in_subprocess ):
0 commit comments