4
4
5
5
import pytest
6
6
7
- from codegate .api . v1_models import PartialQuestions
7
+ from codegate .api import v1_models
8
8
from codegate .api .v1_processing import (
9
9
_get_partial_question_answer ,
10
10
_group_partial_messages ,
11
11
_is_system_prompt ,
12
12
parse_output ,
13
13
parse_request ,
14
+ remove_duplicate_alerts ,
14
15
)
15
16
from codegate .db .models import GetPromptWithOutputsRow
16
17
@@ -193,14 +194,14 @@ async def test_get_question_answer(request_msg_list, output_msg_str, row):
193
194
# 1) No subsets: all items stand alone
194
195
(
195
196
[
196
- PartialQuestions (
197
+ v1_models . PartialQuestions (
197
198
messages = ["A" ],
198
199
timestamp = datetime .datetime (2023 , 1 , 1 , 0 , 0 , 0 ),
199
200
message_id = "pq1" ,
200
201
provider = "providerA" ,
201
202
type = "chat" ,
202
203
),
203
- PartialQuestions (
204
+ v1_models . PartialQuestions (
204
205
messages = ["B" ],
205
206
timestamp = datetime .datetime (2023 , 1 , 1 , 0 , 0 , 1 ),
206
207
message_id = "pq2" ,
@@ -214,14 +215,14 @@ async def test_get_question_answer(request_msg_list, output_msg_str, row):
214
215
# - "Hello" is a subset of "Hello, how are you?"
215
216
(
216
217
[
217
- PartialQuestions (
218
+ v1_models . PartialQuestions (
218
219
messages = ["Hello" ],
219
220
timestamp = datetime .datetime (2022 , 1 , 1 , 0 , 0 , 0 ),
220
221
message_id = "pq1" ,
221
222
provider = "providerA" ,
222
223
type = "chat" ,
223
224
),
224
- PartialQuestions (
225
+ v1_models . PartialQuestions (
225
226
messages = ["Hello" , "How are you?" ],
226
227
timestamp = datetime .datetime (2022 , 1 , 1 , 0 , 0 , 10 ),
227
228
message_id = "pq2" ,
@@ -238,28 +239,28 @@ async def test_get_question_answer(request_msg_list, output_msg_str, row):
238
239
# superset.
239
240
(
240
241
[
241
- PartialQuestions (
242
+ v1_models . PartialQuestions (
242
243
messages = ["Hello" ],
243
244
timestamp = datetime .datetime (2023 , 1 , 1 , 10 , 0 , 0 ),
244
245
message_id = "pq1" ,
245
246
provider = "providerA" ,
246
247
type = "chat" ,
247
248
),
248
- PartialQuestions (
249
+ v1_models . PartialQuestions (
249
250
messages = ["Hello" ],
250
251
timestamp = datetime .datetime (2023 , 1 , 1 , 11 , 0 , 0 ),
251
252
message_id = "pq2" ,
252
253
provider = "providerA" ,
253
254
type = "chat" ,
254
255
),
255
- PartialQuestions (
256
+ v1_models . PartialQuestions (
256
257
messages = ["Hello" ],
257
258
timestamp = datetime .datetime (2023 , 1 , 1 , 12 , 0 , 0 ),
258
259
message_id = "pq3" ,
259
260
provider = "providerA" ,
260
261
type = "chat" ,
261
262
),
262
- PartialQuestions (
263
+ v1_models . PartialQuestions (
263
264
messages = ["Hello" , "Bye" ],
264
265
timestamp = datetime .datetime (2023 , 1 , 1 , 11 , 0 , 5 ),
265
266
message_id = "pq4" ,
@@ -281,68 +282,68 @@ async def test_get_question_answer(request_msg_list, output_msg_str, row):
281
282
(
282
283
[
283
284
# Superset
284
- PartialQuestions (
285
+ v1_models . PartialQuestions (
285
286
messages = ["hi" , "welcome" , "bye" ],
286
287
timestamp = datetime .datetime (2023 , 5 , 1 , 9 , 0 , 0 ),
287
288
message_id = "pqS1" ,
288
289
provider = "providerB" ,
289
290
type = "chat" ,
290
291
),
291
292
# Subsets for pqS1
292
- PartialQuestions (
293
+ v1_models . PartialQuestions (
293
294
messages = ["hi" , "welcome" ],
294
295
timestamp = datetime .datetime (2023 , 5 , 1 , 9 , 0 , 5 ),
295
296
message_id = "pqA1" ,
296
297
provider = "providerB" ,
297
298
type = "chat" ,
298
299
),
299
- PartialQuestions (
300
+ v1_models . PartialQuestions (
300
301
messages = ["hi" , "bye" ],
301
302
timestamp = datetime .datetime (2023 , 5 , 1 , 9 , 0 , 10 ),
302
303
message_id = "pqA2" ,
303
304
provider = "providerB" ,
304
305
type = "chat" ,
305
306
),
306
- PartialQuestions (
307
+ v1_models . PartialQuestions (
307
308
messages = ["hi" , "bye" ],
308
309
timestamp = datetime .datetime (2023 , 5 , 1 , 9 , 0 , 12 ),
309
310
message_id = "pqA3" ,
310
311
provider = "providerB" ,
311
312
type = "chat" ,
312
313
),
313
314
# Another superset
314
- PartialQuestions (
315
+ v1_models . PartialQuestions (
315
316
messages = ["apple" , "banana" , "cherry" ],
316
317
timestamp = datetime .datetime (2023 , 5 , 2 , 10 , 0 , 0 ),
317
318
message_id = "pqS2" ,
318
319
provider = "providerB" ,
319
320
type = "chat" ,
320
321
),
321
322
# Subsets for pqS2
322
- PartialQuestions (
323
+ v1_models . PartialQuestions (
323
324
messages = ["banana" ],
324
325
timestamp = datetime .datetime (2023 , 5 , 2 , 10 , 0 , 1 ),
325
326
message_id = "pqB1" ,
326
327
provider = "providerB" ,
327
328
type = "chat" ,
328
329
),
329
- PartialQuestions (
330
+ v1_models . PartialQuestions (
330
331
messages = ["apple" , "banana" ],
331
332
timestamp = datetime .datetime (2023 , 5 , 2 , 10 , 0 , 3 ),
332
333
message_id = "pqB2" ,
333
334
provider = "providerB" ,
334
335
type = "chat" ,
335
336
),
336
337
# Another item alone, not a subset nor superset
337
- PartialQuestions (
338
+ v1_models . PartialQuestions (
338
339
messages = ["xyz" ],
339
340
timestamp = datetime .datetime (2023 , 5 , 3 , 8 , 0 , 0 ),
340
341
message_id = "pqC1" ,
341
342
provider = "providerB" ,
342
343
type = "chat" ,
343
344
),
344
345
# Different provider => should remain separate
345
- PartialQuestions (
346
+ v1_models . PartialQuestions (
346
347
messages = ["hi" , "welcome" ],
347
348
timestamp = datetime .datetime (2023 , 5 , 1 , 9 , 0 , 10 ),
348
349
message_id = "pqProvDiff" ,
@@ -394,7 +395,7 @@ def test_group_partial_messages(pq_list, expected_group_ids):
394
395
# Execute
395
396
grouped = _group_partial_messages (pq_list )
396
397
397
- # Convert from list[list[PartialQuestions]] -> list[list[str]]
398
+ # Convert from list[list[v1_models. PartialQuestions]] -> list[list[str]]
398
399
# so we can compare with expected_group_ids easily.
399
400
grouped_ids = [[pq .message_id for pq in group ] for group in grouped ]
400
401
@@ -406,3 +407,125 @@ def test_group_partial_messages(pq_list, expected_group_ids):
406
407
is_matched = True
407
408
break
408
409
assert is_matched
410
+
411
+
412
def _make_alert(alert_id, prompt_id, trigger_string, trigger_type, trigger_category, second):
    """Build an Alert fixture stamped at 2023-01-01 12:00:<second> with no code snippet."""
    return v1_models.Alert(
        id=alert_id,
        prompt_id=prompt_id,
        code_snippet=None,
        trigger_string=trigger_string,
        trigger_type=trigger_type,
        trigger_category=trigger_category,
        timestamp=datetime.datetime(2023, 1, 1, 12, 0, second),
    )


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "alerts,expected_count,expected_ids",
    [
        # Case 1: alerts that are not secrets are expected to pass through unchanged.
        (
            [
                _make_alert("1", "p1", "test1", "other-alert", "info", 0),
                _make_alert("2", "p2", "test2", "other-alert", "info", 1),
            ],
            2,
            ["1", "2"],
        ),
        # Case 2: the same secret reported twice within 5 seconds; only the
        # newer alert is expected to survive.
        (
            [
                _make_alert("1", "p1", "secret1 Context xyz", "codegate-secrets", "critical", 0),
                _make_alert("2", "p2", "secret1 Context abc", "codegate-secrets", "critical", 3),
            ],
            1,
            ["2"],
        ),
        # Case 3: the same secret reported more than 5 seconds apart; both
        # alerts are expected to be kept.
        (
            [
                _make_alert("1", "p1", "secret1 Context xyz", "codegate-secrets", "critical", 0),
                _make_alert("2", "p2", "secret1 Context abc", "codegate-secrets", "critical", 6),
            ],
            2,
            ["1", "2"],
        ),
        # Case 4: secret and non-secret alerts mixed; the non-secret alert and
        # the newest secret alert are expected to remain.
        (
            [
                _make_alert("1", "p1", "secret1 Context xyz", "codegate-secrets", "critical", 0),
                _make_alert("2", "p2", "non-secret alert", "other-alert", "info", 1),
                _make_alert("3", "p3", "secret1 Context abc", "codegate-secrets", "critical", 3),
            ],
            2,
            ["2", "3"],
        ),
    ],
)
async def test_remove_duplicate_alerts(alerts, expected_count, expected_ids):
    """Check which alerts remain after ``remove_duplicate_alerts`` runs.

    Each scenario supplies the input alerts, how many should be left, and the
    ids of the survivors. Ids are compared after sorting, so the order of the
    returned alerts is not part of the assertion.
    """
    remaining = await remove_duplicate_alerts(alerts)
    assert len(remaining) == expected_count
    remaining_ids = sorted(alert.id for alert in remaining)
    assert remaining_ids == sorted(expected_ids)
0 commit comments