@@ -396,7 +396,9 @@ class TestGetBlockData:
396
396
@pytest .fixture
397
397
def prime_index (self , session : Session ):
398
398
person_1 = models .Person ()
399
+ person_2 = models .Person ()
399
400
session .add (person_1 )
401
+ session .add (person_2 )
400
402
session .flush ()
401
403
402
404
data = [
@@ -498,8 +500,36 @@ def prime_index(self, session: Session):
498
500
],
499
501
"birthdate" : "" ,
500
502
},
501
- models . Person () ,
503
+ person_2 ,
502
504
),
505
+ (
506
+ {
507
+ "name" : [
508
+ {
509
+ "given" : [
510
+ "Ferris" ,
511
+ ],
512
+ "family" : "Bueller" ,
513
+ }
514
+ ],
515
+ "birthdate" : "1974-11-07" ,
516
+ },
517
+ person_2
518
+ ),
519
+ (
520
+ {
521
+ "name" : [
522
+ {
523
+ "given" : [
524
+ "Ferris" ,
525
+ ],
526
+ "family" : "Bueller" ,
527
+ }
528
+ ],
529
+ "birthdate" : "1983-08-17" ,
530
+ },
531
+ person_2
532
+ )
503
533
]
504
534
for datum , person in data :
505
535
mpi_service .insert_patient (session , schemas .PIIRecord (** datum ), person = person )
@@ -553,6 +583,30 @@ def test_block_empty_block_key(self, session: Session, prime_index: None):
553
583
matches = mpi_service .get_block_data (session , schemas .PIIRecord (** data ), algorithm_pass )
554
584
assert len (matches ) == 0
555
585
586
+ def test_block_filter_mpi_candidates (self , session : Session , prime_index : None ):
587
+ """
588
+ Tests filtering candidates returned from the MPI for either blocking
589
+ agreement or missing information. Patients who are in pulled clusters
590
+ but have wrong blocking fields should be eliminated from consideration.
591
+ """
592
+ data = {
593
+ "name" : [
594
+ {
595
+ "given" : [
596
+ "Ferris" ,
597
+ ],
598
+ "family" : "Bueller" ,
599
+ }
600
+ ],
601
+ "birthdate" : "1974-11-07" ,
602
+ }
603
+ algorithm_pass = models .AlgorithmPass (blocking_keys = ["BIRTHDATE" , "FIRST_NAME" ])
604
+ # There will initially be 3 patients in this person cluster:
605
+ # one agrees on blocking, one has missing values, and one
606
+ # disagrees on blocking, so we should throw that one away
607
+ matches = mpi_service .get_block_data (session , schemas .PIIRecord (** data ), algorithm_pass )
608
+ assert len (matches ) == 2
609
+
556
610
def test_block_on_birthdate (self , session : Session , prime_index : None ):
557
611
data = {
558
612
"name" : [
@@ -600,7 +654,8 @@ def test_block_on_first_name(self, session: Session, prime_index: None):
600
654
}
601
655
algorithm_pass = models .AlgorithmPass (blocking_keys = ["FIRST_NAME" ])
602
656
matches = mpi_service .get_block_data (session , schemas .PIIRecord (** data ), algorithm_pass )
603
- assert len (matches ) == 5
657
+ # One candidate in MPI person_1 is a Bill, so it will be ruled out
658
+ assert len (matches ) == 4
604
659
605
660
def test_block_on_birthdate_and_first_name (self , session : Session , prime_index : None ):
606
661
data = {
@@ -617,7 +672,8 @@ def test_block_on_birthdate_and_first_name(self, session: Session, prime_index:
617
672
}
618
673
algorithm_pass = models .AlgorithmPass (blocking_keys = ["BIRTHDATE" , "FIRST_NAME" ])
619
674
matches = mpi_service .get_block_data (session , schemas .PIIRecord (** data ), algorithm_pass )
620
- assert len (matches ) == 4
675
+ # One candidate in MPI person_1 is just a Bill, so it is ruled out
676
+ assert len (matches ) == 3
621
677
622
678
def test_block_on_birthdate_first_name_and_last_name (self , session : Session , prime_index : None ):
623
679
data = {
@@ -636,7 +692,8 @@ def test_block_on_birthdate_first_name_and_last_name(self, session: Session, pri
636
692
blocking_keys = ["BIRTHDATE" , "FIRST_NAME" , "LAST_NAME" ]
637
693
)
638
694
matches = mpi_service .get_block_data (session , schemas .PIIRecord (** data ), algorithm_pass )
639
- assert len (matches ) == 3
695
+ # One patient in MPI person_1 is just a Bill, so it is ruled out
696
+ assert len (matches ) == 2
640
697
data = {
641
698
"name" : [
642
699
{
@@ -649,7 +706,9 @@ def test_block_on_birthdate_first_name_and_last_name(self, session: Session, pri
649
706
"birthdate" : "Jan 1 1980" ,
650
707
}
651
708
matches = mpi_service .get_block_data (session , schemas .PIIRecord (** data ), algorithm_pass )
652
- assert len (matches ) == 3
709
+ # Blocking uses feature_iter, which yields only the first `given` for a
710
+ # single name object, so only the patient with 'Bill' is caught
711
+ assert len (matches ) == 1
653
712
data = {
654
713
"name" : [
655
714
{
@@ -681,7 +740,8 @@ def test_block_on_multiple_names(self, session: Session, prime_index: None):
681
740
kwargs = {},
682
741
)
683
742
matches = mpi_service .get_block_data (session , schemas .PIIRecord (** data ), algorithm_pass )
684
- assert len (matches ) == 5
743
+ # One of the patients in MPI person_1 is a Bill, so it is excluded
744
+ assert len (matches ) == 4
685
745
686
746
def test_block_missing_keys (self , session : Session , prime_index : None ):
687
747
data = {"birthdate" : "01/01/1980" }
0 commit comments