@@ -1197,15 +1197,8 @@ def test_ingest_mode_merged(tmp_path):
11971197 assert ds .count (regions = ["chrX:9032893-9032893" ]) == 0
11981198
11991199
1200- # Ok to skip is missing bcftools in Windows CI job
1201- @pytest .mark .skipif (
1202- os .environ .get ("CI" ) == "true"
1203- and platform .system () == "Windows"
1204- and shutil .which ("bcftools" ) is None ,
1205- reason = "no bcftools" ,
1206- )
1207- def test_ingest_with_stats_v3 (tmp_path ):
1208- # tiledbvcf.config_logging("debug")
1200+ @pytest .fixture
1201+ def test_stats_bgzipped_inputs (tmp_path ):
12091202 tmp_path_contents = os .listdir (tmp_path )
12101203 if "stats" in tmp_path_contents :
12111204 shutil .rmtree (os .path .join (tmp_path , "stats" ))
@@ -1221,23 +1214,46 @@ def test_ingest_with_stats_v3(tmp_path):
12211214 check = True ,
12221215 )
12231216 bgzipped_inputs = glob .glob (os .path .join (tmp_path , "stats" , "*.gz" ))
1224- # print(f"bgzipped inputs: {bgzipped_inputs}")
12251217 for vcf_file in bgzipped_inputs :
12261218 assert subprocess .run ("bcftools index " + vcf_file , shell = True ).returncode == 0
12271219 if "outputs" in tmp_path_contents :
12281220 shutil .rmtree (os .path .join (tmp_path , "outputs" ))
12291221 if "stats_test" in tmp_path_contents :
12301222 shutil .rmtree (os .path .join (tmp_path , "stats_test" ))
1231- # tiledbvcf.config_logging("trace")
1223+ return bgzipped_inputs
1224+
1225+
1226+ @pytest .fixture
1227+ def test_stats_sample_names (test_stats_bgzipped_inputs ): # fixture (despite the test_ prefix): sample names derived from the bgzipped input paths
1228+ assert len (test_stats_bgzipped_inputs ) == 8 # sanity check: exactly 8 bgzipped inputs are expected
1229+ return [os .path .basename (file ).split ("." )[0 ] for file in test_stats_bgzipped_inputs ] # sample name = file basename up to its first "."
1230+
1231+
1232+ @pytest .fixture
1233+ def test_stats_v3_ingestion (tmp_path , test_stats_bgzipped_inputs ):
1234+ assert len (test_stats_bgzipped_inputs ) == 8
1235+ # Create the "stats_test" dataset with variant stats (version 3) and allele counts enabled, ingest the inputs, then return it reopened in read mode
12321236 ds = tiledbvcf .Dataset (uri = os .path .join (tmp_path , "stats_test" ), mode = "w" )
12331237 ds .create_dataset (
12341238 enable_variant_stats = True , enable_allele_count = True , variant_stats_version = 3
12351239 )
1236- ds .ingest_samples (bgzipped_inputs )
1240+ ds .ingest_samples (test_stats_bgzipped_inputs )
12371241 ds = tiledbvcf .Dataset (uri = os .path .join (tmp_path , "stats_test" ), mode = "r" )
1238- sample_names = [os .path .basename (file ).split ("." )[0 ] for file in bgzipped_inputs ]
1239- data_frame = ds .read (
1240- samples = sample_names ,
1242+ return ds
1243+
1244+
1245+ # OK to skip if bcftools is missing in the Windows CI job
1246+ @pytest .mark .skipif (
1247+ os .environ .get ("CI" ) == "true"
1248+ and platform .system () == "Windows"
1249+ and shutil .which ("bcftools" ) is None ,
1250+ reason = "no bcftools" ,
1251+ )
1252+ def test_ingest_with_stats_v3 (
1253+ tmp_path , test_stats_v3_ingestion , test_stats_sample_names
1254+ ):
1255+ data_frame = test_stats_v3_ingestion .read (
1256+ samples = test_stats_sample_names ,
12411257 attrs = ["contig" , "pos_start" , "id" , "qual" , "info_TILEDB_IAF" , "sample_name" ],
12421258 set_af_filter = "<0.2" ,
12431259 )
@@ -1249,8 +1265,8 @@ def test_ingest_with_stats_v3(tmp_path):
12491265 data_frame [data_frame ["sample_name" ] == "second" ]["info_TILEDB_IAF" ].iloc [0 ][0 ]
12501266 == 0.9375
12511267 )
1252- data_frame = ds .read (
1253- samples = sample_names ,
1268+ data_frame = test_stats_v3_ingestion .read (
1269+ samples = test_stats_sample_names ,
12541270 attrs = ["contig" , "pos_start" , "id" , "qual" , "info_TILEDB_IAF" , "sample_name" ],
12551271 scan_all_samples = True ,
12561272 )
@@ -1260,25 +1276,45 @@ def test_ingest_with_stats_v3(tmp_path):
12601276 ]["info_TILEDB_IAF" ].iloc [0 ][0 ]
12611277 == 0.9375
12621278 )
1263- ds = tiledbvcf .Dataset (uri = os .path .join (tmp_path , "stats_test" ), mode = "r" )
1264- df = ds .read_variant_stats ("chr1:1-10000" )
1279+ df = test_stats_v3_ingestion .read_variant_stats ("chr1:1-10000" )
12651280 assert df .shape == (13 , 5 )
12661281 df = tiledbvcf .allele_frequency .read_allele_frequency (
12671282 os .path .join (tmp_path , "stats_test" ), "chr1:1-10000"
12681283 )
12691284 assert df .pos .is_monotonic_increasing
12701285 df ["an_check" ] = (df .ac / df .af ).round (0 ).astype ("int32" )
12711286 assert df .an_check .equals (df .an )
1272- df = ds .read_variant_stats ("chr1:1-10000" )
1287+ df = test_stats_v3_ingestion .read_variant_stats ("chr1:1-10000" )
12731288 assert df .shape == (13 , 5 )
12741289 df = df .to_pandas ()
1275- df = ds .read_allele_count ("chr1:1-10000" )
1290+ df = test_stats_v3_ingestion .read_allele_count ("chr1:1-10000" )
12761291 assert df .shape == (7 , 6 )
12771292 df = df .to_pandas ()
12781293 assert sum (df ["pos" ] == (0 , 1 , 1 , 2 , 2 , 2 , 3 )) == 7
12791294 assert sum (df ["count" ] == (8 , 5 , 3 , 4 , 2 , 2 , 1 )) == 7
12801295
12811296
1297+ @pytest .mark .skipif (
1298+ os .environ .get ("CI" ) == "true"
1299+ and platform .system () == "Windows"
1300+ and shutil .which ("bcftools" ) is None ,
1301+ reason = "no bcftools" ,
1302+ )
1303+ def test_delete_samples (tmp_path , test_stats_v3_ingestion , test_stats_sample_names ):
1304+ # Precondition: the two samples to delete ("second", "fifth") and one to keep ("third") are among the input sample names
1305+ assert "second" in test_stats_sample_names
1306+ assert "fifth" in test_stats_sample_names
1307+ assert "third" in test_stats_sample_names
1308+ ds = tiledbvcf .Dataset (uri = os .path .join (tmp_path , "stats_test" ), mode = "w" )
1309+ # Delete through a write-mode handle, then reopen in read mode to check which sample names remain
1310+ ds .delete_samples (["second" , "fifth" ])
1311+ ds = tiledbvcf .Dataset (uri = os .path .join (tmp_path , "stats_test" ), mode = "r" )
1312+ sample_names = ds .samples ()
1313+ assert "second" not in sample_names
1314+ assert "fifth" not in sample_names
1315+ assert "third" in sample_names
1316+
1317+
12821318# Ok to skip is missing bcftools in Windows CI job
12831319@pytest .mark .skipif (
12841320 os .environ .get ("CI" ) == "true"
0 commit comments