1
1
/* vcfconvert.c -- convert between VCF/BCF and related formats.
2
2
3
- Copyright (C) 2013-2021 Genome Research Ltd.
3
+ Copyright (C) 2013-2023 Genome Research Ltd.
4
4
5
5
Author: Petr Danecek <[email protected] >
6
6
@@ -59,7 +59,7 @@ struct _args_t
59
59
bcf_hdr_t * header ;
60
60
void (* convert_func )(struct _args_t * );
61
61
struct {
62
- int total , skipped , hom_rr , het_ra , hom_aa , het_aa , missing ;
62
+ int total , skipped , hom_rr , het_ra , hom_aa , het_aa , missing ;
63
63
} n ;
64
64
kstring_t str ;
65
65
int32_t * gts ;
@@ -160,7 +160,7 @@ static int _set_chrom_pos_ref_alt(tsv_t *tsv, bcf1_t *rec, void *usr)
160
160
// REF,ALT
161
161
args -> str .l = 0 ;
162
162
se = ++ ss ;
163
- while ( se < tsv -> se && * se != '_' ) se ++ ;
163
+ while ( se < tsv -> se && * se != '_' ) se ++ ;
164
164
if ( * se != '_' ) return -1 ;
165
165
kputsn (ss ,se - ss ,& args -> str );
166
166
ss = ++ se ;
@@ -202,14 +202,14 @@ static int tsv_setter_chrom_pos_ref_alt_or_id(tsv_t *tsv, bcf1_t *rec, void *usr
202
202
{
203
203
args_t * args = (args_t * )usr ;
204
204
if ( _set_chrom_pos_ref_alt (tsv ,rec ,usr )== 0 ) return 0 ;
205
- rec -> pos = -1 ; // mark the record as unset
205
+ rec -> pos = CSI_COOR_EMPTY ; // mark the record as unset
206
206
if ( !args -> output_vcf_ids ) return 0 ;
207
207
return tsv_setter_id (tsv ,rec ,usr );
208
208
}
209
209
static int tsv_setter_chrom_pos_ref_alt_id_or_die (tsv_t * tsv , bcf1_t * rec , void * usr )
210
210
{
211
211
args_t * args = (args_t * )usr ;
212
- if ( rec -> pos != -1 )
212
+ if ( rec -> pos != CSI_COOR_EMPTY )
213
213
{
214
214
if ( !args -> output_vcf_ids ) return 0 ;
215
215
return tsv_setter_id (tsv ,rec ,usr );
@@ -269,12 +269,12 @@ static int tsv_setter_gt_gp(tsv_t *tsv, bcf1_t *rec, void *usr)
269
269
if ( aa >= ab )
270
270
{
271
271
if ( aa >= bb ) args -> gts [2 * i + 0 ] = args -> gts [2 * i + 1 ] = bcf_gt_unphased (0 );
272
- else args -> gts [2 * i + 0 ] = args -> gts [2 * i + 1 ] = bcf_gt_unphased (1 );
272
+ else args -> gts [2 * i + 0 ] = args -> gts [2 * i + 1 ] = bcf_gt_unphased (1 );
273
273
}
274
- else if ( ab >= bb )
274
+ else if ( ab >= bb )
275
275
{
276
276
args -> gts [2 * i + 0 ] = bcf_gt_unphased (0 );
277
- args -> gts [2 * i + 1 ] = bcf_gt_unphased (1 );
277
+ args -> gts [2 * i + 1 ] = bcf_gt_unphased (1 );
278
278
}
279
279
else args -> gts [2 * i + 0 ] = args -> gts [2 * i + 1 ] = bcf_gt_unphased (1 );
280
280
}
@@ -293,7 +293,7 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
293
293
else { a0 = bcf_gt_phased (0 ); a1 = bcf_gt_phased (1 ); }
294
294
295
295
// up is short for "unphased"
296
- int nup = 0 ;
296
+ int nup = 0 ;
297
297
for (i = 0 ; i < nsamples ; i ++ )
298
298
{
299
299
char * ss = tsv -> ss + 4 * i + nup ;
@@ -324,11 +324,11 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
324
324
break ;
325
325
default :
326
326
fprintf (stderr ,"Could not parse: [%c][%s]\n" , ss [all * 2 + up ],tsv -> ss );
327
- return -1 ;
327
+ return -1 ;
328
328
}
329
329
if ( ss [all * 2 + up + 1 ]== '*' ) up = up + 1 ;
330
330
}
331
-
331
+
332
332
if (up && up != 2 )
333
333
{
334
334
fprintf (stderr ,"Missing unphased marker '*': [%c][%s]" , ss [2 + up ], tsv -> ss );
@@ -356,13 +356,13 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
356
356
static void gensample_to_vcf (args_t * args )
357
357
{
358
358
/*
359
- * Input: IMPUTE2 output (indentation changed here for clarity):
359
+ * Input: IMPUTE2 output (indentation changed here for clarity):
360
360
*
361
361
* 20:62116619_C_T 20:62116619 62116619 C T 0.969 0.031 0 ...
362
362
* --- 20:62116698_C_A 62116698 C A 1 0 0 ...
363
363
*
364
364
* Second column is expected in the form of CHROM:POS_REF_ALT. We use second
365
- * column because the first can be empty ("--") when filling sites from reference
365
+ * column because the first can be empty ("--") when filling sites from reference
366
366
* panel. When the option --vcf-ids is given, the first column is used to set the
367
367
* VCF ID.
368
368
*
@@ -784,7 +784,7 @@ char *init_sample2sex(bcf_hdr_t *hdr, char *sex_fname)
784
784
}
785
785
for (i = 0 ; i < nlines ; i ++ ) free (lines [i ]);
786
786
free (lines );
787
- for (i = 0 ; i < bcf_hdr_nsamples (hdr ); i ++ )
787
+ for (i = 0 ; i < bcf_hdr_nsamples (hdr ); i ++ )
788
788
if ( !sample2sex [i ] ) error ("Missing sex for sample %s in %s\n" , bcf_hdr_int2id (hdr , BCF_DT_SAMPLE , i ),sex_fname );
789
789
return sample2sex ;
790
790
}
@@ -847,7 +847,7 @@ static void vcf_to_gensample(args_t *args)
847
847
if (sample_fname ) fprintf (stderr , "Sample file: %s\n" , sample_fname );
848
848
849
849
// write samples file
850
- if (sample_fname )
850
+ if (sample_fname )
851
851
{
852
852
char * sample2sex = NULL ;
853
853
if ( args -> sex_fname ) sample2sex = init_sample2sex (args -> header ,args -> sex_fname );
@@ -877,7 +877,7 @@ static void vcf_to_gensample(args_t *args)
877
877
return ;
878
878
}
879
879
880
- int prev_rid = -1 , prev_pos = -1 ;
880
+ int prev_rid = -1 , prev_pos = CSI_COOR_EMPTY ;
881
881
int no_alt = 0 , non_biallelic = 0 , filtered = 0 , ndup = 0 , nok = 0 ;
882
882
BGZF * gout = bgzf_open (gen_fname , gen_compressed ? "wg" : "wu" );
883
883
while ( bcf_sr_next_line (args -> files ) )
@@ -915,7 +915,7 @@ static void vcf_to_gensample(args_t *args)
915
915
nok ++ ;
916
916
}
917
917
}
918
- fprintf (stderr , "%d records written, %d skipped: %d/%d/%d/%d no-ALT/non-biallelic/filtered/duplicated\n" ,
918
+ fprintf (stderr , "%d records written, %d skipped: %d/%d/%d/%d no-ALT/non-biallelic/filtered/duplicated\n" ,
919
919
nok , no_alt + non_biallelic + filtered + ndup , no_alt , non_biallelic , filtered , ndup );
920
920
921
921
if ( str .m ) free (str .s );
@@ -976,7 +976,7 @@ static void vcf_to_haplegendsample(args_t *args)
976
976
{
977
977
char * sample2sex = NULL ;
978
978
if ( args -> sex_fname ) sample2sex = init_sample2sex (args -> header ,args -> sex_fname );
979
-
979
+
980
980
int i ;
981
981
BGZF * sout = bgzf_open (sample_fname , sample_compressed ? "wg" : "wu" );
982
982
str .l = 0 ;
@@ -1078,7 +1078,7 @@ static void vcf_to_hapsample(args_t *args)
1078
1078
kputs ("%CHROM:%POS\\_%REF\\_%FIRST_ALT %ID %POS %REF %FIRST_ALT " , & str );
1079
1079
else
1080
1080
kputs ("%CHROM %CHROM:%POS\\_%REF\\_%FIRST_ALT %POS %REF %FIRST_ALT " , & str );
1081
-
1081
+
1082
1082
if ( args -> hap2dip )
1083
1083
kputs ("%_GT_TO_HAP2\n" , & str );
1084
1084
else
@@ -1229,7 +1229,7 @@ static inline int tsv_setter_aa1(args_t *args, char *ss, char *se, int alleles[]
1229
1229
if ( alleles [a0 ]< 0 ) alleles [a0 ] = (* nals )++ ;
1230
1230
if ( alleles [a1 ]< 0 ) alleles [a1 ] = (* nals )++ ;
1231
1231
1232
- gts [0 ] = bcf_gt_unphased (alleles [a0 ]);
1232
+ gts [0 ] = bcf_gt_unphased (alleles [a0 ]);
1233
1233
gts [1 ] = ss [1 ] ? bcf_gt_unphased (alleles [a1 ]) : bcf_int32_vector_end ;
1234
1234
1235
1235
if ( ref == a0 && ref == a1 ) args -> n .hom_rr ++ ; // hom ref: RR
@@ -1265,7 +1265,7 @@ static int tsv_setter_aa(tsv_t *tsv, bcf1_t *rec, void *usr)
1265
1265
}
1266
1266
ret = tsv_setter_aa1 (args , tsv -> ss , tsv -> se , alleles , & nals , iref , args -> gts + i * 2 );
1267
1267
if ( ret == -1 ) error ("Error parsing the site %s:%" PRId64 ", expected two characters\n" , bcf_hdr_id2name (args -> header ,rec -> rid ),(int64_t ) rec -> pos + 1 );
1268
- if ( ret == -2 )
1268
+ if ( ret == -2 )
1269
1269
{
1270
1270
// something else than a SNP
1271
1271
free (ref );
@@ -1275,7 +1275,7 @@ static int tsv_setter_aa(tsv_t *tsv, bcf1_t *rec, void *usr)
1275
1275
1276
1276
args -> str .l = 0 ;
1277
1277
kputc (ref [0 ], & args -> str );
1278
- for (i = 0 ; i < 5 ; i ++ )
1278
+ for (i = 0 ; i < 5 ; i ++ )
1279
1279
{
1280
1280
if ( alleles [i ]> 0 )
1281
1281
{
@@ -1419,7 +1419,7 @@ static void gvcf_to_vcf(args_t *args)
1419
1419
{
1420
1420
int pass = filter_test (args -> filter , line , NULL );
1421
1421
if ( args -> filter_logic & FLT_EXCLUDE ) pass = pass ? 0 : 1 ;
1422
- if ( !pass )
1422
+ if ( !pass )
1423
1423
{
1424
1424
if ( bcf_write (out_fh ,hdr ,line )!= 0 ) error ("[%s] Error: cannot write to %s\n" , __func__ ,args -> outfname );
1425
1425
continue ;
@@ -1667,7 +1667,7 @@ int main_vcfconvert(int argc, char *argv[])
1667
1667
else args -> infname = argv [optind ];
1668
1668
}
1669
1669
if ( !args -> infname ) usage ();
1670
-
1670
+
1671
1671
if ( args -> convert_func ) args -> convert_func (args );
1672
1672
else vcf_to_vcf (args );
1673
1673
0 commit comments