Skip to content

Commit

Permalink
Merge branch 'develop' of pd3-github:samtools/bcftools into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
pd3 committed Oct 26, 2024
2 parents 0148624 + 4f8bf53 commit c46a8ef
Show file tree
Hide file tree
Showing 20 changed files with 202 additions and 20 deletions.
1 change: 1 addition & 0 deletions .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ version: 'vers.{build}'
branches:
except:
- gh-pages
- /.*/ # Appveyor builds are currently disabled.

# Do not build on tags (GitHub and BitBucket)
skip_tags: true
Expand Down
20 changes: 16 additions & 4 deletions .ci_helpers/clone
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,25 @@
# omitted or if there is no branch with that name, checks out origin/HEAD
# from the samtools/htslib repository.

repository=$1
localdir=$2
branch=$3
echo CLONE: ${@+"$@"}

owner=$1
repository="https://github.com/$owner/$2"
localdir=$3
branch=$4
htslib_PR=$5

ref=''
[ -n "$branch" ] && ref=$(git ls-remote --heads "$repository" "$branch" 2>/dev/null)
[ -z "$ref" ] && repository='https://github.com/samtools/htslib.git'

set -x
git clone --recurse-submodules --shallow-submodules --depth=1 ${ref:+--branch="$branch"} "$repository" "$localdir"
git clone --recurse-submodules --shallow-submodules --depth=2 ${ref:+--branch="$branch"} "$repository" "$localdir"

# NB: "samtools" as the owner/organisation, not the repo name
if [ "x$owner" = "xsamtools" -a -z "$ref" -a "x$htslib_PR" != "x" ]
then
cd "$localdir"
git fetch origin "pull/$htslib_PR/head"
git checkout FETCH_HEAD
fi
12 changes: 10 additions & 2 deletions .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,16 @@ timeout_in: 10m
# clone with our own commands too.
clone_template: &HTSLIB_CLONE
htslib_clone_script: |
.ci_helpers/clone "https://github.com/${CIRRUS_REPO_OWNER}/htslib" "${HTSDIR}" "${CIRRUS_BRANCH}"
# Tricky, but when run as a PR Cirrus-CI obscures the branch name and
# replaces it by pull/<num>. This means we can't automatically get PRs
# to test whether the user has a similarly named branch to compile and
# test against.
#
# Instead, if we add htslib#NUM into the first line of the commit then
# we will use that PR from htslib. This is only needed when
# making a PR, so for development prior to the PR being made the
# CIRRUS_BRANCH will be used in preference.
.ci_helpers/clone ${CIRRUS_REPO_OWNER} htslib "${HTSDIR}" "${CIRRUS_BRANCH}" `printenv CIRRUS_CHANGE_TITLE | sed -n 's/.*htslib#\([0-9]*\).*/\1/p'`
#--------------------------------------------------
# Template: bcftools compile and test
Expand Down
6 changes: 6 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,9 @@
.git* export-ignore
.ci_helpers export-ignore
README.md export-ignore

# Prevent Windows cr-lf endings.
test/** -text
test/**.c text
test/**.h text
test/**.pl text
52 changes: 52 additions & 0 deletions .github/workflows/windows-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
name: Windows/MinGW-W64 CI
on: [push, pull_request]

jobs:
build:
runs-on: windows-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
- name: Set up MSYS2 MinGW-W64
uses: msys2/setup-msys2@v2
with:
msystem: mingw64
update: false
install: >-
mingw-w64-x86_64-toolchain
mingw-w64-x86_64-autotools
mingw-w64-x86_64-curl
mingw-w64-x86_64-libdeflate
mingw-w64-x86_64-tools-git
mingw-w64-x86_64-zlib
mingw-w64-x86_64-bzip2
mingw-w64-x86_64-xz
- name: Clone htslib
shell: msys2 {0}
run: |
export PATH="$PATH:/mingw64/bin:/c/Program Files/Git/bin"
export MSYSTEM=MINGW64
htslib_pr=`git log -2 --format='%s' | sed -n 's/.*htslib#\([0-9]*\).*/\1/p'`
.ci_helpers/clone ${GITHUB_REPOSITORY_OWNER} htslib htslib ${GITHUB_HEAD_REF:-$GITHUB_REF_NAME} $htslib_pr
pushd .
cd htslib
autoreconf -i
popd
- name: Compile bcftools
shell: msys2 {0}
run: |
export PATH="$PATH:/mingw64/bin:/c/Program Files/Git/bin"
export MSYSTEM=MINGW64
autoheader
autoconf -Wno-syntax
./configure --enable-werror
make -j4
- name: Check bcftools
shell: msys2 {0}
run: |
export PATH="$PATH:/mingw64/bin:/c/Program Files/Git/bin"
export MSYSTEM=MINGW64
make check
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,10 @@ libbcftools.a: $(OBJS)

vcfplugin.o: EXTRA_CPPFLAGS += -DPLUGINPATH='"$(pluginpath)"'

%.dll %.cygdll: %.c version.h version.c libbcftools.a $(HTSLIB_DLL)
%.dll: %.c version.h version.c libbcftools.a $(HTSLIB_DLL)
$(CC) $(PLUGIN_FLAGS) $(CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CPPFLAGS) $(LDFLAGS) -o $@ version.c $< $(PLUGIN_LIBS)

%.cygdll: %.c version.h version.c libbcftools.a $(HTSLIB_DLL)
$(CC) $(PLUGIN_FLAGS) $(CFLAGS) $(ALL_CPPFLAGS) $(EXTRA_CPPFLAGS) $(LDFLAGS) -o $@ version.c $< $(PLUGIN_LIBS)

%.so: %.c version.h version.c
Expand Down
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
## Release a.b

Changes affecting the whole of bcftools, or multiple commands:

* Add support for matching lines by ID (#1739)


Changes affecting specific commands:

Expand Down
5 changes: 2 additions & 3 deletions doc/bcftools.txt
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ specific commands to see if they apply.

'id';;
only records with identical ID column are compatible.
Supported by *<<merge,bcftools merge>>* only.

*-f, --apply-filters* 'LIST'::
Skip sites where FILTER column does not contain any of the strings listed
Expand Down Expand Up @@ -501,7 +500,7 @@ Add or remove annotations.
*-O, --output-type* 'b'|'u'|'z'|'v'[0-9]::
see *<<common_options,Common Options>>*

*--pair-logic* 'snps'|'indels'|'both'|'all'|'some'|'exact'::
*--pair-logic* 'snps'|'indels'|'both'|'all'|'some'|'exact'|'id'::
Controls how to match records from the annotation file to the target VCF.
Effective only when *-a* is a VCF or BCF. The option replaces the former
unintuitive *--collapse*.
Expand Down Expand Up @@ -1919,7 +1918,7 @@ on the options, the program can output records from one (or more) files
which have (or do not have) corresponding records with the same position
in the other files.

*-c, --collapse* 'snps'|'indels'|'both'|'all'|'some'|'none'::
*-c, --collapse* 'snps'|'indels'|'both'|'all'|'some'|'none'|'id'::
see *<<common_options,Common Options>>*

*-C, --complement*::
Expand Down
12 changes: 12 additions & 0 deletions test/annotate35.1.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##INFO=<ID=src,Number=1,Type=String,Description="">
##INFO=<ID=dst,Number=1,Type=String,Description="">
##contig=<ID=chr1,length=248956422>
#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 902 ID6 N <INS> . PASS src=ID6
chr1 902 ID5 N <INS> . PASS src=ID5
chr1 902 ID4 N <INS> . PASS src=ID4
chr1 902 ID3 N <INS> . PASS src=ID3
chr1 902 ID2 N <INS> . PASS src=ID2
chr1 902 ID1 N <INS> . PASS src=ID1
12 changes: 12 additions & 0 deletions test/annotate35.2.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##INFO=<ID=src,Number=1,Type=String,Description="">
##INFO=<ID=dst,Number=1,Type=String,Description="">
##contig=<ID=chr1,length=248956422>
#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 902 ID6 N <INS> . PASS src=ID6;dst=ID6
chr1 902 ID5 N <INS> . PASS src=ID5;dst=ID5
chr1 902 ID4 N <INS> . PASS src=ID4;dst=ID4
chr1 902 ID3 N <INS> . PASS src=ID3;dst=ID3
chr1 902 ID2 N <INS> . PASS src=ID2;dst=ID2
chr1 902 ID1 N <INS> . PASS src=ID1;dst=ID1
11 changes: 11 additions & 0 deletions test/annotate35.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
##fileformat=VCFv4.2
##INFO=<ID=src,Number=1,Type=String,Description="">
##INFO=<ID=dst,Number=1,Type=String,Description="">
##contig=<ID=chr1,length=248956422>
#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 902 ID6 N <INS> . PASS src=ID6
chr1 902 ID5 N <INS> . PASS src=ID5
chr1 902 ID4 N <INS> . PASS src=ID4
chr1 902 ID3 N <INS> . PASS src=ID3
chr1 902 ID2 N <INS> . PASS src=ID2
chr1 902 ID1 N <INS> . PASS src=ID1
6 changes: 6 additions & 0 deletions test/annots35.tab
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
chr1 902 ID4 N <INS> ID4
chr1 902 ID2 N <INS> ID2
chr1 902 ID6 N <INS> ID6
chr1 902 ID1 N <INS> ID1
chr1 902 ID3 N <INS> ID3
chr1 902 ID5 N <INS> ID5
11 changes: 11 additions & 0 deletions test/annots35.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
##fileformat=VCFv4.2
##INFO=<ID=src,Number=1,Type=String,Description="">
##INFO=<ID=dst,Number=1,Type=String,Description="">
##contig=<ID=chr1,length=248956422>
#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 902 ID4 N <INS> . PASS src=ID4
chr1 902 ID2 N <INS> . PASS src=ID2
chr1 902 ID6 N <INS> . PASS src=ID6
chr1 902 ID3 N <INS> . PASS src=ID3
chr1 902 ID1 N <INS> . PASS src=ID1
chr1 902 ID5 N <INS> . PASS src=ID5
7 changes: 7 additions & 0 deletions test/isec.match-id.1.1.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
##fileformat=VCFv4.2
##INFO=<ID=src,Number=1,Type=String,Description="">
##contig=<ID=chr1,length=248956422>
#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 902 ID1 A C . . .
chr1 902 ID2 A C . . .
chr1 902 ID3 A C . . .
7 changes: 7 additions & 0 deletions test/isec.match-id.1.2.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
##fileformat=VCFv4.2
##INFO=<ID=src,Number=1,Type=String,Description="">
##contig=<ID=chr1,length=248956422>
#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 902 ID3 A C . . .
chr1 902 ID2 A C . . .
chr1 902 ID1 A C . . .
8 changes: 8 additions & 0 deletions test/isec.match-id.1.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##INFO=<ID=src,Number=1,Type=String,Description="">
##contig=<ID=chr1,length=248956422>
#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 902 ID3 A C . . .
chr1 902 ID2 A C . . .
chr1 902 ID1 A C . . .
8 changes: 8 additions & 0 deletions test/isec.match-id.2.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##INFO=<ID=src,Number=1,Type=String,Description="">
##contig=<ID=chr1,length=248956422>
#CHROM POS ID REF ALT QUAL FILTER INFO
chr1 902 ID1 A C . . .
chr1 902 ID2 A C . . .
chr1 902 ID3 A C . . .
8 changes: 7 additions & 1 deletion test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
run_test(\&test_vcf_stats,$opts,in=>['stats.counts'],out=>'stats.counts.chk',args=>'-s -');
run_test(\&test_vcf_stats,$opts,in=>['stats.counts'],out=>'stats.counts.2.chk',args=>q[-s - -i 'type="snp"']);
run_test(\&test_vcf_stats,$opts,in=>['stats.vaf'],out=>'stats.vaf.1.chk',args=>q[-s -]);
run_test(\&test_vcf_isec,$opts,in=>['isec.match-id.1.1','isec.match-id.1.2'],out=>'isec.match-id.1.out',args=>'-n =2 -w 2 --no-version');
run_test(\&test_vcf_isec,$opts,in=>['isec.match-id.1.1','isec.match-id.1.2'],out=>'isec.match-id.2.out',args=>'-n =2 -w 2 -c id --no-version');
run_test(\&test_vcf_isec,$opts,in=>['isec.a','isec.b'],out=>'isec.ab.out',args=>'-n =2');
run_test(\&test_vcf_isec,$opts,in=>['isec.a','isec.b'],out=>'isec.ab.flt.out',args=>'-n =2 -i"STRLEN(REF)==2"');
run_test(\&test_vcf_isec,$opts,in=>['isec.a','isec.b'],out=>'isec.ab.both.out',args=>'-n =2 -c both');
Expand Down Expand Up @@ -524,6 +526,8 @@
run_test(\&test_vcf_sort,$opts,in=>'sort',out=>'sort.out',args=>q[-m 0],fmt=>'%CHROM\\t%POS\\t%REF,%ALT\\n');
run_test(\&test_vcf_sort,$opts,in=>'sort',out=>'sort.out',args=>q[-m 1000],fmt=>'%CHROM\\t%POS\\t%REF,%ALT\\n');
run_test(\&test_vcf_regions,$opts,in=>'regions');
run_test(\&test_vcf_annotate,$opts,in=>'annotate35',vcf=>'annots35',out=>'annotate35.1.out',args=>q[-c CHROM,POS,~ID,REF,ALT,INFO/src]);
run_test(\&test_vcf_annotate,$opts,in=>'annotate35',tab=>'annots35',out=>'annotate35.2.out',args=>q[-c CHROM,POS,~ID,REF,ALT,dst:=src]);
run_test(\&test_vcf_annotate,$opts,in=>'annotate.escape.1',tab=>'annotate.escape.1',out=>'annotate.escape.1.1.out',args=>q[-c CHROM,POS,ISTR,FMT/FSTR]);
run_test(\&test_vcf_annotate,$opts,in=>'annotate.match.1',tab=>'annotate.match.1',out=>'annotate.match.1.1.out',args=>q[-c CHROM,POS,-,-,SCORE,~X,-,- -i'STR={X}']);
run_test(\&test_vcf_annotate,$opts,in=>'annotate.match.1',tab=>'annotate.match.1',out=>'annotate.match.1.2.out',args=>q[-c CHROM,POS,REF,ALT,SCORE,-,~X,- -i'INT={X}']);
Expand Down Expand Up @@ -1478,7 +1482,9 @@ sub test_vcf_isec
my $files = join(' ',@files);
$args{args} =~ s/{PATH}/$$opts{path}/g;
test_cmd($opts,%args,cmd=>"$$opts{bin}/bcftools isec $args{args} $files");
test_cmd($opts,%args,cmd=>"$$opts{bin}/bcftools isec -Ob $args{args} $files");
# Either improve or disable completely: the output type does not make sense in all modes
# test_cmd($opts,%args,cmd=>"$$opts{bin}/bcftools isec -Ob $args{args} $files");
}
sub test_vcf_isec2
{
Expand Down
18 changes: 13 additions & 5 deletions vcfannotate.c
Original file line number Diff line number Diff line change
Expand Up @@ -2313,15 +2313,14 @@ static void init_columns(args_t *args)
col->hdr_key_src = strdup(str.s);
col->hdr_key_dst = strdup(str.s);
col->replace = replace;
if ( args->pair_logic==-1 ) bcf_sr_set_opt(args->files,BCF_SR_PAIR_LOGIC,BCF_SR_PAIR_BOTH_REF);
if ( args->pair_logic==-1 ) args->pair_logic = BCF_SR_PAIR_ANY;
}
else args->alt_idx = icol;
}
else if ( !strcasecmp("ID",str.s) || !strcasecmp("~ID",str.s) )
{
if ( replace & REPLACE_NON_MISSING ) error("Apologies, the -ID feature has not been implemented yet.\n");
if ( str.s[0]=='~' ) replace = MATCH_VALUE;
if ( args->tgts_is_vcf && (replace & MATCH_VALUE) ) error("todo: -c ~ID with -a VCF?\n");
args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
annot_col_t *col = &args->cols[args->ncols-1];
memset(col,0,sizeof(*col));
Expand All @@ -2330,7 +2329,11 @@ static void init_columns(args_t *args)
col->setter = args->tgts_is_vcf ? vcf_setter_id : setter_id;
col->hdr_key_src = strdup(str.s);
col->hdr_key_dst = strdup(str.s);
if ( replace & MATCH_VALUE ) args->match_id = icol;
if ( replace & MATCH_VALUE )
{
args->match_id = icol;
if ( args->tgts_is_vcf ) args->pair_logic = (args->pair_logic==-1) ? BCF_SR_PAIR_ID : args->pair_logic|BCF_SR_PAIR_ID;
}
}
else if ( !strcasecmp("~INFO/END",str.s) && !args->tgts_is_vcf )
{
Expand Down Expand Up @@ -3122,6 +3125,11 @@ static void init_data(args_t *args)
&args->index_fn, args->write_index) < 0 )
error("Error: failed to initialise index for %s\n",args->output_fname);
}
if ( args->tgts_is_vcf )
{
if ( args->pair_logic==-1 ) args->pair_logic = BCF_SR_PAIR_SOME;
bcf_sr_set_opt(args->files,BCF_SR_PAIR_LOGIC,args->pair_logic);
}
}

static void destroy_data(args_t *args)
Expand Down Expand Up @@ -3650,7 +3658,7 @@ static void usage(args_t *args)
fprintf(stderr, " --no-version Do not append version and command line to the header\n");
fprintf(stderr, " -o, --output FILE Write output to a file [standard output]\n");
fprintf(stderr, " -O, --output-type u|b|v|z[0-9] u/b: un/compressed BCF, v/z: un/compressed VCF, 0-9: compression level [v]\n");
fprintf(stderr, " --pair-logic STR Matching records by <snps|indels|both|all|some|exact>, see man page for details [some]\n");
fprintf(stderr, " --pair-logic STR Matching records by <snps|indels|both|all|some|exact|id>, see man page for details [some]\n");
fprintf(stderr, " -r, --regions REGION Restrict to comma-separated list of regions\n");
fprintf(stderr, " -R, --regions-file FILE Restrict to regions listed in FILE\n");
fprintf(stderr, " --regions-overlap 0|1|2 Include if POS in the region (0), record overlaps (1), variant overlaps (2) [1]\n");
Expand Down Expand Up @@ -3784,6 +3792,7 @@ int main_vcfannotate(int argc, char *argv[])
else if ( !strcmp(optarg,"some") ) args->pair_logic |= BCF_SR_PAIR_SOME;
else if ( !strcmp(optarg,"none") ) args->pair_logic = BCF_SR_PAIR_EXACT;
else if ( !strcmp(optarg,"exact") ) args->pair_logic = BCF_SR_PAIR_EXACT;
else if ( !strcmp(optarg,"id") ) args->pair_logic |= BCF_SR_PAIR_ID;
else error("The --pair-logic string \"%s\" not recognised.\n", optarg);
break;
case 3 :
Expand Down Expand Up @@ -3829,7 +3838,6 @@ int main_vcfannotate(int argc, char *argv[])
{
args->tgts_is_vcf = 1;
args->files->require_index = 1;
bcf_sr_set_opt(args->files,BCF_SR_PAIR_LOGIC,args->pair_logic>=0 ? args->pair_logic : BCF_SR_PAIR_SOME);
if ( args->min_overlap_str ) error("The --min-overlap option cannot be used when annotating from a VCF\n");
}
}
Expand Down
9 changes: 5 additions & 4 deletions vcfisec.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* vcfisec.c -- Create intersections, unions and complements of VCF files.
Copyright (C) 2012-2023 Genome Research Ltd.
Copyright (C) 2012-2024 Genome Research Ltd.
Author: Petr Danecek <[email protected]>
Expand Down Expand Up @@ -460,7 +460,7 @@ static void destroy_data(args_t *args)
{
if ( !args->fnames[i] ) continue;
if ( hts_close(args->fh_out[i])!=0 ) error("[%s] Error: close failed .. %s\n", __func__,args->fnames[i]);
int is_tbi = !args->write_index
int is_tbi = !args->write_index
|| (args->write_index&127) == HTS_FMT_TBI;
if ( args->output_type==FT_VCF_GZ && is_tbi )
{
Expand All @@ -476,8 +476,8 @@ static void destroy_data(args_t *args)
free(args->fh_out);
free(args->fnames);
if ( args->fh_sites ) fclose(args->fh_sites);
if ( args->write ) free(args->write);
}
free(args->write);
}

static void usage(void)
Expand All @@ -487,7 +487,7 @@ static void usage(void)
fprintf(stderr, "Usage: bcftools isec [options] <A.vcf.gz> <B.vcf.gz> [...]\n");
fprintf(stderr, "\n");
fprintf(stderr, "Options:\n");
fprintf(stderr, " -c, --collapse STRING Treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
fprintf(stderr, " -c, --collapse STRING Treat as identical records with <snps|indels|both|all|some|none|id>, see man page for details [none]\n");
fprintf(stderr, " -C, --complement Output positions present only in the first file but missing in the others\n");
fprintf(stderr, " -e, --exclude EXPR Exclude sites for which the expression is true\n");
fprintf(stderr, " -f, --apply-filters LIST Require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
Expand Down Expand Up @@ -597,6 +597,7 @@ int main_vcfisec(int argc, char *argv[])
else if ( !strcmp(optarg,"all") ) args->files->collapse |= COLLAPSE_ANY;
else if ( !strcmp(optarg,"some") ) args->files->collapse |= COLLAPSE_SOME;
else if ( !strcmp(optarg,"none") ) args->files->collapse = COLLAPSE_NONE;
else if ( !strcmp(optarg,"id") ) args->files->collapse |= BCF_SR_PAIR_ID;
else error("The --collapse string \"%s\" not recognised.\n", optarg);
break;
case 'f': args->files->apply_filters = optarg; break;
Expand Down

0 comments on commit c46a8ef

Please sign in to comment.