-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathfisher_exact_test.pl
executable file
·133 lines (110 loc) · 3.04 KB
/
fisher_exact_test.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use feature 'say';
use autodie;
use Math::BigFloat lib => 'GMP';
use List::Util qw/sum/;
use Pod::Usage;
use Getopt::Long;
my $sep = 'tab';
my $cola;
my $colb;
my $colc;
my $cold;
my $input;
my $output = q{-};
my $help;
my $pval = .000_1;
my $threshold = $pval/1000.0;
my $result = GetOptions (
"input|i=s" => \$input,
"output|o=s" => \$output,
"column-a|a=i" => \$cola,
"column-b|b=i" => \$colb,
"column-c|c=i" => \$colc,
"column-d|d=i" => \$cold,
"p-value|p=i" => \$pval,
"sep|s=s" => \$sep,
"help" => \$help,
);
pod2usage(-verbose => 99) if (!($cola && $colb && $colc && $cold && $input) || $help || !$result);
my $log_threshold = log($threshold);
sub logfactorial {
my ($n) = @_;
return 0 if $n==0;
my $accum = 0;
for (my $i=1 ; $i<=$n ; $i++){
$accum += log($i);
}
return $accum;
};
sub fet{
my ($a,$b,$c,$d) = @_;
my $n = $a+$b+$c+$d;
my $logp = sum (
logfactorial($a+$b) , logfactorial($c+$d) , logfactorial($a+$c) , logfactorial($b+$d) ,
-logfactorial($a) , -logfactorial($b) , -logfactorial($c) , -logfactorial($d) , -logfactorial($n) ,
);
if ($logp < $log_threshold){
return ('+', "< $threshold");
} else{
my $p = exp($logp);
return (($p < $pval ? '+' : '-'), sprintf('%10.10f', exp($logp)));
}
}
if ($sep eq 'tab'){
$sep = "\t";
}
my $outfh;
if ($output eq q{-}){
$outfh = *STDOUT;
}
else {
open $outfh, '>', $output;
}
open my $fh, '<', $input;
my $counter = 1;
while (defined (my $line = <$fh>)){
$line =~ tr/\n\r//d;
my @parts = split /$sep/, $line;
my ($a, $b, $c, $d) = @parts[$cola-1, $colb-1, $colc-1, $cold-1];
say $outfh join $sep, @parts, fet($a,$b,$c,$d);
}
close $fh;
if ($output ne q{-}){
close $outfh;
}
=head1 NAME
fisher_exact_test.pl - perform fisher exact test on file
=head1 SYNOPSIS
fisher_exact_test.pl -a 2 -b 4 -c 6 -d 8 -i input.txt -o output.txt
=head1 DESCRIPTION
fisher_exact_test.pl
Input:
takes as input a multi-column file and a list of column numbers to identify
a,b,c,d to calculate the p-value according the the Fisher Exact Test, illustrated below. For each
line in the in the in
+-----+-----+-----+
| a | b | a+b |
+-----+-----+-----+
| c | d | c+d |
+-----+-----+-----+
| a+c | b+d | n |
+-----+-----+-----+
(a+b)! * (c+d)! * (a+c)! * (b+d)!
p = ---------------------------------
a! * b! * c! * d! * n!
=head1 OPTIONS
--column-a -a Position of column a in file.
--column-b -b Position of column b in file.
--column-c -c Position of column c in file.
--column-d -d Position of column d in file.
--sep -s File column separator (default: tab)
--significance -p If P-value is less than this, mark it with a '+' sign
Otherwise, '-'. Default: .0001
--input -i Input file
--output -o Output file (default to screen)
--help print this information
=cut