-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgenome_diff.pl
executable file
·108 lines (76 loc) · 2.24 KB
/
genome_diff.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use feature 'say';
use autodie;
use FindBin;
use lib "$FindBin::Bin/lib";
use Fasta;
use Getopt::Euclid qw( :vars<opt_> );
use Pod::Usage;
use Log::Log4perl qw/:easy/;
Log::Log4perl->easy_init( {
level => $DEBUG,
#file => ">run.log",
layout => '%d{HH:mm:ss} %p> (%L) %M - %m%n',
} );
my $logger = get_logger();
pod2usage(-verbose => 99,-sections => [qw/NAME SYNOPSIS OPTIONS/])
unless ($opt_reference_a && $opt_reference_b);
$logger->info("loading $opt_reference_a");
my $refa = slurp_fasta($opt_reference_a);
$logger->info("loading $opt_reference_b");
my $refb = slurp_fasta($opt_reference_b);
my $source = "$opt_ecotype_a->$opt_ecotype_b";
my @seqs = sort keys %$refa;
#say "#Seq\tCoord\t$opt_reference_a\t$opt_reference_b";
my $counter = 0;
for my $seq (@seqs) {
$logger->info("$counter") if ++$counter % 10000 == 0;
$logger->info($seq);
my $seqa = $refa->{$seq};
my $seqb = $refb->{$seq};
$logger->logdie("$seq lengths do not match") if length $seqa != length $seqb;
my $length = length $seqa;
$logger->info($length);
for my $i (0..$length-1){
my $aa = substr($seqa, $i, 1);
my $bb = substr($seqb, $i, 1);
next if $aa eq $bb;
say join "\t", $seq, q{}, $source, $i+1, $i+1, q{}, q{+}, q{}, "$aa>$bb";
}
}
=head1 NAME
genome_diff.pl - create diff between two genomes which
=head1 SYNOPSIS
Usage examples:
genome_diff.pl -a TAIR.fas -b Landsberg > diff
=head1 OPTIONS
=over
=item -a <fasta> | --reference-a <fasta>
=for Euclid
fasta.type: readable
=item -b <fasta> | --reference-b <fasta>
=item -ea <eco> | --ecotype-a <eco>
=for Euclid
eco.default: 'left'
=item -eb <eco> | --ecotype-b <eco>
=for Euclid
eco.default: 'right'
=for Euclid
fasta.type: readable
=back
=cut
# Format:
__DATA__
CHR1 DZ_Ecker_Ler3 711 711 + T>C
CHR1 DZ_Ecker_Ler3 892 892 + T>G
CHR1 DZ_Ecker_Ler3 956 956 + C>T
CHR1 DZ_Ecker_Ler3 10904 10904 + A>T
CHR1 DZ_Ecker_Ler3 32210 32210 + T>C
CHR1 DZ_Ecker_Ler3 37388 37388 + G>T
CHR1 DZ_Ecker_Ler3 71348 71348 + C>T
CHR1 DZ_Ecker_Ler3 88300 88300 + C>T
CHR1 DZ_Ecker_Ler3 90571 90571 + A>C
CHR1 DZ_Ecker_Ler3 90809 90809 + A>T