Skip to content

Commit

Permalink
Release v0.2
Browse files Browse the repository at this point in the history
  • Loading branch information
arendsee committed Jun 19, 2016
2 parents dc8a662 + c9acb52 commit 90aed0a
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 36 deletions.
191 changes: 162 additions & 29 deletions src/contiguous.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,23 @@ ContiguousMap * init_contiguous_map(size_t size){
return cmap;
}

void contiguous_query(Synmap * syn, FILE * intfile){
// EAnsure blocks are sorted (BAD IDEA, REDACTED FOR NOW)
// sort_all_contigs(syn);
void contiguous_query(Synmap * syn, FILE * intfile, bool pblock){
// Ensure blocks are sorted (BAD IDEA, REDACTED FOR NOW)
// sort_all_contigs(syn);

// count total number of unique block-block pairs for hashmap
ContiguousMap *cmap= populate_contiguous_map(syn);

uint32_t interval =0;
uint32_t missloc =0;
char seqname[128];
int chrid, start, stop,flag;
Contig * contigs;
Contig * tcon;
Block * qblk;
uint32_t blkid;
ContiguousNode * qnode;
Block * tblk;
ContiguousNode * original;
Block twoblk;
bool missing;
while ((fscanf(intfile,
Expand All @@ -48,7 +50,14 @@ void contiguous_query(Synmap * syn, FILE * intfile){
if(!((CB(contigs, 0) && block_overlap(CB(contigs, 0), &twoblk)) ||
(CB(contigs, 1) && block_overlap(CB(contigs, 1), &twoblk))
)){
missing = true;
if(contigs->block[0]){
missloc= cmap->map[contigs->block[0]->linkid]->qblkid;
} else {
missloc= cmap->map[contigs->block[1]->linkid]->qblkid;
missloc = missloc >0 ? missloc : 0;

}
missing = true;
}
}

Expand Down Expand Up @@ -77,23 +86,47 @@ void contiguous_query(Synmap * syn, FILE * intfile){
* 2 = Right hand undertrminable (caseF)
* 3 = Neitherside determiable (case C extended to F, or case E
*/

flag = 0;
qblk = SGCB(syn,0,chrid,i);
tblk = QT_SGCB(syn, qblk);
Contig* qcon = SGC(syn,0,chrid);
Block* tblk = init_block( QT_SGCB(syn, qblk)->start, QT_SGCB(syn,qblk)->stop,0,0,0);
tcon = QT_SGC(syn, qblk);
Block* q_blk;
Block* t_blk;
Contig* t_con;

if(missing){
flag=3;
printf("%s\t%s\t.\t.\t%d\n",
seqname, tcon->name,flag);
flag=4;
if(pblock){
q_blk = SGCB(syn,0,chrid,missloc);
t_blk = QT_SGCB(syn,q_blk);
t_con = QT_SGC(syn,q_blk);
printf("Q\t%s\t%s\t%u\t%u\t%s\t%u\t%u\t%u\n",
seqname, qcon->name, q_blk->start, q_blk->stop,
t_con->name,t_blk->start,t_blk->stop, interval);
q_blk = SGCB(syn,0,chrid,missloc+1);
t_blk = QT_SGCB(syn,q_blk);
t_con = QT_SGC(syn,q_blk);
printf("Q\t%s\t%s\t%u\t%u\t%s\t%u\t%u\t%u\n",
seqname, qcon->name, q_blk->start, q_blk->stop,
t_con->name,t_blk->start,t_blk->stop, interval);
}
printf(">\t%u\t%s\t%s\t%u\t%u\t.\t.\t.\t%d\n",
interval,seqname,qcon->name,start,stop,flag);
interval++;
free_block(tblk);
break;
}



qnode = cmap->map[qblk->linkid];
original = cmap->map[qblk->linkid];

// Set return region assumes case D;
tblk->start= qnode->match->start;
tblk->stop = qnode->match->stop;
// Start is BEFORE current query Block;

if (start< qblk->start) {
//Move down contiguous block, stoping at leftmost possible point
while(qnode->prev != NULL && start > qnode->feature->stop){
Expand All @@ -102,39 +135,133 @@ void contiguous_query(Synmap * syn, FILE * intfile){
if(qnode->flag >1 && start > qnode->feature->stop){ //avoid duplicates in cases of overlap
continue;
}


if(start < qnode->feature->start){ // Check we didn't advance into a case E,F Situation
flag = 1;
tblk ->start = qnode->match->start;
if(qnode->flag > -2 || (original->next != NULL && original->next->flag == 0)){
// The next check here and in the next block is to detect edge cases where block we are checking is
// registering as a left translation, but is part of a continuous run to the right
// most often occcurs in the middle of messy overlap blocks

flag = 1;
tblk ->start = qnode->match->start;
} else {
flag = 2;
tblk ->stop = qnode->match->stop;
}
} else if( start > qnode->feature->start) { //Start is contained within current block C,D
tblk->start = qnode->match->start;
if(qnode->flag > -2){
tblk->start = qnode->match->start;
} else {
tblk ->stop = qnode->match->stop;
}
} else { //Case A,B situations
tblk->start = qnode->match->stop;
if(qnode->flag > -2){
tblk->start = qnode->match->stop;
} else {
tblk ->stop = qnode->match->start;
}
}

}
if(pblock){
//qnode = cmap->map[qblk->linkid];
q_blk = SGCB(syn,0,chrid,qnode->qblkid >0?qnode->qblkid-1:0);
t_blk = QT_SGCB(syn,q_blk);
t_con = QT_SGC(syn,q_blk);
printf("Q\t%s\t%s\t%u\t%u\t%s\t%u\t%u\t%u\t\n",
seqname, qcon->name, q_blk->start, q_blk->stop,
t_con->name,t_blk->start,t_blk->stop, interval);
t_blk = SGCB(syn,1,qnode->feature->oseqid,
(qnode->feature->oblkid >0 ? qnode->feature->oblkid-1:0));
q_blk = SGCB(syn,0,t_blk->oseqid, t_blk->oblkid);
t_con = SGC(syn,1,q_blk->oseqid);
printf("T\t%s\t%s\t%u\t%u\t%s\t%u\t%u\t%u\n",
seqname, qcon->name, q_blk->start, q_blk->stop,
t_con->name,t_blk->start,t_blk->stop, interval);

printf("I\t%s\t%s\t%u\t%u\t%s\t%u\t%u\t%u\n",
seqname, qcon->name, qblk->start, qblk->stop,
tcon->name,qnode->match->start,qnode->match->stop, interval);
}


//Stop is AFTER current query Block
if (stop > qblk->stop){ //Stop is AFTER
while(qnode->next != NULL && stop > qnode->feature->start){
qnode = qnode->next;
while(qnode->next != NULL && stop){
ContiguousNode* temp = qnode->next;
//Advance i to avoid repeated ranges due to continuity.
i++;
if(stop > temp->feature->start){
i++;
qnode = temp;
if(pblock){
t_con = SGC(syn,1,qnode->feature->oseqid);
printf("I\t%s\t%s\t%u\t%u\t%s\t%u\t%u\t%u\n",
seqname, qcon->name, qnode->feature->start, qnode->feature->stop,
t_con->name,qnode->match->start,qnode->match->stop, interval);
}
} else {
break;
}
}
if(qnode->flag >1 && stop < qnode->feature->start){ //avoid duplicates in cases of overlap
continue;
}
if(stop > qnode->feature->stop){ //Case E,F situations
flag = flag==1? 3:2;
tblk->stop = qnode->match->stop;
} else if (stop < qnode->feature->stop){
if(stop > qnode->feature->stop){
if(qnode->next == NULL){ //Case E,F situations
if(qnode->flag > -2){
flag = flag==1? 3:2;
tblk->stop = qnode->match->stop;
} else {
flag = flag==2? 3:1;
tblk ->start = qnode->match->start;
}
} else { //Case A,B situations
q_blk = SGCB(syn,0,chrid, i+1 < qcon->size ? i+1:i);
t_blk = QT_SGCB(syn,q_blk);
if(cmap->map[qblk->linkid]->flag > -2 || qnode->next->flag == 0){ //similar check to above
tblk->stop = t_blk->start;

} else {
tblk->start = t_blk->stop;
}
}
} else { //Case C,D
tblk->stop = qnode->match->stop;
if(qnode->flag > -2){
tblk ->stop = qnode->match->stop;
} else {
tblk->start = qnode->match->stop;
}
}

}
if(pblock){
if(stop < qnode->feature->start){
q_blk = qnode->feature;
} else {
tblk->stop = qnode->match->start;
q_blk = SGCB(syn,0,chrid, i+1 < qcon->size ? i+1:i);
}
}

printf("%s\t%s\t%u\t%u\t%d\n",
seqname, tcon->name, tblk->start, tblk->stop, flag);
t_blk = QT_SGCB(syn,q_blk);
t_con = QT_SGC(syn,q_blk);
printf("Q\t%s\t%s\t%u\t%u\t%s\t%u\t%u\t%u\n",
seqname, qcon->name, q_blk->start, q_blk->stop,
t_con->name,t_blk->start,t_blk->stop, interval);
t_blk = SGCB(syn,1,qnode->feature->oseqid,qnode->feature->oblkid+1< tcon->size? qnode->feature->oblkid+1:qnode->feature->oblkid);
q_blk = SGCB(syn,0,t_blk->oseqid, t_blk->oblkid);
t_con = SGC(syn,1,q_blk->oseqid);
printf("T\t%s\t%s\t%u\t%u\t%s\t%u\t%u\t%u\n",
seqname, qcon->name, q_blk->start, q_blk->stop,
t_con->name,t_blk->start,t_blk->stop, interval);
}
printf(">\t%u\t%s\t%s\t%u\t%u\t%s\t%u\t%u\t%d\n",
interval,seqname,qcon->name,start,stop,
tcon->name, tblk->start,tblk->stop,flag);

interval++;

free_block(tblk);
}
free(contigs->name);
free(contigs->block);
Expand Down Expand Up @@ -216,10 +343,12 @@ ContiguousMap * populate_contiguous_map(Synmap * syn){
}
if(q_overlap || t_overlap){
if(q_overlap && t_overlap) cnode->flag = 4;
//if(cnode->feature->oblkid < ctig->block[j-1]->oblkid) cnode->flag = -2;
overlap_bound[1]=j;
cmap->map[cnode->feature->linkid] = cnode;
continue;
} else {
}
else {
overlap_bound[0] = j;
overlap_bound[1] = j;
}
Expand All @@ -230,22 +359,26 @@ ContiguousMap * populate_contiguous_map(Synmap * syn){
// printf("\t[%u:%u] \n",cnode->match->start,cnode->match->stop);
cmap->map[cnode->feature->linkid] = cnode;
} else if (cnode->feature->oblkid == ctig->block[j-1]->oblkid+1){ // Regular contiguous interval
cnode->flag = 0;
cmap->map[cnode->feature->linkid] = cnode;
cmap->map[ctig->block[j-1]->linkid]->next = cnode;
cmap->map[cnode->feature->linkid]->prev = cmap->map[ctig->block[j-1]->linkid];
// printf("Contiguous \t%u::%u \t[%u:%u] {%u,%u} \n",cnode->feature->start,cnode->feature->stop,cnode->feature->oseqid,cnode->feature->oblkid,j,cnode->match->oblkid);
// printf("\t[%u:%u] \n",cnode->match->start,cnode->match->stop);
} else if( cnode->feature->oblkid < ctig->block[j-1]->oblkid){ //Twist to left of previous block
cnode->flag = cmap->map[ctig->block[j-1]->linkid]->flag < -1 ? -3:-1;
cnode->flag = (cmap->map[ctig->block[j-1]->linkid]->flag == -2 || cmap->map[ctig->block[j-1]->linkid]->flag == 0) ? -2:0;
cmap->map[ctig->block[j-1]->linkid]->next = NULL;
cmap->map[cnode->feature->linkid] = cnode;
if(cnode->feature->oblkid == ctig->block[j-1]->oblkid - 1){
cnode->flag = -3;
cmap->map[ctig->block[j-1]->linkid]->next = cnode;
cmap->map[ctig->block[j-1]->linkid]->flag = -3;
cmap->map[cnode->feature->linkid]->prev = cmap->map[ctig->block[j-1]->linkid];
}
// printf("Twist Left \t%u::%u \t[%u:%u] {%u,%u} \n",cnode->feature->start,cnode->feature->stop,cnode->feature->oseqid,cnode->feature->oblkid,j,cnode->match->oblkid);
// printf("\t[%u:%u] \n",cnode->match->start,cnode->match->stop);
} else if( cnode->feature->oblkid > ctig->block[j-1]->oblkid){// Twist to right, possible transposition
cnode->flag=-2;
cnode->flag= ctig->block[j]->oblkid == ctig->block[j+1]->oblkid+1 ? -3:-1;
cmap->map[cnode->feature->linkid] = cnode;
// printf("Twist Right \t%u::%u \t[%u:%u] {%u,%u} \n",cnode->feature->start,cnode->feature->stop,cnode->feature->oseqid,cnode->feature->oblkid,j,cnode->match->oblkid);
// printf("\t[%u:%u] \n",cnode->match->start,cnode->match->stop);
Expand Down
3 changes: 2 additions & 1 deletion src/contiguous.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
* prev - Previous contiguous node, if any
* next - Next contiguous node, if any
* flag - Used to indicate several state
* -3 - Part of a transposed block
* -2 - Left of last contiguous block
* -1 - Right of last contiguous block
* 0 - Normal Interval
Expand Down Expand Up @@ -84,6 +85,6 @@ void contiguous_list_reset(ContiguousList *clist, ContiguousNode *cnode);
ContiguousMap * init_contiguous_map(size_t size);
ContiguousMap * populate_contiguous_map(Synmap * syn);

void contiguous_query(Synmap * syn, FILE * intfile);
void contiguous_query(Synmap * syn, FILE * intfile, bool pblock);

#endif
9 changes: 7 additions & 2 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>

#include "ui.h"
#include "io.h"
Expand Down Expand Up @@ -65,8 +66,11 @@ int main(int argc, char * argv[]){
else if(strcmp(args.cmd, "map") == 0){
analysis_map(syn, args.intfile);
}
else if(strcmp(args.cmd, "search") ==0){
contiguous_query(syn, args.intfile);
else if(strcmp(args.cmd, "search") == 0){
contiguous_query(syn, args.intfile, false);
}
else if(strcmp(args.cmd, "searchblock") == 0){
contiguous_query(syn, args.intfile, true);
}
else{
printf("Command '%s' not recognized\n", args.cmd);
Expand All @@ -78,6 +82,7 @@ int main(int argc, char * argv[]){
// ------------------------------------------------------------------------
// Clean up
// ------------------------------------------------------------------------

if(syn)
free_synmap(syn);
close_Arguments(args);
Expand Down
15 changes: 11 additions & 4 deletions src/ui.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,22 @@ void print_help(){
" synder -d SYNTENY_FILE QUERY TARGET DB_DIR\n"
" synder [OPTIONS] -s SYNTENY_DB -c COMMAND\n"
"COMMANDS\n"
" map - print target intervals overlapping each query interval\n"
" count - like map but prints only the number that overlap\n"
" search - find target search spaces for each query interval\n"
" filter - print query-to-target links consistent with the synteny map\n"
" map\n"
" print target intervals overlapping each query interval\n"
" count\n"
" like map but prints only the number that overlap\n"
" filter\n"
" print query-to-target links consistent with the synteny map\n"
" search\n"
" predict target search spaces for each query interval\n"
" searchblock\n"
" as per search, but also prints blocks\n"
"EXAMPLES\n"
" synder -d at-al.tab at al db\n"
" synder -i at.gff -s db/at_al.txt -c map\n"
" synder -i at.gff -s db/at_al.txt -c count\n"
" synder -i at.syn -s db/at_al.txt -c search\n"
" synder -i at.syn -s db/at_al.txt -c searchblock\n"
" synder -f hits.syn -s db/at_al.txt -c filter\n"
" synder test\n"
);
Expand Down

0 comments on commit 90aed0a

Please sign in to comment.