Skip to content

Commit

Permalink
Merge branch 'ctig-testing'
Browse files Browse the repository at this point in the history
  • Loading branch information
arendsee committed Jun 14, 2016
2 parents 05ebbbe + dfe85cb commit 722d523
Show file tree
Hide file tree
Showing 6 changed files with 386 additions and 5 deletions.
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,22 @@
# Manual stub

#Contiguous Set Use
./run -i GFF -s DB -c contig

output is like -c map with additon of flag
SEQNAME TARGETNAME SEARCH_INTERVAL_START SEARCH_INTERVAL_STOP FLAG

Flag is to keep track of edge dependability:
0 -> Reliable
1 -> Start Edge Unreliable [Case F]
2 -> Stop Edge Unreliable [Case F]
3 -> Both Edges Unreliable [Case E or C extended to F]

if INTERVAL_START and INTERVAL_END are . then the query interval met the
criteria for Case E



# Definitions

* query interval - an interval on the query genome
Expand Down
5 changes: 4 additions & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
CFLAGS=-Wall -g -O2
CC=gcc
TARGET=run
OBJECTS=main.o test.o io.o genome.o analysis.o contig.o interval.o itree.o search.o ia.o iv.o block.o synmap.o ui.o
OBJECTS=main.o test.o io.o genome.o analysis.o contig.o interval.o itree.o search.o ia.o iv.o block.o synmap.o ui.o contiguous.o

${TARGET}: ${OBJECTS}
${CC} -o ${TARGET} ${CFLAGS} ${OBJECTS}
Expand Down Expand Up @@ -33,6 +33,9 @@ synmap.o: synmap.c synmap.h
ui.o: ui.c ui.h
${CC} -c ${CFLAGS} ui.[ch]

contiguous.o: contiguous.c contiguous.h
${CC} -c ${CFLAGS} contiguous.[ch]

itree.o: itree/itree.c itree/itree.h
${CC} -c ${CFLAGS} itree/itree.[ch]

Expand Down
4 changes: 2 additions & 2 deletions src/analysis.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ void analysis_count(Synmap * syn, FILE * intfile){
uint count;
int chrid, start, stop;
while ((fscanf(intfile,
"%d %*s %*s %d %d %*c %*c %*s %s\n",
"%d %*s %*s %d %d %*s %*c %*s %s\n",
&chrid, &start, &stop, seqname)) != EOF)
{
// contig.c::count_overlaps ->
Expand All @@ -30,7 +30,7 @@ void analysis_map(Synmap * syn, FILE * intfile){
Block twoblk;
bool missing;
while ((fscanf(intfile,
"%d %*s %*s %d %d %*c %*c %*s %s\n",
"%d %*s %*s %d %d %*s %*c %*s %s\n",
&chrid, &start, &stop, seqname)) != EOF)
{
missing = false;
Expand Down
267 changes: 267 additions & 0 deletions src/contiguous.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>

#include "contiguous.h"
#include "contig.h"
#include "synmap.h"


ContiguousMap * init_contiguous_map(size_t size){
ContiguousMap* cmap = (ContiguousMap*)malloc(sizeof(ContiguousMap));
cmap->size = size;
cmap->map = (ContiguousNode**)malloc(size * sizeof(ContiguousNode*));

return cmap;
}

void contiguous_query(Synmap * syn, FILE * intfile){
// EAnsure blocks are sorted (BAD IDEA, REDACTED FOR NOW)
// sort_all_contigs(syn);

// count total number of unique block-block pairs for hashmap
ContiguousMap *cmap= populate_contiguous_map(syn);

char seqname[128];
int chrid, start, stop,flag;
Contig * contigs;
Contig * tcon;
Block * qblk;
uint32_t blkid;
ContiguousNode * qnode;
Block * tblk;
Block twoblk;
bool missing;
while ((fscanf(intfile,
"%d %*s %*s %d %d %*s %*c %*s %s\n",
&chrid, &start, &stop, seqname)) != EOF)
{
contigs = get_region(SGC(syn, 0, chrid), start, stop);
uint32_t region[2]={0,0};
missing = false;

if(contigs->size == 2){
twoblk.start = start;
twoblk.stop = stop;
if(!((CB(contigs, 0) && block_overlap(CB(contigs, 0), &twoblk)) ||
(CB(contigs, 1) && block_overlap(CB(contigs, 1), &twoblk))
)){
missing = true;
}
}

for(int i = 0; i < contigs->size; i++){
qblk = contigs->block[i];
if(qblk){
blkid = cmap->map[qblk->linkid]->qblkid;
if(i==0){
region[0] = blkid;
region[1] = blkid;
} else {
if(blkid < region[0]){region[0]=blkid;}
if(blkid > region[1]){region[1]=blkid;}
}
// printf("[%d]\t%u\t%u\t\n",
// qblk->linkid,qblk->start,qblk->stop);
// printf("[%d]\t{%u}\n",
// qblk->linkid,blkid);
}
}
for(int i= region[0]; i<= region[1];i++){
/*
* Flag -> keeps track of edge reliability
* 0 = determinable boundries
* 1 = Left hand undeterminable (case F)
* 2 = Right hand undertrminable (caseF)
* 3 = Neitherside determiable (case C extended to F, or case E
*/
flag = 0;
qblk = SGCB(syn,0,chrid,i);
tblk = QT_SGCB(syn, qblk);
tcon = QT_SGC(syn, qblk);
if(missing){
flag=3;
printf("%s\t%s\t.\t.\t%d\n",
seqname, tcon->name,flag);
break;
}


qnode = cmap->map[qblk->linkid];
// Set return region assumes case D;
tblk->start= qnode->match->start;
tblk->stop = qnode->match->stop;
// Start is BEFORE current query Block;
if (start< qblk->start) {
//Move down contiguous block, stoping at leftmost possible point
while(qnode->prev != NULL && start > qnode->feature->stop){
qnode = qnode->prev;
}
if(qnode->flag >1 && start > qnode->feature->stop){ //avoid duplicates in cases of overlap
continue;
}
if(start < qnode->feature->start){ // Check we didn't advance into a case E,F Situation
flag = 1;
tblk ->start = qnode->match->start;
} else if( start > qnode->feature->start) { //Start is contained within current block C,D
tblk->start = qnode->match->start;
} else { //Case A,B situations
tblk->start = qnode->match->stop;
}

}
//Stop is AFTER current query Block
if (stop > qblk->stop){ //Stop is AFTER
while(qnode->next != NULL && stop > qnode->feature->start){
qnode = qnode->next;
//Advance i to avoid repeated ranges due to continuity.
i++;
}
if(qnode->flag >1 && stop < qnode->feature->start){ //avoid duplicates in cases of overlap
continue;
}
if(stop > qnode->feature->stop){ //Case E,F situations
flag = flag==1? 3:2;
tblk->stop = qnode->match->stop;
} else if (stop < qnode->feature->stop){
tblk->stop = qnode->match->stop;
} else {
tblk->stop = qnode->match->start;
}
}

printf("%s\t%s\t%u\t%u\t%d\n",
seqname, tcon->name, tblk->start, tblk->stop, flag);

}
free(contigs->name);
free(contigs->block);
free(contigs);
}

free_contiguous_map(cmap);
}

void free_contiguous_map(ContiguousMap * cmap){
for(int i=0;i<cmap->size;i++){
if(cmap->map[i]) free(cmap->map[i]);
}
if(cmap->map) free(cmap->map);
if(cmap) free(cmap);
}


ContiguousMap * populate_contiguous_map(Synmap * syn){
// Figure out how many unique blocks the current synmap has
size_t size = 0;
for(uint32_t i=0; i< syn->genome[0]->size; i++){
size += syn->genome[0]->contig[i]->size;
}

// Initialize new ContiguousMap to that size to serve as hashmap
// when looking up overlaping blocks
ContiguousMap * cmap = init_contiguous_map(size);
// Initialize ContiguousList structure to hold head and tail of current
// Contiguous Set
cmap->size = size;

for(uint32_t i =0; i < syn->genome[0]->size; i++){

//Grab contig to variable, to save writing
Contig * ctig = (Contig *)malloc(sizeof(Contig));
ctig = syn->genome[0]->contig[i];
uint32_t overlap_bound[2]= {0,0};

for(uint32_t j=0; j < ctig->size; j++){

//set current block up to be a new node
ContiguousNode *cnode = (ContiguousNode*)malloc(sizeof(ContiguousNode));
cnode->flag =0;
cnode->next = NULL;
cnode->prev = NULL;
cnode->feature =ctig->block[j];
cnode->match = syn->genome[1]->contig[cnode->feature->oseqid]->block[cnode->feature->oblkid];
cnode->qblkid = j;
//Start of contig is always a default node
cmap->map[cnode->feature->linkid] = cnode;
if(j==0){
cmap->map[cnode->feature->linkid] = cnode;
// printf("%d\t head \t%u::%u \t[%u:%u] {%u,%u} \n",i,cnode->feature->start,cnode->feature->stop,cnode->feature->oseqid,cnode->feature->oblkid,j,cnode->match->oblkid);
// printf("\t[%u:%u] \n",cnode->match->start,cnode->match->stop);
continue;
//Otherwise we get to figure out if the current block can be added to or breaks the current ContiguousSet
} else {

//// Test for overlap on query side
bool q_overlap = false;
bool t_overlap = false;
for(int k = overlap_bound[0]; k<= overlap_bound[1]; k++){
q_overlap = block_overlap(cnode->feature,ctig->block[k]) || q_overlap;
t_overlap = (block_overlap(cnode->match,cmap->map[ctig->block[k]->linkid]->match) &&
(cnode->feature->oseqid == ctig->block[k]->oseqid)) || t_overlap;
if(q_overlap || t_overlap) continue;
}
if(q_overlap){
cnode->flag=2;
// printf("Query Overlap \t%u::%u \t[%u:%u] {%u,%u} \n",cnode->feature->start,cnode->feature->stop,cnode->feature->oseqid,cnode->feature->oblkid,j,cnode->match->oblkid);
// printf("\t[%u:%u] \n",cnode->match->start,cnode->match->stop);
}
if(t_overlap){
cnode->flag=3;
// printf("Target Overlap \t%u::%u \t[%u:%u] {%u,%u} \n",cnode->feature->start,cnode->feature->stop,cnode->feature->oseqid,cnode->feature->oblkid,j,cnode->match->oblkid);
// printf("\t[%u:%u] \n",cnode->match->start,cnode->match->stop);
// printf("[Target] \t%u::%u \n",cnode->match->start,cnode->match->stop);
}
if(q_overlap || t_overlap){
if(q_overlap && t_overlap) cnode->flag = 4;
overlap_bound[1]=j;
cmap->map[cnode->feature->linkid] = cnode;
continue;
} else {
overlap_bound[0] = j;
overlap_bound[1] = j;
}
}
// On different contig from previous
if(cnode->feature->oseqid != cmap->map[ctig->block[j-1]->linkid]->feature->oseqid){
// printf("Different Contig \t%u::%u \t[%u:%u] {%u,%u} \n",cnode->feature->start,cnode->feature->stop,cnode->feature->oseqid,cnode->feature->oblkid,j,cnode->match->oblkid);
// printf("\t[%u:%u] \n",cnode->match->start,cnode->match->stop);
cmap->map[cnode->feature->linkid] = cnode;
} else if (cnode->feature->oblkid == ctig->block[j-1]->oblkid+1){ // Regular contiguous interval
cmap->map[cnode->feature->linkid] = cnode;
cmap->map[ctig->block[j-1]->linkid]->next = cnode;
cmap->map[cnode->feature->linkid]->prev = cmap->map[ctig->block[j-1]->linkid];
// printf("Contiguous \t%u::%u \t[%u:%u] {%u,%u} \n",cnode->feature->start,cnode->feature->stop,cnode->feature->oseqid,cnode->feature->oblkid,j,cnode->match->oblkid);
// printf("\t[%u:%u] \n",cnode->match->start,cnode->match->stop);
} else if( cnode->feature->oblkid < ctig->block[j-1]->oblkid){ //Twist to left of previous block
cnode->flag = cmap->map[ctig->block[j-1]->linkid]->flag < -1 ? -3:-1;
cmap->map[cnode->feature->linkid] = cnode;
if(cnode->feature->oblkid == ctig->block[j-1]->oblkid - 1){
cmap->map[ctig->block[j-1]->linkid]->next = cnode;
cmap->map[cnode->feature->linkid]->prev = cmap->map[ctig->block[j-1]->linkid];
}
// printf("Twist Left \t%u::%u \t[%u:%u] {%u,%u} \n",cnode->feature->start,cnode->feature->stop,cnode->feature->oseqid,cnode->feature->oblkid,j,cnode->match->oblkid);
// printf("\t[%u:%u] \n",cnode->match->start,cnode->match->stop);
} else if( cnode->feature->oblkid > ctig->block[j-1]->oblkid){// Twist to right, possible transposition
cnode->flag=-2;
cmap->map[cnode->feature->linkid] = cnode;
// printf("Twist Right \t%u::%u \t[%u:%u] {%u,%u} \n",cnode->feature->start,cnode->feature->stop,cnode->feature->oseqid,cnode->feature->oblkid,j,cnode->match->oblkid);
// printf("\t[%u:%u] \n",cnode->match->start,cnode->match->stop);
} else { // Default case that should never be reached.
cmap->map[cnode->feature->linkid] = cnode;
// printf("OTHER \t%u::%u \t[%u:%u] {%u,%u} \n",cnode->feature->start,cnode->feature->stop,cnode->feature->oseqid,cnode->feature->oblkid,j,cnode->match->oblkid);
// printf("\t[%u:%u] \n",cnode->match->start,cnode->match->stop);
}
}

}
//The actual list has done its job, free.
return cmap;

}




Loading

0 comments on commit 722d523

Please sign in to comment.