diff --git a/src/hhalign.cpp b/src/hhalign.cpp index bf2b74e3..d1cb61a7 100755 --- a/src/hhalign.cpp +++ b/src/hhalign.cpp @@ -111,6 +111,8 @@ void HHalign::help(Parameters& par, char all) { printf(" -norealign do NOT realign displayed hits with MAC algorithm (def=realign) \n"); printf(" -mact [0,1[ posterior prob threshold for MAC realignment controlling greedi- \n"); printf(" ness at alignment ends: 0:global >0.1:local (default=%.2f) \n", par.mact); + printf(" -mac_min_length minimum length of MAC hit in comparison with the Viterbi hit\n"); + printf(" (in terms of matched_cols, default=%.2f)\n", par.mac_min_length); printf(" -glob/-loc use global/local alignment mode for searching/ranking (def=local)\n"); if (all) { @@ -518,6 +520,9 @@ void HHalign::ProcessArguments(Parameters& par) { else if (!strcmp(argv[i], "-mact") && (i < argc - 1)) { par.mact = atof(argv[++i]); } + else if (!strcmp(argv[i], "-mac_min_length") + && (i < argc - 1)) + par.mac_min_length = atof(argv[++i]); //not in the help - but intended else if (!strcmp(argv[i], "-scwin") && (i < argc - 1)) { par.columnscore = 5; diff --git a/src/hhbacktracemac.cpp b/src/hhbacktracemac.cpp index 9c1a4a19..2907d7cf 100644 --- a/src/hhbacktracemac.cpp +++ b/src/hhbacktracemac.cpp @@ -108,70 +108,162 @@ void PosteriorDecoder::writeProfilesToHits(HMM &q, HMM &t, PosteriorMatrix &p_mm } } -void PosteriorDecoder::backtraceMAC(HMM & q, HMM & t, PosteriorMatrix & p_mm, ViterbiMatrix & backtrace_matrix, const int elem, Hit & hit, float corr) { - - // Trace back trough the matrix b[i][j] until STOP state is found - - LogLevel actual_level = Log::reporting_level(); - int step; // counts steps in path through 5-layered dynamic programming matrix - int i,j; // query and template match state indices - - initializeBacktrace(t,hit); - - // Make sure that backtracing stops when t:M1 or q:M1 is reached (Start state), e.g. sMM[i][1], or sIM[i][1] (M:MM, B:IM) - for (i = 0; i <= q.L; ++i) backtrace_matrix.setMatMat(i, 1, elem, ViterbiMatrix::STOP); // b[i][1] = STOP; - for (j = 1; j <= t.L; ++j) backtrace_matrix.setMatMat(1, j, elem, ViterbiMatrix::STOP); // b[1][j] = STOP; - +void PosteriorDecoder::findBacktraceMAC(ViterbiMatrix & backtrace_matrix, const int elem, + int & i1, int & j1, int i2, int j2, std::vector & vi, std::vector & vj, std::vector & states, + int & matched_cols, int & nsteps, + LogLevel & actual_level) { // Back-tracing loop // In contrast to the Viterbi-Backtracing, STOP signifies the first Match-Match state, NOT the state before the first MM state - hit.matched_cols = 1; // for each MACTH (or STOP) state matched_col is incremented by 1 - hit.state = ViterbiMatrix::MM; // lowest state with maximum score must be match-match state - step = 0; // steps through the matrix correspond to alignment columns (from 1 to nsteps) - i = hit.i2; j = hit.j2; // last aligned pair is (i2,j2) + matched_cols = 1; // for each MACTH (or STOP) state matched_col is incremented by 1 + char state = ViterbiMatrix::MM; // lowest state with maximum score must be match-match state + int step = 0; // steps through the matrix correspond to alignment columns (from 1 to nsteps) + int i = i2; // last aligned pair is (i2,j2) + int j = j2; + + // arrays start from 1 + vi.push_back(0); + vj.push_back(0); + states.push_back((char) ViterbiMatrix::STOP); // note: static cast is needed for avoiding "undefined" ViterbiMatrix::MM object error + if (backtrace_matrix.getMatMat(i, j, elem) != ViterbiMatrix::MM) { // b[i][j] != MM if (Log::reporting_level() > DEBUG) fprintf(stderr,"Error: backtrace does not start in match-match state, but in state %i, (i,j)=(%i,%i)\n",backtrace_matrix.getMatMat(i, j, elem),i,j); step = 0; - hit.i[step] = i; - hit.j[step] = j; - hit.alt_i->push_back(i); - hit.alt_j->push_back(j); - hit.state = ViterbiMatrix::STOP; + vi[0] = i; + vj[0] = j; + state = ViterbiMatrix::STOP; } else { - while (hit.state != ViterbiMatrix::STOP) { + while (state != ViterbiMatrix::STOP) { step++; - hit.states[step] = hit.state = backtrace_matrix.getMatMat(i, j, elem); // b[i][j]; - hit.i[step] = i; - hit.j[step] = j; - hit.alt_i->push_back(i); - hit.alt_j->push_back(j); - // Exclude cells in direct neighbourhood from all further alignments - for (int ii = imax(i-2,1); ii <= imin(i+2, q.L); ++ii) -// hit.cell_off[ii][j] = 1; - backtrace_matrix.setCellOff(ii, j, elem, true); - for (int jj = imax(j-2,1); jj <= imin(j+2, t.L); ++jj) - backtrace_matrix.setCellOff(i, jj, elem, true); - - if (hit.state == ViterbiMatrix::MM) hit.matched_cols++; - - switch (hit.state) { + state = backtrace_matrix.getMatMat(i, j, elem); // b[i][j]; + states.push_back(state); + vi.push_back(i); + vj.push_back(j); + + if (state == ViterbiMatrix::MM) matched_cols++; + + switch (state) { case ViterbiMatrix::MM: i--; j--; break; case ViterbiMatrix::IM: j--; break; case ViterbiMatrix::MI: i--; break; case ViterbiMatrix::STOP: break; default: - fprintf(stderr,"Error: unallowed state value %i occurred during backtracing at step %i, (i,j)=(%i,%i)\n", hit.state, step, i, j); - hit.state = 0; + fprintf(stderr,"Error: unallowed state value %i occurred during backtracing at step %i, (i,j)=(%i,%i)\n", state, step, i, j); + state = 0; actual_level = DEBUG1; break; } //end switch (state) } //end while (state) } - hit.i1 = hit.i[step]; - hit.j1 = hit.j[step]; - hit.states[step] = ViterbiMatrix::MM; // first state (STOP state) is set to MM state - hit.nsteps = step; + // first state (STOP state) is set to MM state + states.push_back((char) ViterbiMatrix::MM); // note: static cast is needed for avoiding "undefined" ViterbiMatrix::MM object error + nsteps = step; + i1 = i2; + j1 = j2; + i1 = vi[nsteps]; + j1 = vj[nsteps]; +} + +void PosteriorDecoder::backtraceMAC(HMM & q, HMM & t, PosteriorMatrix & p_mm, ViterbiMatrix & backtrace_matrix, const int elem, Hit & hit, float corr, float mac_min_length) { + + // Trace back trough the matrix b[i][j] until STOP state is found + + LogLevel actual_level = Log::reporting_level(); + int step; // counts steps in path through 5-layered dynamic programming matrix + int i,j; // query and template match state indices + + initializeBacktrace(q,t,hit); + + // Make sure that backtracing stops when t:M1 or q:M1 is reached (Start state), e.g. sMM[i][1], or sIM[i][1] (M:MM, B:IM) + for (i = 0; i <= q.L; ++i) backtrace_matrix.setMatMat(i, 1, elem, ViterbiMatrix::STOP); // b[i][1] = STOP; + for (j = 1; j <= t.L; ++j) backtrace_matrix.setMatMat(1, j, elem, ViterbiMatrix::STOP); // b[1][j] = STOP; + + std::vector vi; + std::vector vj; + std::vector states; + int matched_cols, nsteps; + int i2 = hit.i2; + int j2 = hit.j2; + int i1, j1; + + findBacktraceMAC(backtrace_matrix, elem, i1, j1, i2, j2, vi, vj, states, matched_cols, nsteps, actual_level); + + char msg[512]; + if (i2 < m_temp_hit->i1 || j2 < m_temp_hit->j1 || i1 > m_temp_hit->i2 || j1 > m_temp_hit->j2) { + snprintf(msg, sizeof(msg), "The main MAC hit (%d,%d)-(%d,%d) is outside the Viterbi hit scope (%d,%d)-(%d,%d), looking for the alternative...\n", + i1, j1, i2, j2, m_temp_hit->i1, m_temp_hit->j1, m_temp_hit->i2, m_temp_hit->j2); + HH_LOG(WARNING) << msg; + // find first hit from mac_hit_end_positions which overlaps with Viterbi hit + int no = 1; + int found = 0; + int i2_0 = i2, j2_0 = j2; + for(std::multimap >::iterator it = hit.mac_hit_end_positions.begin(); + it != hit.mac_hit_end_positions.end(); it++, no++) { + double score = -it->first; + i2 = it->second.first; + j2 = it->second.second; + vi.clear(); + vj.clear(); + states.clear(); + findBacktraceMAC(backtrace_matrix, elem, i1, j1, i2, j2, vi, vj, states, matched_cols, nsteps, actual_level); + if (!(i2 < m_temp_hit->i1 || j2 < m_temp_hit->j1 || i1 > m_temp_hit->i2 || j1 > m_temp_hit->j2)) { + snprintf(msg, sizeof(msg), "Found the alternative MAC hit #%d (%d,%d)-(%d,%d) (score=%f) inside the Viterbi hit scope (%d,%d)-(%d,%d)\n", + no, i1, j1, i2, j2, score, m_temp_hit->i1, m_temp_hit->j1, m_temp_hit->i2, m_temp_hit->j2); + HH_LOG(WARNING) << msg; + found = no; + break; + } + } + if (found == 0) { + nsteps = 0; + } + } + int mac_is_valid = 1; + if (matched_cols < hit.matched_cols * mac_min_length) { + snprintf(msg, sizeof(msg), "MAC alignment is too short, matched_cols=%d, the threshold is %f of the original matched_cols=%d: restoring the original Viterbi hit (%d,%d)-(%d,%d). You can consider using lower -mact value in order to avoid such cases.\n", matched_cols, mac_min_length, hit.matched_cols, m_temp_hit->i1, m_temp_hit->j1, m_temp_hit->i2, m_temp_hit->j2); + HH_LOG(WARNING) << msg; + mac_is_valid = 0; + } else if (matched_cols < hit.matched_cols) { + snprintf(msg, sizeof(msg), "MAC alignment is shorter (matched_cols=%d) than the Viterbi hit (matched_cols=%d). You can consider using lower -mact value or higher -mac_min_length value in order to avoid such cases.\n", matched_cols, hit.matched_cols); + HH_LOG(WARNING) << msg; + } + if (!nsteps) { + snprintf(msg, sizeof(msg), "Could not find valid MAC alternative hit for the original Viterbi hit scope: restoring the original Viterbi hit (%d,%d)-(%d,%d). You can consider using lower -mact value in order to avoid such cases.\n", m_temp_hit->i1, m_temp_hit->j1, m_temp_hit->i2, m_temp_hit->j2); + HH_LOG(WARNING) << msg; + mac_is_valid = 0; + } + if (!mac_is_valid) { + restoreHitPath(hit); + return; + } + + // register the checked hit + for (step = 1; step <= nsteps; step++) { + hit.states[step] = hit.state = states[step]; + i = vi[step]; j = vj[step]; + hit.i[step] = i; + hit.j[step] = j; + hit.alt_i->push_back(i); + hit.alt_j->push_back(j); + // Exclude cells in direct neighbourhood from all further alignments + for (int ii = imax(i-2,1); ii <= imin(i+2, q.L); ++ii) + backtrace_matrix.setCellOff(ii, j, elem, true); + for (int jj = imax(j-2,1); jj <= imin(j+2, t.L); ++jj) + backtrace_matrix.setCellOff(i, jj, elem, true); + } + if (nsteps == 0) { + hit.i[0] = vi[0]; + hit.j[0] = vj[0]; + hit.alt_i->push_back(vi[0]); + hit.alt_j->push_back(vj[0]); + hit.state = ViterbiMatrix::STOP; + } + hit.i1 = hit.i[nsteps]; + hit.j1 = hit.j[nsteps]; + hit.states[nsteps] = ViterbiMatrix::MM; // first state (STOP state) is set to MM state + hit.nsteps = nsteps; + hit.matched_cols = matched_cols; // Allocate new space for alignment scores hit.S = new float[hit.nsteps+1]; @@ -270,22 +362,23 @@ void PosteriorDecoder::backtraceMAC(HMM & q, HMM & t, PosteriorMatrix & p_mm, Vi ///////////////////////////////////////////////////////////////////////////////////// // Allocate memory for data of new alignment (sequence names, alignment, scores,...) ///////////////////////////////////////////////////////////////////////////////////// -void PosteriorDecoder::initializeBacktrace(HMM & t, Hit & hit) { +void PosteriorDecoder::initializeBacktrace(HMM & q, HMM & t, Hit & hit) { + int maxlen = q.L + t.L + 1; // Allocate new space if(hit.i) { delete[] hit.i; } - hit.i = new int[hit.i2 + hit.j2 + 2]; + hit.i = new int[maxlen]; if(hit.j) { delete[] hit.j; } - hit.j = new int[hit.i2 + hit.j2 + 2]; + hit.j = new int[maxlen]; if(hit.states) { delete[] hit.states; } - hit.states = new char[hit.i2 + hit.j2 + 2]; + hit.states = new char[maxlen]; if(hit.S) { delete[] hit.S; diff --git a/src/hhblits.cpp b/src/hhblits.cpp index 1a20dac7..583218a5 100644 --- a/src/hhblits.cpp +++ b/src/hhblits.cpp @@ -292,6 +292,8 @@ void HHblits::help(Parameters& par, char all) { printf(" -realign_old_hits realign hits from previous iterations \n"); printf(" -mact [0,1[ posterior prob threshold for MAC realignment controlling greedi- \n"); printf(" ness at alignment ends: 0:global >0.1:local (default=%.2f) \n", par.mact); + printf(" -mac_min_length minimum length of MAC hit in comparison with the Viterbi hit\n"); + printf(" (in terms of matched_cols, default=%.2f)\n", par.mac_min_length); printf(" -glob/-loc use global/local alignment mode for searching/ranking (def=local)\n"); if (all) { printf(" -realign realign displayed hits with max. accuracy (MAC) algorithm \n"); @@ -762,6 +764,9 @@ void HHblits::ProcessArguments(Parameters& par) { else if ((!strcmp(argv[i], "-mact") || !strcmp(argv[i], "-mapt")) && (i < argc - 1)) par.mact = atof(argv[++i]); + else if (!strcmp(argv[i], "-mac_min_length") + && (i < argc - 1)) + par.mac_min_length = atof(argv[++i]); else if (!strcmp(argv[i], "-sc") && (i < argc - 1)) par.columnscore = atoi(argv[++i]); //no help required diff --git a/src/hhdecl.cpp b/src/hhdecl.cpp index ef355046..32b42f6c 100755 --- a/src/hhdecl.cpp +++ b/src/hhdecl.cpp @@ -85,6 +85,7 @@ Parameters::Parameters(const int argc, const char** argv) : argc(argc), argv(arg ssa = 1.0f; // weight of ss evolution matrix shift = -0.03f; // Shift match score up mact = 0.3501f; // Probability threshold for MAC alignment in local mode for alignment ends (set to 0.3501 to track user modification) + mac_min_length = 0.0f; // Minimum length of MAC hit in comparison with the original Viterbi hit (in terms of matched_cols) corr = 0.1f; // Weight of correlations of scores for |i-j|<=4 egq = 0.0f; // no charge for end gaps as default diff --git a/src/hhdecl.h b/src/hhdecl.h index e9265b35..04854e1b 100644 --- a/src/hhdecl.h +++ b/src/hhdecl.h @@ -239,6 +239,7 @@ class Parameters // Parameters for gap penalties and pseudocounts float corr; // Weight of correlations between scores with |i-j|<=4 float shift; // Score offset for match-match states double mact; // Probability threshold (negative offset) in MAC alignment determining greediness at ends of alignment + float mac_min_length; // Minimum length of MAC hit in comparison with the original Viterbi hit (in terms of matched_cols) int realign_max; // Realign max ... hits float maxmem; // maximum available memory in GB for realignment (approximately) diff --git a/src/hhhit.h b/src/hhhit.h index 6c9d259e..9dc73045 100755 --- a/src/hhhit.h +++ b/src/hhhit.h @@ -12,6 +12,7 @@ #include #include #include +#include class Hit; @@ -138,6 +139,39 @@ class Hit char state; // 0: Start/STOP state 1: MM state 2: GD state (-D) 3: IM state 4: DG state (D-) 5 MI state int min_overlap; + /* macAlgorithm() method stores here all the possible MAC path end positions (it's needed by backtraceMAC()) */ + std::multimap > mac_hit_end_positions; + int max_mac_hit_end_positions; + + void initMACHitEndPositions(int max) { + if (mac_hit_end_positions.size() > 0) { + mac_hit_end_positions.erase(mac_hit_end_positions.begin(), mac_hit_end_positions.end()); + } + max_mac_hit_end_positions = max; + } + + void storeMACHitEndPosition(double score, int i2, int j2) { + std::pair p = std::pair(i2, j2); + mac_hit_end_positions.insert(std::pair >(-score, p)); + // store no more than max_mac_hit_end_positions for the best MAC hits + while (mac_hit_end_positions.size() > max_mac_hit_end_positions) { + mac_hit_end_positions.erase(std::next(mac_hit_end_positions.rbegin()).base()); + } + } + + void eraseMACHitEndPosition(double score, int i2, int j2) { + typedef std::multimap >::iterator mapIterator; + + std::pair its = mac_hit_end_positions.equal_range(-score); + std::pair p = std::pair(i2, j2); + for(mapIterator it = its.first; it != its.second; it++) { + if (it->second == p) { + mac_hit_end_positions.erase(it); + return; + } + } + } + private: // Calculate probability of true positive : p_TP(score)/( p_TP(score)+p_FP(score) ) // TP: same superfamily OR MAXSUB score >=0.1 diff --git a/src/hhmacalgorithm.cpp b/src/hhmacalgorithm.cpp index f053d1b9..3f7feb9a 100644 --- a/src/hhmacalgorithm.cpp +++ b/src/hhmacalgorithm.cpp @@ -50,6 +50,7 @@ void PosteriorDecoder::macAlgorithm(HMM & q, HMM & t, Hit & hit, // char * c_ptr = new char; char val; hit.min_overlap = 0; + hit.initMACHitEndPositions(imax(q.L, t.L)); // reasonable limit on the number of alternative MAC hits // Dynamic programming for (i = 1; i <= q.L; ++i) { // Loop through query positions i // If q is compared to t, exclude regions where overlap of q with t < min_overlap residues @@ -128,6 +129,13 @@ void PosteriorDecoder::macAlgorithm(HMM & q, HMM & t, Hit & hit, hit.i2 = i; hit.j2 = j; score_MAC = S_curr[j]; } + if((m_local || i == q.L) && val == ViterbiMatrix::MM) { // local alignment path ends with MM state + hit.storeMACHitEndPosition(S_curr[j], i, j); + if (val == ViterbiMatrix::MM) { + // delete suboptimal previous position + hit.eraseMACHitEndPosition(S_prev[j-1], i - 1, j - 1); + } + } } // end if @@ -145,6 +153,9 @@ void PosteriorDecoder::macAlgorithm(HMM & q, HMM & t, Hit & hit, hit.j2 = jmax; score_MAC = S_curr[jmax]; } + if (!m_local) { + hit.storeMACHitEndPosition(S_curr[jmax], i, jmax); + } for (j = 0; j <= t.L; ++j) S_prev[j] = S_curr[j]; diff --git a/src/hhposteriordecoder.cpp b/src/hhposteriordecoder.cpp index ce37ff84..9fed0cb4 100644 --- a/src/hhposteriordecoder.cpp +++ b/src/hhposteriordecoder.cpp @@ -86,7 +86,7 @@ PosteriorDecoder::~PosteriorDecoder() { void PosteriorDecoder::realign(HMM &q, HMM &t, Hit &hit, PosteriorMatrix &p_mm, ViterbiMatrix &viterbi_matrix, std::vector alignment_to_exclude, - char * exclstr, char* template_exclstr, int par_min_overlap, float shift, float mact, float corr) { + char * exclstr, char* template_exclstr, int par_min_overlap, float shift, float mact, float corr, float mac_min_length) { HMM & curr_q_hmm = q; HMM & curr_t_hmm = t; @@ -112,7 +112,7 @@ void PosteriorDecoder::realign(HMM &q, HMM &t, Hit &hit, backwardAlgorithm(curr_q_hmm, curr_t_hmm, hit, p_mm, viterbi_matrix, shift, 0); macAlgorithm(curr_q_hmm, curr_t_hmm, hit, p_mm, viterbi_matrix, mact, 0); - backtraceMAC(curr_q_hmm, curr_t_hmm, p_mm, viterbi_matrix, 0, hit, corr); + backtraceMAC(curr_q_hmm, curr_t_hmm, p_mm, viterbi_matrix, 0, hit, corr, mac_min_length); restoreHitValues(hit); writeProfilesToHits(curr_q_hmm, curr_t_hmm, p_mm, viterbi_matrix, hit); // add result to exclution paths (needed to align 2nd, 3rd, ... best alignment) @@ -279,6 +279,41 @@ void PosteriorDecoder::memorizeHitValues(Hit &curr_hit) { m_temp_hit->Eval = curr_hit.Eval; m_temp_hit->logEval = curr_hit.logEval; m_temp_hit->Probab = curr_hit.Probab; + m_temp_hit->i1 = curr_hit.i1; + m_temp_hit->i2 = curr_hit.i2; + m_temp_hit->j1 = curr_hit.j1; + m_temp_hit->j2 = curr_hit.j2; + m_temp_hit->nsteps = curr_hit.nsteps; + if (curr_hit.i) { + m_temp_hit->i = new int[curr_hit.nsteps + 1]; + for(int step = 1; step <= curr_hit.nsteps; step++) { + m_temp_hit->i[step] = curr_hit.i[step]; + } + } + if (curr_hit.j) { + m_temp_hit->j = new int[curr_hit.nsteps + 1]; + for(int step = 1; step <= curr_hit.nsteps; step++) { + m_temp_hit->j[step] = curr_hit.j[step]; + } + } + if (curr_hit.S) { + m_temp_hit->S = new float[curr_hit.nsteps + 1]; + for(int step = 1; step <= curr_hit.nsteps; step++) { + m_temp_hit->S[step] = curr_hit.S[step]; + } + } + if (curr_hit.S_ss) { + m_temp_hit->S_ss = new float[curr_hit.nsteps + 1]; + for(int step = 1; step <= curr_hit.nsteps; step++) { + m_temp_hit->S_ss[step] = curr_hit.S_ss[step]; + } + } + if (curr_hit.states) { + m_temp_hit->states = new char[curr_hit.nsteps + 1]; + for(int step = 1; step <= curr_hit.nsteps; step++) { + m_temp_hit->states[step] = curr_hit.states[step]; + } + } } /////////////////////////////////////////////////////////////////////////////////////////////////// @@ -296,6 +331,58 @@ void PosteriorDecoder::restoreHitValues(Hit &curr_hit) { curr_hit.Eval = m_temp_hit->Eval; curr_hit.logEval = m_temp_hit->logEval; curr_hit.Probab = m_temp_hit->Probab; + if (m_temp_hit->i) { + delete[] m_temp_hit->i; + } + if (m_temp_hit->j) { + delete[] m_temp_hit->j; + } + if (m_temp_hit->S) { + delete[] m_temp_hit->S; + } + if (m_temp_hit->S_ss) { + delete[] m_temp_hit->S_ss; + } + if (m_temp_hit->states) { + delete[] m_temp_hit->states; + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Restore the current hit with Viterbi alignment path +/////////////////////////////////////////////////////////////////////////////////////////////////// +void PosteriorDecoder::restoreHitPath(Hit &curr_hit) { + curr_hit.nsteps = m_temp_hit->nsteps; + if (curr_hit.S) { + delete[] curr_hit.S; + } + curr_hit.S = new float[curr_hit.nsteps + 1]; + if (curr_hit.S_ss) { + delete[] curr_hit.S_ss; + curr_hit.S_ss = NULL; + } + if (m_temp_hit->S_ss) { + curr_hit.S_ss = new float[curr_hit.nsteps + 1]; + } + if (curr_hit.states) { + delete[] curr_hit.states; + } + curr_hit.states = new char[curr_hit.nsteps + 1]; + for(int step = 1; step <= curr_hit.nsteps; step++) { + curr_hit.i[step] = m_temp_hit->i[step]; + curr_hit.j[step] = m_temp_hit->j[step]; + curr_hit.S[step] = m_temp_hit->S[step]; + if (m_temp_hit->S_ss) { + curr_hit.S_ss[step] = m_temp_hit->S_ss[step]; + } + curr_hit.states[step] = m_temp_hit->states[step]; + curr_hit.alt_i->push_back(curr_hit.i[step]); + curr_hit.alt_j->push_back(curr_hit.j[step]); + } + curr_hit.i1 = curr_hit.i[curr_hit.nsteps]; + curr_hit.j1 = curr_hit.j[curr_hit.nsteps]; + curr_hit.i2 = curr_hit.i[1]; + curr_hit.j2 = curr_hit.j[1]; } void PosteriorDecoder::printVector(float * vec) { diff --git a/src/hhposteriordecoder.h b/src/hhposteriordecoder.h index 67493ad1..dfbd74fd 100644 --- a/src/hhposteriordecoder.h +++ b/src/hhposteriordecoder.h @@ -66,7 +66,7 @@ class PosteriorDecoder { ///////////////////////////////////////////////////////////////////////////////////// void realign(HMM &q, HMM &t, Hit &hit, PosteriorMatrix &p_mm, ViterbiMatrix &viterbi_matrix, std::vector alignment_to_exclude, char * exclstr, - char* template_exclstr, int par_min_overlap, float shift, float mact, float corr); + char* template_exclstr, int par_min_overlap, float shift, float mact, float corr, float mac_min_length); void excludeMACAlignment(const int q_length, const int t_length, ViterbiMatrix &celloff_matrix, const int elem, MACBacktraceResult & alignment); @@ -125,15 +125,20 @@ class PosteriorDecoder { ViterbiMatrix & viterbi_matrix, float shift, const int elem); void macAlgorithm(HMM & q_hmm, HMM & t_hmm, Hit & hit_vec, PosteriorMatrix & p_mm, ViterbiMatrix & viterbi_matrix, float par_mact, const int elem); - void backtraceMAC(HMM & q, HMM & t, PosteriorMatrix & p_mm, ViterbiMatrix & backtrace_matrix, const int elem, Hit & hit, float corr); + void findBacktraceMAC(ViterbiMatrix & backtrace_matrix, const int elem, + int & i1, int & j1, int i2, int j2, std::vector & vi, std::vector & vj, std::vector & states, + int & matched_cols, int & nsteps, + LogLevel & actual_level); + void backtraceMAC(HMM & q, HMM & t, PosteriorMatrix & p_mm, ViterbiMatrix & backtrace_matrix, const int elem, Hit & hit, float corr, float mac_min_length); void writeProfilesToHits(HMM &q, HMM &t, PosteriorMatrix &p_mm, ViterbiMatrix & backtrace_matrix, Hit &hit); - void initializeBacktrace(HMM & t, Hit & hit); + void initializeBacktrace(HMM & q, HMM & t, Hit & hit); void initializeForAlignment(HMM &q, HMM &t, Hit &hit, ViterbiMatrix &viterbi_matrix, const int elem, const int t_max_L, int par_min_overlap); void maskViterbiAlignment(const int q_length, const int t_length, ViterbiMatrix &celloff_matrix, const int elem, Hit const &hit) const; void memorizeHitValues(Hit & curr_hit); void restoreHitValues(Hit &curr_hit); + void restoreHitPath(Hit &curr_hit); void printVector(simd_float * vec); void printVector(simd_int * vec); diff --git a/src/hhposteriordecoderrunner.cpp b/src/hhposteriordecoderrunner.cpp index 0baeda10..4ee59b59 100644 --- a/src/hhposteriordecoderrunner.cpp +++ b/src/hhposteriordecoderrunner.cpp @@ -97,7 +97,7 @@ void PosteriorDecoderRunner::executeComputation(HMM &q, std::vector hits // start realignment process decoder->realign(*q_hmm, *t_hmm[current_thread_id], *hit_cur, *m_posterior_matrices[current_thread_id], - *m_backtrace_matrix[current_thread_id], alignment_to_exclude, par.exclstr, par.template_exclstr, par.min_overlap, par.shift, par.mact, par.corr); + *m_backtrace_matrix[current_thread_id], alignment_to_exclude, par.exclstr, par.template_exclstr, par.min_overlap, par.shift, par.mact, par.corr, par.mac_min_length); // add result to exclution paths (needed to align 2nd, 3rd, ... best alignment) alignment_to_exclude.push_back(PosteriorDecoder::MACBacktraceResult(hit_cur->alt_i, hit_cur->alt_j)); } // end idb diff --git a/src/hhsearch.cpp b/src/hhsearch.cpp index 8adfe901..6c025e9c 100755 --- a/src/hhsearch.cpp +++ b/src/hhsearch.cpp @@ -130,6 +130,8 @@ void HHsearch::help(Parameters& par, char all) { printf(" -ovlp banded alignment: forbid largest diagonals |i-j| of DP matrix (def=%i)\n", par.min_overlap); printf(" -mact [0,1[ posterior prob threshold for MAC realignment controlling greedi- \n"); printf(" ness at alignment ends: 0:global >0.1:local (default=%.2f) \n", par.mact); + printf(" -mac_min_length minimum length of MAC hit in comparison with the Viterbi hit\n"); + printf(" (in terms of matched_cols, default=%.2f)\n", par.mac_min_length); printf(" -glob/-loc use global/local alignment mode for searching/ranking (def=local)\n"); if (all) { printf(" -realign realign displayed hits with max. accuracy (MAC) algorithm \n"); @@ -486,6 +488,9 @@ void HHsearch::ProcessArguments(Parameters& par) { else if ((!strcmp(argv[i], "-mact")) && (i < argc - 1)) par.mact = atof(argv[++i]); + else if (!strcmp(argv[i], "-mac_min_length") + && (i < argc - 1)) + par.mac_min_length = atof(argv[++i]); else if (!strcmp(argv[i], "-sc") && (i < argc - 1)) par.columnscore = atoi(argv[++i]); //no help required