Skip to content

Commit e1bd3a1

Browse files
committed
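Move the omp critical section out of the per-iteration loop bodies in prefilter_db: each thread now collects its results in a thread-local vector and merges them into the shared vector once, under a single critical section per thread (should improve performance in hhblits_omp by a lot in some cases)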
1 parent 50822b8 commit e1bd3a1

File tree

1 file changed

+27
-26
lines changed

1 file changed

+27
-26
lines changed

src/hhprefilter.cpp

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -463,22 +463,22 @@ void Prefilter::prefilter_db(HMM* q_tmp, Hash<Hit>* previous_hits,
463463
workspace[i] = (simd_int*) malloc_simd_int(
464464
3 * (LQ + element_count) * sizeof(char));
465465

466-
#pragma omp parallel for schedule(static)
467-
// Loop over all database sequences
468-
for (size_t n = 0; n < num_dbs; n++) {
466+
#pragma omp parallel
467+
{
469468
int thread_id = 0;
470469
#ifdef OPENMP
471470
thread_id = omp_get_thread_num();
472471
#endif
473-
// Perform search step
474-
int score = ungapped_sse_score(qc, LQ, first[n], length[n],
475-
prefilter_score_offset, workspace[thread_id]);
476-
477-
score = score
478-
- (int) (prefilter_bit_factor * (log_qlen + flog2(length[n])));
479-
472+
std::vector<std::pair<int, int>> first_prefilter_local;
473+
first_prefilter_local.reserve(300);
474+
#pragma omp for schedule(static) nowait
475+
for (size_t n = 0; n < num_dbs; n++) {
476+
int score = ungapped_sse_score(qc, LQ, first[n], length[n], prefilter_score_offset, workspace[thread_id]);
477+
score = score - (int) (prefilter_bit_factor * (log_qlen + flog2(length[n])));
478+
first_prefilter_local.emplace_back(score, n);
479+
}
480480
#pragma omp critical
481-
first_prefilter.push_back(std::pair<int, int>(score, n));
481+
first_prefilter.insert(first_prefilter.end(), first_prefilter_local.begin(), first_prefilter_local.end());
482482
}
483483
//filter after calculation of ungapped sse score to include at least min_prefilter_hits
484484
std::vector<std::pair<int, int> >::iterator it;
@@ -509,30 +509,31 @@ void Prefilter::prefilter_db(HMM* q_tmp, Hash<Hit>* previous_hits,
509509
<< "HMMs passed 1st prefilter (gapless profile-profile alignment) : "
510510
<< count_dbs << std::endl;
511511

512-
#pragma omp parallel for schedule(static)
513-
// Loop over all database sequences
514-
// for (int n = 0; n < count_dbs; n++) {
515-
for (size_t i = 0; i < first_prefilter.size(); i++) {
512+
#pragma omp parallel
513+
{
516514
int thread_id = 0;
517515
#ifdef OPENMP
518516
thread_id = omp_get_thread_num();
519517
#endif
520-
521-
int n = first_prefilter[i].second;
522-
523-
// Perform search step
524-
int score = swStripedByte(qc, LQ, first[n], length[n], gap_init,
518+
std::vector<std::pair<double, int>> hits_local;
519+
hits_local.reserve(300);
520+
// Loop over all database sequences
521+
// for (int n = 0; n < count_dbs; n++) {
522+
#pragma omp for schedule(static) nowait
523+
for (size_t i = 0; i < first_prefilter.size(); i++) {
524+
int n = first_prefilter[i].second;
525+
int score = swStripedByte(qc, LQ, first[n], length[n], gap_init,
525526
gap_extend, workspace[thread_id], workspace[thread_id] + W,
526527
workspace[thread_id] + 2 * W, prefilter_score_offset);
527528

528-
double evalue = factor * length[n] * fpow2(-score / prefilter_bit_factor);
529-
530-
if (evalue < prefilter_evalue_coarse_thresh) {
531-
#pragma omp critical
532-
hits.push_back(std::pair<double, int>(evalue, n));
529+
double evalue = factor * length[n] * fpow2(-score / prefilter_bit_factor);
530+
if (evalue < prefilter_evalue_coarse_thresh) {
531+
hits_local.emplace_back(evalue, n);
532+
}
533533
}
534+
#pragma omp critical
535+
hits.insert(hits.end(), hits_local.begin(), hits_local.end());
534536
}
535-
536537
//filter after calculation of evalues to include at least min_prefilter_hits
537538
std::sort(hits.begin(), hits.end(), comparePair());
538539

0 commit comments

Comments
 (0)