Merge pull request #26 from chrisdjscott/set-nthreads

doddsk · web-flow · commit 9864f6154fe6 · 2018-08-31T17:07:27.000+12:00
Setting number of OpenMP threads with a variable
diff --git a/GBS-Chip-Gmatrix.R b/GBS-Chip-Gmatrix.R
@@ -11,6 +11,7 @@ if (!exists("functions.only"))   functions.only   <- FALSE
 if (!exists("alleles.keep"))     alleles.keep     <- FALSE
 if (!exists("outlevel"))         outlevel         <- 9
 if (!exists("use.Rcpp"))         use.Rcpp         <- TRUE
+if (!exists("nThreads"))         nThreads         <- 0  # 0 means use all available
 
 # function to locate Rcpp file (assume it is in the same directory as this file and this file was 'sourced')
 pathToCppFile = function() {
@@ -246,7 +247,7 @@ GBSsummary <- function() {
  if(gform != "chip") {
   if (!havedepth) depth <<- alleles[, seq(1, 2 * nsnps - 1, 2)] + alleles[, seq(2, 2 * nsnps, 2)]
   if (have_rcpp) {
-   sampdepth.max <<- rcpp_rowMaximums(depth)
+   sampdepth.max <<- rcpp_rowMaximums(depth, nThreads)
   }
   else {
    sampdepth.max <<- apply(depth, 1, max)
@@ -338,7 +339,7 @@ cat("Analysing", nind, "individuals and", nsnps, "SNPs\n")
  #if(outlevel > 4) sampdepth.med <<- apply(depth, 1, median)
  if(outlevel > 4) {
    if (have_rcpp) {
-     sampdepth.med <<- rcpp_rowMedians(depth)
+     sampdepth.med <<- rcpp_rowMedians(depth, nThreads)
    }
    else {
      sampdepth.med <<- apply(depth, 1, median)
@@ -420,7 +421,7 @@ if(!functions.only) {
      depth2K <- function(depthvals) {
          # Rcpp version only works with matrix as input, so fallback to R version otherwise
          if (is.matrix(depthvals)) {
-             result <- rcpp_depth2K(depthvals)
+             result <- rcpp_depth2K(depthvals, nThreads)
          } else {
              result <- r_depth2K(depthvals)
          }
@@ -440,7 +441,7 @@ if(!functions.only) {
         # Rcpp version only works with matrix as input, so fallback to R version otherwise
         if (is.matrix(depthvals) & alph < Inf) {
            if(alph < Inf) {
-            result <- rcpp_depth2Kbb(depthvals, alph)
+            result <- rcpp_depth2Kbb(depthvals, nThreads, alph)
             } else {
               result <- depth2K(depthvals)
             }
@@ -464,7 +465,7 @@ if(!functions.only) {
     depth2Kmodp <- function(depthvals, modp=0.5) {
         # Rcpp version only works with matrix as input, so fallback to R version otherwise
         if (is.matrix(depthvals)) {
-            result <- rcpp_depth2Kmodp(depthvals, modp)
+            result <- rcpp_depth2Kmodp(depthvals, modp, nThreads)
         } else {
             result <- r_depth2Kmodp(depthvals, modp)
         }
@@ -694,7 +695,7 @@ calcG <- function(snpsubset, sfx = "", puse, indsubset, depth.min = 0, depth.max
    dev.off()
   lowpairs <- which(cocall/nsnpsub <= cocall.thresh & upper.tri(cocall),arr.ind=TRUE)
   if (have_rcpp) {
-    sampdepth.max <- rcpp_rowMaximums(depthsub)
+    sampdepth.max <- rcpp_rowMaximums(depthsub, nThreads)
   }
   else {
     sampdepth.max <- apply(depthsub, 1, max)
@@ -762,7 +763,7 @@ calcG <- function(snpsubset, sfx = "", puse, indsubset, depth.min = 0, depth.max
   P0 <- matrix(puse[snpsubset], nrow = nindsub, ncol = nsnpsub, byrow = TRUE)
   P1 <- 1 - P0
   if (have_rcpp) {
-    rcpp_assignP0P1Genon01(P0, P1, genon01, usegeno, depth[indsubset, snpsubset])
+    rcpp_assignP0P1Genon01(P0, P1, genon01, usegeno, depth[indsubset, snpsubset], nThreads)
   }
   else {
     genon01[depth[indsubset, snpsubset] < 2] <- 0
diff --git a/GBS-Rcpp-functions.cpp b/GBS-Rcpp-functions.cpp
@@ -6,20 +6,43 @@
 #include <RcppArmadillo.h>
 
 // we need OpenMP for parallelisation
+#include <omp.h>
 // [[Rcpp::plugins(openmp)]]
 
+
+// helper: returns the requested thread count when it lies in 1..maxThreads, otherwise maxThreads
+static int check_nThreads(int nThreads) {
+	// upper limit on threads as reported by the OpenMP runtime (omp_get_max_threads)
+	int maxThreads = omp_get_max_threads();
+
+	if (nThreads <= 0) {
+		// zero or a negative request means "use everything available"
+		nThreads = maxThreads;
+	}
+	else if (nThreads > maxThreads) {
+		// don't allow more threads than the maximum reported above
+		nThreads = maxThreads;
+	}
+
+	return nThreads;
+}
+
+
 // function for finding row medians (alternative to apply(depth, 1, median))
 // requires integer type matrix as input, returns list of doubles
 // [[Rcpp::export]]
-std::vector<double> rcpp_rowMedians(const arma::imat &depth) {
+std::vector<double> rcpp_rowMedians(const arma::imat &depth, int nThreads) {
+	// set up number of threads
+	nThreads = check_nThreads(nThreads);
+
     // number of rows
     const int nrows = depth.n_rows;
 
     // vector for storing the result
     std::vector<double> medians(nrows);
 
     // loop over the rows
-    #pragma omp parallel for
+    #pragma omp parallel for num_threads(nThreads)
     for (int i = 0; i < nrows; i++) {
         // convert the row to double type, to compute median correctly
         arma::rowvec row = arma::conv_to<arma::rowvec>::from(depth.row(i));
@@ -34,14 +57,18 @@ std::vector<double> rcpp_rowMedians(const arma::imat &depth) {
 // function for finding row maximums (alternative to apply(mat, 1, max))
 // requires integer type matrix as input, return list of integers
 // [[Rcpp::export]]
-std::vector<int> rcpp_rowMaximums(const arma::imat &mat) {
+std::vector<int> rcpp_rowMaximums(const arma::imat &mat, int nThreads) {
+	// set up number of threads
+	nThreads = check_nThreads(nThreads);
+
+	// number of rows
     const int nrows = mat.n_rows;
 
     // create vector to store the result
     std::vector<int> maximums(nrows);
 
     // loop over rows
-    #pragma omp parallel for
+    #pragma omp parallel for num_threads(nThreads)
     for (int i = 0; i < nrows; i++) {
         // find the maximum for this row
         maximums[i] = mat.row(i).max();
@@ -52,15 +79,18 @@ std::vector<int> rcpp_rowMaximums(const arma::imat &mat) {
 
 // C++ version of depth2K function
 // [[Rcpp::export]]
-Rcpp::NumericMatrix rcpp_depth2K(const Rcpp::NumericMatrix &A) {
+Rcpp::NumericMatrix rcpp_depth2K(const Rcpp::NumericMatrix &A, int nThreads) {
+	// set up number of threads
+	nThreads = check_nThreads(nThreads);
+
     // create the output matrix (same size as input)
     Rcpp::NumericMatrix Aout(A.rows(), A.cols());
 
     // number of elements
     const long Asize = A.rows() * A.cols();
 
     // loop over elements in parallel and apply operation
-    #pragma omp parallel for
+    #pragma omp parallel for num_threads(nThreads)
     for (long i = 0; i < Asize; i++) {
         Aout[i] = 1.0 / pow(2.0, A[i]);
     }
@@ -71,15 +101,18 @@ Rcpp::NumericMatrix rcpp_depth2K(const Rcpp::NumericMatrix &A) {
 
 // C++ version of depth2Kmodp function
 // [[Rcpp::export]]
-Rcpp::NumericMatrix rcpp_depth2Kmodp(const Rcpp::NumericMatrix &depthvals, double modp = 0.5) {
+Rcpp::NumericMatrix rcpp_depth2Kmodp(const Rcpp::NumericMatrix &depthvals, double modp, int nThreads) {
+	// set up number of threads
+	nThreads = check_nThreads(nThreads);
+
     // create matrix for storing the result
     Rcpp::NumericMatrix result(depthvals.rows(), depthvals.cols());
 
     // size of the matrix
     const long size = depthvals.rows() * depthvals.cols();
 
     // loop over the elements in parallel
-    #pragma omp parallel for
+    #pragma omp parallel for num_threads(nThreads)
     for (long i = 0; i < size; i++) {
         double value = 0.5 * pow(modp, depthvals[i] - 1.0);
         result[i] = (value == 0) ? 1.0 : value;
@@ -89,15 +122,17 @@ Rcpp::NumericMatrix rcpp_depth2Kmodp(const Rcpp::NumericMatrix &depthvals, doubl
 
 // C++ version of depth2Kbb function
 // [[Rcpp::export]]
-    Rcpp::NumericMatrix rcpp_depth2Kbb(const Rcpp::NumericMatrix & depthvals, const double alph = 9999) {
+    Rcpp::NumericMatrix rcpp_depth2Kbb(const Rcpp::NumericMatrix & depthvals, int nThreads, const double alph = 9999) {
+        // set up number of threads
+        nThreads = check_nThreads(nThreads);
         // create matrix for storing the result
         Rcpp::NumericMatrix result(depthvals.rows(), depthvals.cols());
         // size of the matrix
         const long size = depthvals.rows() * depthvals.cols();
         // precompute factor
         const double factor = 1.0/R::beta(alph, alph);
         // loop over the elements in parallel
-        #pragma omp parallel for
+        #pragma omp parallel for num_threads(nThreads)
         for (long i = 0; i < size; i++) {
             result[i] = R::beta(alph, depthvals[i] + alph) * factor;
         }
@@ -109,12 +144,15 @@ Rcpp::NumericMatrix rcpp_depth2Kmodp(const Rcpp::NumericMatrix &depthvals, doubl
 // modifies the matrices in-place (i.e. doesn't return anything)
 // [[Rcpp::export]]
 void rcpp_assignP0P1Genon01(Rcpp::NumericMatrix &P0, Rcpp::NumericMatrix &P1, Rcpp::NumericMatrix &genon01,
-        const Rcpp::LogicalMatrix &usegeno, const Rcpp::NumericMatrix &dsub) {
+        const Rcpp::LogicalMatrix &usegeno, const Rcpp::NumericMatrix &dsub, int nThreads) {
+	// set up number of threads
+	nThreads = check_nThreads(nThreads);
+
     // number of elements (assumes all inputs are the same size!)
     const long size = P0.rows() * P0.cols();
 
     // loop over elements in parallel
-    #pragma omp parallel for
+    #pragma omp parallel for num_threads(nThreads)
     for (long i = 0; i < size; i++) {
         // set to zero if they match the conditions
         if (dsub[i] < 2.0) {