From 23c97c64429c4d4facf3590a11be251041acc5f6 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Tue, 25 May 2021 15:23:23 +0300 Subject: [PATCH] Improve performance tips in readme (#69) * improve performance tips * update --- README.md | 15 ++++++++++----- src/Makevars.in | 3 ++- src/Makevars.win | 3 ++- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index ad9f913..c988d11 100644 --- a/README.md +++ b/README.md @@ -54,19 +54,24 @@ _Note: the optimized matrix operations which `rparse` used to offer have been mo # Installation -Most of the algorithms benefit from OpenMP and many of them could utilize high-performance implementation of BLAS. If you want make maximum out of the package please read the section below carefuly. +Most of the algorithms benefit from OpenMP and many of them could utilize high-performance implementations of BLAS. If you want to make the maximum out of this package, please read the section below carefully. It is recommended to: 1. Use high-performance BLAS (such as OpenBLAS, MKL, Apple Accelerate). -1. Add proper compiler optimizations in your `~/.R/Makevars`. For example on recent processors (with AVX support) and complier with OpenMP support following lines could be a good option: +1. Add proper compiler optimizations in your `~/.R/Makevars`. For example on recent processors (with AVX support) and compiler with OpenMP support, the following lines could be a good option: ```txt - CXX11FLAGS += -O3 -march=native -mavx -fopenmp -ffast-math - CXXFLAGS += -O3 -march=native -mavx -fopenmp -ffast-math + CXX11FLAGS += -O3 -march=native -fopenmp + CXXFLAGS += -O3 -march=native -fopenmp ``` -If you are on **Mac** follow instructions [here](https://github.com/coatless/r-macos-rtools). After installation of `clang4` additionally put `PKG_CXXFLAGS += -DARMA_USE_OPENMP` line to your `~/.R/Makevars`. After that install `rsparse` in a usual way. 
+If you are on **Mac**, follow the instructions [here](https://github.com/coatless/r-macos-rtools). After installation of `clang4`, additionally put a `PKG_CXXFLAGS += -DARMA_USE_OPENMP` line in your `~/.R/Makevars`. After that, install `rsparse` in the usual way. +On Linux, it's enough to just create this file (`~/.R/Makevars`) if it doesn't exist. + +If using OpenBLAS, it is highly recommended to use the `openmp` variant rather than the `pthreads` variant. On Linux, it is usually available as a separate package in typical distribution package managers (e.g. for Debian, it can be obtained by installing `libopenblas-openmp-dev`, which is not the default version), and if there are multiple BLASes installed, it can be set as the default through the [Debian alternatives system](https://wiki.debian.org/DebianScience/LinearAlgebraLibraries) - which can also be used [for MKL](https://stackoverflow.com/a/49842944/5941695). + +By default, R for Windows comes with unoptimized BLAS and LAPACK libraries, and `rsparse` will prefer using Armadillo's replacements instead. In order to use BLAS, **install `rsparse` from source** (not from CRAN), removing the option `-DARMA_DONT_USE_BLAS` from `src/Makevars.win` and ideally adding `-march=native` (under `PKG_CXXFLAGS`). See [this tutorial](https://github.com/david-cortes/R-openblas-in-windows) for instructions on getting R for Windows to use OpenBLAS. Alternatively, Microsoft's MRAN distribution for Windows comes with MKL. 
# Materials diff --git a/src/Makevars.in b/src/Makevars.in index fae360d..ab8408c 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -1,4 +1,5 @@ -PKG_CXXFLAGS = -I../inst/include/ @OPENMP_FLAG@ -DARMA_32BIT_WORD +PKG_CPPFLAGS = -DARMA_32BIT_WORD -I../inst/include/ +PKG_CXXFLAGS = @OPENMP_FLAG@ PKG_LIBS = @OPENMP_FLAG@ @FLOAT_LIBS@ $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) CXX_STD = CXX11 diff --git a/src/Makevars.win b/src/Makevars.win index 2754320..3c5fdce 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,7 +1,8 @@ R_SCMD = ${R_HOME}/bin${R_ARCH_BIN}/Rscript -e FLOAT_LIBS = $(shell ${R_SCMD} "float:::ldflags()") -PKG_CXXFLAGS = -I../inst/include/ $(SHLIB_OPENMP_CXXFLAGS) -DARMA_32BIT_WORD -DARMA_DONT_USE_BLAS -DARMA_NO_DEBUG +PKG_CPPFLAGS = -I../inst/include/ -DARMA_32BIT_WORD -DARMA_DONT_USE_BLAS -DARMA_NO_DEBUG +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(FLOAT_LIBS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) CXX_STD = CXX11