Skip to content

Commit

Permalink
add convenience casting functions (#58)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-cortes authored Jan 11, 2021
1 parent 4cb4477 commit a887082
Show file tree
Hide file tree
Showing 5 changed files with 417 additions and 1 deletion.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ export(PureSVD)
export(ScaleNormalize)
export(WRMF)
export(ap_k)
export(as.coo.matrix)
export(as.csc.matrix)
export(as.csr.matrix)
export(detect_number_omp_threads)
export(ndcg_k)
export(soft_impute)
Expand Down
185 changes: 185 additions & 0 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -302,3 +302,188 @@ check_dimensions_match = function(x, y, y_transposed = FALSE) {
}

# nocov end

#' Conversions between matrix types
#'
#' @description Convenience functions for converting to different sparse matrix formats.
#'
#' @details The functions internally use as(x, "?sparseMatrix"), so they might work
#' with other object classes if they register a conversion method for `Matrix` base
#' types.
#'
#' When passed a vector, the functions `as.csr.matrix` and `as.coo.matrix` will
#' assume that it is a row vector, while `as.csc.matrix` will assume it's a column vector.
#'
#' @param x A matrix which is to be converted to a different format.
#' Supported input types are:\itemize{
#' \item Sparse matrices from `Matrix` package, in any format.
#' \item Sparse vector from `Matrix` (class `dsparseVector`).
#' \item Dense matrix from base R.
#' \item Dense vector from base R (classes `numeric` and `integer`).
#' \item Dense matrix or vector from package `float` (class `float32`).
#' \item `data.frame` and `data.table`.
#' }
#' @param binary Whether the result should be a binary-only matrix (inheriting from
#' class `nsparseMatrix` - these don't have slot `x`).
#'
#' @return A sparse matrix, with format:\itemize{
#' \item CSR (a.k.a. `RsparseMatrix`) when calling `as.csr.matrix`
#' (class `dgRMatrix` with `binary=FALSE`, class `ngRMatrix` with `binary=TRUE`).
#' \item CSC (a.k.a. `CsparseMatrix`) when calling `as.csc.matrix`
#' (class `dgCMatrix` with `binary=FALSE`, class `ngCMatrix` with `binary=TRUE`).
#' \item COO (a.k.a. `TsparseMatrix`) when calling `as.coo.matrix`
#' (class `dgTMatrix` with `binary=FALSE`, class `ngTMatrix` with `binary=TRUE`).
#' }
#'
#' @name casting
#' @examples
#' library(Matrix)
#' library(rsparse)
#'
#' m.coo = as(matrix(1:3), "TsparseMatrix")
#' as.csr.matrix(m.coo)
#' as.csr.matrix(1:3) # <- assumes it's a row vector
#' as.csc.matrix(1:3) # <- assumes it's a column vector
#'
#' library(float)
#' m.f32 = float::fl(matrix(1:10, nrow=5))
#' as.csr.matrix(m.f32)
#'
#' library(data.table)
#' as.coo.matrix(data.table(col1=1:3))
NULL

#' @rdname casting
#' @export
as.csr.matrix = function(x, binary=FALSE) {
  # Converts `x` to a general CSR matrix: class `dgRMatrix` when `binary=FALSE`,
  # class `ngRMatrix` (pattern-only, no `x` slot) when `binary=TRUE`.
  # Dense vectors and `dsparseVector` inputs are treated as a single row.

  # Already in the requested class: return the input untouched (no copy).
  if ((inherits(x, "dgRMatrix") && !binary) || (inherits(x, "ngRMatrix") && binary))
    return(x)

  if (inherits(x, "float32"))
    x = float::dbl(x)

  # Dense vectors become a 1 x n row matrix.
  if (inherits(x, c("numeric", "integer")))
    x = matrix(x, nrow=1L)

  if (inherits(x, c("data.frame", "tibble", "data.table")))
    x = as.matrix(x)

  if (inherits(x, "dsparseVector")) {
    # Slots `i` and `length` of a sparse vector are declared as 'numeric' and
    # may hold doubles, while the CSR slots are strictly integer - coerce so
    # that the slot assignments cannot fail on class mismatch.
    X.csr = new("dgRMatrix")
    X.csr@Dim = c(1L, as.integer(x@length))
    X.csr@p = c(0L, length(x@i))
    X.csr@j = as.integer(x@i) - 1L  # sparse vector indices are 1-based
    X.csr@x = x@x
    x = X.csr
  }

  # Symmetric, triangular and diagonal matrices keep part of their entries
  # implicit (mirrored triangle, unit diagonal). The slot copies below would
  # silently drop those entries, so expand to a general matrix first.
  if (inherits(x, c("symmetricMatrix", "triangularMatrix", "diagonalMatrix")))
    x = as(as(x, "CsparseMatrix"), "generalMatrix")

  if (!inherits(x, "RsparseMatrix"))
    x = as(x, "RsparseMatrix")

  if (!binary && !inherits(x, "dgRMatrix")) {
    X.csr = new("dgRMatrix")
    X.csr@Dim = x@Dim
    X.csr@Dimnames = x@Dimnames
    X.csr@p = x@p
    X.csr@j = x@j
    # Pattern matrices have no values: every stored entry counts as 1.
    if (.hasSlot(x, "x"))
      X.csr@x = as.numeric(x@x)
    else
      X.csr@x = rep(1., length(x@j))
    x = X.csr
  }

  if (binary && !inherits(x, "ngRMatrix")) {
    # Only the sparsity pattern is kept; values are discarded.
    X.csr = new("ngRMatrix")
    X.csr@Dim = x@Dim
    X.csr@Dimnames = x@Dimnames
    X.csr@p = x@p
    X.csr@j = x@j
    x = X.csr
  }
  return(x)
}

#' @rdname casting
#' @export
as.csc.matrix = function(x, binary=FALSE) {
  # Converts `x` to a general CSC matrix: class `dgCMatrix` when `binary=FALSE`,
  # class `ngCMatrix` (pattern-only, no `x` slot) when `binary=TRUE`.
  # Dense vectors are treated as a single column.

  # Already in the requested class: return the input untouched (no copy).
  if ((inherits(x, "dgCMatrix") && !binary) || (inherits(x, "ngCMatrix") && binary))
    return(x)

  if (inherits(x, "float32"))
    x = float::dbl(x)

  # `as.matrix` turns a dense vector into an n x 1 column matrix.
  if (inherits(x, c("numeric", "integer", "data.frame", "tibble", "data.table")))
    x = as.matrix(x)

  # Symmetric, triangular and diagonal matrices keep part of their entries
  # implicit (mirrored triangle, unit diagonal). The slot copies below would
  # silently drop those entries, so expand to a general matrix first.
  if (inherits(x, c("symmetricMatrix", "triangularMatrix", "diagonalMatrix")))
    x = as(as(x, "CsparseMatrix"), "generalMatrix")

  if (!inherits(x, "CsparseMatrix"))
    x = as(x, "CsparseMatrix")

  if (!binary && !inherits(x, "dgCMatrix")) {
    X.csc = new("dgCMatrix")
    X.csc@Dim = x@Dim
    X.csc@Dimnames = x@Dimnames
    X.csc@p = x@p
    X.csc@i = x@i
    # Pattern matrices have no values: every stored entry counts as 1.
    if (.hasSlot(x, "x"))
      X.csc@x = as.numeric(x@x)
    else
      X.csc@x = rep(1., length(x@i))
    x = X.csc
  }

  if (binary && !inherits(x, "ngCMatrix")) {
    # Only the sparsity pattern is kept; values are discarded.
    X.csc = new("ngCMatrix")
    X.csc@Dim = x@Dim
    X.csc@Dimnames = x@Dimnames
    X.csc@p = x@p
    X.csc@i = x@i
    x = X.csc
  }
  return(x)
}

#' @rdname casting
#' @export
as.coo.matrix = function(x, binary=FALSE) {
  # Converts `x` to a general COO (triplet) matrix: class `dgTMatrix` when
  # `binary=FALSE`, class `ngTMatrix` (pattern-only, no `x` slot) when
  # `binary=TRUE`. Dense vectors and `dsparseVector` inputs are treated as a
  # single row.

  # Already in the requested class: return the input untouched (no copy).
  if ((inherits(x, "dgTMatrix") && !binary) || (inherits(x, "ngTMatrix") && binary))
    return(x)

  if (inherits(x, "float32"))
    x = float::dbl(x)

  # Dense vectors become a 1 x n row matrix.
  if (inherits(x, c("numeric", "integer")))
    x = matrix(x, nrow=1L)

  if (inherits(x, c("data.frame", "tibble", "data.table")))
    x = as.matrix(x)

  # Sparse vectors go through the CSR conversion, which lays them out as a row.
  if (inherits(x, "dsparseVector"))
    x = as.csr.matrix(x)

  # Symmetric, triangular and diagonal matrices keep part of their entries
  # implicit (mirrored triangle, unit diagonal). The slot copies below would
  # silently drop those entries, so expand to a general matrix first.
  if (inherits(x, c("symmetricMatrix", "triangularMatrix", "diagonalMatrix")))
    x = as(as(x, "CsparseMatrix"), "generalMatrix")

  if (!inherits(x, "TsparseMatrix"))
    x = as(x, "TsparseMatrix")

  if (!binary && !inherits(x, "dgTMatrix")) {
    X.coo = new("dgTMatrix")
    X.coo@Dim = x@Dim
    X.coo@Dimnames = x@Dimnames
    X.coo@i = x@i
    X.coo@j = x@j
    # Pattern matrices have no values: every stored entry counts as 1.
    if (.hasSlot(x, "x"))
      X.coo@x = as.numeric(x@x)
    else
      X.coo@x = rep(1., length(x@j))
    x = X.coo
  }

  if (binary && !inherits(x, "ngTMatrix")) {
    # Only the sparsity pattern is kept; values are discarded.
    X.coo = new("ngTMatrix")
    X.coo@Dim = x@Dim
    X.coo@Dimnames = x@Dimnames
    X.coo@i = x@i
    X.coo@j = x@j
    x = X.coo
  }
  return(x)
}
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

`rsparse` is an R package for statistical learning primarily on **sparse matrices** - **matrix factorizations, factorization machines, out-of-core regression**. Many of the implemented algorithms are particularly useful for **recommender systems** and **NLP**.

On top of that we provide some optimized routines to work on sparse matrices - multithreaded <dense, sparse> matrix multiplications and improved support for sparse matrices in CSR format (`Matrix::RsparseMatrix`).
On top of that we provide some optimized routines to work on sparse matrices - multithreaded <dense, sparse> matrix multiplications and improved support for sparse matrices in CSR format (`Matrix::RsparseMatrix`), as well as convenience functions to convert between matrix types.

We've paid some attention to the implementation details - we try to avoid data copies, utilize multiple threads via OpenMP and use SIMD where appropriate. The package **lets you work with datasets with millions of rows and millions of columns**.

Expand Down
66 changes: 66 additions & 0 deletions man/casting.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit a887082

Please sign in to comment.