Skip to content

Commit

Permalink
Merge branch 'master' of github.com:rexyai/rsparse
Browse files Browse the repository at this point in the history
  • Loading branch information
dselivanov committed Jan 23, 2021
2 parents 5b18d39 + 48f12a4 commit fac84e7
Show file tree
Hide file tree
Showing 8 changed files with 421 additions and 6 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ LazyData: true
ByteCompile: true
Depends: R (>= 3.6.0), methods
Imports:
Matrix (>= 1.2),
Matrix (>= 1.3),
Rcpp (>= 0.11),
data.table (>= 1.10.0),
float (>= 0.2-2),
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ export(PureSVD)
export(ScaleNormalize)
export(WRMF)
export(ap_k)
export(as.coo.matrix)
export(as.csc.matrix)
export(as.csr.matrix)
export(detect_number_omp_threads)
export(ndcg_k)
export(soft_impute)
Expand Down
2 changes: 1 addition & 1 deletion R/MatrixFactorizationRecommender.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ MatrixFactorizationRecommender = R6::R6Class(
not_recommend = as(not_recommend, "RsparseMatrix")

uids = rownames(user_embeddings)
indices = find_top_product(user_embeddings, item_embeddings, k, not_recommend, items_exclude, self$global_bias)
indices = find_top_product(user_embeddings, item_embeddings, k, not_recommend, items_exclude, glob_mean=self$global_bias)

data.table::setattr(indices, "dimnames", list(uids, NULL))
data.table::setattr(indices, "ids", NULL)
Expand Down
185 changes: 185 additions & 0 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -302,3 +302,188 @@ check_dimensions_match = function(x, y, y_transposed = FALSE) {
}

# nocov end

#' Conversions between matrix types
#'
#' @description Convenience functions for converting to different sparse matrix formats.
#'
#' @details The functions internally use as(x, "?sparseMatrix"), so they might work
#' with other object classes if they register a conversion method for `Matrix` base
#' types.
#'
#' When passed a vector, the functions `as.csr.matrix` and `as.coo.matrix` will
#' assume that it is a row vector, while `as.csc.matrix` will assume it's a column vector.
#'
#' @param x A matrix which is to be converted to a different format.
#' Supported input types are:\itemize{
#' \item Sparse matrices from `Matrix` package, in any format.
#' \item Sparse vector from `Matrix` (class `dsparseVector`).
#' \item Dense matrix from base R.
#' \item Dense vector from base R (classes `numeric` and `integer`).
#' \item Dense matrix or vector from package `float` (class `float32`).
#' \item `data.frame` and `data.table`.
#' }
#' @param binary Whether the result should be a binary-only matrix (inheriting from
#' class `nsparseMatrix` - these don't have slot `x`).
#'
#' @return A sparse matrix, with format:\itemize{
#' \item CSR (a.k.a. `RsparseMatrix`) when calling `as.csr.matrix`
#' (class `dgRMatrix` with `binary=FALSE`, class `ngRMatrix` with `binary=TRUE`).
#' \item CSC (a.k.a. `CsparseMatrix`) when calling `as.csc.matrix`
#' (class `dgCMatrix` with `binary=FALSE`, class `ngCMatrix` with `binary=TRUE`).
#' \item COO (a.k.a. `TsparseMatrix`) when calling `as.coo.matrix`
#' (class `dgTMatrix` with `binary=FALSE`, class `ngTMatrix` with `binary=TRUE`).
#' }
#'
#' @name casting
#' @examples
#' library(Matrix)
#' library(rsparse)
#'
#' m.coo = as(matrix(1:3), "TsparseMatrix")
#' as.csr.matrix(m.coo)
#' as.csr.matrix(1:3) # <- assumes it's a row vector
#' as.csc.matrix(1:3) # <- assumes it's a column vector
#'
#' library(float)
#' m.f32 = float::fl(matrix(1:10, nrow=5))
#' as.csr.matrix(m.f32)
#'
#' library(data.table)
#' as.coo.matrix(data.table(col1=1:3))
NULL

#' @rdname casting
#' @export
as.csr.matrix = function(x, binary=FALSE) {
  # Convert `x` to a CSR sparse matrix: class `dgRMatrix` when `binary` is
  # FALSE, class `ngRMatrix` (pattern only, no `x` slot) when it is TRUE.
  # Plain numeric/integer vectors and `dsparseVector` inputs become a
  # single-row matrix. Input already of the requested class is returned as-is.
  if ((inherits(x, "dgRMatrix") && !binary) || (inherits(x, "ngRMatrix") && binary))
    return(x)

  # float32 objects are first expanded to base R doubles.
  if (inherits(x, "float32"))
    x = float::dbl(x)

  # Bare vectors are treated as one row.
  if (inherits(x, c("numeric", "integer")))
    x = matrix(x, nrow=1L)

  if (inherits(x, c("data.frame", "tibble", "data.table")))
    x = as.matrix(x)

  if (inherits(x, "dsparseVector")) {
    # The `length` and `i` slots of a sparse vector may hold doubles, while
    # the `Dim` and `j` slots of a dgRMatrix must be integer. Coerce
    # explicitly, otherwise the slot assignments below fail for vectors that
    # were built with double indices.
    if (x@length > .Machine$integer.max)
      stop("sparse vector is too long to be represented as a single-row matrix")
    X.csr = new("dgRMatrix")
    X.csr@Dim = c(1L, as.integer(x@length))
    X.csr@p = c(0L, length(x@i))
    # Sparse vector indices are 1-based; CSR column indices are 0-based.
    X.csr@j = as.integer(x@i) - 1L
    X.csr@x = x@x
    x = X.csr
  }

  if (!inherits(x, "RsparseMatrix"))
    x = as(x, "RsparseMatrix")

  # NOTE(review): the two blocks below copy the stored `p`/`j`/`x` slots
  # verbatim. If `x` can reach this point as a symmetric or triangular
  # RsparseMatrix, only the stored entries would be carried over - confirm
  # whether such inputs need expanding to general storage first.
  if (!binary && !inherits(x, "dgRMatrix")) {
    X.csr = new("dgRMatrix")
    X.csr@Dim = x@Dim
    X.csr@Dimnames = x@Dimnames
    X.csr@p = x@p
    X.csr@j = x@j
    # Pattern matrices have no values; every stored entry becomes 1.
    if (.hasSlot(x, "x"))
      X.csr@x = as.numeric(x@x)
    else
      X.csr@x = rep(1., length(x@j))
    x = X.csr
  }

  if (binary && !inherits(x, "ngRMatrix")) {
    X.csr = new("ngRMatrix")
    X.csr@Dim = x@Dim
    X.csr@Dimnames = x@Dimnames
    X.csr@p = x@p
    X.csr@j = x@j
    x = X.csr
  }
  return(x)
}

#' @rdname casting
#' @export
as.csc.matrix = function(x, binary=FALSE) {
  # Convert `x` to a CSC sparse matrix: class `dgCMatrix` when `binary` is
  # FALSE, class `ngCMatrix` (pattern only, no `x` slot) when it is TRUE.
  # Bare vectors go through as.matrix() and therefore become a single column.
  is_dbl_csc = inherits(x, "dgCMatrix")
  is_bin_csc = inherits(x, "ngCMatrix")
  if ((is_dbl_csc && !binary) || (is_bin_csc && binary)) {
    # Already in the requested class: hand it back untouched.
    return(x)
  }

  # float32 objects are first expanded to base R doubles.
  if (inherits(x, "float32")) {
    x = float::dbl(x)
  }

  dense_like = c("numeric", "integer", "data.frame", "tibble", "data.table")
  if (inherits(x, dense_like)) {
    x = as.matrix(x)
  }

  if (!inherits(x, "CsparseMatrix")) {
    x = as(x, "CsparseMatrix")
  }

  if (!binary && !inherits(x, "dgCMatrix")) {
    # Rebuild as a double-valued matrix; pattern matrices carry no values,
    # so every stored entry becomes 1.
    if (.hasSlot(x, "x")) {
      values = as.numeric(x@x)
    } else {
      values = rep(1., length(x@i))
    }
    out = new("dgCMatrix")
    out@Dim = x@Dim
    out@Dimnames = x@Dimnames
    out@p = x@p
    out@i = x@i
    out@x = values
    x = out
  } else if (binary && !inherits(x, "ngCMatrix")) {
    # Keep only the sparsity pattern.
    out = new("ngCMatrix")
    out@Dim = x@Dim
    out@Dimnames = x@Dimnames
    out@p = x@p
    out@i = x@i
    x = out
  }

  return(x)
}

#' @rdname casting
#' @export
as.coo.matrix = function(x, binary=FALSE) {
  # Convert `x` to a COO (triplet) sparse matrix: class `dgTMatrix` when
  # `binary` is FALSE, class `ngTMatrix` (pattern only, no `x` slot) when it
  # is TRUE. Bare numeric/integer vectors become a single-row matrix.
  is_dbl_coo = inherits(x, "dgTMatrix")
  is_bin_coo = inherits(x, "ngTMatrix")
  if ((is_dbl_coo && !binary) || (is_bin_coo && binary)) {
    # Already in the requested class: hand it back untouched.
    return(x)
  }

  # float32 objects are first expanded to base R doubles.
  if (inherits(x, "float32")) {
    x = float::dbl(x)
  }

  if (inherits(x, c("numeric", "integer"))) {
    x = matrix(x, nrow=1L)
  }

  if (inherits(x, c("data.frame", "tibble", "data.table"))) {
    x = as.matrix(x)
  }

  # Sparse vectors are routed through the CSR converter before the triplet
  # coercion below.
  if (inherits(x, "dsparseVector")) {
    x = as.csr.matrix(x)
  }

  if (!inherits(x, "TsparseMatrix")) {
    x = as(x, "TsparseMatrix")
  }

  if (!binary && !inherits(x, "dgTMatrix")) {
    # Rebuild as a double-valued matrix; pattern matrices carry no values,
    # so every stored entry becomes 1.
    if (.hasSlot(x, "x")) {
      values = as.numeric(x@x)
    } else {
      values = rep(1., length(x@j))
    }
    out = new("dgTMatrix")
    out@Dim = x@Dim
    out@Dimnames = x@Dimnames
    out@i = x@i
    out@j = x@j
    out@x = values
    x = out
  } else if (binary && !inherits(x, "ngTMatrix")) {
    # Keep only the sparsity pattern.
    out = new("ngTMatrix")
    out@Dim = x@Dim
    out@Dimnames = x@Dimnames
    out@i = x@i
    out@j = x@j
    x = out
  }

  return(x)
}
5 changes: 2 additions & 3 deletions R/model_FTRL.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
#' x = sample(c(-1, 1), 1000 * 100, TRUE)
#' odd = seq(1, 99, 2)
#' x[i %in% which(y == 1) & j %in% odd] = 1
#' m = sparseMatrix(i = i, j = j, x = x, dims = c(1000, 1000), giveCsparse = FALSE)
#' x = as(m, "RsparseMatrix")
#' x = sparseMatrix(i = i, j = j, x = x, dims = c(1000, 1000), repr="R")
#'
#' ftrl = FTRL$new(learning_rate = 0.01, learning_rate_decay = 0.1,
#' lambda = 10, l1_ratio = 1, dropout = 0)
Expand All @@ -21,7 +20,7 @@
#' w = ftrl$coef()
#' head(w)
#' sum(w != 0)
#' p = ftrl$predict(m)
#' p = ftrl$predict(x)
#' @export
FTRL = R6::R6Class(
classname = "FTRL",
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

`rsparse` is an R package for statistical learning primarily on **sparse matrices** - **matrix factorizations, factorization machines, out-of-core regression**. Many of the implemented algorithms are particularly useful for **recommender systems** and **NLP**.

On top of that we provide some optimized routines to work on sparse matrices - multithreaded <dense, sparse> matrix multiplications and improved support for sparse matrices in CSR format (`Matrix::RsparseMatrix`).
On top of that we provide some optimized routines to work on sparse matrices - multithreaded <dense, sparse> matrix multiplications and improved support for sparse matrices in CSR format (`Matrix::RsparseMatrix`), as well as convenience functions to convert between matrix types.

We've paid some attention to the implementation details - we try to avoid data copies, utilize multiple threads via OpenMP and use SIMD where appropriate. The package **makes it possible to work on datasets with millions of rows and millions of columns**.

Expand Down
66 changes: 66 additions & 0 deletions man/casting.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit fac84e7

Please sign in to comment.