Fixes for implicit biases (#64)

fixes #53 * fixes for implicit biases #ref 53 * fixes for implicit bias initialization
dselivanov · May 9, 2021 · 459e33d · 459e33d
1 parent 4f19439
commit 459e33d
Show file tree

Hide file tree

Showing 12 changed files with 329 additions and 102 deletions.
diff --git a/R/MatrixFactorizationRecommender.R b/R/MatrixFactorizationRecommender.R
@@ -8,6 +8,9 @@ MatrixFactorizationRecommender = R6::R6Class(
     components = NULL,
     #' @field global_bias global mean (for centering values in explicit feedback)
     global_bias = 0.,
+    #' @field global_bias_base Pre-calculated `-(factors*global_bias)` (for centering values in
+    #' implicit feedback when not using user/item biases)
+    global_bias_base = numeric(),
     #' @description recommends items for users
     #' @param x user-item interactions matrix (usually sparse - `Matrix::sparseMatrix`).Users are
     #' rows and items are columns

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -117,19 +117,19 @@ als_explicit_float <- function(m_csc_r, X_, Y_, cnt_X_, lambda, n_threads, solve
     .Call(`_rsparse_als_explicit_float`, m_csc_r, X_, Y_, cnt_X_, lambda, n_threads, solver, cg_steps, dynamic_lambda, with_biases, is_x_bias_last_row)
 }
 
-als_implicit_double <- function(m_csc_r, X, Y, XtX, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row) {
-    .Call(`_rsparse_als_implicit_double`, m_csc_r, X, Y, XtX, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row)
+als_implicit_double <- function(m_csc_r, X, Y, XtX, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row, global_bias, global_bias_base, initialize_bias_base) {
+    .Call(`_rsparse_als_implicit_double`, m_csc_r, X, Y, XtX, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row, global_bias, global_bias_base, initialize_bias_base)
 }
 
-als_implicit_float <- function(m_csc_r, X_, Y_, XtX_, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row) {
-    .Call(`_rsparse_als_implicit_float`, m_csc_r, X_, Y_, XtX_, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row)
+als_implicit_float <- function(m_csc_r, X_, Y_, XtX_, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row, global_bias, global_bias_base_, initialize_bias_base) {
+    .Call(`_rsparse_als_implicit_float`, m_csc_r, X_, Y_, XtX_, lambda, n_threads, solver, cg_steps, with_biases, is_x_bias_last_row, global_bias, global_bias_base_, initialize_bias_base)
 }
 
-initialize_biases_double <- function(m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias = FALSE) {
-    .Call(`_rsparse_initialize_biases_double`, m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias)
+initialize_biases_double <- function(m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias = FALSE, is_explicit_feedback = FALSE, initialize_item_biases = FALSE) {
+    .Call(`_rsparse_initialize_biases_double`, m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias, is_explicit_feedback, initialize_item_biases)
 }
 
-initialize_biases_float <- function(m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias = FALSE) {
-    .Call(`_rsparse_initialize_biases_float`, m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias)
+initialize_biases_float <- function(m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias = FALSE, is_explicit_feedback = FALSE, initialize_item_biases = FALSE) {
+    .Call(`_rsparse_initialize_biases_float`, m_csc_r, m_csr_r, user_bias, item_bias, lambda, dynamic_lambda, non_negative, calculate_global_bias, is_explicit_feedback, initialize_item_biases)
 }
 
diff --git a/R/model_WRMF.R b/R/model_WRMF.R
@@ -87,18 +87,6 @@ WRMF = R6::R6Class(
       solver = match.arg(solver)
       feedback = match.arg(feedback)
 
-      if (feedback == 'implicit') {
-        # FIXME
-
-        if (solver == "conjugate_gradient" && with_user_item_bias == TRUE) {
-          msg = paste("'conjugate_gradient' is not supported for a model",
-            "`with_user_item_bias == TRUE`. Setting to 'cholesky'."
-          )
-          warning(msg)
-          solver = "cholesky"
-        }
-        with_global_bias = FALSE
-      }
       private$non_negative = ifelse(solver == "nnls", TRUE, FALSE)
 
       if (private$non_negative && with_global_bias == TRUE) {
@@ -141,7 +129,10 @@ WRMF = R6::R6Class(
             precision = private$precision,
             with_user_item_bias = private$with_user_item_bias,
             is_bias_last_row = is_bias_last_row,
-            XtX = XtX)
+            global_bias = self$global_bias,
+            initialize_bias_base = !avoid_cg,
+            XtX = XtX,
+            global_bias_base = self$global_bias_base)
         } else {
           als_explicit(
             x, X, Y, cnt_X,
@@ -162,7 +153,8 @@ WRMF = R6::R6Class(
                      initialize_biases_double,
                      initialize_biases_float)
         FUN(c_ui, c_iu, user_bias, item_bias, private$lambda, private$dynamic_lambda,
-            private$non_negative, private$with_global_bias)
+            private$non_negative, private$with_global_bias, feedback == "explicit",
+            private$solver_code != 1)
       }
 
       self$components = init
@@ -211,28 +203,43 @@ WRMF = R6::R6Class(
         private$U = large_rand_matrix(private$rank, n_user)
         # for item biases
         if (private$with_user_item_bias) {
-          private$U[1, ] = rep(1.0, n_user)
+          private$U[1L, ] = rep(1.0, n_user)
         }
       } else {
         private$U = flrnorm(private$rank, n_user, 0, 0.01)
         if (private$with_user_item_bias) {
-          private$U[1, ] = float::fl(rep(1.0, n_user))
+          private$U[1L, ] = float::fl(rep(1.0, n_user))
         }
       }
 
       if (is.null(self$components)) {
-        if (private$precision == "double") {
-          self$components = large_rand_matrix(private$rank, n_item)
-          # for user biases
-          if (private$with_user_item_bias) {
-            self$components[private$rank, ] = rep(1.0, n_item)
+
+        if (private$solver_code == 1L) { ### <- cholesky
+          if (private$precision == "double") {
+            self$components = matrix(0, private$rank, n_item)
+            if (private$with_user_item_bias) {
+              self$components[private$rank, ] = rep(1.0, n_item)
+            }
+          } else {
+            self$components = float::float(0, private$rank, n_item)
+            if (private$with_user_item_bias) {
+              self$components[private$rank, ] = float::fl(rep(1.0, n_item))
+            }
           }
-        } else {
-          self$components = flrnorm(private$rank, n_item, 0, 0.01)
-          if (private$with_user_item_bias) {
-            self$components[private$rank, ] = float::fl(rep(1.0, n_item))
+          } else {
+            if (private$precision == "double") {
+              self$components = large_rand_matrix(private$rank, n_item)
+              # for user biases
+              if (private$with_user_item_bias) {
+                self$components[private$rank, ] = rep(1.0, n_item)
+              }
+            } else {
+              self$components = flrnorm(private$rank, n_item, 0, 0.01)
+              if (private$with_user_item_bias) {
+                self$components[private$rank, ] = float::fl(rep(1.0, n_item))
+              }
+            }
           }
-        }
       } else {
         stopifnot(is.matrix(self$components) || is.float(self$components))
         stopifnot(ncol(self$components) == n_item)
@@ -268,10 +275,23 @@ WRMF = R6::R6Class(
         self$components[1L, ] = item_bias
         private$U[private$rank, ] = user_bias
         if(private$with_global_bias) self$global_bias = global_bias
-      } else if (private$feedback == "explicit" && private$with_global_bias) {
-        self$global_bias = mean(c_ui@x)
-        c_ui@x = c_ui@x - self$global_bias
-        c_iu@x = c_iu@x - self$global_bias
+      } else if (private$with_global_bias) {
+        if (private$feedback == "explicit") {
+          self$global_bias = mean(c_ui@x)
+          c_ui@x = c_ui@x - self$global_bias
+          c_iu@x = c_iu@x - self$global_bias
+        } else {
+          s = sum(c_ui@x)
+          self$global_bias = s / (s + as.numeric(nrow(c_ui))*as.numeric(ncol(c_ui)) - length(c_ui@x))
+        }
+      }
+
+      if (private$feedback == "implicit") {
+        size_global_bias_base = ifelse(private$with_user_item_bias, 0L, private$rank-1L)
+        if (private$precision == "double")
+          self$global_bias_base = numeric(size_global_bias_base)
+        else
+          self$global_bias_base = float(size_global_bias_base)
       }
 
       logger$info("starting factorization with %d threads", getOption("rsparse_omp_threads", 1L))
@@ -350,7 +370,7 @@ WRMF = R6::R6Class(
 
       x = as(x, "CsparseMatrix")
       x = private$preprocess(x)
-      if (self$global_bias != 0.)
+      if (self$global_bias != 0. && private$feedback == "explicit")
         x@x = x@x - self$global_bias
 
       if (private$precision == "double") {
@@ -420,7 +440,10 @@ als_implicit = function(
   precision,
   with_user_item_bias,
   is_bias_last_row,
-  XtX = NULL) {
+  initialize_bias_base,
+  global_bias = 0.,
+  XtX = NULL,
+  global_bias_base = NULL) {
 
   solver = ifelse(precision == "float",
                   als_implicit_float,
@@ -437,8 +460,15 @@ als_implicit = function(
     }
     XtX = tcrossprod(XX) + ridge
   }
+  if (is.null(global_bias_base)) {
+    global_bias_base = numeric()
+    if (precision == "float")
+      global_bias_base = float::fl(global_bias_base)
+  }
   # Y is modified in-place
-  loss = solver(x, X, Y, XtX, lambda, n_threads, solver_code, cg_steps, with_user_item_bias, is_bias_last_row)
+  loss = solver(x, X, Y, XtX, lambda, n_threads, solver_code, cg_steps,
+                with_user_item_bias, is_bias_last_row, global_bias,
+                global_bias_base, initialize_bias_base)
 }
 
 als_explicit = function(

diff --git a/inst/include/wrmf_explicit.hpp b/inst/include/wrmf_explicit.hpp
@@ -32,8 +32,8 @@ arma::Col<T> cg_solver_explicit(const arma::Mat<T>& X_nnz, const arma::Col<T>& c
 
 template <class T>
 T als_explicit(const dMappedCSC& Conf, arma::Mat<T>& X, arma::Mat<T>& Y,
-               const double lambda, const unsigned n_threads, const unsigned solver,
-               const unsigned cg_steps, const bool dynamic_lambda,
+               const double lambda, const int n_threads, const unsigned int solver,
+               const unsigned int cg_steps, const bool dynamic_lambda,
                const arma::Col<T>& cnt_X, const bool with_biases,
                const bool is_x_bias_last_row) {
   /* Note about biases:
@@ -63,7 +63,7 @@ T als_explicit(const dMappedCSC& Conf, arma::Mat<T>& X, arma::Mat<T>& Y,
       x_biases = X.row(0).t();
   }
 
-  T loss = 0;
+  double loss = 0;
   size_t nc = Conf.n_cols;
 #ifdef _OPENMP
 #pragma omp parallel for num_threads(n_threads) schedule(dynamic, GRAIN_SIZE) reduction(+:loss)

diff --git a/inst/include/wrmf_implicit.hpp b/inst/include/wrmf_implicit.hpp
@@ -31,11 +31,68 @@ arma::Col<T> cg_solver_implicit(const arma::Mat<T>& X_nnz, const arma::Col<T>& c
   return x;
 }
 
+/* FIXME: 'global_bias_base' used like this has very poor numerical precision and ends up making things worse */
+template <class T>
+arma::Col<T> cg_solver_implicit_global_bias(const arma::Mat<T>& X_nnz, const arma::Col<T>& confidence,
+                                            const arma::Col<T>& x_old, const arma::uword n_iter,
+                                            const arma::Mat<T>& XtX,   const arma::Col<T> global_bias_base,
+                                            T global_bias) {
+  arma::Col<T> x = x_old;
+  const arma::Col<T> confidence_1 = confidence - 1.0;
+
+  arma::Col<T> Ap;
+  arma::Col<T> r = X_nnz * (confidence - (confidence_1 % (X_nnz.t() * x + global_bias))) - XtX * x + global_bias_base;
+  arma::Col<T> p = r;
+  double rsold, rsnew, alpha;
+  rsold = arma::dot(r, r);
+
+  for (auto k = 0; k < n_iter; k++) {
+    Ap = XtX * p + X_nnz * (confidence_1 % (X_nnz.t() * p));
+    alpha = rsold / dot(p, Ap);
+    x += alpha * p;
+    r -= alpha * Ap;
+    rsnew = dot(r, r);
+    if (rsnew < CG_TOL) break;
+    p = r + p * (rsnew / rsold);
+    rsold = rsnew;
+  }
+  return x;
+}
+
+template <class T>
+arma::Col<T> cg_solver_implicit_user_item_bias(const arma::Mat<T>& X_nnz, const arma::Col<T>& confidence,
+                                               const arma::Col<T>& x_old, const arma::uword n_iter,
+                                               const arma::Mat<T>& XtX,   const arma::Col<T> &rhs_init,
+                                               const arma::Col<T> &x_biases, T global_bias) {
+  arma::Col<T> x = x_old;
+  const arma::Col<T> confidence_1 = confidence - 1.0;
+
+  arma::Col<T> Ap;
+  arma::Col<T> r = X_nnz * (confidence - (confidence_1 % (X_nnz.t() * x + x_biases + global_bias)))
+                    - XtX * x + rhs_init;
+  arma::Col<T> p = r;
+  double rsold, rsnew, alpha;
+  rsold = arma::dot(r, r);
+
+  for (auto k = 0; k < n_iter; k++) {
+    Ap = XtX * p + X_nnz * (confidence_1 % (X_nnz.t() * p));
+    alpha = rsold / dot(p, Ap);
+    x += alpha * p;
+    r -= alpha * Ap;
+    rsnew = dot(r, r);
+    if (rsnew < CG_TOL) break;
+    p = r + p * (rsnew / rsold);
+    rsold = rsnew;
+  }
+  return x;
+}
+
 template <class T>
 T als_implicit(const dMappedCSC& Conf, arma::Mat<T>& X, arma::Mat<T>& Y,
-               const arma::Mat<T>& XtX, double lambda, unsigned n_threads,
-               unsigned solver, unsigned cg_steps, const bool with_biases,
-               bool is_x_bias_last_row) {
+               const arma::Mat<T>& XtX, double lambda, int n_threads,
+               const unsigned int solver, unsigned int cg_steps, const bool with_biases,
+               const bool is_x_bias_last_row, double global_bias,
+               arma::Col<T> &global_bias_base, const bool initialize_bias_base) {
   // if is_x_bias_last_row == true
   // X = [1, ..., x_bias]
   // Y = [y_bias, ..., 1]
@@ -48,6 +105,12 @@ T als_implicit(const dMappedCSC& Conf, arma::Mat<T>& X, arma::Mat<T>& Y,
   arma::Col<T> x_biases;
   arma::Mat<T> rhs_init;
 
+  if (global_bias < std::sqrt(std::numeric_limits<T>::epsilon()))
+    global_bias = 0;
+
+  if (global_bias && initialize_bias_base && !with_biases)
+    global_bias_base = arma::sum(X, 1) * (-global_bias);
+
   if (with_biases) {
     if (is_x_bias_last_row)  // last row
       x_biases = X.row(X.n_rows - 1).t();
@@ -78,16 +141,27 @@ T als_implicit(const dMappedCSC& Conf, arma::Mat<T>& X, arma::Mat<T>& Y,
     //    X_nnz_user * 1 * (0 - x_biases_nnz_user) +
     //    X_nnz_user * C_nnz_user * (1 - x_biases_nnz_user)
 
-    // here we do following:
-    // drop row with "ones" placeholder for convenient ALS form
-    rhs_init = drop_row<T>(X, is_x_bias_last_row);
-    // p = 0
-    // C = 1 (so we omit multiplication on eye matrix)
-    // rhs = X * eye * (0 - x_biases) = -X * x_biases
-    rhs_init *= -x_biases;
+    if (!global_bias) {
+      // here we do following:
+      // drop row with "ones" placeholder for convenient ALS form
+      rhs_init = drop_row<T>(X, is_x_bias_last_row);
+      // p = 0
+      // C = 1 (so we omit multiplication on eye matrix)
+      // rhs = X * eye * (0 - x_biases) = -X * x_biases
+      rhs_init *= -x_biases;
+    }
+
+    else {
+      rhs_init = - (drop_row<T>(X, is_x_bias_last_row) * (x_biases + global_bias));
+    }
+  }
+
+  else if (global_bias) {
+    rhs_init = arma::Mat<T>(&global_bias_base[0], rank - (int)with_biases, 1, false, true);
   }
 
-  T loss = 0;
+
+  double loss = 0;
   size_t nc = Conf.n_cols;
 #ifdef _OPENMP
 #pragma omp parallel for num_threads(n_threads) schedule(dynamic, GRAIN_SIZE) reduction(+:loss)
@@ -114,7 +188,15 @@ T als_implicit(const dMappedCSC& Conf, arma::Mat<T>& X, arma::Mat<T>& Y,
       arma::Col<T> Y_new;
 
       if (solver == CONJUGATE_GRADIENT) {
-        Y_new = cg_solver_implicit<T>(X_nnz, confidence, init, cg_steps, XtX);
+        if (!with_biases && !global_bias)
+          Y_new = cg_solver_implicit<T>(X_nnz, confidence, init, cg_steps, XtX);
+        else if (with_biases)
+          Y_new = cg_solver_implicit_user_item_bias<T>(X_nnz, confidence, init, cg_steps, XtX,
+                                                       rhs_init, x_biases(idx), global_bias);
+        else
+          Y_new = cg_solver_implicit_global_bias<T>(X_nnz, confidence, init, cg_steps, XtX,
+                                                    rhs_init, global_bias);
+
       } else {
         const arma::Mat<T> lhs =
             XtX + X_nnz.each_row() % (confidence.t() - 1) * X_nnz.t();
@@ -137,6 +219,8 @@ T als_implicit(const dMappedCSC& Conf, arma::Mat<T>& X, arma::Mat<T>& Y,
           // expression above can be simplified further:
           rhs = rhs_init + X_nnz * (confidence - x_biases(idx) % (confidence - 1));
 
+        } else if (global_bias) {
+          rhs = X_nnz * confidence + rhs_init;
         } else {
           rhs = X_nnz * confidence;
         }