Skip to content

Commit 0d9b106

Browse files
Support complex data in post-processing utilities
- Move post-processing (tensor PCA projection and U smoothing) to template functions to allow real and complex data support - These depend on ClustRVizLogger so create a new header to implement them - Write R/C++ glue functions
1 parent 918b135 commit 0d9b106

File tree

4 files changed

+164
-103
lines changed

4 files changed

+164
-103
lines changed

src/clustRviz.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,3 +287,31 @@ SEXP matrix_col_prox(SEXP Xsexp,
287287
return R_NilValue;
288288
};
289289

290+
// [[Rcpp::export(rng = false)]]
291+
SEXP smooth_u_clustering(SEXP U_oldSEXP, Rcpp::List cluster_info_list){
292+
switch(TYPEOF(U_oldSEXP)){
293+
case REALSXP: return Rcpp::wrap(smooth_u_clustering_impl<Rcpp::NumericVector, double>(Rcpp::as<Rcpp::NumericVector>(U_oldSEXP), cluster_info_list));
294+
case CPLXSXP: return Rcpp::wrap(smooth_u_clustering_impl<Rcpp::ComplexVector, std::complex<double> >(Rcpp::as<Rcpp::ComplexVector>(U_oldSEXP), cluster_info_list));
295+
default: Rcpp::stop("Unsupported type of X.");
296+
}
297+
298+
// Should not trigger but appease compiler...
299+
return R_NilValue;
300+
}
301+
302+
// [[Rcpp::export(rng = false)]]
303+
SEXP tensor_projection(SEXP Xsexp, SEXP Ysexp){
304+
if(TYPEOF(Xsexp) != TYPEOF(Ysexp)){
305+
Rcpp::stop("Type of X and Y must match.");
306+
}
307+
308+
switch(TYPEOF(Xsexp)){
309+
case REALSXP: return Rcpp::wrap(tensor_projection_impl<Rcpp::NumericVector, double>(Rcpp::as<Rcpp::NumericVector>(Xsexp), Rcpp::as<Eigen::MatrixXd>(Ysexp)));
310+
case CPLXSXP: return Rcpp::wrap(tensor_projection_impl<Rcpp::ComplexVector, std::complex<double> >(Rcpp::as<Rcpp::ComplexVector>(Xsexp), Rcpp::as<Eigen::MatrixXcd>(Ysexp)));
311+
default: Rcpp::stop("Unsupported type of X.");
312+
}
313+
314+
// Should not trigger but appease compiler...
315+
return R_NilValue;
316+
}
317+

src/clustRviz.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "clustRviz_base.h"
22
#include "clustRviz_logging.h"
3+
#include "clustRviz_utils.h"
34
#include "clustering_impl.h"
45
#include "biclustering_impl.h"
56
#include "trout_impl.h"

src/clustRviz_utils.h

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
#ifndef CLUSTRVIZ_UTILS_H
2+
#define CLUSTRVIZ_UTILS_H 1
3+
// This header defines template versions of complex utilities
4+
// These are templated on the data type to allow for both real and complex data
5+
// These are not in clustRviz_base.h since some of them depend on ClustRVizLogger...
6+
7+
#include <RcppEigen.h>
8+
#include "clustRviz_logging.h"
9+
10+
// U-smoothing for convex clustering
11+
//
12+
// Given cluster memberships, replace rows of U which belong to the same cluster
13+
// with their mutual mean....
14+
template <typename RcppVector, typename DataType>
15+
RcppVector smooth_u_clustering_impl(RcppVector U_old, Rcpp::List cluster_info_list){
16+
// The first argument is really an array but we pass as a NumericVector
17+
// The second argument is a list produced by get_cluster_assignments()
18+
Rcpp::IntegerVector U_dims = U_old.attr("dim");
19+
if(U_dims.size() != 3){
20+
ClustRVizLogger::error("U must be a three rank tensor.");
21+
}
22+
int N = U_dims(0);
23+
int P = U_dims(1);
24+
int Q = U_dims(2);
25+
26+
// Check length of cluster_info
27+
if(cluster_info_list.size() != Q){
28+
ClustRVizLogger::error("Dimensions of U and cluster_info do not match");
29+
}
30+
31+
RcppVector U(N * P * Q);
32+
U.attr("dim") = U_dims;
33+
Rcpp::rownames(U) = Rcpp::rownames(U_old);
34+
Rcpp::colnames(U) = Rcpp::colnames(U_old);
35+
36+
for(int q = 0; q < Q; q++){
37+
Rcpp::List cluster_info = cluster_info_list[q];
38+
int n_clusters = Rcpp::as<int>(cluster_info[2]);
39+
40+
Rcpp::IntegerVector cluster_ids = cluster_info[0];
41+
Rcpp::IntegerVector cluster_sizes = cluster_info[1];
42+
43+
// There's a lot going on on the RHS here, so let's un-pack (inside outwards)
44+
// First, we get a pointer to the relevant slice of U_old
45+
// This works because the RcppVector is a C++ wrapper around a SEXP which
46+
// is ultimately just a pointer to the relevant memory
47+
// We when cast it to an appropriate C++ pointer type
48+
// For real data, this is a no-op since both R and Eigen use doubles for real data
49+
// For complex data, this matters because we convert from R's homegrown Rcomplex*
50+
// to a std::complex* pointer as eigen expects
51+
// We then use Eigen::Map<Eigen::Matrix<DataType>> to get an Eigen::Matrix<DataType> backed
52+
// by R's memory in a read only fashion.
53+
// The same construct is used below to load the smoothed data into U
54+
Eigen::Matrix<DataType, Eigen::Dynamic, Eigen::Dynamic> U_old_slice = Eigen::Map<Eigen::Matrix<DataType, Eigen::Dynamic, Eigen::Dynamic> >(reinterpret_cast<DataType*>(&U_old[N * P * q]), N, P);
55+
Eigen::Matrix<DataType, Eigen::Dynamic, Eigen::Dynamic> U_new(N, P);
56+
57+
for(int j = 1; j <= n_clusters; j++){ // Cluster IDs are 1-based (per R conventions)
58+
Eigen::Matrix<DataType, Eigen::Dynamic, 1> vec(P); vec.setZero();
59+
60+
// Manually work out new mean
61+
for(int n = 0; n < N; n++){
62+
if(cluster_ids[n] == j){
63+
vec += U_old_slice.row(n);
64+
}
65+
}
66+
67+
vec /= cluster_sizes[j - 1]; // Subtract 1 to adjust to C++ indexing
68+
69+
// Assign new mean where needed...
70+
for(int n = 0; n < N; n++){
71+
if(cluster_ids[n] == j){
72+
U_new.row(n) = vec;
73+
}
74+
}
75+
}
76+
77+
Eigen::Map<Eigen::Matrix<DataType, Eigen::Dynamic, Eigen::Dynamic> >(reinterpret_cast<DataType*>(&U[N * P * q]), N, P) = U_new;
78+
}
79+
80+
return U;
81+
}
82+
83+
// Tensor projection along the second mode
84+
//
85+
// Given a 3D tensor X in F^{n-by-p-by-q} (observations by features by iterations)
86+
// and a rotation matrix Y in F^{p-by-k} (features by principal components), we
87+
// want to get a projected array in F^{n-by-k-by-q} giving the path of the principal
88+
// components
89+
//
90+
// This is straightforward, but "loopy" so we implement it in Rcpp / RcppEigen for speed
91+
// We use some template magic to support F = R (real) and F = C (complex) data
92+
template <typename RcppVector, typename DataType>
93+
RcppVector tensor_projection_impl(RcppVector X, const Eigen::Matrix<DataType, Eigen::Dynamic, Eigen::Dynamic>& Y){
94+
95+
// Validate X
96+
Rcpp::IntegerVector X_dims = X.attr("dim");
97+
if(X_dims.size() != 3){
98+
ClustRVizLogger::error("X must be a three rank tensor.");
99+
}
100+
int n = X_dims(0);
101+
int p = X_dims(1);
102+
int q = X_dims(2);
103+
104+
// Validate Y
105+
if(Y.rows() != p){
106+
ClustRVizLogger::error("The dimensions of X and Y do not match -- ") << p << " != " << Y.rows();
107+
}
108+
109+
int k = Y.cols();
110+
111+
RcppVector result(n * k * q);
112+
Rcpp::IntegerVector result_dims{n, k, q};
113+
result.attr("dim") = result_dims;
114+
115+
for(int i = 0; i < q; i++){
116+
// There's a lot going on on the RHS here, so let's un-pack (inside outwards)
117+
// First, we get a pointer to the relevant slice of X
118+
// This works because the RcppVector is a C++ wrapper around a SEXP which
119+
// is ultimately just a pointer to the relevant memory
120+
// We when cast it to an appropriate C++ pointer type
121+
// For real data, this is a no-op since both R and Eigen use doubles for real data
122+
// For complex data, this matters because we convert from R's homegrown Rcomplex*
123+
// to a std::complex* pointer as eigen expects
124+
// We then use Eigen::Map<Eigen::Matrix<DataType>> to get an Eigen::Matrix<DataType> backed
125+
// by R's memory in a read only fashion.
126+
// The same construct is used below to load the smoothed data into result
127+
Eigen::Matrix<DataType, Eigen::Dynamic, Eigen::Dynamic> X_slice = Eigen::Map<Eigen::Matrix<DataType, Eigen::Dynamic, Eigen::Dynamic> >(reinterpret_cast<DataType*>(&X[n * p * i]), n, p);
128+
Eigen::Matrix<DataType, Eigen::Dynamic, Eigen::Dynamic> X_slice_projected = X_slice * Y;
129+
Eigen::Map<Eigen::Matrix<DataType, Eigen::Dynamic, Eigen::Dynamic> >(reinterpret_cast<DataType*>(&result[n * k * i]), n, k) = X_slice_projected;
130+
}
131+
132+
return result;
133+
}
134+
135+
#endif

src/utils.cpp

Lines changed: 0 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -67,109 +67,6 @@ void check_weight_matrix(const Eigen::MatrixXd& weight_matrix){
6767
}
6868
}
6969

70-
// U-smoothing for convex clustering
71-
//
72-
// Given cluster memberships, replace rows of U which belong to the same cluster
73-
// with their mutual mean....
74-
//
75-
// [[Rcpp::export(rng = false)]]
76-
Rcpp::NumericVector smooth_u_clustering(Rcpp::NumericVector U_old, Rcpp::List cluster_info_list){
77-
// The first argument is really an array but we pass as a NumericVector
78-
// The second argument is a list produced by get_cluster_assignments()
79-
Rcpp::IntegerVector U_dims = U_old.attr("dim");
80-
if(U_dims.size() != 3){
81-
ClustRVizLogger::error("U must be a three rank tensor.");
82-
}
83-
int N = U_dims(0);
84-
int P = U_dims(1);
85-
int Q = U_dims(2);
86-
87-
// Check length of cluster_info
88-
if(cluster_info_list.size() != Q){
89-
ClustRVizLogger::error("Dimensions of U and cluster_info do not match");
90-
}
91-
92-
Rcpp::NumericVector U(N * P * Q);
93-
U.attr("dim") = U_dims;
94-
Rcpp::rownames(U) = Rcpp::rownames(U_old);
95-
Rcpp::colnames(U) = Rcpp::colnames(U_old);
96-
97-
for(int q = 0; q < Q; q++){
98-
Rcpp::List cluster_info = cluster_info_list[q];
99-
int n_clusters = Rcpp::as<int>(cluster_info[2]);
100-
101-
Rcpp::IntegerVector cluster_ids = cluster_info[0];
102-
Rcpp::IntegerVector cluster_sizes = cluster_info[1];
103-
104-
Eigen::MatrixXd U_old_slice = Eigen::Map<Eigen::MatrixXd>(&U_old[N * P * q], N, P);
105-
Eigen::MatrixXd U_new(N, P);
106-
107-
for(int j = 1; j <= n_clusters; j++){ // Cluster IDs are 1-based (per R conventions)
108-
Eigen::VectorXd vec = Eigen::VectorXd::Zero(P);
109-
110-
// Manually work out new mean
111-
for(int n = 0; n < N; n++){
112-
if(cluster_ids[n] == j){
113-
vec += U_old_slice.row(n);
114-
}
115-
}
116-
117-
vec /= cluster_sizes[j - 1]; // Subtract 1 to adjust to C++ indexing
118-
119-
// Assign new mean where needed...
120-
for(int n = 0; n < N; n++){
121-
if(cluster_ids[n] == j){
122-
U_new.row(n) = vec;
123-
}
124-
}
125-
}
126-
127-
Eigen::Map<Eigen::MatrixXd>(&U[N * P * q], N, P) = U_new;
128-
}
129-
130-
return U;
131-
}
132-
133-
// Tensor projection along the second mode
134-
//
135-
// Given a 3D tensor X in R^{n-by-p-by-q} (observations by features by iterations)
136-
// and a rotation matrix Y in R^{p-by-k} (features by principal components), we
137-
// want to get a projected array in R^{n-by-k-by-q} giving the path of the principal
138-
// components
139-
//
140-
// This is straightforward, but "loopy" so we implement it in Rcpp / RcppEigen for speed
141-
// [[Rcpp::export(rng = false)]]
142-
Rcpp::NumericVector tensor_projection(Rcpp::NumericVector X, const Eigen::MatrixXd& Y){
143-
144-
// Validate X
145-
Rcpp::IntegerVector X_dims = X.attr("dim");
146-
if(X_dims.size() != 3){
147-
ClustRVizLogger::error("X must be a three rank tensor.");
148-
}
149-
int n = X_dims(0);
150-
int p = X_dims(1);
151-
int q = X_dims(2);
152-
153-
// Validate Y
154-
if(Y.rows() != p){
155-
ClustRVizLogger::error("The dimensions of X and Y do not match -- ") << p << " != " << Y.rows();
156-
}
157-
158-
int k = Y.cols();
159-
160-
Rcpp::NumericVector result(n * k * q);
161-
Rcpp::IntegerVector result_dims{n, k, q};
162-
result.attr("dim") = result_dims;
163-
164-
for(int i = 0; i < q; i++){
165-
Eigen::MatrixXd X_slice = Eigen::Map<Eigen::MatrixXd>(&X[n * p * i], n, p);
166-
Eigen::MatrixXd X_slice_projected = X_slice * Y;
167-
Eigen::Map<Eigen::MatrixXd>(&result[n * k * i], n, k) = X_slice_projected;
168-
}
169-
170-
return result;
171-
}
172-
17370
// TROUT Alignment
17471
// FIXME - Why doesn't this play nice with overloading? Prototype missing somewhwere?
17572
Eigen::VectorXcd align_phase_v(const Eigen::VectorXcd& u,

0 commit comments

Comments
 (0)