add new internal function __vectorized_square_accumulate fixing issue #126

James · James · commit c303094cfc45 · 2018-08-09T20:11:12.000-07:00
diff --git a/library/Makefile b/library/Makefile
@@ -25,7 +25,7 @@ OPT_FLAGS	:= -O1
 LDFLAGS		:= -lm -lrt -pthread -shared -Wl,-soname,$(SONAME)
 
 # different compile flags for math libs
-MATH_OPT_FLAGS	:= -O3 -ffast-math -ftree-vectorize -Wno-restrict
+MATH_OPT_FLAGS	:= -O3 -ffast-math -ftree-vectorize
 
 # commands
 RM		:= rm -rf
diff --git a/library/src/math/algebra.c b/library/src/math/algebra.c
@@ -74,7 +74,7 @@ int __householder_reflection(int step, rc_matrix_t* Q, rc_matrix_t* R)
 
 	// pre-calculate matrix multiplication coefficient tau
 	// doing this on one line causes a compiler optimization error :-/
-	dot = __vectorized_mult_accumulate(x,x,n);
+	dot = __vectorized_square_accumulate(x,n);
 	tau = -2.0/dot;
 
 	// fill in diagonal and upper triangle of H
diff --git a/library/src/math/algebra_common.c b/library/src/math/algebra_common.c
@@ -15,3 +15,13 @@ double __vectorized_mult_accumulate(double * __restrict__ a, double * __restrict
 	return sum;
 }
 
+// Sums a[i]*a[i] for i = 0..n-1; returns 0.0 when the loop body never runs.
+double __vectorized_square_accumulate(double * __restrict__ a, int n)
+{
+	int i;
+	double sum = 0.0; // running total of the squared elements
+	for(i=0;i<n;i++){
+		sum+=a[i]*a[i]; // a is only read here, never written
+	}
+	return sum;
+}
diff --git a/library/src/math/algebra_common.h b/library/src/math/algebra_common.h
@@ -25,4 +25,14 @@
  */
 double __vectorized_mult_accumulate(double * __restrict__ a, double * __restrict__ b, int n);
 
+/*
+ * Performs a vector dot product of the first n elements of a with itself.
+ *
+ * This is a dangerous function that could segfault if not used properly. Hence
+ * it is only for internal use in the RC library. The 'restrict' qualifier tells
+ * the C compiler that the pointer is not aliased, which helps the vectorization
+ * process for optimization with the NEON FPU or similar.
+ */
+double __vectorized_square_accumulate(double * __restrict__ a, int n);
+
 #endif // RC_ALGEBRA_COMMON_H