From e9a1f08b31690528d5f8911c61479d07fc00c250 Mon Sep 17 00:00:00 2001 From: Gabriel Santamaria Date: Mon, 18 Nov 2024 15:04:00 +0100 Subject: [PATCH] Moved the definition outside extern C. - Kept the declaration inside the extern C but moved away the declaration, as an attempt to fix MacOS builds. --- src/owl/fftpack/owl_fftpack_impl.h | 540 ++++++++++++++--------------- 1 file changed, 268 insertions(+), 272 deletions(-) diff --git a/src/owl/fftpack/owl_fftpack_impl.h b/src/owl/fftpack/owl_fftpack_impl.h index 3def4d2e4..12bb48420 100644 --- a/src/owl/fftpack/owl_fftpack_impl.h +++ b/src/owl/fftpack/owl_fftpack_impl.h @@ -51,342 +51,338 @@ T compute_norm_factor(const shape_t &dims, const shape_t &axes, int inorm, size_ return norm_fct(inorm, N); } -extern "C" +/** Owl's stub functions **/ + +/** + * Complex-to-complex FFT + * @param forward: true for forward transform, false for backward transform + * @param X: input array + * @param Y: output array + * @param d: dimension along which to perform the transform + * @param norm: normalization factor + * @param nthreads: number of threads to use + * + * @return unit + */ +value STUB_CFFT(value vForward, value vX, value vY, value vD, value vNorm, value vNthreads) { + struct caml_ba_array *X = Caml_ba_array_val(vX); + std::complex *X_data = reinterpret_cast *>(X->data); - /** Owl's stub functions **/ - - /** - * Complex-to-complex FFT - * @param forward: true for forward transform, false for backward transform - * @param X: input array - * @param Y: output array - * @param d: dimension along which to perform the transform - * @param norm: normalization factor - * @param nthreads: number of threads to use - * - * @return unit - */ - value STUB_CFFT(value vForward, value vX, value vY, value vD, value vNorm, value vNthreads) - { - struct caml_ba_array *X = Caml_ba_array_val(vX); - std::complex *X_data = reinterpret_cast *>(X->data); + struct caml_ba_array *Y = Caml_ba_array_val(vY); + std::complex *Y_data = reinterpret_cast *>(Y->data); - struct caml_ba_array *Y = Caml_ba_array_val(vY); - std::complex *Y_data = reinterpret_cast *>(Y->data); + int d = Long_val(vD); + int n = X->dim[d]; + int norm = Long_val(vNorm); + int nthreads = Long_val(vNthreads); + int forward = Bool_val(vForward); - int d = Long_val(vD); - int n = X->dim[d]; - int norm = Long_val(vNorm); - int nthreads = Long_val(vNthreads); - int forward = Bool_val(vForward); + shape_t dims; + stride_t stride_in, stride_out; - shape_t dims; - stride_t stride_in, stride_out; + for (int i = 0; i < X->num_dims; ++i) + { + dims.push_back(static_cast(X->dim[i])); + } - for (int i = 0; i < X->num_dims; ++i) - { - dims.push_back(static_cast(X->dim[i])); - } + size_t multiplier = sizeof(std::complex); + for (int i = 0; i < X->num_dims; ++i) + { + stride_in.push_back(c_ndarray_stride_dim(X, i) * multiplier); + stride_out.push_back(c_ndarray_stride_dim(Y, i) * multiplier); + } - size_t multiplier = sizeof(std::complex); - for (int i = 0; i < X->num_dims; ++i) + shape_t axes{static_cast(d)}; + { + Treal norm_factor = compute_norm_factor(dims, axes, norm); + try { - stride_in.push_back(c_ndarray_stride_dim(X, i) * multiplier); - stride_out.push_back(c_ndarray_stride_dim(Y, i) * multiplier); + pocketfft::c2c(dims, stride_in, stride_out, axes, forward, + X_data, Y_data, norm_factor, nthreads); } - - shape_t axes{static_cast(d)}; + catch (const std::exception &e) { - Treal norm_factor = compute_norm_factor(dims, axes, norm); - try - { - pocketfft::c2c(dims, stride_in, stride_out, axes, forward, - X_data, Y_data, norm_factor, nthreads); - } - catch (const std::exception &e) - { - caml_failwith(e.what()); // maybe raise an OCaml exception here ?? - } + caml_failwith(e.what()); // maybe raise an OCaml exception here ?? } - - return Val_unit; } - /** - * Complex-to-complex FFT - * @param argv: array of arguments - * @param argn: number of arguments - * @see STUB_CFFT, https://ocaml.org/manual/5.2/intfc.html#ss:c-prim-impl - */ - value STUB_CFFT_bytecode(value *argv, int argn) - { - return STUB_CFFT(argv[0], argv[1], argv[2], argv[3], argv[4], argv[5]); - } + return Val_unit; +} - /** - * Forward Real-to-complex FFT - * @param X: input array (real data) - * @param Y: output array (complex data) - * @param d: dimension along which to perform the transform - * @param norm: normalization factor - * @param nthreads: number of threads to use - * - * @return unit - */ - value STUB_RFFTF(value vX, value vY, value vD, value vNorm, value vNthreads) - { - struct caml_ba_array *X = Caml_ba_array_val(vX); - Treal *X_data = reinterpret_cast(X->data); +/** + * Complex-to-complex FFT + * @param argv: array of arguments + * @param argn: number of arguments + * @see STUB_CFFT, https://ocaml.org/manual/5.2/intfc.html#ss:c-prim-impl + */ +value STUB_CFFT_bytecode(value *argv, int argn) +{ + return STUB_CFFT(argv[0], argv[1], argv[2], argv[3], argv[4], argv[5]); +} - struct caml_ba_array *Y = Caml_ba_array_val(vY); - std::complex *Y_data = reinterpret_cast *>(Y->data); +/** + * Forward Real-to-complex FFT + * @param X: input array (real data) + * @param Y: output array (complex data) + * @param d: dimension along which to perform the transform + * @param norm: normalization factor + * @param nthreads: number of threads to use + * + * @return unit + */ +value STUB_RFFTF(value vX, value vY, value vD, value vNorm, value vNthreads) +{ + struct caml_ba_array *X = Caml_ba_array_val(vX); + Treal *X_data = reinterpret_cast(X->data); - int d = Long_val(vD); - int n = X->dim[d]; - int norm = Long_val(vNorm); - int nthreads = Long_val(vNthreads); + struct caml_ba_array *Y = Caml_ba_array_val(vY); + std::complex *Y_data = reinterpret_cast *>(Y->data); - shape_t dims; - stride_t stride_in, stride_out; + int d = Long_val(vD); + int n = X->dim[d]; + int norm = Long_val(vNorm); + int nthreads = Long_val(vNthreads); - for (int i = 0; i < X->num_dims; ++i) - { - dims.push_back(static_cast(X->dim[i])); - } + shape_t dims; + stride_t stride_in, stride_out; - size_t multiplier = sizeof(Treal); - for (int i = 0; i < X->num_dims; ++i) - { - stride_in.push_back(c_ndarray_stride_dim(X, i) * multiplier); - } + for (int i = 0; i < X->num_dims; ++i) + { + dims.push_back(static_cast(X->dim[i])); + } + + size_t multiplier = sizeof(Treal); + for (int i = 0; i < X->num_dims; ++i) + { + stride_in.push_back(c_ndarray_stride_dim(X, i) * multiplier); + } - multiplier = sizeof(std::complex); - for (int i = 0; i < Y->num_dims; ++i) + multiplier = sizeof(std::complex); + for (int i = 0; i < Y->num_dims; ++i) + { + stride_out.push_back(c_ndarray_stride_dim(Y, i) * multiplier); + } + + shape_t axes{static_cast(d)}; + { + Treal norm_factor = compute_norm_factor(dims, axes, norm); + try { - stride_out.push_back(c_ndarray_stride_dim(Y, i) * multiplier); + pocketfft::r2c(dims, stride_in, stride_out, axes, pocketfft::FORWARD, + X_data, Y_data, norm_factor, nthreads); } - - shape_t axes{static_cast(d)}; + catch (const std::exception &e) { - Treal norm_factor = compute_norm_factor(dims, axes, norm); - try - { - pocketfft::r2c(dims, stride_in, stride_out, axes, pocketfft::FORWARD, - X_data, Y_data, norm_factor, nthreads); - } - catch (const std::exception &e) - { - caml_failwith(e.what()); // maybe raise an OCaml exception here ?? - } + caml_failwith(e.what()); // maybe raise an OCaml exception here ?? } - - return Val_unit; } - /** - * Backward Real-to-complex FFT - * @param X: input array (complex data) - * @param Y: output array (real data) - * @param d: dimension along which to perform the transform - * @param norm: normalization factor - * @param nthreads: number of threads to use - * - * @return unit - */ - value STUB_RFFTB(value vX, value vY, value vD, value vNorm, value vNthreads) - { - struct caml_ba_array *X = Caml_ba_array_val(vX); - std::complex *X_data = reinterpret_cast *>(X->data); + return Val_unit; +} + +/** + * Backward Real-to-complex FFT + * @param X: input array (complex data) + * @param Y: output array (real data) + * @param d: dimension along which to perform the transform + * @param norm: normalization factor + * @param nthreads: number of threads to use + * + * @return unit + */ +value STUB_RFFTB(value vX, value vY, value vD, value vNorm, value vNthreads) +{ + struct caml_ba_array *X = Caml_ba_array_val(vX); + std::complex *X_data = reinterpret_cast *>(X->data); - struct caml_ba_array *Y = Caml_ba_array_val(vY); - Treal *Y_data = reinterpret_cast(Y->data); + struct caml_ba_array *Y = Caml_ba_array_val(vY); + Treal *Y_data = reinterpret_cast(Y->data); - int d = Long_val(vD); - int n = X->dim[d]; - int norm = Long_val(vNorm); - int nthreads = Long_val(vNthreads); + int d = Long_val(vD); + int n = X->dim[d]; + int norm = Long_val(vNorm); + int nthreads = Long_val(vNthreads); - if (Y->dim[d] != (X->dim[d] - 1) * 2) - caml_failwith("Invalid output dimension for inverse real FFT"); + if (Y->dim[d] != (X->dim[d] - 1) * 2) + caml_failwith("Invalid output dimension for inverse real FFT"); - shape_t dims; - stride_t stride_in, stride_out; + shape_t dims; + stride_t stride_in, stride_out; - int ncomplex = X->dim[d]; - int nreal = Y->dim[d]; + int ncomplex = X->dim[d]; + int nreal = Y->dim[d]; - for (int i = 0; i < X->num_dims; ++i) + for (int i = 0; i < X->num_dims; ++i) + { + if (i == d) { - if (i == d) - { - dims.push_back(static_cast(nreal)); - } - else - { - dims.push_back(static_cast(X->dim[i])); - } + dims.push_back(static_cast(nreal)); } - - size_t multiplier = sizeof(std::complex); - for (int i = 0; i < X->num_dims; ++i) + else { - stride_in.push_back(c_ndarray_stride_dim(X, i) * multiplier); + dims.push_back(static_cast(X->dim[i])); } + } + + size_t multiplier = sizeof(std::complex); + for (int i = 0; i < X->num_dims; ++i) + { + stride_in.push_back(c_ndarray_stride_dim(X, i) * multiplier); + } - multiplier = sizeof(Treal); - for (int i = 0; i < Y->num_dims; ++i) + multiplier = sizeof(Treal); + for (int i = 0; i < Y->num_dims; ++i) + { + stride_out.push_back(c_ndarray_stride_dim(Y, i) * multiplier); + } + + shape_t axes{static_cast(d)}; + { + Treal norm_factor = compute_norm_factor(dims, axes, norm); + try { - stride_out.push_back(c_ndarray_stride_dim(Y, i) * multiplier); + pocketfft::c2r(dims, stride_in, stride_out, axes, pocketfft::BACKWARD, + X_data, Y_data, norm_factor, nthreads); } - - shape_t axes{static_cast(d)}; + catch (const std::exception &e) { - Treal norm_factor = compute_norm_factor(dims, axes, norm); - try - { - pocketfft::c2r(dims, stride_in, stride_out, axes, pocketfft::BACKWARD, - X_data, Y_data, norm_factor, nthreads); - } - catch (const std::exception &e) - { - caml_failwith(e.what()); // maybe raise an OCaml exception here ?? - } + caml_failwith(e.what()); // maybe raise an OCaml exception here ?? } - - return Val_unit; } - /** - * Discrete Cosine Transform - * @param X: input array - * @param Y: output array - * @param d: dimension along which to perform the transform - * @param type: type of DCT (1, 2, 3, or 4) - * @param norm: normalization factor - * @param nthreads: number of threads to use - * - * @return unit - */ - value STUB_RDCT(value vX, value vY, value vD, value vType, value vNorm, value vOrtho, value vNthreads) - { - struct caml_ba_array *X = Caml_ba_array_val(vX); - Treal *X_data = reinterpret_cast(X->data); + return Val_unit; +} - struct caml_ba_array *Y = Caml_ba_array_val(vY); - Treal *Y_data = reinterpret_cast(Y->data); +/** + * Discrete Cosine Transform + * @param X: input array + * @param Y: output array + * @param d: dimension along which to perform the transform + * @param type: type of DCT (1, 2, 3, or 4) + * @param norm: normalization factor + * @param nthreads: number of threads to use + * + * @return unit + */ +value STUB_RDCT(value vX, value vY, value vD, value vType, value vNorm, value vOrtho, value vNthreads) +{ + struct caml_ba_array *X = Caml_ba_array_val(vX); + Treal *X_data = reinterpret_cast(X->data); - int d = Long_val(vD); - int n = X->dim[d]; - int type = Long_val(vType); - if (type < 1 || type > 4) // should not happen as it's checked on the OCaml side - caml_failwith("invalid value for type (must be 1, 2, 3, or 4)"); - int norm = Long_val(vNorm); - bool ortho = Bool_val(vOrtho); - int nthreads = Long_val(vNthreads); + struct caml_ba_array *Y = Caml_ba_array_val(vY); + Treal *Y_data = reinterpret_cast(Y->data); - shape_t dims; - stride_t stride_in, stride_out; + int d = Long_val(vD); + int n = X->dim[d]; + int type = Long_val(vType); + if (type < 1 || type > 4) // should not happen as it's checked on the OCaml side + caml_failwith("invalid value for type (must be 1, 2, 3, or 4)"); + int norm = Long_val(vNorm); + bool ortho = Bool_val(vOrtho); + int nthreads = Long_val(vNthreads); - for (int i = 0; i < X->num_dims; ++i) - { - dims.push_back(static_cast(X->dim[i])); - } + shape_t dims; + stride_t stride_in, stride_out; + + for (int i = 0; i < X->num_dims; ++i) + { + dims.push_back(static_cast(X->dim[i])); + } + + size_t multiplier = sizeof(Treal); + for (int i = 0; i < X->num_dims; ++i) + { + stride_in.push_back(c_ndarray_stride_dim(X, i) * multiplier); + stride_out.push_back(c_ndarray_stride_dim(Y, i) * multiplier); + } - size_t multiplier = sizeof(Treal); - for (int i = 0; i < X->num_dims; ++i) + shape_t axes{static_cast(d)}; + { + Treal norm_factor = (type == 1) ? compute_norm_factor(dims, axes, norm, 2, -1) + : compute_norm_factor(dims, axes, norm, 2); + try { - stride_in.push_back(c_ndarray_stride_dim(X, i) * multiplier); - stride_out.push_back(c_ndarray_stride_dim(Y, i) * multiplier); + pocketfft::dct(dims, stride_in, stride_out, axes, type, + X_data, Y_data, norm_factor, ortho, nthreads); } - - shape_t axes{static_cast(d)}; + catch (const std::exception &e) { - Treal norm_factor = (type == 1) ? compute_norm_factor(dims, axes, norm, 2, -1) - : compute_norm_factor(dims, axes, norm, 2); - try - { - pocketfft::dct(dims, stride_in, stride_out, axes, type, - X_data, Y_data, norm_factor, ortho, nthreads); - } - catch (const std::exception &e) - { - caml_failwith(e.what()); // maybe raise an OCaml exception here ?? - } + caml_failwith(e.what()); // maybe raise an OCaml exception here ?? } - - return Val_unit; } - value STUB_RDCT_bytecode(value *argv, int argn) - { - return STUB_RDCT(argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); - } + return Val_unit; +} - /** - * Discrete Sine Transform - * @param X: input array - * @param Y: output array - * @param d: dimension along which to perform the transform - * @param type: type of DST (1, 2, 3, or 4) - * @param norm: normalization factor - * @param nthreads: number of threads to use - * - * @return unit - */ - value STUB_RDST(value vX, value vY, value vD, value vType, value vNorm, value vOrtho, value vNthreads) - { - struct caml_ba_array *X = Caml_ba_array_val(vX); - Treal *X_data = reinterpret_cast(X->data); +value STUB_RDCT_bytecode(value *argv, int argn) +{ + return STUB_RDCT(argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); +} + +/** + * Discrete Sine Transform + * @param X: input array + * @param Y: output array + * @param d: dimension along which to perform the transform + * @param type: type of DST (1, 2, 3, or 4) + * @param norm: normalization factor + * @param nthreads: number of threads to use + * + * @return unit + */ +value STUB_RDST(value vX, value vY, value vD, value vType, value vNorm, value vOrtho, value vNthreads) +{ + struct caml_ba_array *X = Caml_ba_array_val(vX); + Treal *X_data = reinterpret_cast(X->data); - struct caml_ba_array *Y = Caml_ba_array_val(vY); - Treal *Y_data = reinterpret_cast(Y->data); + struct caml_ba_array *Y = Caml_ba_array_val(vY); + Treal *Y_data = reinterpret_cast(Y->data); - int d = Long_val(vD); - int n = X->dim[d]; - int type = Long_val(vType); - if (type < 1 || type > 4) // should not happen as it's checked on the OCaml side - caml_failwith("invalid value for type (must be 1, 2, 3, or 4)"); - int norm = Long_val(vNorm); - bool ortho = Bool_val(vOrtho); - int nthreads = Long_val(vNthreads); + int d = Long_val(vD); + int n = X->dim[d]; + int type = Long_val(vType); + if (type < 1 || type > 4) // should not happen as it's checked on the OCaml side + caml_failwith("invalid value for type (must be 1, 2, 3, or 4)"); + int norm = Long_val(vNorm); + bool ortho = Bool_val(vOrtho); + int nthreads = Long_val(vNthreads); - shape_t dims; - stride_t stride_in, stride_out; + shape_t dims; + stride_t stride_in, stride_out; - for (int i = 0; i < X->num_dims; ++i) - { - dims.push_back(static_cast(X->dim[i])); - } + for (int i = 0; i < X->num_dims; ++i) + { + dims.push_back(static_cast(X->dim[i])); + } - size_t multiplier = sizeof(Treal); - for (int i = 0; i < X->num_dims; ++i) + size_t multiplier = sizeof(Treal); + for (int i = 0; i < X->num_dims; ++i) + { + stride_in.push_back(c_ndarray_stride_dim(X, i) * multiplier); + stride_out.push_back(c_ndarray_stride_dim(Y, i) * multiplier); + } + + shape_t axes{static_cast(d)}; + { + Treal norm_factor = (type == 1) ? compute_norm_factor(dims, axes, norm, 2, 1) + : compute_norm_factor(dims, axes, norm, 2); + try { - stride_in.push_back(c_ndarray_stride_dim(X, i) * multiplier); - stride_out.push_back(c_ndarray_stride_dim(Y, i) * multiplier); + pocketfft::dst(dims, stride_in, stride_out, axes, type, + X_data, Y_data, norm_factor, ortho, nthreads); } - - shape_t axes{static_cast(d)}; + catch (const std::exception &e) { - Treal norm_factor = (type == 1) ? compute_norm_factor(dims, axes, norm, 2, 1) - : compute_norm_factor(dims, axes, norm, 2); - try - { - pocketfft::dst(dims, stride_in, stride_out, axes, type, - X_data, Y_data, norm_factor, ortho, nthreads); - } - catch (const std::exception &e) - { - caml_failwith(e.what()); // maybe raise an OCaml exception here ?? - } + caml_failwith(e.what()); // maybe raise an OCaml exception here ?? } - - return Val_unit; } - value STUB_RDST_bytecode(value *argv, int argn) - { - return STUB_RDST(argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); - } -} // extern "C" + return Val_unit; +} + +value STUB_RDST_bytecode(value *argv, int argn) +{ + return STUB_RDST(argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); +} #endif // Treal