Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
c46b32e
more null-element fixes for Array<ToT> ops
evaleev Mar 12, 2025
1c4672e
yet more null-element fixes for Array<ToT> ops
evaleev Mar 13, 2025
322525c
fixup clone_or_cast to handle non-range tensors, this should fix the …
evaleev Mar 13, 2025
1290ca0
bump BTAS tag to pull in https://github.com/ValeevGroup/BTAS/pull/183
evaleev Mar 13, 2025
1efccd5
amend clone_or_cast to avoid static_assert(false)
evaleev Mar 14, 2025
53d7703
support for non-char characters in printing
evaleev Mar 14, 2025
ea54acb
Tensor printing structured like curly numpy
evaleev Mar 14, 2025
184a7d7
can't test printing to wcout since boost does not redirect it
evaleev Mar 14, 2025
867e0b0
support for printing Array<ToT> with nbatch>1
evaleev Mar 14, 2025
ba0035a
revert https://github.com/ValeevGroup/tiledarray/commit/c9cc69ffa0958…
evaleev Mar 14, 2025
0b6582f
[cmake] reorg to restore .h/.cpp separation
evaleev Mar 16, 2025
3db8736
introduced to_string(Range) to help with debugging if debugger does n…
evaleev Mar 16, 2025
a061e7d
[unit] tensor_suite/anatomy tests size of Tensor
evaleev Mar 16, 2025
c056cb2
Tensor<T,Allocator>: Allocator must be default constructible
evaleev Mar 16, 2025
b77c7eb
Tensor<T>::clone: optimize for rvalues
evaleev Mar 17, 2025
4578704
[unit] tensor_suite/print: test printing to narrow and wide streams
evaleev Mar 17, 2025
a1c8bf2
cont_engine.h: cosmetic touches
evaleev Mar 17, 2025
0e9bdb8
Tile: introduced use_count and reset + dox cleanup
evaleev Mar 17, 2025
6afad42
to detect some (NOT ALL) deep vs. shallow copy mistakes configure wit…
evaleev Mar 17, 2025
61e98e0
Tensor: cleanup to make it obvious that a copy of empty tensor is a "…
evaleev Mar 17, 2025
523c1ad
Tensor::subt_to(right,...): early exit for empty right
evaleev Mar 17, 2025
e4538a3
detail::has_total_size duplicated detail::has_member_function_total_si…
evaleev Mar 17, 2025
b5f0810
reverts https://github.com/ValeevGroup/tiledarray/commit/bea7b7419d45…
evaleev Mar 17, 2025
62dbd60
reorg https://github.com/ValeevGroup/tiledarray/commit/bea7b7419d454c…
evaleev Mar 17, 2025
89bc0e5
typos
evaleev Mar 17, 2025
129a96d
support TA_ENABLE_TILE_OPS_LOGGING when nbatch > 1
evaleev Mar 17, 2025
b7bb7e1
BinaryWrapper: can move args that were cast
evaleev Mar 17, 2025
3c17e8a
move tensor ostream operators from tensor.h to tensor/operators.h
evaleev Mar 17, 2025
53271ee
more member function type traits for tile concept
evaleev Mar 17, 2025
a1768c5
introduced detail::is_tile_v trait to detect Tile<>
evaleev Mar 17, 2025
730d887
generic and Tile<>-specific nonintrusive mutating ops (_to, inplace_)…
evaleev Mar 17, 2025
0f3afd8
rvalue-optimize inplace tile_ops
evaleev Mar 17, 2025
ecf2e2e
tiny move optimizations in BinaryWrapper
evaleev Mar 17, 2025
b166a48
set minimum Boost version to 1.81, to match the rest of the VRG stack
evaleev Mar 17, 2025
ee2ff1f
fixed btas and kronecker delta Tile API
evaleev Mar 17, 2025
71a51e9
[ci] bump to Ubuntu24/gcc14
evaleev Mar 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ jobs:
strategy:
fail-fast: false
matrix:
os : [ macos-latest, ubuntu-22.04 ]
os : [ macos-latest, ubuntu-24.04 ]
build_type : [ Release, Debug ]
task_backend: [ Pthreads, PaRSEC ]
include:
- os: ubuntu-22.04
cc: /usr/bin/gcc-12
cxx: /usr/bin/g++-12
- os: ubuntu-24.04
cc: /usr/bin/gcc-14
cxx: /usr/bin/g++-14
- os: macos-latest
cc: clang
cxx: clang++
Expand All @@ -36,12 +36,12 @@ jobs:
-DMADNESS_TASK_BACKEND=${{ matrix.task_backend }}
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
-DMPIEXEC_PREFLAGS='--bind-to;none;--allow-run-as-root'
-DCMAKE_PREFIX_PATH="/usr/local/opt/bison;/usr/local/opt/scalapack"
-DCMAKE_PREFIX_PATH="/usr/local/opt/bison;/usr/local/opt/scalapack;/usr/local/opt/boost"
-DTA_ASSERT_POLICY=TA_ASSERT_THROW
-DENABLE_SCALAPACK=ON

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- name: Host system info
shell: bash
Expand All @@ -55,19 +55,28 @@ jobs:
echo "MPIEXEC=/opt/homebrew/bin/mpiexec" >> $GITHUB_ENV

- name: Install prerequisites Ubuntu packages
if: ${{ matrix.os == 'ubuntu-22.04' }}
if: ${{ matrix.os == 'ubuntu-24.04' }}
run: |
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null
sudo apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main"
sudo apt-get update
sudo apt-get -y install ninja-build g++-12 liblapack-dev libboost-dev libboost-serialization-dev libboost-random-dev libeigen3-dev openmpi-bin libopenmpi-dev libtbb-dev ccache flex bison libscalapack-mpi-dev cmake doxygen
sudo apt-get -y install ninja-build g++-14 liblapack-dev libboost-dev libboost-serialization-dev libboost-random-dev libeigen3-dev openmpi-bin libopenmpi-dev libtbb-dev ccache flex bison libscalapack-mpi-dev cmake doxygen
sudo ln -s /usr/lib/x86_64-linux-gnu/libscalapack-openmpi.so /usr/lib/x86_64-linux-gnu/libscalapack.so
echo "MPIEXEC=/usr/bin/mpiexec" >> $GITHUB_ENV

- name: Setup ccache
uses: hendrikmuhs/[email protected]
- name: Prepare ccache timestamp
id: ccache_cache_timestamp
shell: cmake -P {0}
run: |
string(TIMESTAMP current_date "%Y-%m-%d-%H;%M;%S" UTC)
message("::set-output name=timestamp::${current_date}")
- name: Setup ccache cache files
uses: actions/cache@v4
with:
key: ccache-${{ matrix.os }}-${{ matrix.build_type }}-${{ matrix.task_backend }}
path: ${{github.workspace}}/build/.ccache
key: ${{ matrix.config.name }}-ccache-${{ steps.ccache_cache_timestamp.outputs.timestamp }}
restore-keys: |
${{ matrix.config.name }}-ccache-

- name: "Configure build: ${{ env.BUILD_CONFIG }}"
shell: bash
Expand Down
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ add_feature_info(TENSOR_MEM_TRACE TA_TENSOR_MEM_TRACE "instrumented tracing of T
option(TA_TENSOR_MEM_PROFILE "Turn on instrumented profiling of TA::Tensor memory use" ${TA_TENSOR_MEM_TRACE})
add_feature_info(TENSOR_MEM_PROFILE TA_TENSOR_MEM_PROFILE "instrumented profiling of TA::Tensor memory use")

option(TA_TENSOR_ASSERT_NO_MUTABLE_OPS_WHILE_SHARED "Turn on TA_ASSERT that no mutable operations occur on TA::{Tensor,Tile} objects that share data" OFF)
add_feature_info(TENSOR_ASSERT_NO_MUTABLE_OPS_WHILE_SHARED TA_TENSOR_ASSERT_NO_MUTABLE_OPS_WHILE_SHARED "TA_ASSERT that no mutable operations occur on TA::{Tensor,Tile} objects that share data")

option(TA_EXPERT "TiledArray Expert mode: disables automatically downloading or building dependencies" OFF)

option(TA_SIGNED_1INDEX_TYPE "Enables the use of signed 1-index coordinate type (OFF in 1.0.0-alpha.2 and older)" ON)
Expand Down
6 changes: 3 additions & 3 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@ Both methods are supported. However, for most users we _strongly_ recommend to b
- [CMake](https://cmake.org/), version 3.15 or higher; if {CUDA,HIP} support is needed, CMake {3.18,3.21} or higher is required.
- [Git](https://git-scm.com/) 1.8 or later (required to obtain TiledArray and MADNESS source code from GitHub)
- [Eigen](http://eigen.tuxfamily.org/), version 3.3.5 or higher; if CUDA is enabled then 3.3.7 is required (will be downloaded automatically, if missing)
- [Boost libraries](www.boost.org/), version 1.59 or higher (will be downloaded automatically, if missing). The following principal Boost components are used:
- [Boost libraries](www.boost.org/), version 1.81 or higher (will be downloaded automatically, if missing). The following principal Boost components are used:
- Boost.Iterator: header-only
- Boost.Container: header-only
- Boost.Test: header-only or (optionally) as a compiled library, *only used for unit testing*
- Boost.Range: header-only, *only used for unit testing*
- [Range-V3](https://github.com/ericniebler/range-v3.git) -- a Ranges library that served as the basis for Ranges component of C++20 and later.
- [BTAS](http://github.com/ValeevGroup/BTAS), tag 1cfcb12647c768ccd83b098c64cda723e1275e49 . If usable BTAS installation is not found, TiledArray will download and compile
- [BTAS](http://github.com/ValeevGroup/BTAS), tag 62d57d9b1e0c733b4b547bc9cfdd07047159dbca . If usable BTAS installation is not found, TiledArray will download and compile
BTAS from source. *This is the recommended way to compile BTAS for all users*.
- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag ef97ad1f0080da04f9592f03185c1a331cd5e001 .
- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag bd84a52766ab497dedc2f15f2162fb0eb7ec4653 .
Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray.
If usable MADNESS installation is not found, TiledArray will download and compile
MADNESS from source. *This is the recommended way to compile MADNESS for all users*.
Expand Down
15 changes: 15 additions & 0 deletions external/boost.cmake
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
# -*- mode: cmake -*-

# update the Boost version that we can tolerate
if (NOT DEFINED Boost_OLDEST_BOOST_VERSION)
set(Boost_OLDEST_BOOST_VERSION ${TA_OLDEST_BOOST_VERSION})
else()
if (${Boost_OLDEST_BOOST_VERSION} VERSION_LESS ${TA_OLDEST_BOOST_VERSION})
if (DEFINED CACHE{Boost_OLDEST_BOOST_VERSION})
set(Boost_OLDEST_BOOST_VERSION "${TA_OLDEST_BOOST_VERSION}" CACHE STRING "Oldest Boost version to use" FORCE)
else()
set(Boost_OLDEST_BOOST_VERSION ${TA_OLDEST_BOOST_VERSION})
endif()
endif()
endif()

# Boost can be discovered by every (sub)package but only the top package can *build* it ...
# in either case must declare the components used by TA
set(required_components
Expand Down
8 changes: 6 additions & 2 deletions external/versions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ set(TA_TRACKED_MADNESS_PREVIOUS_TAG ef97ad1f0080da04f9592f03185c1a331cd5e001)
set(TA_TRACKED_MADNESS_VERSION 0.10.1)
set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1)

set(TA_TRACKED_BTAS_TAG 1cfcb12647c768ccd83b098c64cda723e1275e49)
set(TA_TRACKED_BTAS_PREVIOUS_TAG 4b3757cc2b5862f93589afc1e37523e543779c7a)
set(TA_TRACKED_BTAS_TAG 62d57d9b1e0c733b4b547bc9cfdd07047159dbca)
set(TA_TRACKED_BTAS_PREVIOUS_TAG 1cfcb12647c768ccd83b098c64cda723e1275e49)

set(TA_TRACKED_LIBRETT_TAG 6eed30d4dd2a5aa58840fe895dcffd80be7fbece)
set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 354e0ccee54aeb2f191c3ce2c617ebf437e49d83)
Expand All @@ -34,3 +34,7 @@ set(TA_TRACKED_RANGEV3_PREVIOUS_TAG 2e0591c57fce2aca6073ad6e4fdc50d841827864)
set(TA_TRACKED_TTG_URL https://github.com/TESSEorg/ttg)
set(TA_TRACKED_TTG_TAG 3fe4a06dbf4b05091269488aab38223da1f8cb8e)
set(TA_TRACKED_TTG_PREVIOUS_TAG 26da9b40872660b864794658d4fdeee1a95cb4d6)

# oldest Boost version we can tolerate ... an older installed Boost is fine, but if Boost is missing, building it from source requires a much newer version
# SeQuant requires at least 1.81, so go with that
set(TA_OLDEST_BOOST_VERSION 1.81)
50 changes: 29 additions & 21 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ TiledArray/expressions/index_list.h
TiledArray/external/btas.h
TiledArray/external/madness.h
TiledArray/external/umpire.h
TiledArray/host/env.cpp
TiledArray/host/env.h
TiledArray/math/blas.h
TiledArray/math/gemm_helper.h
Expand Down Expand Up @@ -162,6 +161,8 @@ TiledArray/tensor/complex.h
TiledArray/tensor/kernels.h
TiledArray/tensor/operators.h
TiledArray/tensor/permute.h
TiledArray/tensor/print.ipp
TiledArray/tensor/print.h
TiledArray/tensor/shift_wrapper.h
TiledArray/tensor/tensor.h
TiledArray/tensor/tensor_interface.h
Expand Down Expand Up @@ -204,11 +205,32 @@ TiledArray/util/time.h
TiledArray/util/vector.h
)

set(TILEDARRAY_SOURCE_FILES
TiledArray/array_impl.cpp
TiledArray/dist_array.cpp
TiledArray/range.cpp
TiledArray/sparse_shape.cpp
TiledArray/tensor_impl.cpp
TiledArray/tiledarray.cpp
TiledArray/version.cpp
TiledArray/einsum/index.cpp
TiledArray/expressions/permopt.cpp
TiledArray/host/env.cpp
TiledArray/math/linalg/basic.cpp
TiledArray/math/linalg/rank-local.cpp
TiledArray/tensor/print.cpp
TiledArray/tensor/tensor.cpp
TiledArray/util/backtrace.cpp
TiledArray/util/bug.cpp
TiledArray/util/ptr_registry.cpp
TiledArray/util/random.cpp
TiledArray/util/threads.cpp
)

if(TILEDARRAY_HAS_HIP OR TILEDARRAY_HAS_CUDA)
list(APPEND TILEDARRAY_HEADER_FILES
TiledArray/external/device.h
TiledArray/external/librett.h
TiledArray/device/blas.cpp
TiledArray/device/blas.h
TiledArray/device/btas.h
TiledArray/device/btas_um_tensor.h
Expand All @@ -219,32 +241,18 @@ if(TILEDARRAY_HAS_HIP OR TILEDARRAY_HAS_CUDA)
TiledArray/device/kernel/thrust/reduce_kernel.h
TiledArray/device/platform.h
TiledArray/device/thrust.h
TiledArray/device/um_storage.h)
TiledArray/device/um_storage.h
)
list(APPEND TILEDARRAY_SOURCE_FILES
TiledArray/device/blas.cpp
)
if(TILEDARRAY_HAS_CUDA)
list(APPEND TILEDARRAY_HEADER_FILES
TiledArray/external/cuda.h
TiledArray/device/cpu_cuda_vector.h)
endif(TILEDARRAY_HAS_CUDA)
endif(TILEDARRAY_HAS_HIP OR TILEDARRAY_HAS_CUDA)

set(TILEDARRAY_SOURCE_FILES
TiledArray/tiledarray.cpp
TiledArray/tensor/tensor.cpp
TiledArray/sparse_shape.cpp
TiledArray/tensor_impl.cpp
TiledArray/array_impl.cpp
TiledArray/dist_array.cpp
TiledArray/version.cpp
TiledArray/einsum/index.cpp
TiledArray/expressions/permopt.cpp
TiledArray/math/linalg/basic.cpp
TiledArray/math/linalg/rank-local.cpp
TiledArray/util/backtrace.cpp
TiledArray/util/bug.cpp
TiledArray/util/ptr_registry.cpp
TiledArray/util/random.cpp
TiledArray/util/threads.cpp
)
# feed TILEDARRAY_GIT_REVISION and TILEDARRAY_GIT_DESCRIPTION to TiledArray/version.cpp only to avoid recompiling everything
set_source_files_properties(
TiledArray/version.cpp
Expand Down
11 changes: 7 additions & 4 deletions src/TiledArray/array_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,9 @@ bool operator!=(const TileReference<Impl>& a, const TileReference<Impl>& b) {
}

/// redirect operator to std::ostream for TileReference objects
template <typename Impl>
std::ostream& operator<<(std::ostream& os, const TileReference<Impl>& a) {
template <typename Char, typename CharTraits, typename Impl>
std::basic_ostream<Char, CharTraits>& operator<<(
std::basic_ostream<Char, CharTraits>& os, const TileReference<Impl>& a) {
os << a.get();
return os;
}
Expand Down Expand Up @@ -192,8 +193,10 @@ bool operator!=(const TileConstReference<Impl>& a,
}

/// redirect operator to std::ostream for TileConstReference objects
template <typename Impl>
std::ostream& operator<<(std::ostream& os, const TileConstReference<Impl>& a) {
template <typename Char, typename CharTraits, typename Impl>
std::basic_ostream<Char, CharTraits>& operator<<(
std::basic_ostream<Char, CharTraits>& os,
const TileConstReference<Impl>& a) {
os << a.get();
return os;
}
Expand Down
5 changes: 3 additions & 2 deletions src/TiledArray/bitset.h
Original file line number Diff line number Diff line change
Expand Up @@ -613,8 +613,9 @@ Bitset<Block> operator^(Bitset<Block> left, const Bitset<Block>& right) {
return left;
}

template <typename Block>
std::ostream& operator<<(std::ostream& os, const Bitset<Block>& bitset) {
template <typename Char, typename CharTraits, typename Block>
std::basic_ostream<Char, CharTraits>& operator<<(
std::basic_ostream<Char, CharTraits>& os, const Bitset<Block>& bitset) {
os << std::hex;
for (long i = bitset.num_blocks() - 1l; i >= 0l; --i)
os << std::setfill('0') << std::setw(sizeof(Block) * 2) << bitset.get()[i]
Expand Down
3 changes: 3 additions & 0 deletions src/TiledArray/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@
/* Is TA::Tensor memory tracing enabled? */
#cmakedefine TA_TENSOR_MEM_TRACE 1

/* TA_ASSERT that no mutable operations occur on TA::{Tensor,Tile} objects that share data? */
#cmakedefine TA_TENSOR_ASSERT_NO_MUTABLE_OPS_WHILE_SHARED 1

/* Is TTG available? */
#cmakedefine TILEDARRAY_HAS_TTG 1

Expand Down
4 changes: 3 additions & 1 deletion src/TiledArray/dense_shape.h
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,9 @@ constexpr inline bool is_replicated(World& world, const DenseShape& t) {
/// \param os The output stream
/// \param shape the DenseShape object
/// \return A reference to the output stream
inline std::ostream& operator<<(std::ostream& os, const DenseShape& shape) {
template <typename Char, typename CharTraits>
inline std::basic_ostream<Char, CharTraits>& operator<<(
std::basic_ostream<Char, CharTraits>& os, const DenseShape& shape) {
os << "DenseShape:" << std::endl;
return os;
}
Expand Down
7 changes: 4 additions & 3 deletions src/TiledArray/dist_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -1776,9 +1776,10 @@ extern template class DistArray<Tensor<std::complex<float>>, SparsePolicy>;
/// \param a The array to be put in the output stream
/// \return A reference to the output stream
/// \note this is a collective operation
template <typename Tile, typename Policy>
inline std::ostream& operator<<(std::ostream& os,
const DistArray<Tile, Policy>& a) {
template <typename Char, typename CharTraits, typename Tile, typename Policy>
inline std::basic_ostream<Char, CharTraits>& operator<<(
std::basic_ostream<Char, CharTraits>& os,
const DistArray<Tile, Policy>& a) {
if (a.world().rank() == 0) {
for (std::size_t i = 0; i < a.size(); ++i)
if (!a.is_zero(i)) {
Expand Down
27 changes: 12 additions & 15 deletions src/TiledArray/expressions/cont_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,10 +279,9 @@ class ContEngine : public BinaryEngine<Derived> {
outer_size(left_indices_), outer_size(right_indices_),
(!implicit_permute_outer_ ? std::move(outer_perm) : Permutation{}));
} else {

auto make_total_perm = [this]() -> BipartitePermutation {
if (this->product_type() != TensorProduct::Contraction
|| this->implicit_permute_inner_)
if (this->product_type() != TensorProduct::Contraction ||
this->implicit_permute_inner_)
return this->implicit_permute_outer_
? BipartitePermutation()
: BipartitePermutation(outer(this->perm_));
Expand All @@ -299,11 +298,9 @@ class ContEngine : public BinaryEngine<Derived> {
auto total_perm = make_total_perm();

// factor_ is absorbed into inner_tile_nonreturn_op_
op_ = op_type(
left_op, right_op, scalar_type(1), outer_size(indices_),
outer_size(left_indices_), outer_size(right_indices_),
total_perm,
this->element_nonreturn_op_);
op_ = op_type(left_op, right_op, scalar_type(1), outer_size(indices_),
outer_size(left_indices_), outer_size(right_indices_),
total_perm, this->element_nonreturn_op_);
}
trange_ = ContEngine_::make_trange(outer_perm);
shape_ = ContEngine_::make_shape(outer_perm);
Expand All @@ -314,10 +311,9 @@ class ContEngine : public BinaryEngine<Derived> {
op_ = op_type(left_op, right_op, factor_, outer_size(indices_),
outer_size(left_indices_), outer_size(right_indices_));
} else {

auto make_total_perm = [this]() -> BipartitePermutation {
if (this->product_type() != TensorProduct::Contraction
|| this->implicit_permute_inner_)
if (this->product_type() != TensorProduct::Contraction ||
this->implicit_permute_inner_)
return {};

// Here,
Expand Down Expand Up @@ -547,7 +543,7 @@ class ContEngine : public BinaryEngine<Derived> {
inner_size(this->right_indices_));
this->element_nonreturn_op_ =
[contrreduce_op, permute_inner = this->product_type() !=
TensorProduct::Contraction](
TensorProduct::Contraction](
result_tile_element_type& result,
const left_tile_element_type& left,
const right_tile_element_type& right) {
Expand Down Expand Up @@ -582,11 +578,11 @@ class ContEngine : public BinaryEngine<Derived> {
[mult_op, outer_prod](result_tile_element_type& result,
const left_tile_element_type& left,
const right_tile_element_type& right) {
TA_ASSERT(outer_prod == TensorProduct::Hadamard ||
outer_prod == TensorProduct::Contraction);
if (outer_prod == TensorProduct::Hadamard)
result = mult_op(left, right);
else {
TA_ASSERT(outer_prod == TensorProduct::Hadamard ||
outer_prod == TensorProduct::Contraction);
else { // outer_prod == TensorProduct::Contraction
// there is currently no fused MultAdd ternary Op, only Add
// and Mult thus implement this as 2 separate steps
// TODO optimize by implementing (ternary) MultAdd
Expand Down Expand Up @@ -677,6 +673,7 @@ class ContEngine : public BinaryEngine<Derived> {
const left_tile_element_type& left,
const right_tile_element_type& right) {
if (outer_prod == TensorProduct::Contraction) {
// TODO implement X-permuting AXPY
if (empty(result))
result = scal_op(left, right);
else {
Expand Down
4 changes: 2 additions & 2 deletions src/TiledArray/expressions/expr_trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ class ExprTraceTarget {
/// \param os The output stream for the expression trace
/// \param tsr The tensor that will be the target of the expression
/// \return The expression trace object
template <typename A, bool Alias>
inline ExprTraceTarget operator<<(std::ostream& os,
template <typename Char, typename CharTraits, typename A, bool Alias>
inline ExprTraceTarget operator<<(std::basic_ostream<Char, CharTraits>& os,
const TsrExpr<A, Alias>& tsr) {
return ExprTraceTarget(os, tsr.annotation());
}
Expand Down
Loading
Loading