diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index b691b96996d0..1747a9fe0efd 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -45,7 +45,7 @@ jobs:
       uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@17a820bf2e43b47be2c72b39cc905417bc1ab6d0 # v3.28.6
+      uses: github/codeql-action/init@dd746615b3b9d728a6a37ca2045b68ca76d4841a # v3.28.8
       with:
         languages: ${{ matrix.language }}
         build-mode: ${{ matrix.build-mode }}
@@ -108,6 +108,6 @@ jobs:
           ninja -j 16
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@17a820bf2e43b47be2c72b39cc905417bc1ab6d0 # v3.28.6
+      uses: github/codeql-action/analyze@dd746615b3b9d728a6a37ca2045b68ca76d4841a # v3.28.8
       with:
         category: "/language:${{matrix.language}}"
diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 790aee6fcded..06ace5e1b065 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -66,6 +66,6 @@ jobs:
 
       # Upload the results to GitHub's code scanning dashboard.
       - name: "Upload to code-scanning"
-        uses: github/codeql-action/upload-sarif@17a820bf2e43b47be2c72b39cc905417bc1ab6d0 # v3.28.6
+        uses: github/codeql-action/upload-sarif@dd746615b3b9d728a6a37ca2045b68ca76d4841a # v3.28.8
         with:
           sarif_file: results.sarif
diff --git a/cmake/ProjectCompilerPostConfig.cmake b/cmake/ProjectCompilerPostConfig.cmake
index 0b73eba8f904..44647d503632 100644
--- a/cmake/ProjectCompilerPostConfig.cmake
+++ b/cmake/ProjectCompilerPostConfig.cmake
@@ -76,6 +76,7 @@ set(promoted_warnings
     sequence-point
     sign-compare
     strict-aliasing
+    switch
     type-limits
     uninitialized
     unused-function
@@ -95,4 +96,6 @@ elseif("${Trilinos_WARNINGS_MODE}" STREQUAL "ERROR")
     disable_warnings_for_deprecated_packages()
 endif()
 
-disable_warnings("${explicitly_disabled_warnings}")
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    disable_warnings("${explicitly_disabled_warnings}")
+endif()
diff --git a/packages/amesos2/example/SimpleSolve_File.cpp b/packages/amesos2/example/SimpleSolve_File.cpp
index 347908857fed..e3a13418ddf0 100644
--- a/packages/amesos2/example/SimpleSolve_File.cpp
+++ b/packages/amesos2/example/SimpleSolve_File.cpp
@@ -64,6 +64,7 @@ int main(int argc, char *argv[]) {
 
   Teuchos::oblackholestream blackhole;
 
+  bool multi_solve     = false;
   bool printMatrix     = false;
   bool printSolution   = false;
   bool checkSolution   = false;
@@ -83,6 +84,7 @@ int main(int argc, char *argv[]) {
   cmdp.setOption("rhs_filename",&rhs_filename,"Filename for Matrix-Market right-hand-side.");
   cmdp.setOption("solvername",&solvername,"Name of solver.");
   cmdp.setOption("xml_filename",&xml_filename,"XML Filename for Solver parameters.");
+  cmdp.setOption("multi-solve","no-multi-solve",&multi_solve,"Test multiple numFacto & solve per symbolic.");
   cmdp.setOption("print-matrix","no-print-matrix",&printMatrix,"Print the full matrix after reading it.");
   cmdp.setOption("print-solution","no-print-solution",&printSolution,"Print solution vector after solve.");
   cmdp.setOption("check-solution","no-check-solution",&checkSolution,"Check solution vector after solve.");
@@ -104,7 +106,7 @@ int main(int argc, char *argv[]) {
   const size_t numVectors = 1;
 
   // Read matrix
-  RCP<const MAT> A = Tpetra::MatrixMarket::Reader<MAT>::readSparseFile(mat_filename, comm);
+  RCP<MAT> A = Tpetra::MatrixMarket::Reader<MAT>::readSparseFile(mat_filename, comm);
 
   // get the map (Range Map used for both X & B)
   RCP<const Map<LO,GO> > rngmap = A->getRangeMap();
@@ -217,11 +219,34 @@ int main(int argc, char *argv[]) {
     stackedTimer = rcp(new Teuchos::StackedTimer("Amesos2 SimpleSolve-File"));
     Teuchos::TimeMonitor::setStackedTimer(stackedTimer);
   }
-  solver->symbolicFactorization().numericFactorization().solve();
+  solver->symbolicFactorization(); comm->barrier();
+  solver->numericFactorization();  comm->barrier();
+  solver->solve(); comm->barrier();
+  if (multi_solve) {
+    {
+      // change (1,1) diagonal entry value
+      Teuchos::Array<GO> gblColIndsBuf (1);
+      Teuchos::Array<Scalar> valsBuf (1);
+      valsBuf[0] = 7.0;
+      gblColIndsBuf[0] = 0;
+
+      Teuchos::ArrayView<GO> gblColInds = gblColIndsBuf.view (0, 1);
+      Teuchos::ArrayView<Scalar> vals = valsBuf.view (0, 1);
+
+      A->resumeFill();
+      A->replaceGlobalValues (0, gblColInds, vals);
+      A->fillComplete();
+    }
+    // perform numeric for the second time
+    solver->numericFactorization();
+
+    // change RHS, and re-do solve
+    B->putScalar(10);
+    solver->solve();
+  }
   if(useStackedTimer) {
     stackedTimer->stopBaseTimer();
   }
-
   if( printSolution ){
     // Print the solution
     RCP<Map<LO,GO> > root_map
diff --git a/packages/amesos2/src/Amesos2_KLU2_def.hpp b/packages/amesos2/src/Amesos2_KLU2_def.hpp
index 1244a262230f..f75488cb3d16 100644
--- a/packages/amesos2/src/Amesos2_KLU2_def.hpp
+++ b/packages/amesos2/src/Amesos2_KLU2_def.hpp
@@ -453,7 +453,7 @@ KLU2<Matrix,Vector>::loadA_impl(EPhase current_phase)
         Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
     }
 
-    local_ordinal_type nnz_ret = 0;
+    local_ordinal_type nnz_ret = -1;
     bool gather_supported = (this->matrixA_->getComm()->getSize() > 1 && (std::is_same<scalar_type, float>::value || std::is_same<scalar_type, double>::value));
     {
 #ifdef HAVE_AMESOS2_TIMERS
diff --git a/packages/amesos2/src/Amesos2_ShyLUBasker_def.hpp b/packages/amesos2/src/Amesos2_ShyLUBasker_def.hpp
index 9b2a64cee1b9..a3b495f04329 100644
--- a/packages/amesos2/src/Amesos2_ShyLUBasker_def.hpp
+++ b/packages/amesos2/src/Amesos2_ShyLUBasker_def.hpp
@@ -587,7 +587,7 @@ ShyLUBasker<Matrix,Vector>::loadA_impl(EPhase current_phase)
       Kokkos::resize(colptr_view_, this->globalNumCols_ + 1); //this will be wrong for case of gapped col ids, e.g. 0,2,4,9; num_cols = 10 ([0,10)) but num GIDs = 4...
     }
 
-    local_ordinal_type nnz_ret = 0;
+    local_ordinal_type nnz_ret = -1;
     bool gather_supported = (this->matrixA_->getComm()->getSize() > 1 && (std::is_same<scalar_type, float>::value || std::is_same<scalar_type, double>::value));
     {
     #ifdef HAVE_AMESOS2_TIMERS
diff --git a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp
index 3a3fa27547f4..98e5610387e1 100644
--- a/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp
+++ b/packages/amesos2/src/Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp
@@ -98,16 +98,18 @@ namespace Amesos2 {
                     const EPhase current_phase) const
     {
       typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type;
-      auto rowMap = this->mat_->getRowMap();
-      auto colMap = this->mat_->getColMap();
-      auto rowComm = rowMap->getComm();
-      auto colComm = colMap->getComm();
-
+      using Teuchos::RCP;
+      using Teuchos::rcp;
 #ifdef HAVE_AMESOS2_TIMERS
       auto reindexTimer = Teuchos::TimeMonitor::getNewTimer("Time to re-index matrix gids");
       Teuchos::TimeMonitor ReindexTimer(*reindexTimer);
 #endif
 
+      auto rowMap = this->mat_->getRowMap();
+      auto colMap = this->mat_->getColMap();
+      auto rowComm = rowMap->getComm();
+      auto colComm = colMap->getComm();
+
       GlobalOrdinal indexBase = rowMap->getIndexBase();
       GlobalOrdinal numDoFs = this->mat_->getGlobalNumRows();
       LocalOrdinal nRows = this->mat_->getLocalNumRows();
@@ -120,12 +122,17 @@ namespace Amesos2 {
         global_ordinal_t frow = tmpMap->getMinGlobalIndex();
 
         // Create new GID list for RowMap
-        Kokkos::View<global_ordinal_t*, HostExecSpaceType> rowIndexList ("indexList", nRows);
+        Kokkos::View<global_ordinal_t*, HostExecSpaceType> rowIndexList ("rowIndexList", nRows);
         for (local_ordinal_t k = 0; k < nRows; k++) {
           rowIndexList(k) = frow+k; // based on index-base of rowMap
         }
         // Create new GID list for ColMap
-        Kokkos::View<global_ordinal_t*, HostExecSpaceType> colIndexList ("indexList", nCols);
+        Kokkos::View<global_ordinal_t*, HostExecSpaceType> colIndexList ("colIndexList", nCols);
+        // Pre-fill so that column GIDs that are not among the row GIDs can be detected:
+        // they are all assigned to the (n+1)-th column, i.e. numDoFs+indexBase.
+        for (local_ordinal_t k = 0; k < nCols; k++) {
+          colIndexList(k) = numDoFs+indexBase;
+        }
         typedef Tpetra::MultiVector<global_ordinal_t,
                                     local_ordinal_t,
                                     global_ordinal_t,
@@ -307,9 +314,11 @@ namespace Amesos2 {
                 recvDispls(p+1) = recvDispls(p) + recvCounts(p);
               }
             }
-            // -- convert to global colids & convert to 0-base
+            // -- convert to global colids & ** convert to base-zero **
             KV_GO lclColind_ ("localColind_", lclColind.extent(0));
-            for (int i = 0; i < int(lclColind.extent(0)); i++) lclColind_(i) = (colMap->getGlobalElement((lclColind(i))) - colIndexBase);
+            for (int i = 0; i < int(lclColind.extent(0)); i++) {
+              lclColind_(i) = (colMap->getGlobalElement((lclColind(i))) - colIndexBase);
+            }
             if (column_major || need_to_perm) {
               Kokkos::resize(indices_t, indices.extent(0));
               Teuchos::gatherv<int, LocalOrdinal> (lclColind_.data(), lclColind_.extent(0), indices_t.data(),
@@ -326,6 +335,7 @@ namespace Amesos2 {
             Teuchos::RCP< Teuchos::Time > gatherTime = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(transpose index)");
             Teuchos::TimeMonitor GatherTimer(*gatherTime);
 #endif
+            // (note: column indices are now in base-0)
             if (column_major) {
               // Map to transpose
               Kokkos::resize(transpose_map, ret);
@@ -347,9 +357,14 @@ namespace Amesos2 {
                 int i = perm_l2g(row);
                 for (int k=pointers_t(i); k<pointers_t(i+1); k++) {
                   int col = indices_t(k);
-                  transpose_map(k) = pointers(1+col);
-                  indices(pointers(1+col)) = row;
-                  pointers(1+col) ++;
+                  if (col < nRows) {
+                    transpose_map(k) = pointers(1+col);
+                    indices(pointers(1+col)) = row;
+                    pointers(1+col) ++;
+                  } else {
+                    // extra columns
+                    transpose_map(k) = -1;
+                  }
                 }
               }
             } else if (need_to_perm) {
@@ -366,9 +381,13 @@ namespace Amesos2 {
                 int row = perm_g2l(i);
                 for (int k=pointers_t(i); k<pointers_t(i+1); k++) {
                   int col = indices_t(k);
-                  transpose_map(k) = pointers(1+row);
-                  indices(pointers(1+row)) = col;
-                  pointers(1+row) ++;
+                  if (col < nRows) {
+                    transpose_map(k) = pointers(1+row);
+                    indices(pointers(1+row)) = col;
+                    pointers(1+row) ++;
+                  } else {
+                    transpose_map(k) = -1;
+                  }
                 }
               }
             } else {
@@ -409,7 +428,9 @@ namespace Amesos2 {
 #endif
             if (transpose_map.extent(0) > 0) {
               for (int k=0; k<ret; k++) {
-                nzvals(transpose_map(k)) = nzvals_t(k);
+                if (transpose_map(k) >= 0) {
+                  nzvals(transpose_map(k)) = nzvals_t(k);
+                }
               }
             }
           }
diff --git a/packages/amesos2/test/solvers/CMakeLists.txt b/packages/amesos2/test/solvers/CMakeLists.txt
index c7b1b2300346..17a7d73e66ff 100644
--- a/packages/amesos2/test/solvers/CMakeLists.txt
+++ b/packages/amesos2/test/solvers/CMakeLists.txt
@@ -390,14 +390,22 @@ IF (${PACKAGE_NAME}_ENABLE_ShyLU_NodeBasker)
     STANDARD_PASS_OUTPUT
     )
 
-    ##Copy shylubasker_test.xml, but do not test
-    ##depends on not included matrices
+    ##Copy shylubasker_test.xml
     TRIBITS_COPY_FILES_TO_BINARY_DIR(SolverTestCopyShyLUBaskerFiles
     SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}
     SOURCE_FILES shylubasker_test.xml
     EXEDEPS Solver_Test
     )
 
+    ##Test only if ShyLU-Basker is enabled
+    TRIBITS_ADD_TEST(
+    Solver_Test
+    NAME ShyLUBasker_Solver_Test
+    ARGS "--xml-params=shylubasker_test.xml --filedir=${CMAKE_CURRENT_BINARY_DIR}/../matrices/ --multiple-solves --refactor"
+    STANDARD_PASS_OUTPUT
+    NUM_MPI_PROCS 2
+    COMM serial mpi
+    )
 ENDIF()
 
 ##### MUMPS Tests ####
diff --git a/packages/amesos2/test/solvers/shylubasker_test.xml b/packages/amesos2/test/solvers/shylubasker_test.xml
index 5aab6ec3770d..601f268fb2ad 100644
--- a/packages/amesos2/test/solvers/shylubasker_test.xml
+++ b/packages/amesos2/test/solvers/shylubasker_test.xml
@@ -6,7 +6,7 @@
 
   <!-- Wathen120 is a bad performance matrix that is symm with nice supernodes -->
   <ParameterList name="wathen120.mtx">
-    <ParameterList name="Basker">
+    <ParameterList name="ShyLUBasker">
       
       <!-- Test Epetra objects first -->
       <ParameterList name="epetra">
@@ -221,13 +221,13 @@
 	   </ParameterList> <!-- end solver_run_params -->
 	</ParameterList> <!-- end run-int-long-p#-default -->
       </ParameterList> <!-- end tpetra -->
-    </ParameterList> <!-- end Basker -->
+    </ParameterList> <!-- end ShyLUBasker -->
   </ParameterList> <!-- end wathen120 -->
 
 
   <!-- Asic_680ks is a key matrix with medium performance first btf blk small -->
   <ParameterList name="asic_680ks.mtx">
-    <ParameterList name="Basker">
+    <ParameterList name="ShyLUBasker">
       
       <!-- Test Epetra objects first -->
       <ParameterList name="epetra">
@@ -442,12 +442,12 @@
 	   </ParameterList> <!-- end solver_run_params -->
 	</ParameterList> <!-- end run-int-long-p#-default -->
       </ParameterList> <!-- end tpetra -->
-    </ParameterList> <!-- end Basker -->
+    </ParameterList> <!-- end ShyLUBasker -->
   </ParameterList> <!-- end asic680ks -->
 
   <!-- G2 Very bad matrix sym large supernodes -->
   <ParameterList name="G2_circuit.mtx">
-    <ParameterList name="Basker">
+    <ParameterList name="ShyLUBasker">
       
       <!-- Test Epetra objects first -->
       <ParameterList name="epetra">
@@ -662,12 +662,12 @@
 	   </ParameterList> <!-- end solver_run_params -->
 	</ParameterList> <!-- end run-int-long-p#-default -->
       </ParameterList> <!-- end tpetra -->
-    </ParameterList> <!-- end Basker -->
+    </ParameterList> <!-- end ShyLUBasker -->
   </ParameterList> <!-- end G2_circuit -->
 
   <!-- Private Large Power Simulation Matrix, good very btf matrix -->
   <ParameterList name="power0.mtx">
-    <ParameterList name="Basker">
+    <ParameterList name="ShyLUBasker">
       
       <!-- Test Epetra objects first -->
       <ParameterList name="epetra">
@@ -882,12 +882,12 @@
 	   </ParameterList> <!-- end solver_run_params -->
 	</ParameterList> <!-- end run-int-long-p#-default -->
       </ParameterList> <!-- end tpetra -->
-    </ParameterList> <!-- end Basker -->
+    </ParameterList> <!-- end ShyLUBasker -->
   </ParameterList> <!-- end power0 -->
 
   <!-- hvdc2, interesting matrix with need for pivoting -->
   <ParameterList name="hvdc2.mtx">
-    <ParameterList name="Basker">
+    <ParameterList name="ShyLUBasker">
       
       <!-- Test Epetra objects first -->
       <ParameterList name="epetra">
@@ -1102,12 +1102,12 @@
 	   </ParameterList> <!-- end solver_run_params -->
 	</ParameterList> <!-- end run-int-long-p#-default -->
       </ParameterList> <!-- end tpetra -->
-    </ParameterList> <!-- end Basker -->
+    </ParameterList> <!-- end ShyLUBasker -->
   </ParameterList> <!-- end hvdc2 -->
 
   <!-- amesos_test_mat1 has good tests (limit to 4 threads) -->
   <ParameterList name="amesos2_test_mat1.mtx">
-    <ParameterList name="Basker">
+    <ParameterList name="ShyLUBasker">
       
       <!-- Test Epetra objects first -->
       <ParameterList name="epetra">
@@ -1272,12 +1272,12 @@
 	   </ParameterList> <!-- end solver_run_params -->
 	</ParameterList> <!-- end run-int-long-p#-default -->
       </ParameterList> <!-- end tpetra -->
-    </ParameterList> <!-- end Basker -->
+    </ParameterList> <!-- end ShyLUBasker -->
   </ParameterList> <!-- end amesos2_test_mat1 -->
 
   <!-- amesos2_test_mat4 has good tests (limit to 4 threads) -->
   <ParameterList name="amesos2_test_mat4.mtx">
-    <ParameterList name="Basker">
+    <ParameterList name="ShyLUBasker">
       
       <!-- Test Epetra objects first -->
       <ParameterList name="epetra">
@@ -1442,7 +1442,7 @@
 	   </ParameterList> <!-- end solver_run_params -->
 	</ParameterList> <!-- end run-int-long-p#-default -->
       </ParameterList> <!-- end tpetra -->
-    </ParameterList> <!-- end Basker -->
+    </ParameterList> <!-- end ShyLUBasker -->
   </ParameterList> <!-- end amesos2_test_mat4 -->
  <!-- TO BE CONT... -->
 </ParameterList> <!-- end test_parms -->
diff --git a/packages/belos/src/BelosBlockCGIter.hpp b/packages/belos/src/BelosBlockCGIter.hpp
index ec979d75623d..ac26b7cab39d 100644
--- a/packages/belos/src/BelosBlockCGIter.hpp
+++ b/packages/belos/src/BelosBlockCGIter.hpp
@@ -36,6 +36,41 @@
 
 namespace Belos {
 
+//! @name BlockCGIteration Structures
+  //@{
+
+  /** \brief Structure to contain pointers to BlockCGIter state variables.
+   *
+   * This class is used by BlockCGIter::initializeCG(), BlockCGIter::getState() and BlockCGIter::setState().
+   */
+  template <class ScalarType, class MV>
+  class BlockCGIterationState : public CGIterationStateBase<ScalarType, MV> {
+
+  public:
+    BlockCGIterationState() = default;
+
+    BlockCGIterationState(Teuchos::RCP<const MV> tmp) { // allocate state by cloning from tmp
+      initialize(tmp, 1); // initialize() declares no default vector count; use 1, the default of matches()
+    }
+
+    virtual ~BlockCGIterationState() = default;
+
+    void initialize(Teuchos::RCP<const MV> tmp, int _numVectors) {
+      using MVT = MultiVecTraits<ScalarType, MV>;
+      this->R = MVT::Clone( *tmp, _numVectors );
+      this->Z = MVT::Clone( *tmp, _numVectors );
+      this->P = MVT::Clone( *tmp, _numVectors );
+      this->AP = MVT::Clone(*tmp, _numVectors );
+
+      CGIterationStateBase<ScalarType, MV>::initialize(tmp, _numVectors);
+    }
+
+    bool matches(Teuchos::RCP<const MV> tmp, int _numVectors=1) const {
+      return CGIterationStateBase<ScalarType, MV>::matches(tmp, _numVectors);
+    }
+
+};
+
 /// \class BlockCGIter
 /// \brief Implementation of the block preconditioned Conjugate
 ///   Gradient (CG) iteration.
@@ -69,7 +104,7 @@ class BlockCGIter : virtual public CGIteration<ScalarType, MV, OP> {
     TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Stub");
   }
 
-  void initializeCG (CGIterationState<ScalarType,MV>& /* newstate */) {
+  void initializeCG (Teuchos::RCP<BlockCGIterationState<ScalarType,MV> > /* newstate */, Teuchos::RCP<MV> /* R_0 */) {
     TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Stub");
   }
 
@@ -77,7 +112,11 @@ class BlockCGIter : virtual public CGIteration<ScalarType, MV, OP> {
     TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Stub");
   }
 
-  CGIterationState<ScalarType,MV> getState () const {
+  Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > getState () const {
+    TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Stub");
+  }
+
+  void setState(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > state) {
     TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Stub");
   }
 
@@ -118,11 +157,6 @@ class BlockCGIter : virtual public CGIteration<ScalarType, MV, OP> {
     TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Stub");
   }
 
-
-private:
-  void setStateSize() {
-    TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Stub");
-  }
 };
 
 /// \brief Partial specialization for ScalarType types for which
@@ -137,10 +171,10 @@ class BlockCGIter<ScalarType, MV, OP, true> :
   //
   // Convenience typedefs
   //
-  typedef MultiVecTraits<ScalarType,MV> MVT;
-  typedef OperatorTraits<ScalarType,MV,OP> OPT;
-  typedef Teuchos::ScalarTraits<ScalarType> SCT;
-  typedef typename SCT::magnitudeType MagnitudeType;
+  using MVT = MultiVecTraits<ScalarType, MV>;
+  using OPT = OperatorTraits<ScalarType, MV, OP>;
+  using SCT = Teuchos::ScalarTraits<ScalarType>;
+  using MagnitudeType = typename SCT::magnitudeType;
 
   //! @name Constructors/Destructor
   //@{
@@ -157,7 +191,7 @@ class BlockCGIter<ScalarType, MV, OP, true> :
                Teuchos::ParameterList &params );
 
   //! Destructor.
-  virtual ~BlockCGIter() {};
+  virtual ~BlockCGIter() = default;
   //@}
 
 
@@ -192,32 +226,39 @@ class BlockCGIter<ScalarType, MV, OP, true> :
    * \note For any pointer in \c newstate which directly points to the multivectors in
    * the solver, the data is not copied.
    */
-  void initializeCG(CGIterationState<ScalarType,MV>& newstate);
+  void initializeCG(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > newstate, Teuchos::RCP<MV> R_0);
 
   /*! \brief Initialize the solver with the initial vectors from the linear problem
    *  or random data.
    */
   void initialize()
   {
-    CGIterationState<ScalarType,MV> empty;
-    initializeCG(empty);
+    initializeCG(Teuchos::null, Teuchos::null);
   }
 
   /*! \brief Get the current state of the linear solver.
    *
    * The data is only valid if isInitialized() == \c true.
    *
-   * \returns A CGIterationState object containing const pointers to the current solver state.
+   * \returns An RCP (typed as the CGIterationStateBase base class) to a new BlockCGIterationState holding the solver's current R, P, AP and Z.
    */
-  CGIterationState<ScalarType,MV> getState() const {
-    CGIterationState<ScalarType,MV> state;
-    state.R = R_;
-    state.P = P_;
-    state.AP = AP_;
-    state.Z = Z_;
+  Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > getState() const {
+    auto state = Teuchos::rcp(new BlockCGIterationState<ScalarType,MV>());
+    state->R = R_;
+    state->P = P_;
+    state->AP = AP_;
+    state->Z = Z_;
     return state;
   }
 
+  void setState(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > state) { // adopt the multivectors held by a BlockCGIterationState
+    auto s = Teuchos::rcp_dynamic_cast<BlockCGIterationState<ScalarType,MV> >(state, true); // second argument true: throw on a failed cast rather than return null
+    R_ = s->R; // the RCPs are assigned, not deep-copied
+    Z_ = s->Z;
+    P_ = s->P;
+    AP_ = s->AP;
+  }
+
   //@}
 
 
@@ -260,7 +301,7 @@ class BlockCGIter<ScalarType, MV, OP, true> :
   void setDoCondEst(bool /* val */){/*ignored*/}
 
   //! Gets the diagonal for condition estimation (NOT_IMPLEMENTED)
-  Teuchos::ArrayView<MagnitudeType> getDiag() { 
+  Teuchos::ArrayView<MagnitudeType> getDiag() {
     Teuchos::ArrayView<MagnitudeType> temp;
     return temp;
   }
@@ -276,9 +317,6 @@ class BlockCGIter<ScalarType, MV, OP, true> :
 
   //
   // Internal methods
-  //
-  //! Method for initalizing the state storage needed by block CG.
-  void setStateSize();
 
   //
   // Classes inputed through constructor that define the linear problem to be solved.
@@ -348,40 +386,6 @@ class BlockCGIter<ScalarType, MV, OP, true> :
     setBlockSize( bs );
   }
 
-  template <class ScalarType, class MV, class OP>
-  void BlockCGIter<ScalarType,MV,OP,true>::setStateSize ()
-  {
-    if (! stateStorageInitialized_) {
-      // Check if there is any multivector to clone from.
-      Teuchos::RCP<const MV> lhsMV = lp_->getLHS();
-      Teuchos::RCP<const MV> rhsMV = lp_->getRHS();
-      if (lhsMV == Teuchos::null && rhsMV == Teuchos::null) {
-        stateStorageInitialized_ = false;
-        return;
-      }
-      else {
-        // Initialize the state storage If the subspace has not be
-        // initialized before, generate it using the LHS or RHS from
-        // lp_.
-        if (R_ == Teuchos::null || MVT::GetNumberVecs(*R_)!=blockSize_) {
-          // Get the multivector that is not null.
-          Teuchos::RCP<const MV> tmp = ( (rhsMV!=Teuchos::null)? rhsMV: lhsMV );
-          TEUCHOS_TEST_FOR_EXCEPTION
-            (tmp == Teuchos::null,std:: invalid_argument,
-             "Belos::BlockCGIter::setStateSize: LinearProblem lacks "
-             "multivectors from which to clone.");
-          R_ = MVT::Clone (*tmp, blockSize_);
-          Z_ = MVT::Clone (*tmp, blockSize_);
-          P_ = MVT::Clone (*tmp, blockSize_);
-          AP_ = MVT::Clone (*tmp, blockSize_);
-        }
-
-        // State storage has now been initialized.
-        stateStorageInitialized_ = true;
-      }
-    }
-  }
-
   template <class ScalarType, class MV, class OP>
   void BlockCGIter<ScalarType,MV,OP,true>::setBlockSize (int blockSize)
   {
@@ -398,45 +402,40 @@ class BlockCGIter<ScalarType, MV, OP, true> :
     }
     blockSize_ = blockSize;
     initialized_ = false;
-    // Use the current blockSize_ to initialize the state storage.
-    setStateSize ();
   }
 
   template <class ScalarType, class MV, class OP>
   void BlockCGIter<ScalarType,MV,OP,true>::
-  initializeCG (CGIterationState<ScalarType,MV>& newstate)
+  initializeCG (Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > newstate, Teuchos::RCP<MV> R_0)
   {
     const char prefix[] = "Belos::BlockCGIter::initialize: ";
 
     // Initialize the state storage if it isn't already.
-    if (! stateStorageInitialized_) {
-      setStateSize();
-    }
-
-    TEUCHOS_TEST_FOR_EXCEPTION
-      (! stateStorageInitialized_, std::invalid_argument,
-       prefix << "Cannot initialize state storage!");
+    Teuchos::RCP<const MV> lhsMV = lp_->getLHS();
+    Teuchos::RCP<const MV> rhsMV = lp_->getRHS();
+    Teuchos::RCP<const MV> tmp = ( (rhsMV!=Teuchos::null)? rhsMV: lhsMV );
+    TEUCHOS_ASSERT(!newstate.is_null());
+    if (!Teuchos::rcp_dynamic_cast<BlockCGIterationState<ScalarType,MV> >(newstate, true)->matches(tmp, blockSize_))
+      newstate->initialize(tmp, blockSize_);
+    setState(newstate);
 
     // NOTE:  In BlockCGIter R_, the initial residual, is required!!!
     const char errstr[] = "Specified multivectors must have a consistent "
       "length and width.";
 
-    // Create convenience variables for zero and one.
-    //const MagnitudeType zero = Teuchos::ScalarTraits<MagnitudeType>::zero(); // unused
-
-    if (newstate.R != Teuchos::null) {
+    {
 
       TEUCHOS_TEST_FOR_EXCEPTION
-        (MVT::GetGlobalLength(*newstate.R) != MVT::GetGlobalLength(*R_),
+        (MVT::GetGlobalLength(*R_0) != MVT::GetGlobalLength(*R_),
          std::invalid_argument, prefix << errstr );
       TEUCHOS_TEST_FOR_EXCEPTION
-        (MVT::GetNumberVecs(*newstate.R) != blockSize_,
+        (MVT::GetNumberVecs(*R_0) != blockSize_,
          std::invalid_argument, prefix << errstr );
 
       // Copy basis vectors from newstate into V
-      if (newstate.R != R_) {
+      if (R_0 != R_) {
         // copy over the initial residual (unpreconditioned).
-        MVT::Assign( *newstate.R, *R_ );
+        MVT::Assign( *R_0, *R_ );
       }
       // Compute initial direction vectors
       // Initially, they are set to the preconditioned residuals
@@ -444,9 +443,9 @@ class BlockCGIter<ScalarType, MV, OP, true> :
       if ( lp_->getLeftPrec() != Teuchos::null ) {
         lp_->applyLeftPrec( *R_, *Z_ );
         if ( lp_->getRightPrec() != Teuchos::null ) {
-          Teuchos::RCP<MV> tmp = MVT::Clone( *Z_, blockSize_ );
-          lp_->applyRightPrec( *Z_, *tmp );
-          Z_ = tmp;
+          Teuchos::RCP<MV> tmp2 = MVT::Clone( *Z_, blockSize_ );
+          lp_->applyRightPrec( *Z_, *tmp2 );
+          Z_ = tmp2;
         }
       }
       else if ( lp_->getRightPrec() != Teuchos::null ) {
@@ -457,11 +456,6 @@ class BlockCGIter<ScalarType, MV, OP, true> :
       }
       MVT::Assign( *Z_, *P_ );
     }
-    else {
-      TEUCHOS_TEST_FOR_EXCEPTION
-        (newstate.R == Teuchos::null, std::invalid_argument,
-         prefix << "BlockCGStateIterState does not have initial residual.");
-    }
 
     // The solver is initialized
     initialized_ = true;
diff --git a/packages/belos/src/BelosBlockCGSolMgr.hpp b/packages/belos/src/BelosBlockCGSolMgr.hpp
index c1baba76ebe8..7419746c10cf 100644
--- a/packages/belos/src/BelosBlockCGSolMgr.hpp
+++ b/packages/belos/src/BelosBlockCGSolMgr.hpp
@@ -30,6 +30,7 @@
 #include "BelosStatusTestOutputFactory.hpp"
 #include "BelosOutputManager.hpp"
 #include "Teuchos_LAPACK.hpp"
+#include "Teuchos_RCPDecl.hpp"
 #ifdef BELOS_TEUCHOS_TIME_MONITOR
 #  include "Teuchos_TimeMonitor.hpp"
 #endif
@@ -80,8 +81,7 @@ namespace Belos {
   {
     static const bool requiresLapack =
       Belos::Details::LapackSupportsScalar<ScalarType>::value;
-    typedef Details::SolverManagerRequiresLapack<ScalarType, MV, OP,
-                                                 requiresLapack> base_type;
+    using base_type = Details::SolverManagerRequiresLapack<ScalarType, MV, OP, requiresLapack>;
 
   public:
     BlockCGSolMgr () :
@@ -91,7 +91,7 @@ namespace Belos {
                   const Teuchos::RCP<Teuchos::ParameterList>& pl) :
       base_type ()
     {}
-    virtual ~BlockCGSolMgr () {}
+    virtual ~BlockCGSolMgr () = default;
   };
 
 
@@ -123,11 +123,11 @@ namespace Belos {
 // #endif // defined(HAVE_TEUCHOSCORE_CXX11)
 
   private:
-    typedef MultiVecTraits<ScalarType,MV> MVT;
-    typedef OperatorTraits<ScalarType,MV,OP> OPT;
-    typedef Teuchos::ScalarTraits<ScalarType> SCT;
-    typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
-    typedef Teuchos::ScalarTraits<MagnitudeType> MT;
+    using MVT = MultiVecTraits<ScalarType, MV>;
+    using OPT = OperatorTraits<ScalarType, MV, OP>;
+    using SCT = Teuchos::ScalarTraits<ScalarType>;
+    using MagnitudeType = typename Teuchos::ScalarTraits<ScalarType>::magnitudeType;
+    using MT = Teuchos::ScalarTraits<MagnitudeType>;
 
   public:
 
@@ -181,7 +181,7 @@ namespace Belos {
                    const Teuchos::RCP<Teuchos::ParameterList> &pl );
 
     //! Destructor.
-    virtual ~BlockCGSolMgr() {};
+    virtual ~BlockCGSolMgr() = default;
 
     //! clone for Inverted Injection (DII)
     Teuchos::RCP<SolverManager<ScalarType, MV, OP> > clone () const override {
@@ -364,6 +364,8 @@ namespace Belos {
     bool assertPositiveDefiniteness_;
     bool foldConvergenceDetectionIntoAllreduce_;
 
+    Teuchos::RCP<CGIterationStateBase<ScalarType, MV> > state_;
+
     //! Prefix label for all the timers.
     std::string label_;
 
@@ -594,7 +596,7 @@ setParameters (const Teuchos::RCP<Teuchos::ParameterList> &params)
   // Create orthogonalization manager if we need to.
   if (ortho_ == Teuchos::null || changedOrthoType) {
     Belos::OrthoManagerFactory<ScalarType, MV, OP> factory;
-    Teuchos::RCP<Teuchos::ParameterList> paramsOrtho;   
+    Teuchos::RCP<Teuchos::ParameterList> paramsOrtho;
     if (orthoType_=="DGKS" && orthoKappa_ > 0) {
       paramsOrtho = Teuchos::rcp(new Teuchos::ParameterList());
       paramsOrtho->set ("depTol", orthoKappa_ );
@@ -604,8 +606,8 @@ setParameters (const Teuchos::RCP<Teuchos::ParameterList> &params)
   }
 
   // Convergence
-  typedef Belos::StatusTestCombo<ScalarType,MV,OP>  StatusTestCombo_t;
-  typedef Belos::StatusTestGenResNorm<ScalarType,MV,OP>  StatusTestResNorm_t;
+  using StatusTestCombo_t = Belos::StatusTestCombo<ScalarType, MV, OP>;
+  using StatusTestResNorm_t = Belos::StatusTestGenResNorm<ScalarType, MV, OP>;
 
   // Check for convergence tolerance
   if (params->isParameter("Convergence Tolerance")) {
@@ -864,16 +866,23 @@ ReturnType BlockCGSolMgr<ScalarType,MV,OP,true>::solve() {
       block_cg_iter =
         rcp (new CGSingleRedIter<ScalarType,MV,OP> (problem_, printer_,
                                                     outputTest_, convTest_, plist));
+      if (state_.is_null() || Teuchos::rcp_dynamic_cast<CGSingleRedIterationState<ScalarType, MV> >(state_).is_null())
+        state_ = Teuchos::rcp(new CGSingleRedIterationState<ScalarType, MV>());
+
     }
     else {
       block_cg_iter =
         rcp (new CGIter<ScalarType,MV,OP> (problem_, printer_,
                                            outputTest_, convTest_, plist));
+      if (state_.is_null() || Teuchos::rcp_dynamic_cast<CGIterationState<ScalarType, MV> >(state_).is_null())
+        state_ = Teuchos::rcp(new CGIterationState<ScalarType, MV>());
     }
   } else {
     block_cg_iter =
       rcp (new BlockCGIter<ScalarType,MV,OP> (problem_, printer_, outputTest_,
                                               ortho_, plist));
+    if (state_.is_null() || Teuchos::rcp_dynamic_cast<BlockCGIterationState<ScalarType, MV> >(state_).is_null())
+        state_ = Teuchos::rcp(new BlockCGIterationState<ScalarType, MV>());
   }
 
 
@@ -900,11 +909,9 @@ ReturnType BlockCGSolMgr<ScalarType,MV,OP,true>::solve() {
       RCP<MV> R_0 = MVT::CloneViewNonConst( *(rcp_const_cast<MV>(problem_->getInitResVec())), currIdx );
 
       // Set the new state and initialize the solver.
-      CGIterationState<ScalarType,MV> newstate;
-      newstate.R = R_0;
-      block_cg_iter->initializeCG(newstate);
+      block_cg_iter->initializeCG(state_, R_0);
 
-      while(1) {
+      while(true) {
 
         // tell block_cg_iter to iterate
         try {
@@ -916,7 +923,7 @@ ReturnType BlockCGSolMgr<ScalarType,MV,OP,true>::solve() {
             // At least one of the linear system(s) converged.
             //
             // Get the column indices of the linear systems that converged.
-            typedef StatusTestGenResNorm<ScalarType,MV,OP> conv_test_type;
+            using conv_test_type = StatusTestGenResNorm<ScalarType, MV, OP>;
             std::vector<int> convIdx =
               rcp_dynamic_cast<conv_test_type>(convTest_)->convIndices();
 
@@ -964,9 +971,7 @@ ReturnType BlockCGSolMgr<ScalarType,MV,OP,true>::solve() {
             block_cg_iter->setBlockSize( have );
 
             // Set the new state and initialize the solver.
-            CGIterationState<ScalarType,MV> defstate;
-            defstate.R = R_0;
-            block_cg_iter->initializeCG(defstate);
+            block_cg_iter->initializeCG(state_, R_0);
           }
           //
           // None of the linear systems converged.  Check whether the
@@ -992,7 +997,7 @@ ReturnType BlockCGSolMgr<ScalarType,MV,OP,true>::solve() {
           achievedTol_ = MT::one();
           Teuchos::RCP<MV> X = problem_->getLHS();
           MVT::MvInit( *X, SCT::zero() );
-          printer_->stream(Warnings) << "Belos::BlockCGSolMgr::solve(): Warning! NaN has been detected!" 
+          printer_->stream(Warnings) << "Belos::BlockCGSolMgr::solve(): Warning! NaN has been detected!"
                                      << std::endl;
           return Unconverged;
         }
@@ -1063,7 +1068,7 @@ ReturnType BlockCGSolMgr<ScalarType,MV,OP,true>::solve() {
 
   // Save the convergence test value ("achieved tolerance") for this solve.
   {
-    typedef StatusTestGenResNorm<ScalarType,MV,OP> conv_test_type;
+    using conv_test_type = StatusTestGenResNorm<ScalarType, MV, OP>;
     // testValues is nonnull and not persistent.
     const std::vector<MagnitudeType>* pTestValues =
       rcp_dynamic_cast<conv_test_type>(convTest_)->getTestValue();
diff --git a/packages/belos/src/BelosCGIter.hpp b/packages/belos/src/BelosCGIter.hpp
index 9aa294776c70..dd0871ac66ee 100644
--- a/packages/belos/src/BelosCGIter.hpp
+++ b/packages/belos/src/BelosCGIter.hpp
@@ -31,53 +31,100 @@
 #include "Teuchos_ParameterList.hpp"
 #include "Teuchos_TimeMonitor.hpp"
 
-/*!	
+/*!
   \class Belos::CGIter
-  
+
   \brief This class implements the preconditioned Conjugate Gradient (CG) iteration.
 
   \ingroup belos_solver_framework
- 
+
   \author Teri Barth and Heidi Thornquist
 */
 
 namespace Belos {
-  
+
+  //! @name CGIteration Structures
+  //@{
+
+  /** \brief Structure to contain pointers to CGIteration state variables.
+   *
+   * This class is used by CGIter::initializeCG(), CGIter::getState() and CGIter::setState().
+   */
+  template <class ScalarType, class MV>
+  class CGIterationState : public CGIterationStateBase<ScalarType, MV> {
+  public:
+    CGIterationState() = default;
+
+    //! Allocate all state vectors by cloning \c tmp (CG uses exactly one vector).
+    explicit CGIterationState(Teuchos::RCP<const MV> tmp) {
+      initialize(tmp, 1);
+    }
+
+    virtual ~CGIterationState() = default;
+
+    //! (Re)allocate S = (R, Z), P and AP as clones of \c tmp; \c _numVectors must be 1.
+    void initialize(Teuchos::RCP<const MV> tmp, int _numVectors) override {
+      using MVT = MultiVecTraits<ScalarType, MV>;
+      TEUCHOS_ASSERT(_numVectors == 1);
+      // S = (R, Z)
+      // Storing R and Z side by side allows computing the inner products (R, S) = ((R, R), (R, Z)) using a single reduction.
+      S = MVT::Clone( *tmp, 2 );
+      std::vector<int> index(1,0);
+      this->R = MVT::CloneViewNonConst( *S, index );
+      index[0] = 1;
+      this->Z = MVT::CloneViewNonConst( *S, index );
+
+      this->P = MVT::Clone( *tmp, 1 );
+      this->AP = MVT::Clone(*tmp, 1);
+
+      CGIterationStateBase<ScalarType, MV>::initialize(tmp, _numVectors);
+    }
+
+    //! True when the base-class check passes and the combined storage S is allocated.
+    bool matches(Teuchos::RCP<const MV> tmp, int _numVectors=1) const override {
+      return (CGIterationStateBase<ScalarType, MV>::matches(tmp, _numVectors) &&
+              !S.is_null());
+    }
+
+    //! Two-column multivector; R and Z are views of its columns 0 and 1.
+    Teuchos::RCP<MV> S;
+};
+
 template<class ScalarType, class MV, class OP>
 class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
 
   public:
-    
+
   //
   // Convenience typedefs
   //
-  typedef MultiVecTraits<ScalarType,MV> MVT;
-  typedef OperatorTraits<ScalarType,MV,OP> OPT;
-  typedef Teuchos::ScalarTraits<ScalarType> SCT;
-  typedef typename SCT::magnitudeType MagnitudeType;
+  using MVT = MultiVecTraits<ScalarType, MV>;
+  using OPT = OperatorTraits<ScalarType, MV, OP>;
+  using SCT = Teuchos::ScalarTraits<ScalarType>;
+  using MagnitudeType = typename SCT::magnitudeType;
 
   //! @name Constructors/Destructor
-  //@{ 
+  //@{
 
   /*! \brief %CGIter constructor with linear problem, solver utilities, and parameter list of solver options.
    *
    * This constructor takes pointers required by the linear solver iteration, in addition
    * to a parameter list of options for the linear solver.
    */
-  CGIter( const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > &problem, 
+  CGIter( const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > &problem,
 		  const Teuchos::RCP<OutputManager<ScalarType> > &printer,
 		  const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
                   const Teuchos::RCP<StatusTestGenResNorm<ScalarType,MV,OP> > &convTester,
 		  Teuchos::ParameterList &params );
 
   //! Destructor.
-  virtual ~CGIter() {};
+  virtual ~CGIter() = default;
   //@}
 
 
   //! @name Solver methods
-  //@{ 
-  
+  //@{
+
   /*! \brief This method performs CG iterations until the status
    * test indicates the need to stop or an error occurs (in which case, an
    * std::exception is thrown).
@@ -86,7 +133,7 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
    * not, it will call initialize() using default arguments. After
    * initialization, the solver performs CG iterations until the
    * status test evaluates as ::Passed, at which point the method returns to
-   * the caller. 
+   * the caller.
    *
    * The status test is queried at the beginning of the iteration.
    */
@@ -94,53 +141,62 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
 
   /*! \brief Initialize the solver to an iterate, providing a complete state.
    *
-   * The %CGIter contains a certain amount of state, consisting of the current 
+   * The %CGIter contains a certain amount of state, consisting of the current
    * residual, preconditioned residual, and decent direction.
    *
    * initialize() gives the user the opportunity to manually set these,
    * although only the current unpreconditioned residual is required.
    *
-   * \post 
+   * \post
    * <li>isInitialized() == \c true (see post-conditions of isInitialize())
    *
-   * \note For any pointer in \c newstate which directly points to the multivectors in 
+   * \note For any pointer in \c newstate which directly points to the multivectors in
    * the solver, the data is not copied.
    */
-  void initializeCG(CGIterationState<ScalarType,MV>& newstate);
+  void initializeCG(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > newstate, Teuchos::RCP<MV> R_0);
 
   /*! \brief Initialize the solver with the initial vectors from the linear problem
    *  or random data.
    */
   void initialize()
   {
-    CGIterationState<ScalarType,MV> empty;
-    initializeCG(empty);
+    initializeCG(Teuchos::null, Teuchos::null);
   }
-  
+
   /*! \brief Get the current state of the linear solver.
    *
    * The data is only valid if isInitialized() == \c true.
    *
    * \returns A CGIterationState object containing const pointers to the current solver state.
    */
-  CGIterationState<ScalarType,MV> getState() const {
-    CGIterationState<ScalarType,MV> state;
-    state.R = R_;
-    state.P = P_;
-    state.AP = AP_;
-    state.Z = Z_;
+  Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > getState() const {
+    auto state = Teuchos::rcp(new CGIterationState<ScalarType,MV>());
+    state->R = R_;
+    state->P = P_;
+    state->AP = AP_;
+    state->Z = Z_;
+    state->S = S_;
     return state;
   }
 
+  //! Adopt the vectors held by \c state (R, Z, P, AP, S) as this iteration's storage; the cast to CGIterationState is requested with throw_on_fail = true.
+  void setState(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > state) {
+    auto s = Teuchos::rcp_dynamic_cast<CGIterationState<ScalarType,MV> >(state, true);
+    R_ = s->R;
+    Z_ = s->Z;
+    P_ = s->P;
+    AP_ = s->AP;
+    S_ = s->S;
+  }
   //@}
 
-  
+
   //! @name Status methods
-  //@{ 
+  //@{
 
   //! \brief Get the current iteration count.
   int getNumIters() const { return iter_; }
-  
+
   //! \brief Reset the iteration count.
   void resetNumIters( int iter = 0 ) { iter_ = iter; }
 
@@ -160,9 +216,9 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
   Teuchos::RCP<MV> getCurrentUpdate() const { return Teuchos::null; }
 
   //@}
-  
+
   //! @name Accessor methods
-  //@{ 
+  //@{
 
   //! Get a constant reference to the linear problem.
   const LinearProblem<ScalarType,MV,OP>& getProblem() const { return *lp_; }
@@ -178,26 +234,26 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
 
   //! States whether the solver has been initialized or not.
   bool isInitialized() { return initialized_; }
-  
+
 
   //! Sets whether or not to store the diagonal for condition estimation
   void setDoCondEst(bool val) {
-   if (numEntriesForCondEst_) doCondEst_=val;
+   if (numEntriesForCondEst_ != 0) doCondEst_=val;
   }
-  
+
   //! Gets the diagonal for condition estimation
   Teuchos::ArrayView<MagnitudeType> getDiag() {
     // NOTE (mfh 30 Jul 2015) See note on getOffDiag() below.
     // getDiag() didn't actually throw for me in that case, but why
     // not be cautious?
-    typedef typename Teuchos::ArrayView<MagnitudeType>::size_type size_type;
+    using size_type = typename Teuchos::ArrayView<MagnitudeType>::size_type;
     if (static_cast<size_type> (iter_) >= diag_.size ()) {
       return diag_ ();
     } else {
       return diag_ (0, iter_);
     }
     }
-  
+
   //! Gets the off-diagonal for condition estimation
   Teuchos::ArrayView<MagnitudeType> getOffDiag() {
     // NOTE (mfh 30 Jul 2015) The implementation as I found it
@@ -205,24 +261,20 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
     // debug mode) when the maximum number of iterations has been
     // reached, because iter_ == offdiag_.size() in that case.  The
     // new logic fixes this.
-    typedef typename Teuchos::ArrayView<MagnitudeType>::size_type size_type;
+    using size_type = typename Teuchos::ArrayView<MagnitudeType>::size_type;
     if (static_cast<size_type> (iter_) >= offdiag_.size ()) {
       return offdiag_ ();
     } else {
       return offdiag_ (0, iter_);
     }
   }
-  
+
   //@}
 
   private:
 
   //
   // Internal methods
-  //
-  //! Method for initalizing the state storage needed by CG.
-  void setStateSize();
-  
   //
   // Classes inputed through constructor that define the linear problem to be solved.
   //
@@ -231,7 +283,7 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
   const Teuchos::RCP<StatusTest<ScalarType,MV,OP> >       stest_;
   const Teuchos::RCP<StatusTestGenResNorm<ScalarType,MV,OP> >       convTest_;
 
-  //  
+  //
   // Current solver state
   //
   // initialized_ specifies that the basis vectors have been initialized and the iterate() routine
@@ -239,11 +291,6 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
   // For the implications of the state of initialized_, please see documentation for initialize()
   bool initialized_;
 
-  // stateStorageInitialized_ specifies that the state storage has been initialized.
-  // This initialization may be postponed if the linear problem was generated without 
-  // the right-hand side or solution vectors.
-  bool stateStorageInitialized_;
-
   // Current number of iterations performed.
   int iter_;
 
@@ -261,11 +308,11 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
   int numEntriesForCondEst_;
   bool doCondEst_;
 
- 
-  
-  // 
+
+
+  //
   // State Storage
-  // 
+  //
   // Residual
   Teuchos::RCP<MV> R_;
   //
@@ -285,7 +332,7 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
   //////////////////////////////////////////////////////////////////////////////////////////////////
   // Constructor.
   template<class ScalarType, class MV, class OP>
-  CGIter<ScalarType,MV,OP>::CGIter(const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > &problem, 
+  CGIter<ScalarType,MV,OP>::CGIter(const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > &problem,
 						   const Teuchos::RCP<OutputManager<ScalarType> > &printer,
 						   const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
                                                    const Teuchos::RCP<StatusTestGenResNorm<ScalarType,MV,OP> > &convTester,
@@ -295,7 +342,6 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
     stest_(tester),
     convTest_(convTester),
     initialized_(false),
-    stateStorageInitialized_(false),
     iter_(0),
     assertPositiveDefiniteness_( params.get("Assert Positive Definiteness", true) ),
     numEntriesForCondEst_(params.get("Max Size For Condest",0) ),
@@ -304,80 +350,39 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
     foldConvergenceDetectionIntoAllreduce_ = params.get<bool>("Fold Convergence Detection Into Allreduce",false);
   }
 
-  //////////////////////////////////////////////////////////////////////////////////////////////////
-  // Setup the state storage.
-  template <class ScalarType, class MV, class OP>
-  void CGIter<ScalarType,MV,OP>::setStateSize ()
-  {
-    if (!stateStorageInitialized_) {
-
-      // Check if there is any multivector to clone from.
-      Teuchos::RCP<const MV> lhsMV = lp_->getLHS();
-      Teuchos::RCP<const MV> rhsMV = lp_->getRHS();
-      if (lhsMV == Teuchos::null && rhsMV == Teuchos::null) {
-	stateStorageInitialized_ = false;
-	return;
-      }
-      else {
-	
-	// Initialize the state storage
-	// If the subspace has not be initialized before, generate it using the LHS or RHS from lp_.
-	if (R_ == Teuchos::null) {
-	  // Get the multivector that is not null.
-	  Teuchos::RCP<const MV> tmp = ( (rhsMV!=Teuchos::null)? rhsMV: lhsMV );
-	  TEUCHOS_TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::invalid_argument,
-			     "Belos::CGIter::setStateSize(): linear problem does not specify multivectors to clone from.");
-          S_ = MVT::Clone( *tmp, 2 );
-          std::vector<int> index(1,0);
-          index[0] = 0;
-          R_ = MVT::CloneViewNonConst( *S_, index );
-          index[0] = 1;
-          Z_ = MVT::CloneViewNonConst( *S_, index );
-	  P_ = MVT::Clone( *tmp, 1 );
-	  AP_ = MVT::Clone( *tmp, 1 );
-
-        }
-
-        // Tracking information for condition number estimation
-        if(numEntriesForCondEst_ > 0) {
-          diag_.resize(numEntriesForCondEst_);
-          offdiag_.resize(numEntriesForCondEst_-1);
-        }
-        	
-	// State storage has now been initialized.
-	stateStorageInitialized_ = true;
-      }
-    }
-  }
-
 
   //////////////////////////////////////////////////////////////////////////////////////////////////
   // Initialize this iteration object
   template <class ScalarType, class MV, class OP>
-  void CGIter<ScalarType,MV,OP>::initializeCG(CGIterationState<ScalarType,MV>& newstate)
+  void CGIter<ScalarType,MV,OP>::initializeCG(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > newstate, Teuchos::RCP<MV> R_0)
   {
     // Initialize the state storage if it isn't already.
-    if (!stateStorageInitialized_) 
-      setStateSize();
+    Teuchos::RCP<const MV> lhsMV = lp_->getLHS();
+    Teuchos::RCP<const MV> rhsMV = lp_->getRHS();
+    Teuchos::RCP<const MV> tmp = ( (rhsMV!=Teuchos::null)? rhsMV: lhsMV );
+    TEUCHOS_ASSERT(!newstate.is_null());
+    if (!Teuchos::rcp_dynamic_cast<CGIterationState<ScalarType,MV> >(newstate, true)->matches(tmp, 1))
+      newstate->initialize(tmp, 1);
+    setState(newstate);
+
+    // Tracking information for condition number estimation
+    if(numEntriesForCondEst_ > 0) {
+      diag_.resize(numEntriesForCondEst_);
+      offdiag_.resize(numEntriesForCondEst_-1);
+    }
 
-    TEUCHOS_TEST_FOR_EXCEPTION(!stateStorageInitialized_,std::invalid_argument,
-		       "Belos::CGIter::initialize(): Cannot initialize state storage!");
-    
-    // NOTE:  In CGIter R_, the initial residual, is required!!!  
-    //
     std::string errstr("Belos::CGIter::initialize(): Specified multivectors must have a consistent length and width.");
+    {
 
-    if (newstate.R != Teuchos::null) {
-
-      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.R) != MVT::GetGlobalLength(*R_),
+      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*R_0) != MVT::GetGlobalLength(*R_),
                           std::invalid_argument, errstr );
-      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) != 1,
+      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*R_0) != 1,
                           std::invalid_argument, errstr );
 
       // Copy basis vectors from newstate into V
-      if (newstate.R != R_) {
+      if (R_0 != R_) {
         // copy over the initial residual (unpreconditioned).
-	MVT::Assign( *newstate.R, *R_ );
+	MVT::Assign( *R_0, *R_ );
       }
 
       // Compute initial direction vectors
@@ -386,23 +391,18 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
       if ( lp_->getLeftPrec() != Teuchos::null ) {
         lp_->applyLeftPrec( *R_, *Z_ );
         if ( lp_->getRightPrec() != Teuchos::null ) {
-          Teuchos::RCP<MV> tmp = MVT::CloneCopy( *Z_ );
-          lp_->applyRightPrec( *tmp, *Z_ );
+          Teuchos::RCP<MV> tmp2 = MVT::CloneCopy( *Z_ );
+          lp_->applyRightPrec( *tmp2, *Z_ );
         }
       }
       else if ( lp_->getRightPrec() != Teuchos::null ) {
         lp_->applyRightPrec( *R_, *Z_ );
-      } 
+      }
       else {
         MVT::Assign( *R_, *Z_ );
       }
       MVT::Assign( *Z_, *P_ );
     }
-    else {
-
-      TEUCHOS_TEST_FOR_EXCEPTION(newstate.R == Teuchos::null,std::invalid_argument,
-                         "Belos::CGIter::initialize(): CGIterationState does not have initial residual.");
-    }
 
     // The solver is initialized
     initialized_ = true;
@@ -417,14 +417,16 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
     //
     // Allocate/initialize data structures
     //
-    if (initialized_ == false) {
+    if (!initialized_) {
       initialize();
     }
 
     // Allocate memory for scalars.
     std::vector<ScalarType> alpha(1);
     std::vector<ScalarType> beta(1);
-    std::vector<ScalarType> rHz(1), rHz_old(1), pAp(1);
+    std::vector<ScalarType> rHz(1);
+    std::vector<ScalarType> rHz_old(1);
+    std::vector<ScalarType> pAp(1);
     Teuchos::SerialDenseMatrix<int,ScalarType> rHs( 1, 2 );
 
     // Create convenience variables for zero and one.
@@ -432,12 +434,14 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
     const MagnitudeType zero = Teuchos::ScalarTraits<MagnitudeType>::zero();
 
     // Scalars for condition estimation (if needed) - These will always use entry zero, for convenience
-    ScalarType pAp_old = one, beta_old = one, rHz_old2 = one;
-             
+    ScalarType pAp_old = one;
+    ScalarType beta_old = one;
+    ScalarType rHz_old2 = one;
+
     // Get the current solution vector.
     Teuchos::RCP<MV> cur_soln_vec = lp_->getCurrLHSVec();
 
-    // Check that the current solution vector only has one column. 
+    // Check that the current solution vector only has one column.
     TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*cur_soln_vec) != 1, CGIterateFailure,
                         "Belos::CGIter::iterate(): current linear system has more than one vector!" );
 
@@ -453,17 +457,17 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
     // Iterate until the status test tells us to stop.
     //
     while (stest_->checkStatus(this) != Passed) {
-      
+
       // Increment the iteration
       iter_++;
 
       // Multiply the current direction vector by A and store in AP_
       lp_->applyOp( *P_, *AP_ );
-      
+
       // Compute alpha := <R_,Z_> / <P_,AP_>
       MVT::MvDot( *P_, *AP_, pAp );
       alpha[0] = rHz[0] / pAp[0];
-      
+
       // Check that alpha is a positive number!
       if(assertPositiveDefiniteness_) {
         TEUCHOS_TEST_FOR_EXCEPTION( SCT::real(alpha[0]) <= zero, CGPositiveDefiniteFailure,
@@ -484,7 +488,7 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
       //
       MVT::MvAddMv( one, *R_, -alpha[0], *AP_, *R_ );
       //
-      // Compute beta := [ new <R_, Z_> ] / [ old <R_, Z_> ], 
+      // Compute beta := [ new <R_, Z_> ] / [ old <R_, Z_> ],
       // and the new direction vector p.
       //
       if ( lp_->getLeftPrec() != Teuchos::null ) {
@@ -496,7 +500,7 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
       }
       else if ( lp_->getRightPrec() != Teuchos::null ) {
         lp_->applyRightPrec( *R_, *Z_ );
-      } 
+      }
       else {
         MVT::Assign( *R_, *Z_ );
       }
@@ -511,7 +515,7 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
       beta[0] = rHz[0] / rHz_old[0];
       //
       MVT::MvAddMv( one, *Z_, beta[0], *P_, *P_ );
-     
+
       // Condition estimate (if needed)
       if (doCondEst_) {
         if (iter_ > 1) {
@@ -525,7 +529,7 @@ class CGIter : virtual public CGIteration<ScalarType,MV,OP> {
         beta_old = beta[0];
         pAp_old = pAp[0];
       }
- 
+
     } // end while (sTest_->checkStatus(this) != Passed)
   }
 
diff --git a/packages/belos/src/BelosCGIteration.hpp b/packages/belos/src/BelosCGIteration.hpp
index 951f3760db8a..5617e381dedb 100644
--- a/packages/belos/src/BelosCGIteration.hpp
+++ b/packages/belos/src/BelosCGIteration.hpp
@@ -17,47 +17,76 @@
 #include "BelosConfigDefs.hpp"
 #include "BelosTypes.hpp"
 #include "BelosIteration.hpp"
+#include "BelosMultiVecTraits.hpp"
+#include "Teuchos_Assert.hpp"
 
 namespace Belos {
 
-  //! @name CGIteration Structures 
-  //@{ 
-  
+  //! @name CGIteration Structures
+  //@{
+
   /** \brief Structure to contain pointers to CGIteration state variables.
    *
    * This struct is utilized by CGIteration::initialize() and CGIteration::getState().
    */
   template <class ScalarType, class MV>
-  struct CGIterationState {
+  class CGIterationStateBase {
+  public:
+    virtual ~CGIterationStateBase() = default;
+
+    //! Mark the state initialized for \c _numVectors vectors; R, Z, P and AP must already be allocated.
+    virtual void initialize(Teuchos::RCP<const MV> tmp, int _numVectors) {
+      TEUCHOS_ASSERT(!R.is_null());
+      TEUCHOS_ASSERT(!Z.is_null());
+      TEUCHOS_ASSERT(!P.is_null());
+      TEUCHOS_ASSERT(!AP.is_null());
+      isInitialized_ = true;
+      numVectors_ = _numVectors;
+    }
+
+    bool isInitialized() const { return isInitialized_; }
+    int numVectors() const { return numVectors_; }
+
+    //! True if initialized for \c _numVectors vectors and R has the global length of a non-null \c tmp.
+    virtual bool matches(Teuchos::RCP<const MV> tmp, int _numVectors=1) const {
+      using MVT = MultiVecTraits<ScalarType, MV>;
+      return (isInitialized() && !tmp.is_null() &&
+              !R.is_null() && !Z.is_null() &&
+              !P.is_null() && !AP.is_null() &&
+              (numVectors() == _numVectors) &&
+              (MVT::GetGlobalLength(*tmp) == MVT::GetGlobalLength(*R)));
+    }
 
     /*! \brief The current residual. */
-    Teuchos::RCP<const MV> R;
+    Teuchos::RCP<MV> R;
 
     /*! \brief The current preconditioned residual. */
-    Teuchos::RCP<const MV> Z;
+    Teuchos::RCP<MV> Z;
 
     /*! \brief The current decent direction vector */
-    Teuchos::RCP<const MV> P;
+    Teuchos::RCP<MV> P;
 
     /*! \brief The matrix A applied to current decent direction vector */
-    Teuchos::RCP<const MV> AP;
-    
-    CGIterationState() : R(Teuchos::null), Z(Teuchos::null), 
-		    P(Teuchos::null), AP(Teuchos::null)
-    {}
+    Teuchos::RCP<MV> AP;
+
+  private:
+    // Start as "not initialized" so matches() reads well-defined values on a freshly constructed state.
+    bool isInitialized_ = false;
+    int numVectors_ = 0;
+
   };
 
   //! @name CGIteration Exceptions
-  //@{ 
-  
+  //@{
+
   /** \brief CGIterationInitFailure is thrown when the CGIteration object is unable to
-   * generate an initial iterate in the CGIteration::initialize() routine. 
+   * generate an initial iterate in the CGIteration::initialize() routine.
    *
    * This std::exception is thrown from the CGIteration::initialize() method, which is
    * called by the user or from the CGIteration::iterate() method if isInitialized()
    * == \c false.
    *
-   * In the case that this std::exception is thrown, 
+   * In the case that this std::exception is thrown,
    * CGIteration::isInitialized() will be \c false and the user will need to provide
    * a new initial iterate to the iteration.
    */
@@ -66,7 +95,7 @@ namespace Belos {
     {}};
 
   /** \brief CGIterateFailure is thrown when the CGIteration object is unable to
-   * compute the next iterate in the CGIteration::iterate() routine. 
+   * compute the next iterate in the CGIteration::iterate() routine.
    *
    * This std::exception is thrown from the CGIteration::iterate() method.
    *
@@ -85,7 +114,7 @@ namespace Belos {
     {}};
 
   /** \brief CGIterationOrthoFailure is thrown when the CGIteration object is unable to
-   * compute independent direction vectors in the CGIteration::iterate() routine. 
+   * compute independent direction vectors in the CGIteration::iterate() routine.
    *
    * This std::exception is thrown from the CGIteration::iterate() method.
    *
@@ -103,7 +132,7 @@ namespace Belos {
   class CGIterationLAPACKFailure : public BelosError {public:
     CGIterationLAPACKFailure(const std::string& what_arg) : BelosError(what_arg)
     {}};
-  
+
   //@}
 
 
@@ -113,22 +142,22 @@ class CGIteration : virtual public Iteration<ScalarType,MV,OP> {
   public:
 
   //! @name State methods
-  //@{ 
+  //@{
   /*! \brief Initialize the solver to an iterate, providing a complete state.
    *
-   * The %CGIteration contains a certain amount of state, consisting of the current 
+   * The %CGIteration contains a certain amount of state, consisting of the current
    * residual, preconditioned residual, and decent direction.
    *
    * initialize() gives the user the opportunity to manually set these,
    * although only the current unpreconditioned residual is required.
    *
-   * \post 
+   * \post
    * <li>isInitialized() == \c true (see post-conditions of isInitialize())
    *
-   * \note For any pointer in \c newstate which directly points to the multivectors in 
+   * \note For any pointer in \c newstate which directly points to the multivectors in
    * the solver, the data is not copied.
    */
-  virtual void initializeCG(CGIterationState<ScalarType,MV>& newstate) = 0;
+  virtual void initializeCG(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > newstate, Teuchos::RCP<MV> R_0) = 0;
 
   /*! \brief Get the current state of the linear solver.
    *
@@ -136,7 +165,9 @@ class CGIteration : virtual public Iteration<ScalarType,MV,OP> {
    *
    * \returns A CGIterationState object containing const pointers to the current solver state.
    */
-  virtual CGIterationState<ScalarType,MV> getState() const = 0;
+  virtual Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > getState() const = 0;
+
+  virtual void setState(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > state) = 0;
   //@}
 
 
@@ -146,7 +177,7 @@ class CGIteration : virtual public Iteration<ScalarType,MV,OP> {
   //! Gets the diagonal for condition estimation
   virtual Teuchos::ArrayView<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getDiag() = 0;
 
-  //! Gets the off-diagonal for condition estimation 
+  //! Gets the off-diagonal for condition estimation
   virtual Teuchos::ArrayView<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getOffDiag() = 0;
 
 };
diff --git a/packages/belos/src/BelosCGSingleRedIter.hpp b/packages/belos/src/BelosCGSingleRedIter.hpp
index 30d0546357b4..0620a27054f1 100644
--- a/packages/belos/src/BelosCGSingleRedIter.hpp
+++ b/packages/belos/src/BelosCGSingleRedIter.hpp
@@ -31,53 +31,133 @@
 #include "Teuchos_ParameterList.hpp"
 #include "Teuchos_TimeMonitor.hpp"
 
-/*!	
+/*!
   \class Belos::CGSingleRedIter
-  
+
   \brief This class implements the preconditioned single-reduction Conjugate Gradient (CG) iteration.
 
   \ingroup belos_solver_framework
- 
+
   \author Heidi Thornquist
 */
 
 namespace Belos {
-  
+
+//! @name CGSingleRedIteration Structures
+  //@{
+
+  /** \brief Structure to contain pointers to CGSingleRedIteration state variables.
+   *
+   * This struct is utilized by CGSingleRedIteration::initialize() and CGSingleRedIteration::getState().
+   */
+  template <class ScalarType, class MV>
+  class CGSingleRedIterationState : public CGIterationStateBase<ScalarType, MV> {
+  public:
+    CGSingleRedIterationState() = default;
+
+    //! Allocate all state vectors by cloning \c tmp (this iteration uses exactly one vector).
+    explicit CGSingleRedIterationState(Teuchos::RCP<const MV> tmp) {
+      initialize(tmp, 1);
+    }
+
+    virtual ~CGSingleRedIterationState() = default;
+
+    //! (Re)allocate W = (AZ, R, Z) and V = (AP, P) as clones of \c tmp and create the views into them; \c _numVectors must be 1.
+    void initialize(Teuchos::RCP<const MV> tmp, int _numVectors) override {
+      using MVT = MultiVecTraits<ScalarType, MV>;
+      TEUCHOS_ASSERT(_numVectors == 1);
+
+      // W = (AZ, R, Z)
+      W = MVT::Clone( *tmp, 3 );
+      std::vector<int> index2(2,0);
+      std::vector<int> index(1,0);
+
+      // S = (AZ, R)
+      index2[0] = 0;
+      index2[1] = 1;
+      S = MVT::CloneViewNonConst( *W, index2 );
+
+      // U = (AZ, Z)
+      index2[0] = 0;
+      index2[1] = 2;
+      U = MVT::CloneViewNonConst( *W, index2 );
+
+      index[0] = 1;
+      this->R = MVT::CloneViewNonConst( *W, index );
+      index[0] = 0;
+      AZ = MVT::CloneViewNonConst( *W, index );
+      index[0] = 2;
+      this->Z = MVT::CloneViewNonConst( *W, index );
+
+      // T = (R, Z)
+      index2[0] = 1;
+      index2[1] = 2;
+      T = MVT::CloneViewNonConst( *W, index2 );
+
+      // V = (AP, P)
+      V = MVT::Clone( *tmp, 2 );
+      index[0] = 0;
+      this->AP = MVT::CloneViewNonConst( *V, index );
+      index[0] = 1;
+      this->P = MVT::CloneViewNonConst( *V, index );
+
+      CGIterationStateBase<ScalarType, MV>::initialize(tmp, _numVectors);
+    }
+
+    //! True when the base-class check passes and W, V and all views into W are allocated.
+    bool matches(Teuchos::RCP<const MV> tmp, int _numVectors=1) const override {
+      return (CGIterationStateBase<ScalarType, MV>::matches(tmp, _numVectors) &&
+              !W.is_null() &&
+              !V.is_null() &&
+              !U.is_null() &&
+              !S.is_null() &&
+              !T.is_null() &&
+              !AZ.is_null());
+    }
+
+    Teuchos::RCP<MV> W;   //!< Storage for (AZ, R, Z).
+    Teuchos::RCP<MV> V;   //!< Storage for (AP, P).
+    Teuchos::RCP<MV> U;   //!< View (AZ, Z) of W.
+    Teuchos::RCP<MV> S;   //!< View (AZ, R) of W.
+    Teuchos::RCP<MV> T;   //!< View (R, Z) of W.
+    Teuchos::RCP<MV> AZ;  //!< View of column 0 of W.
+  };
+
 template<class ScalarType, class MV, class OP>
 class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
 
   public:
-    
+
   //
   // Convenience typedefs
   //
-  typedef MultiVecTraits<ScalarType,MV> MVT;
-  typedef OperatorTraits<ScalarType,MV,OP> OPT;
-  typedef Teuchos::ScalarTraits<ScalarType> SCT;
-  typedef typename SCT::magnitudeType MagnitudeType;
+  using MVT = MultiVecTraits<ScalarType, MV>;
+  using OPT = OperatorTraits<ScalarType, MV, OP>;
+  using SCT = Teuchos::ScalarTraits<ScalarType>;
+  using MagnitudeType = typename SCT::magnitudeType;
 
   //! @name Constructors/Destructor
-  //@{ 
+  //@{
 
   /*! \brief %CGSingleRedIter constructor with linear problem, solver utilities, and parameter list of solver options.
    *
    * This constructor takes pointers required by the linear solver iteration, in addition
    * to a parameter list of options for the linear solver.
    */
-  CGSingleRedIter( const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > &problem, 
+  CGSingleRedIter( const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > &problem,
                    const Teuchos::RCP<OutputManager<ScalarType> > &printer,
                    const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
                    const Teuchos::RCP<StatusTestGenResNorm<ScalarType,MV,OP> > &convTester,
                    Teuchos::ParameterList &params );
 
   //! Destructor.
-  virtual ~CGSingleRedIter() {};
+  virtual ~CGSingleRedIter() = default;
   //@}
 
 
   //! @name Solver methods
-  //@{ 
-  
+  //@{
+
   /*! \brief This method performs CG iterations until the status
    * test indicates the need to stop or an error occurs (in which case, an
    * std::exception is thrown).
@@ -86,7 +166,7 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
    * not, it will call initialize() using default arguments. After
    * initialization, the solver performs CG iterations until the
    * status test evaluates as ::Passed, at which point the method returns to
-   * the caller. 
+   * the caller.
    *
    * The status test is queried at the beginning of the iteration.
    */
@@ -94,53 +174,72 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
 
   /*! \brief Initialize the solver to an iterate, providing a complete state.
    *
-   * The %CGSingleRedIter contains a certain amount of state, consisting of the current 
+   * The %CGSingleRedIter contains a certain amount of state, consisting of the current
    * residual, preconditioned residual, and decent direction.
    *
    * initialize() gives the user the opportunity to manually set these,
    * although only the current unpreconditioned residual is required.
    *
-   * \post 
+   * \post
    * <li>isInitialized() == \c true (see post-conditions of isInitialize())
    *
-   * \note For any pointer in \c newstate which directly points to the multivectors in 
+   * \note For any pointer in \c newstate which directly points to the multivectors in
    * the solver, the data is not copied.
    */
-  void initializeCG(CGIterationState<ScalarType,MV>& newstate);
+  void initializeCG(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > newstate, Teuchos::RCP<MV> R_0);
 
   /*! \brief Initialize the solver with the initial vectors from the linear problem
    *  or random data.
    */
   void initialize()
   {
-    CGIterationState<ScalarType,MV> empty;
-    initializeCG(empty);
+    initializeCG(Teuchos::null, Teuchos::null); // NOTE(review): initializeCG() asserts newstate is non-null, so this path appears to always throw -- confirm intent
   }
-  
+
   /*! \brief Get the current state of the linear solver.
    *
    * The data is only valid if isInitialized() == \c true.
    *
-   * \returns A CGIterationState object containing const pointers to the current solver state.
+   * \returns An RCP to a new CGSingleRedIterationState whose members reference the solver's current state vectors.
    */
-  CGIterationState<ScalarType,MV> getState() const {
-    CGIterationState<ScalarType,MV> state;
-    state.R = R_;
-    state.P = P_;
-    state.AP = AP_;
-    state.Z = Z_;
+  Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > getState() const {
+    auto state = Teuchos::rcp(new CGSingleRedIterationState<ScalarType,MV>());
+    state->W = W_;
+    state->V = V_;
+    state->U = U_;
+    state->S = S_;
+    state->T = T_;
+    state->R = R_;
+    state->Z = Z_;
+    state->P = P_;
+    state->AP = AP_;
+    state->AZ = AZ_;
     return state;
   }
 
+  //! Adopt the multivectors of \c state as solver storage; the cast below uses throw_on_fail = true.
+  void setState(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > state) {
+    const auto redState = Teuchos::rcp_dynamic_cast<CGSingleRedIterationState<ScalarType,MV> >(state, true);
+    W_ = redState->W;
+    V_ = redState->V;
+    U_ = redState->U;
+    S_ = redState->S;
+    T_ = redState->T;
+    R_ = redState->R;
+    Z_ = redState->Z;
+    P_ = redState->P;
+    AP_ = redState->AP;
+    AZ_ = redState->AZ;
+  }
   //@}
 
-  
+
   //! @name Status methods
-  //@{ 
+  //@{
 
   //! \brief Get the current iteration count.
   int getNumIters() const { return iter_; }
-  
+
   //! \brief Reset the iteration count.
   void resetNumIters( int iter = 0 ) { iter_ = iter; }
 
@@ -154,9 +253,9 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
   Teuchos::RCP<MV> getCurrentUpdate() const { return Teuchos::null; }
 
   //@}
-  
+
   //! @name Accessor methods
-  //@{ 
+  //@{
 
   //! Get a constant reference to the linear problem.
   const LinearProblem<ScalarType,MV,OP>& getProblem() const { return *lp_; }
@@ -177,7 +276,7 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
   void setDoCondEst(bool /* val */){/*ignored*/}
 
   //! Gets the diagonal for condition estimation (NOT_IMPLEMENTED)
-  Teuchos::ArrayView<MagnitudeType> getDiag() { 
+  Teuchos::ArrayView<MagnitudeType> getDiag() {
     Teuchos::ArrayView<MagnitudeType> temp;
     return temp;
   }
@@ -195,10 +294,7 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
   //
   // Internal methods
   //
-  //! Method for initalizing the state storage needed by CG.
-  void setStateSize();
-  
-  //
+
   // Classes inputed through constructor that define the linear problem to be solved.
   //
   const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> >    lp_;
@@ -206,7 +302,7 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
   const Teuchos::RCP<StatusTest<ScalarType,MV,OP> >       stest_;
   const Teuchos::RCP<StatusTestGenResNorm<ScalarType,MV,OP> >       convTest_;
 
-  //  
+  //
   // Current solver state
   //
   // initialized_ specifies that the basis vectors have been initialized and the iterate() routine
@@ -214,11 +310,6 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
   // For the implications of the state of initialized_, please see documentation for initialize()
   bool initialized_;
 
-  // stateStorageInitialized_ specifies that the state storage has been initialized.
-  // This initialization may be postponed if the linear problem was generated without 
-  // the right-hand side or solution vectors.
-  bool stateStorageInitialized_;
-
   // Current number of iterations performed.
   int iter_;
 
@@ -229,8 +320,8 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
   ScalarType rHz_;
   // <r,r>
   ScalarType rHr_;
-  
-  // 
+
+  //
   // State Storage
   //
   // Residual
@@ -260,7 +351,7 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
   //////////////////////////////////////////////////////////////////////////////////////////////////
   // Constructor.
   template<class ScalarType, class MV, class OP>
-  CGSingleRedIter<ScalarType,MV,OP>::CGSingleRedIter(const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > &problem, 
+  CGSingleRedIter<ScalarType,MV,OP>::CGSingleRedIter(const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > &problem,
 						     const Teuchos::RCP<OutputManager<ScalarType> > &printer,
 						     const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
                                                      const Teuchos::RCP<StatusTestGenResNorm<ScalarType,MV,OP> > &convTester,
@@ -270,106 +361,38 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
     stest_(tester),
     convTest_(convTester),
     initialized_(false),
-    stateStorageInitialized_(false),
     iter_(0)
   {
     foldConvergenceDetectionIntoAllreduce_ = params.get<bool>("Fold Convergence Detection Into Allreduce",false);
   }
 
-  //////////////////////////////////////////////////////////////////////////////////////////////////
-  // Setup the state storage.
-  template <class ScalarType, class MV, class OP>
-  void CGSingleRedIter<ScalarType,MV,OP>::setStateSize ()
-  {
-    if (!stateStorageInitialized_) {
-
-      // Check if there is any multivector to clone from.
-      Teuchos::RCP<const MV> lhsMV = lp_->getLHS();
-      Teuchos::RCP<const MV> rhsMV = lp_->getRHS();
-      if (lhsMV == Teuchos::null && rhsMV == Teuchos::null) {
-	stateStorageInitialized_ = false;
-	return;
-      }
-      else {
-	
-	// Initialize the state storage
-	// If the subspace has not be initialized before, generate it using the LHS or RHS from lp_.
-	if (R_ == Teuchos::null) {
-	  // Get the multivector that is not null.
-	  Teuchos::RCP<const MV> tmp = ( (rhsMV!=Teuchos::null)? rhsMV: lhsMV );
-	  TEUCHOS_TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::invalid_argument,
-			     "Belos::CGSingleRedIter::setStateSize(): linear problem does not specify multivectors to clone from.");
-
-          // W_ = (AZ_, R_, Z_)
-          W_ = MVT::Clone( *tmp, 3 );
-          std::vector<int> index2(2,0);
-          std::vector<int> index(1,0);
-
-          // S_ = (AZ_, R_)
-          index2[0] = 0;
-          index2[1] = 1;
-          S_ = MVT::CloneViewNonConst( *W_, index2 );
-
-          // U_ = (AZ_, Z_)
-          index2[0] = 0;
-          index2[1] = 2;
-          U_ = MVT::CloneViewNonConst( *W_, index2 );
-
-          index[0] = 1;
-          R_ = MVT::CloneViewNonConst( *W_, index );
-          index[0] = 0;
-          AZ_ = MVT::CloneViewNonConst( *W_, index );
-          index[0] = 2;
-          Z_ = MVT::CloneViewNonConst( *W_, index );
-
-          // T_ = (R_, Z_)
-          index2[0] = 1;
-          index2[1] = 2;
-          T_ = MVT::CloneViewNonConst( *W_, index2 );
-
-          // V_ = (AP_, P_)
-          V_ = MVT::Clone( *tmp, 2 );
-          index[0] = 0;
-          AP_ = MVT::CloneViewNonConst( *V_, index );
-          index[0] = 1;
-	  P_ = MVT::CloneViewNonConst( *V_, index );
-
-	}
-	
-	// State storage has now been initialized.
-	stateStorageInitialized_ = true;
-      }
-    }
-  }
-
-
   //////////////////////////////////////////////////////////////////////////////////////////////////
   // Initialize this iteration object
   template <class ScalarType, class MV, class OP>
-  void CGSingleRedIter<ScalarType,MV,OP>::initializeCG(CGIterationState<ScalarType,MV>& newstate)
+  void CGSingleRedIter<ScalarType,MV,OP>::initializeCG(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > newstate, Teuchos::RCP<MV> R_0)
   {
     // Initialize the state storage if it isn't already.
-    if (!stateStorageInitialized_) 
-      setStateSize();
+    Teuchos::RCP<const MV> lhsMV = lp_->getLHS();
+    Teuchos::RCP<const MV> rhsMV = lp_->getRHS();
+    Teuchos::RCP<const MV> tmp = ( (rhsMV!=Teuchos::null)? rhsMV: lhsMV );
+    TEUCHOS_ASSERT(!newstate.is_null());
+    if (!Teuchos::rcp_dynamic_cast<CGSingleRedIterationState<ScalarType,MV> >(newstate, true)->matches(tmp, 1))
+      newstate->initialize(tmp, 1);
+    setState(newstate);
 
-    TEUCHOS_TEST_FOR_EXCEPTION(!stateStorageInitialized_,std::invalid_argument,
-		       "Belos::CGSingleRedIter::initialize(): Cannot initialize state storage!");
-    
-    // NOTE:  In CGSingleRedIter R_, the initial residual, is required!!!  
-    //
     std::string errstr("Belos::CGSingleRedIter::initialize(): Specified multivectors must have a consistent length and width.");
 
-    if (newstate.R != Teuchos::null) {
+    { // Validate the caller-supplied residual R_0 against the state storage adopted above.
 
-      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.R) != MVT::GetGlobalLength(*R_),
+      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*R_0) != MVT::GetGlobalLength(*R_),
                           std::invalid_argument, errstr );
-      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) != 1,
+      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*R_0) != 1,
                           std::invalid_argument, errstr );
 
       // Copy basis vectors from newstate into V
-      if (newstate.R != R_) {
+      if (R_0 != R_) {
         // copy over the initial residual (unpreconditioned).
-	MVT::Assign( *newstate.R, *R_ );
+	MVT::Assign( *R_0, *R_ );
       }
 
       // Compute initial direction vectors
@@ -378,14 +401,14 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
       if ( lp_->getLeftPrec() != Teuchos::null ) {
         lp_->applyLeftPrec( *R_, *Z_ );
         if ( lp_->getRightPrec() != Teuchos::null ) {
-          Teuchos::RCP<MV> tmp = MVT::Clone( *Z_, 1 );
-          lp_->applyRightPrec( *Z_, *tmp );
-          MVT::Assign( *tmp, *Z_ );
+          Teuchos::RCP<MV> tmp2 = MVT::Clone( *Z_, 1 );
+          lp_->applyRightPrec( *Z_, *tmp2 );
+          MVT::Assign( *tmp2, *Z_ );
         }
       }
       else if ( lp_->getRightPrec() != Teuchos::null ) {
         lp_->applyRightPrec( *R_, *Z_ );
-      } 
+      }
       else {
         MVT::Assign( *R_, *Z_ );
       }
@@ -397,11 +420,6 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
       // Logically, AP_ := AZ_
       MVT::Assign( *U_, *V_);
     }
-    else {
-
-      TEUCHOS_TEST_FOR_EXCEPTION(newstate.R == Teuchos::null,std::invalid_argument,
-                         "Belos::CGSingleRedIter::initialize(): CGIterationState does not have initial residual.");
-    }
 
     // The solver is initialized
     initialized_ = true;
@@ -432,23 +450,26 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
     //
     // Allocate/initialize data structures
     //
-    if (initialized_ == false) {
+    if (!initialized_) {
       initialize();
     }
 
     // Allocate memory for scalars.
     Teuchos::SerialDenseMatrix<int,ScalarType> sHz( 2, 1 );
     Teuchos::SerialDenseMatrix<int,ScalarType> sHt( 2, 2 );
-    ScalarType rHz_old, alpha, beta, delta;
+    ScalarType rHz_old;
+    ScalarType alpha;
+    ScalarType beta;
+    ScalarType delta;
 
     // Create convenience variables for zero and one.
     const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
     const MagnitudeType zero = Teuchos::ScalarTraits<MagnitudeType>::zero();
-    
+
     // Get the current solution vector.
     Teuchos::RCP<MV> cur_soln_vec = lp_->getCurrLHSVec();
 
-    // Check that the current solution vector only has one column. 
+    // Check that the current solution vector only has one column.
     TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*cur_soln_vec) != 1, CGIterateFailure,
                         "Belos::CGSingleRedIter::iterate(): current linear system has more than one vector!" );
 
@@ -472,7 +493,7 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
     // Check that alpha is a positive number!
     TEUCHOS_TEST_FOR_EXCEPTION( SCT::real(alpha) <= zero, CGPositiveDefiniteFailure,
       "Belos::CGSingleRedIter::iterate(): non-positive value for p^H*A*p encountered!" );
- 
+
     ////////////////////////////////////////////////////////////////
     // Iterate until the status test tells us to stop.
     //
@@ -480,7 +501,7 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
       ////////////////////////////////////////////////////////////////
       // Iterate until the status test tells us to stop.
       //
-      while (1) {
+      while (true) {
 
         // Update the solution vector x := x + alpha * P_
         //
@@ -546,8 +567,8 @@ class CGSingleRedIter : virtual public CGIteration<ScalarType,MV,OP> {
       ////////////////////////////////////////////////////////////////
       // Iterate until the status test tells us to stop.
       //
-      while (1) {
-      
+      while (true) {
+
         // Update the solution vector x := x + alpha * P_
         //
         MVT::MvAddMv( one, *cur_soln_vec, alpha, *P_, *cur_soln_vec );
diff --git a/packages/belos/src/BelosPseudoBlockCGIter.hpp b/packages/belos/src/BelosPseudoBlockCGIter.hpp
index c7cdd1a2ede2..1260d10d356e 100644
--- a/packages/belos/src/BelosPseudoBlockCGIter.hpp
+++ b/packages/belos/src/BelosPseudoBlockCGIter.hpp
@@ -25,6 +25,7 @@
 #include "BelosOperatorTraits.hpp"
 #include "BelosMultiVecTraits.hpp"
 
+#include "Teuchos_Assert.hpp"
 #include "Teuchos_SerialDenseMatrix.hpp"
 #include "Teuchos_SerialDenseVector.hpp"
 #include "Teuchos_ScalarTraits.hpp"
@@ -44,6 +45,40 @@
 
 namespace Belos {
 
+  //! @name PseudoBlockCGIteration Structures
+  //@{
+
+  /** \brief Class holding pointers to the PseudoBlockCGIter state variables (R, Z, P, AP).
+   *
+   * This class is utilized by PseudoBlockCGIter::initializeCG(), PseudoBlockCGIter::getState() and PseudoBlockCGIter::setState().
+   */
+  template <class ScalarType, class MV>
+  class PseudoBlockCGIterationState : public CGIterationStateBase<ScalarType, MV> {
+
+  public:
+    PseudoBlockCGIterationState() = default;
+    //! Construct and allocate: R, Z, P and AP become \c _numVectors -column clones of \c tmp.
+    PseudoBlockCGIterationState(Teuchos::RCP<const MV> tmp, int _numVectors = 1) {
+      initialize(tmp, _numVectors);
+    }
+
+    virtual ~PseudoBlockCGIterationState() = default;
+    //! Allocate R, Z, P and AP from \c tmp, then forward to CGIterationStateBase::initialize().
+    void initialize(Teuchos::RCP<const MV> tmp, int _numVectors) {
+      using MVT = MultiVecTraits<ScalarType, MV>;
+      this->R = MVT::Clone( *tmp, _numVectors );
+      this->Z = MVT::Clone( *tmp, _numVectors );
+      this->P = MVT::Clone( *tmp, _numVectors );
+      this->AP = MVT::Clone( *tmp, _numVectors );
+
+      CGIterationStateBase<ScalarType, MV>::initialize(tmp, _numVectors);
+    }
+    //! Forwards to CGIterationStateBase::matches(); this class adds no checks of its own.
+    bool matches(Teuchos::RCP<const MV> tmp, int _numVectors=1) const {
+      return CGIterationStateBase<ScalarType, MV>::matches(tmp, _numVectors);
+    }
+  };
+
   template<class ScalarType, class MV, class OP>
   class PseudoBlockCGIter : virtual public CGIteration<ScalarType,MV,OP> {
 
@@ -52,10 +87,10 @@ namespace Belos {
     //
     // Convenience typedefs
     //
-    typedef MultiVecTraits<ScalarType,MV> MVT;
-    typedef OperatorTraits<ScalarType,MV,OP> OPT;
-    typedef Teuchos::ScalarTraits<ScalarType> SCT;
-    typedef typename SCT::magnitudeType MagnitudeType;
+    using MVT = MultiVecTraits<ScalarType, MV>;
+    using OPT = OperatorTraits<ScalarType, MV, OP>;
+    using SCT = Teuchos::ScalarTraits<ScalarType>;
+    using MagnitudeType = typename SCT::magnitudeType;
 
     //! @name Constructors/Destructor
     //@{
@@ -71,7 +106,7 @@ namespace Belos {
                           Teuchos::ParameterList &params );
 
     //! Destructor.
-    virtual ~PseudoBlockCGIter() {};
+    virtual ~PseudoBlockCGIter() = default;
     //@}
 
 
@@ -113,15 +148,14 @@ namespace Belos {
      * \note For any pointer in \c newstate which directly points to the multivectors in
      * the solver, the data is not copied.
      */
-    void initializeCG(CGIterationState<ScalarType,MV>& newstate);
+    void initializeCG(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > newstate, Teuchos::RCP<MV> R_0);
 
     /*! \brief Initialize the solver with the initial vectors from the linear problem
      *  or random data.
      */
     void initialize()
     {
-      CGIterationState<ScalarType,MV> empty;
-      initializeCG(empty);
+      initializeCG(Teuchos::null, Teuchos::null); // NOTE(review): initializeCG() asserts newstate is non-null, so this path appears to always throw -- confirm intent
     }
 
     /*! \brief Get the current state of the linear solver.
@@ -131,15 +165,23 @@ namespace Belos {
      * \returns A CGIterationState object containing const pointers to the current
      * solver state.
      */
-    CGIterationState<ScalarType,MV> getState() const {
-      CGIterationState<ScalarType,MV> state;
-      state.R = R_;
-      state.P = P_;
-      state.AP = AP_;
-      state.Z = Z_;
+    Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > getState() const {
+      auto state = Teuchos::rcp(new PseudoBlockCGIterationState<ScalarType,MV>());
+      state->R = R_;
+      state->P = P_;
+      state->AP = AP_;
+      state->Z = Z_;
       return state;
     }
 
+    //! Adopt the multivectors of \c state as solver storage; the cast below uses throw_on_fail = true.
+    void setState(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > state) {
+      const auto pbState = Teuchos::rcp_dynamic_cast<PseudoBlockCGIterationState<ScalarType,MV> >(state, true);
+      R_ = pbState->R;
+      Z_ = pbState->Z;
+      P_ = pbState->P;
+      AP_ = pbState->AP;
+    }
     //@}
 
 
@@ -185,7 +227,7 @@ namespace Belos {
 
     //! Sets whether or not to store the diagonal for condition estimation
     void setDoCondEst(bool val) {
-     if (numEntriesForCondEst_) doCondEst_=val;
+     if (numEntriesForCondEst_ != 0) doCondEst_=val;
     }
 
     //! Gets the diagonal for condition estimation
@@ -193,7 +235,7 @@ namespace Belos {
       // NOTE (mfh 30 Jul 2015) See note on getOffDiag() below.
       // getDiag() didn't actually throw for me in that case, but why
       // not be cautious?
-      typedef typename Teuchos::ArrayView<MagnitudeType>::size_type size_type;
+      using size_type = typename Teuchos::ArrayView<MagnitudeType>::size_type;
       if (static_cast<size_type> (iter_) >= diag_.size ()) {
         return diag_ ();
       } else {
@@ -208,7 +250,7 @@ namespace Belos {
       // debug mode) when the maximum number of iterations has been
       // reached, because iter_ == offdiag_.size() in that case.  The
       // new logic fixes this.
-      typedef typename Teuchos::ArrayView<MagnitudeType>::size_type size_type;
+      using size_type = typename Teuchos::ArrayView<MagnitudeType>::size_type;
       if (static_cast<size_type> (iter_) >= offdiag_.size ()) {
         return offdiag_ ();
       } else {
@@ -291,8 +333,8 @@ namespace Belos {
   //////////////////////////////////////////////////////////////////////////////////////////////////
   // Initialize this iteration object
   template <class ScalarType, class MV, class OP>
-  void PseudoBlockCGIter<ScalarType,MV,OP>::initializeCG(CGIterationState<ScalarType,MV>& newstate)
-  {
+  void PseudoBlockCGIter<ScalarType, MV, OP>::initializeCG(Teuchos::RCP<CGIterationStateBase<ScalarType, MV> > newstate, Teuchos::RCP<MV> R_0) {
+
     // Check if there is any mltivector to clone from.
     Teuchos::RCP<const MV> lhsMV = lp_->getCurrLHSVec();
     Teuchos::RCP<const MV> rhsMV = lp_->getCurrRHSVec();
@@ -306,14 +348,11 @@ namespace Belos {
     int numRHS = MVT::GetNumberVecs(*tmp);
     numRHS_ = numRHS;
 
-    // Initialize the state storage
-    // If the subspace has not be initialized before or has changed sizes, generate it using the LHS or RHS from lp_.
-    if (Teuchos::is_null(R_) || MVT::GetNumberVecs(*R_)!=numRHS_) {
-      R_ = MVT::Clone( *tmp, numRHS_ );
-      Z_ = MVT::Clone( *tmp, numRHS_ );
-      P_ = MVT::Clone( *tmp, numRHS_ );
-      AP_ = MVT::Clone( *tmp, numRHS_ );
-    }
+    // Initialize the state storage if it isn't already.
+    TEUCHOS_ASSERT(!newstate.is_null());
+    if (!Teuchos::rcp_dynamic_cast<PseudoBlockCGIterationState<ScalarType,MV> >(newstate, true)->matches(tmp, numRHS_))
+      newstate->initialize(tmp, numRHS_);
+    setState(newstate);
 
     // Tracking information for condition number estimation
     if(numEntriesForCondEst_ > 0) {
@@ -321,25 +360,19 @@ namespace Belos {
       offdiag_.resize(numEntriesForCondEst_-1);
     }
 
-    // NOTE:  In CGIter R_, the initial residual, is required!!!
-    //
     std::string errstr("Belos::BlockPseudoCGIter::initialize(): Specified multivectors must have a consistent length and width.");
 
-    // Create convenience variables for zero and one.
-    const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
-    const MagnitudeType zero = Teuchos::ScalarTraits<MagnitudeType>::zero();
-
-    if (!Teuchos::is_null(newstate.R)) {
+    {
 
-      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.R) != MVT::GetGlobalLength(*R_),
+      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*R_0) != MVT::GetGlobalLength(*R_),
                           std::invalid_argument, errstr );
-      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) != numRHS_,
+      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*R_0) != numRHS_,
                           std::invalid_argument, errstr );
 
       // Copy basis vectors from newstate into V
-      if (newstate.R != R_) {
+      if (R_0 != R_) {
         // copy over the initial residual (unpreconditioned).
-        MVT::MvAddMv( one, *newstate.R, zero, *newstate.R, *R_ );
+        MVT::Assign( *R_0, *R_ );
       }
 
       // Compute initial direction vectors
@@ -357,14 +390,9 @@ namespace Belos {
         lp_->applyRightPrec( *R_, *Z_ );
       }
       else {
-        Z_ = R_;
+        MVT::Assign( *R_, *Z_ );
       }
-      MVT::MvAddMv( one, *Z_, zero, *Z_, *P_ );
-    }
-    else {
-
-      TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::is_null(newstate.R),std::invalid_argument,
-                         "Belos::CGIter::initialize(): CGStateIterState does not have initial residual.");
+      MVT::Assign( *Z_, *P_ );
     }
 
     // The solver is initialized
@@ -380,14 +408,17 @@ namespace Belos {
     //
     // Allocate/initialize data structures
     //
-    if (initialized_ == false) {
+    if (!initialized_) {
       initialize();
     }
 
     // Allocate memory for scalars.
     int i=0;
     std::vector<int> index(1);
-    std::vector<ScalarType> rHz( numRHS_ ), rHz_old( numRHS_ ), pAp( numRHS_ ), beta( numRHS_ );
+    std::vector<ScalarType> rHz( numRHS_ );
+    std::vector<ScalarType> rHz_old( numRHS_ );
+    std::vector<ScalarType> pAp( numRHS_ );
+    std::vector<ScalarType> beta( numRHS_ );
     Teuchos::SerialDenseMatrix<int, ScalarType> alpha( numRHS_,numRHS_ );
 
     // Create convenience variables for zero and one.
@@ -477,7 +508,7 @@ namespace Belos {
         index[0] = i;
         Teuchos::RCP<const MV> Z_i = MVT::CloneView( *Z_, index );
         Teuchos::RCP<MV> P_i = MVT::CloneViewNonConst( *P_, index );
-        MVT::MvAddMv( one, *Z_i, beta[i], *P_i, *P_i );       
+        MVT::MvAddMv( one, *Z_i, beta[i], *P_i, *P_i );
       }
 
       // Condition estimate (if needed)
diff --git a/packages/belos/src/BelosPseudoBlockCGSolMgr.hpp b/packages/belos/src/BelosPseudoBlockCGSolMgr.hpp
index 12e7c2169eb2..cbc3727022fa 100644
--- a/packages/belos/src/BelosPseudoBlockCGSolMgr.hpp
+++ b/packages/belos/src/BelosPseudoBlockCGSolMgr.hpp
@@ -14,6 +14,7 @@
  *  \brief The Belos::PseudoBlockCGSolMgr provides a solver manager for the BlockCG linear solver.
 */
 
+#include "BelosCGIteration.hpp"
 #include "BelosConfigDefs.hpp"
 #include "BelosTypes.hpp"
 
@@ -21,7 +22,6 @@
 #include "BelosSolverManager.hpp"
 
 #include "BelosPseudoBlockCGIter.hpp"
-#include "BelosCGSingleRedIter.hpp"
 #include "BelosCGIter.hpp"
 #include "BelosStatusTestMaxIters.hpp"
 #include "BelosStatusTestGenResNorm.hpp"
@@ -79,8 +79,7 @@ namespace Belos {
   {
     static const bool scalarTypeIsSupported =
       Belos::Details::LapackSupportsScalar<ScalarType>::value;
-    typedef Details::SolverManagerRequiresLapack<ScalarType, MV, OP,
-                                                 scalarTypeIsSupported> base_type;
+    using base_type = Details::SolverManagerRequiresLapack<ScalarType, MV, OP, scalarTypeIsSupported>;
 
   public:
     PseudoBlockCGSolMgr () :
@@ -90,7 +89,7 @@ namespace Belos {
                          const Teuchos::RCP<Teuchos::ParameterList> &pl) :
       base_type ()
     {}
-    virtual ~PseudoBlockCGSolMgr () {}
+    virtual ~PseudoBlockCGSolMgr () = default;
 
     Teuchos::RCP<StatusTestGenResNorm<ScalarType,MV,OP> >
     getResidualStatusTest() const { return Teuchos::null; }
@@ -102,11 +101,11 @@ namespace Belos {
     public Details::SolverManagerRequiresLapack<ScalarType, MV, OP, true>
   {
   private:
-    typedef MultiVecTraits<ScalarType,MV> MVT;
-    typedef OperatorTraits<ScalarType,MV,OP> OPT;
-    typedef Teuchos::ScalarTraits<ScalarType> SCT;
-    typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
-    typedef Teuchos::ScalarTraits<MagnitudeType> MT;
+    using MVT = MultiVecTraits<ScalarType, MV>;
+    using OPT = OperatorTraits<ScalarType, MV, OP>;
+    using SCT = Teuchos::ScalarTraits<ScalarType>;
+    using MagnitudeType = typename Teuchos::ScalarTraits<ScalarType>::magnitudeType;
+    using MT = Teuchos::ScalarTraits<MagnitudeType>;
 
   public:
 
@@ -139,7 +138,7 @@ namespace Belos {
                          const Teuchos::RCP<Teuchos::ParameterList> &pl );
 
     //! Destructor.
-    virtual ~PseudoBlockCGSolMgr() {};
+    virtual ~PseudoBlockCGSolMgr() = default;
 
     //! clone for Inverted Injection (DII)
     Teuchos::RCP<SolverManager<ScalarType, MV, OP> > clone () const override {
@@ -315,6 +314,8 @@ namespace Belos {
     ScalarType condEstimate_;
     Teuchos::ArrayRCP<MagnitudeType> eigenEstimates_;
 
+    Teuchos::RCP<CGIterationStateBase<ScalarType, MV> > state_;
+
     // Timers.
     std::string label_;
     Teuchos::RCP<Teuchos::Time> timerSolve_;
@@ -523,8 +524,8 @@ setParameters (const Teuchos::RCP<Teuchos::ParameterList>& params)
   }
 
   // Convergence
-  typedef Belos::StatusTestCombo<ScalarType,MV,OP> StatusTestCombo_t;
-  typedef Belos::StatusTestGenResNorm<ScalarType,MV,OP> StatusTestResNorm_t;
+  using StatusTestCombo_t = Belos::StatusTestCombo<ScalarType, MV, OP>;
+  using StatusTestResNorm_t = Belos::StatusTestGenResNorm<ScalarType, MV, OP>;
 
   // Check for convergence tolerance
   if (params->isParameter ("Convergence Tolerance")) {
@@ -761,9 +762,13 @@ ReturnType PseudoBlockCGSolMgr<ScalarType,MV,OP,true>::solve ()
               foldConvergenceDetectionIntoAllreduce_);
     block_cg_iter =
       Teuchos::rcp (new CGIter<ScalarType,MV,OP> (problem_, printer_, outputTest_, convTest_, plist));
+    if (state_.is_null() || Teuchos::rcp_dynamic_cast<CGIterationState<ScalarType, MV> >(state_).is_null())
+      state_ = Teuchos::rcp(new CGIterationState<ScalarType, MV>());
   } else {
     block_cg_iter =
       Teuchos::rcp (new PseudoBlockCGIter<ScalarType,MV,OP> (problem_, printer_, outputTest_, plist));
+    if (state_.is_null() || Teuchos::rcp_dynamic_cast<PseudoBlockCGIterationState<ScalarType, MV> >(state_).is_null())
+      state_ = Teuchos::rcp(new PseudoBlockCGIterationState<ScalarType, MV>());
   }
 
   // Setup condition estimate
@@ -793,11 +798,9 @@ ReturnType PseudoBlockCGSolMgr<ScalarType,MV,OP,true>::solve ()
       Teuchos::RCP<MV> R_0 = MVT::CloneViewNonConst( *(Teuchos::rcp_const_cast<MV>(problem_->getInitResVec())), currIdx );
 
       // Get a new state struct and initialize the solver.
-      CGIterationState<ScalarType,MV> newState;
-      newState.R = R_0;
-      block_cg_iter->initializeCG(newState);
+      block_cg_iter->initializeCG(state_, R_0);
 
-      while(1) {
+      while(true) {
 
         // tell block_gmres_iter to iterate
         try {
@@ -813,7 +816,7 @@ ReturnType PseudoBlockCGSolMgr<ScalarType,MV,OP,true>::solve ()
 
             // Figure out which linear systems converged.
             std::vector<int> convIdx = Teuchos::rcp_dynamic_cast<StatusTestGenResNorm<ScalarType,MV,OP> >(convTest_)->convIndices();
- 
+
             // If the number of converged linear systems is equal to the
             // number of current linear systems, then we are done with this block.
             if (convIdx.size() == currRHSIdx.size())
@@ -850,7 +853,7 @@ ReturnType PseudoBlockCGSolMgr<ScalarType,MV,OP,true>::solve ()
               compute_condnum_tridiag_sym(diag,offdiag,eigenEstimates_,l_min,l_max,condEstimate_);
 
               // Make sure not to do more condition estimate computations for this solve.
-              block_cg_iter->setDoCondEst(false); 
+              block_cg_iter->setDoCondEst(false);
               condEstPerf = true;
             }
 
@@ -863,9 +866,7 @@ ReturnType PseudoBlockCGSolMgr<ScalarType,MV,OP,true>::solve ()
             for (int i=0; i<have; ++i) { currIdx2[i] = i; }
 
             // Set the new state and initialize the solver.
-            CGIterationState<ScalarType,MV> defstate;
-            defstate.R = R_0;
-            block_cg_iter->initializeCG(defstate);
+            block_cg_iter->initializeCG(state_, R_0);
           }
 
           ////////////////////////////////////////////////////////////////////////////////////
@@ -896,7 +897,7 @@ ReturnType PseudoBlockCGSolMgr<ScalarType,MV,OP,true>::solve ()
           achievedTol_ = MT::one();
           Teuchos::RCP<MV> X = problem_->getLHS();
           MVT::MvInit( *X, SCT::zero() );
-          printer_->stream(Warnings) << "Belos::PseudoBlockCGSolMgr::solve(): Warning! NaN has been detected!" 
+          printer_->stream(Warnings) << "Belos::PseudoBlockCGSolMgr::solve(): Warning! NaN has been detected!"
                                      << std::endl;
           return Unconverged;
         }
@@ -993,7 +994,7 @@ compute_condnum_tridiag_sym (Teuchos::ArrayView<MagnitudeType> diag,
                              ScalarType & lambda_max,
                              ScalarType & ConditionNumber )
 {
-  typedef Teuchos::ScalarTraits<ScalarType> STS;
+  using STS = Teuchos::ScalarTraits<ScalarType>;
 
   /* Copied from az_cg.c: compute_condnum_tridiag_sym */
   /* diag ==      ScalarType vector of size N, containing the diagonal
diff --git a/packages/epetra/doc/Doxyfile.options b/packages/epetra/doc/Doxyfile.options
index 1f68050de429..297acda9b8a7 100755
--- a/packages/epetra/doc/Doxyfile.options
+++ b/packages/epetra/doc/Doxyfile.options
@@ -2,7 +2,7 @@
 # Include the global look and feel options
 #
 @INCLUDE_PATH          = $(TRILINOS_HOME)/packages
-@INCLUDE               = common/Doxyfile
+@INCLUDE               = ../../common/Doxyfile
 #
 # Package options
 #
diff --git a/packages/epetra/doc/DoxyfileWeb b/packages/epetra/doc/DoxyfileWeb
index 4bb390446b34..40aca7bff4ce 100755
--- a/packages/epetra/doc/DoxyfileWeb
+++ b/packages/epetra/doc/DoxyfileWeb
@@ -51,6 +51,12 @@ INPUT                  = ../src ../../aztecoo/src index.doc \
 
 FILE_PATTERNS          = E*.h
 
-EXAMPLE_PATH           = ../example/petra_power_method
+EXAMPLE_PATH           = ../example/petra_power_method/cxx_main.cpp \
+                         ../example/Lessons/Lesson01-Init \
+                         ../example/Lessons/Lesson02-Map-Vector \
+                         ../example/Lessons/Lesson03-Power-Method \
+                         ../example/Lessons/Lesson04-Sparse-Matrix-Fill \
+                         ../example/Lessons/Lesson05-Redistribution
+
 
 GENERATE_TAGFILE       = ../../common/tag_files/epetra.tag
diff --git a/packages/epetra/doc/build_docs b/packages/epetra/doc/build_docs
index 05a274ca6d5c..308544c20457 100755
--- a/packages/epetra/doc/build_docs
+++ b/packages/epetra/doc/build_docs
@@ -13,14 +13,10 @@ echo
 
 doxygen DoxyfileWeb
 
-echo
-echo "Generating epetra/thyra doxygen documentation ..."
-echo
-
-doxygen ../thyra/doc/Doxyfile
-
-echo
-echo "Generating doxygen browser documentation for all of epetra ..."
-echo
+# --  Uncomment the lines below to also generate the
+# --  "Epetra Package Browser (Single Doxygen Collection)" documentation.
+#echo
+#echo "Generating doxygen browser documentation for all of epetra ..."
+#echo
 
-doxygen ../browser/doc/Doxyfile
+#doxygen ../browser/doc/Doxyfile
diff --git a/packages/epetra/src/Epetra_BasicRowMatrix.h b/packages/epetra/src/Epetra_BasicRowMatrix.h
index 1043f234fd76..6f98dddeda74 100644
--- a/packages/epetra/src/Epetra_BasicRowMatrix.h
+++ b/packages/epetra/src/Epetra_BasicRowMatrix.h
@@ -323,7 +323,7 @@ class EPETRA_LIB_DLL_EXPORT Epetra_BasicRowMatrix: public Epetra_CompObject, pub
     /*! @brief Returns the infinity norm of the global matrix.
 
        Returns the quantity \f$ \| A \|_\infty\f$ such that
-       \f[\| A \|_\infty = \max_{1\lei\lem} \sum_{j=1}^n |a_{ij}| \f].
+       \f[\| A \|_\infty = \max_{1\leq i\leq m} \sum_{j=1}^n |a_{ij}| \f].
 
      @warning This method is supported if and only if the Epetra_RowMatrix Object that was used to create this supports this method.
 
@@ -333,7 +333,7 @@ class EPETRA_LIB_DLL_EXPORT Epetra_BasicRowMatrix: public Epetra_CompObject, pub
     /*! @brief Returns the one norm of the global matrix.
 
        Returns the quantity \f$ \| A \|_1\f$ such that
-       \f[\| A \|_1= \max_{1\lej\len} \sum_{i=1}^m |a_{ij}| \f].
+       \f[\| A \|_1= \max_{1\leq j\leq n} \sum_{i=1}^m |a_{ij}| \f].
 
      @warning This method is supported if and only if the Epetra_RowMatrix Object that was used to create this supports this method.
 
diff --git a/packages/epetra/src/Epetra_CrsMatrix.h b/packages/epetra/src/Epetra_CrsMatrix.h
index 9393ecdf4f62..a31a328e7382 100644
--- a/packages/epetra/src/Epetra_CrsMatrix.h
+++ b/packages/epetra/src/Epetra_CrsMatrix.h
@@ -1042,14 +1042,14 @@ or if the number of entries in this row exceed the Length parameter.
 
   //! Returns the infinity norm of the global matrix.
   /* Returns the quantity \f$ \| A \|_\infty\f$ such that
-     \f[\| A \|_\infty = \max_{1\lei\lem} \sum_{j=1}^n |a_{ij}| \f]
+     \f[\| A \|_\infty = \max_{1\leq i\leq m} \sum_{j=1}^n |a_{ij}| \f]
      \warning The NormInf() method will not properly calculate the infinity norm for a matrix that has entries that are
      replicated on multiple processors.  */
   double NormInf() const;
 
   //! Returns the one norm of the global matrix.
   /* Returns the quantity \f$ \| A \|_1\f$ such that
-     \f[\| A \|_1= \max_{1\lej\len} \sum_{i=1}^m |a_{ij}| \f].
+     \f[\| A \|_1= \max_{1\leq j\leq n} \sum_{i=1}^m |a_{ij}| \f].
      \warning The NormOne() method will not properly calculate the one norm for a matrix that has entries that are
      replicated on multiple processors.
   */
diff --git a/packages/epetra/src/Epetra_FastCrsMatrix.h b/packages/epetra/src/Epetra_FastCrsMatrix.h
index 863f054d70c0..dcddb677a726 100644
--- a/packages/epetra/src/Epetra_FastCrsMatrix.h
+++ b/packages/epetra/src/Epetra_FastCrsMatrix.h
@@ -106,7 +106,7 @@ class Epetra_FastCrsOperator: public Epetra_CompObject, public virtual Epetra_Op
 
   //! Returns the infinity norm of the global matrix.
   /* Returns the quantity \f$ \| A \|_\infty\f$ such that
-     \f[\| A \|_\infty = \max_{1\lei\lem} \sum_{j=1}^n |a_{ij}| \f].
+     \f[\| A \|_\infty = \max_{1\leq i\leq m} \sum_{j=1}^n |a_{ij}| \f].
   */
   double NormInf() const {return(CrsMatrix_.NormInf());};
 
diff --git a/packages/epetra/src/Epetra_InvOperator.h b/packages/epetra/src/Epetra_InvOperator.h
index 81a24ccafcd5..1f95b082f882 100644
--- a/packages/epetra/src/Epetra_InvOperator.h
+++ b/packages/epetra/src/Epetra_InvOperator.h
@@ -126,7 +126,7 @@ class Epetra_InvOperator: public virtual Epetra_Operator {
 
   //! Returns the infinity norm of the global matrix.
   /* Returns the quantity \f$ \| A \|_\infty\f$ such that
-     \f[\| A \|_\infty = \max_{1\lei\lem} \sum_{j=1}^n |a_{ij}| \f].
+     \f[\| A \|_\infty = \max_{1\leq i\leq m} \sum_{j=1}^n |a_{ij}| \f].
 
      \warning This method must not be called unless HasNormInf() returns true.
   */
diff --git a/packages/epetra/src/Epetra_Operator.h b/packages/epetra/src/Epetra_Operator.h
index 0385b5a6019d..1bcccc543aa2 100644
--- a/packages/epetra/src/Epetra_Operator.h
+++ b/packages/epetra/src/Epetra_Operator.h
@@ -121,7 +121,7 @@ class EPETRA_LIB_DLL_EXPORT Epetra_Operator {
 
     //! Returns the infinity norm of the global matrix.
     /* Returns the quantity \f$ \| A \|_\infty\f$ such that
-       \f[\| A \|_\infty = \max_{1\lei\lem} \sum_{j=1}^n |a_{ij}| \f].
+       \f[\| A \|_\infty = \max_{1\leq i\leq m} \sum_{j=1}^n |a_{ij}| \f].
 
        \warning This method must not be called unless HasNormInf() returns true.
     */
diff --git a/packages/epetra/src/Epetra_RowMatrix.h b/packages/epetra/src/Epetra_RowMatrix.h
index 0ad7234f928f..6c04107e5ae5 100644
--- a/packages/epetra/src/Epetra_RowMatrix.h
+++ b/packages/epetra/src/Epetra_RowMatrix.h
@@ -218,13 +218,13 @@ class EPETRA_LIB_DLL_EXPORT Epetra_RowMatrix: public virtual Epetra_Operator, pu
 
     //! Returns the infinity norm of the global matrix.
     /* Returns the quantity \f$ \| A \|_\infty\f$ such that
-       \f[\| A \|_\infty = \max_{1\lei\len} \sum_{i=1}^m |a_{ij}| \f].
+       \f[\| A \|_\infty = \max_{1\leq i\leq m} \sum_{j=1}^n |a_{ij}| \f].
     */
     virtual double NormInf() const = 0;
 
     //! Returns the one norm of the global matrix.
     /* Returns the quantity \f$ \| A \|_1\f$ such that
-       \f[\| A \|_1= \max_{1\lej\len} \sum_{j=1}^n |a_{ij}| \f].
+       \f[\| A \|_1= \max_{1\leq j\leq n} \sum_{i=1}^m |a_{ij}| \f].
     */
     virtual double NormOne() const = 0;
 
diff --git a/packages/epetra/src/Epetra_SerialDenseOperator.h b/packages/epetra/src/Epetra_SerialDenseOperator.h
index ce683ef878b6..fe9c1aa905ab 100644
--- a/packages/epetra/src/Epetra_SerialDenseOperator.h
+++ b/packages/epetra/src/Epetra_SerialDenseOperator.h
@@ -117,7 +117,7 @@ class EPETRA_LIB_DLL_EXPORT Epetra_SerialDenseOperator {
 
     //! Returns the infinity norm of the global matrix.
     /* Returns the quantity \f$ \| A \|_\infty\f$ such that
-       \f[\| A \|_\infty = \max_{1\lei\lem} \sum_{j=1}^n |a_{ij}| \f].
+       \f[\| A \|_\infty = \max_{1\leq i\leq m} \sum_{j=1}^n |a_{ij}| \f].
 
        \warning This method must not be called unless HasNormInf() returns true.
     */
diff --git a/packages/epetra/src/Epetra_SerialDenseSVD.h b/packages/epetra/src/Epetra_SerialDenseSVD.h
index d887f8cb8528..3c69950bf59f 100644
--- a/packages/epetra/src/Epetra_SerialDenseSVD.h
+++ b/packages/epetra/src/Epetra_SerialDenseSVD.h
@@ -433,7 +433,7 @@ class EPETRA_LIB_DLL_EXPORT Epetra_SerialDenseSVD : public virtual Epetra_Serial
 
     //! Returns the infinity norm of the global matrix.
     /* Returns the quantity \f$ \| A \|_\infty\f$ such that
-       \f[\| A \|_\infty = \max_{1\lei\lem} \sum_{j=1}^n |a_{ij}| \f].
+       \f[\| A \|_\infty = \max_{1\leq i\leq m} \sum_{j=1}^n |a_{ij}| \f].
 
        \warning This method must not be called unless HasNormInf() returns true.
     */
diff --git a/packages/epetra/src/Epetra_VbrMatrix.h b/packages/epetra/src/Epetra_VbrMatrix.h
index 41c34309df0b..ac187cde4703 100644
--- a/packages/epetra/src/Epetra_VbrMatrix.h
+++ b/packages/epetra/src/Epetra_VbrMatrix.h
@@ -890,14 +890,14 @@ class EPETRA_LIB_DLL_EXPORT Epetra_VbrMatrix : public Epetra_DistObject,
 
     //! Returns the infinity norm of the global matrix.
     /* Returns the quantity \f$ \| A \|_\infty\f$ such that
-       \f[\| A \|_\infty = \max_{1\lei\lem} \sum_{j=1}^n |a_{ij}| \f].
+       \f[\| A \|_\infty = \max_{1\leq i\leq m} \sum_{j=1}^n |a_{ij}| \f].
      \warning The NormInf() method will not properly calculate the infinity norm for a matrix that has entries that are
      replicated on multiple processors.  */
     double NormInf() const;
 
     //! Returns the one norm of the global matrix.
     /* Returns the quantity \f$ \| A \|_1\f$ such that
-       \f[\| A \|_1 = \max_{1\lej\len} \sum_{i=1}^m |a_{ij}| \f].
+       \f[\| A \|_1 = \max_{1\leq j\leq n} \sum_{i=1}^m |a_{ij}| \f].
      \warning The NormOne() method will not properly calculate the one norm for a matrix that has entries that are
     */
     double NormOne() const;
diff --git a/packages/ifpack2/example/CMakeLists.txt b/packages/ifpack2/example/CMakeLists.txt
index 05f1956173d8..3df5e94665cb 100644
--- a/packages/ifpack2/example/CMakeLists.txt
+++ b/packages/ifpack2/example/CMakeLists.txt
@@ -35,6 +35,38 @@ ASSERT_DEFINED (
   ${PACKAGE_NAME}_ENABLE_Galeri
 )
 
+IF(${PACKAGE_NAME}_ENABLE_Xpetra AND ${PACKAGE_NAME}_ENABLE_Galeri)
+# Correctness test with maximum block size (32)
+# Use a small grid so that GPU memory requirement isn't too large
+# Block TriDi
+TRIBITS_ADD_TEST(
+  BlockTriDiagonalSolver
+  NAME BlockTriDiLargeBlock
+  ARGS "--matrixType=Laplace3D --blockSize=32 --nx=20 --ny=20 --nz=20"
+  COMM serial mpi
+  NUM_MPI_PROCS 1-4
+  STANDARD_PASS_OUTPUT
+)
+# Block TriDi with Schur line splitting
+TRIBITS_ADD_TEST(
+  BlockTriDiagonalSolver
+  NAME BlockTriDiLargeBlockSchur
+  ARGS "--matrixType=Laplace3D --blockSize=32 --nx=20 --ny=20 --nz=20 --sublinesPerLine=1 --sublinesPerLineSchur=2"
+  COMM serial mpi
+  NUM_MPI_PROCS 1-4
+  STANDARD_PASS_OUTPUT
+)
+# Block Jacobi
+TRIBITS_ADD_TEST(
+  BlockTriDiagonalSolver
+  NAME BlockTriDiLargeBlockJacobi
+  ARGS "--matrixType=Laplace3D --blockSize=32 --nx=20 --ny=20 --nz=20 --sublinesPerLine=-1"
+  COMM serial mpi
+  NUM_MPI_PROCS 1-4
+  STANDARD_PASS_OUTPUT
+)
+ENDIF()
+
 IF(${PACKAGE_NAME}_ENABLE_Xpetra AND ${PACKAGE_NAME}_ENABLE_Galeri)
 
   set(blockSize 11)
@@ -87,4 +119,4 @@ IF(${PACKAGE_NAME}_ENABLE_Xpetra AND ${PACKAGE_NAME}_ENABLE_Galeri)
       ENDWHILE()
     ENDIF()
   endforeach()
-ENDIF()
\ No newline at end of file
+ENDIF()
diff --git a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp
index 256400e1470f..e5dd69c1a3d0 100644
--- a/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp
+++ b/packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp
@@ -2750,7 +2750,7 @@ namespace Ifpack2 {
 */
       Kokkos::Experimental::local_deep_copy(member, view1, view2);
     }
-    template<typename MatrixType>
+    template<typename MatrixType, int ScratchLevel>
     struct ExtractAndFactorizeTridiags {
     public:
       using impl_type = BlockHelperDetails::ImplType<MatrixType>;
@@ -2785,6 +2785,8 @@ namespace Ifpack2 {
       using internal_vector_type = typename impl_type::internal_vector_type;
       static constexpr int vector_length = impl_type::vector_length;
       static constexpr int internal_vector_length = impl_type::internal_vector_length;
+      static_assert(vector_length >= internal_vector_length, "Ifpack2 BlockTriDi Numeric: vector_length must be at least as large as internal_vector_length");
+      static_assert(vector_length % internal_vector_length == 0, "Ifpack2 BlockTriDi Numeric: vector_length must be divisible by internal_vector_length");
 
       /// team policy member type
       using team_policy_type = Kokkos::TeamPolicy<execution_space>;
@@ -2812,7 +2814,6 @@ namespace Ifpack2 {
       // diagonal safety
       const magnitude_type tiny;
       const local_ordinal_type vector_loop_size;
-      const local_ordinal_type vector_length_value;
 
       bool hasBlockCrsMatrix;
 
@@ -2873,8 +2874,7 @@ namespace Ifpack2 {
         blocksize_square(blocksize*blocksize),
         // diagonal weight to avoid zero pivots
         tiny(tiny_),
-        vector_loop_size(vector_length/internal_vector_length),
-        vector_length_value(vector_length) {
+        vector_loop_size(vector_length/internal_vector_length) {
           using crs_matrix_type = typename impl_type::tpetra_crs_matrix_type;
           using block_crs_matrix_type = typename impl_type::tpetra_block_crs_matrix_type;
 
@@ -3191,7 +3191,7 @@ namespace Ifpack2 {
         const local_ordinal_type nrows = partptr_sub(subpartidx,1) - partptr_sub(subpartidx,0);
 
         internal_vector_scratch_type_3d_view
-          WW(member.team_scratch(0), blocksize, blocksize, vector_loop_size);
+          WW(member.team_scratch(ScratchLevel), blocksize, blocksize, vector_loop_size);
 
 #ifdef IFPACK2_BLOCKTRIDICONTAINER_USE_PRINTF
         printf("rank = %d, i0 = %d, npacks = %d, nrows = %d, packidx = %d, subpartidx = %d, partidx = %d, local_subpartidx = %d;\n", member.league_rank(), i0, npacks, nrows, packidx, subpartidx, partidx, local_subpartidx);
@@ -3294,7 +3294,7 @@ namespace Ifpack2 {
         (void) npacks;
 
         internal_vector_scratch_type_3d_view
-          WW(member.team_scratch(0), blocksize, num_vectors, vector_loop_size);
+          WW(member.team_scratch(ScratchLevel), blocksize, num_vectors, vector_loop_size);
         if (local_subpartidx == 0) {
           Kokkos::parallel_for
             (Kokkos::ThreadVectorRange(member, vector_loop_size),[&](const int &v) {
@@ -3334,9 +3334,6 @@ namespace Ifpack2 {
         //const local_ordinal_type r0 = part2packrowidx0_sub(partidx,local_subpartidx);
         //const local_ordinal_type nrows = partptr_sub(subpartidx,1) - partptr_sub(subpartidx,0);
 
-        internal_vector_scratch_type_3d_view
-          WW(member.team_scratch(0), blocksize, blocksize, vector_loop_size);
-
         // Compute S = D - C E
 
         const local_ordinal_type local_subpartidx_schur = (local_subpartidx-1)/2;
@@ -3440,7 +3437,7 @@ namespace Ifpack2 {
         const local_ordinal_type nrows = 2*(pack_td_ptr_schur.extent(1)-1);
 
         internal_vector_scratch_type_3d_view
-          WW(member.team_scratch(0), blocksize, blocksize, vector_loop_size);
+          WW(member.team_scratch(ScratchLevel), blocksize, blocksize, vector_loop_size);
         
 #ifdef IFPACK2_BLOCKTRIDICONTAINER_USE_PRINTF
         printf("FactorizeSchurTag rank = %d, i0 = %d, nrows = %d, vector_loop_size = %d;\n", member.league_rank(), i0, nrows, vector_loop_size);
@@ -3477,7 +3474,7 @@ namespace Ifpack2 {
           const local_ordinal_type n_parts = part2packrowidx0_sub.extent(0);
           writeBTDValuesToFile(n_parts, scalar_values, "before.mm");
 
-          policy.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch));
+          policy.set_scratch_size(ScratchLevel, Kokkos::PerTeam(per_team_scratch));
           Kokkos::parallel_for("ExtractAndFactorize::TeamPolicy::run<ExtractAndFactorizeSubLineTag>",
                               policy, *this);
           execution_space().fence();
@@ -3504,7 +3501,7 @@ namespace Ifpack2 {
               Kokkos::TeamPolicy<execution_space,ExtractBCDTag>
                 policy(packindices_schur.extent(0)*packindices_schur.extent(1), team_size, vector_loop_size);
 
-              policy.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch));
+              policy.set_scratch_size(ScratchLevel, Kokkos::PerTeam(per_team_scratch));
               Kokkos::parallel_for("ExtractAndFactorize::TeamPolicy::run<ExtractBCDTag>",
                                   policy, *this);
               execution_space().fence();
@@ -3523,7 +3520,7 @@ namespace Ifpack2 {
               Kokkos::TeamPolicy<execution_space,ComputeETag>
                 policy(packindices_sub.extent(0), team_size, vector_loop_size);
 
-              policy.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch));
+              policy.set_scratch_size(ScratchLevel, Kokkos::PerTeam(per_team_scratch));
               Kokkos::parallel_for("ExtractAndFactorize::TeamPolicy::run<ComputeETag>",
                                   policy, *this);
               execution_space().fence();
@@ -3544,7 +3541,6 @@ namespace Ifpack2 {
             Kokkos::TeamPolicy<execution_space,ComputeSchurTag>
               policy(packindices_schur.extent(0)*packindices_schur.extent(1), team_size, vector_loop_size);
 
-            policy.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch));
             Kokkos::parallel_for("ExtractAndFactorize::TeamPolicy::run<ComputeSchurTag>",
                                 policy, *this);
             writeBTDValuesToFile(part2packrowidx0_sub.extent(0), scalar_values_schur, "after_schur.mm");
@@ -3561,7 +3557,7 @@ namespace Ifpack2 {
             IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NumericPhase::FactorizeSchurTag", FactorizeSchurTag0);
             Kokkos::TeamPolicy<execution_space,FactorizeSchurTag>
               policy(packindices_schur.extent(0), team_size, vector_loop_size);
-            policy.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch));
+            policy.set_scratch_size(ScratchLevel, Kokkos::PerTeam(per_team_scratch));
             Kokkos::parallel_for("ExtractAndFactorize::TeamPolicy::run<FactorizeSchurTag>",
                                 policy, *this);
             execution_space().fence();
@@ -3587,9 +3583,29 @@ namespace Ifpack2 {
                         const BlockHelperDetails::PartInterface<MatrixType> &interf,
                         BlockTridiags<MatrixType> &btdm,
                         const typename BlockHelperDetails::ImplType<MatrixType>::magnitude_type tiny) {
+      using impl_type = BlockHelperDetails::ImplType<MatrixType>;
+      using execution_space = typename impl_type::execution_space;
+      using team_policy_type = Kokkos::TeamPolicy<execution_space>;
+      using internal_vector_scratch_type_3d_view = Scratch<typename impl_type::internal_vector_type_3d_view>;
+
       IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NumericPhase", NumericPhase);
-      ExtractAndFactorizeTridiags<MatrixType> function(btdm, interf, A, G, tiny);
-      function.run();
+
+      int blocksize = btdm.values.extent(1);
+      // Both Kokkos policy vector length and SIMD type vector length are hardcoded in KokkosBatched.
+      // For large block sizes, have to fall back to level 1 scratch.
+      int scratch_required = internal_vector_scratch_type_3d_view::shmem_size(blocksize, blocksize, impl_type::vector_length / impl_type::internal_vector_length);
+      int max_scratch = team_policy_type::scratch_size_max(0);
+
+      if(scratch_required < max_scratch) {
+        // Can use level 0 scratch
+        ExtractAndFactorizeTridiags<MatrixType, 0> function(btdm, interf, A, G, tiny);
+        function.run();
+      }
+      else {
+        // Not enough level 0 scratch, so fall back to level 1
+        ExtractAndFactorizeTridiags<MatrixType, 1> function(btdm, interf, A, G, tiny);
+        function.run();
+      }
       IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
     }
 
@@ -3654,7 +3670,6 @@ namespace Ifpack2 {
           packed_multivector(pmv) {}
 
       // TODO:: modify this routine similar to the team level functions
-      // inline  ---> FIXME HIP: should not need the KOKKOS_INLINE_FUNCTION below...
       KOKKOS_INLINE_FUNCTION
       void
       operator() (const local_ordinal_type &packidx) const {
diff --git a/packages/muelu/adapters/stratimikos/Thyra_MueLuTpetraQ2Q1PreconditionerFactory_def.hpp b/packages/muelu/adapters/stratimikos/Thyra_MueLuTpetraQ2Q1PreconditionerFactory_def.hpp
index 7892138358fb..b1130642b67d 100644
--- a/packages/muelu/adapters/stratimikos/Thyra_MueLuTpetraQ2Q1PreconditionerFactory_def.hpp
+++ b/packages/muelu/adapters/stratimikos/Thyra_MueLuTpetraQ2Q1PreconditionerFactory_def.hpp
@@ -848,7 +848,7 @@ MueLuTpetraQ2Q1PreconditionerFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>:
   for (int i = 0; i < valA.size(); i++) valB[i] = Teuchos::ScalarTraits<SC>::magnitude(valA[i]);
 
   RCP<Matrix> B       = rcp(new CrsMatrixWrap(A.getRowMap(), A.getColMap(), 0));
-  RCP<CrsMatrix> Bcrs = rcp_dynamic_cast<CrsMatrixWrap>(B)->getCrsMatrix();
+  RCP<CrsMatrix> Bcrs = toCrsMatrix(B);
   Bcrs->setAllValues(iaB, jaB, valB);
   Bcrs->expertStaticFillComplete(A.getDomainMap(), A.getRangeMap());
 
diff --git a/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacianOperator_def.hpp b/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacianOperator_def.hpp
index 43648627fae0..9052c2227d8e 100644
--- a/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacianOperator_def.hpp
+++ b/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacianOperator_def.hpp
@@ -42,7 +42,7 @@ ShiftedLaplacianOperator<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
   }
 
   RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > tpA =
-      Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2NonConstTpetraCrs(A);
+      toTpetra(A);
   return tpA->getDomainMap();
 }
 
@@ -63,7 +63,7 @@ ShiftedLaplacianOperator<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
     return Xpetra::toTpetraNonZero(tpbA->getRangeMap());
 
   RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > tpA =
-      Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2NonConstTpetraCrs(A);
+      toTpetra(A);
   return tpA->getRangeMap();
 }
 
diff --git a/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacian_def.hpp b/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacian_def.hpp
index d3ca3e3707f2..13de4708e535 100644
--- a/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacian_def.hpp
+++ b/packages/muelu/adapters/tpetra/MueLu_ShiftedLaplacian_def.hpp
@@ -101,7 +101,7 @@ template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
 void ShiftedLaplacian<Scalar, LocalOrdinal, GlobalOrdinal, Node>::setProblemMatrix(RCP<Matrix>& A) {
   A_ = A;
   if (A_ != Teuchos::null)
-    TpetraA_ = Utilities::Op2NonConstTpetraCrs(A_);
+    TpetraA_ = toTpetra(A_);
 #ifdef HAVE_MUELU_TPETRA_INST_INT_INT
   if (LinearProblem_ != Teuchos::null)
     LinearProblem_->setOperator(TpetraA_);
diff --git a/packages/muelu/example/advanced/blockcrs/BlockCrs.cpp b/packages/muelu/example/advanced/blockcrs/BlockCrs.cpp
index c3a6b969a296..b44846898e33 100644
--- a/packages/muelu/example/advanced/blockcrs/BlockCrs.cpp
+++ b/packages/muelu/example/advanced/blockcrs/BlockCrs.cpp
@@ -281,7 +281,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg
       std::string matrixType = galeriParameters.GetMatrixType();
       RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Axp;
       MueLuExamples::generate_user_matrix_and_nullspace<Scalar, LocalOrdinal, GlobalOrdinal, Node>(matrixType, lib, galeriList, comm, Axp, nullspace);
-      Acrs = Xpetra::Helpers<SC, LO, GO, NO>::Op2NonConstTpetraCrs(Axp);
+      Acrs = toTpetra(Axp);
     }
     // Block this bad boy
     Ablock = Tpetra::convertToBlockCrsMatrix<SC, LO, GO, NO>(*Acrs, blocksize);
diff --git a/packages/muelu/research/max/AdditiveMG/MultiplicativeBricks.cpp b/packages/muelu/research/max/AdditiveMG/MultiplicativeBricks.cpp
index fb661f734451..6fffe13f5a58 100644
--- a/packages/muelu/research/max/AdditiveMG/MultiplicativeBricks.cpp
+++ b/packages/muelu/research/max/AdditiveMG/MultiplicativeBricks.cpp
@@ -195,8 +195,8 @@ int main(int argc, char* argv[]) {
   RCP<ADRXpetraProblem> Pr = ADR::Xpetra::BuildProblem<scalar_type, local_ordinal_type, global_ordinal_type, Map, CrsMatrixWrap, MultiVector>(matrixParameters.GetMatrixType(), xpetraMap, matrixParameters.GetParameterList());
   RCP<Matrix> xpetraA      = Pr->BuildMatrix();
 
-  RCP<crs_matrix_type> A         = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA);
-  RCP<const driver_map_type> map = MueLuUtilities::Map2TpetraMap(*xpetraMap);
+  RCP<crs_matrix_type> A         = toTpetra(xpetraA);
+  RCP<const driver_map_type> map = toTpetra(xpetraMap);
 
   // ===================================================
   // 	Domain Decomposition Preconditioner
@@ -228,7 +228,7 @@ int main(int argc, char* argv[]) {
   // 	===================================
 
   RCP<muelu_tpetra_operator_type> M;
-  M = MueLu::CreateTpetraPreconditioner((RCP<operator_type>)A, mueluParams, Utilities::MV2NonConstTpetraMV(coordinates));
+  M = MueLu::CreateTpetraPreconditioner((RCP<operator_type>)A, mueluParams, toTpetra(coordinates));
 
   RCP<multivector_type> X_muelu = rcp(new multivector_type(map, 1));
   RCP<multivector_type> B       = rcp(new multivector_type(map, 1));
diff --git a/packages/muelu/research/max/AdditiveMG/MultiplicativeStride.cpp b/packages/muelu/research/max/AdditiveMG/MultiplicativeStride.cpp
index c6282f6536b8..9da027918017 100644
--- a/packages/muelu/research/max/AdditiveMG/MultiplicativeStride.cpp
+++ b/packages/muelu/research/max/AdditiveMG/MultiplicativeStride.cpp
@@ -187,11 +187,11 @@ int main(int argc, char *argv[]) {
   RCP<ADRXpetraProblem> Pr = ADR::Xpetra::BuildProblem<scalar_type, local_ordinal_type, global_ordinal_type, Map, CrsMatrixWrap, MultiVector>(matrixParameters.GetMatrixType(), xpetraMap, matrixParameters.GetParameterList());
   RCP<Matrix> xpetraA      = Pr->BuildMatrix();
 
-  RCP<crs_matrix_type> A         = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA);
-  RCP<const driver_map_type> map = MueLuUtilities::Map2TpetraMap(*xpetraMap);
+  RCP<crs_matrix_type> A         = toTpetra(xpetraA);
+  RCP<const driver_map_type> map = toTpetra(xpetraMap);
 
   // Construct a multigrid preconditioner
-  RCP<muelu_tpetra_operator_type> M = MueLu::CreateTpetraPreconditioner((RCP<operator_type>)A, mueluParams, Utilities::MV2NonConstTpetraMV(coordinates));
+  RCP<muelu_tpetra_operator_type> M = MueLu::CreateTpetraPreconditioner((RCP<operator_type>)A, mueluParams, toTpetra(coordinates));
 
   RCP<multivector_type> X = rcp(new multivector_type(map, 1));
   RCP<multivector_type> B = rcp(new multivector_type(map, 1));
diff --git a/packages/muelu/research/max/AdditiveMG/Repartition_ADR.cpp b/packages/muelu/research/max/AdditiveMG/Repartition_ADR.cpp
index 21bb302a06de..ddb10e6fa9a4 100644
--- a/packages/muelu/research/max/AdditiveMG/Repartition_ADR.cpp
+++ b/packages/muelu/research/max/AdditiveMG/Repartition_ADR.cpp
@@ -197,14 +197,14 @@ int main(int argc, char *argv[]) {
   RCP<ADRXpetraProblem> Pr = ADR::Xpetra::BuildProblem<scalar_type, local_ordinal_type, global_ordinal_type, Map, CrsMatrixWrap, MultiVector>(matrixParameters.GetMatrixType(), xpetraMap, matrixParameters.GetParameterList());
   RCP<Matrix> xpetraA      = Pr->BuildMatrix();
 
-  RCP<crs_matrix_type> A         = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA);
-  RCP<const driver_map_type> map = MueLuUtilities::Map2TpetraMap(*xpetraMap);
+  RCP<crs_matrix_type> A         = toTpetra(xpetraA);
+  RCP<const driver_map_type> map = toTpetra(xpetraMap);
 
   //
   // Construct a multigrid preconditioner
   //
 
-  RCP<muelu_tpetra_operator_type> M = MueLu::CreateTpetraPreconditioner((RCP<operator_type>)A, mueluParams, Utilities::MV2NonConstTpetraMV(coordinates));
+  RCP<muelu_tpetra_operator_type> M = MueLu::CreateTpetraPreconditioner((RCP<operator_type>)A, mueluParams, toTpetra(coordinates));
 
   RCP<multivector_type> X = rcp(new multivector_type(map, 1));
   RCP<multivector_type> B = rcp(new multivector_type(map, 1));
diff --git a/packages/muelu/research/max/AdditiveMG/Smooth_Prolongation.cpp b/packages/muelu/research/max/AdditiveMG/Smooth_Prolongation.cpp
index c9a339e80194..027a1d4db8c2 100644
--- a/packages/muelu/research/max/AdditiveMG/Smooth_Prolongation.cpp
+++ b/packages/muelu/research/max/AdditiveMG/Smooth_Prolongation.cpp
@@ -133,8 +133,8 @@ coarse_file_sublist.set("R", "{1}");*/
   if (L->IsAvailable("R"))
     restr = L->template Get<RCP<Xpetra::Matrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type>>>("R");
 
-  RCP<crs_matrix_type> tpetra_prolong = MueLuUtilities::Op2NonConstTpetraCrs(prolong);
-  RCP<crs_matrix_type> tpetra_restr   = MueLuUtilities::Op2NonConstTpetraCrs(restr);
+  RCP<crs_matrix_type> tpetra_prolong = toTpetra(prolong);
+  RCP<crs_matrix_type> tpetra_restr   = toTpetra(restr);
 
   int mypid = GlobalComm_->getRank();
   GlobalComm_->barrier();
diff --git a/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveBricks.cpp b/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveBricks.cpp
index 3c1e7ee2f743..d6e59a54f1c0 100644
--- a/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveBricks.cpp
+++ b/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveBricks.cpp
@@ -201,8 +201,8 @@ int main(int argc, char* argv[]) {
   RCP<ADRXpetraProblem> Pr = ADR::Xpetra::BuildProblem<scalar_type, local_ordinal_type, global_ordinal_type, Map, CrsMatrixWrap, MultiVector>(matrixParameters.GetMatrixType(), xpetraMap, matrixParameters.GetParameterList());
   RCP<Matrix> xpetraA      = Pr->BuildMatrix();
 
-  RCP<crs_matrix_type> A         = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA);
-  RCP<const driver_map_type> map = MueLuUtilities::Map2TpetraMap(*xpetraMap);
+  RCP<crs_matrix_type> A         = toTpetra(xpetraA);
+  RCP<const driver_map_type> map = toTpetra(xpetraMap);
 
   // ===================================================
   // 	Domain Decomposition Preconditioner
@@ -256,8 +256,8 @@ int main(int argc, char* argv[]) {
   if (L->IsAvailable("R"))
     restr = L->template Get<RCP<Xpetra::Matrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type>>>("R");
 
-  RCP<crs_matrix_type> tpetra_prolong = MueLuUtilities::Op2NonConstTpetraCrs(prolong);
-  RCP<crs_matrix_type> tpetra_restr   = MueLuUtilities::Op2NonConstTpetraCrs(restr);
+  RCP<crs_matrix_type> tpetra_prolong = toTpetra(prolong);
+  RCP<crs_matrix_type> tpetra_restr   = toTpetra(restr);
 
 #include <Teuchos_TimeMonitor.hpp>
   RCP<Teuchos::Time> PbarSetUp = Teuchos::TimeMonitor::getNewCounter("Pbar: SetUp");
diff --git a/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveStride.cpp b/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveStride.cpp
index 3d0af82af399..efffaf9b1c26 100644
--- a/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveStride.cpp
+++ b/packages/muelu/research/max/AdditiveMG/SmoothedAdditiveStride.cpp
@@ -190,8 +190,8 @@ int main(int argc, char* argv[]) {
   RCP<ADRXpetraProblem> Pr = ADR::Xpetra::BuildProblem<scalar_type, local_ordinal_type, global_ordinal_type, Map, CrsMatrixWrap, MultiVector>(matrixParameters.GetMatrixType(), xpetraMap, matrixParameters.GetParameterList());
   RCP<Matrix> xpetraA      = Pr->BuildMatrix();
 
-  RCP<crs_matrix_type> A         = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA);
-  RCP<const driver_map_type> map = MueLuUtilities::Map2TpetraMap(*xpetraMap);
+  RCP<crs_matrix_type> A         = toTpetra(xpetraA);
+  RCP<const driver_map_type> map = toTpetra(xpetraMap);
 
   // ===================================================
   // 	Domain Decomposition Preconditioner
@@ -244,8 +244,8 @@ int main(int argc, char* argv[]) {
   if (L->IsAvailable("R"))
     restr = L->template Get<RCP<Xpetra::Matrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type>>>("R");
 
-  RCP<crs_matrix_type> tpetra_prolong = MueLuUtilities::Op2NonConstTpetraCrs(prolong);
-  RCP<crs_matrix_type> tpetra_restr   = MueLuUtilities::Op2NonConstTpetraCrs(restr);
+  RCP<crs_matrix_type> tpetra_prolong = toTpetra(prolong);
+  RCP<crs_matrix_type> tpetra_restr   = toTpetra(restr);
 
   Tpetra::MatrixMarket::Writer<crs_matrix_type>::writeSparseFile("P.mtx", tpetra_prolong);  // Auxiliary prints introduced to generate pictures
 
diff --git a/packages/muelu/research/max/AdditiveMG/tentative.cpp b/packages/muelu/research/max/AdditiveMG/tentative.cpp
index 96e916654d05..1ad1a2ab6096 100644
--- a/packages/muelu/research/max/AdditiveMG/tentative.cpp
+++ b/packages/muelu/research/max/AdditiveMG/tentative.cpp
@@ -200,8 +200,8 @@ int main(int argc, char* argv[]) {
   RCP<ADRXpetraProblem> Pr = ADR::Xpetra::BuildProblem<scalar_type, local_ordinal_type, global_ordinal_type, Map, CrsMatrixWrap, MultiVector>(matrixParameters.GetMatrixType(), xpetraMap, matrixParameters.GetParameterList());
   RCP<Matrix> xpetraA      = Pr->BuildMatrix();
 
-  RCP<crs_matrix_type> A         = MueLuUtilities::Op2NonConstTpetraCrs(xpetraA);
-  RCP<const driver_map_type> map = MueLuUtilities::Map2TpetraMap(*xpetraMap);
+  RCP<crs_matrix_type> A         = toTpetra(xpetraA);
+  RCP<const driver_map_type> map = toTpetra(xpetraMap);
 
   // ===================================================
   // 	Domain Decomposition Preconditioner
@@ -252,8 +252,8 @@ int main(int argc, char* argv[]) {
   if (L->IsAvailable("R"))
     restr = L->template Get<RCP<Xpetra::Matrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type>>>("R");
 
-  RCP<crs_matrix_type> tpetra_prolong = MueLuUtilities::Op2NonConstTpetraCrs(prolong);
-  RCP<crs_matrix_type> tpetra_restr   = MueLuUtilities::Op2NonConstTpetraCrs(restr);
+  RCP<crs_matrix_type> tpetra_prolong = toTpetra(prolong);
+  RCP<crs_matrix_type> tpetra_restr   = toTpetra(restr);
 
 #include <Teuchos_TimeMonitor.hpp>
   RCP<Teuchos::Time> PbarSetUp = Teuchos::TimeMonitor::getNewCounter("Pbar: SetUp");
diff --git a/packages/muelu/research/max/XpetraSplitting/Xpetra_MatrixSplitting.hpp b/packages/muelu/research/max/XpetraSplitting/Xpetra_MatrixSplitting.hpp
index f196d78e65f7..99c353528ac1 100644
--- a/packages/muelu/research/max/XpetraSplitting/Xpetra_MatrixSplitting.hpp
+++ b/packages/muelu/research/max/XpetraSplitting/Xpetra_MatrixSplitting.hpp
@@ -26,6 +26,7 @@
 
 // Ifpack2
 #include "Ifpack2_OverlappingRowMatrix_def.hpp"
+#include "Xpetra_TpetraMultiVector_decl.hpp"
 
 // MueLu
 #include <MueLu_Utilities.hpp>
@@ -621,7 +622,7 @@ class MatrixSplitting : public Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>
     TEUCHOS_TEST_FOR_EXCEPTION(num_total_regions_ != regionMatrixData_.size(), Exceptions::RuntimeError, "Number of regions does not match with the size of regionMatrixData_ structure \n");
     RCP<Matrix> region_matrix = regionMatrixData_[region_idx];
 
-    RCP<tpetra_crs_matrix> tpetraGlobalMatrix = MueLu::Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2NonConstTpetraCrs(compositeMatrixData_);
+    RCP<tpetra_crs_matrix> tpetraGlobalMatrix = toTpetra(compositeMatrixData_);
     Ifpack2::OverlappingRowMatrix<tpetra_row_matrix> enlargedMatrix(tpetraGlobalMatrix, 2);
 
     region_matrix->resumeFill();
diff --git a/packages/muelu/research/q2q1/MueLu_Q2Q1uPFactory.hpp b/packages/muelu/research/q2q1/MueLu_Q2Q1uPFactory.hpp
index 38902739880c..e24ddefe3f0b 100644
--- a/packages/muelu/research/q2q1/MueLu_Q2Q1uPFactory.hpp
+++ b/packages/muelu/research/q2q1/MueLu_Q2Q1uPFactory.hpp
@@ -590,7 +590,7 @@ void Q2Q1uPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildP(Level& fin
 
     // FIXME: remove magic number 30
     RCP<Matrix> amalgA        = MatrixFactory::Build(nodeMap, nodeMap, 30);
-    RCP<CrsMatrix> amalgA_crs = rcp_dynamic_cast<CrsMatrixWrap>(amalgA)->getCrsMatrix();
+    RCP<CrsMatrix> amalgA_crs = toCrsMatrix(amalgA);
 
     // FIXME: this should be written similar to CoalesceDropFactory Merge
     for (LO row = 0; row < as<LO>(AForPat->getRowMap()->getLocalNumElements()); row += NDim) {
@@ -1665,7 +1665,7 @@ void Q2Q1uPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
   RCP<const Map> coarseMap = MapFactory::Build(rowMap->lib(), Cptlist.size(), rowMap->getIndexBase() + offset, rowMap->getComm());
 
   P                   = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
-  RCP<CrsMatrix> Pcrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
+  RCP<CrsMatrix> Pcrs = toCrsMatrix(P);
 
   ArrayRCP<size_t> iaP;
   ArrayRCP<LO> jaP;
diff --git a/packages/muelu/research/q2q1/Q2Q1.cpp b/packages/muelu/research/q2q1/Q2Q1.cpp
index d409abb27626..6efe75a7e14e 100644
--- a/packages/muelu/research/q2q1/Q2Q1.cpp
+++ b/packages/muelu/research/q2q1/Q2Q1.cpp
@@ -316,7 +316,7 @@ int main(int argc, char* argv[]) {
     // Cyr and would be Teko operators.
 
     int numElem             = A12->getRangeMap()->getLocalNumElements() + A21->getRangeMap()->getLocalNumElements();
-    RCP<const tMap> fullMap = Utilities::Map2TpetraMap(*(MapFactory::createUniformContigMap(Xpetra::UseTpetra, numElem, comm)));
+    RCP<const tMap> fullMap = toTpetra(MapFactory::createUniformContigMap(Xpetra::UseTpetra, numElem, comm));
 
     RCP<tOperator> A;
     if (!binary)
diff --git a/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp b/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp
index a4ed28627bd7..4bc6ef1bbca5 100644
--- a/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp
+++ b/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp
@@ -398,7 +398,7 @@ MakeCompositeDirectSolver(RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal
     RCP<TimeMonitor> tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("MakeCompositeDirectSolver: 1 - Setup")));
 
     // convert matrix to Tpetra
-    RCP<Tpetra_CrsMatrix> tMat = Utilities::Op2NonConstTpetraCrs(compOp);
+    RCP<Tpetra_CrsMatrix> tMat = toTpetra(compOp);
 
     // Amesos2-specific key phrase that denote smoother type
     std::string amesos2SolverName = "KLU2";
diff --git a/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp b/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp
index 44240be30025..b409554a767b 100644
--- a/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp
+++ b/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp
@@ -194,10 +194,10 @@ void MakeRegionMatrices(const RCP<Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, Gl
     regionMats = rcp(new CrsMatrixWrap(revisedRowMap, revisedColMap, 9));
 
     // Extract current region CrsMatrix
-    RCP<CrsMatrix> regionCrsMat = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(regionMats)->getCrsMatrix();
+    RCP<CrsMatrix> regionCrsMat = toCrsMatrix(regionMats);
 
     // Extract current quasi-region CrsMatrix
-    RCP<CrsMatrix> quasiRegionCrsMat = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(quasiRegionMats)->getCrsMatrix();
+    RCP<CrsMatrix> quasiRegionCrsMat = toCrsMatrix(quasiRegionMats);
 
     // Pull out the data from the quasi-region CrsMatrix
     ArrayRCP<const size_t> rowptrQuasiRegion;
@@ -295,7 +295,7 @@ void MakeRegionMatrices(const RCP<Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, Gl
   regDiag->update(-SC_ONE, *regCorrection, SC_ONE, *regNspViolation, SC_ONE);
 
   // Extract current region matrix in as CrsMatrix
-  RCP<CrsMatrix> regionCrsMat = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(regionMats)->getCrsMatrix();
+  RCP<CrsMatrix> regionCrsMat = toCrsMatrix(regionMats);
   regionCrsMat->replaceDiag(*regDiag);
 
   tm = Teuchos::null;
@@ -348,10 +348,10 @@ void regionalToComposite(const RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOr
                                         regMat->getCrsGraph()->getLocalMaxNumRowEntries()));
 
     // Extract current quasi-region CrsMatrix
-    RCP<CrsMatrix> quasiRegionCrsMat = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(quasiRegMat)->getCrsMatrix();
+    RCP<CrsMatrix> quasiRegionCrsMat = toCrsMatrix(quasiRegMat);
 
     // Extract current region CrsMatrix
-    RCP<CrsMatrix> regionCrsMat = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(regMat)->getCrsMatrix();
+    RCP<CrsMatrix> regionCrsMat = toCrsMatrix(regMat);
 
     // Pull out the data from the region CrsMatrix
     Teuchos::ArrayRCP<const size_t> rowptrRegion;
diff --git a/packages/muelu/research/regionMG/src/SolveRegionHierarchy_def.hpp b/packages/muelu/research/regionMG/src/SolveRegionHierarchy_def.hpp
index db287cf2ebc7..4b6a41ef39aa 100644
--- a/packages/muelu/research/regionMG/src/SolveRegionHierarchy_def.hpp
+++ b/packages/muelu/research/regionMG/src/SolveRegionHierarchy_def.hpp
@@ -192,11 +192,11 @@ void MgCycle(const int levelID,  ///< ID of current level
 
         // From here on we switch to Tpetra for simplicity
         // we could also implement a similar Epetra branch
-        using Tpetra_MultiVector = Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
+        using Tpetra_Vector = Tpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
 
         //    *fos << "Attempting to use Amesos2 to solve the coarse grid problem" << std::endl;
-        RCP<Tpetra_MultiVector> tX       = Utilities::MV2NonConstTpetraMV2(*compX);
-        RCP<const Tpetra_MultiVector> tB = Utilities::MV2TpetraMV(compRhs);
+        RCP<Tpetra_Vector> tX       = toTpetra(compX);
+        RCP<const Tpetra_Vector> tB = toTpetra(compRhs);
 
         /* Solve!
          *
diff --git a/packages/muelu/src/CMakeLists.txt b/packages/muelu/src/CMakeLists.txt
index 6719fb231b1d..927489868522 100644
--- a/packages/muelu/src/CMakeLists.txt
+++ b/packages/muelu/src/CMakeLists.txt
@@ -358,7 +358,6 @@ APPEND_GLOB(HEADERS */*/*/*.hpp)
 ###############################################
 SET(SOURCES ${MUELU_ETI_CPP_SOURCES})
 APPEND_GLOB(SOURCES */*.cpp)
-APPEND_GLOB(SOURCES ${DIR}/Graph/Containers/MueLu_LinkedList.cpp)
 
 #
 # Explicit instantiation
diff --git a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp b/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp
deleted file mode 100644
index a405ddbb1586..000000000000
--- a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-// @HEADER
-// *****************************************************************************
-//        MueLu: A package for multigrid based preconditioning
-//
-// Copyright 2012 NTESS and the MueLu contributors.
-// SPDX-License-Identifier: BSD-3-Clause
-// *****************************************************************************
-// @HEADER
-
-#include <stddef.h>  // for NULL
-#include "MueLu_LinkedList.hpp"
-
-namespace MueLu {
-
-LinkedList::LinkedList()
-  : nodeHead(NULL)
-  , nodeTail(NULL) {}
-
-LinkedList::~LinkedList() {
-  while (nodeHead != NULL)
-    DeleteHead();
-}
-
-bool LinkedList::IsEmpty() {
-  return nodeHead == NULL;
-}
-
-void LinkedList::Add(int iNode) {
-  MueLu_Node *newNode = new MueLu_Node;
-  newNode->nodeId     = iNode;
-  newNode->next       = NULL;
-  if (nodeHead == NULL) {
-    nodeHead = newNode;
-    nodeTail = newNode;
-  } else {
-    nodeTail->next = newNode;
-    nodeTail       = newNode;
-  }
-}
-
-int LinkedList::Pop() {  // get head and remove first node
-  if (IsEmpty()) return -1;
-
-  int iNode = nodeHead->nodeId;
-  DeleteHead();
-  return iNode;
-}
-
-void LinkedList::DeleteHead() {
-  if (IsEmpty()) return;
-
-  MueLu_Node *newNode = nodeHead;
-  nodeHead            = newNode->next;
-  delete newNode;
-}
-
-}  // namespace MueLu
-
-// TODO: nodeTail unused -> remove?
diff --git a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp b/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp
deleted file mode 100644
index d4db3c530f62..000000000000
--- a/packages/muelu/src/Graph/Containers/MueLu_LinkedList.hpp
+++ /dev/null
@@ -1,45 +0,0 @@
-// @HEADER
-// *****************************************************************************
-//        MueLu: A package for multigrid based preconditioning
-//
-// Copyright 2012 NTESS and the MueLu contributors.
-// SPDX-License-Identifier: BSD-3-Clause
-// *****************************************************************************
-// @HEADER
-
-#ifndef MUELU_LINKEDLIST_HPP
-#define MUELU_LINKEDLIST_HPP
-
-/* ------------------------------------------------------------------------- */
-/* linked list structures from ML for holding free node information          */
-/* ------------------------------------------------------------------------- */
-
-namespace MueLu {
-
-typedef struct MueLu_Node_Struct {
-  int nodeId;
-  struct MueLu_Node_Struct *next;
-} MueLu_Node;
-
-class LinkedList {
- public:
-  LinkedList();
-
-  ~LinkedList();
-
-  bool IsEmpty();
-
-  void Add(int iNode);
-
-  int Pop();
-
- private:
-  MueLu_Node *nodeHead;
-  MueLu_Node *nodeTail;
-
-  void DeleteHead();
-};
-
-}  // namespace MueLu
-
-#endif  // MUELU_LINKEDLIST_HPP
diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp
index b23f58e081f4..e07d8e2f214a 100644
--- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp
+++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp
@@ -248,7 +248,7 @@ std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrd
 
   // TODO: We could merge pass 1 and 2.
 
-  auto crsA  = rcp_dynamic_cast<CrsMatrixWrap>(A, true)->getCrsMatrix();
+  auto crsA  = toCrsMatrix(A);
   auto lclA  = crsA->getLocalMatrixDevice();
   auto range = range_type(0, lclA.numRows());
 
@@ -634,6 +634,44 @@ std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrd
               MueLu_runDroppingFunctors(drop_boundaries,
                                         preserve_diagonals,
                                         cut_drop);
+            } else if (distanceLaplacianMetric == "material") {
+              auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
+              if (material->getNumVectors() == 1) {
+                GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
+
+                auto dist2      = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
+                auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results);
+                auto cut_drop   = CutDrop::CutDropFunctor(comparison, threshold);
+
+                MueLu_runDroppingFunctors(drop_boundaries,
+                                          preserve_diagonals,
+                                          cut_drop);
+              } else {
+                TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
+
+                {
+                  std::stringstream ss;
+                  ss << "material tensor mean =" << std::endl;
+                  size_t k = 0;
+                  for (size_t i = 0; i < coords->getNumVectors(); ++i) {
+                    ss << "   ";
+                    for (size_t j = 0; j < coords->getNumVectors(); ++j) {
+                      ss << material->getVector(k)->meanValue() << " ";
+                      ++k;
+                    }
+                    ss << std::endl;
+                  }
+                  GetOStream(Runtime0) << ss.str();
+                }
+
+                auto dist2      = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
+                auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results);
+                auto cut_drop   = CutDrop::CutDropFunctor(comparison, threshold);
+
+                MueLu_runDroppingFunctors(drop_boundaries,
+                                          preserve_diagonals,
+                                          cut_drop);
+              }
             }
           } else if (distanceLaplacianAlgoStr == "scaled cut") {
             if (distanceLaplacianMetric == "unweighted") {
@@ -644,6 +682,44 @@ std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrd
               MueLu_runDroppingFunctors(drop_boundaries,
                                         preserve_diagonals,
                                         cut_drop);
+            } else if (distanceLaplacianMetric == "material") {
+              auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
+              if (material->getNumVectors() == 1) {
+                GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
+
+                auto dist2      = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
+                auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
+                auto cut_drop   = CutDrop::CutDropFunctor(comparison, threshold);
+
+                MueLu_runDroppingFunctors(drop_boundaries,
+                                          preserve_diagonals,
+                                          cut_drop);
+              } else {
+                TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
+
+                {
+                  std::stringstream ss;
+                  ss << "material tensor mean =" << std::endl;
+                  size_t k = 0;
+                  for (size_t i = 0; i < coords->getNumVectors(); ++i) {
+                    ss << "   ";
+                    for (size_t j = 0; j < coords->getNumVectors(); ++j) {
+                      ss << material->getVector(k)->meanValue() << " ";
+                      ++k;
+                    }
+                    ss << std::endl;
+                  }
+                  GetOStream(Runtime0) << ss.str();
+                }
+
+                auto dist2      = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
+                auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
+                auto cut_drop   = CutDrop::CutDropFunctor(comparison, threshold);
+
+                MueLu_runDroppingFunctors(drop_boundaries,
+                                          preserve_diagonals,
+                                          cut_drop);
+              }
             }
           } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
             if (distanceLaplacianMetric == "unweighted") {
@@ -654,6 +730,44 @@ std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrd
               MueLu_runDroppingFunctors(drop_boundaries,
                                         preserve_diagonals,
                                         cut_drop);
+            } else if (distanceLaplacianMetric == "material") {
+              auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
+              if (material->getNumVectors() == 1) {
+                GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
+
+                auto dist2      = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
+                auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
+                auto cut_drop   = CutDrop::CutDropFunctor(comparison, threshold);
+
+                MueLu_runDroppingFunctors(drop_boundaries,
+                                          preserve_diagonals,
+                                          cut_drop);
+              } else {
+                TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
+
+                {
+                  std::stringstream ss;
+                  ss << "material tensor mean =" << std::endl;
+                  size_t k = 0;
+                  for (size_t i = 0; i < coords->getNumVectors(); ++i) {
+                    ss << "   ";
+                    for (size_t j = 0; j < coords->getNumVectors(); ++j) {
+                      ss << material->getVector(k)->meanValue() << " ";
+                      ++k;
+                    }
+                    ss << std::endl;
+                  }
+                  GetOStream(Runtime0) << ss.str();
+                }
+
+                auto dist2      = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
+                auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
+                auto cut_drop   = CutDrop::CutDropFunctor(comparison, threshold);
+
+                MueLu_runDroppingFunctors(drop_boundaries,
+                                          preserve_diagonals,
+                                          cut_drop);
+              }
             }
 
             auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
@@ -902,7 +1016,7 @@ std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrd
 
   // TODO: We could merge pass 1 and 2.
 
-  auto crsA  = rcp_dynamic_cast<CrsMatrixWrap>(A, true)->getCrsMatrix();
+  auto crsA  = toCrsMatrix(A);
   auto lclA  = crsA->getLocalMatrixDevice();
   auto range = range_type(0, numNodes);
 
diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp
index 3adc1e6f038f..34ce1c0f96d7 100644
--- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp
+++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_UnsmooshFactory_def.hpp
@@ -85,8 +85,7 @@ void UnsmooshFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level &fi
   Teuchos::ArrayRCP<const size_t> amalgRowPtr(amalgP->getLocalNumRows());
   Teuchos::ArrayRCP<const LocalOrdinal> amalgCols(amalgP->getLocalNumEntries());
   Teuchos::ArrayRCP<const Scalar> amalgVals(amalgP->getLocalNumEntries());
-  Teuchos::RCP<CrsMatrixWrap> amalgPwrap = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(amalgP);
-  Teuchos::RCP<CrsMatrix> amalgPcrs      = amalgPwrap->getCrsMatrix();
+  Teuchos::RCP<CrsMatrix> amalgPcrs = toCrsMatrix(amalgP);
   amalgPcrs->getAllValues(amalgRowPtr, amalgCols, amalgVals);
 
   // calculate number of dof rows for new prolongator
diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp
index c0f7715e4d63..3951bc7e1e84 100644
--- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp
+++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_VariableDofLaplacianFactory_def.hpp
@@ -161,8 +161,7 @@ void VariableDofLaplacianFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Bui
 
   // start variable dof amalgamation
 
-  Teuchos::RCP<CrsMatrixWrap> Awrap = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(A);
-  Teuchos::RCP<CrsMatrix> Acrs      = Awrap->getCrsMatrix();
+  Teuchos::RCP<CrsMatrix> Acrs = toCrsMatrix(A);
   // Acrs->describe(*fancy, Teuchos::VERB_EXTREME);
 
   size_t nNonZeros = 0;
diff --git a/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp b/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp
index e7f48a9968ad..c5a5c194eec9 100644
--- a/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp
+++ b/packages/muelu/src/Misc/MueLu_LowPrecisionFactory_def.hpp
@@ -89,7 +89,7 @@ void LowPrecisionFactory<double, LocalOrdinal, GlobalOrdinal, Node>::Build(Level
   RCP<Matrix> A = Get<RCP<Matrix> >(currentLevel, matrixKey);
 
   if ((A->getRowMap()->lib() == Xpetra::UseTpetra) && std::is_same<Scalar, double>::value) {
-    auto tpA        = rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(A)->getCrsMatrix(), true)->getTpetra_CrsMatrix();
+    auto tpA        = toTpetra(A);
     auto tpLowA     = tpA->template convert<HalfScalar>();
     auto tpLowOpA   = rcp(new Tpetra::CrsMatrixMultiplyOp<Scalar, HalfScalar, LocalOrdinal, GlobalOrdinal, Node>(tpLowA));
     auto xpTpLowOpA = rcp(new TpetraOperator(tpLowOpA));
@@ -136,7 +136,7 @@ void LowPrecisionFactory<std::complex<double>, LocalOrdinal, GlobalOrdinal, Node
   RCP<Matrix> A = Get<RCP<Matrix> >(currentLevel, matrixKey);
 
   if ((A->getRowMap()->lib() == Xpetra::UseTpetra) && std::is_same<Scalar, std::complex<double> >::value) {
-    auto tpA        = rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(A)->getCrsMatrix(), true)->getTpetra_CrsMatrix();
+    auto tpA        = toTpetra(A);
     auto tpLowA     = tpA->template convert<HalfScalar>();
     auto tpLowOpA   = rcp(new Tpetra::CrsMatrixMultiplyOp<Scalar, HalfScalar, LocalOrdinal, GlobalOrdinal, Node>(tpLowA));
     auto xpTpLowOpA = rcp(new TpetraOperator(tpLowOpA));
diff --git a/packages/muelu/src/Smoothers/MueLu_Amesos2Smoother_def.hpp b/packages/muelu/src/Smoothers/MueLu_Amesos2Smoother_def.hpp
index d6f0ad05c03d..7c48de5fef21 100644
--- a/packages/muelu/src/Smoothers/MueLu_Amesos2Smoother_def.hpp
+++ b/packages/muelu/src/Smoothers/MueLu_Amesos2Smoother_def.hpp
@@ -276,7 +276,7 @@ void Amesos2Smoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Setup(Level& cu
     }
 
     RCP<Matrix> newA       = rcp(new CrsMatrixWrap(rowMap, colMap, 0));
-    RCP<CrsMatrix> newAcrs = rcp_dynamic_cast<CrsMatrixWrap>(newA)->getCrsMatrix();
+    RCP<CrsMatrix> newAcrs = toCrsMatrix(newA);
     newAcrs->setAllValues(newRowPointers, newColIndices, newValues);
     newAcrs->expertStaticFillComplete(A->getDomainMap(), A->getRangeMap(),
                                       importer, A->getCrsGraph()->getExporter());
@@ -287,7 +287,7 @@ void Amesos2Smoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Setup(Level& cu
     factorA = A;
   }
 
-  RCP<Tpetra_CrsMatrix> tA = Utilities::Op2NonConstTpetraCrs(factorA);
+  RCP<const Tpetra_CrsMatrix> tA = toTpetra(factorA);
 
   prec_ = Amesos2::create<Tpetra_CrsMatrix, Tpetra_MultiVector>(type_, tA);
   TEUCHOS_TEST_FOR_EXCEPTION(prec_ == Teuchos::null, Exceptions::RuntimeError, "Amesos2::create returns Teuchos::null");
@@ -311,8 +311,8 @@ void Amesos2Smoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(MultiVect
 
   RCP<Tpetra_MultiVector> tX, tB;
   if (!useTransformation_) {
-    tX = Utilities::MV2NonConstTpetraMV2(X);
-    tB = Utilities::MV2NonConstTpetraMV2(const_cast<MultiVector&>(B));
+    tX = toTpetra(Teuchos::rcpFromRef(X));
+    tB = toTpetra(Teuchos::rcpFromRef(const_cast<MultiVector&>(B)));
   } else {
     // Copy data of the original vectors into the transformed ones
     size_t numVectors = X.getNumVectors();
@@ -328,8 +328,8 @@ void Amesos2Smoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(MultiVect
       B_data[i] = Bdata[i];
     }
 
-    tX = Utilities::MV2NonConstTpetraMV2(*X_);
-    tB = Utilities::MV2NonConstTpetraMV2(*B_);
+    tX = toTpetra(X_);
+    tB = toTpetra(B_);
   }
 
   prec_->setX(tX);
diff --git a/packages/muelu/src/Smoothers/MueLu_BelosSmoother_def.hpp b/packages/muelu/src/Smoothers/MueLu_BelosSmoother_def.hpp
index c3a2f7a85391..d24ef63ada50 100644
--- a/packages/muelu/src/Smoothers/MueLu_BelosSmoother_def.hpp
+++ b/packages/muelu/src/Smoothers/MueLu_BelosSmoother_def.hpp
@@ -84,8 +84,8 @@ void BelosSmoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::SetupBelos(Level&
   bool useTpetra = A_->getRowMap()->lib() == Xpetra::UseTpetra;
 
   if (useTpetra) {
-    tBelosProblem_ = rcp(new Belos::LinearProblem<Scalar, tMV, tOP>());
-    RCP<tOP> tA    = Utilities::Op2NonConstTpetraCrs(A_);
+    tBelosProblem_    = rcp(new Belos::LinearProblem<Scalar, tMV, tOP>());
+    RCP<const tOP> tA = toTpetra(A_);
     tBelosProblem_->setOperator(tA);
 
     Belos::SolverFactory<SC, tMV, tOP> solverFactory;
@@ -104,8 +104,8 @@ void BelosSmoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(MultiVector
     if (InitialGuessIsZero) {
       X.putScalar(0.0);
 
-      RCP<Tpetra::MultiVector<SC, LO, GO, NO> > tpX       = rcpFromRef(Utilities::MV2NonConstTpetraMV(X));
-      RCP<const Tpetra::MultiVector<SC, LO, GO, NO> > tpB = rcpFromRef(Utilities::MV2TpetraMV(B));
+      RCP<Tpetra::MultiVector<SC, LO, GO, NO> > tpX       = toTpetra(rcpFromRef(X));
+      RCP<const Tpetra::MultiVector<SC, LO, GO, NO> > tpB = toTpetra(rcpFromRef(B));
 
       tBelosProblem_->setInitResVec(tpB);
       tBelosProblem_->setProblem(tpX, tpB);
@@ -116,8 +116,8 @@ void BelosSmoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(MultiVector
       RCP<MultiVector> Residual   = Utilities::Residual(*A_, X, B);
       RCP<MultiVector> Correction = MultiVectorFactory::Build(A_->getDomainMap(), X.getNumVectors());
 
-      RCP<Tpetra::MultiVector<SC, LO, GO, NO> > tpX       = rcpFromRef(Utilities::MV2NonConstTpetraMV(*Correction));
-      RCP<const Tpetra::MultiVector<SC, LO, GO, NO> > tpB = rcpFromRef(Utilities::MV2TpetraMV(*Residual));
+      RCP<Tpetra::MultiVector<SC, LO, GO, NO> > tpX       = toTpetra(Correction);
+      RCP<const Tpetra::MultiVector<SC, LO, GO, NO> > tpB = toTpetra(Residual);
 
       tBelosProblem_->setInitResVec(tpB);
       tBelosProblem_->setProblem(tpX, tpB);
diff --git a/packages/muelu/src/Smoothers/MueLu_Ifpack2Smoother_def.hpp b/packages/muelu/src/Smoothers/MueLu_Ifpack2Smoother_def.hpp
index e8dca02fbfff..7c655214134e 100644
--- a/packages/muelu/src/Smoothers/MueLu_Ifpack2Smoother_def.hpp
+++ b/packages/muelu/src/Smoothers/MueLu_Ifpack2Smoother_def.hpp
@@ -965,8 +965,8 @@ void Ifpack2Smoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(MultiVect
 
   // Apply
   if (InitialGuessIsZero || supportInitialGuess) {
-    Tpetra::MultiVector<SC, LO, GO, NO>& tpX       = Utilities::MV2NonConstTpetraMV(X);
-    const Tpetra::MultiVector<SC, LO, GO, NO>& tpB = Utilities::MV2TpetraMV(B);
+    Tpetra::MultiVector<SC, LO, GO, NO>& tpX       = toTpetra(X);
+    const Tpetra::MultiVector<SC, LO, GO, NO>& tpB = toTpetra(B);
     prec_->apply(tpB, tpX);
   } else {
     typedef Teuchos::ScalarTraits<Scalar> TST;
@@ -980,8 +980,8 @@ void Ifpack2Smoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(MultiVect
 
     RCP<MultiVector> Correction = MultiVectorFactory::Build(A_->getDomainMap(), X.getNumVectors());
 
-    Tpetra::MultiVector<SC, LO, GO, NO>& tpX       = Utilities::MV2NonConstTpetraMV(*Correction);
-    const Tpetra::MultiVector<SC, LO, GO, NO>& tpB = Utilities::MV2TpetraMV(*Residual);
+    Tpetra::MultiVector<SC, LO, GO, NO>& tpX       = toTpetra(*Correction);
+    const Tpetra::MultiVector<SC, LO, GO, NO>& tpB = toTpetra(*Residual);
 
     prec_->apply(tpB, tpX);
 
diff --git a/packages/muelu/src/Smoothers/MueLu_ProjectorSmoother_def.hpp b/packages/muelu/src/Smoothers/MueLu_ProjectorSmoother_def.hpp
index c93f43eb6a39..c30ccfae2b39 100644
--- a/packages/muelu/src/Smoothers/MueLu_ProjectorSmoother_def.hpp
+++ b/packages/muelu/src/Smoothers/MueLu_ProjectorSmoother_def.hpp
@@ -70,7 +70,7 @@ void ProjectorSmoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Setup(Level &
 #if defined(HAVE_XPETRA_TPETRA)
 #ifdef HAVE_MUELU_TPETRA_INST_INT_INT
   // Orthonormalize
-  RCP<const Tpetra::MultiVector<SC, LO, GO, NO> > B_ = Utilities::MV2TpetraMV(B);
+  RCP<const Tpetra::MultiVector<SC, LO, GO, NO> > B_ = toTpetra(B);
   // TAW: Oct 16 2015: subCopy is not part of Xpetra. One should either add it to Xpetra (with an emulator for Epetra)
   //                   or replace this call by a local loop. I'm not motivated to do this now...
   RCP<Tpetra::MultiVector<SC, LO, GO, NO> > Borth = B_->subCopy(selectedIndices);  // copy
diff --git a/packages/muelu/src/Smoothers/MueLu_StratimikosSmoother_def.hpp b/packages/muelu/src/Smoothers/MueLu_StratimikosSmoother_def.hpp
index fbc5a125e77e..3aedc5a66518 100644
--- a/packages/muelu/src/Smoothers/MueLu_StratimikosSmoother_def.hpp
+++ b/packages/muelu/src/Smoothers/MueLu_StratimikosSmoother_def.hpp
@@ -77,9 +77,9 @@ void StratimikosSmoother<double, LocalOrdinal, GlobalOrdinal, Node>::SetupStrati
   if (recurMgOnFilteredA_) {
     RCP<Matrix> filteredA;
     ExperimentalDropVertConnections(filteredA, currentLevel);
-    thyraA = Xpetra::ThyraUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::toThyra(Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(filteredA)->getCrsMatrix());
+    thyraA = Xpetra::ThyraUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::toThyra(toCrsMatrix(filteredA));
   } else
-    thyraA = Xpetra::ThyraUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::toThyra(Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(A_)->getCrsMatrix());
+    thyraA = Xpetra::ThyraUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::toThyra(toCrsMatrix(A_));
 
   // Build Stratimikos solver
   Stratimikos::DefaultLinearSolverBuilder linearSolverBuilder;
diff --git a/packages/muelu/src/Transfers/BlackBox/MueLu_BlackBoxPFactory_def.hpp b/packages/muelu/src/Transfers/BlackBox/MueLu_BlackBoxPFactory_def.hpp
index 6c0ae590c2b1..12e4280c1c0b 100644
--- a/packages/muelu/src/Transfers/BlackBox/MueLu_BlackBoxPFactory_def.hpp
+++ b/packages/muelu/src/Transfers/BlackBox/MueLu_BlackBoxPFactory_def.hpp
@@ -425,7 +425,7 @@ void BlackBoxPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildP(Level&
   // Create the matrix itself using the above maps
   RCP<Matrix> P;
   P                   = rcp(new CrsMatrixWrap(rowMapP, colMapP, 0));
-  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
+  RCP<CrsMatrix> PCrs = toCrsMatrix(P);
 
   ArrayRCP<size_t> iaP;
   ArrayRCP<LO> jaP;
diff --git a/packages/muelu/src/Transfers/Classical/MueLu_ClassicalPFactory_def.hpp b/packages/muelu/src/Transfers/Classical/MueLu_ClassicalPFactory_def.hpp
index 518316b8121a..389f37e4152e 100644
--- a/packages/muelu/src/Transfers/Classical/MueLu_ClassicalPFactory_def.hpp
+++ b/packages/muelu/src/Transfers/Classical/MueLu_ClassicalPFactory_def.hpp
@@ -811,7 +811,7 @@ void ClassicalPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
 
   // Allocate memory & copy
   P                   = rcp(new CrsMatrixWrap(A.getRowMap(), coarseColMap, 0));
-  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
+  RCP<CrsMatrix> PCrs = toCrsMatrix(P);
   ArrayRCP<size_t> P_rowptr;
   ArrayRCP<LO> P_colind;
   ArrayRCP<SC> P_values;
diff --git a/packages/muelu/src/Transfers/GeneralGeometric/MueLu_GeneralGeometricPFactory_def.hpp b/packages/muelu/src/Transfers/GeneralGeometric/MueLu_GeneralGeometricPFactory_def.hpp
index 43557239d01f..2bdf0adc1c91 100644
--- a/packages/muelu/src/Transfers/GeneralGeometric/MueLu_GeneralGeometricPFactory_def.hpp
+++ b/packages/muelu/src/Transfers/GeneralGeometric/MueLu_GeneralGeometricPFactory_def.hpp
@@ -839,7 +839,7 @@ void GeneralGeometricPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
   ghostCoords->doImport(*fineCoords, *ghostImporter, Xpetra::INSERT);
 
   P                   = rcp(new CrsMatrixWrap(rowMapP, colMapP, 0));
-  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
+  RCP<CrsMatrix> PCrs = toCrsMatrix(P);
 
   ArrayRCP<size_t> iaP;
   ArrayRCP<LO> jaP;
diff --git a/packages/muelu/src/Transfers/GeneralGeometric/MueLu_GeometricInterpolationPFactory_def.hpp b/packages/muelu/src/Transfers/GeneralGeometric/MueLu_GeometricInterpolationPFactory_def.hpp
index bbd8ce60676d..dcf37d840f1e 100644
--- a/packages/muelu/src/Transfers/GeneralGeometric/MueLu_GeometricInterpolationPFactory_def.hpp
+++ b/packages/muelu/src/Transfers/GeneralGeometric/MueLu_GeometricInterpolationPFactory_def.hpp
@@ -282,7 +282,7 @@ void GeometricInterpolationPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
     // Create the prolongator matrix and its associated objects
     RCP<ParameterList> dummyList = rcp(new ParameterList());
     P                            = rcp(new CrsMatrixWrap(prolongatorGraph, dummyList));
-    RCP<CrsMatrix> PCrs          = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
+    RCP<CrsMatrix> PCrs          = toCrsMatrix(P);
     PCrs->setAllToScalar(1.0);
     PCrs->fillComplete();
 
@@ -319,7 +319,7 @@ void GeometricInterpolationPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
 
   RCP<ParameterList> dummyList = rcp(new ParameterList());
   P                            = rcp(new CrsMatrixWrap(prolongatorGraph, dummyList));
-  RCP<CrsMatrix> PCrs          = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
+  RCP<CrsMatrix> PCrs          = toCrsMatrix(P);
   PCrs->resumeFill();  // The Epetra matrix is considered filled at this point.
 
   {
@@ -446,7 +446,7 @@ void GeometricInterpolationPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
     P                           = rcp(new CrsMatrixWrap(prolongatorGraph->getRowMap(),
                                                         prolongatorGraph->getColMap(),
                                                         nnzOnRows));
-    RCP<CrsMatrix> PCrsSqueezed = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
+    RCP<CrsMatrix> PCrsSqueezed = toCrsMatrix(P);
     PCrsSqueezed->resumeFill();  // The Epetra matrix is considered filled at this point.
     PCrsSqueezed->setAllValues(rowPtr, colInd, values);
     PCrsSqueezed->expertStaticFillComplete(prolongatorGraph->getDomainMap(),
diff --git a/packages/muelu/src/Transfers/GeneralGeometric/MueLu_GeometricInterpolationPFactory_kokkos_def.hpp b/packages/muelu/src/Transfers/GeneralGeometric/MueLu_GeometricInterpolationPFactory_kokkos_def.hpp
index cb6773af7870..19c678f8b457 100644
--- a/packages/muelu/src/Transfers/GeneralGeometric/MueLu_GeometricInterpolationPFactory_kokkos_def.hpp
+++ b/packages/muelu/src/Transfers/GeneralGeometric/MueLu_GeometricInterpolationPFactory_kokkos_def.hpp
@@ -261,7 +261,7 @@ void GeometricInterpolationPFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal,
     // Create the prolongator matrix and its associated objects
     RCP<ParameterList> dummyList = rcp(new ParameterList());
     P                            = rcp(new CrsMatrixWrap(prolongatorGraph, dummyList));
-    RCP<CrsMatrix> PCrs          = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
+    RCP<CrsMatrix> PCrs          = toCrsMatrix(P);
     PCrs->setAllToScalar(1.0);
     PCrs->fillComplete();
 
@@ -325,7 +325,7 @@ void GeometricInterpolationPFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal,
 
   RCP<ParameterList> dummyList = rcp(new ParameterList());
   P                            = rcp(new CrsMatrixWrap(prolongatorGraph, dummyList));
-  RCP<CrsMatrix> PCrs          = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
+  RCP<CrsMatrix> PCrs          = toCrsMatrix(P);
   PCrs->resumeFill();  // The Epetra matrix is considered filled at this point.
 
   LO interpolationNodeIdx = 0, rowIdx = 0;
diff --git a/packages/muelu/src/Transfers/PCoarsen/MueLu_IntrepidPCoarsenFactory_def.hpp b/packages/muelu/src/Transfers/PCoarsen/MueLu_IntrepidPCoarsenFactory_def.hpp
index 796509b18c84..f7bcf00983ef 100644
--- a/packages/muelu/src/Transfers/PCoarsen/MueLu_IntrepidPCoarsenFactory_def.hpp
+++ b/packages/muelu/src/Transfers/PCoarsen/MueLu_IntrepidPCoarsenFactory_def.hpp
@@ -570,7 +570,7 @@ void IntrepidPCoarsenFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Generat
 
   // Allocate P
   P                   = rcp(new CrsMatrixWrap(hi_map, lo_colMap, numFieldsHi));  // FIXLATER: Need faster fill
-  RCP<CrsMatrix> Pcrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
+  RCP<CrsMatrix> Pcrs = toCrsMatrix(P);
 
   // Slow-ish fill
   size_t Nelem = hi_elemToNode.extent(0);
@@ -635,7 +635,7 @@ void IntrepidPCoarsenFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Generat
 
   // Allocate P
   P                   = rcp(new CrsMatrixWrap(hi_map, lo_colMap, numFieldsHi));  // FIXLATER: Need faster fill
-  RCP<CrsMatrix> Pcrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
+  RCP<CrsMatrix> Pcrs = toCrsMatrix(P);
 
   // Slow-ish fill
   size_t Nelem = hi_elemToNode.extent(0);
diff --git a/packages/muelu/src/Transfers/SemiCoarsen/MueLu_SemiCoarsenPFactory_def.hpp b/packages/muelu/src/Transfers/SemiCoarsen/MueLu_SemiCoarsenPFactory_def.hpp
index 36369a07c2f2..36a97bd781a7 100644
--- a/packages/muelu/src/Transfers/SemiCoarsen/MueLu_SemiCoarsenPFactory_def.hpp
+++ b/packages/muelu/src/Transfers/SemiCoarsen/MueLu_SemiCoarsenPFactory_def.hpp
@@ -906,7 +906,7 @@ LocalOrdinal SemiCoarsenPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
   ArrayRCP<LO> rcpColumns;
   ArrayRCP<SC> rcpValues;
 
-  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
+  RCP<CrsMatrix> PCrs = toCrsMatrix(P);
   LO nnz              = Pptr[Ndofs];
   PCrs->allocateAllValues(nnz, rcpRowPtr, rcpColumns, rcpValues);
 
@@ -927,7 +927,7 @@ LocalOrdinal SemiCoarsenPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
   RCP<CrsMatrix> RCrs;
   if (buildRestriction) {
     R    = rcp(new CrsMatrixWrap(coarseMap, rowMap, 0));
-    RCrs = rcp_dynamic_cast<CrsMatrixWrap>(R)->getCrsMatrix();
+    RCrs = toCrsMatrix(R);
     nnz  = Rptr[coarseMap->getLocalNumElements()];
     RCrs->allocateAllValues(nnz, RrcpRowPtr, RrcpColumns, RrcpValues);
 
diff --git a/packages/muelu/src/Transfers/SemiCoarsen/MueLu_SemiCoarsenPFactory_kokkos_def.hpp b/packages/muelu/src/Transfers/SemiCoarsen/MueLu_SemiCoarsenPFactory_kokkos_def.hpp
index 338f000aa1e1..cd4430e2f0cc 100644
--- a/packages/muelu/src/Transfers/SemiCoarsen/MueLu_SemiCoarsenPFactory_kokkos_def.hpp
+++ b/packages/muelu/src/Transfers/SemiCoarsen/MueLu_SemiCoarsenPFactory_kokkos_def.hpp
@@ -613,7 +613,7 @@ void SemiCoarsenPFactory_kokkos<
       rowMap->lib(), NCLayers * itemp, NCLayers * NVertLines * DofsPerNode, 0,
       stridingInfo_, rowMap->getComm(), -1, 0);
   P                   = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
-  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
+  RCP<CrsMatrix> PCrs = toCrsMatrix(P);
   PCrs->setAllValues(Pptr, Pcols, Pvals);
   PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
 
diff --git a/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_TentativePFactory_def.hpp b/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_TentativePFactory_def.hpp
index 220db70dc868..5969fc6a1a8d 100644
--- a/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_TentativePFactory_def.hpp
+++ b/packages/muelu/src/Transfers/Smoothed-Aggregation/MueLu_TentativePFactory_def.hpp
@@ -851,7 +851,7 @@ void TentativePFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
 
   // Time to construct the matrix and fill in the values
   Ptentative              = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
-  RCP<CrsMatrix> PtentCrs = rcp_dynamic_cast<CrsMatrixWrap>(Ptentative)->getCrsMatrix();
+  RCP<CrsMatrix> PtentCrs = toCrsMatrix(Ptentative);
 
   ArrayRCP<size_t> iaPtent;
   ArrayRCP<LO> jaPtent;
diff --git a/packages/muelu/src/Utils/ClassList/EI-Exceptions.classList b/packages/muelu/src/Utils/ClassList/EI-Exceptions.classList
index db1d26b8b4c2..a772cb03d3d5 100644
--- a/packages/muelu/src/Utils/ClassList/EI-Exceptions.classList
+++ b/packages/muelu/src/Utils/ClassList/EI-Exceptions.classList
@@ -2,7 +2,6 @@
 FakeSmootherPrototype
 GraphBase
 HierarchyFactory
-HierarchyManager
 PreDropFunctionBaseClass
 PRFactory
 Smoother
diff --git a/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake b/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake
index f38b99ced3c6..08302d116540 100644
--- a/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake
+++ b/packages/muelu/src/Utils/ExplicitInstantiation/ETI_SC_LO_GO_NO_classes.cmake
@@ -48,6 +48,7 @@ APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::GeometricInterpolationPFactory )
 APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::GeometricInterpolationPFactory_kokkos )
 APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::GMRESSolver )
 APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::Hierarchy )
+APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::HierarchyManager )
 APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::HierarchyUtils )
 APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::InterfaceAggregationFactory )
 APPEND_SET(MUELU_SC_LO_GO_NO_ETI_CLASSES MueLu::Ifpack2Smoother-.?if.defined[HAVE_MUELU_IFPACK2] )
diff --git a/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp b/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp
index f6e1740d47fe..b5124b3eaeb8 100644
--- a/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp
+++ b/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp
@@ -1033,7 +1033,7 @@ DetectDirichletRows_kokkos(const Xpetra::Matrix<SC, LO, GO, NO>& A,
   Kokkos::View<bool*, typename NO::device_type::memory_space> boundaryNodes;
 
   if (helpers::isTpetraBlockCrs(A)) {
-    const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& Am = helpers::Op2TpetraBlockCrs(A);
+    const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& Am = toTpetraBlock(A);
     auto b_graph                                     = Am.getCrsGraph().getLocalGraphDevice();
     auto b_rowptr                                    = Am.getCrsGraph().getLocalRowPtrsDevice();
     auto values                                      = Am.getValuesDevice();
diff --git a/packages/muelu/src/Utils/MueLu_Utilities_decl.hpp b/packages/muelu/src/Utils/MueLu_Utilities_decl.hpp
index 7f738cef23dc..e1988f482445 100644
--- a/packages/muelu/src/Utils/MueLu_Utilities_decl.hpp
+++ b/packages/muelu/src/Utils/MueLu_Utilities_decl.hpp
@@ -144,30 +144,9 @@ class Utilities : public UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node
 #endif
 
   //! Helper utility to pull out the underlying Tpetra objects from an Xpetra object
-  static RCP<const Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> MV2TpetraMV(RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> const vec);
-  static RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> MV2NonConstTpetraMV(RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> vec);
-  static RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> MV2NonConstTpetraMV2(Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& vec);
-
-  static const Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& MV2TpetraMV(const Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& vec);
-  static Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& MV2NonConstTpetraMV(Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& vec);
-
-  static RCP<const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op2TpetraCrs(RCP<const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op);
-  static RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op2NonConstTpetraCrs(RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op);
-
-  static const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op2TpetraCrs(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op);
-  static Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op2NonConstTpetraCrs(Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op);
-
-  static RCP<const Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op2TpetraBlockCrs(RCP<const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op);
-  static RCP<Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op2NonConstTpetraBlockCrs(RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op);
-
-  static const Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op2TpetraBlockCrs(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op);
-  static Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op2NonConstTpetraBlockCrs(Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op);
-
   static RCP<const Tpetra::RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op2TpetraRow(RCP<const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op);
   static RCP<Tpetra::RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op2NonConstTpetraRow(RCP<Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op);
 
-  static const RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> Map2TpetraMap(const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>& map);
-
   static void MyOldScaleMatrix(Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const Teuchos::ArrayRCP<const Scalar>& scalingVector, bool doInverse = true,
                                bool doFillComplete = true, bool doOptimizeStorage = true);
 
@@ -291,192 +270,6 @@ class Utilities<double, int, int, Xpetra::EpetraNode> : public UtilitiesBase<dou
   // @}
 
   //! Helper utility to pull out the underlying Tpetra objects from an Xpetra object
-  static RCP<const Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> MV2TpetraMV(RCP<MultiVector> const vec) {
-#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
-     (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
-    throw Exceptions::RuntimeError("MV2TpetraMV: Tpetra has not been compiled with support for LO=GO=int.");
-#else
-    RCP<const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> tmpVec = rcp_dynamic_cast<Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(vec);
-    if (tmpVec == Teuchos::null)
-      throw Exceptions::BadCast("Cast from Xpetra::MultiVector to Xpetra::TpetraMultiVector failed");
-    return tmpVec->getTpetra_MultiVector();
-#endif
-  }
-  static RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> MV2NonConstTpetraMV(RCP<MultiVector> vec) {
-#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
-     (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
-    throw Exceptions::RuntimeError("MV2NonConstTpetraMV: Tpetra has not been compiled with support for LO=GO=int.");
-#else
-    RCP<const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> tmpVec = rcp_dynamic_cast<Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(vec);
-    if (tmpVec == Teuchos::null)
-      throw Exceptions::BadCast("Cast from Xpetra::MultiVector to Xpetra::TpetraMultiVector failed");
-    return tmpVec->getTpetra_MultiVector();
-#endif
-  }
-  static RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> MV2NonConstTpetraMV2(MultiVector& vec) {
-#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
-     (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
-    throw Exceptions::RuntimeError("MV2NonConstTpetraMV2: Tpetra has not been compiled with support for LO=GO=int.");
-#else
-    const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tmpVec = dynamic_cast<const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(vec);
-    return tmpVec.getTpetra_MultiVector();
-#endif
-  }
-
-  static const Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& MV2TpetraMV(const MultiVector& vec) {
-#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
-     (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
-    throw Exceptions::RuntimeError("MV2TpetraMV: Tpetra has not been compiled with support for LO=GO=int.");
-#else
-    const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tmpVec = dynamic_cast<const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(vec);
-    return *(tmpVec.getTpetra_MultiVector());
-#endif
-  }
-  static Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& MV2NonConstTpetraMV(MultiVector& vec) {
-#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
-     (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
-    throw Exceptions::RuntimeError("MV2NonConstTpetraMV: Tpetra has not been compiled with support for LO=GO=int.");
-#else
-    const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tmpVec = dynamic_cast<const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(vec);
-    return *(tmpVec.getTpetra_MultiVector());
-#endif
-  }
-
-  static RCP<const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op2TpetraCrs(RCP<const Matrix> Op) {
-#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
-     (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
-    throw Exceptions::RuntimeError("Op2TpetraCrs: Tpetra has not been compiled with support for LO=GO=int.");
-#else
-    // Get the underlying Tpetra Mtx
-    RCP<const CrsMatrixWrap> crsOp = rcp_dynamic_cast<const CrsMatrixWrap>(Op);
-    if (crsOp == Teuchos::null)
-      throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-    const RCP<const Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& tmp_ECrsMtx = rcp_dynamic_cast<const Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(crsOp->getCrsMatrix());
-    if (tmp_ECrsMtx == Teuchos::null)
-      throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraCrsMatrix failed");
-    return tmp_ECrsMtx->getTpetra_CrsMatrix();
-#endif
-  }
-  static RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op2NonConstTpetraCrs(RCP<Matrix> Op) {
-#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
-     (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
-    throw Exceptions::RuntimeError("Op2NonConstTpetraCrs: Tpetra has not been compiled with support for LO=GO=int.");
-#else
-    RCP<const CrsMatrixWrap> crsOp = rcp_dynamic_cast<const CrsMatrixWrap>(Op);
-    if (crsOp == Teuchos::null)
-      throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-    const RCP<const Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& tmp_ECrsMtx = rcp_dynamic_cast<const Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(crsOp->getCrsMatrix());
-    if (tmp_ECrsMtx == Teuchos::null)
-      throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraCrsMatrix failed");
-    return tmp_ECrsMtx->getTpetra_CrsMatrixNonConst();
-#endif
-  };
-
-  static const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op2TpetraCrs(const Matrix& Op) {
-#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
-     (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
-    throw Exceptions::RuntimeError("Op2TpetraCrs: Tpetra has not been compiled with support for LO=GO=int.");
-#else
-    try {
-      const CrsMatrixWrap& crsOp = dynamic_cast<const CrsMatrixWrap&>(Op);
-      try {
-        const Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tmp_ECrsMtx = dynamic_cast<const Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(*crsOp.getCrsMatrix());
-        return *tmp_ECrsMtx.getTpetra_CrsMatrix();
-      } catch (std::bad_cast&) {
-        throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraCrsMatrix failed");
-      }
-    } catch (std::bad_cast&) {
-      throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-    }
-#endif
-  }
-  static Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op2NonConstTpetraCrs(Matrix& Op) {
-#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
-     (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
-    throw Exceptions::RuntimeError("Op2NonConstTpetraCrs: Tpetra has not been compiled with support for LO=GO=int.");
-#else
-    try {
-      CrsMatrixWrap& crsOp = dynamic_cast<CrsMatrixWrap&>(Op);
-      try {
-        Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tmp_ECrsMtx = dynamic_cast<Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(*crsOp.getCrsMatrix());
-        return *tmp_ECrsMtx.getTpetra_CrsMatrixNonConst();
-      } catch (std::bad_cast&) {
-        throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraCrsMatrix failed");
-      }
-    } catch (std::bad_cast&) {
-      throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-    }
-#endif
-  }
-
-  static RCP<const Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op2TpetraBlockCrs(RCP<const Matrix> Op) {
-#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
-     (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
-    throw Exceptions::RuntimeError("Op2TpetraBlockCrs: Tpetra has not been compiled with support for LO=GO=int.");
-#else
-    // Get the underlying Tpetra Mtx
-    RCP<const CrsMatrixWrap> crsOp = rcp_dynamic_cast<const CrsMatrixWrap>(Op);
-    if (crsOp == Teuchos::null)
-      throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-    const RCP<const Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& tmp_ECrsMtx = rcp_dynamic_cast<const Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(crsOp->getCrsMatrix());
-    if (tmp_ECrsMtx == Teuchos::null)
-      throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraBlockCrsMatrix failed");
-    return tmp_ECrsMtx->getTpetra_BlockCrsMatrix();
-#endif
-  }
-
-  static RCP<Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op2NonConstTpetraBlockCrs(RCP<Matrix> Op) {
-#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
-     (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
-    throw Exceptions::RuntimeError("Op2NonConstTpetraBlockCrs: Tpetra has not been compiled with support for LO=GO=int.");
-#else
-    RCP<const CrsMatrixWrap> crsOp = rcp_dynamic_cast<const CrsMatrixWrap>(Op);
-    if (crsOp == Teuchos::null)
-      throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-    const RCP<const Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& tmp_ECrsMtx = rcp_dynamic_cast<const Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(crsOp->getCrsMatrix());
-    if (tmp_ECrsMtx == Teuchos::null)
-      throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraBlockCrsMatrix failed");
-    return tmp_ECrsMtx->getTpetra_BlockCrsMatrixNonConst();
-#endif
-  };
-
-  static const Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op2TpetraBlockCrs(const Matrix& Op) {
-#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
-     (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
-    throw Exceptions::RuntimeError("Op2TpetraBlockCrs: Tpetra has not been compiled with support for LO=GO=int.");
-#else
-    try {
-      const CrsMatrixWrap& crsOp = dynamic_cast<const CrsMatrixWrap&>(Op);
-      try {
-        const Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tmp_ECrsMtx = dynamic_cast<const Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(*crsOp.getCrsMatrix());
-        return *tmp_ECrsMtx.getTpetra_BlockCrsMatrix();
-      } catch (std::bad_cast&) {
-        throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraBlockCrsMatrix failed");
-      }
-    } catch (std::bad_cast&) {
-      throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-    }
-#endif
-  }
-  static Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op2NonConstTpetraBlockCrs(Matrix& Op) {
-#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
-     (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
-    throw Exceptions::RuntimeError("Op2NonConstTpetraCrs: Tpetra has not been compiled with support for LO=GO=int.");
-#else
-    try {
-      CrsMatrixWrap& crsOp = dynamic_cast<CrsMatrixWrap&>(Op);
-      try {
-        Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tmp_ECrsMtx = dynamic_cast<Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(*crsOp.getCrsMatrix());
-        return *tmp_ECrsMtx.getTpetra_BlockCrsMatrixNonConst();
-      } catch (std::bad_cast&) {
-        throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraBlockCrsMatrix failed");
-      }
-    } catch (std::bad_cast&) {
-      throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-    }
-#endif
-  }
-
   static RCP<const Tpetra::RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op2TpetraRow(RCP<const Operator> Op) {
 #if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
      (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
@@ -545,18 +338,6 @@ class Utilities<double, int, int, Xpetra::EpetraNode> : public UtilitiesBase<dou
 #endif
   };
 
-  static const RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> Map2TpetraMap(const Map& map) {
-#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
-     (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
-    throw Exceptions::RuntimeError("Map2TpetraMap: Tpetra has not been compiled with support for LO=GO=int.");
-#else
-    const RCP<const Xpetra::TpetraMap<LocalOrdinal, GlobalOrdinal, Node>>& tmp_TMap = rcp_dynamic_cast<const Xpetra::TpetraMap<LocalOrdinal, GlobalOrdinal, Node>>(rcpFromRef(map));
-    if (tmp_TMap == Teuchos::null)
-      throw Exceptions::BadCast("Utilities::Map2TpetraMap : Cast from Xpetra::Map to Xpetra::TpetraMap failed");
-    return tmp_TMap->getTpetra_Map();
-#endif
-  };
-
   static void MyOldScaleMatrix(Matrix& Op, const Teuchos::ArrayRCP<const Scalar>& scalingVector, bool doInverse = true,
                                bool doFillComplete = true, bool doOptimizeStorage = true) {
     Scalar one = Teuchos::ScalarTraits<Scalar>::one();
@@ -591,7 +372,7 @@ class Utilities<double, int, int, Xpetra::EpetraNode> : public UtilitiesBase<dou
     throw Exceptions::RuntimeError("Matrix scaling is not possible because Tpetra has not been compiled with support for LO=GO=int.");
 #else
     try {
-      Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tpOp = Op2NonConstTpetraCrs(Op);
+      Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tpOp = toTpetra(Op);
 
       const RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowMap    = tpOp.getRowMap();
       const RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> domainMap = tpOp.getDomainMap();
@@ -731,7 +512,7 @@ class Utilities<double, int, int, Xpetra::EpetraNode> : public UtilitiesBase<dou
         else if (Helpers::isTpetraBlockCrs(Op)) {
           using BCRS = Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
           // using CRS  = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
-          const BCRS& tpetraOp = Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2TpetraBlockCrs(Op);
+          const BCRS& tpetraOp = toTpetraBlock(Op);
           RCP<BCRS> At;
           {
             Tpetra::BlockCrsMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> transposer(rcpFromRef(tpetraOp), label);
diff --git a/packages/muelu/src/Utils/MueLu_Utilities_def.hpp b/packages/muelu/src/Utils/MueLu_Utilities_def.hpp
index 5548cc4595a1..92a0390a9e88 100644
--- a/packages/muelu/src/Utils/MueLu_Utilities_def.hpp
+++ b/packages/muelu/src/Utils/MueLu_Utilities_def.hpp
@@ -172,152 +172,6 @@ const Epetra_Map& Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Map2Epet
 }
 #endif
 
-template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-RCP<const Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
-Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MV2TpetraMV(RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> const vec) {
-  RCP<const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> tmpVec = rcp_dynamic_cast<Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(vec);
-  if (tmpVec == Teuchos::null)
-    throw Exceptions::BadCast("Cast from Xpetra::MultiVector to Xpetra::TpetraMultiVector failed");
-  return tmpVec->getTpetra_MultiVector();
-}
-
-template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MV2NonConstTpetraMV(RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> vec) {
-  RCP<const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> tmpVec = rcp_dynamic_cast<Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(vec);
-  if (tmpVec == Teuchos::null)
-    throw Exceptions::BadCast("Cast from Xpetra::MultiVector to Xpetra::TpetraMultiVector failed");
-  return tmpVec->getTpetra_MultiVector();
-}
-
-template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MV2NonConstTpetraMV(Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& vec) {
-  const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tmpVec = dynamic_cast<const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(vec);
-  return *(tmpVec.getTpetra_MultiVector());
-}
-
-template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MV2NonConstTpetraMV2(Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& vec) {
-  const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tmpVec = dynamic_cast<const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(vec);
-  return tmpVec.getTpetra_MultiVector();
-}
-
-template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-const Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>&
-Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MV2TpetraMV(const Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& vec) {
-  const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tmpVec = dynamic_cast<const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(vec);
-  return *(tmpVec.getTpetra_MultiVector());
-}
-
-template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-RCP<const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2TpetraCrs(RCP<const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op) {
-  // Get the underlying Tpetra Mtx
-  RCP<const Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>> crsOp = rcp_dynamic_cast<const Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(Op);
-  if (crsOp == Teuchos::null)
-    throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-  const RCP<const Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& tmp_ECrsMtx = rcp_dynamic_cast<const Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(crsOp->getCrsMatrix());
-  if (tmp_ECrsMtx == Teuchos::null)
-    throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraCrsMatrix failed");
-  return tmp_ECrsMtx->getTpetra_CrsMatrix();
-}
-
-template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2NonConstTpetraCrs(RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op) {
-  RCP<const Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>> crsOp = rcp_dynamic_cast<const Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(Op);
-  if (crsOp == Teuchos::null)
-    throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-  const RCP<const Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& tmp_ECrsMtx = rcp_dynamic_cast<const Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(crsOp->getCrsMatrix());
-  if (tmp_ECrsMtx == Teuchos::null)
-    throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraCrsMatrix failed");
-  return tmp_ECrsMtx->getTpetra_CrsMatrixNonConst();
-}
-
-template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2TpetraCrs(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op) {
-  try {
-    const Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>& crsOp = dynamic_cast<const Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(Op);
-    try {
-      const Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tmp_ECrsMtx = dynamic_cast<const Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(*crsOp.getCrsMatrix());
-      return *tmp_ECrsMtx.getTpetra_CrsMatrix();
-    } catch (std::bad_cast&) {
-      throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraCrsMatrix failed");
-    }
-  } catch (std::bad_cast&) {
-    throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-  }
-}
-
-template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2NonConstTpetraCrs(Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op) {
-  try {
-    Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>& crsOp = dynamic_cast<Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(Op);
-    try {
-      Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tmp_ECrsMtx = dynamic_cast<Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(*crsOp.getCrsMatrix());
-      return *tmp_ECrsMtx.getTpetra_CrsMatrixNonConst();
-    } catch (std::bad_cast&) {
-      throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraCrsMatrix failed");
-    }
-  } catch (std::bad_cast&) {
-    throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-  }
-}
-
-template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-RCP<const Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2TpetraBlockCrs(RCP<const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op) {
-  using XCrsMatrixWrap = Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
-  // Get the underlying Tpetra Mtx
-  RCP<const XCrsMatrixWrap> crsOp = rcp_dynamic_cast<const XCrsMatrixWrap>(Op);
-  if (crsOp == Teuchos::null)
-    throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-  const RCP<const Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& tmp_ECrsMtx = rcp_dynamic_cast<const Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(crsOp->getCrsMatrix());
-  if (tmp_ECrsMtx == Teuchos::null)
-    throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraBlockCrsMatrix failed");
-  return tmp_ECrsMtx->getTpetra_BlockCrsMatrix();
-}
-
-template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-RCP<Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2NonConstTpetraBlockCrs(RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op) {
-  using XCrsMatrixWrap            = Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
-  RCP<const XCrsMatrixWrap> crsOp = rcp_dynamic_cast<const XCrsMatrixWrap>(Op);
-  if (crsOp == Teuchos::null)
-    throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-  const RCP<const Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& tmp_ECrsMtx = rcp_dynamic_cast<const Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(crsOp->getCrsMatrix());
-  if (tmp_ECrsMtx == Teuchos::null)
-    throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraBlockCrsMatrix failed");
-  return tmp_ECrsMtx->getTpetra_BlockCrsMatrixNonConst();
-}
-
-template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-const Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2TpetraBlockCrs(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op) {
-  try {
-    using XCrsMatrixWrap        = Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
-    const XCrsMatrixWrap& crsOp = dynamic_cast<const XCrsMatrixWrap&>(Op);
-    try {
-      const Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tmp_ECrsMtx = dynamic_cast<const Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(*crsOp.getCrsMatrix());
-      return *tmp_ECrsMtx.getTpetra_BlockCrsMatrix();
-    } catch (std::bad_cast&) {
-      throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraBlockCrsMatrix failed");
-    }
-  } catch (std::bad_cast&) {
-    throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-  }
-}
-
-template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2NonConstTpetraBlockCrs(Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op) {
-  try {
-    using XCrsMatrixWrap  = Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
-    XCrsMatrixWrap& crsOp = dynamic_cast<XCrsMatrixWrap&>(Op);
-    try {
-      Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tmp_ECrsMtx = dynamic_cast<Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>&>(*crsOp.getCrsMatrix());
-      return *tmp_ECrsMtx.getTpetra_BlockCrsMatrixNonConst();
-    } catch (std::bad_cast&) {
-      throw Exceptions::BadCast("Cast from Xpetra::CrsMatrix to Xpetra::TpetraBlockCrsMatrix failed");
-    }
-  } catch (std::bad_cast&) {
-    throw Exceptions::BadCast("Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");
-  }
-}
-
 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
 RCP<const Tpetra::RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2TpetraRow(RCP<const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Op) {
   RCP<const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> mat           = rcp_dynamic_cast<const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(Op);
@@ -378,14 +232,6 @@ RCP<Tpetra::RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> Utilities<Scal
   }
 }
 
-template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-const RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Map2TpetraMap(const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>& map) {
-  const RCP<const Xpetra::TpetraMap<LocalOrdinal, GlobalOrdinal, Node>>& tmp_TMap = rcp_dynamic_cast<const Xpetra::TpetraMap<LocalOrdinal, GlobalOrdinal, Node>>(rcpFromRef(map));
-  if (tmp_TMap == Teuchos::null)
-    throw Exceptions::BadCast("Utilities::Map2TpetraMap : Cast from Xpetra::Map to Xpetra::TpetraMap failed");
-  return tmp_TMap->getTpetra_Map();
-}
-
 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
 void Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MyOldScaleMatrix(Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const Teuchos::ArrayRCP<const Scalar>& scalingVector, bool doInverse,
                                                                             bool doFillComplete,
@@ -424,7 +270,7 @@ void Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MyOldScaleMatrix_Tpet
                                                                                    bool doFillComplete,
                                                                                    bool doOptimizeStorage) {
   try {
-    Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tpOp = Op2NonConstTpetraCrs(Op);
+    Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tpOp = toTpetra(Op);
 
     const RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowMap    = tpOp.getRowMap();
     const RCP<const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> domainMap = tpOp.getDomainMap();
@@ -513,7 +359,7 @@ Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
     using Helpers = Xpetra::Helpers<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
     /***************************************************************/
     if (Helpers::isTpetraCrs(Op)) {
-      const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tpetraOp = Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2TpetraCrs(Op);
+      const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& tpetraOp = toTpetra(Op);
 
       RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> A;
       Tpetra::RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> transposer(rcpFromRef(tpetraOp), label);  // more than meets the eye
@@ -542,7 +388,7 @@ Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
       using XCrsMatrixWrap = Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
       using BCRS           = Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
       // using CRS  = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
-      const BCRS& tpetraOp = Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2TpetraBlockCrs(Op);
+      const BCRS& tpetraOp = toTpetraBlock(Op);
 
       RCP<BCRS> At;
       {
diff --git a/packages/muelu/test/interface/CreateOperator.cpp b/packages/muelu/test/interface/CreateOperator.cpp
index 00a4eaaea530..9c07f031676f 100644
--- a/packages/muelu/test/interface/CreateOperator.cpp
+++ b/packages/muelu/test/interface/CreateOperator.cpp
@@ -64,7 +64,7 @@ void setup_system_list(Xpetra::UnderlyingLib& lib, Teuchos::RCP<Xpetra::Matrix<S
       oldbuffer = std::cout.rdbuf(&buffer);
     }
 
-    RCP<Tpetra_CrsMatrix> At = Xpetra::Helpers<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2NonConstTpetraCrs(A);
+    RCP<Tpetra_CrsMatrix> At = toTpetra(A);
     RCP<Tpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node> > opA(At);
     RCP<Tpetra_Operator> Mt = MueLu::CreateTpetraPreconditioner(opA, mueluList);
 
diff --git a/packages/muelu/test/maxwell/Maxwell3D.cpp b/packages/muelu/test/maxwell/Maxwell3D.cpp
index 6742df63872c..5b6e0c41ced4 100644
--- a/packages/muelu/test/maxwell/Maxwell3D.cpp
+++ b/packages/muelu/test/maxwell/Maxwell3D.cpp
@@ -408,10 +408,10 @@ bool SetupSolve(std::map<std::string, void*> inputs) {
           sublist->set(*key_it, Teuchos::rcp_dynamic_cast<Xpetra::EpetraMultiVectorT<GlobalOrdinal, Node> >(coords, true)->getEpetra_MultiVector());
 #endif
         else if (value == "tD0") {
-          auto tD0 = Teuchos::rcp_dynamic_cast<TpetraCrsMatrix>(Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix, true)->getCrsMatrix(), true)->getTpetra_CrsMatrix();
+          auto tD0 = toTpetra(D0_Matrix);
           sublist->set(*key_it, tD0);
         } else if (value == "tCoordinates") {
-          sublist->set(*key_it, Teuchos::rcp_dynamic_cast<TpetraMultiVector>(coords, true)->getTpetra_MultiVector());
+          sublist->set(*key_it, toTpetra(coords));
         }
       }
     }
diff --git a/packages/muelu/test/perf_tests_kokkos/Redirection.cpp b/packages/muelu/test/perf_tests_kokkos/Redirection.cpp
index 2e085a260db9..3d10550278d9 100644
--- a/packages/muelu/test/perf_tests_kokkos/Redirection.cpp
+++ b/packages/muelu/test/perf_tests_kokkos/Redirection.cpp
@@ -119,7 +119,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int ar
 
     if (lib == Xpetra::UseTpetra) {
       typedef Tpetra::CrsMatrix<SC, LO, GO, NO> tCrsMatrix;
-      RCP<const tCrsMatrix> tA = Utilities::Op2TpetraCrs(A);
+      RCP<const tCrsMatrix> tA = toTpetra(A);
       TEUCHOS_TEST_FOR_EXCEPTION(tA.is_null(), MueLu::Exceptions::RuntimeError,
                                  "A is not a Tpetra CrsMatrix");
 
diff --git a/packages/muelu/test/scaling/Driver.cpp b/packages/muelu/test/scaling/Driver.cpp
index 81c45ffd5a0e..3894faa73aec 100644
--- a/packages/muelu/test/scaling/Driver.cpp
+++ b/packages/muelu/test/scaling/Driver.cpp
@@ -106,7 +106,7 @@ void equilibrateMatrix(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalO
   bool assumeSymmetric   = false;
   typedef typename Tpetra::Details::EquilibrationInfo<typename Kokkos::ArithTraits<Scalar>::val_type, typename Node::device_type> equil_type;
 
-  Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > A = Utilities::Op2NonConstTpetraCrs(Axpetra);
+  Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > A = toTpetra(Axpetra);
 
   if (Axpetra->getRowMap()->lib() == Xpetra::UseTpetra) {
     equil_type equibResult_ = computeRowAndColumnOneNorms(*A, assumeSymmetric);
diff --git a/packages/muelu/test/scaling/DriverCore.hpp b/packages/muelu/test/scaling/DriverCore.hpp
index 6a111bbb9103..ccc63ed8f815 100644
--- a/packages/muelu/test/scaling/DriverCore.hpp
+++ b/packages/muelu/test/scaling/DriverCore.hpp
@@ -144,7 +144,7 @@ void PreconditionerSetup(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, Globa
     A->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
     if (useAMGX) {
 #if defined(HAVE_MUELU_AMGX)
-      RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Ac      = Utilities::Op2NonConstTpetraCrs(A);
+      RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Ac      = toTpetra(A);
       RCP<Tpetra::Operator<SC, LO, GO, NO>> At       = Teuchos::rcp_dynamic_cast<Tpetra::Operator<SC, LO, GO, NO>>(Ac);
       RCP<MueLu::TpetraOperator<SC, LO, GO, NO>> Top = MueLu::CreateTpetraPreconditioner(At, mueluList);
       Prec                                           = Teuchos::rcp(new Xpetra::TpetraOperator<SC, LO, GO, NO>(Top));
@@ -259,7 +259,7 @@ void SystemSolve(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal
   Teuchos::RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Atpetra;
   Teuchos::RCP<Tpetra::MultiVector<SC, LO, GO, NO>> Xtpetra, Btpetra;
   if (lib == Xpetra::UseTpetra) {
-    Atpetra = Utilities::Op2NonConstTpetraCrs(A);
+    Atpetra = toTpetra(A);
     Xtpetra = rcp(&Xpetra::toTpetra(*X), false);
     Btpetra = rcp(&Xpetra::toTpetra(*B), false);
   }
diff --git a/packages/muelu/test/scaling/ImportPerformance.cpp b/packages/muelu/test/scaling/ImportPerformance.cpp
index a501130ce2e0..ee3ae3cd088b 100644
--- a/packages/muelu/test/scaling/ImportPerformance.cpp
+++ b/packages/muelu/test/scaling/ImportPerformance.cpp
@@ -344,8 +344,8 @@ void TestTransfer(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdina
     typedef Tpetra::CrsMatrix<SC, LO, GO, NO> crs_matrix_type;
     typedef Tpetra::Import<LO, GO, NO> import_type;
 
-    RCP<const crs_matrix_type> Au = Utilities::Op2TpetraCrs(A);
-    RCP<const crs_matrix_type> Pu = Utilities::Op2TpetraCrs(P);
+    RCP<const crs_matrix_type> Au = toTpetra(A);
+    RCP<const crs_matrix_type> Pu = toTpetra(P);
     if (Au->getComm()->getSize() == 1) return;
 
     // ==================
diff --git a/packages/muelu/test/scaling/JacobiKernelDriver.cpp b/packages/muelu/test/scaling/JacobiKernelDriver.cpp
index a022a185a043..da956e7c69bd 100644
--- a/packages/muelu/test/scaling/JacobiKernelDriver.cpp
+++ b/packages/muelu/test/scaling/JacobiKernelDriver.cpp
@@ -80,9 +80,9 @@ void Jacobi_MKL_SPMM(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, N
   typedef typename vector_type::device_type device_type;
   typedef typename Kokkos::View<MKL_INT *, typename lno_nnz_view_t::array_layout, typename lno_nnz_view_t::device_type> mkl_int_type;
 
-  RCP<const crs_matrix_type> Au = Utilities::Op2TpetraCrs(rcp(&A, false));
-  RCP<const crs_matrix_type> Bu = Utilities::Op2TpetraCrs(rcp(&B, false));
-  RCP<const crs_matrix_type> Cu = Utilities::Op2TpetraCrs(rcp(&C, false));
+  RCP<const crs_matrix_type> Au = toTpetra(rcp(&A, false));
+  RCP<const crs_matrix_type> Bu = toTpetra(rcp(&B, false));
+  RCP<const crs_matrix_type> Cu = toTpetra(rcp(&C, false));
   RCP<crs_matrix_type> Cnc      = Teuchos::rcp_const_cast<crs_matrix_type>(Cu);
   RCP<const vector_type> Du     = Xpetra::toTpetra(D);
 
@@ -245,9 +245,9 @@ void Jacobi_Wrapper(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, No
     typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
     LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
     RCP<const import_type> Cimport;
-    RCP<const crs_matrix_type> Au = Utilities::Op2TpetraCrs(rcp(&A, false));
-    RCP<const crs_matrix_type> Bu = Utilities::Op2TpetraCrs(rcp(&B, false));
-    RCP<const crs_matrix_type> Cu = Utilities::Op2TpetraCrs(rcp(&C, false));
+    RCP<const crs_matrix_type> Au = toTpetra(rcp(&A, false));
+    RCP<const crs_matrix_type> Bu = toTpetra(rcp(&B, false));
+    RCP<const crs_matrix_type> Cu = toTpetra(rcp(&C, false));
     RCP<crs_matrix_type> Cnc      = Teuchos::rcp_const_cast<crs_matrix_type>(Cu);
 
     RCP<const vector_type> Du = Xpetra::toTpetra(D);
diff --git a/packages/muelu/test/scaling/MMKernelDriver.cpp b/packages/muelu/test/scaling/MMKernelDriver.cpp
index 737a17cfa94a..2474bb9e4cf9 100644
--- a/packages/muelu/test/scaling/MMKernelDriver.cpp
+++ b/packages/muelu/test/scaling/MMKernelDriver.cpp
@@ -247,9 +247,9 @@ void Multiply_ViennaCL(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal,
     typedef typename KCRS::values_type::non_const_type scalar_view_t;
     typedef typename Kokkos::View<unsigned int *, typename lno_nnz_view_t::array_layout, typename lno_nnz_view_t::device_type> vcl_size_t_type;
 
-    RCP<const crs_matrix_type> Au = Utilities::Op2TpetraCrs(rcp(&A, false));
-    RCP<const crs_matrix_type> Bu = Utilities::Op2TpetraCrs(rcp(&B, false));
-    RCP<const crs_matrix_type> Cu = Utilities::Op2TpetraCrs(rcp(&C, false));
+    RCP<const crs_matrix_type> Au = toTpetra(rcp(&A, false));
+    RCP<const crs_matrix_type> Bu = toTpetra(rcp(&B, false));
+    RCP<const crs_matrix_type> Cu = toTpetra(rcp(&C, false));
     RCP<crs_matrix_type> Cnc      = Teuchos::rcp_const_cast<crs_matrix_type>(Cu);
 
     const KCRS &Amat = Au->getLocalMatrixDevice();
@@ -414,9 +414,9 @@ void Multiply_MKL_SPMM(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal,
 
   using no_init_view = Kokkos::ViewAllocateWithoutInitializing;
 
-  RCP<const crs_matrix_type> Au = Utilities::Op2TpetraCrs(rcp(&A, false));
-  RCP<const crs_matrix_type> Bu = Utilities::Op2TpetraCrs(rcp(&B, false));
-  RCP<const crs_matrix_type> Cu = Utilities::Op2TpetraCrs(rcp(&C, false));
+  RCP<const crs_matrix_type> Au = toTpetra(rcp(&A, false));
+  RCP<const crs_matrix_type> Bu = toTpetra(rcp(&B, false));
+  RCP<const crs_matrix_type> Cu = toTpetra(rcp(&C, false));
   RCP<crs_matrix_type> Cnc      = Teuchos::rcp_const_cast<crs_matrix_type>(Cu);
 
   const KCRS &Amat = Au->getLocalMatrixDevice();
@@ -598,9 +598,9 @@ void Multiply_KokkosKernels(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrd
     typedef typename KCRS::values_type::non_const_type scalar_view_t;
     typedef typename KCRS::device_type device_t;
 
-    RCP<const crs_matrix_type> Au = Utilities::Op2TpetraCrs(rcp(&A, false));
-    RCP<const crs_matrix_type> Bu = Utilities::Op2TpetraCrs(rcp(&B, false));
-    RCP<const crs_matrix_type> Cu = Utilities::Op2TpetraCrs(rcp(&C, false));
+    RCP<const crs_matrix_type> Au = toTpetra(rcpFromRef(A));
+    RCP<const crs_matrix_type> Bu = toTpetra(rcpFromRef(B));
+    RCP<const crs_matrix_type> Cu = toTpetra(rcpFromRef(C));
     RCP<crs_matrix_type> Cnc      = Teuchos::rcp_const_cast<crs_matrix_type>(Cu);
 
     const KCRS &Amat = Au->getLocalMatrixDevice();
@@ -794,9 +794,9 @@ struct LTG_Tests<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::Kokk
       typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
       LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
       RCP<const import_type> Cimport;
-      RCP<const crs_matrix_type> Au = Utilities::Op2TpetraCrs(rcp(&A, false));
-      RCP<const crs_matrix_type> Bu = Utilities::Op2TpetraCrs(rcp(&B, false));
-      RCP<const crs_matrix_type> Cu = Utilities::Op2TpetraCrs(rcp(&C, false));
+      RCP<const crs_matrix_type> Au = toTpetra(rcp(&A, false));
+      RCP<const crs_matrix_type> Bu = toTpetra(rcp(&B, false));
+      RCP<const crs_matrix_type> Cu = toTpetra(rcp(&C, false));
       RCP<crs_matrix_type> Cnc      = Teuchos::rcp_const_cast<crs_matrix_type>(Cu);
 
       using no_init_view = Kokkos::ViewAllocateWithoutInitializing;
diff --git a/packages/muelu/test/scaling/MatvecKernelDriver.cpp b/packages/muelu/test/scaling/MatvecKernelDriver.cpp
index 3de6a2480dab..3d510b676f08 100644
--- a/packages/muelu/test/scaling/MatvecKernelDriver.cpp
+++ b/packages/muelu/test/scaling/MatvecKernelDriver.cpp
@@ -944,7 +944,7 @@ int main_(Teuchos::CommandLineProcessor& clp, Xpetra::UnderlyingLib& lib, int ar
     vector_type xt;
     vector_type yt;
 
-    At                         = Utilities::Op2TpetraCrs(A);
+    At                         = toTpetra(A);
     const crs_matrix_type& Att = *At;
     xt                         = Xpetra::toTpetra(*x);
     yt                         = Xpetra::toTpetra(*y);
diff --git a/packages/muelu/test/scaling/TwoMatrixMMKernelDriver.cpp b/packages/muelu/test/scaling/TwoMatrixMMKernelDriver.cpp
index 1a4b53b9a7e3..65db6b5de7e6 100644
--- a/packages/muelu/test/scaling/TwoMatrixMMKernelDriver.cpp
+++ b/packages/muelu/test/scaling/TwoMatrixMMKernelDriver.cpp
@@ -108,10 +108,10 @@ void MM2_MKL(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &A,
 
   typedef typename Kokkos::View<MKL_INT *, typename lno_nnz_view_t::array_layout, typename lno_nnz_view_t::device_type> mkl_int_type;
 
-  RCP<const crs_matrix_type> Au  = Utilities::Op2TpetraCrs(rcp(&A, false));
-  RCP<const crs_matrix_type> B1u = Utilities::Op2TpetraCrs(rcp(&B1, false));
-  RCP<const crs_matrix_type> B2u = Utilities::Op2TpetraCrs(rcp(&B2, false));
-  RCP<const crs_matrix_type> Cu  = Utilities::Op2TpetraCrs(rcp(&C, false));
+  RCP<const crs_matrix_type> Au  = toTpetra(rcp(&A, false));
+  RCP<const crs_matrix_type> B1u = toTpetra(rcp(&B1, false));
+  RCP<const crs_matrix_type> B2u = toTpetra(rcp(&B2, false));
+  RCP<const crs_matrix_type> Cu  = toTpetra(rcp(&C, false));
   RCP<crs_matrix_type> Cnc       = Teuchos::rcp_const_cast<crs_matrix_type>(Cu);
 
   const KCRS &Amat  = Au->getLocalMatrixDevice();
@@ -308,10 +308,10 @@ void MM2_Wrapper(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>
     typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
     LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
     RCP<const import_type> Cimport;
-    RCP<const crs_matrix_type> Au  = Utilities::Op2TpetraCrs(rcp(&A, false));
-    RCP<const crs_matrix_type> B1u = Utilities::Op2TpetraCrs(rcp(&B1, false));
-    RCP<crs_matrix_type> B2u       = Teuchos::rcp_const_cast<crs_matrix_type>(Utilities::Op2TpetraCrs(rcp(&B2, false)));
-    RCP<const crs_matrix_type> Cu  = Utilities::Op2TpetraCrs(rcp(&C, false));
+    RCP<const crs_matrix_type> Au  = toTpetra(rcpFromRef(A));
+    RCP<const crs_matrix_type> B1u = toTpetra(rcp(&B1, false));
+    RCP<crs_matrix_type> B2u       = Teuchos::rcp_const_cast<crs_matrix_type>(toTpetra(rcp(&B2, false)));
+    RCP<const crs_matrix_type> Cu  = toTpetra(rcp(&C, false));
     RCP<crs_matrix_type> Cnc       = Teuchos::rcp_const_cast<crs_matrix_type>(Cu);
 
     // **********************************
diff --git a/packages/muelu/test/unit_tests/Adapters/AmgxOperatorAdapter.cpp b/packages/muelu/test/unit_tests/Adapters/AmgxOperatorAdapter.cpp
index f072d8ba3d43..ebb2ac78842c 100644
--- a/packages/muelu/test/unit_tests/Adapters/AmgxOperatorAdapter.cpp
+++ b/packages/muelu/test/unit_tests/Adapters/AmgxOperatorAdapter.cpp
@@ -59,7 +59,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(AMGXOperator, Apply, Scalar, LocalOrdinal, Glo
 
     // matrix
     RCP<Matrix> Op                                                         = TestHelpers::TestFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build2DPoisson(nx, -1, Xpetra::UseTpetra);
-    RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > tpA = MueLu::Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Op2NonConstTpetraCrs(Op);
+    RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > tpA = toTpetra(Op);
     RCP<Tpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node> > tOp  = tpA;
     Teuchos::ParameterList params, dummyList;
     params.set("use external multigrid package", "amgx");
@@ -77,7 +77,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(AMGXOperator, Apply, Scalar, LocalOrdinal, Glo
     RHS->putScalar((double)1.0);
     X->putScalar((double)0.0);
 
-    aH->apply(*(MueLu::Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MV2TpetraMV(RHS)), *(MueLu::Utilities<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MV2NonConstTpetraMV(X)));
+    aH->apply(*(toTpetra(RHS)), *(toTpetra(X)));
     // if(comm->getSize() == 1) TEST_EQUALITY(aH->iters()==16,true);
     TEST_EQUALITY(aH->getStatus() == 0, true);
 
diff --git a/packages/muelu/test/unit_tests/Adapters/BelosAdapters.cpp b/packages/muelu/test/unit_tests/Adapters/BelosAdapters.cpp
index f7e78702c312..7dd79839bf17 100644
--- a/packages/muelu/test/unit_tests/Adapters/BelosAdapters.cpp
+++ b/packages/muelu/test/unit_tests/Adapters/BelosAdapters.cpp
@@ -230,8 +230,8 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(BelosAdapters, XpetraOp_TpetraMV, Scalar, Loca
   RCP<OP> belosPrec = rcp(new Belos::MueLuOp<Scalar, LocalOrdinal, GlobalOrdinal, Node>(p->GetH()));
 
   // X, B
-  RCP<MV> X = Utilities::MV2NonConstTpetraMV(p->GetNewX0());
-  RCP<MV> B = Utilities::MV2NonConstTpetraMV(p->GetRHS());
+  RCP<MV> X = toTpetra(p->GetNewX0());
+  RCP<MV> B = toTpetra(p->GetRHS());
 
   // Run Belos
   int numIters = MueLuTests::BelosAdaptersTest<SC, MV, OP>(belosOp, belosPrec, X, B, out, success);
diff --git a/packages/muelu/test/unit_tests/Adapters/CreatePreconditioner.cpp b/packages/muelu/test/unit_tests/Adapters/CreatePreconditioner.cpp
index bcf3b145440c..f1a2d2635fc4 100644
--- a/packages/muelu/test/unit_tests/Adapters/CreatePreconditioner.cpp
+++ b/packages/muelu/test/unit_tests/Adapters/CreatePreconditioner.cpp
@@ -89,12 +89,12 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(PetraOperator, CreatePreconditioner, Scalar, L
     RCP<MultiVector> nullspace             = Xpetra::MultiVectorFactory<SC, LO, GO, NO>::Build(Op->getDomainMap(), 1);
     nullspace->putScalar(Teuchos::ScalarTraits<SC>::one());
 
-    RCP<tpetra_crsmatrix_type> tpA = MueLu::Utilities<SC, LO, GO, NO>::Op2NonConstTpetraCrs(Op);
+    RCP<tpetra_crsmatrix_type> tpA = toTpetra(Op);
 
     out << "========== Create Preconditioner from xmlFile ==========" << std::endl;
     out << "xmlFileName: " << xmlFileName << std::endl;
     RCP<MueLu::TpetraOperator<SC, LO, GO, NO> > tH = MueLu::CreateTpetraPreconditioner<SC, LO, GO, NO>(RCP<tpetra_operator_type>(tpA), xmlFileName);
-    tH->apply(*(Utils::MV2TpetraMV(RHS1)), *(Utils::MV2NonConstTpetraMV(X1)));
+    tH->apply(*(toTpetra(RHS1)), *(toTpetra(X1)));
     out << "after apply, ||b-A*x||_2 = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << Utils::ResidualNorm(*Op, *X1, *RHS1) << std::endl;
 
 #endif
@@ -213,16 +213,16 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(PetraOperator, CreatePreconditioner_XMLOnList,
     RCP<MultiVector> nullspace             = Xpetra::MultiVectorFactory<SC, LO, GO, NO>::Build(Op->getDomainMap(), 1);
     nullspace->putScalar(Teuchos::ScalarTraits<SC>::one());
 
-    RCP<Tpetra::CrsMatrix<SC, LO, GO, NO> > tpA = MueLu::Utilities<SC, LO, GO, NO>::Op2NonConstTpetraCrs(Op);
+    RCP<Tpetra::CrsMatrix<SC, LO, GO, NO> > tpA = toTpetra(Op);
 
     RCP<MueLu::TpetraOperator<SC, LO, GO, NO> > tH = MueLu::CreateTpetraPreconditioner<SC, LO, GO, NO>(RCP<tpetra_operator_type>(tpA), mylist);
-    tH->apply(*(Utils::MV2TpetraMV(RHS1)), *(Utils::MV2NonConstTpetraMV(X1)));
+    tH->apply(*(toTpetra(RHS1)), *(toTpetra(X1)));
     out << "after apply, ||b-A*x||_2 = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << Utils::ResidualNorm(*Op, *X1, *RHS1) << std::endl;
 
     mylist.set("xml parameter file", "testWithRebalance.xml");
 
-    RCP<Tpetra::MultiVector<real_type, LO, GO, NO> > tpcoordinates = MueLu::Utilities<real_type, LO, GO, NO>::MV2NonConstTpetraMV(coordinates);
-    RCP<Tpetra::MultiVector<SC, LO, GO, NO> > tpnullspace          = Utils::MV2NonConstTpetraMV(nullspace);
+    RCP<Tpetra::MultiVector<real_type, LO, GO, NO> > tpcoordinates = toTpetra(coordinates);
+    RCP<Tpetra::MultiVector<SC, LO, GO, NO> > tpnullspace          = toTpetra(nullspace);
 
     std::string mueluXML = mylist.get("xml parameter file", "");
     Teuchos::ParameterList mueluList;
@@ -232,7 +232,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(PetraOperator, CreatePreconditioner_XMLOnList,
     userParamList.set<RCP<Tpetra::MultiVector<SC, LO, GO, NO> > >("Nullspace", tpnullspace);
     tH = MueLu::CreateTpetraPreconditioner<SC, LO, GO, NO>(RCP<tpetra_operator_type>(tpA), mueluList);
     X1->putScalar(Teuchos::ScalarTraits<SC>::zero());
-    tH->apply(*(Utils::MV2TpetraMV(RHS1)), *(Utils::MV2NonConstTpetraMV(X1)));
+    tH->apply(*(toTpetra(RHS1)), *(toTpetra(X1)));
     out << "after apply, ||b-A*x||_2 = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << Utils::ResidualNorm(*Op, *X1, *RHS1) << std::endl;
 
 #endif
@@ -360,9 +360,9 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(PetraOperator, CreatePreconditioner_PDESystem,
       RCP<MultiVector> X1 = MultiVectorFactory::Build(Op->getRowMap(), 1);
       X1->putScalar(Teuchos::ScalarTraits<SC>::zero());
 
-      RCP<Tpetra::CrsMatrix<SC, LO, GO, NO> > tpA                    = MueLu::Utilities<SC, LO, GO, NO>::Op2NonConstTpetraCrs(Op);
-      RCP<Tpetra::MultiVector<real_type, LO, GO, NO> > tpcoordinates = MueLu::Utilities<real_type, LO, GO, NO>::MV2NonConstTpetraMV(coordinates);
-      RCP<Tpetra::MultiVector<SC, LO, GO, NO> > tpnullspace          = Utils::MV2NonConstTpetraMV(nullspace);
+      RCP<Tpetra::CrsMatrix<SC, LO, GO, NO> > tpA                    = toTpetra(Op);
+      RCP<Tpetra::MultiVector<real_type, LO, GO, NO> > tpcoordinates = toTpetra(coordinates);
+      RCP<Tpetra::MultiVector<SC, LO, GO, NO> > tpnullspace          = toTpetra(nullspace);
 
       Teuchos::ParameterList paramList;
       Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr<Teuchos::ParameterList>(&paramList), *tpA->getDomainMap()->getComm());
@@ -370,7 +370,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(PetraOperator, CreatePreconditioner_PDESystem,
       userParamList.set<RCP<Tpetra::MultiVector<real_type, LO, GO, NO> > >("Coordinates", tpcoordinates);
       userParamList.set<RCP<Tpetra::MultiVector<SC, LO, GO, NO> > >("Nullspace", tpnullspace);
       RCP<MueLu::TpetraOperator<SC, LO, GO, NO> > tH = MueLu::CreateTpetraPreconditioner<SC, LO, GO, NO>(RCP<tpetra_operator_type>(tpA), paramList);
-      tH->apply(*(Utils::MV2TpetraMV(RHS1)), *(Utils::MV2NonConstTpetraMV(X1)));
+      tH->apply(*(toTpetra(RHS1)), *(toTpetra(X1)));
       out << "after apply, ||b-A*x||_2 = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << Utils::ResidualNorm(*Op, *X1, *RHS1) << std::endl;
 #else
       std::cout << "Skip PetraOperator::CreatePreconditioner_PDESystem: Tpetra is not available (with GO=int enabled)" << std::endl;
@@ -480,17 +480,17 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(PetraOperator, ReusePreconditioner, Scalar, Lo
     RCP<MultiVector> X1 = MultiVectorFactory::Build(Op->getRowMap(), 1);
     X1->putScalar(Teuchos::ScalarTraits<SC>::zero());
 
-    RCP<Tpetra::CrsMatrix<SC, LO, GO, NO> > tpA = MueLu::Utilities<SC, LO, GO, NO>::Op2NonConstTpetraCrs(Op);
+    RCP<Tpetra::CrsMatrix<SC, LO, GO, NO> > tpA = toTpetra(Op);
 
     RCP<MueLu::TpetraOperator<SC, LO, GO, NO> > tH = MueLu::CreateTpetraPreconditioner<SC, LO, GO, NO>(RCP<tpetra_operator_type>(tpA), xmlFileName);
-    tH->apply(*(Utils::MV2TpetraMV(RHS1)), *(Utils::MV2NonConstTpetraMV(X1)));
+    tH->apply(*(toTpetra(RHS1)), *(toTpetra(X1)));
     out << "after apply, ||b-A*x||_2 = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << Utils::ResidualNorm(*Op, *X1, *RHS1) << std::endl;
 
     // Reuse preconditioner
     MueLu::ReuseTpetraPreconditioner(tpA, *tH);
 
     X1->putScalar(Teuchos::ScalarTraits<SC>::zero());
-    tH->apply(*(Utils::MV2TpetraMV(RHS1)), *(Utils::MV2NonConstTpetraMV(X1)));
+    tH->apply(*(toTpetra(RHS1)), *(toTpetra(X1)));
     out << "after apply, ||b-A*x||_2 = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << Utils::ResidualNorm(*Op, *X1, *RHS1) << std::endl;
 #else
     std::cout << "Skip PetraOperator::ReusePreconditioner: Tpetra is not available (with GO=int enabled)" << std::endl;
@@ -587,21 +587,21 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(PetraOperator, ReusePreconditioner2, Scalar, L
     RCP<MultiVector> X1 = MultiVectorFactory::Build(Op->getRowMap(), 1);
     X1->putScalar(Teuchos::ScalarTraits<SC>::zero());
 
-    RCP<Tpetra::CrsMatrix<SC, LO, GO, NO> > tpA    = MueLu::Utilities<SC, LO, GO, NO>::Op2NonConstTpetraCrs(Op);
+    RCP<Tpetra::CrsMatrix<SC, LO, GO, NO> > tpA    = toTpetra(Op);
     RCP<MueLu::TpetraOperator<SC, LO, GO, NO> > tH = MueLu::CreateTpetraPreconditioner<SC, LO, GO, NO>(RCP<tpetra_operator_type>(tpA), params);
-    tH->apply(*(Utils::MV2TpetraMV(RHS1)), *(Utils::MV2NonConstTpetraMV(X1)));
+    tH->apply(*(toTpetra(RHS1)), *(toTpetra(X1)));
     out << "after apply, ||b-A*x||_2 = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << Utils::ResidualNorm(*Op, *X1, *RHS1) << std::endl;
 
     // Reuse preconditioner
 
     matrixFile                                   = "TestMatrices/fuego1.mm";
     RCP<Matrix> Op2                              = Xpetra::IO<SC, LO, GO, Node>::Read(matrixFile, rowmap, null, null, null);
-    RCP<Tpetra::CrsMatrix<SC, LO, GO, NO> > tpA2 = MueLu::Utilities<SC, LO, GO, NO>::Op2NonConstTpetraCrs(Op2);
+    RCP<Tpetra::CrsMatrix<SC, LO, GO, NO> > tpA2 = toTpetra(Op2);
 
     MueLu::ReuseTpetraPreconditioner(tpA2, *tH);
 
     X1->putScalar(Teuchos::ScalarTraits<SC>::zero());
-    tH->apply(*(Utils::MV2TpetraMV(RHS1)), *(Utils::MV2NonConstTpetraMV(X1)));
+    tH->apply(*(toTpetra(RHS1)), *(toTpetra(X1)));
     out << "after apply, ||b-A*x||_2 = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << Utils::ResidualNorm(*Op, *X1, *RHS1) << std::endl;
   } else if (lib == Xpetra::UseEpetra) {
 #ifdef HAVE_MUELU_EPETRA
diff --git a/packages/muelu/test/unit_tests/Adapters/TpetraOperatorAdapter.cpp b/packages/muelu/test/unit_tests/Adapters/TpetraOperatorAdapter.cpp
index 5e86c7be2a76..c20d3c74c1e0 100644
--- a/packages/muelu/test/unit_tests/Adapters/TpetraOperatorAdapter.cpp
+++ b/packages/muelu/test/unit_tests/Adapters/TpetraOperatorAdapter.cpp
@@ -76,7 +76,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(TpetraOperator, Apply, Scalar, LocalOrdinal, G
 
     X1->putScalar((SC)0.0);
 
-    tH->apply(*(Utils::MV2TpetraMV(RHS1)), *(Utils::MV2NonConstTpetraMV(X1)));
+    tH->apply(*(toTpetra(RHS1)), *(toTpetra(X1)));
 
     X1->norm2(norms);
     out << "after apply, ||X1|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << norms[0] << std::endl;
diff --git a/packages/muelu/test/unit_tests/MueLu_TestHelpers_HO.hpp b/packages/muelu/test/unit_tests/MueLu_TestHelpers_HO.hpp
index 437749bda702..aaa9040c5aeb 100644
--- a/packages/muelu/test/unit_tests/MueLu_TestHelpers_HO.hpp
+++ b/packages/muelu/test/unit_tests/MueLu_TestHelpers_HO.hpp
@@ -79,7 +79,7 @@ Build1DPseudoPoissonHigherOrder(GlobalOrdinal nx, int degree,
   int Nproc                           = comm->getSize();
 
   // Get maps
-  RCP<CrsMatrix> Acrs      = rcp_dynamic_cast<CrsMatrixWrap>(A)->getCrsMatrix();
+  RCP<CrsMatrix> Acrs      = toCrsMatrix(A);
   RCP<const Map> p1_colmap = Acrs->getColMap();
   RCP<const Map> p1_rowmap = Acrs->getRowMap();
 
diff --git a/packages/muelu/test/unit_tests/ParameterList/ParameterListInterpreter.cpp b/packages/muelu/test/unit_tests/ParameterList/ParameterListInterpreter.cpp
index e29fe020ea25..09f2b8a53dc6 100644
--- a/packages/muelu/test/unit_tests/ParameterList/ParameterListInterpreter.cpp
+++ b/packages/muelu/test/unit_tests/ParameterList/ParameterListInterpreter.cpp
@@ -126,8 +126,8 @@ MT compare_matrices(RCP<Matrix> &Ap, RCP<Matrix> &Ab) {
   SC one    = Teuchos::ScalarTraits<SC>::one();
   SC zero   = Teuchos::ScalarTraits<SC>::zero();
 
-  RCP<const CRS> Ap_t  = MueLu::Utilities<SC, LO, GO, NO>::Op2TpetraCrs(Ap);
-  auto Ab_t            = MueLu::Utilities<SC, LO, GO, NO>::Op2TpetraBlockCrs(Ab);
+  RCP<const CRS> Ap_t  = toTpetra(Ap);
+  auto Ab_t            = toTpetraBlock(Ab);
   RCP<CRS> Ab_as_point = Tpetra::convertToCrsMatrix<SC, LO, GO, NO>(*Ab_t);
 
   RCP<CRS> diff = rcp(new CRS(Ap_t->getCrsGraph()));
@@ -154,7 +154,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(ParameterListInterpreter, PointCrs_vs_BlockCrs
     {
       using XCRS = Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO>;
 
-      auto tA      = MueLu::Utilities<SC, LO, GO, NO>::Op2TpetraCrs(PointA);
+      auto tA      = toTpetra(PointA);
       auto bA      = Tpetra::convertToBlockCrsMatrix<SC, LO, GO, NO>(*tA, 1);
       RCP<XCRS> AA = rcp(new XCRS(bA));
       BlockA       = rcp(new CrsMatrixWrap(rcp_implicit_cast<CrsMatrix>(AA)));
diff --git a/packages/muelu/test/unit_tests_kokkos/Utilities_kokkos.cpp b/packages/muelu/test/unit_tests_kokkos/Utilities_kokkos.cpp
index e0461368cf51..95688825c44a 100644
--- a/packages/muelu/test/unit_tests_kokkos/Utilities_kokkos.cpp
+++ b/packages/muelu/test/unit_tests_kokkos/Utilities_kokkos.cpp
@@ -473,19 +473,19 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Utilities_kokkos, TransformFunctions, Scalar,
     TEST_EQUALITY(mv.getData(0).size(), tpetraMV.getData(0).size());
   };
 
-  auto tpetraMV = Utils::MV2TpetraMV(vector);
+  auto tpetraMV = toTpetra(vector);
   compareMV(*vector, *tpetraMV);
 
-  auto tpetraMV2 = Utils::MV2TpetraMV(*vector);
+  auto tpetraMV2 = toTpetra(*vector);
   compareMV(*vector, tpetraMV2);
 
-  auto nonConstTpetraMV = Utils::MV2NonConstTpetraMV(vector);
+  auto nonConstTpetraMV = toTpetra(vector);
   compareMV(*vector, *nonConstTpetraMV);
 
-  auto nonConstTpetraMV2 = Utils::MV2NonConstTpetraMV2(*vector);
+  auto nonConstTpetraMV2 = toTpetra(vector);
   compareMV(*vector, *nonConstTpetraMV2);
 
-  auto nonConstTpetraMV3 = Utils::MV2NonConstTpetraMV(*vector);
+  auto nonConstTpetraMV3 = toTpetra(*vector);
   compareMV(*vector, nonConstTpetraMV3);
 
   using TpetraMat        = Tpetra::RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
@@ -500,16 +500,16 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Utilities_kokkos, TransformFunctions, Scalar,
     TEST_EQUALITY(xpetraMat.getGlobalNumEntries(), tpetraMat.getGlobalNumEntries());
   };
 
-  auto tpetraCrsMat = Utils::Op2TpetraCrs(A);
+  auto tpetraCrsMat = toTpetra(A);
   compareMat(*A, *tpetraCrsMat);
 
-  auto nonConstTpetraCrs = Utils::Op2NonConstTpetraCrs(A);
+  auto nonConstTpetraCrs = toTpetra(A);
   compareMat(*A, *nonConstTpetraCrs);
 
-  auto tpetraCrs = Utils::Op2TpetraCrs(*A);
+  auto tpetraCrs = *toTpetra(A);
   compareMat(*A, tpetraCrs);
 
-  auto nonConstTpetraCrs2 = Utils::Op2NonConstTpetraCrs(*A);
+  auto nonConstTpetraCrs2 = *toTpetra(A);
   compareMat(*A, nonConstTpetraCrs2);
 
   auto crsMat = CrsMatrixFactory::Build(map);
@@ -529,7 +529,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Utilities_kokkos, TransformFunctions, Scalar,
   auto nonConstTpetraRow = Utils::Op2NonConstTpetraRow(A);
   compareMat(*A, *nonConstTpetraRow);
 
-  auto tpetraMap = Utils::Map2TpetraMap(*map);
+  auto tpetraMap = toTpetra(map);
   TEST_INEQUALITY(tpetraMap, Teuchos::null);
   TEST_EQUALITY_CONST(tpetraMap->getGlobalNumElements(), map->getGlobalNumElements());
 
diff --git a/packages/rol/adapters/sacado/test/test_01.cpp b/packages/rol/adapters/sacado/test/test_01.cpp
index 82bc4312283e..9466a9afbf15 100644
--- a/packages/rol/adapters/sacado/test/test_01.cpp
+++ b/packages/rol/adapters/sacado/test/test_01.cpp
@@ -117,6 +117,7 @@ int main(int argc, char *argv[]) {
         break;
 
         case TYPE_LAST:
+        default:
           ROL_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Error: Unsupported problem type!");
         break;
       }
diff --git a/packages/rol/src/algorithm/ROL_OptimizationSolver.hpp b/packages/rol/src/algorithm/ROL_OptimizationSolver.hpp
index d304a574c300..f1ff19809e37 100644
--- a/packages/rol/src/algorithm/ROL_OptimizationSolver.hpp
+++ b/packages/rol/src/algorithm/ROL_OptimizationSolver.hpp
@@ -219,6 +219,7 @@ class OptimizationSolver {
         output_ = algo_->run(*x_,*g_,*l_,*c_,*obj_,*con_,*bnd_,true,outStream);
       break;
       case TYPE_LAST:
+      default:
         ROL_TEST_FOR_EXCEPTION(true,std::invalid_argument,
           "Error in OptimizationSolver::solve() : Unsupported problem type");
     }
diff --git a/packages/seacas/libraries/exodus/src/ex_get_init_ext.c b/packages/seacas/libraries/exodus/src/ex_get_init_ext.c
index cd2fa90e2165..ac416dbd0793 100644
--- a/packages/seacas/libraries/exodus/src/ex_get_init_ext.c
+++ b/packages/seacas/libraries/exodus/src/ex_get_init_ext.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(C) 1999-2022, 2024 National Technology & Engineering Solutions
+ * Copyright(C) 1999-2022, 2024, 2025 National Technology & Engineering Solutions
  * of Sandia, LLC (NTESS).  Under the terms of Contract DE-NA0003525 with
  * NTESS, the U.S. Government retains certain rights in this software.
  *
@@ -28,14 +28,21 @@ static void exi_get_entity_count(int exoid, ex_init_params *info)
 {
   int include_parent_group = 0; // Only want dims in current group
   int ndims                = 0;
+#if NC_HAS_NC4
   nc_inq_dimids(exoid, &ndims, NULL, include_parent_group);
   int *dimids = calloc(ndims, sizeof(int));
   nc_inq_dimids(exoid, &ndims, dimids, include_parent_group);
-
+#else
+  nc_inq(exoid, &ndims, NULL, NULL, NULL);
+#endif
   for (int dimid = 0; dimid < ndims; dimid++) {
     char   dim_nm[NC_MAX_NAME + 1] = {'\0'};
     size_t dim_sz;
+#if NC_HAS_NC4
     nc_inq_dim(exoid, dimids[dimid], dim_nm, &dim_sz);
+#else
+    nc_inq_dim(exoid, dimid, dim_nm, &dim_sz);
+#endif    
     /* For assemblies, we check for a dim starting with "num_entity_assembly" */
     if (strncmp(dim_nm, "num_entity_assembly", 19) == 0) {
       info->num_assembly++;
@@ -44,7 +51,9 @@ static void exi_get_entity_count(int exoid, ex_init_params *info)
       info->num_blob++;
     }
   }
+#if NC_HAS_NC4
   free(dimids);
+#endif
 }
 
 /* Used to reduce repeated code below */
diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C
index 0d49fa766328..11e742a3f25a 100644
--- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C
+++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C
@@ -1,4 +1,4 @@
-// Copyright(C) 1999-2024 National Technology & Engineering Solutions
+// Copyright(C) 1999-2025 National Technology & Engineering Solutions
 // of Sandia, LLC (NTESS).  Under the terms of Contract DE-NA0003525 with
 // NTESS, the U.S. Government retains certain rights in this software.
 //
@@ -731,60 +731,65 @@ namespace Ioex {
       {
         Ioss::SerializeIO serializeIO_(this);
         timestepCount       = ex_inquire_int(get_file_pointer(), EX_INQ_TIME);
-        int exTimestepCount = timestepCount;
-        // Need to sync timestep count across ranks if parallel...
-        if (isParallel) {
-          auto min_timestep_count =
-              util().global_minmax(timestepCount, Ioss::ParallelUtils::DO_MIN);
-          if (min_timestep_count == 0) {
-            auto max_timestep_count =
-                util().global_minmax(timestepCount, Ioss::ParallelUtils::DO_MAX);
-            if (max_timestep_count != 0) {
-              if (myProcessor == 0) {
-                // NOTE: Don't want to warn on all processors if the
-                // timestep count is zero on some, but not all ranks.
-                fmt::print(Ioss::WarnOut(),
-                           "At least one database has no timesteps.  No times will be read on ANY"
-                           " database for consistency.\n");
-              }
-            }
-          }
-          timestepCount = min_timestep_count;
-        }
-
-        if (timestepCount <= 0) {
-          return tsteps;
-        }
-
-        // For an exodus file, timesteps are global and are stored in the region.
-        // Read the timesteps and add to the region
-        tsteps.resize(exTimestepCount, -std::numeric_limits<double>::max());
-
-        // The `EXODUS_CALL_GET_ALL_TIMES=NO` is typically only used in
-        // isSerialParallel mode and the client is responsible for
-        // making sure that the step times are handled correctly.  All
-        // databases will know about the number of timesteps, but if
-        // this is skipped, then the times will all be zero.  Use case
-        // is that in isSerialParallel, each call to
-        // `ex_get_all_times` for all files is performed sequentially,
-        // so if you have hundreds to thousands of files, the time for
-        // the call is additive and since timesteps are record
-        // variables in netCDF, accessing the data for all timesteps
-        // involves lseeks throughout the file.
-        bool call_ex_get_all_times = true;
-        Ioss::Utils::check_set_bool_property(properties, "EXODUS_CALL_GET_ALL_TIMES",
-                                             call_ex_get_all_times);
-        if (call_ex_get_all_times) {
-          int error = ex_get_all_times(get_file_pointer(), Data(tsteps));
-          if (error < 0) {
-            Ioex::exodus_error(get_file_pointer(), __LINE__, __func__, __FILE__);
-          }
-        }
-
-        // See if the "last_written_time" attribute exists and if it
-        // does, check that it matches the largest time in 'tsteps'.
-        exists = Ioex::read_last_time_attribute(get_file_pointer(), &last_time);
       }
+      int exTimestepCount = timestepCount;
+      // Need to sync timestep count across ranks if parallel...
+      if (isParallel) {
+	auto min_timestep_count =
+	  util().global_minmax(timestepCount, Ioss::ParallelUtils::DO_MIN);
+	if (min_timestep_count == 0) {
+	  auto max_timestep_count =
+	    util().global_minmax(timestepCount, Ioss::ParallelUtils::DO_MAX);
+	  if (max_timestep_count != 0) {
+	    if (myProcessor == 0) {
+	      // NOTE: Don't want to warn on all processors if the
+	      // timestep count is zero on some, but not all ranks.
+	      fmt::print(Ioss::WarnOut(),
+			 "At least one database has no timesteps.  No times will be read on ANY"
+			 " database for consistency.\n");
+	    }
+	  }
+	}
+	timestepCount = min_timestep_count;
+      }
+
+      if (timestepCount <= 0) {
+	return tsteps;
+      }
+
+      // For an exodus file, timesteps are global and are stored in the region.
+      // Read the timesteps and add to the region
+      tsteps.resize(exTimestepCount, -std::numeric_limits<double>::max());
+
+      // The `EXODUS_CALL_GET_ALL_TIMES=NO` is typically only used in
+      // isSerialParallel mode and the client is responsible for
+      // making sure that the step times are handled correctly.  All
+      // databases will know about the number of timesteps, but if
+      // this is skipped, then the times will all be zero.  Use case
+      // is that in isSerialParallel, each call to
+      // `ex_get_all_times` for all files is performed sequentially,
+      // so if you have hundreds to thousands of files, the time for
+      // the call is additive and since timesteps are record
+      // variables in netCDF, accessing the data for all timesteps
+      // involves lseeks throughout the file.
+      bool call_ex_get_all_times = true;
+      Ioss::Utils::check_set_bool_property(properties, "EXODUS_CALL_GET_ALL_TIMES",
+					   call_ex_get_all_times);
+      if (call_ex_get_all_times) {
+	Ioss::SerializeIO serializeIO_(this);
+	int error = ex_get_all_times(get_file_pointer(), Data(tsteps));
+	if (error < 0) {
+	  Ioex::exodus_error(get_file_pointer(), __LINE__, __func__, __FILE__);
+	}
+      }
+
+      // See if the "last_written_time" attribute exists and if it
+      // does, check that it matches the largest time in 'tsteps'.
+      {
+	Ioss::SerializeIO serializeIO_(this);
+	exists = Ioex::read_last_time_attribute(get_file_pointer(), &last_time);
+      }
+
       if (exists && isParallel) {
         // Assume that if it exists on 1 processor, it exists on
         // all... Sync value among processors since could have a
diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp
index 7fd24c7fb1d7..848dd57f140d 100644
--- a/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp
+++ b/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp
@@ -464,7 +464,7 @@ namespace BaskerNS
     int sfactor_copy();
 
     BASKER_INLINE
-    int sfactor_copy2(bool alloc_BTFA = false, bool copy_BTFA = true);
+    int sfactor_copy2(bool doSymbolic = true, bool alloc_BTFA = false, bool copy_BTFA = true);
 
 
     //old
diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp
index 35d8588b0bd9..0b3b8af99654 100644
--- a/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp
+++ b/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp
@@ -357,6 +357,7 @@ namespace BaskerNS
     Kokkos::Timer timer;
     #endif
 
+    int err = 0;
     if(Options.verbose == BASKER_TRUE)
     {
       std::cout << "\n == Basker Symbolic ==" << std::endl;
@@ -533,21 +534,6 @@ namespace BaskerNS
       symb_flag = BASKER_TRUE;
     }
 
-
-    if(Options.verbose == BASKER_TRUE)
-    {
-      printf(" == Basker Symbolic Done ==\n\n"); fflush(stdout);
-    }
-
-    #ifdef BASKER_TIMER
-    time = timer.seconds();
-    stats.time_sfactor += time;
-    std::cout << "Basker Symbolic total time: " << time
-              << std::endl << std::endl;
-    std::cout.precision(old_precision);
-    std::cout.flags(old_settings);
-    #endif
-
     // NDE store matrix dims here for comparison in Factor
     sym_gn = A.ncol;
     sym_gm = A.nrow;
@@ -577,8 +563,30 @@ namespace BaskerNS
       }
     }
     printf("];\n");*/
+    bool allocate_nd_workspace = (Options.blk_matching == 0 && Options.static_delayed_pivot == 0);
+    if (btf_tabs_offset != 0 && allocate_nd_workspace) {
+      // setup data-structure for ND
+      bool doSymbolic = true;
+      bool copy_BTFA = (Options.blk_matching == 0 || Options.static_delayed_pivot != 0);
+      bool alloc_BTFA = (Options.static_delayed_pivot != 0);
+      err = sfactor_copy2(doSymbolic, alloc_BTFA, copy_BTFA);
+    }
 
-    return 0;
+    #ifdef BASKER_TIMER
+    time = timer.seconds();
+    stats.time_sfactor += time;
+    std::cout << "Basker Symbolic total time: " << time
+              << std::endl << std::endl;
+    std::cout.precision(old_precision);
+    std::cout.flags(old_settings);
+    #endif
+
+    if(Options.verbose == BASKER_TRUE)
+    {
+      printf(" == Basker Symbolic Done ==\n\n"); fflush(stdout);
+    }
+
+    return err;
   } //end Symbolic()
 
 
@@ -1934,6 +1942,7 @@ namespace BaskerNS
     // sfactor_copy2 is now only responsible for the copy from BTF_A to 2D blocks
     Kokkos::Timer timer_sfactorcopy;
     double sfactorcopy_time = 0.0;
+    bool doSymbolic_ND = (Options.blk_matching != 0 || Options.static_delayed_pivot != 0);
     if (btf_tabs_offset != 0) {
       bool flag = true;
       #ifdef BASKER_KOKKOS
@@ -1947,16 +1956,19 @@ namespace BaskerNS
       }*/
 
       Kokkos::Timer nd_setup2_timer;
-#ifdef BASKER_PARALLEL_INIT_WORKSPACE
-      kokkos_sfactor_init_workspace<Int,Entry,Exe_Space>
-        iWS(flag, this);
-      Kokkos::parallel_for(TeamPolicy(num_threads,1), iWS);
-      Kokkos::fence();
-#else
-      for (Int p = 0; p < num_threads; p++) {
-        this->t_init_workspace(flag, p);
+      // If doSymbolic_ND is set, sfactor_copy2 was not called in Symbolic, so the
+      // workspaces are initialized with a serial loop. Otherwise all the blocks
+      // were already allocated there and can be initialized in a parallel-for.
+      if (doSymbolic_ND) {
+        for (Int p = 0; p < num_threads; p++) {
+          this->t_init_workspace(flag, p);
+        }
+      } else {
+        kokkos_sfactor_init_workspace<Int,Entry,Exe_Space>
+          iWS(flag, this);
+        Kokkos::parallel_for(TeamPolicy(num_threads,1), iWS);
+        Kokkos::fence();
       }
-#endif
       if(Options.verbose == BASKER_TRUE) {
         std::cout<< " > Basker Factor: Time for workspace allocation after ND on a big block A: " << nd_setup2_timer.seconds() << std::endl;
       }
@@ -1964,7 +1976,7 @@ namespace BaskerNS
     }
     bool copy_BTFA = (Options.blk_matching == 0 || Options.static_delayed_pivot != 0);
     bool alloc_BTFA = (Options.static_delayed_pivot != 0);
-    err = sfactor_copy2(alloc_BTFA, copy_BTFA);
+    err = sfactor_copy2(doSymbolic_ND, alloc_BTFA, copy_BTFA);
 
     if(Options.verbose == BASKER_TRUE) {
       sfactorcopy_time += timer_sfactorcopy.seconds();
@@ -1973,6 +1985,7 @@ namespace BaskerNS
     }
     if(err == BASKER_ERROR)
     { return BASKER_ERROR; }
+    //BTF_A.print_matrix("AA.dat");
 
     Kokkos::Timer timer_factornotoken;
     double fnotoken_time = 0.0;
diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp
index 4bbd86507d9d..1a3f95fd10f6 100644
--- a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp
+++ b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp
@@ -146,6 +146,7 @@ namespace BaskerNS
     Int mnnz; //malloc nnz
     
     INT_1DARRAY   col_ptr;
+    INT_1DARRAY   dig_ptr;
     INT_1DARRAY   col_idx; // NOTE: auxiliary for find_2D_convert
     INT_1DARRAY   row_idx;
     ENTRY_1DARRAY val;
diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp
index e40361e6f988..ddf77f743733 100644
--- a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp
+++ b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp
@@ -122,6 +122,7 @@ namespace BaskerNS
     if(v_fill == BASKER_TRUE)
     {
       FREE_INT_1DARRAY(col_ptr);
+      FREE_INT_1DARRAY(dig_ptr);
       FREE_INT_1DARRAY(row_idx);
       FREE_ENTRY_1DARRAY(val);
       v_fill = BASKER_FALSE;
@@ -190,10 +191,12 @@ namespace BaskerNS
     //printf( " init_col(n=%d)\n",ncol );
     BASKER_ASSERT(ncol >= 0, "INIT_COL, ncol > 0");
     MALLOC_INT_1DARRAY(col_ptr, ncol+1);
+    MALLOC_INT_1DARRAY(dig_ptr, ncol+1);
     MALLOC_INT_1DARRAY(col_idx, ncol+1);
     for(Int i = 0; i < ncol+1; ++i)
     {
       col_ptr(i) = (Int) BASKER_MAX_IDX;
+      dig_ptr(i) = (Int) BASKER_MAX_IDX;
       col_idx(i) = (Int) BASKER_MAX_IDX;
     }
   }//end init_col()
@@ -206,6 +209,7 @@ namespace BaskerNS
     for(Int i = 0; i < ncol+1; ++i)
     {
       col_ptr(i) = (Int) BASKER_MAX_IDX;
+      dig_ptr(i) = (Int) BASKER_MAX_IDX;
       col_idx(i) = (Int) BASKER_MAX_IDX;
     }
     nnz = 0;
@@ -228,6 +232,7 @@ namespace BaskerNS
     {
       BASKER_ASSERT((ncol+1)>0, "matrix init_vector ncol");
       MALLOC_INT_1DARRAY(col_ptr,ncol+1);
+      MALLOC_INT_1DARRAY(dig_ptr,ncol+1);
     }
     if(nnz > 0)
     {
diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp
index 7b65e1d94ed0..a969dea8dc8a 100644
--- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp
+++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp
@@ -192,7 +192,7 @@ namespace BaskerNS
 
 
     //----------------Sep level upper tri-------------
-    for(Int l = 1; l < (lvl) && info == BASKER_SUCCESS; ++l)
+    for(Int l = 1; l < lvl && info == BASKER_SUCCESS; ++l)
     {
       for(Int k = 0; k < ncol; ++k)
       {
@@ -352,10 +352,10 @@ namespace BaskerNS
 
         // ------------------------------------------------------- //
         // > factor the k-th column of the off-diagonal blocks
+        #ifdef BASKER_TIMER
+        timer_facoff.reset();
+        #endif
         if (info == BASKER_SUCCESS) {
-          #ifdef BASKER_TIMER
-          timer_facoff.reset();
-          #endif
           #ifdef BASKER_DEBUG_NFACTOR_COL2
           printf(" calling lower offdiag factor, kid: %d k: %d \n",
                  kid, k); fflush(stdout);
diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp
index c4d352d349f4..7bdd72d47dbf 100644
--- a/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp
+++ b/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp
@@ -1161,7 +1161,7 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
       }
       return info_scotch;
     } else if(Options.verbose == BASKER_TRUE) {
-      printf( "\n part_scotch done (num_threads = %d,%lu)\n",num_threads,part_tree.leaf_nnz.extent(0) );
+      printf( "\n part_scotch done (num_threads = %d,%lu)\n",int(num_threads),part_tree.leaf_nnz.extent(0) );
       //for (Int i = 0; i < num_threads; i++) printf( " nnz_leaf[%d] = %d\n",i,part_tree.leaf_nnz[i] ); printf( "\n" );
     }
     nd_flag = BASKER_TRUE;
diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp
index af889c5b77b1..9566f48f9ef1 100644
--- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp
+++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp
@@ -117,11 +117,11 @@ namespace BaskerNS
       //           thread.team_rank());
       Int kid = basker->t_get_kid(thread);
       #endif
-      printf( " * kokkos_sfactor_init_factor(%d) *\n",kid ); fflush(stdout);
+      //printf( " * kokkos_sfactor_init_factor(%d) *\n",int(kid) ); fflush(stdout);
 
       basker->t_init_factor(kid);
 
-      printf( " * kokkos_sfactor_init_factor(%d) done *\n",kid ); fflush(stdout);
+      //printf( " * kokkos_sfactor_init_factor(%d) done *\n",int(kid) ); fflush(stdout);
       //This needs to be done earlier in ordering now
       //basker->t_init_2DA(kid);
 
@@ -384,9 +384,9 @@ int Basker<Int, Entry, Exe_Space>::sfactor()
         if(Options.verbose == BASKER_TRUE)
         {
           printf( " >> leaf_assign_nnz(LL(%d)(%d)) = (1.0 + %.1f + %.1f) + leaf_nnz[%d] = %d from AMD\n",(int)blk,0, 
-                  BASKER_DOM_NNZ_OVER,Options.user_fill,p,part_tree.leaf_nnz[p] );
+                  BASKER_DOM_NNZ_OVER,Options.user_fill,(int)p,(int)part_tree.leaf_nnz[p] );
           printf( " >> leaf_assign_nnz(LU(%d)(%d)) = (1.0 + %.1f + %.1f) + leaf_nnz[%d] = %d from AMD\n",(int)blk,(int)LU_size(blk)-1,
-                  BASKER_DOM_NNZ_OVER,Options.user_fill,p,part_tree.leaf_nnz[p] );
+                  BASKER_DOM_NNZ_OVER,Options.user_fill,(int)p,(int)part_tree.leaf_nnz[p] );
         }
         LL(blk)(0).nnz = part_tree.leaf_nnz[p] * fill_factor;
         LU(blk)(LU_size(blk)-1).nnz = part_tree.leaf_nnz[p] * fill_factor;
@@ -2349,6 +2349,7 @@ int Basker<Int, Entry, Exe_Space>::sfactor()
    Int option
   )
   {
+    const Int izero = 0;
     if(option == 0 || option == 1)
     {
       const Int Int_MAX = std::numeric_limits<Int>::max();
@@ -2377,8 +2378,8 @@ int Basker<Int, Entry, Exe_Space>::sfactor()
         if (fill_factor > 1.0 && k_nnz > t_nnz) {
           t_nnz = k_nnz;
         }
-        Int mn = max(0,M.nrow*M.ncol);
-        if (mn > 0 && mn < t_nnz) {
+        Int mn = (M.nrow > izero && M.ncol > izero && M.nrow <= std::numeric_limits<Int>::max() / M.ncol) ? M.nrow*M.ncol : izero; // dense-size cap; 0 (= no cap) when the block is empty or nrow*ncol would overflow Int
+        if (mn > izero && mn < t_nnz) {
           t_nnz = mn;
         }
         M.nnz = t_nnz;
diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp
index 0ea6467be927..2f77fa684e4f 100644
--- a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp
+++ b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp
@@ -24,6 +24,8 @@
 
 using namespace std;
 
+//#define BASKER_TIMER_FINE
+
 namespace BaskerNS
 {
   ///wrapper init tree function for selection
@@ -1146,7 +1148,7 @@ namespace BaskerNS
           }
         }
         start_col = BASKER_FALSE;
-      }//over each row        
+      }//over each row
     }//over each colunm
 
   }//end find_2d_convert()
@@ -1286,7 +1288,7 @@ namespace BaskerNS
   // NDE: sfactor_copy2 is now only responsible for mapping blocks to 2D blocks
   template <class Int, class Entry, class Exe_Space>
   BASKER_INLINE
-  int Basker<Int,Entry,Exe_Space>::sfactor_copy2(bool alloc_BTFA, bool copy_BTFA)
+  int Basker<Int,Entry,Exe_Space>::sfactor_copy2(bool doSymbolic, bool alloc_BTFA, bool copy_BTFA)
   {
     //Timers
     #ifdef BASKER_TIMER_FINE
@@ -1310,19 +1312,55 @@ namespace BaskerNS
       #ifdef BASKER_TIMER_FINE 
       double twod_time = 0.0;
       Kokkos::Timer timer_twod;
+      Kokkos::Timer tic_twod;
+      #endif
+      if (doSymbolic) {
+        // clear vals from ALM, AVM - views of views that store the local 2D block CCS reordered matrix info
+        clean_2d();
+
+        //matrix_to_views_2D(BTF_A);
+        //Find starting point
+        find_2D_convert(BTF_A); //prepare CCS 'sizes' of each ALM(i)(j), AVM(i)(j) (nnz, col_idx, )
+
+        // save ptrs: keep a copy of every block's col_ptr in dig_ptr so a later call with doSymbolic == false can restore it
+        for(Int b = 0 ; b < tree.nblks; ++b) {
+          for(Int sb = 0; sb < LL_size(b); ++sb) {
+            for (Int j = 0; j <= ALM(b)(sb).ncol; j++) {
+              ALM(b)(sb).dig_ptr(j) = ALM(b)(sb).col_ptr(j);
+            }
+          }
+          for(Int sb = 0; sb < LU_size(b); ++sb) {
+            for (Int j = 0; j <= AVM(b)(sb).ncol; j++) {
+              AVM(b)(sb).dig_ptr(j) = AVM(b)(sb).col_ptr(j);
+            }
+          }
+        }
+      } else {
+        // load ptrs: skip clean_2d/find_2D_convert and restore col_ptr from the dig_ptr copy saved by the symbolic call
+        for(Int b = 0 ; b < tree.nblks; ++b) {
+          for(Int sb = 0; sb < LL_size(b); ++sb) {
+            for (Int j = 0; j <= ALM(b)(sb).ncol; j++) {
+              ALM(b)(sb).col_ptr(j) = ALM(b)(sb).dig_ptr(j);
+            }
+          }
+          for(Int sb = 0; sb < LU_size(b); ++sb) {
+            for (Int j = 0; j <= AVM(b)(sb).ncol; j++) {
+              AVM(b)(sb).col_ptr(j) = AVM(b)(sb).dig_ptr(j);
+            }
+          }
+        }
+      }
+      #ifdef BASKER_TIMER_FINE
+      double tic_time = tic_twod.seconds();
+      std::cout << "    > Basker 2D convert time: " << tic_time << std::endl;
+      tic_twod.reset();
       #endif
-
-      clean_2d(); // clear vals from ALM, AVM - views of views that store the local 2D block CCS reordered matrix info
-
-      //matrix_to_views_2D(BTF_A);
-      //Find starting point
-      find_2D_convert(BTF_A); //prepare CCS 'sizes' of each ALM(i)(j), AVM(i)(j) (nnz, col_idx, )
 
       //Fill 2D structure
       #ifdef BASKER_KOKKOS
       BASKER_BOOL keep_zeros = BASKER_FALSE;
       BASKER_BOOL alloc      = alloc_BTFA; //BASKER_FALSE;
-      #ifdef BASKER_PARALLEL_INIT_2D
+      #if 1//def BASKER_PARALLEL_INIT_2D
        kokkos_order_init_2D<Int,Entry,Exe_Space> iO(this, alloc, keep_zeros); // t_init_2DA; fill row_idx, vals into ALM, AVM calling convert2D
        Kokkos::parallel_for(TeamPolicy(num_threads,1), iO);
        Kokkos::fence();
@@ -1336,6 +1374,8 @@ namespace BaskerNS
       #endif
 
       #ifdef BASKER_TIMER_FINE
+      tic_time = tic_twod.seconds();
+      std::cout << "    > Basker init 2D time: " << tic_time << std::endl;
       tmp_time = timer_twod.seconds();
       twod_time += tmp_time;
       std::cout << "    Basker move into 2D ND reorder time: " << tmp_time << std::endl;
@@ -1400,4 +1440,5 @@ namespace BaskerNS
 
 }//end namespace basker
 
+#undef BASKER_TIMER_FINE
 #endif //end ifndefbasker_tree_hpp
diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp
index 9d30b714553d..46327f323d54 100644
--- a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp
+++ b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp
@@ -162,7 +162,7 @@ enum BASKER_INCOMPLETE_CODE
 #define MALLOC_INT_1DARRAY_PAIRS(a,s)   \
   { \
     BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC malloc_pairs_1d: size to alloc >= 0 fails"); \
-    if (s > 0) {                                          \
+    if (s > 0 && Int(a.extent(0)) != s) {                 \
       Kokkos::resize(a, s);                               \
       if(a.data() == NULL)                                \
         throw std::bad_alloc();                           \
@@ -171,7 +171,7 @@ enum BASKER_INCOMPLETE_CODE
 #define MALLOC_INT_1DARRAY(a,s)   \
   { \
     BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC int_1d: size to alloc >= 0 fails"); \
-    if (s > 0) {                                  \
+    if (s > 0 && Int(a.extent(0)) != s) {         \
       Kokkos::resize(a, s);                       \
       if(a.data() == NULL)                        \
         throw std::bad_alloc();                   \
@@ -181,23 +181,25 @@ enum BASKER_INCOMPLETE_CODE
   { \
     BASKER_ASSERT(s0>0, "BASKER ASSERT MALLOC int_rank2d: size to alloc > 0 fails"); \
     BASKER_ASSERT(s1>0, "BASKER ASSERT MALLOC int_rank2d: size to alloc > 0 fails"); \
-    Kokkos::resize(a, s0,s1);      \
-    if(a.data() == NULL)           \
-      throw std::bad_alloc();	   \
+    if (Int(a.extent(0)) != s0 || Int(a.extent(1)) != s1) { \
+      Kokkos::resize(a, s0,s1);                             \
+      if(a.data() == NULL)                                  \
+        throw std::bad_alloc();                             \
+    }                                                       \
   }
 #define MALLOC_INT_2DARRAY(a,s) \
   { \
     BASKER_ASSERT(s >= 0,"BASKER ASSERT MALLOC int_2d: size to alloc >= 0 fails"); \
-    if (s > 0) {                   \
+    if (s > 0 && Int(a.extent(0)) != s) {                                          \
       a = INT_2DARRAY(Kokkos::view_alloc("int_2d", Kokkos::SequentialHostInit),s); \
-      if(a.data() == NULL)         \
-        throw std::bad_alloc();    \
-    }                              \
+      if(a.data() == NULL)                                                         \
+        throw std::bad_alloc();                                                    \
+    }                                                                              \
   }
 #define MALLOC_ENTRY_1DARRAY(a,s) \
   { \
     BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC entry_1d: size to alloc >= 0 fails"); \
-    if (s > 0) {                                      \
+    if (s > 0 && Int(a.extent(0)) != s) {             \
       Kokkos::resize(a, s);                           \
       if(a.data() == NULL)                            \
         throw std::bad_alloc();                       \
@@ -206,16 +208,16 @@ enum BASKER_INCOMPLETE_CODE
 #define MALLOC_ENTRY_2DARRAY(a,s) \
   { \
     BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC entry_2d: size to alloc >= 0 fails"); \
-    if (s > 0) {                       \
+    if (s > 0 && Int(a.extent(0)) != s) {   \
       a = ENTRY_2DARRAY(Kokkos::view_alloc("matrix_2d", Kokkos::SequentialHostInit),s); \
-      if(a.data() == NULL)             \
-        throw std::bad_alloc();        \
-    }                                  \
+      if(a.data() == NULL)                  \
+        throw std::bad_alloc();             \
+    }                                       \
   }
 #define MALLOC_BOOL_1DARRAY(a,s) \
   { \
     BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC bool_1d: size to alloc >= 0 fails"); \
-    if (s > 0) {                                     \
+    if (s > 0 && Int(a.extent(0)) != s) {            \
       Kokkos::resize(a, s);                          \
       if(a.data() == NULL)                           \
         throw std::bad_alloc();                      \
@@ -224,56 +226,56 @@ enum BASKER_INCOMPLETE_CODE
 #define MALLOC_BOOL_2DARRAY(a,s) \
   { \
     BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC bool_2d: size to alloc >= 0 fails"); \
-    if (s > 0) {                      \
-      Kokkos::resize(a, s);           \
-      if(a.data() == NULL)            \
-        throw std::bad_alloc();       \
-    }                                 \
+    if (s > 0 && Int(a.extent(0)) != s) {  \
+      Kokkos::resize(a, s);                \
+      if(a.data() == NULL)                 \
+        throw std::bad_alloc();            \
+    }                                      \
   }
 #define MALLOC_MATRIX_1DARRAY(a,s) \
   { \
     BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_1d: size to alloc >= 0 fails"); \
-    if (s > 0) {                         \
+    if (s > 0 && Int(a.extent(0)) != s) {  \
       a = MATRIX_1DARRAY(Kokkos::view_alloc("matrix_1d", Kokkos::SequentialHostInit),s); \
-      if(a.data() == NULL)               \
-        throw std::bad_alloc();          \
-    }                                    \
+      if(a.data() == NULL)                 \
+        throw std::bad_alloc();            \
+    }                                      \
   }
 #define MALLOC_MATRIX_2DARRAY(a,s) \
   { \
     BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_2d: size to alloc >= 0 fails"); \
-    if (s > 0) {                         \
+    if (s > 0 && Int(a.extent(0)) != s) {  \
       a = MATRIX_2DARRAY(Kokkos::view_alloc("matrix_2d", Kokkos::SequentialHostInit),s); \
-      if(a.data() == NULL)               \
-        throw std::bad_alloc();          \
-    }                                    \
+      if(a.data() == NULL)                 \
+        throw std::bad_alloc();            \
+    }                                      \
   }
 #define MALLOC_MATRIX_VIEW_1DARRAY(a,s) \
   { \
     BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_view_1d: size to alloc >= 0 fails"); \
-    if (s > 0) {                                   \
+    if (s > 0 && Int(a.extent(0)) != s) {  \
       a = MATRIX_VIEW_1DARRAY(Kokkos::view_alloc("matrix_view_1d", Kokkos::SequentialHostInit),s); \
-      if(a.data() == NULL)                         \
-        throw std::bad_alloc();                    \
-    }                                              \
+      if(a.data() == NULL)                 \
+        throw std::bad_alloc();            \
+    }                                      \
   }
 #define MALLOC_MATRIX_VIEW_2DARRAY(a,s) \
   { \
     BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_view_2d: size to alloc >= 0 fails"); \
-    if (s > 0) {                                   \
+    if (s > 0 && Int(a.extent(0)) != s) {  \
       a = MATRIX_VIEW_2DARRAY(Kokkos::view_alloc("matrix_view_2d", Kokkos::SequentialHostInit),s); \
-      if(a.data() == NULL)                         \
-        throw std::bad_alloc();                    \
-    }                                              \
+      if(a.data() == NULL)                 \
+        throw std::bad_alloc();            \
+    }                                      \
   }
 #define MALLOC_THREAD_1DARRAY(a,s) \
   { \
     BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC thread_1d: size to alloc >= 0 fails"); \
-    if (s > 0) {                         \
+    if (s > 0 && Int(a.extent(0)) != s) {  \
       a = THREAD_1DARRAY(Kokkos::view_alloc("thread_1d", Kokkos::SequentialHostInit),s); \
-      if(a.data() == NULL)               \
-        throw std::bad_alloc();          \
-    }                                    \
+      if(a.data() == NULL)                 \
+        throw std::bad_alloc();            \
+    }                                      \
   }
 
 //REALLOC (no copy)
diff --git a/packages/shylu/shylu_node/basker/test/amesos2_interface_coverage_test.cpp b/packages/shylu/shylu_node/basker/test/amesos2_interface_coverage_test.cpp
index 7903111ac302..a9403f59d2e2 100644
--- a/packages/shylu/shylu_node/basker/test/amesos2_interface_coverage_test.cpp
+++ b/packages/shylu/shylu_node/basker/test/amesos2_interface_coverage_test.cpp
@@ -27,6 +27,7 @@ int main(int argc, char* argv[])
 
   std::string mname;
 
+  int error = 0;
   if(argc > 2)
   {
     std::cout <<"Test Input is only the coverage matrix"
@@ -53,9 +54,9 @@ int main(int argc, char* argv[])
   std::cout << "Matrix read" << std::endl;
   double rmatrix = myTime();
   readMatrix<Int,Entry>(mname, m, n, nnz, 
-			&col_ptr, &row_idx, &val);
+                        &col_ptr, &row_idx, &val);
   std::cout << "Read Matrix, Time: " 
-	    << totalTime(rmatrix,myTime()) << std::endl;
+            << totalTime(rmatrix,myTime()) << std::endl;
   
   //RHS
   Int vn, vm;
@@ -85,14 +86,14 @@ int main(int argc, char* argv[])
   int nthreads = 4; // We will not use all 4 in all tests
   Kokkos::initialize(Kokkos::InitializationSettings().set_num_threads(nthreads));
   std::cout << "Kokkos Settings" << std::endl;
-  std::cout << "hwloc aval: " 
-	    << Kokkos::hwloc::available()<< std::endl;
-  std::cout << "numa count: " 
-	    << Kokkos::hwloc::get_available_numa_count() 
-	    << std::endl;
-  std::cout << "thrd numa:  " 
-	    << Kokkos::hwloc::get_available_cores_per_numa() 
-	    << std::endl;
+  std::cout << "hwloc aval: "
+            << Kokkos::hwloc::available()<< std::endl;
+  std::cout << "numa count: "
+            << Kokkos::hwloc::get_available_numa_count()
+            << std::endl;
+  std::cout << "thrd numa:  "
+            << Kokkos::hwloc::get_available_cores_per_numa()
+            << std::endl;
  
   //-----------------------Start Basker (Test - 1, 1 thread)-----------------
   {
@@ -103,7 +104,7 @@ int main(int argc, char* argv[])
     BaskerNS::Basker<Int, Entry, Exe_Space> mybasker;
     //---Options
     mybasker.Options.same_pattern       = BASKER_FALSE;
-    mybasker.Options.verbose            = BASKER_FALSE;
+    mybasker.Options.verbose            = BASKER_TRUE;
     mybasker.Options.verbose_matrix_out = BASKER_FALSE;
     mybasker.Options.realloc            = BASKER_TRUE;
     mybasker.Options.transpose          = BASKER_FALSE;
@@ -123,22 +124,28 @@ int main(int argc, char* argv[])
     mybasker.SetThreads(1);
     std::cout << "Setting Threads:" << 1 << std::endl;
     double stime = myTime();
-    mybasker.Symbolic(m,n,nnz,col_ptr,row_idx,val);
-    std::cout << "Done with Symbolic, Time: " 
-	      << totalTime(stime, myTime()) << std::endl;
+    error = mybasker.Symbolic(m,n,nnz,col_ptr,row_idx,val);
+    std::cout << "Done with Symbolic, Time: "
+              << totalTime(stime, myTime())
+              << " with error = " << error << std::endl;
+    if(error != 0) return error;
     double ftime = myTime();
-    mybasker.Factor(m,n,nnz,col_ptr,row_idx,val);
+    error = mybasker.Factor(m,n,nnz,col_ptr,row_idx,val);
     std::cout << "Done with Factor, Time: "
-	      << totalTime(ftime, myTime()) << std::endl;
+              << totalTime(ftime, myTime())
+              << " with error = " << error << std::endl;
+    if(error != 0) return error;
     //mybasker.DEBUG_PRINT();
     double ttime = myTime();
     Int *lperm;
     Int *rperm;
     mybasker.GetPerm(&lperm,&rperm);
     
-    mybasker.Solve(y,x);
+    error = mybasker.Solve(y,x);
     std::cout << "Done with Solve, Time: "
-	      << totalTime(ttime, myTime()) << std::endl;
+              << totalTime(ttime, myTime())
+              << " with error = " << error << std::endl;
+    if(error != 0) return error;
 
     multiply<Int,Entry>(m,n,col_ptr,row_idx,val, x, xhat);
     for(Int i = 0; i < m; i++)
@@ -146,19 +153,19 @@ int main(int argc, char* argv[])
       xhat[i] = y[i] - xhat[i];
     }
     std::cout << "||X||: " << norm2<Int,Entry>(n,x)
-	      << " ||Y-AX||: " << norm2<Int,Entry>(m,xhat)
-	      << std::endl;
+              << " ||Y-AX||: " << norm2<Int,Entry>(m,xhat)
+              << std::endl;
     
     //Refactor
     double rftime = myTime();
     mybasker.Factor(m,n,nnz,col_ptr,row_idx,val);
     std::cout << "Done with Refactor Factor, Time: "
-	      << totalTime(rftime, myTime()) << std::endl;
+              << totalTime(rftime, myTime()) << std::endl;
     //ReSolve
     double rttime = myTime();
     mybasker.Solve(y,x);
     std::cout << "Done with Refactor Solve, Time: "
-	      << totalTime(rttime, myTime()) << std::endl;
+              << totalTime(rttime, myTime()) << std::endl;
 
     multiply<Int,Entry>(m,n,col_ptr,row_idx,val, x, xhat);
     for(Int i = 0; i < m; i++)
@@ -167,8 +174,8 @@ int main(int argc, char* argv[])
     }
     
     std::cout << "||X||: " << norm2<Int,Entry>(n,x)
-	      << " ||Y-AX||: " << norm2<Int,Entry>(m,xhat)
-	      << std::endl;
+              << " ||Y-AX||: " << norm2<Int,Entry>(m,xhat)
+              << std::endl;
 
     mybasker.Finalize();
   }
@@ -182,7 +189,7 @@ int main(int argc, char* argv[])
     BaskerNS::Basker<Int, Entry, Exe_Space> mybasker;
     //---Options
     mybasker.Options.same_pattern       = BASKER_FALSE;
-    mybasker.Options.verbose            = BASKER_FALSE;
+    mybasker.Options.verbose            = BASKER_TRUE;
     mybasker.Options.verbose_matrix_out = BASKER_FALSE;
     mybasker.Options.realloc            = BASKER_TRUE;
     mybasker.Options.transpose          = BASKER_FALSE;
@@ -202,22 +209,25 @@ int main(int argc, char* argv[])
     mybasker.SetThreads(2);
     std::cout << "Setting Threads:" << 2 << std::endl;
     double stime = myTime();
-    mybasker.Symbolic(m,n,nnz,col_ptr,row_idx,val);
-    std::cout << "Done with Symbolic, Time: " 
-	      << totalTime(stime, myTime()) << std::endl;
+    error = mybasker.Symbolic(m,n,nnz,col_ptr,row_idx,val);
+    std::cout << "Done with Symbolic, Time: "
+              << totalTime(stime, myTime()) << std::endl;
+    if(error != 0) return error;
     double ftime = myTime();
-    mybasker.Factor(m,n,nnz,col_ptr,row_idx,val);
+    error = mybasker.Factor(m,n,nnz,col_ptr,row_idx,val);
     std::cout << "Done with Factor, Time: "
-	      << totalTime(ftime, myTime()) << std::endl;
+              << totalTime(ftime, myTime()) << std::endl;
+    if(error != 0) return error;
     //mybasker.DEBUG_PRINT();
     double ttime = myTime();
     Int *lperm;
     Int *rperm;
     mybasker.GetPerm(&lperm,&rperm);
     
-    mybasker.Solve(y,x);
+    error = mybasker.Solve(y,x);
     std::cout << "Done with Solve, Time: "
-	      << totalTime(ttime, myTime()) << std::endl;
+              << totalTime(ttime, myTime()) << std::endl;
+    if(error != 0) return error;
 
     multiply<Int,Entry>(m,n,col_ptr,row_idx,val, x, xhat);
     for(Int i = 0; i < m; i++)
@@ -225,19 +235,19 @@ int main(int argc, char* argv[])
       xhat[i] = y[i] - xhat[i];
     }
     std::cout << "||X||: " << norm2<Int,Entry>(n,x)
-	      << " ||Y-AX||: " << norm2<Int,Entry>(m,xhat)
-	      << std::endl;
+              << " ||Y-AX||: " << norm2<Int,Entry>(m,xhat)
+              << std::endl;
     
     //Refactor
     double rftime = myTime();
     mybasker.Factor(m,n,nnz,col_ptr,row_idx,val);
     std::cout << "Done with Refactor Factor, Time: "
-	      << totalTime(rftime, myTime()) << std::endl;
+              << totalTime(rftime, myTime()) << std::endl;
     //ReSolve
     double rttime = myTime();
     mybasker.Solve(y,x);
     std::cout << "Done with Refactor Solve, Time: "
-	      << totalTime(rttime, myTime()) << std::endl;
+              << totalTime(rttime, myTime()) << std::endl;
 
     multiply<Int,Entry>(m,n,col_ptr,row_idx,val, x, xhat);
     for(Int i = 0; i < m; i++)
@@ -246,8 +256,8 @@ int main(int argc, char* argv[])
     }
 
     std::cout << "||X||: " << norm2<Int,Entry>(n,x)
-	      << " ||Y-AX||: " << norm2<Int,Entry>(m,xhat)
-	      << std::endl;
+              << " ||Y-AX||: " << norm2<Int,Entry>(m,xhat)
+              << std::endl;
 
     mybasker.Finalize();
   }
@@ -286,11 +296,11 @@ int main(int argc, char* argv[])
     double stime = myTime();
     mybasker.Symbolic(m,n,nnz,col_ptr,row_idx,val);
     std::cout << "Done with Symbolic, Time: " 
-	      << totalTime(stime, myTime()) << std::endl;
+              << totalTime(stime, myTime()) << std::endl;
     double ftime = myTime();
     mybasker.Factor(m,n,nnz,col_ptr,row_idx,val);
     std::cout << "Done with Factor, Time: "
-	      << totalTime(ftime, myTime()) << std::endl;
+              << totalTime(ftime, myTime()) << std::endl;
     //mybasker.DEBUG_PRINT();
     double ttime = myTime();
     Int *lperm;
@@ -299,37 +309,37 @@ int main(int argc, char* argv[])
     
     mybasker.Solve(y,x);
     std::cout << "Done with Solve, Time: "
-	      << totalTime(ttime, myTime()) << std::endl;
+              << totalTime(ttime, myTime()) << std::endl;
 
     multiply<Int,Entry>(m,n,col_ptr,row_idx,val, x, xhat);
     for(Int i = 0; i < m; i++)
     {
-     	xhat[i] = y[i] - xhat[i];
+             xhat[i] = y[i] - xhat[i];
     }
     std::cout << "||X||: " << norm2<Int,Entry>(n,x)
-	      << " ||Y-AX||: " << norm2<Int,Entry>(m,xhat)
-	      << std::endl;
+              << " ||Y-AX||: " << norm2<Int,Entry>(m,xhat)
+              << std::endl;
     
     //Refactor
     double rftime = myTime();
     mybasker.Factor(m,n,nnz,col_ptr,row_idx,val);
     std::cout << "Done with Refactor Factor, Time: "
-	      << totalTime(rftime, myTime()) << std::endl;
+              << totalTime(rftime, myTime()) << std::endl;
     //ReSolve
     double rttime = myTime();
     mybasker.Solve(y,x);
     std::cout << "Done with Refactor Solve, Time: "
-	      << totalTime(rttime, myTime()) << std::endl;
+              << totalTime(rttime, myTime()) << std::endl;
 
     multiply<Int,Entry>(m,n,col_ptr,row_idx,val, x, xhat);
     for(Int i = 0; i < m; i++)
     {
-     	xhat[i] = y[i] - xhat[i];
+             xhat[i] = y[i] - xhat[i];
     }
     
     std::cout << "||X||: " << norm2<Int,Entry>(n,x)
-	      << " ||Y-AX||: " << norm2<Int,Entry>(m,xhat)
-	      << std::endl;
+              << " ||Y-AX||: " << norm2<Int,Entry>(m,xhat)
+              << std::endl;
 
     mybasker.Finalize();
   }
@@ -337,4 +347,5 @@ int main(int argc, char* argv[])
   
   Kokkos::finalize();
 
+  return error;
 }//end main
diff --git a/packages/shylu/shylu_node/basker/test/singular_matrix_check.cpp b/packages/shylu/shylu_node/basker/test/singular_matrix_check.cpp
index a2fc3218f0d0..4f0bb91ea0d9 100644
--- a/packages/shylu/shylu_node/basker/test/singular_matrix_check.cpp
+++ b/packages/shylu/shylu_node/basker/test/singular_matrix_check.cpp
@@ -86,7 +86,7 @@ int main(int argc, char* argv[])
       std::cout << "Done with Symbolic"
                 << "\nError code: " << error
                 << "\nTime: " 
-        	      << totalTime(stime, myTime()) << std::endl;
+                << totalTime(stime, myTime()) << std::endl;
 
       double ftime = myTime();
       try
@@ -95,13 +95,14 @@ int main(int argc, char* argv[])
       }
       catch (std::runtime_error& e)
       {
-        std::cout << " ** Factor threw exception **" << std::endl;
+        std::cout << " ** Factor threw exception **" << std::endl
+                  << e.what () << std::endl;
         error = 1;
       }
       std::cout << "Done with Factor"
                 << "\nError code: " << error
                 << "\nTime: " 
-	              << totalTime(ftime, myTime()) << std::endl;
+                << totalTime(ftime, myTime()) << std::endl;
       //mybasker.DEBUG_PRINT();
     
       if (error == 0) {
diff --git a/packages/stokhos/src/sacado/kokkos/vector/belos/Belos_PseudoBlockCGIter_MP_Vector.hpp b/packages/stokhos/src/sacado/kokkos/vector/belos/Belos_PseudoBlockCGIter_MP_Vector.hpp
index db0f66086811..d88d1c2d0cf7 100644
--- a/packages/stokhos/src/sacado/kokkos/vector/belos/Belos_PseudoBlockCGIter_MP_Vector.hpp
+++ b/packages/stokhos/src/sacado/kokkos/vector/belos/Belos_PseudoBlockCGIter_MP_Vector.hpp
@@ -110,15 +110,14 @@ namespace Belos {
      * \note For any pointer in \c newstate which directly points to the multivectors in
      * the solver, the data is not copied.
      */
-    void initializeCG(CGIterationState<ScalarType,MV>& newstate);
+    void initializeCG(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > newstate, Teuchos::RCP<MV> R_0);
 
     /*! \brief Initialize the solver with the initial vectors from the linear problem
      *  or random data.
      */
     void initialize()
     {
-      CGIterationState<ScalarType,MV> empty;
-      initializeCG(empty);
+      initializeCG(Teuchos::null, Teuchos::null);
     }
 
     /*! \brief Get the current state of the linear solver.
@@ -128,15 +127,23 @@ namespace Belos {
      * \returns A CGIterationState object containing const pointers to the current
      * solver state.
      */
-    CGIterationState<ScalarType,MV> getState() const {
-      CGIterationState<ScalarType,MV> state;
-      state.R = R_;
-      state.P = P_;
-      state.AP = AP_;
-      state.Z = Z_;
+    Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > getState() const {
+      auto state = Teuchos::rcp(new PseudoBlockCGIterationState<ScalarType,MV>());
+      state->R = R_;
+      state->P = P_;
+      state->AP = AP_;
+      state->Z = Z_;
       return state;
     }
 
+    void setState(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> > state) {
+      auto s = Teuchos::rcp_dynamic_cast<PseudoBlockCGIterationState<ScalarType,MV> >(state, true);
+      R_ = s->R;
+      Z_ = s->Z;
+      P_ = s->P;
+      AP_ = s->AP;
+    }
+
     //@}
 
 
@@ -290,8 +297,8 @@ namespace Belos {
   // Initialize this iteration object
   template <class StorageType, class MV, class OP>
   void PseudoBlockCGIter<Sacado::MP::Vector<StorageType>,MV,OP>::
-  initializeCG(CGIterationState<ScalarType,MV>& newstate)
-  {
+  initializeCG(Teuchos::RCP<CGIterationStateBase<ScalarType,MV> >  newstate, Teuchos::RCP<MV> R_0) {
+
     // Check if there is any mltivector to clone from.
     Teuchos::RCP<const MV> lhsMV = lp_->getCurrLHSVec();
     Teuchos::RCP<const MV> rhsMV = lp_->getCurrRHSVec();
@@ -305,14 +312,10 @@ namespace Belos {
     int numRHS = MVT::GetNumberVecs(*tmp);
     numRHS_ = numRHS;
 
-    // Initialize the state storage
-    // If the subspace has not be initialized before or has changed sizes, generate it using the LHS or RHS from lp_.
-    if (Teuchos::is_null(R_) || MVT::GetNumberVecs(*R_)!=numRHS_) {
-      R_ = MVT::Clone( *tmp, numRHS_ );
-      Z_ = MVT::Clone( *tmp, numRHS_ );
-      P_ = MVT::Clone( *tmp, numRHS_ );
-      AP_ = MVT::Clone( *tmp, numRHS_ );
-    }
+    // Initialize the state storage if it isn't already. initialize() passes null handles, so reject them here instead of dereferencing a null RCP below.
+    TEUCHOS_TEST_FOR_EXCEPTION( Teuchos::is_null(newstate) || Teuchos::is_null(R_0), std::invalid_argument, "Belos::PseudoBlockCGIter::initializeCG(): a non-null iteration state and initial residual are required." );
+    if (!Teuchos::rcp_dynamic_cast<PseudoBlockCGIterationState<ScalarType,MV> >(newstate, true)->matches(tmp, numRHS_)) newstate->initialize(tmp, numRHS_);
+    setState(newstate);
 
     // Tracking information for condition number estimation
     if(numEntriesForCondEst_ > 0) {
@@ -320,25 +323,19 @@ namespace Belos {
       offdiag_.resize(numEntriesForCondEst_-1);
     }
 
-    // NOTE:  In CGIter R_, the initial residual, is required!!!
-    //
     std::string errstr("Belos::BlockPseudoCGIter::initialize(): Specified multivectors must have a consistent length and width.");
 
-    // Create convenience variables for zero and one.
-    const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
-    const MagnitudeType zero = Teuchos::ScalarTraits<MagnitudeType>::zero();
-
-    if (!Teuchos::is_null(newstate.R)) {
+    {
 
-      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.R) != MVT::GetGlobalLength(*R_),
+      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*R_0) != MVT::GetGlobalLength(*R_),
                           std::invalid_argument, errstr );
-      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) != numRHS_,
+      TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*R_0) != numRHS_,
                           std::invalid_argument, errstr );
 
       // Copy basis vectors from newstate into V
-      if (newstate.R != R_) {
+      if (R_0 != R_) {
         // copy over the initial residual (unpreconditioned).
-        MVT::MvAddMv( one, *newstate.R, zero, *newstate.R, *R_ );
+        MVT::Assign( *R_0, *R_ );
       }
 
       // Compute initial direction vectors
@@ -356,14 +353,9 @@ namespace Belos {
         lp_->applyRightPrec( *R_, *Z_ );
       }
       else {
-        Z_ = R_;
+        MVT::Assign( *R_, *Z_ );
       }
-      MVT::MvAddMv( one, *Z_, zero, *Z_, *P_ );
-    }
-    else {
-
-      TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::is_null(newstate.R),std::invalid_argument,
-                         "Belos::CGIter::initialize(): CGStateIterState does not have initial residual.");
+      MVT::Assign( *Z_, *P_ );
     }
 
     // The solver is initialized
diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp
index 17fbcbfb9a5d..3301d63e9ce8 100644
--- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp
+++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp
@@ -7465,7 +7465,7 @@ namespace Tpetra {
     output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
     output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
     output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false;
-    output = this->sortGhostsAssociatedWithEachProcessor_ == this->sortGhostsAssociatedWithEachProcessor_ ? output : false;
+    output = this->sortGhostsAssociatedWithEachProcessor_ == graph.sortGhostsAssociatedWithEachProcessor_ ? output : false;
 
     // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
     // nonlocals_ isa std::map<GO, std::vector<GO> >
diff --git a/packages/xpetra/src/CrsMatrix/Xpetra_TpetraCrsMatrix_decl.hpp b/packages/xpetra/src/CrsMatrix/Xpetra_TpetraCrsMatrix_decl.hpp
index a8f38f64e871..5ed4c5f8ce6b 100644
--- a/packages/xpetra/src/CrsMatrix/Xpetra_TpetraCrsMatrix_decl.hpp
+++ b/packages/xpetra/src/CrsMatrix/Xpetra_TpetraCrsMatrix_decl.hpp
@@ -58,51 +58,51 @@ class TpetraCrsMatrix
   //@{
 
   //! Constructor specifying fixed number of entries for each row.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap, size_t maxNumEntriesPerRow, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null);
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap, size_t maxNumEntriesPerRow, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null);
 
   //! Constructor specifying (possibly different) number of entries in each row.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap, const ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null);
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap, const ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null);
 
   //! Constructor specifying column Map and fixed number of entries for each row.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap, const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &colMap, size_t maxNumEntriesPerRow, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null);
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap, const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &colMap, size_t maxNumEntriesPerRow, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null);
 
   //! Constructor specifying column Map and number of entries in each row.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap, const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &colMap, const ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null);
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap, const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &colMap, const ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null);
 
   //! Constructor specifying a previously constructed graph.
-  TpetraCrsMatrix(const Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> > &graph, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null);
+  TpetraCrsMatrix(const Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node>> &graph, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null);
 
   //! Constructor specifying a previously constructed graph and values array.
-  TpetraCrsMatrix(const Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> > &graph, typename Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_type::values_type &values, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null);
+  TpetraCrsMatrix(const Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node>> &graph, typename Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_type::values_type &values, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null);
 
   //! Constructor for a fused import
   TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix> &sourceMatrix,
                   const Import<LocalOrdinal, GlobalOrdinal, Node> &importer,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap = Teuchos::null,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap  = Teuchos::null,
-                  const Teuchos::RCP<Teuchos::ParameterList> &params                           = Teuchos::null);
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap = Teuchos::null,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap  = Teuchos::null,
+                  const Teuchos::RCP<Teuchos::ParameterList> &params                          = Teuchos::null);
 
   //! Constructor for a fused export
   TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix> &sourceMatrix,
                   const Export<LocalOrdinal, GlobalOrdinal, Node> &exporter,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap = Teuchos::null,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap  = Teuchos::null,
-                  const Teuchos::RCP<Teuchos::ParameterList> &params                           = Teuchos::null);
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap = Teuchos::null,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap  = Teuchos::null,
+                  const Teuchos::RCP<Teuchos::ParameterList> &params                          = Teuchos::null);
 
   //! Constructor for a fused import
   TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix> &sourceMatrix,
                   const Import<LocalOrdinal, GlobalOrdinal, Node> &RowImporter,
-                  const Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > DomainImporter,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap,
+                  const Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node>> DomainImporter,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap,
                   const Teuchos::RCP<Teuchos::ParameterList> &params);
 
   //! Constructor for a fused export
   TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix> &sourceMatrix,
                   const Export<LocalOrdinal, GlobalOrdinal, Node> &RowExporter,
-                  const Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Node> > DomainExporter,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap,
+                  const Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Node>> DomainExporter,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap,
                   const Teuchos::RCP<Teuchos::ParameterList> &params);
 
 #ifdef HAVE_XPETRA_TPETRA
@@ -126,29 +126,29 @@ class TpetraCrsMatrix
   /// \param params [in/out] Optional list of parameters.  If not
   ///   null, any missing parameters will be filled in with their
   ///   default values.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &colMap,
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &colMap,
                   const local_matrix_type &lclMatrix,
                   const Teuchos::RCP<Teuchos::ParameterList> &params = null);
 
   /// \brief Constructor specifying local matrix and 4 maps
   TpetraCrsMatrix(
       const local_matrix_type &lclMatrix,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &colMap,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap = Teuchos::null,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap  = Teuchos::null,
-      const Teuchos::RCP<Teuchos::ParameterList> &params                           = null);
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap,
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &colMap,
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap = Teuchos::null,
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap  = Teuchos::null,
+      const Teuchos::RCP<Teuchos::ParameterList> &params                          = null);
 
   /// \brief Constructor specifying local matrix, four maps, import and export objects.
   TpetraCrsMatrix(
       const local_matrix_type &lclMatrix,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &colMap,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap,
-      const Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > &importer,
-      const Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Node> > &exporter,
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap,
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &colMap,
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap,
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap,
+      const Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node>> &importer,
+      const Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Node>> &exporter,
       const Teuchos::RCP<Teuchos::ParameterList> &params = null);
 #endif
 
@@ -205,20 +205,20 @@ class TpetraCrsMatrix
   void resumeFill(const RCP<ParameterList> &params = null);
 
   //! Signal that data entry is complete, specifying domain and range maps.
-  void fillComplete(const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap, const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap, const RCP<ParameterList> &params = null);
+  void fillComplete(const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap, const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap, const RCP<ParameterList> &params = null);
 
   //! Signal that data entry is complete.
   void fillComplete(const RCP<ParameterList> &params = null);
 
   //!  Replaces the current domainMap and importer with the user-specified objects.
-  void replaceDomainMapAndImporter(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &newDomainMap, Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > &newImporter);
+  void replaceDomainMapAndImporter(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &newDomainMap, Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node>> &newImporter);
 
   //! Expert static fill complete
-  void expertStaticFillComplete(const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap,
-                                const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap,
-                                const RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > &importer = Teuchos::null,
-                                const RCP<const Export<LocalOrdinal, GlobalOrdinal, Node> > &exporter = Teuchos::null,
-                                const RCP<ParameterList> &params                                      = Teuchos::null);
+  void expertStaticFillComplete(const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap,
+                                const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap,
+                                const RCP<const Import<LocalOrdinal, GlobalOrdinal, Node>> &importer = Teuchos::null,
+                                const RCP<const Export<LocalOrdinal, GlobalOrdinal, Node>> &exporter = Teuchos::null,
+                                const RCP<ParameterList> &params                                     = Teuchos::null);
 
   //@}
 
@@ -226,13 +226,13 @@ class TpetraCrsMatrix
   //@{
 
   //! Returns the Map that describes the row distribution in this matrix.
-  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getRowMap() const;
+  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getRowMap() const;
 
   //! Returns the Map that describes the column distribution in this matrix.
-  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getColMap() const;
+  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getColMap() const;
 
   //! Returns the CrsGraph associated with this matrix.
-  RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> > getCrsGraph() const;
+  RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node>> getCrsGraph() const;
 
   //! Number of global elements in the row map of this matrix.
   global_size_t getGlobalNumRows() const;
@@ -303,13 +303,13 @@ class TpetraCrsMatrix
   void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode = Teuchos::NO_TRANS, Scalar alpha = ScalarTraits<Scalar>::one(), Scalar beta = ScalarTraits<Scalar>::zero()) const;
 
   //! Computes the matrix-multivector multiplication for region layout matrices
-  void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta, bool sumInterfaceValues, const RCP<Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node> > &regionInterfaceImporter, const Teuchos::ArrayRCP<LocalOrdinal> &regionInterfaceLIDs) const;
+  void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta, bool sumInterfaceValues, const RCP<Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node>> &regionInterfaceImporter, const Teuchos::ArrayRCP<LocalOrdinal> &regionInterfaceLIDs) const;
 
   //! Returns the Map associated with the domain of this operator. This will be null until fillComplete() is called.
-  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getDomainMap() const;
+  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getDomainMap() const;
 
   //!
-  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getRangeMap() const;
+  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getRangeMap() const;
 
   //@}
 
@@ -357,7 +357,7 @@ class TpetraCrsMatrix
   //{@
 
   //! Access function for the Tpetra::Map this DistObject was constructed with.
-  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getMap() const;
+  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getMap() const;
 
   //! Import.
   void doImport(const DistObject<char, LocalOrdinal, GlobalOrdinal, Node> &source,
@@ -375,7 +375,7 @@ class TpetraCrsMatrix
   void doExport(const DistObject<char, LocalOrdinal, GlobalOrdinal, Node> &dest,
                 const Export<LocalOrdinal, GlobalOrdinal, Node> &exporter, CombineMode CM);
 
-  void removeEmptyProcessesInPlace(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &newMap);
+  void removeEmptyProcessesInPlace(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &newMap);
 
   // @}
   //! @name Xpetra specific
@@ -385,13 +385,13 @@ class TpetraCrsMatrix
   bool hasMatrix() const;
 
   //! TpetraCrsMatrix constructor to wrap a Tpetra::CrsMatrix object
-  TpetraCrsMatrix(const Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &mtx);
+  TpetraCrsMatrix(const Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &mtx);
 
   //! Get the underlying Tpetra matrix
-  RCP<const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > getTpetra_CrsMatrix() const;
+  RCP<const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> getTpetra_CrsMatrix() const;
 
   //! Get the underlying Tpetra matrix
-  RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > getTpetra_CrsMatrixNonConst() const;  // TODO: remove
+  RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> getTpetra_CrsMatrixNonConst() const;  // TODO: remove
 
 #ifdef HAVE_XPETRA_TPETRA
   /// \brief Access the local Kokkos::CrsMatrix data
@@ -421,7 +421,7 @@ class TpetraCrsMatrix
   //@}
 
  private:
-  RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > mtx_;
+  RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> mtx_;
 };  // TpetraCrsMatrix class
 
 #ifdef HAVE_XPETRA_EPETRA
@@ -453,64 +453,64 @@ class TpetraCrsMatrix<Scalar, int, int, EpetraNode>
   //@{
 
   //! Constructor specifying fixed number of entries for each row.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap, size_t maxNumEntriesPerRow, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap, size_t maxNumEntriesPerRow, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), "int", typeid(EpetraNode).name());
   }
 
   //! Constructor specifying (possibly different) number of entries in each row.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap, const ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap, const ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), "int", typeid(EpetraNode).name());
   }
 
   //! Constructor specifying column Map and fixed number of entries for each row.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap, const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &colMap, size_t maxNumEntriesPerRow, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap, const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &colMap, size_t maxNumEntriesPerRow, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), "int", typeid(EpetraNode).name());
   }
 
   //! Constructor specifying column Map and number of entries in each row.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap, const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &colMap, const ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap, const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &colMap, const ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), "int", typeid(EpetraNode).name());
   }
 
   //! Constructor specifying a previously constructed graph.
-  TpetraCrsMatrix(const Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> > &graph, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
+  TpetraCrsMatrix(const Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node>> &graph, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), "int", typeid(EpetraNode).name());
   }
 
   //! Constructor for a fused import
-  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &sourceMatrix,
+  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &sourceMatrix,
                   const Import<LocalOrdinal, GlobalOrdinal, Node> &importer,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap = Teuchos::null,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap  = Teuchos::null,
-                  const Teuchos::RCP<Teuchos::ParameterList> &params                           = Teuchos::null) {
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap = Teuchos::null,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap  = Teuchos::null,
+                  const Teuchos::RCP<Teuchos::ParameterList> &params                          = Teuchos::null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), "int", typeid(EpetraNode).name());
   }
 
   //! Constructor for a fused export
-  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &sourceMatrix,
+  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &sourceMatrix,
                   const Export<LocalOrdinal, GlobalOrdinal, Node> &exporter,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap = Teuchos::null,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap  = Teuchos::null,
-                  const Teuchos::RCP<Teuchos::ParameterList> &params                           = Teuchos::null) {
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap = Teuchos::null,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap  = Teuchos::null,
+                  const Teuchos::RCP<Teuchos::ParameterList> &params                          = Teuchos::null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), "int", typeid(EpetraNode).name());
   }
 
   //! Constructor for a fused import
-  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &sourceMatrix,
+  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &sourceMatrix,
                   const Import<LocalOrdinal, GlobalOrdinal, Node> &RowImporter,
-                  const Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > DomainImporter,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap,
+                  const Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node>> DomainImporter,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap,
                   const Teuchos::RCP<Teuchos::ParameterList> &params) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), "int", typeid(EpetraNode).name());
   }
 
   //! Constructor for a fused export
-  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &sourceMatrix,
+  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &sourceMatrix,
                   const Export<LocalOrdinal, GlobalOrdinal, Node> &RowExporter,
-                  const Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Node> > DomainExporter,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap,
+                  const Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Node>> DomainExporter,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap,
                   const Teuchos::RCP<Teuchos::ParameterList> &params) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), "int", typeid(EpetraNode).name());
   }
@@ -536,8 +536,8 @@ class TpetraCrsMatrix<Scalar, int, int, EpetraNode>
   /// \param params [in/out] Optional list of parameters.  If not
   ///   null, any missing parameters will be filled in with their
   ///   default values.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &colMap,
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &colMap,
                   const local_matrix_type &lclMatrix,
                   const Teuchos::RCP<Teuchos::ParameterList> &params = null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), "TpetraCrsMatrix<int,int>", "int", typeid(EpetraNode).name());
@@ -546,11 +546,11 @@ class TpetraCrsMatrix<Scalar, int, int, EpetraNode>
   /// \brief Constructor specifying local matrix and 4 maps
   TpetraCrsMatrix(
       const local_matrix_type &lclMatrix,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &colMap,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap = Teuchos::null,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap  = Teuchos::null,
-      const Teuchos::RCP<Teuchos::ParameterList> &params                           = null) {
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap,
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &colMap,
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap = Teuchos::null,
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap  = Teuchos::null,
+      const Teuchos::RCP<Teuchos::ParameterList> &params                          = null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), "TpetraCrsMatrix<int,int>", "int", typeid(EpetraNode).name());
   }
 #endif
@@ -608,20 +608,20 @@ class TpetraCrsMatrix<Scalar, int, int, EpetraNode>
   void resumeFill(const RCP<ParameterList> &params = null) {}
 
   //! Signal that data entry is complete, specifying domain and range maps.
-  void fillComplete(const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap, const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap, const RCP<ParameterList> &params = null) {}
+  void fillComplete(const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap, const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap, const RCP<ParameterList> &params = null) {}
 
   //! Signal that data entry is complete.
   void fillComplete(const RCP<ParameterList> &params = null) {}
 
   //!  Replaces the current domainMap and importer with the user-specified objects.
-  void replaceDomainMapAndImporter(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &newDomainMap, Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > &newImporter) {}
+  void replaceDomainMapAndImporter(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &newDomainMap, Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node>> &newImporter) {}
 
   //! Expert static fill complete
-  void expertStaticFillComplete(const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap,
-                                const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap,
-                                const RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > &importer = Teuchos::null,
-                                const RCP<const Export<LocalOrdinal, GlobalOrdinal, Node> > &exporter = Teuchos::null,
-                                const RCP<ParameterList> &params                                      = Teuchos::null) {}
+  void expertStaticFillComplete(const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap,
+                                const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap,
+                                const RCP<const Import<LocalOrdinal, GlobalOrdinal, Node>> &importer = Teuchos::null,
+                                const RCP<const Export<LocalOrdinal, GlobalOrdinal, Node>> &exporter = Teuchos::null,
+                                const RCP<ParameterList> &params                                     = Teuchos::null) {}
 
   //@}
 
@@ -629,13 +629,13 @@ class TpetraCrsMatrix<Scalar, int, int, EpetraNode>
   //@{
 
   //! Returns the Map that describes the row distribution in this matrix.
-  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getRowMap() const { return Teuchos::null; }
+  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getRowMap() const { return Teuchos::null; }
 
   //! Returns the Map that describes the column distribution in this matrix.
-  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getColMap() const { return Teuchos::null; }
+  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getColMap() const { return Teuchos::null; }
 
   //! Returns the CrsGraph associated with this matrix.
-  RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> > getCrsGraph() const { return Teuchos::null; }
+  RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node>> getCrsGraph() const { return Teuchos::null; }
 
   //! Number of global elements in the row map of this matrix.
   global_size_t getGlobalNumRows() const { return 0; }
@@ -706,13 +706,13 @@ class TpetraCrsMatrix<Scalar, int, int, EpetraNode>
   void apply(const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &X, MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &Y, Teuchos::ETransp mode = Teuchos::NO_TRANS, Scalar alpha = ScalarTraits<Scalar>::one(), Scalar beta = ScalarTraits<Scalar>::zero()) const {}
 
   //! Computes the matrix-multivector multiplication for region layout matrices
-  void apply(const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &X, MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta, bool sumInterfaceValues, const RCP<Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node> > &regionInterfaceImporter, const Teuchos::ArrayRCP<LocalOrdinal> &regionInterfaceLIDs) const {}
+  void apply(const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &X, MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta, bool sumInterfaceValues, const RCP<Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node>> &regionInterfaceImporter, const Teuchos::ArrayRCP<LocalOrdinal> &regionInterfaceLIDs) const {}
 
   //! Returns the Map associated with the domain of this operator. This will be null until fillComplete() is called.
-  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getDomainMap() const { return Teuchos::null; }
+  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getDomainMap() const { return Teuchos::null; }
 
   //!
-  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getRangeMap() const { return Teuchos::null; }
+  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getRangeMap() const { return Teuchos::null; }
 
   //@}
 
@@ -757,7 +757,7 @@ class TpetraCrsMatrix<Scalar, int, int, EpetraNode>
   //{@
 
   //! Access function for the Tpetra::Map this DistObject was constructed with.
-  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getMap() const { return Teuchos::null; }
+  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getMap() const { return Teuchos::null; }
 
   //! Import.
   void doImport(const DistObject<char, LocalOrdinal, GlobalOrdinal, Node> &source,
@@ -775,7 +775,7 @@ class TpetraCrsMatrix<Scalar, int, int, EpetraNode>
   void doExport(const DistObject<char, LocalOrdinal, GlobalOrdinal, Node> &dest,
                 const Export<LocalOrdinal, GlobalOrdinal, Node> &exporter, CombineMode CM) {}
 
-  void removeEmptyProcessesInPlace(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &newMap) {}
+  void removeEmptyProcessesInPlace(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &newMap) {}
 
   // @}
   //! @name Xpetra specific
@@ -785,15 +785,15 @@ class TpetraCrsMatrix<Scalar, int, int, EpetraNode>
   bool hasMatrix() const { return false; }
 
   //! TpetraCrsMatrix constructor to wrap a Tpetra::CrsMatrix object
-  TpetraCrsMatrix(const Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &mtx) {
+  TpetraCrsMatrix(const Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &mtx) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, int, int, EpetraNode>).name(), "int", typeid(EpetraNode).name());
   }
 
   //! Get the underlying Tpetra matrix
-  RCP<const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > getTpetra_CrsMatrix() const { return Teuchos::null; }
+  RCP<const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> getTpetra_CrsMatrix() const { return Teuchos::null; }
 
   //! Get the underlying Tpetra matrix
-  RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > getTpetra_CrsMatrixNonConst() const { return Teuchos::null; }  // TODO: remove
+  RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> getTpetra_CrsMatrixNonConst() const { return Teuchos::null; }  // TODO: remove
 
 #ifdef HAVE_XPETRA_TPETRA
   /// \brief Access the local Kokkos::CrsMatrix data
@@ -845,64 +845,64 @@ class TpetraCrsMatrix<Scalar, int, long long, EpetraNode>
   //@{
 
   //! Constructor specifying fixed number of entries for each row.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap, size_t maxNumEntriesPerRow, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap, size_t maxNumEntriesPerRow, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), "long long", typeid(EpetraNode).name());
   }
 
   //! Constructor specifying (possibly different) number of entries in each row.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap, const ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap, const ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), "long long", typeid(EpetraNode).name());
   }
 
   //! Constructor specifying column Map and fixed number of entries for each row.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap, const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &colMap, size_t maxNumEntriesPerRow, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap, const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &colMap, size_t maxNumEntriesPerRow, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), "long long", typeid(EpetraNode).name());
   }
 
   //! Constructor specifying column Map and number of entries in each row.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap, const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &colMap, const ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap, const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &colMap, const ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), "long long", typeid(EpetraNode).name());
   }
 
   //! Constructor specifying a previously constructed graph.
-  TpetraCrsMatrix(const Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> > &graph, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
+  TpetraCrsMatrix(const Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node>> &graph, const Teuchos::RCP<Teuchos::ParameterList> &params = Teuchos::null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), "long long", typeid(EpetraNode).name());
   }
 
   //! Constructor for a fused import
-  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &sourceMatrix,
+  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &sourceMatrix,
                   const Import<LocalOrdinal, GlobalOrdinal, Node> &importer,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap = Teuchos::null,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap  = Teuchos::null,
-                  const Teuchos::RCP<Teuchos::ParameterList> &params                           = Teuchos::null) {
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap = Teuchos::null,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap  = Teuchos::null,
+                  const Teuchos::RCP<Teuchos::ParameterList> &params                          = Teuchos::null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), "long long", typeid(EpetraNode).name());
   }
 
   //! Constructor for a fused export
-  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &sourceMatrix,
+  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &sourceMatrix,
                   const Export<LocalOrdinal, GlobalOrdinal, Node> &exporter,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap = Teuchos::null,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap  = Teuchos::null,
-                  const Teuchos::RCP<Teuchos::ParameterList> &params                           = Teuchos::null) {
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap = Teuchos::null,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap  = Teuchos::null,
+                  const Teuchos::RCP<Teuchos::ParameterList> &params                          = Teuchos::null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), "long long", typeid(EpetraNode).name());
   }
 
   //! Constructor for a fused import
-  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &sourceMatrix,
+  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &sourceMatrix,
                   const Import<LocalOrdinal, GlobalOrdinal, Node> &RowImporter,
-                  const Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > DomainImporter,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap,
+                  const Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node>> DomainImporter,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap,
                   const Teuchos::RCP<Teuchos::ParameterList> &params) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), "long long", typeid(EpetraNode).name());
   }
 
   //! Constructor for a fused export
-  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &sourceMatrix,
+  TpetraCrsMatrix(const Teuchos::RCP<const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &sourceMatrix,
                   const Export<LocalOrdinal, GlobalOrdinal, Node> &RowExporter,
-                  const Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Node> > DomainExporter,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap,
+                  const Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Node>> DomainExporter,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap,
                   const Teuchos::RCP<Teuchos::ParameterList> &params) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), "long long", typeid(EpetraNode).name());
   }
@@ -928,8 +928,8 @@ class TpetraCrsMatrix<Scalar, int, long long, EpetraNode>
   /// \param params [in/out] Optional list of parameters.  If not
   ///   null, any missing parameters will be filled in with their
   ///   default values.
-  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap,
-                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &colMap,
+  TpetraCrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap,
+                  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &colMap,
                   const local_matrix_type &lclMatrix,
                   const Teuchos::RCP<Teuchos::ParameterList> &params = null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), "long long", typeid(EpetraNode).name());
@@ -938,11 +938,11 @@ class TpetraCrsMatrix<Scalar, int, long long, EpetraNode>
   /// \brief Constructor specifying local matrix and 4 maps
   TpetraCrsMatrix(
       const local_matrix_type &lclMatrix,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rowMap,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &colMap,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap = Teuchos::null,
-      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap  = Teuchos::null,
-      const Teuchos::RCP<Teuchos::ParameterList> &params                           = null) {
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rowMap,
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &colMap,
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap = Teuchos::null,
+      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap  = Teuchos::null,
+      const Teuchos::RCP<Teuchos::ParameterList> &params                          = null) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), "long long", typeid(EpetraNode).name());
   }
 #endif
@@ -1000,20 +1000,20 @@ class TpetraCrsMatrix<Scalar, int, long long, EpetraNode>
   void resumeFill(const RCP<ParameterList> &params = null) {}
 
   //! Signal that data entry is complete, specifying domain and range maps.
-  void fillComplete(const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap, const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap, const RCP<ParameterList> &params = null) {}
+  void fillComplete(const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap, const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap, const RCP<ParameterList> &params = null) {}
 
   //! Signal that data entry is complete.
   void fillComplete(const RCP<ParameterList> &params = null) {}
 
   //!  Replaces the current domainMap and importer with the user-specified objects.
-  void replaceDomainMapAndImporter(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &newDomainMap, Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > &newImporter) {}
+  void replaceDomainMapAndImporter(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &newDomainMap, Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node>> &newImporter) {}
 
   //! Expert static fill complete
-  void expertStaticFillComplete(const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &domainMap,
-                                const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &rangeMap,
-                                const RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > &importer = Teuchos::null,
-                                const RCP<const Export<LocalOrdinal, GlobalOrdinal, Node> > &exporter = Teuchos::null,
-                                const RCP<ParameterList> &params                                      = Teuchos::null) {}
+  void expertStaticFillComplete(const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &domainMap,
+                                const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &rangeMap,
+                                const RCP<const Import<LocalOrdinal, GlobalOrdinal, Node>> &importer = Teuchos::null,
+                                const RCP<const Export<LocalOrdinal, GlobalOrdinal, Node>> &exporter = Teuchos::null,
+                                const RCP<ParameterList> &params                                     = Teuchos::null) {}
 
   //@}
 
@@ -1021,13 +1021,13 @@ class TpetraCrsMatrix<Scalar, int, long long, EpetraNode>
   //@{
 
   //! Returns the Map that describes the row distribution in this matrix.
-  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getRowMap() const { return Teuchos::null; }
+  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getRowMap() const { return Teuchos::null; }
 
   //! Returns the Map that describes the column distribution in this matrix.
-  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getColMap() const { return Teuchos::null; }
+  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getColMap() const { return Teuchos::null; }
 
   //! Returns the CrsGraph associated with this matrix.
-  RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> > getCrsGraph() const { return Teuchos::null; }
+  RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node>> getCrsGraph() const { return Teuchos::null; }
 
   //! Number of global elements in the row map of this matrix.
   global_size_t getGlobalNumRows() const { return 0; }
@@ -1098,13 +1098,13 @@ class TpetraCrsMatrix<Scalar, int, long long, EpetraNode>
   void apply(const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &X, MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &Y, Teuchos::ETransp mode = Teuchos::NO_TRANS, Scalar alpha = ScalarTraits<Scalar>::one(), Scalar beta = ScalarTraits<Scalar>::zero()) const {}
 
   //! Computes the matrix-multivector multiplication for region layout matrices
-  void apply(const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &X, MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta, bool sumInterfaceValues, const RCP<Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node> > &regionInterfaceImporter, const Teuchos::ArrayRCP<LocalOrdinal> &regionInterfaceLIDs) const {}
+  void apply(const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &X, MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta, bool sumInterfaceValues, const RCP<Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node>> &regionInterfaceImporter, const Teuchos::ArrayRCP<LocalOrdinal> &regionInterfaceLIDs) const {}
 
   //! Returns the Map associated with the domain of this operator. This will be null until fillComplete() is called.
-  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getDomainMap() const { return Teuchos::null; }
+  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getDomainMap() const { return Teuchos::null; }
 
   //!
-  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getRangeMap() const { return Teuchos::null; }
+  const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getRangeMap() const { return Teuchos::null; }
 
   //@}
 
@@ -1149,7 +1149,7 @@ class TpetraCrsMatrix<Scalar, int, long long, EpetraNode>
   //{@
 
   //! Access function for the Tpetra::Map this DistObject was constructed with.
-  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getMap() const { return Teuchos::null; }
+  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getMap() const { return Teuchos::null; }
 
   //! Import.
   void doImport(const DistObject<char, LocalOrdinal, GlobalOrdinal, Node> &source,
@@ -1167,7 +1167,7 @@ class TpetraCrsMatrix<Scalar, int, long long, EpetraNode>
   void doExport(const DistObject<char, LocalOrdinal, GlobalOrdinal, Node> &dest,
                 const Export<LocalOrdinal, GlobalOrdinal, Node> &exporter, CombineMode CM) {}
 
-  void removeEmptyProcessesInPlace(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &newMap) {}
+  void removeEmptyProcessesInPlace(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &newMap) {}
 
   // @}
   //! @name Xpetra specific
@@ -1177,15 +1177,15 @@ class TpetraCrsMatrix<Scalar, int, long long, EpetraNode>
   bool hasMatrix() const { return false; }
 
   //! TpetraCrsMatrix constructor to wrap a Tpetra::CrsMatrix object
-  TpetraCrsMatrix(const Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &mtx) {
+  TpetraCrsMatrix(const Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &mtx) {
     XPETRA_TPETRA_ETI_EXCEPTION(typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), typeid(TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, EpetraNode>).name(), "long long", typeid(EpetraNode).name());
   }
 
   //! Get the underlying Tpetra matrix
-  RCP<const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > getTpetra_CrsMatrix() const { return Teuchos::null; }
+  RCP<const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> getTpetra_CrsMatrix() const { return Teuchos::null; }
 
   //! Get the underlying Tpetra matrix
-  RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > getTpetra_CrsMatrixNonConst() const { return Teuchos::null; }  // TODO: remove
+  RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> getTpetra_CrsMatrixNonConst() const { return Teuchos::null; }  // TODO: remove
 
 #ifdef HAVE_XPETRA_TPETRA
   /// \brief Access the local Kokkos::CrsMatrix data
@@ -1221,6 +1221,30 @@ class TpetraCrsMatrix<Scalar, int, long long, EpetraNode>
 
 #endif  // HAVE_XPETRA_EPETRA
 
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
+toTpetra(const Teuchos::RCP<Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &A) {
+  return Teuchos::rcp_dynamic_cast<Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(A, true)->getTpetra_CrsMatrixNonConst();
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Teuchos::RCP<const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
+toTpetra(const Teuchos::RCP<const Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &A) {
+  return Teuchos::rcp_dynamic_cast<const Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(A, true)->getTpetra_CrsMatrix();
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &
+toTpetra(Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &A) {
+  return *dynamic_cast<Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &>(A).getTpetra_CrsMatrixNonConst();
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &
+toTpetra(const Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &A) {
+  return *dynamic_cast<const Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &>(A).getTpetra_CrsMatrixNonConst();  // const getter yields RCP<const T>, which cannot bind to the mutable reference returned here
+}
+
 }  // namespace Xpetra
 
 #define XPETRA_TPETRACRSMATRIX_SHORT
diff --git a/packages/xpetra/src/Matrix/Xpetra_CrsMatrixWrap_decl.hpp b/packages/xpetra/src/Matrix/Xpetra_CrsMatrixWrap_decl.hpp
index e92212017b20..67f721d89df2 100644
--- a/packages/xpetra/src/Matrix/Xpetra_CrsMatrixWrap_decl.hpp
+++ b/packages/xpetra/src/Matrix/Xpetra_CrsMatrixWrap_decl.hpp
@@ -23,6 +23,7 @@
 #include "Xpetra_CrsMatrixFactory.hpp"
 
 #include "Xpetra_Matrix.hpp"
+#include "Xpetra_TpetraBlockCrsMatrix_decl.hpp"
 
 #include <Teuchos_SerialDenseMatrix.hpp>
 #include <Teuchos_Hashtable.hpp>
@@ -355,16 +356,16 @@ class CrsMatrixWrap : public Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> {
                      Scalar alpha,
                      Scalar beta,
                      bool sumInterfaceValues,
-                     const RCP<Import<LocalOrdinal, GlobalOrdinal, Node> > &regionInterfaceImporter,
+                     const RCP<Import<LocalOrdinal, GlobalOrdinal, Node>> &regionInterfaceImporter,
                      const Teuchos::ArrayRCP<LocalOrdinal> &regionInterfaceLIDs) const;
 
   //! \brief Returns the Map associated with the domain of this operator.
   //! This will be <tt>null</tt> until fillComplete() is called.
-  const RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node> > getDomainMap() const;
+  const RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> getDomainMap() const;
 
   //! Returns the Map associated with the domain of this operator.
   //! This will be <tt>null</tt> until fillComplete() is called.
-  const RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node> > getRangeMap() const;
+  const RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> getRangeMap() const;
 
   //! \brief Returns the Map that describes the column distribution in this matrix.
   //! This might be <tt>null</tt> until fillComplete() is called.
@@ -381,7 +382,7 @@ class CrsMatrixWrap : public Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> {
   //{@
 
   //! Access function for the Tpetra::Map this DistObject was constructed with.
-  const Teuchos::RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node> > getMap() const;
+  const Teuchos::RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> getMap() const;
 
   //! Import.
   void doImport(const Matrix &source,
@@ -463,6 +464,102 @@ class CrsMatrixWrap : public Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> {
 
 };  // class CrsMatrixWrap
 
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Teuchos::RCP<Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
+toCrsMatrix(const Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &matrix) {
+  return Teuchos::rcp_dynamic_cast<Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(matrix, true)->getCrsMatrix();
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Teuchos::RCP<const Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
+toCrsMatrix(const Teuchos::RCP<const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &matrix) {
+  return Teuchos::rcp_dynamic_cast<const Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(matrix, true)->getCrsMatrix();
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &
+toCrsMatrix(Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &matrix) {
+  return *dynamic_cast<Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node> &>(matrix).getCrsMatrix();
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &
+toCrsMatrix(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &matrix) {
+  return *dynamic_cast<const Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node> &>(matrix).getCrsMatrix();
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
+toTpetra(const Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &A) {
+  return toTpetra(toCrsMatrix(A));
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Teuchos::RCP<const Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
+toTpetra(const Teuchos::RCP<const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &A) {
+  return toTpetra(toCrsMatrix(A));
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &
+toTpetra(Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &A) {
+  return toTpetra(toCrsMatrix(A));
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &
+toTpetra(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &A) {
+  return toTpetra(toCrsMatrix(A));
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Teuchos::RCP<Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
+toTpetraBlock(const Teuchos::RCP<Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &A) {
+  return Teuchos::rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(A, true)->getTpetra_BlockCrsMatrixNonConst();
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Teuchos::RCP<const Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
+toTpetraBlock(const Teuchos::RCP<const Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &A) {
+  return Teuchos::rcp_dynamic_cast<const Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(A, true)->getTpetra_BlockCrsMatrix();
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &
+toTpetraBlock(Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &A) {
+  return *(dynamic_cast<Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &>(A).getTpetra_BlockCrsMatrixNonConst());
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+const Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &
+toTpetraBlock(const Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &A) {
+  return *(dynamic_cast<const Xpetra::TpetraBlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &>(A).getTpetra_BlockCrsMatrix());
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Teuchos::RCP<Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
+toTpetraBlock(const Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &A) {
+  return toTpetraBlock(toCrsMatrix(A));
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Teuchos::RCP<const Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
+toTpetraBlock(const Teuchos::RCP<const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &A) {
+  return toTpetraBlock(toCrsMatrix(A));
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &
+toTpetraBlock(Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &A) {
+  return toTpetraBlock(toCrsMatrix(A));
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
+const Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &
+toTpetraBlock(const Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> &A) {
+  return toTpetraBlock(toCrsMatrix(A));
+}
+
 }  // namespace Xpetra
 
 #define XPETRA_CRSMATRIXWRAP_SHORT
diff --git a/packages/xpetra/src/MultiVector/Xpetra_TpetraMultiVector_decl.hpp b/packages/xpetra/src/MultiVector/Xpetra_TpetraMultiVector_decl.hpp
index aef5878f0415..13f7cabbd4db 100644
--- a/packages/xpetra/src/MultiVector/Xpetra_TpetraMultiVector_decl.hpp
+++ b/packages/xpetra/src/MultiVector/Xpetra_TpetraMultiVector_decl.hpp
@@ -37,10 +37,10 @@ class TpetraVector;
 
 // Because we aren't including the header...
 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > toXpetra(RCP<Tpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > vec);
+RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> toXpetra(RCP<Tpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> vec);
 
 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > toXpetra(RCP<const Tpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > vec);
+RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> toXpetra(RCP<const Tpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> vec);
 
 template <class Scalar,
           class LocalOrdinal,
@@ -56,16 +56,16 @@ class TpetraMultiVector
   //@{
 
   //! Basic constuctor.
-  TpetraMultiVector(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &map, size_t NumVectors, bool zeroOut = true);
+  TpetraMultiVector(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &map, size_t NumVectors, bool zeroOut = true);
 
   //! Copy constructor (performs a deep copy).
   TpetraMultiVector(const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &source, const Teuchos::DataAccess copyOrView = Teuchos::Copy);
 
   //! Create multivector by copying two-dimensional array of local data.
-  TpetraMultiVector(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &map, const Teuchos::ArrayView<const Scalar> &A, size_t LDA, size_t NumVectors);
+  TpetraMultiVector(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &map, const Teuchos::ArrayView<const Scalar> &A, size_t LDA, size_t NumVectors);
 
   //! Create multivector by copying array of views of local data.
-  TpetraMultiVector(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &map, const Teuchos::ArrayView<const Teuchos::ArrayView<const Scalar> > &ArrayOfPtrs, size_t NumVectors);
+  TpetraMultiVector(const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &map, const Teuchos::ArrayView<const Teuchos::ArrayView<const Scalar>> &ArrayOfPtrs, size_t NumVectors);
 
   virtual ~TpetraMultiVector();
 
@@ -96,10 +96,10 @@ class TpetraMultiVector
   //@{
 
   //! Return a Vector which is a const view of column j.
-  Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > getVector(size_t j) const;
+  Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> getVector(size_t j) const;
 
   //! Return a Vector which is a nonconst view of column j.
-  Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > getVectorNonConst(size_t j);
+  Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> getVectorNonConst(size_t j);
 
   //! Const view of the local values in a particular vector of this multivector.
   Teuchos::ArrayRCP<const Scalar> getData(size_t j) const;
@@ -111,19 +111,19 @@ class TpetraMultiVector
   void get1dCopy(Teuchos::ArrayView<Scalar> A, size_t LDA) const;
 
   //! Fill the given array with a copy of this multivector's local values.
-  void get2dCopy(Teuchos::ArrayView<const Teuchos::ArrayView<Scalar> > ArrayOfPtrs) const;
+  void get2dCopy(Teuchos::ArrayView<const Teuchos::ArrayView<Scalar>> ArrayOfPtrs) const;
 
   //! Const persisting (1-D) view of this multivector's local values.
   Teuchos::ArrayRCP<const Scalar> get1dView() const;
 
   //! Return const persisting pointers to values.
-  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > get2dView() const;
+  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar>> get2dView() const;
 
   //! Nonconst persisting (1-D) view of this multivector's local values.
   Teuchos::ArrayRCP<Scalar> get1dViewNonConst();
 
   //! Return non-const persisting pointers to values.
-  Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > get2dViewNonConst();
+  Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar>> get2dViewNonConst();
 
   //@}
 
@@ -209,7 +209,7 @@ class TpetraMultiVector
   //{@
   // Implements DistObject interface
 
-  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > getMap() const;
+  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> getMap() const;
 
   void doImport(const DistObject<Scalar, LocalOrdinal, GlobalOrdinal, Node> &source, const Import<LocalOrdinal, GlobalOrdinal, Node> &importer, CombineMode CM);
 
@@ -235,7 +235,7 @@ class TpetraMultiVector
 
   void endExport(const DistObject<Scalar, LocalOrdinal, GlobalOrdinal, Node> &dest, const Export<LocalOrdinal, GlobalOrdinal, Node> &exporter, CombineMode CM);
 
-  void replaceMap(const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > &map);
+  void replaceMap(const RCP<const Map<LocalOrdinal, GlobalOrdinal, Node>> &map);
 
   //@}
 
@@ -243,10 +243,10 @@ class TpetraMultiVector
   //@{
 
   //! TpetraMultiVector constructor to wrap a Tpetra::MultiVector object
-  TpetraMultiVector(const Teuchos::RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > &vec);
+  TpetraMultiVector(const Teuchos::RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &vec);
 
   //! Get the underlying Tpetra multivector
-  RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > getTpetra_MultiVector() const;
+  RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> getTpetra_MultiVector() const;
 
   //! Set seed for Random function.
   void setSeed(unsigned int seed);
@@ -273,7 +273,7 @@ class TpetraMultiVector
 
  private:
   //! The Tpetra::MultiVector which this class wraps.
-  RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > vec_;
+  RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> vec_;
 
 };  // TpetraMultiVector class
 
@@ -281,7 +281,7 @@ class TpetraMultiVector
 
 // Things we actually need
 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > toXpetra(RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > vec) {
+RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> toXpetra(RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> vec) {
   if (!vec.is_null())
     return rcp(new TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>(vec));
 
@@ -289,7 +289,7 @@ RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > toXpetra(RCP<Tpetra
 }
 
 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
-RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > toXpetra(RCP<const Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > vec) {
+RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> toXpetra(RCP<const Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> vec) {
   if (!vec.is_null())
     return rcp(new TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>(vec));
 
@@ -310,6 +310,18 @@ Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &toTpetra(MultiVe
   return *tX.getTpetra_MultiVector();
 }
 
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>  // Non-const RCP overload: returns the Tpetra::MultiVector wrapped by the Xpetra::MultiVector X.
+Teuchos::RCP<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
+toTpetra(const Teuchos::RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &X) {
+  return Teuchos::rcp_dynamic_cast<Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(X, true)->getTpetra_MultiVector();  // Downcast to TpetraMultiVector (second argument `true` requests throw-on-failure per the Teuchos docs). NOTE(review): the cast result is dereferenced unchecked -- confirm X is never null.
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>  // Const RCP overload: returns a pointer-to-const Tpetra::MultiVector wrapped by X.
+Teuchos::RCP<const Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
+toTpetra(const Teuchos::RCP<const Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> &X) {
+  return Teuchos::rcp_dynamic_cast<const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(X, true)->getTpetra_MultiVector();  // getTpetra_MultiVector() hands back a non-const RCP; it is converted to RCP<const ...> by the return. NOTE(review): the cast result is dereferenced unchecked -- confirm X is never null.
+}
+
 }  // namespace Xpetra
 
 #define XPETRA_TPETRAMULTIVECTOR_SHORT
diff --git a/packages/xpetra/src/Vector/Xpetra_TpetraVector_decl.hpp b/packages/xpetra/src/Vector/Xpetra_TpetraVector_decl.hpp
index 35f3d10ee260..9ca4cd99390f 100644
--- a/packages/xpetra/src/Vector/Xpetra_TpetraVector_decl.hpp
+++ b/packages/xpetra/src/Vector/Xpetra_TpetraVector_decl.hpp
@@ -162,6 +162,18 @@ toXpetra(RCP<const Tpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> ve
   return toXpetra(Teuchos::rcp_const_cast<Tpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(vec));
 }
 
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>  // Non-const RCP overload: returns the Tpetra::Vector obtained from the Xpetra::Vector X.
+Teuchos::RCP<Tpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
+toTpetra(const Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& X) {
+  return Teuchos::rcp_dynamic_cast<Xpetra::TpetraVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(X, true)->getTpetra_Vector();  // Downcast to TpetraVector (second argument `true` requests throw-on-failure per the Teuchos docs). NOTE(review): the cast result is dereferenced unchecked -- confirm X is never null.
+}
+
+template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>  // Const RCP overload: returns a pointer-to-const Tpetra::Vector obtained from X.
+Teuchos::RCP<const Tpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
+toTpetra(const Teuchos::RCP<const Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& X) {
+  return Teuchos::rcp_dynamic_cast<const Xpetra::TpetraVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(X, true)->getTpetra_Vector();  // Downcast to const TpetraVector, then fetch its Tpetra vector via getTpetra_Vector(). NOTE(review): the cast result is dereferenced unchecked -- confirm X is never null.
+}
+
 }  // namespace Xpetra
 
 #define XPETRA_TPETRAVECTOR_SHORT