From aa76b30773a98764976488895c359ef0f3032b22 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 9 Aug 2023 13:45:36 -0400 Subject: [PATCH 01/15] Remove unused lambda captures --- pybind_interface/pybind_main.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pybind_interface/pybind_main.cpp b/pybind_interface/pybind_main.cpp index c5615ebc..63a03e4a 100644 --- a/pybind_interface/pybind_main.cpp +++ b/pybind_interface/pybind_main.cpp @@ -377,7 +377,7 @@ std::vector> qsim_simulate(const py::dict &options) { std::vector> amplitudes; amplitudes.reserve(bitstrings.size()); - auto measure = [&bitstrings, &circuit, &litudes]( + auto measure = [&bitstrings, &litudes]( unsigned k, const StateSpace &state_space, const State &state) { for (const auto &b : bitstrings) { @@ -481,7 +481,7 @@ std::vector> qtrajectory_simulate(const py::dict &options) { Simulator simulator = factory.CreateSimulator(); StateSpace state_space = factory.CreateStateSpace(); - auto measure = [&bitstrings, &ncircuit, &litudes, &state_space]( + auto measure = [&bitstrings, &litudes, &state_space]( unsigned k, const State &state, Runner::Stat& stat) { for (const auto &b : bitstrings) { amplitudes.push_back(state_space.GetAmpl(state, b)); @@ -1082,7 +1082,7 @@ std::vector qtrajectory_sample(const py::dict &options) { std::vector> results; - auto measure = [&results, &ncircuit, &state_space]( + auto measure = [&results, &ncircuit]( unsigned k, const State& state, Runner::Stat& stat) { // Converts stat (which matches the MeasurementResult 'bits' field) into // bitstrings matching the MeasurementResult 'bitstring' field. From 036654dfa75e450551b6d9a0db41055566e2efce Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 9 Aug 2023 13:48:30 -0400 Subject: [PATCH 02/15] Remove unused variable --- lib/fuser_mqubit.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/fuser_mqubit.h b/lib/fuser_mqubit.h index aa4e6305..fdbfddb0 100644 --- a/lib/fuser_mqubit.h +++ b/lib/fuser_mqubit.h @@ -561,8 +561,6 @@ class MultiQubitGateFuser final : public Fuser { static void FuseOrphanedGates(unsigned max_fused_size, Stat& stat, std::vector& orphaned_gates, std::vector& fused_gates) { - unsigned count = 0; - for (std::size_t i = 0; i < orphaned_gates.size(); ++i) { auto ogate1 = orphaned_gates[i]; @@ -575,8 +573,6 @@ class MultiQubitGateFuser final : public Fuser { if (ogate2->visited == kFinal) continue; - ++count; - unsigned cur_size = ogate1->qubits.size() + ogate2->qubits.size(); if (cur_size <= max_fused_size) { From 61c9d09d22924edd242e3e2e1ee16244501d9ab7 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 9 Aug 2023 13:53:55 -0400 Subject: [PATCH 03/15] Add missing CUDA error checks --- lib/statespace_cuda.h | 2 +- lib/vectorspace_cuda.h | 28 ++++++++++++++++------------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/lib/statespace_cuda.h b/lib/statespace_cuda.h index 82f5128b..8f3b2055 100644 --- a/lib/statespace_cuda.h +++ b/lib/statespace_cuda.h @@ -102,7 +102,7 @@ class StateSpaceCUDA : } void SetAllZeros(State& state) const { - cudaMemset(state.get(), 0, MinSize(state.num_qubits()) * sizeof(fp_type)); + ErrorCheck(cudaMemset(state.get(), 0, MinSize(state.num_qubits()) * sizeof(fp_type))); } // Uniform superposition. diff --git a/lib/vectorspace_cuda.h b/lib/vectorspace_cuda.h index 0c1d6167..de210734 100644 --- a/lib/vectorspace_cuda.h +++ b/lib/vectorspace_cuda.h @@ -28,7 +28,7 @@ namespace detail { inline void do_not_free(void*) {} inline void free(void* ptr) { - cudaFree(ptr); + ErrorCheck(cudaFree(ptr)); } } // namespace detail @@ -114,9 +114,10 @@ class VectorSpaceCUDA { return false; } - cudaMemcpy(dest.get(), src.get(), - sizeof(fp_type) * Impl::MinSize(src.num_qubits()), - cudaMemcpyDeviceToDevice); + ErrorCheck( + cudaMemcpy(dest.get(), src.get(), + sizeof(fp_type) * Impl::MinSize(src.num_qubits()), + cudaMemcpyDeviceToDevice)); return true; } @@ -124,9 +125,10 @@ class VectorSpaceCUDA { // It is the client's responsibility to make sure that dest has at least // Impl::MinSize(src.num_qubits()) elements. bool Copy(const Vector& src, fp_type* dest) const { - cudaMemcpy(dest, src.get(), - sizeof(fp_type) * Impl::MinSize(src.num_qubits()), - cudaMemcpyDeviceToHost); + ErrorCheck( + cudaMemcpy(dest, src.get(), + sizeof(fp_type) * Impl::MinSize(src.num_qubits()), + cudaMemcpyDeviceToHost)); return true; } @@ -134,9 +136,10 @@ class VectorSpaceCUDA { // It is the client's responsibility to make sure that src has at least // Impl::MinSize(dest.num_qubits()) elements. bool Copy(const fp_type* src, Vector& dest) const { - cudaMemcpy(dest.get(), src, - sizeof(fp_type) * Impl::MinSize(dest.num_qubits()), - cudaMemcpyHostToDevice); + ErrorCheck( + cudaMemcpy(dest.get(), src, + sizeof(fp_type) * Impl::MinSize(dest.num_qubits()), + cudaMemcpyHostToDevice)); return true; } @@ -145,12 +148,13 @@ class VectorSpaceCUDA { // min(size, Impl::MinSize(dest.num_qubits())) elements. bool Copy(const fp_type* src, uint64_t size, Vector& dest) const { size = std::min(size, Impl::MinSize(dest.num_qubits())); - cudaMemcpy(dest.get(), src, sizeof(fp_type) * size, cudaMemcpyHostToDevice); + ErrorCheck( + cudaMemcpy(dest.get(), src, sizeof(fp_type) * size, cudaMemcpyHostToDevice)); return true; } void DeviceSync() { - cudaDeviceSynchronize(); + ErrorCheck(cudaDeviceSynchronize()); } protected: From caa5463d6e0f6ee52719d6d84594b06774218594 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 16 Aug 2023 15:13:35 -0400 Subject: [PATCH 04/15] Add CUDA to HIP translation --- lib/cuda2hip.h | 61 +++++++++++++++++++++++++++++++++++ lib/simulator_cuda_kernels.h | 13 +++++--- lib/statespace_cuda.h | 7 +++- lib/statespace_cuda_kernels.h | 7 +++- lib/util_cuda.h | 6 +++- lib/vectorspace_cuda.h | 9 ++++-- 6 files changed, 94 insertions(+), 9 deletions(-) create mode 100644 lib/cuda2hip.h diff --git a/lib/cuda2hip.h b/lib/cuda2hip.h new file mode 100644 index 00000000..da2d074c --- /dev/null +++ b/lib/cuda2hip.h @@ -0,0 +1,61 @@ +// Copyright 2023 Advanced Micro Devices, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SIMULATOR_CUDA2HIP_H_ +#define SIMULATOR_CUDA2HIP_H_ + +#define cublasCaxpy hipblasCaxpy +#define cublasCdotc hipblasCdotc +#define cublasCreate hipblasCreate +#define cublasCscal hipblasCscal +#define cublasCsscal hipblasCsscal +#define cublasDestroy hipblasDestroy +#define cublasDznrm2 hipblasDznrm2 +#define cublasHandle_t hipblasHandle_t +#define cublasScnrm2 hipblasScnrm2 +#define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS +#define cublasStatus_t hipblasStatus_t +#define cublasZaxpy hipblasZaxpy +#define cublasZdotc hipblasZdotc +#define cublasZdscal hipblasZdscal +#define cublasZscal hipblasZscal +#define cuCimagf hipCimagf +#define cuCimag hipCimag +#define cuComplex hipComplex +#define cuCrealf hipCrealf +#define cuCreal hipCreal +#define CUDA_C_32F HIPBLAS_C_32F +#define CUDA_C_64F HIPBLAS_C_64F +#define cudaDeviceSynchronize hipDeviceSynchronize +#define cudaError_t hipError_t +#define cudaFree hipFree +#define cudaGetErrorString hipGetErrorString +#define cudaMalloc hipMalloc +#define cudaMemcpyAsync hipMemcpyAsync +#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice +#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost +#define cudaMemcpy hipMemcpy +#define cudaMemcpyHostToDevice hipMemcpyHostToDevice +#define cudaMemset hipMemset +#define cudaPeekAtLastError hipPeekAtLastError +#define cudaSuccess hipSuccess +#define cuDoubleComplex hipDoubleComplex + +template +__device__ __forceinline__ T __shfl_down_sync( + unsigned mask, T var, unsigned int delta, int width = warpSize) { + return __shfl_down(var, delta, width); +} + +#endif // SIMULATOR_CUDA2HIP_H_ diff --git a/lib/simulator_cuda_kernels.h b/lib/simulator_cuda_kernels.h index 6510fadf..e21a9d62 100644 --- a/lib/simulator_cuda_kernels.h +++ b/lib/simulator_cuda_kernels.h @@ -15,10 +15,15 @@ #ifndef SIMULATOR_CUDA_KERNELS_H_ #define SIMULATOR_CUDA_KERNELS_H_ -#include -#include - -#include "util_cuda.h" +#ifdef __NVCC__ + #include + #include + + #include "util_cuda.h" +#elif __HIP__ + #include + #include "cuda2hip.h" +#endif namespace qsim { diff --git a/lib/statespace_cuda.h b/lib/statespace_cuda.h index 8f3b2055..81780150 100644 --- a/lib/statespace_cuda.h +++ b/lib/statespace_cuda.h @@ -15,7 +15,12 @@ #ifndef STATESPACE_CUDA_H_ #define STATESPACE_CUDA_H_ -#include +#ifdef __NVCC__ + #include +#elif __HIP__ + #include + #include "cuda2hip.h" +#endif #include #include diff --git a/lib/statespace_cuda_kernels.h b/lib/statespace_cuda_kernels.h index bcb7fd25..b54ebca9 100644 --- a/lib/statespace_cuda_kernels.h +++ b/lib/statespace_cuda_kernels.h @@ -15,7 +15,12 @@ #ifndef STATESPACE_CUDA_KERNELS_H_ #define STATESPACE_CUDA_KERNELS_H_ -#include +#ifdef __NVCC__ + #include +#elif __HIP__ + #include + #include "cuda2hip.h" +#endif #include "util_cuda.h" diff --git a/lib/util_cuda.h b/lib/util_cuda.h index 591d852d..5d8cb5df 100644 --- a/lib/util_cuda.h +++ b/lib/util_cuda.h @@ -15,7 +15,11 @@ #ifndef UTIL_CUDA_H_ #define UTIL_CUDA_H_ -#include +#ifdef __NVCC__ + #include +#elif __HIP__ + #include +#endif #include diff --git a/lib/vectorspace_cuda.h b/lib/vectorspace_cuda.h index de210734..ebf21095 100644 --- a/lib/vectorspace_cuda.h +++ b/lib/vectorspace_cuda.h @@ -15,8 +15,13 @@ #ifndef VECTORSPACE_CUDA_H_ #define VECTORSPACE_CUDA_H_ -#include -#include +#ifdef __NVCC__ + #include + #include +#elif __HIP__ + #include + #include "cuda2hip.h" +#endif #include #include From 6dd69e2095393697083d449b755e1eaf7d2675bd Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 16 Aug 2023 15:42:57 -0400 Subject: [PATCH 05/15] Add HIP to pybind11 --- pybind_interface/decide/decide.cpp | 4 +- pybind_interface/hip/CMakeLists.txt | 28 ++++++++++++++ pybind_interface/hip/pybind_main_hip.cpp | 47 ++++++++++++++++++++++++ pybind_interface/hip/pybind_main_hip.h | 17 +++++++++ qsimcirq/__init__.py | 2 + setup.py | 1 + 6 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 pybind_interface/hip/CMakeLists.txt create mode 100644 pybind_interface/hip/pybind_main_hip.cpp create mode 100644 pybind_interface/hip/pybind_main_hip.h diff --git a/pybind_interface/decide/decide.cpp b/pybind_interface/decide/decide.cpp index 6355ad3b..5e9a63f1 100644 --- a/pybind_interface/decide/decide.cpp +++ b/pybind_interface/decide/decide.cpp @@ -60,7 +60,7 @@ int detect_instructions() { } enum GPUCapabilities { - CUDA = 0, CUSTATEVEC = 1, NO_GPU = 10, NO_CUSTATEVEC = 11 }; + CUDA = 0, CUSTATEVEC = 1, HIP = 2, NO_GPU = 10, NO_CUSTATEVEC = 11 }; // For now, GPU detection is performed at compile time, as our wheels are // generated on Github Actions runners which do not have GPU support. @@ -70,6 +70,8 @@ enum GPUCapabilities { int detect_gpu() { #ifdef __NVCC__ GPUCapabilities gpu = CUDA; + #elif __HIP__ + GPUCapabilities gpu = HIP; #else GPUCapabilities gpu = NO_GPU; #endif diff --git a/pybind_interface/hip/CMakeLists.txt b/pybind_interface/hip/CMakeLists.txt new file mode 100644 index 00000000..3e89113d --- /dev/null +++ b/pybind_interface/hip/CMakeLists.txt @@ -0,0 +1,28 @@ +cmake_minimum_required(VERSION 3.18) +project(qsim LANGUAGES CXX HIP) + +IF (WIN32) + set(CMAKE_CXX_FLAGS "/O2 /openmp") +ELSE() + set(CMAKE_CXX_FLAGS "-O3 -fopenmp") +ENDIF() + +if(APPLE) + set(CMAKE_CXX_STANDARD 14) + include_directories("/usr/local/include" "/usr/local/opt/llvm/include") + link_directories("/usr/local/lib" "/usr/local/opt/llvm/lib") +endif() + +INCLUDE(../GetPybind11.cmake) +find_package(PythonLibs 3.7 REQUIRED) +find_package(HIP REQUIRED) + +include_directories(${PYTHON_INCLUDE_DIRS} ${pybind11_SOURCE_DIR}/include) + +hip_add_library(qsim_hip MODULE pybind_main_hip.cpp) + +set_target_properties(qsim_hip PROPERTIES + PREFIX "${PYTHON_MODULE_PREFIX}" + SUFFIX "${PYTHON_MODULE_EXTENSION}" +) +set_source_files_properties(pybind_main_hip.cpp PROPERTIES LANGUAGE HIP) diff --git a/pybind_interface/hip/pybind_main_hip.cpp b/pybind_interface/hip/pybind_main_hip.cpp new file mode 100644 index 00000000..d6ee181f --- /dev/null +++ b/pybind_interface/hip/pybind_main_hip.cpp @@ -0,0 +1,47 @@ +// Copyright 2019 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "pybind_main_hip.h" + +#include "../../lib/simulator_cuda.h" + +namespace qsim { + using Simulator = SimulatorCUDA; + + struct Factory { + using Simulator = qsim::Simulator; + using StateSpace = Simulator::StateSpace; + + Factory( + unsigned num_sim_threads, + unsigned num_state_threads, + unsigned num_dblocks + ) : ss_params{num_state_threads, num_dblocks} {} + + StateSpace CreateStateSpace() const { + return StateSpace(ss_params); + } + + Simulator CreateSimulator() const { + return Simulator(); + } + + StateSpace::Parameter ss_params; + }; + + inline void SetFlushToZeroAndDenormalsAreZeros() {} + inline void ClearFlushToZeroAndDenormalsAreZeros() {} +} + +#include "../pybind_main.cpp" diff --git a/pybind_interface/hip/pybind_main_hip.h b/pybind_interface/hip/pybind_main_hip.h new file mode 100644 index 00000000..55a672d5 --- /dev/null +++ b/pybind_interface/hip/pybind_main_hip.h @@ -0,0 +1,17 @@ +// Copyright 2019 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "../pybind_main.h" + +PYBIND11_MODULE(qsim_hip, m) { GPU_MODULE_BINDINGS } diff --git a/qsimcirq/__init__.py b/qsimcirq/__init__.py index 0f022a1a..d9c5504d 100644 --- a/qsimcirq/__init__.py +++ b/qsimcirq/__init__.py @@ -34,6 +34,8 @@ def _load_qsim_gpu(): instr = qsim_decide.detect_gpu() if instr == 0: qsim_gpu = importlib.import_module("qsimcirq.qsim_cuda") + elif instr == 2: + qsim_gpu = importlib.import_module("qsimcirq.qsim_hip") else: qsim_gpu = None return qsim_gpu diff --git a/setup.py b/setup.py index c57a37a9..eafa4387 100644 --- a/setup.py +++ b/setup.py @@ -111,6 +111,7 @@ def build_extension(self, ext): CMakeExtension("qsimcirq/qsim_cuda"), CMakeExtension("qsimcirq/qsim_custatevec"), CMakeExtension("qsimcirq/qsim_decide"), + CMakeExtension("qsimcirq/qsim_hip"), ], cmdclass=dict(build_ext=CMakeBuild), zip_safe=False, From 5edafd6893f910733d1e5441e9796d587faa20f9 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 16 Aug 2023 15:44:17 -0400 Subject: [PATCH 06/15] Add HIP to make and cmake files --- CMakeLists.txt | 8 ++++++- Makefile | 16 +++++++++++++ apps/Makefile | 9 ++++++++ apps/make.sh | 16 +++++++++---- pybind_interface/Makefile | 32 ++++++++++++++++++++------ pybind_interface/decide/CMakeLists.txt | 24 +++++++++++++++++-- tests/Makefile | 13 +++++++++++ tests/make.sh | 28 ++++++++++++++-------- 8 files changed, 121 insertions(+), 25 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 615a33db..f902a421 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,13 @@ cmake_minimum_required(VERSION 3.11) execute_process(COMMAND which nvcc OUTPUT_VARIABLE has_nvcc) if(has_nvcc STREQUAL "") - project(qsim) + execute_process(COMMAND which hipcc OUTPUT_VARIABLE has_hipcc) + if(has_hipcc STREQUAL "") + project(qsim) + else() + project(qsim LANGUAGES CXX HIP) + ADD_SUBDIRECTORY(pybind_interface/hip) + endif() else() project(qsim LANGUAGES CXX CUDA) ADD_SUBDIRECTORY(pybind_interface/cuda) diff --git a/Makefile b/Makefile index 9ef0afdd..227ca603 100644 --- a/Makefile +++ b/Makefile @@ -6,10 +6,12 @@ TESTS = run-cxx-tests CXX=g++ NVCC=nvcc +HIPCC=hipcc CXXFLAGS = -O3 -fopenmp ARCHFLAGS = -march=native NVCCFLAGS = -O3 +HIPCCFLAGS = -O3 # CUQUANTUM_ROOT should be set. CUSTATEVECFLAGS = -I$(CUQUANTUM_ROOT)/include -L${CUQUANTUM_ROOT}/lib -L$(CUQUANTUM_ROOT)/lib64 -lcustatevec -lcublas @@ -22,6 +24,8 @@ export ARCHFLAGS export NVCC export NVCCFLAGS export CUSTATEVECFLAGS +export HIPCC +export HIPCCFLAGS ifeq ($(PYBIND11), true) TARGETS += pybind @@ -43,6 +47,10 @@ qsim-cuda: qsim-custatevec: $(MAKE) -C apps/ qsim-custatevec +.PHONY: qsim-hip +qsim-hip: + $(MAKE) -C apps/ qsim-hip + .PHONY: pybind pybind: $(MAKE) -C pybind_interface/ pybind @@ -59,6 +67,10 @@ cuda-tests: custatevec-tests: $(MAKE) -C tests/ custatevec-tests +.PHONY: hip-tests +hip-tests: + $(MAKE) -C tests/ hip-tests + .PHONY: run-cxx-tests run-cxx-tests: cxx-tests $(MAKE) -C tests/ run-cxx-tests @@ -71,6 +83,10 @@ run-cuda-tests: cuda-tests run-custatevec-tests: custatevec-tests $(MAKE) -C tests/ run-custatevec-tests +.PHONY: run-hip-tests +run-hip-tests: hip-tests + $(MAKE) -C tests/ run-hip-tests + PYTESTS = $(shell find qsimcirq_tests/ -name '*_test.py') .PHONY: run-py-tests diff --git a/apps/Makefile b/apps/Makefile index 41fb81e5..48b25cab 100644 --- a/apps/Makefile +++ b/apps/Makefile @@ -7,6 +7,9 @@ CUDA_TARGETS := $(CUDA_TARGETS:%cuda.cu=%cuda.x) CUSTATEVEC_TARGETS = $(shell find . -maxdepth 1 -name "*custatevec.cu") CUSTATEVEC_TARGETS := $(CUSTATEVEC_TARGETS:%custatevec.cu=%custatevec.x) +HIP_TARGETS = $(shell find . -maxdepth 1 -name '*cuda.cu') +HIP_TARGETS := $(HIP_TARGETS:%cuda.cu=%hip.x) + .PHONY: qsim qsim: $(CXX_TARGETS) @@ -16,6 +19,9 @@ qsim-cuda: $(CUDA_TARGETS) .PHONY: qsim-custatevec qsim-custatevec: $(CUSTATEVEC_TARGETS) +.PHONY: qsim-hip +qsim-hip: $(HIP_TARGETS) + %.x: %.cc $(CXX) -o ./$@ $< $(CXXFLAGS) $(ARCHFLAGS) @@ -25,6 +31,9 @@ qsim-custatevec: $(CUSTATEVEC_TARGETS) %custatevec.x: %custatevec.cu $(NVCC) -o ./$@ $< $(NVCCFLAGS) $(CUSTATEVECFLAGS) +%hip.x: %cuda.cu + $(HIPCC) -o ./$@ $< $(HIPCCFLAGS) + .PHONY: clean clean: -rm -f ./*.x ./*.a ./*.so ./*.mod diff --git a/apps/make.sh b/apps/make.sh index f2e777e5..5ddd4d6b 100755 --- a/apps/make.sh +++ b/apps/make.sh @@ -23,9 +23,15 @@ g++ -O3 -march=native -fopenmp -o qsim_amplitudes.x qsim_amplitudes.cc g++ -O3 -march=native -fopenmp -o qsimh_base.x qsimh_base.cc g++ -O3 -march=native -fopenmp -o qsimh_amplitudes.x qsimh_amplitudes.cc -nvcc -O3 -o qsim_base_cuda.x qsim_base_cuda.cu -nvcc -O3 -o qsim_qtrajectory_cuda.x qsim_qtrajectory_cuda.cu +if command -v nvcc &>/dev/null; then + nvcc -O3 -o qsim_base_cuda.x qsim_base_cuda.cu + nvcc -O3 -o qsim_qtrajectory_cuda.x qsim_qtrajectory_cuda.cu -# CUQUANTUM_ROOT should be set. -CUSTATEVECFLAGS="-I${CUQUANTUM_ROOT}/include -L${CUQUANTUM_ROOT}/lib -L${CUQUANTUM_ROOT}/lib64 -lcustatevec -lcublas" -nvcc -O3 $CUSTATEVECFLAGS -o qsim_base_custatevec.x qsim_base_custatevec.cu + if [ -n "$CUQUANTUM_ROOT" ]; then + CUSTATEVECFLAGS="-I${CUQUANTUM_ROOT}/include -L${CUQUANTUM_ROOT}/lib -L${CUQUANTUM_ROOT}/lib64 -lcustatevec -lcublas" + nvcc -O3 $CUSTATEVECFLAGS -o qsim_base_custatevec.x qsim_base_custatevec.cu + fi +elif command -v hipcc &>/dev/null; then + hipcc -O3 -o qsim_base_hip.x qsim_base_cuda.cu + hipcc -O3 -o qsim_qtrajectory_hip.x qsim_qtrajectory_cuda.cu +fi diff --git a/pybind_interface/Makefile b/pybind_interface/Makefile index c7c70860..60e7cc99 100644 --- a/pybind_interface/Makefile +++ b/pybind_interface/Makefile @@ -5,6 +5,7 @@ QSIMLIB_AVX2 = ../qsimcirq/qsim_avx2`python3-config --extension-suffix` QSIMLIB_AVX512 = ../qsimcirq/qsim_avx512`python3-config --extension-suffix` QSIMLIB_CUDA = ../qsimcirq/qsim_cuda`python3-config --extension-suffix` QSIMLIB_CUSTATEVEC = ../qsimcirq/qsim_custatevec`python3-config --extension-suffix` +QSIMLIB_HIP = ../qsimcirq/qsim_hip`python3-config --extension-suffix` QSIMLIB_DECIDE = ../qsimcirq/qsim_decide`python3-config --extension-suffix` # The flags for the compilation of the simd-specific Pybind11 interfaces @@ -13,21 +14,29 @@ PYBINDFLAGS_SSE = -msse4.1 -Wall -shared -std=c++17 -fPIC `python3 -m pybind11 - PYBINDFLAGS_AVX2 = -mavx2 -mfma -Wall -shared -std=c++17 -fPIC `python3 -m pybind11 --includes` PYBINDFLAGS_AVX512 = -mavx512f -mbmi2 -Wall -shared -std=c++17 -fPIC `python3 -m pybind11 --includes` -# The flags for the compilation of GPU-specific Pybind11 interfaces +# The flags for the compilation of CUDA-specific Pybind11 interfaces PYBINDFLAGS_CUDA = -std=c++17 -x cu -Xcompiler "-Wall -shared -fPIC `python3 -m pybind11 --includes`" # The flags for the compilation of cuStateVec-specific Pybind11 interfaces PYBINDFLAGS_CUSTATEVEC = $(CUSTATEVECFLAGS) $(PYBINDFLAGS_CUDA) +# The flags for the compilation of HIP-specific Pybind11 interfaces +PYBINDFLAGS_HIP = -std=c++17 -Wall -shared -fPIC `python3 -m pybind11 --includes` + # Check for nvcc to decide compilation mode. ifeq ($(shell which $(NVCC)),) +# Check for hipcc to decide compilation mode. +ifeq ($(shell which $(HIPCC)),) pybind: pybind-cpu decide-cpu else +pybind: pybind-cpu pybind-hip decide-hip +endif +else # Check for the cuStateVec library. ifeq ($(CUQUANTUM_ROOT),) -pybind: pybind-cpu pybind-gpu decide-gpu +pybind: pybind-cpu pybind-cuda decide-cuda else -pybind: pybind-cpu pybind-gpu pybind-custatevec decide-custatevec +pybind: pybind-cpu pybind-cuda pybind-custatevec decide-custatevec endif endif @@ -40,14 +49,15 @@ pybind-cpu: .PHONY: decide-cpu decide-cpu: + echo "building decide-cpu" $(CXX) decide/decide.cpp -o $(QSIMLIB_DECIDE) $(CXXFLAGS) $(PYBINDFLAGS_BASIC) -.PHONY: pybind-gpu -pybind-gpu: +.PHONY: pybind-cuda +pybind-cuda: $(NVCC) cuda/pybind_main_cuda.cpp -o $(QSIMLIB_CUDA) $(NVCCFLAGS) $(PYBINDFLAGS_CUDA) -.PHONY: decide-gpu -decide-gpu: +.PHONY: decide-cuda +decide-cuda: $(NVCC) decide/decide.cpp -o $(QSIMLIB_DECIDE) $(NVCCFLAGS) $(PYBINDFLAGS_CUDA) .PHONY: pybind-custatevec @@ -58,6 +68,14 @@ pybind-custatevec: decide-custatevec: $(NVCC) decide/decide.cpp -D__CUSTATEVEC__ -o $(QSIMLIB_DECIDE) $(NVCCFLAGS) $(PYBINDFLAGS_CUDA) +.PHONY: pybind-hip +pybind-hip: + $(HIPCC) hip/pybind_main_hip.cpp -o $(QSIMLIB_HIP) $(HIPCCFLAGS) $(PYBINDFLAGS_HIP) + +.PHONY: decide-hip +decide-hip: + $(HIPCC) decide/decide.cpp -o $(QSIMLIB_DECIDE) $(HIPCCFLAGS) $(PYBINDFLAGS_HIP) + .PHONY: clean clean: -rm -f ./basic/*.x ./basic/*.a ./basic/*.so ./basic/*.mod $(QSIMLIB_BASIC) diff --git a/pybind_interface/decide/CMakeLists.txt b/pybind_interface/decide/CMakeLists.txt index 441f3d66..2b28ca96 100644 --- a/pybind_interface/decide/CMakeLists.txt +++ b/pybind_interface/decide/CMakeLists.txt @@ -2,7 +2,12 @@ cmake_minimum_required(VERSION 3.11) execute_process(COMMAND which nvcc OUTPUT_VARIABLE has_nvcc) if(has_nvcc STREQUAL "") - project(qsim) + execute_process(COMMAND which hipcc OUTPUT_VARIABLE has_hipcc) + if(has_hipcc STREQUAL "") + project(qsim) + else() + project(qsim LANGUAGES CXX HIP) + endif() else() project(qsim LANGUAGES CXX CUDA) endif() @@ -22,7 +27,22 @@ endif() INCLUDE(../GetPybind11.cmake) if(has_nvcc STREQUAL "") - pybind11_add_module(qsim_decide decide.cpp) + if(has_hipcc STREQUAL "") + pybind11_add_module(qsim_decide decide.cpp) + else() + find_package(HIP REQUIRED) + find_package(PythonLibs 3.7 REQUIRED) + + include_directories(${PYTHON_INCLUDE_DIRS} ${pybind11_SOURCE_DIR}/include) + + hip_add_library(qsim_decide MODULE decide.cpp) + + set_target_properties(qsim_decide PROPERTIES + PREFIX "${PYTHON_MODULE_PREFIX}" + SUFFIX "${PYTHON_MODULE_EXTENSION}" + ) + set_source_files_properties(decide.cpp PROPERTIES LANGUAGE HIP) + endif() else() find_package(PythonLibs 3.7 REQUIRED) find_package(CUDA REQUIRED) diff --git a/tests/Makefile b/tests/Makefile index f4a37278..6a81e70b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -14,6 +14,9 @@ CUDA_TARGETS := $(CUDA_TARGETS:%cuda_test.cu=%cuda_test.x) CUSTATEVEC_TARGETS = $(shell find . -maxdepth 1 -name "*custatevec_test.cu") CUSTATEVEC_TARGETS := $(CUSTATEVEC_TARGETS:%custatevec_test.cu=%custatevec_test.x) +HIP_TARGETS = $(shell find . -maxdepth 1 -name "*cuda_test.cu") +HIP_TARGETS := $(HIP_TARGETS:%cuda_test.cu=%hip_test.x) + GTEST_DIR = $(CURDIR)/googletest/googletest GMOCK_DIR = $(CURDIR)/googletest/googlemock @@ -30,6 +33,9 @@ cuda-tests: $(CUDA_TARGETS) .PHONY: custatevec-tests custatevec-tests: $(CUSTATEVEC_TARGETS) +.PHONY: hip-tests +hip-tests: $(HIP_TARGETS) + .PHONY: run-cxx-tests run-cxx-tests: cxx-tests for exe in $(CXX_TARGETS); do if ! ./$$exe; then exit 1; fi; done @@ -42,6 +48,10 @@ run-cuda-tests: cuda-tests run-custatevec-tests: custatevec-tests for exe in $(CUSTATEVEC_TARGETS); do if ! ./$$exe; then exit 1; fi; done +.PHONY: run-hip-tests +run-hip-tests: hip-tests + for exe in $(HIP_TARGETS); do if ! ./$$exe; then exit 1; fi; done + $(GTEST_DIR)/make: -git submodule update --init --recursive googletest mkdir -p $(GTEST_DIR)/make @@ -56,6 +66,9 @@ $(GTEST_DIR)/make: %custatevec_test.x: %custatevec_test.cu $(GTEST_DIR)/make $(NVCC) -o ./$@ $< $(TESTFLAGS) $(NVCCFLAGS) $(CUSTATEVECFLAGS) +%hip_test.x: %cuda_test.cu $(GTEST_DIR)/make + $(HIPCC) -o ./$@ $< $(TESTFLAGS) $(HIPCCFLAGS) + .PHONY: clean clean: -rm -f ./*.x ./*.a ./*.so ./*.mod diff --git a/tests/make.sh b/tests/make.sh index 890663da..b38742df 100755 --- a/tests/make.sh +++ b/tests/make.sh @@ -55,14 +55,22 @@ g++ -O3 -I$path_to_include -L$path_to_lib -fopenmp -o unitaryspace_basic_test.x g++ -O3 -I$path_to_include -L$path_to_lib -msse4 -fopenmp -o unitaryspace_sse_test.x unitaryspace_sse_test.cc -lgtest -lpthread g++ -O3 -I$path_to_include -L$path_to_lib -o vectorspace_test.x vectorspace_test.cc -lgtest -lpthread -nvcc -O3 -I$path_to_include -L$path_to_lib -o hybrid_cuda_test.x hybrid_cuda_test.cu -lgtest -lpthread -nvcc -O3 -I$path_to_include -L$path_to_lib -o qtrajectory_cuda_test.x qtrajectory_cuda_test.cu -lgtest -lpthread -nvcc -O3 -I$path_to_include -L$path_to_lib -o simulator_cuda_test.x simulator_cuda_test.cu -lgtest -lpthread -nvcc -O3 -I$path_to_include -L$path_to_lib -o statespace_cuda_test.x statespace_cuda_test.cu -lgtest -lpthread +if command -v nvcc &>/dev/null; then + nvcc -O3 -I$path_to_include -L$path_to_lib -o hybrid_cuda_test.x hybrid_cuda_test.cu -lgtest -lpthread + nvcc -O3 -I$path_to_include -L$path_to_lib -o qtrajectory_cuda_test.x qtrajectory_cuda_test.cu -lgtest -lpthread + nvcc -O3 -I$path_to_include -L$path_to_lib -o simulator_cuda_test.x simulator_cuda_test.cu -lgtest -lpthread + nvcc -O3 -I$path_to_include -L$path_to_lib -o statespace_cuda_test.x statespace_cuda_test.cu -lgtest -lpthread -# CUQUANTUM_ROOT should be set. -CUSTATEVECFLAGS="-I${CUQUANTUM_ROOT}/include -L${CUQUANTUM_ROOT}/lib -L${CUQUANTUM_ROOT}/lib64 -lcustatevec -lcublas" -nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o hybrid_custatevec_test.x hybrid_custatevec_test.cu -lgtest -lpthread -nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o qtrajectory_custatevec_test.x qtrajectory_custatevec_test.cu -lgtest -lpthread -nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o simulator_custatevec_test.x simulator_custatevec_test.cu -lgtest -lpthread -nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o statespace_custatevec_test.x statespace_custatevec_test.cu -lgtest -lpthread + if [ -n "$CUQUANTUM_ROOT" ]; then + CUSTATEVECFLAGS="-I${CUQUANTUM_ROOT}/include -L${CUQUANTUM_ROOT}/lib -L${CUQUANTUM_ROOT}/lib64 -lcustatevec -lcublas" + nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o hybrid_custatevec_test.x hybrid_custatevec_test.cu -lgtest -lpthread + nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o qtrajectory_custatevec_test.x qtrajectory_custatevec_test.cu -lgtest -lpthread + nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o simulator_custatevec_test.x simulator_custatevec_test.cu -lgtest -lpthread + nvcc -O3 $CUSTATEVECFLAGS -I$path_to_include -L$path_to_lib -o statespace_custatevec_test.x statespace_custatevec_test.cu -lgtest -lpthread + fi +elif command -v hipcc &>/dev/null; then + hipcc -O3 -I$path_to_include -L$path_to_lib -o hybrid_hip_test.x hybrid_cuda_test.cu -lgtest -lpthread + hipcc -O3 -I$path_to_include -L$path_to_lib -o qtrajectory_hip_test.x qtrajectory_cuda_test.cu -lgtest -lpthread + hipcc -O3 -I$path_to_include -L$path_to_lib -o simulator_hip_test.x simulator_cuda_test.cu -lgtest -lpthread + hipcc -O3 -I$path_to_include -L$path_to_lib -o statespace_hip_test.x statespace_cuda_test.cu -lgtest -lpthread +fi From c80f91d0140ba47e10fbbeeb55d65521eb9a2cb6 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Fri, 18 Aug 2023 12:56:29 -0400 Subject: [PATCH 07/15] Add HIP cleanup in pybind_interface --- pybind_interface/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/pybind_interface/Makefile b/pybind_interface/Makefile index 60e7cc99..253d3f85 100644 --- a/pybind_interface/Makefile +++ b/pybind_interface/Makefile @@ -83,5 +83,6 @@ clean: -rm -f ./avx2/*.x ./avx2/*.a ./avx2/*.so ./avx2/*.mod $(QSIMLIB_AVX2) -rm -f ./avx512/*.x ./avx512/*.a ./avx512/*.so ./avx512/*.mod $(QSIMLIB_AVX512) -rm -f ./cuda/*.x ./cuda/*.a ./cuda/*.so ./cuda/*.mod $(QSIMLIB_CUDA) + -rm -f ./hip/*.x ./hip/*.a ./hip/*.so ./hip/*.mod $(QSIMLIB_HIP) -rm -f ./custatevec/*.x ./custatevec/*.a ./custatevec/*.so ./custatevec/*.mod $(QSIMLIB_CUSTATEVEC) -rm -f ./decide/*.x ./decide/*.a ./decide/*.so ./decide/*.mod $(QSIMLIB_DECIDE) From 09611cfd2185bdf376f9575459201556c2aafdeb Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Fri, 18 Aug 2023 14:19:29 -0400 Subject: [PATCH 08/15] Remove APPLE from pybind for HIP --- pybind_interface/hip/CMakeLists.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pybind_interface/hip/CMakeLists.txt b/pybind_interface/hip/CMakeLists.txt index 3e89113d..982579fb 100644 --- a/pybind_interface/hip/CMakeLists.txt +++ b/pybind_interface/hip/CMakeLists.txt @@ -7,12 +7,6 @@ ELSE() set(CMAKE_CXX_FLAGS "-O3 -fopenmp") ENDIF() -if(APPLE) - set(CMAKE_CXX_STANDARD 14) - include_directories("/usr/local/include" "/usr/local/opt/llvm/include") - link_directories("/usr/local/lib" "/usr/local/opt/llvm/lib") -endif() - INCLUDE(../GetPybind11.cmake) find_package(PythonLibs 3.7 REQUIRED) find_package(HIP REQUIRED) From d732ea83b5ad321c63191a7f25b1664e70dfd05d Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Fri, 18 Aug 2023 14:20:52 -0400 Subject: [PATCH 09/15] Use HIPCC for CPU and GPU pybind files --- pybind_interface/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pybind_interface/Makefile b/pybind_interface/Makefile index 253d3f85..95f64b32 100644 --- a/pybind_interface/Makefile +++ b/pybind_interface/Makefile @@ -29,7 +29,7 @@ ifeq ($(shell which $(NVCC)),) ifeq ($(shell which $(HIPCC)),) pybind: pybind-cpu decide-cpu else -pybind: pybind-cpu pybind-hip decide-hip +pybind: pybind-hip decide-hip endif else # Check for the cuStateVec library. @@ -70,6 +70,10 @@ decide-custatevec: .PHONY: pybind-hip pybind-hip: + $(HIPCC) basic/pybind_main_basic.cpp -o $(QSIMLIB_BASIC) $(CXXFLAGS) $(PYBINDFLAGS_BASIC) + $(HIPCC) sse/pybind_main_sse.cpp -o $(QSIMLIB_SSE) $(CXXFLAGS) $(PYBINDFLAGS_SSE) + $(HIPCC) avx2/pybind_main_avx2.cpp -o $(QSIMLIB_AVX2) $(CXXFLAGS) $(PYBINDFLAGS_AVX2) + $(HIPCC) avx512/pybind_main_avx512.cpp -o $(QSIMLIB_AVX512) $(CXXFLAGS) $(PYBINDFLAGS_AVX512) $(HIPCC) hip/pybind_main_hip.cpp -o $(QSIMLIB_HIP) $(HIPCCFLAGS) $(PYBINDFLAGS_HIP) .PHONY: decide-hip From 31fff3bd04d3933b892f2ee5e5924e83f7dae86f Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Fri, 18 Aug 2023 14:23:01 -0400 Subject: [PATCH 10/15] Add CMAKE_MODULE_PATH for pybind with HIP --- pybind_interface/decide/CMakeLists.txt | 1 + pybind_interface/hip/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+) diff --git a/pybind_interface/decide/CMakeLists.txt b/pybind_interface/decide/CMakeLists.txt index 2b28ca96..b94b0ade 100644 --- a/pybind_interface/decide/CMakeLists.txt +++ b/pybind_interface/decide/CMakeLists.txt @@ -30,6 +30,7 @@ if(has_nvcc STREQUAL "") if(has_hipcc STREQUAL "") pybind11_add_module(qsim_decide decide.cpp) else() + list(APPEND CMAKE_MODULE_PATH "/opt/rocm/lib/cmake/hip") find_package(HIP REQUIRED) find_package(PythonLibs 3.7 REQUIRED) diff --git a/pybind_interface/hip/CMakeLists.txt b/pybind_interface/hip/CMakeLists.txt index 982579fb..fe4b1c54 100644 --- a/pybind_interface/hip/CMakeLists.txt +++ b/pybind_interface/hip/CMakeLists.txt @@ -9,6 +9,8 @@ ENDIF() INCLUDE(../GetPybind11.cmake) find_package(PythonLibs 3.7 REQUIRED) + +list(APPEND CMAKE_MODULE_PATH "/opt/rocm/lib/cmake/hip") find_package(HIP REQUIRED) include_directories(${PYTHON_INCLUDE_DIRS} ${pybind11_SOURCE_DIR}/include) From 9e12b68ca77ceab07e656b93721cf17bbe4b2937 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Fri, 18 Aug 2023 14:24:13 -0400 Subject: [PATCH 11/15] Add hipcc detection in setup.py --- setup.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/setup.py b/setup.py index eafa4387..593620a6 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ import os import re import sys +import shutil import platform import subprocess @@ -63,6 +64,12 @@ def build_extension(self, ext): "-DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++", ] + if shutil.which("hipcc") is not None: + cmake_args += [ + "-DCMAKE_C_COMPILER=hipcc", + "-DCMAKE_CXX_COMPILER=hipcc", + ] + env = os.environ.copy() env["CXXFLAGS"] = '{} -DVERSION_INFO=\\"{}\\"'.format( env.get("CXXFLAGS", ""), self.distribution.get_version() From d25454331acdd3f31d8049f45b99b6a0637cbde2 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Mon, 16 Oct 2023 14:43:59 -0400 Subject: [PATCH 12/15] Fix style --- lib/statespace_cuda.h | 3 ++- lib/vectorspace_cuda.h | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/lib/statespace_cuda.h b/lib/statespace_cuda.h index 81780150..660db074 100644 --- a/lib/statespace_cuda.h +++ b/lib/statespace_cuda.h @@ -107,7 +107,8 @@ class StateSpaceCUDA : } void SetAllZeros(State& state) const { - ErrorCheck(cudaMemset(state.get(), 0, MinSize(state.num_qubits()) * sizeof(fp_type))); + ErrorCheck(cudaMemset(state.get(), 0, + MinSize(state.num_qubits()) * sizeof(fp_type))); } // Uniform superposition. diff --git a/lib/vectorspace_cuda.h b/lib/vectorspace_cuda.h index ebf21095..fd91553d 100644 --- a/lib/vectorspace_cuda.h +++ b/lib/vectorspace_cuda.h @@ -120,9 +120,9 @@ class VectorSpaceCUDA { } ErrorCheck( - cudaMemcpy(dest.get(), src.get(), - sizeof(fp_type) * Impl::MinSize(src.num_qubits()), - cudaMemcpyDeviceToDevice)); + cudaMemcpy(dest.get(), src.get(), + sizeof(fp_type) * Impl::MinSize(src.num_qubits()), + cudaMemcpyDeviceToDevice)); return true; } @@ -131,9 +131,9 @@ class VectorSpaceCUDA { // Impl::MinSize(src.num_qubits()) elements. bool Copy(const Vector& src, fp_type* dest) const { ErrorCheck( - cudaMemcpy(dest, src.get(), - sizeof(fp_type) * Impl::MinSize(src.num_qubits()), - cudaMemcpyDeviceToHost)); + cudaMemcpy(dest, src.get(), + sizeof(fp_type) * Impl::MinSize(src.num_qubits()), + cudaMemcpyDeviceToHost)); return true; } @@ -142,9 +142,9 @@ class VectorSpaceCUDA { // Impl::MinSize(dest.num_qubits()) elements. bool Copy(const fp_type* src, Vector& dest) const { ErrorCheck( - cudaMemcpy(dest.get(), src, - sizeof(fp_type) * Impl::MinSize(dest.num_qubits()), - cudaMemcpyHostToDevice)); + cudaMemcpy(dest.get(), src, + sizeof(fp_type) * Impl::MinSize(dest.num_qubits()), + cudaMemcpyHostToDevice)); return true; } @@ -154,7 +154,9 @@ class VectorSpaceCUDA { bool Copy(const fp_type* src, uint64_t size, Vector& dest) const { size = std::min(size, Impl::MinSize(dest.num_qubits())); ErrorCheck( - cudaMemcpy(dest.get(), src, sizeof(fp_type) * size, cudaMemcpyHostToDevice)); + cudaMemcpy(dest.get(), src, + sizeof(fp_type) * size, + cudaMemcpyHostToDevice)); return true; } From a1ab2e2700fc3f948da72f1355755ce79ebd03eb Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Tue, 7 Nov 2023 14:14:54 -0500 Subject: [PATCH 13/15] Add documentation for AMD GPU support --- docs/_book.yaml | 2 + docs/tutorials/amd_gpu.md | 82 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 docs/tutorials/amd_gpu.md diff --git a/docs/_book.yaml b/docs/_book.yaml index 2fe9a2c8..05c862b3 100644 --- a/docs/_book.yaml +++ b/docs/_book.yaml @@ -26,6 +26,8 @@ upper_tabs: path: /qsim/tutorials/q32d14 - title: "Simulate noise" path: /qsim/tutorials/noisy_qsimcirq + - title: "AMD GPU support" + path: /qsim/tutorials/amd_gpu - name: "Guides" contents: diff --git a/docs/tutorials/amd_gpu.md b/docs/tutorials/amd_gpu.md new file mode 100644 index 00000000..dcd29500 --- /dev/null +++ b/docs/tutorials/amd_gpu.md @@ -0,0 +1,82 @@ +# Support for AMD Instinct™ MI Series Accelerators + +Qsim provides support for AMD Instinct accelerators. +The implementation covers the native GPU support in Qsim +by utilizing [HIP](https://rocm.docs.amd.com/projects/HIP) +(Heterogeneous-Compute Interface for Portability). +The cuQuantum implementation is currently not covered. + +## Building + +To enable support for AMD Instinct GPUs, Qsim needs to be built from sources. +This can be done as follows: + +``` +conda env list +conda create -y -n CirqDevEnv python=3 +conda activate CirqDevEnv +pip install pybind11 + +git clone https://github.com/quantumlib/qsim.git +cd qsim + +make -j qsim # to build CPU qsim +make -j qsim-hip # to build HIP qsim +make -j pybind # to build Python bindings +make -j cxx-tests # to build CPU tests +make -j hip-tests # to build HIP tests + +pip install . +``` + +Note: To avoid problems when building Qsim with support for AMD GPUs, +make sure to use the latest version of CMake. + +## Testing + +### Simulator + +To test the Qsim simulator: + +``` +make run-cxx-tests # to run CPU tests +make run-hip-tests # to run HIP tests +``` + +or + +``` +cd tests +for file in *.x; do ./"$file"; done # to run all tests +for file in *_hip_test.x; do ./"$file"; done # to run HIP tests only +``` + +### Python Bindings + +To test the Python bindings: + +``` +make run-py-tests +``` + +or + +``` +cd qsimcirq_tests +python3 -m pytest -v qsimcirq_test.py +``` + +## Using + +Using Qsim on AMD Instinct GPUs is identical to using it on NVIDIA GPUs. +I.e., it is done by passing `use_gpu=True` and `gpu_mode=0` as `qsimcirq.QSimOptions`: + +``` +simulator = qsimcirq.QSimSimulator(qsim_options=qsimcirq.QSimOptions( + use_gpu=True, + gpu_mode=0, + ... + )) +``` + +Note: `gpu_mode` has to be set to zero for AMD GPUs, as cuStateVec is not supported. From 56c27c60db5a5725c078b96818fa272d95ca82de Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 8 Nov 2023 12:13:00 -0500 Subject: [PATCH 14/15] Replace Qsim with qsim in amd_gpu.md --- docs/tutorials/amd_gpu.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/tutorials/amd_gpu.md b/docs/tutorials/amd_gpu.md index dcd29500..11965c2d 100644 --- a/docs/tutorials/amd_gpu.md +++ b/docs/tutorials/amd_gpu.md @@ -1,14 +1,14 @@ # Support for AMD Instinct™ MI Series Accelerators -Qsim provides support for AMD Instinct accelerators. -The implementation covers the native GPU support in Qsim +qsim provides support for AMD Instinct accelerators. +The implementation covers the native GPU support in qsim by utilizing [HIP](https://rocm.docs.amd.com/projects/HIP) (Heterogeneous-Compute Interface for Portability). The cuQuantum implementation is currently not covered. ## Building -To enable support for AMD Instinct GPUs, Qsim needs to be built from sources. +To enable support for AMD Instinct GPUs, qsim needs to be built from sources. This can be done as follows: ``` @@ -29,14 +29,14 @@ make -j hip-tests # to build HIP tests pip install . ``` -Note: To avoid problems when building Qsim with support for AMD GPUs, +Note: To avoid problems when building qsim with support for AMD GPUs, make sure to use the latest version of CMake. ## Testing ### Simulator -To test the Qsim simulator: +To test the qsim simulator: ``` make run-cxx-tests # to run CPU tests @@ -68,7 +68,7 @@ python3 -m pytest -v qsimcirq_test.py ## Using -Using Qsim on AMD Instinct GPUs is identical to using it on NVIDIA GPUs. +Using qsim on AMD Instinct GPUs is identical to using it on NVIDIA GPUs. I.e., it is done by passing `use_gpu=True` and `gpu_mode=0` as `qsimcirq.QSimOptions`: ``` From 62d64c4198ae8701b9bac7337bc28959fd855053 Mon Sep 17 00:00:00 2001 From: Jakub Kurzak Date: Wed, 8 Nov 2023 12:27:33 -0500 Subject: [PATCH 15/15] Add pointers to AMD ROCm Platform to amd_gpu.md --- docs/tutorials/amd_gpu.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/tutorials/amd_gpu.md b/docs/tutorials/amd_gpu.md index 11965c2d..e7c5f613 100644 --- a/docs/tutorials/amd_gpu.md +++ b/docs/tutorials/amd_gpu.md @@ -2,13 +2,17 @@ qsim provides support for AMD Instinct accelerators. The implementation covers the native GPU support in qsim -by utilizing [HIP](https://rocm.docs.amd.com/projects/HIP) +by utilizing [AMD HIP SDK](https://rocm.docs.amd.com/projects/HIP) (Heterogeneous-Compute Interface for Portability). The cuQuantum implementation is currently not covered. ## Building -To enable support for AMD Instinct GPUs, qsim needs to be built from sources. +Building qsim with support for AMD Instinct accelerators requires installation of +[AMD ROCm™ Open Software Platform](https://www.amd.com/en/developer/resources/rocm-hub.html). +Instructions for installing ROCm are available at https://rocm.docs.amd.com/. + +To enable support for AMD GPUs, qsim needs to be built from sources. This can be done as follows: ```