From 8058e2b0f0eed4415591dc49cce4ff0dd237bf93 Mon Sep 17 00:00:00 2001 From: Nicolas Cornu Date: Tue, 14 Dec 2021 11:20:07 +0100 Subject: [PATCH 1/4] Implement omp_get_mapped_ptr --- coreneuron/utils/offload.hpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index 7ec41f4f4..4be1add8b 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -21,16 +21,30 @@ #define nrn_pragma_omp(x) #include #endif +#include +#include #include namespace coreneuron { +#define cnrn_target_deviceptr(h_ptr) [&]() { std::cout << #h_ptr << std::endl; return cnrn_target_deviceptr2(h_ptr);}() + template -T* cnrn_target_deviceptr(const T* h_ptr) { +T* cnrn_target_deviceptr2(T* h_ptr) { #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENACC) - return static_cast(acc_deviceptr(const_cast(h_ptr))); + return static_cast(acc_deviceptr(h_ptr)); #elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP) - return static_cast(omp_get_mapped_ptr(const_cast(h_ptr), omp_get_default_device())); + if (omp_get_mapped_ptr(h_ptr, omp_get_default_device()) == nullptr) { + throw std::runtime_error("here"); + } + T *d_ptr = nullptr; + + nrn_pragma_omp(target data use_device_ptr(h_ptr)) + { + d_ptr = h_ptr; + } + + return d_ptr; #else throw std::runtime_error("cnrn_target_deviceptr() not implemented without OpenACC/OpenMP and gpu build"); #endif From b50448c31f6f59e9df9e4c0ce397f43a2858acee Mon Sep 17 00:00:00 2001 From: Nicolas Cornu Date: Tue, 14 Dec 2021 19:49:30 +0100 Subject: [PATCH 2/4] If mechanism is artificial data should be copied --- coreneuron/gpu/nrn_acc_manager.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index 9bd635d77..34325a431 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -188,8 +188,13 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) { int szdp = corenrn.get_prop_dparam_size()[type]; int is_art = corenrn.get_is_artificial()[type]; - // get device pointer for corresponding mechanism data - dptr = cnrn_target_deviceptr(tml->ml->data); + if (is_art) { + // data has been allocated somewhere else + dptr = cnrn_target_copyin(tml->ml->data); + } else { + // get device pointer for corresponding mechanism data + dptr = cnrn_target_deviceptr(tml->ml->data); + } cnrn_target_memcpy_to_device(&(d_ml->data), &(dptr)); From 72dcb0a1061e0f8b1828df081ba39c897fd409cd Mon Sep 17 00:00:00 2001 From: Nicolas Cornu Date: Wed, 15 Dec 2021 00:14:30 +0100 Subject: [PATCH 3/4] clean --- coreneuron/utils/offload.hpp | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp index 4be1add8b..ad4189ec1 100644 --- a/coreneuron/utils/offload.hpp +++ b/coreneuron/utils/offload.hpp @@ -21,27 +21,21 @@ #define nrn_pragma_omp(x) #include #endif -#include -#include #include namespace coreneuron { -#define cnrn_target_deviceptr(h_ptr) [&]() { std::cout << #h_ptr << std::endl; return cnrn_target_deviceptr2(h_ptr);}() - template -T* cnrn_target_deviceptr2(T* h_ptr) { +T* cnrn_target_deviceptr(const T* h_ptr) { #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENACC) - return static_cast(acc_deviceptr(h_ptr)); + return static_cast(acc_deviceptr(const_cast(h_ptr))); #elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP) - if (omp_get_mapped_ptr(h_ptr, omp_get_default_device()) == nullptr) { - throw std::runtime_error("here"); - } T *d_ptr = nullptr; + T *_h_ptr = const_cast(h_ptr); - nrn_pragma_omp(target data use_device_ptr(h_ptr)) + nrn_pragma_omp(target data use_device_ptr(_h_ptr)) { - d_ptr = h_ptr; + d_ptr = _h_ptr; } return d_ptr; @@ -56,7 +50,7 @@ T* cnrn_target_copyin(const T* h_ptr, std::size_t len = 1) { return static_cast(acc_copyin(const_cast(h_ptr), len * sizeof(T))); #elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP) #pragma omp target enter data map(to:h_ptr[:len]) - return cnrn_target_deviceptr(const_cast(h_ptr)); + return cnrn_target_deviceptr(h_ptr); #else throw std::runtime_error("cnrn_target_copyin() not implemented without OpenACC/OpenMP and gpu build"); #endif From 0c880ec3623808cf343db11b4457b2cf37b52e20 Mon Sep 17 00:00:00 2001 From: Nicolas Cornu Date: Wed, 15 Dec 2021 17:17:30 +0100 Subject: [PATCH 4/4] Don't allocate if art --- coreneuron/gpu/nrn_acc_manager.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp index 34325a431..2c18f22d9 100644 --- a/coreneuron/gpu/nrn_acc_manager.cpp +++ b/coreneuron/gpu/nrn_acc_manager.cpp @@ -188,13 +188,10 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) { int szdp = corenrn.get_prop_dparam_size()[type]; int is_art = corenrn.get_is_artificial()[type]; - if (is_art) { - // data has been allocated somewhere else - dptr = cnrn_target_copyin(tml->ml->data); - } else { - // get device pointer for corresponding mechanism data - dptr = cnrn_target_deviceptr(tml->ml->data); - } + // If the mechanism is artificial data are not inside nt->_data but in a newly + // allocated block. As we never run code for artificial cell inside GPU + // we don't copy it. + dptr = is_art ? nullptr : cnrn_target_deviceptr(tml->ml->data); cnrn_target_memcpy_to_device(&(d_ml->data), &(dptr));