Actual source code: cupmallocator.hpp
1: #ifndef CUPMALLOCATOR_HPP
2: #define CUPMALLOCATOR_HPP
4: #if defined(__cplusplus)
5: #include <petsc/private/cpp/object_pool.hpp>
7: #include "../segmentedmempool.hpp"
8: #include "cupmthrustutility.hpp"
10: #include <limits> // std::numeric_limits
12: namespace Petsc
13: {
15: namespace device
16: {
18: namespace cupm
19: {
21: // ==========================================================================================
22: // CUPM Host Allocator
23: // ==========================================================================================
25: template <DeviceType T, typename PetscType = char>
26: class HostAllocator;
28: // Allocator class to allocate pinned host memory for use with device
29: template <DeviceType T, typename PetscType>
30: class HostAllocator : public memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>, impl::Interface<T> {
31: public:
32: PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(interface_type, T);
33: using base_type = memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>;
34: using real_value_type = typename base_type::real_value_type;
35: using size_type = typename base_type::size_type;
36: using value_type = typename base_type::value_type;
38: template <typename U>
39: static PetscErrorCode allocate(value_type **, size_type, const StreamBase<U> *) noexcept;
40: template <typename U>
41: static PetscErrorCode deallocate(value_type *, const StreamBase<U> *) noexcept;
42: template <typename U>
43: static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const StreamBase<U> *) noexcept;
44: };
46: template <DeviceType T, typename P>
47: template <typename U>
48: inline PetscErrorCode HostAllocator<T, P>::allocate(value_type **ptr, size_type n, const StreamBase<U> *) noexcept
49: {
50: PetscFunctionBegin;
51: PetscCall(PetscCUPMMallocHost(ptr, n));
52: PetscFunctionReturn(PETSC_SUCCESS);
53: }
55: template <DeviceType T, typename P>
56: template <typename U>
57: inline PetscErrorCode HostAllocator<T, P>::deallocate(value_type *ptr, const StreamBase<U> *) noexcept
58: {
59: PetscFunctionBegin;
60: PetscCallCUPM(cupmFreeHost(ptr));
61: PetscFunctionReturn(PETSC_SUCCESS);
62: }
64: template <DeviceType T, typename P>
65: template <typename U>
66: inline PetscErrorCode HostAllocator<T, P>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const StreamBase<U> *stream) noexcept
67: {
68: PetscFunctionBegin;
69: PetscCall(PetscCUPMMemcpyAsync(dest, src, n, cupmMemcpyHostToHost, stream->get_stream(), true));
70: PetscFunctionReturn(PETSC_SUCCESS);
71: }
73: // ==========================================================================================
74: // CUPM Device Allocator
75: // ==========================================================================================
77: template <DeviceType T, typename PetscType = char>
78: class DeviceAllocator;
80: template <DeviceType T, typename PetscType>
81: class DeviceAllocator : public memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>, impl::Interface<T> {
82: public:
83: PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(interface_type, T);
84: using base_type = memory::impl::SegmentedMemoryPoolAllocatorBase<PetscType>;
85: using real_value_type = typename base_type::real_value_type;
86: using size_type = typename base_type::size_type;
87: using value_type = typename base_type::value_type;
89: template <typename U>
90: static PetscErrorCode allocate(value_type **, size_type, const StreamBase<U> *) noexcept;
91: template <typename U>
92: static PetscErrorCode deallocate(value_type *, const StreamBase<U> *) noexcept;
93: template <typename U>
94: static PetscErrorCode zero(value_type *, size_type, const StreamBase<U> *) noexcept;
95: template <typename U>
96: static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const StreamBase<U> *) noexcept;
97: template <typename U>
98: static PetscErrorCode set_canary(value_type *, size_type, const StreamBase<U> *) noexcept;
99: };
101: template <DeviceType T, typename P>
102: template <typename U>
103: inline PetscErrorCode DeviceAllocator<T, P>::allocate(value_type **ptr, size_type n, const StreamBase<U> *stream) noexcept
104: {
105: PetscFunctionBegin;
106: PetscCall(PetscCUPMMallocAsync(ptr, n, stream->get_stream()));
107: PetscFunctionReturn(PETSC_SUCCESS);
108: }
110: template <DeviceType T, typename P>
111: template <typename U>
112: inline PetscErrorCode DeviceAllocator<T, P>::deallocate(value_type *ptr, const StreamBase<U> *stream) noexcept
113: {
114: PetscFunctionBegin;
115: PetscCallCUPM(cupmFreeAsync(ptr, stream->get_stream()));
116: PetscFunctionReturn(PETSC_SUCCESS);
117: }
119: template <DeviceType T, typename P>
120: template <typename U>
121: inline PetscErrorCode DeviceAllocator<T, P>::zero(value_type *ptr, size_type n, const StreamBase<U> *stream) noexcept
122: {
123: PetscFunctionBegin;
124: PetscCall(PetscCUPMMemsetAsync(ptr, 0, n, stream->get_stream(), true));
125: PetscFunctionReturn(PETSC_SUCCESS);
126: }
128: template <DeviceType T, typename P>
129: template <typename U>
130: inline PetscErrorCode DeviceAllocator<T, P>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const StreamBase<U> *stream) noexcept
131: {
132: PetscFunctionBegin;
133: PetscCall(PetscCUPMMemcpyAsync(dest, src, n, cupmMemcpyDeviceToDevice, stream->get_stream(), true));
134: PetscFunctionReturn(PETSC_SUCCESS);
135: }
137: template <DeviceType T, typename P>
138: template <typename U>
139: inline PetscErrorCode DeviceAllocator<T, P>::set_canary(value_type *ptr, size_type n, const StreamBase<U> *stream) noexcept
140: {
141: using limit_t = std::numeric_limits<real_value_type>;
142: const value_type canary = limit_t::has_signaling_NaN ? limit_t::signaling_NaN() : limit_t::max();
144: PetscFunctionBegin;
145: PetscCall(impl::ThrustSet<T>(stream->get_stream(), n, ptr, &canary));
146: PetscFunctionReturn(PETSC_SUCCESS);
147: }
149: } // namespace cupm
151: } // namespace device
153: } // namespace Petsc
155: #endif // __cplusplus
157: #endif // CUPMALLOCATOR_HPP