ManagedMat.hpp
1.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#pragma once
#include <cuda_runtime.h>
#include <opencv2/core/utility.hpp>
#include <opencv2/cudaarithm.hpp>
class ManagedMat {
public:
ManagedMat(const ManagedMat &) = delete;
ManagedMat &operator=(const ManagedMat &) = delete;
ManagedMat &operator=(ManagedMat &&) noexcept = delete;
ManagedMat(ManagedMat &&) noexcept = delete;
ManagedMat(int rows, int cols, int type)
: memorySize(rows * cols * cv::getElemSize(type)) {
cudaMallocManaged(&managedMemory, memorySize);
hostMat = cv::Mat(rows, cols, type, managedMemory);
deviceMat = cv::cuda::GpuMat(rows, cols, type, managedMemory);
}
ManagedMat(const cv::Size &size, int type)
: ManagedMat(size.height, size.width, type) {}
cv::Mat &host() {
prefetchToCpu();
return hostMat;
}
cv::cuda::GpuMat &device() {
prefetchToGpu();
return deviceMat;
}
~ManagedMat() { cudaFree(managedMemory); }
private:
const std::size_t memorySize;
void *managedMemory{nullptr};
cv::Mat hostMat;
cv::cuda::GpuMat deviceMat;
void prefetchToGpu() const {
cudaStreamAttachMemAsync(nullptr, managedMemory, memorySize,
cudaMemAttachGlobal);
}
void prefetchToCpu() const {
cudaStreamAttachMemAsync(nullptr, managedMemory, memorySize,
cudaMemAttachHost);
cudaStreamSynchronize(nullptr);
}
};