// ManagedMat.hpp
#pragma once

#include <cuda_runtime.h>

#include <opencv2/core/utility.hpp>
#include <opencv2/cudaarithm.hpp>

class ManagedMat {
  public:
	ManagedMat(const ManagedMat &) = delete;
	ManagedMat &operator=(const ManagedMat &) = delete;
	ManagedMat &operator=(ManagedMat &&) noexcept = delete;
	ManagedMat(ManagedMat &&) noexcept = delete;

	ManagedMat(int rows, int cols, int type)
	    : memorySize(rows * cols * cv::getElemSize(type)) {
		cudaMallocManaged(&managedMemory, memorySize);

		hostMat = cv::Mat(rows, cols, type, managedMemory);
		deviceMat = cv::cuda::GpuMat(rows, cols, type, managedMemory);
	}
	ManagedMat(const cv::Size &size, int type)
	    : ManagedMat(size.height, size.width, type) {}

	cv::Mat &host() {
		prefetchToCpu();
		return hostMat;
	}

	cv::cuda::GpuMat &device() {
		prefetchToGpu();
		return deviceMat;
	}

	~ManagedMat() { cudaFree(managedMemory); }

  private:
	const std::size_t memorySize;
	void *managedMemory{nullptr};

	cv::Mat hostMat;
	cv::cuda::GpuMat deviceMat;

	void prefetchToGpu() const {
		cudaStreamAttachMemAsync(nullptr, managedMemory, memorySize,
		                         cudaMemAttachGlobal);
	}

	void prefetchToCpu() const {
		cudaStreamAttachMemAsync(nullptr, managedMemory, memorySize,
		                         cudaMemAttachHost);
		cudaStreamSynchronize(nullptr);
	}
};