thermal_event_detection_benchmark.hpp 3.23 KB
#pragma once

#include "benchmark.hpp"

#include "data/ResourceLocator.hpp"

#include "algorithm/gpu/hotspot/OverloadHotspotDetectionAlgorithm.hpp"
#include "algorithm/gpu/surfacelayer/SurfaceLayerDetectionAlgorithm.hpp"
/* CPU implementations are in Cpu:: namespace */
#include "algorithm/cpu/hotspot/OverloadHotspotDetectionAlgorithm.hpp"
#include "algorithm/cpu/surfacelayer/SurfaceLayerDetectionAlgorithm.hpp"

#include <memory>
#include <stdexcept>
#include <string>

namespace ThermalEventDetectionBenchmark {

/**
 * Runs algorithm T over the frame stream once and records per-frame timings.
 *
 * Every frame from index 0 up to and including lastFrame is fed to the
 * algorithm, but only the timings of frames in [firstFrame, lastFrame] are
 * kept. The loop stops early if the frame provider runs out of frames, so
 * the result may hold fewer than (lastFrame - firstFrame + 1) entries.
 *
 * @tparam T          Algorithm type; must provide setup(size, loader) and
 *                    handleFrame(frame, timestamp).
 * @tparam firstFrame Index of the first frame whose timing is recorded.
 * @tparam lastFrame  Index of the last frame processed and recorded.
 * @param frame       Matrix whose data buffer receives each frame from the
 *                    provider before it is handed to the algorithm.
 * @return handleFrame() durations in milliseconds, one per recorded frame.
 */
template <typename T, std::size_t firstFrame, std::size_t lastFrame>
std::vector<double> benchmarkAlgorithm(cv::Mat frame) {
	/* Without this check a reversed window makes the unsigned subtraction
	 * below wrap around and request an enormous reservation at run time. */
	static_assert(firstFrame <= lastFrame,
	              "firstFrame must not be greater than lastFrame");

	std::vector<double> timings;
	timings.reserve(lastFrame - firstFrame + 1);

	const CVMatLoader loader(
	    ResourceLocator::getPathProvider().path("20171114.053_AEF20.h5"));

	T algorithm{};
	algorithm.setup(frame.size(), loader);

	auto provider = Benchmark::createFrameProvider();
	for (std::size_t i = 0; provider->hasNext() && i <= lastFrame; ++i) {
		/* Fetching the frame is deliberately outside the timed region. */
		const auto timestamp = provider->next(frame.data);
		const auto start = static_cast<double>(cv::getTickCount());
		algorithm.handleFrame(frame, timestamp);
		/* Tick delta divided by ticks-per-second, scaled to milliseconds. */
		const auto timeMs = 1e3 *
		                    (static_cast<double>(cv::getTickCount()) - start) /
		                    cv::getTickFrequency();
		/* Frames before firstFrame are processed but not recorded. */
		if (i >= firstFrame) {
			timings.push_back(timeMs);
		}
	}

	return timings;
}

/**
 * Repeats benchmarkAlgorithm() `iters` times and collects the mean per-frame
 * timing of each repetition.
 *
 * @param frame Matrix forwarded unchanged to every repetition.
 * @return One value per repetition: the mean of that run's recorded timings.
 */
template <typename T, std::size_t firstFrame, std::size_t lastFrame,
          std::size_t iters>
std::vector<double> benchmarkAlgorithmN(cv::Mat frame) {
	std::vector<double> meanPerRun;
	meanPerRun.reserve(iters);

	std::size_t remaining = iters;
	while (remaining > 0) {
		const std::vector<double> perFrame =
		    benchmarkAlgorithm<T, firstFrame, lastFrame>(frame);
		/* cv::mean yields a Scalar; channel 0 carries the average. */
		const auto average = cv::mean(perFrame);
		meanPerRun.push_back(average[0]);
		--remaining;
	}

	return meanPerRun;
}

template <typename T, std::size_t firstFrame, std::size_t lastFrame,
          std::size_t iters>
std::vector<double> benchmarkGpu() {
	/* Page-locked memory */
	void *data{nullptr};
	cudaSetDeviceFlags(cudaDeviceMapHost);
	cudaHostAlloc(&data, 1024 * 768 * cv::getElemSize(CV_16UC1),
	              cudaHostAllocMapped);
	const cv::Mat frame(cv::Size(1024, 768), CV_16UC1, data);

	auto timings = benchmarkAlgorithmN<T, firstFrame, lastFrame, iters>(frame);
	cudaFreeHost(data);

	return timings;
}

/**
 * Benchmarks algorithm T using a 1024x768 CV_16UC1 frame allocated by
 * cv::Mat itself.
 *
 * @return Mean per-frame timing of each of the `iters` repetitions, as
 *         produced by benchmarkAlgorithmN().
 */
template <typename T, std::size_t firstFrame, std::size_t lastFrame,
          std::size_t iters>
std::vector<double> benchmarkCpu() {
	const cv::Size frameSize(1024, 768);
	const cv::Mat hostFrame(frameSize, CV_16UC1);

	auto meanTimings =
	    benchmarkAlgorithmN<T, firstFrame, lastFrame, iters>(hostFrame);
	return meanTimings;
}

/**
 * Runs all four benchmarks (GPU and CPU variants of the overload hotspot and
 * surface layer detection algorithms) and prints their statistics.
 *
 * Each benchmark prints a heading to std::cout and passes its timings to
 * Benchmark::displayStats(). The hotspot benchmarks record frames 239-258,
 * the surface layer benchmarks frames 16-41, each repeated 20 times.
 *
 * Declared inline because it is defined in a header: without it, including
 * this file from more than one translation unit produces duplicate
 * definitions at link time.
 */
inline void run() {
	constexpr auto iterations{20};

	std::cout << "GPU Overload Hotspot Detection Algorithm\n";
	Benchmark::displayStats(benchmarkGpu<OverloadHotspotDetectionAlgorithm, 239,
	                                     258, iterations>());
	std::cout << "GPU Surface Layer Detection Algorithm\n";
	Benchmark::displayStats(
	    benchmarkGpu<SurfaceLayerDetectionAlgorithm, 16, 41, iterations>());
	std::cout << "CPU Overload Hotspot Detection Algorithm\n";
	Benchmark::displayStats(benchmarkCpu<Cpu::OverloadHotspotDetectionAlgorithm,
	                                     239, 258, iterations>());
	std::cout << "CPU Surface Layer Detection Algorithm\n";
	Benchmark::displayStats(benchmarkCpu<Cpu::SurfaceLayerDetectionAlgorithm,
	                                     16, 41, iterations>());
}

} // namespace ThermalEventDetectionBenchmark