// thermal_event_detection_benchmark.hpp
#pragma once
#include <memory>
#include <stdexcept>
#include <string>

#include "benchmark.hpp"
#include "data/ResourceLocator.hpp"
#include "algorithm/gpu/hotspot/OverloadHotspotDetectionAlgorithm.hpp"
#include "algorithm/gpu/surfacelayer/SurfaceLayerDetectionAlgorithm.hpp"
/* CPU implementations are in Cpu:: namespace */
#include "algorithm/cpu/hotspot/OverloadHotspotDetectionAlgorithm.hpp"
#include "algorithm/cpu/surfacelayer/SurfaceLayerDetectionAlgorithm.hpp"
namespace ThermalEventDetectionBenchmark {
/**
 * Runs algorithm `T` once over the frame sequence and records the per-frame
 * processing time for the frames in the window [firstFrame, lastFrame].
 *
 * Frames with an index below `firstFrame` are still handed to the algorithm
 * (so that it sees the whole sequence up to the window), but their timings are
 * discarded. Iteration stops after `lastFrame` or as soon as the frame
 * provider runs out of frames, whichever comes first.
 *
 * @tparam T          Algorithm type; must be default-constructible and offer
 *                    `setup(cv::Size, loader)` and `handleFrame(frame, ts)`.
 * @tparam firstFrame Zero-based index of the first frame that is timed.
 * @tparam lastFrame  Zero-based index of the last frame that is timed.
 * @param frame       Frame header whose `data` buffer is passed to the frame
 *                    provider for every frame (presumably the provider fills
 *                    it in place -- confirm against Benchmark's provider).
 * @return Processing time in milliseconds for each timed frame, in frame
 *         order. May hold fewer than `lastFrame - firstFrame + 1` entries if
 *         the provider is exhausted early.
 */
template <typename T, std::size_t firstFrame, std::size_t lastFrame>
std::vector<double> benchmarkAlgorithm(cv::Mat frame) {
  /* The window size below is computed in unsigned arithmetic; a swapped
     window would wrap around and request an absurdly large reservation. */
  static_assert(firstFrame <= lastFrame,
                "benchmarkAlgorithm: firstFrame must not exceed lastFrame");

  std::vector<double> timings;
  timings.reserve(lastFrame - firstFrame + 1);

  const CVMatLoader loader(
      ResourceLocator::getPathProvider().path("20171114.053_AEF20.h5"));
  T algorithm{};
  algorithm.setup(frame.size(), loader);

  auto provider = Benchmark::createFrameProvider();
  for (std::size_t i = 0; provider->hasNext() && i <= lastFrame; ++i) {
    /* Fetching the frame is deliberately kept outside the timed region. */
    const auto timestamp = provider->next(frame.data);

    const auto start = static_cast<double>(cv::getTickCount());
    algorithm.handleFrame(frame, timestamp);
    /* Tick difference divided by ticks-per-second, scaled to milliseconds. */
    const auto timeMs = 1e3 *
                        (static_cast<double>(cv::getTickCount()) - start) /
                        cv::getTickFrequency();

    if (i >= firstFrame) {
      timings.push_back(timeMs);
    }
  }
  return timings;
}
/**
 * Repeats the single-pass benchmark `iters` times and condenses every pass
 * into one number: the mean per-frame time of that pass.
 *
 * @tparam T          Algorithm type forwarded to benchmarkAlgorithm.
 * @tparam firstFrame First timed frame index, forwarded unchanged.
 * @tparam lastFrame  Last timed frame index, forwarded unchanged.
 * @tparam iters      Number of complete benchmark passes to perform.
 * @param frame       Frame header handed to every pass.
 * @return One mean timing per pass (`iters` entries), in execution order.
 */
template <typename T, std::size_t firstFrame, std::size_t lastFrame,
          std::size_t iters>
std::vector<double> benchmarkAlgorithmN(cv::Mat frame) {
  std::vector<double> meanPerPass;
  meanPerPass.reserve(iters);

  for (std::size_t remaining = iters; remaining > 0; --remaining) {
    const std::vector<double> passTimings =
        benchmarkAlgorithm<T, firstFrame, lastFrame>(frame);
    /* cv::mean yields a cv::Scalar; the first component is the average of
       the one-channel timing data. */
    const cv::Scalar average = cv::mean(passTimings);
    meanPerPass.push_back(average[0]);
  }

  return meanPerPass;
}
template <typename T, std::size_t firstFrame, std::size_t lastFrame,
std::size_t iters>
std::vector<double> benchmarkGpu() {
/* Page-locked memory */
void *data{nullptr};
cudaSetDeviceFlags(cudaDeviceMapHost);
cudaHostAlloc(&data, 1024 * 768 * cv::getElemSize(CV_16UC1),
cudaHostAllocMapped);
const cv::Mat frame(cv::Size(1024, 768), CV_16UC1, data);
auto timings = benchmarkAlgorithmN<T, firstFrame, lastFrame, iters>(frame);
cudaFreeHost(data);
return timings;
}
/**
 * Benchmarks a CPU algorithm `T` on a 1024x768 `CV_16UC1` frame whose buffer
 * is allocated by `cv::Mat` itself.
 *
 * @tparam T          CPU algorithm type forwarded to benchmarkAlgorithmN.
 * @tparam firstFrame First timed frame index.
 * @tparam lastFrame  Last timed frame index.
 * @tparam iters      Number of benchmark passes.
 * @return One mean per-frame timing (milliseconds) per pass.
 */
template <typename T, std::size_t firstFrame, std::size_t lastFrame,
          std::size_t iters>
std::vector<double> benchmarkCpu() {
  const cv::Size frameSize(1024, 768);
  const cv::Mat frame(frameSize, CV_16UC1);

  auto meanTimings =
      benchmarkAlgorithmN<T, firstFrame, lastFrame, iters>(frame);
  return meanTimings;
}
/**
 * Runs all four benchmarks (GPU and CPU variants of the overload-hotspot and
 * surface-layer detection algorithms) and prints a heading followed by the
 * statistics from Benchmark::displayStats for each one.
 *
 * The hotspot algorithms are timed on frames 239-258 and the surface-layer
 * algorithms on frames 16-41; every benchmark is repeated `iterations` times.
 *
 * Declared `inline` because it is defined in a header: without it, including
 * this header from more than one translation unit produces duplicate
 * definitions at link time.
 */
inline void run() {
  /* Explicit type: the value is used as a std::size_t template argument. */
  constexpr std::size_t iterations{20};

  std::cout << "GPU Overload Hotspot Detection Algorithm\n";
  Benchmark::displayStats(benchmarkGpu<OverloadHotspotDetectionAlgorithm, 239,
                                       258, iterations>());

  std::cout << "GPU Surface Layer Detection Algorithm\n";
  Benchmark::displayStats(
      benchmarkGpu<SurfaceLayerDetectionAlgorithm, 16, 41, iterations>());

  std::cout << "CPU Overload Hotspot Detection Algorithm\n";
  Benchmark::displayStats(benchmarkCpu<Cpu::OverloadHotspotDetectionAlgorithm,
                                       239, 258, iterations>());

  std::cout << "CPU Surface Layer Detection Algorithm\n";
  Benchmark::displayStats(benchmarkCpu<Cpu::SurfaceLayerDetectionAlgorithm,
                                       16, 41, iterations>());
}
} // namespace ThermalEventDetectionBenchmark