Skip to content

Commit 388a7ce

Browse files
committed
Merge pull request opencv#16956 from alalek:ocl_control_buffer_mapping
2 parents 95341c2 + 54063c4 commit 388a7ce

File tree

2 files changed

+155
-1
lines changed

2 files changed

+155
-1
lines changed

modules/core/perf/opencl/perf_matop.cpp

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,141 @@ OCL_PERF_TEST_P(CopyToFixture, CopyToWithMaskUninit,
146146
SANITY_CHECK(dst);
147147
}
148148

149+
150+
151+
// Sub-rectangle of the test buffer that a benchmark accesses.
// The digit in each name is the divisor of the full buffer area.
enum ROIType
{
    ROI_FULL = 0,  // whole buffer
    ROI_2_RECT,    // about half of the area, both sides scaled equally
    ROI_2_TOP,     // top half: contiguous memory block
    ROI_2_LEFT,    // left half of every row
    ROI_4,         // quarter of the area (half width, half height)
    ROI_16         // 1/16 of the area (quarter width, quarter height)
};
159+
/**
 * Returns the sub-rectangle of a buffer of size sz that corresponds to t.
 * Every rectangle is anchored at the top-left corner (0, 0).
 *
 * @param t   which portion of the buffer to select
 * @param sz  size of the full buffer
 * @return    the selected rectangle, in integer pixel coordinates
 */
static Rect getROI(enum ROIType t, const Size& sz)
{
    switch (t)
    {
    case ROI_FULL:
        return Rect(0, 0, sz.width, sz.height);
    case ROI_2_RECT:
        // 71 / 100 approximates sqrt(1/2): scaling both sides by it keeps about half of the area
        return Rect(0, 0, sz.width * 71 / 100, sz.height * 71 / 100);
    case ROI_2_TOP:
        // full-width rows, top half of the height
        return Rect(0, 0, sz.width, sz.height / 2);
    case ROI_2_LEFT:
        // left half of every row, full height
        return Rect(0, 0, sz.width / 2, sz.height);
    case ROI_4:
        return Rect(0, 0, sz.width / 2, sz.height / 2);
    case ROI_16:
        return Rect(0, 0, sz.width / 4, sz.height / 4);
    }
    // Only reached when t holds a value that is not a ROIType enumerator.
    CV_Assert(false);
}
172+
173+
typedef TestBaseWithParam< tuple<cv::Size, MatType, ROIType> > OpenCLBuffer;
174+
175+
static inline void PrintTo(const tuple<cv::Size, MatType, enum ROIType>& v, std::ostream* os)
176+
{
177+
*os << "(" << get<0>(v) << ", " << typeToString(get<1>(v)) << ", ";
178+
enum ROIType roiType = get<2>(v);
179+
if (roiType == ROI_FULL)
180+
*os << "ROI_100_FULL";
181+
else if (roiType == ROI_2_RECT)
182+
*os << "ROI_050_RECT_HALF";
183+
else if (roiType == ROI_2_TOP)
184+
*os << "ROI_050_TOP_HALF";
185+
else if (roiType == ROI_2_LEFT)
186+
*os << "ROI_050_LEFT_HALF";
187+
else if (roiType == ROI_4)
188+
*os << "ROI_025_1/4";
189+
else
190+
*os << "ROI_012_1/16";
191+
*os << ")";
192+
}
193+
194+
// Benchmark: obtain a write-only host view of a UMat region and fill it from the CPU.
PERF_TEST_P_(OpenCLBuffer, cpu_write)
{
    const Size fullSize = get<0>(GetParam());
    const int matType = get<1>(GetParam());
    const Rect region = getROI(get<2>(GetParam()), fullSize);

    checkDeviceMaxMemoryAllocSize(fullSize, matType);

    UMat buffer(fullSize, matType);
    declare.in(buffer(region), WARMUP_NONE);

    OCL_TEST_CYCLE()
    {
        // The host view is created and released inside each timed iteration.
        Mat hostView = buffer(region).getMat(ACCESS_WRITE);
        hostView.setTo(Scalar(1, 2, 3, 4));
    }

    SANITY_CHECK_NOTHING();
}
213+
214+
// Benchmark: obtain a read-only host view of a UMat region and read every byte of it
// from the CPU.
PERF_TEST_P_(OpenCLBuffer, cpu_read)
{
    const Size srcSize = get<0>(GetParam());
    const int type = get<1>(GetParam());
    const Rect roi = getROI(get<2>(GetParam()), srcSize);

    checkDeviceMaxMemoryAllocSize(srcSize, type);

    UMat src(srcSize, type, Scalar(1, 2, 3, 4));
    declare.in(src(roi), WARMUP_NONE);

    // Receives the per-iteration byte sum. Without an observable use of the sum the
    // optimizer may remove the read loop as dead code, and the test would then time
    // only the creation and release of the host view.
    volatile unsigned checksumSink = 0;

    OCL_TEST_CYCLE()
    {
        unsigned counter = 0;
        Mat m = src(roi).getMat(ACCESS_READ);
        for (int y = 0; y < m.rows; y++)
        {
            const uchar* ptr = m.ptr(y);
            const size_t width_bytes = m.cols * m.elemSize();
            for (size_t x_bytes = 0; x_bytes < width_bytes; x_bytes++)
                counter += (unsigned)(ptr[x_bytes]);
        }
        checksumSink = counter;
    }
    (void)checksumSink;

    SANITY_CHECK_NOTHING();
}
240+
241+
// Benchmark: obtain a read-write host view of a UMat region and increment every byte
// of it in place from the CPU.
PERF_TEST_P_(OpenCLBuffer, cpu_update)
{
    const Size fullSize = get<0>(GetParam());
    const int matType = get<1>(GetParam());
    const Rect region = getROI(get<2>(GetParam()), fullSize);

    checkDeviceMaxMemoryAllocSize(fullSize, matType);

    UMat buffer(fullSize, matType, Scalar(1, 2, 3, 4));
    declare.in(buffer(region), WARMUP_NONE);

    OCL_TEST_CYCLE()
    {
        Mat hostView = buffer(region).getMat(ACCESS_READ | ACCESS_WRITE);
        for (int row = 0; row < hostView.rows; row++)
        {
            uchar* const rowBegin = hostView.ptr(row);
            // Row length in bytes: the loop works on raw bytes, not on typed elements.
            const size_t rowBytes = hostView.cols * hostView.elemSize();
            uchar* const rowEnd = rowBegin + rowBytes;
            for (uchar* p = rowBegin; p != rowEnd; ++p)
                *p += 1;
        }
    }

    SANITY_CHECK_NOTHING();
}
266+
267+
// Whole-buffer runs: four frame sizes x four 8-bit channel counts, ROI_FULL only.
INSTANTIATE_TEST_CASE_P(/*FULL*/, OpenCLBuffer,
    testing::Combine(testing::Values(szVGA, sz720p, sz1080p, sz2160p),
                     testing::Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4),
                     testing::Values(ROI_FULL)));
274+
275+
// Partial-buffer runs: two large frame sizes, single-channel 8-bit data, every ROI kind.
INSTANTIATE_TEST_CASE_P(ROI, OpenCLBuffer,
    testing::Combine(testing::Values(sz1080p, sz2160p),
                     testing::Values(CV_8UC1),
                     testing::Values(ROI_16, ROI_4, ROI_2_RECT, ROI_2_LEFT, ROI_2_TOP, ROI_FULL)));
282+
283+
149284
} } // namespace opencv_test::ocl
150285

151286
#endif // HAVE_OPENCL

modules/core/src/ocl.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4607,14 +4607,33 @@ class OpenCLAllocator CV_FINAL : public MatAllocator
46074607
return u;
46084608
}
46094609

4610+
static bool isOpenCLMapForced() // force clEnqueueMapBuffer / clEnqueueUnmapMemObject OpenCL API
4611+
{
4612+
static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_MAPPING", false);
4613+
return value;
4614+
}
4615+
static bool isOpenCLCopyingForced() // force clEnqueueReadBuffer[Rect] / clEnqueueWriteBuffer[Rect] OpenCL API
4616+
{
4617+
static bool value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_BUFFER_FORCE_COPYING", false);
4618+
return value;
4619+
}
4620+
46104621
void getBestFlags(const Context& ctx, int /*flags*/, UMatUsageFlags usageFlags, int& createFlags, int& flags0) const
46114622
{
46124623
const Device& dev = ctx.device(0);
46134624
createFlags = 0;
46144625
if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
46154626
createFlags |= CL_MEM_ALLOC_HOST_PTR;
46164627

4617-
if( dev.hostUnifiedMemory() )
4628+
if (!isOpenCLCopyingForced() &&
4629+
(isOpenCLMapForced() ||
4630+
(dev.hostUnifiedMemory()
4631+
#ifndef __APPLE__
4632+
|| dev.isIntel()
4633+
#endif
4634+
)
4635+
)
4636+
)
46184637
flags0 = 0;
46194638
else
46204639
flags0 = UMatData::COPY_ON_MAP;

0 commit comments

Comments
 (0)