与 CPU 版本相比,OpenCV GPU 对象检测速度较慢且检测次数较少
Posted
技术标签:
【中文标题】与 CPU 版本相比,OpenCV GPU 对象检测速度较慢且检测次数较少【英文标题】:OpenCV GPU object detection is slow and gives less detections as compared to CPU version 【发布时间】:2014-05-23 15:39:16 【问题描述】:以下是来自 OpenCV 的对象检测代码的 CPU 和 GPU 实现。
1) 与 CPU 版本相比,GPU 实现速度较慢
2) 与使用同一分类器的 CPU 版本代码相比,GPU 版本检测到的目标更少
知道为什么会这样吗?
代码的CPU版本
#include <windows.h>
#include <mmsystem.h>
#pragma comment(lib, "winmm.lib")
#include <opencv2/objdetect/objdetect.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
#include <stdio.h>
using namespace std;
using namespace cv;
int main(int argc, const char** argv)
//create the cascade classifier object used for the face detection
CascadeClassifier face_cascade;
//use the haarcascade_frontalface_alt.xml library
face_cascade.load("C:/cascades/haarcascade_frontalface_alt_tree.xml");
//setup video capture device and link it to the first capture device
VideoCapture captureDevice;
captureDevice.open(3);
//setup image files used in the capture process
Mat captureFrame;
Mat grayscaleFrame;
//create a window to present the results
namedWindow("outputCapture", 1);
//create a loop to capture and find faces
while(true)
//capture a new image frame
captureDevice>>captureFrame;
//convert captured image to gray scale and equalize
cvtColor(captureFrame, grayscaleFrame, CV_BGR2GRAY);
equalizeHist(grayscaleFrame, grayscaleFrame);
//create a vector array to store the face found
std::vector<Rect> faces;
//find faces and store them in the vector array
face_cascade.detectMultiScale(grayscaleFrame, faces);
//draw a rectangle for all found faces in the vector array on the original image
for(int i = 0; i < (int)faces.size(); i++)
Scalar color(0, 0, 255);
Point pt1(faces[i].x + faces[i].width, faces[i].y + faces[i].height);
Point pt2(faces[i].x, faces[i].y);
rectangle(captureFrame, pt1, pt2, color, 1, 8, 0);
string text = "Adam yuzi";
int fontFace = FONT_HERSHEY_TRIPLEX;
double fontScale = 1;
int thickness = 2;
putText(captureFrame, text, pt2, fontFace, fontScale, color, thickness);
//PlaySound(TEXT("C:/cascades/adam.wav"), NULL, SND_FILENAME | SND_SYNC);
// the correct code
//Sleep(1000);
//break;
//cout<<char(7);
//print the output
imshow("outputCapture", captureFrame);
//pause for 33ms
waitKey(33);
return 0;
GPU 版本的实现在这个示例链接中提供。代码的 GPU 版本
// WARNING: this sample is under construction! Use it on your own risk.
#if defined _MSC_VER && _MSC_VER >= 1400
#pragma warning(disable : 4100)
#endif
#include <iostream>
#include <iomanip>
#include "opencv2/contrib/contrib.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/cuda.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudawarping.hpp"
using namespace std;
using namespace cv;
using namespace cv::cuda;
static void help()
cout << "Usage: ./cascadeclassifier_gpu \n\t--cascade <cascade_file>\n\t(<image>|-- video <video>|--camera <camera_id>)\n"
"Using OpenCV version " << CV_VERSION << endl << endl;
static void convertAndResize(const Mat& src, Mat& gray, Mat& resized, double scale)
if (src.channels() == 3)
cv::cvtColor( src, gray, COLOR_BGR2GRAY );
else
gray = src;
Size sz(cvRound(gray.cols * scale), cvRound(gray.rows * scale));
if (scale != 1)
cv::resize(gray, resized, sz);
else
resized = gray;
static void convertAndResize(const GpuMat& src, GpuMat& gray, GpuMat& resized, double scale)
if (src.channels() == 3)
cv::cuda::cvtColor( src, gray, COLOR_BGR2GRAY );
else
gray = src;
Size sz(cvRound(gray.cols * scale), cvRound(gray.rows * scale));
if (scale != 1)
cv::cuda::resize(gray, resized, sz);
else
resized = gray;
/// Draws one line of outlined text near the left edge of an image.
///
/// @param img        image that is drawn on in place.
/// @param lineOffsY  zero-based text line index; larger values move the
///                   text further down.
/// @param fontColor  colour of the foreground text.
/// @param ss         text to draw.
static void matPrint(Mat &img, int lineOffsY, Scalar fontColor, const string &ss)
{
    int fontFace = FONT_HERSHEY_DUPLEX;
    double fontScale = 0.8;
    int fontThickness = 2;
    // The height of a fixed sample string gives every line the same spacing.
    Size fontSize = cv::getTextSize("T[]", fontFace, fontScale, fontThickness, 0);

    Point org;
    org.x = 1;
    org.y = 3 * fontSize.height * (lineOffsY + 1) / 2;

    // Thicker black pass first, then the coloured text on top of it.
    // NOTE(review): 16 is passed as the line-type argument - presumably
    // anti-aliased lines; confirm against the OpenCV version in use.
    putText(img, ss, org, fontFace, fontScale, Scalar(0,0,0), 5*fontThickness/2, 16);
    putText(img, ss, org, fontFace, fontScale, fontColor, fontThickness, 16);
}
/// Overlays the current FPS, the detector settings and the hotkey help on
/// the display image.
///
/// @param canvas        image that is drawn on in place.
/// @param bHelp         true to list all hotkeys, false to show only the
///                      hint for toggling the help.
/// @param bGpu          selects the "GPU, " or "CPU, " label.
/// @param bLargestFace  selects the "OneFace, " or "MultiFace, " label.
/// @param bFilter       selects the "Filter:ON" or "Filter:OFF" label.
/// @param fps           value printed with one decimal place.
static void displayState(Mat &canvas, bool bHelp, bool bGpu, bool bLargestFace, bool bFilter, double fps)
{
    // NOTE(review): main() builds the canvas with COLOR_GRAY2BGR, so
    // (255,0,0) presumably renders blue despite the variable name - confirm.
    Scalar fontColorRed = Scalar(255,0,0);
    Scalar fontColorNV = Scalar(118,185,0);

    ostringstream ss;
    ss << "FPS = " << setprecision(1) << fixed << fps;
    matPrint(canvas, 0, fontColorRed, ss.str());

    // Reuse the same stream for the second status line.
    ss.str("");
    ss << "[" << canvas.cols << "x" << canvas.rows << "], " <<
        (bGpu ? "GPU, " : "CPU, ") <<
        (bLargestFace ? "OneFace, " : "MultiFace, ") <<
        (bFilter ? "Filter:ON" : "Filter:OFF");
    matPrint(canvas, 1, fontColorRed, ss.str());

    // by Anatoly. MacOS fix. ostringstream(const string&) is a private
    // matPrint(canvas, 2, fontColorNV, ostringstream("Space - switch GPU / CPU"));
    if (bHelp)
    {
        matPrint(canvas, 2, fontColorNV, "Space - switch GPU / CPU");
        matPrint(canvas, 3, fontColorNV, "M - switch OneFace / MultiFace");
        matPrint(canvas, 4, fontColorNV, "F - toggle rectangles Filter");
        matPrint(canvas, 5, fontColorNV, "H - toggle hotkeys help");
        matPrint(canvas, 6, fontColorNV, "1/Q - increase/decrease scale");
    }
    else
    {
        matPrint(canvas, 2, fontColorNV, "H - toggle hotkeys help");
    }
}
int main(int argc, const char *argv[])
if (argc == 1)
help();
return -1;
if (getCudaEnabledDeviceCount() == 0)
return cerr << "No GPU found or the library is compiled without CUDA support" << endl, -1;
cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
string cascadeName;
string inputName;
bool isInputImage = false;
bool isInputVideo = false;
bool isInputCamera = false;
for (int i = 1; i < argc; ++i)
if (string(argv[i]) == "--cascade")
cascadeName = argv[++i];
else if (string(argv[i]) == "--video")
inputName = argv[++i];
isInputVideo = true;
else if (string(argv[i]) == "--camera")
inputName = argv[++i];
isInputCamera = true;
else if (string(argv[i]) == "--help")
help();
return -1;
else if (!isInputImage)
inputName = argv[i];
isInputImage = true;
else
cout << "Unknown key: " << argv[i] << endl;
return -1;
CascadeClassifier_CUDA cascade_gpu;
if (!cascade_gpu.load(cascadeName))
return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1;
CascadeClassifier cascade_cpu;
if (!cascade_cpu.load(cascadeName))
return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1;
VideoCapture capture;
Mat image;
if (isInputImage)
image = imread(inputName);
CV_Assert(!image.empty());
else if (isInputVideo)
capture.open(inputName);
CV_Assert(capture.isOpened());
else
capture.open(atoi(inputName.c_str()));
CV_Assert(capture.isOpened());
namedWindow("result", 1);
Mat frame, frame_cpu, gray_cpu, resized_cpu, faces_downloaded, frameDisp;
vector<Rect> facesBuf_cpu;
GpuMat frame_gpu, gray_gpu, resized_gpu, facesBuf_gpu;
/* parameters */
bool useGPU = true;
double scaleFactor = 1.0;
bool findLargestObject = false;
bool filterRects = true;
bool helpScreen = false;
int detections_num;
for (;;)
if (isInputCamera || isInputVideo)
capture >> frame;
if (frame.empty())
break;
(image.empty() ? frame : image).copyTo(frame_cpu);
frame_gpu.upload(image.empty() ? frame : image);
convertAndResize(frame_gpu, gray_gpu, resized_gpu, scaleFactor);
convertAndResize(frame_cpu, gray_cpu, resized_cpu, scaleFactor);
TickMeter tm;
tm.start();
if (useGPU)
//cascade_gpu.visualizeInPlace = true;
cascade_gpu.findLargestObject = findLargestObject;
detections_num = cascade_gpu.detectMultiScale(resized_gpu, facesBuf_gpu, 1.2,
(filterRects || findLargestObject) ? 4 : 0);
facesBuf_gpu.colRange(0, detections_num).download(faces_downloaded);
else
Size minSize = cascade_gpu.getClassifierSize();
cascade_cpu.detectMultiScale(resized_cpu, facesBuf_cpu, 1.2,
(filterRects || findLargestObject) ? 4 : 0,
(findLargestObject ? CASCADE_FIND_BIGGEST_OBJECT : 0)
| CASCADE_SCALE_IMAGE,
minSize);
detections_num = (int)facesBuf_cpu.size();
if (!useGPU && detections_num)
for (int i = 0; i < detections_num; ++i)
rectangle(resized_cpu, facesBuf_cpu[i], Scalar(255));
if (useGPU)
resized_gpu.download(resized_cpu);
for (int i = 0; i < detections_num; ++i)
rectangle(resized_cpu, faces_downloaded.ptr<cv::Rect>()[i], Scalar(255));
tm.stop();
double detectionTime = tm.getTimeMilli();
double fps = 1000 / detectionTime;
//print detections to console
cout << setfill(' ') << setprecision(2);
cout << setw(6) << fixed << fps << " FPS, " << detections_num << " det";
if ((filterRects || findLargestObject) && detections_num > 0)
Rect *faceRects = useGPU ? faces_downloaded.ptr<Rect>() : &facesBuf_cpu[0];
for (int i = 0; i < min(detections_num, 2); ++i)
cout << ", [" << setw(4) << faceRects[i].x
<< ", " << setw(4) << faceRects[i].y
<< ", " << setw(4) << faceRects[i].width
<< ", " << setw(4) << faceRects[i].height << "]";
cout << endl;
cv::cvtColor(resized_cpu, frameDisp, COLOR_GRAY2BGR);
displayState(frameDisp, helpScreen, useGPU, findLargestObject, filterRects, fps);
imshow("result", frameDisp);
char key = (char)waitKey(5);
if (key == 27)
break;
switch (key)
case ' ':
useGPU = !useGPU;
break;
case 'm':
case 'M':
findLargestObject = !findLargestObject;
break;
case 'f':
case 'F':
filterRects = !filterRects;
break;
case '1':
scaleFactor *= 1.05;
break;
case 'q':
case 'Q':
scaleFactor /= 1.05;
break;
case 'h':
case 'H':
helpScreen = !helpScreen;
break;
return 0;
注意:这些代码不是我编写的,CPU 版本和 GPU 版本分别取自其他来源(原文中的链接在此处已丢失)。我还在另一处发布了我的观察结果。
【问题讨论】:
【参考方案1】:试试这个代码,它对我来说很好用:
#define _CRT_SECURE_NO_DEPRECATE
#include <stdio.h>
#include <direct.h>
#include "fstream"
#include "iostream"
#include <vector>
#include "opencv2/core/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/opengl_interop.hpp"
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/ml/ml.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/contrib/contrib.hpp"
#include "opencv2/video/tracking.hpp"
#include "opencv2/imgproc/imgproc.hpp"
using namespace std;
using namespace cv;
using namespace cv::gpu;
// Shared GPU cascade classifier: loaded in main() and used by detect_faces().
cv::gpu::CascadeClassifier_GPU cascade_gpu;
//-------------------------------------------------------------------------------------------------------------
/// Detects faces in an image with the global GPU cascade classifier.
///
/// The detector is configured to look for the largest object only, and the
/// size passed to it is a quarter of the image in each dimension, so small
/// faces are not reported with these settings.
///
/// @param image  input image; a 3-channel image is converted with
///               CV_BGR2GRAY, any other image is copied unchanged.
/// @return the detected rectangles; empty when the image is empty or
///         nothing was detected.
vector<Rect> detect_faces(Mat& image)
{
    vector<Rect> res;
    // There is nothing to upload or scan in an empty image.
    if (image.empty())
        return res;

    const bool findLargestObject = true;
    const bool filterRects = true;

    Mat im(image.size(), CV_8UC1);
    if (image.channels() == 3)
    {
        cv::cvtColor(image, im, CV_BGR2GRAY);
    }
    else
    {
        image.copyTo(im);
    }

    GpuMat gray_gpu(im);
    GpuMat facesBuf_gpu;

    cascade_gpu.visualizeInPlace = false;
    cascade_gpu.findLargestObject = findLargestObject;
    const int detections_num = cascade_gpu.detectMultiScale(
        gray_gpu, facesBuf_gpu, 1.2,
        (filterRects || findLargestObject) ? 4 : 0,
        Size(image.cols/4, image.rows/4));

    if (detections_num == 0)
        return res;

    // Only the first detections_num columns of the buffer hold results.
    Mat faces_downloaded;
    facesBuf_gpu.colRange(0, detections_num).download(faces_downloaded);

    const Rect *faceRects = faces_downloaded.ptr<Rect>();
    res.assign(faceRects, faceRects + detections_num);
    return res;
}
//-----------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------
// MAIN
//----------------------------------------------------------------------
int main(int argc, char * argv[])
cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice());
cascade_gpu.load("haarcascade_frontalface_alt2.xml");
Mat frame,img;
namedWindow("frame");
VideoCapture capture(0);
capture >> frame;
vector<Rect> rects;
if (capture.isOpened())
while(waitKey(20)!=27) // Exit by escape press
capture >> frame;
cvtColor(frame,img,CV_BGR2GRAY);
rects=detect_faces(img);
if(rects.size()>0)
cv::rectangle(frame,rects[0],CV_RGB(255,0,0));
imshow("frame",frame);
return 0;
【讨论】:
谢谢安德烈。我已经调整了 detectMultiScale(...) 函数的参数。
不客气。这段代码是从我的一个项目中截取的,那个项目的任务和图像参数与你的不同,所以它在你的项目中用默认设置无法正常工作。
对我来说,cascade_gpu.load 调用永远无法完成。程序似乎一直在运行并消耗内存,但 load() 始终不返回。知道为什么会这样吗?我使用的是同一个 xml 文件。
谢谢安德烈。我必须调整 detectMultiScale(...) 函数的参数,使用默认值时它无法工作。我禁用了 findLargestObject 和 filterRects,并指定了大小。进行这些更改后,程序开始正常运行。
以上是关于“与 CPU 版本相比,OpenCV GPU 对象检测速度较慢且检测次数较少”的主要内容,如果未能解决你的问题,请参考以下文章:
在 iPhone 上处理 GPU(金属)和 CPU(OpenCV)上的摄像头馈送数据