计算机视觉案例:应用opencv+keras完成视频物体检测
Posted ZSYL
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了计算机视觉案例:应用opencv+keras完成视频物体检测相关的知识,希望对你有一定的参考价值。
1. 案例结构目录
2. 案例效果演示
3. 步骤分析
- 配置获取相关预测数据类别,网络参数
- 获取摄像头视频
- 获取摄像头每帧数据,进行格式形状处理
- 模型预测、结果NMS过滤
- 画图:显示物体位置,FPS值(每秒帧数)
4. 代码实现
导入相关包
import cv2
import keras
from keras.applications.imagenet_utils import preprocess_input
from keras.backend.tensorflow_backend import set_session
from keras.models import Model
from keras.preprocessing import image
import pickle
import numpy as np
from random import shuffle
from scipy.misc import imread, imresize
from timeit import default_timer as timer
from utils.ssd_utils import BBoxUtility
- 1、配置获取相关预测数据类别,网络参数
class VideoTag(object):
    """Label objects in video frames using an SSD model.

    Holds the class names, the model, the network input shape, the box
    decoding utility and one drawing colour per class.
    """

    def __init__(self, class_names, model, input_shape):
        """Store the detection configuration.

        Args:
            class_names: sequence of class labels; its length is used as the
                number of classes.
            model: object whose ``predict`` method is called on each frame.
            input_shape: shape tuple of the network input; elements 0 and 1
                are used as the frame resize target.
        """
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_classes)
        # The drawing code indexes self.class_colors by class id, so one
        # colour per class has to exist before any frame is annotated.
        # Each colour is picked in HSV with the hue spread over the classes
        # and then converted to the BGR tuple OpenCV drawing calls expect.
        self.class_colors = []
        for i in range(self.num_classes):
            hue = 255 * i / self.num_classes
            col = np.zeros((1, 1, 3)).astype('uint8')
            col[0][0][0] = hue
            col[0][0][1] = 128
            col[0][0][2] = 255
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            self.class_colors.append(
                (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2])))
- 2、获取摄像头视频
def run(self, video_path=0, conf_thresh=0.6):
    """Run detection on a video source and display the annotated frames.

    Args:
        video_path: value handed to ``cv2.VideoCapture`` (defaults to 0).
        conf_thresh: detections with a confidence below this value are
            not drawn.

    Raises:
        IOError: if the capture cannot be opened.
    """
    # The capture is named ``cap`` because the frame loop, the FPS read-out
    # and the final release all refer to it by that name.
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(("找不到对应的视频或者摄像头"))
- 3、获取摄像头每帧数据,进行格式形状处理
# Read frames from the video file or camera until the stream ends.
while True:
    retval, orig_image = cap.read()
    if not retval:
        print("视频检测结束!")
        # Leave the loop instead of returning so that the capture release
        # and window teardown placed after the loop still run.
        break
    source_image = np.copy(orig_image)
    # Resize the frame to the network input size and convert BGR -> RGB.
    # cv2.resize takes its target as (width, height); this assumes
    # input_shape is ordered (height, width, channels) — TODO confirm.
    im_size = (self.input_shape[1], self.input_shape[0])
    resized = cv2.resize(orig_image, im_size)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    # Scale the resized frame back to the source dimensions; the boxes and
    # labels are drawn on this image.
    to_draw = cv2.resize(resized, (int(source_image.shape[1]), int(source_image.shape[0])))
- 4、模型预测、结果NMS过滤
# Wrap the RGB frame in a one-element batch, apply the Keras input
# preprocessing and run the model on it.
batch = np.array([image.img_to_array(rgb)])
y = self.model.predict(preprocess_input(batch))
# Decode the raw prediction with the box utility (the NMS filtering step
# of this walkthrough).
results = self.bbox_util.detection_out(y)
- 5、画图显示
- 画出物体位置,给定固定阈值
# Draw the detections of this frame.
if len(results) > 0 and len(results[0]) > 0:
    detections = results[0]
    # Columns read below: 0 label, 1 confidence, 2 xmin, 3 ymin, 4 xmax, 5 ymax.
    # Keep only the rows whose confidence reaches the threshold.
    keep = [idx for idx, score in enumerate(detections[:, 1]) if score >= conf_thresh]
    frame_h, frame_w = to_draw.shape[0], to_draw.shape[1]
    for row in detections[keep]:
        class_num = int(row[0])
        score = row[1]
        # Box coordinates are multiplied by the frame size to get pixels.
        xmin = int(round(row[2] * frame_w))
        ymin = int(round(row[3] * frame_h))
        xmax = int(round(row[4] * frame_w))
        ymax = int(round(row[5] * frame_h))
        colour = self.class_colors[class_num]
        # Outline of the detected object.
        cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax), colour, 2)
        # Filled label background at the box's top-left corner, then the
        # "<class name> <confidence>" text on top of it.
        text = self.class_names[class_num] + " " + ('%.2f' % score)
        cv2.rectangle(to_draw, (xmin, ymin - 10), (xmin + 80, ymin + 5), colour, -1)
        cv2.putText(to_draw, text, (xmin + 5, ymin), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)
- 显示FPS参数
# Build the FPS label from the capture's CAP_PROP_FPS property.
# NOTE(review): this is the frame rate reported by the capture source, not a
# measured detection throughput — confirm that is the intended read-out.
fps = "FPS: " + str(cap.get(cv2.CAP_PROP_FPS))
# Draw a filled white box in the top-left corner and write the label on it.
cv2.rectangle(to_draw, (0, 0), (50, 17), (255, 255, 255), -1)
cv2.putText(to_draw, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)
- 显示图片
# Show the annotated frame in the "SSD result" window.
cv2.imshow("SSD result", to_draw)
# Wait 1 ms for a key event; the returned key code is not used here.
cv2.waitKey(1)
# Release the capture and close the display windows.
cap.release()
cv2.destroyAllWindows()
5. 调用视频预测
import sys
import keras
from utils.tag_video import VideoTag
from nets.ssd_net import SSD300
def main():
    """Build the SSD300 model, load its weights and run detection on source 0."""
    # Class labels of the dataset, with "background" first.
    class_names = [
        "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
        "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
        "motorbike", "person", "pottedplant", "sheep", "sofa", "train",
        "tvmonitor",
    ]
    input_shape = (300, 300, 3)

    model = SSD300(input_shape, num_classes=len(class_names))
    # Load the pre-trained weights.
    model.load_weights('./ckpt/pre_trained/weights_SSD300.hdf5')

    VideoTag(class_names, model, input_shape).run(0)


if __name__ == '__main__':
    main()
6. 完整代码
"""
配置获取相关预测数据类别,网络参数
获取摄像头视频
获取摄像每帧数据,进行格式形状处理
模型预测、结果NMS过滤
画图:显示物体位置,FPS值(每秒帧数)
"""
from tensorflow.python.keras.preprocessing.image import img_to_array
from tensorflow.python.keras.applications.imagenet_utils import preprocess_input
from computerVision.utils.ssd_utils import BBoxUtility
import cv2
import numpy as np
class VideoTag(object):
    """Detect objects in a video stream with an SSD model and display them.

    Each frame is resized to the network input size, converted BGR -> RGB,
    passed through ``model.predict``, decoded by
    ``BBoxUtility.detection_out`` and shown in an OpenCV window with
    labelled boxes and an FPS read-out.
    """

    def __init__(self, model, input_shape, num_classes):
        """Store the configuration and build one drawing colour per class.

        Args:
            model: object whose ``predict`` method is called on each frame.
            input_shape: shape tuple of the network input; elements 0 and 1
                are used as the frame resize target.
            num_classes: number of classes handed to ``BBoxUtility`` and
                used to size the colour table.
        """
        self.model = model
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.bbox_util = BBoxUtility(num_classes=self.num_classes)
        self.class_names = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
                            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train",
                            "tvmonitor"]
        # One display colour per class: the hue is spread over the classes
        # in HSV and converted to the BGR tuple OpenCV drawing calls expect.
        self.class_colors = []
        for i in range(0, self.num_classes):
            hue = 255 * i / self.num_classes
            col = np.zeros((1, 1, 3)).astype('uint8')
            col[0][0][0] = hue
            col[0][0][1] = 128
            col[0][0][2] = 255
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
            self.class_colors.append(col)

    @staticmethod
    def _to_pixel_box(xmin, ymin, xmax, ymax, width, height):
        """Scale a box given as fractions of the frame to integer pixels.

        x coordinates are scaled by ``width`` and y coordinates by
        ``height``. Returns ``(xmin, ymin, xmax, ymax)`` as ints.
        """
        return (
            int(round(xmin * width)),
            int(round(ymin * height)),
            int(round(xmax * width)),
            int(round(ymax * height)),
        )

    def _draw_detections(self, to_draw, detections, conf_thresh):
        """Draw every detection whose confidence reaches ``conf_thresh``.

        ``detections`` is indexed as a 2-D array with the columns
        label, confidence, xmin, ymin, xmax, ymax. ``to_draw`` is modified
        in place.
        """
        det_conf = detections[:, 1]
        # Indices of the detections that pass the confidence threshold.
        top_indices = [i for i, conf in enumerate(det_conf) if conf >= conf_thresh]
        frame_h, frame_w = to_draw.shape[0], to_draw.shape[1]
        for i, row in enumerate(detections[top_indices]):
            class_num = int(row[0])
            xmin, ymin, xmax, ymax = self._to_pixel_box(
                row[2], row[3], row[4], row[5], frame_w, frame_h)
            print('该帧图片检测到{}物体,索引为{}'.format(i, class_num))
            colour = self.class_colors[class_num]
            # Outline of the detected object.
            cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax), colour, 2)
            # Filled label background, then "<class name> <confidence>".
            text = self.class_names[class_num] + ' ' + ('%.2f' % (row[1]))
            text_top = (xmin - 1, ymin - 10)
            text_bot = (xmin + 80, ymin + 5)
            text_pos = (xmin + 5, ymin)
            cv2.rectangle(to_draw, text_top, text_bot, colour, -1)
            cv2.putText(to_draw, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.35, (255, 255, 255), 1, cv2.LINE_AA)

    def run(self, file_path, conf_thresh=0.6):
        """Capture frames, predict on each one and display the annotations.

        The loop ends when the stream has no more frames or when ``q`` is
        pressed in the display window.

        Args:
            file_path: value handed to ``cv2.VideoCapture``.
            conf_thresh: detections with a confidence below this value are
                not drawn.

        Returns:
            None.

        Raises:
            IOError: if the capture cannot be opened.
        """
        cap = cv2.VideoCapture(file_path)
        if not cap.isOpened():
            raise IOError(('打开本地视频或者摄像头失败!'))
        # try/finally guarantees the capture and the window are released on
        # every exit path: end of stream, 'q' key, or an exception.
        try:
            while True:
                ret, orig_image = cap.read()
                if not ret:
                    print('视频检测结束!')
                    break
                frame_h, frame_w = orig_image.shape[0], orig_image.shape[1]
                # Resize to the network input size. cv2.resize takes its
                # target as (width, height); this assumes input_shape is
                # ordered (height, width, channels) — TODO confirm.
                resized = cv2.resize(orig_image, (self.input_shape[1], self.input_shape[0]))
                rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
                # Scale the resized frame back to the source dimensions;
                # the boxes and labels are drawn on this image.
                to_draw = cv2.resize(resized, (frame_w, frame_h))
                # Wrap the frame in a one-element batch before preprocessing.
                x = preprocess_input(np.array([img_to_array(rgb)]))
                y = self.model.predict(x)
                # Decode the prediction (the NMS filtering step).
                results = self.bbox_util.detection_out(y)
                if len(results) > 0 and len(results[0]) > 0:
                    # Debug output; kept inside the guard so an empty result
                    # cannot fail on the index or attribute access.
                    print(results[0].shape)
                    self._draw_detections(to_draw, results[0], conf_thresh)
                # NOTE(review): CAP_PROP_FPS is the rate reported by the
                # capture source, not a measured detection throughput.
                fps = 'FPS: ' + str(cap.get(cv2.CAP_PROP_FPS))
                cv2.rectangle(to_draw, (0, 0), (58, 12), (255, 255, 255), -1)
                cv2.putText(to_draw, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)
                cv2.imshow('SSD detector result', to_draw)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            cap.release()
            cv2.destroyAllWindows()
        return None
if __name__ == '__main__':
input_shape = (300, 300, 3)
# 数据集的配置
class_names = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
"diningtable", "dog", 以上是关于计算机视觉案例:应用opencv+keras完成视频物体检测的主要内容,如果未能解决你的问题,请参考以下文章
使用Python+OpenCV+Keras创建自己的图像分类模型