我的 OpenCV 实时网络摄像头演示没有显示准​​确的情绪

Posted

技术标签:

【中文标题】我的 OpenCV 实时网络摄像头演示没有显示准​​确的情绪【英文标题】:My OpenCV Live Webcam Demo Doesn't Show Accurate Emotions 【发布时间】:2021-06-16 20:33:15 【问题描述】:

我按照使用 PyTorch 进行情绪识别的不完整教程开发了一个代码。我有很多错误,但由于这里的其他问题,我修复了它们。但我只是坚持这一点。我正在运行我的代码,然后我的网络摄像头打开,但我一直看到“中性”情绪,即使我表现出不同的情绪。但在控制台上,我看到记录的其他情绪,例如:

tensor(0, device='cuda:0')
tensor(6, device='cuda:0')
tensor(4, device='cuda:0')
tensor(4, device='cuda:0') 

你有什么想法吗?

这是我的代码:

import cv2 ### pip install opencv-python
##pip install open-cv-contrib-python fullpackage
#from deepface import DeepFace ##pip install deepface
import numpy as np

path = "haarcascade_frontalface_default.xml"
font_scale = 1.5
font = cv2.FONT_HERSHEY_PLAIN

#set the rectangle background to white
rectangle_bgr = (255, 255, 255)
# make a black image
img = np.zeros((500, 500))
#set some text
text = "VİDGA Projesi"
#get the width and height of the text box
(text_width, text_height) = cv2.getTextSize(text, font, fontScale=font_scale, thickness=1)[0]
#set the text start position
text_offset_x = 10
text_offset_y = img.shape[0] - 25
#make the coords of the box with a small padding of two pixels
box_coords = ((text_offset_x, text_offset_y), (text_offset_x + text_width + 2, text_offset_y - text_height -2))
cv2.rectangle(img, box_coords[0], box_coords[1], rectangle_bgr, cv2.FILLED)
cv2.putText(img, text, (text_offset_x, text_offset_y), font, fontScale=font_scale, color= (0,0,0), thickness=1)

cap = cv2.VideoCapture(1)
#Check if the webcam is opened correctly
if not cap.isOpened():
    cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise IOError("Cannot open webcam")
    
while True:
    ret, frame = cap.read()
    #eye_Cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
    faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    
    if ret == True:
    
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        #print(faceCascade.empty())
        faces = faceCascade.detectMultiScale(gray,1.1,4)
        for x,y,w,h in faces:
            roi_gray = gray[y:y+h, x:x+w]
            roi_color = frame[y:y+h, x:x+w]
            cv2.rectangle(frame, (x,y), (x+w, y+h), (255, 0, 0), 2)
            facess = faceCascade.detectMultiScale(roi_gray)
            if len(facess) == 0:
                print("Face not detected")
            else: 
                for(ex,ey,ew,eh) in facess:
                    face_roi = roi_color[ey: ey+eh, ex: ex+ew] ##cropping the face
                
    graytemp = cv2.cvtColor(face_roi, cv2.COLOR_BGR2GRAY)
    final_image = cv2.resize(graytemp, (48,48))
    final_image = np.expand_dims(final_image, axis =0) #add third dimension
    final_image = np.expand_dims(final_image, axis =0) #add fourth dimension
    final_image = final_image/255.0 # normalization
    dataa = torch.from_numpy(final_image)
    dataa = dataa.type(torch.FloatTensor)
    dataa = dataa.to(device)
    outputs = net(dataa)
    Pred = F.softmax(outputs, dim=1)
    Predictions = torch.argmax(Pred)
    print(Predictions)
    
    font = cv2.FONT_HERSHEY_SIMPLEX
    
    font_scale = 1.5
    font = cv2.FONT_HERSHEY_PLAIN
    
    if ((Predictions)==0):
        status = "Angry"
        
        x1,y1,w1,h1, = 0,0,175,5
        # Draw black background rectangle
        cv2.rectangle(frame, (x1, x1), (x1 + w1, y1 + h1), (0,0,0), -1)
        # Add text
        cv2.putText(frame, status, (x1 + int(w1/10), y1 + int(h1/2)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
        
        cv2.putText(frame, status, (100,150), font, 3,(0,0,255), 2, cv2.LINE_4)
        
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255))
        
    elif ((Predictions)==1):
        status = "Disgust"
        
        x1,y1,w1,h1, = 0,0,175,5
        # Draw black background rectangle
        cv2.rectangle(frame, (x1, x1), (x1 + w1, y1 + h1), (0,0,0), -1)
        # Add text
        cv2.putText(frame, status, (x1 + int(w1/10), y1 + int(h1/2)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
        
        cv2.putText(frame, status, (100,150), font, 3,(0,0,255), 2, cv2.LINE_4)
        
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255))
        
    elif ((Predictions)==2):
        status = "Fear"
        
        x1,y1,w1,h1, = 0,0,175,5
        # Draw black background rectangle
        cv2.rectangle(frame, (x1, x1), (x1 + w1, y1 + h1), (0,0,0), -1)
        # Add text
        cv2.putText(frame, status, (x1 + int(w1/10), y1 + int(h1/2)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
        
        cv2.putText(frame, status, (100,150), font, 3,(0,0,255), 2, cv2.LINE_4)
        
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255))
        
    elif ((Predictions)==3):
        status = "Happy"
        
        x1,y1,w1,h1, = 0,0,175,5
        # Draw black background rectangle
        cv2.rectangle(frame, (x1, x1), (x1 + w1, y1 + h1), (0,0,0), -1)
        # Add text
        cv2.putText(frame, status, (x1 + int(w1/10), y1 + int(h1/2)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
        
        cv2.putText(frame, status, (100,150), font, 3,(0,0,255), 2, cv2.LINE_4)
        
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255))
        
    elif ((Predictions)==4):
        status = "Sad"
        
        x1,y1,w1,h1, = 0,0,175,5
        # Draw black background rectangle
        cv2.rectangle(frame, (x1, x1), (x1 + w1, y1 + h1), (0,0,0), -1)
        # Add text
        cv2.putText(frame, status, (x1 + int(w1/10), y1 + int(h1/2)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
        
        cv2.putText(frame, status, (100,150), font, 3,(0,0,255), 2, cv2.LINE_4)
        
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255))
        
    elif ((Predictions)==5):
        status = "Surprised"
        
        x1,y1,w1,h1, = 0,0,175,5
        # Draw black background rectangle
        cv2.rectangle(frame, (x1, x1), (x1 + w1, y1 + h1), (0,0,0), -1)
        # Add text
        cv2.putText(frame, status, (x1 + int(w1/10), y1 + int(h1/2)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
        
        cv2.putText(frame, status, (100,150), font, 3,(0,0,255), 2, cv2.LINE_4)
        
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255))
        
    elif ((Predictions)==6):
        status = "Neutral"
        
        x1,y1,w1,h1, = 0,0,175,5
        # Draw black background rectangle
        cv2.rectangle(frame, (x1, x1), (x1 + w1, y1 + h1), (0,0,0), -1)
        # Add text
        cv2.putText(frame, status, (x1 + int(w1/10), y1 + int(h1/2)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
        
        cv2.putText(frame, status, (100,150), font, 3,(0,0,255), 2, cv2.LINE_4)
        
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255))
    
    
        if ret == True:
            cv2.imshow('VIDGA Emotion Recognition', frame) 
            if cv2.waitKey(2) & 0xFF == ord('q'):
                break
        
cap.release()
cv2.destroyAllWindows

【问题讨论】:

是打印(预测)什么是打印张量(0,设备='cuda:0')。因为如果是这样,Predictions 就不是一个数字而是一个张量。 【参考方案1】:

Predictions 是张量,因此您需要它的值而不是张量本身。

改变这一行:

Predictions = torch.argmax(Pred)

用这个:

Predictions = torch.argmax(Pred).item()

【讨论】:

我做了那个改变,但我仍然在相机上一直保持中性 @GökhanUçar 你还在控制台中看到像“tensor(0, device='cuda:0')”这样的行吗? 不,现在是这样的:3 3 3 未检测到面部 0 3 6 6 6 您只看到中性,因为您的 imshow 仅在 if 的最后一个分支上。仅当情绪是中性时,您才显示带有文本的图像。尝试取消缩进检查 ret == True 的 if @GökhanUçar 这对 OpenCV 来说不是一件容易的事,请尝试在 SO 上搜索其他类似问题,例如 this one

以上是关于我的 OpenCV 实时网络摄像头演示没有显示准​​确的情绪的主要内容,如果未能解决你的问题,请参考以下文章

Visual Studio、OpenCV、Python - 应用程序中没有网络摄像头

Android App人脸识别中借助摄像头和OpenCV实时检测人脸讲解及实战(附源码和演示 超详细)

使用 OpenCV 和 Python 显示网络摄像头源

演示需要用于网络摄像头图像的 OpenCV。(已安装 opencv 并设置 opencv4=1)

通过网络摄像头实时采集视频,采用OpenCV识别运动物体,实现有运动物体经过时存储,没有时则不存储.

OpenCV 网络摄像头提要未在 PictureBox Visual Studio 2015 中显示