我是计算机视觉方面的新手。我正在尝试使用基于深度学习 dnn 模块的人脸检测部分来实现具有本地二进制模式的实时人脸识别。我正在使用 caltech_faces 数据集,并添加了一个包含我的 20 张照片的文件夹。



predName = le.inverse_transform([predictions[i]])[0]
TabError: inconsistent use of tabs and spaces in indentation


# import the necessary packages

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils.video import VideoStream
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import imutils
import time
import cv2
import os

#Creating our face detector

def detect_faces(net, frame, minConfidence=0.5):
    # grab the dimensions of the image and then construct a blob
    # from it
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300),
        (104.0, 177.0, 123.0))

    # pass the blob through the network to obtain the face detections,
    # then initialize a list to store the predicted bounding boxes
    detections = net.forward()
    boxes = []

    # loop over the detections
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the detection
        confidence = detections[0, 0, i, 2]

        # filter out weak detections by ensuring the confidence is
        # greater than the minimum confidence
        if confidence > minConfidence:
            # compute the (x, y)-coordinates of the bounding box for
            # the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # update our bounding box results list
            boxes.append((startX, startY, endX, endY))

    # return the face detection bounding boxes
    return boxes

#Loading the CALTECH Faces dataset

def load_face_dataset(inputPath, net, minConfidence=0.5,
    # grab the paths to all images in our input directory, extract
    # the name of the person (i.e., class label) from the directory
    # structure, and count the number of example images we have per
    # face
    imagePaths = list(paths.list_images(inputPath))
    names = [p.split(os.path.sep)[-2] for p in imagePaths]
    (names, counts) = np.unique(names, return_counts=True)
    names = names.tolist()

    # initialize lists to store our extracted faces and associated
    # labels
    faces = []
    labels = []

    # loop over the image paths
    for imagePath in imagePaths:
        # load the image from disk and extract the name of the person
        # from the subdirectory structure
        frame = cv2.imread(imagePath)
        name = imagePath.split(os.path.sep)[-2]

        # only process images that have a sufficient number of
        # examples belonging to the class
        if counts[names.index(name)] < minSamples:

        # perform face detection
        boxes = detect_faces(net, frame, minConfidence)

        # loop over the bounding boxes
        for (startX, startY, endX, endY) in boxes:
            # extract the face ROI, resize it, and convert it to
            # grayscale
            faceROI = frame[startY:endY, startX:endX]
            faceROI = cv2.resize(faceROI, (47, 62))
            faceROI = cv2.cvtColor(faceROI, cv2.COLOR_BGR2GRAY)

            # update our faces and labels lists

    # convert our faces and labels lists to NumPy arrays
    faces = np.array(faces)
    labels = np.array(labels)

    # return a 2-tuple of the faces and labels
    return (faces, labels)

#Implementing Local Binary Patterns for face recognition    

# # construct the argument parser and parse the arguments
# ap = argparse.ArgumentParser()
# ap.add_argument("-i", "--input", type=str, required=True,
#   help="path to input directory of images")
# ap.add_argument("-f", "--face", type=str,
#   default="face_detector",
#   help="path to face detector model directory")
# ap.add_argument("-c", "--confidence", type=float, default=0.5,
#   help="minimum probability to filter weak detections")
# args = vars(ap.parse_args())

# since we are using Jupyter Notebooks we can replace our argument
# parsing code with *hard coded* arguments and values
args = 
    "input": "caltech_faces",
    "face": "face_detector",
    "confidence": 0.5,

# load our serialized face detector model from disk
print("[INFO] loading face detector model...")
prototxtPath = os.path.sep.join([args["face"], "deploy.prototxt"])
weightsPath = os.path.sep.join([args["face"],
net = cv2.dnn.readNet(prototxtPath, weightsPath)

# load the CALTECH faces dataset
print("[INFO] loading dataset...")
(faces, labels) = load_face_dataset(args["input"], net,
    minConfidence=0.5, minSamples=20)
print("[INFO]  images in dataset".format(len(faces)))

# encode the string labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# construct our training and testing split
(trainX, testX, trainY, testY) = train_test_split(faces,
    labels, test_size=0.25, stratify=labels, random_state=42)

# train our LBP face recognizer
print("[INFO] training face recognizer...")
recognizer = cv2.face.LBPHFaceRecognizer_create(
    radius=2, neighbors=16, grid_x=8, grid_y=8)
start = time.time()
recognizer.train(trainX, trainY)
end = time.time()
print("[INFO] training took :.4f seconds".format(end - start))

# initialize the list of predictions and confidence scores
print("[INFO] gathering predictions...")
predictions = []
confidence = []
start = time.time()

# loop over the test data
for i in range(0, len(testX)):
    # classify the face and update the list of predictions and
    # confidence scores
    (prediction, conf) = recognizer.predict(testX[i])

# measure how long making predictions took
end = time.time()
print("[INFO] inference took :.4f seconds".format(end - start))

# show the classification report
print(classification_report(testY, predictions,

# initialize the video stream and allow the cammera sensor to warmup
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()

# loop over the frames from the video stream
while True:

    # grab the frame from the threaded video stream and resize it
    # to have a maximum width of 400 pixels
    face = vs.read()
    face = imutils.resize(face, width=400)

    # loop over the detections
    for i in range(0, detections.shape[2]):

        # grab the predicted name and actual name
    predName = le.inverse_transform([predictions[i]])[0]
    actualName = le.classes_[testY[i]]

    # draw the predicted name and actual name on the image
    cv2.putText(face, "pred: ".format(predName), (5, 25),
    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    cv2.putText(face, "actual: ".format(actualName), (5, 60),
    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

    # display the predicted name, actual name, and confidence of the
    # prediction (i.e., chi-squared distance; the *lower* the distance
    # is the *more confident* the prediction is)
    print("[INFO] prediction: , actual: , confidence: :.2f".format(predName, actualName, confidence[i]))

# show the output frame
cv2.imshow("Face", face)
key = cv2.waitKey(1) & 0xFF
# if the `q` key was pressed, break from the loop
if key == ord("q"):


您有一个 for 循环,没有任何代码行,但在导致问题的行之前有一个注释:

# loop over the detections
for i in range(0, detections.shape[2]):

    # grab the predicted name and actual name
predName = le.inverse_transform([predictions[i]])[0]
actualName = le.classes_[testY[i]]

问题来自这个空循环;如果你有一个循环,你必须在里面至少有一行代码。所以删除它或在里面添加 pass 关键字。



我为此使用 google collab,首先,请确保您已安装 OpenCV。您可以使用 pip 安装它:

pip install opencv-python

在检测人脸之前,我们必须使用 google collab 打开网络摄像头。

from IPython.display import display, javascript
from google.colab.output import eval_js
from base64 import b64decode
def take_photo(filename='photo.jpg', quality=0.8):
js = Javascript('''
async function takePhoto(quality) 
const div = document.createElement('div');
const capture = document.createElement('button');
capture.textContent = 'Capture';
const video = document.createElement('video');
video.style.display = 'block';
const stream = await navigator.mediaDevices.getUserMedia(video: true);
video.srcObject = stream;
await video.play();
// Resize the output to fit the video element.     google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);
// Wait for Capture to be clicked.
await new Promise((resolve) => capture.onclick = resolve);
const canvas = document.createElement('canvas');
canvas.width = video.videoWidth;
canvas.height = video.videoHeight;
canvas.getContext('2d').drawImage(video, 0, 0);
return canvas.toDataURL('image/jpeg', quality);

data = eval_js('takePhoto()'.format(quality))
binary = b64decode(data.split(',')[1])
with open(filename, 'wb') as f:
return filename


from IPython.display import Image
filename = take_photo()
print('Saved to '.format(filename))
# Show the image which was just taken.
except Exception as err:
# Errors will be thrown if the user does not have a webcam or if they do 
# grant the page permission to access it.

运行这两个代码后,网络摄像头打开,您可以拍摄照片。 照片保存为 photo.jpg。

使用 Haar 级联的人脸检测是一种基于机器学习的方法,其中使用一组输入数据训练级联函数。 OpenCV 已经包含许多针对面部、眼睛、微笑等的预训练分类器。今天我们将使用面部分类器。您也可以尝试使用其他分类器。


