CV源码解析Yolov3-Darknet版本计算mAP

Posted 2022-07-31 mb62c788fd198da

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了CV源码解析Yolov3-Darknet版本计算mAP相关的知识，希望对你有一定的参考价值。

VOC中的xml文件

<annotation>
    <folder>VOC2012</folder>                               //文件夹名
    <filename>2007_000346.jpg</filename>
    <source>                                                //文件来源
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
    </source>
    <size>                                           //文件大小（宽度、高度、通道数）
        <width>500</width>
        <height>375</height>
        <depth>3</depth>
    </size>
    <segmented>1</segmented>                       //是否用于分割
    <object>                                      //检测物体说明
        <name>bottle</name>                         //所检测到的物体
        <pose>Unspecified</pose>                    //拍摄角度（未详细说明）
        <truncated>0</truncated>                     //是否被截断（0代表未被截断）
        <difficult>0</difficult>                     //目标是否难以识别（0代表容易识别）
        <bndbox>                                     //左上和右下坐标
            <xmin>124</xmin>
            <ymin>107</ymin>
            <xmax>230</xmax>
            <ymax>343</ymax>
        </bndbox>
    </object>
    <object>                       //所检测的另一物体
        <name>person</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>137</xmin>
            <ymin>78</ymin>
            <xmax>497</xmax>
            <ymax>375</ymax>
        </bndbox>
    </object>
    <object>
        <name>person</name>
        <pose>Unspecified</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>89</xmin>
            <ymin>202</ymin>
            <xmax>129</xmax>
            <ymax>247</ymax>
        </bndbox>
    </object>
    <object>
        <name>person</name>
        <pose>Frontal</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>72</xmin>
            <ymin>209</ymin>
            <xmax>111</xmax>
            <ymax>259</ymax>
        </bndbox>
    </object>
</annotation>

View Code

其中

<segmented>1</segmented>                       //是否用于分割
    <object>                                      //检测物体说明
        <name>bottle</name>                         //所检测到的物体
        <pose>Unspecified</pose>                    //拍摄角度（未详细说明）
        <truncated>0</truncated>                     //是否被截断（0代表未被截断）
        <difficult>0</difficult>                     //目标是否难以识别（0代表容易识别）
        <bndbox>                                     //左上和右下坐标
            <xmin>124</xmin>
            <ymin>107</ymin>
            <xmax>230</xmax>
            <ymax>343</ymax>
        </bndbox>

difficult代表是否难以识别，0表示易识别，1表示难识别。通常读取时略过这类；

定制数据集计算mAP，需要根据数据集的属性，改写代码；

计算过程：

1) 选择一个类，做ap计算;

2) 对所有图片中该类box的检测结果，按照预测得分从大到小排序;

3) 按照这个顺序，计算累计tp， fp，进而计算累计precision, recall;

4) 然后在recall取0， 0.1， 0.2 ... 0.9， 1.0 这11个点（10个区间）处，分别取recall不小于该点时的最大precision，得到折线图的11个顶点，进而计算这11个顶点的平均值，就得到了这个类的ap;

5) 对所有类，计算ap，然后对这些ap加和，除以类别数，得到mAP;

【CV源码解析】Yolov3-Darknet版本计算mAP_xml

【CV源码解析】Yolov3-Darknet版本计算mAP_xml_02

import os
import numpy as np
# imagesetfile = "../tfluisee/valid.txt"
# annopath = ""
# imgfile = open(imagesetfile, r)
# lines = imgfile.readlines()
# imagename = lines[0].strip()
# print(imagename)
# imagenames = [x.strip() for x in lines]
# # annopath = .xml
# # annopath.format(imagename)
# 
# # parse_tfluisee_anno
# annoname = imagename.replace(images, labels)
# annoname = annoname.replace(png, txt)
# annofile = open(annoname, r)
# annolines = annofile.readlines()
# annoline = annolines[0].strip().split( )
# clas = tuple(map(int, annoline[0]))
# bbox = tuple(map(float, annoline[1:]))

# det
# Label coordinates are multiplied by these two values to obtain pixel
# coordinates, so they are presumably the image width/height in pixels.
img_w = 1280
img_h = 720
# Class names indexed by the integer class id stored in the label files:
# circle_green:0, circle_red:1, circle_yellow:2, circle_off:3, left_green:4,
# left_red:5, left_yellow:6, left_off:7, nomotor_green:8, nomotor_red:9,
# nomotor_yellow:10, nomotor_off:11
tfl_label = ['circle_green', 'circle_red', 'circle_yellow', 'circle_off',
             'left_green', 'left_red', 'left_yellow', 'left_off',
             'nomotor_green', 'nomotor_red', 'nomotor_yellow', 'nomotor_off']


def parse_tfluisee(filename):
    """Parse a TFL uisee txt label file into a list of object dicts.

    Each non-empty line is read as ``class_id xc yc w h``. The box values are
    scaled by ``img_w`` / ``img_h`` and converted from centre/size form to
    integer corner coordinates.

    Args:
        filename: Path of the label file to read.

    Returns:
        A list with one dict per labelled object, holding the keys
        ``'name'`` (the ``tfl_label`` entry for the class id),
        ``'bbox'`` (``[xmin, ymin, xmax, ymax]`` as ints, clipped to
        ``[0, img_w - 1]`` x ``[0, img_h - 1]``) and ``'difficult'``
        (always 0, i.e. easy).
    """
    objects = []
    with open(filename, 'r') as annofile:
        for annoline in annofile:
            fields = annoline.split()
            if not fields:
                # Blank lines carry no annotation.
                continue
            # Convert the whole first token: mapping int() over its
            # characters would turn ids >= 10 into their first digit.
            class_id = int(fields[0])
            xc, yc, w, h = map(float, fields[1:5])
            # (xc, yc, w, h) ---> (xmin, ymin, xmax, ymax)
            xmin = max((xc - w * 0.5) * img_w, 0)
            ymin = max((yc - h * 0.5) * img_h, 0)
            xmax = min((xc + w * 0.5) * img_w, img_w - 1)
            ymax = min((yc + h * 0.5) * img_h, img_h - 1)
            objects.append({
                'name': tfl_label[class_id],
                'bbox': [int(xmin), int(ymin), int(xmax), int(ymax)],
                'difficult': 0,  # 0-easy, 1-difficult
            })
    return objects

def voc_ap(rec, prec, use_07_metric=False):
    """Return the VOC average precision for a recall/precision curve.

    ``rec`` and ``prec`` are the cumulative recall and precision values as
    NumPy arrays. With ``use_07_metric`` set, the VOC07 11-point average is
    used; otherwise (the default) the area under the precision envelope is
    computed.
    """
    if not use_07_metric:
        # Pad the curve with sentinel points at both ends.
        recall_pts = np.concatenate(([0.], rec, [1.]))
        precision_pts = np.concatenate(([0.], prec, [0.]))

        # Precision envelope: running maximum taken from the right.
        precision_pts = np.maximum.accumulate(precision_pts[::-1])[::-1]

        # Only the positions where recall changes contribute area.
        steps = np.flatnonzero(recall_pts[1:] != recall_pts[:-1])
        widths = recall_pts[steps + 1] - recall_pts[steps]
        return np.sum(widths * precision_pts[steps + 1])

    # 11 point metric: best precision at recall >= 0.0, 0.1, ..., 1.0
    ap = 0.
    for threshold in np.arange(0., 1.1, 0.1):
        reached = rec >= threshold
        if np.sum(reached) == 0:
            best = 0
        else:
            best = np.max(prec[reached])
        ap = ap + best / 11.
    return ap

def tfl_eval(detpath, annopath, datasetfile, classname, ovthresh, use_07_metric=False):
    """Evaluate the detections of one class, PASCAL VOC style.

    Args:
        detpath: Template of the per-class detection file;
            ``detpath.format(classname)`` is opened. Every non-empty line is
            read as ``image_id confidence xmin ymin xmax ymax``.
        annopath: Not used by this function; kept so existing callers keep
            working.
        datasetfile: Text file with one image path per line. The label file
            of an image is found by replacing ``'images'`` with ``'labels'``
            and ``'png'`` with ``'txt'`` in that path.
        classname: Class to evaluate (an entry of ``tfl_label``).
        ovthresh: IoU a detection must strictly exceed to match a ground
            truth box.
        use_07_metric: Passed to ``voc_ap``; selects the 11-point metric.

    Returns:
        ``(rec, prec, ap)`` where ``rec`` and ``prec`` are cumulative arrays
        ordered by descending confidence, or ``(0, 0, 0)`` when the detection
        file has no entries or the class has no ground truth boxes.

    Raises:
        KeyError: If a detection names an image absent from ``datasetfile``.
    """
    print('tfl eval of class: ', classname)
    with open(datasetfile, 'r') as imgfile:
        imagenames = [line.strip() for line in imgfile if line.strip()]

    # load annots and keep only the gt objects of this class
    class_recs = {}
    npos = 0
    for filename in imagenames:
        # Image id = the piece between the last two dots of the base name;
        # a name without any dot is used as is.
        parts = filename.split('/')[-1].split('.')
        imagename = parts[-2] if len(parts) > 1 else parts[0]
        annofile = filename.replace('images', 'labels').replace('png', 'txt')
        R = [obj for obj in parse_tfluisee(annofile) if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        # np.bool was removed from NumPy; the builtin bool is equivalent.
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        npos += int(np.sum(~difficult))
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': [False] * len(R)}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        splitlines = [line.split() for line in f if line.strip()]
    print('det image len: ', len(splitlines))
    if not splitlines:
        return 0, 0, 0
    if npos == 0:
        # Without ground truth, recall is undefined.
        print('********************************************classname: ', classname)
        return 0, 0, 0

    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence, highest first
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # intersection (the +1 treats coordinates as inclusive pixels)
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            # A match with a difficult box counts as neither TP nor FP.
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = True
                else:
                    # This gt box was already claimed by a stronger detection.
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap

if __name__ == "__main__":
    # Script entry point: evaluate every class that has a detection file in
    # ../results, first with the VOC07 11-point metric, then with the
    # area-under-curve metric.
    path = os.getcwd()
    # '{}' is filled with the class name by tfl_eval.
    detpath = os.path.join(path, '..', 'results/{}.txt')
    annopath = os.path.join(path, '..', 'tfluisee/valid.txt')
    datasetfile = os.path.join(path, '..', 'tfluisee/valid.txt')
    resultspath = os.path.join(path, '..', 'results')
    thresh = 0.5

    # One detection file per class: results/<classname>.txt
    classnames = sorted(
        name[:-len('.txt')] for name in os.listdir(resultspath) if name.endswith('.txt')
    )

    for use_07_metric in (True, False):
        if not use_07_metric:
            print('***************FalseFalse**************************************************')

        # single class
        recall, precision, ap = tfl_eval(detpath, annopath, datasetfile, 'circle_green', thresh, use_07_metric)

        # multi class
        mAP = tuple(
            tfl_eval(detpath, annopath, datasetfile, classname, thresh, use_07_metric)[2]
            for classname in classnames
        )
        # An empty results directory would otherwise divide by zero.
        mean_ap = float(sum(mAP) / len(mAP)) if mAP else 0.0
        print('AP: {}\nmAP: {}'.format(mAP, mean_ap))

View Code

对红绿灯的颜色进行评估

【CV源码解析】Yolov3-Darknet版本计算mAP_xml

【CV源码解析】Yolov3-Darknet版本计算mAP_xml_02

import os
import numpy as np
# imagesetfile = "../tfluisee/valid.txt"
# annopath = ""
# imgfile = open(imagesetfile, r)
# lines = imgfile.readlines()
# imagename = lines[0].strip()
# print(imagename)
# imagenames = [x.strip() for x in lines]
# # annopath = .xml
# # annopath.format(imagename)
# 
# # parse_tfluisee_anno
# annoname = imagename.replace(images, labels)
# annoname = annoname.replace(png, txt)
# annofile = open(annoname, r)
# annolines = annofile.readlines()
# annoline = annolines[0].strip().split( )
# clas = tuple(map(int, annoline[0]))
# bbox = tuple(map(float, annoline[1:]))

# det
# Label coordinates are multiplied by these two values to obtain pixel
# coordinates, so they are presumably the image width/height in pixels.
img_w = 1280
img_h = 720
# Class names indexed by the integer class id stored in the label files:
# circle_green:0, circle_red:1, circle_yellow:2, circle_off:3, left_green:4,
# left_red:5, left_yellow:6, left_off:7, nomotor_green:8, nomotor_red:9,
# nomotor_yellow:10, nomotor_off:11
tfl_label = ['circle_green', 'circle_red', 'circle_yellow', 'circle_off',
             'left_green', 'left_red', 'left_yellow', 'left_off',
             'nomotor_green', 'nomotor_red', 'nomotor_yellow', 'nomotor_off']


def parse_tfluisee(filename):
    """Read a TFL uisee txt label file and return its objects.

    Every non-empty line is interpreted as ``class_id xc yc w h``. Box values
    are scaled by ``img_w`` / ``img_h`` and turned from centre/size form into
    integer corner coordinates.

    Args:
        filename: Path of the label file to read.

    Returns:
        A list of dicts, one per labelled object, with the keys ``'name'``
        (the ``tfl_label`` entry for the class id), ``'bbox'``
        (``[xmin, ymin, xmax, ymax]`` as ints, clipped to
        ``[0, img_w - 1]`` x ``[0, img_h - 1]``) and ``'difficult'``
        (always 0, i.e. easy).
    """
    objects = []
    with open(filename, 'r') as annofile:
        for annoline in annofile:
            tokens = annoline.split()
            if not tokens:
                # Nothing to parse on a blank line.
                continue
            # Parse the complete first token as the id; iterating over its
            # characters would reduce ids >= 10 to their leading digit.
            label_index = int(tokens[0])
            xc, yc, w, h = (float(value) for value in tokens[1:5])
            # (xc, yc, w, h) ---> (xmin, ymin, xmax, ymax)
            xmin = max((xc - w * 0.5) * img_w, 0)
            ymin = max((yc - h * 0.5) * img_h, 0)
            xmax = min((xc + w * 0.5) * img_w, img_w - 1)
            ymax = min((yc + h * 0.5) * img_h, img_h - 1)
            obj_struct = {
                'name': tfl_label[label_index],
                'bbox': [int(xmin), int(ymin), int(xmax), int(ymax)],
                'difficult': 0,  # 0-easy, 1-difficult
            }
            objects.append(obj_struct)
    return objects

def voc_ap(rec, prec, use_07_metric=False):
    """Compute the VOC average precision from recall and precision arrays.

    When ``use_07_metric`` is true the VOC07 11-point average is returned;
    by default the area under the monotone precision envelope is returned.
    """
    if use_07_metric:
        # 11 point metric
        total = 0.
        for cutoff in np.arange(0., 1.1, 0.1):
            mask = rec >= cutoff
            # No recall value reaches this cutoff -> contributes zero.
            peak = 0 if np.sum(mask) == 0 else np.max(prec[mask])
            total = total + peak / 11.
        return total

    # Exact area: add sentinel values at both ends of the curve.
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))

    # Make precision non-increasing by sweeping from the last point back.
    for k in reversed(range(1, mpre.size)):
        mpre[k - 1] = np.maximum(mpre[k - 1], mpre[k])

    # Sum (delta recall) * precision over the points where recall changes.
    changed = np.where(mrec[1:] != mrec[:-1])[0]
    delta_recall = mrec[changed + 1] - mrec[changed]
    return np.sum(delta_recall * mpre[changed + 1])

def tfl_eval(detpath, annopath, datasetfile, classname, ovthresh, use_07_metric=False):
    """Evaluate traffic-light detections grouped by colour, PASCAL VOC style.

    ``'red'``, ``'green'`` and ``'yellow'`` each pool the circle, left and
    nomotor classes of that colour: their ground truth boxes and their
    detection files are merged before matching. Any other ``classname`` is
    evaluated as a single class on its own.

    Args:
        detpath: Template of the per-class detection file;
            ``detpath.format(name)`` is opened for every pooled class. Every
            non-empty line is read as
            ``image_id confidence xmin ymin xmax ymax``.
        annopath: Not used by this function; kept so existing callers keep
            working.
        datasetfile: Text file with one image path per line. The label file
            of an image is found by replacing ``'images'`` with ``'labels'``
            and ``'png'`` with ``'txt'`` in that path.
        classname: Colour group or single class name to evaluate.
        ovthresh: IoU a detection must strictly exceed to match a ground
            truth box.
        use_07_metric: Passed to ``voc_ap``; selects the 11-point metric.

    Returns:
        ``(rec, prec, ap)`` where ``rec`` and ``prec`` are cumulative arrays
        ordered by descending confidence, or ``(0, 0, 0)`` when there are no
        detections or no ground truth boxes for the group.

    Raises:
        KeyError: If a detection names an image absent from ``datasetfile``.
    """
    color_groups = {
        'red': ['circle_red', 'left_red', 'nomotor_red'],
        'green': ['circle_green', 'left_green', 'nomotor_green'],
        'yellow': ['circle_yellow', 'left_yellow', 'nomotor_yellow'],
    }
    # Falling back to the class itself keeps `subname` defined for every
    # input instead of failing later with an unbound local.
    subname = color_groups.get(classname, [classname])

    with open(datasetfile, 'r') as imgfile:
        imagenames = [line.strip() for line in imgfile if line.strip()]

    # load annots and keep only the gt objects of this group
    class_recs = {}
    npos = 0
    for filename in imagenames:
        # Image id = the piece between the last two dots of the base name;
        # a name without any dot is used as is.
        parts = filename.split('/')[-1].split('.')
        imagename = parts[-2] if len(parts) > 1 else parts[0]
        annofile = filename.replace('images', 'labels').replace('png', 'txt')
        R = [obj for obj in parse_tfluisee(annofile) if obj['name'] in subname]
        bbox = np.array([x['bbox'] for x in R])
        # np.bool was removed from NumPy; the builtin bool is equivalent.
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        npos += int(np.sum(~difficult))
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': [False] * len(R)}

    # read dets of every class in the group
    splitlines = []
    for name in subname:
        with open(detpath.format(name), 'r') as f:
            splitlines += [line.split() for line in f if line.strip()]
    if not splitlines:
        return 0, 0, 0
    if npos == 0:
        # Without ground truth, recall is undefined.
        return 0, 0, 0

    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence, highest first
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # intersection (the +1 treats coordinates as inclusive pixels)
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            # A match with a difficult box counts as neither TP nor FP.
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = True
                else:
                    # This gt box was already claimed by a stronger detection.
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap

if __name__ == "__main__":
    # Script entry point: evaluate the traffic-light colours (red, green,
    # yellow) with the VOC07 11-point metric and print the per-colour AP and
    # their mean.
    path = os.getcwd()
    # '{}' is filled with each class name by tfl_eval.
    detpath = os.path.join(path, '..', 'results/{}.txt')
    annopath = os.path.join(path, '..', 'tfluisee/valid.txt')
    datasetfile = os.path.join(path, '..', 'tfluisee/valid.txt')
    thresh = 0.45
    use_07_metric = True
    print('thresh: {}\t use_07_metric: {}'.format(thresh, use_07_metric))

    # single class
    recall, precision, ap = tfl_eval(detpath, annopath, datasetfile, 'red', thresh, use_07_metric)

    # multi class
    color = ['red', 'green', 'yellow']
    mAP = tuple(
        tfl_eval(detpath, annopath, datasetfile, classname, thresh, use_07_metric)[2]
        for classname in color
    )
    print('AP: {}\nmAP: {}'.format(mAP, float(sum(mAP) / len(mAP))))
    
    # use_07_metric = False
    # print(thresh: \\t use_07_metric: .format(thresh, use_07_metric))
    # recall, precision, ap = tfl_eval(detpath, annopath, datasetfile, classname, thresh, use_07_metric)
    # # print(use_07_metric: \\tclassname: \\t recall:\\t precision: \\t ap: \\t.format(use_07_metric, classname, recall, precision, ap))
    # 
    # # multi class
    # resultspath = os.path.join(path, .., results)
    # subfiles = os.listdir(resultspath)
    # mAP = []
    # for i in range(len(subfiles)):
    #     classname = subfiles[i].split(.txt)[0]
    #     recall, precision, ap = tfl_eval(detpath, annopath, datasetfile, classname, thresh, use_07_metric)
    #     # print(classname: \\t recall:\\t precision: \\t ap: \\t.format(classname, recall, precision, ap))
    #     mAP.append(ap)
    # mAP = tuple(mAP)
    # print(AP: \\nmAP: .format(mAP, float(sum(mAP)/len(mAP))))