yolov5和resnet152模型预测

Posted Wchime

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了yolov5和resnet152模型预测相关的知识,希望对你有一定的参考价值。

我已经训练完成了yolov5检测和resnet152分类的模型,下面开始对一张图片进行检测分类。

首先用yolo算法对猫和狗进行检测,然后将检测到的目标进行裁剪,然后用resnet152对裁剪的图片进行分类。

首先我有以下这些训练好的模型

 猫狗检测的,猫的分类,狗的分类

 

我的预测文件my_detect.py

import os
import sys
from pathlib import Path

from tools_detect import draw_box_and_save_img, dataLoad, predict_classify, detect_img_2_classify_img, get_time_uuid

# Locate this script and make sure its folder is importable, so that the
# YOLOv5 packages living next to it can be imported below.
FILE = Path(__file__).resolve()
ROOT = FILE.parent  # YOLOv5 root directory
if not any(entry == str(ROOT) for entry in sys.path):
    sys.path.append(str(ROOT))  # add ROOT to PATH
# From here on ROOT is expressed relative to the current working directory.
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))

from models.common import DetectMultiBackend
from utils.general import (non_max_suppression)
from utils.plots import save_one_box

import config as cfg

# Module-level shortcuts for the values read from config.py.
# Thresholds passed to non_max_suppression in detect_img.
conf_thres, iou_thres = cfg.conf_thres, cfg.iou_thres

# Square input edge lengths: detector input and classifier input.
detect_size, classify_size = cfg.detect_img_size, cfg.classify_img_size


def detect_img(img, device, detect_weights='', detect_class=None, save_dir=''):
    """Detect cats/dogs in one image, then classify the breed of each detection.

    Args:
        img: Encoded image bytes (e.g. the content of a file opened in 'rb' mode).
        device: Device handed to the data loader, the detector and the classifiers.
        detect_weights: Path of the YOLOv5 detection weights.
        detect_class: Detector class names indexed by class id. Class id 0 is
            routed to the cat classifier, every other id to the dog classifier.
        save_dir: Output image path forwarded to ``draw_box_and_save_img``.

    Returns:
        ``(ret_li, ret_bytes)``. ``ret_li`` is a list of dicts with the keys
        ``'details'`` (breed name), ``'label'`` (class name followed by a
        running number) and ``'conf'`` (confidence rounded to two decimals).
        ``ret_bytes`` is the value returned by ``draw_box_and_save_img``.
    """
    # Avoid a shared mutable default argument.
    detect_class = [] if detect_class is None else detect_class

    # Load and preprocess the image.
    imgsz = (detect_size, detect_size)
    im0s, im = dataLoad(img, imgsz, device)

    # Load the detector and run it once on a dummy input.
    model = DetectMultiBackend(detect_weights, device=device)
    names = model.names
    model.warmup(imgsz=(1, 3, *imgsz))  # warmup

    pred = model(im, augment=False, visualize=False)
    pred = non_max_suppression(pred, conf_thres, iou_thres, None, False, max_det=1000)

    im0 = im0s.copy()
    # Draw boxes and save the picture. The helper also rescales the boxes in
    # ``pred`` in place to original-image coordinates, which the loop below
    # relies on, so it must run first.
    ret_bytes = draw_box_and_save_img(pred, names, detect_class, save_dir, im0, im)

    ret_li = []
    im0_arc = int(im0.shape[0]) * int(im0.shape[1])
    count = 1
    for det in reversed(pred[0]):
        # Skip targets covering less than 1% of the image (same rule as the
        # drawing helper, so the running numbers stay aligned with the boxes).
        xyxy_arc = (int(det[2]) - int(det[0])) * (int(det[3]) - int(det[1]))
        if xyxy_arc / im0_arc < 0.01:
            continue

        # Crop the detection, then convert it to the classifier's input format.
        xyxy = det[:4]
        im_crop = save_one_box(xyxy, im0, file=Path('im.jpg'), gain=1.1, pad=10,
                               square=False, BGR=False, save=False)
        im_crop = detect_img_2_classify_img(im_crop, classify_size, device)

        c = int(det[-1])
        label = detect_class[c]
        # Route by class id rather than by label text: comparing names picks
        # the cat model for every detection when the names are equal or empty.
        if c == 0:
            classify_predict = predict_classify(cfg.cat_weight, im_crop, device)
            classify_label = cfg.cat_class[int(classify_predict)]
        else:
            classify_predict = predict_classify(cfg.dog_weight, im_crop, device)
            classify_label = cfg.dog_class[int(classify_predict)]

        ret_li.append({
            'details': classify_label,
            'label': label + str(count),
            'conf': round(float(det[-2]), 2),
        })
        count += 1

    return ret_li, ret_bytes


def start_predict(img, save_dir=''):
    """Run detection and breed classification with the settings from config.py.

    Args:
        img: Encoded image bytes.
        save_dir: Output image path, forwarded to ``detect_img``.

    Returns:
        The ``(ret_li, ret_bytes)`` tuple produced by ``detect_img``.
    """
    return detect_img(img, cfg.device, cfg.detect_weight, cfg.detect_class, save_dir)


if __name__ == '__main__':
    # Give every run its own output file; the placeholder was missing before,
    # so each run wrote to the literal file name "name.jpg".
    name = get_time_uuid()
    save_dir = f'./save/{name}.jpg'
    # Other sample images:
    #   ./test_img/hashiqi20230312_00010.jpg
    #   ./test_img/kejiquan20230312_00046.jpg
    path = r'./test_img/hashiqi20230312_00116.jpg'
    # Read the raw encoded bytes; the context manager closes the file even
    # if reading fails.
    with open(path, 'rb') as f:
        img = f.read()
    img_ret_li, img_bytes = start_predict(img, save_dir=save_dir)
    print(img_ret_li)

 

我的tools_detect.py文件

import datetime
import os
import random
import sys
import time
from pathlib import Path

import torch
from PIL import Image
from torch import nn

from utils.augmentations import letterbox

# Locate this module and make sure its folder is importable, so that the
# YOLOv5 packages living next to it can be imported below.
FILE = Path(__file__).resolve()
ROOT = FILE.parent  # YOLOv5 root directory
if not any(entry == str(ROOT) for entry in sys.path):
    sys.path.append(str(ROOT))  # add ROOT to PATH
# From here on ROOT is expressed relative to the current working directory.
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))

from utils.general import (cv2,
                           scale_boxes, xyxy2xywh)
from utils.plots import Annotator, colors
import numpy as np

def bytes_to_ndarray(byte_img):
    """
    Decode encoded image bytes into a numpy image via OpenCV (colour mode).
    """
    raw = np.asarray(bytearray(byte_img), dtype=np.uint8)
    return cv2.imdecode(raw, cv2.IMREAD_COLOR)


def ndarray_to_bytes(ndarray_img):
    """
    Encode a numpy image as JPEG and return the encoded bytes.

    Raises:
        ValueError: if OpenCV reports that the image could not be encoded.
    """
    ok, buf = cv2.imencode(".jpg", ndarray_img)
    if not ok:
        # The status flag used to be ignored, which hid encoding failures.
        raise ValueError("failed to encode image as JPEG")
    # The encoded buffer is already a uint8 array; dump it directly instead
    # of round-tripping it through a PIL image.
    return buf.tobytes()

def get_time_uuid():
    """
    Build a time-based identifier made of digits only.

    :return: 23 characters: a 20-digit local timestamp (YYYYmmddHHMMSS plus
             microseconds) followed by 3 random digits,
             e.g. 20220525140635467912 + 123
    :PS: under high concurrency, lengthen the random suffix
    """
    now = datetime.datetime.fromtimestamp(time.time())
    # strftime always emits six microsecond digits; str(datetime) drops the
    # fraction when it is zero, which made the id length vary.
    stamp = now.strftime("%Y%m%d%H%M%S%f")
    return stamp + str(random.randint(100, 999))


def dataLoad(img, img_size, device, half=False):
    """Decode image bytes and build the detector's input tensor.

    Args:
        img: Encoded image bytes.
        img_size: Target size passed to ``letterbox``.
        device: Device the tensor is moved to.
        half: Convert the tensor to fp16 when True, fp32 otherwise.

    Returns:
        ``(image, im)``: the decoded image as returned by ``bytes_to_ndarray``
        and the preprocessed tensor with a leading batch dimension, scaled
        to the 0.0 - 1.0 range.

    Raises:
        ValueError: if the bytes cannot be decoded as an image.
    """
    image = bytes_to_ndarray(img)
    if image is None:
        # The decoder yields None for unreadable data; fail here with a clear
        # message instead of deep inside letterbox.
        raise ValueError("img could not be decoded as an image")

    im = letterbox(image, img_size)[0]  # padded resize
    im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    im = np.ascontiguousarray(im)  # contiguous

    im = torch.from_numpy(im).to(device)
    im = im.half() if half else im.float()  # uint8 to fp16/32
    im /= 255  # 0 - 255 to 0.0 - 1.0
    if len(im.shape) == 3:
        im = im[None]  # expand for batch dim

    return image, im


def draw_box_and_save_img(pred, names, class_names, save_dir, im0, im):
    """Draw the kept detections, save image and label file, return JPEG bytes.

    Args:
        pred: Per-image detection tensors (output of non-max suppression).
            The box columns are rescaled IN PLACE to ``im0`` coordinates.
        names: Model class names; only used as the font example for Annotator.
        class_names: Display names indexed by class id.
        save_dir: Path of the output image. Labels go to
            ``<dir>/labels/<stem>.txt``. When empty, nothing is written to disk.
        im0: Original image the boxes are drawn on.
        im: Network input tensor; its spatial shape is the source of the rescale.

    Returns:
        The annotated image encoded by ``ndarray_to_bytes``.
    """
    save_path = save_dir
    fontpath = "./simsun.ttc"
    min_area_ratio = 0.01  # boxes covering less of the image are skipped
    base_path, tail = os.path.split(save_path)
    file_name = tail.split('.')[0]

    for det in pred:
        annotator = Annotator(im0, line_width=3, example=str(names), font=fontpath, pil=True)
        if not len(det):
            continue
        # Map boxes from the network input size back to the original image.
        det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
        count = 1
        im0_arc = int(im0.shape[0]) * int(im0.shape[1])
        gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # width, height, width, height
        label_lines = []
        for *xyxy, conf, cls in reversed(det):
            # Skip targets that are too small.
            xyxy_arc = (int(xyxy[2]) - int(xyxy[0])) * (int(xyxy[3]) - int(xyxy[1]))
            if xyxy_arc / im0_arc < min_area_ratio:
                continue
            c = int(cls)  # integer class
            # Restored placeholders: class name + running number, then confidence.
            label = f"{class_names[c]}{count} {round(float(conf), 2)}"
            annotator.box_label(xyxy, label, color=colors(c, True))

            im0 = annotator.result()  # numpy.ndarray
            count += 1

            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
            line = (cls, *xywh)  # label format
            label_lines.append(('%g ' * len(line)).rstrip() % line + '\n')

        if save_path and label_lines:
            txt_dir = os.path.join(base_path, 'labels')
            # makedirs also creates a missing parent; os.mkdir used to fail.
            os.makedirs(txt_dir, exist_ok=True)
            txt_path = os.path.join(txt_dir, file_name)
            with open(f'{txt_path}.txt', 'a') as f:
                f.writelines(label_lines)

    if save_path:
        if base_path:
            os.makedirs(base_path, exist_ok=True)
        cv2.imwrite(save_path, im0)

    return ndarray_to_bytes(im0)


def predict_classify(model_path, img, device):
    """Load a pickled classification model and return the predicted class index.

    Args:
        model_path: Path of a checkpoint holding a whole model object.
        img: Input batch tensor for the model.
        device: Device the model is loaded onto and moved to.

    Returns:
        Tensor holding the arg-max class index, with size-1 dimensions
        squeezed away.
    """
    # SECURITY NOTE: loading a whole pickled model executes code stored in
    # the checkpoint; only load weight files from a trusted source.
    try:
        # Whole-model checkpoints need weights_only=False on PyTorch versions
        # whose torch.load defaults to weights_only=True.
        model = torch.load(model_path, map_location=device, weights_only=False)
    except TypeError:
        # Older PyTorch releases do not know the weights_only keyword.
        model = torch.load(model_path, map_location=device)
    model.to(device)
    model.eval()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        predicts = model(img)
    _, preds = torch.max(predicts, 1)
    return torch.squeeze(preds)


def detect_img_2_classify_img(img, classify_size, device):
    """Turn a cropped image array into a float32 batch tensor for the classifier.

    The crop is copied, cast to float32, resized to
    ``classify_size x classify_size``, transposed with ``(2, 0, 1)``, given a
    leading batch dimension and moved to ``device``.
    """
    as_float = np.float32(img.copy())
    resized = cv2.resize(as_float, (classify_size, classify_size))
    channels_first = resized.transpose((2, 0, 1))
    batch = torch.from_numpy(channels_first).unsqueeze(0)
    return batch.to(device)

 

我的config.py文件

import torch
import os

# Folder holding all trained weights. Built with os.path.join so the same
# config works on Windows and POSIX (the original literal was '.\weights').
base_path = os.path.join('.', 'weights')

# Cat/dog detector (YOLOv5) weights and class names, indexed by class id.
detect_weight = os.path.join(base_path, 'cat_dog_detect', 'best.pt')
# NOTE(review): the names were empty strings in the published listing.
# Index 0 is routed to the cat classifier, so it is restored as cat here;
# confirm against the names the detector was trained with.
detect_class = ['猫', '狗']

# Cat breed classifier weights and class names, indexed by the predicted id.
cat_weight = os.path.join(base_path, 'cat_predict', 'best.pt')
cat_class = ['东方短毛猫', '亚洲豹猫', '加菲猫', '安哥拉猫', '布偶猫', '德文卷毛猫', '折耳猫', '无毛猫', '暹罗猫', '森林猫', '橘猫', '奶牛猫', '狞猫', '狮子猫', '狸花猫', '玳瑁猫', '白猫', '蓝猫', '蓝白猫', '薮猫', '金渐层猫', '阿比西尼亚猫', '黑猫']

# Dog breed classifier weights and class names, indexed by the predicted id.
dog_weight = os.path.join(base_path, 'dog_predict', 'best.pt')
dog_class = ['中华田园犬', '博美犬', '吉娃娃', '哈士奇', '喜乐蒂', '巴哥犬', '德牧', '拉布拉多犬', '杜宾犬', '松狮犬', '柯基犬', '柴犬', '比格犬', '比熊', '法国斗牛犬', '秋田犬', '约克夏', '罗威纳犬', '腊肠犬', '萨摩耶', '西高地白梗犬', '贵宾犬', '边境牧羊犬', '金毛犬', '阿拉斯加犬', '雪纳瑞', '马尔济斯犬']

# Inference device. Alternative that uses the GPU when one is available:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')

# Thresholds handed to non-max suppression.
conf_thres = 0.5
iou_thres = 0.45

# Square input edge lengths for the detector and the classifiers.
detect_img_size = 416
classify_img_size = 160

 

整体文件结构

 其中models和utils文件夹都是yolov5源码的文件

运行my_detect.py的结果

 

paddlepaddle十二生肖分类之模型训练和预测

导读

这篇文章我们来介绍如何来使用paddlepaddle来训练一个十二生肖的分类模型,前面两篇文章我们分别介绍了
paddlepaddle实现十二生肖的分类之数据的预处理(一)
paddlepaddle十二生肖分类之模型(ResNet)构建(二)
这篇文章我们主要来介绍一下如何进行模型的训练,以及预测

模型训练及预测

  • 导包
import os
import paddle
from paddle.vision import transforms
from PIL import Image
import numpy as np
  • 数据加载器
class ZodiacDatasets(paddle.io.Dataset):
    """
    Image-folder dataset of the twelve zodiac animals.

    Expected layout: ``<data_root>/<mode>/<class_name>/<image file>``.
    Class ids are the positions of the class folders in sorted order.
    """
    def __init__(self, mode="train", data_root="data/signs", img_size=(224, 224)):
        """
        :param mode: which split to load: "train", "valid" or "test"
        :param data_root: folder containing one sub-folder per split
        :param img_size: size handed to the crop transform
        :raises ValueError: if ``mode`` is not one of the three splits
        """
        super(ZodiacDatasets, self).__init__()
        self.data_root = data_root
        # Validate the split. The original ``assert("...")`` asserted a
        # non-empty string, which is always true and never fired.
        if mode not in ("train", "valid", "test"):
            raise ValueError(
                "{!r} is illegal, mode must be one of train, valid, test".format(mode))
        # Folder of the requested split.
        self._data_dir_path = os.path.join(data_root, mode)
        # Class names are the sub-folders of the split. Stray files are
        # ignored; they used to make the scan below fail.
        self._zodiac_names = sorted(
            name for name in os.listdir(self._data_dir_path)
            if os.path.isdir(os.path.join(self._data_dir_path, name))
        )
        # Paths of all images, class by class.
        self._img_path_list = []
        for name in self._zodiac_names:
            img_dir_path = os.path.join(self._data_dir_path, name)
            for img_name in os.listdir(img_dir_path):
                self._img_path_list.append(os.path.join(img_dir_path, img_name))
        # Preprocessing pipeline.
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        if mode == "train":
            self._transform = transforms.Compose([
                transforms.RandomResizedCrop(img_size),   # random scale + crop to img_size
                transforms.RandomHorizontalFlip(0.5),     # horizontal flip with probability 0.5
                transforms.ToTensor(),                    # HWC ==> CHW
                normalize,                                # per-channel standardization
            ])
        else:
            self._transform = transforms.Compose([
                transforms.Resize(256),
                # Deterministic crop: evaluation must not depend on random
                # crops (this used to be RandomCrop).
                transforms.CenterCrop(img_size),
                transforms.ToTensor(),
                normalize,
            ])

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``.

        The image is opened with PIL, converted to RGB when needed and run
        through the split's transform; the label is an int64 numpy scalar
        array holding the class id.
        """
        img_path = self._img_path_list[index]
        # The class name is the parent folder; os.path handles either
        # path separator, unlike splitting on "/".
        img_label = os.path.basename(os.path.dirname(img_path))
        # Class name -> numeric label.
        label_index = self._zodiac_names.index(img_label)
        img = Image.open(img_path)
        if img.mode != "RGB":
            img = img.convert("RGB")
        img = self._transform(img)
        return img, np.array(label_index, dtype=np.int64)

    def __len__(self):
        """Return the number of images in the split."""
        return len(self._img_path_list)
  • 删除非图片格式的文件以及被损坏的图片
import tqdm

def check_image(mode="train"):
    """Delete every file of the given split that cannot be loaded as an image.

    Each sample is loaded through ``ZodiacDatasets``; if loading raises, the
    file's path is printed and the file is removed from disk.

    :param mode: split to check ("train", "valid" or "test")
    """
    # Load the split and get all of its image paths.
    datasets = ZodiacDatasets(mode)
    img_path_list = datasets._img_path_list
    for index, img_path in enumerate(tqdm.tqdm(img_path_list)):
        try:
            datasets[index]
        except Exception:
            # Deliberately broad: any failure to open or transform the file
            # marks it as unusable. The "{}" placeholder was missing before,
            # so the removed path was never printed.
            print("remove image path:{}".format(img_path))
            os.remove(img_path)

# Validate the image files of the training, validation and test splits.
for _split in ("train", "valid", "test"):
    check_image(_split)
  • 定义模型结构
import paddle
from paddle import nn

class BasicBlock(nn.Layer):
    """Two-convolution residual block (the block type of ResNet18/34)."""

    expansion = 1

    def __init__(self, inchannels, channels, stride=1, downsample=None,
                 groups=1, base_width=64, dilation=1, norm_layer=None):
        """
        :param inchannels: number of input channels of the block
        :param channels: number of output channels of the block
        :param stride: stride of the first convolution
        :param downsample: optional layer applied to the input before the
                           residual addition
        :param groups: accepted for signature parity with BottleneckBlock; unused
        :param base_width: accepted for signature parity with BottleneckBlock; unused
        :param dilation: must be 1 for this block
        :param norm_layer: normalization layer class, defaults to nn.BatchNorm2D
        :raises NotImplementedError: if ``dilation`` is greater than 1
        """
        super(BasicBlock, self).__init__()
        # Fall back to batch norm when no layer is given. The condition was
        # inverted, which left norm_layer as None and crashed below.
        if norm_layer is None:
            norm_layer = nn.BatchNorm2D
        if dilation > 1:
            # A bare string cannot be raised; use a real exception type.
            raise NotImplementedError("BasicBlock not support dilation > 1")
        # bias_attr=False: the convolution has no bias term.
        self.conv1 = nn.Conv2D(inchannels, channels, 3, padding=1,
                               stride=stride, bias_attr=False)
        self.bn1 = norm_layer(channels)
        # Default stride 1 with kernel 3 and padding 1 keeps the spatial size.
        self.conv2 = nn.Conv2D(channels, channels, 3, padding=1, bias_attr=False)
        self.bn2 = norm_layer(channels)

        self.relu = nn.ReLU()
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply conv-bn-relu, conv-bn, add the shortcut, then relu."""
        identity = x

        # First convolution of the block.
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        # Second convolution of the block.
        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        # Residual connection.
        out += identity
        out = self.relu(out)

        return out

class BottleneckBlock(nn.Layer):
    """1x1 - 3x3 - 1x1 residual block (the block type of ResNet50/101/152)."""

    expansion = 4

    def __init__(self, inchannels, channels, stride=1, downsample=None,
                 groups=1, base_width=64, dilation=1, norm_layer=None):
        """
        :param inchannels: number of input channels of the block
        :param channels: base channel count; the block outputs
                         ``channels * expansion`` channels
        :param stride: stride of the 3x3 convolution
        :param downsample: optional layer applied to the input before the
                           residual addition
        :param groups: number of groups of the 3x3 convolution
        :param base_width: scales the width of the inner convolutions
        :param dilation: dilation (and padding) of the 3x3 convolution
        :param norm_layer: normalization layer class, defaults to nn.BatchNorm2D
        """
        super(BottleneckBlock, self).__init__()
        # Fall back to batch norm when no layer is given.
        if norm_layer is None:
            norm_layer = nn.BatchNorm2D
        # Channel count of the inner (first and second) convolutions.
        width = int(channels * (base_width / 64)) * groups

        # First convolution: 1x1.
        self.conv1 = nn.Conv2D(inchannels, width, 1, bias_attr=False)
        self.bn1 = norm_layer(width)

        # Second convolution: 3x3. ``groups`` is now passed on; it was only
        # used for the width before, so groups > 1 gave a plain convolution.
        self.conv2 = nn.Conv2D(width, width, 3,
                               padding=dilation,
                               stride=stride,
                               groups=groups,
                               dilation=dilation,
                               bias_attr=False)
        self.bn2 = norm_layer(width)

        # Third convolution: 1x1 expanding to channels * expansion.
        self.conv3 = nn.Conv2D(width, channels * self.expansion, 1, bias_attr=False)
        self.bn3 = norm_layer(channels * self.expansion)

        self.relu = nn.ReLU()
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the three conv-bn stages, add the shortcut, then relu."""
        identity = x

        # First convolution.
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        # Second convolution.
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        # Third convolution.
        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        # Residual connection.
        out += identity
        out = self.relu(out)

        return out


class ResNet(nn.Layer):
    """ResNet backbone with optional global pooling and classification head."""

    def __init__(self, block, depth, num_classes=1000, with_pool=True):
        """
        :param block: block class to stack (BasicBlock or BottleneckBlock)
        :param depth: network depth; must be a key of the table below
        :param num_classes: size of the final linear layer; no linear layer
                            is created when it is not positive
        :param with_pool: add a global average pooling layer when True
        :raises KeyError: if ``depth`` is not a supported depth
        """
        super(ResNet, self).__init__()
        # Number of blocks in each of the four stages, per depth. The braces
        # of this dict were missing from the listing.
        layer_cfg = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
            151: [3, 8, 36, 3],  # original (mistyped) key, kept for compatibility
        }
        layers = layer_cfg[depth]
        self.num_classes = num_classes
        self.with_pool = with_pool
        self._norm_layer = nn.BatchNorm2D

        self.inchannels = 64
        self.dilation = 1

        # Stem: 7x7 stride-2 convolution, batch norm, relu, 3x3 max pool.
        self.conv1 = nn.Conv2D(3, self.inchannels, kernel_size=7,
                               stride=2, padding=3, bias_attr=False)
        self.bn1 = self._norm_layer(self.inchannels)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

        # The four residual stages.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # Global average pooling.
        if with_pool:
            self.avgpool = nn.AdaptiveAvgPool2D((1, 1))

        if num_classes > 0:
            self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, channels, blocks, stride=1, dilate=False):
        """Build one stage made of ``blocks`` instances of ``block``.

        Only the first block receives ``stride`` and the downsample branch;
        the remaining blocks use the block's defaults.
        """
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation

        if dilate:
            # Trade the stride for dilation.
            self.dilation *= stride
            stride = 1

        # A projection shortcut is needed whenever the block changes the
        # spatial size or the channel count.
        if stride != 1 or self.inchannels != channels * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2D(
                    self.inchannels,
                    channels * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias_attr=False
                ),
                norm_layer(channels * block.expansion)
            )

        layers = []
        layers.append(block(self.inchannels, channels, stride, downsample, 1, 64,
                            previous_dilation, norm_layer))
        self.inchannels = channels * block.expansion

        for _ in range(1, blocks):
            layers.append(block(self.inchannels, channels, norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem, the four stages, then the optional pooling and head."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        if self.with_pool:
            x = self.avgpool(x)

        if self.num_classes > 0:
            x = paddle.flatten(x, 1)
            x = self.fc(x)

        return x

from paddle import summary

# Build a ResNet50 out of bottleneck blocks (default 1000-class head).
resnet50 = ResNet(block=BottleneckBlock, depth=50)
# Print the network structure for a single 3x224x224 input.
summary(resnet50, input_size=(1, 3, 224, 224))

  • 模型训练

在训练模型的时候,我采用的是ResNet50,只训练了20个epoch。如果想要更高的模型精度,可以尝试增大epoch数量或增加网络的层数,也可以通过堆叠(集成)多个不同的模型来提高最终的精度。

# ---- hyper-parameters ----
epochs = 20                  # number of training epochs
learning_rate = 0.01         # initial learning rate
batch_size = 128             # samples per batch
L2_decay_factor = 0.000001   # weight-decay coefficient

# ---- data: training, validation and test splits ----
train_datasets = ZodiacDatasets(mode="train")
valid_datasets = ZodiacDatasets(mode="valid")
test_datasets = ZodiacDatasets(mode="test")

# ---- network: ResNet50 with a 12-class head, wrapped in the high-level API ----
network = ResNet(BottleneckBlock, 50, num_classes=12)
model = paddle.Model(network)

# ---- optimisation ----
step_each_epoch = len(train_datasets) // batch_size
# Cosine-annealed learning rate over the total number of training steps.
lr = paddle.optimizer.lr.CosineAnnealingDecay(
    learning_rate=learning_rate,
    T_max=step_each_epoch * epochs,
)
optimizer = paddle.optimizer.Momentum(
    learning_rate=lr,
    parameters=network.parameters(),
    weight_decay=paddle.regularizer.L2Decay(L2_decay_factor),
)
# Loss function.
loss = paddle.nn.CrossEntropyLoss()
# Metric: top-1 and top-5 accuracy.
evaluate_fn = paddle.metric.Accuracy(topk=(1, 5))

# ---- training ----
model.prepare(optimizer, loss, evaluate_fn)
# Callback that writes VisualDL logs.
visualdl = paddle.callbacks.VisualDL(log_dir="visualdl_log")
model.fit(
    train_datasets,
    valid_datasets,
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,
    verbose=1,
    save_dir="./save_models/",
    callbacks=[visualdl],
)

  • 模型预测
import cv2
from matplotlib import pyplot as plt

# Rebuild the network with the same 12-class head that was used for training
network = ResNet(BottleneckBlock,50,num_classes=12)
model = paddle.Model(network)
# Load the saved checkpoint with prefix "save_models/19"
model.load("save_models/19")
# Prepare the model for prediction (no optimizer, loss or metric)
model.prepare()
# Load the test split
test_datasets = ZodiacDatasets("test")
# Class names, indexed by numeric label
label_names = test_datasets._zodiac_names
# Paths of all images in the test split
img_path_list = test_datasets._img_path_list
# Position of the next picture in the subplot grid
img_index = 1
col_num = 4
row_num = 3
# Figure size
plt.figure(figsize=(8,8))
# Sample every 52nd image starting at index 45
# NOTE(review): meant to pick 12 images for the 3x4 grid; that only holds for
# a matching test-set size. The listing is truncated below, so the increment
# of img_index is not visible here.
for i in range(45,len(img_path_list),52):
    # Path of the sampled image
    img_path = img_path_list[i]
    original_img = cv2.imread(img_path)
    original_img = cv2.cvtColor(original_img,cv2.COLOR_BGR2RGB)
    # Preprocessed image and its numeric label
    img,img_label_index = test_datasets[i]
    # Ground-truth class name
    img_label = label_names[img_label_index]
    img = paddle.unsqueeze(img,axis=0)
    # Run the model on this single-image batch
    out = model.predict_batch(img)
    # Predicted class name
    pred_label = label_names[out[0].argmax()]

    # Select the subplot the picture is drawn into
    plt.subplot(row_num,col_num,img_index)
    # (原文代码在此处被截断,后续以 plt. 开头的绘图代码缺失)

以上是关于yolov5和resnet152模型预测的主要内容,如果未能解决你的问题,请参考以下文章

ResNet50 模型总是预测 1 类

知识蒸馏IRG算法实战:使用ResNet50蒸馏ResNet18

模型推理从部署的角度看 bn 和 in 算子

第二十篇 ResNet——模型讲解

ResNet——CNN经典网络模型详解(pytorch实现)

当使用 OpenCV 完成图像加载和调整大小时,Resnet50 会产生不同的预测