Modifying YOLOv5's detect.py to detect and save videos one at a time, with separate handling of the parameters inside each video
Posted by Xiashawuyanzu
I honestly never quite followed the logic of YOLOv5's detect.py. I have read the detect logic of YOLOv3 and YOLOv4, which basically drives each video directly with OpenCV and feels clearer and easier to follow. The YOLOv5 author does not seem to use OpenCV that way, or has wrapped the OpenCV video handling into another .py file and hidden it away, which is rather opaque. So I took the bluntest approach: read all the videos under the video directory with os.listdir and detect them one by one. I also rewrote the box-drawing function (because I need to save the contents of one key frame). The detection command is python detect.py --exist-ok --nosave; since the command carries the nosave option, I dug a little into the author's drawing logic and found it still comes down to OpenCV's rectangle call, just buried fairly deep. The modified script:
import numpy as np
import argparse
import os
import sys
from pathlib import Path
import time
import shutil
from PIL import Image
import cv2
import torch
import torch.backends.cudnn as cudnn
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import DetectMultiBackend
from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr,
increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import select_device, time_sync
@torch.no_grad()
def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s)
vidpath='/home/ccf_disk/animal/test/', # file/dir/URL/glob, 0 for webcam
data=ROOT / 'data/coco128.yaml', # dataset.yaml path
imgsz=(640, 640), # inference size (height, width)
conf_thres=0.6, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
view_img=False, # show results
save_txt=False, # save results to *.txt
save_conf=False, # save confidences in --save-txt labels
save_crop=False, # save cropped prediction boxes
nosave=True, # do not save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
visualize=False, # visualize features
update=False, # update all models
project='/home/ccf_disk/animal/video_animal', # save results to project/name
name='test_1', # save results to project/name
exist_ok=True, # existing project/name ok, do not increment
line_thickness=3, # bounding box thickness (pixels)
hide_labels=False, # hide labels
hide_conf=False, # hide confidences
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
):
vidpath = str(vidpath)
videos = os.listdir(vidpath)
number = 0
for video_name in videos:
time1_start = time.time()
so = vidpath + video_name
number = number + 1
print("第%d个视频处理中" %number)
source = str(so)
save_c = 0
keep = 0
save_img = not nosave and not source.endswith('.txt') # save inference images
is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
if is_url and is_file:
source = check_file(source) # download
# Directories
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
device = select_device(device)
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
stride, names, pt, jit, onnx, engine = model.stride, model.names, model.pt, model.jit, model.onnx, model.engine
imgsz = check_img_size(imgsz, s=stride) # check image size
# Half
half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA
if pt or jit:
model.model.half() if half else model.model.float()
# Dataloader
if webcam:
view_img = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
bs = len(dataset) # batch_size
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
bs = 1 # batch_size
vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz), half=half) # warmup
dt, seen = [0.0, 0.0, 0.0], 0
for path, im, im0s, vid_cap, s in dataset:
            flag = 0
            c = 1
            time1 = 6  # key-frame check interval, in frames
# t1 = time_sync()
im = torch.from_numpy(im).to(device)
im = im.half() if half else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
# t2 = time_sync()
# dt[0] += t2 - t1
# Inference
visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
pred = model(im, augment=augment, visualize=visualize)
# t3 = time_sync()
# dt[1] += t3 - t2
# NMS
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
# dt[2] += time_sync() - t3
# Second-stage classifier (optional)
# pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
# Process predictions
for i, det in enumerate(pred): # per image
seen += 1
count = 0
if webcam: # batch_size >= 1
p, im0, frame = path[i], im0s[i].copy(), dataset.count
                    s += f'{i}: '
else:
p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # im.jpg
                txt_path = str(save_dir / 'labels' / p.stem) + (
                    '' if dataset.mode == 'image' else f'_{frame}')  # im.txt
s += '%gx%g ' % im.shape[2:] # print string
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
imc = im0.copy() if save_crop else im0 # for save_crop
annotator = Annotator(im0, line_width=line_thickness, example=str(names))
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f"n names[int(c)]'s' * (n > 1), " # add to string
# Write results
for *xyxy, conf, cls in reversed(det):
count = 1
if save_txt: # Write to file
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
with open(txt_path + '.txt', 'a') as f:
                                f.write(('%g ' * len(line)).rstrip() % line + '\n')
if save_img or save_crop or view_img: # Add bbox to image
c = int(cls) # integer class
                            label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
annotator.box_label(xyxy, label, color=colors(c, True))
if save_crop:
                                save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
                        # Manually draw a red box and label with plain OpenCV (mirrors
                        # Annotator.box_label), since --nosave skips the normal save path
                        box = xyxy
                        c = int(cls)  # integer class
                        p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
                        lw = max(round(sum(im0.shape) / 2 * 0.003), 2)  # line width
                        cv2.rectangle(im0, p1, p2, color=(0, 0, 255), thickness=lw, lineType=cv2.LINE_AA)
                        label = f'{names[c]} {conf:.2f}'
                        tf = max(lw - 1, 1)  # font thickness
                        w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0]  # text width, height
                        outside = p1[1] - h - 3 >= 0  # label fits outside box
                        cv2.putText(im0, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, lw / 3,
                                    (0, 0, 255), thickness=tf, lineType=cv2.LINE_AA)
# Stream results
im0 = annotator.result()
if view_img:
cv2.imshow(str(p), im0)
cv2.waitKey(1) # 1 millisecond
                # Key-frame logic: every time1 (= 6) frames, check whether any
                # detection appeared (count). save_c counts consecutive positive
                # checks; after 4 in a row, save one annotated key frame as a .jpg,
                # copy the whole video to the results folder, and stop processing
                # this video (the for/else idiom breaks the outer frame loop).
                if seen % time1 == 0:
                    if count == 0:
                        save_c = 0
                    else:
                        save_c = save_c + 1
                    if save_c >= 4:
                        if keep == 0:
                            im0 = cv2.cvtColor(im0, cv2.COLOR_BGR2RGB)
                            frame = Image.fromarray(np.uint8(im0))
                            # print(save_path)
                            frame.save(str(save_path.split('.')[0]) + ".jpg")
                            keep = 1
                        shutil.copy(so, save_path)
                        print('have animal')
                        break
            else:
                continue
            break
# # Save results (image with detections)
# if save_img:
# if dataset.mode == 'image':
# cv2.imwrite(save_path, im0)
# else: # 'video' or 'stream'
# if vid_path[i] != save_path: # new video
# vid_path[i] = save_path
# if isinstance(vid_writer[i], cv2.VideoWriter):
# vid_writer[i].release() # release previous video writer
# if vid_cap: # video
# fps = vid_cap.get(cv2.CAP_PROP_FPS)
# w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
# h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# else: # stream
# fps, w, h = 30, im0.shape[1], im0.shape[0]
# save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos
# vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
# vid_writer[i].write(im0)
# Print time (inference-only)
            # LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')
# Print results
# t = tuple(x / seen * 1E3 for x in dt) # speeds per image
        # LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
        if save_txt or save_img:
            s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
            # LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
if update:
strip_optimizer(weights) # update model (to fix SourceChangeWarning)
time1_end = time.time()
        print('Video %d processing time: ' % number + str(time1_end - time1_start))
# if bool == True:
# shutil.copy(so, save_path)
# else:
# pass
def parse_opt():
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'weights/best.pt', help='model path(s)')
parser.add_argument('--vidpath', type=str, default='/home/ccf_disk/animal/video/4-3/',
help='file/dir/URL/glob, 0 for webcam')
parser.add_argument('--data', type=str, default=ROOT / 'data/myvoc.yaml', help='(optional) dataset.yaml path')
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
parser.add_argument('--conf-thres', type=float, default=0.75, help='confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='show results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--visualize', action='store_true', help='visualize features')
parser.add_argument('--update', action='store_true', help='update all models')
parser.add_argument('--project', default='/home/ccf_disk/animal/video_animal_yolov5/', help='save results to project/name')
parser.add_argument('--name', default='4-3', help='save results to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(FILE.stem, opt)
return opt
def main(opt):
check_requirements(exclude=('tensorboard', 'thop'))
run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)
This is my first blog post, so this is just a quick record.
yolov5-master code walkthrough notes: the detect module
To get a better learning experience, please read this on a computer; this column is not friendly to mobile users!
This article centers on the detect.py file and traces the code from the beginning, step by step, to walk through how detect runs.
detect.py:
FILE: absolute path of the current file
ROOT: path of the whole yolov5 project (in most cases, if imports cannot be resolved after the files are downloaded, moved, or updated, check whether this path is correct)
parse_opt: defines the arguments and returns opt (which stores all parameter information)
main: ① checks the dependency packages listed in requirements
② calls run
run:
1. Determine what kind of data source was passed in (the relevant lines are quoted below)
① is_file: checks whether the input file's extension is among the supported formats (datasets.py)
② webcam: False here
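The corresponding lines in run() (quoted from the annotated source later in this post):

source = str(source)
save_img = not nosave and not source.endswith('.txt')  # save inference images
is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
if is_url and is_file:
    source = check_file(source)  # download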
2. Directories: create the folder for saving results
# Directories
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
3. Load model
# Load model
device = select_device(device)
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
① device: selects the compute device (GPU, CPU, etc.)
② model: loads the weights (together with the dataset yaml) and reports the backend framework (PyTorch, TorchScript, etc.)
DetectMultiBackend (common.py)
③ reads the model metadata (stride, names, pt)
④ imgsz: ensures the image size is a multiple of 32; if it is not, the nearest multiple of 32 is computed automatically (see the sketch below)
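As a rough sketch of what that rounding does (check_img_size delegates to a make_divisible-style helper; this is a simplified illustration, not the exact utils.general code):

import math

def make_divisible_sketch(x, divisor=32):
    # round x up to the nearest multiple of divisor, e.g. 500 -> 512
    return math.ceil(x / divisor) * divisor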
4. Dataloader: load the images to be predicted
# Dataloader
if webcam:
view_img = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
bs = len(dataset) # batch_size
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
bs = 1 # batch_size
vid_path, vid_writer = [None] * bs, [None] * bs
dataset = LoadImages(...) (datasets.py): initialization
5. Run inference: feed the model, produce predictions, and draw the detection boxes (a condensed excerpt follows this outline):
Initialization:
warmup: push one blank image through the model to warm up the GPU
Iterate over dataset (LoadImages):
im: convert the numpy image to a format PyTorch supports
/= 255: normalize from 0 - 255 to 0.0 - 1.0
expand the batch dimension
Inference: prediction
visualize (default False): if True, save the feature maps produced during inference
pred: detection boxes
augment: applies augmentation at inference time, at the cost of speed
[1, 18900, 85]: 85 = 4 box coordinates, 1 confidence, 80 class probabilities
NMS: non-maximum suppression
pred: [1, 5, 6]: 6 = 4 coordinates, 1 confidence, 1 class
Process:
det: [5, 6], 5 boxes; 6 = 4 coordinates, 1 confidence, 1 class
seen: image counter
save_path: path where the image is saved
txt_path: txt files are not saved by default
s: print string
gn: width/height of the original image, useful when saving txt labels
imc: decides whether detection boxes are cropped and saved
annotator (plots.py): draws on the original image
if len(det): draw boxes
det[:, :4]: map coordinates from the resized image back to the original image
iterate over all boxes: n counts the boxes per class -> appended to the print string s
Write results: choose how to save
Add bbox to image (the default path):
label: hide_labels and hide_conf (detect arguments) control whether the label and confidence are printed
annotator.box_label: draws the box
save_crop: default False; whether to save the cropped detection boxes
Stream: (view_img) display the results
save_img: save the image
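Condensed, the inference loop of this section looks roughly like the following (a trimmed excerpt of the annotated source below, with the timing and saving code removed):

model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
dt, seen = [0.0, 0.0, 0.0], 0
for path, im, im0s, vid_cap, s in dataset:
    im = torch.from_numpy(im).to(device)          # numpy -> torch tensor
    im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
    im /= 255                                     # 0 - 255 to 0.0 - 1.0
    if len(im.shape) == 3:
        im = im[None]                             # expand batch dimension
    pred = model(im, augment=augment)             # [1, 18900, 85]
    pred = non_max_suppression(pred, conf_thres, iou_thres,
                               classes, agnostic_nms, max_det=max_det)  # [1, n, 6]
    for i, det in enumerate(pred):                # per image
        seen += 1
        p, im0 = Path(path), im0s.copy()
        annotator = Annotator(im0, line_width=line_thickness, example=str(names))
        if len(det):
            det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                c = int(cls)
                annotator.box_label(xyxy, f'{names[c]} {conf:.2f}', color=colors(c, True))
        im0 = annotator.result()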
6. Print results
# Print results
t = tuple(x / seen * 1E3 for x in dt) # speeds per image
LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
if save_txt or save_img:
s = f"\\nlen(list(save_dir.glob('labels/*.txt'))) labels saved to save_dir / 'labels'" if save_txt else ''
LOGGER.info(f"Results saved to colorstr('bold', save_dir)s")
if update:
strip_optimizer(weights) # update model (to fix SourceChangeWarning)
t: average prediction time per image
seen: number of images predicted; dt: time spent per image at each stage
LOGGER.info: logging
Annotated detect.py source:
(lightly modified; read it alongside your own local project files)
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Run inference on images, videos, directories, streams, etc.
Usage - sources:
$ python path/to/detect.py --weights yolov5s.pt --source 0 # webcam
img.jpg # image
vid.mp4 # video
path/ # directory
path/*.jpg # glob
'https://youtu.be/Zgi9g1ksQHc' # YouTube
'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream
Usage - formats:
$ python path/to/detect.py --weights yolov5s.pt # PyTorch
yolov5s.torchscript # TorchScript
yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s.xml # OpenVINO
yolov5s.engine # TensorRT
yolov5s.mlmodel # CoreML (MacOS-only)
yolov5s_saved_model # TensorFlow SavedModel
yolov5s.pb # TensorFlow GraphDef
yolov5s.tflite # TensorFlow Lite
yolov5s_edgetpu.tflite # TensorFlow Edge TPU
"""
import argparse
import os
import sys
from pathlib import Path
import cv2
import torch
import torch.backends.cudnn as cudnn
FILE = Path(__file__).resolve()  # absolute path of the current file
# path of the whole yolov5 project
ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import DetectMultiBackend
from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr,
increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import select_device, time_sync
@torch.no_grad()
def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s)
source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam
data=ROOT / 'data/coco128.yaml', # dataset.yaml path
imgsz=(640, 640), # inference size (height, width)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
view_img=False, # show results
save_txt=False, # save results to *.txt
save_conf=False, # save confidences in --save-txt labels
save_crop=False, # save cropped prediction boxes
nosave=False, # do not save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
visualize=False, # visualize features
update=False, # update all models
project=ROOT / 'runs/detect', # save results to project/name
name='exp', # save results to project/name
exist_ok=False, # existing project/name ok, do not increment
line_thickness=3, # bounding box thickness (pixels)
hide_labels=False, # hide labels
hide_conf=False, # hide confidences
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
):
source = str(source)
save_img = not nosave and not source.endswith('.txt') # save inference images
    # check whether the input is a file path and whether its extension is among the supported formats
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))  # is this a network-stream URL (False here)
    # does the input equal 0, i.e. open the computer's webcam (default False)
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
if is_url and is_file:
source = check_file(source) # download
# Directories
    # create the folder where results are saved
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
    device = select_device(device)  # select the device: GPU, CPU, etc.
    # identifies the backend framework (PyTorch, TorchScript, etc.)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt  # read the model metadata
    # ensure the image size is a multiple of 32; if not, the nearest multiple of 32 is computed automatically
imgsz = check_img_size(imgsz, s=stride) # check image size
# Dataloader
    # load the images to be predicted
if webcam:
view_img = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
bs = len(dataset) # batch_size
else:
        # initialization
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
bs = 1 # batch_size
vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
    # run the model to produce predictions and draw the detection boxes
    # push one blank image through the model to warm up the GPU
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup
dt, seen = [0.0, 0.0, 0.0], 0
for path, im, im0s, vid_cap, s in dataset:
t1 = time_sync()
        # convert the numpy image to a format PyTorch supports
im = torch.from_numpy(im).to(device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
        # normalize the image
im /= 255 # 0 - 255 to 0.0 - 1.0
        # expand the batch dimension
if len(im.shape) == 3:
im = im[None] # expand for batch dim
t2 = time_sync()
dt[0] += t2 - t1
# Inference
        # if True, save the feature maps produced during inference
        visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
        pred = model(im, augment=augment, visualize=visualize)  # detections [1, 18900, 85]
        # [1, 18900, 85]: 4 box coordinates, 1 confidence, 80 class probabilities
        # augment applies augmentation at inference time, but slows the model down
t3 = time_sync()
dt[1] += t3 - t2
# NMS
        # non-maximum suppression
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)  # [1, 5, 6]
        # [1, 5, 6]: 6 = 4 coordinates, 1 confidence, 1 class
dt[2] += time_sync() - t3
# Second-stage classifier (optional)
# pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
# Process predictions
    # det: [5, 6], 5 boxes; 6 = 4 coordinates, 1 confidence, 1 class
for i, det in enumerate(pred): # per image
        # image counter
seen += 1
if webcam: # batch_size >= 1
p, im0, frame = path[i], im0s[i].copy(), dataset.count
            s += f'{i}: '
else:
p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
p = Path(p) # to Path
        # path where the image is saved
save_path = str(save_dir / p.name) # im.jpg
        # txt files are not saved by default
        txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
s += '%gx%g ' % im.shape[2:] # print string
        # width/height of the original image, used when saving txt labels
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
        # decides whether the detection boxes are cropped and saved
imc = im0.copy() if save_crop else im0 # for save_crop
        # drawing on the original image
annotator = Annotator(im0, line_width=line_thickness, example=str(names))
        # draw the boxes
if len(det):
# Rescale boxes from img_size to im0 size
            # map coordinates from the resized image back to the original image
det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
# Print results
                # iterate over all the boxes: n counts boxes per class and is appended to the print string s
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
# Write results
                # choose how to save (the second branch, i.e. drawing on the image, is the default)
for *xyxy, conf, cls in reversed(det):
if save_txt: # Write to file
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
if save_img or save_crop or view_img: # Add bbox to image
                        c = int(cls)  # integer class
                        # whether to print the label and the confidence
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        # draw the box
                        annotator.box_label(xyxy, label, color=colors(c, True))
                        # whether to save the cropped detection box (default False)
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
# Stream results
im0 = annotator.result()
if view_img:
cv2.imshow(str(p), im0)
cv2.waitKey(1) # 1 millisecond
# Save results (image with detections)
if save_img:
if dataset.mode == 'image':
cv2.imwrite(save_path, im0)
else: # 'video' or 'stream'
if vid_path[i] != save_path: # new video
vid_path[i] = save_path
if isinstance(vid_writer[i], cv2.VideoWriter):
vid_writer[i].release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos
vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
vid_writer[i].write(im0)
# Print time (inference-only)
        LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')
# Print results
    # average prediction time per image; seen is the number of images, dt the time spent per stage
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
if update:
strip_optimizer(weights) # update model (to fix SourceChangeWarning)
# define the arguments and return opt (which stores all parameter information)
def parse_opt():
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'runs/train//exp13/weights/best.pt', help='model path(s)')
parser.add_argument('--source', type=str, default=ROOT / 'C:/Users/Pictures/Saved Pictures/read11.avi', help='file/dir/URL/glob, 0 for webcam')
parser.add_argument('--data', type=str, default=ROOT / 'data/coco.yaml', help='(optional) dataset.yaml path')
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[960, 540], help='inference size h,w')
parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='show results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--visualize', action='store_true', help='visualize features')
parser.add_argument('--update', action='store_true', help='update all models')
parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
parser.add_argument('--name', default='exp', help='save results to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(FILE.stem, opt)
return opt
def main(opt):
    # check the dependency packages listed in requirements
check_requirements(exclude=('tensorboard', 'thop'))
run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)
common.py (models):
DetectMultiBackend: (line 279)
w: checks whether weights is a list; if so, the first element is taken as the model path
model_type: determines the model format (pt, jit, etc.) and runs the corresponding loading branch
fp16: half-precision inference
if data: loads the given yaml and reads the class names
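A minimal sketch of that dispatch, paraphrased from memory rather than copied from common.py (the loading calls are illustrative simplifications):

w = str(weights[0] if isinstance(weights, list) else weights)  # first path if a list is passed
suffix = Path(w).suffix.lower()
if suffix == '.pt':             # plain PyTorch checkpoint
    model = torch.load(w, map_location=device)['model'].float().eval()
elif suffix == '.torchscript':  # TorchScript
    model = torch.jit.load(w, map_location=device)
elif suffix == '.onnx':         # ONNX Runtime (or OpenCV DNN with --dnn)
    import onnxruntime
    session = onnxruntime.InferenceSession(w)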
datasets.py (utils):
LoadImages: (line 178)
p: resolves the relative path into an absolute path
checks whether the path contains *, is a directory, or is a file
images/videos: reads each file's extension and checks whether it is among the allowed extensions
nf: total number of files
count: counter over the files, used as an index
img0: the original image as read from disk
s: string stating which image this is, used for later printing
img: the resized image (augmentations.py -> letterbox) (width and height must be multiples of 32)
vid_cap: (None for images)
Convert: HWC/BGR to CHW/RGB
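A simplified sketch of that enumeration logic (IMG_FORMATS and VID_FORMATS are the tuples imported from utils.datasets; the real LoadImages also sets up the video reader):

import glob
import os
from pathlib import Path

p = str(Path(source).resolve())                        # absolute path
if '*' in p:
    files = sorted(glob.glob(p, recursive=True))       # glob pattern
elif os.path.isdir(p):
    files = sorted(glob.glob(os.path.join(p, '*.*')))  # directory
elif os.path.isfile(p):
    files = [p]                                        # single file
else:
    raise FileNotFoundError(f'{p} does not exist')
images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
nf = len(images) + len(videos)                         # total number of files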
augmentations.py (utils):
letterbox: (line 91)
r: scaling ratio driven by the long side (target size / long side)
Pad the image:
if auto: when auto is True (the default), the padding is taken modulo the stride, so the padded width/height only need to reach the nearest multiple of 32 (minimum rectangle); see the sketch below
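A minimal letterbox sketch along those lines (square target size and gray fill assumed; the real function also handles scale-up control and returns the ratio and padding):

import cv2

def letterbox_sketch(img, new_size=640, stride=32, auto=True):
    h, w = img.shape[:2]
    r = new_size / max(h, w)                      # scale ratio from the long side
    new_w, new_h = int(round(w * r)), int(round(h * r))
    img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    dw, dh = new_size - new_w, new_size - new_h   # total padding needed
    if auto:                                      # minimum rectangle: only pad up
        dw, dh = dw % stride, dh % stride         # to the nearest stride multiple
    top, bottom = dh // 2, dh - dh // 2           # split padding on both sides
    left, right = dw // 2, dw - dw // 2
    return cv2.copyMakeBorder(img, top, bottom, left, right,
                              cv2.BORDER_CONSTANT, value=(114, 114, 114))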
plots.py (utils):
Annotator: (line 68)
Initialization:
if-else: draws with OpenCV by default
box_label: draws the box and the label (a standalone version follows below)
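The first script in this post inlines essentially the same drawing logic with plain OpenCV; pulled out as a standalone helper it looks like this:

import cv2

def draw_box_label(im0, xyxy, label, color=(0, 0, 255)):
    lw = max(round(sum(im0.shape) / 2 * 0.003), 2)           # line width from image size
    p1, p2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3]))
    cv2.rectangle(im0, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
    tf = max(lw - 1, 1)                                      # font thickness
    w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0]
    outside = p1[1] - h - 3 >= 0                             # does the label fit above the box
    cv2.putText(im0, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
                0, lw / 3, color, thickness=tf, lineType=cv2.LINE_AA)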
These notes are intended purely as a code-reading record, not as a project review or development guide. If I have misunderstood anything, I would appreciate corrections from readers; please bear with my limited knowledge.