如何正确使用带有 C++ 的 tensorflow 从 YOLO 模型中获取输出?
Posted
技术标签:
【中文标题】如何正确使用带有 C++ 的 tensorflow 从 YOLO 模型中获取输出?【英文标题】:How to get the output from YOLO model using tensorflow with C++ correctly? 【发布时间】:2020-04-27 20:06:15 【问题描述】:我正在尝试使用 C++ 中的 YOLO 模型编写推理程序。我搜索了一些关于darknet的信息,但是它必须使用.cfg文件来导入模型结构(这对我来说有点太复杂了......),所以我想用tensorflow做这个程序。
(我的模型权重从.hdf5(用于python)转换为.pb(用于C++))
我找到了一些用python写的例子,好像他们在推理过程之前做了一些工作......Source
def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=50,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes.

    Args:
        yolo_outputs: list of raw output tensors of the YOLO model, one per
            detection layer (its length selects the anchor mask below).
        anchors: anchor array, indexed with the per-layer anchor mask.
        num_classes: number of object classes.
        image_shape: shape of the original image, forwarded to
            ``yolo_boxes_and_scores``.
        max_boxes: maximum number of boxes kept per class by NMS.
        score_threshold: boxes whose class score is below this are dropped.
        iou_threshold: IoU threshold passed to non-max suppression.

    Returns:
        Tuple ``(boxes_, scores_, classes_)``: the per-class NMS results
        concatenated along axis 0.
    """
    num_layers = len(yolo_outputs)
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] # default setting
    # Network input size derived from the first output's spatial dimensions
    # (presumably that layer has a stride of 32 -- TODO confirm).
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []
    # Decode every detection layer into boxes and per-class scores.
    for l in range(num_layers):
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
            anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []
    # Run non-max suppression independently for each class.
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(
            class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        # One class id per surviving box.
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
我已经打印出返回值,它看起来像这样:
boxes-> Tensor("concat_11:0", shape=(?, 4), dtype=float32)
scores-> Tensor("concat_12:0", shape=(?,), dtype=float32)
classes-> Tensor("concat_13:0", shape=(?,), dtype=int32)
我的 YOLO 模型(.hdf5)的原始输出如下(通过打印 model.output 得到):
tf.Tensor 'conv2d_59_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32
tf.Tensor 'conv2d_67_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32
tf.Tensor 'conv2d_75_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32
而python代码的推理部分是
# Run the post-processed graph: fetch the filtered boxes, scores and classes
# for one preprocessed image.
out_boxes, out_scores, out_classes = sess.run(
    [boxes, scores, classes],
    feed_dict={
        yolo_model.input: image_data,
        # presumably (height, width) of the original image -- TODO confirm
        input_image_shape: [image.size[1], image.size[0]],
        # 0 selects the Keras test (inference) phase
        K.learning_phase(): 0
    })
对比python版本的推理代码, C++ 部分是... (Reference)
int main()
string image = "test.jpg";
string graph = "yolo_weight.pb";
string labels = "coco.names";
int32 input_width = 416;
int32 input_height = 416;
float input_mean = 0;
float input_std = 255;
string input_layer = "input_1:0";
std::vector<std::string> output_layer = "conv2d_59/BiasAdd:0", "conv2d_67/BiasAdd:0", "conv2d_75/BiasAdd:0" ;
std::unique_ptr<tensorflow::Session> session;
string graph_path = tensorflow::io::JoinPath(root_dir, graph);
Status load_graph_status = LoadGraph(graph_path, &session);
std::vector<Tensor> resized_tensors;
string image_path = tensorflow::io::JoinPath(root_dir, image);
Status read_tensor_status = ReadTensorFromImageFile(image_path, input_height, input_width,
input_mean, input_std, &resized_tensors);
Tensor inpTensor = Tensor(DT_FLOAT, TensorShape( 1, input_height, input_width, 3 ));
std::vector<Tensor> outputs;
cv::Mat srcImage = cv::imread(image);
cv::resize(srcImage, srcImage, cv::Size(input_width, input_height));
srcImage.convertTo(srcImage, CV_32FC3);
srcImage = srcImage / 255;
string ty = type2str(srcImage.type());
float *p = (&inpTensor)->flat<float>().data();
cv::Mat tensorMat(input_height, input_width, CV_32FC3, p);
srcImage.convertTo(tensorMat, CV_32FC3);
Status run_status = session->Run( input_layer, inpTensor , output_layer , , &outputs);
int cc = 1;
auto output_detection_class = outputs[0].tensor<float, 4>();
std::cout << "detection scores" << std::endl;
std::cout << "typeid(output_detection_scoreclass).name->" << typeid(output_detection_class).name() << std::endl;
for (int i = 0; i < 13; ++i)
for (int j = 0; j < 13; ++j)
for (int k = 0; k < 21; ++k)
// using (index_1, index_2, index_3) to access the element in a tensor
printf("i->%d, j->%d, k->%d\t", i, j, k);
std::cout << output_detection_class(1, i, j, k) << "\t";
cc += 1;
if (cc % 4 == 0)
std::cout << "\n";
std::cout << std::endl;
return 0;
c++版本推断部分的输出是
outputs.size()-> 3
outputs[0].shape()-> [1,13,13,21]
outputs[1].shape()-> [1,26,26,21]
outputs[2].shape()-> [1,52,52,21]
但是我得到的输出很奇怪......
(outputs[0] 的输出值看起来不像是分数、类或坐标中的任何一个...)
所以我想知道是不是因为我错过了在推断之前用 python 编写的部分?还是我使用错误的方式获取输出数据?
我已经检查了一些相关的问题和答案...
1.Yolo v3 model output clarification with keras
2.Convert YoloV3 output to coordinates of bounding box, label and confidence
3.How to access tensorflow::Tensor C++
但我还是不知道怎么做:(
我还找到了一个repo,这可能会有所帮助, 我看了一下它的yolo.cpp,但是它的模型输出张量的形状和我的不一样,我不确定我是否可以直接修改代码,它的输出张量是
tf.Tensor 'import/output:0' shape=(?, 735) dtype = float32
感谢任何帮助或建议...
【问题讨论】:
【参考方案1】:如果您仍在为此苦恼:我没有看到您在哪里对输出层的值应用 Sigmoid 和 Exp。
您可以看看这篇文章,它描述了如何处理输出。
https://medium.com/analytics-vidhya/yolo-v3-theory-explained-33100f6d193
【讨论】:
抱歉回复晚了,我已经想出了解决这个问题的另一种方法,有空我会更新答案。 :D【参考方案2】:正如 Bryan 所说,输出层仍然需要执行一些操作。
所以在我的情况下(根据 this repo),我把下面的方法添加到 YOLO 类(位于文件 yolo.py)中,以便在保存模型时把这些后处理一并写入:
def output_pb(self, out_dir, out_pb):
    """Freeze the session graph up to the post-processed outputs and save it.

    The graph is cut at the nodes producing ``self.boxes``, ``self.scores``
    and ``self.classes``, its variables are converted to constants, training
    nodes are removed, and the result is written as a binary protobuf.

    Args:
        out_dir: directory the file is written to.
        out_pb: file name of the written graph.
    """
    # Tensor names look like "<node>:<index>"; the freezing API takes node names.
    out_bx = self.boxes.name.split(":")[0]
    out_sc = self.scores.name.split(":")[0]
    out_cs = self.classes.name.split(":")[0]
    print(out_bx, out_sc, out_cs)
    constant_graph = tf.graph_util.convert_variables_to_constants(
        self.sess, self.sess.graph.as_graph_def(), [out_bx, out_sc, out_cs])
    frozen_graph = tf.graph_util.remove_training_nodes(constant_graph)
    tf.io.write_graph(frozen_graph, out_dir, out_pb, as_text=False)
    print("===== FINISH saving new pb file =====")
保存模型时,我这样调用函数:
# Build the YOLO object from the configuration dictionary.
yolo = YOLO(**config)
# Export the graph, including the post-processing outputs, as a .pb file.
yolo.output_pb(output_dir, output_pb_name)
在 C++ 中进行推理时, 整个过程是这样的:
// initialize model
YOLO* YOLO_data = (YOLO*)Init_DllODM_object(config);
// do some stuff to set data in YOLO_data
cv::Mat input_pic = "whatever_pic.png";
predict(YOLO_data, input_pic, YOLO_data ->bbox_res, YOLO_data ->score_res, YOLO_data ->class_res);
// draw result on pic
cv::Mat res = show_result(YOLO_data, input_pic);
详细代码在这里:
// yolo_cpp.h
// Inference context shared by Init_DllODM_object, predict and show_result.
struct YOLO
{
    // Detections scoring below this are not drawn and are removed by show_result.
    float score_thres = 0.0f;

    // Results of the last detection; entry i of class_res / score_res belongs
    // to the four floats bbox_res[4*i .. 4*i+3], read by show_result as
    // y_min, x_min, y_max, x_max.
    std::vector<int> class_res;
    std::vector<float> bbox_res;
    std::vector<float> score_res;

    // Graph node names found while scanning the loaded graph.
    std::string inp_tensor_name;             // node whose name contains "input_"
    std::string placeholder_name;            // node whose name contains "Placeholder_"
    std::vector<std::string> out_tensors;    // nodes fetched by predict

    // Created by NewSession; nullptr until then.
    Session* session = nullptr;

    Tensor t;                                // 2-element float tensor holding inp_pic_size
    Tensor inpTensor;                        // float tensor of shape [1, MD_size[0], MD_size[1], 3]
    std::vector<tensorflow::Tensor> outTensor; // outputs of the last Session::Run

    std::vector<int> MD_size;                // config["input_size"]
    std::vector<int> inp_pic_size;           // config["input_pic_size"]
    std::vector<std::string> md_class_list;  // label text, indexed by class id
    std::vector<cv::Scalar> color_list;      // box colour, indexed by class id

    int show_score = 0;     // non-zero: append the score to the label
    int score_type = 0;     // non-zero: print the score as a percentage
    int return_origin = 0;  // not used by the functions shown here
};
// yolo_cpp.cpp
// Creates the inference context described by `config`.
//
// Reads config["model"] (path of the frozen .pb graph), config["input_size"]
// and config["input_pic_size"] (each needs at least two integers), loads the
// graph, records the input / placeholder / output node names, allocates the
// input tensors and creates a TensorFlow session holding the graph.
//
// Returns a heap-allocated YOLO* (as void*). Returns nullptr when the
// configured sizes are too short or the session cannot be created. A failure
// to load the model or to add the graph to the session is only logged, as
// before; the object is still returned.
void* Init_DllODM_object(json config)
{
    // Read the whole configuration before allocating, so that an exception
    // thrown by the json accessors cannot leak the YOLO object.
    const std::string model_path = config["model"].get<std::string>();
    std::vector<int> MD_size_ = config["input_size"];
    std::vector<int> inp_pic_size_ = config["input_pic_size"];
    if (MD_size_.size() < 2 || inp_pic_size_.size() < 2)
    {
        std::cout << "ERROR: input_size and input_pic_size need two values each" << std::endl;
        return nullptr;
    }

    YOLO* YOLO_data = new YOLO();

    auto options = tensorflow::SessionOptions();
    GraphDef graphdef;
    // loading model to graph
    Status status_load = ReadBinaryProto(Env::Default(), model_path, &graphdef);
    if (!status_load.ok())
    {
        std::cout << "ERROR: Loading model failed..." << std::endl;
        std::cout << model_path << status_load.ToString() << "\n";
    }
    options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(0.7);
    options.config.mutable_gpu_options()->set_allow_growth(true);

    // Find the feed and fetch nodes. The fetch nodes are picked purely by
    // position (5th, 3rd and 1st node from the end) -- presumably boxes,
    // scores and classes of the exported graph; verify against the .pb file.
    // A node matching one of the name patterns is never taken as an output.
    const int node_count = graphdef.node_size();
    for (int i = 0; i < node_count; i++)
    {
        const auto& n = graphdef.node(i); // reference: avoid copying every NodeDef
        if (n.name().find("input_") != std::string::npos)
        {
            YOLO_data->inp_tensor_name = n.name();
        }
        else if (n.name().find("Placeholder_") != std::string::npos)
        {
            YOLO_data->placeholder_name = n.name();
        }
        else if (i == node_count - 5 || i == node_count - 3 || i == node_count - 1)
        {
            YOLO_data->out_tensors.push_back(n.name());
        }
    }

    YOLO_data->MD_size = MD_size_;
    YOLO_data->inp_pic_size = inp_pic_size_;

    YOLO_data->inpTensor = Tensor(DT_FLOAT, TensorShape({ 1, YOLO_data->MD_size[0], YOLO_data->MD_size[1], 3 })); // input tensor
    YOLO_data->t = Tensor(DT_FLOAT, TensorShape({ 2 }));
    //ref: https://stackoverflow.com/questions/36804714/define-a-feed-dict-in-c-for-tensorflow-models
    auto t_matrix = YOLO_data->t.tensor<float, 1>();
    t_matrix(0) = static_cast<float>(YOLO_data->inp_pic_size[0]);
    t_matrix(1) = static_cast<float>(YOLO_data->inp_pic_size[1]);

    // create session
    Status status_newsess = NewSession(options, &YOLO_data->session); //for the usage of gpu setting
    if (!status_newsess.ok() || YOLO_data->session == nullptr)
    {
        // Without a session the Create call below would dereference nullptr.
        std::cout << "ERROR: Creating session failed.." << status_newsess.ToString() << std::endl;
        delete YOLO_data;
        return nullptr;
    }

    Status status_create = YOLO_data->session->Create(graphdef);
    if (!status_create.ok())
    {
        std::cout << "ERROR: Creating graph in session failed.." << status_create.ToString() << std::endl;
    }
    else
    {
        std::cout << "----------- Successfully created session and load graph -------------" << std::endl;
    }
    return YOLO_data;
}
// Runs the detector on `srcImage` and stores the detections in the three
// result vectors.
//
// YOLO_      initialized inference context (session, tensors, node names).
// srcImage   image to analyse; taken by value, the caller's Mat header is
//            not replaced by the letterboxing done here.
// bbox_res   receives four floats per detection.
// score_res  receives one score per detection.
// class_res  receives one class id per detection.
//
// The result vectors are cleared first, so they only ever hold the
// detections of this call. Returns 0 on success and -1 on any failure.
int predict(YOLO* YOLO_, cv::Mat srcImage, std::vector<float>& bbox_res, std::vector<float>& score_res, std::vector<int>& class_res)
{
    // Drop the detections of a previous call; otherwise they would pile up
    // and be drawn again on the next image.
    bbox_res.clear();
    score_res.clear();
    class_res.clear();

    if (YOLO_ == nullptr || YOLO_->session == nullptr || YOLO_->MD_size.size() < 2)
    {
        std::cout << "ERROR: YOLO object is not initialized" << std::endl;
        return -1;
    }

    // read image -> input image
    if (srcImage.empty()) // check if image can open correctly
    {
        std::cout << "can't open the image!!!!!!!" << std::endl;
        return -1;
    }

    srcImage = letterbox_image(srcImage, YOLO_->MD_size[0], YOLO_->MD_size[1]);
    convertCVMatToTensor(YOLO_, srcImage);

    // Build the feed list only after the input tensor has been filled, so
    // that it refers to the current data whatever convertCVMatToTensor does
    // with YOLO_->inpTensor.
    // ref: https://ppt.cc/f7ERNx
    std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
        { YOLO_->inp_tensor_name, YOLO_->inpTensor },
        { YOLO_->placeholder_name, YOLO_->t },
    };

    Status status_run = YOLO_->session->Run(inputs, YOLO_->out_tensors, {}, &YOLO_->outTensor);
    if (!status_run.ok())
    {
        std::cout << "ERROR: RUN failed..." << std::endl;
        std::cout << status_run.ToString() << "\n";
        return -1;
    }
    if (YOLO_->outTensor.size() < 3)
    {
        std::cout << "ERROR: expected 3 output tensors, got " << YOLO_->outTensor.size() << std::endl;
        return -1;
    }

    TTypes<float>::Flat pp1 = YOLO_->outTensor[0].flat<float>(); // boxes, 4 values each
    TTypes<float>::Flat pp2 = YOLO_->outTensor[1].flat<float>(); // scores
    TTypes<int>::Flat pp3 = YOLO_->outTensor[2].flat<int>();     // class ids

    const auto box_count = pp2.size();
    if (pp1.size() < box_count * 4 || pp3.size() < box_count)
    {
        std::cout << "ERROR: output tensors have inconsistent sizes" << std::endl;
        return -1;
    }

    bbox_res.reserve(static_cast<std::size_t>(box_count) * 4);
    score_res.reserve(static_cast<std::size_t>(box_count));
    class_res.reserve(static_cast<std::size_t>(box_count));
    for (auto i = box_count * 0; i < box_count; ++i)
    {
        const auto pp1_idx = i * 4;
        bbox_res.push_back(pp1(pp1_idx));
        bbox_res.push_back(pp1(pp1_idx + 1));
        bbox_res.push_back(pp1(pp1_idx + 2));
        bbox_res.push_back(pp1(pp1_idx + 3));
        score_res.push_back(pp2(i));
        class_res.push_back(pp3(i));
    }
    return 0;
}
// Draws every detection whose score reaches inf_obj->score_thres onto
// `inp_pic` and removes all other detections from inf_obj's result vectors.
//
// inf_obj   context holding bbox_res / score_res / class_res and the drawing
//           options (md_class_list, color_list, show_score, score_type).
// inp_pic   image to draw on.
//
// Returns the image returned by the last plot_one_box call, or `inp_pic`
// unchanged when nothing was drawn. After the call the result vectors hold
// only the drawn detections, in their original order. If the three vectors
// disagree in length, only the detections present in all of them are
// considered and the surplus entries are dropped.
cv::Mat show_result(YOLO* inf_obj, cv::Mat inp_pic)
{
    if (inf_obj == nullptr)
    {
        return inp_pic;
    }

    std::vector<float>& boxes = inf_obj->bbox_res;
    std::vector<float>& scores = inf_obj->score_res;
    std::vector<int>& classes = inf_obj->class_res;

    const std::size_t total = std::min({ classes.size(), scores.size(), boxes.size() / 4 });

    // Kept detections are moved to the front while scanning (`kept` is the
    // write position); this replaces the element-by-element erase, which
    // shifted the vector tails once per removed detection.
    std::size_t kept = 0;
    for (std::size_t i = 0; i < total; ++i)
    {
        // Written as !(a >= b) so that a NaN score is dropped, exactly as
        // the original `>=` test did.
        if (!(scores[i] >= inf_obj->score_thres))
        {
            //std::cout << "score_res[i]->" << score_res[i] << "under thresh!!" << std::endl;
            continue;
        }

        // Round to the nearest pixel and clamp negative coordinates to 0.
        const std::size_t bbox_idx = i * 4;
        const int y_min = std::max(0, (int)floor(boxes[bbox_idx] + 0.5));
        const int x_min = std::max(0, (int)floor(boxes[bbox_idx + 1] + 0.5));
        const int y_max = std::max(0, (int)floor(boxes[bbox_idx + 2] + 0.5));
        const int x_max = std::max(0, (int)floor(boxes[bbox_idx + 3] + 0.5));
        //std::cout << md_class_list[class_res[i]] << ", ";
        //std::cout << score_res[i] << ",";
        //std::cout << "[" << x_min << ", " << y_min << ", " << x_max << ", " << y_max << "]\n";

        // The label is built only for detections that are actually drawn.
        const int class_id = classes[i];
        std::string plot_str = inf_obj->md_class_list[class_id];
        if (inf_obj->show_score)
        {
            if (inf_obj->score_type)
            {
                plot_str += ", " + std::to_string(rounding(scores[i] * 100, 2)).substr(0, 5) + "%";
            }
            else
            {
                plot_str += ", " + std::to_string(rounding(scores[i], 2)).substr(0, 4);
            }
        }

        inp_pic = plot_one_box(inp_pic, x_min, y_min, x_max, y_max, plot_str, inf_obj->color_list[class_id]);

        if (kept != i)
        {
            const std::size_t dst = kept * 4;
            boxes[dst] = boxes[bbox_idx];
            boxes[dst + 1] = boxes[bbox_idx + 1];
            boxes[dst + 2] = boxes[bbox_idx + 2];
            boxes[dst + 3] = boxes[bbox_idx + 3];
            scores[kept] = scores[i];
            classes[kept] = classes[i];
        }
        ++kept;
    }

    // Cut off everything behind the kept detections.
    boxes.resize(kept * 4);
    scores.resize(kept);
    classes.resize(kept);

    return inp_pic;
}
由于我的代码是用于dll的,所以我是这样安排的。 还有一些我没有删除的冗余代码, 但我认为到目前为止,整个过程都可以使用这些提供的代码来完成。 希望对您有所帮助:D
【讨论】:
以上是关于如何正确使用带有 C++ 的 tensorflow 从 YOLO 模型中获取输出?的主要内容,如果未能解决你的问题,请参考以下文章