神经网络推理加速：合并卷积和BN层运算原理及实验

Posted 2023-02-07 deep_learninger

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了神经网络推理加速：合并卷积和BN层运算原理及实验相关的知识，希望对你有一定的参考价值。

1. 为什么要合并BN层

在训练深度网络模型时，BN（Batch Normalization）层能够加速网络收敛，并且能够控制过拟合，一般放在卷积层之后。BN 层将数据归一化后，能够有效解决梯度消失与梯度爆炸问题。虽然 BN 层在训练时起到了积极作用，然而，在网络前向推断时多了一些层的运算，影响了模型的性能，且占用了更多的内存或者显存空间。目前，很多先进的网络模型（ResNet，MobileNet，Xception，ShuffleNet 等）都使用了BN技术，因此，我们有必要将 BN 层的参数合并到卷积层，来提升模型前向推断的速度。

2. BN层与卷积层合并的数学原理

卷积层中

卷积权重： W，卷积偏置：B

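卷积层运算：$X_{conv} = W * X + B$

BN 层中
均值：$\mu$，方差：$\sigma^2$，缩放因子：$\gamma$，偏移：$\beta$，一个较小数（防止分母为0）：$\epsilon$

BN 层运算：$X_{bn} = \gamma \cdot \dfrac{X_{conv} - \mu}{\sqrt{\sigma^2 + \epsilon}} + \beta$

BN层和卷积层合并后：

令 $\alpha = \dfrac{\gamma}{\sqrt{\sigma^2 + \epsilon}}$，则

$W_{merged} = W \cdot \alpha$，$B_{merged} = B \cdot \alpha + (\beta - \mu \cdot \alpha)$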

3. 实验结果

机器：显卡 GTX 1080Ti，i7 CPU

本实验对比了Resnet50 模型合并BN层前后的性能，分类精度保持不变，速度显著提升。

模型	CPU前向时间	GPU前向时间
Resnet50（合并前）	176.17ms	11.03ms
Resnet50（合并后）	161.69ms	7.3ms
提升	10%	51%

4. 合并的python脚本

该脚本需要caffe的python接口

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import numpy as np
import sys
import os
import os.path as osp
import google.protobuf as pb
import google.protobuf.text_format
from argparse import ArgumentParser
import caffe

# Select Caffe's CPU mode at import time, before any net is constructed.
caffe.set_mode_cpu()

def load_and_fill_biases(src_model, src_weights, dst_model, dst_weights):
    """Load a trained net into a copy in which every absorbing layer has a bias.

    The source prototxt is rewritten so that each Convolution and
    InnerProduct layer has ``bias_term`` enabled (new biases are filled with
    the constant 0), the rewritten definition is written to ``dst_model``,
    and the trained parameters are copied blob by blob into a net built from
    that new definition.

    Args:
        src_model: Path of the source net definition (prototxt).
        src_weights: Path of the trained weights (caffemodel).
        dst_model: Path the bias-enabled prototxt is written to.
        dst_weights: Optional path; when not None the resulting net's
            parameters are saved there.

    Returns:
        The ``caffe.Net`` built from ``dst_model`` carrying the copied
        parameters.
    """
    with open(src_model) as f:
        model = caffe.proto.caffe_pb2.NetParameter()
        pb.text_format.Merge(f.read(), model)

    for layer in model.layer:
        # Both layer types are accepted as BN absorbers by
        # merge_batchnorms_in_net, and the merge writes into the bias blob,
        # so both must own one.
        if layer.type == 'Convolution':
            param = layer.convolution_param
        elif layer.type == 'InnerProduct':
            param = layer.inner_product_param
        else:
            continue

        if not param.bias_term:
            param.bias_term = True
            param.bias_filler.type = 'constant'
            param.bias_filler.value = 0.0

    with open(dst_model, 'w') as f:
        f.write(pb.text_format.MessageToString(model))

    caffe.set_mode_cpu()
    net_src = caffe.Net(src_model, src_weights, caffe.TEST)
    net_dst = caffe.Net(dst_model, caffe.TEST)
    for key in net_src.params.keys():
        # Iterate over the source blob count: the destination may own one
        # extra (freshly added, zero-filled) bias blob.
        for i in range(len(net_src.params[key])):
            net_dst.params[key][i].data[:] = net_src.params[key][i].data[:]

    if dst_weights is not None:
        # Store params
        net_dst.save(dst_weights)

    return net_dst


def merge_conv_and_bn(net, i_conv, i_bn, i_scale, eps=1e-5):
    """Fold a BatchNorm layer (and an optional Scale layer) into a conv layer.

    The BatchNorm statistics and the Scale parameters are absorbed into the
    weights and bias of the layer at ``i_conv``.  The BatchNorm blobs are
    then reset to mean 0 / variance 1 / factor 1 and the Scale blobs to
    weight 1 / bias 0, all modified in place on ``net``.

    Args:
        net: Object exposing ``_layer_names`` (index -> name) and ``params``
            (name -> sequence of blobs with a ``.data`` array).
        i_conv: Index of the absorbing layer; it must own a bias blob.
        i_bn: Index of the BatchNorm layer.  Its blobs 0, 1 and 2 are read as
            accumulated mean, accumulated variance and the factor both are
            divided by.
        i_scale: Index of the Scale layer following the BatchNorm, or None.
        eps: Value added to the variance before the square root.
            NOTE(review): should equal the BatchNorm layer's own ``eps``;
            the default keeps the previously hard-coded 1e-5.

    Raises:
        AssertionError: If ``i_conv`` or ``i_bn`` is None.
    """
    # This is based on Kyeheyon's work
    assert i_conv is not None
    assert i_bn is not None

    def copy_double(data):
        return np.array(data, copy=True, dtype=np.double)

    key_conv = net._layer_names[i_conv]
    key_bn = net._layer_names[i_bn]
    key_scale = net._layer_names[i_scale] if i_scale is not None else None

    # Copy
    bn_mean = copy_double(net.params[key_bn][0].data)
    bn_variance = copy_double(net.params[key_bn][1].data)
    num_bn_samples = copy_double(net.params[key_bn][2].data)

    # and Invalidate the BN layer
    net.params[key_bn][0].data[:] = 0
    net.params[key_bn][1].data[:] = 1
    net.params[key_bn][2].data[:] = 1

    # Avoid dividing the statistics by zero below.
    if num_bn_samples[0] == 0:
        num_bn_samples[0] = 1

    if key_scale is not None and key_scale in net.params:
        print('Combine {} + {} + {}'.format(key_conv, key_bn, key_scale))
        scale_blobs = net.params[key_scale]
        scale_weight = copy_double(scale_blobs[0].data)
        scale_blobs[0].data[:] = 1
        if len(scale_blobs) > 1:
            scale_bias = copy_double(scale_blobs[1].data)
            scale_blobs[1].data[:] = 0
        else:
            # Scale layer without a bias blob: nothing to absorb or reset.
            scale_bias = 0
    else:
        print('Combine {} + {}'.format(key_conv, key_bn))
        scale_weight = 1
        scale_bias = 0

    weight = copy_double(net.params[key_conv][0].data)
    bias = copy_double(net.params[key_conv][1].data)

    mean = bn_mean / num_bn_samples[0]
    variance = bn_variance / num_bn_samples[0]
    alpha = scale_weight / np.sqrt(variance + eps)

    net.params[key_conv][1].data[:] = bias * alpha + (scale_bias - mean * alpha)
    # Scale every output channel (leading axis of the weight blob) by its
    # own alpha.
    per_channel = alpha.reshape((-1,) + (1,) * (weight.ndim - 1))
    net.params[key_conv][0].data[:] = weight * per_channel


def merge_batchnorms_in_net(net):
    """Fold every absorbable BatchNorm layer of ``net`` into its producer.

    A BatchNorm layer is absorbed only when the nearest earlier layer that
    produces its bottom blob is a Convolution or InnerProduct layer.  When
    the only consumer of the BatchNorm output is a Scale layer, that Scale
    layer is absorbed together with it.  BatchNorm layers whose input has no
    suitable producer are left untouched.

    Args:
        net: Net exposing ``layers``, ``_layer_names``, ``bottom_names``,
            ``top_names`` and ``params``.

    Returns:
        The same ``net`` object, modified in place.
    """
    # for each BN
    for i, layer in enumerate(net.layers):
        if layer.type != 'BatchNorm':
            continue

        l_name = net._layer_names[i]

        l_bottom = net.bottom_names[l_name]
        assert len(l_bottom) == 1
        l_bottom = l_bottom[0]
        l_top = net.top_names[l_name]
        assert len(l_top) == 1
        l_top = l_top[0]

        # Walk backwards to the nearest layer producing the BN input.  Reset
        # per BN so an index from a previous iteration is never reused.
        conv_ind = None
        for j in range(i - 1, -1, -1):
            tops_of_j = net.top_names[net._layer_names[j]]
            if l_bottom in tops_of_j:
                if net.layers[j].type in ('Convolution', 'InnerProduct'):
                    conv_ind = j
                # Any other producer type (e.g. an in-place layer between
                # the conv and the BN) blocks the merge.
                break

        if conv_ind is None:
            # No producer found, or the producer cannot absorb the BN.
            continue

        # find the following Scale
        scale_ind = None
        for j in range(i + 1, len(net.layers)):
            bottoms_of_j = net.bottom_names[net._layer_names[j]]
            if l_top not in bottoms_of_j:
                continue

            if scale_ind is not None or net.layers[j].type != 'Scale':
                # Followed by two or more layers, or by a layer which is
                # not 'Scale': only the BN itself is merged.
                scale_ind = None
                break

            scale_ind = j
            top_of_j = net.top_names[net._layer_names[j]][0]
            if top_of_j == bottoms_of_j[0]:
                # On-the-fly => Can be merged
                break

        merge_conv_and_bn(net, conv_ind, i, scale_ind)

    return net


def process_model(net, src_model, dst_model, func_loop, func_finally):
    """Run per-layer and final callbacks over a prototxt and write the result.

    Args:
        net: Net object handed through to every callback.
        src_model: Path of the prototxt to read.
        dst_model: Path the processed prototxt is written to.
        func_loop: Callables invoked as ``func(layer, net, model, i)`` for
            every layer of the parsed model, in list order.
        func_finally: Callables invoked as ``func(net, model)`` once all
            layers have been visited.
    """
    with open(src_model) as f:
        model = caffe.proto.caffe_pb2.NetParameter()
        pb.text_format.Merge(f.read(), model)

    # Explicit loops instead of map(): the callbacks are run for their side
    # effects, and map() is lazy on Python 3, so they would never execute.
    for i, layer in enumerate(model.layer):
        for func in func_loop:
            func(layer, net, model, i)

    for func in func_finally:
        func(net, model)

    with open(dst_model, 'w') as f:
        f.write(pb.text_format.MessageToString(model))


# Functions to remove (redundant) BN and Scale layers
to_delete_empty = []  # layers collected by pick_empty_layers for removal


def pick_empty_layers(layer, net, model, i):
    """Queue ``layer`` for deletion when it is an in-place identity BN/Scale.

    A BatchNorm layer qualifies when its mean blob is all 0 and its variance
    blob is all 1.  A Scale layer qualifies when its weight blob is all 1
    and its bias blob, if it has one, is all 0.  Qualifying layers are
    appended to the module-level ``to_delete_empty`` list.

    Args:
        layer: Layer message with ``type``, ``name``, ``bottom``, ``top``.
        net: Net whose ``params[layer.name]`` holds the layer's blobs.
        model: Unused; part of the per-layer callback signature.
        i: Unused; part of the per-layer callback signature.
    """
    if layer.type not in ['BatchNorm', 'Scale']:
        return

    bottom = layer.bottom[0]
    top = layer.top[0]

    if bottom != top:
        # Not supported yet: only in-place layers are considered here.
        return

    blobs = net.params[layer.name]
    if layer.type == 'BatchNorm':
        zero_mean = np.all(blobs[0].data == 0)
        one_var = np.all(blobs[1].data == 1)
        is_identity = zero_mean and one_var
    else:
        no_scaling = np.all(blobs[0].data == 1)
        # A Scale layer may have no bias blob at all.
        zero_bias = len(blobs) < 2 or np.all(blobs[1].data == 0)
        is_identity = no_scaling and zero_bias

    if is_identity:
        print('Delete layer: {}'.format(layer.name))
        to_delete_empty.append(layer)


def remove_empty_layers(net, model):
    """Remove every layer queued in ``to_delete_empty`` from ``model.layer``.

    Args:
        net: Unused; part of the final-callback signature.
        model: Parsed net definition whose ``layer`` container is edited in
            place.
    """
    # Explicit loop instead of map(): map() is lazy on Python 3, so the
    # removals would never be executed.
    for layer in to_delete_empty:
        model.layer.remove(layer)


# A function to add 'engine: CAFFE' param into 1x1 convolutions
def set_engine_caffe(layer, net, model, i):
    """Set ``engine: CAFFE`` on 1x1 Convolution layers.

    A convolution counts as 1x1 when ``kernel_size`` is 1 (either a scalar,
    or a non-empty repeated field whose entries are all 1) or when
    ``kernel_h`` and ``kernel_w`` are both 1.

    Args:
        layer: Layer message; layers whose type is not 'Convolution' are
            left untouched.
        net: Unused; part of the per-layer callback signature.
        model: Unused; part of the per-layer callback signature.
        i: Unused; part of the per-layer callback signature.
    """
    if layer.type != 'Convolution':
        return

    param = layer.convolution_param

    # kernel_size may be a plain integer or a repeated field depending on the
    # Caffe version; a repeated field never compares equal to the integer 1.
    kernel_size = param.kernel_size
    try:
        sizes = list(kernel_size)
    except TypeError:
        sizes = [kernel_size]
    square_1x1 = bool(sizes) and all(size == 1 for size in sizes)

    if square_1x1 or (param.kernel_h == param.kernel_w == 1):
        param.engine = dict(param.Engine.items())['CAFFE']


def main():
    """Merge BatchNorm layers of the model named by the global ``args``.

    Reads ``args.model`` / ``args.weights``, writes the inference prototxt to
    ``args.output_model`` and the merged weights to ``args.output_weights``.
    Output paths that are None default to the input path with its extension
    replaced by ``_inference.prototxt`` / ``_inference.caffemodel``.
    """
    # Set default output file names
    if args.output_model is None:
        file_name = osp.splitext(args.model)[0]
        args.output_model = file_name + '_inference.prototxt'
    if args.output_weights is None:
        file_name = osp.splitext(args.weights)[0]
        args.output_weights = file_name + '_inference.caffemodel'

    # Intermediate bias-enabled prototxt; only needed until process_model
    # has read it back, so it is removed afterwards even on failure.
    temp_model = args.model + '.temp.pt'
    try:
        net = load_and_fill_biases(args.model, args.weights, temp_model, None)
        net = merge_batchnorms_in_net(net)

        process_model(net, temp_model, args.output_model,
                      [pick_empty_layers, set_engine_caffe],
                      [remove_empty_layers])
    finally:
        if osp.exists(temp_model):
            os.remove(temp_model)

    # Store params
    net.save(args.output_weights)


if __name__ == '__main__':
    # Command-line entry point.  ``args`` is bound at module level on purpose:
    # main() reads it as a global.
    parser = ArgumentParser(description="Generate Batch Normalized model for inference")
    parser.add_argument(
        '--model',
        default="MobileNetSSD_deploy.prototxt",
        help="The net definition prototxt",
    )
    parser.add_argument(
        '--weights',
        default="MobileNetSSD_deploy.caffemodel",
        help="The weights caffemodel",
    )
    # Output paths are optional; main() derives them from the inputs when
    # they are omitted.
    parser.add_argument('--output_model')
    parser.add_argument('--output_weights')

    args = parser.parse_args()
    main()

脚本下载地址：

https://download.csdn.net/download/kangdi7547/10578152

参考博客： http://keep.01ue.com/?pi=943537&_a=app&_c=index&_m=p

以上是关于神经网络推理加速：合并卷积和BN层运算原理及实验的主要内容，如果未能解决你的问题，请参考以下文章

神经网络推理加速： 合并卷积和BN层运算原理及实验

神经网络推理加速：合并卷积和BN层运算原理及实验