PyTorch Notes - The Principle of the Convolution Operation

Posted by SpikeKing


Convolution:

  • step1: implement 2D convolution with raw matrix operations, ignoring the batch and channel dimensions for now
  • step2: implement 2D convolution with raw matrix operations, ignoring the batch and channel dimensions, flattened-input version
  • step3: implement 2D convolution with raw matrix operations, including the batch and channel dimensions (4D tensors)
  • step4: implement 2D convolution by unrolling the kernel into a matrix, and derive transposed convolution from it, ignoring batch and channel
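
Throughout these steps the spatial output size follows O = (I - K + 2P) / S + 1 (with floor division). As a quick orientation, here is a tiny helper that reproduces the sizes used below (a minimal sketch, not part of the original notes; conv_output_size is just an illustrative name):

def conv_output_size(i, k, p=0, s=1):
    """Spatial output size of a convolution: floor((I - K + 2P) / S) + 1."""
    return (i - k + 2 * p) // s + 1

print(conv_output_size(4, 3))             # 2 -> the 1x1x4x4 input with a 3x3 kernel below
print(conv_output_size(5, 3, p=1, s=2))   # 3 -> the 5x5 input with padding=1, stride=2 in step3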

Source code:

import torch
import torch.nn as nn
import torch.nn.functional as F

in_channels = 1
out_channels = 1
kernel_size = 3  # can be a scalar or a tuple
batch_size = 1  
bias = False
input_size = [batch_size, in_channels, 4, 4]  # the conv input is 4D and needs the batch_size dimension

# Initialize the convolution layer
conv_layer = torch.nn.Conv2d(in_channels, out_channels, kernel_size, bias=bias)
# Convolution input
input_feature_map = torch.randn(input_size)
# Convolution output; output size formula: O = (I - K + 2P) / S + 1
output_feature_map = conv_layer(input_feature_map)
print(f'[Info] input_feature_map: \n{input_feature_map}')  # 1x1x4x4
print(f'[Info] conv_layer.weight(kernel): \n{conv_layer.weight}')  # 1x1x3x3
print(f'[Info] output_feature_map: \n{output_feature_map}')  # 1x1x2x2

# weight is the kernel
output_feature_map1 = F.conv2d(input_feature_map, conv_layer.weight)

# The difference between F.conv2d and conv_layer is that the kernel is passed in explicitly
print(f'[Info] output_feature_map1: \n{output_feature_map1}')  # 1x1x2x2
print(f'[Info] torch.allclose: {torch.allclose(output_feature_map, output_feature_map1)}')

input = torch.randn(5, 5)  # conv input feature map
kernel = torch.randn(3, 3)  # conv kernel
bias = torch.randn(1)  # conv bias; the default number of output channels is 1

# step1: implement 2D convolution with raw matrix operations, ignoring the batch and channel dimensions
def matrix_multiplication_for_conv2d(input, kernel, bias=0, stride=1, padding=0):
    if padding > 0:
        # F.pad works from the innermost dimension outward (w, h, c, b); here: (left, right, top, bottom)
        input = F.pad(input, (padding, padding, padding, padding))
        
    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape
    
    # floor; padding has already been applied to input, so it does not appear in the formula
    output_w = int((input_w - kernel_w) / stride + 1)  # output width
    output_h = int((input_h - kernel_h) / stride + 1)  # output height
    output = torch.zeros(output_h, output_w)  # initialize the output matrix
    
    for i in range(0, input_h-kernel_h+1, stride):  # iterate over the height dimension
        for j in range(0, input_w-kernel_w+1, stride):  # iterate over the width dimension
            region = input[i:i+kernel_h, j:j+kernel_w]
            # element-wise multiply, sum, and write to the output position
            output[int(i/stride), int(j/stride)] = torch.sum(region * kernel) + bias
    
    return output
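
# A quick sanity check of step1 against F.conv2d with a non-unit stride (a minimal sketch, not in the original notes):
_x = torch.randn(6, 6)
_k = torch.randn(3, 3)
_ours = matrix_multiplication_for_conv2d(_x, _k, stride=2, padding=1)
_ref = F.conv2d(_x.reshape(1, 1, 6, 6), _k.reshape(1, 1, 3, 3), stride=2, padding=1)
print(torch.allclose(_ours, _ref))  # expected True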


# step2: 2D convolution with raw matrix operations, ignoring the batch and channel dimensions, flattened-input version
def matrix_multiplication_for_conv2d_flatten(input, kernel, bias=0, stride=1, padding=0):
    if padding > 0:
        input = F.pad(input, (padding, padding, padding, padding))
    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape
    # floor; padding has already been applied to input, so it does not appear in the formula
    output_w = int((input_w - kernel_w) / stride + 1)  # output width
    output_h = int((input_h - kernel_h) / stride + 1)  # output height
    
    output = torch.zeros(output_h, output_w)  # initialize the output matrix
    
    # store all flattened receptive-field regions
    region_matrix = torch.zeros(output.numel(), kernel.numel())
    kernel_matrix = kernel.reshape((-1, 1))  # kernel as a (kernel.numel() x 1) column-vector matrix
    for i in range(0, input_h-kernel_h+1, stride):  # iterate over the height dimension
        for j in range(0, input_w-kernel_w+1, stride):  # iterate over the width dimension
            region = input[i:i+kernel_h, j:j+kernel_w]
            region_vector = torch.flatten(region)
            region_matrix[int(i/stride)*output_w + int(j/stride)] = region_vector  # one row per output element
    
    output_matrix = region_matrix @ kernel_matrix + bias
    output = output_matrix.reshape(output_h, output_w)
    
    return output
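
# The region_matrix above is the classic im2col layout; the same unrolling is exposed by F.unfold.
# A minimal comparison (a sketch, not in the original notes), using the 5x5 input and 3x3 kernel defined above:
_unfolded = F.unfold(input.reshape(1, 1, *input.shape), kernel_size=kernel.shape)  # 1 x 9 x 9
_region_matrix_api = _unfolded[0].transpose(0, 1)  # (num output elements) x kernel.numel()
_out_via_unfold = (_region_matrix_api @ kernel.reshape(-1, 1) + bias).reshape(3, 3)
print(torch.allclose(_out_via_unfold, matrix_multiplication_for_conv2d_flatten(input, kernel, bias=bias)))  # expected True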


# Result of the convolution implemented with matrix operations
mat_mul_conv_output = matrix_multiplication_for_conv2d(input, kernel, bias=bias, padding=1)
# print(f'mat_mul_conv_output: \n{mat_mul_conv_output}')

# Result of the convolution implemented with matrix operations, flattened-input version
mat_mul_flatten_conv_output = matrix_multiplication_for_conv2d_flatten(input, kernel, bias=bias, padding=1)
# print(f'mat_mul_flatten_conv_output: \n{mat_mul_flatten_conv_output}')

# Result from the PyTorch conv2d API; padding=1 (equivalent to padding="same" for this 3x3 kernel with stride=1)
pytorch_api_conv_output = F.conv2d(input.reshape((1, 1, *input.shape)), kernel.reshape(1, 1, *kernel.shape), bias=bias, padding=1)
# print(f'F.conv2d: \n{pytorch_api_conv_output.reshape(mat_mul_conv_output.shape)}')

flag1 = torch.allclose(mat_mul_conv_output, pytorch_api_conv_output)
# print(f"flag1: flag1")
flag2 = torch.allclose(pytorch_api_conv_output, mat_mul_flatten_conv_output)
# print(f"flag2: flag2")


# step3: 2D convolution with raw matrix operations, including the batch and channel dimensions (4D tensors)
# bias has one value per output channel
def matrix_multiplication_for_conv2d_full(input, kernel, bias, stride=1, padding=0):
    if padding > 0:
        # F.pad order is innermost-first: width, height, channel, batch
        input = F.pad(input, (padding, padding, padding, padding, 0, 0, 0, 0))  
        
    bs, in_channel, input_h, input_w = input.shape
    # the kernel is 4D; it also performs the cross-channel mixing
    out_channel, in_channel, kernel_h, kernel_w = kernel.shape
    
    if bias is None:
        bias = torch.zeros(out_channel)
        
    # floor; padding has already been applied to input, so it does not appear in the formula
    output_w = int((input_w - kernel_w) / stride + 1)  # output width
    output_h = int((input_h - kernel_h) / stride + 1)  # output height
    output = torch.zeros(bs, out_channel, output_h, output_w)  # initialize the output tensor
    
    for ind in range(bs):
        for oc in range(out_channel):
            for ic in range(in_channel):
                for i in range(0, input_h-kernel_h+1, stride):  # iterate over the height dimension
                    for j in range(0, input_w-kernel_w+1, stride):  # iterate over the width dimension
                        region = input[ind, ic, i:i+kernel_h, j:j+kernel_w]
                        # element-wise multiply, sum, and accumulate into the output position
                        output[ind, oc, int(i/stride), int(j/stride)] += torch.sum(region * kernel[oc, ic])
            output[ind, oc] += bias[oc]

    return output


input = torch.randn(2, 2, 5, 5)  # conv input feature map: bs * in_channel * in_h * in_w
kernel = torch.randn(3, 2, 3, 3)  # conv kernel: out_channel * in_channel * kernel_h * kernel_w
bias = torch.randn(3)  # conv bias, one value per output channel

# matrix_multiplication_for_conv2d_full matches the official PyTorch API
pytorch_conv2d_api_output = F.conv2d(input, kernel, bias=bias, padding=1, stride=2)
mm_conv2d_full_output = matrix_multiplication_for_conv2d_full(input, kernel, bias=bias, padding=1, stride=2)

flag = torch.allclose(pytorch_conv2d_api_output, mm_conv2d_full_output)
# print(f"flag: flag")  # True


# step4: implement 2D convolution by unrolling the kernel into a matrix, and derive transposed convolution from it; batch and channel are ignored
def get_kernel_matrix(kernel, input_size, stride=1):
    """
    基于kernel和输入特征图的大小,来得到填充拉直后的kernel堆叠后的矩阵
    """
    kernel_h, kernel_w = kernel.shape
    input_h, input_w = input_size  # a tuple
    output_h, output_w = int((input_h-kernel_h)/stride + 1), int((input_w-kernel_w)/stride+1)
    num_out_feat_map = output_h * output_w
    result = torch.zeros((num_out_feat_map, input_h*input_w))  # result matrix: (num output elements) x (num input elements)
    for i in range(0, output_h, stride):
        for j in range(0, output_w, stride):
            # pad (left, right, top, bottom) so the kernel grows to the input feature-map size
            padded_kernel = F.pad(kernel, (j, input_w-kernel_w-j, i, input_h-kernel_h-i))
            result[i*output_w + j] = padded_kernel.flatten()
    return result

# Test 1: verify the convolution
kernel = torch.randn(3, 3)
input = torch.randn(4, 4)
kernel_matrix = get_kernel_matrix(kernel, input.shape)  # 4x16
print(f"[Info] kernel: \\nkernel")
print(f"[Info] kernel_matrix: \\nkernel_matrix")
mm_conv2d_output = kernel_matrix @ input.reshape((-1, 1))   # 通过矩阵相乘的方式,算出卷积
mm_conv2d_output_ = mm_conv2d_output.reshape(1, 1, 2, 2)  # 对齐
print(f"[Info] mm_conv2d_output_.shape: mm_conv2d_output_.shape")
print(f"[Info] mm_conv2d_output_: \\nmm_conv2d_output_")
# PyTorch conv2d api
pytorch_conv2d_output = F.conv2d(input.reshape(1, 1, *input.shape), kernel.reshape(1, 1, *kernel.shape))
print(f"[Info] pytorch_conv2d_output: \\npytorch_conv2d_output")

# Test 2: verify 2D transposed convolution, which upsamples the 2x2 conv output back to 4x4
# The kernel matrix enlarges the feature map; transposed convolution (TransposedConv) is used for upsampling
mm_transposed_conv2d_output = kernel_matrix.transpose(-1, -2) @ mm_conv2d_output
mm_transposed_conv2d_output = mm_transposed_conv2d_output.reshape(1, 1, 4, 4)
print(f"[Info] mm_transposed_conv2d_output: \\nmm_transposed_conv2d_output")

pytorch_transposed_conv2d_output = F.conv_transpose2d(pytorch_conv2d_output, kernel.reshape(1, 1, *kernel.shape))
print(f"[Info] pytorch_transposed_conv2d_output: \\npytorch_transposed_conv2d_output")
