PyTorch笔记 - Convolution卷积运算的原理

Posted 2022-12-04 SpikeKing

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了PyTorch笔记 - Convolution卷积运算的原理相关的知识，希望对你有一定的参考价值。

卷积操作包括5个参数：input、kernel、bias、stride、padding

input已包括padding，计算output时，不要再加上2*padding
output的索引是i/stride，j/stride，因为i和j的步长是stride

def matrix_multiplication_for_conv2d(input, kernel, bias=0, stride=1, padding=0):
    """Compute a single-channel 2-D convolution with explicit sliding windows.

    Args:
        input: 2-D tensor (height, width) holding the feature map.
        kernel: 2-D tensor (kernel_h, kernel_w) holding the filter weights.
        bias: value added to every output element.
        stride: step between consecutive window positions, in both directions.
        padding: number of zeros added on each of the four sides of ``input``.

    Returns:
        2-D tensor of shape (output_h, output_w).
    """
    # Pad once up front, so the size formula below needs no "+ 2 * padding" term.
    if padding > 0:
        input = F.pad(input, (padding,) * 4)
    (input_h, input_w), (kernel_h, kernel_w) = input.shape, kernel.shape

    # int() truncates the quotient, which yields the number of window positions.
    rows = int((input_h - kernel_h) / stride + 1)  # output height
    cols = int((input_w - kernel_w) / stride + 1)  # output width
    output = torch.zeros(rows, cols)

    # (r, c) index the output; (top, left) is the window's corner in the input.
    for r, top in enumerate(range(0, input_h - kernel_h + 1, stride)):
        for c, left in enumerate(range(0, input_w - kernel_w + 1, stride)):
            window = input[top:top + kernel_h, left:left + kernel_w]
            # Element-wise product summed over the window, plus the bias.
            output[r, c] = (window * kernel).sum() + bias

    return output

矩阵和矩阵相乘，转换为行向量和列向量相乘，即输入矩阵9个行向量 x kernel的列向量

另一种思路：用0把kernel填充到与输入矩阵相同的尺寸，展平后再做矩阵相乘，由此可以引出转置卷积

torch.flatten() 操作，多维向量变成1维向量

import torch
import torch.nn as nn
import torch.nn.functional as F

# Draw a random 4-D tensor and collapse all of its dimensions into one.
a = torch.randn((1, 1, 2, 3))
b = a.flatten()
# The string below records a sample printout of `a` and `b` from one run.
"""
tensor([[[[ 1.0875,  0.1187, -0.0439],
          [ 0.2802,  0.7416, -0.0189]]]])
tensor([ 1.0875,  0.1187, -0.0439,  0.2802,  0.7416, -0.0189])
"""

torch.numel() 操作

# numel() reports the total number of elements, regardless of shape.
a = torch.randn((1, 1, 2, 3))
b = torch.randn((2, 3, 4))
print(torch.numel(a))
print(torch.numel(b))
# The string below records the expected printout.
"""
6
24
"""

torch.reshape() 操作

# Reshape to a single column; -1 lets PyTorch infer the number of rows.
a = torch.randn((1, 1, 2, 3))
b = torch.reshape(a, (-1, 1))
print(b.shape)
print(b)
# The string below records a sample printout from one run.
"""
torch.Size([6, 1])
tensor([[ 0.8519],
        [ 0.1513],
        [-0.1491],
        [-1.2765],
        [-0.1445],
        [ 1.0137]])
"""

flatten input版本：与逐个区域直接相乘的版本相比，优势在于只需要1次矩阵相乘，节省计算量。

matrix_multiplication_for_conv2d
matrix_multiplication_for_conv2d_flatten
F.conv2d

# step1: implement 2-D convolution with raw tensor operations,
# ignoring the batch-size and channel dimensions for now
input = torch.randn((5, 5))  # input feature map
kernel = torch.randn((3, 3))  # convolution kernel
bias = torch.randn((1,))  # convolution bias: one value, i.e. a single output channel


def matrix_multiplication_for_conv2d(input, kernel, bias=0, stride=1, padding=0):
    """Single-channel 2-D convolution computed one output element at a time.

    Args:
        input: 2-D tensor (height, width).
        kernel: 2-D tensor (kernel_h, kernel_w).
        bias: value added to each output element.
        stride: distance between neighbouring windows, in both directions.
        padding: zeros added to every side of ``input`` before convolving.

    Returns:
        2-D tensor of shape (output_h, output_w).
    """
    # Work on the padded map, so the size formula below needs no padding term.
    padded = F.pad(input, (padding, padding, padding, padding)) if padding > 0 else input
    input_h, input_w = padded.shape
    kernel_h, kernel_w = kernel.shape

    # Number of window positions along each axis (quotient truncated by int()).
    output_h = int((input_h - kernel_h) / stride + 1)  # output height
    output_w = int((input_w - kernel_w) / stride + 1)  # output width

    output = torch.zeros(output_h, output_w)

    # Walk over output positions and map each back to its window origin.
    for out_i in range(output_h):
        i = out_i * stride
        for out_j in range(output_w):
            j = out_j * stride
            patch = padded[i:i + kernel_h, j:j + kernel_w]
            output[out_i, out_j] = torch.sum(torch.mul(patch, kernel)) + bias

    return output

  
# flattened-input version
def matrix_multiplication_for_conv2d_flatten(input, kernel, bias=0, stride=1, padding=0):
    """Compute a single-channel 2-D convolution with one matrix multiplication.

    Every window of the (padded) input is flattened into one row of a
    matrix; multiplying that matrix by the flattened kernel column yields
    all output elements at once.

    Args:
        input: 2-D tensor (height, width) holding the feature map.
        kernel: 2-D tensor (kernel_h, kernel_w) holding the filter weights.
        bias: value added to every output element.
        stride: step between consecutive window positions, in both directions.
        padding: number of zeros added on each of the four sides of ``input``.

    Returns:
        2-D tensor of shape (output_h, output_w).
    """
    if padding > 0:
        input = F.pad(input, (padding, padding, padding, padding))
    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape
    # The input is already padded, so no padding term appears in the formula.
    output_h = int((input_h - kernel_h) / stride + 1)  # output height
    output_w = int((input_w - kernel_w) / stride + 1)  # output width

    # One row per output element, one column per kernel element.
    region_matrix = torch.zeros(output_h * output_w, kernel.numel())
    kernel_matrix = kernel.reshape((-1, 1))  # kernel as a (kernel.numel(), 1) column
    for out_i, i in enumerate(range(0, input_h - kernel_h + 1, stride)):
        for out_j, j in enumerate(range(0, input_w - kernel_w + 1, stride)):
            region = input[i:i + kernel_h, j:j + kernel_w]
            # Row-major position of this output element. It must be built from
            # output indices and the output width; using the input offsets
            # (i, j) and the height breaks for stride > 1 or non-square outputs.
            region_matrix[out_i * output_w + out_j] = torch.flatten(region)
    output_matrix = region_matrix @ kernel_matrix + bias
    return output_matrix.reshape(output_h, output_w)


# Result of the sliding-window implementation
mat_mul_conv_output = matrix_multiplication_for_conv2d(input, kernel, bias=bias, padding=1)
print(f'mat_mul_conv_output: \n{mat_mul_conv_output}')

# Result of the flattened-input (single matmul) implementation
mat_mul_flatten_conv_output = matrix_multiplication_for_conv2d_flatten(input, kernel, bias=bias, padding=1)
print(f'mat_mul_flatten_conv_output: \n{mat_mul_flatten_conv_output}')

# Reference result from the PyTorch API, called with padding=1
# (the original note also mentions padding="same").
# F.conv2d expects 4-D tensors, so batch and channel axes of size 1 are added.
pytorch_api_conv_output = F.conv2d(input.reshape((1, 1, *input.shape)), kernel.reshape(1, 1, *kernel.shape), bias=bias, padding=1)
print(f'F.conv2d: \n{pytorch_api_conv_output.reshape(mat_mul_conv_output.shape)}')

# Check that both hand-written versions agree with the PyTorch API
flag1 = torch.allclose(mat_mul_conv_output, pytorch_api_conv_output)
print(f"flag1: {flag1}")
flag2 = torch.allclose(pytorch_api_conv_output, mat_mul_flatten_conv_output)
print(f"flag2: {flag2}")

用原始的矩阵运算来实现二维卷积, 考虑batchsize维度和channel维度, 4维

bias形状和output channel是一致的

# step3: implement 2-D convolution with raw tensor operations,
# now including the batch-size and channel dimensions (4-D tensors)
# bias has one entry per output channel
def matrix_multiplication_for_conv2d_full(input, kernel, bias, stride=1, padding=0):
    """Compute a batched, multi-channel 2-D convolution with explicit loops.

    Args:
        input: 4-D tensor (batch, in_channel, height, width).
        kernel: 4-D tensor (out_channel, in_channel, kernel_h, kernel_w).
        bias: 1-D tensor indexed by output channel, or ``None`` for no bias.
        stride: step between consecutive window positions, in both directions.
        padding: number of zeros added on each side of the two spatial axes.

    Returns:
        4-D tensor of shape (batch, out_channel, output_h, output_w).
    """
    if padding > 0:
        # F.pad takes pairs from the last dimension outward:
        # width, height, channel, batch. Only the spatial axes are padded.
        input = F.pad(input, (padding, padding, padding, padding, 0, 0, 0, 0))

    # The channel count that drives the loops comes from the kernel; the
    # input's own channel dimension is not checked against it.
    bs, _, input_h, input_w = input.shape
    out_channel, in_channel, kernel_h, kernel_w = kernel.shape

    # Identity test: "== None" on a tensor is not a reliable None check.
    if bias is None:
        bias = torch.zeros(out_channel)

    # The input is already padded, so no padding term appears in the formula.
    output_h = int((input_h - kernel_h) / stride + 1)  # output height
    output_w = int((input_w - kernel_w) / stride + 1)  # output width

    output = torch.zeros(bs, out_channel, output_h, output_w)

    for ind in range(bs):
        for oc in range(out_channel):
            for ic in range(in_channel):
                for out_i, i in enumerate(range(0, input_h - kernel_h + 1, stride)):
                    for out_j, j in enumerate(range(0, input_w - kernel_w + 1, stride)):
                        region = input[ind, ic, i:i + kernel_h, j:j + kernel_w]
                        # Accumulate this input channel's contribution.
                        output[ind, oc, out_i, out_j] += torch.sum(region * kernel[oc, ic])
            # Bias is added once per output channel, after all input channels.
            output[ind, oc] += bias[oc]

    return output


input = torch.randn(2, 2, 5, 5)  # input feature map: bs * in_channel * in_h * in_w
kernel = torch.randn(3, 2, 3, 3)  # kernel: out_channel * in_channel * kernel_h * kernel_w
bias = torch.randn(3)  # bias: one value per output channel (3 here)

# Check that matrix_multiplication_for_conv2d_full matches the official PyTorch API
pytorch_conv2d_api_output = F.conv2d(input, kernel, bias=bias, padding=1, stride=2)
mm_conv2d_full_output = matrix_multiplication_for_conv2d_full(input, kernel, bias=bias, padding=1, stride=2)

flag = torch.allclose(pytorch_conv2d_api_output, mm_conv2d_full_output)
print(f"flag: {flag}")  # True

以上是关于PyTorch笔记 - Convolution卷积运算的原理的主要内容，如果未能解决你的问题，请参考以下文章