PyTorch笔记 - Convolution卷积运算的原理
Posted SpikeKing
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了PyTorch笔记 - Convolution卷积运算的原理相关的知识,希望对你有一定的参考价值。
Convolution卷积:
- step1:用原始的矩阵运算来实现二维卷积,先不考虑batch size维度和channel维度
- step2:同step1,但先把每个输入区域拉平(flatten)成行向量,再与拉平成列向量的kernel做矩阵乘法,同样不考虑batch size维度和channel维度
- step3:用原始的矩阵运算来实现二维卷积,考虑batch size维度和channel维度,输入为4维
- step4:通过对kernel进行展开,来实现二维卷积,并推导出转置卷积,不考虑batch和channel
源码:
import torch
import torch.nn as nn
import torch.nn.functional as F
# Demo: run the same convolution through nn.Conv2d and F.conv2d and compare.
in_channels = 1
out_channels = 1
kernel_size = 3  # may be a scalar or a tuple
batch_size = 1
bias = False
# Conv2d takes a 4-D input, so the batch dimension is required.
input_size = [batch_size, in_channels, 4, 4]
# Build the convolution layer.
conv_layer = torch.nn.Conv2d(in_channels, out_channels, kernel_size, bias=bias)
# Random input feature map.
input_feature_map = torch.randn(input_size)
# Output size per spatial dimension: O = (I - K + 2P) / S + 1.
output_feature_map = conv_layer(input_feature_map)
# The f-strings below need their {...} placeholders and a real "\n" escape;
# without them the variable names are printed as literal text.
print(f'[Info] input_feature_map: \n{input_feature_map}')  # 1x1x4x4
print(f'[Info] conv_layer.weight(kernel): \n{conv_layer.weight}')  # 1x1x3x3
print(f'[Info] output_feature_map: \n{output_feature_map}')  # 1x1x2x2
# The layer's weight is the convolution kernel.
output_feature_map1 = F.conv2d(input_feature_map, conv_layer.weight)
# F.conv2d differs from the layer call only in that the kernel is passed in explicitly.
print(f'[Info] output_feature_map1: \n{output_feature_map1}')  # 1x1x2x2
print(f'[Info] torch.allclose: {torch.allclose(output_feature_map, output_feature_map1)}')
# Inputs for the hand-written 2-D convolutions (no batch / channel dimensions).
input = torch.randn(5, 5)  # input feature map
kernel = torch.randn(3, 3)  # convolution kernel
bias = torch.randn(1)  # convolution bias, one value for the single output channel
# step1: 2-D convolution with plain tensor operations, ignoring the batch and
# channel dimensions.
def matrix_multiplication_for_conv2d(input, kernel, bias=0, stride=1, padding=0):
    """Convolve a 2-D feature map with a 2-D kernel using a sliding window.

    Args:
        input: 2-D tensor (height, width).
        kernel: 2-D tensor (kernel height, kernel width).
        bias: value added to every output element.
        stride: step of the sliding window, used for both dimensions.
        padding: number of zeros added on each of the four sides of ``input``.

    Returns:
        2-D tensor of shape (output_h, output_w), where each size is
        ``(padded_size - kernel_size) // stride + 1``.
    """
    if padding > 0:
        # F.pad lists pads from the last dimension outwards:
        # (left, right, top, bottom).
        input = F.pad(input, (padding, padding, padding, padding))
    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape
    # Padding is already applied to ``input``, so it does not appear in the
    # formula. Floor division rounds down; clamp so that a kernel larger than
    # the input yields an empty output instead of a negative size.
    output_h = max((input_h - kernel_h) // stride + 1, 0)  # output height
    output_w = max((input_w - kernel_w) // stride + 1, 0)  # output width
    output = torch.zeros(output_h, output_w)
    for out_i in range(output_h):  # walk the height dimension
        top = out_i * stride
        for out_j in range(output_w):  # walk the width dimension
            left = out_j * stride
            region = input[top:top + kernel_h, left:left + kernel_w]
            # Element-wise product summed over the window, plus bias.
            output[out_i, out_j] = torch.sum(region * kernel) + bias
    return output
# step2: same 2-D convolution (no batch / channel dimensions), but every input
# window is flattened so the whole convolution becomes one matrix product.
def matrix_multiplication_for_conv2d_flatten(input, kernel, bias=0, stride=1, padding=0):
    """Convolve a 2-D feature map with a 2-D kernel via a single matmul.

    Every window of ``input`` is flattened into one row of a region matrix,
    the kernel is flattened into a column vector, and their product gives all
    output elements at once.

    Args:
        input: 2-D tensor (height, width).
        kernel: 2-D tensor (kernel height, kernel width).
        bias: value added to every output element.
        stride: step of the sliding window, used for both dimensions.
        padding: number of zeros added on each of the four sides of ``input``.

    Returns:
        2-D tensor of shape (output_h, output_w).
    """
    if padding > 0:
        input = F.pad(input, (padding, padding, padding, padding))
    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape
    # Padding is already applied to ``input``; floor division rounds down.
    output_h = max((input_h - kernel_h) // stride + 1, 0)  # output height
    output_w = max((input_w - kernel_w) // stride + 1, 0)  # output width
    # One row per output element, one column per kernel element.
    region_matrix = torch.zeros(output_h * output_w, kernel.numel())
    kernel_matrix = kernel.reshape((-1, 1))  # kernel as a column vector
    # A running counter gives the row-major output position. Deriving the row
    # from the input offsets (i * output_h + j) is only right for square
    # outputs with stride 1.
    row = 0
    for i in range(0, input_h - kernel_h + 1, stride):  # walk the height dimension
        for j in range(0, input_w - kernel_w + 1, stride):  # walk the width dimension
            region = input[i:i + kernel_h, j:j + kernel_w]
            region_matrix[row] = torch.flatten(region)
            row += 1
    output_matrix = region_matrix @ kernel_matrix + bias
    return output_matrix.reshape(output_h, output_w)
# Sliding-window implementation.
mat_mul_conv_output = matrix_multiplication_for_conv2d(
    input, kernel, bias=bias, padding=1
)
# Flattened-region (single matmul) implementation.
mat_mul_flatten_conv_output = matrix_multiplication_for_conv2d_flatten(
    input, kernel, bias=bias, padding=1
)
# Reference result from the PyTorch API. F.conv2d needs 4-D tensors, so two
# leading singleton dimensions are added to both the input and the kernel.
pytorch_api_conv_output = F.conv2d(
    input[None, None],
    kernel[None, None],
    bias=bias,
    padding=1,
)
# allclose broadcasts the 2-D results against the 1x1xHxW reference.
flag1 = torch.allclose(mat_mul_conv_output, pytorch_api_conv_output)
flag2 = torch.allclose(pytorch_api_conv_output, mat_mul_flatten_conv_output)
# To inspect the results, print mat_mul_conv_output,
# mat_mul_flatten_conv_output, pytorch_api_conv_output, flag1 and flag2.
# step3: 2-D convolution with plain tensor operations, including the batch and
# channel dimensions (4-D tensors). The bias has one entry per output channel.
def matrix_multiplication_for_conv2d_full(input, kernel, bias, stride=1, padding=0):
    """Convolve a batched, multi-channel input with a 4-D kernel.

    Args:
        input: 4-D tensor (batch, in_channel, height, width).
        kernel: 4-D tensor (out_channel, in_channel, kernel_h, kernel_w).
        bias: 1-D tensor indexed by output channel, or None for no bias.
        stride: step of the sliding window, used for both spatial dimensions.
        padding: number of zeros added on each side of both spatial dimensions.

    Returns:
        4-D tensor (batch, out_channel, output_h, output_w).
    """
    if padding > 0:
        # Pads are listed from the innermost dimension outwards: width,
        # height, channel, batch. Only the spatial dimensions are padded.
        input = F.pad(input, (padding, padding, padding, padding, 0, 0, 0, 0))
    bs, in_channel, input_h, input_w = input.shape
    # The kernel also mixes channels. Its in_channel value deliberately
    # replaces the one read from the input, so the loop below iterates over
    # the kernel's input channels.
    out_channel, in_channel, kernel_h, kernel_w = kernel.shape
    if bias is None:
        bias = torch.zeros(out_channel)
    # Padding is already applied to ``input``; floor division rounds down.
    output_h = max((input_h - kernel_h) // stride + 1, 0)  # output height
    output_w = max((input_w - kernel_w) // stride + 1, 0)  # output width
    output = torch.zeros(bs, out_channel, output_h, output_w)
    for ind in range(bs):
        for oc in range(out_channel):
            for ic in range(in_channel):
                for i in range(0, input_h - kernel_h + 1, stride):  # height
                    for j in range(0, input_w - kernel_w + 1, stride):  # width
                        region = input[ind, ic, i:i + kernel_h, j:j + kernel_w]
                        # Accumulate the contribution of this input channel.
                        output[ind, oc, i // stride, j // stride] += torch.sum(
                            region * kernel[oc, ic]
                        )
            # Add the bias once per output channel, after all input channels
            # have been summed.
            output[ind, oc] += bias[oc]
    return output
# 4-D test data: input is batch x in_channel x height x width, the kernel is
# out_channel x in_channel x kernel_h x kernel_w, and the bias holds one value
# per output channel.
input = torch.randn(size=(2, 2, 5, 5))
kernel = torch.randn(size=(3, 2, 3, 3))
bias = torch.randn(size=(3,))
# Reference result from the PyTorch API, then the hand-written version with
# the same stride and padding.
pytorch_conv2d_api_output = F.conv2d(input, kernel, bias, stride=2, padding=1)
mm_conv2d_full_output = matrix_multiplication_for_conv2d_full(
    input, kernel, bias=bias, stride=2, padding=1
)
# The two results are expected to agree, i.e. flag is expected to be True.
flag = torch.allclose(pytorch_conv2d_api_output, mm_conv2d_full_output)
# step4: implement 2-D convolution by unrolling the kernel into a matrix, which
# also leads to transposed convolution. Batch and channel are ignored.
def get_kernel_matrix(kernel, input_size, stride=1):
    """Build the matrix whose product with the flattened input is the convolution.

    For every output position the kernel is zero-padded to the size of the
    input feature map, at the offset of that position's window, and flattened
    into one row.

    Args:
        kernel: 2-D tensor (kernel height, kernel width).
        input_size: (height, width) of the input feature map.
        stride: step of the sliding window, used for both dimensions.

    Returns:
        2-D tensor of shape (output_h * output_w, input_h * input_w), with
        rows in row-major order of the output positions.
    """
    kernel_h, kernel_w = kernel.shape
    input_h, input_w = input_size
    output_h = max((input_h - kernel_h) // stride + 1, 0)
    output_w = max((input_w - kernel_w) // stride + 1, 0)
    # Number of output elements x number of input elements.
    result = torch.zeros((output_h * output_w, input_h * input_w))
    # Iterate over output positions and scale by stride to get the window
    # offset. Stepping the output index by stride would skip rows and place
    # the kernel at the wrong offset.
    for out_i in range(output_h):
        top = out_i * stride
        for out_j in range(output_w):
            left = out_j * stride
            # Pads are (left, right, top, bottom); the padded kernel has the
            # same size as the input feature map.
            padded_kernel = F.pad(
                kernel,
                (left, input_w - kernel_w - left, top, input_h - kernel_h - top),
            )
            # The row-major index uses the output width, not the height.
            result[out_i * output_w + out_j] = padded_kernel.flatten()
    return result
# Test 1: check the convolution computed through the kernel matrix.
# The f-strings below need their {...} placeholders and a real "\n" escape;
# without them the variable names are printed as literal text.
kernel = torch.randn(3, 3)
input = torch.randn(4, 4)
kernel_matrix = get_kernel_matrix(kernel, input.shape)  # 4x16
print(f"[Info] kernel: \n{kernel}")
print(f"[Info] kernel_matrix: \n{kernel_matrix}")
# Convolution as one matrix product with the flattened input.
mm_conv2d_output = kernel_matrix @ input.reshape((-1, 1))
mm_conv2d_output_ = mm_conv2d_output.reshape(1, 1, 2, 2)  # match the API output layout
print(f"[Info] mm_conv2d_output_.shape: {mm_conv2d_output_.shape}")
print(f"[Info] mm_conv2d_output_: \n{mm_conv2d_output_}")
# Reference result from the PyTorch conv2d API.
pytorch_conv2d_output = F.conv2d(input.reshape(1, 1, *input.shape), kernel.reshape(1, 1, *kernel.shape))
print(f"[Info] pytorch_conv2d_output: \n{pytorch_conv2d_output}")
# Test 2: check 2-D transposed convolution, taking the 2x2 convolution output
# back up to 4x4. Multiplying by the transposed kernel matrix enlarges the
# feature map, which is why transposed convolution is used for upsampling.
mm_transposed_conv2d_output = kernel_matrix.transpose(-1, -2) @ mm_conv2d_output
mm_transposed_conv2d_output = mm_transposed_conv2d_output.reshape(1, 1, 4, 4)
print(f"[Info] mm_transposed_conv2d_output: \n{mm_transposed_conv2d_output}")
pytorch_transposed_conv2d_output = F.conv_transpose2d(pytorch_conv2d_output, kernel.reshape(1, 1, *kernel.shape))
print(f"[Info] pytorch_transposed_conv2d_output: \n{pytorch_transposed_conv2d_output}")
以上是关于PyTorch笔记 - Convolution卷积运算的原理的主要内容,如果未能解决你的问题,请参考以下文章
PyTorch笔记 - Convolution卷积运算的原理
PyTorch笔记 - Convolution卷积运算的原理
PyTorch笔记 - Convolution卷积运算的原理
PyTorch笔记 - Convolution卷积运算的原理