卷积操作具体实现

Posted 东东就是我

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了卷积操作具体实现相关的知识,希望对你有一定的参考价值。

1.具体结论

我们都知道,卷积操作就是让卷积核在图像上滑动,每个位置逐元素相乘后再求和。但是 PyTorch 并不是这样逐窗口计算的。
具体原理是:把图像和卷积核都展开(重排)成二维矩阵,然后做矩阵相乘,也就是 im2col。

参考:https://mp.weixin.qq.com/s/GPDYKQlIOq6Su0Ta9ipzig
https://zhuanlan.zhihu.com/p/46305636

2.具体代码实现:

(1)单通道输入:1 × H × W(对应下面被注释掉的第一段代码)
(2)批量多通道输入:B × C × H × W(对应下面实际运行的代码)

import  numpy as np
import torch
from torch import nn
# #1,H,W
# src=np.array(np.arange(0,7**2).reshape(7,7))
# intH,intW=src.shape
#
# kernel = np.array([-0.2589,  0.2106, -0.1583, -0.0107,  0.1177,  0.1693, -0.1582, -0.3048, -0.1946]).reshape(3,3)
# KHeight, KWeight = kernel.shape
#
# row_num=intH-KHeight+1
# col_num=intW-KWeight+1
#
# OutScrIm2Col = np.zeros([KHeight*KWeight,row_num*col_num])
#
# ii,jj=0,0
# col_cnt,row_cnt=0,0
# for i in range(row_num):
#     for j in range(col_num):
#         ii=i
#         jj=j
#         for iii in range(KHeight):
#             for jjj in range(KHeight):
#                 OutScrIm2Col[col_cnt][row_cnt]=src[ii][jj]
#                 jj+=1
#                 col_cnt+=1
#             ii+=1
#             jj=j
#         col_cnt=0
#         row_cnt+=1
#
# im2col_kernel = kernel.reshape(-1,9)
# out = np.matmul(im2col_kernel,OutScrIm2Col)
# print(out.reshape(5,5))
#
# import torch
# from torch import nn
# import numpy as np
# torch.manual_seed(100)
#
# net = nn.Conv2d(1, 1, 3, padding=0, bias=False)
#
# scr = np.array(np.arange(0, 7**2).reshape(1, 1, 7, 7)).astype(np.float32)
# scr = torch.from_numpy(scr)
#
# print(net.weight.data) # 把这里的weight的值复制到上面numpy的代码来做验证
# print(net(scr))
# Batched multi-channel case: the demo input is laid out as (B, C, H, W).
# Print arrays in full instead of eliding the middle with "...".
np.set_printoptions(threshold=np.inf)
# Consecutive integers 0..1214 arranged as 5 images of 3 channels, 9x9 each.
src = np.arange(9 ** 4)[: 5 * 3 * 9 * 9].reshape(-1, 3, 9, 9)
print(src.shape)
# kernel = np.array([[[[-0.1158,  0.0942, -0.0708],
#           [-0.0048,  0.0526,  0.0757],
#           [-0.0708, -0.1363, -0.0870]],
#
#          [[-0.1139, -0.1128,  0.0702],
#           [ 0.0631,  0.0857, -0.0244],
#           [ 0.1197,  0.1481,  0.0765]],
#
#          [[-0.0823, -0.0589, -0.0959],
#           [ 0.0966,  0.0166,  0.1422],
#           [-0.0167,  0.1335,  0.0729]],
#
#          [[-0.0032, -0.0768,  0.0597],
#           [ 0.0083, -0.0754,  0.0867],
#           [-0.0228, -0.1440, -0.0832]],
#
#          [[ 0.1352,  0.0615, -0.1005],
#           [ 0.1163,  0.0049, -0.1384],
#           [ 0.0440, -0.0468, -0.0542]]]])

# Convolution weights, laid out as (out_channels, in_channels, kernel_h, kernel_w);
# that is the order in which the script unpacks kernel.shape further down.
# This literal has shape (3, 3, 3, 3): three filters, each with three 3x3 slices.
# NOTE(review): the values look like a 4-decimal copy of the nn.Conv2d weights
# created under torch.manual_seed(100) later in this script -- confirm by
# printing net.weight.data; expect small rounding differences in the outputs.
kernel = np.array([[[[-0.1495,  0.1216, -0.0914],
          [-0.0062,  0.0679,  0.0977],
          [-0.0913, -0.1760, -0.1124]],

         [[-0.1470, -0.1456,  0.0907],
          [ 0.0815,  0.1107, -0.0314],
          [ 0.1545,  0.1913,  0.0987]],

         [[-0.1063, -0.0761, -0.1238],
          [ 0.1246,  0.0215,  0.1836],
          [-0.0215,  0.1724,  0.0941]]],


        [[[-0.0042, -0.0991,  0.0771],
          [ 0.0107, -0.0973,  0.1120],
          [-0.0294, -0.1859, -0.1074]],

         [[ 0.1746,  0.0794, -0.1298],
          [ 0.1502,  0.0063, -0.1786],
          [ 0.0568, -0.0604, -0.0700]],

         [[ 0.0100, -0.1753,  0.0048],
          [ 0.1559,  0.0381, -0.0212],
          [ 0.0877, -0.0168, -0.0620]]],


        [[[ 0.0466,  0.0204,  0.0730],
          [-0.0505,  0.1560,  0.1292],
          [-0.0755,  0.0664,  0.0285]],

         [[ 0.1629,  0.1608,  0.0997],
          [ 0.1068,  0.0454, -0.0624],
          [-0.1089,  0.1714,  0.0814]],

         [[-0.1479,  0.0606, -0.0596],
          [-0.1750,  0.1847,  0.0211],
          [ 0.0719, -0.0031, -0.1637]]]])
# Disabled experiment, kept for reference: tiling the weights 12x along the
# last axis and regrouping them as (-1, 3, 3, 3) would yield more filters.
# kernel = np.tile(kernel, 12)
# kernel = kernel.reshape(-1, 3, 3, 3)
# Shows the weight layout actually used below.
print(kernel.shape)

# Convolution (stride 1, no padding) computed the im2col way:
#   1. flatten every filter into one row of a 2-D weight matrix,
#   2. copy every receptive field of the input into one column of a matrix,
#   3. a single matrix product then yields every output pixel at once.
scrN, srcChannel, intH, intW = src.shape
KoutChannel, KinChannel, kernel_H, kernel_W = kernel.shape

# The filters must consume exactly the channels the input provides; otherwise
# the result would silently ignore channels or index out of range.
if srcChannel != KinChannel:
    raise ValueError(
        "channel mismatch: input has {} channels but kernel expects {}".format(
            srcChannel, KinChannel
        )
    )
if kernel_H > intH or kernel_W > intW:
    raise ValueError(
        "kernel ({}x{}) is larger than the input ({}x{})".format(
            kernel_H, kernel_W, intH, intW
        )
    )

# One row per output channel: (KoutChannel, KinChannel * kernel_H * kernel_W).
im2col_kernel = kernel.reshape(KoutChannel, -1)
outChannel, outH, outW = KoutChannel, intH - kernel_H + 1, intW - kernel_W + 1

# One column per output pixel, one row per (channel, kernel row, kernel col)
# entry of the window; for the demo data this is (5, 27, 49).
OutScrIm2Col = np.zeros([scrN, KinChannel * kernel_H * kernel_W, outH * outW])

# Walk the output positions in row-major order. The window under the kernel
# is flattened channel -> row -> column, which is the same order used when
# flattening the filters above, so rows of both matrices line up.
for Outim2colCol_H in range(outH):
    for Outim2colCol_W in range(outW):
        window = src[
            :,
            :,
            Outim2colCol_H:Outim2colCol_H + kernel_H,
            Outim2colCol_W:Outim2colCol_W + kernel_W,
        ]
        column = Outim2colCol_H * outW + Outim2colCol_W
        # Fill this column for every image in the batch at once.
        OutScrIm2Col[:, :, column] = window.reshape(scrN, -1)

print(OutScrIm2Col.shape)
# matmul broadcasts the 2-D weight matrix over the batch dimension, giving
# (scrN, outChannel, outH * outW); fold the last axis back into a 2-D map.
# The shape is derived from the inputs rather than hard-coded to (5, 3, 7, 7).
Out = np.matmul(im2col_kernel, OutScrIm2Col).reshape(scrN, outChannel, outH, outW)
print(Out)
# Reference computation with PyTorch: run the same input through nn.Conv2d so
# its output can be compared by eye with the NumPy im2col result printed above.
# Seeding first makes the randomly initialised weights reproducible.
torch.manual_seed(100)

net = nn.Conv2d(3, 3, 3, padding=0, bias=False)
print(net.weight.data.shape)
# To see the actual weight values, add: print(net.weight.data)

# Same integer ramp as the NumPy input, converted to float32 because the
# convolution layer's weights are float32.
scr = torch.from_numpy(
    np.arange(9 ** 4)[: 5 * 3 * 9 * 9].reshape(-1, 3, 9, 9).astype(np.float32)
)
print("data:", scr.shape)

# NOTE: this rebinds Out, replacing the NumPy result computed earlier.
Out = net(scr)
print("Our:", Out.shape)
print(Out)

以上是关于卷积操作具体实现的主要内容,如果未能解决你的问题,请参考以下文章

GLSL实现图像处理

卷积操作转化成矩阵乘法

CNN基础计算机如何计算卷积操作

CNN基础计算机如何计算卷积操作

可变形卷积(Deformable Conv)原理解析与torch代码实现

Java学习(Day 37)