Conv2d Padding

Source: a convenient blog

When using a convolutional neural network, we don't have to calculate the dimensions (the spatial size) of the outputs by hand, but it's a good idea to do so to keep a mental account of how our inputs are transformed at each step. We can compute the output size along each spatial dimension (width/height); the depth (channel) dimension is determined by the number of filters.

$$W_2= \frac{W_1-F_w + 2P}{S_w} + 1$$
$$H_2= \frac{H_1-F_h + 2P}{S_h} + 1$$
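As a quick numerical check of the formula: a $7 \times 7$ input with a $3 \times 3$ filter, stride $S_w = 2$, and padding $P = 1$ gives

$$W_2 = \frac{7 - 3 + 2 \cdot 1}{2} + 1 = 4.$$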

Goal: $W_2 = W_1$ and $H_2 = H_1$, provided $S_w = S_h = 1$. This is what 'SAME' padding aims for; with a larger stride, the output is the input size scaled down by the stride:

$$H_{2} = \left\lceil \dfrac{H_{1}}{S_{h}} \right\rceil \qquad \qquad W_{2} = \left\lceil \dfrac{W_{1}}{S_{w}} \right\rceil$$
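A useful consequence of the first formula: setting $W_2 = W_1$ with $S_w = 1$ and solving for the padding gives

$$P = \frac{F_w - 1}{2},$$

so a $3 \times 3$ filter needs $P = 1$ and a $5 \times 5$ filter needs $P = 2$ to preserve the spatial size.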

import matplotlib.pyplot as plt
import numpy as np
from math import ceil

def convolution2d(input, filter, bias=0, strides=(1, 1), padding='SAME'):
    # Note: this implementation handles only a single filter.
    if filter.ndim != 3:
        raise ValueError("The filter shape should be (filter_height, filter_width, filter_depth).")

    if input.ndim != 3:
        raise ValueError("The input shape should be (input_height, input_width, input_depth).")

    if filter.shape[2] != input.shape[2]:
        raise ValueError("The input and the filter should have the same depth.")
    
    input_w, input_h = input.shape[1], input.shape[0]      # input width and input height
    filter_w, filter_h = filter.shape[1], filter.shape[0]  # filter width and filter height
    
    if padding == 'VALID':
        output_h = int(ceil(float(input_h - filter_h + 1) / float(strides[0])))
        output_w = int(ceil(float(input_w - filter_w + 1) / float(strides[1])))
        
        output = np.zeros((output_h, output_w))  # convolution output
        
        for x in range(output_w):  # Loop over every pixel of the output
            for y in range(output_h):
                # element-wise multiplication of the filter and the image
                output[y, x] = (filter * input[y * strides[0]:y * strides[0] + filter_h,
                                x * strides[1]:x * strides[1] + filter_w, :]).sum() + bias
        
    elif padding == 'SAME':
        output_h = int(ceil(float(input_h) / float(strides[0])))
        output_w = int(ceil(float(input_w) / float(strides[1])))
        
        '''
        Ideally we want the output to be the same size as the input provided the
        stride is 1. Otherwise, we scale it down by the stride. When the total
        padding is odd, the extra row/column of zeros goes on the bottom/right
        (the same convention TensorFlow uses for 'SAME' padding).
        '''
        if input_h % strides[0] == 0:
            pad_along_height = max((filter_h - strides[0]), 0)
        else:
            pad_along_height = max(filter_h - (input_h % strides[0]), 0)
        if input_w % strides[1] == 0:
            pad_along_width = max((filter_w - strides[1]), 0)
        else:
            pad_along_width = max(filter_w - (input_w % strides[1]), 0)
            
        pad_top = pad_along_height // 2             # amount of zero padding on the top
        pad_bottom = pad_along_height - pad_top     # amount of zero padding on the bottom
        pad_left = pad_along_width // 2             # amount of zero padding on the left
        pad_right = pad_along_width - pad_left      # amount of zero padding on the right
        
        output = np.zeros((output_h, output_w))  # convolution output
        # Add zero padding to the input image
        image_padded = np.zeros((input_h + pad_along_height, input_w + pad_along_width, input.shape[2]))
        # Slice with pad_top:pad_top + input_h (rather than pad_top:-pad_bottom) so the
        # assignment still works when pad_bottom or pad_right is 0.
        image_padded[pad_top:pad_top + input_h, pad_left:pad_left + input_w, :] = input
        
        for x in range(output_w):  # Loop over every pixel of the output
            for y in range(output_h):
                # element-wise multiplication of the filter and the image
                output[y, x] = (filter * image_padded[y * strides[0]:y * strides[0] + filter_h,
                                x * strides[1]:x * strides[1] + filter_w, :]).sum() + bias
                
    else:
        raise ValueError("padding should be either 'SAME' or 'VALID'.")

    return output
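As a quick sanity check of the function above (a minimal usage sketch; the 5x5x3 input and 3x3x3 filter below are made-up values), 'SAME' padding should preserve the spatial size, 'VALID' should shrink it, and a stride of 2 should roughly halve it:

# Made-up example: random 5x5 RGB-like input and a 3x3 filter of ones.
image = np.random.rand(5, 5, 3)
kernel = np.ones((3, 3, 3))

same_out = convolution2d(image, kernel, padding='SAME')                      # shape (5, 5)
valid_out = convolution2d(image, kernel, padding='VALID')                    # shape (3, 3)
strided_out = convolution2d(image, kernel, strides=(2, 2), padding='SAME')   # shape (3, 3), i.e. ceil(5/2)
print(same_out.shape, valid_out.shape, strided_out.shape)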