Project Structure of EMS-YOLO

EMS-YOLO
├─ .ipynb_checkpoints
├─ LICENSE
├─ README.md
├─ __pycache__
├─ data
├─ detect.py
├─ environment.yml
├─ export.py
├─ firerate10_5.npy
├─ g1-resnet
├─ hubconf.py
├─ models # network model definitions
│ ├─ __init__.py
│ ├─ common.py
│ ├─ common_origin.py
│ ├─ experimental.py
│ ├─ res10-ee.yaml
│ ├─ res18-ee.yaml
│ ├─ res18-eebk.yaml
│ ├─ res18-sew.yaml
│ ├─ resnet10.yaml
│ ├─ resnet18.yaml
│ ├─ resnet34-cat.yaml
│ ├─ resnet34.yaml
│ ├─ tf.py
│ └─ yolo.py
├─ requirements.txt
├─ running_log.log
├─ runs
├─ size10_5.npy
├─ train.py
├─ train.sh
├─ utils
│ ├─ augmentations.py
│ ├─ autoanchor.py
│ ├─ autobatch.py
│ ├─ callbacks.py
│ ├─ datasets.py
│ ├─ downloads.py
│ ├─ general.py
│ ├─ loggers
│ │ ├─ __init__.py
│ │ ├─ __pycache__
│ │ └─ wandb
│ │   ├─ __init__.py
│ │   ├─ __pycache__
│ │   ├─ log_dataset.py
│ │   ├─ sweep.py
│ │   ├─ sweep.yaml
│ │   └─ wandb_utils.py
│ ├─ loss.py
│ ├─ metrics.py
│ ├─ plots.py
│ └─ torch_utils.py
├─ val.py
├─ visi_img.py
└─ wandb

Among these files, models/common.py contains the definitions of many basic YOLO building blocks. Let's walk through what each module does:

1. Binary quantization

class BinaryQuantize(Function):
    @staticmethod
    def forward(ctx, input, k, t):
        ctx.save_for_backward(input, k, t)
        out = torch.sign(input)
        return out

    @staticmethod
    def backward(ctx, grad_output):
        input, k, t = ctx.saved_tensors
        grad_input = k * t * (1 - torch.pow(torch.tanh(input * t), 2)) * grad_output
        return grad_input, None, None

Function

In the forward pass, the input tensor is binarized with zero as the threshold:

$$\mathrm{out}_i = \mathrm{sgn}(\mathrm{input}_i)$$

In the backward pass, a surrogate gradient replaces the non-differentiable sign function when computing $\frac{\partial y}{\partial x}$:

$$\frac{\partial L}{\partial x} = \frac{\partial L}{\partial y}\,\frac{\partial y}{\partial x} = \frac{\partial L}{\partial y}\, k t \left(1-\tanh^2(t x)\right)$$
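A minimal usage sketch of this straight-through behaviour (assuming BinaryQuantize is imported from models/common.py; the values of k and t here match the ones used by Snn_Conv2d_quant below):

import torch

x = torch.randn(4, requires_grad=True)
k = torch.tensor([10.0])
t = torch.tensor([0.1])

y = BinaryQuantize.apply(x, k, t)   # forward: sign(x), values in {-1, 0, +1}
y.sum().backward()                  # backward: k * t * (1 - tanh^2(t * x))
print(y)
print(x.grad)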

2. Snn_Conv2d_quant (quantized 2D SNN convolution)


class Snn_Conv2d_quant(nn.Conv2d):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1,
                 bias=False, padding_mode='zeros', marker='b'):
        super(Snn_Conv2d_quant, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias, padding_mode)
        self.marker = marker
        self.k = torch.tensor([10]).float().cuda()
        self.t = torch.tensor([0.1]).float().cuda()

    def forward(self, input):
        w = self.weight
        # standardize each output channel's weights (zero mean, unit std)
        bw = w - w.view(w.size(0), -1).mean(-1).view(w.size(0), 1, 1, 1)
        bw = bw / bw.view(bw.size(0), -1).std(-1).view(bw.size(0), 1, 1, 1)
        # per-channel scaling factor, rounded to the nearest power of two
        sw = torch.pow(torch.tensor([2] * bw.size(0)).to(input.device).float(), (torch.log(bw.abs().view(bw.size(0), -1).mean(-1)) / math.log(2)).round().float()).view(bw.size(0), 1, 1, 1).detach()
        # binarize the standardized weights, then rescale
        w = BinaryQuantize().apply(bw, self.k, self.t)
        weight = w * sw
        # input is (T, N, C, H, W); compute the output spatial size
        h = (input.size()[3] - self.kernel_size[0] + 2 * self.padding[0]) // self.stride[0] + 1
        w = (input.size()[4] - self.kernel_size[0] + 2 * self.padding[0]) // self.stride[0] + 1
        c1 = torch.zeros(time_window, input.size()[1], self.out_channels, h, w, device=input.device)
        for i in range(time_window):
            c1[i] = F.conv2d(input[i], weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
        return c1

This module inherits from the standard nn.Conv2d. In the forward pass it standardizes the weights per output channel, computes a power-of-two quantization scaling factor, binarizes the weights, and then applies the convolution separately to every time step of the 5D input (T, N, C, H, W).
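A quick shape-check sketch (assuming time_window is set to 4 in models/common.py and that a CUDA device is available, since the module moves its k and t constants to the GPU in __init__):

import torch

conv = Snn_Conv2d_quant(3, 16, kernel_size=3, stride=1, padding=1).cuda()
x = torch.rand(4, 2, 3, 32, 32, device='cuda')   # (T, N, C, H, W), with time_window = 4
y = conv(x)
print(y.shape)   # expected: torch.Size([4, 2, 16, 32, 32])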

3. Signed quantization of the membrane potential into (-1, 1)

def u_q(u, b, alpha):
    u = torch.tanh(u)
    # alpha = u.data.abs().max()
    u = torch.clamp(u / alpha, min=-1, max=1)
    u = u * (2 ** (b - 1) - 1)
    u_hat = (u.round() - u).detach() + u   # straight-through estimator: round() in forward, identity gradient in backward
    return u_hat * alpha / (2 ** (b - 1) - 1)

step.1 Non-linear mapping into $(-1, 1)$:

$$u_{nl} = \tanh(u)$$

step.2 Clipping:

$$u_c = \mathrm{clamp}\!\left(\frac{u_{nl}}{\alpha}, -1, 1\right)$$

step.3 Quantization:

$$\overline{u_c} = u_c \times (2^{b-1}-1)$$
$$u_R = \mathrm{round}(\overline{u_c})$$
$$\hat{u} = u_R \cdot \alpha / (2^{b-1}-1)$$

The `(u.round() - u).detach() + u` trick keeps the rounded value in the forward pass while letting the gradient flow through `u` unchanged, i.e. a straight-through estimator.
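A tiny numerical check (a sketch, calling u_q with b = 3 and alpha = 1.0):

import torch

u = torch.tensor([-2.0, -0.3, 0.0, 0.3, 2.0])
print(u_q(u, 3, 1.0))
# tanh squashes u into (-1, 1); with b = 3 there are 2^(b-1) - 1 = 3 positive levels,
# so outputs snap onto the grid {-1, -2/3, -1/3, 0, 1/3, 2/3, 1} (times alpha):
# tensor([-1.0000, -0.3333,  0.0000,  0.3333,  1.0000])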

4. Firing function

lens = 0.5    # 0.5 # hyper-parameter of the approximate (surrogate) function
thresh = 0.5  # 0.5 # neuronal threshold
class ActFun(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return input.gt(thresh).float()

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        temp = abs(input - thresh) < lens
        temp = temp / (2 * lens)
        return grad_input * temp.float()

In the forward pass a spike is emitted: the output is 1 wherever the input exceeds the threshold, and 0 otherwise. In the backward pass the gradient of the non-differentiable step function is approximated by a rectangular surrogate: it equals 1/(2·lens) when |input − thresh| < lens and 0 otherwise.
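A quick check of the spikes and the surrogate gradient (a sketch, using the thresh = 0.5 and lens = 0.5 defined above, so 1/(2·lens) = 1):

import torch

u = torch.tensor([0.3, 0.6, 1.2], requires_grad=True)
s = ActFun.apply(u)
s.sum().backward()
print(s)        # tensor([0., 1., 1.])  -> spikes where u > 0.5
print(u.grad)   # tensor([1., 1., 0.])  -> nonzero only inside |u - 0.5| < 0.5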

5. Custom Conv-BN-LIF convolution block

class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k, s, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = Snn_Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = batch_norm_2d(c2)
        self.act = mem_update(act=True)

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        return self.act(self.conv(x))

6. Convolution block with SiLU activation

class Conv_A(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k, s, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = Snn_Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = batch_norm_2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        return self.act(self.conv(x))

7. Convolution blocks without an activation function

class Conv_1(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k, s, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = Snn_Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = batch_norm_2d(c2)
        # self.act = mem_update() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.bn(self.conv(x))

    def forward_fuse(self, x):
        return self.conv(x)


class Conv_2(nn.Module):
    # Standard convolution (identical to Conv_1)
    def __init__(self, c1, c2, k, s, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = Snn_Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = batch_norm_2d(c2)
        # self.act = mem_update() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.bn(self.conv(x))

    def forward_fuse(self, x):
        return self.conv(x)

8. Modified BatchNorm layers

They differ from the standard BatchNorm in how the affine weight is initialized: BatchNorm3d1 sets it to thresh, while BatchNorm3d2 sets it to 0.2 * thresh. The batch_norm_2d wrappers transpose the (T, N, C, H, W) input so that BatchNorm3d normalizes jointly over batch, time, and space (a shape walk-through follows the code below).

class batch_norm_2d(nn.Module):
    def __init__(self, num_features, eps=1e-5, momentum=0.1):
        super(batch_norm_2d, self).__init__()  # num_features=16
        self.bn = BatchNorm3d1(num_features)  # input (N,C,D,H,W): batch norm over the (N,D,H,W) slice, i.e. spatio-temporal Batch Normalization

    def forward(self, input):
        y = input.transpose(0, 2).contiguous().transpose(0, 1).contiguous()
        y = self.bn(y)
        return y.contiguous().transpose(0, 1).contiguous().transpose(0, 2)


class batch_norm_2d1(nn.Module):
    def __init__(self, num_features, eps=1e-5, momentum=0.1):
        super(batch_norm_2d1, self).__init__()
        self.bn = BatchNorm3d2(num_features)

    def forward(self, input):
        y = input.transpose(0, 2).contiguous().transpose(0, 1).contiguous()
        y = self.bn(y)
        return y.contiguous().transpose(0, 1).contiguous().transpose(0, 2)


class BatchNorm3d1(torch.nn.BatchNorm3d):
    def reset_parameters(self):
        self.reset_running_stats()
        if self.affine:
            nn.init.constant_(self.weight, thresh)
            nn.init.zeros_(self.bias)


class BatchNorm3d2(torch.nn.BatchNorm3d):
    def reset_parameters(self):
        self.reset_running_stats()
        if self.affine:
            nn.init.constant_(self.weight, 0.2 * thresh)
            nn.init.zeros_(self.bias)
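To see what the two transposes do to the (T, N, C, H, W) tensor, here is a quick shape walk-through (a sketch; the time_window of 4 is arbitrary):

import torch

x = torch.rand(4, 2, 16, 8, 8)   # (T, N, C, H, W)
y = x.transpose(0, 2).contiguous().transpose(0, 1).contiguous()
print(y.shape)                   # torch.Size([2, 16, 4, 8, 8]) = (N, C, T, H, W)
# BatchNorm3d treats the time axis as its "depth" dimension, so statistics are
# computed per channel over batch, time, and space jointly (spatio-temporal BN).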

9. MaxPool2d applied to every time step of the input

class Pools(nn.Module):
    def __init__(self, kernel_size, stride, padding=0, dilation=1):
        super().__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.pool = nn.MaxPool2d(kernel_size=self.kernel_size, stride=self.stride, padding=self.padding)

    def forward(self, input):
        h = int((input.size()[3] + 2 * self.padding - self.dilation * (self.kernel_size - 1) - 1) / self.stride + 1)
        w = int((input.size()[4] + 2 * self.padding - self.dilation * (self.kernel_size - 1) - 1) / self.stride + 1)
        c1 = torch.zeros(time_window, input.size()[1], input.size()[2], h, w, device=input.device)
        for i in range(time_window):
            c1[i] = self.pool(input[i])
        return c1

10. ZeroPad2d applied to every time step of the input

class zeropad(nn.Module):
    def __init__(self, padding):
        super().__init__()
        self.padding = padding
        self.pad = nn.ZeroPad2d(padding=self.padding)

    def forward(self, input):
        h = input.size()[3] + self.padding[2] + self.padding[3]
        w = input.size()[4] + self.padding[0] + self.padding[1]
        c1 = torch.zeros(time_window, input.size()[1], input.size()[2], h, w, device=input.device)
        for i in range(time_window):
            c1[i] = self.pad(input[i])
        return c1

11. Upsample applied to every time step of the input

class Sample(nn.Module):
    def __init__(self, size=None, scale_factor=None, mode='nearest'):
        super(Sample, self).__init__()
        self.scale_factor = scale_factor
        self.mode = mode
        self.size = size
        self.up = nn.Upsample(self.size, self.scale_factor, mode=self.mode)

    def forward(self, input):
        temp = torch.zeros(time_window, input.size()[1], input.size()[2],
                           input.size()[3] * self.scale_factor, input.size()[4] * self.scale_factor,
                           device=input.device)
        for i in range(time_window):
            temp[i] = self.up(input[i])
            # temp[i] = F.interpolate(input[i], scale_factor=self.scale_factor, mode='nearest')
        return temp

12. Depth-wise convolution

Inherits from the Conv class and sets the number of convolution groups g to the greatest common divisor of the input and output channel counts (a quick check follows the code below).

class DWConv(Conv):
    # Depth-wise convolution
    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
        """Initializes depth-wise convolution with optional activation; parameters are channel in/out, kernel, stride,
        dilation.
        """
        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
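A quick check of the grouping rule (a sketch): with c1 = 64 input and c2 = 128 output channels, each group convolves exactly one input channel.

import math

print(math.gcd(64, 128))   # 64 -> DWConv(64, 128) uses 64 groups, i.e. one input channel per group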

The Tiny-YOLOv3 structure in Ultralytics (the layer table printed when the model is built)


from n params module arguments
0 -1 1 464 models.common.Conv [3, 16, 3, 1]
1 -1 1 0 torch.nn.modules.pooling.MaxPool2d [2, 2, 0]
2 -1 1 4672 models.common.Conv [16, 32, 3, 1]
3 -1 1 0 torch.nn.modules.pooling.MaxPool2d [2, 2, 0]
4 -1 1 18560 models.common.Conv [32, 64, 3, 1]
5 -1 1 0 torch.nn.modules.pooling.MaxPool2d [2, 2, 0]
6 -1 1 73984 models.common.Conv [64, 128, 3, 1]
7 -1 1 0 torch.nn.modules.pooling.MaxPool2d [2, 2, 0]
8 -1 1 295424 models.common.Conv [128, 256, 3, 1]
9 -1 1 0 torch.nn.modules.pooling.MaxPool2d [2, 2, 0]
10 -1 1 1180672 models.common.Conv [256, 512, 3, 1]
11 -1 1 0 torch.nn.modules.padding.ZeroPad2d [[0, 1, 0, 1]]
12 -1 1 0 torch.nn.modules.pooling.MaxPool2d [2, 1, 0]
13 -1 1 4720640 models.common.Conv [512, 1024, 3, 1]
14 -1 1 262656 models.common.Conv [1024, 256, 1, 1]
15 -1 1 1180672 models.common.Conv [256, 512, 3, 1]
16 -2 1 33024 models.common.Conv [256, 128, 1, 1]
17 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
18 [-1, 8] 1 0 models.common.Concat [1]
19 -1 1 885248 models.common.Conv [384, 256, 3, 1]
20 [19, 15] 1 196350 models.yolo.Detect [80, [[10, 14, 23, 27, 37, 58], [81, 82, 135, 169, 344, 319]], [256, 512]]
yolov3-tiny summary: 49 layers, 8852366 parameters, 8852366 gradients, 13.3 GFLOPs
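A table like this can be reproduced by constructing the model from its config (a sketch; it assumes an Ultralytics-style models/yolo.py exposing a Model class and that a standard yolov3-tiny.yaml is available, neither of which is shown in the listing above):

from models.yolo import Model

# Building the model runs parse_model, which logs the per-layer table shown above.
model = Model('models/yolov3-tiny.yaml', ch=3, nc=80)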

The model is described in its .yaml file as follows:

# YOLOv3-tiny backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [16, 3, 1]], # 0
    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
    [-1, 1, Conv, [32, 3, 1]],
    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
    [-1, 1, Conv, [64, 3, 1]],
    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
    [-1, 1, Conv, [128, 3, 1]],
    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
    [-1, 1, Conv, [256, 3, 1]],
    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
    [-1, 1, Conv, [512, 3, 1]],
    [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
    [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
  ]

# YOLOv3-tiny head
head: [
    [-1, 1, Conv, [1024, 3, 1]],
    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)

    [-2, 1, Conv, [128, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 8], 1, Concat, [1]], # cat backbone P4
    [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)

    [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
  ]

The original Tiny-YOLOv3 backbone uses only three kinds of modules: Conv, nn.MaxPool2d, and nn.ZeroPad2d; the head uses Conv, nn.Upsample, and Concat (plus the Detect head). Since the EMS-YOLO authors have already provided time-step-aware versions of all of these modules, a bold idea naturally follows: swap them in wholesale.

The yaml file after the replacement looks like this:

nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple
anchors:
  - [10,14, 23,27, 37,58]  # P4/16
  - [81,82, 135,169, 344,319]  # P5/32
# ------- the settings above are identical to the original -------

# YOLOv3-tiny backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv_2, [16, 3, 1]], # 0
    [-1, 1, Pools, [2, 2, 0]], # 1-P1/2
    [-1, 1, Conv_2, [32, 3, 1]],
    [-1, 1, Pools, [2, 2, 0]], # 3-P2/4
    [-1, 1, Conv_2, [64, 3, 1]],
    [-1, 1, Pools, [2, 2, 0]], # 5-P3/8
    [-1, 1, Conv_2, [128, 3, 1]],
    [-1, 1, Pools, [2, 2, 0]], # 7-P4/16
    [-1, 1, Conv_2, [256, 3, 1]],
    [-1, 1, Pools, [2, 2, 0]], # 9-P5/32
    [-1, 1, Conv_2, [512, 3, 1]],
    [-1, 1, zeropad, [[0, 1, 0, 1]]], # 11
    [-1, 1, Pools, [2, 1, 0]], # 12
  ]

# YOLOv3-tiny head
head: [
    [-1, 1, Conv_2, [1024, 3, 1]],
    [-1, 1, Conv_2, [256, 1, 1]],
    [-1, 1, Conv_2, [512, 3, 1]], # 15 (P5/32-large)

    [-2, 1, Conv_2, [128, 1, 1]],
    [-1, 1, Sample, [None, 2, "nearest"]],
    [[-1, 8], 1, Concat, [2]], # cat backbone P4 (dim 2: channels sit after the leading time axis in (T, N, C, H, W))
    [-1, 1, Conv_2, [256, 3, 1]], # 19 (P4/16-medium)

    [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
  ]