news 2026/5/1 10:22:29

基于 RepNCSPELAN_CAA 的 YOLOv8 结构优化:模块替换策略与 mAP 提升实证分析

作者头像

张小明

前端开发工程师

1.2k 24
文章封面图
基于 RepNCSPELAN_CAA 的 YOLOv8 结构优化:模块替换策略与 mAP 提升实证分析

实验分析:

| 指标 | YOLOv8 (原模型 - 蓝色线条) | RepNCSPELAN_CAA-YOLOv8 (优化模型 - 绿色线条) |
| --- | --- | --- |
| 精确率 (Precision) | ~0.84 | ~0.88 |
| 召回率 (Recall) | ~0.72 | ~0.81 |
| mAP50 | ~0.79 | ~0.82 |
| mAP50-95 | ~0.44 | ~0.48 |

移植

创建ultralytics\cfg\models\v8\yolov8-RepNCSPELAN_CAA.yaml

# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Variant: the backbone stages use RepNCSPELAN4_CAA and the head stages use RepNCSPELAN4.

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  # NOTE(review): the per-scale summaries below look inherited from the stock YOLOv8 config - confirm for this variant.
  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 1, RepNCSPELAN4_CAA, [128, 64, 32, 1]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 1, RepNCSPELAN4_CAA, [256, 128, 64, 1]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 1, RepNCSPELAN4_CAA, [512, 256, 128, 1]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 1, RepNCSPELAN4_CAA, [1024, 512, 256, 1]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]] # 12

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 1]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)

修改ultralytics\nn\tasks.py

在 `from ultralytics.nn.modules import (` 中添加 `RepNCSPELAN4_CAA,`

找到 n = n_ = max(round(n * depth), 1) if n > 1 else n # depth gain
在下面添加
找到args = [c1, c2, *args[1:]]添加

# Goes right after `args = [c1, c2, *args[1:]]`, at that statement's indentation.
# At that point args[2] and args[3] are the module's c3 and c4 arguments; they are
# capped at max_channels, scaled by the width multiple, and rounded via make_divisible(..., 8).
if m is RepNCSPELAN4_CAA:  # identity test: `in` against a class object raises TypeError
    args[2] = make_divisible(min(args[2], max_channels) * width, 8)
    args[3] = make_divisible(min(args[3], max_channels) * width, 8)

修改ultralytics/nn/modules/__init__.py

在 `from .block import (` 中添加 `RepNCSPELAN4_CAA,`

修改ultralytics\nn\extra_modules\block.py

在 `__all__` 中添加 `'RepNCSPELAN4_CAA'`

在文末添加

class RepConvN(nn.Module):
    """Re-parameterizable 3x3 convolution block with a training form and a deploy form.

    In the training form the output is ``act(conv1(x) + conv2(x) + id_out)``, where
    ``conv1`` is built with kernel size ``k`` (asserted to be 3) and ``conv2`` with
    kernel size 1. ``switch_to_deploy`` folds the branches into a single
    ``nn.Conv2d`` stored as ``self.conv``; once that attribute exists, ``forward``
    uses only the fused convolution.

    This code is based on
    https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
    """

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
        """Build the two convolution branches.

        Args:
            c1: Input channel count (first argument of both ``Conv`` branches).
            c2: Output channel count (second argument of both ``Conv`` branches).
            k: Kernel size of the main branch; must be 3.
            s: Stride passed to both branches.
            p: Padding of the main branch; must be 1.
            g: Group count passed to both branches.
            d: Accepted for signature compatibility; not used by this implementation.
            act: ``True`` selects ``default_act``; an ``nn.Module`` is used as given;
                anything else selects ``nn.Identity``.
            bn: Accepted for signature compatibility; not used (``self.bn`` is always ``None``).
            deploy: Accepted for signature compatibility; not used by this implementation.
        """
        super().__init__()
        assert k == 3 and p == 1
        self.g = g
        self.c1 = c1
        self.c2 = c2
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

        self.bn = None
        self.conv1 = Conv(c1, c2, k, s, p=p, g=g, act=False)
        # With k == 3 and p == 1 the padding below evaluates to 0.
        self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)

    def forward_fuse(self, x):
        """Forward pass through the fused convolution created by ``switch_to_deploy``."""
        return self.act(self.conv(x))

    def forward(self, x):
        """Forward pass; uses the fused convolution when present, else sums the branches."""
        if hasattr(self, 'conv'):
            return self.forward_fuse(x)
        id_out = 0 if self.bn is None else self.bn(x)
        return self.act(self.conv1(x) + self.conv2(x) + id_out)

    def get_equivalent_kernel_bias(self):
        """Return the ``(kernel, bias)`` pair equivalent to the sum of all branches."""
        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
        kernelid, biasid = self._fuse_bn_tensor(self.bn)
        return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid

    def _avg_to_3x3_tensor(self, avgp):
        """Build a kernel tensor filled with ``1 / kernel_size**2`` for the pooling layer ``avgp``.

        Not called by any other method visible in this class.
        """
        channels = self.c1
        groups = self.g
        kernel_size = avgp.kernel_size
        input_dim = channels // groups
        k = torch.zeros((channels, input_dim, kernel_size, kernel_size))
        k[np.arange(channels), np.tile(np.arange(input_dim), groups), :, :] = 1.0 / kernel_size ** 2
        return k

    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
        """Pad ``kernel1x1`` by one element on each side of its last two dims; ``None`` maps to 0."""
        if kernel1x1 is None:
            return 0
        return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])

    def _fuse_bn_tensor(self, branch):
        """Fold a branch's batch-norm statistics into a ``(kernel, bias)`` pair.

        Args:
            branch: ``None``, a ``Conv`` (its ``.conv`` weight and ``.bn`` statistics are
                read), or an ``nn.BatchNorm2d`` (treated as an identity kernel followed
                by that batch norm).

        Returns:
            ``(0, 0)`` when ``branch`` is ``None``; otherwise the scaled kernel and the
            matching bias.
        """
        if branch is None:
            return 0, 0
        if isinstance(branch, Conv):
            kernel = branch.conv.weight
            running_mean = branch.bn.running_mean
            running_var = branch.bn.running_var
            gamma = branch.bn.weight
            beta = branch.bn.bias
            eps = branch.bn.eps
        elif isinstance(branch, nn.BatchNorm2d):
            if not hasattr(self, 'id_tensor'):
                # Build the identity kernel once and cache it on the module.
                input_dim = self.c1 // self.g
                kernel_value = np.zeros((self.c1, input_dim, 3, 3), dtype=np.float32)
                for i in range(self.c1):
                    kernel_value[i, i % input_dim, 1, 1] = 1
                self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
            kernel = self.id_tensor
            running_mean = branch.running_mean
            running_var = branch.running_var
            gamma = branch.weight
            beta = branch.bias
            eps = branch.eps
        std = (running_var + eps).sqrt()
        t = (gamma / std).reshape(-1, 1, 1, 1)
        return kernel * t, beta - running_mean * gamma / std

    def switch_to_deploy(self):
        """Replace the branches with one fused ``nn.Conv2d``; does nothing if already fused."""
        if hasattr(self, 'conv'):
            return
        kernel, bias = self.get_equivalent_kernel_bias()
        self.conv = nn.Conv2d(in_channels=self.conv1.conv.in_channels,
                              out_channels=self.conv1.conv.out_channels,
                              kernel_size=self.conv1.conv.kernel_size,
                              stride=self.conv1.conv.stride,
                              padding=self.conv1.conv.padding,
                              dilation=self.conv1.conv.dilation,
                              groups=self.conv1.conv.groups,
                              bias=True).requires_grad_(False)
        self.conv.weight.data = kernel
        self.conv.bias.data = bias
        for para in self.parameters():
            para.detach_()
        # Drop the training-time branches and any cached helpers.
        self.__delattr__('conv1')
        self.__delattr__('conv2')
        if hasattr(self, 'nm'):
            self.__delattr__('nm')
        if hasattr(self, 'bn'):
            self.__delattr__('bn')
        if hasattr(self, 'id_tensor'):
            self.__delattr__('id_tensor')


class RepNBottleneck(nn.Module):
    """Standard bottleneck whose first convolution is a ``RepConvN``."""

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
        """Build the bottleneck.

        Args:
            c1: Input channels.
            c2: Output channels.
            shortcut: Enables the residual add, which is applied only when ``c1 == c2``.
            g: Group count passed to the second convolution.
            k: Kernel sizes for the first and second convolution.
            e: Expansion ratio; hidden channels are ``int(c2 * e)``.
        """
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = RepConvN(c1, c_, k[0], 1)
        self.cv2 = Conv(c_, c2, k[1], 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Return ``cv2(cv1(x))``, plus ``x`` when the residual add is enabled."""
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class RepNCSP(nn.Module):
    """CSP bottleneck with 3 convolutions and a stack of ``RepNBottleneck`` blocks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Build the CSP block.

        Args:
            c1: Input channels.
            c2: Output channels.
            n: Number of ``RepNBottleneck`` blocks in the sequential stack.
            shortcut: Forwarded to every ``RepNBottleneck``.
            g: Group count forwarded to every ``RepNBottleneck``.
            e: Expansion ratio; hidden channels are ``int(c2 * e)``.
        """
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
        self.m = nn.Sequential(*(RepNBottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        """Concatenate ``m(cv1(x))`` with ``cv2(x)`` along dim 1 and project with ``cv3``."""
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))


class CAA(nn.Module):
    """Attention gate: multiplies the input by a sigmoid map computed from pooled features.

    The map is produced by a 7x7 stride-1 average pool, a ``Conv``, a 1 x ``h_kernel_size``
    convolution and a ``v_kernel_size`` x 1 convolution (both with ``groups=ch``), a second
    ``Conv``, and a sigmoid.
    """

    def __init__(self, ch, h_kernel_size=11, v_kernel_size=11) -> None:
        """Build the attention layers.

        Args:
            ch: Channel count; every layer keeps it unchanged.
            h_kernel_size: Width of the horizontal strip convolution.
            v_kernel_size: Height of the vertical strip convolution.
        """
        super().__init__()
        self.avg_pool = nn.AvgPool2d(7, 1, 3)
        self.conv1 = Conv(ch, ch)
        self.h_conv = nn.Conv2d(ch, ch, (1, h_kernel_size), 1, (0, h_kernel_size // 2), 1, ch)
        self.v_conv = nn.Conv2d(ch, ch, (v_kernel_size, 1), 1, (v_kernel_size // 2, 0), 1, ch)
        self.conv2 = Conv(ch, ch)
        self.act = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled element-wise by the computed attention factor."""
        attn_factor = self.act(self.conv2(self.v_conv(self.h_conv(self.conv1(self.avg_pool(x))))))
        return attn_factor * x


class RepNCSPELAN4_CAA(nn.Module):
    """CSP-ELAN block with a ``CAA`` gate applied to the concatenated features.

    ``cv1`` projects the input to ``c3`` channels, which are split in two. The second
    half feeds ``cv2`` and the output of ``cv2`` feeds ``cv3``. Both halves and both
    branch outputs are concatenated (``c3 + 2 * c4`` channels), passed through ``CAA``,
    and projected to ``c2`` channels by ``cv4``.
    """

    def __init__(self, c1, c2, c3, c4, c5=1):
        """Build the block.

        Args:
            c1: Input channels.
            c2: Output channels.
            c3: Channels produced by ``cv1``; halved for each split.
            c4: Channels produced by each ``RepNCSP`` branch.
            c5: Number of bottlenecks inside each ``RepNCSP`` (its ``n`` argument).
        """
        super().__init__()
        self.c = c3 // 2
        self.cv1 = Conv(c1, c3, 1, 1)
        self.cv2 = nn.Sequential(RepNCSP(c3 // 2, c4, c5), Conv(c4, c4, 3, 1))
        self.cv3 = nn.Sequential(RepNCSP(c4, c4, c5), Conv(c4, c4, 3, 1))
        self.cv4 = Conv(c3 + (2 * c4), c2, 1, 1)
        self.caa = CAA(c3 + (2 * c4))

    def forward(self, x):
        """Forward pass that splits the ``cv1`` output with ``chunk``."""
        y = list(self.cv1(x).chunk(2, 1))
        # Each branch consumes the most recently appended tensor, so cv3 sees cv2's output.
        for m in (self.cv2, self.cv3):
            y.append(m(y[-1]))
        return self.cv4(self.caa(torch.cat(y, 1)))

    def forward_split(self, x):
        """Forward pass that splits the ``cv1`` output with ``split`` into explicit sizes."""
        y = list(self.cv1(x).split((self.c, self.c), 1))
        # Each branch consumes the most recently appended tensor, so cv3 sees cv2's output.
        for m in (self.cv2, self.cv3):
            y.append(m(y[-1]))
        return self.cv4(self.caa(torch.cat(y, 1)))

我用夸克网盘分享了「ultralytics-RepNCSPELAN_CAA.zip」,点击链接即可保存。打开「夸克APP」,无需下载在线播放视频,畅享原画5倍速,支持电视投屏。
链接:https://pan.quark.cn/s/d56fef471eae

版权声明: 本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若内容造成侵权/违法违规/事实不符,请联系邮箱:809451989@qq.com进行投诉反馈,一经查实,立即删除!
网站建设 2026/5/1 3:49:07

知识竞赛抢答器PLC设计

第三章 硬件电路设计 3.1控制特点分析 知识竞赛抢答器通过PLC进行按控制要求编程,其主要的输入就是通过裁判员和参赛选手的按钮,然后将信号传递给信息分析中心(PLC),PLC将根据信号作出相应的响应。知识竞赛抢答器有六个…

作者头像 李华
网站建设 2026/5/1 4:46:15

教育行业网页中,实现文件上传下载有哪些高效方案?

《一个.NET码农的奇幻漂流——20G文件上传历险记》 甲方需求说明书(魔幻现实主义版) “兄弟,我们要做个能在IE8上传20G文件的功能,要支持文件夹结构保留、断点续传、加密传输,预算是100块哈!对了…

作者头像 李华
网站建设 2026/5/1 3:51:54

低功耗蓝牙技术入门指南(4)-- 等时适配层(The Isochronous Adaptation Layer)

9. 等时适配层 9.1 基础概念 等时适配层(ISOAL)的主要作用是解决一个潜在问题 —— 该问题可能影响涉及音频设备的连接式等时通信与广播式等时通信。该层也可应用于等时通信的其他场景。 9.1.1 音频采样基础 数字音频的工作原理是:对模拟音频信号进行采样,再通过编解码…

作者头像 李华
网站建设 2026/5/1 8:25:37

d3dx9_42.dll文件缺少打不开程序 彻底解决办法 免费下载方法分享

在使用电脑系统时经常会出现丢失或找不到某些文件的情况,由于很多常用软件都是采用 Microsoft Visual Studio 编写的,所以这类软件的运行需要依赖微软 Visual C++ 运行库,比如像 QQ、迅雷、Adobe 软件等等,如果没有安装 VC 运行库或者安装…

作者头像 李华