YOLOv5之Common.py

文章目录

    • 1.学习目的
    • 2.网络模型![在这里插入图片描述](https://i-blog.csdnimg.cn/direct/67b8dbd00c9b4034ba370fc8b8a6031a.jpeg)
    • 3.common.py分析

1.学习目的

学习 YOLOv5 中最关键的模块文件之一:common.py。

2.网络模型

在这里插入图片描述

3.common.py分析

# Ultralytics YOLOv5 🚀, AGPL-3.0 license
"""Common modules."""

import ast
import contextlib
import json
import math
import platform
import warnings
import zipfile
from collections import OrderedDict, namedtuple
from copy import copy
from pathlib import Path
from urllib.parse import urlparse

import cv2
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp

# Import 'ultralytics' package or install if missing
try:
    import ultralytics

    assert hasattr(ultralytics, "__version__")  # verify package is not directory
except (ImportError, AssertionError):
    import os

    os.system("pip install -U ultralytics")
    import ultralytics

from ultralytics.utils.plotting import Annotator, colors, save_one_box

from utils import TryExcept
from utils.dataloaders import exif_transpose, letterbox
from utils.general import (
    LOGGER,
    ROOT,
    Profile,
    check_requirements,
    check_suffix,
    check_version,
    colorstr,
    increment_path,
    is_jupyter,
    make_divisible,
    non_max_suppression,
    scale_boxes,
    xywh2xyxy,
    xyxy2xywh,
    yaml_load,
)
from utils.torch_utils import copy_attr, smart_inference_mode


# Compute the padding that keeps the output shape equal to the input shape
def autopad(k, p=None, d=1):
    """
    Return the padding that gives a 'same'-shaped convolution output.

    Args:
        k: Kernel size, either an int or a sequence of ints (one per spatial dimension).
        p: Explicit padding. When it is not None it is returned unchanged.
        d: Dilation. For d > 1 the kernel size is first replaced by d * (k - 1) + 1.

    Returns:
        `p` if it was given, otherwise half of the (dilated) kernel size, floored; a list when `k` is a sequence.
    """
    if d > 1:
        # A dilated kernel covers d * (k - 1) + 1 input positions per dimension.
        if isinstance(k, int):
            k = d * (k - 1) + 1
        else:
            k = [d * (side - 1) + 1 for side in k]
    if p is not None:
        return p
    # Auto-pad: half of the kernel size, per dimension.
    if isinstance(k, int):
        return k // 2
    return [side // 2 for side in k]


# Convolution class, derived from nn.Module
class Conv(nn.Module):
    """Convolution block chaining Conv2d, BatchNorm2d and an activation."""

    # Activation instance used by every Conv constructed with act=True.
    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """
        Build the bias-free convolution, its batch norm and the activation.

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Kernel size.
            s: Stride.
            p: Padding; None lets `autopad` pick it from `k` and `d`.
            g: Convolution groups.
            d: Dilation.
            act: True selects `default_act`, an nn.Module instance is used as given, anything else means identity.
        """
        super().__init__()
        padding = autopad(k, p, d)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Run convolution, then batch normalization, then the activation on `x`."""
        normalized = self.bn(self.conv(x))
        return self.act(normalized)

    def forward_fuse(self, x):
        """Run convolution followed directly by the activation, skipping the batch-norm layer."""
        convolved = self.conv(x)
        return self.act(convolved)


# Depth-wise convolution layer
class DWConv(Conv):
    """Conv block whose group count is gcd(c1, c2), i.e. a depth-wise convolution."""

    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):
        """
        Configure the parent Conv with groups = gcd(c1, c2).

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Kernel size.
            s: Stride.
            d: Dilation.
            act: Activation selector, forwarded to Conv.
        """
        groups = math.gcd(c1, c2)
        super().__init__(c1, c2, k, s, g=groups, d=d, act=act)


# Depth-wise transposed convolution, used for upsampling
class DWConvTranspose2d(nn.ConvTranspose2d):
    """ConvTranspose2d whose group count is gcd(c1, c2), i.e. a depth-wise transposed convolution."""

    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):
        """
        Configure the parent ConvTranspose2d with groups = gcd(c1, c2).

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Kernel size.
            s: Stride.
            p1: Input padding.
            p2: Output padding.
        """
        super().__init__(
            in_channels=c1,
            out_channels=c2,
            kernel_size=k,
            stride=s,
            padding=p1,
            output_padding=p2,
            groups=math.gcd(c1, c2),
        )


# Transformer layer with multi-head attention
class TransformerLayer(nn.Module):
    """Transformer layer made of multi-head attention plus two linear layers, with no LayerNorm."""

    def __init__(self, c, num_heads):
        """
        Create the projection, attention and feed-forward sub-layers (see https://arxiv.org/abs/2010.11929).

        Args:
            c: Embedding size; every linear layer maps c -> c.
            num_heads: Number of attention heads.
        """
        super().__init__()
        # Bias-free projections that derive query, key and value from the same input.
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        # Two stacked bias-free linear layers applied after attention.
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        """Apply attention and the two linear layers, each wrapped in a residual connection."""
        query, key, value = self.q(x), self.k(x), self.v(x)
        attended, _ = self.ma(query, key, value)  # second item is the attention weights, unused
        x = attended + x
        return self.fc2(self.fc1(x)) + x


# Transformer block: optional convolution, position embedding and transformer layers
class TransformerBlock(nn.Module):
    """Vision Transformer block: optional Conv, learnable position embedding, stacked TransformerLayers."""

    def __init__(self, c1, c2, num_heads, num_layers):
        """
        Build the block.

        Args:
            c1: Input channels.
            c2: Output channels; a Conv(c1, c2) is inserted only when c1 != c2.
            num_heads: Attention heads per TransformerLayer.
            num_layers: Number of stacked TransformerLayers.
        """
        super().__init__()
        self.conv = Conv(c1, c2) if c1 != c2 else None
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        layers = [TransformerLayer(c2, num_heads) for _ in range(num_layers)]
        self.tr = nn.Sequential(*layers)
        self.c2 = c2

    def forward(self, x):
        """Flatten the spatial dims into a sequence, run the transformer stack, and restore the 4-D shape."""
        if self.conv is not None:
            x = self.conv(x)
        batch, _, dim2, dim3 = x.shape
        # (b, c, d2, d3) -> (b, c, d2*d3) -> (d2*d3, b, c): sequence-first layout for the attention layers.
        tokens = x.flatten(2).permute(2, 0, 1)
        embedded = tokens + self.linear(tokens)
        encoded = self.tr(embedded)
        # Back to (b, c, d2*d3) and then to the original spatial layout.
        return encoded.permute(1, 2, 0).reshape(batch, self.c2, dim2, dim3)


# Bottleneck: the core residual building block
class Bottleneck(nn.Module):
    """Two-convolution bottleneck (1x1 then 3x3) with an optional residual shortcut and grouped 3x3 conv."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
        """
        Build the bottleneck.

        Args:
            c1: Input channels.
            c2: Output channels.
            shortcut: Request a residual add; it is only active when c1 == c2.
            g: Groups of the 3x3 convolution.
            e: Expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
        # The residual add needs matching input and output channel counts.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Apply both convolutions and add the input back when the shortcut is active."""
        if self.add:
            return x + self.cv2(self.cv1(x))
        return self.cv2(self.cv1(x))


# BottleneckCSP: CSP bottleneck with cross-stage partial connections and optional shortcuts
class BottleneckCSP(nn.Module):
    """CSP bottleneck: a Bottleneck branch and a plain 1x1 branch are concatenated, normalized and fused."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Build the CSP bottleneck.

        Args:
            c1: Input channels.
            c2: Output channels.
            n: Number of stacked Bottleneck modules.
            shortcut: Forwarded to each Bottleneck.
            g: Groups, forwarded to each Bottleneck.
            e: Expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = nn.Conv2d(c1, hidden, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(hidden, hidden, 1, 1, bias=False)
        self.cv4 = Conv(2 * hidden, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * hidden)  # applied to cat(cv2, cv3)
        self.act = nn.SiLU()
        stack = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*stack)

    def forward(self, x):
        """Concatenate the bottleneck branch with the 1x1 branch, then apply BN, SiLU and the fusing Conv."""
        deep_branch = self.cv3(self.m(self.cv1(x)))
        skip_branch = self.cv2(x)
        merged = torch.cat((deep_branch, skip_branch), 1)
        return self.cv4(self.act(self.bn(merged)))


# CrossConv: cross convolution with downsampling, expansion and optional shortcut
class CrossConv(nn.Module):
    """Cross convolution: a (1, k) Conv followed by a (k, 1) Conv, with an optional residual shortcut."""

    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        """
        Build the cross convolution.

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Length of the 1-D kernels.
            s: Stride, applied along the axis each kernel extends over.
            g: Groups of the second convolution.
            e: Expansion ratio; hidden channels are int(c2 * e).
            shortcut: Request a residual add; it is only active when c1 == c2.
        """
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, (1, k), (1, s))
        self.cv2 = Conv(hidden, c2, (k, 1), (s, 1), g=g)
        # The residual add needs matching input and output channel counts.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Apply both 1-D convolutions and add the input back when the shortcut is active."""
        if self.add:
            return x + self.cv2(self.cv1(x))
        return self.cv2(self.cv1(x))


# C3 module
class C3(nn.Module):
    """CSP bottleneck with three Conv blocks: two parallel 1x1 branches fused by a third Conv."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Build the C3 module.

        Args:
            c1: Input channels.
            c2: Output channels.
            n: Number of stacked Bottleneck modules.
            shortcut: Forwarded to each Bottleneck.
            g: Groups, forwarded to each Bottleneck.
            e: Expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__()
        hidden = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)  # optional act=FReLU(c2)
        stack = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*stack)

    def forward(self, x):
        """Concatenate the `m(cv1(x))` branch with `cv2(x)` along dim 1 and fuse the result with cv3."""
        deep_branch = self.m(self.cv1(x))
        skip_branch = self.cv2(x)
        return self.cv3(torch.cat((deep_branch, skip_branch), 1))


# C3x module: C3 with cross-convolutions
class C3x(C3):
    """C3 variant whose inner stack is made of CrossConv modules instead of Bottlenecks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Initialize as a C3, then replace `self.m` with `n` CrossConv modules.

        Args:
            c1: Input channels.
            c2: Output channels.
            n: Number of stacked CrossConv modules.
            shortcut: Forwarded to each CrossConv.
            g: Groups, forwarded to each CrossConv.
            e: Expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        stack = [CrossConv(hidden, hidden, 3, 1, g, 1.0, shortcut) for _ in range(n)]
        self.m = nn.Sequential(*stack)


# C3TR module
class C3TR(C3):
    """C3 variant whose inner module is a TransformerBlock."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Initialize as a C3, then replace `self.m` with a 4-head TransformerBlock of `n` layers.

        Args:
            c1: Input channels.
            c2: Output channels.
            n: Number of transformer layers.
            shortcut: Forwarded to the C3 initializer.
            g: Forwarded to the C3 initializer.
            e: Expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        self.m = TransformerBlock(hidden, hidden, 4, n)


# C3SPP module: C3 whose inner module is an SPP layer
class C3SPP(C3):
    """C3 variant whose inner module is an SPP layer."""

    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
        """
        Initialize as a C3, then replace `self.m` with SPP(hidden, hidden, k).

        Args:
            c1: Input channels.
            c2: Output channels.
            k: Pooling kernel sizes handed to SPP.
            n: Forwarded to the C3 initializer.
            shortcut: Forwarded to the C3 initializer.
            g: Forwarded to the C3 initializer.
            e: Expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        self.m = SPP(hidden, hidden, k)


# C3Ghost module: C3 built from Ghost bottlenecks
class C3Ghost(C3):
    """C3 variant whose inner stack is made of GhostBottleneck modules."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Initialize as a C3, then replace `self.m` with `n` GhostBottleneck modules.

        Args:
            c1: Input channels.
            c2: Output channels.
            n: Number of stacked GhostBottleneck modules.
            shortcut: Forwarded to the C3 initializer.
            g: Forwarded to the C3 initializer.
            e: Expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)  # hidden channels
        stack = [GhostBottleneck(hidden, hidden) for _ in range(n)]
        self.m = nn.Sequential(*stack)


# SPP module
class SPP(nn.Module):
    """Spatial Pyramid Pooling (SPP) layer, ref: https://arxiv.org/abs/1406.4729."""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        """
        Build the SPP layer.

        Args:
            c1: Input channels; the hidden width is c1 // 2.
            c2: Output channels.
            k: Kernel sizes of the parallel max-pool layers (stride 1, padding k // 2).
        """
        super().__init__()
        hidden = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        # The fusing Conv sees the reduced input plus one pooled copy per kernel size.
        self.cv2 = Conv(hidden * (len(k) + 1), c2, 1, 1)
        pools = [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
        self.m = nn.ModuleList(pools)

    def forward(self, x):
        """Reduce channels, max-pool at every kernel size, concatenate along dim 1 and fuse."""
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # suppress torch 1.9.0 max_pool2d() warning
            features = [x]
            features.extend(pool(x) for pool in self.m)
            return self.cv2(torch.cat(features, 1))


# SPPF layer: fast spatial pyramid pooling
class SPPF(nn.Module):
    """Fast Spatial Pyramid Pooling (SPPF) layer that applies one max-pool three times in sequence."""

    def __init__(self, c1, c2, k=5):
        """
        Build the SPPF layer; with k=5 it is equivalent to SPP(k=(5, 9, 13)).

        Args:
            c1: Input channels; the hidden width is c1 // 2.
            c2: Output channels.
            k: Kernel size of the shared max-pool (stride 1, padding k // 2).
        """
        super().__init__()
        hidden = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, hidden, 1, 1)
        # The fusing Conv sees the reduced input plus three successively pooled copies.
        self.cv2 = Conv(hidden * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        """Reduce channels, pool three times in a chain, concatenate all four maps along dim 1 and fuse."""
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # suppress torch 1.9.0 max_pool2d() warning
            features = [x]
            for _ in range(3):
                # Each pooling step consumes the previous step's output.
                features.append(self.m(features[-1]))
            return self.cv2(torch.cat(features, 1))


# Focus: moves spatial information into the channel dimension via slicing, then convolves
class Focus(nn.Module):
    """Moves width-height information into channel space by strided slicing, then applies a Conv."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """
        Build the Focus module.

        Args:
            c1: Input channels; the inner Conv receives 4 * c1 channels.
            c2: Output channels.
            k: Kernel size of the inner Conv.
            s: Stride of the inner Conv.
            p: Padding of the inner Conv (None means automatic).
            g: Groups of the inner Conv.
            act: Activation selector of the inner Conv.
        """
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)

    def forward(self, x):
        """Reshape (b,c,w,h) to (b,4c,w/2,h/2) by taking every second pixel, then apply the convolution."""
        # Four interleaved sub-grids over the last two dims, stacked along the channel dim.
        even_even = x[..., ::2, ::2]
        odd_even = x[..., 1::2, ::2]
        even_odd = x[..., ::2, 1::2]
        odd_odd = x[..., 1::2, 1::2]
        return self.conv(torch.cat((even_even, odd_even, even_odd, odd_odd), 1))


# Ghost convolution
class GhostConv(nn.Module):
    """Ghost Convolution, see https://github.com/huawei-noah/ghostnet."""

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
        """
        Build the Ghost convolution.

        Args:
            c1: Input channels.
            c2: Output channels; each of the two internal Convs produces c2 // 2 of them.
            k: Kernel size of the first Conv.
            s: Stride of the first Conv.
            g: Groups of the first Conv.
            act: Activation selector for both Convs.
        """
        super().__init__()
        half = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, half, k, s, None, g, act=act)
        # Second Conv: 5x5 with groups == channels, operating on cv1's output.
        self.cv2 = Conv(half, half, 5, 1, None, half, act=act)

    def forward(self, x):
        """Concatenate cv1's output with cv2 applied to that output, along dim 1."""
        primary = self.cv1(x)
        return torch.cat((primary, self.cv2(primary)), 1)


# GhostBottleneck class
class GhostBottleneck(nn.Module):
    """Bottleneck built from Ghost Convolutions, see https://github.com/huawei-noah/ghostnet."""

    def __init__(self, c1, c2, k=3, s=1):
        """
        Build the Ghost bottleneck.

        Args:
            c1: Input channels.
            c2: Output channels; the hidden width is c2 // 2.
            k: Kernel size of the depth-wise convolutions, which are only present when s == 2.
            s: Stride; s == 2 enables the depth-wise convs in both the main path and the shortcut.
        """
        super().__init__()
        hidden = c2 // 2
        downsample = s == 2
        # Main path modules are created in the order they run: pw, dw, pw-linear.
        pointwise = GhostConv(c1, hidden, 1, 1)  # pw
        depthwise = DWConv(hidden, hidden, k, s, act=False) if downsample else nn.Identity()  # dw
        pointwise_linear = GhostConv(hidden, c2, 1, 1, act=False)  # pw-linear
        self.conv = nn.Sequential(pointwise, depthwise, pointwise_linear)
        # Shortcut path: identity unless the main path downsamples.
        if downsample:
            self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False))
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Return the sum of the main path and the shortcut path."""
        return self.conv(x) + self.shortcut(x)


# Contract module: folds spatial dimensions into channels
class Contract(nn.Module):
    """Folds spatial blocks into the channel dimension, e.g. (1,64,80,80) -> (1,256,40,40) for gain=2."""

    def __init__(self, gain=2):
        """
        Store the contraction factor.

        Args:
            gain: Side length of the spatial block that is moved into channels.
        """
        super().__init__()
        self.gain = gain

    def forward(self, x):
        """Map a (b, c, h, w) tensor to (b, c*gain*gain, h//gain, w//gain)."""
        batch, channels, height, width = x.size()
        g = self.gain
        out_h, out_w = height // g, width // g
        blocks = x.view(batch, channels, out_h, g, out_w, g)  # e.g. x(1,64,40,2,40,2)
        # Move both in-block offsets in front of the channel dim so they merge into it.
        blocks = blocks.permute(0, 3, 5, 1, 2, 4).contiguous()  # e.g. x(1,2,2,64,40,40)
        return blocks.view(batch, channels * g * g, out_h, out_w)  # e.g. x(1,256,40,40)


# Expand module: unfolds channels into spatial dimensions
class Expand(nn.Module):
    """Unfolds channels into spatial blocks, e.g. (1,64,80,80) -> (1,16,160,160) for gain=2."""

    def __init__(self, gain=2):
        """
        Store the expansion factor.

        Args:
            gain: Side length of the spatial block created from channels.
        """
        super().__init__()
        self.gain = gain

    def forward(self, x):
        """Map a (b, c, h, w) tensor to (b, c // gain**2, h*gain, w*gain); c must be divisible by gain**2."""
        batch, channels, height, width = x.size()
        g = self.gain
        out_channels = channels // g**2
        blocks = x.view(batch, g, g, out_channels, height, width)  # e.g. x(1,2,2,16,80,80)
        # Interleave each block offset with its spatial axis so they merge into h and w.
        blocks = blocks.permute(0, 3, 4, 1, 5, 2).contiguous()  # e.g. x(1,16,80,2,80,2)
        return blocks.view(batch, out_channels, height * g, width * g)  # e.g. x(1,16,160,160)


# Concat: concatenates tensors along a given dimension
class Concat(nn.Module):
    """Concatenates a list of tensors along a fixed dimension."""

    def __init__(self, dimension=1):
        """
        Store the concatenation axis.

        Args:
            dimension: Axis handed to torch.cat.
        """
        super().__init__()
        self.d = dimension

    def forward(self, x):
        """Concatenate the tensors in `x` along the stored dimension."""
        return torch.cat(x, dim=self.d)


# DetectMultiBackend: inference wrapper for multiple model backends
class DetectMultiBackend(nn.Module):
    """YOLOv5 MultiBackend class for inference on various backends including PyTorch, ONNX, TensorRT, and more."""

    def __init__(self, weights="yolov5s.pt", device=torch.device("cpu"), dnn=False, data=None, fp16=False, fuse=True):
        """
        Load a model for the backend implied by the weights path and record its metadata.

        Every local variable of this method is copied onto the instance at the end
        (`self.__dict__.update(locals())`), so the local names below are part of the object's attribute set
        and are read back by `forward()` and `warmup()`.

        Args:
            weights: Path or URL of the model, or a list of paths (the first entry selects the backend).
            device: torch.device to run on; the TensorRT branch replaces a CPU device with "cuda:0".
            dnn: Use OpenCV DNN instead of ONNX Runtime (checked before the ONNX Runtime branch).
            data: Optional dataset YAML used for class names when the model carries none.
            fp16: Request half precision; only kept for PyTorch, TorchScript, ONNX, TensorRT and Triton.
            fuse: Forwarded to `attempt_load` for PyTorch weights.

        Raises:
            NotImplementedError: For TF.js models and for unrecognized formats.
        """
        #   PyTorch:              weights = *.pt
        #   TorchScript:                    *.torchscript
        #   ONNX Runtime:                   *.onnx
        #   ONNX OpenCV DNN:                *.onnx --dnn
        #   OpenVINO:                       *_openvino_model
        #   CoreML:                         *.mlpackage
        #   TensorRT:                       *.engine
        #   TensorFlow SavedModel:          *_saved_model
        #   TensorFlow GraphDef:            *.pb
        #   TensorFlow Lite:                *.tflite
        #   TensorFlow Edge TPU:            *_edgetpu.tflite
        #   PaddlePaddle:                   *_paddle_model
        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import

        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
        fp16 &= pt or jit or onnx or engine or triton  # FP16
        nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCWH)
        stride = 32  # default stride
        cuda = torch.cuda.is_available() and device.type != "cpu"  # use CUDA
        if not (pt or triton):
            w = attempt_download(w)  # download if not local

        if pt:  # PyTorch
            model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
            stride = max(int(model.stride.max()), 32)  # model stride
            names = model.module.names if hasattr(model, "module") else model.names  # get class names
            model.half() if fp16 else model.float()
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
        elif jit:  # TorchScript
            LOGGER.info(f"Loading {w} for TorchScript inference...")
            extra_files = {"config.txt": ""}  # model metadata
            model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
            model.half() if fp16 else model.float()
            if extra_files["config.txt"]:  # load metadata dict
                d = json.loads(
                    extra_files["config.txt"],
                    object_hook=lambda d: {int(k) if k.isdigit() else k: v for k, v in d.items()},
                )
                stride, names = int(d["stride"]), d["names"]
        elif dnn:  # ONNX OpenCV DNN
            LOGGER.info(f"Loading {w} for ONNX OpenCV DNN inference...")
            check_requirements("opencv-python>=4.5.4")
            net = cv2.dnn.readNetFromONNX(w)
        elif onnx:  # ONNX Runtime
            LOGGER.info(f"Loading {w} for ONNX Runtime inference...")
            check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime"))
            import onnxruntime

            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if cuda else ["CPUExecutionProvider"]
            session = onnxruntime.InferenceSession(w, providers=providers)
            output_names = [x.name for x in session.get_outputs()]
            meta = session.get_modelmeta().custom_metadata_map  # metadata
            if "stride" in meta:
                # The metadata comes from the model file, so it is parsed as a literal instead of being passed
                # to eval(); this matches how the TFLite branch below reads the same kind of metadata.
                stride, names = int(meta["stride"]), ast.literal_eval(meta["names"])
        elif xml:  # OpenVINO
            LOGGER.info(f"Loading {w} for OpenVINO inference...")
            check_requirements("openvino>=2023.0")  # requires openvino-dev: https://pypi.org/project/openvino-dev/
            from openvino.runtime import Core, Layout, get_batch

            core = Core()
            if not Path(w).is_file():  # if not *.xml
                w = next(Path(w).glob("*.xml"))  # get *.xml file from *_openvino_model dir
            ov_model = core.read_model(model=w, weights=Path(w).with_suffix(".bin"))
            if ov_model.get_parameters()[0].get_layout().empty:
                ov_model.get_parameters()[0].set_layout(Layout("NCHW"))
            batch_dim = get_batch(ov_model)
            if batch_dim.is_static:
                batch_size = batch_dim.get_length()
            ov_compiled_model = core.compile_model(ov_model, device_name="AUTO")  # AUTO selects best available device
            # _load_metadata returns (None, None) when the YAML file is missing; keep the defaults in that case
            # so the class-name fallback below still runs instead of failing on `names[0]`.
            meta_stride, meta_names = self._load_metadata(Path(w).with_suffix(".yaml"))  # load metadata
            if meta_stride is not None and meta_names is not None:
                stride, names = meta_stride, meta_names
        elif engine:  # TensorRT
            LOGGER.info(f"Loading {w} for TensorRT inference...")
            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download

            check_version(trt.__version__, "7.0.0", hard=True)  # require tensorrt>=7.0.0
            if device.type == "cpu":
                device = torch.device("cuda:0")
            Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
            logger = trt.Logger(trt.Logger.INFO)
            with open(w, "rb") as f, trt.Runtime(logger) as runtime:
                model = runtime.deserialize_cuda_engine(f.read())
            context = model.create_execution_context()
            bindings = OrderedDict()
            output_names = []
            fp16 = False  # default updated below
            dynamic = False
            # Engines without `num_bindings` are treated as TensorRT 10, which addresses tensors by name.
            is_trt10 = not hasattr(model, "num_bindings")
            num = range(model.num_io_tensors) if is_trt10 else range(model.num_bindings)
            for i in num:
                if is_trt10:
                    name = model.get_tensor_name(i)
                    dtype = trt.nptype(model.get_tensor_dtype(name))
                    is_input = model.get_tensor_mode(name) == trt.TensorIOMode.INPUT
                    if is_input:
                        if -1 in tuple(model.get_tensor_shape(name)):  # dynamic
                            dynamic = True
                            context.set_input_shape(name, tuple(model.get_profile_shape(name, 0)[2]))
                        if dtype == np.float16:
                            fp16 = True
                    else:  # output
                        output_names.append(name)
                    shape = tuple(context.get_tensor_shape(name))
                else:
                    name = model.get_binding_name(i)
                    dtype = trt.nptype(model.get_binding_dtype(i))
                    if model.binding_is_input(i):
                        if -1 in tuple(model.get_binding_shape(i)):  # dynamic
                            dynamic = True
                            context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
                        if dtype == np.float16:
                            fp16 = True
                    else:  # output
                        output_names.append(name)
                    shape = tuple(context.get_binding_shape(i))
                im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
                bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
            batch_size = bindings["images"].shape[0]  # if dynamic, this is instead max batch size
        elif coreml:  # CoreML
            LOGGER.info(f"Loading {w} for CoreML inference...")
            import coremltools as ct

            model = ct.models.MLModel(w)
        elif saved_model:  # TF SavedModel
            LOGGER.info(f"Loading {w} for TensorFlow SavedModel inference...")
            import tensorflow as tf

            keras = False  # assume TF1 saved_model
            model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
        elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            LOGGER.info(f"Loading {w} for TensorFlow GraphDef inference...")
            import tensorflow as tf

            def wrap_frozen_graph(gd, inputs, outputs):
                """Wraps a TensorFlow GraphDef for inference, returning a pruned function."""
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                ge = x.graph.as_graph_element
                return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))

            def gd_outputs(gd):
                """Generates a sorted list of graph outputs excluding NoOp nodes and inputs, formatted as '<name>:0'."""
                name_list, input_list = [], []
                for node in gd.node:  # tensorflow.core.framework.node_def_pb2.NodeDef
                    name_list.append(node.name)
                    input_list.extend(node.input)
                return sorted(f"{x}:0" for x in list(set(name_list) - set(input_list)) if not x.startswith("NoOp"))

            gd = tf.Graph().as_graph_def()  # TF GraphDef
            with open(w, "rb") as f:
                gd.ParseFromString(f.read())
            frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd))
        elif tflite or edgetpu:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
            try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                from tflite_runtime.interpreter import Interpreter, load_delegate
            except ImportError:
                import tensorflow as tf

                Interpreter, load_delegate = (
                    tf.lite.Interpreter,
                    tf.lite.experimental.load_delegate,
                )
            if edgetpu:  # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
                LOGGER.info(f"Loading {w} for TensorFlow Lite Edge TPU inference...")
                delegate = {"Linux": "libedgetpu.so.1", "Darwin": "libedgetpu.1.dylib", "Windows": "edgetpu.dll"}[
                    platform.system()
                ]
                interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
            else:  # TFLite
                LOGGER.info(f"Loading {w} for TensorFlow Lite inference...")
                interpreter = Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            # load metadata
            with contextlib.suppress(zipfile.BadZipFile):
                with zipfile.ZipFile(w, "r") as model:
                    meta_file = model.namelist()[0]
                    meta = ast.literal_eval(model.read(meta_file).decode("utf-8"))
                    stride, names = int(meta["stride"]), meta["names"]
        elif tfjs:  # TF.js
            raise NotImplementedError("ERROR: YOLOv5 TF.js inference is not supported")
        elif paddle:  # PaddlePaddle
            LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
            check_requirements("paddlepaddle-gpu" if cuda else "paddlepaddle")
            import paddle.inference as pdi

            if not Path(w).is_file():  # if not *.pdmodel
                w = next(Path(w).rglob("*.pdmodel"))  # get *.pdmodel file from *_paddle_model dir
            weights = Path(w).with_suffix(".pdiparams")
            config = pdi.Config(str(w), str(weights))
            if cuda:
                config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
            predictor = pdi.create_predictor(config)
            input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
            output_names = predictor.get_output_names()
        elif triton:  # NVIDIA Triton Inference Server
            LOGGER.info(f"Using {w} as Triton Inference Server...")
            check_requirements("tritonclient[all]")
            from utils.triton import TritonRemoteModel

            model = TritonRemoteModel(url=w)
            nhwc = model.runtime.startswith("tensorflow")
        else:
            raise NotImplementedError(f"ERROR: {w} is not a supported format")

        # class names
        if "names" not in locals():
            names = yaml_load(data)["names"] if data else {i: f"class{i}" for i in range(999)}
        if names[0] == "n01440764" and len(names) == 1000:  # ImageNet
            names = yaml_load(ROOT / "data/ImageNet.yaml")["names"]  # human-readable names

        self.__dict__.update(locals())  # assign all variables to self

    def forward(self, im, augment=False, visualize=False):
        """
        Run inference on a batch of images with the backend selected in `__init__`.

        Args:
            im: 4-D torch tensor unpacked as (batch, channel, height, width).
            augment: Forwarded to the PyTorch model only.
            visualize: Forwarded to the PyTorch model only.

        Returns:
            The backend output with NumPy arrays converted to torch tensors on `self.device`; a single tensor
            when the backend produced exactly one output, otherwise a list.
        """
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.fp16 and im.dtype != torch.float16:
            im = im.half()  # to FP16
        if self.nhwc:
            im = im.permute(0, 2, 3, 1)  # torch BCHW to numpy BHWC shape(1,320,192,3)

        if self.pt:  # PyTorch
            y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
        elif self.jit:  # TorchScript
            y = self.model(im)
        elif self.dnn:  # ONNX OpenCV DNN
            im = im.cpu().numpy()  # torch to numpy
            self.net.setInput(im)
            y = self.net.forward()
        elif self.onnx:  # ONNX Runtime
            im = im.cpu().numpy()  # torch to numpy
            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
        elif self.xml:  # OpenVINO
            im = im.cpu().numpy()  # FP32
            y = list(self.ov_compiled_model(im).values())
        elif self.engine:  # TensorRT
            if self.dynamic and im.shape != self.bindings["images"].shape:
                if self.is_trt10:
                    # Same name-based API that __init__ uses for TensorRT 10 engines; the index-based
                    # binding calls in the other branch are not available there.
                    self.context.set_input_shape("images", im.shape)  # reshape if dynamic
                    self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape)
                    for name in self.output_names:
                        self.bindings[name].data.resize_(tuple(self.context.get_tensor_shape(name)))
                else:
                    i = self.model.get_binding_index("images")
                    self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
                    self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape)
                    for name in self.output_names:
                        i = self.model.get_binding_index(name)
                        self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
            s = self.bindings["images"].shape
            assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
            self.binding_addrs["images"] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = [self.bindings[x].data for x in sorted(self.output_names)]
        elif self.coreml:  # CoreML
            im = im.cpu().numpy()
            im = Image.fromarray((im[0] * 255).astype("uint8"))
            # im = im.resize((192, 320), Image.BILINEAR)
            y = self.model.predict({"image": im})  # coordinates are xywh normalized
            if "confidence" in y:
                box = xywh2xyxy(y["coordinates"] * [[w, h, w, h]])  # xyxy pixels
                # `np.float` was only an alias of the builtin float and no longer exists in NumPy >= 1.24.
                conf, cls = y["confidence"].max(1), y["confidence"].argmax(1).astype(float)
                y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
            else:
                y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
        elif self.paddle:  # PaddlePaddle
            im = im.cpu().numpy().astype(np.float32)
            self.input_handle.copy_from_cpu(im)
            self.predictor.run()
            y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
        elif self.triton:  # NVIDIA Triton Inference Server
            y = self.model(im)
        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
            im = im.cpu().numpy()
            if self.saved_model:  # SavedModel
                y = self.model(im, training=False) if self.keras else self.model(im)
            elif self.pb:  # GraphDef
                y = self.frozen_func(x=self.tf.constant(im))
            else:  # Lite or Edge TPU
                input_detail = self.input_details[0]
                int8 = input_detail["dtype"] == np.uint8  # is TFLite quantized uint8 model
                if int8:
                    scale, zero_point = input_detail["quantization"]
                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                self.interpreter.set_tensor(input_detail["index"], im)
                self.interpreter.invoke()
                y = []
                for output in self.output_details:
                    x = self.interpreter.get_tensor(output["index"])
                    if int8:
                        scale, zero_point = output["quantization"]
                        x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                    y.append(x)
            y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
            y[0][..., :4] *= [w, h, w, h]  # xywh normalized to pixels

        if isinstance(y, (list, tuple)):
            return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
        else:
            return self.from_numpy(y)

    def from_numpy(self, x):
        """Convert a NumPy array to a torch tensor on `self.device`; any other object is returned unchanged."""
        return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x

    def warmup(self, imgsz=(1, 3, 640, 640)):
        """
        Run dummy forward passes so the first real inference is not slowed by backend initialization.

        Only runs for PyTorch, TorchScript, ONNX, TensorRT, SavedModel, GraphDef and Triton backends, and only
        when the device is not CPU or the backend is Triton. TorchScript gets two passes, the others one.

        Args:
            imgsz: Shape of the dummy input tensor.
        """
        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
        if any(warmup_types) and (self.device.type != "cpu" or self.triton):
            im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
            for _ in range(2 if self.jit else 1):
                self.forward(im)  # warmup

    @staticmethod
    def _model_type(p="path/to/model.pt"):
        """
        Determine the model type from a file path or URL.

        Example: path='path/to/model.onnx' -> type=onnx

        Returns:
            A list of booleans, one per suffix reported by `export_formats()`, followed by a Triton flag that
            is set when no suffix matched and `p` parses as a URL with an http/grpc scheme and a network location.
        """
        # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
        from export import export_formats
        from utils.downloads import is_url

        sf = list(export_formats().Suffix)  # export suffixes
        if not is_url(p, check=False):
            check_suffix(p, sf)  # checks
        url = urlparse(p)  # if url may be Triton inference server
        types = [s in Path(p).name for s in sf]
        types[8] &= not types[9]  # tflite &= not edgetpu
        triton = not any(types) and all([any(s in url.scheme for s in ["http", "grpc"]), url.netloc])
        return types + [triton]

    @staticmethod
    def _load_metadata(f=Path("path/to/meta.yaml")):
        """Load (stride, names) from a metadata YAML file; return (None, None) when the file does not exist."""
        if f.exists():
            d = yaml_load(f)
            return d["stride"], d["names"]  # assign stride, names
        return None, None


# Input pre-processing wrapper class
class AutoShape(nn.Module):
    """AutoShape class for robust YOLOv5 inference with preprocessing, NMS, and support for various input formats."""

    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    agnostic = False  # NMS class-agnostic
    multi_label = False  # NMS multiple labels per box
    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    max_det = 1000  # maximum number of detections per image
    amp = False  # Automatic Mixed Precision (AMP) inference

    def __init__(self, model, verbose=True):
        """
        Initializes YOLOv5 model for inference, setting up attributes and preparing model for evaluation.

        Args:
            model: Model to wrap; either a DetectMultiBackend instance or an object whose `.model[-1]` is the
                final (Detect) layer.
            verbose (bool): Log a short message when the wrapper is added.
        """
        super().__init__()
        if verbose:
            LOGGER.info("Adding AutoShape... ")
        copy_attr(self, model, include=("yaml", "nc", "hyp", "names", "stride", "abc"), exclude=())  # copy attributes
        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
        self.pt = not self.dmb or model.pt  # PyTorch model
        self.model = model.eval()
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.inplace = False  # Detect.inplace=False for safe multithread inference
            m.export = True  # do not output loss values

    def _apply(self, fn):
        """
        Applies to(), cpu(), cuda(), half() etc.

        to model tensors excluding parameters or registered buffers: the last layer's `stride`, `grid` and
        `anchor_grid` are passed through `fn` explicitly after the regular `nn.Module._apply` has run.
        """
        self = super()._apply(fn)
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
                m.anchor_grid = list(map(fn, m.anchor_grid))
        return self

    @smart_inference_mode()
    def forward(self, ims, size=640, augment=False, profile=False):
        """
        Performs inference on inputs with optional augment & profiling.

        Supports various formats including file, URI, OpenCV, PIL, numpy, torch.

        Args:
            ims: A single input or a list/tuple of inputs (see the examples below). A `torch.Tensor` is sent
                straight to the model and the raw model output is returned.
            size (int | tuple): Inference size; an int is expanded to `(size, size)`.
            augment (bool): Forwarded to the wrapped model as `augment=`.
            profile (bool): Accepted for interface compatibility; not read in this method body.

        Returns:
            The raw model output for tensor input, otherwise a `Detections` object.
        """
        # For size(height=640, width=1280), RGB images example inputs are:
        #   file:        ims = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        dt = (Profile(), Profile(), Profile())
        with dt[0]:
            # Expand a scalar size to (size, size)
            if isinstance(size, int):  # expand
                size = (size, size)
            p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device)  # param
            autocast = self.amp and (p.device.type != "cpu")  # Automatic Mixed Precision (AMP) inference
            if isinstance(ims, torch.Tensor):  # torch
                with amp.autocast(autocast):
                    return self.model(ims.to(p.device).type_as(p), augment=augment)  # inference

            # Pre-process: get the number of images and the images as a list
            n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims])  # number, list of images
            shape0, shape1, files = [], [], []  # image and inference shapes, filenames
            for i, im in enumerate(ims):
                f = f"image{i}"  # filename
                if isinstance(im, (str, Path)):  # filename or uri
                    im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith("http") else im), im
                    im = np.asarray(exif_transpose(im))
                elif isinstance(im, Image.Image):  # PIL Image
                    im, f = np.asarray(exif_transpose(im)), getattr(im, "filename", f) or f
                files.append(Path(f).with_suffix(".jpg").name)
                if im.shape[0] < 5:  # image in CHW
                    im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
                # Keep the first three channels of a 3-D array; otherwise convert the grayscale image to 3 channels
                im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)  # enforce 3ch input
                s = im.shape[:2]  # HWC
                # Record the original image shape
                shape0.append(s)  # image shape
                # Ratio of the largest requested size to the largest image side
                g = max(size) / max(s)  # gain
                # shape1 collects the per-image shapes after scaling by the gain
                shape1.append([int(y * g) for y in s])
                # Store the (contiguous) array back into the list
                ims[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
            shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)]  # inf shape
            x = [letterbox(im, shape1, auto=False)[0] for im in ims]  # pad
            # Reorder dimensions
            x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2)))  # stack and BHWC to BCHW
            x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32

        with amp.autocast(autocast):
            # Inference
            with dt[1]:
                y = self.model(x, augment=augment)  # forward

            # Post-process
            with dt[2]:
                y = non_max_suppression(
                    y if self.dmb else y[0],
                    self.conf,
                    self.iou,
                    self.classes,
                    self.agnostic,
                    self.multi_label,
                    max_det=self.max_det,
                )  # NMS
                for i in range(n):
                    scale_boxes(shape1, y[i][:, :4], shape0[i])

            # Bundle images, predictions, filenames, timers, names and inference shape
            return Detections(ims, y, files, dt, self.names, x.shape)


class Detections:
    """Manages YOLOv5 detection results with methods for visualization, saving, cropping, and exporting detections."""

    def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
        """
        Initializes the YOLOv5 Detections class with image info, predictions, filenames, timing and normalization.

        Args:
            ims (list): Images as numpy arrays in HWC layout.
            pred (list): One tensor per image with columns (xyxy, conf, cls); must be non-empty because the
                device is read from `pred[0]`.
            files (list): Image filenames.
            times (tuple): Three profiling entries; each is either an object exposing a `.t` attribute or a
                plain number.
            names: Class-name lookup indexed by integer class id.
            shape: Inference BCHW shape; `None` is stored as an empty tuple.
        """
        super().__init__()
        # Device the predictions live on
        d = pred[0].device  # device
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims]  # normalizations
        self.ims = ims  # list of images as numpy arrays
        # Predicted boxes
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.times = times  # profiling times
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        # Accept plain numbers as well as profiler objects so the default times=(0, 0, 0) is usable
        self.t = tuple(getattr(x, "t", x) / self.n * 1e3 for x in times)  # timestamps (ms)
        # tuple(None) would raise, so the default shape=None maps to an empty tuple
        self.s = tuple(shape) if shape is not None else ()  # inference BCHW shape

    def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path("")):
        """
        Executes model predictions, displaying and/or saving outputs with optional crops and labels.

        Returns:
            The summary string when `pprint` is true, the list of crop dicts when `crop` is true, otherwise
            `None`.
        """
        s, crops = "", []
        for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
            s += f"\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} "  # string
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                s = s.rstrip(", ")
                if show or save or render or crop:
                    annotator = Annotator(im, example=str(self.names))
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f"{self.names[int(cls)]} {conf:.2f}"
                        if crop:
                            file = save_dir / "crops" / self.names[int(cls)] / self.files[i] if save else None
                            crops.append(
                                {
                                    "box": box,
                                    "conf": conf,
                                    "cls": cls,
                                    "label": label,
                                    "im": save_one_box(box, im, file=file, save=save),
                                }
                            )
                        else:  # all others
                            # Draw the box with its label
                            annotator.box_label(box, label if labels else "", color=colors(cls))
                    im = annotator.im
            else:
                s += "(no detections)"

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if show:
                if is_jupyter():
                    from IPython.display import display

                    display(im)
                else:
                    im.show(self.files[i])
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
            if render:
                self.ims[i] = np.asarray(im)
        if pprint:
            s = s.lstrip("\n")
            # Format the timings inside the f-string: applying `%` to the already-interpolated summary would
            # misparse any literal '%' coming from a class name.
            t_pre, t_inf, t_nms = self.t
            return (
                f"{s}\nSpeed: {t_pre:.1f}ms pre-process, {t_inf:.1f}ms inference, "
                f"{t_nms:.1f}ms NMS per image at shape {self.s}"
            )
        if crop:
            if save:
                LOGGER.info(f"Saved results to {save_dir}\n")
            return crops

    @TryExcept("Showing images is not supported in this environment")
    def show(self, labels=True):
        """
        Displays detection results with optional labels.

        Usage: show(labels=True)
        """
        self._run(show=True, labels=labels)  # show results

    def save(self, labels=True, save_dir="runs/detect/exp", exist_ok=False):
        """
        Saves detection results with optional labels to a specified directory.

        Usage: save(labels=True, save_dir='runs/detect/exp', exist_ok=False)
        """
        save_dir = increment_path(save_dir, exist_ok, mkdir=True)  # increment save_dir
        self._run(save=True, labels=labels, save_dir=save_dir)  # save results

    def crop(self, save=True, save_dir="runs/detect/exp", exist_ok=False):
        """
        Crops detection results, optionally saves them to a directory.

        Args: save (bool), save_dir (str), exist_ok (bool).
        """
        save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
        return self._run(crop=True, save=save, save_dir=save_dir)  # crop results

    def render(self, labels=True):
        """Renders detection results with optional labels on images; args: labels (bool) indicating label inclusion."""
        self._run(render=True, labels=labels)  # render results
        return self.ims

    def pandas(self):
        """
        Returns detections as pandas DataFrames for various box formats (xyxy, xyxyn, xywh, xywhn).

        Example: print(results.pandas().xyxy[0]).
        """
        new = copy(self)  # return copy
        ca = "xmin", "ymin", "xmax", "ymax", "confidence", "class", "name"  # xyxy columns
        cb = "xcenter", "ycenter", "width", "height", "confidence", "class", "name"  # xywh columns
        for k, c in zip(["xyxy", "xyxyn", "xywh", "xywhn"], [ca, ca, cb, cb]):
            # Per image: keep the first five values, cast the class id to int and append the class name
            a = [
                [row[:5] + [int(row[5]), self.names[int(row[5])]] for row in det.tolist()]
                for det in getattr(self, k)
            ]  # update
            setattr(new, k, [pd.DataFrame(rows, columns=c) for rows in a])
        return new

    def tolist(self):
        """
        Converts a Detections object into a list of individual detection results for iteration.

        Example: for result in results.tolist():
        """
        r = range(self.n)  # iterable
        return [
            Detections(
                [self.ims[i]],
                [self.pred[i]],
                [self.files[i]],
                self.times,
                self.names,
                self.s,
            )
            for i in r
        ]

    def print(self):
        """Logs the string representation of the current object's state via the LOGGER."""
        LOGGER.info(self.__str__())

    def __len__(self):
        """Returns the number of results stored, overrides the default len(results)."""
        return self.n

    def __str__(self):
        """Returns a string representation of the model's results, suitable for printing, overrides default
        print(results).
        """
        return self._run(pprint=True)  # print results

    def __repr__(self):
        """Returns a string representation of the YOLOv5 object, including its class and formatted results."""
        return f"YOLOv5 {self.__class__} instance\n" + self.__str__()


class Proto(nn.Module):
    """YOLOv5 mask Proto module for segmentation models, performing convolutions and upsampling on input tensors."""

    def __init__(self, c1, c_=256, c2=32):
        """
        Initializes YOLOv5 Proto module for segmentation with input, proto, and mask channels configuration.

        Args:
            c1 (int): Input channels.
            c_ (int): Intermediate (proto) channels.
            c2 (int): Output (mask) channels.
        """
        super().__init__()
        self.cv1 = Conv(c1, c_, k=3)
        self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
        self.cv2 = Conv(c_, c_, k=3)
        self.cv3 = Conv(c_, c2)

    def forward(self, x):
        """Performs a forward pass using convolutional layers and upsampling on input tensor `x`."""
        return self.cv3(self.cv2(self.upsample(self.cv1(x))))


# Classification module
class Classify(nn.Module):
    """YOLOv5 classification head: convolution, global average pooling, dropout and a final linear layer."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, dropout_p=0.0):
        """
        Builds the classification head.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels of the final linear layer.
            k: Convolution kernel size.
            s: Convolution stride.
            p: Convolution padding; passed through `autopad` together with `k`.
            g (int): Convolution groups.
            dropout_p (float): Dropout probability.
        """
        super().__init__()
        hidden = 1280  # efficientnet_b0 size
        padding = autopad(k, p)
        self.conv = Conv(c1, hidden, k, s, padding, g)
        self.pool = nn.AdaptiveAvgPool2d(1)  # to x(b,hidden,1,1)
        self.drop = nn.Dropout(p=dropout_p, inplace=True)
        self.linear = nn.Linear(hidden, c2)  # to x(b,c2)

    def forward(self, x):
        """Runs conv -> pool -> flatten -> dropout -> linear; a list input is first concatenated along dim 1."""
        feats = torch.cat(x, 1) if isinstance(x, list) else x
        pooled = self.pool(self.conv(feats))
        flat = pooled.flatten(1)
        return self.linear(self.drop(flat))

总的来说没那么复杂

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.xdnf.cn/news/8733.html

如若内容造成侵权/违法违规/事实不符,请联系一条长河网进行投诉反馈,一经查实,立即删除!

相关文章

5G时代,国产化融合通信行业的新机遇

在5G时代,国产化融合通信行业正在经历重要的转型与崛起,国产化融合通信行业正肩负着重要的社会责任,成为了推动我们社会发展的重要力量。5G技术的高速发展以及大规模的商业应用,使国产化融合通信行业迎来了前所未有的发展机遇。 5…

Spring WebFlux 核心原理(2-2)

1、Project Reactor 核心 1.1、新建项目 新建maven项目,将Project Reactor作为依赖项添加到应用程序中: <?xml version="1.0" encoding="UTF-8"?> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:x…

sublime Text的提取查找结果功能

notePad中是 sublime Text是快捷键 ctrlshiftF 点击find就行了&#xff0c;会新建一个文件里面是提取的内容 勾选展示上下文的情况

基于Jeecgboot3.6.3vue3的flowable流程online表单的审批使用介绍

更多技术支持与服务请加入我的知识星球或加我微信&#xff0c;名称:亿事达nbcio技术交流社区https://t.zsxq.com/iPi8F 今天介绍一下基于jeecgboot3.6.3的flowable流程使用online表单进行审批的情况 1、首先建立一个online应用类型的流程&#xff0c;如下&#xff1a; 2、进行…

flink 内存配置(二):设置TaskManager内存

flink 内存配置&#xff08;一&#xff09;&#xff1a;设置Flink进程内存 flink 内存配置&#xff08;二&#xff09;&#xff1a;设置TaskManager内存 flink 内存配置&#xff08;三&#xff09;&#xff1a;设置JobManager内存 flink 内存配置&#xff08;四&#xff09;…

基于SpringBoot沉浸式戏曲文化体验系统【附源码】

基于SpringBoot沉浸式戏曲文化体验系统 效果如下&#xff1a; 系统主页面 系统登陆页面 用户管理页面 戏曲剧目管理页面 戏曲倾听页面 活动信息管理页面 个人中心页面 研究背景 随着互联网技术的飞速发展&#xff0c;传统文化传播方式正面临着前所未有的变革。戏曲作为我国传…

P3-1.【结构化程序设计】第一节——知识要点:算法、顺序结构程序设计、if语句的语法结构及各种用法

讲解视频&#xff1a; P3-1.【结构化程序设计】第一节——知识要点&#xff1a;算法、顺序结构程序设计、if语句的语法结构及各种用法 知识要点&#xff1a;算法、顺序结构程序设计、if语句的语法结构及各种用法 一、算法、顺序结构程序设计任务分析 知识要点&#xff1a;算法…

RAG三件套运行的新选择 - GPUStack

GPUStack 是一个开源的大模型即服务平台&#xff0c;可以高效整合并利用 Nvidia、Apple Metal、华为昇腾和摩尔线程等各种异构的 GPU/NPU 资源&#xff0c;提供本地私有部署大模型解决方案。 GPUStack 可以支持 RAG 系统中所需要的三种关键模型&#xff1a;Chat 对话模型&…

SSM物联网养殖管理系统-计算机毕业设计源码03998

目录 1 绪论 1.1 研究背景和意义 1.2国内外研究现状 1.3论文结构与章节安排 2 系统分析 2.1 可行性分析 2.1.1 技术可行性分析 2.1.2经济可行性分析 2.1.3操作可行性分析 2.2 系统功能分析 2.2.1 功能性分析 2.2.2 非功能性分析 2.3 系统用例分析 2.4 系统流程分析…

使用Python进行健康监测和分析的案例研究

健康监测和分析是指系统地使用健康数据来跟踪和评估个人或人群在一段时间内的健康状况。它包含一系列活动&#xff0c;从实时生理数据收集&#xff08;如心率&#xff0c;血压和体温&#xff09;到分析更复杂的健康记录&#xff08;包括患者病史&#xff0c;生活方式选择和遗传…

RHCE 第四次作业

一.搭建dns服务器能够对自定义的正向或者反向域完成数据解析查询。 1.配置环境 [root@localhost ~]# yum install bind [root@localhost ~]# systemctl stop firewalld [root@localhost ~]# setenforce 0 2.配置DNS主服务器 [root@localhost ~]# vim /etc/named.conf options { …

打字机效果显示

文章目录 打字机效果显示一、效果图二、视频效果三、代码 打字机效果显示 一、效果图 二、视频效果 B站-打字机效果图 打字机效果 打字机效果 三、代码 框架: <div class="t_title"><span>我的能力</span> <!-- <span>使…

PyQt5实战——翻译的实现,成功爬取微软翻译(可长期使用)经验总结(九)

个人博客&#xff1a;苏三有春的博客 系类往期文章&#xff1a; PyQt5实战——多脚本集合包&#xff0c;前言与环境配置&#xff08;一&#xff09; PyQt5实战——多脚本集合包&#xff0c;UI以及工程布局&#xff08;二&#xff09; PyQt5实战——多脚本集合包&#xff0c;程序…

[含文档+PPT+源码等]精品基于Nodejs实现的物流管理系统的设计与实现

基于Node.js实现的物流管理系统的设计与实现背景&#xff0c;主要源于物流行业的快速发展以及信息技术在物流管理中的广泛应用。以下是对该背景的具体阐述&#xff1a; 一、物流行业的快速发展 随着全球经济一体化的加速和电子商务的蓬勃发展&#xff0c;物流行业作为连接生产…

JavaWeb开发9

@ResponseBody 类型:方法注解、类注解 位置:Controller方法上/类上 作用:将方法返回值直接响应,如果返回值类型是实体对象/集合,将会转换为JSON格式响应 说明:@RestController = @Controller + @ResponseBody; 统…

直播美颜SDK开发指南:实时美颜技术的架构与优化策略

时下&#xff0c;为了满足市场需求&#xff0c;许多企业开始开发自己的美颜SDK&#xff0c;通过集成到直播或视频应用中&#xff0c;实现实时美颜效果。接下来&#xff0c;笔者将从美颜SDK的架构设计和优化策略出发&#xff0c;深入探讨如何打造一个智能化的视频美颜平台。 一…

Leecode热题100-104.二叉树中的最大路径和

二叉树中的 路径 被定义为一条节点序列&#xff0c;序列中每对相邻节点之间都存在一条边。同一个节点在一条路径序列中 至多出现一次 。该路径 至少包含一个 节点&#xff0c;且不一定经过根节点。 路径和 是路径中各节点值的总和。 给你一个二叉树的根节点 root &#xff0c…

第六十三周周报 GCN-CNNGA

文章目录 week 63 GCN-CNNGA摘要Abstract1. 题目2. Abstract3. 文献解读3.1 Introduction3.2 创新点 4. 网络结构4.1 数据分析4.2 混合深度学习框架的发展4.3 Mul4.4 CNN block4.5 GCN block4.6 GRU block4.7 注意力机制4.8 模型评估标准 5. 实验结果5.1 不同邻接矩阵的性能评价…

学习笔记——MathType公式编号:右编号和随章节变化

1.如何在word文档中插入带有编号的公式? 步骤:(前提是已经安装mathtype) 2.MathType公式编号怎么随章节变化? 想要编号级数也随标题级数进行自动变化,则需要插入或修改文档的“分隔符” 步骤:

VS+QT开发 找不到宏$(Qt_INCLUDEPATH_) $(Qt_LIBS_)

问题:在VS+QT开发环境,项目右键->属性->C/C++->常规->附加包含目录->宏(位置在右下角)->右侧新弹出的属性框内搜索Qt_INCLUDEPATH_ 找不到的场景的解决办法。