"""Convolution modules."""

import math

import numpy as np
import torch
import torch.nn as nn

__all__ = (
    "Conv",
    "Conv2",
    "LightConv",
    "DWConv",
    "DWConvTranspose2d",
    "ConvTranspose",
    "Focus",
    "GhostConv",
    "ChannelAttention",
    "SpatialAttention",
    "CBAM",
    "Concat",
    "RepConv",
    "Index",
)


def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Pad to 'same' shape outputs."""
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel size
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p


class Conv(nn.Module):
    """
    Standard convolution module with batch normalization and activation.

    Attributes:
        conv (nn.Conv2d): Convolutional layer.
        bn (nn.BatchNorm2d): Batch normalization layer.
        act (nn.Module): Activation function layer.
        default_act (nn.Module): Default activation function (SiLU).
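
    Examples:
        Basic usage sketch; input and output shapes are illustrative:
        >>> conv = Conv(16, 32, k=3, s=2)
        >>> conv(torch.randn(1, 16, 64, 64)).shape
        torch.Size([1, 32, 32, 32])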
    """

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """
        Initialize Conv layer with given parameters.

        Args:
            c1 (int): Number of input channels.
            c2 (int): Number of output channels.
            k (int): Kernel size.
            s (int): Stride.
            p (int, optional): Padding.
            g (int): Groups.
            d (int): Dilation.
            act (bool | nn.Module): Activation function.
        """
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """
        Apply convolution, batch normalization and activation to input tensor.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor.
        """
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """
        Apply convolution and activation without batch normalization.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor.
        """
        return self.act(self.conv(x))


class Conv2(Conv):
    """
    Simplified RepConv module with Conv fusing.

    Attributes:
        conv (nn.Conv2d): Main 3x3 convolutional layer.
        cv2 (nn.Conv2d): Additional 1x1 convolutional layer.
        bn (nn.BatchNorm2d): Batch normalization layer.
        act (nn.Module): Activation function layer.
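
    Examples:
        Sketch of training-time use followed by kernel fusion (toy shapes):
        >>> m = Conv2(16, 32)
        >>> x = torch.randn(1, 16, 32, 32)
        >>> m(x).shape
        torch.Size([1, 32, 32, 32])
        >>> m.fuse_convs()  # fold the parallel 1x1 kernel into the 3x3 kernel
        >>> m(x).shape
        torch.Size([1, 32, 32, 32])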
    """

    def __init__(self, c1, c2, k=3, s=1, p=None, g=1, d=1, act=True):
        """
        Initialize Conv2 layer with given parameters.

        Args:
            c1 (int): Number of input channels.
            c2 (int): Number of output channels.
            k (int): Kernel size.
            s (int): Stride.
            p (int, optional): Padding.
            g (int): Groups.
            d (int): Dilation.
            act (bool | nn.Module): Activation function.
        """
        super().__init__(c1, c2, k, s, p, g=g, d=d, act=act)
        self.cv2 = nn.Conv2d(c1, c2, 1, s, autopad(1, p, d), groups=g, dilation=d, bias=False)  # add 1x1 conv

    def forward(self, x):
        """
        Apply convolution, batch normalization and activation to input tensor.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor.
        """
        return self.act(self.bn(self.conv(x) + self.cv2(x)))

    def forward_fuse(self, x):
        """
        Apply fused convolution, batch normalization and activation to input tensor.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor.
        """
        return self.act(self.bn(self.conv(x)))

    def fuse_convs(self):
        """Fuse parallel convolutions."""
        w = torch.zeros_like(self.conv.weight.data)
        i = [x // 2 for x in w.shape[2:]]
        w[:, :, i[0] : i[0] + 1, i[1] : i[1] + 1] = self.cv2.weight.data.clone()
        self.conv.weight.data += w
        self.__delattr__("cv2")
        self.forward = self.forward_fuse


class LightConv(nn.Module):
    """
    Light convolution module with 1x1 and depthwise convolutions.

    This implementation is based on the PaddleDetection HGNetV2 backbone.

    Attributes:
        conv1 (Conv): 1x1 convolution layer.
        conv2 (DWConv): Depthwise convolution layer.
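
    Examples:
        Illustrative usage; spatial size is preserved at stride 1:
        >>> m = LightConv(16, 32, k=3)
        >>> m(torch.randn(1, 16, 64, 64)).shape
        torch.Size([1, 32, 64, 64])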
    """

    def __init__(self, c1, c2, k=1, act=nn.ReLU()):
        """
        Initialize LightConv layer with given parameters.

        Args:
            c1 (int): Number of input channels.
            c2 (int): Number of output channels.
            k (int): Kernel size for depthwise convolution.
            act (nn.Module): Activation function.
        """
        super().__init__()
        self.conv1 = Conv(c1, c2, 1, act=False)
        self.conv2 = DWConv(c2, c2, k, act=act)

    def forward(self, x):
        """
        Apply 2 convolutions to input tensor.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor.
        """
        return self.conv2(self.conv1(x))


class DWConv(Conv):
    """Depth-wise convolution module."""

    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):
        """
        Initialize depth-wise convolution with given parameters.

        Args:
            c1 (int): Number of input channels.
            c2 (int): Number of output channels.
            k (int): Kernel size.
            s (int): Stride.
            d (int): Dilation.
            act (bool | nn.Module): Activation function.
        """
        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)


class DWConvTranspose2d(nn.ConvTranspose2d):
    """Depth-wise transpose convolution module."""

    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):
        """
        Initialize depth-wise transpose convolution with given parameters.

        Args:
            c1 (int): Number of input channels.
            c2 (int): Number of output channels.
            k (int): Kernel size.
            s (int): Stride.
            p1 (int): Padding.
            p2 (int): Output padding.
        """
        super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))


class ConvTranspose(nn.Module):
    """
    Convolution transpose module with optional batch normalization and activation.

    Attributes:
        conv_transpose (nn.ConvTranspose2d): Transposed convolution layer.
        bn (nn.BatchNorm2d | nn.Identity): Batch normalization layer.
        act (nn.Module): Activation function layer.
        default_act (nn.Module): Default activation function (SiLU).
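
    Examples:
        Illustrative 2x upsampling with the default k=2, s=2:
        >>> up = ConvTranspose(16, 8)
        >>> up(torch.randn(1, 16, 32, 32)).shape
        torch.Size([1, 8, 64, 64])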
    """

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
        """
        Initialize ConvTranspose layer with given parameters.

        Args:
            c1 (int): Number of input channels.
            c2 (int): Number of output channels.
            k (int): Kernel size.
            s (int): Stride.
            p (int): Padding.
            bn (bool): Use batch normalization.
            act (bool | nn.Module): Activation function.
        """
        super().__init__()
        self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn)
        self.bn = nn.BatchNorm2d(c2) if bn else nn.Identity()
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """
        Apply transposed convolution, batch normalization and activation to input.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor.
        """
        return self.act(self.bn(self.conv_transpose(x)))

    def forward_fuse(self, x):
        """
        Apply activation and convolution transpose operation to input.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor.
        """
        return self.act(self.conv_transpose(x))


class Focus(nn.Module):
    """
    Focus module for concentrating feature information.

    Slices input tensor into 4 parts and concatenates them in the channel dimension.

    Attributes:
        conv (Conv): Convolution layer.
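
    Examples:
        Illustrative usage; (b, c, h, w) becomes (b, 4c, h/2, w/2) before the conv:
        >>> m = Focus(3, 64, k=3)
        >>> m(torch.randn(1, 3, 640, 640)).shape
        torch.Size([1, 64, 320, 320])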
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """
        Initialize Focus module with given parameters.

        Args:
            c1 (int): Number of input channels.
            c2 (int): Number of output channels.
            k (int): Kernel size.
            s (int): Stride.
            p (int, optional): Padding.
            g (int): Groups.
            act (bool | nn.Module): Activation function.
        """
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)

    def forward(self, x):
        """
        Apply Focus operation and convolution to input tensor.

        Input shape is (b,c,w,h) and output shape is (b,4c,w/2,h/2).

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor.
        """
        return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))


class GhostConv(nn.Module):
    """
    Ghost Convolution module.

    Generates more features with fewer parameters by using cheap operations.

    Attributes:
        cv1 (Conv): Primary convolution.
        cv2 (Conv): Cheap operation convolution.

    References:
        https://github.com/huawei-noah/Efficient-AI-Backbones
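
    Examples:
        Illustrative usage; half the output channels come from the cheap depthwise branch:
        >>> m = GhostConv(16, 32)
        >>> m(torch.randn(1, 16, 64, 64)).shape
        torch.Size([1, 32, 64, 64])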
    """

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
        """
        Initialize Ghost Convolution module with given parameters.

        Args:
            c1 (int): Number of input channels.
            c2 (int): Number of output channels.
            k (int): Kernel size.
            s (int): Stride.
            g (int): Groups.
            act (bool | nn.Module): Activation function.
        r   NrY      r   )r(   r)   r   cv1rN   )r4   r5   r6   r    r7   r8   r2   c_r9   r   r   r)   W  s   
zGhostConv.__init__c                 C   s    |  |}t|| |fdS )z
        Apply Ghost Convolution to input tensor.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor with concatenated features.
        """
        y = self.cv1(x)
        return torch.cat((y, self.cv2(y)), 1)


class RepConv(nn.Module):
    """
    RepConv module with training and deploy modes.

    This module is used in RT-DETR and can fuse convolutions during inference for efficiency.

    Attributes:
        conv1 (Conv): 3x3 convolution.
        conv2 (Conv): 1x1 convolution.
        bn (nn.BatchNorm2d, optional): Batch normalization for identity branch.
        act (nn.Module): Activation function.
        default_act (nn.Module): Default activation function (SiLU).

    References:
        https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
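
    Examples:
        Sketch of the train-then-fuse workflow (toy shapes); after fusing, use forward_fuse:
        >>> m = RepConv(16, 16)
        >>> x = torch.randn(1, 16, 32, 32)
        >>> m(x).shape
        torch.Size([1, 16, 32, 32])
        >>> m.fuse_convs()  # collapse the parallel branches into a single conv
        >>> m.forward_fuse(x).shape
        torch.Size([1, 16, 32, 32])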
    """

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
        """
        Initialize RepConv module with given parameters.

        Args:
            c1 (int): Number of input channels.
            c2 (int): Number of output channels.
            k (int): Kernel size.
            s (int): Stride.
            p (int): Padding.
            g (int): Groups.
            d (int): Dilation.
            act (bool | nn.Module): Activation function.
            bn (bool): Use batch normalization for identity branch.
            deploy (bool): Deploy mode for inference.
        """
        super().__init__()
        assert k == 3 and p == 1
        self.g = g
        self.c1 = c1
        self.c2 = c2
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
        self.bn = nn.BatchNorm2d(num_features=c1) if bn and c2 == c1 and s == 1 else None
        self.conv1 = Conv(c1, c2, k, s, p=p, g=g, act=False)
        self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)

    def forward_fuse(self, x):
        """
        Forward pass for deploy mode.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor.
        """
        return self.act(self.conv(x))

    def forward(self, x):
        """
        Forward pass for training mode.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Output tensor.
        """
        id_out = 0 if self.bn is None else self.bn(x)
        return self.act(self.conv1(x) + self.conv2(x) + id_out)

    def get_equivalent_kernel_bias(self):
        """
        Calculate equivalent kernel and bias by fusing convolutions.

        Returns:
            (torch.Tensor): Equivalent kernel
            (torch.Tensor): Equivalent bias
        """
        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
        kernelid, biasid = self._fuse_bn_tensor(self.bn)
        return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid

    @staticmethod
    def _pad_1x1_to_3x3_tensor(kernel1x1):
        """
        Pad a 1x1 kernel to 3x3 size.

        Args:
            kernel1x1 (torch.Tensor): 1x1 convolution kernel.

        Returns:
            (torch.Tensor): Padded 3x3 kernel.
        """
        return 0 if kernel1x1 is None else torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])

    def _fuse_bn_tensor(self, branch):
        """
        Fuse batch normalization with convolution weights.

        Args:
            branch (Conv | nn.BatchNorm2d | None): Branch to fuse.

        Returns:
            (torch.Tensor): Fused kernel
            (torch.Tensor): Fused bias
        """
        if branch is None:
            return 0, 0
        if isinstance(branch, Conv):
            kernel = branch.conv.weight
            running_mean = branch.bn.running_mean
            running_var = branch.bn.running_var
            gamma = branch.bn.weight
            beta = branch.bn.bias
            eps = branch.bn.eps
        elif isinstance(branch, nn.BatchNorm2d):
            if not hasattr(self, "id_tensor"):
                input_dim = self.c1 // self.g
                kernel_value = np.zeros((self.c1, input_dim, 3, 3), dtype=np.float32)
                for i in range(self.c1):
                    kernel_value[i, i % input_dim, 1, 1] = 1
                self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
            kernel = self.id_tensor
            running_mean = branch.running_mean
            running_var = branch.running_var
            gamma = branch.weight
            beta = branch.bias
            eps = branch.eps
        std = (running_var + eps).sqrt()
        t = (gamma / std).reshape(-1, 1, 1, 1)
        return kernel * t, beta - running_mean * gamma / std

    def fuse_convs(self):
        """Fuse convolutions for inference by creating a single equivalent convolution."""
        if hasattr(self, "conv"):
            return
        kernel, bias = self.get_equivalent_kernel_bias()
        self.conv = nn.Conv2d(
            in_channels=self.conv1.conv.in_channels,
            out_channels=self.conv1.conv.out_channels,
            kernel_size=self.conv1.conv.kernel_size,
            stride=self.conv1.conv.stride,
            padding=self.conv1.conv.padding,
            dilation=self.conv1.conv.dilation,
            groups=self.conv1.conv.groups,
            bias=True,
        ).requires_grad_(False)
        self.conv.weight.data = kernel
        self.conv.bias.data = bias
        for para in self.parameters():
            para.detach_()
        self.__delattr__("conv1")
        self.__delattr__("conv2")
        if hasattr(self, "nm"):
            self.__delattr__("nm")
        if hasattr(self, "bn"):
            self.__delattr__("bn")
        if hasattr(self, "id_tensor"):
            self.__delattr__("id_tensor")


class ChannelAttention(nn.Module):
    """
    Channel-attention module for feature recalibration.

    Applies attention weights to channels based on global average pooling.

    Attributes:
        pool (nn.AdaptiveAvgPool2d): Global average pooling.
        fc (nn.Conv2d): Fully connected layer implemented as 1x1 convolution.
        act (nn.Sigmoid): Sigmoid activation for attention weights.

    References:
        https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet
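
    Examples:
        Illustrative usage; the output keeps the input shape:
        >>> m = ChannelAttention(64)
        >>> m(torch.randn(1, 64, 16, 16)).shape
        torch.Size([1, 64, 16, 16])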
    """

    def __init__(self, channels: int) -> None:
        """
        Initialize Channel-attention module.

        Args:
            channels (int): Number of input channels.
        """
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
        self.act = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Apply channel attention to input tensor.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Channel-attended output tensor.
        """
        return x * self.act(self.fc(self.pool(x)))


class SpatialAttention(nn.Module):
    """
    Spatial-attention module for feature recalibration.

    Applies attention weights to spatial dimensions based on channel statistics.

    Attributes:
        cv1 (nn.Conv2d): Convolution layer for spatial attention.
        act (nn.Sigmoid): Sigmoid activation for attention weights.
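
    Examples:
        Illustrative usage; the output keeps the input shape:
        >>> m = SpatialAttention(kernel_size=7)
        >>> m(torch.randn(1, 64, 16, 16)).shape
        torch.Size([1, 64, 16, 16])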
    """

    def __init__(self, kernel_size=7):
        """
        Initialize Spatial-attention module.

        Args:
            kernel_size (int): Size of the convolutional kernel (3 or 7).
        """
        super().__init__()
        assert kernel_size in {3, 7}, "kernel size must be 3 or 7"
        padding = 3 if kernel_size == 7 else 1
        self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.act = nn.Sigmoid()

    def forward(self, x):
        """
        Apply spatial attention to input tensor.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Spatial-attended output tensor.
        """
        return x * self.act(self.cv1(torch.cat([torch.mean(x, 1, keepdim=True), torch.max(x, 1, keepdim=True)[0]], 1)))


class CBAM(nn.Module):
    """
    Convolutional Block Attention Module.

    Combines channel and spatial attention mechanisms for comprehensive feature refinement.

    Attributes:
        channel_attention (ChannelAttention): Channel attention module.
        spatial_attention (SpatialAttention): Spatial attention module.
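
    Examples:
        Illustrative usage; channel then spatial attention, shape preserved:
        >>> m = CBAM(64)
        >>> m(torch.randn(1, 64, 16, 16)).shape
        torch.Size([1, 64, 16, 16])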
    """

    def __init__(self, c1, kernel_size=7):
        """
        Initialize CBAM with given parameters.

        Args:
            c1 (int): Number of input channels.
            kernel_size (int): Size of the convolutional kernel for spatial attention.
        """
        super().__init__()
        self.channel_attention = ChannelAttention(c1)
        self.spatial_attention = SpatialAttention(kernel_size)

    def forward(self, x):
        """
        Apply channel and spatial attention sequentially to input tensor.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            (torch.Tensor): Attended output tensor.
        """
        return self.spatial_attention(self.channel_attention(x))


class Concat(nn.Module):
    """
    Concatenate a list of tensors along specified dimension.

    Attributes:
        d (int): Dimension along which to concatenate tensors.
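
    Examples:
        Illustrative usage, concatenating along the channel dimension:
        >>> m = Concat(dimension=1)
        >>> m([torch.randn(1, 16, 8, 8), torch.randn(1, 32, 8, 8)]).shape
        torch.Size([1, 48, 8, 8])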
    """

    def __init__(self, dimension=1):
        """
        Initialize Concat module.

        Args:
            dimension (int): Dimension along which to concatenate tensors.
        """
        super().__init__()
        self.d = dimension

    def forward(self, x):
        """
        Concatenate input tensors along specified dimension.

        Args:
            x (List[torch.Tensor]): List of input tensors.

        Returns:
            (torch.Tensor): Concatenated tensor.
        """
        return torch.cat(x, self.d)


class Index(nn.Module):
    """
    Returns a particular index of the input.

    Attributes:
        index (int): Index to select from input.
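
    Examples:
        Illustrative usage, selecting the second tensor from a list:
        >>> m = Index(index=1)
        >>> m([torch.randn(1, 16, 8, 8), torch.randn(1, 32, 8, 8)]).shape
        torch.Size([1, 32, 8, 8])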
    """

    def __init__(self, index=0):
        """
        Initialize Index module.

        Args:
            index (int): Index to select from input.
        """
        super().__init__()
        self.index = index

    def forward(self, x):
        """
        Select and return a particular index from input.

        Args:
            x (List[torch.Tensor]): List of input tensors.

        Returns:
            (torch.Tensor): Selected tensor.
        """
        return x[self.index]