o
    VhA                     @   s  d dl mZ d dlmZmZmZmZmZmZm	Z	m
Z
 d dlmZ d dlmZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZmZ g dZG dd dejZG dd dejZ G dd dejZ!G dd dej"Z#G dd dej"Z$G dd dejZ%G dd dejZ&G dd dej"Z'de	e
e#e$f  dee	e
ee!e f   dee( d ed!ej"f d"ee d#e)d$ed%e'fd&d'Z*d(ed)d*d+Z+G d,d- d-eZ,G d.d/ d/eZ-G d0d1 d1eZ.e ed2e,j/fd3dd4d5d"ee, d#e)d$ed%e'fd6d7Z0e ed2e-j/fd3dd4d5d"ee- d#e)d$ed%e'fd8d9Z1e ed2e.j/fd3dd4d5d"ee. d#e)d$ed%e'fd:d;Z2dd<lm3Z3 e3e,j/j4e-j/j4e.j/j4d=Z5dS )>    )partial)AnyCallableListOptionalSequenceTupleTypeUnionN)Tensor   )VideoClassification)_log_api_usage_once   )register_modelWeightsWeightsEnum)_KINETICS400_CATEGORIES)_ovewrite_named_paramhandle_legacy_interface)VideoResNetR3D_18_WeightsMC3_18_WeightsR2Plus1D_18_Weightsr3d_18mc3_18r2plus1d_18c                       ^   e Zd Z	ddededee dededdf fd	d
Zededeeeef fddZ  Z	S )Conv3DSimpleN   	in_planes
out_planes	midplanesstridepaddingreturnc                    s   t  j||d||dd d S )N)r   r   r   Fin_channelsout_channelskernel_sizer#   r$   biassuper__init__selfr    r!   r"   r#   r$   	__class__ S/var/www/vscode/kcb/lib/python3.10/site-packages/torchvision/models/video/resnet.pyr-      s   
zConv3DSimple.__init__c                 C   
   | | | fS Nr2   r#   r2   r2   r3   get_downsample_stride'      
z"Conv3DSimple.get_downsample_strideNr   r   
__name__
__module____qualname__intr   r-   staticmethodr   r7   __classcell__r2   r2   r0   r3   r      "    &r   c                       sX   e Zd Zddedededededdf fd	d
Zededeeeef fddZ  ZS )Conv2Plus1Dr   r    r!   r"   r#   r$   r%   Nc                    s`   t  tj||dd||fd||fddt|tjddtj||d|ddf|ddfdd d S )	Nr   r   r   r   r   Fr)   r#   r$   r*   Tinplacer   r   r   r,   r-   nnConv3dBatchNorm3dReLUr.   r0   r2   r3   r-   -   s   
zConv2Plus1D.__init__c                 C   r4   r5   r2   r6   r2   r2   r3   r7   >   r8   z!Conv2Plus1D.get_downsample_strider   r   )	r;   r<   r=   r>   r-   r?   r   r7   r@   r2   r2   r0   r3   rB   ,   s    (&rB   c                       r   )Conv3DNoTemporalNr   r    r!   r"   r#   r$   r%   c                    s(   t  j||dd||fd||fdd d S )NrC   r   r   Fr&   r+   r.   r0   r2   r3   r-   D   s   
zConv3DNoTemporal.__init__c                 C   s
   d| | fS Nr   r2   r6   r2   r2   r3   r7   Q   r8   z&Conv3DNoTemporal.get_downsample_strider9   r:   r2   r2   r0   r3   rN   C   rA   rN   c                       sb   e Zd ZdZ		ddedededejf dedeej d	df fd
dZ	de
d	e
fddZ  ZS )
BasicBlockr   Ninplanesplanesconv_builder.r#   
downsampler%   c                    s   || d d d |d d d|   }t    t|||||t|tjdd| _t||||t|| _tjdd| _|| _	|| _
d S )Nr   TrE   )r,   r-   rI   
SequentialrK   rL   conv1conv2relurT   r#   r/   rQ   rR   rS   r#   rT   r"   r0   r2   r3   r-   Z   s   (

zBasicBlock.__init__xc                 C   sB   |}|  |}| |}| jd ur| |}||7 }| |}|S r5   )rV   rW   rT   rX   r/   rZ   residualoutr2   r2   r3   forwardm   s   




zBasicBlock.forwardr   Nr;   r<   r=   	expansionr>   r   rI   Moduler   r-   r   r^   r@   r2   r2   r0   r3   rP   V   s$    rP   c                       sb   e Zd ZdZ		ddedededejf ded	eej d
df fddZ	de
d
e
fddZ  ZS )
Bottleneck   r   NrQ   rR   rS   .r#   rT   r%   c                    s   t    || d d d |d d d|   }ttj||dddt|tjdd| _t|||||t|tjdd| _ttj||| j	 dddt|| j	 | _
tjdd| _|| _|| _d S )Nr   r   F)r)   r*   TrE   )r,   r-   rI   rU   rJ   rK   rL   rV   rW   ra   conv3rX   rT   r#   rY   r0   r2   r3   r-   ~   s   
	("
zBottleneck.__init__rZ   c                 C   sL   |}|  |}| |}| |}| jd ur| |}||7 }| |}|S r5   )rV   rW   re   rT   rX   r[   r2   r2   r3   r^      s   





zBottleneck.forwardr_   r`   r2   r2   r0   r3   rc   {   s$    rc   c                       "   e Zd ZdZd fddZ  ZS )	BasicStemz$The default conv-batchnorm-relu stemr%   Nc              
      s4   t  tjdddddddtdtjdd	 d S )
Nr   @   )r      ri   r   r   r   rC   FrD   TrE   rH   r/   r0   r2   r3   r-      s
   
zBasicStem.__init__r%   Nr;   r<   r=   __doc__r-   r@   r2   r2   r0   r3   rg          rg   c                       rf   )R2Plus1dStemzRR(2+1)D stem is different than the default one as it uses separated 3D convolutionr%   Nc                    sZ   t  tjdddddddtdtjdd	tjdd
dddddtd
tjdd	 d S )Nr   -   )r   ri   ri   rj   )r   r   r   FrD   TrE   rh   rG   r   r   r   )r   r   r   rH   rk   r0   r2   r3   r-      s   

zR2Plus1dStem.__init__rl   rm   r2   r2   r0   r3   rp      ro   rp   c                       s   e Zd Z		ddeeeef  deeeee	e
f   dee dedejf ded	ed
df fddZded
efddZ	ddeeeef  deeee	e
f  dededed
ejfddZ  ZS )r     Fblockconv_makerslayersstem.num_classeszero_init_residualr%   Nc                    s  t    t|  d| _| | _| j||d d|d dd| _| j||d d|d dd| _| j||d d|d dd| _| j||d d	|d dd| _	t
d
| _t
d	|j || _|  D ]N}t|t
jrt
jj|jddd |jdurt
j|jd q`t|t
jrt
j|jd t
j|jd q`t|t
jrt
j|jdd t
j|jd q`|r|  D ]}t|trt
j|jjd qdS dS )a^  Generic resnet video generator.

        Args:
            block (Type[Union[BasicBlock, Bottleneck]]): resnet building block
            conv_makers (List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]]): generator
                function for each layer
            layers (List[int]): number of blocks per layer
            stem (Callable[..., nn.Module]): module specifying the ResNet stem.
            num_classes (int, optional): Dimension of the final FC layer. Defaults to 400.
            zero_init_residual (bool, optional): Zero init bottleneck residual BN. Defaults to False.
        rh   r   r   r6      r      r   i   rr   fan_outrX   )modenonlinearityNg{Gz?)r,   r-   r   rQ   rw   _make_layerlayer1layer2layer3layer4rI   AdaptiveAvgPool3davgpoolLinearra   fcmodules
isinstancerJ   initkaiming_normal_weightr*   	constant_rK   normal_rc   bn3)r/   rt   ru   rv   rw   rx   ry   mr0   r2   r3   r-      s<   


zVideoResNet.__init__rZ   c                 C   sT   |  |}| |}| |}| |}| |}| |}|d}| |}|S rO   )rw   r   r   r   r   r   flattenr   )r/   rZ   r2   r2   r3   r^      s   







zVideoResNet.forwardr   rS   rR   blocksr#   c           
   	   C   s   d }|dks| j ||j kr+||}ttj| j ||j d|ddt||j }g }||| j |||| ||j | _ td|D ]}	||| j || qDtj| S )Nr   F)r)   r#   r*   )	rQ   ra   r7   rI   rU   rJ   rK   appendrange)
r/   rt   rS   rR   r   r#   rT   	ds_striderv   ir2   r2   r3   r   	  s   

zVideoResNet._make_layer)rs   F)r   )r;   r<   r=   r	   r
   rP   rc   r   r   rN   rB   r   r>   r   rI   rb   boolr-   r   r^   rU   r   r@   r2   r2   r0   r3   r      sB    4r   rt   ru   rv   rw   .weightsprogresskwargsr%   c                 K   sT   |d urt |dt|jd  t| |||fi |}|d ur(||j|dd |S )Nrx   
categoriesT)r   
check_hash)r   lenmetar   load_state_dictget_state_dict)rt   ru   rv   rw   r   r   r   modelr2   r2   r3   _video_resnet#  s   	r   rM   zKhttps://github.com/pytorch/vision/tree/main/references/video_classificationzThe weights reproduce closely the accuracy of the paper. The accuracies are estimated on video-level with parameters `frame_rate=15`, `clips_per_video=5`, and `clip_len=16`.)min_sizer   recipe_docsc                	   @   D   e Zd Zedeedddi eddddd	id
dddZeZdS )r   z7https://download.pytorch.org/models/r3d_18-b3b3357e.pthp   r   rz      	crop_sizeresize_sizeiP5Kinetics-400gO@g-T@zacc@1zacc@5gK7YD@g"_@
num_params_metrics_ops
_file_sizeurl
transformsr   N	r;   r<   r=   r   r   r   _COMMON_METAKINETICS400_V1DEFAULTr2   r2   r2   r3   r   B  $    r   c                	   @   r   )r   z7https://download.pytorch.org/models/mc3_18-a90a0ba3.pthr   r   r   iPu r   g{GO@gQU@r   gClE@gtVF@r   r   Nr   r2   r2   r2   r3   r   V  r   r   c                	   @   r   )r   z<https://download.pytorch.org/models/r2plus1d_18-91a641e6.pthr   r   r   ir   gʡP@g33333U@r   gOnBD@g1Z^@r   r   Nr   r2   r2   r2   r3   r   j  r   r   
pretrained)r   T)r   r   c                 K   .   t | } tttgd g dt| |fi |S )a  Construct 18 layer Resnet3D model.

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.R3D_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.R3D_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.R3D_18_Weights
        :members:
    rd   r   r   r   r   )r   verifyr   rP   r   rg   r   r   r   r2   r2   r3   r   ~     
r   c                 K   s4   t | } tttgtgd  g dt| |fi |S )a  Construct 18 layer Mixed Convolution network as in

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.MC3_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.MC3_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.MC3_18_Weights
        :members:
    r   r   )r   r   r   rP   r   rN   rg   r   r2   r2   r3   r     s   
r   c                 K   r   )a  Construct 18 layer deep R(2+1)D network as in

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.R2Plus1D_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.R2Plus1D_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.R2Plus1D_18_Weights
        :members:
    rd   r   )r   r   r   rP   rB   rp   r   r2   r2   r3   r     r   r   )
_ModelURLs)r   r   r   )6	functoolsr   typingr   r   r   r   r   r   r	   r
   torch.nnrI   torchr   transforms._presetsr   utilsr   _apir   r   r   _metar   _utilsr   r   __all__rJ   r   rU   rB   rN   rb   rP   rc   rg   rp   r   r>   r   r   r   r   r   r   r   r   r   r   r   r   
model_urlsr2   r2   r2   r3   <module>   sv    (%1^
*#*#*$