o
    )iA                     @   s  d dl mZ d dlmZmZmZmZmZmZm	Z	m
Z
 d dlmZ d dlmZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZmZ g dZG dd dejZG dd dejZ G dd dejZ!G dd dej"Z#G dd dej"Z$G dd dejZ%G dd dejZ&G dd dej"Z'de	e
e#e$f  dee	e
ee!e f   dee( d ed!ej"f d"ee d#e)d$ed%e'fd&d'Z*d(ed)d*d+Z+G d,d- d-eZ,G d.d/ d/eZ-G d0d1 d1eZ.e ed2e,j/fd3dd4d5d"ee, d#e)d$ed%e'fd6d7Z0e ed2e-j/fd3dd4d5d"ee- d#e)d$ed%e'fd8d9Z1e ed2e.j/fd3dd4d5d"ee. d#e)d$ed%e'fd:d;Z2dd<lm3Z3 e3e,j/j4e-j/j4e.j/j4d=Z5dS )>    )partial)AnyCallableListOptionalSequenceTupleTypeUnionN)Tensor   )VideoClassification)_log_api_usage_once   )register_modelWeightsWeightsEnum)_KINETICS400_CATEGORIES)_ovewrite_named_paramhandle_legacy_interface)VideoResNetR3D_18_WeightsMC3_18_WeightsR2Plus1D_18_Weightsr3d_18mc3_18r2plus1d_18c                       ^   e Zd Z	ddededee dededdf fd	d
Zededeeeef fddZ  Z	S )Conv3DSimpleN   	in_planes
out_planes	midplanesstridepaddingreturnc                    s   t  j||d||dd d S )N)r   r   r   FZin_channelsZout_channelskernel_sizer#   r$   biassuper__init__selfr    r!   r"   r#   r$   	__class__ n/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/torchvision/models/video/resnet.pyr+      s   
zConv3DSimple.__init__c                 C   
   | | | fS Nr0   r#   r0   r0   r1   get_downsample_stride'      
z"Conv3DSimple.get_downsample_strideNr   r   
__name__
__module____qualname__intr   r+   staticmethodr   r5   __classcell__r0   r0   r.   r1   r      "    &r   c                       sX   e Zd Zddedededededdf fd	d
Zededeeeef fddZ  ZS )Conv2Plus1Dr   r    r!   r"   r#   r$   r%   Nc                    s`   t  tj||dd||fd||fddt|tjddtj||d|ddf|ddfdd d S )	Nr   r   r   r   r   Fr'   r#   r$   r(   TZinplacer   r   r   r*   r+   nnConv3dBatchNorm3dReLUr,   r.   r0   r1   r+   -   s   
zConv2Plus1D.__init__c                 C   r2   r3   r0   r4   r0   r0   r1   r5   >   r6   z!Conv2Plus1D.get_downsample_strider   r   )	r9   r:   r;   r<   r+   r=   r   r5   r>   r0   r0   r.   r1   r@   ,   s    (&r@   c                       r   )Conv3DNoTemporalNr   r    r!   r"   r#   r$   r%   c                    s(   t  j||dd||fd||fdd d S )NrA   r   r   Fr&   r)   r,   r.   r0   r1   r+   D   s   
zConv3DNoTemporal.__init__c                 C   s
   d| | fS Nr   r0   r4   r0   r0   r1   r5   Q   r6   z&Conv3DNoTemporal.get_downsample_strider7   r8   r0   r0   r.   r1   rK   C   r?   rK   c                       sb   e Zd ZdZ		ddedededejf dedeej d	df fd
dZ	de
d	e
fddZ  ZS )
BasicBlockr   Ninplanesplanesconv_builder.r#   
downsampler%   c                    s   || d d d |d d d|   }t    t|||||t|tjdd| _t||||t|| _tjdd| _|| _	|| _
d S )Nr   TrC   )r*   r+   rF   
SequentialrH   rI   conv1conv2relurQ   r#   r-   rN   rO   rP   r#   rQ   r"   r.   r0   r1   r+   Z   s   (

zBasicBlock.__init__xc                 C   sB   |}|  |}| |}| jd ur| |}||7 }| |}|S r3   )rS   rT   rQ   rU   r-   rW   Zresidualoutr0   r0   r1   forwardm   s   




zBasicBlock.forwardr   Nr9   r:   r;   	expansionr<   r   rF   Moduler   r+   r   rZ   r>   r0   r0   r.   r1   rM   V   s$    rM   c                       sb   e Zd ZdZ		ddedededejf ded	eej d
df fddZ	de
d
e
fddZ  ZS )
Bottleneck   r   NrN   rO   rP   .r#   rQ   r%   c                    s   t    || d d d |d d d|   }ttj||dddt|tjdd| _t|||||t|tjdd| _ttj||| j	 dddt|| j	 | _
tjdd| _|| _|| _d S )Nr   r   F)r'   r(   TrC   )r*   r+   rF   rR   rG   rH   rI   rS   rT   r]   conv3rU   rQ   r#   rV   r.   r0   r1   r+   ~   s   
	("
zBottleneck.__init__rW   c                 C   sL   |}|  |}| |}| |}| jd ur| |}||7 }| |}|S r3   )rS   rT   ra   rQ   rU   rX   r0   r0   r1   rZ      s   





zBottleneck.forwardr[   r\   r0   r0   r.   r1   r_   {   s$    r_   c                       "   e Zd ZdZd fddZ  ZS )	BasicStemz$The default conv-batchnorm-relu stemr%   Nc              
      s4   t  tjdddddddtdtjdd	 d S )
Nr   @   )r      re   r   r   r   rA   FrB   TrC   rE   r-   r.   r0   r1   r+      s
   
zBasicStem.__init__r%   Nr9   r:   r;   __doc__r+   r>   r0   r0   r.   r1   rc          rc   c                       rb   )R2Plus1dStemzRR(2+1)D stem is different than the default one as it uses separated 3D convolutionr%   Nc                    sZ   t  tjdddddddtdtjdd	tjdd
dddddtd
tjdd	 d S )Nr   -   )r   re   re   rf   )r   r   r   FrB   TrC   rd   rD   r   r   r   )r   r   r   rE   rg   r.   r0   r1   r+      s   

zR2Plus1dStem.__init__rh   ri   r0   r0   r.   r1   rl      rk   rl   c                       s   e Zd Z		ddeeeef  deeeee	e
f   dee dedejf ded	ed
df fddZded
efddZ	ddeeeef  deeee	e
f  dededed
ejfddZ  ZS )r     Fblockconv_makerslayersstem.num_classeszero_init_residualr%   Nc                    s  t    t|  d| _| | _| j||d d|d dd| _| j||d d|d dd| _| j||d d|d dd| _| j||d d	|d dd| _	t
d
| _t
d	|j || _|  D ]N}t|t
jrt
jj|jddd |jdurt
j|jd q`t|t
jrt
j|jd t
j|jd q`t|t
jrt
j|jdd t
j|jd q`|r|  D ]}t|trt
j|jjd qdS dS )a^  Generic resnet video generator.

        Args:
            block (Type[Union[BasicBlock, Bottleneck]]): resnet building block
            conv_makers (List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]]): generator
                function for each layer
            layers (List[int]): number of blocks per layer
            stem (Callable[..., nn.Module]): module specifying the ResNet stem.
            num_classes (int, optional): Dimension of the final FC layer. Defaults to 400.
            zero_init_residual (bool, optional): Zero init bottleneck residual BN. Defaults to False.
        rd   r   r   r4      r      r   i   rn   Zfan_outrU   )modeZnonlinearityNg{Gz?)r*   r+   r   rN   rs   _make_layerlayer1layer2layer3layer4rF   ZAdaptiveAvgPool3davgpoolZLinearr]   fcmodules
isinstancerG   initZkaiming_normal_weightr(   Z	constant_rH   Znormal_r_   Zbn3)r-   rp   rq   rr   rs   rt   ru   mr.   r0   r1   r+      s<   


zVideoResNet.__init__rW   c                 C   sT   |  |}| |}| |}| |}| |}| |}|d}| |}|S rL   )rs   rz   r{   r|   r}   r~   flattenr   )r-   rW   r0   r0   r1   rZ      s   







zVideoResNet.forwardr   rP   rO   blocksr#   c           
   	   C   s   d }|dks| j ||j kr+||}ttj| j ||j d|ddt||j }g }||| j |||| ||j | _ td|D ]}	||| j || qDtj| S )Nr   F)r'   r#   r(   )	rN   r]   r5   rF   rR   rG   rH   appendrange)
r-   rp   rP   rO   r   r#   rQ   Z	ds_striderr   ir0   r0   r1   ry   	  s   

zVideoResNet._make_layer)ro   F)r   )r9   r:   r;   r	   r
   rM   r_   r   r   rK   r@   r   r<   r   rF   r^   boolr+   r   rZ   rR   ry   r>   r0   r0   r.   r1   r      sB    4r   rp   rq   rr   rs   .weightsprogresskwargsr%   c                 K   sT   |d urt |dt|jd  t| |||fi |}|d ur(||j|dd |S )Nrt   
categoriesT)r   Z
check_hash)r   lenmetar   Zload_state_dictZget_state_dict)rp   rq   rr   rs   r   r   r   modelr0   r0   r1   _video_resnet#  s   	r   rJ   zKhttps://github.com/pytorch/vision/tree/main/references/video_classificationzThe weights reproduce closely the accuracy of the paper. The accuracies are estimated on video-level with parameters `frame_rate=15`, `clips_per_video=5`, and `clip_len=16`.)Zmin_sizer   ZrecipeZ_docsc                	   @   D   e Zd Zedeedddi eddddd	id
dddZeZdS )r   z7https://download.pytorch.org/models/r3d_18-b3b3357e.pthp   r   rv      Z	crop_sizeZresize_sizeiP5Kinetics-400gO@g-T@zacc@1zacc@5gK7YD@g"_@Z
num_paramsZ_metricsZ_ops
_file_sizeurlZ
transformsr   N	r9   r:   r;   r   r   r   _COMMON_METAKINETICS400_V1DEFAULTr0   r0   r0   r1   r   B  $    r   c                	   @   r   )r   z7https://download.pytorch.org/models/mc3_18-a90a0ba3.pthr   r   r   iPu r   g{GO@gQU@r   gClE@gtVF@r   r   Nr   r0   r0   r0   r1   r   V  r   r   c                	   @   r   )r   z<https://download.pytorch.org/models/r2plus1d_18-91a641e6.pthr   r   r   ir   gʡP@g33333U@r   gOnBD@g1Z^@r   r   Nr   r0   r0   r0   r1   r   j  r   r   Z
pretrained)r   T)r   r   c                 K   .   t | } tttgd g dt| |fi |S )a  Construct 18 layer Resnet3D model.

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.R3D_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.R3D_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.R3D_18_Weights
        :members:
    r`   r   r   r   r   )r   verifyr   rM   r   rc   r   r   r   r0   r0   r1   r   ~     
r   c                 K   s4   t | } tttgtgd  g dt| |fi |S )a  Construct 18 layer Mixed Convolution network as in

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.MC3_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.MC3_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.MC3_18_Weights
        :members:
    r   r   )r   r   r   rM   r   rK   rc   r   r0   r0   r1   r     s   
r   c                 K   r   )a  Construct 18 layer deep R(2+1)D network as in

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.R2Plus1D_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.R2Plus1D_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.R2Plus1D_18_Weights
        :members:
    r`   r   )r   r   r   rM   r@   rl   r   r0   r0   r1   r     r   r   )
_ModelURLs)r   r   r   )6	functoolsr   typingr   r   r   r   r   r   r	   r
   Ztorch.nnrF   Ztorchr   Ztransforms._presetsr   utilsr   Z_apir   r   r   _metar   _utilsr   r   __all__rG   r   rR   r@   rK   r^   rM   r_   rc   rl   r   r<   r   r   r   r   r   r   r   r   r   r   r   r   Z
model_urlsr0   r0   r0   r1   <module>   sv    (%1^
*#*#*$