import torch
import torch.nn as nn
from torch.utils._pytree import tree_map
from typing import List, Any, Dict, Optional, Union, NamedTuple
from collections import defaultdict
from torch.utils._python_dispatch import TorchDispatchMode
from torch.utils.hooks import RemovableHandle
from math import prod

__all__ = ["FlopCounterMode"]

aten = torch.ops.aten


def get_shape(i):
    if isinstance(i, torch.Tensor):
        return i.shape
    return i


def mm_flop(a_shape, b_shape, *args, out_shape=None, **kwargs) -> int:
    """
    Count flops for matmul.
    """
    # Inputs are the shapes of the two matrices: [m, k] @ [k, n] -> [m, n].
    m, k = a_shape
    k2, n = b_shape
    assert k == k2
    # Each output element takes k multiplies and k adds, counted as 2 * k.
    return m * n * 2 * k


def addmm_flop(self_shape, a_shape, b_shape, out_shape=None, **kwargs) -> int:
    """
    Count flops for addmm
    """
    return mm_flop(a_shape, b_shape)


def bmm_flop(a_shape, b_shape, out_shape=None, **kwargs) -> int:
    """
    Count flops for the bmm operation.
    """
    # Inputs are the shapes of the two batched matrices: [b, m, k] @ [b, k, n].
    b, m, k = a_shape
    b2, k2, n = b_shape
    assert b == b2
    assert k == k2
    flop = b * m * n * 2 * k
    return flop
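# Illustrative example (not part of the original module): with the counting
# convention above, a single [64, 128] @ [128, 256] matmul is charged
# 64 * 256 * 2 * 128 = 4,194,304 FLOPs, i.e.
#
#     mm_flop((64, 128), (128, 256)) == 4_194_304
#
# and a batched bmm simply multiplies that count by the batch dimension.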
def baddbmm_flop(self_shape, a_shape, b_shape, out_shape=None, **kwargs) -> int:
    """
    Count flops for the baddbmm operation.
    """
    # The bias addition is ignored; only the batched matmul is counted.
    return bmm_flop(a_shape, b_shape)


def conv_flop_count(
    x_shape: List[int],
    w_shape: List[int],
    out_shape: List[int],
    transposed: bool = False,
) -> int:
    """
    Count flops for convolution. Note only multiplication is
    counted. Computation for bias are ignored.
    Flops for a transposed convolution are calculated as
    flops = (x_shape[2:] * prod(w_shape) * batch_size).
    Args:
        x_shape (list(int)): The input shape before convolution.
        w_shape (list(int)): The filter shape.
        out_shape (list(int)): The output shape after convolution.
        transposed (bool): is the convolution transposed
    Returns:
        int: the number of flops
    """
    batch_size = x_shape[0]
    # For a regular conv the kernel is applied once per output spatial location;
    # for a transposed conv it is applied once per input spatial location.
    conv_shape = (x_shape if transposed else out_shape)[2:]
    c_out, c_in, *dims = w_shape
    flop = batch_size * prod(conv_shape) * prod(dims) * c_out * c_in * 2
    return flop
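# Illustrative example (not part of the original module), using the formula above:
# a 3x3 convolution with x_shape=[1, 3, 32, 32], w_shape=[8, 3, 3, 3] and
# out_shape=[1, 8, 30, 30] is charged
#     1 * (30 * 30) * (3 * 3) * 8 * 3 * 2 = 388,800 FLOPs.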
def conv_flop(x_shape, w_shape, _bias, _stride, _padding, _dilation, transposed, *args, out_shape=None, **kwargs) -> int:
    """
    Count flops for convolution.
    """
    return conv_flop_count(x_shape, w_shape, out_shape, transposed=transposed)


def transpose_shape(shape):
    return [shape[1], shape[0]] + list(shape[2:])


def conv_backward_flop(
        grad_out_shape,
        x_shape,
        w_shape,
        _bias,
        _stride,
        _padding,
        _dilation,
        transposed,
        _output_padding,
        _groups,
        output_mask,
        out_shape) -> int:
    flop_count = 0

    if output_mask[0]:
        grad_input_shape = get_shape(out_shape[0])
        flop_count += conv_flop_count(grad_out_shape, w_shape, grad_input_shape, not transposed)
    if output_mask[1]:
        grad_weight_shape = get_shape(out_shape[1])
        flop_count += conv_flop_count(transpose_shape(x_shape), grad_out_shape, grad_weight_shape, transposed)

    return flop_count


def sdpa_flop_count(query_shape, key_shape, value_shape):
    """
    Count flops for self-attention.
    NB: We can assume that value_shape == key_shape
    """
    b, h, s_q, d_q = query_shape
    _b2, _h2, s_k, _d2 = key_shape
    _b3, _h3, _s3, d_v = value_shape
    assert b == _b2 == _b3 and h == _h2 == _h3 and d_q == _d2 and s_k == _s3
    total_flops = 0
    # q @ k.T: [b*h, s_q, d_q] x [b*h, d_q, s_k] -> scores [b*h, s_q, s_k]
    total_flops += bmm_flop((b * h, s_q, d_q), (b * h, d_q, s_k))
    # scores @ v: [b*h, s_q, s_k] x [b*h, s_k, d_v] -> out [b*h, s_q, d_v]
    total_flops += bmm_flop((b * h, s_q, s_k), (b * h, s_k, d_v))
    return total_flops


def sdpa_flop(query_shape, key_shape, value_shape, *args, out_shape=None, **kwargs) -> int:
    """
    Count flops for self-attention.
    """
    return sdpa_flop_count(query_shape, key_shape, value_shape)
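# Illustrative example (not part of the original module): for query, key and
# value all shaped [1, 2, 16, 8] (batch, heads, sequence, head_dim), the two
# batched matmuls above are each charged 2 * 16 * 16 * 2 * 8 = 8,192 FLOPs,
# so sdpa_flop_count returns 16,384 for the forward pass.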
def sdpa_backward_flop_count(grad_out_shape, query_shape, key_shape, value_shape):
    total_flops = 0
    b, h, s_q, d_q = query_shape
    _b2, _h2, s_k, _d2 = key_shape
    _b3, _h3, _s3, d_v = value_shape
    _b4, _h4, _s4, _d4 = grad_out_shape
    assert b == _b2 == _b3 == _b4 and h == _h2 == _h3 == _h4 and d_q == _d2
    assert d_v == _d4 and s_k == _s3 and s_q == _s4

    # Recompute the attention scores: q @ k.T
    total_flops += bmm_flop((b * h, s_q, d_q), (b * h, d_q, s_k))
    # Gradients through scores @ v (grad wrt scores and grad wrt v)
    total_flops += bmm_flop((b * h, s_q, d_v), (b * h, d_v, s_k))
    total_flops += bmm_flop((b * h, s_k, s_q), (b * h, s_q, d_v))
    # Gradients through q @ k.T (grad wrt q and grad wrt k)
    total_flops += bmm_flop((b * h, s_q, s_k), (b * h, s_k, d_q))
    total_flops += bmm_flop((b * h, d_q, s_q), (b * h, s_q, s_k))
    return total_flops
def sdpa_backward_flop(grad_out_shape, query_shape, key_shape, value_shape, *args, out_shape=None, **kwargs) -> int:
    """
    Count flops for self-attention backward.
    """
    return sdpa_backward_flop_count(grad_out_shape, query_shape, key_shape, value_shape)


flop_mapping = {
    aten.mm: mm_flop,
    aten.addmm: addmm_flop,
    aten.bmm: bmm_flop,
    aten.baddbmm: baddbmm_flop,
    aten.convolution: conv_flop,
    aten._convolution: conv_flop,
    aten.convolution_backward: conv_backward_flop,
    aten._scaled_dot_product_efficient_attention: sdpa_flop,
    aten._scaled_dot_product_flash_attention: sdpa_flop,
    aten._scaled_dot_product_efficient_attention_backward: sdpa_backward_flop,
    aten._scaled_dot_product_flash_attention_backward: sdpa_backward_flop,
}


def normalize_tuple(x):
    if not isinstance(x, tuple):
        return (x,)
    return x


suffixes = ["", "K", "M", "B", "T"]


def get_suffix_str(number):
    # Pick the largest suffix the number supports, based on its digit count.
    index = max(0, min(len(suffixes) - 1, (len(str(number)) - 2) // 3))
    return suffixes[index]


def convert_num_with_suffix(number, suffix):
    index = suffixes.index(suffix)
    # Divide the number by 1000**index and keep three decimal places.
    value = f"{number / 1000 ** index:.3f}"
    return value + suffixes[index]
class FlopCounterMode(TorchDispatchMode):
    """
    ``FlopCounterMode`` is a context manager that counts the number of
    flops within its context. It does this using a ``TorchDispatchMode``.

    It also supports hierarchical output by passing a module (or list of modules) to FlopCounterMode on construction.

    Example usage

    .. code-block:: python

        mod = ...
        flop_counter = FlopCounterMode(mod)
        with flop_counter:
            mod.sum().backward()

    """
    def __init__(
            self,
            mods: Optional[Union[torch.nn.Module, List[torch.nn.Module]]] = None,
            depth: int = 2,
            display: bool = True,
            custom_mapping: Dict[Any, Any] = None):
        self.flop_counts: Dict[str, Dict[Any, int]] = defaultdict(lambda: defaultdict(int))
        self.depth = depth
        self.parents = ["Global"]
        self.display = display
        if custom_mapping is None:
            custom_mapping = {}
        if isinstance(mods, torch.nn.Module):
            mods = [mods]
        self.mods = mods
        self._module_to_forward_hook_handles: Dict[nn.Module, _ForwardHookHandles] = {}
        self.flop_mapping = {**flop_mapping, **custom_mapping}

    def _register_forward_hooks(self):
        if self.mods is None:
            return
        for mod in self.mods:
            prefix = type(mod).__name__
            for name, module in dict(mod.named_modules()).items():
                if name == "":
                    name = prefix
                else:
                    name = ".".join([prefix, name])
                forward_pre_hook_handle = module.register_forward_pre_hook(self._enter_module(name))
                forward_hook_handle = module.register_forward_hook(self._exit_module(name))
                self._module_to_forward_hook_handles[module] = _ForwardHookHandles(
                    forward_pre_hook_handle, forward_hook_handle
                )

    def _deregister_forward_hooks(self):
        for forward_hook_handles in self._module_to_forward_hook_handles.values():
            forward_hook_handles[0].remove()
            forward_hook_handles[1].remove()
        self._module_to_forward_hook_handles.clear()

    def _enter_module(self, name):
        def f(module, inputs):
            inputs = normalize_tuple(inputs)
            out = self._create_pre_module(name)(*inputs)
            return out

        return f

    def _exit_module(self, name):
        def f(module, inputs, outputs):
            outputs = normalize_tuple(outputs)
            return self._create_post_module(name)(*outputs)

        return f

    def _create_post_module(self, name):
        class PushState(torch.autograd.Function):
            @staticmethod
            def forward(ctx, *args):
                assert self.parents[-1] == name
                self.parents.pop()
                args = tree_map(lambda x: x.clone() if isinstance(x, torch.Tensor) else x, args)
                if len(args) == 1:
                    return args[0]
                return args

            @staticmethod
            def backward(ctx, *grad_outs):
                self.parents.append(name)
                return grad_outs

        return PushState.apply

    def _create_pre_module(self, name):
        class PopState(torch.autograd.Function):
            @staticmethod
            def forward(ctx, *args):
                self.parents.append(name)
                args = tree_map(lambda x: x.clone() if isinstance(x, torch.Tensor) else x, args)
                if len(args) == 1:
                    return args[0]
                return args

            @staticmethod
            def backward(ctx, *grad_outs):
                assert self.parents[-1] == name
                self.parents.pop()
                return grad_outs

        return PopState.apply

    def get_total_flops(self) -> int:
        return sum(self.flop_counts['Global'].values())

    def get_flop_counts(self) -> Dict[str, Dict[Any, int]]:
        """Returns the flop counts as a dictionary of dictionaries. The outer
        dictionary is keyed by module name, and the inner dictionary is keyed by
        operation name.

        Returns:
            Dict[str, Dict[Any, int]]: The flop counts as a dictionary.
        """
        return dict(self.flop_counts)

    def get_table(self, depth=None):
        if depth is None:
            depth = self.depth
        if depth is None:
            depth = 999999

        import tabulate
        tabulate.PRESERVE_WHITESPACE = True
        header = ["Module", "FLOP", "% Total"]
        values = []
        global_flops = self.get_total_flops()
        global_suffix = get_suffix_str(global_flops)
        is_global_subsumed = False

        def process_mod(mod_name, depth):
            nonlocal is_global_subsumed

            total_flops = sum(self.flop_counts[mod_name].values())

            is_global_subsumed |= total_flops >= global_flops

            padding = " " * depth
            values = []
            values.append([
                padding + mod_name,
                convert_num_with_suffix(total_flops, global_suffix),
                f"{total_flops / global_flops * 100:.2f}%"
            ])
            for k, v in self.flop_counts[mod_name].items():
                values.append([
                    padding + " - " + str(k),
                    convert_num_with_suffix(v, global_suffix),
                    f"{v / global_flops * 100:.2f}%"
                ])
            return values

        for mod in self.flop_counts.keys():
            if mod == 'Global':
                continue
            mod_depth = mod.count(".") + 1
            if mod_depth > depth:
                continue

            cur_values = process_mod(mod, mod_depth - 1)
            for value in cur_values:
                values.append(value)

        # Only emit the "Global" row if it contains flops that are not already
        # fully attributed to one of the tracked modules.
        if 'Global' in self.flop_counts and not is_global_subsumed:
            for idx, value in enumerate(values):
                values[idx][0] = " " + values[idx][0]

            values = process_mod('Global', 0) + values

        if len(values) == 0:
            values = [["Global", "0", "0%"]]

        return tabulate.tabulate(values, headers=header, colalign=("left", "right", "right"))

    def __enter__(self):
        self.flop_counts.clear()
        self._register_forward_hooks()
        super().__enter__()
        return self

    def __exit__(self, *args):
        if self.display:
            print(self.get_table(self.depth))
        self._deregister_forward_hooks()
        super().__exit__(*args)

    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
        kwargs = kwargs if kwargs else {}

        out = func(*args, **kwargs)
        func_packet = func._overloadpacket
        if func_packet in self.flop_mapping:
            flop_count_func = self.flop_mapping[func_packet]
            args, kwargs, out_shape = tree_map(get_shape, (args, kwargs, out))
            flop_count = flop_count_func(*args, **kwargs, out_shape=out_shape)
            for par in self.parents:
                self.flop_counts[par][func_packet] += flop_count

        return out


class _ForwardHookHandles(NamedTuple):
    forward_pre_hook_handle: RemovableHandle
    forward_hook_handle: RemovableHandle
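# Illustrative usage sketch (not part of the original module): counting the flops
# of a small linear layer without printing the tabulate table. The model, shapes
# and variable names below are made up for the example.
if __name__ == "__main__":
    _model = nn.Linear(16, 4)
    _x = torch.randn(8, 16)
    with FlopCounterMode(_model, display=False) as _flop_counter:
        _model(_x).sum().backward()
    # One addmm in the forward pass plus the matmuls of its backward pass.
    print(_flop_counter.get_total_flops())
    print(_flop_counter.get_flop_counts())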