o
    "i                    @   sb"  U d dl Z d dlZd dlZd dlmZ d dl mZmZ d dlmZm	Z	 d dl
mZmZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlm  mZ d dlmZmZmZ d dl m!Z! d dlm"Z"m#Z#m$Z$m%Z% d d	l&m'Z'm(Z(m)Z)m*Z* d d
l+m,Z,m-Z- d dl.m/Z/m0Z0 ej1j2Z2g Z3ee4 e5d< ej6j7j8Z8G dd deZ9	ddedej:de;fddZ<ee<ej:j=ddZ>ee<ej:j=dZ?ee<ej:j@dZAdedeBdefddZCe!e8jDe?dedefdd ZDe!e8jEe?dedefd!d"ZEe!e8jFe?deded#eGd$eGfd%d&ZFe!e8jHe?d'ed(eGd)eGd*eGd+e;d,efd-d.ZHe!e8jIjJgd/d0 ZKe!e8jIjgd1efd2d3ZLe!e8jMe?d4edefd5d6ZMe!e8jNe?d'ed4efd7d8ZNe!e8jOd'ed4ed9eGd:eGfd;d<ZOe!e8jPe?d4edefd=d>ZPe!e8jQe?d'ed4edefd?d@ZQe!e8jRd'ed4ed$eGfdAdBZRe!e8jSe?d'ed4edCeGdDe;fdEdFZSe!e8jTe?ddHed4edIe4fdJdKZTe!e8jUe?d'edLefdMdNZUe!e8jVe?d4edefdOdPZVe!e8jWe?d'ed4edefdQdRZWe!e8jXd4edSedefdTdUZXe!e8jYd'ed4edSedeeef fdVdWZYe!e8jZe8jZj[\e2j]e?		dd4edXedYeGdZeGd[e;d\eej^ defd]d^ZZe!e8j_e8j_j[\e2j]e?		dd4edXedYeGdZeGd[e;d\eej^ defd_d`Z_e!e8j`e?d'ed4edXedYeGdZeGd[e;dDe;defdadbZ`e!e8jae?d'ed4edcedefdddeZadfedgeBfdhdiZbdjejcfdkdlZde!e8jee?e9jfjgfd4edmedgeBdefdndoZee!e8jhe?d'edLedmedgeBfdpdqZhe!e8jie?e9jfjgdrfd4edmedgeBd#eGfdsdtZie!e8jjj[e?d'ed4edmedgeBd#eGf
dudvZje!e8jjjke?d'ed4edmedgeBd#eGdwefdxdyZle!e8jmj[e?d'ed4edmedgeBdzeGf
d{d|Zme!e8jmjne?d'ed4edmedgeBdzeGdwefd}d~Zod'ed4edmedSee dgeBdeBdedefddZpe!e8jqe?d'ed4edeBdefddZqe!e8jrd'ed4edmedSee dgeBdeBdedefddZre!e8jsd'ed4edmedSee dgeBdeBdedefddZse!e8jte?de9jfjgfd4edmedSee dgeBdef
ddZte!e8jue?de9jfjgfd'ed4edmedSee dgeBdefddZue!e8jve* e?e9jfjgfdLedmedgeBdefddZve!e8jwe?e9jfjgfd'ed4edmedgeBdef
ddZwe!e8jxddLededeGfddZxe!e8jydededefddZye!e8jzd'edeeB deBdeBdeBdeBfddZze!e8j{j	 			dd4edeBdeeB deeB deBf
ddZ|e!e8j}d'edeeB deBdeBfddZ}e!e8j~d'edeeB deBdeBdeBf
ddZ~d'edwedejcfddZe!e8je>d'ededeBdejcfddZe!e8je>d'ededeBdejcfddZdd Ze!e8je* e?dLedeeB deeB deeB deeB defddZe!e8je* e?dLedeeB deeB deeB deeB deeB defddZe!e8jd'eded)eGfddZe!e8jdHedeeB deBdeBdeBdefddńZe!e8jj[e?	dd'ed4edeeG defddȄZe!e8je8jj[\e2je8jj[\e2jdLedeGdee; fdd˄Ze!e8jdLedeGdee; fdd̈́Ze!e8je* dedeBde;fddЄZe!e8je* dedeBde;fdd҄Ze!e8jjdd4eded(eGdefddԄZe!e8jjJdd4edeGd(eGdefddքZe!e8j			ddSededeBde;de;defdd݄Ze!e8jd'ededeBdeBde;f
ddZdeeB fddZe!e8je8jg	 d d4edeeB deBdee fddZe!e8jje8jjgd d4edeBdeBdeedf fddZe!e8je* e?dd4ededed#eBd(eBf
ddZe!e8je* e?			dd4ededed#eBd(eBde;fddZe!e8je* e?dd4ededed#eBd(eBf
ddZe!e8je?d'edLedededee deBdeBdeBdeBdee; deee ee ee f fddZdee dee fddZe!e8jd edLedeeB dededSee dee dee; deee ee ee f fddZdLedSee dee dee dee d[e;deGdeGde;deeeeee ee f fd	d
Ze!e8jdLedSee dee dee dee d[e;deGdeGdeeeef fddZe8jj[\e2je8jj[\e2jdLedSee dee dee dee d[e;deGdeGdeeeef fddZe8jj[\e2jd dee fddZe!e8jj[dLedSee dee dededeGdeGdeeeef fddZe!e8jj[dLedSee dee deded[e;deGdeGdeeeef fddZe!e8jjdLedSee dee d[e;deGdeGdeeeef fddZe!e8jj[dLedSee dee deded[e;deGdeGdeeeeeef fddZe!e8je?dddZdd Zdd Ze!e8jddddddddedjeejc d eej d!e;d"e;d#eej fd$d%Ze!e8je8je8jgd&d' Ze8jj[\e2je!e8jdLedSedee dee dee d[e;d(eGd)eGfd*d+Zd,d- Ze!e8jd edLedSee dee dee d.ee d/ee de;deGdee; deeee ee f fd0d1Ze!e8jdLed'edSedee dee d.ee d2ee d)eGd3efd4d5Ze!e8je?dLedeeBeBf fd6d7Ze!e8jdd8de$deBde$d9e$d(e#f
d:d;Ze!e8je* dd8de$deBde$d9e$d(e#f
d<d=Zdd8de$deBde$d9e$d>e;d(e#fd?d@Ze!e8jde$deBde$d9e$fdAdBZe!e8je* de$deBde$d9e$fdCdDZde$deBde$d9e$d>e;f
dEdFZe!e8je*ddce?d4edeeef fdGdHZe!e8j	I	rddedJee;eBeGf dKee;eBeGf fdLdMZe!e8jddNdOZdPdQ ZdRdS Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jdTdU Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jdVdW Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jdXdY ZʐdZd[ Ze!e8jj[e8jj[\e2je?	ddLedeeB d\eeG defd]d^Ze!e8jj[e8jj[\e2je?		ddLedeeB d_eeG d`eeG def
dadbZe!e8jj[e8jj[\e2je?			ddLedeeB dceeG d_eeG d`eeG defdddeZɐdfdg Z̐dhdi Z͐djdk Zΐdldm Z	ddndoZАdpdq Zѐdrds ZҐddtduZӐddvdwZԐdxdy Ze!e8jj׃e8jjנ\e2je8jjנ\e2jdzd{ Ze!e8jj׃e8jjנ\e2je8jjנ\e2jd|d} Ze!e8jjۃe8jj۠\e2je8jj۠\e2jd~d Ze!e8jjۃe8jj۠\e2je8jj۠\e2jdd Zݐdd ZސdddZߐdddZdd Ze!e8jj׃e8jjנ\e2je8jjנ\e2jdd Ze!e8jjۃe8jj۠\e2je8jj۠\e2jdd Zdd Zdd Ze!e8jjۃe8jj۠\e2je8jj۠\e2jdd Ze!e8jj׃e8jjנ\e2je8jjנ\e2jdd Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jdd Ze!e8jjŃe8jjŠ\e2je8jjŠ\e2jdd Ze!e8jj[e8jj[\e2je?		ddLedeeB de;d_eeG d`eeG defddZe!e8jj[dedede;fddZe!e8je8jgdd Ze!e8jgdd Zd4edmedSee dgeBdeBdeeef fddZe!e8jd4edmedSee dgeBdeBdeeef fddZe!e8jd4edmedSee dgeBdeBdeeef fddZdedeGdefddZdedeGdefddZdede%fddZde%dedefddZdee defddZdeBde;djejcd ejfddZdedeBdeBde;fddZdedeBdeBdeBde;f
ddÄZdedeeB de;fdĐdńZdedeeB de;fdƐdǄZe!e8j e?dedeeB de;fdȐdɄZ e!e8je?	 	 	ddededeBdeBde;defd͐d΄Ze!e8je* e?dϐdЄ Ze!e8je* e?dѐd҄ Ze!e8jdde9jfjgfdӐdԄZdejdejde;fdאd؄Ze8jj[\e2je* dِdڄ Ze!e8jj[e?		ddedeeBeBf de;deeG deeG defdݐdބZe!e8jjŃe8jjŠ\e2je8jjŠ\e2je* e?	ddedeeeBeBf  de;deeeGeGf  def
ddZ	e!e8j
e*dddddddZ
e!e8je* dddddZe!e8jj[e8jjnge* dejdddde#djeejc dejd eej d!e;f
ddZe!e8jjgdejdddde#de#djeejc dejd eej d!e;fddZe!e8je8jj[\e2je* ddde9jfjgfdLedmede#de#dSee dgeBdefddZe!e8je8jj[\e2je*dddLedmedgeBdeeef fddZdd Zee8je8j ee8je8j ee8je8j ee8je8j ee8je8jI ee8je8j ee8je8jP ee8je8j  ee8j!e8jM ee8j"e8j# ee8j$e8j% ee8j&e8j' ee8j(e8j) ee8j*e8j+ ee8j,e8j- ee8j.e8j/ ee8j0e8j1 ee8j2e8j3 ee8j4e8j5 ee8j6e8j7 ee8j8e8j9 ee8j:e8j; ee8j<e8j= ee8j>e8j? ee8j@e8jV dS (	      N)Enum)partialreduce)chainproduct)CallablecastIterableListOptionalTupleUnion)	sym_floatsym_intTensorregister_decomposition)IntLike
NumberType
TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_outout_wrapper)expect_true	guard_int)tree_flattentree_map__all__c                   @   s   e Zd ZdZdZdZdS )	Reductionr         N)__name__
__module____qualname__NONEMEANSUM r)   r)   k/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/torch/_decomp/decompositions.pyr    "   s    r    Fftype_promotioncompute_dtype_onlyc                    s   t  fdd}|S )Nc                     sr   dd t | |fd D }tj|di\  fdd}fdd}t|| i t||}r4|S t||S )	Nc                 S   s   g | ]	}t |tr|qS r)   )
isinstancer   .0xr)   r)   r*   
<listcomp>2   s
    
z-type_casts.<locals>.inner.<locals>.<listcomp>r   type_promotion_kindc                       t | tr
|  S | S Nr.   r   tor1   computation_dtyper)   r*   increase_prec:      

z0type_casts.<locals>.inner.<locals>.increase_precc                    r4   r5   r6   r8   )result_dtyper)   r*   decrease_prec@   r<   z0type_casts.<locals>.inner.<locals>.decrease_prec)r   utilselementwise_dtypesr   )argskwargsZ	flat_argsr;   r>   rr-   r+   r,   )r:   r=   r*   inner0   s   

ztype_casts.<locals>.inner)	functoolswraps)r+   r,   r-   rE   r)   rD   r*   
type_casts+   s   rH   T)r,   r-   )r,   r1   dimreturnc                 C   s$   t ||   D ]}| d} q| S )N)rangerI   	unsqueeze)r1   rI   _r)   r)   r*   _unsqueeze_to_dim]   s   rO   out_gradyc                 C   s   | d||     S Nr!   Zconj_physicalrP   rQ   r)   r)   r*   tanh_backwardc      rU   c                 C   s   | |d|     S rR   rS   rT   r)   r)   r*   sigmoid_backwardi   rV   rW   beta	thresholdc                 C   s.   ||   }t|| |k| | | |d  S N      ?)exptorchwhere)rP   r1   rX   rY   zr)   r)   r*   softplus_backwardo   s   "r`   grad_outputalphascaleinput_scale	is_resultself_or_resultc           	      C   sb   || }|}|}|rt |dk| | ||  | | S t |dk| | | t ||  | | S Nr   )r]   r^   r\   )	ra   rb   rc   rd   re   rf   ZnegcoefZposcoefZ
negiptcoefr)   r)   r*   elu_backwardv   s   
rh   c                 C      t | |S r5   )r]   Z	full_likeselfvaluer)   r)   r*   fill_scalar      rm   rl   c                    s(   t   dk fdd t|  S )Nr   c                      s   d    dS )Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrI   r)   rl   r)   r*   <lambda>       zfill_tensor.<locals>.<lambda>)r]   _checkrI   atencopyrj   r)   rp   r*   fill_tensor   s
   

rv   rk   c                 C   s    t jt j| d ddddd S N   r   min   maxr]   clamprk   r)   r)   r*   hardsigmoid   s    r   c                 C   s   t |dk|dk @ | d dS )Ng      g      @gUUUUUU?        r]   r^   ra   rk   r)   r)   r*   hardsigmoid_backward   s
   r   min_valmax_valc                 C   s   t ||k||kB d| S Nr   r   )ra   rk   r   r   r)   r)   r*   hardtanh_backward   s   r   c                 C   s$   | t jt j| d dddd d S rw   r~   r   r)   r)   r*   	hardswish   s   $r   c              
   C   s,   t |dk dt |dk| |d d  | S )Nr   rx         ?r   r   r)   r)   r*   hardswish_backward   s
   r   c                 C   s   t ||kd| S r   r   )ra   rk   rY   r)   r)   r*   threshold_backward      r   negative_slopeself_is_resultc                 C   s   t |dk| | | S rg   r   )ra   rk   r   r   r)   r)   r*   leaky_relu_backward   s   r   nonegradapproximatec                 C   s   d}d}d}|dkrO|| d }d}|| }|| }	||||	   }
t |
}d| }d| }d| }d||  }|dd| |   }|| | }| ||  S |}|| d }ddt ||   }|t || d	  }| |||   S )
Ng;f?g;f?gmBP?tanhr   gHm?r!   rx   g      )r]   r   erfr\   )r   rk   r   ZM_SQRT2Z	M_SQRT1_2Z
M_2_SQRTPIZkBetaZkKappaZx_sqZx_cuberE   Z
tanh_innerleftrightZleft_derivativeZtanh_derivativeZinner_derivativeZright_derivativeZkAlphaZcdfZpdfr)   r)   r*   gelu_backward   s,   
r   inputc                 C   s:   t t|}t |}|| d||   }| ||  S rR   )r]   r   FZsoftplussigmoid)ra   r   Zinput_tanh_softplusZinput_sigmoidoutr)   r)   r*   mish_backward   s   
r   c                 C   s   | t |  S r5   )r]   r   r   r)   r)   r*   silu   s   r   c                 C   s,   ddt |   }| | d|d|    S rR   )r]   r\   )ra   rk   r   r)   r)   r*   silu_backward  s   r   weightc                 C   s   t | dk| ||  S rg   r   )rk   r   r)   r)   r*   _prelu_kernel	  s   r   c                 C   s4   t |dk| ||  }t |dkd||  }||fS )Nr   r   r   )ra   rk   r   Z
input_gradZweight_gradr)   r)   r*   _prelu_kernel_backward  s   r   noiseloweruppertraining	generatorc           
      C   sh   |d u sJ |r(| dk}t | ||}t|| | | }|t||d |S || d }	t | |	S Nr   r!   r"   )rt   uniformr]   r^   copy_
leaky_relu)
rk   r   r   r   r   r   Znot_positiverC   outputr   r)   r)   r*   rrelu_with_noise  s   r   c              	   C   s   |  t| |||||S r5   )r   r   )rk   r   r   r   r   r   r)   r)   r*   rrelu_with_noise_0  s   r   c                 C   s6   |r|| dkr|  |S || d }t| |||S )Ngư>r"   )mulrt   r   )ra   rk   r   r   r   r   r   r   r)   r)   r*   rrelu_with_noise_backward>  s   
r   bufferc                 C   sN   |dk }t |dd}t |dd}t t | }| |||d|     S )Nr   r!   rK   )r]   r^   r\   abs)ra   rk   r   Zin_negativeZ	max_derivsignr_   r)   r)   r*   log_sigmoid_backwardR  s
   r   loss	reductionc                 C   s0   |t jjkrt| S |t jjkrt| S | S r5   )r    r'   rl   r]   meanr(   sum)r   r   r)   r)   r*   apply_loss_reduction^  s
   

r   dtypec                 C   s4   | t jkrt jS | t jkrt jS | t jkrt jS d S r5   )r]   Z	complex32Zfloat16Z	complex64Zfloat32Z
complex128Zfloat64r   r)   r)   r*   to_real_dtypeg  s   


r   targetc                 C   s   | | d }t ||S )Nr"   )r   )rk   r   r   r   r)   r)   r*   mse_lossv  s   
r   c                 C   s,   |t jjkrd|  nd}|||  |  S )N       @)r    r'   rl   numel)ra   r   r   r   normr)   r)   r*   mse_loss_backward  s   r   r[   c                 C   s<   | |   }t||k d|d  | |d|  }t||S )Nr   r"   )r   r]   r^   r   )rk   r   r   rX   r   r)   r)   r*   smooth_l1_loss  s   &
r   c           	      C   sZ   |t jjkrd|  nd}|| }t|}||  }t||k || | |t| S rZ   )r    r'   rl   r   r]   r   r^   r   )	ra   rk   r   r   rX   r   r1   Zabs_xZ	norm_gradr)   r)   r*   smooth_l1_loss_backward  s   

r   
grad_inputc                 C   *   t | ||||}t||j t||ddS NT)Z	copy_fromZcopy_toZexact_dtype)r   r   shaper   )ra   rk   r   r   rX   r   resultr)   r)   r*   smooth_l1_loss_backward_out     
r   deltac              
   C   s`   |t jjkrd|  nd}|| }t|| k | |  | t||k||  | || |  S rZ   )r    r'   rl   r   r]   r^   )ra   rk   r   r   r   r   r1   r)   r)   r*   huber_loss_backward  s    r   c                 C   r   r   )r   r   r   r   )ra   rk   r   r   r   r   r   r)   r)   r*   huber_loss_backward_out  r   r   ignore_indextotal_weightc                 C   s   |  dk rdnd}|tjjkr| | } ||}t||k|d}t|}	t|	||d}	|	  |     kr=dkrDn n| |} |d urcdd t	|  D }
|j
d |
|< ||
}| | } t||k| d} |	|  S )Nr"   r   r!   g      c                 S   s   g | ]}d qS r!   r)   r0   rN   r)   r)   r*   r2     rr   z&_nll_loss_backward.<locals>.<listcomp>)rI   r    r'   rl   rM   r]   r^   
zeros_likescatterrL   r   reshape)ra   rk   r   r   r   r   r   channel_dimsafe_targetr   Z	new_shaper)   r)   r*   _nll_loss_backward  s    	

 

r   c           
      C   s   |  dks
J dt|  |}||}|d dks'J d| d| |d }||d|}||||}t|}d| | | |  }	||  }tj||	g|dS )Nr   z*glu does not support 0-dimensional tensorsr"   z.Halving dimension must be even, but dimension z	 is size r[   ro   )rI   r?   canonicalize_dimsizenarrowr]   r   cat)
ra   rk   rI   Zwrap_dimZnInZ	inputSizeZ	firstHalfZ
secondHalfZgradInputFirstHalfZgradInputSecondHalfr)   r)   r*   glu_backward  s   

r   c                 C   sr  d|    krdksJ d J d|  dksJ d|  dko)|  dk}|sC|jd |jd ksCJ d|j d|j d| dksXJ d	|j d
|  df|d u si| |jd ksiJ d|tjjkr|  dkr|   dkr| jd |jd ksJ d|jd  d|    d| jd  n|   dkr|  dksJ d| j t| ||||||S )Nr   r"   input tensor should be 1D or 2Dr!   ;0D or 1D target tensor expected, multi-target not supportedsize mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)rK   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )rI   r   r   r    r&   rl   r   )ra   rk   r   r   r   r   r   no_batch_dimr)   r)   r*   nll_loss_backward  s<   (
"
r   c                 C   s   |  dksJ d|   |  dksJ d|   |jd |jd kr<|jd |jd kr<|jd |jd ksHJ d|j d	|j | dks\J d
|j d|  dt| ||||||S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: rx   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r"   r!   r   r   r   z ( z, elements))rI   r   r   r   )ra   rk   r   r   r   r   r   r)   r)   r*   nll_loss2d_backward1  s*   r   c              	   C   s\   |d t t |  | dd |t t | | dd  }|d ur)|| }t||S )Nr!   r)   i)r]   maximumlog1pnew_fulllogr   )rk   r   r   r   r   r)   r)   r*   binary_cross_entropyS  s   

r   c                 C   sR   d}| ||  t j|d|  |d }|d ur|| }|tjjkr'||  }|S )Ng-q=r!   ry   )r]   r   r    r'   rl   r   )ra   rk   r   r   r   ZEPSILONr   r)   r)   r*   binary_cross_entropy_backwardh  s   	"r   c                 C   s    t t |  | }t||S r5   )r]   r   r\   r   )r   r   r   r   r)   r)   r*   soft_margin_lossz  s   
r   c                 C   s6   ||  t || d  }|tjjkr||  }|S rR   )r]   r   r    r'   rl   r   )ra   rk   r   r   r   r)   r)   r*   soft_margin_loss_backward  s   r   r"   otherpc                 C   s   t j| | |dS )N)r   )rt   r   )r   r   r   r)   r)   r*   dist  r   r   x1x2c           	      C   s   |  ddd}tj|tjd}| ddd}tj|tjd}t| d||gd}t|||gd}||j}|	d
 S )Nr"   rK   Tmemory_formatr   )powr   r]   	ones_likecontiguous_formatr   r   matmulmT	clamp_minsqrt)	r   r   Zx1_normZx1_padZx2_normZx2_padZx1_Zx2_r   r)   r)   r*   _euclidean_dist  s   r  input_sizesstartendstepc                 C   s   |  |}t|| ||||S r5   )	new_zerosr]   Zslice_scatter)ra   r  rI   r  r  r  r   r)   r)   r*   slice_backward  s   
	r  r!   c                 C   s:  |   }|dkrtdt|   |}t|  }t|  }|dkr(td|d ur.|nd}|d ur6|ntj}	|dk rC||| 7 }|	dk rM|	|| 7 }	|dk rTd}n
||| kr^|| }|	|k re|}	n
|	|| kro|| }	| 	 |||   }
|	| }|| d | ||< ||  |9  < | j
rtd| |||
S )Nr   z,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver!   z<Slice decomposition for quantized tensors aren't implemented)rI   RuntimeErrorr?   r   listr   stridesysmaxsizestorage_offsetZis_quantizedNotImplementedErrorZ
as_strided)rk   rI   r  r  r  ndimsizesstridesZ	start_valZend_valr  lenr)   r)   r*   slice_forward  s>   	r  indexc                 C   s   |  |}t|| ||S r5   )r  r]   Zselect_scatter)ra   r  rI   r  r   r)   r)   r*   select_backward  s   
r  offsetdim1dim2c                 C   s   |  |}t|| |||S r5   )r  r]   Zdiagonal_scatter)ra   r  r  r  r  r   r)   r)   r*   diagonal_backward  s   
r  input_dtypec                 C   s   | j |kr
||}|S r5   )r   r7   )ra   r   r  r)   r)   r*   _cast_grad_to_input_dtype  s   

r  r   c                 C   s0   | | }||t j||dd  }t| || S NTrI   keepdim)r]   r   r  
contiguous)ra   r   rI   r  Znew_grad_outputr   r)   r)   r*   _softmax_backward_data  s
   
r   c                 C   s*   | t |t j| |dd  }t| ||S r  )r]   r\   r   r  )ra   r   rI   r  r   r)   r)   r*   _log_softmax_backward_data  s   
r!  c           
      C   sZ   | |d  ||d   }t tjtj|d}|d||d}|d|| |d}	||	 S )z/Utility function to implement im2col and col2imr"   r!   r   devicer   rK   )r   r]   arangeint64rM   )
Zinput_dZkernel_dZ
dilation_dZ	padding_dZstride_dr#  Zblocks_dZ	arange_kwZblocks_d_indicesZkernel_gridr)   r)   r*    _im2col_col2im_indices_along_dim  s
   r&  kernel_sizedilationpaddingr
  c              	      s&  t tdkdd  t t dkdd  t tdkdd  t tdkdd  ddd	}|d
 | d | ddd |d | jt}t |dv odtdd dd  D fdd tdd tdd   D t tdd D  fdd |dk}|s| d} | j\}}	}
}\}}\}} \}}\}}t|
||||| j	}t|||||| j	}t
| ||||f}|dd}|d d d d ||f }|dddddd}|d}|d}|||	| | || }|s|d}|S ) Nr"   c                   S      dS )Nz"im2col(): only 2D kernel supportedr)   r)   r)   r)   r*   rq   4      zim2col.<locals>.<lambda>c                   S   r*  )Nz$im2col(): only 2D dilation supportedr)   r)   r)   r)   r*   rq   5  r+  c                   S   r*  )Nz#im2col(): only 2D padding supportedr)   r)   r)   r)   r*   rq   6  r+  c                   S   r*  )Nz"im2col(): only 2D stride supportedr)   r)   r)   r)   r*   rq   7  r+  Tc                 S   <   |rt dd | D nt dd | D }t|dd  d S )Nc                 s       | ]}|d kV  qdS r   Nr)   r0   r   r)   r)   r*   	<genexpr>:      z1im2col.<locals>.check_positive.<locals>.<genexpr>c                 s       | ]}|d kV  qdS r.  r)   r/  r)   r)   r*   r0  :  r1  c                   S   r*  )Nz<{param_name} should be greater {'than' zero, but got {param}r)   r)   r)   r)   r*   rq   <  r+  z0im2col.<locals>.check_positive.<locals>.<lambda>allr]   rs   param
param_namestrictcondr)   r)   r*   check_positive9     (zim2col.<locals>.check_positiver'  r(  r)  Fr8  r
  rx   r   c                 s       | ]}|d kV  qdS r.  r)   r0   dr)   r)   r*   r0  G  r1  zim2col.<locals>.<genexpr>r   c                         dt   S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler)   r   r)   r*   rq   H      c                 s   s>    | ]\}}}}}d |d|  ||d    d  |  V  qdS )r!   r"   Nr)   r0   r   padZdilZkerstr)   r)   r*   r0  K  s
    "
r   c                 s   r-  r.  r)   )r0   cr)   r)   r*   r0  R  r1  c                      s6   dt dd   d d  d d d dS )	Nz!Given an input with spacial size r   , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.rB  r)   r(  r'  output_sizer)  r   r
  r)   r*   rq   S  s    r   r   rK   r!   rx      T)r]   rs   r  r   r4  rC  ziprM   r&  r#  r   rG  permuter   r   squeeze)r   r'  r(  r)  r
  r:  r  batched_inputZ	batch_dimr   Zinput_hZinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wZblocks_row_indicesZblocks_col_indicesZpadded_inputr   Znum_blocks_rowZnum_blocks_colr)   rN  r*   im2col*  sd   




 




r^  rO  c              
      s  t tdkdd  t tdkdd  t tdkdd  t tdkdd  t tdkdd  d$d	d
}|d |d |ddd |d |d | jt}t |dv outdd dd  D fdd d d  }t d | dkfdd dd tD }	|	d |	d   t d  k fdd t  dk fdd |dk}
|
s| d} | j\}}\}}\}}\}}\}}| d d | g |	 } | dddd dd!} t	|||||| j
}t|d }t	|||||| j
}d"d tD }| d d t g| }d d ||f}tj||| dd#}t|| | | | f}|
sf|d}|S )%Nr"   c                   S   r*  )Nzonly 2D output_size supportedr)   r)   r)   r)   r*   rq     r+  zcol2im.<locals>.<lambda>c                   S   r*  )Nzonly 2D kernel supportedr)   r)   r)   r)   r*   rq     r+  c                   S   r*  )Nzonly 2D dilation supportedr)   r)   r)   r)   r*   rq     r+  c                   S   r*  )Nzonly 2D padding supportedr)   r)   r)   r)   r*   rq     r+  c                   S   r*  )Nzonly 2D stride supportedr)   r)   r)   r)   r*   rq     r+  Tc                 S   r,  )Nc                 s   r-  r.  r)   r/  r)   r)   r*   r0    r1  z1col2im.<locals>.check_positive.<locals>.<genexpr>c                 s   r2  r.  r)   r/  r)   r)   r*   r0    r1  c                   S   r*  )Nz9{param_name} should be greater than zero, but got {param}r)   r)   r)   r)   r*   rq     r+  z0col2im.<locals>.check_positive.<locals>.<lambda>r3  r5  r)   r)   r*   r:    r;  zcol2im.<locals>.check_positiver'  r(  r)  Fr<  r
  rO  )r"   rx   c                 s   r>  r.  r)   r?  r)   r)   r*   r0    r1  zcol2im.<locals>.<genexpr>r   c                      rA  )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: rB  r)   rD  r)   r*   rq     rE  r   r!   c                      s   dd  d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = r   z and kernel_size=r)   r)   )r'  r   r)   r*   rq     s
    c                 S   s:   g | ]\}}}}}d |d|  ||d    d  |  qS r!   r"   r)   rF  r)   r)   r*   r2     s    "zcol2im.<locals>.<listcomp>rK   c                      4   d d d d d d  dd  d	S 
NzGiven output_size=rJ  rK  rL  rM  z , expected input.size(-1) to be 	 but got rK   .r)   r)   Lr(  r'  rO  r)  r   r
  r)   r*   rq         c                      r`  ra  r)   r)   rd  r)   r*   rq     rf  rx   r   rP  c                 S   s   g | ]
\}}|d |  qS r"   r)   )r0   or   r)   r)   r*   r2     s    
accumulaterQ  )r]   rs   r  r   r4  rR  rM   r   rS  r&  r#  rO   r  prodrt   _unsafe_index_putr   rG  rT  )r   rO  r'  r(  r)  r
  r:  r  Zprod_kernel_sizecolrU  out_hout_wrV  rW  rX  rY  rZ  r[  r\  r]  Zindices_rowZindices_colZoutput_padded_sizer   idxr)   rd  r*   col2im}  s   




 



"

rq  maskc                 C   s$   | | | |  jt| d}|S )Nr   )type_ascloner?   suggest_memory_format)ra   rr  rc   rC   r)   r)   r*   native_dropout_backward  s   rv  
input_size	dimensionr   c           	      C   s   t |dkrt| dS tt ||}tj|| | jtjd}|d||	 }| 
d|d 	||d } | |}d| |f }tj||| dd S )Nr   r#  r   rK   r!   r5   Tri  )r  r]   Zsqueeze_copyr?   r   r$  r#  int32ZunfoldflattenZmovedimr  rt   rl  r  )	r   rw  rx  r   r  rI   rp  r   r  r)   r)   r*   unfold_backward  s   
r|  epsc              	   C   st   |d ur|}d| }t t ||k||k| |d|   dS t t |dk|dk| |d|   |dtdS )Nr[   r   r)   nan)r]   r^   logical_andr   float)ra   rk   r}  lohir)   r)   r*   logit_backward   s   r  trainc                 C   s&   |r|dkrt | ||d S |  S rg   )rt   native_dropoutrt  )r   r   r  r)   r)   r*   dropout  s   r  c                 C   sp   |r.|dkr.|dkrt | t j| t jdfS t | |k}||  tdd|   }||fS | t j| t jdfS )Nr   r!   r   r[   )r]   r   bool	rand_liker  r   )r   r   r  Z	bool_maskresr)   r)   r*   r    s   r  half_to_floatc                 C   s   |   } |r| jtjksJ tj| tjjd\}}| |} | 	 dkr*t
| }ntj| |dd}t
| | }|tj||dd }|sJ||}|S Nr3   r   T)r  )r  r   r]   halfr?   r@   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr7   r   r\   amaxr   )r1   rI   r  r:   r=   Zunnormalizedx_maxr   r)   r)   r*   _softmax+  s   


r  c           	      C   s   |   } |r| jtjksJ tj| tjjd\}}| |} | 	 dkr'| }ntj
| |dd}| | }ttjt||dd}|| }|sL||}|S r  )r  r   r]   r  r?   r@   r  r  r7   r   r  r   r   r\   )	r1   rI   r  r:   r=   Zshiftedr  Zshifted_logsumexpr   r)   r)   r*   _log_softmaxB  s    


r  c                 C      t j|| |dS Nrb   r]   subrk   r   rb   r)   r)   r*   rsub_TensorZ     r  c                 C   r  r  r  r  r)   r)   r*   rsub_Scalar_  r  r  rK   indicespadding_idxscale_grad_by_freqsparsec                 C   sJ   |   dks
J d|jdkr!| d|}|jdkr|d}|S | | S )Nr"   z'weight' must be 2-Dr!   r   )rI   r  Zindex_selectrT  )r   r  r  r  r  r   r)   r)   r*   	embeddingd  s   


r  num_weightsc                 C   s   t j| t jjd\}}| |} t|tj}|r8||f}t	|}t
j||g|dd}|| }	| |	d } t||k| j}
| |
d}| |f| j|jd   }t
j||g|dd|S )Nr  Tri  rK   r   )r?   r@   r  r  r7   r   r]   longr  r   rt   rl  rM   rO   r  masked_fillr   )ra   r  r  r  r  r:   r=   countsonesZgrad_weights_scalerr  r   grad_weightr)   r)   r*   embedding_dense_backwardx  s&   


r  c                 C   s   d}| D ]}||9 }q|S rR   r)   )r1   rC   ir)   r)   r*   rk    s   
rk  split_sizesc                 C   s   t tt|| j| kdd  t|}g }d}t|D ])}|| }t |dkdd  t|| | j| k |	| 
||| ||7 }q|S )Nc                   S   r*  )NzDSplit sizes don't add up to the tensor's size in the given dimensionr)   r)   r)   r)   r*   rq     r+  z"split_with_sizes.<locals>.<lambda>r   c                   S   r*  )NzCsplit_with_sizes expects split_sizes have only non-negative entriesr)   r)   r)   r)   r*   rq     r+  )r]   Z_check_with
ValueErrorr   r   r  rL   rs   r   appendr   )rk   r  rI   Z
num_splitsZsplitsZ	start_idxr  lengthr)   r)   r*   split_with_sizes  s$   
r  
split_size.c                    sx   | j }|| } dkr|dksJ | fS |  d   }t|} fddt|D }  | |  |d< t| ||S )Nr   r!   c                       g | ]} qS r)   r)   r0   r  r  r)   r*   r2     rr   zsplit.<locals>.<listcomp>rK   )r   r   rL   r]   split)rk   r  rI   r  dim_sizechunksr  r)   r  r*   r    s   r  mat1mat2c                 C   H   |   s|  st|}t|}|t|| }|dkr|S |||   S rg   )is_floating_point
is_complexintr]   mm)rk   r  r  rX   rb   r   r)   r)   r*   addmm  s   r  use_geluc                 C   s<   t | ||||}|r| jrtj|ddS t|S t|S )Nr   )r   )r  is_cudart   gelurelu)rk   r  r  rX   rb   r  r   r)   r)   r*   _addmm_activation  s   

r  vecc                 C   r  rg   )r  r  r  r]   mv)rk   r  r  rX   rb   r   r)   r)   r*   addmv  s   r  r   rstdgammaNCHxWgroupoutput_maskc
              	      s  t j| ||dd t j|| dd t j|dd t|    k fdd tjfkfdd td u pJ  k fdd t \}
}t|dk fdd t| |	 j
d	gd
}| 	 j
d	gd
}d }d }d }|	d r:d|
  }d urt|d|

d	}t|d|

d	}t|dd|
}n&||

d	}||

d	}t|dtjd|
f|jd}| | | | | | }|  || |  }|d}t|d}t|d}t| |
|t||
| | }||j|j}|	d r_|	|
|	|
d  |d j
dgd
 }|	d	 rk|j
dgd
}|||fS )NF)Zallow_cpu_scalar_tensorsc                      s   d    dS )NzExpect input to have z	 elementsr)   r)   )r  r  r  r)   r*   rq         z,native_group_norm_backward.<locals>.<lambda>c                      s   d  d dj  S )NzExpect mean to have shape (, z
, but got rD  r)   )r  r  r   r)   r*   rq         c                      s$   d  dd ur   S d S )NzExpect gamma to have z elements but got rK   r   r)   )r  r  r)   r*   rq     s   $ r   c                      s   d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r)   r)   )r  r  r)   r*   rq     rr   r"   ro   r[   rK   r!   r#  r   )r?   Zcheck_same_deviceZcheck_same_shaper]   rs   r   r   divmodr   viewr   rM   r   r  r#  rO   r7   r   )ra   r   r   r  r  r  r  r  r  r  ZcpgZ_remZdsdbd_inputZd_gammad_biassZds_valZdb_valc1c2c3r)   )r  r  r  r  r  r   r*   native_group_norm_backward  s   
 
""



$

r  c                 C   s   | d ur	|  |S | S r5   r7   )r1   r   r)   r)   r*   _maybe_castS  s   
r  grad_outnormalized_shapebiasc           !         s0  |j }| }	t|j  fdd| |||fD \}
}}}|
d us$J |	t| }||d  }|d | }g }g }t|	D ]}||krJ|| q>|| q>t|}t|}|dks`|dkr|d ri|	|nd |d rw|	||d  nd |d r|	||d  fS d fS || | }|d ur|
| }n|
}|| }t
||d}t
||}t
||d}t
||}|| | }d }d }d } |d r|| | }|d r|d urt|dkrt
|
| |d}n|
| }|d r|d urt|dkrt
|
|d} n|
 } t||jt||jt| |jfS )Nc                 3   s*    | ]}|d ur|   n|V  qd S r5   )r7   r  r/   r9   r)   r*   r0  h  
    
z-native_layer_norm_backward.<locals>.<genexpr>r   r!   r"   TF)r   rI   r?   get_computation_dtyper   r  rL   r  rk  r  r]   r   r   rt  r  )!r  r   r  r   r  r   r  r  input_shapeZ
input_ndimgrad_out_cast
input_castweight_castZ	bias_castaxisZ
inner_dimsZ
outer_dimsZinner_dim_indicesZouter_dim_indicesr  r  MZx_hatZ
grad_x_hatabr  r  r  rE   r  Zd_weightr  r)   r9   r*   native_layer_norm_backwardZ  sh   





r  running_meanrunning_varmomentum
functionalc	                 C   sT  dgt td|   }	t| j}
|}|}|rt| j}
| j|
d}tj||	ddd\}}t	|| }| | | }t
||	}t
||	}|d ur]|| d| |  }|s]|| |d ur|  | jd  }t
||	}|||d   }|| d| |  }|s|| nT|d ur|d usJ |j|
dd}|}|j|
dd}|}|}dt||  }| jjdkr|}|}n
| d	}| d	}t||  d }t||  d }| | | }|d ur| }t||  d }|| }|d ur	| }t||  d }|| }| jjdkr|j| jd}|j| jd}|j| jd||||fS )
Nr   r"   r   T)rI   Z
correctionr  r!   )r   ru   cpur   )r	  rL   rI   r?   r  r   r7   r]   Zvar_meanrsqrtrT  r   r   r   r   r#  typer  rO   r{  )r   r   r  r  r  r   r  r}  r  Zreduction_dimsr:   new_running_meannew_running_varZ	input_accZ
biased_varr   r  r   	save_mean	save_rstdnZsqueezed_varZunbiased_varinvstdr)   r)   r*   native_batch_norm_helper  st   





r  c              
   C   ,   t | |||||||d	\}}	}
}}||	|
fS NFr  r   r   r  r  r  r   r  r}  r   r  r  rN   r)   r)   r*   native_batch_norm     
r  c              
   C   sv   |d u r|d u rt | |||||S |d u rtd|d u r"td|r0t | |||||||S t | ||||||S )Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)rt   _native_batch_norm_legitr  $_native_batch_norm_legit_no_training)r   r   r  r  r  r   r  r}  r)   r)   r*   native_batch_norm_decomposition  s&   r  c                    s|   |  |}|| d |   dkr4|dkr4 fdd|D }  | |  ||d < tjjj| ||S tjjj|  |S )Nr!   r   c                    r  r)   r)   r   r  r)   r*   r2   ?  rr   z(unsafe_chunk_py_impl.<locals>.<listcomp>)r   r]   opsrt   unsafe_split_with_sizesdefaultunsafe_splitr   )tensorr  rI   r  r  r)   r  r*   unsafe_chunk_py_impl9  s   
r	  c              
   C   s   t j| ||||d||S r  )rt   r  r  )r   r   r  r  r  r  r}  r)   r)   r*   r  E  s   
r  c              
   C   r  r  r  r  r)   r)   r*   r  [  r   r  c           
   
   C   s,   t | ||d d |||d	\}}}}	}	|||fS r  r  )
r   r   r  r   r  r}  r   r  r  rN   r)   r)   r*   !_native_batch_norm_legit_no_statsl  s   	
r
  c              
   C   sP   t | |||||||d	\}}	}
}}|d usJ d|d us!J d||	|
||fS )NTz#new_running_mean should not be Nonez"new_running_var should not be Noner  )r   r   r  r  r  r   r  r}  r   r  r  r  r  r)   r)   r*   #_native_batch_norm_legit_functional{  s   r  c                 C   sB   |d u sJ t | |k jt jd}|| |  d|  }||fS )Nr   r[   )r]   r  r7   uint8rs  )r   r   r   rr  r  r)   r)   r*   _fused_dropout_decomposition  s   r  c                 C   s   t | tjjr
| jS d S r5   )r.   r]   Z_subclassesZ
FakeTensorZfake_device)r  r)   r)   r*   device_hint  s   r  c                 C   sD   |d ur | j jdkr ddlm} | }d|_|j}||| |S | S )Nmetar   )FakeTensorModeT)r#  r  Ztorch._subclasses.fake_tensorr  Zin_kernel_invocationZfake_tensor_converterZfrom_meta_and_device)r1   common_devicer  Z	fake_mode	converterr)   r)   r*   wrap_output_with_input_device_  s   r  )r   layoutr#  
pin_memorynon_blockingr   r#  r  r  r   c          	      C   s   |r|t jksJ d|rJ d|d u r!|d u r!|d u r!|  S d}t| }|d urI|| jkrI|d urB|jdkrBt j| |} d}t j| |} |d urX|sXt j| |} d}|r_t	| |} |d urjt j| |dS | S )NTODOFr  Tr   )
r]   stridedrt  r  r#  r  _primsZconvert_element_typeZ
device_putr  )	r1   r   r  r#  r  r  r   Zdtype_convertedr  r)   r)   r*   _to_copy  s&   
r  c                 C   s
   t | S r5   )rt   aliasr8   r)   r)   r*   nop_decomposition  s   
r  exponential_average_factorepsilonc              
   C   s^   t | |||||||\}}	}
|r||	|
| jdtjdfS ||d|d| jdtjdfS )Nr  r   )rt   r  r  r]   r  )r   r   r  r  r  r   r  r  r  r  rI  r)   r)   r*   cudnn_batch_norm  s"   
r  c                 C   sD   t |D ]\}}|dkr|| jk r| j| || ks| |} q| S rR   )	enumerater  r   rM   )r1   broadcast_maskr  rr  r)   r)   r*   _broadcast_batch_norm_backward  s
   $
r"  r  save_invstdc
           &         s  |j }
|d ur|j }n|
}t|j   fdd| ||||||fD \}}}}}}}|j}| }|dks9J dd}tt|||  }|}|}|rV|d urS|d usUJ n|d ur^|d us`J |}t|| }dg| }|| ||< g }t	|D ]}||kr|
| qzt||}d| }t||}t|||  |}t|| |}tt|| || |} |d u rt||d }!nt|| |}!|r|| |  }"||" | |! }#n||! }#|	d r|| }$nd }$|	d r|}%nd }%|#|
t|$|t|%|fS )Nc                 3   s&    | ]}|d ur|  n|V  qd S r5   r  r/   r9   r)   r*   r0  !  s
    
z-native_batch_norm_backward.<locals>.<genexpr>r"   z$rank of the input must be at least 2r!   r[   )r   r?   r  r   rI   rk  r	  r]   r  rL   r  r"  r   r   r7   r  )&r  r   r   r  r  r  r#  r  r}  r  r  Zweight_dtyper  r  r  Zrunning_mean_castZrunning_var_castZsave_mean_castZsave_invstd_castr  Z
input_rankr  Znum_featuresr   r  r!  Zreduction_axesr  r   Zgrad_output_sumZdot_pZ	grad_meanZ
proj_scaleZ
grad_scaleZprojr   r  Z	grad_biasr)   r9   r*   native_batch_norm_backward  s   
	



r$  save_varreserveSpacec	           	      C   s    t || |||||d|g d
S )NT)TTT)rt   r$  )	r   ra   r   r  r  r  r%  r  r&  r)   r)   r*   cudnn_batch_norm_backwardj  s   r'  c                    s"  | j  | jttdv fdd | jdd  D ]}t|dkfdd q| jtjtjtjtj	tj
fv rCtjj| |S d |d  dkrd |d  dkrtdd	 tdd  |D }td
d	 tdd  ||D }tjj| ||S dd dd  fdd}|d |d \}}}}	|d |d \}
}}}| dt|d|
f }|	s|stj|ddS dd }|||||	dd\}}|||||dd\}}d }tt|jd t|jd D ]\}}|d u r|d|d d |f }q||d|d d |f  }q|||  S )Nr=  c                      
   d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r)   r)   r  r)   r*   rq        
 z%adaptive_avg_pool2d.<locals>.<lambda>r   r   c                      s   dt   dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape rc  rB  r)   rD  r)   r*   rq     s    rK   c                 s   s    | ]	\}}|| V  qd S r5   r)   )r0   r  rh  r)   r)   r*   r0        z&adaptive_avg_pool2d.<locals>.<genexpr>c                 s   s&    | ]\}}}||d  |  V  qdS )r!   Nr)   )r0   r  rh  r  r)   r)   r*   r0    s    
c                 S   s   t j| | |ddS )NtruncZrounding_moder]   divr  r  rI  r)   r)   r*   start_index  s   z(adaptive_avg_pool2d.<locals>.start_indexc                 S   s    t j| d | | d |ddS )Nr!   r,  r-  r.  r0  r)   r)   r*   	end_index      z&adaptive_avg_pool2d.<locals>.end_indexc                    s   t j| t jd}||| }| | d }| | }|dkp"|| dk }|r+|d7 }n|dkr3|d8 }t j| t jd}|d| }|rbt j| d |j|jd}	t ||	}||| }
|
| }n|}||||fS )Nry  r!   r   rK   r"  )r]   r$  r%  rM   Zscalar_tensorr   r#  minimum)in_sizeout_sizeZorangeZi0Z	maxlengthZin_size_modadaptive	range_maxrp  maxvali1r  )r#  r2  r1  r)   r*   compute_idx  s(   

z(adaptive_avg_pool2d.<locals>.compute_idx.r   )r   rK   ro   c                 S   s`   t |tr	| |fS |dk sJ ||dk}|dkrt|d}t| |d} t|| }| |fS )Nr   rK   r   r   r   )r.   r   rM   rO   r]   r  )valsr  r8  r7  rI   rr  r)   r)   r*   
maybe_mask  s   

z'adaptive_avg_pool2d.<locals>.maybe_mask)r7  rI   r   )r#  r   r  r]   rs   r   int8r  Zint16rz  r%  nnr  adaptive_avg_pool2drC  rR  Z
avg_pool2drO   r   r   rL   )r   rO  r@  r
  Zkernelr;  ZidxhZlength_hZrange_max_hZ
adaptive_hZidxwZlength_wZrange_max_wZ
adaptive_wr<  r=  retr  jr)   )r#  r2  r  r   r1  r*   r@    sR   

(  



&r@  r  r  c                C      t | |||d|dS )NTinplacerb   
_index_addr1   rI   r  r  rb   r)   r)   r*   
index_add_  s   	rI  c                C   rC  )NFrD  rF  rH  r)   r)   r*   	index_add  s   
rJ  rE  c                   s   t | j|}tjdkfdd  dkr7t | jttkp+t t	  fdd |  }| jdk}|rC| 
dn| }d| f }|rQtjntj}	|	|||dd}
|r`| S |rg|
dS |
 S )	Nr!   c                         d j  dS Nz(Index should have dimension 1 or 0 (got r   r)  r)   r  r)   r*   rq         z_index_add.<locals>.<lambda>c                      s   dt   d dS )Nzalpha argument of type z cannot be safely cast to type !)r  r)   )rb   python_typer)   r*   rq     s    r   r5   Tri  )r?   canonicalize_dimsr  r]   rs   Zdtype_to_typer   r  Zis_weakly_lesser_typer  rM   rt   
index_put_	index_putrT  r  )r1   rI   r  r  rE  rb   zero_dimr   rp  rS  r   r)   )rb   r  rP  r*   rG  	  s*   	

rG  c                 C      t | |||ddS )NTrE  _index_copyr1   rI   r  r  r)   r)   r*   index_copy_+  r   rZ  c                 C   rU  )NFrV  rW  rY  r)   r)   r*   
index_copy0  s   r[  c          
         s   t | j|}t jdk fdd | jdk}|r | dn| }d|  f }|r.tjntj}||||}	|r;| S |rB|		dS |	
 S )Nr!   c                      rK  rL  r)  r)   rM  r)   r*   rq   <  rN  z_index_copy.<locals>.<lambda>r   r5   )r?   rQ  r  r]   rs   rM   rt   rR  rS  rT  r  )
r1   rI   r  r  rE  rT  r   rp  rS  r   r)   rM  r*   rX  6  s   

rX  c                 C   sL   t | d| }t t |  }| jr| d}n|}|t | |fS )Nr)   r  )r]   r4  r  r\   r   r  r   )rk   rz   r_   r   r)   r)   r*   log_sigmoid_forwardK  s   r\  r   lowhighc                 C   s"   t j| jt|t|| j| jdS )N)r]  r^  r   r#  )primsZ_uniform_helperr   r   r   r#  )r1   r]  r^  r)   r)   r*   r   X  s   r   c                 C   s   |d u sJ |  t| ||S r5   )r   r   )rk   r]  r^  r   r)   r)   r*   uniform_g  s   r`  c                 C   s   t | d }|d ur"t|d u dd  tt ||kdd  |S |d urjt|d u dd  tt ||kdd  g }t|D ]%\}}t||krZ|| |d  t|  qB|t| |d  |  qB|S tddd  d S )	Nr"   c                   S   r*  Nz9Must specify exactly one of output_size and scale_factorsr)   r)   r)   r)   r*   rq   s  r+  z.upsample_compute_output_size.<locals>.<lambda>c                   S   r*  N r)   r)   r)   r)   r*   rq   u  r+  c                   S   r*  ra  r)   r)   r)   r)   r*   rq   {  r+  c                   S   r*  rb  r)   r)   r)   r)   r*   rq   }  r+  Fc                   S   r*  ra  r)   r)   r)   r)   r*   rq     r+  )r  r]   rs   r   r  r  r   )rw  rO  scale_factorsZspatial_dimensionsr  r  r)   r)   r*   upsample_compute_output_sizen  s.   re  c                 C   s   | d u rd S | | S r5   r)   )scalesrp  r)   r)   r*   get_scale_value  s   rg  c                 C   s&   t |  ||}t|d}t| ||S rg   )re  r   rg  upsample_nearest1d)r   rO  rd  osizerc   r)   r)   r*   upsample_nearest1d_vec  s   
rj  c                 C   s2   t |  ||}t|d}t|d}t| |||S Nr   r!   )re  r   rg  upsample_nearest2d)r   rO  rd  ri  scale_hscale_wr)   r)   r*   upsample_nearest2d_vec  s   

ro  c                 C   s>   t |  ||}t|d}t|d}t|d}t| ||||S r   )re  r   rg  upsample_nearest3d)r   rO  rd  ri  Zscale_drm  rn  r)   r)   r*   upsample_nearest3d_vec  s
   


rq  c                 C   s   g }t |}| jtjkrtjn| j}t|D ]F}|| }tj||| jd}| j| |  }	|| d ur;|	|	||   n|	| }
||
 	tj
}t|d | D ]}|d}qO|| qt|S )Nr"  r!   rK   )r  r   r]   r  r  rL   r$  r#  r   r7   r%  rM   r  rC  )r   rO  rf  r  Znum_spatial_dimsr  r@  ri  Zoutput_indicesisizerc   Zinput_indicesrN   r)   r)   r*   !_compute_upsample_nearest_indices  s   $rs  rf  c                 C   s"   t | ||f\}t| d d |fS r5   rs  rt   _unsafe_index)r   rO  rf  Z	l_indicesr)   r)   r*   rh    s   rh  scales_hscales_wc           
      C   sj   t | |||f\}}t| d d ||f}t| }| j\}}	}}| jjdkr-|	dk r-tj	}|j
|d}|S )Ncudar   r   )rs  rt   ru  r?   ru  r   r#  r  r]   r   r  )
r   rO  rv  rw  	h_indices	w_indicesr   r   rN   
n_channelsr)   r)   r*   rl    s   	

rl  scales_dc           	      C   s2   t | ||||f\}}}t| d d |||f}|S r5   rt  )	r   rO  r|  rv  rw  Z	d_indicesry  rz  r   r)   r)   r*   rp    s
   

rp  c                    sb   |r|rd n|rd n|rd nd t   dks!J t  fddtdt  D S )NrP  r   rx   r"   r   c                    s    g | ]}t ||   qS r)   rB  r  Z
group_sizeparamsr)   r*   r2   	  s    z!gather_params.<locals>.<listcomp>)r  rL   )r~  
has_biaseshas_projectionsr)   r}  r*   gather_params	  s   r  c                 C   sh   |r!| d|  |d|  }}| d| d  |d| d  }}n| | || }}d\}}||||fS )Nr"   r!   NNr)   )r~  hiddensr  bidirectional
cur_params
cur_hiddenbidir_paramsbidir_hiddenr)   r)   r*   params_hiddens	  s   $r  c                 C   s2   ||ksJ | | d|||  | dd|S rg   )r  r   )r  last_batch_size
batch_sizer  r)   r)   r*   update_hidden_for_packed	  s   r  c              	   C   s4   ||kr| S ||k sJ t | |d||| fS rg   )r]   concatr   )r  r  r  Z
inp_hiddenr)   r)   r*    update_hidden_for_packed_reverse"	  s   r  c                 C   s$  |d }|d }|r|d nd }	|r|d nd }
g }g }|r"|d n|d }| dd|}t| t|}|r>|d d d }|D ]-} | jd }||krLn|rVt||||}nt||||}|| |||	||
}|}|| q@|ru|  n	|| |  t	|d}|st	|dn|}||fS )Nr   r!   r"   rx   rK   )
r   r]   r  r	  r   r  r  r  reverser   )inphiddenr~  r  	hidden_fnbatch_sizesr  	ih_weight	hh_weightih_biashh_biasstep_outputr  r  r  	split_inpr  r   
hidden_outr)   r)   r*   one_layer_rnn_data0	  s@   


r  c                        fdd}|S )Nc                    s    t ||||  S r5   r   linearr  r  r  r  r  r  nonlinearityr)   r*   rE   _	  s   zrnn_cell.<locals>.innerr)   r  rE   r)   r  r*   rnn_cell^	  s   r  c                    r  )Nc                    s$   t | ||}  t ||||  S r5   r  r  r  r)   r*   rE   f	  s   zrnn_cell_data.<locals>.innerr)   r  r)   r  r*   rnn_cell_datae	  s   r  c                 C   s   |d }|d }|r|d nd }|r|d nd }	t | ||}
|r&|
dn|
}
|d}g }|
D ]}|||||||	}|| q1|rH|  t|d}||dfS )Nr   r!   r"   rx   )	r   r  fliprM   r  r  r]   r   rT  )r  r  r~  r  r  r  r  r  r  r  precomputed_inputr  r  r  r   r)   r)   r*   one_layer_rnnm	  s   
r  c                 C   s   |d }|d }|r|d }|d }nt | }t | }|d d}	|d d}
g }d}|	d}d}d}d}d}|  } |	 }	|
 }
t jjj| |||||	|
|||||||||}|d |d |d }}}||	d|	dffS )Nr   r!   r"   rx   F)
r]   Zzerosr   rM   r  r  rt   Zmkldnn_rnn_layerr  rT  )r  r  r~  r  r  Zw0Zw1Zw2Zw3hxcxr  modeZhidden_size
num_layersr  batch_firstr  ZoutputsrQ   hycyr)   r)   r*   mkldnn_one_layer_lstm	  sN   


r  c
                 C   s   |r|  ddn| } g }
t|D ]^}t||||\}}}}|r'||d k r'|nd}|	| |||\}}|
| |rI|	| |||dd\}}|
| |rXt||g| d } n|} |dkrn|rn||d k rntj| |dd} q|rw|  ddn| } | |
fS )Nr   r!   r   T)r  )r  )	transposerL   r  r  r]   r   rI   r  )r   r  r~  r  r  r  r  r  r  layer_fnfinal_hiddensr  r  r  r  r  Zfwd_inpZ
fwd_hiddenZbwd_inpZ
bwd_hiddenr)   r)   r*   _rnn_helper	  s,   



r  c	                 C   R   | d}	t||d}t| |	|||||||ttttjd
\}
}|
t|dfS Nr   Fr  )	unbindr  r  r   r  r  r]   r   stackr   r  r~  r  r  r  r  r  r  r  r   r  r)   r)   r*   rnn_tanh_input	     
r  c	                 C   r  r  )	r  r  r  r   r  r  r]   r  r  r  r)   r)   r*   rnn_relu_input	  r  r  c	                 C   T   | d}	t||d}t| |	||||||dtt|ttjd
\}
}|
t|dfS Nr   Fr  r  )	r  r  r  r   r  r  r]   r  r  datar  r  r~  r  r  r  r  r  r  r   r  r)   r)   r*   rnn_relu_data
  &   
r  c	                 C   r  r  )	r  r  r  r   r  r  r]   r   r  r  r)   r)   r*   rnn_tanh_data?
  r  r  c                 C   s   t ||||  }|d|}|d  }	|d  }
|d  }|d  }|
| |	|  }||  }|d u r;|nt ||d }||fS )Nr   r   r!   r"   rx   r   r  chunkr   r   )r  r  r  r  r  	hr_weight	chunk_dimZgatesZchunked_gatesZin_gateZforget_gateZ	cell_gateZout_gater  r  r)   r)   r*   	lstm_cellb
  s   r  c              
   C   s   |d }|d }|r|d nd }|r|d nd }t |dkr"|d nt |dkr,|d nd }	|d d}
|d d}t| ||}|rJ|dn|}g }|D ]} t| |
||||	dd\}
}||
 qP|rk|  t	|d}||

d|
dffS )Nr   r!   r"   rx   rP  r   r  )r  rM   r   r  r  r  r  r  r]   r   rT  )r  r  r~  r  r  r  r  r  r  r  r  r  r  r  r   r)   r)   r*   one_layer_lstmp
  s$   *r  c              
   C   s
  |d }|d }|r|d nd }|r|d nd }	t |dkr"|d nt |dkr,|d nd }
g }g }|r8|d n|d }t| t|}|rM|d d d }|d }|d }|dd||dd|}}|D ]l} | jd }t| ||} ||k r||d||| |d||| f |dd||dd|}}||krt	||d||| fd}t	||d||| fd}t
| ||||	|
dd\}}|}|| qf|r|  ||f}n|||f |  t| \}}t|dt|df}t|d}||fS )	Nr   r!   r"   rx   rP  r   rK   r  )r  r]   r  r	  r   r   r   r  r  r  r  r  rR  r   )r  r  r~  r  r  r  r  r  r  r  r  r  r  r  r  Zorig_hxZorig_cxr  r  r  r  Zhidden0Zhidden1r   r)   r)   r*   one_layer_lstm_data
  s\   *

r  c                 C   s   dd }|| ||rt S tS )a   Check whether we could use decompose lstm with mkldnn_rnn_layer.
    All the below conditions need to be met:
        * ``torch._C._has_mkldnn`` returns ``True``.
        * All the input args are on CPU.
        * The dtypes of args are either torch.float or torch.bfloat16.
        * Inference.
        * ``has_projections`` returns ``False``.

    Args:
        * input: the input sequence to LSTM
        * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM
        * params: the weight and bias tensors of LSTM
    c           	      S   s   t jjsdS | gt| tt| }dd |D }t|dkr#dS | }|t dkr0dS dd |D }|D ]}|t j	t j
fvrF dS q9| jrLdS |d d|d dk}|r^dS d	S )
NFc                 S      h | ]}|j qS r)   r  r0   tr)   r)   r*   	<setcomp>
      zEselect_one_layer_lstm_function.<locals>.use_mkldnn.<locals>.<setcomp>r!   r  c                 S   r  r)   r   r  r)   r)   r*   r  
  r  r   r"   T)r]   _CZ_has_mkldnnr	  r   from_iterabler  popr#  r  Zbfloat16requires_gradr   )	r   r  r~  ZtensorsZdevicesr#  Zdtypesr   r  r)   r)   r*   
use_mkldnn
  s(   z2select_one_layer_lstm_function.<locals>.use_mkldnn)r  r  )r   r  r~  r  r)   r)   r*   select_one_layer_lstm_function
  s   r  c	                 C   s   t |dks
J dt|||d d|d dk}tt|d |d }	t| ||}
t| |	||||||||

\}}tt| }|t|d dt|d dfS )Nr"   lstm expects two hidden statesr   r!   )	r  r  r   r	  rR  r  r  r]   r  )r   r  r~  r  r  r  r  r  r  r  r  r   r  r)   r)   r*   	lstm_impl
  s$   $"r  c	                 C   s   t |dks
J dt|||d d|d dk}tt|d |d }	t| |	||||||dtt|d
\}
}tt| }|
t	|d dt	|d dfS )Nr"   r  r   r!   F)r  )
r  r  r   r	  rR  r  r   r  r]   r  r  r)   r)   r*   lstm_data_impl  s"   $
"r  c                 C   sr   |  dd}t||| dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nrx   r!   r"   r   )r  r   r  r   r   r  r  r  r  r  r  Zchunked_igatesZchunked_hgatesZ
reset_gateZ
input_gateZnew_gater)   r)   r*   gru_cell?  s   r  c                 C   s|   t | ||dd}t |||dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nrx   r!   r   r"   r  r  r)   r)   r*   gru_cell_dataH  s   r  c	                 C   sJ   t ||d}t| |d||||||dtt|td
\}	}
|	t|
dfS )NFr   r  )r  r  r  r   r  r  r]   r  )r  r  r  r~  r  r  r  r  r  r   r  r)   r)   r*   gru_impl_dataQ  s   r  c	                 C   sH   t ||d}t| |d|||||||tttd
\}	}
|	t|
dfS )NFr   r  )r  r  r  r   r  r  r]   r  )r   r  r~  r  r  r  r  r  r  r   r  r)   r)   r*   gru_implo  s   
r  c                 C   s:   t |  ||}t|d}t|d}tjj| ||||S rk  )re  r   rg  r]   r  rt   _upsample_bilinear2d_aar   rO  align_cornersrd  ri  rm  rn  r)   r)   r*   upsample_bilinear2d_aa_vec  s   


r  c                 C   s4   t |  ||}t|d}t|d}t| ||||S rk  )re  r   rg  upsample_bilinear2dr  r)   r)   r*   upsample_bilinear2d_vec  s   

r  r  c           $      C   sf  | j \}}}}|d }	|d }
|	dkr+|r|d |	d  }n|d ur&d| n||	 }nd}|
dkrI|r<|d |
d  }n|d urDd| n||
 }nd}tj|	| j| jd}tj|
| j| jd}|rj|| }|| }n||d  d jdd}||d  d jdd}|tj}t|j|d dtj}|tj}t|j|d dtj}|	d}|	d}|	d}t
| d d ||g}t
| d d ||g}t
| d d ||g}t
| d d ||g}|| }d| }|| }d| }t||t|| } t||t|| }!t| |t|!| }"t| }#| jjd	kr+|d
k r+tj}#|"j|#d}"|"S )Nr   r!   r[   r   r"  r   ry   r|   rx     r   )r   r]   r$  r   r#  r   r7   r%  ceilrM   rt   ru  r   r?   ru  r  r   r  )$r   rO  r  rv  rw  Zn_batchr{  Zin_hZin_wrn  ro  Zh_scale_factorZw_scale_factorr  rB  r1   rQ   Zx_floorZx_ceilZy_floorZy_ceilZx_viewZx_floor_viewZx_ceil_viewZv1Zv2Zv3Zv4Zxscale2Zxscale1Zyscale2Zyscale1q1q2r   r   r)   r)   r*   r    sV   




r  r  r  c                 C   s   | j |j kS r5   rD  )r  r  r)   r)   r*   is_same_size  rn   r  c                 G   ri   r5   )rt   r  )r1   r   rA   r)   r)   r*   _reshape_alias  rn   r  c                 C   ri   r5   )rt   r  )r1   r  r)   r)   r*   _index  rn   r  c                 C   sV  |   }d}|dk rd}|d ur,|dkr&dg| }|jd ||< ||}n|}| | } t||k|d}	|	|}
t| ||
| }t||k|d}|tj	j
krb|dkrb| dd}||fS |d ur|| j}t|||
|}t||k|d}| }n	||k | }|tjj
kr| }||fS |tjj
kr| | }||fS )Nr!   r"   r   r)   r   )rI   r   r  r]   r^   rM   gatherrT  r    r&   rl   r   expandr   r7   r(   r'   )rk   r   r   r   r   Zn_dimsr   r   wr   Zsafe_target_r   r   Zwsumr)   r)   r*   _nll_loss_forward  sB   


r  c                 C   s   |   dkr|   dksJ d|  dksJ d|   dko%|  dk}|s?| jd |jd ks?J d| j d|j d| jd	 }|d u s_|  dkrT| |ks_J d
| d|j t| ||||S )Nr   r"   r   r!   r   r   r   r   rK   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rI   r   r   r  )rk   r   r   r   r   r   Z	n_classesr)   r)   r*   nll_loss_forward8  s    
r  c                 C   s   t | ||||S r5   )r  )rk   r   r   r   r   r)   r)   r*   nll_loss2d_forwardS  s   r  Ac                 C   s    |d |  |d  |  |  d S )Nr"   rx   r!   r)   r1   r  r)   r)   r*   _upsample_cubic_convolution1`  r3  r  c                 C   s(   ||  d|  |  d|  |  d|  S )NrP     r   r)   r  r)   r)   r*   _upsample_cubic_convolution2d  s   (r  r  c                 C   s4   d}t | d |t| |td|  |t d|  |fS )Ng      r[   r   )r  r  )r  r  r)   r)   r*    _upsample_get_cubic_coefficientsh  s   r  coeffstsc                 C   s    t |}tdd t| |D S )Nc                 s   s    | ]	\}}|| V  qd S r5   r)   )r0   r  r  r)   r)   r*   r0  t  r+  z+_upsample_cubic_interp1d.<locals>.<genexpr>)r  _sum_tensorsrR  )r  r  Zcoeffs2r)   r)   r*   _upsample_cubic_interp1dr  s   r  c                 C   s   t tj| S r5   )r   r]   add)r  r)   r)   r*   r  x  s   r  	num_stepsc                 C   sB   | dkrt jd||dS |s| d |  nd}t j| || ||dS )Nr!   r   ry  )Zstepsr#  r   )r]   r  Zlinspace)r  r  r   r#  r  r)   r)   r*   _linspace_from_neg_one|  s   r  thetahr  c           	      C   s   | j }| j}t||||d|d}t|||||dd}tjd||d}tjjj|dddd}tjjj|dddd}tjjj|d	ddd}|| | S )
Nr!   )r!   r!   r!   r"  )r   r"   constantr   rG  r  rl   r!   r!   )r"   r   	r   r#  r  r  r]   r  r?  r  rG  )	r   r  r  r  r   r#  grid_xgrid_ygrid_oner)   r)   r*   _make_base_grid_4d  s   r	  r@  c                 C   s   | j }| j}t||||dd|d}t||||d|dd}t|||||ddd}	tjd||d}
tjjj|dddd}tjjj|dddd}tjjj|	d	ddd}	tjjj|
d
ddd}
|| |	 |
 S )Nr!   )r!   r!   r!   r!   r"  )r   rx   r  r   r  r_  )r"   r!   )rx   r   r  )r   r@  r  r  r  r   r#  r  r  Zgrid_zr  r)   r)   r*   _make_base_grid_5d  s   r
  c           	      C   sL   |\}}}}t | |||d}|ddd| jd d}||||dS )Nr  rK   rx   r!   r   r"   )r	  r  r   rM   r   )	r   r   r  r  rN   r  r  	base_gridgridr)   r)   r*   _affine_grid_generator_4d  s    r  c           
      C   sR   |\}}}}}t | ||||d}|ddd| jd d}	|	||||dS )Nr  rK   r   r!   r   rx   )r
  r  r   rM   r   )
r   r   r  r  rN   r@  r  r  r  r  r)   r)   r*   _affine_grid_generator_5d  s    r  c                 C   s@   t t|dv dd  t|dkrt| ||dS t| ||dS )N)r   rP  c                   S   r*  )NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r)   r)   r)   r)   r*   rq     r+  z'affine_grid_generator.<locals>.<lambda>r   r  )r]   rs   r  r  r  )r   r   r  r)   r)   r*   affine_grid_generator  s   
r  r  interpolation_modepadding_modec                    s  t dv fdd t dv fdd dtdtdtffdd	dtd
tdtdtfdddtdtdtffdddtdtdtffdd}j\}
|j\}}dtdtdtf
fddt jjddddt j|jdd|dd dtdtdtdtffdddtdtdtf fdd|d }|d  }	d!kr||}
||	
}|
	 |	 d }}d }}||}}||
 ||  }|
| ||  }||
 ||  }|
 |  }t
fd"d#|f|||f|||f|||ffD S dkr4||}
||	
}|
 }| }||dS |}
|	
}|
	 |	 |
 | }dtdtdtf
fd$d%	d&tdtf	fd'd(tfd)d#td*D }t||dS )+N)r   r!   r"   c                      r(  )NzInvalid interpolation mode r)   r)   )r  r)   r*   rq     r*  z!grid_sampler_2d.<locals>.<lambda>c                      r(  )NzInvalid padding mode r)   r)   )r  r)   r*   rq     r*  coordsr   rJ   c                    s0    r|d d n|d }|d d }| | | S Nr   r)   )r  r   r   ofsr  r)   r*   unnormalize  s   z$grid_sampler_2d.<locals>.unnormalize	twice_low
twice_highc                 S   sv   ||kr	t | S |d }|| d }| |  }t ||}||  jt jd}t |d@ dk|| || | S )Nr"   r   r!   r   )r]   r   r   fmodfloorr7   r>  r^   )r  r  r  Z
coords_minZcoords_spanZcoords2extraZflipsr)   r)   r*   reflect_coordinates  s   
z,grid_sampler_2d.<locals>.reflect_coordinatesc                    sf   dkr| S dkrt | d|d S  r | dd|d  }n
| dd| d }t |d|d S )Nr   r!   r"   rK   r~   )r  r   Zcoords_reflected)r  r  r  r)   r*   compute_coordinates  s   z,grid_sampler_2d.<locals>.compute_coordinatesc                    s   | |} ||S r5   r)   )r  r   Z	coords_un)r  r  r)   r*   compute_source_index  s   

z-grid_sampler_2d.<locals>.compute_source_indexxsysc                    s,   t d| kt | k t d|k| k S rg   )r]   r  )r  r   )iHiWr)   r*   in_bounds_cond  s   $z'grid_sampler_2d.<locals>.in_bounds_condr  r!   wsc                    s@   | | t  fdd| jtjd|jtjd|fD S )Nc                 3   s*    | ]}t |d  dV  qdS r   r!   N)r]   r^   r  r  )r  r9  oHoWr)   r*   r0    r  z0grid_sampler_2d.<locals>.clip.<locals>.<genexpr>r   )rC  r7   r]   r%  )r  r   r$  )r  r#  r&  r'  )r9  r*   clip  s   
zgrid_sampler_2d.<locals>.clipixiyc                    s&   | ||\}}} ||f | S r5   r)   )r)  r*  r  Zidx_xZidx_yZw_)C_idxN_idxr  r(  r)   r*   get_summand  s   z$grid_sampler_2d.<locals>.get_summand).r   ).r!   r   c                 3   s"    | ]\}}} |||V  qd S r5   r)   )r0   r)  r*  r  )r-  r)   r*   r0  -  
    

z"grid_sampler_2d.<locals>.<genexpr>c                    s     | } |}||dS rR   r)   )r)  r*  r1   rQ   )r  r-  r!  r"  r)   r*   get_value_boundedH  s   

z*grid_sampler_2d.<locals>.get_value_boundedr  c                    sL   | d  } d | | d | d |f}t |dS )Nr!   r"   )r  rM   )r  Ziy_ofscs)r/  ix_nwiy_nwtxr)   r*   	get_coeffM  s   z"grid_sampler_2d.<locals>.get_coeffc                 3       | ]} |V  qd S r5   r)   )r0   r  )r4  r)   r*   r0  W  r1  r   )r]   rs   r   r  r   r$  r#  r  r   r  r  roundrC  rL   r  rM   )r  r  r  r  r  r  r  rN   r1   rQ   r)  r*  Zix_neZiy_neZix_swZiy_swZix_seZiy_seZw_nwZw_neZw_swZw_seZ
ix_nearestZ
iy_nearesttyr  r)   )r+  r  r,  r  r  r(  r  r4  r-  r/  r!  r"  r#  r  r1  r2  r&  r'  r  r  r3  r  r*   grid_sampler_2d  sl   	
$ 




	



 
r8  c                    s`   t   dko dk fdd t  ddk fdd   jddS )Nr"   r!   c                      s   d    d   S )Nzmatrix @ vector expected, got r  ro   r)   rk   r  r)   r*   rq   a  r  zmv.<locals>.<lambda>r   c                      s*   d  d d  d d d dS )Nzsize mismatch, got input (r   r1   r!   z), vec (r   )r   r)   r9  r)   r*   rq   e  s   * ro   )r]   rs   rI   r   r   r9  r)   r9  r*   r  [  s   r  c                    s     r, r   rt    S t  S   r,t  S t dko9  dk fdd tj jk fdd  fdd}t	  	 k|   
 S )Nr!   c                      s   d   d    dS )Nz1D tensors expected, but got zD and z	D tensorsro   r)   r   rk   r)   r*   rq   y  s    zdot.<locals>.<lambda>c                      s   dj  d j  S )Nz:dot : expected both vectors to have same dtype, but found  and r   r)   r:  r)   r*   rq   }  r  c                	      s.   d   d    d   d    d	S )Nz+inconsistent tensor size, expected tensor [z] and src [z.] to have thesame number of elements, but got r;  z elements respectivelyr  r)   r:  r)   r*   numel_error  s   zdot.<locals>.numel_error)r  Zis_conjr]   dotZconjZvdotrs   rI   r   r   r   )rk   r   r<  r)   r:  r*   r=  j  s$   
r=  c                 C   s   |   d}|d ur+|d | d }d| |  ||  |  |    |   }nd| |  | |  |  |     }|d urI|| }t||S rk  )r   r\   r   r   )rk   r   r   Z
pos_weightr   r   Z
log_weightr   r)   r)   r*    binary_cross_entropy_with_logits  s   

r>  tensor1tensor2c                 C   s   | j |j kr
| |fn|| f\}}|j dkr|j dksdS |jr!dS | j dkr(dS | dkr0dS |j}| }tdd t|d d |d	d
 |d	d
 D S )Nrx   r"   FTr   c                 s   s"    | ]\}}}||| kV  qd S r5   r)   )r0   Zst1Zst2s2r)   r)   r*   r0    r.  zshould_fold.<locals>.<genexpr>r   r!   rK   )r  r  r   r   r
  r4  rR  )r?  r@  t1t2Zt1_shapeZ	t1_strider)   r)   r*   should_fold  s    
"rD  c                 C   sj  |   }|  }|dkr|dksJ |dkr |dkr t| |S |dkr.|dkr.t| |S |dkrD|dkrDttt| d|dS |dkrR|dkrRt| |S t| |r||k}|r`|jn| }|sf|n	|dkrn| 	 n| }|j
}t|d d }ttj|}	|  dk}
|
r||j
d  ||	|d }|
r|||}|r|j S |S |||S |dkr|dkr|dkr| dnd}| d}| j
d d }|dkr|dn|d}|dkr|dnd}g }t|d D ]
}||| q|dkr:|dkr:|d |d kr:|d dkr'| jr't| d|S |d dkr:|jr:t| |dS tt||}|||g }t|}| ||||}|dk}|ro||g }||||d}n|||g }|||||}|}|dkr|| |dkr|| |r||d|S |||S tddd	  d S )
Nr   r!   r"   rK   r   rx   Fc                   S   r*  )Nz/both arguments to matmul need to be at least 1Dr)   r)   r)   r)   r*   rq   .  r+  zmatmul.<locals>.<lambda>)rI   r]   r=  r  rT  r  rM   rD  r   r  r   r	  r   operatorr   r  r   r  r  r   rL   r  r   Zbroadcast_shapesrk  r  Zbmmrs   )r?  r@  Zdim_tensor1Zdim_tensor2r  rB  rC  Zsizes_1Zoutput_shapeZfolded_dim1Zt2_is_matrixZ	t1_foldedr   r  m1Zbatch_tensor1m2r   Zbatch_tensor2r  Zexpand_batch_portionZtensor1_expand_sizeZexpand_batch_productZtensor1_expandedZ
vector_rhsZtensor2_expand_sizeZtensor2_expandedr)   r)   r*   r     s   
	










r   rm  rn  c                    s  j \}}|\}}ddd}	dd }
|	|||}|	|||}tj|jd|dddtj|jdd|dd tj|jddd|df}tj|jdddd|f}|
|||}| }|| |jtjd}|
|||}| }|| }|jtjd}|d ||d |d f}|d ||d |d f fd	d
fddtfdd|D }t	||}t
}|j|d}|S )Nc                 S   s@   |r|dkr| d |d  S dS |d ur|dkrd| S | | S )Nr!   r   r)   )r5  r6  r  rc   r)   r)   r*   compute_scale=  s    z1upsample_bicubic2d_default.<locals>.compute_scalec                 S   s   |r| | S | |d  d S r  r)   )rc   Z	dst_indexr  r)   r)   r*   r  C  s   z8upsample_bicubic2d_default.<locals>.compute_source_indexr  r!   r   r"   c                    s8   t | dd }t |dd }t ||gS rk  )r]   r   rt   ru  )r   r  Zy_idxZx_idx)r+  r,  r  r!  r"  r)   r*   load_bounded^  s   z0upsample_bicubic2d_default.<locals>.load_boundedc                    s"   t  fddD }t|S )Nc                 3   s    | ]} |V  qd S r5   r)   )r0   Zx_ofs)rI  rQ   r)   r*   r0  d  s    zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>)rC  r  )rQ   Zcoeffs_x)ixs_ofsrI  t_x)rQ   r*   get_x_interpc  s   
z0upsample_bicubic2d_default.<locals>.get_x_interpc                 3   r5  r5   r)   )r0   Zy_ofs)rL  r)   r*   r0  g  r1  z-upsample_bicubic2d_default.<locals>.<genexpr>r   r5   )r   r]   r$  r#  r  r  r7   r%  rC  r  r?   ru  r  )r  rO  r  rm  rn  r  r  r&  r'  rH  r  Zheight_scaleZwidth_scaleZout_yZout_xZreal_xZin_xr)  Zreal_yZin_yZt_yr*  Ziys_ofsZcoeffs_yr   r   r)   )	r+  r,  r  rL  r!  r"  rJ  rI  rK  r*   upsample_bicubic2d_default1  s6   	


rM  rd  c                 C   s   t t|t| dkdd  |d u r2|d usJ ttttf tdd t| jdd  |D }|r6|nd\}}t	| ||||S )Nr!   c                   S   r*  )Nz:Must specify exactly one of output_size and scale_factors.r)   r)   r)   r)   r*   rq   }  r+  z(upsample_bicubic2d_vec.<locals>.<lambda>c                 s   s$    | ]\}}t t|| V  qd S r5   )r   r   )r0   r  rc   r)   r)   r*   r0    s
    
z)upsample_bicubic2d_vec.<locals>.<genexpr>r"   r  )
r]   rs   r  r   r   r  rC  rR  r   rM  )r  rO  r  rd  rm  rn  r)   r)   r*   upsample_bicubic2d_vecp  s   
rN  rz   r}   r  c                C   s(   t j| ||d}t j| ||d}||fS )Nr  )r]   aminr  )rk   rI   r  rO  r  r)   r)   r*   aminmax  s   rP  r   c                C   s"   t jtt| d| |||dS )Nr   r   )rt   r   r]   r^   isnan)rk   rI   r  r   r)   r)   r*   nansum  s   "rR  r   r  r#  r  r  c             	   C   s   t jjd| d||||dS )Nr   r!   rS  rt   r$  Z
start_step)r  r   r  r#  r  r)   r)   r*   arange_default     
rU  c             	   C   s   t jj| |d||||dS )Nr!   rS  rT  )r  r  r   r  r#  r  r)   r)   r*   arange_start  rV  rW  marginc           	         s  t t jd jd  t |dkp|dkdd  t jdko, dkfdd t jdko? kfdd d urdt t jdko\  k fdd dt jdd	}||  }|	d}|dkr|n|| }d ur|  }t j
 jd
}t |k|d}|tjjkr| S |tjjkr| |jd  S |jddS )Nr   r!   r"   c                   S   r*  )Nz only p == 1 and p == 2 supportedr)   r)   r)   r)   r*   rq     r+  z#multi_margin_loss.<locals>.<lambda>c                      s   d j  S NzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: rD  r)   )r   r)   r*   rq     s    c                         d  dj  S )Nz#inconsistent target size, expected rb  rD  r)   )nframer   r)   r*   rq     r  c                      rZ  )Nz#inconsistent weight size, expected rb  rD  r)   )rI   r   r)   r*   rq     r  rI   r  r  ro   )r]   
atleast_2dZ
atleast_1dr   rs   r  r   rM   r  r   r$  r#  r^   r    r'   rl   r   r(   r   )	r   r   r   rX  r   r   ur_   rp  r)   )rI   r   r[  r   r   r*   multi_margin_loss  sB   







r_  	is_targetc                    s  | j  |j t| } t|}| j d }tt dko |dk fdd ttdko2 k fdd tj||jd}|dk}tjt|||dd	d
}||k }t||d}tj	| d|d}	t||d}
tj
||
jddkdd}d|	jjdd |  }|d}|| }t|d|}|tjjkr|jdd }n|tjjkr| }n|jdd}|| j}||fS )Nr!   r"   r   c                      r(  rY  r)   r)   )orig_input_shaper)   r*   rq     r*  z0multilabel_margin_loss_forward.<locals>.<lambda>c                      s   d d  S )Nzinconsistent target size: z for input of size: r)   r)   ra  Zorig_target_shaper)   r*   rq     rr   r  rK   Tr  r\  ro   r[   )r   rK   )r   r]   r]  rs   r  r$  r#  rO  r^   r  anyrM   Tr   r    r'   rl   r   r   r(   r7   r   r   )r   r   r   rI   rp  Zis_endZend_idxZtarget_maskZtidx0r^  Ztidx1r`  r_   r)   rb  r*   multilabel_margin_loss_forward  s@   





re  c                    s   t |  fdd}|S )Nc                     s    | i |}| d  |S rg   )r   )rA   rB   r   outplace_opr)   r*   
inplace_op  s   z$register_inplace.<locals>.inplace_opr   )Zaten_oprg  rh  r)   rf  r*   register_inplace  s   ri  )F)r   )FNrg  )r   NNr!   r5   r   )rK   FFr  r  )r!   r!   F)r   r[   r%  r  )NNN)r   r   Fr  (A  rF   rE  r  enumr   r   r   	itertoolsr   r   typingr   r   r	   r
   r   r   r   r]   Ztorch._primsr  r_  Ztorch._prims_commonZ_prims_commonr?   Ztorch.nn.functionalr?  r  r   r   r   r   Ztorch._decompr   r   r   r   r   Ztorch._prims_common.wrappersr   r   r   r   Z%torch.fx.experimental.symbolic_shapesr   r   Ztorch.utils._pytreer   r   r  ZDispatchKeyr   str__annotations__Z_opsr  rt   r    r  r  rH   r  Zcompute_only_pw_cast_for_opmathZpw_cast_for_opmathZINT_TO_FLOATZpw_cast_for_int_to_realr  rO   rU   rW   r`   r  rh   fillZScalarrm   rv   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  Zpy_implZAutogradCUDA	Generatorr   r   r   r   r   r   r   r'   rl   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  slicer  r  r  r  r   r!  r&  r^  rq  rv  r|  r  r  ZCompositeImplicitAutogradZAutogradr  r  r  Zrsubr  r  r  r  rk  r  r  r  r  r  r  r  r  r  r  r  r  r  Zunsafe_chunkr	  r  r  Zno_statsr
  r  Z_fused_dropoutr  r  r  r  r#  r   detachZliftZ
lift_freshr  r  r"  r$  r'  Z_adaptive_avg_pool2dr@  rI  rJ  rG  rZ  r[  rX  r\  r   r`  re  rg  rh  r  rj  rl  ro  rp  rq  rs  r  r  r  r  r  r  r  r  r  r  Zrnn_tanhr   r  Zrnn_relur  r  r  r  r  r  r  r  Zlstmr  r  r  r  Zgrur  r  r  r  r  r  r  r  Z_unsafe_viewru  r  r  r  r  r  r  r  r  r  r  r	  r
  r  r  r  r8  r  r=  r>  rD  r   Zupsample_bicubic2drM  rN  rP  rR  r$  r  r  rU  r  rW  r_  re  ri  Zaddbmm_ZaddbmmZaddmm_Zaddmv_Zbaddbmm_ZbaddbmmZfill_Zgelu_r  Z
hardswish_Z	hardtanh_ZhardtanhZhardsigmoid___iand____and____ilshift__
__lshift__rR  rS  Zindex_reduce_Zindex_reduce__ior____or____irshift__
__rshift____ixor____xor__Zleaky_relu_r   Zlogit_ZlogitZrelu_r  Zrenorm_ZrenormZround_r6  Zscatter_r   Zscatter_add_Zscatter_addZscatter_reduce_Zscatter_reduceZsilu_r)   r)   r)   r*   <module>   s
  
 $

$



	




 *!	
2 
	P`
 
 
 (((
	
V	L	
R		#

	

	%	
c	"j""$$



	

	
.2
)


  ?
2
	
	


	

N 

5


&
,""


(
 



$w
=



,

0	