o
    "i                 -   @   sl&  d dl Z d dlmZ d dlmZmZmZmZmZ d dl	Z	d dl
mZ d dl	mZmZmZ d dlmZmZmZmZ d dlmZ d dlmZmZ d dl
mZmZmZmZmZmZm Z  d d	l!m"Z"m#Z#m$Z$m%Z% d d
l&m'Z'm(Z( d dl)m*Z*m+Z+ d dl,m-Z- e	j.j/Z/e	j01dddZ2dd Z3dd Z4dd Z5e3e/j6j7e/j6j8ge% dd Z9e3e/j:j7e/j:j8ge% ddddZ:e3e/j;e% dd Z;e3e/j<j7e/j<j8e/j=j7e/j=j8ge%ddd d! Z>e3e/j?j7e/j?j8ge% d"d# Z?d$d% Z@e3e/jAj7e/jAj8ge% d&d' ZBe3e/jCj7e/jCj8ge% d(d) ZDe3e/jEjFdd*d+d,ZGe3e/jEj7e	jHdddd-d.d/ZIe3e/jJj7e	jHdddd-d0d1ZKe3e/jJjLe	jHdddd-d2d3ZMe3e/jNj7ddddd-d4d5ZOe3e/jPj7e/jPj8ge% d6d7 ZQe3e/jRj7dd9d:ZSd;d< ZTe3e/jUj7d=d> ZVe3e/jWj7d?d@dAedBeXdCedDe	jdEeYdFeZdGefdHdIZ[e3e/j\j7d?d@dAedBeXdCedDe	jdEeYdFeZdGefdJdKZ]e3e/j^j7dLdM Z_e3e/j^j8dNdO Z`e3e/jaj7e/jajbge% dPdQ Zce3e/jajdddRdSZee3e/jfj7e/jfjbge% dTdU Zge3e/jfjdddVdWZhe3e/jij7dXdY Zje3e/jij8dZd[ Zke3e/jlj7d\d] Zme3e/jljnd^d_ Zoe3e/jpj7dddddd`dadbZqe3e/jrj7ddcddZre3e/jsj7ddedfZte3e/juj7ddgdhZue3e/jvj7didj Zwe3e/jxjndkdl ZydAedmeYfdndoZzdAedpedqeYfdrdsZ{	?ddtedmeYdueZfdvdwZ|ddpedmeYdxeYfdydzZ}dped{ed|eZdmeYfd}d~Z~	ddeYdededeYfddZdeYfddZe3e/jj7e/jjge%dd		?ddpedeYdeZfddZdedGefddZe3e/je% dAedpedeZdGefddZe3e/je% ddAedpedeZdGefddZe3e/je% ddAedeZdGefddZe3e/je% ddAedeZdGefddZe3e/jj7ddpedeZdeZfddZe3e/jj7e/jj8ge% dededGefddZe3e/jj7ddpedeZfddZe3e/jj7e/jj8ge%dddd8d8ddAedeZdeZdGeeeef fddZe3e/jj7e/jj8ge% d8ddeded{edeZdGef
ddZe3e/jj7e/jj8ge%dddd?ddpedeZdGeeeef fddZe3e/jj7e/jj8ge%dddd?d8ddpedeZdeZdGeeeef fddZe3e/jj7e/jj8ge% d?d8ddeded{ed|eZdeZdGefddZe3e/je%ddd	?	?ddededeZdeZdGeeeef f
ddZdeYdGeeZeZf fddZe3e/jj7e/jj8ge%dd	ÐddpedeYdGeeef fddńZe3e/jj7e/jjge%dddddpedGeeeeef fddɄZe3e/jj7	8	?	ddpedeZdeZdeeY fdd΄ZdededGeeeX eeX f fdd҄ZdededqeeY dGeeef fddԄZdededGeZfddׄZe3e/jd?d8ddddd؜dped{ed|eZdeZdee dee dee dee dGeeeeef fddڄZe3e/jj7e/jj8gd?d8ddۜdped{edeZd|eZdeZdee dGefdd߄Ze3e/je%dd	?	8	8ddAedpedeZdeZdeZdGeeef fddZe3e/jj7dd Ze3e/je% 	?	8ddededed|eZdeZdGefddZdd Zdd Ze3e/je% dd Ze3e/je% dd Zdd Ze3e/je% dd Ze3e/je% dd Zdd Ze3e/je% dd Ze3e/je% dd Ze3e/jj7e/jje/jj7e/jjge% dd Zdd  Ze3e/je% dd Ze3e/je% dd Ze3e/jj7e/jje/jj7e/jjge% dd Ze3e/jǃe% ddAededGefd	d
Ze3e/jʃe% dedAedeȐdedGef
ddZe3e/jj7e/jj8ge% dddddZe3e/jj7e/jj8ge% dd*ddZe3e/jjȃdddZe3e/jj҃dddZe3e/jj7	8	8dddZՐdd Ze3e/jj7dd Ze3e/jj7gd d! Zڐd"d# Zېdd%d&Z	dd'e	jd(e	jd)eeeX eXf d*eeeX eXf d+eeeX eXf d,eZd-eXd.eeeeX eXf  fd/d0Zݐd1d2 Ze3e/jj7d'e	jd(e	jd3e	jd)eeX d*eeX d+eeX d,eZd.eeX d-eXfd4d5Ze	jjre	j01d6ddZe3e	j.jjj7d7d8 Ze3e	j.jjj7d9d: Ze	jjre	j01d;ddZe3e	j.jjd<d= Ze	j01d>ddZe3e	j.jjj7d?d@ Ze3e	j.jjj7dAdB ZdCdD Ze3e/jj7	E	F	8	?	ddGdHZdIdJ Ze3e/jj7dKdL Ze3e/je% 	E	F	8	?	ddMdNZe3e/je% dOdP Ze3e/jj7dQdR Ze3e/j j7dSdT Ze3e/jj7dUdV Ze3e/je% dWdX ZdYedxeYfdZd[Ze3e/je%ddd\d] Ze3e/j	e% d^d_ Z
e3e/je%ddd`da Ze3e/je% dbdc Ze3e/jjddddeZe3e/jj7e/jj8ge% dfdg Ze3e/jjdhdi Ze3e/jj7djdk Ze3e/jj7e/jj8gddldmeXdneXfdodpZe3e/jje/jjgdqdr Ze3e/jj7gdsdt Ze3e/jj7e/jj8ge% ddddudvZe3e/jj7e/j j7e/j!j7e/j"j7gdwdx Z#e3e/j$j7e/j%j7e/j&j7e/j'j7gdydz Z(d{d| Z)e3e/j*je/j+je/j,je/j-je/j.je/j/jgdd}d~Z0e3e/j1je/j2je/j3je/j4je/j5je/j6jgdddZ7e3e/j1j8e/j3j8e/j2j8e/j4j8gdddZ9e3e/j*j8e/j-j8e/j,j8e/j+j8gdddZ:e3e/j;j8e/j<j8gdddZ=e3e/j>j8gdddZ?e3e/j@j8e/jAj8gdddZBe3e/jCjDgdd ZEe3e/j;je/j<jgdd ZFe3e/jGgdddZHe3e/jIj7gdddddZJe3e/jKj7gdddddZLe3e/jMge% dd ZNe3e/jOj7dd ZPe3e/jQe% dd ZRe3e/jSj7	8	 	8		8	dddZTe3e/jUj7dd ZVdddZWe3e/jXj7e/jXj8ge% dddddZYe3e/jZj7e/j[j7gdd Z\e3e/jZjde/jZj]e/j[jde/j[j]e/j^j7e/j^j_ge%dddddZ`e3e/jaj7dd Zbe3e/jcj7dd Zde3e/jej7dd Zfe3e/jgj8e/jhj8e/jgje/jhje/jij7e/jjj7e/jkj7gdd Zle3e/jmj8e/jnj8e/jmje/jnjgdddZoe3e/jpj7e/jpjqgdd Zrdd Zse3e/jtje/jtj8gdd Zue3e/jvje/jvj8gdd Zwe3e/jxj7dd Zye3e/jzje/jzj8gdd Z{e3e/j|je/j|j8gdd Z}e3e/j~j7dd Ze3e/jj7e/jj7gddÐdĄZe3e/jj8dŐdƄ Ze3e/jj7ddǐdȄZe3e/jj7dɐdʄ Zddːd̄Ze3e/jj7d͐d΄ ZdϐdЄ Zdѐd҄ ZdӐdԄ ZdՐdք Z	8ddedeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeYdeZf,ddZdd ZdedYedeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeXdeYf&ddZdd Ze3e/jj7dd Ze3e/jj7	8dddZe3e/je% dd Zdd Ze3e/je% dd Ze3e/je%dd	E	F		8dddZe3e/je% d d ZdedefddZG dd deZdededeXfdd	Ze3e/jj7d
d Ze3e/je% dd Ze3e/je%dddd Ze3e/jj7gdd Ze3e/jj7					dddZe3e/jjXdd Ze3e/jj7dd Ze3e/jj7dddZddBeXdeXdeZfddZd d! Zd"d# Ze3e/jj7dd$d%Zdd&d'Zdd(d)Zd*d+ Zdd,d-Zdd.d/Ze3e/jj7d0d1 Ze3e/jd2d3 Ze3e/jje/jje/jje/jjge% dd4d5Ze3e/jĐje/jĐje/jĐje/jĐjgdd6d7Ze3e/jg	8	8	8	dd9ed:ed;ed<eȐd=eZd>eZd?ee fd@dAZe3e/jg	ddBed9ed:ed;ededCedDedEedFeXdGeXd<eȐd=eZdHedIed?ee fdJdKZe3e/jg	8	8	dd9ed:ed;edLee dMeZd=eZd?ee fdNdOZe3e/jg	8	ddBed9ed:ed;edLee dedCedHedIed<eȐdPeeZ d=eZd?ee fdQdRZe3e/jΐje/jΐjge% ddSdTZe3e/jҐjσddUdVZe3e/jj7e/jj8ge% ddd*dWdXZՐdYdZ Z֐d[d\ Ze3e/jj7dd]d^Ze3e/jj7dd_d`Ze3e/jj7		ddYedaeeeXe	jېjf  dbeeeXe	jېjf  dcee ddee f
dedfZe3e/jj7ddgdhZe3e/jj7e/jސje/jސj_e/jސjgddidjZe3e/jj߃dd8dkdldmZdndo Ze3e/jj7	ddpdqZe3e/jj7drds Ze3e/jj7dtdu Zdvdw Zdxdy Ze3e/jj7e/jj7gddzd{Ze3e/jj7dd|d}Ze3e/jj7dd~dZe	jZdd Ze3e/jj7dd Ze3e/jj7dd Ze3e/jj7dd Ze3e/jje/jjge% d8d8dddZe3e/jj7	dddZe3e/jj7dd Ze3e/jj7e/jj8ge% dddZe3e	j.j/jdd Ze3e	j.j/j dd Z e3e/je% d8d8dddddZe3e/je% deXdAedGefddZdd Zdd Zee/j ee/j ee/j	 ee/j
 ee/j ee/j ee/j ee/j ee/j ee/jj7e/jj8e/jjg ee/jj7e/jj8e/jjg ee/jj7e/jj8e/jjg ee/jj7e/jj8e/jjg ee/jj7e/jj8e/jjg d dl&Z	d dlZ	d dlZ	dd Ze  dS (      N)Enum)ListOptionalSequenceTupleUnion)SymBoolSymFloatTensor)_add_op_to_registry_convert_out_paramsglobal_decomposition_table
meta_table)
OpOverload)_elementwise_meta$ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND)corresponding_complex_dtypecorresponding_real_dtypeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDIntLikemake_contiguous_strides_for
TensorLike)_maybe_resize_out_resize_output_check_safe_copy_outout_wrapper)_broadcast_shapes_maybe_broadcast)_constrain_range_for_sizeconstrain_range)tree_mapatenZIMPLMetac                    s    fdd}|S )Nc                    s"   t    fdd}t|  S )Nc                    s   t t|   d S N)r   r   opfn h/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/torch/_meta_registrations.pyregister1   s   z0register_meta.<locals>.wrapper.<locals>.register)r   r!   )r(   r+   r%   r'   r*   wrapper.   s   
zregister_meta.<locals>.wrapperr)   )r&   r,   r)   r%   r*   register_meta-   s   	r-   c                 C   s(   t jt jt jt jt jt ji}|| | S r$   )torchZ	complex32halfcfloatfloatcdoubledoubleget)dtypeZfrom_complexr)   r)   r*   toRealValueType:   s
   r6   c                    s2   t tg|R   t k fdd d S )Nc                         d d  S )Nzoutput with shape z# doesn't match the broadcast shape r)   r)   Zbroadcasted_shape
self_shaper)   r*   <lambda>G       z)check_inplace_broadcast.<locals>.<lambda>)tupler   r.   _check)r9   Z
args_shaper)   r8   r*   check_inplace_broadcastC   s
   r>   c                    sN   t  jt jk fdd t |  dko  dk dd  |  jS )Nc                         d j  S )Nz2take(): Expected a long tensor for index, but got r5   r)   indexr)   r*   r:   Q       zmeta_take.<locals>.<lambda>r   c                   S      dS )Nz*take(): tried to take from an empty tensorr)   r)   r)   r)   r*   r:   V       )r.   r=   r5   long_check_indexnumel	new_emptyshape)selfrB   r)   rA   r*   	meta_takeK   s   

rL   dimc                   sh   j }j }t||kdd  t dko dk fdd tjj}|S )Nc                   S   rD   )Nz=linalg.cross: inputs must have the same number of dimensions.r)   r)   r)   r)   r*   r:   b   rE   zlinalg_cross.<locals>.<lambda>   c                      s"   d  d   d   S )Nzlinalg.cross: inputs dimension z must have length 3. Got  and sizer)   rO   otherrK   r)   r*   r:   f   s
   )ndimr.   r=   rS   r   rJ   rI   )rK   rU   rO   Zx_dZy_d	out_shaper)   rT   r*   linalg_cross[   s   
rX   c                 C   s   t | d t| d t| S )Nzlinalg.matrix_expZ
matrix_exp)squareCheckInputscheckFloatingOrComplexr.   
empty_likerK   r)   r)   r*   linalg_matrix_expo   s   


r]   valuesindicesc                 C   sV   t j| j| j| jd}t j| j| jt jd}|  dkr'| jdkr't|| j ||fS )Ndevicer5   r   )	r.   emptyrJ   ra   r5   int64rH   rV   maybe_wrap_dim)rK   rO   r^   r_   r)   r)   r*   	cummaxminw   s
   re   c                 C   s   t || j t|  S r$   )rd   rV   r.   r[   
contiguous)rK   rO   r)   r)   r*   logcumsumexp   s   rg   c                    s  |j }t|}|| }tt|}dd t|D }	|D ]}
d|	|
< qg g }}|D ]}
|	|
 s6||
 q*||
 q*|| }t|}|  |d | }|j fdddd |||d   }||}dgt|j|d   }|	|}|
d}||d< |}tt|D ]}|||  ||d	 < q| 	|} d
d t|D }d	}|d	 }|dkr|| d ||| < ||||  9 }|d	8 }|dkst||D ]}| d	||  ||| < q| |||  S )Nc                 S      g | ]}d qS Fr)   .0_r)   r)   r*   
<listcomp>   r;   z_exec_fft.<locals>.<listcomp>Tc                        |  S r$   r)   xZself_stridesr)   r*   r:          z_exec_fft.<locals>.<lambda>keyreverserM   r      c                 S   rh   r   r)   rj   r)   r)   r*   rm      r;   )rV   lenlistrangeappendstridesortpermuterJ   reshaperS   
as_stridedstorage_offset)outrK   	out_sizesrO   forwardrV   Zsignal_ndim
batch_dimsZdim_permuteZis_transformed_dimdleftrightZ	batch_endtmpinputZbatched_sizes
batch_sizeZbatched_out_sizesiZout_stridesZbatch_numelr)   rq   r*   	_exec_fft   sL   





r   c                    sb   | j jsJ | j}| |}|s|S |d d  }|   |j fdddd t|| |||}|S )Nc                    rn   r$   r)   ro   rq   r)   r*   r:      rr   zmeta_fft_c2c.<locals>.<lambda>Trs   )r5   
is_complexrJ   rI   r|   r}   r   )rK   rO   normalizationr   r   outputZsorted_dimsr)   rq   r*   meta_fft_c2c   s   
r   c                 C   sR   | j jsJ t|  }|r|d }|| d d }|||< | j|t| j dS )NrM      rv   r@   )r5   is_floating_pointry   rS   rI   utilsr   )rK   rO   r   Zonesidedoutput_sizesZlast_dimZlast_dim_halfsizer)   r)   r*   meta_fft_r2c   s   r   )	generatorc                C   s    |j dkr|d| ksJ |S Nrv   r   )rV   rS   )nr   r   r)   r)   r*   meta_randperm   s   r   r5   layoutra   
pin_memoryc                C      t j| ||||dS Nr   r.   rb   )r   r5   r   ra   r   r)   r)   r*   meta_randperm_default      
r   c                C   s   t j|||||dS r   r   )highrS   r5   r   ra   r   r)   r)   r*   meta_randint   r   r   c                C   s   t j|||||dS r   r   )lowr   rS   r5   r   ra   r   r)   r)   r*   meta_randint_low   s   
r   c                C   r   r   r   )rS   r5   r   ra   r   r)   r)   r*   meta_rand_default     
r   c                 C   s8   | j jsJ t|  }|||d < | j|t| j dS )NrM   r@   )r5   r   ry   rS   rI   r6   )rK   rO   r   Zlastdimr   r)   r)   r*   meta_fft_c2r  s   r   Fc                 C   s2   | | |}|  | krtj||   | S r$   )torS   r"   Zexpand_copydefault)rK   srcnon_blockingZintermediater)   r)   r*   
meta_copy_  s   r   c                 C   sX   t |  }t |  }||  krdn|| ||  }||d ||| ||fS Nrv   )ry   rS   r|   rO   insert)tensorrO   Zresult_sizesZresult_strides
new_strider)   r)   r*   inferUnsqueezeGeometry*  s    r   c                 C   s0   t ||  d }t| |\}}| || | S r   )rd   rO   r   as_strided_)rK   rO   Zg_sizesZ	g_stridesr)   r)   r*   meta_unsqueeze_3  s   r   T)include_selfrK   rO   rB   sourcereducer   returnc                C   s   t j| t jdS )Nmemory_format)r.   r[   contiguous_formatrK   rO   rB   r   r   r   r)   r)   r*   meta_index_reduce;  s   
r   c                C      | S r$   r)   r   r)   r)   r*   meta_index_reduce_H  s   
r   c                 C   s.   t |  }|  dkr| ||< | |S )Nr   )ry   rS   rO   rH   rI   )rK   rO   rB   result_sizer)   r)   r*   meta_index_selectV  s   
r   c                 C   s(   t ||  | j |t | ||S r$   )r.   _resize_output_rS   ra   copy_index_select)rK   rO   rB   r   r)   r)   r*   meta_index_select_out^  s   r   c                 C   
   |  dS Nr)   rI   r\   r)   r)   r*   meta_maxd     
r   c                 C   6   t | j|f}t| ||}| || j|tjdfS Nr@   r   reduction_dimsrJ   _compute_reduction_shaperI   r.   rF   rK   rO   keepdimoutput_shaper)   r)   r*   meta_max_dimj  
   r   c                 C   r   r   r   r\   r)   r)   r*   meta_mint  r   r   c                 C   r   r   r   r   r)   r)   r*   meta_min_dimz  r   r   c                 C   s4   |   r
t| j}n	t| tjd\}}tj| |dS )NZtype_promotion_kindr@   )r   r   r5   r   r   INT_TO_FLOATr.   r[   )rK   result_dtyperl   r)   r)   r*   
meta_angle  s   
r   c                 C   s$   t ||  | j |t | S r$   )r.   r   rS   ra   r   angle)rK   r   r)   r)   r*   meta_angle_out  s   r   c                 C      d S r$   r)   )valr)   r)   r*   assert_async     r   c                 C   r   r$   r)   )r   
assert_msgr)   r)   r*   assert_async_meta  r   r   r5   r   ra   r   r   c                 C   s   t jg ddS )Nmetara   r   r   r)   r)   r*   make_dep_token  s   	r   c                 C   (   t | ttfrtdt| ||d d S Nz'Constraining SymFloat or Symbool is nyiminmax)
isinstancer	   r   
ValueErrorr    rS   r   r   r)   r)   r*   sym_constrain_range     r   c                 C      t j| ||d |S Nr   )r"   r   rS   r   r   	dep_tokenr)   r)   r*   functional_sym_constrain_range     r   c                 C   r   r   )r   r	   r   r   r   r   r)   r)   r*   sym_constrain_range_for_size  r   r   c                 C   r   r   )r"   r   r   r)   r)   r*   'functional_sym_constrain_range_for_size  r   r   c                 C   s   |S r$   r)   )r   r   r   r)   r)   r*   functional_assert_async_meta  r   r   f_namec                 C   sX   |   dksJ | d| d| dks*J | d| d d| d dd S )Nr   z3: The input tensor must have at least 2 dimensions.rM   z5: A must be batches of square matrices, but they are  by 	 matrices)rO   rS   )rK   r   r)   r)   r*   rY     s    rY   Anamec                    s   t j jk fdd t j jk fdd t  d dk fdd t  ddk fdd d S )Nc                         dj  d j  dS )Nz:Expected b and A to be on the same device, but found b on z
 and A on 	 instead.r   r)   r   rK   r)   r*   r:     
   z(linearSolveCheckInputs.<locals>.<lambda>c                      r  )Nz=Expected b and A to have the same dtype, but found b of type z and A of type r  r@   r)   r  r)   r*   r:     r  rM   r   c                      s   d  d d  d dS )Nz3A must be batches of square matrices, but they are r   r   rM   r   rR   r)   r   r)   r*   r:     s
   c                      s:   d d  d d  d d d d d 
S )NzIncompatible matrix sizes for z: each A matrix is rM   r   z but each b matrix is r   rR   r)   r   r   rK   r)   r*   r:     s   )r.   r=   ra   r5   rS   )rK   r   r   r)   r  r*   linearSolveCheckInputs  s    


r  tallow_low_precision_dtypesc                    s^   | j  t|  p|   fdd |s-t tjtjtjtjfv  fdd d S d S )Nc                          d  S )Nz<: Expected a floating point or complex tensor as input. Got r)   r)   r5   r   r)   r*   r:         z(checkFloatingOrComplex.<locals>.<lambda>c                      r
  )Nz*: Low precision dtypes not supported. Got r)   r)   r  r)   r*   r:     r  )	r5   r.   r=   r   r   r1   r3   r0   r2   )r  r   r	  r)   r  r*   rZ      s   rZ   arg_namec                    s"   t |  dk fdd d S )Nr   c                          d  dS )Nz: The input tensor z! must have at least 2 dimensions.r)   r)   r  r   r)   r*   r:     r;   zcheckIsMatrix.<locals>.<lambda>)r.   r=   rO   )r   r   r  r)   r  r*   checkIsMatrix  s   
r  Br   c                    sZ   t   t tr ddkn	 ddk fdd d S )Nr   rM   c                      sH    drdnd d  d d  d d d d d d	S )
Nz2: Incompatible shapes of A and B for the equation zAX = BzXA = Bz (r   rp   rM   rQ   )rR   r)   r   r  r   r   r)   r*   r:   !  s   
z#checkInputsSolver.<locals>.<lambda>)rY   r  r.   r=   rS   )r   r  r   r   r)   r  r*   checkInputsSolver  s   

*r  resultfn_namer   result_namec                    s&   t jjk fdd d S )Nc                	      s$     d d dj  dj  	S )Nz: Expected z5 and input tensors to be on the same device, but got z on z and input on r   r)   r  r   r  r  r)   r*   r:   .  s   z!checkSameDevice.<locals>.<lambda>)r.   r=   ra   )r  r  r   r  r)   r  r*   checkSameDevice)  s   
r  UPLOc                    s8      }tt dko|dkp|dk fdd d S )Nrv   ULc                      
   d  S )Nz1Expected UPLO argument to be 'L' or 'U', but got r)   r)   r  r)   r*   r:   9     
 zcheckUplo.<locals>.<lambda>)upperr.   r=   rx   )r  ZUPLO_uppercaser)   r  r*   	checkUplo5  s
   
r!  eigenvaluesZeigenvectorsr  	compute_vc                 C   sp   t | d t| t| j}|r | |}||t|dd n| dg}|  | j|t| j	d}||fS )Nzlinalg.eighFZ	row_majorr   r@   )
rY   r!  ry   rJ   rI   r   r   popr6   r5   )r   r  r#  rJ   Zvecsvalsr)   r)   r*   meta__linalg_eigh=  s   


r'  r   c                 C   s   | j jtjdddS )Nr   r   rM   )ZmTcloner.   r   	transpose)r   r)   r)   r*   cloneBatchedColumnMajorT  s   r*  r   c                 C   s   t | S r$   )r*  )rK   r   r   r)   r)   r*   _cholesky_solve_helperX  s   r+  c                    sP   t jdkfdd t  jdk fdd t d\}}t|||S )Nr   c                         d j  dS )Nz-b should have at least 2 dimensions, but has  dimensions insteadrV   r)   r\   r)   r*   r:   c  r  z cholesky_solve.<locals>.<lambda>c                      r,  )Nz-u should have at least 2 dimensions, but has r-  r.  r)   r  r)   r*   r:   g  r  cholesky_solve)r.   r=   rV   !_linalg_broadcast_batch_dims_namer+  )rK   r   r   Zself_broadcastedZA_broadcastedr)   r  r*   r/  ^  s   

r/  c                 C   s.   |   dkrtj| tjdS t| d t| S )Nr   r   cholesky)rH   r.   r[   legacy_contiguous_formatrY   r*  rK   r   r)   r)   r*   r1  o  s   
r1  c                 C   s   t | d t| S )Ncholesky_inverse)rY   r*  r3  r)   r)   r*   r4  x  s   
r4  check_errorsc                 C   sf   t | d t| d | j}t|}t|d}| |}||| | j|d|d  tjd}||fS )Nzlinalg.choleskyFr   r   r@   )	rY   rZ   rJ   rx   r   rI   r   r.   int32)r   r   r5  ZA_shaperV   Z	L_stridesr  infosr)   r)   r*   linalg_cholesky_ex  s   



r8  tauc                    s  t jdkdd  t ddkdd  t ddkdd  t jj dkfd	d jdkr[jd d }jd d  t  |k fd
d t jjkfdd tdd t jjtjddjj	dS )Nr   c                   S   rD   )NzHtorch.linalg.householder_product: input must have at least 2 dimensions.r)   r)   r)   r)   r*   r:     rE   z,linalg_householder_product.<locals>.<lambda>r   rM   c                   S   rD   )Nzbtorch.linalg.householder_product: input.shape[-2] must be greater than or equal to input.shape[-1]r)   r)   r)   r)   r*   r:     rE   c                   S   rD   )Nz`torch.linalg.householder_product: input.shape[-1] must be greater than or equal to tau.shape[-1]r)   r)   r)   r)   r*   r:     rE   rv   c                         dj  d j  S )Nzptorch.linalg.householder_product: Expected tau to have one dimension less than input, but got tau.ndim equal to  and input.ndim is equal to r.  r)   r   r9  r)   r*   r:     
   c                      r  )Nzltorch.linalg.householder_product: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r)   r)   actual_batch_tau_shaper)   r*   r:        c                      r:  )Nz,torch.linalg.householder_product: tau dtype z does not match input dtype r@   r)   r<  r)   r*   r:        
z torch.linalg.householder_productr9  Fr$  rS   r|   r5   ra   )
r.   r=   rV   rS   rJ   r5   r  empty_stridedr   ra   )r   r9  Zexpected_batch_tau_shaper)   )r?  r   r9  r*   linalg_householder_product  sD   


rD  c                 C   s^   t | d t| ddd | | j}|| jt| jdd | j| jd d tjd}||fS )Nzlinalg.inv_exF)r	  r$  r   r@   rY   rZ   rI   rJ   r   r   r.   r6  )r   r5  r  r7  r)   r)   r*   linalg_inv_ex_meta  s   
rF  LDpivotsinfo)	hermitianr5  rJ  c                C   st   t | d t| d tj| jt| jdd| j| jd}| j| jd d tj	d}| j| jd d tj	d}|||fS )Nztorch.linalg.ldl_factor_exFr$  rB  rM   r@   r   )
rY   rZ   r.   rC  rJ   r   r5   ra   rI   int)rK   rJ  r5  rG  rH  rI  r)   r)   r*   linalg_ldl_factor_ex_meta  s   


rL  )rJ  c                   s   t d td t d t jdk fdd jd d }t|jkfdd ttj	fdd tj	 j	k fdd t
 \}}tj|t|d	d
 j	 jdS )Nztorch.linalg.ldl_solver   c                      r,  )NzMtorch.linalg.ldl_solve: Expected B to have at least 2 dimensions, but it has r-  r.  r)   )r  r)   r*   r:        z'linalg_ldl_solve_meta.<locals>.<lambda>rM   c                      r,  )Nzjtorch.linalg.ldl_solve: Expected LD.shape[:-1] and pivots.shape to be the same, but got pivots with shape  insteadrJ   r)   rH  r)   r*   r:     rM  c                      r?   )Nz<torch.linalg.ldl_solve: Expected pivots to be integers. Got r@   r)   rP  r)   r*   r:     rC   c                      r:  )Nz!torch.linalg.ldl_solve: LD dtype z does not match b dtype r@   r)   )r  rG  r)   r*   r:         Fr$  rB  )rY   rZ   r  r.   r=   rV   rJ   r   is_integer_dtyper5   _linalg_broadcast_batch_dimsrC  r   ra   )rG  rH  r  rJ  Zexpected_pivots_shapeB_broadcast_sizerl   r)   )r  rG  rH  r*   linalg_ldl_solve_meta  s6   







rU  Pr  )pivotrW  c          	         s   t  jdk fdd t j}|d }|d }t||}||d< |r+ |}n dg}||d<  |}||d< ||d<  |}|||fS )Nr   c                      r,  )Nz@linalg.lu: Expected tensor with 2 or more dimensions. Got size: rN  rO  r)   r  r)   r*   r:     r  z linalg_lu_meta.<locals>.<lambda>r   rM   r   )r.   r=   rV   ry   rJ   r   rI   )	r   rW  sizesmr   krV  r  r  r)   r  r*   linalg_lu_meta  s$   





r[  LU)rW  r5  c          	         s   t  jdk fdd t j}|d }|d }t j|t|dd j jd}|	  t
|||d<  j|t jd	}|	   j|t jd	}|||fS )
Nr   c                      r,  )NzFtorch.lu_factor: Expected tensor with 2 or more dimensions. Got size: rN  rO  r)   r  r)   r*   r:   4  r  z*linalg_lu_factor_ex_meta.<locals>.<lambda>r   rM   Fr$  rB  r@   )r.   r=   rV   ry   rJ   rC  r   r5   ra   r%  r   rI   rK  )	r   rW  r5  rX  rY  r   r\  rH  rI  r)   r  r*   linalg_lu_factor_ex_meta-  s&   



r]  )r   adjointr^  c                   s   t d tj jk fdd tjtjkdd  td t |d tddkdd  tjd d jkfdd t	 \}}tj
|t|| d	 j jd
}| dkru|su| ru| }|S )Nztorch.linalg.lu_solvec                      r  )NzPlinalg.lu_solve: Expected LU and B to have the same dtype, but found LU of type  and B of type rN  r@   r)   )r  r\  r)   r*   r:   \  r  z&linalg_lu_solve_meta.<locals>.<lambda>c                   S   rD   )NzElinalg.lu_solve: pivots should be a Tensor of scalar type torch.int32r)   r)   r)   r)   r*   r:   c  rE   zlinalg.lu_solverM   c                   S   rD   )NzYlinalg.lu_solve: Number of pivots per batch should be same as the dimension of the matrixr)   r)   r)   r)   r*   r:   k  rE   c                      r,  )Nzclinalg.lu_solve: Expected LU.shape[:-1] and pivots.shape to be the same, but got pivots with shape rN  rO  r)   rP  r)   r*   r:   q  rM  r$  rB  r   )rZ   r.   r=   r5   rK  rY   r  rS   rJ   rS  rC  r   ra   rH   r   conj)r\  rH  r  r   r^  rT  rl   r  r)   )r  r\  rH  r*   linalg_lu_solve_metaN  s<   




ra  unpack_dataunpack_pivotsc                    s   t  jdk fdd |rt |jt jkdd  t j}|d }|d }t||}||d< |r9 |}n dg}|rX||d<  |}	||d< ||d<  |}
n dg}	 dg}
||	|
fS )Nr   c                      r,  )NzFtorch.lu_unpack: Expected tensor with 2 or more dimensions. Got size: rN  rO  r)   r\  r)   r*   r:     r  z lu_unpack_meta.<locals>.<lambda>c                   S   rD   )Nztorch.lu_unpack: LU_pivots is expected to be a contiguous tensor of torch.int32 dtype.
Note: this function is intended to be used with the output produced by torch.linalg.lu_factorr)   r)   r)   r)   r*   r:        r   rM   r   )	r.   r=   rV   r5   r6  ry   rJ   r   rI   )r\  rH  rb  rc  rX  rY  r   rZ  rV  r  r  r)   rd  r*   lu_unpack_meta  s4   





rf  modec                    sd    dkrd}d}||fS  dkrd}d}||fS  dkr$d}d}||fS t d fdd ||fS )NreducedTZcompleteFrc                         d  dS )Nzqr received unrecognized mode 'z=' but expected one of 'reduced' (default), 'r', or 'complete'r)   r)   rg  r)   r*   r:     s   z _parse_qr_mode.<locals>.<lambda>r.   r=   )rg  	compute_qrh  r)   rk  r*   _parse_qr_mode  s"   	
rn  QRrh  c                 C   s   t | d t| d t|\}}| jd }| jd }t||}|r>t| j}|r*|n||d< | |}||t|dd n| dg}t| j}	|sM|sO|n||	d< | |	}
|
|	t|	dd ||
fS )Nz	linalg.qrr   rM   Fr$  r   )	r  rZ   rn  rJ   r   ry   rI   r   r   )r   rg  rm  Zreduced_moderY  r   rZ  ZQ_shapero  ZR_shaperp  r)   r)   r*   linalg_qr_meta  s"   








rq  sign	logabsdetc                 C   s   t | d t| dd | j}| |d d }| j|d d t| jd}tj|t|d| j| j	d}| j|d d tj
d}||||fS )Nzlinalg.slogdetFr   r@   rB  rM   )rY   rZ   rJ   rI   r6   r5   r.   rC  r   ra   r6  )r   rJ   rr  rs  r\  rH  r)   r)   r*   _linalg_slogdet  s   
rt  full_matrices
compute_uvdriverc                 C   s   t | d t| d t| jd d }| jd }| jd }t||}|r]|||r*|n|g }| |}	|	|t|dd ||rB|n||g }
| |
}t| dk}||
t|
|d n| dg}	| dg}| j||g t	| j
d}|	||fS )	Nz
linalg.svdr   rM   Fr$  cudar   r@   )r  rZ   ry   rJ   r   rI   r   r   device_hintr6   r5   )r   ru  rv  rw  r   rY  r   rZ  ZU_shaper  ZV_shapeVZis_cudaSr)   r)   r*   _linalg_svd_meta  s$   







r|  arg1arg2c                 C   sn   | j d d }|j d d }t||}t|}|| d| dg7 }t|}||d|dg7 }||fS )Nr   rM   )rJ   r   ry   rS   )r}  r~  Zarg1_batch_sizesZarg2_batch_sizesexpand_batch_portionarg1_expand_sizearg2_expand_sizer)   r)   r*   rS    s   
rS  c                 C   sV   |rt | || t| |\}}|| jkr| n| |}||jkr"|n||}||fS r$   )r  rS  rJ   expand)r}  r~  r   r  r  Zarg1_broadcastedZarg2_broadcastedr)   r)   r*   r0  /  s   r0  rU   c                 C   s6   | j d d }|jdkp| jd |jko|j |k}|S )NrM   rv   )rJ   rV   )r   rU   Zexpected_batched_rhs_shapevector_caser)   r)   r*   linalg_solve_is_vector_rhsA  s
   
r  )r   r5  r  r\  rH  rI  c                   sn  t  d t jjk fdd t }|r dn}	t |	|d t|	 \}
}t|p6| dd  |rC|
d d n|
}tj|t	|| jj
d} j} j}tj|t	|d j j
d} j|d d tjd} j|d d	 tjd}||||f}||||f}td
d |D rt||D ]\}}t||j ||j|  t||dd q|S )Nzlinalg.solvec                      s   d j  dj  dS )NzKlinalg.solve: Expected A and B to have the same dtype, but found A of type r_  rN  r@   r)   r   r  r)   r*   r:   X  r  z"_linalg_solve_ex.<locals>.<lambda>rM   c                   S   rD   )Nzlinalg.solve: Vector broadcasting of the left hand side is not supported for left=False. In this case linalg.solve is equivalent to B / A.squeeze(-1)r)   r)   r)   r)   r*   r:   c  re  rB  Fr@   r   c                 s   s    | ]}|d uV  qd S r$   r)   rk   rp   r)   r)   r*   	<genexpr>{      z#_linalg_solve_ex.<locals>.<genexpr>)	copy_fromcopy_toZexact_dtype)rZ   r.   r=   r5   r  	unsqueezer  rS  rC  r   ra   rJ   rV   rI   r6  allzipr   r   r|   r   )r   r  r   r5  r  r\  rH  rI  r  B_ZB_broad_shaperl   Zresult_shapeZresult_rJ   rV   ZLU_Zpivots_Zinfo_r   resri  or)   r  r*   _linalg_solve_exI  sL   



r  )r   unitriangularr   r  r   c          	      C   s   |d u r
|  dg}t|tsJ t| ||d t|| d \}}|dd o+| }|r6t||j	}|S t
||j	rL||ddj	 |dd |S )Nr   zlinalg.solve_triangularr   rM   )rI   r   r   r  r0  r)  is_contiguousis_conjr   rJ   r   Zresize_
transpose_)	r   r  r   r   r  r   r  ZA_Zavoid_copy_Ar)   r)   r*   linalg_solve_triangular_meta  s   
r  solutioncloned_coefficientr)  c           	         s   t jdkfdd t  jdk fdd t d  jt jkrOt \}}t j|t|ddj	j
d}t j|t|dd j	 j
d}||fS  jt jks[ jt jkrjt }d	g}||fS t dd
d  ||fS )Nr   c                      r,  )NzMtorch.triangular_solve: Expected b to have at least 2 dimensions, but it has r-  r.  r)   r\   r)   r*   r:     rM  z'triangular_solve_meta.<locals>.<lambda>c                      r,  )NzMtorch.triangular_solve: Expected A to have at least 2 dimensions, but it has r-  r.  r)   r  r)   r*   r:     rM  triangular_solveFr$  rB  r   c                   S   rD   )Nz+triangular_solve: Got an unexpected layout.r)   r)   r)   r)   r*   r:     rE   )r.   r=   rV   r  r   stridedrS  rC  r   r5   ra   
sparse_csr
sparse_bsrr[   rI   )	rK   r   r   r)  r  Zself_broadcast_sizeZA_broadcast_sizer  r  r)   r  r*   triangular_solve_meta  s<   	




r  c                 C   sp   t | d t| d | | jd d }| | j}|| jt| jdd | j| jd d tjd}|||fS )Nz
linalg.detr   Fr$  rM   r@   rE  )r   Zdetr\  rH  r)   r)   r*   _linalg_det_meta  s   


r  c                    s  t jdkdd  t jdkdd  |rdndt j jd kfdd t j jd kfdd t jd jd kd	d  t jj d
kfdd t jjkfdd jdkrjd d }jd d t |kfdd jd d  t  |k fdd t jjkfdd t jjkfdd tdd tdd t jjtjddjjdS )Nr   c                   S   rD   )Nz3torch.ormqr: input must have at least 2 dimensions.r)   r)   r)   r)   r*   r:     rE   zormqr.<locals>.<lambda>c                   S   rD   )Nz3torch.ormqr: other must have at least 2 dimensions.r)   r)   r)   r)   r*   r:     rE   r   rM   c                      rj  )Ntorch.ormqr: other.shape[z0] must be greater than or equal to tau.shape[-1]r)   r)   left_size_conditionr)   r*   r:     rC   c                      rj  )Nr  z"] must be equal to input.shape[-2]r)   r)   r  r)   r*   r:     rC   c                   S   rD   )NzHtorch.ormqr: tau.shape[-1] must be less than or equal to input.shape[-1]r)   r)   r)   r)   r*   r:     rE   rv   c                      r:  )Nz[torch.ormqr: Expected tau to have one dimension less than input, but got tau.ndim equal to r;  r.  r)   r<  r)   r*   r:     r=  c                      r:  )Nzhtorch.ormqr: Expected other to have the same number of dimensions as input, but got other.ndim equal to r;  r.  r)   r   rU   r)   r*   r:     r=  c                      r  )NzWtorch.ormqr: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r)   r)   r>  r)   r*   r:     r@  c                      r  )NzYtorch.ormqr: Expected batch dimensions of other to be equal to input.shape[:-2], but got r)   r)   )actual_batch_other_shaper)   r*   r:     r@  c                         d j  dj  S )NzPtorch.ormqr: Expected input and tau to have the same dtype, but input has dtype z and tau has dtype r@   r)   r<  r)   r*   r:   "  r=  c                      r  )NzRtorch.ormqr: Expected input and other to have the same dtype, but input has dtype z and other has dtype r@   r)   r  r)   r*   r:   )  r=  ztorch.ormqrr9  rU   Fr$  rB  )	r.   r=   rV   rJ   r5   r  rC  r   ra   )r   r9  rU   r   r)  Zexpected_batch_shaper)   )r  r?  r   r  rU   r9  r*   ormqr  sn   	







r  c                   s   t td  k fdd j}| d k}|}| }|r3td|D ]}|o0|dk}q&ntd|D ]}|oB|dk}q8t |pI| fdd d S )Nr   c                      s   dd   dt  S )Nzpadding size is expected to be r   z, but got: rx   r)   )rO   paddingr)   r*   r:   =      z,_padding_check_valid_input.<locals>.<lambda>rv   r   c                      s    d d  d d  dj  S )Nz	Expected rv   zD or r   zcD (batch mode) tensor with possibly 0 batch size and other non-zero dimensions for input, but got: rO  r)   )rO   r   r)   r*   r:   R  s   )r.   r=   rx   rV   rz   rS   )r   r  rO   Z	input_dimZis_batch_modeZvalid_batch_modeZvalid_non_batch_moder   r)   )rO   r   r  r*   _padding_check_valid_input:  s$   r  c                   s   d}d d}j dkrd} d7  |d7 }t|dd |\|}   |rHtk o>k  fdd tdkfdd j dkra|fS ||fS )	Nr   rv   rP   rN   c                         d d d  dj  S NzcArgument #4: Padding size should be less than the corresponding input dimension, but got: padding (, ) at dimension 
 of input rO  r)   dim_wr   pad_lpad_rr)   r*   r:   n     z_pad1d_common.<locals>.<lambda>c                         d  d S )Nz
input (W: z%) is too small. Calculated output W: r)   r)   )input_woutput_wr)   r*   r:   v  r;   r   )rV   rS   r  r.   r=   rI   )r   r  is_reflection	dim_planenbatchnplaner)   )r  r   r  r  r  r  r*   _pad1d_commonY  s0   




r  c                 C      t | |ddS NTr  r  r   r  r)   r)   r*   meta_reflection_pad1d     r  c                 C   r  NFr  r  r  r)   r)   r*   meta_replication_pad1d  r  r  c                   s   d |st t|dkdd  jdkr d7  |\ }|  |r=t |k o3|k  fdd t  k fdd jS )Nrv   r   c                   S   rD   )Nz padding size is expected to be 2r)   r)   r)   r)   r*   r:     rE   z(_pad1d_backward_common.<locals>.<lambda>rP   c                      r  r  rO  r)   r  r)   r*   r:     r  c                         d d   S Nz(grad_output width unexpected. Expected: , Got: rR   r)   r  grad_outputr  r)   r*   r:         r.   r=   rx   rV   rS   rI   rJ   )r  r   r  r  r  r)   )r  r  r   r  r  r  r*   _pad1d_backward_common  s$   

r  c                 C      t | ||ddS r  r  r  r   r  r)   r)   r*   meta_reflection_pad1d_backward     r  c                 C   r  r  r  r  r)   r)   r*   meta_replication_pad1d_backward  r  r  c                   s2  dd d}d}t |dd j}|dkr'd}d7  d7  |d7 }|\	
|} 
   	 |rptk oS	k 	fdd t
k ofk  
fdd tdkpydkfd	d jd
kr|fS ||fS )Nr   rv   r   rN      c                      r  r  rO  r)   r  r)   r*   r:     r  z_pad2d_common.<locals>.<lambda>c                         d d d  dj  S NzcArgument #6: Padding size should be less than the corresponding input dimension, but got: padding (r  r  r  rO  r)   dim_hr   pad_bpad_tr)   r*   r:     r  c                      s   d  d d d S )Nz
input (H:  W: z%) is too small. Calculated output H: r)   r)   )input_hr  output_hr  r)   r*   r:     s
   rP   r  rV   rS   r.   r=   rI   )r   r  r  Z
dim_slicesr  rV   r  r)   )r  r  r   r  r  r  r  r  r  r  r  r*   _pad2d_common  sB   




r  c                 C   r  r  r  r  r)   r)   r*   meta_reflection_pad2d  r  r  c                 C   r  r  r  r  r)   r)   r*   meta_replication_pad2d  r  r  c                    s   dd d}d}|j }| dkr!|d }d7  d7  |d7 }|\}}}}	|| }
|  }| }|| |	 || | tkfdd t k fdd ||j S )Nr   rv   r   r  c                      r  r  rR   r)   r  r)   r*   r:     r  z%meta_pad2d_backward.<locals>.<lambda>c                      r  Nz)grad_output height unexpected. Expected: r  rR   r)   r  r  r  r)   r*   r:     r  )rJ   rO   r.   r=   rS   rI   )r  rK   r  r  r  r9   r  r  r  r  r  r  r  r)   )r  r  r  r  r  r*   meta_pad2d_backward  s2   
r  c             	      s  ddd d}t |dd jdk}|r+d}d7 d7  d7  |d7 }|\
|}    
   	|rtk odk fdd tk ow
k 
fd	d tk ok  fd
d t	dkpdkpdk	fdd |r||	fS |	fS )NrP   r   rv   r   rN      c                      r  r  rO  r)   r  r)   r*   r:   <  r  z_pad3d_common.<locals>.<lambda>c                      r  r  rO  r)   r  r)   r*   r:   C  r  c                      r  )NzcArgument #8: Padding size should be less than the corresponding input dimension, but got: padding (r  r  r  rO  r)   )dim_dr   pad_bkpad_fr)   r*   r:   J  r  c                      s(   d  d d d d d S )Nz
input (D:  H: r  z%) is too small. Calculated output D: r)   r)   )input_dr  r  output_dr  r  r)   r*   r:   R  s   r  )r   r  r  r  Z
batch_moder  r  r)   )r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r*   _pad3d_common  sP   





r  c                 C   r  r  r  r  r)   r)   r*   meta_reflection_pad3d^  r  r  c                 C   r  r  r  r  r)   r)   r*   meta_replication_pad3dd  r  r  c                    s(  t t|dkdd  |jdksJ j|jksJ ddd |jdkr2d7 d7  d7  |\}}}}}}| }	|}
|}|	| | |
| | || | t kfdd t kfd	d t  k fd
d ||jS )N   c                   S   rD   )Nz padding size is expected to be 6r)   r)   r)   r)   r*   r:   t  rE   z%meta_pad3d_backward.<locals>.<lambda>rP   r   rv   r  c                      r  r  rR   r)   r  r)   r*   r:     r  c                      r  r  rR   r)   r  r)   r*   r:     r  c                      r  )Nz(grad_output depth unexpected. Expected: r  rR   r)   )r  r  r  r)   r*   r:     r  r  )r  r   r  r  r  r  r  r  r  r  r  r  r)   )r  r  r  r  r  r  r  r*   meta_pad3d_backwardj  s<   




r  r   pc                 C   s^   t |  dd  | d}|dkr| dgjt jdS | ||d  d fjt jdS )Nc                   S   rD   )Nz(_pdist_forward requires contiguous inputr)   r)   r)   r)   r*   r:     rE   z%meta__pdist_forward.<locals>.<lambda>r   rv   r   r   )r.   r=   r  rS   rI   r   r2  )rK   r  r   r)   r)   r*   meta__pdist_forward  s   
r  gradpdistc                 C   s8   t | dd  t | dd  t j|t jdS )Nc                   S   rD   )Nz._pdist_backward requires self to be contiguousr)   r)   r)   r)   r*   r:     rE   z&meta__pdist_backward.<locals>.<lambda>c                   S   rD   )Nz/_pdist_backward requires pdist to be contiguousr)   r)   r)   r)   r*   r:     rE   r   )r.   r=   r  r[   r2  )r  rK   r  r  r)   r)   r*   meta__pdist_backward  s   r  rv   )betaalphac          	         s     d}  d} d}|||ft  dkdd  t dkdd  tj j  ko=jkn   fdd  j}j|d |d td kocd kfd	d   S )
Nr   rv   r   rP   c                   S   rD   Nzbatch1 must be a 3D tensorr)   r)   r)   r)   r*   r:     rE   zmeta_baddbmm.<locals>.<lambda>c                   S   rD   Nzbatch2 must be a 3D tensorr)   r)   r)   r)   r*   r:     rE   c                      s   dj  d j  dj  S )Nz+Input dtypes must be the same, got: input: z
, batch1: z
, batch2: r@   r)   )batch1batch2rK   r)   r*   r:         c                	      &   d d d d  d d  d	S Nz@Expected size for first two dimensions of batch2 tensor to be: [r  z] but got: [r   rv   ].r)   r)   batch2_sizesbscontraction_sizer)   r*   r:     s   )rS   r  r.   r=   rO   r5   rJ   rI   )	rK   r  r  r  r  dim1dim2Zdim3batch1_sizesr)   )r  r  r  r  r  rK   r*   meta_baddbmm  s&   


r  c                C      t |  S r$   r.   r[   rf   )rK   r   r)   r)   r*   meta_bernoulli  s   r        ?c                 C   r   r$   r)   rK   r  r   r)   r)   r*   meta_bernoulli_  r   r  c                 C   r	  r$   r
  r  r)   r)   r*   meta_bernoulli_p  r  r  c                 C   s6   t |
|  k dd  t j| t jd}t | |fS )Nc                   S   rD   )NzJError in fused_moving_avg_obs_fake_quant_cpu: ch_axis must be < self.dim()r)   r)   r)   r)   r*   r:     rE   z6meta__fused_moving_avg_obs_fq_helper.<locals>.<lambda>r@   )r.   r=   rO   r[   bool)rK   Zobserver_onZfake_quant_onZrunning_minZrunning_maxscaleZ
zero_pointZaveraging_constZ	quant_minZ	quant_maxZch_axisZper_row_fake_quantZsymmetric_quantmaskr)   r)   r*   $meta__fused_moving_avg_obs_fq_helper  s   
r  c                    sT   t  dko  dk fdd  fdd}t    k| d S )Nrv   c                         d   d    dS )Nz1D tensors expected, but got zD and z	D tensorsrN   r)   rU   rK   r)   r*   r:     s    zdot_check.<locals>.<lambda>c                	      s.   d   d    d   d    d	S )Nz+inconsistent tensor size, expected tensor [z] and src [z.] to have thesame number of elements, but got rQ   z elements respectivelyrH   r)   r  r)   r*   numel_error   s   zdot_check.<locals>.numel_error)r.   r=   rO   rH   )rK   rU   r  r)   r  r*   	dot_check  s   r  c                 C   s   t | | | dS r   )r  rI   )rK   r   r)   r)   r*   meta_dot	  s   

r  c                    sn   t |  dkdd  t | dkdd  | j\ |j\t  k fdd | S )Nr   c                   S   rD   )Nza must be 2Dr)   r)   r)   r)   r*   r:     rE   zmeta_mm.<locals>.<lambda>c                   S   rD   )Nzb must be 2Dr)   r)   r)   r)   r*   r:     rE   c                	      s   d d  d d d	S )Nz/a and b must have same reduction dim, but got [r  z] X [r   r)   r)   ZM1ZM2NrV  r)   r*   r:     s    )r.   r=   rO   rJ   rI   abr)   r  r*   meta_mm  s   

r  c                    s0   |rt  fddtjD S tj S )Nc                 3   s&    | ]}| vrj | nd V  qdS )rv   NrO  rk   r   dimsrK   r)   r*   r    s   $ z+_compute_reduction_shape.<locals>.<genexpr>)r<   rz   rV   r   compute_reduction_output_shaperJ   )rK   r"  r   r)   r!  r*   r     s   r   strc                 C   s   t | tjjr| jjS dS )Nrx  )r   r.   Z_subclassesZ
FakeTensorZfake_devicetype)r   r)   r)   r*   ry  '  s   ry  input_tensorweightr|   r  dilationis_transposedgroupsoutput_paddingc                 C   s  dt dt dt dt dt dt fdd}dt dt dt dt dt d	t dt fd
d}	|jdd  }
| jdd  }|r<||jd  }n|jd }|jd | | jd krQtd| jd |g}t|tre|gt| }nt|dkrt|d gt| }t|tr|gt| }nt|dkr|d gt| }t|tr|gt| }nt|dkr|d gt| }d }|rt|tr|gt| }nt|dkr|d gt| }n|}tt|D ]2}|r||	|| || || |
| || ||  q|||| || || |
| ||  q|S )Nlnr  r   rZ  sr   c                 S   s$   | d|  ||d   d | d S )a  
        Formula to apply to calculate the length of some dimension of the output

        See: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
        Returns:
            The output length
        r   rv   r)   )r,  r  r   rZ  r-  r)   r)   r*   _formula8  s   $z+calc_conv_nd_return_shape.<locals>._formular&   c                 S   s(   | d | d|  ||d   | d S )a  
        Formula to apply to calculate the length of some dimension of the output
        if transposed convolution is used.
        See: https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
            op: output padding in that dim

        Returns:
            The output length
        rv   r   r)   )r,  r  r   rZ  r-  r&   r)   r)   r*   _formula_transposedI  s   (z6calc_conv_nd_return_shape.<locals>._formula_transposedr   rv   r   zInvalid channel dimensions)rK  rJ   RuntimeErrorr   r   rx   rz   r{   )r&  r'  r|   r  r(  r)  r*  r+  r.  r/  kernel_sizer"  Zout_channelsZ	ret_shapeZoutput_padding_listr   r)   r)   r*   calc_conv_nd_return_shape.  sZ   "
&




"r2  c                 C      t j| t jkS r$   r.   _prims_commonsuggest_memory_formatchannels_lasttenr)   r)   r*   is_channels_last     r:  biasc	              	      sH    fdd}	t  ||||||r|nd }
 |
}|j|	 d}|S )Nc                      s^   t  dkrt strtjS nt rtjS  jtjdr#tjS  jtjdr-tjS d S Nrx  r   )ry  r:  r.   r7  r  r   preserve_formatr)   r&  r'  r)   r*   pick_memory_format  s   z%meta_conv.<locals>.pick_memory_formatr   )r2  rI   r   )r&  r'  r<  r|   r  r(  r)  r+  r*  r@  	shape_outr   r)   r?  r*   	meta_conv  s   

rB  mkldnnc
              	   C   s6   t | ||||d|g }
| |
}tj}|j|d}|S )NFr   )r2  rI   r.   r7  r   )r&  r'  r<  r  r|   r(  r*  attrscalars	algorithmrA  r   Zout_memory_formatr)   r)   r*   meta_mkldnn_convolution_default  s   
rG  c                 C   s$   |  g | jd d |jd R S NrM   r   rI   rJ   )r&  r'  r<  rD  rE  rF  r)   r)   r*   meta_linear_pointwise_default  s   $rJ  mklc                 C   s$   |  g | jd d |jd R S rH  rI  )r&  Zpacked_weightZorig_weightr<  r   r)   r)   r*   meta_mkl_linear  s   rL  onednnc              	   C   s@   t | ||||	d|
d }| j||rtjnd d}|jtjd}|S )NFr@   r   )r2  rI   r.   float32r   r7  )rp   x_scalex_zpww_scalew_zpr<  r|   r  r(  r*  output_scaleoutput_zero_pointfp32_outputrD  rE  rF  rA  r   r)   r)   r*   meta_qconv2d_pointwise  s   
rW  c                 C   s4   t | j}|jd |d< | j||	rtjnd d}|S )Nr   rM   r@   )ry   rJ   rI   r.   rN  )rp   rO  rP  rQ  rR  rS  r<  rT  rU  rV  Zpost_op_nameZpost_op_argsZpost_op_algorithmr   r   r)   r)   r*   meta_qlinear_pointwise  s   
rX  c                    s4   t   koj k fdd d S )Nc                      s8   d  d d dd   d dj   S )NzExpected a tensor of dimension z and tensor.size[z] == r  zbut got : dimension z] = rO   rJ   r)   rO   dim_sizerS   r   r)   r*   r:   0  s    z check_dim_size.<locals>.<lambda>)r.   r=   rO   rJ   )r   rO   r[  rS   r)   rZ  r*   check_dim_size-  s   r\  r)   rw   c                 C   sb  dd }|d|\}}	t t|dv dd  t|dkr#||	}
}nt|dkr3|d |d }
}n|d	|\}
}|d
|\}}t |d u pJ|dkdd  |  dkrZ| dnd}| d}| d}| d}t||||
d|}t||	||d|}t| }t| ||	|
|||dd|||||| |  dkr|||g}n||||g}t j	|| j
| j|dS )Nc                    D   t t|dv  fdd |d }t|dkr|n|d }||fS )Nrv   r   c                      rj  )Nzavg_pool2d: 4 must either be a single int, or a tuple of two intsr)   r)   r   r)   r*   r:   B  rC   z1meta_avg_pool2d.<locals>.unpack.<locals>.<lambda>r   rv   r.   r=   rx   r   r   HWr)   r`  r*   unpack?     

zmeta_avg_pool2d.<locals>.unpackr1  r   rv   r   c                   S   rD   NzOavg_pool2d: stride must either be omitted, a single int, or a tuple of two intsr)   r)   r)   r)   r*   r:   K  rE   z!meta_avg_pool2d.<locals>.<lambda>r   rv   r|   r  c                   S   rD   Nzdivisor must be not zeror)   r)   r)   r)   r*   r:   X  rE   r  r   rM   rP   r5   ra   r   )r.   r=   rx   rO   rS   pooling_output_shaper   r6  pool2d_shape_checkrb   r5   ra   )r   r1  r|   r  	ceil_modecount_include_paddivisor_overridere  kHkWdHdWpadHpadWr  nInputPlaneinputHeight
inputWidthoutputHeightoutputWidthr   rS   r)   r)   r*   meta_avg_pool2d5  sb   
	




r}  c                 C   sj   t | ||||||dd|	|
|||| |  }|	}t|||d | t|||d | t|||d | d S )Nrv   rP   r   )rn  rO   r\  )r   Z
gradOutputr  rr  rs  rt  ru  rv  rw  rx  ry  rz  r{  r|  
mem_formatrV   nOutputPlaner)   r)   r*   avg_pool2d_backward_shape_check  s,   r  c                 C   s  t t|dkpt|dkdd  |d }t|dkr|n|d }	t t|dkp5t|dkp5t|dkdd  t|dkrB|n|d }
t|dkrN|	nt|dkrV|
n|d }t t|dkpgt|dkdd  |d }t|dkrx|n|d }t |d u p|dkdd  |j}| d	kr|d
 nd}|d }|d }|d }t||||
d|}t||	||d|}t|}t|| |||	|
||||||||| t j	||j
|j|dS )Nrv   r   c                   S   rD   )NzKavg_pool2d: kernel_size must either be a single int, or a tuple of two intsr)   r)   r)   r)   r*   r:     rE   z*meta_avg_pool2d_backward.<locals>.<lambda>r   c                   S   rD   rh  r)   r)   r)   r)   r*   r:     rE   c                   S   rD   )NzGavg_pool2d: padding must either be a single int, or a tuple of two intsr)   r)   r)   r)   r*   r:     rE   c                   S   rD   ri  r)   r)   r)   r)   r*   r:     rE   r  rj  rk  r   rM   rl  )r.   r=   rx   rJ   rO   rm  r   r6  r  rb   r5   ra   )ZgradOutput_r   r1  r|   r  ro  rp  rq  rr  rs  rt  ru  rv  rw  
input_sizer  rx  ry  rz  r{  r|  r~  r)   r)   r*   meta_avg_pool2d_backward  sj   "(
r  c                 C   s
  t t|dv dd  |d }t|dkr|n|d }t|dkr$|n|d }	t | p2t|dv dd  |s;|n|d }
|sC|nt|dkrK|
n|d }|sS|	nt|dkr[|
n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t | jd	v d
d  t | p|dkdd  | d}| d}| d}| d}| d}t||||
d|}t||||d|}t||	||d|}t| ||||	|
|||||ddd||||||ddd | jdkr| ||||fS | |||||fS )Nrv   rP   c                   S   rD   NzFavg_pool3d: kernel_size must be a single int, or a tuple of three intsr)   r)   r)   r)   r*   r:   	  rE   z!meta_avg_pool3d.<locals>.<lambda>r   rv   r   c                   S   rD   NzJavg_pool3d: stride must be omitted, a single int, or a tuple of three intsr)   r)   r)   r)   r*   r:   	  rE   c                   S   rD   NzBavg_pool3d: padding must be a single int, or a tuple of three intsr)   r)   r)   r)   r*   r:   	  rE   r  r  c                   S   rD   Nz9non-empty 4D or 5D (batch mode) tensor expected for inputr)   r)   r)   r)   r*   r:   	  rE   c                   S   rD   ri  r)   r)   r)   r)   r*   r:   "	  rE   rj  rk  r   rM   zavg_pool3d()T)check_input_sizer  )r.   r=   rx   rV   rS   rm  pool3d_shape_checkrI   )r   r1  r|   r  ro  rp  rq  kTrr  rs  dTrt  ru  padTrv  rw  r  nslicesitimeiheightiwidthotimeoheightowidthr)   r)   r*   meta_avg_pool3d  s   
  






r  c                 C   s  t t|dv dd  |d }t|dkr|n|d }	t|dkr$|n|d }
t | p2t|dv dd  |s;|n|d }|sC|	nt|dkrK|n|d }|sS|
nt|dkr[|n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t |jd	v d
d  t | p|dkdd  |d}|d}|d}|d}t||||d|}t||	||d|}t||
||d|}t|| |||	|
||||||||||||d ||jS )Nr  c                   S   rD   r  r)   r)   r)   r)   r*   r:   \	  rE   z*meta_avg_pool3d_backward.<locals>.<lambda>r   rv   r   c                   S   rD   r  r)   r)   r)   r)   r*   r:   d	  rE   c                   S   rD   r  r)   r)   r)   r)   r*   r:   l	  rE   r  c                   S   rD   r  r)   r)   r)   r)   r*   r:   t	  rE   c                   S   rD   ri  r)   r)   r)   r)   r*   r:   y	  rE   rj  rk  r   rM   zavg_pool3d_backward())	r.   r=   rx   rV   rS   rm  avg_pool3d_backward_shape_checkrI   rJ   )r  r   r1  r|   r  ro  rp  rq  r  rr  rs  r  rt  ru  r  rv  rw  r  r  r  r  Zotime_for_shape_checkZoheight_for_shape_checkZowidth_for_shape_checkr)   r)   r*   meta_avg_pool3d_backwardN	  st   
  




r  c                    sZ   t  jdkp jdk fdd  jd d t| }t }t j| j j	|dS )NrP   r  c                      r?   )Nz"Expected 3D or 4D tensor, but got rO  r)   r\   r)   r*   r:   	  rC   z*meta_adaptive_avg_pool2d.<locals>.<lambda>r   rl  )
r.   r=   rV   rJ   r<   r   r6  rb   r5   ra   )rK   output_sizer   r   r)   r\   r*   meta_adaptive_avg_pool2d	  s   

r  c                    s@   t  jdkp jdk fdd   jd d t| S )Nr  r  c                      r?   )Nz"Expected 4D or 5D tensor, but got rO  r)   r\   r)   r*   r:   	  rC   z*meta_adaptive_avg_pool3d.<locals>.<lambda>rk  )r.   r=   rV   rI   rJ   r<   )rK   r  r)   r\   r*   meta_adaptive_avg_pool3d	  s
   
r  c                    s    j }td|D ]t dk fdd qt|dkp$|dkfdd tj jk fdd tj}trDtj}	j
j|d	S )
Nrv   r   c                      s   d j  d dS )Nz{adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero                       size for non-batch dimensions,  with dimension  being emptyrO  r)   )grad_outr   r)   r*   r:   	  s
    z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>rP   r  c                      r?   )NzBadaptive_avg_pool2d_backward(): Expected 3D or 4D tensor, but got rO  r)   r\   r)   r*   r:   	  rC   c                      r:  Nzexpected dtype z! for `grad_output` but got dtype r@   r)   )r  rK   r)   r*   r:   	  rQ  r   )rV   rz   r.   r=   rS   r5   r   r:  r7  rI   rJ   r   )r  rK   rV   r   r)   )r  r   rK   r*   "meta__adaptive_avg_pool2d_backward	  s$   

r  c                 C   s   t | d tj|tjdS )NZadaptive_avg_pool3d_backwardr   )!_adaptive_pool_empty_output_checkr.   r[   r2  r  rK   r)   r)   r*   "meta__adaptive_avg_pool3d_backward	  s   
r  r  c                    s<   j }td|D ]tdk fdd qd S )Nrv   r   c                      s     dj  d dS )Nzc(): Expected grad_output to have non-zero size for non-batch dimensions, but grad_output has sizes r  r  rO  r)   r  r  r   r)   r*   r:   	  s
   z3_adaptive_pool_empty_output_check.<locals>.<lambda>)rV   rz   r.   r=   rS   )r  r  rV   r)   r  r*   r  	  s   r  c                    s"  j }t|dv fdd td|D ] t dk fdd qtt|dkdd  d}d}d}j d	krGd}|d7 }|d }|\}}j d
krm|||f}|}	j|tjd}
|	|
fS ||||f}t	}|j
|d}	j|tjdj
|d}
|	|
fS )NrP   r  c                      r?   )Nz:adaptive_max_pool2d(): Expected 3D or 4D tensor, but got: rO  r)   r   r)   r*   r:   	  rC   z*meta_adaptive_max_pool2d.<locals>.<lambda>rv   r   c                         dj  d  dS )Nzjadaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  rO  r)   r   r   r)   r*   r:   	  
   r   c                   S   rD   )NzCadaptive_max_pool2d(): internal error: output_size.size() must be 2r)   r)   r)   r)   r*   r:   	  rE   r  rP   r@   r   )rV   r.   r=   rz   rS   rx   rI   rc   r   r6  r   )r   r  rV   ZdimHsizeBsizeDosizeHosizeWrW   r   r_   r   r)   r  r*   meta_adaptive_max_pool2d	  sD   







r  c                    sd    j }t|dv  fdd t d tj jk fdd t}jj	|dS )Nr  c                      r?   )NzKadaptive_max_pooling2d_backward(): Expected 3D or 4D grad_output, but got: rO  r)   r  r)   r*   r:   
  rC   z3meta_adaptive_max_pool2d_backward.<locals>.<lambda>adaptive_max_pool2d_backwardc                      r:  r  r@   r)   r  r   r)   r*   r:    
  rQ  r   )
rV   r.   r=   r  r5   r   r6  rI   rJ   r   )r  r   r_   rV   r   r)   r  r*   !meta_adaptive_max_pool2d_backward
  s   



r  c                    s   j }t|dv fdd td|D ] t dk fdd qtt|dkdd  d}d}d}|d	krFd}|d7 }|}|\}}}|d
kr[||||f}	n|||||f}	|	}
j|	tjd}|
|fS )Nr  c                      r?   )Nz:adaptive_max_pool3d(): Expected 4D or 5D tensor, but got: rO  r)   r  r)   r*   r:   -
  rC   z*meta_adaptive_max_pool3d.<locals>.<lambda>rv   r   c                      r  )Nzjadaptive_max_pool3d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  rO  r)   r  r)   r*   r:   2
  r  rP   c                   S   rD   )NzCadaptive_max_pool3d(): internal error: output_size.size() must be 3r)   r)   r)   r)   r*   r:   :
  rE   r  r  r@   )rV   r.   r=   rz   rS   rx   rI   rc   )r   r  rV   ZdimDr  r  ZosizeTr  r  rW   r   r_   r)   r  r*   meta_adaptive_max_pool3d'
  s8   





r  c                 C   s   t | d ||jS )Nadaptive_max_pool3d_backward)r  rI   rJ   )r  r   r_   r)   r)   r*   !meta_adaptive_max_pool3d_backwardS
  s   
r  c                 C   s   |d u rt d| |S )Nz:cannot repeat_interleave a meta tensor without output_size)r0  rI   )repeatsr  r)   r)   r*   meta_repeat_interleave_TensorZ
  s   
r  c                 C   s:   | j jsJ |j jsJ t| j|j}| j|t| j dS r   )r5   r   r   rJ   rI   r   )realimagrW   r)   r)   r*   meta_complexa
  s   r  c                 C   s   t | |S r$   )r   clone_preserve_stridesr   )rK   r5   r)   r)   r*   
view_dtypej
  s   r  c                 C   sv   | j s	t| |S |  r#| rt| |  S t|  |S | r1t| |  S t| | | dS r   )r   r.   dotr  vdotr`  r  rI   rK   rU   r)   r)   r*   r  o
  s   

r  )
fill_valuerS   r  c                C   s   | j ||  ftjdS r   )rI   rO   r.   rF   )rK   rS   r  r)   r)   r*   nonzero_static
  s   r  c              
      s  t tdd  g }tD ]q\ d ur|t jt jt jt jt jfv dd  jt jt jfv rv }t	|t 
j jkfdd tjD ]#t 
j j  k fdd ||d qQq| q| q|t t	jkfdd dd lm} t|j t	jk rd  t	jk sd}d	}D ]|dkrǈd urd}q|dkr҈d u rd
}qd ur nqd}|sg }g }tD ]\ d ur|  | qtD ]\ d u r|  | q||g }	g }
g }tD ]&\}d u rB|r8|
j|  q"|	j|  q"tj}q"|	| |
 S )Nc                   S   rD   )Nz#at least one index must be providedr)   r)   r)   r)   r*   r:   
  rE   z#meta_index_Tensor.<locals>.<lambda>c                   S   rD   )Nz?tensors used as indices must be long, int, byte or bool tensorsr)   r)   r)   r)   r*   r:   
  rE   c                      r?   )N)too many indices for tensor of dimension r.  r)   r\   r)   r*   r:   
  rC   c                	      s$   dj  d  dj  d  S )NzThe shape of the mask z
 at index z0 does not match the shape of the indexed tensor rO  r)   )r   rB   jrZ  rK   r)   r*   r:   
  s
    rv   c                      s   dj  dt  dS )Nr  z (got r  )rV   rx   r)   )r_   rK   r)   r*   r:   
  r  r   Fr   T)r.   r=   r  	enumerater5   rF   rK  int8nonzerorx   rG   rV   rz   rJ   r{   selecttorch._refsZ_refsry   r   r~   rI   )rK   r_   r  r  refsstateZhas_contiguous_subspacer"  Ztransposed_indicesZbefore_shapeZafter_shapeZreplacement_shaperO   r)   )r   rB   r_   r  rZ  rK   r*   meta_index_Tensor
  s   








r  c                 C   sT   d }d }d }|
d r|  | }|
d r|  | }|
d r%|  |}|||fS )Nr   rv   r   )rI   rS   )Zgrad_output_input_Zweight_Zbias_sizes_optr|   r  r(  Z
transposedr+  r*  output_maskZbackend_grad_inputZbackend_grad_weightZbackend_grad_biasr)   r)   r*   meta_convolution_backward
  s   

r  c                   s     d} d}| ||f} t  dkdd  t dkdd  t  d dk fdd t  d dk fd	d t|  d|ko^|  d|kd
d  | |   S )Nrv   r   rP   c                   S   rD   r  r)   r)   r)   r)   r*   r:     rE   zmeta_addbmm.<locals>.<lambda>c                   S   rD   r  r)   r)   r)   r)   r*   r:     rE   r   c                         d  d d d S )Nz8batch1 and batch2 must have same number of batches, got r   rQ   rR   r)   r  r  r)   r*   r:     r  c                
      6   d  d d  d d d d d d	S )Nz#Incompatible matrix sizes for bmm (rv   rp   r   rQ   r  rR   r)   r  r)   r*   r:     
   c                   S   rD   )Nz.self tensor does not match matmul output shaper)   r)   r)   r)   r*   r:     rE   )rS   r  r.   r=   rO   rI   )rK   r  r  r  r  r  r  r)   r  r*   meta_addbmm  s$   

r  c                       t t t fdd d S )Nc                         dt   S NzExpect List[Tensor] but got r%  r)   r\   r)   r*   r:   ,  r  z&meta__foreach_unaop_.<locals>.<lambda>r.   r=   r   r   r\   r)   r\   r*   meta__foreach_unaop_!     	
r  c                    (   t t t fdd dd  D S )Nc                      r  r  r  r)   r\   r)   r*   r:   ;  r  z%meta__foreach_unaop.<locals>.<lambda>c                 S      g | ]}t |qS r)   r.   r[   rk   r-  r)   r)   r*   rm   =  r  z'meta__foreach_unaop.<locals>.<listcomp>r  r\   r)   r\   r*   meta__foreach_unaop0  
   	
r  c                    sX   t ttot t fdd t tdko"tt k fdd d S )Nc                         dt  dt   dS )Nz9The first two arguments of must be List[Tensor], but got rQ   .r  r)   r  r)   r*   r:   C  
   z3_check_foreach_binop_tensor_lists.<locals>.<lambda>r   c                      r  )Nz>self and other must be non-empty and match in length, but got rQ   r  r  r)   r  r)   r*   r:   J  r  )r.   r=   r   r   rx   r  r)   r  r*   !_check_foreach_binop_tensor_lists@  s   r  c                 C   s   t | | dd | D S )Nc                 S   r  r)   r  r  r)   r)   r*   rm   ]  r  z,meta__foreach_binop_list.<locals>.<listcomp>r  rK   rU   r  r)   r)   r*   meta__foreach_binop_listQ  s   
r  c                 C      t | | d S r$   r  r  r)   r)   r*   meta__foreach_binop__list`  s   r  c                    r  )Nc                         dt   dS Nz4The first argument of must be List[Tensor], but got r  r  r)   r\   r)   r*   r:   y  r;   z-meta__foreach_binop__scalar.<locals>.<lambda>r  rK   scalarr)   r\   r*   meta__foreach_binop__scalarn  r  r  c                    r  )Nc                      r  r  r  r)   r\   r)   r*   r:     r;   z,meta__foreach_binop_scalar.<locals>.<lambda>c                 S   r  r)   r  r  r)   r)   r*   rm     r  z.meta__foreach_binop_scalar.<locals>.<listcomp>r  r  r)   r\   r*   meta__foreach_binop_scalar}  r  r  c                    st   t tdd  fD  fdd t t dkdd  t t tko3t tkdd  d S )Nc                 s       | ]}t |tV  qd S r$   r   r   rk   lr)   r)   r*   r        z/meta__foreach_addcop__scalar.<locals>.<genexpr>c                      "   dt   dt  dt  S )Nz?All arguments of _foreach_addc*_ must be List[Tensor], but got r  , and r  r)   rK   tensor1tensor2r)   r*   r:        z.meta__foreach_addcop__scalar.<locals>.<lambda>r   c                   S   rD   Nz$input tensor list must not be empty.r)   r)   r)   r)   r*   r:     rE   c                   S   rD   Nz0All input tensor lists must have the same lengthr)   r)   r)   r)   r*   r:     rE   r.   r=   r  rx   rK   r  r  r  r)   r  r*   meta__foreach_addcop__scalar  s   r  c                 C   r  r$   r  )rK   rU   r  r)   r)   r*   meta__foreach_lerp__scalar  s   r  c                    s~   t tdd  fD  fdd t t dkdd  t t tko3t tkdd  dd	  D S )
Nc                 s   r  r$   r  r  r)   r)   r*   r    r  z.meta__foreach_addcop_scalar.<locals>.<genexpr>c                      r  )Nz,All arguments must be List[Tensor], but got r  r  r  r)   r  r)   r*   r:     r  z-meta__foreach_addcop_scalar.<locals>.<lambda>r   c                   S   rD   r  r)   r)   r)   r)   r*   r:     rE   c                   S   rD   r  r)   r)   r)   r)   r*   r:     rE   c                 S   r  r)   r  r  r)   r)   r*   rm     r  z/meta__foreach_addcop_scalar.<locals>.<listcomp>r  r  r)   r  r*   meta__foreach_addcop_scalar  s   r   c                    r  )Nc                      r  Nz'exponent must be a tensor list but got r  r)   exponentr)   r*   r:     r  z5meta__foreach_pow_scalar_and_tensor.<locals>.<lambda>c                 S   r  r)   r  )rk   er)   r)   r*   rm     r  z7meta__foreach_pow_scalar_and_tensor.<locals>.<listcomp>r  )rK   r  r)   r  r*   #meta__foreach_pow_scalar_and_tensor  s
   
r  c                    s   t tdd fD ot t j fdd t tdkdd  t ttko:ttkdd  d S )Nc                 s   r  r$   r  r  r)   r)   r*   r    r  z.meta__foreach_addcop_tensor.<locals>.<genexpr>c                	      s,   dt  dt  dt  dt   S )Nzi_foreach_addc*_ op expects arguments of type: List[Tensor], List[Tensor], List[Tensor], tensor, but got: r  r  r  r)   rE  rK   r  r  r)   r*   r:     s   z-meta__foreach_addcop_tensor.<locals>.<lambda>r   c                   S   rD   r  r)   r)   r)   r)   r*   r:     rE   c                   S   rD   r  r)   r)   r)   r)   r*   r:     rE   )r.   r=   r  r   r
   rx   )rK   r  r  rE  r)   r  r*   meta__foreach_addcop_tensor  s   
r  c                 C   r  r$   r  )rK   r   r   r)   r)   r*   meta__foreach_copy_inplace  s   r  )
grad_scale	found_infc       	            s4   | |||||fD ] t t t fdd qd S )Nc                      r  r  r  r)   r  r)   r*   r:     r  z#meta__fused_adam_.<locals>.<lambda>r  )rK   gradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepslrbeta1beta2weight_decayepsamsgradmaximizer	  r
  r)   r  r*   meta__fused_adam_  s   
r  c       	            sZ   | |||||fD ] t t t fdd qdd }|| ||||||||fS )Nc                      r  r  r  r)   r  r)   r*   r:     r  z"meta__fused_adam.<locals>.<lambda>c                 S   s   dd | D S )Nc                 S   r  r)   r  )rk   r  r)   r)   r*   rm     r  z=meta__fused_adam.<locals>.empty_like_list.<locals>.<listcomp>r)   )Ztensor_listr)   r)   r*   empty_like_list  s   z)meta__fused_adam.<locals>.empty_like_listr  )rK   r  r  r  r  r  r  r  r  r  r  r  r  r	  r
  r  r)   r  r*   meta__fused_adam  s   
r  c                    s   t   dkdd  t  dkdd  t  jt ju  fdd t jt ju fdd t  ddk fd	d  j ddft jd
S )Nr   c                   S   rD   )Nza must be a 2D tensorr)   r)   r)   r)   r*   r:   #  rE   zmeta__int_mm.<locals>.<lambda>c                   S   rD   )Nzb must be a 2D tensorr)   r)   r)   r)   r*   r:   $  rE   c                      r?   )Nzexpected self to be int8, got r@   r)   )r  r)   r*   r:   '  rC   c                      r?   )Nzexpected mat2 to be int8, got r@   r)   )r  r)   r*   r:   +  rC   rv   r   c                
      r  )Nz'Incompatible matrix sizes for _int_mm (r   rp   rv   rQ   r  rR   r)   r  r)   r*   r:   /  r  r@   )r.   r=   rO   r5   r  rS   rI   r6  r  r)   r  r*   meta__int_mm   s   



 r  c           	         s  t  dkfdd t  dkfdd t ddkfdd t tjdd  t tjdd  t |d	kd
d  t  dv  fdd d}d}jd d }jd d }tt 	||}|
||g |S )Nr   c                         d    dS )Nz1cdist only supports at least 2D tensors, X1 got: DrN   r)   )x1r)   r*   r:   ;  r;   z$meta_cdist_forward.<locals>.<lambda>c                      r  )Nz1cdist only supports at least 2D tensors, X2 got: r  rN   r)   )x2r)   r*   r:   ?  r;   rM   c                      r  )Nz4X1 and X2 must have the same number of columns. X1: rM   z X2: rR   r)   )r  r  r)   r*   r:   C  r  c                   S   rD   )Nz=cdist only supports floating-point dtypes, X1 got: {x1.dtype}r)   r)   r)   r)   r*   r:   G  rE   c                   S   rD   )Nz=cdist only supports floating-point dtypes, X2 got: {x2.dtype}r)   r)   r)   r)   r*   r:   K  rE   r   c                   S   rD   )Nz)cdist only supports non-negative p valuesr)   r)   r)   r)   r*   r:   M  rE   )Nrv   r   c                      r  )Nz%possible modes: None, 1, 2, but was: r)   r)   )compute_moder)   r*   r:   P  r  r   )r.   r=   rO   rS   r   is_float_dtyper5   rJ   ry   broadcast_shapesextendrI   )	r  r  r  r   r1r2batch_tensor1batch_tensor2r   r)   )r   r  r  r*   meta_cdist_forward7  s@   









r(  c                 C   s   |j d }|j d }|j d }|j d d }|j d d }	tt||	}
|
d d  }|||g t|
}|dksG|dksG|dksG|dkrLt|S |t|j krX||}tj	|tj
dS )NrM   r   r   r   )rJ   ry   r.   r"  r#  mathprod
zeros_liker  r[   r   )r  r  r  r  Zcdistc1r$  r%  r&  r'  r  Ztensor1_expand_sizeZbatch_productr)   r)   r*   meta_cdist_backward[  s   



 

r-  c	                    s<  t  jt jt jfv  fdd t jt jt jfv fdd t tjfdd d}	|rEt |	dkdd  |	d8 }	|	d}
t	d\}}}d urt ||kd	d  t jjkfd
d t j
dkfdd t    k fdd fdddd fdd}tdkrʈ  d}  }||krĈ |	d}nT d}nN||
|}|||fv s|s d}nd}|	}jd }||kr|rt |dkdd  |d8 }|jd }n| }|
|||fS )Nc                      r?   )Nz(expected indices to be long or int, got r@   r)   r_   r)   r*   r:     rC   z$meta_embedding_bag.<locals>.<lambda>c                      r?   )Nz(expected offsets to be long or int, got r@   r)   )offsetsr)   r*   r:     rC   c                      r?   )Nz/expected weight to be floating point type, got r@   r)   )r'  r)   r*   r:     rC   r   rv   c                   S   rD   Nz1include_last_offset: numBags should be at least 1r)   r)   r)   r)   r*   r:     rE   rP   c                   S   rD   )Nz@embedding_bag: per_sample_weights only supported with mode='sum'r)   r)   r)   r)   r*   r:     rE   c                      r  )Nzexpected weight (z) and per_sample_weights (z) to have same dtyper@   r)   )per_sample_weightsr'  r)   r*   r:     r  c                      r,  )Nz1expected per_sample_weights to be 1D tensor, got r  r.  r)   )r1  r)   r*   r:     r  c                      r  )Nz%expected per_sample_weights.numel() (z$ to be the same as indices.numel() (r  r  r)   )r_   r1  r)   r*   r:     s   c                    s    | ||o| ddkS Nr   rv   r|   r   r  r   padding_idx)is_fast_path_index_selectr)   r*   is_fast_path_index_select_scale  s   z;meta_embedding_bag.<locals>.is_fast_path_index_select_scalec                 S   s<   | j tjks| j tjko| ddko|ddko|dk S r   )r5   r.   r1   r/   r|   )r   r   r5  r)   r)   r*   r6    s   z5meta_embedding_bag.<locals>.is_fast_path_index_selectc                    s"   |d ur| |||S  | ||S r$   r)   r4  )r6  r7  r)   r*   is_fast_path  s   z(meta_embedding_bag.<locals>.is_fast_pathcpuc                   S   rD   r0  r)   r)   r)   r)   r*   r:     rE   )r.   r=   r5   rF   rK  r   r!  rS   rI   rz   rV   rH   ry  rJ   )r'  r_   r/  Zscale_grad_by_freqrg  sparser1  Zinclude_last_offsetr5  Znum_bagsr   ZMODE_SUMZ	MODE_MEANZMODE_MAXr8  
offset2bagbag_sizemax_indicesZfast_path_sumZnumBagsr)   )r_   r6  r7  r/  r1  r'  r*   meta_embedding_bagr  s~   










r>  c                 G   sB   t | ||g|R  \}}}}t|dkr|| }||||fS )Nr9  )r>  ry  rI   rS   )r'  r_   r/  argsr   r;  r<  r=  r)   r)   r*   meta_embedding_bag_forward_only  s   r@  c                 C   s.   |r|S | j js| j jr| j S |rtjS | j S r$   )r5   r   r   r.   rF   )r   r5   promote_int_to_longr)   r)   r*   _get_reduction_dtype  s   rB  r@   c                C   s6   t | |dd}t| j|}t| ||}| j||dS )NT)rA  r@   )rB  r   r   rJ   r   rI   )r   r"  r   r5   Zoutput_dtyper   r)   r)   r*   meta_nansum  s   rC  c                 C   s$   t | jtt|  }| |S r$   )r   r#  rJ   r<   rz   rO   rI   )r   r   r)   r)   r*   meta_median  s   
rD  c                 C   sL   t | dkrtd t| j|f}t| ||}| || j|tjdfS )Nrx  zmedian CUDA with indices outputr@   )	ry  r   alert_not_deterministicr   rJ   r   rI   r.   rF   )r   rO   r   r   r)   r)   r*   meta_median_mode_dim  s   
rF  c                 C   r   r$   r)   r\   r)   r)   r*   meta_logical_not_  r   rG  c                    sd   t t|  kdd  t|   }d| t| j   fddttD }| |S )Nc                   S   rD   )NzZNumber of dimensions of repeat dims can not be smaller than number of dimensions of tensorr)   r)   r)   r)   r*   r:   "  rE   zmeta_repeat.<locals>.<lambda>rv   c                    s   g | ]
} | |  qS r)   r)   r   Zpadded_sizer  r)   r*   rm   )  r  zmeta_repeat.<locals>.<listcomp>)r.   r=   rx   rO   r<   rJ   rz   rI   )rK   r  Znum_new_dimensionsZtarget_sizer)   rI  r*   meta_repeat  s   
rJ  c                 C   r   r$   r)   r\   r)   r)   r*   
meta_zero_-  r   rK  c                 C      t |tjrt| j|j | S r$   r   r.   r
   r>   rJ   r  r)   r)   r*   meta_binop_inplace2  s   rN  c                 C   rL  r$   rM  r  r)   r)   r*   meta_binop_inplace_alphaC  s   	rO  c                 K      t | tjdS NZtype_promotion)r   r   DEFAULT)rK   kwargsr)   r)   r*   
meta_roundQ  s   rU  c                    sl   t tj fdd tt jr&t tj fdd d S t tt fdd d S )Nc                           dj  S )Nz7: Expected input tensor to have an integral dtype. Got r@   r)   )r  rK   r)   r*   r:   [  r;   z#shift_dtype_check.<locals>.<lambda>c                      rV  )Nz6: Expected shift value to have an integral dtype. Got r@   r)   r  r   r)   r*   r:   `  r;   c                      s     d S )Nz): Expected shift value to be an int. Got r)   r)   rW  r)   r*   r:   e  r  )r.   r=   r   rR  r5   r   r
   r   r  rK   r   r)   rX  r*   shift_dtype_checkX  s   

rY  c                 C   L   t d| | t| tjd}|  dkr$t|tjr$tj|j	|j
|jdS |S )NrshiftrR  r   r`   rY  r   r   rS  rO   r   r.   r
   rb   rJ   ra   r5   rK   rU   Zelement_wiser)   r)   r*   meta_rshiftsi     r^  c                 C   rZ  )NlshiftrR  r   r`   r\  r]  r)   r)   r*   meta_lshiftsw  r_  ra  c                 C      |  | jS r$   rI  r\   r)   r)   r*   	meta_zero     rc  c                 C   r   r$   r)   rK   r   r)   r)   r*   
meta_fill_  r   rf  c                 C   
   t | S r$   r  re  r)   r)   r*   	meta_fill     
rh  c                 C   r   r$   r)   r\   r)   r)   r*   
meta_relu_  r   rj  c                 C   rg  r$   r  rK   r_   r^   
accumulater)   r)   r*   meta_index_put  ri  rm  c                 C   s   t | j|j | S r$   )r>   rJ   )rK   r  valuer)   r)   r*   meta_masked_fill_  s   ro  c                 C   r   r$   r)   rk  r)   r)   r*   meta_index_put_  r   rp  c                 C   rb  r$   )viewrJ   r\   r)   r)   r*   
meta_alias  rd  rr  c                    s   t |  dkdd  t | dkdd  |  }|  |d |d |d } d }||ft  d koB d k fdd |}|sqd urqt  dkd	d  t  kfd
d |S )NrP   c                   S   rD   r  r)   r)   r)   r)   r*   r:     rE   z)common_meta_baddbmm_bmm.<locals>.<lambda>c                   S   rD   r  r)   r)   r)   r)   r*   r:     rE   r   r   rv   c                	      r  r  r)   r)   r  r)   r*   r:     s    c                   S   rD   )Nzself must be a 3D tensorr)   r)   r)   r)   r*   r:     rE   c                      s   d  d   S )Nz*Expected an input tensor shape with shape z but got shape: rR   r)   )r  self_baddbmmr)   r*   r:     rQ  )r.   r=   rO   rS   rI   )r  r  Zis_bmmrs  r  Zres_rowsZres_colsr   r)   )r  r  r  r  rs  r*   common_meta_baddbmm_bmm  s*   


rt  c                 C   s   t | |dS )NT)rt  )rK   Zmat2r)   r)   r*   meta_bmm  rd  ru  c                 C   s<   | | }| | }|dkrt |dk t |dk kr|d8 }|S r2  )r  )rp   yqri  r)   r)   r*   div_rtn  s
    rx  c                 C   sZ   t | | | ||d   d |r|d nd |d }|r+|d | | | kr+|d8 }|S r   )rx  )	inputSize
kernelSizer  r  r|   r(  ro  Z
outputSizer)   r)   r*   pooling_output_shape_pad_lr  s*   
	r{  c                    s^   t |dkdd  t dkfdd t  d k fdd t|  |||S )Nr   c                   S   rD   )Nzstride should not be zeror)   r)   r)   r)   r*   r:     rE   z&pooling_output_shape.<locals>.<lambda>c                      r  )Nz'pad must be non-negative, but got pad: r)   r)   )padr)   r*   r:     r  r   c                      r7   )Nz7pad should be at most half of kernel size, but got pad=z and kernel_size=r)   r)   rz  r|  r)   r*   r:     r;   )r.   r=   r{  )ry  rz  r|  r|   r(  ro  r)   r}  r*   rm    s   
rm  c              	      sN     }tdkodkdd  t|dko|dkdd  t|dko+|dkdd   ddko= ddk}|tjkrWt|dkoQ|oQ d	dkd
d  n"t|d	krf ddkrf|pr|dkor|or d	dk fdd td 
kod 	k	
fdd tdkodkfdd d S )Nr   c                   S   rD   )NzCkernel size should be greater than zero, but got kH: {kH}, kW: {kW}r)   r)   r)   r)   r*   r:     rE   z$pool2d_shape_check.<locals>.<lambda>c                   S   rD   )Nz>stride should be greater than zero, but got dH: {dH}, dW: {dW}r)   r)   r)   r)   r*   r:     rE   c                   S   rD   )Nz\dilation should be greater than zero, but got dilationH: {dilationH}, dilationW: {dilationW}r)   r)   r)   r)   r*   r:     rE   rv   r   r  rP   c                   S   rD   )NzExpected 4D (batch mode) tensor expected for input with channels_last layout with optional 0 dim batch size for input, but got: {input.size()}r)   r)   r)   r)   r*   r:   %  rE   c                         d    S )NzYExpected 3D or 4D (batch mode) tensor with optional 0 dim batch size for input, but got: rR   r)   r  r)   r*   r:   ,  r  c                      s   d d d d  S )NzKpad should be smaller than or equal to half of kernel size, but got padW = z	, padH = z, kW = z, kH = r)   r)   )rr  rs  rv  rw  r)   r*   r:   1  s    c                      s*   d d  d d d d dS NzGiven input size: (rp   z). Calculated output size: (z). Output size is too smallr)   r)   )ry  rz  rx  r  r{  r|  r)   r*   r:   7  s    )rO   r.   r=   rS   r7  )r   rr  rs  rt  ru  rv  rw  	dilationH	dilationWrx  ry  rz  r{  r|  r   rV   Z
valid_dimsr)   )r   ry  rz  rr  rs  rx  r  r{  r|  rv  rw  r*   rn    sB   

rn  r  r  rr  rs  r  rt  ru  pTpHpW	dilationTr  r  r  r  r  r  r  r  r  c              
      s  	j }tdkodkodkfdd tdko&dko& dk fdd tdko<dko<dkfdd t|dv 	fdd t|D ]|dkradkraqVt	dk	fd	d qV|rt
kokok
fd
d td kod kod kfdd tdkodkodk
fdd d S )Nr   c                         d d  d S )Nz5kernel size should be greater than zero, but got kT: z, kH: z, kW: r)   r)   )rr  r  rs  r)   r*   r:   Y     z$pool3d_shape_check.<locals>.<lambda>c                      r  )Nz0stride should be greater than zero, but got dT: z, dH: z, dW: r)   r)   )rt  r  ru  r)   r*   r:   `  r  c                      r  )Nz9dilation should be greater than zero, but got dilationT: z, dilationH: z, dilationW: r)   r)   )r  r  r  r)   r*   r:   g  r  r  c                      rV  )Nz/: Expected 4D or 5D tensor for input, but got: rO  r)   )r  r   r)   r*   r:   o  r;   r  c                      s     dj  d dS )NzZ: Expected input's non-batch dimensions to have positive length, but input has a shape of z and non-batch dimension z has length zero!)rJ   rS   r)   r  r   r   r)   r*   r:   x  s
   c                      s*   d d  d d d d dS )Nzinput image (T: r  r  z ) smaller than kernel size (kT:  kH:  kW: r  r)   r)   )r  r  r  rr  r  rs  r)   r*   r:     s   r   c                      s(   d d d  d d d S )NzHpad should be smaller than or equal to half of kernel size, but got kT: r  r  z padT: z padW: z padH: r)   r)   )rr  r  rs  r  r  r  r)   r*   r:     s   rv   c                      s6   d d d  d d d d d dS r  r)   r)   )r  r  r  r  r  r  r  r)   r*   r:     s   )rV   r.   r=   rz   rS   )r   r  r  rr  rs  r  rt  ru  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rV   r)   )rt  r  ru  r  r  r  r  r   r  r   r  r  rr  r  rs  r  r  r  r  r  r  r  r*   r  =  sJ   	"r  c                 C   s   | j }t| |||||||	|
|||||||||||| t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | t|||d | d S )Nr  rP   r   rv   rV   r  r\  )r   r  r_   r  r  rr  rs  r  rt  ru  r  r  r  r  r  r  r  r  r  r  r  r  r  rV   r)   r)   r*   max_pool3d_backward_shape_check  s@   r  c                 C   s   | j }t| ||||||||	|
|ddd|||||||d t|||d | t|||d | t|||d | t|||d | d S )Nrv   Tr  rP   r   r  )r   r  r  r  rr  rs  r  rt  ru  r  r  r  r  r  r  r  r  r  r  rV   r)   r)   r*   r    s:   r  c                 C   sB  dd }|d|\}}t t|dv dd  t|dkr#||}	}
n|d|\}	}
|d	|\}}|d
|\}}| d}| d}| d}t| }|t jkr^t |  dkdd  n|t jkrpt |  dv dd  nt ddd  t	||||	||}t	||||
||}t
| |||	|
|||||||||| |||fS )Nc                    r]  )Nr^  c                      rj  )Nzmax_pool2d: r_  r)   r)   r`  r)   r*   r:     rC   zEmax_pool2d_checks_and_compute_shape.<locals>.unpack.<locals>.<lambda>r   rv   ra  rb  r)   r`  r*   re    rf  z3max_pool2d_checks_and_compute_shape.<locals>.unpackr1  rg  c                   S   rD   )NzOmax_pool2d: stride must either be omitted, a single int, or a tuple of two intsr)   r)   r)   r)   r*   r:     rE   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>r   r|   r  r(  rk  r   rM   r  c                   S   rD   )NzMnon-empty 4D (batch mode) tensor expected for input with channels_last layoutr)   r)   r)   r)   r*   r:   0  rE   r  c                   S   rD   )Nz9non-empty 3D or 4D (batch mode) tensor expected for inputr)   r)   r)   r)   r*   r:   5  rE   Fc                   S   rD   )Nz?Unsupport memory format. Supports only ChannelsLast, Contiguousr)   r)   r)   r)   r*   r:   :  rE   )r.   r=   rx   rS   r   r6  r7  rO   r   rm  rn  )r   r1  r|   r  r(  ro  re  rr  rs  rt  ru  rv  rw  r  r  rx  ry  rz  r   r{  r|  r)   r)   r*   #max_pool2d_checks_and_compute_shape  sb   	









r  c                    s   t |||||\}tj jk fdd |jfdd}	|	  |	| t}
tjjjj	|
dS )Nc                      r:  )NzExpected dtype z  for `gradOutput` but got dtype r@   r)   r  r)   r*   r:   j  rQ  z7meta_max_pool2d_with_indices_backward.<locals>.<lambda>c                    s:   t | d   t | d  t | d  d S )NrP   r   rv   )r\  )r  )r  rV   r{  r|  r)   r*   _check_dim_sizep  s   z>meta_max_pool2d_with_indices_backward.<locals>._check_dim_sizerl  )
r  r.   r=   r5   rV   r   r6  rb   rJ   ra   )r  rK   r1  r|   r  r(  ro  r_   rx  r  r   r)   )r  r  rV   r{  r|  rK   r*   %meta_max_pool2d_with_indices_backwardU  s.   

r  rH  c                 C   s   t | |||||\}}}|  dkr| dnd}	t| }
|  dkr*|||g}n|	|||g}tj|| j| j|
dtj|tj	| j|
dfS )Nr  rj  rv   rP   rl  )
r  rO   rS   r   r6  r.   rb   r5   ra   rc   )r   r1  r|   r  r(  ro  rx  r{  r|  r  r   rS   r)   r)   r*   meta_max_pool2d_with_indices  s2   
r  c           	         s  t d tjtjkfdd ttdkfdd \}}tjdv fdd tjjkfdd t	d	jD ] t
 d
k fdd qG }jdkrr|
d
}||||f}|S |
d
}|
d	}|||||f}|S )NZmax_unpooling2d_forward_outc                      r?   )Nz2elements in indices should be type int64 but got: r@   r)   r.  r)   r*   r:     rC   z#meta_max_unpool2d.<locals>.<lambda>r   c                      r  )NzMThere should be exactly two elements (height, width) in output_size, but got 
 elements.r  r)   r  r)   r*   r:        r  c                      r,  )NzLInput to max_unpooling2d should be a 3d or 4d Tensor, but got a tensor with  dimensions.r.  r)   )self_r)   r*   r:     rM  c                      r:  NzBExpected shape of indices to be same as that of the input tensor (z%) but got indices tensor with shape: rO  r)   )r_   r  r)   r*   r:     rA  rv   r   c                      r  )NzZmax_unpooling2d(): Expected input to have non-zero size for non-batch dimensions, but got r   being empty.rO  r)   )r   r  r)   r*   r:     s
   rP   )r   rE  r.   r=   r5   rc   rx   rV   rJ   rz   rS   rf   rI   )	r  r_   r  r  r  rK   	nchannelsr  r  r)   )r   r_   r  r  r*   meta_max_unpool2d  s@   






	



r  c                    s  t jt jkdd  t jdv fdd t tdkfdd t tdkfdd t tdkfdd t jjkfd	d td
jD ]t dk fdd qXt d dkod
 dkod dkfdd d S )Nc                   S   rD   )Nz(elements in indices should be type int64r)   r)   r)   r)   r*   r:     rE   z._max_unpooling3d_shape_check.<locals>.<lambda>r  c                      r,  )NzLInput to max_unpooling3d should be a 4d or 5d Tensor, but got a tensor with r  r.  r)   r  r)   r*   r:     r  rP   c                      r  )NzVThere should be exactly three elements (depth, height, width) in output_size, but got r  r  r)   r  r)   r*   r:     r  c                      r  )NzRThere should be exactly three elements (depth, height, width) in stride, but got: r  r  r)   r3  r)   r*   r:     r;   c                      r  )NzSThere should be exactly three elements (depth, height, width) in padding, but got: r  r  r)   )r  r)   r*   r:     r;   c                      r:  r  rO  r)   )r_   r   r)   r*   r:     rA  rv   r   c                      s     dj  d dS )NzI: Expected input to have non-zero size for non-batch dimensions, but got r  r  rO  r)   r  r)   r*   r:     s
   r   c                      r  )Nz5strides should be greater than zero, but got stride: r)   r)   r3  r)   r*   r:     r  )	r.   r=   r5   rc   rV   rx   rJ   rz   rS   )r   r_   r  r|   r  r  r)   )r  r   r_   r   r  r  r|   r*   _max_unpooling3d_shape_check  s@   







	"
r  c                 C   s   t d t| ||||d |  }|\}}}| jdkr,|d}	||	|||f}
|
S |d}|d}	|||	|||f}
|
S )NZmax_unpooling3d_forward_outzmax_unpooling3d()r  r   rv   )r   rE  r  rf   rV   rS   rI   )r  r_   r  r|   r  rK   Zodepthr  r  r  r  r  r)   r)   r*   meta_max_unpool3d  s   





r  c                 C   s  t t|dv dd  |d }t|dkr|n|d }t|dkr$|n|d }t | p2t|dv dd  |s;|n|d }	|sC|nt|dkrK|	n|d }
|sS|nt|dkr[|	n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t t|dv d	d  |d }t|dkr|n|d }t|dkr|n|d }t | jd
v dd  | jdkr| dnd}| d}| d}| d}| d}t||||	||}t||||
||}t||||||}t| |||||	|
|||||||||||||d | jdkot| t j	k}| jdkr:| 
d}|  o2|jt j	d}||||f}n|||||f}| |}| j|t jd}|r_|jt j	d}|jt j	d}||fS )Nr  c                   S   rD   NzMmax_pool3d: kernel_size must either be a single int, or a tuple of three intsr)   r)   r)   r)   r*   r:   /  rE   z.meta_max_pool3d_with_indices.<locals>.<lambda>r   rv   r   c                   S   rD   NzQmax_pool3d: stride must either be omitted, a single int, or a tuple of three intsr)   r)   r)   r)   r*   r:   7  rE   c                   S   rD   NzImax_pool3d: padding must either be a single int, or a tuple of three intsr)   r)   r)   r)   r*   r:   ?  rE   c                   S   rD   NzJmax_pool3d: dilation must be either a single int, or a tuple of three intsr)   r)   r)   r)   r*   r:   G  rE   r  c                   S   rD   r  r)   r)   r)   r)   r*   r:   O  rE   r  rj  rk  r   rM   zmax_pool3d_with_indices()r  r   r@   )r.   r=   rx   rV   rS   rm  r  r   r6  channels_last_3dr  r  rI   rc   r   )r   r1  r|   r  r(  ro  r  rr  rs  r  rt  ru  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r7  input_channels_last_checkrW   r   r_   r)   r)   r*   meta_max_pool3d_with_indices#  s   

  







r  c                 C   s^  t t|dv dd  |d }t|dkr|n|d }	t|dkr$|n|d }
t | p2t|dv dd  |s;|n|d }|sC|	nt|dkrK|n|d }|sS|
nt|dkr[|n|d }t t|dv dd  |d }t|dkrw|n|d }t|dkr|n|d }t t|dv d	d  |d }t|dkr|n|d }t|dkr|n|d }t |jd
v dd  |d}|d}|d}|d}| d}| d}| d}t|| ||||	|
|||||||||||||||d |jdkot|t jk}|jdkr|	d}|
  o|j
t jd}||j}|r-|jt jd}|S )Nr  c                   S   rD   r  r)   r)   r)   r)   r*   r:     rE   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>r   rv   r   c                   S   rD   r  r)   r)   r)   r)   r*   r:     rE   c                   S   rD   r  r)   r)   r)   r)   r*   r:     rE   c                   S   rD   r  r)   r)   r)   r)   r*   r:     rE   r  c                   S   rD   r  r)   r)   r)   r)   r*   r:     rE   rj  rk  r   rM   z"max_pool3d_with_indices_backward()r  r  r   )r.   r=   rx   rV   rS   r  r   r6  r  r  r  rI   rJ   r   )r  r   r1  r|   r  r(  ro  r_   r  rr  rs  r  rt  ru  r  r  r  r  r  r  r  r  r  r  r  r  r  r7  r  
grad_inputr)   r)   r*   %meta_max_pool3d_with_indices_backward  s   
  









r  gridc                    s   t j jk fdd t jt jko jt jk fdd t jd  jd k fdd t  jd jd k fdd tdjD ]t j dkfd	d qPd S )
Nc                      r:  )NzNgrid_sampler(): expected input and grid to be on same device, but input is on z and grid is on r   r)   r  r   r)   r*   r:     r=  z+check_grid_sampler_common.<locals>.<lambda>c                      r:  )NzTgrid_sampler(): expected input and grid to have torch.strided layout, but input has z and grid has )r   r)   r  r)   r*   r:     r=  r   c                      r:  )NzZgrid_sampler(): expected grid and input to have same batch size, but got input with sizes  and grid with sizes rO  r)   r  r)   r*   r:     r=  rM   r   c                      s   dj d  d j S )Nz+grid_sampler(): expected grid to have size r   z, in last dimension, but got grid with sizes )rV   rJ   r)   r  r)   r*   r:     s   c                      r  )NzYgrid_sampler(): expected input to have non-empty spatial dimensions, but input has sizes r  r  rO  r)   r  r)   r*   r:     r  )r.   r=   ra   r   r  rJ   rV   rz   )r   r  r)   )r  r   r   r*   check_grid_sampler_common  s,   
r  c                   @   s   e Zd ZdZdZdZdS )GridSamplerInterpolationr   rv   r   N)__name__
__module____qualname__ZBILINEARZNEARESTBICUBICr)   r)   r)   r*   r    s    r  interpolation_modec                    sP   t jdkoj jk fdd t jdko |tjjk dd  d S )Nr  c                      r:  )Nzdgrid_sampler(): expected 5D input and grid with same number of dimensions, but got input with sizes r  rO  r)   r  r)   r*   r:   $  s
   z'check_grid_sampler_3d.<locals>.<lambda>c                   S   rD   )Nz<grid_sampler(): bicubic interpolation only supports 4D inputr)   r)   r)   r)   r*   r:   /  rE   )r.   r=   rV   r  r  rn  )r   r  r  r)   r  r*   check_grid_sampler_3d!  s   

r  c           
      C   s:   |d }|rt j|t jd}nd }t j|t jd}	||	fS Nr   r   )r.   r+  r   r[   
r  r   r  r  padding_modealign_cornersr  Zinput_requires_gradr  	grad_gridr)   r)   r*   grid_sampler_2d_backward_meta3  s   
r  c           
      C   s\   t | | t| || | jd }| jd }|jd }|jd }|jd }	| |||||	fS )Nr   rv   r   rP   )r  r  rJ   rI   )
r   r  r  r  r  r  CZout_DZout_HZout_Wr)   r)   r*   grid_sampler_3dF  s   
	




r  r  r  c           
      C   sP   t || t||| |d }|rtj|tjd}nd }tj|tjd}	||	fS r  )r  r  r.   r+  r2  r[   r  r)   r)   r*   grid_sampler_3d_backwardY  s   
r  c                 O   s:   | dd }|st|}||d< tj| g|R i |S )Nr5   )r4   r   Z	get_dtyper.   rb   )rS   r  r?  rT  r5   r)   r)   r*   fullq  s
   
r  c                 C   s   |t jkrJt |d u dd  t jd|d u r| jn|||d u r"| jn||d}| jr8||  | 	 | 
  n||  |  d |d |S tjj| |||||d}|d |S )Nc                   S   rD   )Nz9memory format option is only supported by strided tensorsr)   r)   r)   r)   r*   r:     rE   zzeros_like.<locals>.<lambda>r   r   Tr   )r.   Z
sparse_coor=   rb   r5   ra   	is_sparseZsparse_resize_and_clear_rS   
sparse_dim	dense_dimrO   Z_coalesced_r"   r[   r   fill_)rK   r5   r   ra   r   r   r  r)   r)   r*   r+  {  s:   
	

	r+  c                    s     }t|dkdd   dkr n |   }t |kp'|k  fdd dkr7n| t }t } |    }| = | = |||S )Nr   c                   S   rD   )Nz-select() cannot be applied to a 0-dim tensor.r)   r)   r)   r)   r*   r:     rE   zmeta_select.<locals>.<lambda>c                      s   d d   d  S )Nzselect(): index z! out of range for tensor of size z at dimension rR   r)   rO   rB   rK   r)   r*   r:     s
    )rO   r.   rG   rS   ry   r|   r   r   )rK   rO   rB   rV   rS   new_sizer   Znew_storage_offsetr)   r  r*   meta_select  s$   
r  c                 C   rg  r$   r   r  )rK   r   rO   rB   r)   r)   r*   meta_select_scatter  ri  r  c                 C   rg  r$   r  )rK   r   rO   startendstepr)   r)   r*   meta_slice_scatter  ri  r  dim_post_exprwrap_scalarc                 C   sb   |dkr
|sJ d}| }|d }| |k s| |kr'J d|  d| d| d| dk r/| |7 } | S )Nr   rv   zdim z out of bounds (r  r  r)   )rO   r  r  r   r   r)   r)   r*   rd     s   ,rd   c                 C   s   |   dkrdS | j| S r2  rY  )r  rO   r)   r)   r*   ensure_nonempty_size  s   r  c                    st   t  d}t  d}t||kdd  t|D ] kr7tttk fdd qd S )Nrv   c                   S   rD   )NzDIndex tensor must have the same number of dimensions as input tensorr)   r)   r)   r)   r*   r:     rE   z$gather_shape_check.<locals>.<lambda>c                      s$   d dj  dj  d   S )Nz!Size does not match at dimension z expected index  to be smaller than self  apart from dimension rO  r)   rO   r   rB   rK   r)   r*   r:     s    )r   rO   r.   r=   rz   r  )rK   rO   rB   	self_dimsZ
index_dimsr)   r  r*   gather_shape_check  s   r  c                    sR   t ||  }  dk}|s#t jtjk fdd t| |  |  j	S )Nr   c                      r?   )Nz2gather(): Expected dtype int64 for index, but got r@   r)   rA   r)   r*   r:     rC   zmeta_gather.<locals>.<lambda>)
rd   rO   rH   r.   r=   r5   rF   r  rI   rJ   )rK   rO   rB   Zsparse_gradwrapped_dimZis_index_emptyr)   rA   r*   meta_gather  s   

r  c                 C   s   |r*| dkrdS | dkrdS | dkrdS | dkrdS | d	kr d
S t ddd  d S | dkr0dS | dkr6dS t ddd  d S )NsumZ
REDUCE_ADDr*  ZREDUCE_MULTIPLYmeanZREDUCE_MEANZamaxZREDUCE_MAXIMUMZaminZREDUCE_MINIMUMFc                   S   rD   )Nz=reduce argument must be either sum, prod, mean, amax or amin.r)   r)   r)   r)   r*   r:     rE   z#get_operator_enum.<locals>.<lambda>addmultiplyc                   S   rD   )Nz/reduce argument must be either add or multiply.r)   r)   r)   r)   r*   r:     rE   rl  )reduce_use_new_optionsr)   r)   r*   get_operator_enum  s,   r  c                    sT   |  dkrt|jtjk fdd |d ur(t|j|jk fdd d S d S )Nr   c                      
     dS )Nz"(): Expected dtype int64 for indexr)   r)   method_namer)   r*   r:     r  z,scatter_gather_dtype_check.<locals>.<lambda>c                      r  )Nz0(): Expected self.dtype to be equal to src.dtyper)   r)   r  r)   r*   r:   $  r  )rH   r.   r=   r5   rF   )r  rK   rB   src_optr)   r  r*   scatter_gather_dtype_check  s   



r  c                 C   s
   t | dS r   )r   rN   r)   r)   r*   ensure_nonempty_dim(  s   
r  c                    s     dkrd S tt t kdd  d}t }t|D ]}t|}| kr2q&|t|kr=d} nq&|s[d ur[t|D ]}t|}|t|krZd} nqHd urtt t kdd  t|  fdd d S t|  fdd d S )	Nr   c                   S   rD   NzCIndex tensor must have the same number of dimensions as self tensorr)   r)   r)   r)   r*   r:   2  rE   z%scatter_shape_check.<locals>.<lambda>FTc                   S   rD   r  r)   r)   r)   r)   r*   r:   L  rE   c                      s&   dj  dj  d  dj   S )NExpected index r  r  z and to be smaller than src rO  r)   rO   rB   rK   r  r)   r*   r:   P  s    c                      s   dj  dj  d   S )Nr  r  r  rO  r)   r  r)   r*   r:   V  s    )rH   r.   r=   r  rO   rz   r  )rK   rO   rB   r  Zis_wrong_shaper  r   Zindex_d_sizer)   r  r*   scatter_shape_check-  sH   

r  c                 C   sD   t ||  }td| || t| ||| |d ur t|| d S d S )Nscatter)rd   rO   r  r  r  )rK   rO   rB   r   r  r  r  r)   r)   r*   scatter_meta_impl\  s   r  c                 C   s   t | |||d | | jS Nr  r  rI   rJ   rK   rO   rB   r   r)   r)   r*   meta_scatter_adde  s   r  c                 C   s   t | |||d | S r  r  r  r)   r)   r*   meta_scatter_add_k  r   r  c                 C   s0   t |tjr|nd }t| |||| | | jS r$   )r   r.   r
   r  rI   rJ   rK   rO   rB   Zsrc_or_valuer   r   r)   r)   r*   meta_scatterq  s   
r  c                 C   s(   t |tjr|nd }t| |||| | S r$   )r   r.   r
   r  r  r)   r)   r*   meta_scatter_  s   	r          queryrt   rn  	dropout_p	is_causalreturn_debug_maskr  c                 C   s   |  d}|  d}|  d}	|  d}
| d}||	 }| dd}||||
}tj|| jd}|||	||
dd}t| dkrtj||	|ftj	| jddd}||tjdtj
d	dtjdtj
d	dddtjdtjd	dtjdtjd	dtjd| j| jdf	S t|	d
 d
 }tj|||ftj	| jd}tj|d tj
d	d}tj|d tj
d	d}|r|
dkrdnd}t|	| }|dkrd}n|dkrd}tj||||f| j| jd}n
tjd| j| jd}|||||	|tjdtjd	dtjdtjd	d|f	S )Nr   rv   r   rP   r   r9  r5   ra   r)   r      @         )rS   r)  r   r.   r[   ra   rq  ry  rb   r1   r6  rF   r5   r)  ceil)r  rt   rn  r  r  r  r  r   	num_headsZmax_seqlen_batch_qhead_dimZmax_seqlen_batch_kZNnz_qZquery_tZquery_reshapedZ	attention	logsumexpZmax_seqlen_qZcumulative_sequence_length_qZcumulative_sequence_length_kZblocksize_cZmax_seqlen_kZ
debug_maskr)   r)   r*   meta__scaled_dot_product_flash  s   






r  r  r  	cum_seq_q	cum_seq_kmax_qmax_kphilox_seedphilox_offsetc                 C   s   | d}| d}| d}t|dkr| dn|}t|dkr'| dn|	}tj||||fd|j|jd}tj||||fd|j|jd}tj||||fd|j|jd}|||fS )Nr   rv   rP   r9  r   r   r   rv   rP   r  )rS   ry  r.   empty_permutedr5   ra   )r  r  rt   rn  r   r  r  r  r  r   r  r  r  r  r  r   r  r  len_qZlen_kgrad_qgrad_kgrad_vr)   r)   r*   'meta__scaled_dot_product_flash_backward  s0   






r	  	attn_biascompute_log_sumexpc                 C   s   |  dd} | dd}| dd}| d}| d}	|d}
| d}| d}|d}tj||	||| j| jd}|rHt|	d d nd}tj|||ftj| jd}| dd}tjdtj	d	d}tjdtj	d	d}||||fS )
Nrv   r   r   r   rM   r      r)   r   )
r)  rS   r.   rb   r5   ra   r)  r  r1   rF   )r  rt   rn  r
  r  r  r  r  r  Mr  r  KZKvr  Zlogsumexp_dimZ
logsum_expseedoffsetr)   r)   r*   "meta__scaled_dot_product_efficient   s(   





r  grad_input_maskc                 C   s   | d}| d}| d}| d}| d}| d}tj||||fd|j|jd}tj||||fd|j|jd}tj||||fd|j|jd}d }|d urd|
d rdtj|  | |j|jd}||||fS )Nr   rv   r   rP   r  r  )rS   r.   r  r5   ra   rC  r|   )r  r  rt   rn  r
  r   r  r  r  r  r  r  r  r   r  r  r  Z
head_dim_vr   r  r  r  	grad_biasr)   r)   r*   +meta__scaled_dot_product_efficient_backwardL  sB   








r  c                 C   s    t | ||||dd | | jS NT)r  r  rK   rO   rB   r   r   r   r)   r)   r*   meta_scatter_reduce_two  s   r  c                 C   s   t | ||||dd | S r  r  r  r)   r)   r*   meta_scatter_reduce__two  s   r  c                   sh   t d    k odkn   fdd   dkr&t j|t j jdS t j d|t j jdS )Nr   r   c                      r~  )Nz@The probabilty distributions dimensions must be 1 or 2, but got rN   r)   r  r)   r*   r:     r  z"meta_multinomial.<locals>.<lambda>rv   r  )r.   r=   rO   rb   rF   ra   rS   )r   Znum_samplesreplacementr   r)   r  r*   meta_multinomial  s   
r  c                 C   s   d}| D ]}||9 }q|S r   r)   )vsri  vr)   r)   r*   multiply_integers  s   
r  c                    s   t tkfdd d  t t k fdd t tdd dd  D o9tdd D fdd d d \}}||gR S )Nc                         d  dt  S )Nz%It is expected output_size equals to , but got size r  r)   )num_spatial_dimsr  r)   r*   r:     rQ  z'upsample_common_check.<locals>.<lambda>r   c                      r  )Nz$It is expected input_size equals to r  r  r)   )expected_input_dimsr  r)   r*   r:     rQ  c                 s       | ]}|d kV  qdS r   Nr)   r  r)   r)   r*   r    r  z(upsample_common_check.<locals>.<genexpr>c                      r  )NzDInput and output sizes should be greater than 0, but got input size z and output size r)   r)   )r  r  r)   r*   r:     s
    )r.   r=   rx   r  )r  r  r   r  Zchannelsr)   )r!  r  r   r  r*   upsample_common_check  s   

*r$  c                    sZ   t   dkpt  dd   fdd t  |dd} |jt	 dS )Nr   rv   c                      r~  )Nz>Non-empty 3D data tensor expected but got a tensor with sizes rR   r)   r  r)   r*   r:     r  z$upsample_nearest1d.<locals>.<lambda>r   r   
r.   r=   rH   r  rS   r$  rI   r   r   r6  )r   r  scalesfull_output_sizer)   r  r*   upsample_nearest1d     


r)  c           	         s   t   dkpt  dd   fdd t  |dd} |}t } j	\}}}} j
jdkr?|dk r?t j}|j|d	}|S )
Nr   rv   c                      r~  Nz>Non-empty 4D data tensor expected but got a tensor with sizes rR   r)   r  r)   r*   r:     r  z$upsample_nearest2d.<locals>.<lambda>r   r%  rx  r  r   )r.   r=   rH   r  rS   r$  rI   r   r6  rJ   ra   r%  r   rf   )	r   r  scales_hscales_wr(  r   r   rl   Z
n_channelsr)   r  r*   upsample_nearest2d  s   



r.  r  r  r,  r-  c                    st   t ||dd tjdkfdd tdD ]t  k fdd q|jt	dS )Nr   r%  r  c                      r?   )NzFExpected grad_output to be a tensor of dimension 4 but got: dimension r.  r)   r  r)   r*   r:     rC   z-upsample_nearest2d_backward.<locals>.<lambda>c                
      s&   d d   d d  S )NzCExpected grad_output to have the same shape as output; output.size(z) = z but got grad_output.size(rR   r)   r(  r  r   r)   r*   r:     s   r   )
r$  r.   r=   rV   rz   rS   rI   r   r   r6  )r  r  r  r,  r-  r)   r/  r*   upsample_nearest2d_backward  s   

	r0  c                    sZ   t   dkpt  dd   fdd t  |dd} |jt	 dS )Nr   rv   c                      r~  )Nz>Non-empty 5D data tensor expected but got a tensor with sizes rR   r)   r  r)   r*   r:     r  z$upsample_nearest3d.<locals>.<lambda>rP   r%  r   r&  )r   r  Zscales_dr,  r-  r(  r)   r  r*   upsample_nearest3d  r*  r1  c           
      C   s   t | t j| t jd}}|d urQ|d urQt|tsJ t|ts$J |j}| }	t||}t||}|||	 |||	 t	||d t	||d ||fS ||fS )Nr@   )r  r  )
r.   r[   rc   r   r   rJ   r|   r   r   r   )
rK   stablerO   
descendingr^   r_   r  r   rW   Z
out_strider)   r)   r*   	meta_sort  s   	

r4  )rO   r3  c                C   s   t | |||dd S )N)r2  rO   r3  rv   )r4  )rK   r2  rO   r3  r)   r)   r*   meta_argsort*  s   r5  c                    s  t jdkfdd t jjkfdd dd urPt jdkfdd t  kfdd t jjkfdd t jdkfd	d d
   t   k fdd t tfddfD dd  d S )Nr   c                          j  dS Nz != 2r.  r)   input_gatesr)   r*   r:   2  rC   z%rnn_cell_checkSizes.<locals>.<lambda>c                         j  d j  S N != rO  r)   )hidden_gatesr9  r)   r*   r:   5      rv   c                      r6  )Nz != 1r.  r)   )
input_biasr)   r*   r:   9  rC   c                      s      d  S r;  r  r)   )
gates_sizer?  r)   r*   r:   <  r>  c                      r:  r;  rO  r)   )hidden_biasr?  r)   r*   r:   @  r>  c                      r6  r7  r.  r)   )prev_hiddenr)   r*   r:   B  rC   r   c                
      s,      dd d d d  d
S )Nr<  r   z * z // z (aka r  )rH   rS   r)   )expected_prev_hidden_numelfactorr@  r9  rB  r)   r*   r:   F  s   , c                 3   s    | ]	}|j  j kV  qd S r$   r   r  r8  r)   r*   r  I  s
    

z&rnn_cell_checkSizes.<locals>.<genexpr>c                   S   rD   )Nz%expected all inputs to be same devicer)   r)   r)   r)   r*   r:   M  rE   )r.   r=   rV   rJ   rS   rH   r  )r9  r=  r?  rA  rD  rB  r)   )rC  rD  r@  rA  r=  r?  r9  rB  r*   rnn_cell_checkSizes/  s8   





rE  c                 C   sL   t | |||d| tj| tjd}tj|tjd}tj|tjd}|||fS )Nr  r   )rE  r.   r[   r   )r9  r=  cxr?  rA  	workspacehycyr)   r)   r*   _thnn_fused_lstm_cell_metaQ  s
   
rJ  c                 C   s(  t |dk}|rt |}|d }| jd }n|
r| jd n| jd }|
r)| jd n| jd }d}|r4dnd}|dkr<|n|}|rG||| g}n|
rP|||| gn|||| g}| |}|	| ||g}|d u rptjd| jd}n||}||	| ||g}|rdnd}| j|tjd}|||||fS )Nr   rv   rM   r   r   r@   )rx   rJ   rI   r.   rb   ra   uint8)r   r'  Zweight_stride0Z
weight_bufhxrF  rg  hidden_sizeZ	proj_size
num_layersbatch_firstZdropouttrainbidirectionalbatch_sizesZdropout_stateZis_input_packed
seq_length
mini_batchZbatch_sizes_sumZnum_directionsZout_sizerW   r   Z
cell_shaperI  rH  Zreserve_shapeZreserver)   r)   r*   
_cudnn_rnn\  s2   

rU  c                 C   s   |r| j d n| j d }|r| j d n| j d }|
}|r!|||gn|||g}| |}|d u r8tjd| jd}n||j }|d u rKtjd| jd}n||j }tjd| jtjd}||||fS )Nrv   r   r   r`   )rJ   rI   r.   rb   ra   rK  )r   Zw0Zw1Zw2Zw3hx_Zcx_ru   rR  rg  rM  rN  
has_biasesrQ  rO  rP  rS  rT  Zoutput_chanelsrW   r   rH  rI  rG  r)   r)   r*   mkldnn_rnn_layer  s    
rX  c                    sT   | j dkrt dkp dk fdd d S t|  dk fdd d S )Nr   rM   c                      r
  )Nz4: Expected reduction dim -1 or 0 for scalar but got r)   r)   rO   r  r)   r*   r:     r  z'zero_numel_check_dims.<locals>.<lambda>c                      r  )Nz: Expected reduction dim z to have non-zero size.r)   r)   rY  r)   r*   r:     r;   )rV   r.   rG   rS   )rK   rO   r  r)   rY  r*   zero_numel_check_dims  s   
rZ  c                    sF   |d urt || }t||  d S t| dk fdd d S )Nr   c                      r  )Nz@: Expected reduction dim to be specified for input.numel() == 0.r)   r)   r`  r)   r*   r:     r  z%check_argmax_argmin.<locals>.<lambda>)rd   rO   rZ  r.   r=   rH   )r   rK   rO   r)   r`  r*   check_argmax_argmin  s   

r[  c                 C   sD   t d| | t| j|d ur|fnd }t| ||}| j|tjdS )Nargmaxr@   )r[  r   r   rJ   r   rI   r.   rc   )rK   rO   r   r"  rJ   r)   r)   r*   argmax_argmin_meta  s   r]  c                 C   s   t jd||||dS )Nr)   r   r   )r-  r5   r   ra   r   r)   r)   r*   scalar_tensor  r   r^  c                 C   s   t ||  dd}t|dko||  dkr| |ndkdd  |  dkr*dn| |}t|dko8||kdd  t| j}t|dkrL|||< | || j|tj	dfS )	NT)r  r   rv   c                   S   rD   )Nzselected index k out of ranger)   r)   r)   r)   r*   r:     rE   ztopk_meta.<locals>.<lambda>c                   S   rD   )Nzk not in range for dimensionr)   r)   r)   r)   r*   r:     rE   r@   )
rd   rO   r.   r=   rS   ry   rJ   rx   rI   rc   )rK   rZ  rO   ZlargestsortedZ	sliceSizeZtopKSizer)   r)   r*   	topk_meta  s   $
r`  c                 C   s   | d ur| n|}t | dkdd  | }| d ur(t |  |kdd  |d ur8t | |kdd  t | |kdd  t | |kdd  t | dkdd  t | |d	 |d
  d kdd  d S )Nr   c                   S   rD   N r)   r)   r)   r)   r*   r:     rE   z(checkLSTMBackwardSizes.<locals>.<lambda>c                   S   rD   ra  r)   r)   r)   r)   r*   r:     rE   c                   S   rD   ra  r)   r)   r)   r)   r*   r:      rE   c                   S   rD   ra  r)   r)   r)   r)   r*   r:     rE   c                   S   rD   ra  r)   r)   r)   r)   r*   r:     rE   c                   S   rD   ra  r)   r)   r)   r)   r*   r:     rE   r   rv   r  c                   S   rD   ra  r)   r)   r)   r)   r*   r:     rE   )r.   r=   rO   rS   rH   )grad_hygrad_cyrF  rI  rG  Zdefined_gradZexp_sizer)   r)   r*   checkLSTMBackwardSizes  s   ,re  c           	      C   s`   | d u r
|d u r
dS t | |||| tj|td}tj|td}|r)|jdddnd }|||fS )NNNNr   r   F)r   )re  r.   r[   legacy_contiguous_memory_formatr  )	rc  rd  rF  rI  rG  Zhas_biasZ
grad_gatesZgrad_cxr  r)   r)   r*   #_thnn_fused_lstm_cell_backward_impl  s   
rh  c                    s   t jdkrjd ||  dksJ dj d| dd   fdd	}jd ||  }jd
 | }jd | }g jd d |||R }|}|j| d}|S )Nr   rk  r   z'Invalid input shape for pixel_shuffle: z with upscale_factor = c                 S   r3  r$   r4  r8  r)   r)   r*   r:    r;  z,meta_pixel_shuffle.<locals>.is_channels_lastc                      sL    rt dkrtjS tjS jtjdrtjS jtjdr$tjS d S r=  )ry  r.   r   r7  r  r>  r)   r:  rK   r)   r*   r@    s   z.meta_pixel_shuffle.<locals>.pick_memory_formatr   rM   r   )rx   rJ   rI   r   )rK   Zupscale_factorr@  r  ZHrZWrrW   r   r)   ri  r*   meta_pixel_shuffle  s   & 
rj  c                 C   sZ   |  | j}| |j}| |j}| |j}| |j}| |j}|||||||fS r$   rI  )r   Zweight0Zweight1Zweight2Zweight3rV  Zcx_tmpr   Zhy_Zcy_Zgrad_output_r_optZgrad_hy_r_optZgrad_cy_r_optru   rg  rM  rN  rW  rP  rQ  rR  rO  rG  Zdiff_xZdiff_hxZdiff_cxZdiff_w1Zdiff_w2Zdiff_br)   r)   r*   mkldnn_rnn_layer_backward3  s   rk  )	out_int32r   c                C   s   t j| |rt jnt jd S r   )r.   r[   r6  rc   rf   )rK   Z
boundariesrl  r   r)   r)   r*   meta_bucketizeV  s
   rm  c                    sd   t   |dd}t  dkptdd   dd  D  fdd  |jt	 d	S )
Nr   r%  r   c                 s   r"  r#  r)   )rk   rS   r)   r)   r*   r  f  r  z.meta_upsample_bilinear2d_aa.<locals>.<genexpr>rv   c                      r~  r+  rR   r)   r  r)   r*   r:   g  r  z-meta_upsample_bilinear2d_aa.<locals>.<lambda>r   )
r$  rS   r.   r=   rH   r  rI   r   r   r6  )r   r  r  r,  r-  r(  r)   r  r*   meta_upsample_bilinear2d_aa^  s   
(

rn  c                 C   s\   t | dkdd  t | dkdd  t |jjdd  t |jjdd  d S )Nrv   c                   S   rD   )Nz%found_inf must be a 1-element tensor.r)   r)   r)   r)   r*   r:   r  rE   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>c                   S   rD   )Nz%inv_scale must be a 1-element tensor.r)   r)   r)   r)   r*   r:   u  rE   c                   S   rD   )Nz!found_inf must be a float tensor.r)   r)   r)   r)   r*   r:   y  rE   c                   S   rD   )Nz!inv_scale must be a float tensor.r)   r)   r)   r)   r*   r:   }  rE   )r.   r=   rH   r5   r   )rK   r
  Z	inv_scaler)   r)   r*   *_amp_foreach_non_finite_check_and_unscale_o  s   ro  c                 C   s   t |  }| |S r$   )ry   rS   rI   )rK   nanZposinfZneginfr   r)   r)   r*   
nan_to_num  s   
rq  c                 C   s   | j tjtjtjtjhvsJ d| j  d| j}t||}t||}||kr)| S t| 	 }t| 
 }|| || ||< ||< || || ||< ||< | || | S )Nz>torch.transpose_: in-place transposition is not supported for z layout)r   r.   r  Z
sparse_cscr  Z
sparse_bscrV   rd   ry   rS   r|   r   )rK   Zdim0r  ndimsrS   r|   r)   r)   r*   r    s&   

r  c                 C   sz   | j }| jr"|  }|  }|dkr|dks!J d| d| dn|  dks0J d| dt| d|dk r:dS dS )	Nr   r   zEt_ expects a tensor with <= 2 sparse and 0 dense dimensions, but got z sparse and z dense dimensionsz6t_ expects a tensor with <= 2 dimensions, but self is r  rv   )rV   r  r  r  rO   r  )rK   rr  r  r  r)   r)   r*   t_  s   
rs  )rl  r   sidesorterc                C   s@   |rt jnt j}t|t jrt j||d S t jd|| jdS )Nr@   r)   r  )	r.   r6  rc   r   r
   r[   rf   rb   ra   )Zsorted_sequencerK   rl  r   rt  ru  r5   r)   r)   r*   meta_searchsorted  s   rv  r   c                 C   s4   t | dkdd  t|tjd\}}t j||dS )Nr   c                   S   rD   )Nz,polygamma(n, x) does not support negative n.r)   r)   r)   r)   r*   r:     rE   z meta_polygamma.<locals>.<lambda>r   r@   )r.   r=   r   r   r   r[   )r   rK   rl   r   r)   r)   r*   meta_polygamma  s   
rw  c                 C      t | t dd }|S )Nc                 S   rP  rQ  )r   r   r   ro   r)   r)   r*   _f  s   z)_create_unary_float_meta_func.<locals>._fr-   r   funcry  r)   r)   r*   _create_unary_float_meta_func  s   r}  c                 C   rx  )Nc                 S   s   t | |\} }t| |tjdS rQ  )r   r   r   r   )rp   rv  r)   r)   r*   ry    s   z*_create_binary_float_meta_func.<locals>._frz  r{  r)   r)   r*   _create_binary_float_meta_func  s   r~  c                  C   s
  i } dD ]}t | }|D ]}|| vr|| | |< qq|  D ]d\}}t|ts)J |tjjj| tj	|
 drJ|t d v rIt| dq|jrNq|
 dv rUqd|
 v rbt|| qd|
 v rot|| qd|
 v r|t|| qt|| qd S )	N)r   Zpost_autogradZpre_autogradZCompositeImplicitAutogradr   z is a CompositeImplicitAutograd op, we shouldn't register meta function for it. Instead, we should let the decomposition run and write meta kernels for the base operators.>   zaten::rot90zaten::clonezaten::copy_zaten::_to_copyzaten::empty_stridedzaten::constant_pad_ndzaten::as_strided_scatterzmkldnn::zmkl::zonednn::)r   itemsr   r   Zpy_implr.   _CZDispatchKeyr#   Z%_dispatch_has_kernel_for_dispatch_keyr   r0  Zis_view2_meta_lib_dont_use_me_use_register_meta_for_mkldnnimpl/_meta_lib_dont_use_me_use_register_meta_for_mkl2_meta_lib_dont_use_me_use_register_meta_for_onednn'_meta_lib_dont_use_me_use_register_meta)Zactivate_meta_tabler%  registryZopoZop_overloadr(   r)   r)   r*   activate_meta  s>   	r  ri   )NNrf  )Tr  )r  )r  T)FF)TT)rh  )FTN)TFF)TF)r   )r  N)r   r$  r$   )r)   rw   FTN)Fr   FNFrM   )NF)rM   F)r)   rw   rH  F)NNNNN)r   NNrv   )NNF)r  FFN)r  FN)FN)NrM   FNN)NNNN)rM   TT(  r)  enumr   typingr   r   r   r   r   r.   Ztorch._prims_commonr5  r   r   r	   r
   Ztorch._decompr   r   r   r   Z
torch._opsr   Ztorch._primsr   r   r   r   r   r   r   r   r   Ztorch._prims_common.wrappersr   r   r   r   r  r   r   Z%torch.fx.experimental.symbolic_shapesr   r    Ztorch.utils._pytreer!   opsr"   libraryLibraryr  r-   r6   r>   Ztaker   r   rL   rX   r]   ZcummaxZcumminre   rg   r   Z_fft_c2cr   Z_fft_r2cr   ZrandpermZgenerator_outr   rF   r   randintr   r   r   Zrandr   Z_fft_c2rr   r   r   r   Z
unsqueeze_r   Zindex_reducerK  r$  r  r   Zindex_reduce_r   r   r   r   r   Z	unary_outr   rO   r   r   r   r   r   r   r   Z_assert_asyncr   msgr   Z_make_dep_tokenr   r   Z_functional_sym_constrain_ranger   r   Z(_functional_sym_constrain_range_for_sizer   Z_functional_assert_asyncr   rY   r  rZ   r  r  r  r!  Z_linalg_eighr"  r'  r*  r+  r/  r1  r4  r8  rD  Zlinalg_inv_exrF  Zlinalg_ldl_factor_exrL  Zlinalg_ldl_solverU  Z	linalg_lur[  Zlinalg_lu_factor_exr]  Zlinalg_lu_solvera  Z	lu_unpackrf  rn  Z	linalg_qrrq  rt  rr  Z_linalg_svdr|  rS  r0  r  r  Zlinalg_solve_triangularr  r  r  Z_linalg_detr  r  r  r  Zreflection_pad1dr  Zreplication_pad1dr  r  Zreflection_pad1d_backwardr  Zreplication_pad1d_backwardr  r  Zreflection_pad2dr  Zreplication_pad2dr  Zreflection_pad2d_backwardr  Zreplication_pad2d_backwardr  r  Zreflection_pad3dr  Zreplication_pad3dr  Zreflection_pad3d_backwardZreplication_pad3d_backwardr  Z_pdist_forwardr1   r  Z_pdist_backwardr  Zbaddbmmr  Z	bernoullir  Z
bernoulli_r  r  r  Z_fused_moving_avg_obs_fq_helperr  r  r  r  mmr  r   ry  r2  r:  ZconvolutionrB  r  Z_has_mkldnnr  rC  Z_convolution_pointwiserG  Z_linear_pointwiserJ  Zhas_mklr  rK  Z_mkl_linearrL  r  rM  Zqconv2d_pointwiserW  Zqlinear_pointwiserX  r\  Z
avg_pool2dr}  r  Zavg_pool2d_backwardr  Z
avg_pool3dr  Zavg_pool3d_backwardr  Z_adaptive_avg_pool2dr  Z_adaptive_avg_pool3dr  Z_adaptive_avg_pool2d_backwardr  Z_adaptive_avg_pool3d_backwardr  r  Zadaptive_max_pool2dr  r  r  Zadaptive_max_pool3dr  r  r  Zrepeat_interleaver  complexr  rq  r5   r  r  r  rB   Z_unsafe_indexr  Zconvolution_backwardr  Zaddbmmr  Z_foreach_neg_Z_foreach_reciprocal_Z_foreach_sqrt_Z_foreach_sign_r  Z_foreach_negZ_foreach_reciprocalZ_foreach_sqrtZ_foreach_signr  r  Z_foreach_addZ_foreach_subZ_foreach_mulZ_foreach_divZ_foreach_maximumZ_foreach_minimumr  Z_foreach_add_Z_foreach_sub_Z_foreach_mul_Z_foreach_div_Z_foreach_maximum_Z_foreach_minimum_r  ZScalarr  r  Z_foreach_addcdiv_Z_foreach_addcmul_r  Z_foreach_lerp_r  Z_foreach_addcdivZ_foreach_addcmulr   Z_foreach_powZScalarAndTensorr  r  Z_foreach_copy_r  Z_fused_adam_r  Z_fused_adamr  Z_int_mmr  Z_cdist_forwardr(  Z_cdist_backwardr-  Z_embedding_bagr>  Z_embedding_bag_forward_onlyr@  rB  ZnansumrC  ZmedianZ	nanmedianrD  Z
dim_valuesrg  r^   rF  Zlogical_not_rG  repeatrJ  Zzero_rK  Zmul_Zdiv_Zlogical_and_Zlogical_or_Zlogical_xor_rN  Zadd_Zsub_rO  roundZdecimalsrU  rY  
__rshift__r^  
__lshift__ra  zerorc  r  rf  fillrh  Zrelu_rj  Z	index_putZ_unsafe_index_putrm  Zmasked_fill_ro  Z
index_put_rp  aliasrr  rt  Zbmmru  rx  r{  rm  rn  r  r  r  r  Z max_pool2d_with_indices_backwardr  Zmax_pool2d_with_indicesr  Zmax_unpool2dr  r  Zmax_unpool3dr  Zmax_pool3d_with_indicesr  Z max_pool3d_with_indices_backwardr  r  r  r  Zgrid_sampler_2d_backwardr  r  r  r  r+  r  r  Zselect_scatterr  Zslice_scatterr  rd   r  r  gatherr  r  r  r  r  r  Zscatter_addr  Zscatter_add_r  r  r   rn  r   Zvalue_reducer  Zscatter_r  Z#_scaled_dot_product_flash_attentionr  Z,_scaled_dot_product_flash_attention_backwardr	  Z'_scaled_dot_product_efficient_attentionr  Z0_scaled_dot_product_efficient_attention_backwardr  Zscatter_reducetwoZtwo_outr  Zscatter_reduce_r  Zmultinomialr  r  r$  r)  r.  r0  typesZSymIntr1  r}   r2  Zvalues_stabler4  Zargsortr5  rE  Z_thnn_fused_lstm_cellrJ  rU  rX  rZ  r[  r\  Zargminr]  r^  Ztopkr`  r   rg  re  rh  Zpixel_shufflerj  rk  Z	bucketizeZ
Tensor_outrm  Z_upsample_bilinear2d_aarn  ro  rq  r  rs  Zsearchsortedrv  Z	polygammarw  r}  r~  Zspecial_airy_aiZspecial_bessel_y0Zspecial_bessel_y1Zspecial_modified_bessel_i0Zspecial_modified_bessel_i1Zspecial_modified_bessel_k0Zspecial_modified_bessel_k1Z!special_scaled_modified_bessel_k0Z!special_scaled_modified_bessel_k1Zspecial_chebyshev_polynomial_tZn_scalar_outZspecial_chebyshev_polynomial_uZspecial_hermite_polynomial_hZspecial_hermite_polynomial_heZspecial_laguerre_polynomial_lZtorch._refs.nn.functionalZtorch._refs.specialr  r)   r)   r)   r*   <module>   sP	   $
		6








	







	

	















)


"

2
&
*
7
(
"
%


	
;

/
Z&5
 ?'$,




e
	
'
!
M
,HT
N


.

*&c 














$
#h	







!
T	
]>	
6G+
!7
/

ge( 

	,$/	








	[	
-'	
5


"
7'
"


"	






9