o
    iz                     @   s   d dl Z ddlmZmZ ddlmZ ddlmZ dd Zeddd	 ied
dd	 ied
ejfddZ	eddd	 ied
dd	 ied
ejfddZ
G dd de jjZejZdS )    N   )
heuristicsjit)languagenext_power_of_2c                 C   s   | dk rdS | dk rdS dS )Ni      i           )Nr   r   g/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/triton/ops/cross_entropy.py	num_warps   s
   r   c                 C      t | d S Nr   r   nargsr   r   r   <lambda>       r   BLOCKc                 C   r   r   r   r   r   r   r   r      r   c                 C   s   t d}t d|}t || }| ||  | } |||  | }	|||  | }
t j| ||k td d}|t j}|t |d }t t 	t 
|d| }t j|	|||k d t   t |
}t || | d S Nr   inf)maskother)r   )tl
program_idarangeloadfloattofloat32maxlogsumexpstoreZdebug_barrier)ZLOGITSPROBSIDXZLOSSr   r   rowcolsidxZ
WRIT_PROBSZ
READ_PROBSlogitsprobsr   r   r   _forward   s   

r.   c                 C   r   r   r   r   r   r   r   r   )   r   c                 C   r   r   r   r   r   r   r   r   *   r   c                 C   s   t d}t d|}t || }| ||  | } t j| ||k tdd }t |t j}||k}	t || }
||	 |
 }t j| || j	j
||k d d S r   )r   r   r   r   r   r%   r    r!   r&   dtypeZ
element_ty)r'   r(   ZDPROBSr   r   r)   r*   r+   r-   deltaZdoutZdinr   r   r   	_backward)   s   
"r1   c                   @   s$   e Zd Zedd Zedd ZdS )_cross_entropyc           	         s~   |j tjks
J d j j }} jd tj|||d}tj ||d} fdd}t|  ||| ||| |S )Nz(Indices are expected to be of type long.)r/   devicec                    s       fS NZnumeloptr,   n_colsr   r   r   H       z(_cross_entropy.forward.<locals>.<lambda>)r/   torchZint64r4   shapeZ
empty_liker.   Zsave_for_backward)	clsctxr,   indicesr4   r/   resultneg_logprobsgridr   r9   r   forward>   s   
z_cross_entropy.forwardc                    s<   |j \}jd   fdd}t| ||  dfS )a  We know d(-log(p[i])/dlogit[k] = -id_mat[i,k] + p[k]
        so we initialize the gradient as neg_logprobs, so we can just exponentiate
        to get p[k], which is most of what we need...  neg_logprobs will be
        modified in place to become the gradient we want
        r3   c                    s       fS r5   r6   r7   r:   rB   r   r   r   Z   r;   z)_cross_entropy.backward.<locals>.<lambda>N)Zsaved_tensorsr=   r1   )r>   r?   Zdneg_logprobsr@   rC   r   rE   r   backwardN   s
   

z_cross_entropy.backwardN)__name__
__module____qualname__classmethodrD   rF   r   r   r   r   r2   =   s
    
r2   )r<    r   r   r   r   r   r   Z	constexprr.   r1   ZautogradFunctionr2   applyZcross_entropyr   r   r   r   <module>   s    
"