o
    im0                     @   sP  d Z ddlmZ ddlZddlZddlZddlmZ ddlZ	ddl
mZ ddlmZ G dd	 d	eZG d
d dZG dd dZG dd deZG dd deZG dd deZe Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Z G dd deZ!G dd deZ"G dd deZ#edd Z$dS )zf
Implements the cuda module as called from within an executing kernel
(@cuda.jit-decorated function).
    )contextmanagerN)types)numpy_support   )vector_typesc                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )Dim3z;
    Used to implement thread/block indices/dimensions
    c                 C   s   || _ || _|| _d S Nxyz)selfr
   r   r    r   m/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/numba/cuda/simulator/kernelapi.py__init__   s   
zDim3.__init__c                 C      d| j | j| jf S )Nz(%s, %s, %s)r	   r   r   r   r   __str__      zDim3.__str__c                 C   r   )NzDim3(%s, %s, %s)r	   r   r   r   r   __repr__   r   zDim3.__repr__c                 c   s    | j V  | jV  | jV  d S r   r	   r   r   r   r   __iter__!   s   zDim3.__iter__N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r      s    r   c                   @      e Zd ZdZdd ZdS )	GridGroupz+
    Used to implement the grid group.
    c                 C      t    d S r   	threadingcurrent_threadsyncthreadsr   r   r   r   sync,   s   zGridGroup.syncN)r   r   r   r   r"   r   r   r   r   r   '   s    r   c                   @   r   )
FakeCUDACgz!
    CUDA Cooperative Groups
    c                 C   s   t  S r   )r   r   r   r   r   	this_grid7      zFakeCUDACg.this_gridN)r   r   r   r   r$   r   r   r   r   r#   3       r#   c                   @   r   )FakeCUDALocalz
    CUDA Local arrays
    c                 C   s"   t |tjrt|}t||S r   )
isinstancer   Typer   as_dtypenpempty)r   shapedtyper   r   r   array?   s   
zFakeCUDALocal.arrayN)r   r   r   r   r/   r   r   r   r   r'   ;   r&   r'   c                   @   r   )FakeCUDAConstz
    CUDA Const arrays
    c                 C   s   |S r   r   )r   Zaryr   r   r   
array_likeI   s   zFakeCUDAConst.array_likeN)r   r   r   r   r1   r   r   r   r   r0   E   r&   r0   c                   @   s    e Zd ZdZdd Zdd ZdS )FakeCUDAShareda  
    CUDA Shared arrays.

    Limitations: assumes that only one call to cuda.shared.array is on a line,
    and that that line is only executed once per thread. i.e.::

        a = cuda.shared.array(...); b = cuda.shared.array(...)

    will erroneously alias a and b, and::

        for i in range(10):
            sharedarrs[i] = cuda.shared.array(...)

    will alias all arrays created at that point (though it is not certain that
    this would be supported by Numba anyway).
    c                 C   s"   i | _ || _tj|tjd| _d S N)r.   )_allocations_dynshared_sizer+   Zzerosbyte
_dynshared)r   dynshared_sizer   r   r   r   _   s   zFakeCUDAShared.__init__c                 C   s   t |tjrt|}|dkr| j|j }tj| j	j
||dS tt }|d dd }| j|}|d u rCt||}|| j|< |S )Nr   )r.   count   )r(   r   r)   r   r*   r5   itemsizer+   Z
frombufferr7   data	tracebackextract_stacksys	_getframer4   getr,   )r   r-   r.   r9   stackZcallerresr   r   r   r/   d   s   

zFakeCUDAShared.arrayN)r   r   r   r   r   r/   r   r   r   r   r2   M   s    r2   c                   @   s|   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd ZdS )FakeCUDAAtomicc                 C   sB   t  || }||  |7  < W d    |S 1 sw   Y  |S r   )addlockr   r/   indexvaloldr   r   r   add      
zFakeCUDAAtomic.addc                 C   sB   t  || }||  |8  < W d    |S 1 sw   Y  |S r   )sublockrG   r   r   r   sub   rL   zFakeCUDAAtomic.subc                 C   sB   t  || }||  |M  < W d    |S 1 sw   Y  |S r   )andlockrG   r   r   r   and_   rL   zFakeCUDAAtomic.and_c                 C   sB   t  || }||  |O  < W d    |S 1 sw   Y  |S r   )orlockrG   r   r   r   or_   rL   zFakeCUDAAtomic.or_c                 C   sB   t  || }||  |N  < W d    |S 1 sw   Y  |S r   )xorlockrG   r   r   r   xor   rL   zFakeCUDAAtomic.xorc                 C   sd   t & || }||krd||< n||  d7  < W d    |S W d    |S 1 s+w   Y  |S Nr   r   )inclockrG   r   r   r   inc   s   


zFakeCUDAAtomic.incc                 C   sl   t * || }|dks||kr|||< n||  d8  < W d    |S W d    |S 1 s/w   Y  |S rU   )declockrG   r   r   r   dec   s   


zFakeCUDAAtomic.decc                 C   s:   t  || }|||< W d    |S 1 sw   Y  |S r   )exchlockrG   r   r   r   exch   s   

zFakeCUDAAtomic.exchc                 C   @   t  || }t||||< W d    |S 1 sw   Y  |S r   )maxlockmaxrG   r   r   r   r^         
zFakeCUDAAtomic.maxc                 C   r\   r   )minlockminrG   r   r   r   ra      r_   zFakeCUDAAtomic.minc                 C   H   t  || }t|| |g||< W d    |S 1 sw   Y  |S r   )r]   r+   nanmaxrG   r   r   r   rc         
zFakeCUDAAtomic.nanmaxc                 C   rb   r   )r`   r+   nanminrG   r   r   r   re      rd   zFakeCUDAAtomic.nanminc                 C   sN   t  d|j }|| }||kr|||< |W  d    S 1 s w   Y  d S )N)r   )compare_and_swaplockndim)r   r/   rJ   rI   rH   loadedr   r   r   compare_and_swap   s   
$zFakeCUDAAtomic.compare_and_swapc                 C   sD   t  || }||kr|||< |W  d    S 1 sw   Y  d S r   )caslock)r   r/   rH   rJ   rI   rh   r   r   r   cas   s   $zFakeCUDAAtomic.casN)r   r   r   rK   rN   rP   rR   rT   rW   rY   r[   r^   ra   rc   re   ri   rk   r   r   r   r   rE      s    		rE   c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Zd7d8 Zd9d: Zd;d< Z d=S )>FakeCUDAFp16c                 C   s   || S r   r   r   abr   r   r   hadd      zFakeCUDAFp16.haddc                 C   s   || S r   r   rm   r   r   r   hsub   rq   zFakeCUDAFp16.hsubc                 C   s   || S r   r   rm   r   r   r   hmul   rq   zFakeCUDAFp16.hmulc                 C   s   || S r   r   rm   r   r   r   hdiv   rq   zFakeCUDAFp16.hdivc                 C      || | S r   r   r   rn   ro   cr   r   r   hfma      zFakeCUDAFp16.hfmac                 C   s   | S r   r   r   rn   r   r   r   hneg   r%   zFakeCUDAFp16.hnegc                 C   s   t |S r   )absrz   r   r   r   habs   rq   zFakeCUDAFp16.habsc                 C      t j|t jdS r3   )r+   sinfloat16r   r
   r   r   r   hsin      zFakeCUDAFp16.hsinc                 C   r~   r3   )r+   cosr   r   r   r   r   hcos  r   zFakeCUDAFp16.hcosc                 C   r~   r3   )r+   logr   r   r   r   r   hlog  r   zFakeCUDAFp16.hlogc                 C   r~   r3   )r+   log2r   r   r   r   r   hlog2  r   zFakeCUDAFp16.hlog2c                 C   r~   r3   )r+   log10r   r   r   r   r   hlog10  r   zFakeCUDAFp16.hlog10c                 C   r~   r3   )r+   expr   r   r   r   r   hexp  r   zFakeCUDAFp16.hexpc                 C   r~   r3   )r+   Zexp2r   r   r   r   r   hexp2  r   zFakeCUDAFp16.hexp2c                 C   s   t d| S )N
   r+   r   r   r   r   r   hexp10     zFakeCUDAFp16.hexp10c                 C   r~   r3   )r+   sqrtr   r   r   r   r   hsqrt  r   zFakeCUDAFp16.hsqrtc                 C   s   t |d S )Ng      r   r   r   r   r   hrsqrt  r   zFakeCUDAFp16.hrsqrtc                 C   r~   r3   r+   ceilr   r   r   r   r   hceil  r   zFakeCUDAFp16.hceilc                 C   r~   r3   r   r   r   r   r   hfloor   r   zFakeCUDAFp16.hfloorc                 C   r~   r3   )r+   Z
reciprocalr   r   r   r   r   hrcp#  r   zFakeCUDAFp16.hrcpc                 C   r~   r3   )r+   truncr   r   r   r   r   htrunc&  r   zFakeCUDAFp16.htruncc                 C   r~   r3   )r+   Zrintr   r   r   r   r   hrint)  r   zFakeCUDAFp16.hrintc                 C   s   ||kS r   r   rm   r   r   r   heq,  rq   zFakeCUDAFp16.heqc                 C   s   ||kS r   r   rm   r   r   r   hne/  rq   zFakeCUDAFp16.hnec                 C   s   ||kS r   r   rm   r   r   r   hge2  rq   zFakeCUDAFp16.hgec                 C   s   ||kS r   r   rm   r   r   r   hgt5  rq   zFakeCUDAFp16.hgtc                 C   s   ||kS r   r   rm   r   r   r   hle8  rq   zFakeCUDAFp16.hlec                 C   s   ||k S r   r   rm   r   r   r   hlt;  rq   zFakeCUDAFp16.hltc                 C   
   t ||S r   )r^   rm   r   r   r   hmax>     
zFakeCUDAFp16.hmaxc                 C   r   r   )ra   rm   r   r   r   hminA  r   zFakeCUDAFp16.hminN)!r   r   r   rp   rr   rs   rt   rx   r{   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rl      s>    rl   c                   @   s  e Zd ZdZdd Zedd Zedd Zedd	 Zed
d Z	edd Z
edd Zedd Zedd Zedd Zedd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8S )9FakeCUDAModulea7  
    An instance of this class will be injected into the __globals__ for an
    executing function in order to implement calls to cuda.*. This will fail to
    work correctly if the user code does::

        from numba import cuda as something_else

    In other words, the CUDA module must be called cuda.
    c                 C   s   t | | _t | | _t | _t | _t|| _t	 | _
t | _t | _t D ]\}}t| || |jD ]}t| || q4q'd S r   )r   gridDimblockDimr#   _cgr'   _localr2   _sharedr0   _constrE   _atomicrl   _fp16r   itemssetattraliases)r   Zgrid_dimZ	block_dimr8   nameZsvtyaliasr   r   r   r   P  s   



zFakeCUDAModule.__init__c                 C      | j S r   )r   r   r   r   r   cgc     zFakeCUDAModule.cgc                 C   r   r   )r   r   r   r   r   localg  r   zFakeCUDAModule.localc                 C   r   r   )r   r   r   r   r   sharedk  r   zFakeCUDAModule.sharedc                 C   r   r   )r   r   r   r   r   consto  r   zFakeCUDAModule.constc                 C   r   r   )r   r   r   r   r   atomics  r   zFakeCUDAModule.atomicc                 C   r   r   )r   r   r   r   r   fp16w  r   zFakeCUDAModule.fp16c                 C   
   t  jS r   )r   r    	threadIdxr   r   r   r   r   {     
zFakeCUDAModule.threadIdxc                 C   r   r   )r   r    blockIdxr   r   r   r   r     r   zFakeCUDAModule.blockIdxc                 C   s   dS N    r   r   r   r   r   warpsize     zFakeCUDAModule.warpsizec                 C   s   t  jd S r   )r   r    	thread_idr   r   r   r   laneid  s   zFakeCUDAModule.laneidc                 C   r   r   r   r   r   r   r   r!     r   zFakeCUDAModule.syncthreadsc                 C      d S r   r   r   r   r   r   threadfence  r   zFakeCUDAModule.threadfencec                 C   r   r   r   r   r   r   r   threadfence_block  r   z FakeCUDAModule.threadfence_blockc                 C   r   r   r   r   r   r   r   threadfence_system  r   z!FakeCUDAModule.threadfence_systemc                 C      t  |S r   )r   r    syncthreads_countr   rI   r   r   r   r     r   z FakeCUDAModule.syncthreads_countc                 C   r   r   )r   r    syncthreads_andr   r   r   r   r     r   zFakeCUDAModule.syncthreads_andc                 C   r   r   )r   r    syncthreads_orr   r   r   r   r     r   zFakeCUDAModule.syncthreads_orc                 C   s   t |dS )N1)binr9   r   r   r   r   popc  r   zFakeCUDAModule.popcc                 C   ru   r   r   rv   r   r   r   fma  ry   zFakeCUDAModule.fmac                 C   s   |d S )NgUUUUUU?r   rz   r   r   r   cbrt  rq   zFakeCUDAModule.cbrtc                 C   s   t d|d d d dS )N{:032b}r;   )intformatr   r   r   r   brev  s   zFakeCUDAModule.brevc                 C   s    d |}t|t|d S )Nr   0)r   lenlstrip)r   rI   sr   r   r   clz  s   
zFakeCUDAModule.clzc                 C   s,   d |}t|t|d d d }|S )Nr   r   r   !   )r   r   rstrip)r   rI   r   rr   r   r   ffs  s   
zFakeCUDAModule.ffsc                 C   s   |r|S |S r   r   rv   r   r   r   selp  ry   zFakeCUDAModule.selpc                 C   s   | j }| j}| j}|j|j |j }|dkr|S |j|j |j }|dkr)||fS |j|j |j }|dkr;|||fS td| )Nr   r;      z*Global ID has 1-3 dimensions. %d requested)r   r   r   r
   r   r   RuntimeError)r   nbdimbidtidr
   r   r   r   r   r   grid  s   
zFakeCUDAModule.gridc                 C   sj   | j }| j}|j|j }|dkr|S |j|j }|dkr ||fS |j|j }|dkr/|||fS td| )Nr   r;   r   z,Global grid has 1-3 dimensions. %d requested)r   r   r
   r   r   r   )r   r   r   Zgdimr
   r   r   r   r   r   gridsize  s   
zFakeCUDAModule.gridsizeN) r   r   r   r   r   propertyr   r   r   r   r   r   r   r   r   r   r!   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   E  sN    











r   c              	   #   sv    ddl m  | j}t fdd| D }tfdd| D }|| zd V  W || d S || w )Nr   cudac                 3   s$    | ]\}}| u r||fV  qd S r   r   .0kvr   r   r   	<genexpr>  s   " z&swapped_cuda_module.<locals>.<genexpr>c                 3   s    | ]	\}}| fV  qd S r   r   r   )fake_cuda_moduler   r   r     s    )Znumbar   __globals__dictr   update)fnr   Zfn_globsorigreplr   )r   r   r   swapped_cuda_module  s   
r   )%r   
contextlibr   r@   r   r>   Z
numba.corer   numpyr+   Znumba.npr   r   objectr   r   r#   r'   r0   r2   LockrF   rM   rO   rQ   rS   r]   r`   rf   rj   rV   rX   rZ   rE   rl   r   r   r   r   r   r   <module>   sB    
/_\ 