o
    ia)                     @   s   d dl mZ d dlZd dlZd dlZd dlZddlmZm	Z	 ddl
mZmZmZ ddlmZ ddlmZmZ 	 daed	d
 Zdd ZG dd dZG dd deZG dd deZG dd dejZG dd deZdS )    )contextmanagerN   )FakeCUDAArrayFakeWithinKernelCUDAArray)Dim3FakeCUDAModuleswapped_cuda_module   )normalize_kernel_dimensions)wrap_argArgHintc                 c   s.    t du s	J d| a zdV  W da dS da w )z*
    Push the current kernel context.
    Nz)concurrent simulated kernel not supported_kernel_context)mod r   j/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/numba/cuda/simulator/kernel.py_push_kernel_context   s   r   c                   C   s   t S )zT
    Get the current kernel context. This is usually done by a device function.
    r   r   r   r   r   _get_kernel_context$   s   r   c                   @   s   e Zd ZdZdd ZdS )FakeOverloadzE
    Used only to provide the max_cooperative_grid_blocks method
    c                 C   s   dS )Nr   r   )selfZblockdimr   r   r   max_cooperative_grid_blocks/   s   z(FakeOverload.max_cooperative_grid_blocksN)__name__
__module____qualname____doc__r   r   r   r   r   r   +   s    r   c                   @   s   e Zd Zdd ZdS )FakeOverloadDictc                 C      t  S N)r   )r   keyr   r   r   __getitem__6   s   zFakeOverloadDict.__getitem__N)r   r   r   r   r   r   r   r   r   5   s    r   c                   @   sb   e Zd ZdZdg dfddZdd Zdd Zd	d
 Zdd ZdddZ	e
dd Ze
dd ZdS )FakeCUDAKernelz(
    Wraps a @cuda.jit-ed function.
    Fc                 C   sJ   || _ || _|| _|| _t|| _d | _d | _d| _d| _	t
| | d S )Nr   )fn_deviceZ	_fastmath_debuglist
extensionsgrid_dim	block_dimstreamdynshared_size	functoolsupdate_wrapper)r   r!   ZdeviceZfastmathr%   debugr   r   r   __init__A   s   
zFakeCUDAKernel.__init__c           	   	      s   j rtjt  j| W  d    S 1 sw   Y  tjj\}}t||j}t	|S g fdd  fdd|D }tj|" t
j| D ]}tj||j}|j|g|R   qRW d    n1 sqw   Y  D ]}|  qxW d    d S 1 sw   Y  d S )Nc                    s   t  fddjd | f\}} t| tjr#| jdkr#t|  }nt| t	r.|  }nt| tj
r9t| }n| }t|trDt|S |S )Nc                    s   |j | d dS )Nr   )r(   retr)Zprepare_args)Zty_val	extension)r.   r   r   <lambda>b   s
    z;FakeCUDAKernel.__call__.<locals>.fake_arg.<locals>.<lambda>r   )r*   reducer%   
isinstancenpZndarrayndimr   Z	to_devicer   voidr   r   )arg_ret)r.   r   r   r   fake_arg_   s   
	


z)FakeCUDAKernel.__call__.<locals>.fake_argc                    s   g | ]} |qS r   r   ).0r6   )r9   r   r   
<listcomp>v   s    z+FakeCUDAKernel.__call__.<locals>.<listcomp>)r"   r   r!   r   r
   r&   r'   r   r)   r   r3   ndindexBlockManagerr#   run)	r   argsr&   r'   Zfake_cuda_moduleZ	fake_args
grid_pointZbmwbr   )r9   r.   r   r   __call__O   s0    
"zFakeCUDAKernel.__call__c                 C   s2   t |d d  \| _| _t|dkr|d | _| S )Nr	         )r
   r&   r'   lenr)   )r   configurationr   r   r   r      s
   

zFakeCUDAKernel.__getitem__c                 C   s   d S r   r   r   r   r   r   bind      zFakeCUDAKernel.bindc                 G   s   | S r   r   )r   r?   r   r   r   
specialize   rI   zFakeCUDAKernel.specializer   c                 C   s$   |dk r
t d| | |d||f S )Nr   z0Can't create ForAll with negative task count: %sr   )
ValueError)r   ZntasksZtpbr(   Z	sharedmemr   r   r   forall   s
   zFakeCUDAKernel.forallc                 C   r   r   )r   rG   r   r   r   	overloads      zFakeCUDAKernel.overloadsc                 C   s   | j S r   )r!   rG   r   r   r   py_func   rN   zFakeCUDAKernel.py_funcN)r   r   r   )r   r   r   r   r-   rB   r   rH   rJ   rL   propertyrM   rO   r   r   r   r   r    <   s    1	

r    c                       sT   e Zd ZdZ fddZ fddZdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )BlockThreadzG
    Manages the execution of a function for a single CUDA thread.
    c           	         s   |r fdd}|}n }t t| j|d t | _d| _|| _t| | _	t| | _
d | _d| _d| _|| _t| jj }| j
j|j| j
j|j| j
j    | _d S )Nc                     s   t jdd  | i | d S )Nraise)divide)r3   Zseterr)r?   kwargsfr   r   debug_wrapper   s   z+BlockThread.__init__.<locals>.debug_wrapper)targetFT)superrQ   r-   	threadingEventsyncthreads_eventsyncthreads_blocked_managerr   blockIdx	threadIdx	exceptiondaemonabortr,   
_block_dimxyz	thread_id)	r   rV   managerr_   r`   r,   rW   rX   ZblockDim	__class__rU   r   r-      s(   


zBlockThread.__init__c              
      s   z
t t|   W d S  tyN } z8dt| j }dt| j }t|dkr-d||f }nd|||f }t	 d }t
|||f| _W Y d }~d S d }~ww )Nztid=%szctaid=%s z%s %sz	%s %s: %sr	   )rY   rQ   r>   	Exceptionr$   r`   r_   strsysexc_infotypera   )r   etidZctaidmsgtbrj   r   r   r>      s    zBlockThread.runc                 C   s:   | j rtdd| _| j  | j  | j rtdd S )Nz"abort flag set on syncthreads callTz#abort flag set on syncthreads clear)rc   RuntimeErrorr]   r\   waitclearrG   r   r   r   syncthreads   s   

zBlockThread.syncthreadsc                 C   sD   | j j| j j| j jf}|| jj|< |   t| jj}|   |S r   )	r`   re   rf   rg   r^   block_statery   r3   Zcount_nonzero)r   valueidxcountr   r   r   syncthreads_count   s   zBlockThread.syncthreads_countc                 C   L   | j j| j j| j jf}|| jj|< |   t| jj}|   |r$dS dS Nr   r   )	r`   re   rf   rg   r^   rz   ry   r3   allr   r{   r|   testr   r   r   syncthreads_and      zBlockThread.syncthreads_andc                 C   r   r   )	r`   re   rf   rg   r^   rz   ry   r3   anyr   r   r   r   syncthreads_or   r   zBlockThread.syncthreads_orc                 C   s   d| j | jf S )NzThread <<<%s, %s>>>)r_   r`   rG   r   r   r   __str__   s   zBlockThread.__str__)r   r   r   r   r-   r>   ry   r~   r   r   r   __classcell__r   r   rj   r   rQ      s    rQ   c                   @   s    e Zd ZdZdd Zdd ZdS )r=   a  
    Manages the execution of a thread block.

    When run() is called, all threads are started. Each thread executes until it
    hits syncthreads(), at which point it sets its own syncthreads_blocked to
    True so that the BlockManager knows it is blocked. It then waits on its
    syncthreads_event.

    The BlockManager polls threads to determine if they are blocked in
    syncthreads(). If it finds a blocked thread, it adds it to the set of
    blocked threads. When all threads are blocked, it unblocks all the threads.
    The thread are unblocked by setting their syncthreads_blocked back to False
    and setting their syncthreads_event.

    The polling continues until no threads are alive, when execution is
    complete.
    c                 C   s.   || _ || _|| _|| _tj|tjd| _d S )N)Zdtype)Z	_grid_dimrd   _fr#   r3   ZzerosZbool_rz   )r   rV   r&   r'   r,   r   r   r   r-     s
   zBlockManager.__init__c           
         s$  t  }t  }t  }tjj D ] } fdd}t|||j}|  || || q|r}|D ])}|jr?|| q4|j	r]|D ]}	d|	_
d|	_|	j   qD|j	d |j	d q4||krr|D ]
}d|_|j   qdt  }t dd |D }|s2|D ]}|j	r|j	d |j	d qd S )	Nc                      s   j    d S r   )r   r   r?   r   r   r   rX     s   z BlockManager.run.<locals>.targetTFr   r   c                 S   s   g | ]}|  r|qS r   )is_alive)r:   tr   r   r   r;   /  s    z$BlockManager.run.<locals>.<listcomp>)setr3   r<   rd   rQ   r#   startaddr]   ra   rc   r\   with_traceback)
r   r@   r?   threadsZlivethreadsZblockedthreadsZblock_pointrX   r   Zt_otherr   r   r   r>     s@   

zBlockManager.runN)r   r   r   r   r-   r>   r   r   r   r   r=      s    r=   )
contextlibr   r*   ro   rZ   numpyr3   Zcudadrv.devicearrayr   r   Z	kernelapir   r   r   errorsr
   r?   r   r   r   r   r   r   dictr   objectr    ThreadrQ   r=   r   r   r   r   <module>   s&    

dS