o
    iY                     @  s  d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZmZ d dlmZmZmZmZmZmZmZmZmZ ddlmZmZ ejejejeZdZd1dd	Z d
d Z!dd Z"dd Z#edZ$G dd dej%Z&e' dd Z(G dd dee$ Z)G dd de)e$ Z*ed2ddZ+edddddd3d%dZ+	d1dddddd&d4d*dZ+G d+d, d,Z,G d-d. d.Z-d/d0 Z.dS )5    )annotationsdivisionN)defaultdict
namedtuple)	CallableGenericIterableListOptionalTypeVarUnioncastoverload   )get_backendpath_to_ptxasz2.1.0c                 C  sN   | d u rt  } zddlm} || W S  ty&   dd l}|j| j Y S w )Nr   )_cuda_getCurrentRawStream)get_current_deviceZtorch._Cr   ImportErrortorchcudaZcurrent_streamZcuda_stream)idxr   r    r   a/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/triton/runtime/jit.pyget_cuda_stream   s   
r   c                  C  s   dd l } | j S Nr   )r   r   Zcurrent_device)r   r   r   r   r      s   
r   c                 C  s   dd l }|j|  d S r   )r   r   Z
set_devicer   r   r   r   r   set_current_device$   s   r   c                 C  s   dd l }|j| S r   )r   r   get_device_capabilityr   r   r   r   r   )   s   r   Tc                      s:   e Zd ZdZd fddZdd Zdd	 Zd
d Z  ZS )DependenciesFinderz
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.
    returnNonec                   s*   t    t|d | _|| _d S )Nutf-8)super__init__hashlibmd5encode	hexdigestretglobals)selfr+   src	__class__r   r   r%   <   s   

zDependenciesFinder.__init__c                 C  s   | j |jd S N)r+   getid)r,   noder   r   r   
visit_NameA   s   zDependenciesFinder.visit_Namec                 C  sj   |  |j}t|tjr|  |j}t|tjs|d u s-t|dddks-t|dddr/d S t||jS )N__name__ Ztritonz.triton)visitvalue
isinstanceast	Attributegetattrendswithattr)r,   r3   lhsr   r   r   visit_AttributeD   s   *z"DependenciesFinder.visit_Attributec                 C  s   |  |j}|d u rd S t|rd S |jr#|jds!d|jv r#d S t|ts1J d|j d|j	d u rLt
|j}t|j|j}| | |j|_	tt|dd}| j|j	 | d| _t| j | _d S )Nztriton.z.triton.z
Function "zv" is being called from a Triton function but is not a Triton function itself. Decorate it with @triton.jit to fix thisnoinlineFr#   )r7   funcinspect	isbuiltin
__module__
startswithr9   JITFunctionr5   hashr:   parser-   r    __globals__r*   strr<   r(   r&   r'   r)   )r,   r3   rB   treefinderrA   r   r   r   
visit_CallL   s    


zDependenciesFinder.visit_Call)r!   r"   )	r5   rE   __qualname____doc__r%   r4   r@   rN   __classcell__r   r   r.   r   r    5   s    r    c               	   C  s  dd l } g }ttd}|t|  g7 }W d    n1 s"w   Y  tj	t
d}| |gD ])}t|j|jjd}|t|  g7 }W d    n1 sXw   Y  q4ttj	t
dd}|t|  g7 }W d    n1 sw   Y  tj	t
d}| |gD ])}t|j|jjd}|t|  g7 }W d    n1 sw   Y  qt d }tt|dg }d	td | d d	| S )Nr   rbcompilerz_C/libtriton.solanguagez	--version-)pkgutilopen__file__r&   r'   readr)   ospathjoinTRITON_PATHiter_modulesmodule_finder	find_specnameoriginr   
subprocesscheck_outputTRITON_VERSION)rV   contentsfZcompiler_pathlibZlanguage_pathZptxasZptxas_versionr   r   r   version_keyc   s.   
 ri   c                   @  s    e Zd ZU ded< dddZdS )KernelInterfacer   runr!   c                 C  s   t ttjt t| j|dS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        )grid)r   r   	functoolspartialr   rk   )r,   rl   r   r   r   __getitem__   s   zKernelInterface.__getitem__N)r!   r   )r5   rE   rO   __annotations__ro   r   r   r   r   rj   }   s   
 rj   c                      s   e Zd ZdZdZedd Zedd Zedd Zed	d
 Z	dd Z
edd Zdd Zdd Zdd Zd1ddZd1ddZd2dd Zd!d" Zd3d#d$Zed%d& Zd'd( Zd)d* Zd+d, Z fd-d.Zd/d0 Z  ZS )4rG   N   c                 C  s   t | dr| jS t| trdS t| tr*d| kr| dkrdS d| kr(| dkr(dS d	S t| tr1d
S | d u r7d S tdt|  d|  )Ndtypei1i   ii32l            l    u64i64fp32zUnsupported type z for )hasattrrr   r9   boolintfloat	TypeErrortypeargr   r   r   _key_of   s   



zJITFunction._key_ofc                 C  s"   t | drt | jdr| jjS dS )Ndevicer}   r6   )rx   r   r}   r~   r   r   r   
_device_of      
zJITFunction._device_ofc                 C  s"   t | drt| jtr|  S dS )N	is_pinnedF)rx   r9   r   r   r~   r   r   r   _pinned_memory_of   r   zJITFunction._pinned_memory_ofc                 C  sD   t | dr|  tj dkS t| tr| d dk| dkfS | d u fS )Ndata_ptrr   rq      rx   r   rG   divisibilityr9   rz   r~   r   r   r   _spec_of   s
   


zJITFunction._spec_ofc                   sR   dd   fddt |D }fddt |D }tdddgt|t|S )	Nc                 S  sD   t | dr|  tj dkS t| tr| tj dkS | d u r dS dS )Nr   r   TFr   )xr   r   r   is_divisible_by_16   s   

z3JITFunction._get_config.<locals>.is_divisible_by_16c                   s&   h | ]\}} |r|j vr|qS r   )do_not_specialize.0ir   r   r,   r   r   	<setcomp>      & z*JITFunction._get_config.<locals>.<setcomp>c                   s:   h | ]\}}t |tst |tr|d kr| jvr|qS )r   )r9   ry   rz   r   r   r,   r   r   r      s   : Zinstance_descriptordivisible_by_16
equal_to_1)	enumerater   tuple)r,   argsr   r   r   r   r   _get_config   s   zJITFunction._get_configc                 C  s   | d u rdS t | dd }i dddddd	d
dddddddddddddddddddddd d!d"d#}t| D ]}|||< qGt| t rU| S d$||  S )%Nz*i8.ry   rs   Zfloat8e4Zfp8e4Zfloat8e5Zfp8e5Zfloat8e4b15Zfp8e4b15Zfloat16Zfp16Zbfloat16Zbf16Zfloat32rw   Zfloat64Zfp64Zint8i8Zint16Zi16Zint32rt   Zint64rv   Zuint8u8Zuint16u16Zuint32u32Zuint64ru   *)rK   splitlistvaluesr9   )keyZ	dtype_strZtysvr   r   r   _type_of   sN   	

zJITFunction._type_ofc                   s    d  fddt|D }|S )N,c                   s   g | ]	\}}  |qS r   )r   )r   r   kr   r   r   
<listcomp>       z/JITFunction._make_signature.<locals>.<listcomp>)r\   r   )r,   Zsig_key	signaturer   r   r   _make_signature   s   zJITFunction._make_signaturec                 C  s   t t| j|}|S r0   )dictzip
constexprs)r,   Zconstexpr_key	constantsr   r   r   _make_constants   s   zJITFunction._make_constantsc	              	   C  s   t jd u rdS | jj}	| jj}
ddd t| j|d D }|	 d| d| d| d	}t|}G d
d d}t	|||||||d}t j||||
|	d|i|dddS )NF, c                 S  s   g | ]\}}| d | qS )z: r   r   ra   tyr   r   r   r      s    z*JITFunction._call_hook.<locals>.<listcomp>r   z[num_warps=z, num_stages=]()c                   @  s   e Zd Zdd ZdS )z.JITFunction._call_hook.<locals>.LegacyCompilerc                 S  s   || _ || _d S r0   )modulera   )r,   r   ra   r   r   r   r%      s   z7JITFunction._call_hook.<locals>.LegacyCompiler.__init__N)r5   rE   rO   r%   r   r   r   r   LegacyCompiler   s    r   )r   r   r   	num_warps
num_stagesextern_libsconfigsr   )r   reprfncompileZis_manual_warmupZalready_compiled)
rG   
cache_hookr   r5   rE   r\   r   	arg_namesrK   r   )r,   r   r   r   r   r   r   r   r   ra   r   Z	arg_reprsr   r   kwargsr   r   r   
_call_hook   s   
 $zJITFunction._call_hookr!   rK   c                 C  s   | j |d}|dkr%d| dtj d| d| dtj d| d| d	S d
|v r3d| dtj dS |dkrDd| dtj d| dS dS )Nr6   (z.data_ptr() % z == 0) if hasattr(z,, "data_ptr")                         else (z % z == 0, z == 1) if isinstance(z,, int)                         else (False,)Tensorz == 0)rz   z == 1)z(False,))rp   r1   rG   r   r,   r   Zarg_annotationr   r   r   _get_arg_specialization_key  s    z'JITFunction._get_arg_specialization_keyc                 C  sD   | j |d}d|v r| dS |dkrdS |dkrdS d| d	S )
Nr6   r   z.dtypery   rs   r{   rw   z_key_of(r   )rp   r1   r   r   r   r   _get_arg_sig_key  s   
zJITFunction._get_arg_sig_keydevice_types	List[str]pinned_memory_flags
List[bool]c                 C  sv   dd |D }d|v rdd l }|jjrdS dS tdd |D }tdd |D }|r/|r/dS t|dkr9|d S dS )	Nc                 S  s   g | ]}|d kr|qS )r6   r   r   Zdevice_typer   r   r   r         z5JITFunction._conclude_device_type.<locals>.<listcomp>r   r   hipc                 s  s    | ]}|d kV  qdS )cpuNr   r   r   r   r   	<genexpr>"  s    z4JITFunction._conclude_device_type.<locals>.<genexpr>c                 s  s    | ]}|V  qd S r0   r   )r   Zpinned_memory_flagr   r   r   r   #  s    )r   versionr   allanylen)r,   r   r   r   Zis_cpuZis_pinned_memoryr   r   r   _conclude_device_type  s   z!JITFunction._conclude_device_typec                   s   fddt  jD } fddt  jD }d|}d fdd|D }dddd |D  d }ddd	d |D  d }d|}g }t |D ]\}	}
|	 jv rZqP| |
g7 }qPd|}d
dd  jD }ddd t j jD }d jj d| d| dt	|dkr| d
nd dt	|dkr| d
nd d| d| d| d| d| dddd  jD  d}t
 t  j j j j jttttd}t|| | jj S )Nc                   s    g | ]\}}| j vr| qS r   r   r   r   r   r   r   +       z.JITFunction._make_launcher.<locals>.<listcomp>c                   s    g | ]\}}| j v r| qS r   r   r   r   r   r   r   ,  r   r   c                   s   g | ]}  |qS r   )r   r   r   r   r   r   r   /  s    [c                 S     g | ]}d | dqS )z_device_of(r   r   r   r   r   r   r   0  r   ]c                 S  r   )z_pinned_memory_of(r   r   r   r   r   r   r   1  r   r   c                 S  s   g | ]
}d | d| qS )"z": r   r   r   r   r   r   <  s    c                 s  s0    | ]\}}|t jkr|n| d | V  qdS )z = NrC   _empty)r   ra   Zdfltr   r   r   r   =  s   . z-JITFunction._make_launcher.<locals>.<genexpr>z
def r   z, grid=None, num_warps=4, num_stages=3, extern_libs=None, stream=None, warmup=False, device=None, device_type=None):
    from ..compiler import compile, CompiledKernel
    sig_key =  z,
    constexpr_key = r   r   z
    spec_key = aV  
    key = (version_key, sig_key, constexpr_key, spec_key, num_warps, num_stages, self.debug)
    if not extern_libs is None:
      key = (key, tuple(extern_libs.items()))
    assert num_warps > 0 and (num_warps & (num_warps - 1)) == 0, "num_warps must be a power of 2"
    assert grid is not None
    if callable(grid):
        grid = grid({z})
    grid_size = len(grid)
    grid_0 = grid[0]
    grid_1 = grid[1] if grid_size > 1 else 1
    grid_2 = grid[2] if grid_size > 2 else 1

    if device_type is None:
        device_types = [_device_type for _device_type in zW if _device_type != '']
        device_type = self._conclude_device_type(device_types, a  )

    device_backend = None
    if device_type not in ['cuda', 'hip']:
        device_backend = get_backend(device_type)
        if device_backend is None:
            raise ValueError('Cannot find backend for ' + device_type)

    if device is None:
        if device_type in ['cuda', 'hip']:
            device = get_current_device()
            set_current_device(device)
        else:
            device = device_backend.get_current_device()
            device_backend.set_current_device(device)
    if stream is None and not warmup:
        if device_type in ['cuda', 'hip']:
            stream = get_cuda_stream(device)
        else:
            stream = device_backend.get_stream()

    bin = cache[device].get(key, None)
    if bin is not None:
      if not warmup:
          bin.c_wrapper(grid_0, grid_1, grid_2, bin.num_warps, bin.shared, stream, bin.cu_function, CompiledKernel.launch_enter_hook, CompiledKernel.launch_exit_hook, bin, zt)
      return bin
    # kernel not cached -- compile
    else:
      # build dict of constant values
      args = [z]
      all_args = c                 S  s   g | ]}| qS r   r   r   r   r   r   r   q      a  ,
      configs = self._get_config(*all_args),
      constants = self._make_constants(constexpr_key)
      constants.update({i: None for i, arg in enumerate(all_args) if arg is None})
      constants.update({i: 1 for i in configs[0].equal_to_1})
      # build kernel signature -- doesn't include specialized arguments
      signature = { i: self._type_of(_key_of(arg)) for i, arg in enumerate(all_args) if i not in self.constexprs }
      # build stub signature -- includes arguments that are specialized
      for i, arg in constants.items():
        if callable(arg):
          raise TypeError(f"Callable constexpr at index {i} is not supported")
      if not self._call_hook(key, signature, device, constants, num_warps, num_stages, extern_libs, configs):
        bin = compile(self, signature=signature, device=device, constants=constants, num_warps=num_warps, num_stages=num_stages, extern_libs=extern_libs, configs=configs, debug=self.debug, device_type=device_type)
        if not warmup:
            bin.c_wrapper(grid_0, grid_1, grid_2, bin.num_warps, bin.shared, stream, bin.cu_function, CompiledKernel.launch_enter_hook, CompiledKernel.launch_exit_hook, bin, *args)
        self.cache[device][key] = bin
        return bin
      return None
)ri   r   r,   r   r   r   r   cache__spec__r   r   r   )r   r   r\   r   r   r   arg_defaultsr   r5   r   ri   r   r   r   r   r   r   r   r   r   r   exec)r,   Zregular_argsZconstexpr_argsr   Zsig_keysr   r   Zconstexpr_keysZspecializationsr   r   Z	spec_keysZ	grid_argsZargs_signaturer-   scoper   r   r   _make_launcher*  sj   



,12E
zJITFunction._make_launcherc                   sZ  |_ |j_|_t|}dd |j D _dd |j D _	t
dd j	D _|d u r6g n|_fddjD _tt|_jjdd  _tt_d _g _d _tjd	d
dkrrdn|_|_dd   fdd|j D _fddj D _  _!|j"_"|j#_#|j$_$|j_d S )Nc                 S     g | ]}|j qS r   )ra   r   r   r   r   r   r     r   z(JITFunction.__init__.<locals>.<listcomp>c                 S  r   r   )defaultr   r   r   r   r     r   c                 s  s    | ]}|t jkV  qd S r0   r   r   r   r   r   r     s    z'JITFunction.__init__.<locals>.<genexpr>c                   s&   h | ]}t |tr j|n|qS r   )r9   rK   r   indexr   r   r   r   r     r   z'JITFunction.__init__.<locals>.<setcomp>defZTRITON_DEBUG01Tc                 S  s   t | tr| jS | S r0   )r9   r}   r5   )r   r   r   r   <lambda>  s    z&JITFunction.__init__.<locals>.<lambda>c                   s   i | ]	\}}| |qS r   r   r   )normalize_tyr   r   
<dictcomp>  r   z(JITFunction.__init__.<locals>.<dictcomp>c                   s$   g | ]\}}d |v r j |qS )Z	constexpr)r   r   r   r   r   r   r     s   $ )%r   rE   r   r   rC   r   
parametersr   r   r   r   Zhas_defaultsr   textwrapdedent	getsourcer-   findr   r   r   rH   kernel_decoratorskernelrZ   environr1   debugrA   rp   itemsr   r   rk   rP   r5   rJ   )r,   r   r   r   r   rA   r   r   )r   r,   r   r%     s2   


zJITFunction.__init__c                 C  s<   | j d u rt| j| jd}||   |jt  | _ | j S )N)r+   r-   )rH   r    rJ   r-   r7   rI   r*   ri   )r,   Zdependencies_finderr   r   r   	cache_key  s
   
zJITFunction.cache_keyc                 O  s    | j ttj|i |ddiS )NwarmupT)rk   map
MockTensor
wrap_dtyper,   r   r   r   r   r   r     s    zJITFunction.warmupc                 C  sH   t | j}t|t jsJ t|jdksJ t|jd t js"J |S )Nr   r   )r:   rI   r-   r9   Moduler   bodyFunctionDef)r,   rL   r   r   r   rI     s
   zJITFunction.parsec                 O  s   t d)Nz:Cannot call @triton.jit'd outside of the scope of a kernel)RuntimeErrorr  r   r   r   __call__  s   zJITFunction.__call__c                   s6   |dkrd | _ tt| || |dkrd | _d S d S )Nr   r-   )r   r$   rG   __setattr__rH   )r,   ra   r8   r.   r   r   r    s   
zJITFunction.__setattr__c                 C  s   d| j  d| jj dS )NzJITFunction(:r   )r   r   r5   r   r   r   r   __repr__  s   zJITFunction.__repr__r!   rK   )r   r   r   r   r!   rK   )NNNN)r5   rE   rO   r   r   staticmethodr   r   r   r   r   r   r   r   r   r   r   r   r   r%   propertyr   r   rI   r  r  r	  rQ   r   r   r.   r   rG      s:    








i%
rG   r   r!   JITFunction[T]c                 C     d S r0   r   )r   r   r   r   jit     r  r   r   r   rA   r   Optional[Iterable[int]]r   Optional[bool]rA   Callable[[T], JITFunction[T]]c                 C  r  r0   r   r  r   r   r   r    s   )r   r   r   rA   	interpretOptional[T]r  4Union[JITFunction[T], Callable[[T], JITFunction[T]]]c                  s*   d fdd}| dur|| S |S )	a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    r   r   r!   r  c                   s6   t | sJ rddlm} || S t|  dS )Nr   )GridSelectorr  )callableZinterpreter.interpreterr  rG   )r   r  r   r   r  rA   r   r   r   	decorator  s   zjit.<locals>.decoratorNr   r   r!   r  r   )r   r   r   r   rA   r  r  r   r  r   r    s   c                   @  s0   e Zd ZdZedd Zdd Zedd ZdS )	r   zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                 C  s"   | j jdkr| jdkrt| S | S )Nrr   r   )r/   r5   rE   r   r~   r   r   r   r   -  s   
zMockTensor.wrap_dtypec                 C  s
   || _ d S r0   )rr   )r,   rr   r   r   r   r%   4     
zMockTensor.__init__c                   C  s   dS r   r   r   r   r   r   r   7  r  zMockTensor.data_ptrN)r5   rE   rO   rP   r  r   r%   r   r   r   r   r   r   (  s    
r   c                   @  s.   e Zd Zdd Zdd Zdd Zdd	d
ZdS )TensorWrapperc                 C  s*   || _ || _|j| _|j| _| jj| _d S r0   )rr   baseZis_cudar   shape)r,   r  rr   r   r   r   r%   =  s
   zTensorWrapper.__init__c                 C  s
   | j  S r0   )r  r   r   r   r   r   r   D  r  zTensorWrapper.data_ptrc                 C  s   | j |S r0   )r  stride)r,   r   r   r   r   r!  G  s   zTensorWrapper.strider!   rK   c                 C  s   d| j  d| j dS )NzTensorWrapper[r   r   )rr   r  r   r   r   r   __str__J  s   zTensorWrapper.__str__Nr
  )r5   rE   rO   r%   r   r!  r"  r   r   r   r   r  <  s
    r  c                 C  sP   t | tr|| jjkr| jS t| j|S t| drt| |S tdt|  d)Nr   zCannot reinterpret a r   )r9   r  r  rr   rx   r|   r}   )Ztensorrr   r   r   r   reinterpretN  s   


r#  r0   r  )r   r  r   r  rA   r  r!   r  )r   r  r   r  r   r  rA   r  r  r  r!   r  )/
__future__r   r   r:   rm   r&   rC   rZ   rc   r   collectionsr   r   typingr   r   r   r	   r
   r   r   r   r   Zcommon.backendr   r   r[   dirnameabspathrX   r]   re   r   r   r   r   r   NodeVisitorr    	lru_cacheri   rj   rG   r  r   r  r#  r   r   r   r   <module>   sX    ,
.
  ^3