o
    !i@                     @   s  d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZmZmZ d dlZd dlmZ d dlm  mZ d dlmZ d dlZd dlmZ d dlm  mZ d dlZd dl Z d dl!Z!d dl"m#Z# de$d	ee$e$f fd
dZ%dee dej&de	e$e
f fddZ'dej&de	e$e
f dejj(fddZ)d6dejj(d	ejj(fddZ*dej(d	ej(fddZ+dej(deej& deej& deej& fddZ,ej-ej.ej/ej0ej1ej2ej3ej4ej5ej6ej4ej7ej8gZ9ej:ej;gZ<ej-ej=ej.ej>ej/d d! iZ?deej& de	e$ej(f fd"d#Z@deej& de	e$ej(f d$e	ej(ej(f fd%d&ZAG d'd( d(ZBd7d+d,ZCd-eBd	eDfd.d/ZEG d0d1 d1ZFdejGfdejj(d2ee	e$e
f  d3eejG d	ejj(fd4d5ZHdS )8    N)ArgumentTarget)fuse_conv_bn_eval)TypeDictAnyTupleIterableOptionalListcast)	ShapeProp)defaultdict)Enumtargetreturnc                 C   s*   |  dd^ }}|r|d |fS d|fS )zp
    Splits a qualname into parent path and last atom.
    For example, `foo.bar.baz` -> (`foo.bar`, `baz`)
    .   r    )rsplit)r   parentname r   q/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/torch/fx/experimental/optimization.py_parent_name   s   r   patternnodemodulesc                 C   s   t |jdkr	dS |jd |f}t| |D ]2\}}t|tjs" dS |jdkr* dS t|jts3 dS |j|vr; dS t	||j |urG dS qdS )Nr   Fcall_moduleT)
lenargszip
isinstancefxNodeopr   strtype)r   r   r   nodesexpected_typeZcurrent_noder   r   r   matches_module_pattern   s    

r*   
new_modulec                 C   s<   t | jtsJ t| j\}}||| j< t|| || d S N)r"   r   r&   r   setattr)r   r   r+   parent_namer   r   r   r   replace_node_module,   s   
r/   Fmodelc                 C   s   t jt jft jt jft jt jfg}|st| } t	
| }t| }t|j}|D ]E}|jD ]?}t|||rot|jd jdkrCq0||jd j }||j }	|	jsTq0t||	}
t|jd ||
 ||jd  || q0q+t	||S )z
    Fuses convolution/BN layers for inference purposes. Will deepcopy your
    model by default, but can modify the model inplace as well.
    r   r   )nnZConv1dZBatchNorm1dConv2dBatchNorm2dZConv3dZBatchNorm3dcopydeepcopyr#   symbolic_tracedictnamed_modulesgraphr(   r*   r   r    usersr   Ztrack_running_statsr   r/   replace_all_uses_with
erase_nodeGraphModule)r0   Zinplacepatternsfx_modelr   	new_graphr   r   convZbnZ
fused_convr   r   r   fuse2   s2   








rB   c                 C   s*   t | }G dd dtj j}|| S )z5
    Removes all dropout layers from the module.
    c                       s>   e Zd Zdedeedf deeef def fddZ	  Z
S )z&remove_dropout.<locals>.DropoutRemoverr   r    .kwargsr   c                    s:   t | j| tjrt|dksJ |d S t |||S )Nr   r   )r"   Z
submodulesr1   ZDropoutr   superr   )selfr   r    rC   	__class__r   r   r   V   s   z2remove_dropout.<locals>.DropoutRemover.call_module)__name__
__module____qualname__r   r   r   r   r&   r   r   __classcell__r   r   rF   r   DropoutRemoverU   s    6rL   )r#   r6   torchZTransformerZ	transform)r0   r?   rL   r   r   r   remove_dropoutO   s   
rN   orig_moduler(   inputsoutputsc                    s|   t  }i  |D ]}||j}| |< q|D ]}|| fdd}| |< q| fdd|D  |  t | |S )z
    Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph.
    c                    s    |  S r,   r   )xenvr   r   <lambda>h       z"extract_subgraph.<locals>.<lambda>c                    s   g | ]} | qS r   r   ).0outputrS   r   r   
<listcomp>j       z$extract_subgraph.<locals>.<listcomp>)r#   Graphplaceholderr   Z	node_copyrX   lintr=   )rO   r(   rP   rQ   r@   inputnew_noder   r   rS   r   extract_subgraph^   s   

r`   c                 C   s
   t | S r,   )	th_mkldnnZMkldnnBatchNorm)a_r   r   r   rU   {   s   
 rU   c                 C   s   i }| D ]9}|j dkr=t|jtsJ ||j }t|tv r=tt| |tj}t|tj	s0J t
|||< t||| q|S )z
    For each node, if it's a module that can be preconverted into MKLDNN,
    then we do so and create a mapping to allow us to convert from the MKLDNN
    version of the module to the original.
    r   )r%   r"   r   r&   r'   
mkldnn_maprM   floatr1   Moduler4   r5   r/   )r(   r   old_modulesr   
cur_moduler+   r   r   r   modules_to_mkldnn   s   

ri   rg   c                 C   sJ   | D ] }|j dkr"t|jtsJ ||j }||v r"t||||  qdS )za
    Maps each module that's been changed with `modules_to_mkldnn` back to its
    original.
    r   N)r%   r"   r   r&   r/   )r(   r   rg   r   rh   r   r   r   reset_modules   s   

rj   c                   @   s   e Zd ZdejfddZdS )MklSubgraphfx_graphc                 C   s   || _ g | _g | _g | _d S r,   )rl   r(   start_nodes	end_nodes)rE   rl   r   r   r   __init__   s   
zMklSubgraph.__init__N)rH   rI   rJ   r#   r[   ro   r   r   r   r   rk      s    rk   
   r   c                    s*   dddt dtf fdd}|S )aW  
    This generates a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by running it with the example_inputs.

    Example usage:
        heuristic = gen_mkl_autotuner(example_inputs, iters=10)
        fast_model = optimization.optimize_for_inference(model, heuristic)
    Nr9   r   c                    s   | j }d u r| jj| jjt dd |D  tttj	 dd | j
D }t| j||fdd}| fdd}tjjt  | fdd}||k S )	Nc                 S   s   g | ]}t |jqS r   )rM   ZrandnshaperW   r   r   r   r   rY      s    z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>c                 S   s   g | ]}|j d  qS )r   )r    rr   r   r   r   rY      s    c                    s<   t D ]}|   qt }t  D ]}|  }qt | S r,   )rangetime)frc   beginout)iterswarmupr   r   	benchmark   s   z?gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.benchmarkc                      s   dd dd  D  D S )Nc                 S      g | ]}|  qS r   )to_denserW   ir   r   r   rY      rZ   zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>c                 S   r{   r   )	to_mkldnnr}   r   r   r   rY      rZ   r   r   Zsample_inputs	submoduler   r   rU      s    z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>c                      s     S r,   r   r   r   r   r   rU      rV   )rm   rl   Zowning_modulerg   r   	propagater   r   r#   r$   rn   r`   r(   rj   r9   r7   r8   )r9   Zinput_nodesZoutput_argsrz   Zmkl_timeZno_mkl_timeexample_inputsr?   rx   rg   ry   r   r   use_mkl_heuristic   s   z,gen_mkl_autotuner.<locals>.use_mkl_heuristic)rk   bool)r   rx   ry   r   r   r   r   gen_mkl_autotuner   s   	r   r9   c                 C   s   t | jdkS )z
    This is a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by checking if there
    are more than 2 nodes in it
       )r   r(   )r9   r   r   r   use_mkl_length   s   r   c                   @   sF   e Zd Zdd ZdefddZdedefddZd	ed
efddZdS )	UnionFindc                 C   s   d g| | _ dg| | _d S )Nr   r   size)rE   nr   r   r   ro      s   zUnionFind.__init__vc                 C   s   || j |< d| j|< d S )Nr   r   )rE   r   r   r   r   make_set   s   
zUnionFind.make_setr   c                 C   sB   | j | }||kr|S |d usJ | || j |< tt| j | S r,   )r   findr   int)rE   r   parr   r   r   r      s   
zUnionFind.findrb   bc                 C   sf   |  ||  |}}||kr|S | j| | j| k r ||}}|| j|< | j|  | j| 7  < d S r,   )r   r   r   )rE   rb   r   r   r   r   join   s   

zUnionFind.joinN)rH   rI   rJ   ro   r   r   r   r   r   r   r   r   r      s
    r   pass_configtracerc              	      sD  dddt id}|du ri }|| |d rt| } |d r#t| } |d du r+| S t|d ts6td	d|d vr@td
|d d }| }|t	|  t
|j }t|  }G dd dt}t jD ]}	|j}
|	jdkr||	j }t|tv r|j}
t| d}|dur|jtjksJ d|jtdksJ dn|	jdkr|	jtv r|j}
n|	jtv r|j}
|
|jkr"|
|jkrtdd |	j D sqk !|	 t
"|	j  fdd}W d   n1 sw   Y  t#t$t
j%j& ||	_  '|	  (dd|	f}|	)| |	f|_ W d   n	1 sw   Y  qkt*t j|}| _+ jD ]B}	|	jdkrr|	jdkrr|	j d }t|	j,}|D ]}|jdkrc|jdkrc|)|  -| qKt.|	j,dkrr -|	 q1t. j}t/|fddt0 jD ]w\}}	|	jdkr|	jdkr||	_12| q|	jdkr|	jdkrĈ|	j d dusJ |	j d |	_3qfdd|	j4D }t.|dkrאqtdd |D rJ t5|}|d |	_6|dd D ]}7|d | qqt8 fd d} jD ]9}	t9|	d!r|:|	j6 j;|	 t9|	d"r1|:|	j1 j<;|	 t9|	d#rC|:|	j3 j=;|	 q|> D ](}||sp|j<|j= D ]}	|	j d }|	)|  -|	 qVt?|j|| qId} jD ]}	|	jdks|	jdkr|d7 }qwt@AtBCd$|   D  t
|  }|S )%a  
    Performs a set of optimization passes to optimize a model for the
    purposes of inference. Specifically, the passes that are run are:
    1. Conv/BN fusion
    2. Dropout removal
    3. MKL layout optimizations

    The third optimization takes a function `use_mkl_heuristic` that's used
    to determine whether a subgraph should be explicitly run in MKL layout.

    Note: As FX does not currently handle aliasing, this pass currently
    assumes nothing aliases. If that isn't true, use at your own risk.
    T	heuristic)conv_bn_fuserN   mkldnn_layout_optimizeNr   rN   r   Fz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configc                   @   s   e Zd ZdZdZdZdS )z*optimize_for_inference.<locals>.MklSupportr   r      N)rH   rI   rJ   NOYESUNKNOWNr   r   r   r   
MklSupport  s    r   r   z)this pass is only for torch.float modulescpuz!this pass is only for CPU modulesZcall_functionc                 s   s    | ]}|j d kV  qdS )r|   N)r   )rW   argr   r   r   	<genexpr>3  s    z)optimize_for_inference.<locals>.<genexpr>c                    s     d| fS )Nr   )call_methodr   rl   r   r   rU   6  s    z(optimize_for_inference.<locals>.<lambda>r   r|   r   r   c                    s0   t | dr | jS t | dr | jS d S )Ncolorstart_color)hasattrr   r   r   r   )ufr   r   	get_colorS  s
   

z)optimize_for_inference.<locals>.get_colorc                    s,   g | ]}t |tjr |d ur |qS r,   )r"   r#   r$   r}   )r   r   r   rY   n  s   , z*optimize_for_inference.<locals>.<listcomp>c                 s   s    | ]}|d u V  qd S r,   r   r}   r   r   r   r   r  s    r   c                      s   t  S r,   )rk   r   r   r   r   rU   y  rV   r   r   	end_colorzmkldnn conversions: )Er   updaterB   rN   r"   r7   RuntimeErrortracer4   r5   r#   r=   rootr8   r   listr(   r   r%   r   r'   mkldnn_supportedr   next
parametersZdtyperM   re   Zdevicemkldnn_supported_unknownr   anyr    Zinserting_beforeZmap_argr   r   r   r   Zinserting_afterZcreate_noder;   ri   rg   r:   r<   r   r   	enumerater   r   r   Zall_input_nodessortedr   r   r   r   r   appendrm   rn   valuesrj   logging	getLoggerrH   infor]   )r0   r   r   Zdefault_pass_configr   Z
cur_tracerr?   r   r   r   Zsupports_mkldnnrh   Zsample_parameterZmkldnn_argsZdense_xrg   Zprv_noder:   userZ	num_nodesZcur_idxZ
cur_colorsZother_colorZmkldnn_graphsr9   ZprvZmkldnn_conversionsresultr   )rl   r   r   r   optimize_for_inference   s   
	




















r   )F)rp   r   )IZtorch.fxr#   Ztorch.fx.noder   r   Ztorch.nn.utils.fusionr   typingr   r   r   r   r	   r
   r   r   rM   Ztorch.nnr1   Ztorch.nn.functionalZ
functionalFZtorch.fx.passes.shape_propr   r4   collectionsr   Ztorch.utils.mkldnnutilsZmkldnnra   operatorrt   r   enumr   r&   r   r$   r*   rf   r/   rB   rN   r`   r2   ZLinearr3   ZReLUZ	MaxPool2dZ	AvgPool2dZAdaptiveAvgPool2dZreluZ	transposeZsigmoidZ
avg_pool2dZadaptive_avg_pool2dr   addmulr   ZMkldnnConv2dZMkldnnLinearrd   ri   rj   rk   r   r   r   r   ZTracerr   r   r   r   r   <module>   s^    ($	$.	
"2
&