
    Ph:B                     
   d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZmZmZ d dlZd dlmZ d dlmc mZ d dlmZ d dlZd dlmZ d dlmc mZ d dlZd dl Z d dl!Z!d dl"m#Z# de$d	ee$e$f   fd
Z%dee   dejL                  de	e$e
f   fdZ'dejL                  de	e$e
f   dej$                  jP                  fdZ)d'dej$                  jP                  d	ej$                  jP                  fdZ*dejP                  d	ejP                  fdZ+dejP                  deejL                     deejL                     deejL                     fdZ,ejZ                  ej\                  ej^                  ej`                  ejb                  ejd                  ejf                  ejh                  ejj                  ejl                  ejh                  ejn                  ejp                  gZ9ejt                  ejv                  gZ<ejZ                  ejz                  ej\                  ej|                  ej^                  d iZ?deejL                     de	e$ejP                  f   fdZ@deejL                     de	e$ejP                  f   de	ejP                  ejP                  f   fdZA G d d      ZBd(dZCd eBd	eDfd!ZE G d" d#      ZFdej                  fdej$                  jP                  d$ee	e$e
f      d%eej                     d	ej$                  jP                  fd&ZHy))    N)ArgumentTarget)fuse_conv_bn_eval)TypeDictAnyTupleIterableOptionalListcast)	ShapeProp)defaultdict)Enumtargetreturnc                 F    | j                  dd      ^ }}|r|d   |fS d|fS )zp
    Splits a qualname into parent path and last atom.
    For example, `foo.bar.baz` -> (`foo.bar`, `baz`)
    .   r    )rsplit)r   parentnames      mC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/fx/experimental/optimization.py_parent_namer      s3    
 MM#q)MVT6!9,,B,,    patternnodemodulesc                 j   t        |j                        dk(  ry|j                  d   |f}t        | |      D ]z  \  }}t        |t        j
                        s y|j                  dk7  r yt        |j                  t              s y|j                  |vr yt        ||j                           |usz y y)Nr   Fcall_moduleT)
lenargszip
isinstancefxNodeopr   strtype)r   r   r   nodesexpected_typecurrent_nodes         r   matches_module_patternr.      s    
499~"&))A,!5E'*7E':#|,0??m+,--s3g-++,-]B (; r   
new_modulec                     t        | j                  t              sJ t        | j                        \  }}||| j                  <   t	        ||   ||       y N)r%   r   r)   r   setattr)r   r   r/   parent_namer   s        r   replace_node_moduler4   ,   sH    dkk3'('$T[[1K%GDKKGK $
3r   modelc                 f   t         j                  t         j                  ft         j                  t         j                  ft         j
                  t         j                  fg}|st        j                  |       } t        j                  |       }t        |j                               }t        j                  |j                        }|D ]  }|j                  D ]  }t        |||      st!        |j"                  d   j$                        dkD  r7||j"                  d   j&                     }||j&                     }	|	j(                  sot+        ||	      }
t-        |j"                  d   ||
       |j/                  |j"                  d          |j1                  |         t        j2                  ||      S )z
    Fuses convolution/BN layers for inference purposes. Will deepcopy your
    model by default, but can modify the model inplace as well.
    r   r   )nnConv1dBatchNorm1dConv2dBatchNorm2dConv3dBatchNorm3dcopydeepcopyr&   symbolic_tracedictnamed_modulesgraphr+   r.   r"   r#   usersr   track_running_statsr   r4   replace_all_uses_with
erase_nodeGraphModule)r5   inplacepatternsfx_modelr   	new_graphr   r   convbn
fused_convs              r   fuserP   2   sN   
 BNN+BNN+BNN+-H e$  'H8))+,Ghnn-IOOD%gtW=tyy|))*Q.tyy|223T[[)--.tR8
#DIIaL':F**499Q<8$$T* $  >>(I..r   c                     t        j                  |       } G d dt        j                   j                        } ||      j	                         S )z5
    Removes all dropout layers from the module.
    c                   D     e Zd Zdedeedf   deeef   def fdZ	 xZ
S )&remove_dropout.<locals>.DropoutRemoverr   r#   .kwargsr   c                     t        | j                  |   t        j                        rt	        |      dk(  sJ |d   S t
        |   |||      S )Nr   r   )r%   
submodulesr7   Dropoutr"   superr!   )selfr   r#   rT   	__class__s       r   r!   z2remove_dropout.<locals>.DropoutRemover.call_moduleV   sG    $//&12::>4yA~%~Aww*64@@r   )__name__
__module____qualname__r   r	   r   r   r)   r   r!   __classcell__)rZ   s   @r   DropoutRemoverrS   U   sF    	Av 	AeHcM6J 	AUYZ]_bZbUc 	Ahk 	A 	Ar   r_   )r&   r@   torchTransformer	transform)r5   rK   r_   s      r   remove_dropoutrc   O   sB       'HA-- A (#--//r   orig_moduler+   inputsoutputsc                 Z  	 t        j                         }i 	|D ]"  }|j                  |j                        }|	|<   $ |D ]  }|j	                  |	fd      }|	|<    |j                  |D cg c]  }	|   	 c}       |j                          t        j                  | |      S c c}w )z
    Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph.
    c                     |    S r1    )xenvs    r   <lambda>z"extract_subgraph.<locals>.<lambda>h   s	    s1vr   )r&   Graphplaceholderr   	node_copyoutputlintrH   )
rd   r+   re   rf   rL   inputnew_noder   rp   rk   s
            @r   extract_subgraphrt   ^   s     
I"$C((4E
  &&t-=>D	  8fc&k89NN>>+y11 9s   /B(c                 ,    t        j                  |       S r1   )	th_mkldnnMkldnnBatchNorm)a_s     r   rl   rl   {   s    !:!:1!=r   c                    i }| D ]  }|j                   dk(  st        |j                  t              sJ ||j                     }t	        |      t
        v sPt        t	        |         |t        j                        }t        |t        j                        sJ t        j                  |      ||<   t        |||        |S )z
    For each node, if it's a module that can be preconverted into MKLDNN,
    then we do so and create a mapping to allow us to convert from the MKLDNN
    version of the module to the original.
    r!   )r(   r%   r   r)   r*   
mkldnn_mapr`   floatr7   Moduler>   r?   r4   )r+   r   old_modulesr   
cur_moduler/   s         r   modules_to_mkldnnr      s     /1K77m#dkk3/0/ -JJ:-'Z(89*ekkR
!*bii898*.--
*CJ'#D':>  r   r~   c                     | D ]R  }|j                   dk(  st        |j                  t              sJ ||j                     }||v sCt	        ||||          T y)za
    Maps each module that's been changed with `modules_to_mkldnn` back to its
    original.
    r!   N)r(   r%   r   r)   r4   )r+   r   r~   r   r   s        r   reset_modulesr      sU    
 77m#dkk3/0/ -J[(#D';z3JK r   c                   ,    e Zd Zdej                  fdZy)MklSubgraphfx_graphc                 <    || _         g | _        g | _        g | _        y r1   )r   r+   start_nodes	end_nodes)rY   r   s     r   __init__zMklSubgraph.__init__   s     $&
*,(*r   N)r[   r\   r]   r&   rm   r   ri   r   r   r   r      s    + +r   r   c                 D     dddt         dt        f fd}|S )aW  
    This generates a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by running it with the example_inputs.

    Example usage:
        heuristic = gen_mkl_autotuner(example_inputs, iters=10)
        fast_model = optimization.optimize_for_inference(model, heuristic)
    NrC   r   c                    | j                   }
F| j                  j                  
| j                  j                  t	        
      j                  	       |D cg c]!  }t        j                  |j                        # c}t        t        t        j                     | j                  D cg c]  }|j                  d    c}      }t        
| j                   ||      fd} |fd      }t#        j$                  j                   t'        j)                                       |fd      }||k  S c c}w c c}w )Nr   c                     t              D ]	  } |          t        j                         }t              D ]	  } |        } t        j                         |z
  S r1   )rangetime)fry   beginoutiterswarmups       r   	benchmarkz?gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.benchmark   sG    6] #IIKE5\c "99;&&r   c                       D  cg c]  } | j                          c}  D  cg c]  } | j                          c} S c c} w c c} w r1   )	to_mkldnnto_dense)isample_inputs	submodules    r   rl   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>   s@    I_lGm_lZ[_lGm<n%o<nqajjl<n%oGm%os	   ?Ac                         S r1   ri   )r   r   s   r   rl   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>   s
    	=(Ar   )r   r   owning_moduler~   r   	propagater`   randnshaper   r   r&   r'   r   r#   rt   r+   r   rC   rA   rB   )rC   input_nodesr   output_argsr   mkl_timeno_mkl_timer   r   example_inputsrK   r   r~   r   s          @@r   use_mkl_heuristicz,gen_mkl_autotuner.<locals>.use_mkl_heuristic   s    ''~~33H..44Kh)).9=HI[TTZZ0[I4=EOO*TOD499Q<O*TU$Xu{{KU		' opioo++T)2I2I2K-LkZ AB+%%! J*Ts   &D>.E
)r   bool)r   r   r   r   rK   r~   s   ``` @@r   gen_mkl_autotunerr      s/     HK& & & &0 r   rC   c                 2    t        | j                        dkD  S )z
    This is a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by checking if there
    are more than 2 nodes in it
       )r"   r+   )rC   s    r   use_mkl_lengthr      s     u{{ar   c                   >    e Zd Zd ZdefdZdedefdZdedefdZy	)
	UnionFindc                 0    d g|z  | _         dg|z  | _        y )Nr   r   size)rY   ns     r   r   zUnionFind.__init__   s    ,06A: !sQw	r   vc                 @    || j                   |<   d| j                  |<   y )Nr   r   )rY   r   s     r   make_setzUnionFind.make_set   s    A		!r   r   c                     | j                   |   }||k(  r|S |J | j                  |      | j                   |<   t        t        | j                   |         S r1   )r   findr   int)rY   r   pars      r   r   zUnionFind.find   sO    kk!n8H3ACQ((r   rx   bc                    | j                  |      | j                  |      }}||k(  r|S | j                  |   | j                  |   k  r||}}|| j                  |<   | j                  |xx   | j                  |   z  cc<   y r1   )r   r   r   )rY   rx   r   s      r   joinzUnionFind.join   so    yy|TYYq\16H99Q<$))A,&aqAA		!		!$r   N)r[   r\   r]   r   r   r   r   r   ri   r   r   r   r      s9    '# )c )c )%c %c %r   r   pass_configtracerc                    dddt         id}|i }|j                  |       |d   rt        |       } |d   rt        |       } |d   du r| S t	        |d   t
              st        d	      d|d   vrt        d
      |d   d   } |       }|j                  t        j                  |             t        j                  |j                        }t        | j                               } G d dt              }t        j                         D ]  }	|j"                  }
|	j$                  dk(  r||	j&                     }t)        |      t*        v r|j,                  }
t/        |j1                         d      }||j2                  t4        j6                  k(  sJ d       |j8                  t5        j8                  d      k(  sSJ d       |	j$                  dk(  r=|	j&                  t*        v r|j,                  }
n|	j&                  t:        v r|j<                  }
|
|j"                  k7  s|
|j<                  k(  rt?        d |	j@                  D              s>jC                  |	      5  t        jD                  |	j@                  fd      }ddd       tG        tH        t        jJ                  jL                           |	_         jO                  |	      5  jQ                  dd|	f      }|	jS                  |       |	f|_         ddd        tU        t        j                         |      }|_+        j                   D ]  }	|	j$                  dk(  s|	j&                  dk(  s#|	j@                  d   }t        |	jX                        }|D ]D  }|j$                  dk(  s|j&                  dk(  s#|jS                  |       j[                  |       F t]        |	jX                        dk(  sj[                  |	        t]        j                         }t_        |      fd}ta        j                         D ]'  \  }}	|	j$                  dk(  r(|	j&                  dk(  r||	_1        je                  |       >|	j$                  dk(  rA|	j&                  dk(  r2 ||	j@                  d         J  ||	j@                  d         |	_3        |	jh                  D cg c].  }t	        |t        jj                        s ||      ' ||      0 }}t]        |      dk(  rt?        d |D              rJ tm        |      }|d   |	_7        |dd D ]  }jq                  |d   |        * ts        fd      }j                   D ]  }	tu        |	d      r7|jw                  |	jn                           j                   jy                  |	       tu        |	d      r7|jw                  |	jb                           jz                  jy                  |	       tu        |	d      s|jw                  |	jf                           j|                  jy                  |	        |j                         D ]q  } ||      r|jz                  |j|                  z   D ]3  }	|	j@                  d   }|	jS                  |       j[                  |	       5 t        |j                   ||       s d}j                   D ]&  }	|	j&                  dk(  s|	j&                  dk(  s"|dz  }( t        j                  t              j                  d|        j                          t        j                  |       }|S # 1 sw Y   xY w# 1 sw Y   VxY wc c}w ) a  
    Performs a set of optimization passes to optimize a model for the
    purposes of inference. Specifically, the passes that are run are:
    1. Conv/BN fusion
    2. Dropout removal
    3. MKL layout optimizations

    The third optimization takes a function `use_mkl_heuristic` that's used
    to determine whether a subgraph should be explicitly run in MKL layout.

    Note: As FX does not currently handle aliasing, this pass currently
    assumes nothing aliases. If that isn't true, use at your own risk.
    T	heuristic)conv_bn_fuserc   mkldnn_layout_optimizeNr   rc   r   Fz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configc                       e Zd ZdZdZdZy)*optimize_for_inference.<locals>.MklSupportr   r      N)r[   r\   r]   NOYESUNKNOWNri   r   r   
MklSupportr     s    r   r   r!   z)this pass is only for torch.float modulescpuz!this pass is only for CPU modulescall_functionc              3   :   K   | ]  }|j                   d k(    yw)r   N)r   ).0args     r   	<genexpr>z)optimize_for_inference.<locals>.<genexpr>3  s     Iy3::3ys   c                 *    j                  d| f      S )Nr   )call_method)r   r   s    r   rl   z(optimize_for_inference.<locals>.<lambda>6  s    h>R>RS^ab`e>fr   r   r   r   r   c                     t        | d      rj                  | j                        S t        | d      rj                  | j                        S y )Ncolorstart_color)hasattrr   r   r   )r   ufs    r   	get_colorz)optimize_for_inference.<locals>.get_colorS  s@    1g77177##1m$771==))r   c              3   $   K   | ]  }|d u  
 y wr1   ri   )r   r   s     r   r   z)optimize_for_inference.<locals>.<genexpr>r  s     9j19js   r   c                      t               S r1   )r   )r   s   r   rl   z(optimize_for_inference.<locals>.<lambda>y  s
    H@Ur   r   r   	end_colorzmkldnn conversions: )Fr   updaterP   rc   r%   rA   RuntimeErrortracer>   r?   r&   rH   rootrB   r   listr+   r   r(   r   r*   mkldnn_supportedr   next
parametersdtyper`   r|   devicemkldnn_supported_unknownr   anyr#   inserting_beforemap_argr   r	   r   r   inserting_aftercreate_noderF   r   r~   rD   rG   r"   r   	enumerater   r   r   all_input_nodesr'   sortedr   r   r   r   r   appendr   r   valuesr   logging	getLoggerr[   inforq   ) r5   r   r   default_pass_configr   
cur_tracerrK   r   r   r   supports_mkldnnr   sample_parametermkldnn_argsdense_xr~   prv_noderD   user	num_nodesr   cur_idxr   
cur_colorsother_colormkldnn_graphsrC   prvmkldnn_conversionsresultr   r   s                                  @@r   optimize_for_inferencer     sV   & #."?
 {+>*U+,u%34=)*BCTJHII-.FGGQRR+,DEkRJe 45H~~joox8H$()<)<)>$?GT  X^^$$--77m# -JJ#33",..#'
(=(=(?#F #/+11U[[@nCnn@+22ell56IIoLooIWW'{{..",.. 88","4"4jmm+*"4"44ItyyII**40 jj4fg 1 U277#3#34kBDI))$/"..}j4'R**73 $w 0/3 %> $D$8'BK&H 77m#z(Ayy|H$E77m+{0J..x8''-  4::!###D)  HNN#I	9	B& #8>>277m#{(B&DKK WW%$++*CTYYq\*676&tyy|4DN040D0D{0D1
STVXV]V]H^bklmbnbz)A,0DJ{:!#9j99:9
+J#ADJ)!"~
1{3  . 3& -88U,VM4!"''$**-.44;;DA4'"''$"2"234@@GGM4%"''$..12<<CCDI  %%' '))EOO;iil**3/##D) < %++w< ( ;;+%
)B!#  h$$';<N;O%PQMMO^^E8,FMA 10
 0/h |s*   $]	.]]&&]&/
]&]	]#	)F)
   r   )Itorch.fxr&   torch.fx.noder   r   torch.nn.utils.fusionr   typingr   r   r   r	   r
   r   r   r   r`   torch.nnr7   torch.nn.functional
functionalFtorch.fx.passes.shape_propr   r>   collectionsr   torch.utils.mkldnnutilsmkldnnrv   operatorr   r   enumr   r)   r   r'   r.   r}   r4   rP   rc   rt   r:   Linearr;   ReLU	MaxPool2d	AvgPool2dAdaptiveAvgPool2drelu	transposesigmoid
avg_pool2dadaptive_avg_pool2dr   addmulr   MkldnnConv2dMkldnnLinearr{   r   r   r   r   r   r   r   Tracerr  ri   r   r   <module>r!     s    * 3 I I I     0  # & &    -# -%S/ -HTN "'' DQTVYQYN $4bgg 4S#X 4EHHOO 4/ /588?? /:0")) 0		 02")) 2DM 24PRPWPW= 2cghjhohocp 2" IIryy".."''2<<rOcOc	JJFFALL!//  %LL(,,7 IIy%%IIy%%NN=
T"''] T#ryy.5I $
Lbgg 
Lc299n1E 
LTXY[YbYbdfdmdmYmTn 
L+ +$L +  $  % %8 -1 iii88??i$sCx.)i Oi XX__	ir   