
    Phc                     	   d dl Z d dlZd dlZd dlZd dlmZmZ d dlZd dlm	Z	 d dl
mZ ddlmZmZ ddlmZmZmZmZmZmZ ddlmZ d	d
lmZ d	dlmZ ej:                  j<                  Zej:                  j>                  Zej:                  j@                  Z ej:                  jB                  Z!	  e       dfdZ"	  eejF                  jH                   eejJ                  jH                   eejL                  jN                   ed       ed             ed             ed            Z( ee jR                  jN                   ed       ed       ed       ed       ed       ed       ed            Z* e"e* ed            Z+ eejX                  jN                  e* ed            Z- eejX                  jN                  e+ ed            Z. eej:                  j^                  j`                  jN                   ed       ed       ed       ed       ed       ed       ed       ed       ed       ed        ed!       ed"       ed#       ed$       ed%       e        e             Z1 eej:                  j^                  jd                  jN                   ed       ed       ed       ed       ed       ed       ed       ed&       ed#       ed$       ed'       ed(       ed)            Z3 eejF                  jH                   eejJ                  jH                   eejL                  jN                   ed*       ed+             ed,             ed-            Z4	 dNd.Z5d/ Z6ejn                  fd0Z8d1 Z9d2 Z:ejn                  fd3Z;d4 Z<ejn                  fd5Z=d6 Z>d7 Z?d8 Z@d9 ZAd: ZBd; ZCd< ZDd= ZEd> ZF eejF                  jH                   eejJ                  jH                   eejL                  jN                   e        e              e              e             ZGd? ZHd@ ZIdA ZJdB ZKejn                  fdCZLejn                  fdDZMdE ZNejn                  fdFZOejn                  fdGZPejn                  fdHZQdI ZRejn                  fdJZSejn                  fdKZTejn                  fdLZU ej                  d      dM        ZWy)O    N)AnyTuple)counters)has_free_symbols   )	loweringsrequire_channels_last)ArgCallFunctionfilter_nodes
KeywordArgListOfMatch)pad_listlike   )register_freezing_graph_pattern)register_lowering_patternTc                 T    |r%t        t        j                  j                  | |      S | S N)r   primsconvert_element_typedefault)patterndtypedtype_converts      qC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/_inductor/fx_passes/quantization.py(_may_generate_pattern_with_dtype_convertr   &   s.    &&..
 	
     x
x_dq_dtypex_zpx_scaleq_weightw_scalew_zpw_axisw_quant_minw_quant_maxw_dtypeautocast_wgt_dtypememory_format)r+   packed_weightbstridepaddingdilationgroupsinv_output_scaleoutput_zero_pointoutput_dtypeattroutput_scalepostop_namepostop_argspostop_algorithmaccumaccum_dq_dtypeaccum_zpaccum_scalec                 H    t        | ||      }t        |t        d      |      S )Nconvert_dtype_after_inplace_add)r   r   r   )binary_post_opcomputation_callextra_input_pattern int8_mixed_bf16_with_inplace_addbinary_patterns        r   generate_pattern_with_binaryrE      s5     "N
 445( r   c           
          |o|t         j                  j                  k(  rFt        t         j                  t        t         j
                  | t        d            t        d            S t        ||       S | S )N	min_value	max_value)atenhardtanhr   r   	clamp_max	clamp_minr   )rA   unary_post_ops     r   generate_pattern_with_unaryrN      si     DMM111T^^-=z+?VW;'      r   c                    |t         j                  t         j                  fv sJ t        t        j
                  j                  t        t        j                  j                  t        t        j                  j                  t        t        j                  j                  t        t        j                  j                  t        t        j                  j                  t        | t        d      |t         j                  k7        t        d                  t        d            t        d            t        d            t        d            }|S )z
    quantize output:
        output = round(output * o_inv_scale)
        output = output + zero_point
        output = clamp_min(output, 0)
        output = clamp_max(output, 127)
        output = output.to(uint8)
    autocast_output_quant_dtypeo_inv_scaleo_zpo_qmino_qmaxo_dtype)torchfloat32bfloat16r   r   r   r   rI   rK   rL   addTensorroundmulr   r   )rA   r    quantized_op_output_pattern_pt2es      r   "generate_pattern_with_output_quantr^      s     U]]ENN3333'3""**NN""&&HHOO 

**$ HHOOD 0 *+H I % 6
 '}5 v&  8$%( x -	
0 	95($8 ,+r   c                     || j                   v r| j                   |   }||k(  S t        | j                        |dz   k\  sJ | j                  |   }||k(  S )Nr   )kwargslenargs)
check_node
kwarg_name
args_indexexpected_valueactual_values        r   _check_node_kwarg_arg_valuerh      s_    Z&&&!((4~--:??#
Q777!z2~--r   c                       fd}|S )Nc                     It        | j                  t        j                  j                  j
                        d   }t        |dd      S y)Nr   r4      T)r   nodesrV   opsonednnqconv2d_pointwiserh   )matchqconv_node_after_weight_prepackr4   s     r   fnz;_is_valid_quantized_conv2d_optimization_pattern.<locals>.fn   sR    #.:UYY--??//+ //\  r    r4   rr   s   ` r   /_is_valid_quantized_conv2d_optimization_patternru          	 Ir   c                 `    t        | t              |      dt        ffd       }|S )Nextra_checkpass_numberrp   c                 n   |d   |d   |d   }}}|d   |d   |d   }}}|d   |d   |d	   |d
   |d   f\  }	}
}}}d t         j                  t         j                  fv sJ |d   nd}|d   nd}|d   u sJ |d   dk(  sJ j                  dk(  r+|j	                  d      }|j	                  d      }||g_        |||||||	|
|||||j                  j
                  j                  f}t        d   dxx   dz  cc<   t        d   dxx   t        | j                        z  cc<   t           | S )Nr   r"   r!   r,   r$   r%   r-   r.   r/   r0   r1   rQ         ?rR   r   r4   r5   nonerJ   rG   rH   inductorqconv2d_unary_matcher_countr   qconv2d_unary_matcher_nodes)rV   rW   rX   op_namegetscalars_attralgorithm_attrr   ra   rl   L)rp   rb   r`   r   r"   r!   r,   r$   r%   r-   r.   r/   r0   r1   rQ   o_zero_pointrG   rH   computation_argscomputation_oporiginal_pattern_output_dtyper4   
unary_attrs                      r   qconvz0_register_quantized_conv_lowering.<locals>.qconv  s    3K96N 7 ?#96N !%w 3K89:80
,67Hf emmU^^DDDD/;/Cf]+)5)=vf~1>"&CC	
C 6Nf$	
$+

;/I

;/I'0)&<J# ##%%#
& 	:;q@;:;s5;;?OO; "233r   )r   ru   r   )r   rz   r   r4   r   r   r   s     ```` r   !_register_quantized_conv_loweringr      s9     CLQ
94U 94
94v Lr   c                       fd}|S )Nc                     It        | j                  t        j                  j                  j
                        d   }t        |dd      S y)Nr   r4   	   T)r   rl   rV   rm   rn   qlinear_pointwiserh   )rp   !qlinear_node_after_weight_prepackr4   s     r   rr   z;_is_valid_quantized_linear_optimization_pattern.<locals>.fnH  sR    #0<UYY--??11- /1>1l  r   rs   rt   s   ` r   /_is_valid_quantized_linear_optimization_patternr   G  rv   r   c                 `    t        | t              |      dt        ffd       }|S )Nrx   rp   c                    |d   |d   |d   }}}|d   |d   |d   }}}d|v r|d   nd }	|d   nd	}
|d
   nd}|d   u sJ |d   dk(  sJ |||||||	|
|j                   j                  j                  f}t        d   dxx   dz  cc<   t        d   dxx   t	        | j
                        z  cc<   t           | S )Nr   r"   r!   r,   r$   r%   r-   rQ   r|   rR   r   r4   r7   r}   r~   qlinear_unary_matcher_countr   qlinear_unary_matcher_nodes)r   r   r   r   ra   rl   r   )rp   rb   r`   r   r"   r!   r,   r$   r%   r-   rQ   r   r   r   r   r4   r   s                r   qlinearz4_register_quantized_linear_lowering.<locals>.qlinear^  s4    3K96N 7 ?#96N !%w &=F3Kd 0</Cf]+)5)=vf~1>"&CC	
C =!V+	
+ ##%%
 	:;q@;:;s5;;?OO; "233r   )r   r   r   )r   rz   r   r4   r   r   r   s     ```` r   #_register_quantized_linear_loweringr   V  s9     CLQ
,4u ,4
,4\ Nr   c                       fd}|S )Nc                    t        | j                  t        j                  j                  j
                        d   }t        |j                        dk7  ryt        |j                        d   j                  }t        |      dk(  sJ d       d }|D ]  }||k7  s	|} n |J t        |t        j                  j                        r'|j                  t        j                  j                   k7  ryy)Nr   r   Fr   z!Expects binary node with 2 inputsT)r   rl   rV   rm   rn   ro   ra   userslistrb   
isinstancefxNodetargetrI   r\   rZ   )rp   !qconv2d_node_after_weight_prepackbinary_node_inputsextra_input_nodeargr4   s        r   rr   z@_is_valid_quantized_conv_binary_optimization_pattern.<locals>.fn  s    ,8KK));;-

-) 06671<#!%&G&M&M!Nq!Q!V!V)*a/T1TT/#);;'*$ * $////? ''488??:r   rs   rt   s   ` r   4_is_valid_quantized_conv_binary_optimization_patternr     s    * Ir   c                 \    t        | t              |      dt        ffd       }|S )Nrx   rp   c                    |d   |d   |d   }}}|d   n|d   }|d   nd}|d   nd	}|d
   |d   |d   }}
}	|d   |d   |d   |d   |d   f\  }}}}}|d   nd}|d   nd	}|||||||	|
||||||||j                   j                  j                  j                  j                  f}t
        d   dxx   dz  cc<   t
        d   dxx   t        | j                        z  cc<   t           | S )Nr   r"   r!   r:   accum_after_dequantr=   r|   r<   r   r,   r$   r%   r-   r.   r/   r0   r1   rQ   rR   r~   qconv2d_binary_matcher_countr   qconv2d_binary_matcher_nodes)	binary_op_namealphaunary_op_namer   r   r   ra   rl   r   )rp   rb   r`   r   r"   r!   r:   r=   r<   r,   r$   r%   r-   r.   r/   r0   r1   rQ   r   r   binary_unary_attrr   r4   s                       r   qconv_binaryz>_register_quantized_conv_binary_lowering.<locals>.qconv_binary  s    "#;y(96&>D7+3F7O@U9V 	 0</Cf]+)5)=6*%1?#96N !%w 3K89:80
,67Hf 0</Cf]+)5)=vf~1 ,,##++**,,-
0 	;<A<;<EKK@PP< "233r   )r   r   r   )r   rz   r   r4   r   r   s     ``` r   (_register_quantized_conv_binary_loweringr     s:     HV
14E 14
14f r   c                      G d d      } t         j                  t         j                  fD ]  } | dg d      t        t        |       | dg d      t        t        t        t        j                  j                        |       | dg d      t        t        t        t        j                  j                        |      i}|j                         D ]8  \  }}t        |dt         j                  j                  j                  d ||	       :  | dg d      t        t        t        j                  j                         | dg d      t        t        t        j                  j                        i}|j                         D ]8  \  }}t        |d
t         j                  j                  j                  |||	       :  | dg d      t        t        |       | dg d      t        t        t        t        j                  j                        |      i}|j                         D ]8  \  }}t!        |dt         j                  j                  j"                  d ||	       :  | dg d      t        t        t        j                  j                        i}|j                         D ]8  \  }}t!        |d
t         j                  j                  j"                  |||	       :  y )Nc                       e Zd ZddefdZy)6_register_quantization_unary_fusion.<locals>.UnaryAttrNr   c                 J    || _         |r|ng | _        |r|| _        y d| _        y )N )r   r   r   )selfr   r   r   s       r   __init__z?_register_quantization_unary_fusion.<locals>.UnaryAttr.__init__  s&    "DL0<"D4B.DDr   )NN__name__
__module____qualname__strr   rs   r   r   	UnaryAttrr     s    	KC 	Kr   r   r}   r   r   relurJ   r   )r   r   )rV   rW   rX   r^   dequantize_qconv_pt2e_patternrN   rI   r   r   rJ   itemsr   rm   rn   ro   qlinear_pt2e_patternr   r   )r   r   conv_unary_replace_patternsr   patterns%conv_unary_replace_float_out_patternslinear_unary_replace_patterns'linear_unary_replace_float_out_patternss           r   #_register_quantization_unary_fusionr     s   K K +0--)H% fb"%'I-3( fb"%'I+14993D3D 4	( j"b)+M+14==3H3H 4	,'
#& %@$E$E$G J-		  22.K %H fb"%'B-tyy/@/@( j"b)+F-t}}/D/D,	1
- %J$O$O$Q J-		  22-.K %R fb"%'I$3( fb"%'I+,@$))BSBST3(	)
% %B$G$G$I J/		  22.K %J fb"%'B$dii&7&7(3
/ %L$Q$Q$S J/		  22-.K %Tm *Ir   c                  ,    G d d      } dD ]  } | dddg d      t        t        t        j                  j                  t
        t        |      |rt        j                  nt        j                         | ddd	g d      t        t        t        t        j                  j                  t
        t        |      t        j                  j                        |rt        j                  nt        j                        i}|j                         D ]@  \  }}t        |d
t        j                  j                   j"                  j$                  d |       B  | ddd	g d      t        t        t        j                  j                  t
        t'        d      |      t        j                  j                        i}|j                         D ]  \  }}|rJt        |d
t        j                  j                   j"                  j$                  t        j                  |       Rt        |dt        j                  j                   j"                  j$                  t        j                  |         | dddg d      t        t        j                  j                  t
        t'        d      |      i}|j                         D ]d  \  }}t        ||rdndt        j                  j                   j"                  j$                  |rt        j                  nt        j                  |       f  y )Nc                   &    e Zd Z	 	 	 	 ddedefdZy)=_register_quantization_binary_fusion.<locals>.BinaryUnaryAttrNr   r   c                 n    || _         |r|nd| _        || _        |r|ng | _        |r|| _        y d| _        y )Nr|   r   )r   r   r   r   r   )r   r   r   r   r   r   s         r   r   zF_register_quantization_binary_fusion.<locals>.BinaryUnaryAttr.__init__\  s<     #1D"'SDJ!.D0<"D4B.DDr   )Nr}   NNr   rs   r   r   BinaryUnaryAttrr   [  s*     !'	K	K 		Kr   r   )FTrY   r|   r}   r   r   r   r   r   r   r   )r^   rE   rI   rY   rZ   r   dequantize_accum_patternrV   rX   rW   rN   r   r   r   r   rm   rn   ro   binaryr   )r   rC   binary_replace_patternsr   r   !binary_replace_float_out_patternss         r   $_register_quantization_binary_fusionr   Z  s   K K -:( sFB1,HHOO1,4	 4 nn]]
 sFB1+0508	 II%% 4 nn]]!#
@ ,C+H+H+J'x4		  2299! ,K E3B79T,HHOO1454	 		!!:
-
) /446
/8II$$66==
 NN%
 9II$$66==MM% 70 E3B79U-010	:-
) /446
451		  2299"B! 7} -:r   c                      d } | S )Nc                 t    t        | j                  t        j                        d   }|j                  d   dk(  S )Nr   r   )r   rl   operatorgetitemrb   )rp   get_item_nodes     r   rr   z>_is_valid_quantized_maxpool2d_optimization_pattern.<locals>.fn  s5     %U[[(2B2BCAF!!!$))r   rs   )rr   s    r   2_is_valid_quantized_maxpool2d_optimization_patternr     s    * Ir   c                 P    t        | t                     dt        ffd       }|S )Nry   rp   c                    |d   }|d   }d|v r|d   nd }d|v r|d   nd}d|v r|d   nd}d|v r|d   nd	}|dk(  rddg}|dk(  rddg}|s|}t        |d
      }t        |d
      }t        |d
      }t        |d
      }t        |      d
k(  sJ t        |      d
k(  sJ t        |      d
k(  sJ t        |      d
k(  sJ ||||||f}	t        g|	 \  }	}
t           |	 S )Nr   kernel_sizer.   r/   r   r0   r   	ceil_modeFr   )r   ra   r	   r   )rp   rb   r`   r   r   r.   r/   r0   r   r   _r   s              r   
qmaxpool2dz:_register_quantized_maxpool2d_lowering.<locals>.qmaxpool2d  sJ   
 3K]+&.&&8!t(1V(;&#!*4*>6*%Q,76,AF;'	a<!fGq=1vH F";2fa(w*!,;1$$$6{a7|q   8}!!! 
 4NVEUV! "233r   )r   r   r   )r   r   r   s    ` r   &_register_quantized_maxpool2d_loweringr     s5     FH!4% !4	!4F r   c            	         t        d      gt        d      t        d      gt        d      t        d      t        d      gt        d      t        d      t        d      t        d      gg} | D ]  }t        t        j                  j                  t
        t        d      g| }t        t        j                  |t                     }t        t        |      t        j                  j                          y )Nr.   r/   r0   r   r   )r   r   rI   max_pool2d_with_indicesr   (dequantize_per_tensor_activation_patternr   r   r
   r   r^   	quantized
max_pool2d)max_pool2d_args_listmax_pool2d_argsdequantize_maxpool2d_pattern%dequantize_maxpool2d_get_item_patterns       r    _register_quantization_maxpool2dr     s     x 	
 x y!	

 x y!z"	
 x y!z"{#		
* 0'3((004}%(
 	(
$ 1=(E1
-
 	/./TU  ((	
 0r   c                       fd}|S )Nc                    t        | j                  t        j                  j                        }|D cg c]  }|j
                  d    c}t        | j                  t        j                  j                        }t        |      dk(  sJ d       j                  |d   j
                  d          t        fdD              syt        | j                  t        j                  j                        }|D cg c]>  }|j
                  d   j                  u r|j
                  d   nd|j
                  d   z  @ c}t        fdD              syyc c}w c c}w )	Nr   z.expect only 1 add node at output quant patternr   c              3   .   K   | ]  }|d    k(    yw)r   Nrs   ).0
zero_pointzero_pointss     r   	<genexpr>z=_is_input_output_same_scale_zp.<locals>.fn.<locals>.<genexpr>A  s     N+J:Q/+s   Fr|   c              3   T   K   | ]  }t        j                  |d    d       ! yw)r   gh㈵>)rel_tolN)mathisclose)r   scalescaless     r   r   z=_is_input_output_same_scale_zp.<locals>.fn.<locals>.<genexpr>P  s&     TVE4<<vay$??Vs   %(T)r   rl   rI   subrZ   rb   rY   ra   appendallr\   r   )	rp   	sub_nodesnode	add_nodes	mul_nodesmul_noder   r   rc   s	         @@r   rr   z*_is_input_output_same_scale_zp.<locals>.fn9  s,    !dhhoo>	09:	tyy|	: dhhoo>	9~"T$TT"9Q<,,Q/0N+NN !dhhoo>	 &
 & ==#**j8 a 8==++, &
 TVTT- ;
s   E:AErs   )rc   rr   s   ` r   _is_input_output_same_scale_zpr   8  s    8 Ir   c                     t        | t        t        j                  j                              dt
        ffd       }|S )Nr   rp   c                 P    |D cg c]  }|d   	 }}t           ||      S c c}w )Nr   )r   )rp   inputsdimr`   inputuint8_inputsr   s         r   qcatz._register_quantized_cat_lowering.<locals>.qcat\  s4     /55fUaf5 s33 6s   #)r   r   rI   catr   r   )r   r   r  s    ` r    _register_quantized_cat_loweringr	  X  s?     24883C3CD4E 4	4
 Kr   c                      t        t        j                  j                  t	        t
              t        d            } t        t        |       t        j                         y )Nr  )	r   rI   r  r   r   -_raw_dequantize_per_tensor_activation_patternr   r	  r^   )dequantize_cat_patterns    r   _register_quantization_catr  w  sE    )<=5
 %*+ABr   c                     t        | t        t        j                  j                              dt
        ffd       }|S )Nr   rp   c                     |d   }|d   }t         d   dxx   dz  cc<   t         d   dxx   t        | j                        z  cc<   t           ||      S )Nr   shaper~   qreshape_matcher_countr   qreshape_matcher_nodes)r   ra   rl   r   )rp   rb   r`   qxr  r   s        r   qreshapez6_register_quantized_reshape_lowering.<locals>.qreshape  s^    
 C[w56!;656#ekk:JJ6 U++r   )r   r   rI   reshaper   r   )r   r   r  s    ` r   $_register_quantized_reshape_loweringr    s?     24<<3G3GH, ,	, Or   c                      t        t        j                  j                  j                  j
                  t        t        d            } t        t        |       t        j                         y )Nr  )
r   rV   rm   rI   r  r   r   r   r  r^   )dequantize_reshape_patterns    r   _register_quantization_reshaper    sI    !-		&&07"
 )*+EFr   c                  h    t                t                t                t                t	                y r   )r   r   r   r  r  rs   r   r    _register_quantization_loweringsr    s     ')(*$& "$r   c                       fd}|S )Nc                    t         j                  t         j                  fv sJ t         j                  k(  r| j                         }n| j                         }|j                  d   }|j                  d   }|j                  d   }t         j                  k(  r|j
                  t        j                  j                  u r|j
                  t        j                  j                  u rj|j
                  t        j                  j                  u rDt        t        |j                              dkD  r#yt        t        j                              dkD  ryy)Nr   r   TF)rV   rW   rX   output_noderb   r   rI   r\   rZ   r   r   r   r   ra   r   r   )rp   r   convert_to_bf16_nodesub_nodeto_fp32_noder   s        r   _innerz3_is_valid_dequant_promotion_pattern.<locals>._inner  s   7777EMM!((*H#(#4#4#6 +003H==#}}Q' %	 OOtxx.488??2##u'A'A'I'IID()A-
  T.4456: r   rs   r   r"  s   ` r   #_is_valid_dequant_promotion_patternr$    s    * Mr   c                 R    t        | t              |      dt        ffd       }y )Nrx   rp   c                    t         j                  t         j                  fv sJ d }t         j                  k(  r| j                         }n| j                         }|j                  d   }|j                  d   }|j                  d   }|j
                  t        j                  j                  u sJ |j
                  t        j                  j                  u sJ |j
                  t        j                  j                  u sJ | j                  }t         j                  k(  rt        |j                        nt        j                        }	|	D ]H  }
t         j                  k(  r ||||
      }n |||
      } ||||      } ||||      } ||||      }J t         d   dxx   dz  cc<   t         d   dxx   t#        | j$                        z  cc<   y )Nc                 P   |j                   dk(  sJ d       | j                  |      5  | j                  |j                  |j                  |j
                        }t        j                  |j                        |_        |j                  ||       d d d        |S # 1 sw Y   S xY w)Ncall_functionz4clone_to_new_node only support node.op call_function)rb   r`   )	opinserting_beforer(  r   rb   r`   copymetareplace_input_with)graphsource_node	user_nodenew_nodes       r   clone_to_new_nodezV_register_dequant_promotion_pass.<locals>.dequant_promotion.<locals>.clone_to_new_node  s    /1FEF1''	2 ..&&$))&-- / 
 !%		+*:*: ;,,[(C 3 O 3 Os   A)BB%r   r~   dequant_promotion_matcher_countr   dequant_promotion_matcher_nodes)rV   rW   rX   r  rb   r   rI   r\   rZ   r   r   r   r   r.  r   r   r   ra   rl   )rp   rb   r`   r2  r   r  r   r!  r.  user_node_listr0  new_mul_nodenew_convert_to_bf16_node_nodenew_sub_noder   r   s                  r   dequant_promotionz;_register_dequant_promotion_pass.<locals>.dequant_promotion  s    7777	 EMM!((*H#(#4#4#6 +003H==#}}Q'$((//111$((//111""e&@&@&H&HHHH %  *001 	
 (I%0)L0A/1-  18%B  -UHlKL!%|DA ( 	>?1D?>?3u{{CSS?r   )r   r$  r   )r   rz   r   r9  s     ` r    _register_dequant_promotion_passr:    s2    $7>
4T 4T
4Tr   c                       fd}|S )Nc                 F   | j                         }|j                  t        j                  j                  u sJ |j
                  d   j                  j                  d      }|j
                  d   j                  j                  d      }||fD ]2  }|-|j                  j                  dk7  s|j                         dk7  s2 y 	t        j                  t        j                  fv sJ 	t        j                  k(  r|j
                  d   }n|j
                  d   }|j
                  d   }|j
                  d   }|j
                  d   }|j                  t        j                  j                  u sJ |j                  t        j                   j"                  u sJ |j                  t        j$                  j"                  u sJ t'        t)        |j*                              dk7  sBt'        t)        |j*                              dk7  s!t'        t)        |j*                              dk7  ryy)Nr   valr   cpu   FT)r  r   rI   convolutionr   rb   r,  r   devicetyper  rV   rW   rX   r   r   r   rZ   r\   ra   r   r   )
rp   	conv_nodeinput_meta_valueweight_meta_value
meta_valuer   convert_to_bf16r   r!  r   s
            r   r"  z0_is_valid_dequant_conv2d_pattern.<locals>._inner   s    %%'	4#3#3#;#;;;;$>>!,1155e<%NN1-2266u=+->?J"$$))U2>>#q(  @ 7777EMM! ~~a(H'nnQ/O&++A.H==#}}Q'""e&@&@&H&HHHH$((//111$((//111\''()Q.4'(A-4'(A- r   rs   r#  s   ` r    _is_valid_dequant_conv2d_patternrH    s    'R Mr   c                 R    t        | t              |      dt        ffd       }y )Nrx   rp   c                    t         j                  t         j                  fv sJ | j                         }|j                  t
        j                  j                  u sJ t         j                  k(  r|j                  d   }n|j                  d   }|j                  d   }|j                  d   }|j                  d   }|j                  d   j                  t
        j                  j                  u }|r|j                  d   nd}	t         j                  k(  r!|r|	j                  d   n|j                  d   }
n/|r|	j                  d   n|j                  d   }|j                  d   }
|
j                  t        j                  j                  u sJ |d   |d   |d   }}}|d   |d   |d	   }}}|d
   |d   |d   |d   |d   f\  }}}}}|j                  j                  d      j                  }t        |      rd}| j                   }|j#                  |      5  |||||||||f	}t         j$                  j&                  j(                  }|j+                  ||      }|||||||||||dddg df}|j+                  t         j$                  j&                  j,                  j                  |      }|j/                  |       |j                  j1                  |j                         |j3                  |       t         j                  k(  r|j3                         |j3                  |       |j3                  |       |j3                  |       |	|j3                  |	       t         j                  k(  r|j3                         |j3                  |
       t4        d   dxx   dz  cc<   t4        d   dxx   t7        | j8                        z  cc<   ddd       y# 1 sw Y   yxY w)ao  
        Match the pattern:
        int8 activation
          |
        dequant_per_tensor
          |
        Conv2d <- optional(aten.clone.default) <- dequant_per_channel <- int8_weight

        Insert weight prepack node and change the pattern to:
        int8 activation
          |
        onednn.qconv2d_pointwise <- onednn.qconv_prepack <- int8_weight
        r   r   Nr   r!   r"   r#   r$   r%   r-   r.   r/   r0   r1   tensor_metarb   r|   r}   r   r~   $qconv2d_weight_prepack_matcher_count$qconv2d_weight_prepack_matcher_nodes)rV   rW   rX   r  r   rI   r@  r   rb   clonequantized_decomposeddequantize_per_channelr,  r   r  r   r.  r*  rm   rn   qconv_prepackr(  ro   replace_all_uses_withupdate
erase_noder   ra   rl   )rp   rb   r`   rC  r   rG  r   r!  )has_clone_to_channel_last_node_in_pattern
clone_nodedequant_per_channelweight_to_bf16_noder  r!   r"   qwr$   r%   biasr.   r/   r0   r1   x_shaper.  packed_weight_inputspacked_weight_opprepack_weight_nodenew_argsnew_conv_noder   s                                 r   qconv_weight_prepackzA_register_qconv_weight_prepack_pass.<locals>.qconv_weight_prepack-  s   & 7777%%'	4#3#3#;#;;;;EMM! ~~a(H'nnQ/O&++A.H==#}}Q'NN1$$

(:(:: 	2 "KINN1PT 	 EMM! = "^^A&   = "^^A&  
 #6":":1"=  &&#::BBC	
C 3K6N9 D :96N G 3K89:83
/fgx ''++m,22G$G##I. 
$   %yy//=="'"5"5 '; #6 #
 ##)H& "//		  22:: 0 M ++M:%%inn5 Y'&  1X&X&\*%  ,&  !4501Z !GHAMHZ !GHCM Hw /..s   F6OO)r   rH  r   )r   rz   r   rb  s     ` r   #_register_qconv_weight_prepack_passrc  ,  s2    $4U;
KE K
Kr   c                 v   |t         j                  t         j                  fv sJ t        t        j
                  j                  t        t        t        d      |t         j                  k7        | t        d      t        d      t        d      t        d      t        d      t        d      t        d      
      }|S )	Nautocast_act_dtyper-   r.   r/   r0   is_transposedout_paddingr1   )
rV   rW   rX   r   rI   r@  r   r   r   r   )_dequant_per_channel_patternr    dequant_convolution_node_patterns      r   *_generate_dequant_convolution_node_patternrj    s     U]]ENN3333'3  04+,U]]"	

 	%389:?#=!8($  ,+r   c                     | t         j                  t         j                  fv sJ t        | t         j                  k(  rt        nt
        |       t        | t         j                  k(  rt        |       fS t        |       fS r   )rV   rW   rX   rj  %dequantize_per_channel_weight_pattern-dequantize_per_channel_to_bf16_weight_pattern+dequantize_per_channel_clone_weight_pattern3dequantize_per_channel_to_bf16_clone_weight_patternr   s    r   '_generate_qconv_weight_prepack_patternsrp    s~    U]]ENN33332% 2>		
 	3% 8 		
  E		
 r   c                       fd}|S )Nc                    | j                         }|j                  t        j                  j                  t        j
                  j                  fv sJ |j                  t        j
                  j                  u rdnd}t        j                  t        j                  fv sJ t        j                  k(  r|j                  |   }n|j                  |   }|j                  d   }|j                  d   }|j                  d   }|j                  t        j                  j                  u sJ |j                  t        j                  j                  u sJ |j                  t        j                  j                  u sJ t        t!        |j"                              dk7  sBt        t!        |j"                              dk7  s!t        t!        |j"                              dk7  ryy)Nr   r   FT)r  r   rI   addmmr   mmrV   rW   rX   rb   r   r   r   rZ   r\   ra   r   r   )rp   linear_nodeinput_indexr   rG  r   r!  r   s          r   r"  z0_is_valid_dequant_linear_pattern.<locals>._inner  sk   '')!!djj&8&8$''//%JJJJ&--@aa7777EMM!"''4H)..{;O&++A.H==#}}Q'""e&@&@&H&HHHH$((//111$((//111\''()Q.4'(A-4'(A- r   rs   r#  s   ` r    _is_valid_dequant_linear_patternrw    s    6 Mr   c                 R    t        | t              |      dt        ffd       }y )Nrx   rp   c                 h   t         j                  t         j                  fv sJ | j                         }|j                  t
        j                  j                  t
        j                  j                  fv sJ |j                  t
        j                  j                  u rdnd}|dz   }t         j                  k(  r|j                  |   }n|j                  |   }|j                  d   }|j                  d   }|j                  d   }	|j                  |   }
t         j                  k(  r|
j                  d   }n|
j                  d   }|j                  d   }|j                  t        j                  j                  u sJ |d   |d   |d   }}}|d   |d   |d   }}}d	|v r|d	   nd
}|j                  j                  d      j                  }t        |      rd
}| j                   }|j#                  |      5  ||f}t         j$                  j&                  j(                  }|j+                  ||      }|||||||dddg df}|j+                  t         j$                  j&                  j,                  j                  |      }|j/                  |       |j                  j1                  |j                         |j3                  |       t         j                  k(  r|j3                         |j3                  |       |j3                  |       |j3                  |	       |j3                  |
       t         j                  k(  r|j3                         |j3                  |       t4        d   dxx   dz  cc<   t4        d   dxx   t7        | j8                        z  cc<   d
d
d
       y
# 1 sw Y   y
xY w)aX  
        Match the pattern:
        int8 activation
          |
        dequant_per_tensor
          |
        mm/addmm <- t <- dequant_per_channel <- int8_weight

        Insert weight prepack node and change the pattern to:
        int8 activation
          |
        onednn.qlinear_pointwise <- onednn.qlinear_prepack <- int8_weight
        r   r   r   r!   r"   r#   r$   r%   r-   NrK  rL  r|   r}   r   r~   $qlinear_weight_prepack_matcher_count$qlinear_weight_prepack_matcher_nodes)rV   rW   rX   r  r   rI   rs  r   rt  rb   rP  rQ  r,  r   r  r   r.  r*  rm   rn   qlinear_prepackr(  r   rS  rT  rU  r   ra   rl   )rp   rb   r`   ru  rv  weight_indexr   activation_to_bf16_noder   r!  t_noderX  rY  r  r!   r"   rZ  r$   r%   r[  r\  r.  r]  r^  r_  r`  new_linear_noder   s                              r   qlinear_weight_prepackzE_register_qlinear_weight_prepack_pass.<locals>.qlinear_weight_prepack  s   & 7777'')!!djj&8&8$''//%JJJJ&--@aa"QEMM!"''4H&1&6&6{&C#.33A6H==#}}Q'!!,/EMM!"(++a."(++a."5":":1"=&&#::BBC	
C 3K6N9 D :96N G "Vmvc{''++m,22G$G##K0 $   %yy//??"'"5"5 '; #6 #
 #)H $11		  22:: 2 O --o>  ''(8(89 [)&  !89X&X&\*V$&  !4501Z !GHAMHZ !GHCM H] 100s   6F)N((N1)r   rw  r   )r   rz   r   r  s     ` r   %_register_qlinear_weight_prepack_passr    s2    $4U;
ke k
kr   c           
         t        t        j                  j                  t	        | t        d      |t        j                  k7        t        d            }t        t        j                  j                  t        d      t	        t        t        d      |t        j                  k7        |      }t        t        j                  j                  t	        t        t        d      |t        j                  k7        |      }||fS )Nr*   permute_axesr-   re  )r   rI   permuter   r   r   rV   rW   rs  r   rt  )rh  r   	t_patterndequant_linear_bias_patterndequant_linear_no_bias_patterns        r   %_generate_dequant_linear_node_patternr    s     0(+,U]]"	

 	>"I #/

304+,U]]"	

 		# &204+,U]]"	

 	&" '(FFFr   c                 "    t        t        |       S r   )r  rl  r   s    r   )_generate_qlinear_weight_prepack_patternsr    s    0-u r   c            	      <   t         j                  t         j                  fD ]y  } t        t	        t
        t        d      | t         j                  k7        d|        t        |       }|D ]  }t        |d|         t        |       }|D ]  }t        |d|         { y )Nre  r   )rz   r   r   )rV   rW   rX   r:  r   r   r   rp  rc  r  r  )r   weight_prepack_patternsweight_prepack_patterns      r   '_register_quantization_weight_pack_passr    s    --04 	)48/0&
 	
 #J%"P&="/&AU '> #LE"R&="1&AU '>] 1r   )F)Xr+  	functoolsr   r   typingr   r   rV   torch._dynamo.utilsr   %torch.fx.experimental.symbolic_shapesr   loweringr   r   r	   pattern_matcherr
   r   r   r   r   r   utilsr   freezing_patternsr   	post_gradr   rm   rI   r   rP  r   r   r\   rZ   r   r   r   r   rQ  rl  rm  rO  rn  ro  rn   ro   r   r   r   r   rE   rN   rW   r^   rh   ru   r   r   r   r   r   r   r   r   r   r   r   r	  r  r  r  r  r  r$  r:  rH  rc  rj  rp  rw  r  r  r  	lru_cacher  rs   r   r   <module>r     s         ( B < X X   > 0yy~~		yy55 II	" =@EQU  ,8HHOO&&..sO|$	

 	6 y, ( )5//77zyvx}}y	) % --'( . /;JJ)_-/ + 7CJJ1_-7 3 !-	II&&..sOyvyvsOxyzx!""#~vEE%! * $	II&&..sOyvyvsO~"#~}}!" " (HHOO&&..w'(	

 	: } & &+	$  @E}} &,R.* #(--HV* #(--;|8?DePvr+\*
Z@  1=HHOO&&..EE	

 	 E1 -	$	% /4mm 2 BG :Tz*Z EJMM Qj ).,. 38-- ,> GLmm qj ).GD 5:MM  T3 3r   