
    PhgB                        U d dl Z d dlmZ d dlmZmZmZ d dlmZm	Z	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d
dgZe j6                  j8                  Zd Zi Ze
e	e	f   ed<   d Z d+dZ! e!ejD                        ddde#fd       Z$ e!ejJ                        d,de#fd       Z& e!ejN                        d,de#fd       Z( e!ejR                        d,de#fd       Z*	 d+dee#   dee#   dee#   de+de#f
dZ, e!ejZ                  ej\                  g      ddde#fd       Z/d Z0 e!ejb                        de#fd       Z2d Z3 e!ejh                  ejj                  g      ddde#fd       Z6d  Z7 e!ejp                  ejr                  g      ddde#fd!       Z:ejD                  e$ejJ                  e&ejN                  e(ejR                  e*ejZ                  e/ej\                  e/ejb                  e2ejh                  e6ejj                  e6ejp                  e:ejr                  e:iZd" Z;g d#Z<d$ Z=d% Z>d& Z?d' Z@ G d( d
e      ZA G d) d*e      ZBy)-    N)tree_maptree_flattentree_unflatten)ListAnyDictOptionalUnion
NamedTuple)defaultdict)TorchDispatchMode)RemovableHandle)register_decompositionprodwrapsFlopCounterModeregister_flop_formulac                 R    t        | t        j                        r| j                  S | S N)
isinstancetorchTensorshape)is    cC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/utils/flop_counter.py	get_shaper      s    !U\\"wwH    flop_registryc                 4     t               d d fd
       }|S )N)outc                 F    t        t        ||| f      \  }}} |d|i|S )N	out_shape)r   r   )r"   argskwargsr$   fs       r   nfzshape_wrapper.<locals>.nf   s2    "*9tVS6I"Jfi$6)6v66r   r   r'   r(   s   ` r   shape_wrapperr*      s#    
1X 7 7 Ir   c                       fd}|S )Nc                 R    st        |       }  t        t        d      |        | S )NT)registryunsafe)r*   r   r    )flop_formulaget_rawtargetss    r   register_funz+register_flop_formula.<locals>.register_fun!   s*    (6LLwtL\Zr    )r1   r0   r2   s   `` r   r   r       s     r   )r$   returnc                :    | \  }}|\  }}||k(  sJ ||z  dz  |z  S )zCount flops for matmul.   r3   )	a_shapeb_shaper$   r%   r&   mkk2ns	            r   mm_flopr=   )   s3    
 DAqEB7N7q519q=r   c                     t        ||      S )zCount flops for addmm.)r=   
self_shaper7   r8   r$   r&   s        r   
addmm_floprA   4   s     7G$$r   c                 V    | \  }}}|\  }}}	||k(  sJ ||k(  sJ ||z  |	z  dz  |z  }
|
S )z"Count flops for the bmm operation.r6   r3   )r7   r8   r$   r&   br9   r:   b2r;   r<   flops              r   bmm_floprF   9   sK    
 GAq!IBA7N77N7q519q=1DKr   c                     t        ||      S )z&Count flops for the baddbmm operation.rF   r?   s        r   baddbmm_floprI   F   s    
 GW%%r   x_shapew_shaper$   
transposedc                 r    | d   }|r| n|dd }|^}}}|t        |      z  |z  t        |      z  dz  |z  }	|	S )a  Count flops for convolution.

    Note only multiplication is
    counted. Computation for bias are ignored.
    Flops for a transposed convolution are calculated as
    flops = (x_shape[2:] * prod(w_shape) * batch_size).
    Args:
        x_shape (list(int)): The input shape before convolution.
        w_shape (list(int)): The filter shape.
        out_shape (list(int)): The output shape after convolution.
        transposed (bool): is the convolution transposed
    Returns:
        int: the number of flops
    r   r6   Nr   )
rJ   rK   r$   rL   
batch_size
conv_shapec_outc_indimsrE   s
             r   conv_flop_countrS   N   sW    ( J''Y;J E4$ Z((504:=ADHDKr   c                     t        | |||      S )zCount flops for convolution.)rL   )rS   )
rJ   rK   _bias_stride_padding	_dilationrL   r$   r%   r&   s
             r   	conv_floprY   k   s     7GY:NNr   c                 4    | d   | d   gt        | dd        z   S )N   r   r6   )list)r   s    r   transpose_shaper]   p   s$    !HeAh$uQRy/11r   c                     d}|
d   r t        |d         }|t        | |||       z  }|
d   r(t        |d         }|t        t        |      | ||      z  }|S Nr   r[   )r   rS   r]   )grad_out_shaperJ   rK   rU   rV   rW   rX   rL   _output_padding_groupsoutput_maskr$   
flop_countgrad_input_shapegrad_weight_shapes                  r   conv_backward_floprg   s   sq     J1~$Yq\2ong?OU_Q_``
1~%il3oog&>Pacmnn
r   c                     | \  }}}}|\  }}}	}
|\  }}}}||cxk(  r|k(  r"n J ||cxk(  r|k(  rn J ||
k(  r
|	|k(  r||
k(  sJ d}|t        ||z  ||f||z  ||	f      z  }|t        ||z  ||	f||z  |	|f      z  }|S )z^
    Count flops for self-attention.

    NB: We can assume that value_shape == key_shape
    r   rH   )query_shape	key_shapevalue_shaperC   hs_qd_q_b2_h2s_k_d2_b3_h3_s3d_vtotal_flopss                   r   sdpa_flop_countrx      s     !NAq#s"Cc3$Cc3?s?[[qC3[[3#:#*QTX[Q[[[K8QUC-AsC/@AAK8QUC-AsC/@AAKr   c                    t        | ||      S )zCount flops for self-attention.)rx   )ri   rj   rk   r$   r%   r&   s         r   	sdpa_floprz      s     ;	;??r   c                    d}|\  }}}}|\  }	}
}}|\  }}}}| \  }}}}||	cxk(  r|cxk(  r|k(  rn J ||
cxk(  r|cxk(  r|k(  r	n J ||k(  sJ ||k(  r
||k(  r||k(  sJ d}|t        ||z  ||f||z  ||f      z  }|t        ||z  ||f||z  ||f      z  }|t        ||z  ||f||z  ||f      z  }|t        ||z  ||f||z  ||f      z  }|t        ||z  ||f||z  ||f      z  }|S )Nr   rH   )r`   ri   rj   rk   rw   rC   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   _b4_h4_s4_d4s                        r   sdpa_backward_flop_countr      sf   K NAq#s"Cc3$Cc3'Cc3!s!c!KKa3&<#&<&<KKKK#:#*33K 8QUC-AsC/@AAK 8QUC-AsC/@AAK8QUC-AsC/@AAK 8QUC-AsC/@AAK8QUC-AsC/@AAKr   c                    t        | |||      S )z(Count flops for self-attention backward.)r   )r`   ri   rj   rk   r$   r%   r&   s          r   sdpa_backward_flopr      s     $NKKXXr   c                 ,    t        | t              s| fS | S r   )r   tuplexs    r   normalize_tupler      s    atHr   ) KMBTc                     t        dt        t        t              dz
  t        t	        |             dz
  dz              }t        |   S )Nr   r[      )maxminlensuffixesstr)numberindexs     r   get_suffix_strr      s=     3s8}q(3s6{+;a+?A*EFGEE?r   c                 X    t         j                  |      }| d|z  z  d}|t         |   z   S )Ni  z.3f)r   r   )r   suffixr   values       r   convert_num_with_suffixr      s2    NN6"E%c*E8E?""r   c                     |dk(  ry| |z  dS )Nr   0%z.2%r3   )numdenoms     r   convert_to_percent_strr      s    zEk#r   c                 .     t                fd       }|S )Nc                 B    t        |       \  }} | }t        ||      S r   )r   r   )r%   	flat_argsspecr"   r'   s       r   r(   z)_pytreeify_preserve_structure.<locals>.nf   s'    &t,	4mc4((r   r   r)   s   ` r   _pytreeify_preserve_structurer      s     
1X) )
 Ir   c                   $    e Zd ZdZ	 	 	 	 ddeeej                  j                  e	ej                  j                     f      de
dedeeeef      fdZd Zd Zd	 Zd
 Zd Zd Zde
fdZdeeeee
f   f   fdZddZ fdZ fdZddZ xZS )r   a  
    ``FlopCounterMode`` is a context manager that counts the number of flops within its context.

    It does this using a ``TorchDispatchMode``.

    It also supports hierarchical output by passing a module (or list of
    modules) to FlopCounterMode on construction. If you do not need hierarchical
    output, you do not need to use it with a module.

    Example usage

    .. code-block:: python

        mod = ...
        flop_counter = FlopCounterMode(mod)
        with flop_counter:
            mod.sum().backward()

    modsdepthdisplaycustom_mappingc                 b   t        d       | _        || _        dg| _        || _        |i }t        |t        j                  j                        r|g}|| _	        i | _
        i t        |j                         D ci c]   \  }}|t        |dd      r|n
t        |      " c}}| _        y c c}}w )Nc                       t        t              S r   )r   intr3   r   r   <lambda>z*FlopCounterMode.__init__.<locals>.<lambda>  s
    +VYJZr   Global_get_rawF)r   flop_countsr   parentsr   r   r   nnModuler   _module_to_forward_hook_handlesr    itemsgetattrr*   )selfr   r   r   r   r:   vs          r   __init__zFlopCounterMode.__init__  s     7BBZ6[
 z!NdEHHOO,6D	UW,

WeWkWkWmnWmtqRSqwq*e4!-:JJWmn
ns   ;%B+c                    | j                   y | j                   D ]  }t        |      j                  }t        |j	                               j                         D ]y  \  }}|dk(  r|}ndj                  ||g      }|j                  | j                  |            }|j                  | j                  |            }t        ||      | j                  |<   {  y )Nr   .)r   type__name__dictnamed_modulesr   joinregister_forward_pre_hook_enter_moduleregister_forward_hook_exit_module_ForwardHookHandlesr   )r   modprefixnamemoduleforward_pre_hook_handleforward_hook_handles          r   _register_forward_hooksz'FlopCounterMode._register_forward_hooks$  s    9999C#Y''F $S%6%6%8 9 ? ? Af2:!D88VTN3D*0*J*J4K]K]^bKc*d'&,&B&B4CTCTUYCZ&[#?R+-@@44V< !B r   c                     | j                   j                         D ](  }|d   j                          |d   j                          * | j                   j                          y r_   )r   valuesremoveclear)r   forward_hook_handless     r   _deregister_forward_hooksz)FlopCounterMode._deregister_forward_hooks5  sS    $($H$H$O$O$Q  #**, #**, %R 	,,224r   c                       fd}|S )Nc                 H     t        j                              |      }|S r   )r   _create_pre_module)r   inputsr"   r   r   s      r   r'   z(FlopCounterMode._enter_module.<locals>.f<  s%    N/0G0G0MNvVCJr   r3   r   r   r'   s   `` r   r   zFlopCounterMode._enter_module;  s    	 r   c                       fd}|S )Nc                 H     t        j                              |      }|S r   )r   _create_post_module)r   r   outputsr   r   s      r   r'   z'FlopCounterMode._exit_module.<locals>.fC  s&    S3D4L4LT4RST[\GNr   r3   r   s   `` r   r   zFlopCounterMode._exit_moduleB  s    	 r   c                 j      G  fddt         j                  j                        }|j                  S )Nc                   :    e Zd Ze fd       Ze fd       Zy)6FlopCounterMode._create_post_module.<locals>.PushStatec                 ~    j                   d   k(  sJ j                   j                          t        d |      }|S )Nc                 Z    t        | t        j                        r| j                         S | S r   r   r   r   cloner   s    r   r   zPFlopCounterMode._create_post_module.<locals>.PushState.forward.<locals>.<lambda>N       z!U\\7R!'')*YXY*Yr   )r   popr   ctxr%   r   r   s     r   forwardz>FlopCounterMode._create_post_module.<locals>.PushState.forwardJ  s=    ||B'4/0/  " Y[_`r   c                 >    j                   j                         |S r   )r   appendr   	grad_outsr   r   s     r   backwardz?FlopCounterMode._create_post_module.<locals>.PushState.backwardQ  s    ##D)  r   Nr   
__module____qualname__staticmethodr   r   r   r   s   r   	PushStater   I  s)      ! !r   r   r   autogradFunctionapply)r   r   r   s   `` r   r   z#FlopCounterMode._create_post_moduleH  s%    	!// 	! r   c                 j      G  fddt         j                  j                        }|j                  S )Nc                   :    e Zd Ze fd       Ze fd       Zy)4FlopCounterMode._create_pre_module.<locals>.PopStatec                 X    j                   j                         t        d |      }|S )Nc                 Z    t        | t        j                        r| j                         S | S r   r   r   s    r   r   zNFlopCounterMode._create_pre_module.<locals>.PopState.forward.<locals>.<lambda>]  r   r   )r   r   r   r   s     r   r   z<FlopCounterMode._create_pre_module.<locals>.PopState.forwardZ  s(    ##D) Y[_`r   c                 d    j                   d   k(  sJ j                   j                          |S )Nr   )r   r   r   s     r   r   z=FlopCounterMode._create_pre_module.<locals>.PopState.backward`  s0    ||B'4/0/  "  r   Nr   r   s   r   PopStater   Y  s)     
 ! !r   r   r   )r   r   r   s   `` r   r   z"FlopCounterMode._create_pre_moduleX  s%    	!u~~.. 	! ~~r   r4   c                 N    t        | j                  d   j                               S )Nr   )sumr   r   r   s    r   get_total_flopszFlopCounterMode.get_total_flopsh  s!    4##H-44677r   c                 ,    t        | j                        S )a  Return the flop counts as a dictionary of dictionaries.

        The outer
        dictionary is keyed by module name, and the inner dictionary is keyed by
        operation name.

        Returns:
            Dict[str, Dict[Any, int]]: The flop counts as a dictionary.
        )r   r   r  s    r   get_flop_countszFlopCounterMode.get_flop_countsk  s     D$$%%r   c                 .    | j                   }|d}dd l}d|_        g d}g } j                         t	              d fd} j
                  j                         D ]F  }|dk(  r	|j                  d      d	z   }||kD  r# |||d	z
        }|D ]  }	|j                  |	        H d j
                  v r2s0t        |      D ]  \  }
}	d
||
   d   z   ||
   d<     |dd      |z   }t        |      dk(  rg dg}|j                  ||d      S )Ni?B r   T)r   FLOPz% TotalFc           	         t        
j                  |    j                               }	|k\  z  	d|z  }g }|j                  || z   t	        |      t        |      g       
j                  |    j                         D ]<  \  }}|j                  |dz   t        |      z   t	        |      t        |      g       > |S )N z - )r   r   r   r   r   r   r   r   )mod_namer   rw   paddingr   r:   r   global_flopsglobal_suffixis_global_subsumedr   s          r   process_modz.FlopCounterMode.get_table.<locals>.process_mod  s     d..x8??ABK+"==EkGFMM("']C&{LA 
 ((288:1eOc!f,+A}=*1l;  ; Mr   r   r   r[   r  )r   0r   )leftrightr  )headerscolalign)r   tabulatePRESERVE_WHITESPACEr  r   r   keyscountr   	enumerater   )r   r   r  headerr   r  r   	mod_depth
cur_valuesr   idxr  r  r  s   `          @@@r   	get_tablezFlopCounterMode.get_tablew  s;   =JJE=E'+$.++-&|4"	, ##((*Ch		#*I5 $S)a-8J#e$ $ + t'''0B'/
U!$vc{1~!5sA 0 !1-6Fv;!+,F  B\ ]]r   c                 x    | j                   j                          | j                          t        |           | S r   )r   r   r   super	__enter__)r   	__class__s    r   r   zFlopCounterMode.__enter__  s1     $$&r   c                     | j                   r$t        | j                  | j                               | j	                          t        |   |  y r   )r   printr  r   r   r  __exit__)r   r%   r!  s     r   r$  zFlopCounterMode.__exit__  s8    <<$..,-&&($r   c                     |r|ni } ||i |}|j                   }|| j                  v rF| j                  |   } ||i |d|i}| j                  D ]  }	| j                  |	   |xx   |z  cc<    |S )Nr"   )_overloadpacketr    r   r   )
r   functypesr%   r&   r"   func_packetflop_count_funcrd   pars
             r   __torch_dispatch__z"FlopCounterMode.__torch_dispatch__  s    !rD#F#**$,,,"00=O($B&BcBJ||  %k2j@2 $ 
r   )Nr6   TNr   )r3   N)r   r   r   __doc__r	   r
   r   r   r   r   r   boolr   r   r   r   r   r   r   r   r   r  r   r  r  r   r$  r,  __classcell__)r!  s   @r   r   r      s    , MQ 7;
5$uxx2G!GHI
 
 	

 %T#s(^4
,"5  8 8
&c4S>&9!: 
&;^z 
r   c                   "    e Zd ZU eed<   eed<   y)r   r   r   N)r   r   r   r   __annotations__r3   r   r   r   r     s    ,,((r   r   )Fr   )Cr   torch.nnr   torch.utils._pytreer   r   r   typingr   r   r   r	   r
   r   collectionsr   torch.utils._python_dispatchr   torch.utils.hooksr   torch._decompr   mathr   	functoolsr   __all__opsatenr   r    r1  r*   r   mmr   r=   addmmrA   bmmrF   baddbmmrI   r.  rS   convolution_convolutionrY   r]   convolution_backwardrg   rx   '_scaled_dot_product_efficient_attention#_scaled_dot_product_flash_attentionrz   r   0_scaled_dot_product_efficient_attention_backward,_scaled_dot_product_flash_attention_backwardr   r   r   r   r   r   r   r   r   r3   r   r   <module>rI     s     F F ? ? # : - 0   5
6yy~~
 !#tCH~ " tww/3 #    tzz"%# % #% txx 
C 
 !
 t||$&C & %& 	#Y#Y Cy 	
 	: (($*;*;<=bf Oux O >O2 t001  20$ DDdFnFnopDH @WZ @ q@6 MMt  PA  PA  B  C]a Yps Y CY
 	GGWJJ
HHhLL,iy100),,i99;M557I $# 
Q' Qf)* )r   