
    Ph                        U d Z ddlZddlZddlmZmZmZmZmZ ddl	Z	ddl	m
Z
 ddlmZ ddlmZ g Zee   ed<   ee	j$                  eef   Zee
   Zee
ef   Zdej.                  d	e	j$                  d
eej0                  ddf   fdZded
dfdZdej.                  deee   e
f   ded	e	j$                  d
ee   f
dZdej.                  deee   e
f   deded	e	j$                  d
ee   fdZy)zPer-layer profilers.    N)Any	GeneratorListUnionSequence)Tensor   )Batch__all__moduledevicereturnc              #      K   | D ]<  }t        j                  |      }|j                  |       |j                          | > yw)zOCopies layers for ease to profile. It doesn't modify the given
    module.
    N)copydeepcopytotrain)r   r   layer
layer_copys       {C:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/distributed/pipeline/sync/_balance/profile.pylayerwise_sandboxr      s>      ]]5)
f	 s   AAbatchc                     t        |       D ]1  \  }}|j                         j                  |j                        | |<   3 y)zDetaches from autograd graph.N)	enumeratedetachrequires_grad_requires_grad)r   ixs      r   r   r   &   s3    % 188:,,Q__=a !    sampletimeoutc                 0   t        d | j                         D              rt        d      t        |      }t	        |      D ]@  \  }}|j                         j                  |      j                  |j                        ||<   B | D cg c]  }g  }}t        j                         }	t        j                         |	z
  |k  r&|}
t	        t        | |            D ]  \  }}t        |
       |j                  dk(  rt        j                  j                  |       t        j                         }|
j                  |      }
t!        d |
D              }|r t        j"                  j%                  ||       |j                  dk(  rt        j                  j                  |       t        j                         }||   j'                  ||z
          t        j                         |	z
  |k  r&d|D cg c]  }t)        fd|D               c}S c c}w c c}w )z!Profiles elapsed times per layer.c              3   8   K   | ]  }|j                   d u  y wN)grad.0ps     r   	<genexpr>z profile_times.<locals>.<genexpr>.   s     
;':!166':s   z#some parameter already has gradientcudac              3   :   K   | ]  }|j                   s|  y wr%   )r   )r(   ys     r   r*   z profile_times.<locals>.<genexpr>F   s     $I1Qs   i@B c              3   :   K   | ]  }t        |z          y wr%   )int)r(   tuss     r   r*   z profile_times.<locals>.<genexpr>Q   s     )SAFSs   )any
parameters
ValueErrorr
   r   r   r   r   r   timer   typetorchr+   synchronizecalltupleautogradbackwardappendsum)r   r!   r"   r   _batchr   r   _	time_bufsbegun_atr   r   tickbackward_tensorstockbufr1   s                   @r   profile_timesrG   ,   s   

;v'8'8':
;;>??6]F&!1HHJMM&)88Iq	 " 17#71BI#7yy{H
))+
 7
*!"3FF"CDHAu5M{{f$

&&v.99;D JJu%E  %$I$II''(8:JK{{f$

&&v.99;DaLt,' E ))+
 7
*0 
B5>?YcC)S))Y??9 $88 @s   
	H0Hinputchunksparam_scalec                    |j                   dk7  rt        d      t        |      }g }|d   j                  d      |z  }t	        |      D ]C  \  }}	|	dd j                         j                  |      j                  |	j                        ||<   E t        | |      D ]  }
t        |       t        j                  j                          t        j                  j                  |      }|j                  |
      }t        j                  j                          t        j                  j                  |      }||z
  }t!        d |
j#                         D              }||z  ||z  z   }|j%                  t'        |              |S )z%Profiles CUDA memory usage per layer.r+   z'size profiler supports only CUDA devicer   N   c              3   X   K   | ]"  }|j                         j                          $ y wr%   )_typed_storage_nbytesr'   s     r   r*   z profile_sizes.<locals>.<genexpr>n   s%     R?Q!))+335?Qs   (*)r6   r4   r
   sizer   r   r   r   r   r   r7   _C_cuda_clearCublasWorkspacesr+   memory_allocatedr9   r>   r3   r=   r/   )r   rH   rI   rJ   r   r   sizeslatent_scaler   r   r   memory_beforememory_afterlatent_size
param_sizerP   s                   r   profile_sizesrZ   T   sG    {{fBCC%LEE8==#f,L% 1Ra5<<>$$V,;;AOOLa ! #662u 	,,.

33F;

5!,,.zz226:"]2 Ru?O?O?QRR
 \)J,DDSY! 3$ Lr    )__doc__r   r5   typingr   r   r   r   r   r7   r   torch.nnnn
microbatchr
   r   str__annotations__r   r/   DeviceTensorsTensorOrTensors
SequentialModuler   r   floatrG   rZ    r    r   <module>ri      sJ      8 8    c  
u||S#%	&
6
(bmm U\\ yQSQZQZ\`bfQfGg >% >D >%@"-- %@tCy&7H1I %@TY %@chcoco %@uyz}u~ %@P MM "'S	6(9": DG V[ ejeqeq 	#Y r    