
    Ph                         d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d a	 	 ddZ	d Z
d Zd Zd Zd	 Zg ad
edefdZddZy)    N)profileProfilerActivityc                       y )N r       kC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/_functorch/benchmark_utils.pysynchronizer	   
   s    r   c	                    |dg}|dgk7  r8t         j                  j                         rt         j                  j                  a|i }|i }|5  t        j                  d       t        d      D ]  }	 | |fi | t                 t        j                  d       t        j                         }
t        |      D ]  }	 | |fi | t                 t        j                         }ddd       
z
  }t        dd|i|5 }|5  t                t        j                  d       t        |      D ]  }	 | |fi | t                 	 ddd       ddd       j                  |       |S # 1 sw Y   xY w# 1 sw Y   0xY w# 1 sw Y   4xY w)a0  
    Output the chrome trace of running f(input, **kwargs_for_f) with [optimize_ctx]
    [num_runs] times to [trace_filename].

    [activities] are the activities that the profiler will record, e.g. ProfilerActivity.CUDA.
    Return total runtime without the profiler

    Outputs to trace_filename
    Ncudacpui9     
activitiesr   )
torchr   is_availabler	   manual_seedrangetimeperf_counterr   export_chrome_trace)finputtrace_filenameoptimize_ctxr   num_runsdeviceskwargs_for_fkwargs_for_profiler_t0t1timingprofs                 r   dump_chrome_tracer#      sa    ( 5'ejj557jj,," 	$qAe$|$M  	$ xAe$|$M !   
 "WF		>J	>*=	>$Md#8_%(<( %  
? 	^,M- 
 \ 
?	>s2   BE+FAE7F+E47F 	<FFc                 P    t        |       }t        j                  |      }|d   }|S )NtraceEvents)openjsonload)filenamer   dataeventss       r   get_chrome_trace_eventsr,   ?   s'    XA99Q<D- FMr   c                 D    d| v xr | d   t         v xr d| v xr | d   dk(  S )NpidphX)gpu_pidsevents    r   is_gpu_compute_eventr4   F   s3    E>_eElh6_45=_USW[\_M__r   c                 j    g }| D ]  }t        |      s|j                  |       ! t        |d       S )Nc                     | d   S )Ntsr   )xs    r   <lambda>z'get_sorted_gpu_events.<locals>.<lambda>Q   s    1T7r   )key)r4   appendsorted)r+   sorted_gpu_eventsr3   s      r   get_sorted_gpu_eventsr>   K   s<    #E*  '  #):;;r   c                     t        |       dk(  ry| d   }|d   |d   z   }|d   }| dd  D ]:  }t        |d   |      }|d   |d   z   }|t        ||z
  d      z   }t        ||      }< |S )Nr   r7   dur   )lenmax)r=   r3   current_end_timetotal_duration
start_timeend_times         r   get_durationrH   T   s    
"a ET{U5\15\N"12&t&67
;u-'#h.CQ*GG/:	 '
 r   c                 h    d }t        |       }g }|D ]  } ||      s|j                  |        |S )Nc                 R    d| v xr" d| d   v xs d| d   v xs d| d   v xs d| d   v S )Nnamegemmconvcutlasswgradr   r2   s    r   is_mm_conv_eventz7get_sorted_gpu_mm_conv_events.<locals>.is_mm_conv_eventc   sX     _FeFm$; %^vv?V %^'0E&M'A%^ELPUV\P]E]	_r   )r>   r;   )r+   rP   
gpu_eventssorted_eventsr3   s        r   get_sorted_gpu_mm_conv_eventsrS   b   sD    _ 'v.JM&U#  r   r)   total_lengthc                    t        |       }g a|D ]3  }d|vr|d   dk(  sd|d   d   v st        j                  |d          5 |dz  }t        |      }t	        |      |z  }t        |      }t	        |      |z  }||fS )a  
    Process the chrome traces outputs by the pytorch profiler to compute GPU Utilization
    and percent of times spent on matmul and convolution

    Args:
        filename(str): Name of chrome traces file produced by pytorch profiler

        total_length(float): total length of the process without profiler in second

    Return:
        tuple: (GPU Utilization, percent of time spent on matmul and convolution)
    rK   process_labelsGPUargslabelsr.   g    .A)r,   r1   r;   r>   rH   rS   )r)   rT   r+   r3   r=   utilizationsorted_gpu_mm_conv_eventsmm_conv_utilizations           r   compute_utilizationr]   r   s     %X.F H=,,%-:Q1QOOE%L)	   #%L-f501L@K =f E&'@ALP+++r   c           	      d   t         j                  j                  |      }|s#t        j                  |       t	        d|z          |t        j                         }t         j                  j                  ||dz         }t        | |||t        j                  g|d      }t        ||      \  }	}
|	|
fS )a  
    Benchmark the GPU Utilization and percent of time spent on matmul and convolution operations of
    running f(input, **kwargs_for_f) with [optimize_ctx] [num_runs] times.
    It will produce a chrome trace file in trace_folder/trace_file_name.json

    Example:

    ```
    def f(a):
        return a.sum()
    a = torch.rand(2**20, device="cuda")
    utilization, mm_conv_utilization = benchmark_utilization(f, a, "tmp", trace_file_name = "tmp_chrome_trace")
    ```

    Args:
        f: function to benchmark

        input: input to :attr:`f`

        trace_folder: name of the folder to store the chrome trace

        optimize_ctx: the context in which f will run

        trace_file_name: name of the dumped chrome trace file, default to "tmp_chrome_trace"

        num_runs: number of times to run f, excluding the warm-up runs, default to 1.

    Return:
        tuple: (GPU Utilization, percent of time spent on matmul and convolution)

    zcreate folder z.jsonr   )r   r   )ospathexistsmakedirsprint
contextlibnullcontextjoinr#   r   CUDAr]   )r   r   trace_folderr   trace_file_namer   isExistchrome_trace_file_namerT   rZ   r\   s              r   benchmark_utilizationrl      s    @ ggnn\*G
L!-.!--/WW\\,'8QR$Q/E|&6&;&;%<xY_aL':;QS_'`$K$+++r   )rA   NNN)Ntmp_chrome_tracerA   )rd   r   r_   r'   r   torch.profilerr   r   r	   r#   r,   r4   r>   rH   rS   r1   strfloatr]   rl   r   r   r   <module>rq      sf      	   4	 TUKO.b`
<
 ,# ,U ,D-,r   