
    Ph                        d dl Z d dlmZ d dlmZ d dlZddlmZ ddlm	Z	m
Z
 ddlmZ erd dlmZ  G d	 d
e      Z G d de      ZdefdZdddefdZ G d de      Z G d de      Z G d de      Z ej,                  dgdgg      Z ej,                  dgdggdgdggdgdggg      Z ej,                  g dg dg dg      Zdddefd Zy)!    N)IntEnum)TYPE_CHECKING   )ir)get_dtype_sizesympy_product)V)BaseSchedulerNodec                       e Zd ZdZdZdZy)	NCCL_COLLr   r      N)__name__
__module____qualname__
ALL_REDUCE
ALL_GATHERREDUCE_SCATTER     hC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/_inductor/comm_analysis.pyr   r      s    JJNr   r   c                       e Zd ZdZdZdZy)NVIDIA_GPU_TYPEr   r   r   N)r   r   r   VOLTAAMPEREHOPPERr   r   r   r   r      s    EFFr   r   returnc                  0   t         j                  j                  j                  t         j                  j                  j                        } d| v rt
        j                  S d| v rt
        j                  S d| v rt
        j                  S t
        j                  S )NV100A100H100)	torchutilscollect_envget_gpu_inforunr   r   r   r   )gpu_infos    r   get_gpu_typer'      sv    {{&&33EKK4K4K4O4OPH$$$	8	%%%	8	%%% %%%r   snoder
   c                    t        | j                  t        j                  t        j                  f      rt
        j                  S t        | j                  t        j                  t        j                  f      rt
        j                  S t        | j                  t        j                  t        j                  f      rt
        j                  S t        d| j                         )NzUnsupported collective type: )
isinstancenoder   	AllReduceAllReduceCoalescedr   r   AllGatherIntoTensorAllGatherIntoTensorCoalescedr   ReduceScatterTensorReduceScatterTensorCoalescedr   	Exception)r(   s    r   get_collective_typer3   )   s    %**r||R-B-BCD###	

R++R-L-LM
 ###	

R++R-L-LM
 '''7

|DEEr   c                       e Zd ZdZdZdZy)NCCL_HWr   r   r   N)r   r   r   NVLINKPCINETr   r   r   r5   r5   =   s    F
C
Cr   r5   c                       e Zd ZdZdZy)	NCCL_ALGOr   r   N)r   r   r   TREERINGr   r   r   r:   r:   C   s    DDr   r:   c                       e Zd ZdZy)
NCCL_PROTOr   N)r   r   r   LLr   r   r   r>   r>   H   s	     
Br   r>   g333333@gffffff@g333333?      ?g      @g@)     C@rA   gffffff4@)gU@g     6@g      3@c                    t         j                  j                  j                  t	        | j
                  j                  j                              }| j
                  j                  j                  }|t        |      z  }|dz  dz  dz  }d}| j
                  j                  \  }}}t        j                  ||z        }|}	|	dk  ryt        j                  }
t        j                   }t#        |       }t$        j&                  j(                  j*                  }t$        j&                  j(                  j,                  }t/               }|dk  r|dz
  nd}|dk(  r|nd}t0        |   |   j3                         }|dk(  r|n|}d}||z  }t5        |||dkD  s|t6        j8                  k(  rdndz        }|t6        j8                  k(  r	d|	dz
  z  }n'|t6        j:                  t6        j<                  fv r|	dz
  }d|	z  z  }||z  }|d	z  }t>        j@                  }|dk(  r|nt>        jB                  }|t6        j8                  k(  r|dkD  rd|z  }n*d}n'|t6        j:                  t6        j<                  fv r|dz
  }tD        |
   |   j3                         }tF        |   |
   |   j3                         }tF        t>        jB                     |
   |   j3                         }d
} |dkD  rd} tI        ||       }||z
  |z  ||z  z   z  }|dz  }!||z  }"|"|!z   S )a9  
    Returns estimated NCCL collective runtime in nanoseconds (ns).

    The following heuristics are copied from https://github.com/NVIDIA/nccl/blob/master/src/graph/tuning.cc.
    We aim to estimate the runtime as accurately as possible.

    Assumptions:
    - only ring algorithm (NCCL_ALGO_RING) is used
    - only Low-Latency protocol (NCCL_PROTO_LL) is used, i.e. Simple or LL128 is not used
    - 8 gpus per node  # TODO: Need to find a way to get accurate "gpus per node" and "# nodes" info.
    - collective is one of: allreduce, reducescatter, allgather
    i      r   r   r   g      ?gUUUUUU?r@   g    eAg        g     @@)%r	   graphsizevars	size_hintr   r+   layoutsizedtyper   constant_argsmathceilr:   r<   r>   r?   r3   r!   	_inductorconfigintra_node_bwinter_node_bwr'   llMaxBwsitemminr   r   r   r   r5   r6   r8   baseLathwLatmax)#r(   tensor_numeltensor_dtypetensor_storage_size_bytestensor_storage_size_GBnum_gpus_per_node_
group_sizenNodesnRanks	nccl_algo
nccl_protocollbwIntrabwIntercompCapIndexindex2index1llMaxBwbw	nChannelsbusBwnstepsratio	bandwidthbandwidth_GB_per_nsintraHwhwnInterStepslatencyintraLatinterLatnetOverhead
latency_nstransport_nss#                                      r    estimate_nccl_collective_runtimery      s    77##--mEJJ<M<M<R<R.STL::$$**L ,~l/K K6=DtK zz//Aq*YYz$556FF{ IJu%D
 oo$$22Goo$$22G>L!Q;VaZAF#q[\aFvv&++-G aKWBINE !ty/C/C'C9)	UE y###fqj!	)**I,@,@A	A! 6\V#EI#c/ nnGaKW[[By###A:f*KK	)**I,@,@A	Aqj i ,113GW~i(499;HW[[!),Z8==?H Kz8[)H$0;3IIIG3J *,??L*$$r   )rK   enumr   typingr   r!    r   r"   r   r   virtualizedr	   torch._inductor.schedulerr
   r   r   r'   r3   r5   r:   r>   tensorrT   rU   rQ   floatry   r   r   r   <module>r      s1         0 ; g 
&o 
&F2 Fy F(g  
  %,, 	

 	
	 	 EE	
 EE	
 EE	
	, 5<<	
	
	
0e%,? e%E e%r   