
    PhA]              
          U d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z	d dl
mZ ddgZ e       s:d dlZ G d d      Zd Zeej                   d	   _        eej                   d	   _        yd d
lmZmZmZmZmZmZmZmZmZ  e j:                  e      Zer	 d dl m!Z!  G d d      Z$ e$       Z%e$e&d<   dde'fdZ( G d d      Zddde'dee)df   deee'df      defdZy# e"$ r ejG                  d       Y \w xY w)    N)DictListOptionalTupleTYPE_CHECKINGUnion)is_availableinit_device_mesh
DeviceMeshc                       e Zd Zy)_DeviceMeshStubN)__name__
__module____qualname__     hC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/distributed/device_mesh.pyr   r      s    r   r   c                       y Nr   r   r   r   _init_device_mesh_stubr      s    r   ztorch.distributed.device_mesh)	_find_pg_by_ranks_and_tag_get_default_group_get_group_tagget_rankget_world_sizeinit_process_groupis_initialized	new_groupProcessGroup)	ArrayLikezCDeviceMesh requires numpy >= 1.21 to be installed for type checkingc                       e Zd ZddZddZdddededdfd	Zddded   fd
Z	dddee   fdZ
ededefd       Zededefd       Zy)_MeshEnvreturnNc                      g | _         i | _        y r   )
mesh_stackchild_to_parent_mappingselfs    r   __init__z_MeshEnv.__init__:   s    02DOIKD(r   r   c                 f    t        | j                        dk(  rt        d      | j                  d   S )Nr   z#No device mesh is currently active!)lenr%   RuntimeErrorr'   s    r   get_current_meshz_MeshEnv.get_current_mesh>   s.    4??#q("#HII??2&&r   device_meshmesh_dimmesh_dim_namec                 H   |j                         }|j                  j                  d|      j                  d|j                  j	                  |            }|D ]#  }t        |j                  ||fd      }||v s"|}% |j                  |   g_        || j                  |<   |S )Nr+   Fmesh_dim_names_init_process_groups)	r   meshswapdimsreshapesizer   device_type_dim_group_infosr&   )	r(   r/   r0   r1   cur_rankpg_ranks_by_dimmesh_1dsub_meshres_sub_meshs	            r   create_child_meshz_MeshEnv.create_child_meshC   s    
 #++-H)..77HEMMK$$))(3O +%++$1#3).	 w&#+L + .9-I-I(-S,TL)9DD((6r   c                 :    | j                   j                  |d       S r   )r&   get)r(   r/   s     r   get_parent_meshz_MeshEnv.get_parent_mesh\   s    //33KFFr   c                     | j                  |      }|j                  }|r9|r7t        |      dk(  sJ d       |d   }|j                  r|j                  |      S y)z
            Return the index of the mesh dim in the parent mesh.
            The device_mesh passed in needs to be sliced out from a parent mesh.
               z%The child mesh can only be a 1D mesh.r   N)rD   r4   r,   _get_mesh_dim_by_name)r(   r/   parent_meshchild_mesh_dim_nameschild_mesh_dim_names        r   get_parent_mesh_dimz_MeshEnv.get_parent_mesh_dim_   sm    
 ..{;K#.#=#= 3,-2;:;2&:1&=#--&<<=PQQr   r:   c                 4    t        |       j                         S r   )_get_device_handledevice_countr:   s    r   num_devices_per_hostz_MeshEnv.num_devices_per_hosto   s    %k2??AAr   c                 B    t               t        j                  |       z  S r   )r   r"   rP   rO   s    r   	num_hostsz_MeshEnv.num_hostss   s     "#x'D'D['QQQr   r#   Nr#   r   )r   r   r   r)   r.   intstrrA   r   rD   rK   staticmethodrP   rR   r   r   r   r"   r"   9   s    	L	'
	 +	 7:	 KN	 	 2	G| 	G@V 	G	< 	HSM 	  
	Bc 	Bc 	B 
	B 
	R3 	R3 	R 
	Rr   r"   _mesh_resourcesr:   c                 $    t        t        | d      S )a:  
        Get the module corresponding to the device_type which is cuda or cuda-like device.
        For example, when the device_type is cuda, the module `torch.cuda` is returned.
        Return None when there is no corresponding module for device_type, otherwise
        return the corresponding module.
        N)getattrtorchrO   s    r   rM   rM   {   s     uk400r   c                      e Zd ZU dZeed<   ej                  ed<   ee	edf      ed<   ddddede
ej                  d	f   dee	edf      d
eddf
dZd Zd Zd dZd!dZdefdZd ZdedefdZdedd fdZ	 d"dee
eef      de
eee   f   fdZd"dee   defdZedefd       Zede	edf   fd       ZdefdZd"dee
eef      defdZdeee      fdZ dedefdZ!y)#r   a  
        DeviceMesh represents a mesh of devices, where layout of devices could be
        represented as a n-d dimension array, and each value of the n-d dimensional
        array is the global id of the default process group ranks.

        DeviceMesh could be used to describe the layout of devices across the cluster,
        and serves as a proxy for communication among the device lists within the cluster.

        DeviceMesh can be used as a context manager.

        .. note::
            DeviceMesh follows SPMD programming model, which means the same PyTorch Python program
            is running on all processes/ranks in the cluster. Therefore, users need to make sure the
            `mesh` array (which describes the layout of devices) should be identical across all ranks.
            Inconsistent `mesh` will lead to silent hang.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
            mesh (ndarray): A multi-dimensional array or an integer tensor describing the layout
                of devices, where the IDs are global IDs of the default process group.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        The following program runs on each process/rank in an SPMD manner. In this example, we have 2
        hosts with 4 GPUs each.
        A reduction over the first dimension of mesh will reduce across
        columns (0, 4), .. and (3, 7), a reduction over the second dimension
        of mesh reduces across rows (0, 1, 2, 3) and (4, 5, 6, 7).

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import DeviceMesh
            >>>
            >>> # Initialize device mesh as (2, 4) to represent the topology
            >>> # of cross-host(dim 0), and within-host (dim 1).
            >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
        r:   r6   .r4   NTr3   r    r5   r#   c                   || _         t        |t        j                        r|j	                         n$t        j
                  |t        j                        | _        || _        t        | j                  j                         j                               | _        t        | j                  | j                  j                  t        |       f      | _        |dk7  r$| j#                          |r| j%                          y y y )N)dtypexla)r:   
isinstancer[   TensordetachtensorrU   r6   r4   tupleflattentolist_flatten_mesh_listhashshapeid_hash_get_or_create_default_groupr5   )r(   r:   r6   r4   r5   s        r   r)   zDeviceMesh.__init__   s      +D dELL1 \\$eii8 I
 #1D ',DII,=,=,?,F,F,H&ID#t66		DRSDJ e# 113'--/ ( $r   c           	         t               }|s
t                t               }| j                  j	                         |kD  r't        d| j                  j	                          d      t        | j                        }|sZ|rX|j                         }||kD  r'||z  dk7  rt        d| d| d| j                   d      |j                  t               |z         | j                  t               k(  j                         }|j                  d      dv sJ |j                  d      dkD  r|d   j                         nd | _        t               S )	Nz=Mesh should not be bigger than default world size, but found z ranks!r   z8DeviceMesh only support homogeneous hardware, but found z ranks and  z	 devices!)r   rF   )r   r   r   r6   numelr-   rM   r:   rN   
set_devicer   nonzeror9   rf   _coordinate_on_dimr   )r(   default_initialized
world_sizedevice_handlerP   rank_coordss         r   rl   z'DeviceMesh._get_or_create_default_group   sO   "0"2&"$')Jyy :-"STXT]T]TcTcTeSffmn  /t/?/?@M&= (5'A'A'C$!55"%99Q>&R%,k2F1GqIYIYHZZce  ((6J)JK  99
2;;=K##A&&000+6+;+;A+>+BA%%' # &''r   c           	         g }| j                   j                  dk(  rk| j                   j                         t               k(  rF|j	                  t        t                     t        t        t                           f       || _        y t        | j                   j                        D ]  }| j                   j                  d|      j                  d| j                   j                  |            }|D ]w  }|j                         }t        |      }| j                         |v s2t        |      |kD  rt!        d| j                   d| d      |j	                  t        |      |f       y  || _        y )NrF   r+   )rankszFEach device mesh dimension should get only one process group, but got z in !)r6   ndimro   r   appendr   r   listranger7   r8   r9   rf   r   r   r,   r-   r;   )r(   dim_group_infosdimr=   dim_meshsubgroup_ranks	dim_groups          r   r5   zDeviceMesh._init_process_groups   sM    <>Oyy~~"tyy'8N<L'L  &&&'9';<U>#345@ %4D!1 !0C '+ii&8&8S&A&I&IDIINN3/'O
 %4)1): %.N$C	==?n<"?3c9&2&lmqmzmzl{ |**8)9%<'" !" ,22!/	!:N K %4 10 %4D!r   c                 D    t         j                  j                  |        | S r   )rX   r%   r{   r'   s    r   	__enter__zDeviceMesh.__enter__  s    &&--d3Kr   c                 @    t         j                  j                          y r   )rX   r%   pop)r(   exc_type	exc_valueexc_tracebacks       r   __exit__zDeviceMesh.__exit__"  s    &&**,r   c                 >    d| j                   j                          dS )NzDeviceMesh())r6   rf   r'   s    r   __repr__zDeviceMesh.__repr__&  s     !1!1!3 4A66r   c                     | j                   S r   )rk   r'   s    r   __hash__zDeviceMesh.__hash__)  s    ::r   otherc                    t        |t              syt        | j                        t        |j                        k(  ry| j                  j                  |j                  j                  k(  xr | j
                  |j
                  k(  S )NFT)r`   r   rj   r6   ri   rg   )r(   r   s     r   __eq__zDeviceMesh.__eq__,  sc    eZ0$))}5::.		5::#3#33 H++u/G/GGr   r1   c                     | j                   j                  dk  r#t        d| j                   j                   d      | j                  |      }t        j                  | ||      }|S )a  
            Slice the current DeviceMesh based on the mesh_dim_name given to create a child
            DeviceMesh.

            Args:
                mesh_dim_name (str): the name of the mesh dimension of the parent DeviceMesh
                to create a child DeviceMesh for.
            Returns:
                A :class:`DeviceMesh` object

            The following program runs on each process/rank in an SPMD manner. In this example, we have 2
            hosts with 4 GPUs each.
            Calling mesh["tp"] on rank 0, 1, 2, 3 would return a 1D child DeviceMesh:([0, 1, 2, 3]).
            Calling mesh["tp"] on rank 4, 5, 6, 7 would return a 1D child DeviceMesh:([4, 5, 6, 7]).
            Calling mesh["dp"] on rank 0, 4 would return a 1D child DeviceMesh:([0, 4]).
            Calling mesh["dp"] on rank 1, 5 would return a 1D child DeviceMesh:([1, 5]).
            Calling mesh["dp"] on rank 2, 6 would return a 1D child DeviceMesh:([2, 6]).
            Calling mesh["dp"] on rank 3, 7 would return a 1D child DeviceMesh:([3, 7]).

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
            rF   zCannot slice a DeviceMesh with z dimension.)r6   rz   r-   rG   rX   rA   )r(   r1   r0   submeshs       r   __getitem__zDeviceMesh.__getitem__6  sa    8 yy~~""5diinn5E[Q  11-@H%77hVGNr   r0   c                    t        | d      st        d      | j                  j                  dk(  rt	        | j
                  d    S |6t        |t              r| j                  |      }t	        | j
                  |    S g }t        | j                  j                        D ]&  }|j                  t	        | j
                  |           ( |S )a  
            Returns a list of ProcessGroups corresponding to the mesh dimensions, or
            returns a single ProcessGroup if mesh_dim is specified or the given mesh has
            only one mesh dimension.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                A list of :class:`ProcessGroup` object when `mesh_dim` is not specified for
                a DeviceMesh with more than 1 dimension; otherwise, returns a single
                :class:`ProcessGroup` object.
            r;   z*DeviceMesh process groups not initialized!rF   r   )hasattrr-   r6   rz   r   r;   r`   rV   rG   r}   r{   )r(   r0   
dim_groupsith_dims       r   	get_groupzDeviceMesh.get_group[  s    " 4!34"#OPPyy~~"0$2G2G2JKK#h,#99(CH0$2G2G2QRR
$TYY^^4G%%143H3H3QR  5 "!r   c                 p    || j                   j                         S | j                   j                  |      S r   )r6   ro   r9   )r(   r0   s     r   r9   zDeviceMesh.size~  s*    (0(8499??$VdiinnX>VVr   c                 .    | j                   j                  S r   )r6   rz   r'   s    r   rz   zDeviceMesh.ndim  s    99>>!r   c                 @    t        | j                  j                        S r   )rd   r6   ri   r'   s    r   ri   zDeviceMesh.shape  s    ))r   c                     t               S )z:
            Returns the current global rank.
            )r   r'   s    r   r   zDeviceMesh.get_rank  s     :r   c                     | j                   dkD  r&|$t        d| j                  j                    dd      |d}| j                  |      }t	        |t
              sJ d       t        |      S )a{  
            Returns the local rank of the given mesh_dim of the DeviceMesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                An integer denotes the local rank.

            The following program runs on each process/rank in an SPMD manner. In this example, we have 2
            hosts with 4 GPUs each.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 0, 1, 2, 3 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 4, 5, 6, 7 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 0, 4 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 1, 5 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 2, 6 would return 2.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 3, 7 would return 3.

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
            rF   zFound the DeviceMesh have z dimensionszJOptional kwarg `mesh_dim` needs to be specified when device_mesh.ndim > 1.r   z1We expect ProcessGroup before calling `get_rank`!)rz   r-   r6   r   r`   r   r   )r(   r0   mesh_dim_groups      r   get_local_rankzDeviceMesh.get_local_rank  s    8 yy1}!1"00@L`  !!^^H5N CBC  N++r   c                 6    | j                   r| j                   S dS )z
            Return the relative indices of this rank relative to all
            dimensions of the mesh. If this rank is not part of the mesh, return None.
            N)rr   r'   s    r   get_coordinatezDeviceMesh.get_coordinate  s    
 /3.E.E4**O4Or   c                     | j                   t        | j                         dk(  rt        d      || j                   vrt        d| dd| j                          | j                   j                  |      S )Nr   zNo `mesh_dim_names` found.zMesh dimension 'z' does not exist.zAvailable mesh dimensions are: )r4   r,   KeyErrorindex)r(   r1   s     r   rG   z DeviceMesh._get_mesh_dim_by_name  s    ""*c$2E2E.F!.K0  D$7$77&}o5FG5d6I6I5JK  &&,,];;r   rT   rS   r   )"r   r   r   __doc__rV   __annotations__r[   ra   r   r   r   boolr)   rl   r5   r   r   r   r   objectr   r   rU   r   r   r   r9   propertyrz   ri   r   r   r   rG   r   r   r   r   r      s   %	N ll sCx11 9=)-	0	0 k12	0
 %U38_5	0 #'	0 	0<!	(F)	4V		-	7c 	7		 	4 	#	S #	\ #	L 9=!	"$U38_5!	"<l!334!	"F	W# 	W# 	W 
	"# 	" 
	" 
	*5c? 	* 
	*	c 	(	,8E#s(O+D (	,PS (	,T	PHT#Y$7 	P
	<s 
	<s 
	<r   )r4   
mesh_shape.r4   r#   c          	      j   |kt        t        |            t        |      k7  rt        dd|       t        |      t        |      k7  r%t        ddt        |       dt        |       d      t        j                  t        j                  |            j                  |      }t        | ||      }|S )aG  
        Initializes a `DeviceMesh` based on `device_type`, `mesh_shape`, and `mesh_dim_names` parameters.

        This creates a DeviceMesh with an n-dimensional array layout, where `n` is the length of `mesh_shape`.
        If `mesh_dim_names` is provided, each dimension is labeled as `mesh_dim_names[i]`.

        .. note::
            `init_device_mesh` follows SPMD programming model, meaning the same PyTorch Python program
            runs on all processes/ranks in the cluster. Ensure `mesh_shape` (the dimensions of the nD array
            describing device layout) is identical across all ranks. Inconsistent `mesh_shape` may lead to hanging.

        .. note::
            If no process group is found, init_device_mesh will initialize distributed process group/groups
            required for distributed communications behind the scene.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
            mesh_shape (Tuple[int]): A tuple defining the dimensions of the multi-dimensional array
                describing the layout of devices.
            mesh_dim_names (Tuple[str], optional): A tuple of mesh dimension names to assign to each dimension
                of the multi-dimensional array describing the layout of devices. Its length must match the length
                of `mesh_shape`. Each string in `mesh_dim_names` must be unique.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import init_device_mesh
            >>>
            >>> mesh_1d = init_device_mesh("cuda", mesh_shape=(8,))
            >>> mesh_2d = init_device_mesh("cuda", mesh_shape=(2, 8), mesh_dim_names=("dp", "tp"))

        z"Each mesh_dim_name must be unique.z/Found repeated mesh_dim_name in mesh_dim_names z6mesh_shape and mesh_dim_names should have same length!zFound len(mesh_dim_names): z and len(mesh_shape):.)r:   r6   r4   )	r,   setr-   r[   arangemathprodviewr   )r:   r   r4   r6   r/   s        r   r
   r
     s    P %3~&'3~+>>"8EnEUV 
 :#n"55"L1#n2E1FF[\_`j\k[llmn 
 ||DIIj1277
C #)
 r   )cuda)*loggingr   typingr   r   r   r   r   r   r[   torch.distributedr	   __all__sysr   r   modulesr   r
   "torch.distributed.distributed_c10dr   r   r   r   r   r   r   r   r   	getLoggerr   loggernumpy.typingr    ImportErrorwarningr"   rX   r   rV   rM   rU   r   r   r   <module>r      s0     D D  *|
, ~  ?NCKK/0; 0 KK'
 
 
 Wx(F 	.>R >R@ !)
OX*1 1F< F<X
 59	<<#s(O< !sCx1	<
 
<q  	NNU	s   C C76C7