
import logging
import os
from functools import wraps
from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union

import torch
import torch.nn as nn

from torch.distributed._tensor.placement_types import Placement, Replicate
from torch.distributed.device_mesh import DeviceMesh

log = logging.getLogger(__name__)

# Tracks whether the optional torch_xla dependency imported successfully.
TORCH_XLA_INITIALIZED = False
try:
    import torch_xla.core.xla_model as xm  # noqa: F401
    import torch_xla.runtime as xr
    from torch_xla.experimental.xla_sharded_tensor import XLAShardedTensor
    from torch_xla.experimental.xla_sharding import mark_sharding, Mesh, ShardingType

    TORCH_XLA_INITIALIZED = True
except ImportError as e:
    log.warning(e.msg)


def with_xla(func: Callable) -> Callable:
    """Gate the wrapped API on torch_xla being importable and enable XLA SPMD mode."""
    assert func is not None

    @wraps(func)
    def wrapper(self, *args: Tuple[object], **kwargs: Dict[str, Any]) -> None:
        if TORCH_XLA_INITIALIZED:
            # Enable the XLA SPMD execution mode before running the wrapped API.
            os.environ["XLA_USE_SPMD"] = "1"
            return func(self, *args, **kwargs)
        else:
            raise ImportError(
                "torch.distributed._tensor._xla API requires torch_xla package installation."
            )

    return wrapper


@with_xla
def convert_to_xla_mesh(dt_mesh: DeviceMesh) -> "Mesh":
    """
    Convert DTensor `dt_mesh` to an XLAShardedTensor `Mesh`.

    Example (1x4 logical device mesh topology):
      ```
      dt_mesh = DeviceMesh("xla", [[1, 2, 3, 4]])
      dt_mesh.shape
      >> torch.Size([1, 4])

      mesh = convert_to_xla_mesh(dt_mesh)
      mesh_shape
      >> [1, 4]
      ```
    """
    assert dt_mesh.size() == xr.global_runtime_device_count()
    return Mesh(
        dt_mesh.mesh.flatten(), tuple(dt_mesh.mesh.size()), dt_mesh.mesh_dim_names
    )


@with_xla
def convert_to_xla_partition_spec(
    tensor: torch.Tensor, placements: Sequence[Placement]
) -> Tuple[Union[Tuple, int, None]]:
    """
    Convert DTensor `placements` to an XLAShardedTensor `partition_spec`.
    This supports Shard and Replicate Placement types.

    Example:
      ```
      # Mesh partitioning, 1/4-th of the input with replicated overlaps.
      # The first input tensor dimension is sharded across the second mesh
      # dimension, and the rest is replicated over the first mesh dimension.
      t = torch.randn(4, 8, 8)
      dt_mesh = DeviceMesh("xla", torch.arange(8).reshape(2, 4))
      placements = [Replicate(), Shard(0)]
      my_dtensor = distribute_tensor(t, dt_mesh, placements)

      # `placements = [Replicate(), Shard(0)]` describes sharding per mesh dim,
      # and this is equivalent to `partition_spec = (1, None, None)` which is
      # sharding per input tensor dimension.
      partition_spec = convert_to_xla_partition_spec(t, placements)
      >> (1, None, None)
      ```
    """
    # Per tensor-dimension sharding: entry i holds the mesh dimension that
    # shards tensor dimension i, or None if that tensor dimension is replicated.
    sharding_spec = [None] * len(tensor.shape)
    for mesh_idx, spec in enumerate(placements):
        if spec.is_shard():
            # Map mesh_idx to the tensor dimension it shards (spec.dim).
            tensor_idx = spec.dim  # type: ignore[attr-defined]
            sharding_spec[tensor_idx] = mesh_idx  # type: ignore[call-overload]
        elif spec.is_replicate():
            # Replicated tensor dimensions stay None.
            continue
        else:
            raise ValueError(f"Unsupported placement type: {type(spec).__name__}")
    return tuple(sharding_spec)  # type: ignore[return-value]


@with_xla
def xla_distribute_tensor(
    tensor: torch.Tensor,
    device_mesh: DeviceMesh,
    placements: Optional[Sequence[Placement]] = None,
) -> "XLAShardedTensor":
    """
    Distribute a torch.Tensor to the `device_mesh` according to the `placements`
    specified. The rank of `device_mesh` and `placements` must be the same.

    Args:
        tensor (torch.Tensor): torch.Tensor to be distributed. Note that if you
            want to shard a tensor on a dimension that is not evenly divisible by
            the number of devices in that mesh dimension, we use `torch.chunk`
            semantics to shard the tensor and scatter the shards.
        device_mesh (:class:`DeviceMesh`, optional): DeviceMesh to distribute the
            tensor on; if not specified, it must be called under a DeviceMesh
            context manager, default: None
        placements (List[:class:`Placement`], optional): the placements that
            describe how to place the tensor on the DeviceMesh; must have the same
            number of elements as `device_mesh.ndim`. If not specified, we will
            by default replicate the tensor across the `device_mesh` from the
            first rank of each dimension of the `device_mesh`.

    Returns:
        A :class:`XLAShardedTensor` object.

    .. note:: We return an XLAShardedTensor with a global view and access to local
        shards. The successive ops would be programmed as if on a single device and
        without calling any explicit collective ops. The actual sharded computation
        on the sharding-annotated tensor happens lazily and is transparent to the
        user. In the future, we will introduce a new DTensor type for this kind of
        programming mode (single-controller) and return that instead.
    """
    # device_mesh is not optional in xla_distribute_tensor
    dt_mesh = device_mesh
    assert dt_mesh.device_type == "xla"

    # Convert the DTensor device mesh to an XLA mesh.
    xla_mesh = convert_to_xla_mesh(dt_mesh)
    assert xla_mesh.mesh_shape == tuple(dt_mesh.mesh.size())

    # Move the tensor to the mesh's device type if it is not a meta tensor.
    if not tensor.is_meta:
        tensor = tensor.to(dt_mesh.device_type)
    # Default to replication across every mesh dimension if unspecified.
    if placements is None:
        placements = [Replicate() for _ in range(dt_mesh.ndim)]
    assert len(placements) == dt_mesh.ndim, (
        "`placements` must have the same length as `device_mesh.ndim`! "
        f"Found placements length: {len(placements)}, "
        f"and device_mesh.ndim: {dt_mesh.ndim}."
    )
    # Convert the placements to an XLA partition spec.
    partition_spec = convert_to_xla_partition_spec(tensor, placements)
    assert len(tensor.shape) == len(partition_spec), (
        "`partition_spec` from `placements` must have the same length as `tensor.shape`! "
        f"Found tensor shape length: {len(tensor.shape)}, "
        f"and partition_spec length: {len(partition_spec)}."
    )

    global_tensor = tensor
    if type(tensor).__name__ == "DTensor":
        raise ValueError(
            "Cannot distribute a DTensor with local tensor on xla devices. "
            "The input tensor must be global."
        )
    if type(tensor).__name__ == "XLAShardedTensor":
        sharding_type = tensor.sharding_type  # type: ignore[attr-defined]
        assert (
            sharding_type is None or sharding_type == ShardingType.REPLICATED
        ), "XLAShardedTensor `tensor` is already annotated with non-replication sharding. "
        global_tensor = tensor.global_tensor  # type: ignore[attr-defined]
    assert global_tensor is not None, "distributing a tensor should not be None"

    # Annotate the sharding and return an XLAShardedTensor.
    xla_tensor = mark_sharding(global_tensor, xla_mesh, partition_spec)
    return xla_tensor


@with_xla
def xla_distribute_module(
    module: nn.Module,
    device_mesh: Optional[DeviceMesh] = None,
    partition_fn: Optional[Callable[[str, nn.Module, DeviceMesh], None]] = None,
    input_fn: Optional[Callable[..., None]] = None,
    output_fn: Optional[Callable[..., None]] = None,
) -> nn.Module:
    raise NotImplementedError
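

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of this module's public API): a minimal,
# hedged example of how `xla_distribute_tensor` is typically invoked, mirroring
# the docstring examples above. It assumes torch_xla is installed, the process
# sees exactly four XLA devices, and the distributed/XLA runtime is already set
# up so that `DeviceMesh("xla", ...)` can be constructed. The mesh shape,
# tensor size, and the `Shard` import below are illustrative assumptions only.
# ---------------------------------------------------------------------------
if __name__ == "__main__" and TORCH_XLA_INITIALIZED:
    from torch.distributed._tensor.placement_types import Shard

    # Build a 2x2 logical mesh over the four assumed XLA devices.
    demo_mesh = DeviceMesh("xla", torch.arange(4).reshape(2, 2))
    t = torch.randn(8, 8)

    # Replicate over the first mesh dim, shard tensor dim 0 over the second;
    # this corresponds to partition_spec (1, None) on the 2-D tensor.
    sharded = xla_distribute_tensor(t, demo_mesh, [Replicate(), Shard(0)])

    # `sharded` is an XLAShardedTensor with a global view; subsequent ops are
    # traced as if on a single device and partitioned lazily by XLA.
    log.info("sharded tensor global shape: %s", tuple(sharded.global_tensor.shape))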