
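# Overview (explanatory note): redistributing a DTensor from one placement layout to another is
# handled one mesh dimension at a time.  _decompose_reshard() rewrites any Shard -> Shard step
# that is really a reshard into Shard -> Replicate -> Shard, _replicate_then_shard() orders the
# per-dimension steps so replication runs from the inner-most mesh dimension outwards and
# re-sharding from the outer-most inwards, and redistribute_local_tensor() then applies the
# per-dimension transforms.  Redistribute wraps the whole pipeline in an autograd.Function so
# gradients can flow back to the original placements.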
    Ph'                     &   d dl mZmZmZmZ d dlZd dlmc mc m	Z
 d dlmZmZmZmZmZ d dlmZ eeeeef   f   ZdedefdZdee   dee   fdZd	ej.                  d
ededej.                  fdZ G d dej2                  j4                        Zy)    )castDictListTupleN)_PartialDTensorSpec	Placement	ReplicateShard)
DeviceMeshvalreturnc                     | \  }\  }}|j                         s|j                         r|j                         r| S |j                         s|j                         r|j                         r|S y)z[
    Replicate from inner to outer dimension.
    Shard from outer to inner dimension.
    r   )is_replicate
is_partialis_shard)r   icurrenttargets       qC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/distributed/_tensor/redistribute.py_replicate_then_shardr      sd    
 A!2!2!4':J:J:Lr	



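

# NOTE (illustrative, not from the original source): _replicate_then_shard is used as a sort
# key over (mesh_dim, (current, target)) steps.  A Shard -> Replicate/Partial step gets key
# -mesh_dim and a Replicate/Partial -> Shard step gets key +mesh_dim, so with a stable sort the
# inner-most mesh dimensions are replicated first and the outer-most mesh dimensions are
# re-sharded first.  For example, (1, (Shard(0), Replicate())) has key -1 and sorts ahead of
# (0, (Shard(0), Replicate())) with key 0, which in turn sorts ahead of
# (1, (Replicate(), Shard(0))) with key 1.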


def _decompose_reshard(val: List[_PlacementItem]) -> List[_PlacementItem]:
    """
    Decompose Si -> Sj into Si -> R -> Sj.
    There are two ways shardings can differ within a mesh dimension:
      1) sharding on different tensor dimensions, e.g. Shard(0) -> Shard(1)
      2) different sub-shards of a repeated shard ("mis-aligned sharding")
          (Shard(0), Shard(0)) -> (Replicate(), Shard(0))
          Here the Shard(0) -> Shard(0) for the second mesh dimension is actually
          a reshard, because in the first case it is a sub-shard of a tensor dimension 0
          that is already sharded, while in the second case it is the first sharding on
          tensor dimension 0.
    """
    from collections import defaultdict

    # count how often each tensor dimension has been sharded on the current/target side,
    # so repeated shardings of the same dimension can be detected as mis-aligned
    repeat_dim_current: Dict[int, int] = defaultdict(int)
    repeat_dim_target: Dict[int, int] = defaultdict(int)

    output: List[_PlacementItem] = []

    for i, (current, target) in val:
        if current.is_shard():
            repeat_dim_current[cast(Shard, current).dim] += 1
        if target.is_shard():
            repeat_dim_target[cast(Shard, target).dim] += 1
        if (
            isinstance(current, Shard)
            and isinstance(target, Shard)
            and (
                current.dim != target.dim
                or repeat_dim_current[current.dim] != repeat_dim_target[target.dim]
            )
        ):
            # decompose Shard(i) -> Shard(j) into Shard(i) -> Replicate() -> Shard(j)
            output.append((i, (current, Replicate())))
            output.append((i, (Replicate(), target)))
        else:
            output.append((i, (current, target)))

    return output
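

# NOTE (illustrative, not from the original source): for a 2-D mesh with
# current = (Shard(0), Shard(0)) and target = (Replicate(), Shard(0)), the step on mesh
# dimension 0 is emitted unchanged as Shard(0) -> Replicate(), while the step on mesh
# dimension 1 is mis-aligned (the target Shard(0) would be the first shard of tensor dim 0
# instead of a sub-shard of an already sharded dim 0), so it is decomposed into
# Shard(0) -> Replicate() followed by Replicate() -> Shard(0).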


def redistribute_local_tensor(
    local_tensor: torch.Tensor,
    current_spec: DTensorSpec,
    target_spec: DTensorSpec,
) -> torch.Tensor:
    """
    Redistribute the local tensor (torch.Tensor) from the current DTensorSpec to
    the target DTensorSpec, which involves the necessary collective calls to transform
    the local shard of the DTensor from its current spec to the target spec.
    """

    if current_spec.mesh != target_spec.mesh:
        # TODO: alltoall/permute reshuffling to change device_mesh if they are not the same
        raise NotImplementedError("Cross device mesh comm not supported yet!")

    new_local_tensor = None

    current_placements = current_spec.placements
    target_placements = target_spec.placements
    sorted_placements = list(enumerate(zip(current_placements, target_placements)))
    sorted_placements = _decompose_reshard(sorted_placements)
    sorted_placements.sort(key=_replicate_then_shard)

    device_mesh = current_spec.mesh

    for i, (current, target) in sorted_placements:
        my_coordinate = device_mesh.get_coordinate()
        num_chunks = device_mesh.size(i)

        if my_coordinate is None:
            # if this rank is not part of the mesh, simply return the local tensor,
            # which should be an empty tensor
            return local_tensor

        if current == target:
            # short cut, just reuse the original local tensor
            new_local_tensor = local_tensor
            continue

        if target.is_replicate():
            # Case 1: target is Replicate
            if current.is_partial():
                partial_spec = cast(_Partial, current)
                new_local_tensor = partial_spec._to_replicate(
                    local_tensor, device_mesh, i
                )
            elif current.is_shard():
                current_placement = cast(Shard, current)
                new_local_tensor = current_placement._to_replicate_tensor(
                    local_tensor, current_spec.shape, device_mesh, i
                )
            else:
                raise RuntimeError(
                    f"redistribute from {current_placements} to {target_placements} not supported yet"
                )
        elif target.is_shard():
            # Case 2: target is Shard
            target_placement = cast(Shard, target)
            if current.is_partial():
                partial_spec = cast(_Partial, current)
                new_local_tensor = partial_spec._to_shard(
                    local_tensor, device_mesh, i, target_placement
                )
            elif current.is_replicate():
                # split the tensor and return the corresponding cloned local shard
                shards, _ = target_placement._split_tensor(
                    local_tensor,
                    num_chunks,
                    with_padding=False,
                    contiguous=False,
                )
                new_local_tensor = shards[my_coordinate[i]].clone()
            else:
                # NOTE: this case should not be reached after _decompose_reshard, which
                # rewrites Shard(i) -> Shard(j) into Shard(i) -> Replicate() -> Shard(j)
                assert (
                    current.is_shard()
                ), f"Current placement should be shard but found {current}"
                shard_spec = cast(Shard, current)
                if shard_spec.dim != target_placement.dim:
                    # TODO: enable this with all_to_all
                    raise NotImplementedError(
                        "Changing sharding dim is not supported yet!"
                    )
        elif target.is_partial():
            if current.is_replicate():
                # For replicate -> partial, zero out all ranks of the current mesh
                # dimension except rank 0 so only one rank keeps the data; this is a
                # "zero cost" reshard that requires no communication.
                if my_coordinate[i] != 0:
                    new_local_tensor = local_tensor.zero_()
                else:
                    new_local_tensor = local_tensor
            else:
                raise RuntimeError(
                    f"redistribute from {current_placements} to {target_placements} not supported yet"
                )

        assert new_local_tensor is not None
        local_tensor = new_local_tensor

    assert new_local_tensor is not None, "redistribute failed!"

    return new_local_tensor


class Redistribute(torch.autograd.Function):
    @staticmethod
    def forward(
        ctx,
        input: "dtensor.DTensor",
        device_mesh: DeviceMesh,
        placements: Tuple[Placement, ...],
    ):
        current_spec = input._spec
        ctx.current_spec = current_spec
        target_spec = DTensorSpec(
            device_mesh, placements, tensor_meta=input._spec.tensor_meta
        )

        local_tensor = input._local_tensor
        output = redistribute_local_tensor(local_tensor, current_spec, target_spec)

        return dtensor.DTensor(
            output,
            device_mesh,
            target_spec.placements,
            shape=input.shape,
            dtype=input.dtype,
            requires_grad=input.requires_grad,
            stride=input.stride(),
        )

    @staticmethod
    def backward(ctx, grad_output: "dtensor.DTensor"):
        previous_spec = ctx.current_spec
        current_spec = grad_output._spec

        # When redistributing the gradient back to the previous placements, keep any
        # placement that used to be _Partial as Replicate instead: converting a
        # replicated gradient back to partial would only force an extra reduction
        # later, which is more expensive than keeping it replicated.
        target_placements: List[Placement] = []
        for current, target in zip(current_spec.placements, previous_spec.placements):
            if not current.is_partial() and target.is_partial():
                target_placements.append(Replicate())
            else:
                target_placements.append(target)
        target_spec = DTensorSpec(
            previous_spec.mesh,
            tuple(target_placements),
            tensor_meta=previous_spec.tensor_meta,
        )

        local_tensor = grad_output._local_tensor
        output = redistribute_local_tensor(local_tensor, current_spec, target_spec)
        output_dtensor = dtensor.DTensor(
            output,
            target_spec.mesh,
            target_spec.placements,
            shape=grad_output.shape,
            dtype=grad_output.dtype,
            requires_grad=grad_output.requires_grad,
            stride=grad_output.stride(),
        )

        return (
            output_dtensor,
            None,
            None,
        )
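

# NOTE (illustrative sketch, not part of the original file): DTensor.redistribute() is the
# expected entry point into Redistribute.apply(input_dtensor, device_mesh, target_placements).
# The quick check below only exercises the placement-planning helpers above, so it needs no
# initialized process group; the underscore-prefixed names are hypothetical example values.
if __name__ == "__main__":
    _current = (Shard(0), Shard(0))
    _target = (Replicate(), Shard(0))
    _steps = _decompose_reshard(list(enumerate(zip(_current, _target))))
    _steps.sort(key=_replicate_then_shard)
    # Expected order: mesh dim 1 Shard(0) -> Replicate(), mesh dim 0 Shard(0) -> Replicate(),
    # then mesh dim 1 Replicate() -> Shard(0).
    for _mesh_dim, (_src, _dst) in _steps:
        print(f"mesh dim {_mesh_dim}: {_src} -> {_dst}")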