
    Ph&A                         d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z	d dl
mZ d dlmc mc mZ d dlmc mc mZ d dlmZmZmZmZmZ d dlmZmZmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$ d d	l%m&Z& 	 d d
l'm(Z) e	jX                  jZ                  Z-de	j\                  j^                  dee0df   dee1e0f   de0fdZ2de	j\                  j^                  dee0df   dee1e0f   de3fdZ4 G d d      Z5y# e*$ r	 d dl'm+Z) Y w xY w)    N)castDictListOptionalSequenceTuple)_is_inplace_op_is_out_variant_opOpInfoOpSchemaOutputSpecType)DTensorSpec	Replicate
TensorMeta)is_rng_supported_mesh)redistribute_local_tensor)ShardingPropagator)convolution_backward_handlerconvolution_handler)
DeviceMesh)_cxx_pytree)_pytreeop_callargs.kwargsreturnc                 P     | j                   |i |}|t        ur|S t        d      )z
    Decomposes a op to core ATen op, this handler is mostly here
    for inference mode usage where the ops are not core aten ops.
    zDecomposition failed)	decomposeNotImplementedRuntimeError)r   r   r   rs       mC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/distributed/_tensor/dispatch.pydecompose_handlerr#   $   s4     	4*6*A122    c                     t        t        j                  |d         }t        t        j                  |d         }|j                  |j                  k(  S )Nr      )r   torchTensorshape)r   r   r   lhsrhss        r"   is_same_size_handlerr,   4   s?    
 u||T!W
%C
u||T!W
%C99		!!r$   c                       e Zd ZdZddZdej                  j                  dee	df   de
ee	f   de	fd	Zed
ededdfd       Zdej                  j                  dee	df   de
ee	f   defdZede	dede	fd       Zy)OpDispatcherz
    Op dispatching class instance to handle args/kwargs pre-processing (un-wrapping), sharding
    propagation, redistribute local args, local compute, and post-processing (re-wrapping). It
    also handles any op specific logic if necessary.
    r   Nc           
      "   t               | _        t        j                  j                  t        j
                  j                  t        j                  j                  t        j                  j                  t        j                  j                  t        j                  j                  t        j                  j                  t        j                  j                  t        j                  j                  t        j                  j                  h
| _        t        j                   j                  t"        t        j$                  j                  t&        t        j(                  j                  t*        t        j,                  j                  t.        i| _        y N)r   sharding_propagatoratennative_dropoutdefaultnormal_	rand_like
randn_likerandint_like	low_dtypelow_dtype_outuniform_	bernoulli
bernoulli_float_random_opslinearr#   is_same_sizer,   convolutionr   convolution_backwardr   _custom_op_handlers)selfs    r"   __init__zOpDispatcher.__init__E   s    #5#7 ''LL  NN""OO##%%''++MM!!NN""OO!!
 KK!2%%';$$&9%%--/K	$
 r$   r   r   .r   c                 $	   || j                   v r | j                   |   |||      S | j                  |||      }| j                  j                  |       |j                  }|J d       |j
                  }|j                         |j                  }|j                  j                  j                  j                  }|d}	ndt        dt        j                  fd}
t        |t              r
 |
|      }	nt        |t               r|D cg c]  }| |
|      nd }	}t        |	t"              sJ d|	v rvt%        |d   j&                        }t)        d| d      |j*                  r-|j,                  J | j/                  ||j,                  d          |j0                  r?t3        j4                  t7        t"        t8           |j:                        |j0                        n|j:                  }t7        t<        t8        d	f   |      }|| j>                  v rtA        |      r{tB        jD                  stG        d
      tB        jD                  jI                  t7        tJ        jL                  |d         jN                        5   ||i |jP                  }	ddd       n ||i |jP                  }	|j                  |tR        jT                  jV                  k(  r{tY        t[        j\                               D cg c]  }d }}t[        j^                  |	       ta        tc        d |            }te        jf                  th        jj                  |d      }	tm        |      r|j                  |d   S yto        |      rt        |j                  tp              s|j                  fn|j                  }g }d}|j                  jr                  D ]d  }|jt                  st7        tJ        jL                  ||jv                           }t7        t        ||         |_'        |jy                  |       |dz  }f t{        |      dk\  sJ d       t{        |      dkD  rtq        |      S |d   S | j}                  	|j                        S c c}w # 1 sw Y   xY wc c}w )z(
        Main dispatching logic
        Nz"output sharding should not be Nonespecr   c                    | j                   h| j                   j                  }| j                   j                  }t        |      dk(  rt	        j
                  d|      S t	        j                  g |      S t        |  d      )Nr    )dtypez has no tensor metadata.)tensor_metar)   rK   lenr'   zerostensorr    )rH   r)   rK   s      r"   default_tensorz-OpDispatcher.dispatch.<locals>.default_tensor   sr    ''3 $ 0 0 6 6 $ 0 0 6 6u:?#(;;r#?? $)<<%#@@*dV3K+LMMr$   r   zreturn type z in DTensor op is not supported.zA CudaRNGStateTracker instance must be instantiated before executing a random op over a DTensor. Try calling random.manual_seed() or distribute_tensor() before executing a DTensor random op.c                 
    | d uS r0   rJ   )xs    r"   <lambda>z'OpDispatcher.dispatch.<locals>.<lambda>   s    $r$   Tr&   z,out variant should have at least one out arg)?rD   unwrap_to_op_infor1   	propagateoutput_shardingmeshget_coordinateoutput_specschemaop_schemareturnsr   r'   r(   
isinstancer   r   strtypeNotImplementedErrorneeds_redistributeschema_suggestionsredistribute_local_argsargs_tree_specpytreetree_unflattenr   object
local_argsr   r?   r   random_rng_trackerr    _distribute_regiondtensorDTensor_speclocal_kwargsr2   equalr4   rangedistget_world_sizeall_gather_objectlistfilter	functoolsreduceoperatorand_r	   r
   tuple	argumentsis_outnameappendrM   wrap)rE   r   r   r   op_inforV   rW   rH   ret_listlocal_resultsrP   sret_typelocal_tensor_args_obj_listoutput_specsout_dtsspec_idxargumentout_dts                        r"   dispatchzOpDispatcher.dispatchZ   sH    d...44++G4WdFKK (($?  **73!11*P,PP*|| ( #..D~~((0088H| )-N N N dK0$24$8Mh/ OS%NRQ]q)Dd " % &mT:::},#&x{'7'7#81*8*4ST  11&99EEE,,_??B )) %%fw'9'9:G<R<R ''  !%U63;%79J K$***/DT/J**&@  ((;;$q'288 %,->$W'BVBV$WM 
 !(): Sg>R>R S &&.$**,,,*/0C0C0E*FG*FQD*FG&&x?'> IJ ) 0 0$ O'"**6Aw( "/"="=uE !,,.$00 
 GH#OO55??!'//6(--3HIF#'\(5K#LFLNN6*MH 6 w<1$T&TT$%(\A%55>E71:E99]O,G,GHH[%H  Hs   
Q;>R 4	R R
r   suggested_input_schemac                    | j                   )t        t        j                  |j                              }n|j                  }g }t        | j                        D ]  \  }}||   }t        |t              r]t        t        j                  | j                  |         }||k7  rt        |||      }|j                  |       f|j                  |       x|j                  |        t        |      | _        y r0   )re   r|   rf   tree_leavesargs_schema	enumerateflat_args_schemar^   r   r   r'   r(   ri   r   r   )	r   r   flatten_args_schema_to_reshardnew_local_argsiarg_specreshard_arg_speclocal_tensorresharded_local_tensors	            r"   rd   z$OpDispatcher.redistribute_local_args   s     !!--2""#9#E#EF.* .D-O-O*')$W%=%=>KAx=a@(K0#ELL'2D2DQ2GH//-F$h0@.* #))*@A")),7%%&67 ? #>2r$   c                 p   | j                   j                  j                  |d       }|'|j                  rt	        j
                  |      \  }}|}n|d }}g }i }	g }
i }d }|D ]O  }t        |t        j                        rc|j                  |j                         |
j                  |j                         |||j                  k7  sft        | d      |j                  }t        |t        j                        r|j                   dk(  rv|t|j                  t#        |t%               f|j                   z  t'        |j(                  |j+                         |j,                                     |
j                  |        t/        | d      |j                  |       |
j                  |       R |j1                         D ]  \  }}t        |t        j                        rK|j                  |	|<   |j                  ||<   |||j                  k7  sPt        | d      |j                  }kt        |t        j                        rt/        | d      ||	|<   |||<    |J d| d       t3        |t5        ||rt	        j6                  ||      n
t9        |      |	|      |t9        |
      ||      }|S )	Nz4: DTensor does not support cross-mesh operation yet!r   )r)   striderK   )rL   zw: got mixed torch.Tensor and DTensor, need to convert all torch.Tensor to DTensor before calling distributed operators!z*found no DeviceMesh from dtensor args for !)schema_info)r1   op_to_schema_infogetneeds_pytreerf   tree_flattenr^   rm   rn   r   ro   _local_tensordevice_meshra   r'   r(   ndimr   r   r   r)   r   rK   r    itemsr   r   rg   r|   )rE   r   r   r   runtime_schema_info	tree_args	args_spec	args_listr   kwargs_schemari   rp   rW   argkvr   s                    r"   rT   zOpDispatcher.unwrap_to_op_info  s    #66HHLLT
 */B/O/O#)#6#6t#< Iy*3I#'yI$&+-#%
*,%)C#w/""399-!!#"3"34#s.1&i'[\  ??DC.88q=T%5&&# &[NTYY6(2&)ii

CII) %%c*&") $Y Y 
 ""3'!!#&? B LLNDAq!W__-#$77a "#//Q#q}},1&i'[\  ==DAu||,"i  U U 
 $%a "#Q% #( X#MgYVW!XX %%k9=;'/ *
 r$   resrH   c                 t   d }t        | t        j                        r	 || |      S t        | t        t        f      r|t        |t        t        f      sJ d| d       g }t        | |      D ]  \  }}t        |t        t        f      r[t        |t        t        f      rE|j                   t        |      t        ||      D cg c]  \  }} |||       c}}             w|||j                   |||             |j                  d         t        | t              rt	        |      S |S | S c c}}w )Nc           	      D   |t        |t              sJ d| d       |j                  J t        j                  | |j
                  |j                  |j                  j                  |j                  j                  | j                  |j                  j                        S )NzBoutput spec does not match with output! Expected DTensorSpec, got .)r)   rK   requires_gradr   )r^   r   rL   rm   rn   rW   
placementsr)   rK   r   r   )r   rH   s     r"   to_dtz OpDispatcher.wrap.<locals>.to_dte  s    #
k) \STXSYYZ[\  ##///??		&&,,&&,,!//''.. r$   zAoutput spec does not match with output! Expected list/tuple, got r   )r^   r'   r(   rv   r|   zipr   r`   )r   rH   r   res_lister   eesss           r"   r   zOpDispatcher.wrapc  s%   	 c5<<(d##dE]+#
tUm) [RSWRXXYZ[  HC1 a$/Jq4-4POOGDGQPQ,Svr2U2r],S$TU]q}OOE!QK0OOD) ' '1e&<5?J(J J -Ts   D4)r   N)__name__
__module____qualname____doc__rF   r'   _ops
OpOverloadr   rh   r   r_   r   staticmethodr   r   rd   rT   r   r   rJ   r$   r"   r.   r.   >   s   
*GI&&GI FCK GI S&[!	GI
 
GIR 33 (3 
3 3@]&&] FCK ] S&[!	]
 
]~ %& % %6 % %r$   r.   )6rx   rz   typingr   r   r   r   r   r   r'   torch.distributeddistributedrs   torch.distributed._tensor.api_tensorapirm    torch.distributed._tensor.randomrj   #torch.distributed._tensor.op_schemar	   r
   r   r   r   )torch.distributed._tensor.placement_typesr   r   r   r   &torch.distributed._tensor.redistributer   'torch.distributed._tensor.sharding_propr   !torch.distributed._tensor.tp_convr   r   torch.distributed.device_meshr   torch.utilsr   rf   ImportErrorr   opsr2   r   r   rh   r_   r#   boolr,   r.   rJ   r$   r"   <module>r      s     > >    / / 1 1  Y X B L F 5.1 yy~~3ZZ""3

3 f3 	3 "ZZ"""

" f" 
	"K KA  .-.s   4C/ /C=<C=