
import warnings
import sys
import torch
import torch.distributed as dist
import torch.distributed.distributed_c10d as c10d
from typing import Tuple, Union, List, Optional, cast, TYPE_CHECKING

from . import _functional_collectives_impl as fun_col_impl
from ._functional_collectives_impl import _register_tensor_wrapper
from torch.fx.experimental.proxy_tensor import get_innermost_proxy_mode
from torch._custom_ops import impl_abstract

try:
    from torch.utils._cxx_pytree import tree_map_only
except ImportError:
    from torch.utils._pytree import tree_map_only  # type: ignore[no-redef]

if torch._running_with_deploy():

    def is_torchdynamo_compiling():
        """Can't import torchdynamo in torchdeploy builds currently."""
        return False

else:
    try:
        from torch._dynamo.external_utils import is_compiling as is_torchdynamo_compiling
    except Exception:
        warnings.warn(
            "Unable to import torchdynamo util `is_torchdynamo_compiling`, "
            "so won't support torchdynamo correctly"
        )

        def is_torchdynamo_compiling():
            return False

# The canonical set of group descriptions accepted by the functional collectives below.
RANK_TYPES = Union[
    List[int],
    List[List[int]],
    dist.ProcessGroup,
    "dist._tensor.DeviceMesh",
    Tuple["dist._tensor.DeviceMesh", int],
]

def wait_tensor(tensor):
    """
    Wait on a tensor returned by the collectives ops.

    Waiting follows device semantics, which means blocking on CPU and synchronizing streams on CUDA.
    """
    return torch.ops.c10d_functional.wait_tensor(tensor)

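# Illustrative usage sketch (hypothetical helper, not part of the upstream module):
# it assumes a default process group was initialized elsewhere, e.g. via
# torch.distributed.init_process_group().
def _example_wait_tensor_usage(t: torch.Tensor) -> torch.Tensor:
    # Launch a functional all_reduce directly through the dispatcher op, then block
    # until the result is ready.  Most callers use the wrappers below instead, which
    # return an AsyncCollectiveTensor that waits lazily on first use.
    ranks = list(range(dist.get_world_size()))
    out = torch.ops.c10d_functional.all_reduce(t, "sum", "", ranks, len(ranks))
    return wait_tensor(out)
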
def broadcast(self: torch.Tensor, src: int, group: RANK_TYPES, tag: str = ""):
    """
    Broadcasts the tensor to all processes in the given process group.

    Args:
        src (int): Source rank
        group (ProcessGroup or List[int]): The process group to work on.
        tag (str, optional): A unique identifier for the collective. Default: empty string
    """
    tag, rankset, group_size = _expand_group(group, tag)
    tensor = torch.ops.c10d_functional.broadcast(self, src, tag, rankset, group_size)
    return _maybe_wrap_tensor(tensor)

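# Illustrative usage sketch (hypothetical helper, not part of the upstream module):
# assumes an initialized process group.
def _example_broadcast_usage(t: torch.Tensor) -> torch.Tensor:
    # Rank 0's copy of `t` is propagated to every rank; the input is not modified and
    # the (possibly async) result is materialized explicitly with .wait().
    result = broadcast(t, src=0, group=list(range(dist.get_world_size())))
    return result.wait() if isinstance(result, AsyncCollectiveTensor) else result
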
def all_reduce(self: torch.Tensor, reduceOp: str, group: RANK_TYPES, tag: str = ""):
    """
    Reduces the tensor data across all machines in such a way that all get
    the final result.

    The input tensor is left unmodified.

    Group can be one of:
        List[int]: ranks participating in the collective.
        List[List[int]]: 2D mesh of ranks taking part of this collective in MPMD.
        ProcessGroup: Will perform a collective using the ranks and tag of the PG.
        DeviceMesh: Do a SPMD collective over all ranks of the mesh
        (DeviceMesh, int): Do a MPMD collective over one dimension of the DeviceMesh

    :: N.B. If you pass a PG or a 1D list to perform a MPMD collective, the compiler won't be able to recover
    that information and perform collective algebraic optimization. Use other forms of input for that.
    """
    tag, rankset, group_size = _expand_group(group, tag)
    tensor = torch.ops.c10d_functional.all_reduce(self, reduceOp, tag, rankset, group_size)
    return _maybe_wrap_tensor(tensor)

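# Illustrative usage sketch (hypothetical helper, not part of the upstream module):
# assumes an initialized process group.
def _example_all_reduce_usage(t: torch.Tensor) -> torch.Tensor:
    # Sum-reduce across all ranks of the default group.  The wrapper returns an
    # AsyncCollectiveTensor; the wait happens implicitly on first real use, or
    # explicitly via .wait() as done here.
    summed = all_reduce(t, "sum", group=dist.group.WORLD)
    return summed.wait() if isinstance(summed, AsyncCollectiveTensor) else summed
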
def all_gather_tensor(self: torch.Tensor, gather_dim: int, group: RANK_TYPES, tag: str = ""):
    """
    Gather tensor data across from all machines and concatenate over ``gather_dim``.

    Note that it currently only supports gather_dim = 0.

    The input tensor is left unmodified.
    Group can be one of:
        List[int]: ranks participating in the collective.
        List[List[int]]: 2D mesh of ranks taking part of this collective in MPMD.
        ProcessGroup: Will perform a collective using the ranks and tag of the PG.
        DeviceMesh: Do a SPMD collective over all ranks of the mesh
        (DeviceMesh, int): Do a MPMD collective over one dimension of the DeviceMesh

    :: N.B. If you pass a PG or a 1D list to perform a MPMD collective, the compiler won't be able to recover
    that information and perform collective algebraic optimization. Use other forms of input for that.
    """
    assert self.is_contiguous()
    tag, rankset, group_size = _expand_group(group, tag)
    tensor = torch.ops.c10d_functional.all_gather_into_tensor(self, tag, rankset, group_size)
    res = _maybe_wrap_tensor(tensor)
    # The collective always gathers along dim 0; rearrange the per-rank chunks if a
    # different gather_dim was requested.
    if gather_dim != 0:
        res = torch.cat(torch.chunk(res, group_size, dim=0), dim=gather_dim)
    return res

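# Illustrative usage sketch (hypothetical helper, not part of the upstream module):
# assumes an initialized process group.
def _example_all_gather_usage(t: torch.Tensor) -> torch.Tensor:
    # Each rank contributes a tensor of shape [N, ...]; the result has shape
    # [world_size * N, ...] because the gather concatenates along dim 0.
    gathered = all_gather_tensor(t.contiguous(), gather_dim=0, group=dist.group.WORLD)
    return gathered.wait() if isinstance(gathered, AsyncCollectiveTensor) else gathered
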
def reduce_scatter_tensor(self: torch.Tensor, reduceOp: str, scatter_dim: int, group: RANK_TYPES, tag: str = ""):
    """
    Reduces the tensor data across all machines in such a way that all get
    the final result, then scatter the results to corresponding ranks.


    The input tensor is left unmodified.
    Group can be one of:
        List[int]: ranks participating in the collective.
        List[List[int]]: 2D mesh of ranks taking part of this collective in MPMD.
        ProcessGroup: Will perform a collective using the ranks and tag of the PG.
        DeviceMesh: Do a SPMD collective over all ranks of the mesh
        (DeviceMesh, int): Do a MPMD collective over one dimension of the DeviceMesh
    :: N.B. If you pass a PG or a 1D list to perform a MPMD collective, the compiler won't be able to recover
    that information and perform collective algebraic optimization. Use other forms of input for that.
    """
    tag, rankset, group_size = _expand_group(group, tag)
    assert (
        self.size(scatter_dim) % group_size == 0
    ), f"input dimension 0 ({self.size(0)}) must be a multiple of group_size {group_size}"
    if scatter_dim != 0:
        # The scatter happens along dim 0, so move the requested dim there first.
        tensor_list = torch.chunk(self, group_size, dim=scatter_dim)
        self = torch.cat(tensor_list)

    tensor = torch.ops.c10d_functional.reduce_scatter_tensor(self, reduceOp, tag, rankset, group_size)
    res = _maybe_wrap_tensor(tensor)
    return res

def all_reduce_coalesced(self: List[torch.Tensor], reduceOp: str, group: RANK_TYPES, tag: str = "") -> List[torch.Tensor]:
    """
    Reduces a list of tensors across all machines in such a way that all get
    the final result.

    All tensors in the input list are left unmodified.

    Group can be one of:
        List[int]: ranks participating in the collective.
        List[List[int]]: 2D mesh of ranks taking part of this collective in MPMD.
        ProcessGroup: Will perform a collective using the ranks and tag of the PG.
        DeviceMesh: Do a SPMD collective over all ranks of the mesh
        (DeviceMesh, int): Do a MPMD collective over one dimension of the DeviceMesh

    :: N.B. If you pass a PG or a 1D list to perform a MPMD collective, the compiler won't be able to recover
    that information and perform collective algebraic optimization. Use other forms of input for that.
    """
    tag, rankset, group_size = _expand_group(group, tag)
    tensor_list = torch.ops.c10d_functional.all_reduce_coalesced(self, reduceOp, tag, rankset, group_size)
    return list(map(_maybe_wrap_tensor, tensor_list))

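# Illustrative usage sketch (hypothetical helper, not part of the upstream module):
# assumes an initialized process group.
def _example_all_reduce_coalesced_usage(tensors: List[torch.Tensor]) -> List[torch.Tensor]:
    # Reduce several tensors in one coalesced collective instead of issuing one
    # all_reduce per tensor.
    outs = all_reduce_coalesced(tensors, "sum", group=dist.group.WORLD)
    return [o.wait() if isinstance(o, AsyncCollectiveTensor) else o for o in outs]
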
def all_gather_into_tensor_coalesced(self: List[torch.Tensor], group: RANK_TYPES, tag: str = "") -> List[torch.Tensor]:
    """
    Gather a list of tensors across from all machines.

    Note that it currently only supports gather_dim = 0.

    The input tensor is left unmodified.
    Group can be one of:
        List[int]: ranks participating in the collective.
        List[List[int]]: 2D mesh of ranks taking part of this collective in MPMD.
        ProcessGroup: Will perform a collective using the ranks and tag of the PG.
        DeviceMesh: Do a SPMD collective over all ranks of the mesh
        (DeviceMesh, int): Do a MPMD collective over one dimension of the DeviceMesh

    :: N.B. If you pass a PG or a 1D list to perform a MPMD collective, the compiler won't be able to recover
    that information and perform collective algebraic optimization. Use other forms of input for that.
    """
    tag, rankset, group_size = _expand_group(group, tag)
    tensor_list = torch.ops.c10d_functional.all_gather_into_tensor_coalesced(self, tag, rankset, group_size)
    return list(map(_maybe_wrap_tensor, tensor_list))

def reduce_scatter_tensor_coalesced(
    inputs: List[torch.Tensor],
    reduceOp: str,
    scatter_dim: List[int],
    group: RANK_TYPES,
    tag: str = "",
) -> List[torch.Tensor]:
    """
    Reduces a list of tensors across all machines in such a way that all get
    the final result, then scatter the results to corresponding ranks.

    The input tensors are left unmodified.
    Group can be one of:
        List[int]: ranks participating in the collective.
        List[List[int]]: 2D mesh of ranks taking part of this collective in MPMD.
        ProcessGroup: Will perform a collective using the ranks and tag of the PG.
        DeviceMesh: Do a SPMD collective over all ranks of the mesh
        (DeviceMesh, int): Do a MPMD collective over one dimension of the DeviceMesh

    :: N.B. If you pass a PG or a 1D list to perform a MPMD collective, the compiler won't be able to recover
    that information and perform collective algebraic optimization. Use other forms of input for that.
    """
    tag, rankset, group_size = _expand_group(group, tag)
    assert len(scatter_dim) == len(inputs)
    for idx, (dim, tensor) in enumerate(zip(scatter_dim, inputs)):
        assert (
            tensor.size(dim) % group_size == 0
        ), f"input dimension {dim} ({tensor.size(dim)}) must be a multiple of group_size {group_size} for tensor at index {idx}"
        if dim != 0:
            # The scatter happens along dim 0, so move the requested dim there first.
            tensor_list = torch.chunk(tensor, group_size, dim=dim)
            inputs[idx] = torch.cat(tensor_list)

    tensor_list = torch.ops.c10d_functional.reduce_scatter_tensor_coalesced(inputs, reduceOp, tag, rankset, group_size)
    return list(map(_maybe_wrap_tensor, tensor_list))


def _is_view_op(tgt):
    assert isinstance(tgt, torch._ops.OpOverload)
    schema = tgt._schema
    if len(schema.arguments) > 0:
        first_arg = schema.arguments[0]
        # A view op aliases its first argument without writing to it.
        return first_arg.alias_info is not None and not first_arg.alias_info.is_write

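# Illustrative usage sketch (hypothetical helper, not part of the upstream module):
# assumes an initialized process group.
def _example_reduce_scatter_coalesced_usage(a: torch.Tensor, b: torch.Tensor) -> List[torch.Tensor]:
    # Both inputs are reduce-scattered along dim 0 in a single coalesced call;
    # each rank receives its 1/world_size slice of each reduced tensor.
    outs = reduce_scatter_tensor_coalesced([a, b], "sum", [0, 0], group=dist.group.WORLD)
    return [o.wait() if isinstance(o, AsyncCollectiveTensor) else o for o in outs]
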
def all_to_all_single(
    self: torch.Tensor,
    output_split_sizes: Optional[List[int]],
    input_split_sizes: Optional[List[int]],
    group: RANK_TYPES,
    tag: str = "",
) -> torch.Tensor:
    """
    Each process splits input tensor and then scatters the split list
    to all processes in a group. Then concatenate the received tensors from all
    the processes in the group and return single output tensor.

    Group can be one of:
        List[int]: ranks participating in the collective.
        List[List[int]]: 2D mesh of ranks taking part of this collective in MPMD.
        ProcessGroup: Will perform a collective using the ranks and tag of the PG.
        DeviceMesh: Do a SPMD collective over all ranks of the mesh
        (DeviceMesh, int): Do a MPMD collective over one dimension of the DeviceMesh

    :: N.B. If you pass a PG or a 1D list to perform a MPMD collective, the compiler won't be able to recover
    that information and perform collective algebraic optimization. Use other forms of input for that.
    """
    if output_split_sizes is not None:
        assert all(
            isinstance(size, (int, torch.SymInt)) for size in output_split_sizes
        ), output_split_sizes
    if input_split_sizes is not None:
        assert all(
            isinstance(size, (int, torch.SymInt)) for size in input_split_sizes
        ), input_split_sizes
    tag, rankset, group_size = _expand_group(group, tag)
    tensor = torch.ops.c10d_functional.all_to_all_single(
        self, output_split_sizes, input_split_sizes, tag, rankset, group_size
    )
    return _maybe_wrap_tensor(tensor)

class AsyncCollectiveTensor(torch.Tensor):
    """
    A Tensor wrapper subclass that is used to trigger a call to wait
    prior to first use of the underlying tensor.
    Use it inside functional collective pytorch wrappers like the following:
    def functional_collective(self, group, tag):
        tag, rankset, group_size = _expand_group(group, tag)
        tensor = torch.ops.c10d_functional.{collective}(self, tag, rankset, group_size)
        return _maybe_wrap_tensor(tensor)
    """

    elem: torch.Tensor

    __slots__ = ["elem"]

    __torch_function__ = torch._C._disabled_torch_function_impl

    @staticmethod
    def __new__(cls, elem: torch.Tensor):
        r = torch.Tensor._make_wrapper_subclass(
            cls,
            elem.size(),
            strides=elem.stride(),
            storage_offset=elem.storage_offset(),
            dtype=elem.dtype,
            layout=elem.layout,
            device=elem.device,
            requires_grad=False,
        )
        r.elem = elem
        return r

    def __tensor_flatten__(self):
        return ["elem"], None

    def tolist(self):
        wait_tensor(self.elem)
        return self.elem.tolist()

    @staticmethod
    def __tensor_unflatten__(inner_tensors, meta):
        assert meta is None
        elem = inner_tensors["elem"]
        return AsyncCollectiveTensor(elem)

    def __repr__(self):
        wait_tensor(self.elem)
        return f"AsyncCollectiveTensor({self.elem})"

    def trigger_wait(self):
        wait_tensor(self.elem)
        return self

    def wait(self) -> torch.Tensor:
        wait_tensor(self.elem)
        return self.elem

    def _get_acs_underlying_tensor(self):
        """This method enables _functional_collectives_impl to test if a tensor is an ACS"""
        return self.elem

    @classmethod
    def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
        is_view_op = _is_view_op(func)

        def unwrap(e: AsyncCollectiveTensor):
            # wait_tensor is idempotent and will do a stream sync only once.
            if not is_view_op:
                wait_tensor(e.elem)
            return e.elem

        def wrap(e: torch.Tensor):
            assert not isinstance(e, AsyncCollectiveTensor)
            res = AsyncCollectiveTensor(e)
            _register_tensor_wrapper(res)
            return res

        unwrapped_args = tree_map_only(AsyncCollectiveTensor, unwrap, args)
        unwrapped_kwargs = tree_map_only(AsyncCollectiveTensor, unwrap, kwargs)

        out = func(*unwrapped_args, **unwrapped_kwargs)

        # View ops keep aliasing the unwaited tensor, so re-wrap their outputs.
        if is_view_op:
            out = tree_map_only(torch.Tensor, wrap, out)

        return out

    def numpy(self):
        return self.wait().numpy()

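# Illustrative sketch (hypothetical helper, not part of the upstream module): shows the
# deferred-wait behavior of AsyncCollectiveTensor; assumes an initialized process group.
def _example_async_tensor_semantics(t: torch.Tensor) -> torch.Tensor:
    out = all_reduce(t, "sum", group=dist.group.WORLD)
    # `out` may be an AsyncCollectiveTensor: the wait is issued automatically the first
    # time the value is actually used (here, by the addition), or it can be forced
    # explicitly with out.wait().
    return out + 1
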
def _expand_group(group: RANK_TYPES, tag: str = "") -> Tuple[str, List[int], int]:
    """
    _expand_group desugars the different RANK_TYPES types into a canonical format that is traceable.

    By having this be part of the explicit eager codepath, we avoid having to specialize behavior inside
    torchdynamo and can still interoperate with processgroup objects or other untraceable forms.
    """
    # Imported lazily to avoid a circular dependency.
    import torch.distributed._tensor as dt

    # The real cast() helpers are only needed for type checking; at runtime identity
    # functions are used so torchdynamo does not graph-break on them.
    if TYPE_CHECKING:

        def cast_listlistint(x):
            return cast(List[List[int]], x)

        def cast_listint(x):
            return cast(List[int], x)

    else:
        # fake cast ops for use with dynamo
        def cast_listlistint(x):
            return x

        def cast_listint(x):
            return x

    rankset: List[int]
    if isinstance(group, list):
        if isinstance(group[0], list):
            nested_list = cast_listlistint(group)
            rankset = []
            group_size = -1
            for rs in nested_list:
                rankset.extend(rs)
                if group_size != -1 and group_size != len(rs):
                    raise ValueError(
                        f"group sizes must be identical found {group_size} and {len(rs)}"
                    )
                group_size = len(rs)
        else:
            rankset = cast_listint(group)
            group_size = len(rankset)
    elif isinstance(group, dist.ProcessGroup):
        rankset = dist.get_process_group_ranks(group)
        group_size = len(rankset)
        tag = tag or c10d._get_group_tag(group)
    elif isinstance(group, dt.DeviceMesh):
        assert group.ndim == 1, "Only 1D mesh is supported, pass in (DeviceMesh, int) together if mesh > 1D"
        tag, rankset, _ = group._dim_group_infos[0]
        group_size = len(rankset)
    elif isinstance(group, tuple):
        if len(group) == 2 and isinstance(group[0], dt.DeviceMesh) and isinstance(group[1], int):
            dmesh = group[0]
            dim = group[1]
            tag, rankset, _ = dmesh._dim_group_infos[dim]
            group_size = len(rankset)
        else:
            raise ValueError("Invalid tuple for group must be (DeviceMesh, int)")
    else:
        raise ValueError(
            "Invalid type for group, must be one of List, Processgroup, DeviceMesh or (DeviceMesh, int)."
        )

    return (tag, rankset, group_size)


def _are_we_tracing() -> bool:
    if is_torchdynamo_compiling():
        return True
    # If functionalization is turned on, we are almost definitely compiling/tracing.
    if torch._C._get_dispatch_mode(torch._C._TorchDispatchModeKey.FUNCTIONAL) is not None:
        return True
    mode = get_innermost_proxy_mode()
    if mode is None:
        return False
    return mode.tracer is not None


def _maybe_wrap_tensor(self) -> torch.Tensor:
    if _are_we_tracing():
        return wait_tensor(self)
    res = AsyncCollectiveTensor(self)
    _register_tensor_wrapper(res)
    return cast(torch.Tensor, res)


def _all_gather_into_tensor_coalesced_meta(self, tag, rankset, group_size):
    def mk_out_tensor(shard):
        out_size = list(shard.size())
        out_size[0] *= group_size
        out_tensor = shard.new_empty(out_size)
        return out_tensor

    return [mk_out_tensor(t) for t in self]


# Meta kernels so the c10d_functional ops can be traced without a real backend.
def _broadcast_meta(self, *args):
    return torch.empty_like(self)


def _all_reduce_meta(self, *args):
    return torch.empty_like(self)


def _wait_tensor_meta(self, *args):
    return torch.empty_like(self)


def _all_gather_into_tensor_meta(shard, tag, rankset, group_size):
    out_size = list(shard.size())
    out_size[0] *= group_size
    return shard.new_empty(out_size)


def _reduce_scatter_tensor_meta(input, reduce_op, tag, rankset, group_size):
    out_size = list(input.size())
    out_size[0] //= group_size
    return input.new_empty(out_size)


def _all_reduce_coalesced_meta(self, *args):
    return [torch.empty_like(t) for t in self]


def _all_reduce__meta(inp, *args):
    return inp


def _all_reduce_coalesced__meta(inputs, *args):
    return inputs


def _reduce_scatter_tensor_coalesced_meta(inputs, reduceOp, scatter_dim, tag, rankset, group_size):
    def mk_out_tensor(input):
        out_size = list(input.size())
        out_size[0] //= group_size
        out_tensor = input.new_empty(out_size)
        return out_tensor

    return [mk_out_tensor(t) for t in inputs]


def _all_to_all_single_meta(input, output_split_sizes, input_split_sizes, tag, rankset, group_size):
    if output_split_sizes is None:
        return input.new_empty(input.size())
    else:
        for s in output_split_sizes:
            torch._check_is_size(s)
        out_size = list(input.size())
        out_size[0] = sum(output_split_sizes)
        return input.new_empty(out_size)


def _all_gather_into_tensor_native_meta(input, group_size, group_name):
    shape = list(input.size())
    shape[0] *= group_size
    return input.new_empty(shape)


def _all_gather_into_tensor_coalesced_native_meta(inputs, group_size, group_name):
    return [
        _all_gather_into_tensor_native_meta(input, group_size, group_name)
        for input in inputs
    ]


def _reduce_scatter_tensor_native_meta(inp, reduce_op, group_size, group_name):
    shape = list(inp.size())
    shape[0] //= group_size
    return inp.new_empty(shape)


def _reduce_scatter_tensor_coalesced_native_meta(inputs, reduce_op, group_size, group_name):
    return [
        _reduce_scatter_tensor_native_meta(inp, reduce_op, group_size, group_name)
        for inp in inputs
    ]


def _register_ops():
    ops_defs = [
        "broadcast(Tensor self, int src, str tag, int[] ranks, int group_size) -> Tensor",
        "all_reduce(Tensor self, str reduceOp, str tag, int[] ranks, int group_size) -> Tensor",
        "all_reduce_coalesced(Tensor[] self, str reduceOp, str tag, int[] ranks, int group_size) -> Tensor[]",
        "wait_tensor(Tensor self) -> Tensor",
        "all_gather_into_tensor(Tensor shard, str tag, int[] ranks, int group_size) -> Tensor",
        "all_gather_into_tensor_coalesced(Tensor[] input, str tag, int[] ranks, int group_size) -> Tensor[]",
        "reduce_scatter_tensor(Tensor input, str reduceOp, str tag, int[] ranks, int group_size) -> Tensor",
        "reduce_scatter_tensor_coalesced(Tensor[] inputs, str reduceOp, str tag, int[] ranks, int group_size) -> Tensor[]",
        "all_to_all_single(Tensor input, SymInt[]? output_split_sizes, SymInt[]? input_split_sizes, str tag, int[] ranks, int group_size) -> Tensor",
    ]

    my_module = sys.modules[__name__]
    for op_def in ops_defs:
        op_name = op_def[0 : op_def.index("(")]
        backend_impl = getattr(fun_col_impl, f"_{op_name}")
        meta_impl = getattr(my_module, f"_{op_name}_meta")
        c10_lib.define(op_def, tags=torch.Tag.pt2_compliant_tag)
        c10_lib_impl.impl(op_name, backend_impl, "CompositeExplicitAutograd")
        impl_abstract(f"c10d_functional::{op_name}")(meta_impl)


if not torch._running_with_deploy():
    # The Library instances must be created at module scope, guarded against
    # torch::deploy where defining a "DEF" library is not supported.
    c10_lib = torch.library.Library("c10d_functional", "DEF")
    c10_lib_impl = torch.library.Library("c10d_functional", "IMPL")
    _register_ops()

    _c10_lib_impl = torch.library.Library("_c10d_functional", "IMPL")
    _c10_lib_impl.impl("all_reduce", _all_reduce_meta, "Meta")
    _c10_lib_impl.impl("all_reduce_", _all_reduce__meta, "Meta")
    _c10_lib_impl.impl("all_reduce_coalesced", _all_reduce_coalesced_meta, "Meta")
    _c10_lib_impl.impl("all_reduce_coalesced_", _all_reduce_coalesced__meta, "Meta")
    _c10_lib_impl.impl("wait_tensor", _wait_tensor_meta, "Meta")
    _c10_lib_impl.impl("all_gather_into_tensor", _all_gather_into_tensor_native_meta, "Meta")
    _c10_lib_impl.impl("all_gather_into_tensor_coalesced", _all_gather_into_tensor_coalesced_native_meta, "Meta")
    _c10_lib_impl.impl("reduce_scatter_tensor", _reduce_scatter_tensor_native_meta, "Meta")
    _c10_lib_impl.impl("reduce_scatter_tensor_coalesced", _reduce_scatter_tensor_coalesced_native_meta, "Meta")
else:
    warnings.warn(
        "PyTorch Distributed functional collectives do not work with torch::deploy."
    )


def all_gather_tensor_inplace(
    output_tensor: torch.Tensor,
    input_tensor: torch.Tensor,
    group,
    async_op: bool = False,
    tag: str = "",
    gather_dim: int = 0,
):
    assert not async_op, "Can't remap async version of inplace op to functional collective"
    return output_tensor.copy_(all_gather_tensor(input_tensor, gather_dim, group, tag))


def reduce_scatter_tensor_inplace(
    output: torch.Tensor,
    input: torch.Tensor,
    op: str = "sum",
    group=None,
    async_op: bool = False,
    scatter_dim: int = 0,
    tag: str = "",
):
    assert not async_op, "Can't remap async version of inplace op to functional collective"
    return output.copy_(reduce_scatter_tensor(input, op, scatter_dim, group, tag))


from torch.distributed.distributed_c10d import (
    all_gather_into_tensor as legacy_allgather,
    reduce_scatter_tensor as legacy_reducescatter,
)

# Mapping from the legacy in-place collectives exposed by torch.distributed to the
# functional collectives defined above; consulted when tracing code that still calls
# the legacy entry points.
traceable_collective_remaps = {
    legacy_allgather: all_gather_tensor_inplace,
    legacy_reducescatter: reduce_scatter_tensor_inplace,
}
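

# Illustrative sketch (hypothetical helper, not part of the upstream module): shows how
# the remap table above is meant to be read -- given a legacy in-place collective, look
# up its functional replacement.
def _example_remap_lookup():
    functional_impl = traceable_collective_remaps[legacy_allgather]
    assert functional_impl is all_gather_tensor_inplace
    return functional_impl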