
    Ph1                     >   d dl Z d dlZd dlZd dlZd dlmZ d dlmc mZ	 d dl
mZmZmZ 	  e j                  e      Z e       ad a G d d      Zd Zd Zd"dZd	ej0                  dej0                  fd
Zd	ej0                  defdZdefdZd"dZdedej@                  fdZ!	 d Z"d Z#d Z$d Z%d Z&dej0                  dededee   def
dZ'deej0                     dededee   def
dZ(d#dZ)d#dZ*dej0                  deee      d eee      dedee   defd!Z+y)$    N)ListOptionalcastc                   0    e Zd Zd Zd Zd Zd Zd Zd Zy)_WaitRegistrationc                 t    || _         t        | _        t               | _        i | _        d| _        t        dz  ay Nr      )workwork_versionversionsetptrsptr_alias_countcleanup_count)selfr   s     yC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/distributed/_functional_collectives_impl.py__init__z_WaitRegistration.__init__   s3    	#E	!    c                 L    | t         |<   | j                  j                  |       y N)data_ptr_to_workr   add)r   data_ptrs     r   _register_tensor_ptrz&_WaitRegistration._register_tensor_ptr(   s    %)"		hr   c                     | j                  |       | j                  j                  |d       | j                  |xx   dz  cc<   | xj                  dz  c_        y r	   )r   r   
setdefaultr   r   ptrs     r   _record_wrapperz!_WaitRegistration._record_wrapper-   sM    !!#&''Q/S!Q&!ar   c                 ~    | j                   !| j                   j                          d | _         | j                          y r   )r   waitcleanup)r   s    r   r"   z_WaitRegistration.wait3   s*    99 IINNDIr   c                    | xj                   dz  c_         | j                   dk(  r| j                          y | j                  |xx   dz  cc<   | j                  |   dk  r"t        j	                  |d       | k(  rt        |= y y y )Nr
   r   )r   r"   r   r   getr   s     r   decrement_live_tensorz'_WaitRegistration.decrement_live_tensor9   sz    a"IIK  %*%##C(1,1A1E1Ec41PTX1X$S) 2Y,r   c                 h    | j                   D ]#  }t        j                  |d       | k(  st        |= % y r   )r   r   r%   r   s     r   r#   z_WaitRegistration.cleanupB   s-    99C##C.$6$S) r   N)	__name__
__module____qualname__r   r   r    r"   r&   r#    r   r   r   r      s      
 **r   r   c                 *   t        | t              s| g} t        |t              s2t        |      }| D ]!  }|j                  |j	                                # y t        | |      D ]/  \  }}t        |      }|j                  |j	                                1 y r   )
isinstancelistr   r   r   zip)tensor_or_listwork_or_listregtensorr   s        r   _register_tensor_workr4   H   s}    nd+()lD)-$F$$V__%67 %  =LFD#D)C$$V__%67 >r   c                 &    |j                  |        y r   )r&   )r   wait_regs     r   _wait_reg_decr7   V   s    ""3'r   returnc                     | j                   j                         }t        j                  |d       }|t	        j
                  d       y |j                  |       t        j                  | t        ||       y )NzZTrying to register finalizer to AsyncCollectiveTensor but the inner tensor is already gone)
elemr   r   r%   warningswarnr    weakreffinalizer7   r3   r   r6   s      r   _register_tensor_wrapperr@   Y   sa    {{##%H  ##Hd3Hh	
 	  *(Cr   r3   c                 t    | j                         }t        j                  |      }||j                          | S r   )r   r   r%   r"   r?   s      r   _wait_tensorrB   m   s2     H##H-HMr   c                     t        | d      r| j                         } | j                         }t        j	                  |      }|duxr |j
                  duS )zRReturns true if ```tensor``` needs to be waited. Works with ACS and inner tensors._get_acs_underlying_tensorN)hasattrrD   r   r   r%   r   r?   s      r   _tensor_needs_waitrF   u   sP    v34224 H##H-H4=HMM$==r   c                       t        t              S )zL Returns the number of outstanding work objects waiting to be waited (sic). )lenr   r+   r   r   _outstanding_wait_countrI   }   s      r   c                  h    t        t        j                               D ]  } | j                           y)z' Wait for all outstanding collectives. N)r.   r   valuesr"   )work_regs    r   	_wait_allrM      s$    )0023 4r   reduceOpc                     | j                         } t        j                  j                  j                  j                  |       }|t        d|        t        t        j                  |      S )NzInvalid reduce operation )upperdistReduceOp	RedOpType__members__r%   
ValueErrorr   )rN   ops     r   _str_to_reduce_oprW      sW    ~~H		 	 	,	,	0	0	:B	z4XJ?@@r""r   c                     t        j                  |||      }|J | j                  t        j                        }t        j                  |||d      }t        ||       |S )Nmemory_formatTgroupasync_op)c10d#_find_or_create_pg_by_ranks_and_tagclonetorchcontiguous_formatrQ   	broadcastr4   )r   srctagranks
group_sizer\   inplace_tensorr   s           r   
_broadcastri      s\    44S%LEZZe.E.EZFN>>.#UTJD.$/r   c                     t        |      }t        j                  |||      }|J | j                  t        j
                        }t        j                  |||d      }t        ||       |S NrY   TrV   r\   r]   )	rW   r^   r_   r`   ra   rb   rQ   
all_reducer4   )	r   rN   re   rf   rg   rV   r\   rh   r   s	            r   _all_reducern      sf    	8	$B44S%LEZZe.E.EZFN??>bMD.$/r   c                     t        |      }t        j                  |||      }|J | D cg c]"  }|j                  t        j
                        $ }}t        j                  |||d      }	t        ||	       |S c c}w rk   )	rW   r^   r_   r`   ra   rb   rQ   all_reduce_coalescedr4   )
r   rN   re   rf   rg   rV   r\   tinplace_tensor_listr   s
             r   _all_reduce_coalescedrs      s    	8	$B44S%LESWXSWa1771H1H7ISWX$$%8RuW[\D-t4	 Ys   'A;c                    t        j                  |||      }|J t        | j                               }|dxx   |z  cc<   | j	                  |      }|j                         sJ t        j                  |      t        j                  j                  k(  s| j                  r9t        t        j                  ||            }t        j                  || |d      }nt        j                  || |d      }t        ||       |S )Nr   Tr[   )r^   r_   r.   size	new_emptyis_contiguousrQ   get_backendBackendGLOOis_cpura   chunk
all_gatherall_gather_into_tensorr4   )	shardre   rf   rg   r\   out_size
out_tensortensor_listr   s	            r   _all_gather_into_tensorr      s    44S%LEEJJL!HQK:K*J##%%%$,,"3"33u||5;;z:>?{EN**:uETXY*d+r   c                     t        j                  ||      }|J fd}| D cg c]
  } ||       }}t        || |d      }t        ||       |S c c}w )Nc                     t        | j                               }|dxx   z  cc<   | j                  |      }|j                         sJ |S Nr   r.   ru   rv   rw   r   r   r   rg   s      r   mk_out_tensorz8_all_gather_into_tensor_coalesced.<locals>.mk_out_tensor   sG    

%z!__X.
'')))r   T)output_tensorsinput_tensorsr\   r]   )r^   r_   *_all_gather_into_tensor_coalesced_fallbackr4   )	r   re   ranksetrg   r\   r   rq   out_tensors	work_lists	      `     r   !_all_gather_into_tensor_coalescedr      sp    44S':NE .22T=#TK2:"	I +y1 3s   Ainputre   rf   rg   c                 r   t        j                  |||      }|J t        |      }t        j                  |      t        j
                  j                  k(  s| j                  rwt        j                  d       | j                         }t        j                  |      }t        j                  |||d      }	|j                  |d      |   }
t        |
|	       |
S t        | j!                               }|dxx   |z  cc<   | j#                  |      }
t        j$                  |
| ||d      }	t        |
|	       |
S )NzOProcessGroupGloo does not support reduce_scatter, falling back with all reduce!Trl   r   dim)r^   r_   rW   rQ   rx   ry   rz   r{   loggerwarningr`   get_rankrm   r|   r4   r.   ru   rv   reduce_scatter_tensor)r   rN   re   rf   rg   r\   rV   reduction_input
group_rankr   r   r   s               r   _reduce_scatter_tensorr      s    44S%LE	8	$B$,,"3"33u|| 	]	
  ++-]]5)
2UTR$**:1*=jI
j$/  

%
"__X.
))"ED
 	j$/r   inputs	reduce_opc                     t        j                  ||      }|J t        |      }fd}| D cg c]
  } ||       }	}t        |	| ||d      }
t	        |	|
       |	S c c}w )Nc                     t        | j                               }|dxx   z  cc<   | j                  |      }|j                         sJ |S r   r   r   s      r   r   z7_reduce_scatter_tensor_coalesced.<locals>.mk_out_tensor  sG    

%
"__X.
'')))r   F)r   r   rV   r\   r]   )r^   r_   rW   )_reduce_scatter_tensor_coalesced_fallbackr4   )r   r   re   rf   rg   r\   rV   r   rq   r   r   s       `      r    _reduce_scatter_tensor_coalescedr     s     44S%LE	9	%B .44V=#VK49"I +y1 5s   A c                    |d   j                   s|sg }| D cg c]4  }t        t        j                  |t	        j
                  |                  6 }}t        ||      D ]/  \  }}t        j                  ||||      }|j                  |       1 |S t        j                  |d      5 }	t        ||       D ]  \  }
}t	        j                  ||
|d         	 d d d        |	S c c}w # 1 sw Y   	S xY w)Nr   r[   T)r\   	async_ops)r{   r.   ra   r|   rQ   get_world_sizer/   r^   r}   append_coalescing_managerr~   )r   r   r\   r]   r   r   out_tensors_slicedr   r   cmin_tout_ts               r   r   r      s     Qh	 -
,
 Z)<)<U)CDE, 	 
 "%]4F!GE:??:uEHUDT" "H %%ETBb"=.Ae++E4utT  B C 	
 C 	s   9C#*.C((C2c                     g }t        ||       D ]0  \  }}t        j                  |||||      }|j                  |       2 |S )Nrl   )r/   r^   r   r   )	r   r   rV   r\   r]   r   r   r   r   s	            r   r   r   6  sM    I ?z))*e%Zbc @ r   output_split_sizesinput_split_sizesc                     t        j                  |||      }|ct        j                   j	                         dk\   fd       t         j                               }t        |      |d<    j                  |      }n j                   j                               }t        j                  | |||d      }	t        ||	       |S )Nr
   c                  ,    d j                          dS )Nz.Expected input to have at least 1 dim but got z dimr   )r   s   r   <lambda>z$_all_to_all_single.<locals>.<lambda>J  s    1_`e`i`i`k_llp/qr   r   T)r   r   r\   r]   )r^   r_   ra   _checkr   r.   ru   sumrv   all_to_all_singler4   )
r   r   r   re   rf   rg   r\   r   r   r   s
   `         r   _all_to_all_singler   ?  s     44S%LE%UYY[A%'qr

%,-__X.
__UZZ\2
!!E.@+54D *d+r   )r8   N)F),loggingr;   r=   ra   torch.distributeddistributedrQ   "torch.distributed.distributed_c10ddistributed_c10dr^   typingr   r   r   	getLoggerr(   r   dictr   r   r   r4   r7   r@   TensorrB   boolrF   intrI   rM   strrR   rW   ri   rn   rs   r   r   r   r   r   r   r   r+   r   r   <module>r      s         1 1 ' '  
		8	$6 '* '*T
8(D( %,, >u|| > >! !
# # #		&. <<   
  9	 
  D 
 9	
 >,<< c+  S	* 
	
 9 r   