
    Ph(                        d dl mZmZmZmZ d dlZd dlmZ d dl	mc m
c mZ ej                  j                  Zd Zd Zd Zd Zdej&                  j(                  deed	f   d
eeef   defdZdej&                  j(                  deed	f   d
eeef   defdZdej&                  j(                  deed	f   deeef   defdZdej&                  j(                  deed	f   deeef   defdZy)    )castDictListTupleNc                     | d   dk7  S )N   r    )paddings    lC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/distributed/_tensor/tp_conv.py_requires_data_exchanger      s    1:?    c                     |d   dk7  rt        d      |d   dk7  r-|d   dk7  rt        d      |d   dz  | d   kD  rt        d      y	| d   |d   z  dk(  r|d   |d   k(  st        d      y	)
Nr   z3Dilation must be 1 for tensor parallel convolution.r   zGStride must be 1 when there is padding for tensor parallel convolution.      zbkernel_size[3] // 2 should be less than or equal to input_size[3] for tensor parallel convolution.zIt requires that input_size[3] is divisible by stride[1] and stride[1] equals kernel_size[3] when there is padding for tensor parallel convolution.T)RuntimeError)
input_sizekernel_sizestrider
   dilations        r   _is_supportedr      s    {aPQQqzQ!9>Y  q>QA.t   1q	)Q.6!9A3NI  r   c                    | d d d d d d | d f   j                         }| d d d d d d d |f   j                         }t        j                  |      }	t        j                  |      }
t        j                  t        j
                  ||      }t        j                  t        j
                  ||      }t        j                  t        j                  |	|      }t        j                  t        j                  |
|      }t        j                  ||||g      }|D ]  }|j                           |dk(  rt        j                  | |	gd      } | S ||dz
  k(  rt        j                  |
| gd      } | S t        j                  |
| |	gd      } | S )Nr   )dimr   )

contiguoustorch
zeros_likedistP2POpisendirecvbatch_isend_irecvwaitcat)	in_tensord1d2leftrightranksizesend_to_rightsend_to_leftrecv_from_rightrecv_from_leftsend_op_rightsend_op_leftrecv_op_rightrecv_op_leftreqsreqs                    r   _ring_send_recv_constructr5   &   sZ   aAstm,779MQ1crc\*557L&&|4O%%m4NJJtzz=%@M::djj,=LJJtzz?EBM::djj.$?L!!	lMBD 
  qyIIy/:C	  
	II~y9rB	  II~y/JPRS	r   c                    | d d d d d d | d f   j                         }| d d d d d d d |f   j                         }t        j                  |      }	t        j                  |      }
t        j                  t        j
                  ||      }t        j                  t        j
                  ||      }t        j                  t        j                  |	|      }t        j                  t        j                  |
|      }t        j                  ||||g      }|D ]  }|j                           |dk(  rI| d d d d d d d | f   } t        j                  | d d d d d d | d f   |	      | d d d d d d | d f<   y ||dz
  k(  rF| d d d d d d |d f   } t        j                  | d d d d d d d |f   |
      | d d d d d d d |f<   y | d d d d d d || f   } t        j                  | d d d d d d | d f   |	      | d d d d d d | d f<   t        j                  | d d d d d d d |f   |
      | d d d d d d d |f<   y )Nr   r   )
r   r   r   r   r   r   r    r!   r"   add)grad_in_tensorr%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   s                    r   _ring_send_recv_aggregater9   B   s   "1aRCD=1<<>M!!Q3B3,/::<L&&|4O%%m4NJJtzz=%@M::djj,=LJJtzz?EBM::djj.$?L!!	lMBD 
  qy'1a2#6(-		1aRCD=)?)
q!Q}% 
	'1a5',yy1aCRC<(.(
q!Q|$ (1aRC8(-		1aRCD=)?)
q!Q}% (-yy1aCRC<(.(
q!Q|$r   op_calllocal_tensor_args.local_tensor_kwargsreturnc           	         | t         j                  j                  k(  sJ t        |      dk(  sJ t	        j
                         }t	        j                         }t        t        j                  |d         }t        t        j                  |d         }|dd \  }}}	t        |j                  |j                  |||	      sJ t        |t              sJ t        |      s
 | |i |}
|
S |j                  d   dz
  }|dz  }||z
  }||z   |k(  sJ |dz   |z  }|dz
  |z   |z  }t        |||||||      }t!        |      }||d<   t        t"        t$        df   |      } | |i |}
|d   }|
j'                  d      }|dk(  r|
d d d d d d d ||z
  f   }
|
S ||dz
  k(  r|
d d d d d d |d f   }
|
S |
d d d d d d |||z
  f   }
|
S )N	   r   r   r      r   .)atenconvolutiondefaultlenr   get_rankget_world_sizer   r   Tensorr   shape
isinstancer   r   r5   listr   objectr*   )r:   r;   r<   r)   r*   r$   weightr   r
   r   local_resultsdr%   r&   r(   r'   local_tensor_args_list	padding_wws                      r   tp_convolutionrR   h   s   
 d&&..... !Q&&&==?D DU\\#4Q#78I%,, 1! 45F 1!A 6FGX&,,RRRgt$$$"7+!2J6IJ LLOa!VVBw!||T!q44' .r2tUD$
	
 "&&7!8$-q! vs{!35KL!2J6IJ AJ	q!19)!Q?Q]?*BCM  TAX)!Q9:*=>M  *!Q9q9}3L*LMMr   c           	         | t         j                  j                  k(  sJ t        |      dk(  sJ t	        j
                         }t	        j                         }t        t        j                  |d         }t        t        j                  |d         }t        t        j                  |d         }|dd \  }}	}
t        |j                  |j                  ||	|
      sJ t        |	t              sJ t        |	      s
 | |i |}|S |j                  d   dz
  }|dz  }||z
  }||z   |k(  sJ |dz   |z  }|dz
  |z   |z  }t        |||||||      }|j                  \  }}}}|	d   }|dk(  r/t        j                   j"                  j%                  |d|fdd      }ne||dz
  k(  r/t        j                   j"                  j%                  ||dfdd      }n.t        j                   j"                  j%                  |||fdd      }t'        |      }||d<   ||d<   t        t(        t*        d	f   |      } | |i |}|d   }t-        |||||||      }t'        |      }||d<   t        t(        t*        d	f   |      }|S )
N   r   r   r         r   constant.)rA   convolution_backwardrC   rD   r   rE   rF   r   r   rG   r   rH   rI   r   r   r5   nn
functionalpadrJ   r   rK   r9   )r:   r;   r<   r)   r*   grad_out_tensorr$   rL   r   r
   r   rM   rN   r%   r&   r(   r'   NC_outH_out_rP   rO   r8   s                           r   tp_convolution_backwardra      s   
 d//77777 !R'''==?D D5<<):1)=>OU\\#4Q#78I%,, 1! 45F 1!A 6FGX&,,RRRgt$$$"7+!2J6IJ LLOa!VVBw!||T!q44' .r2tUD$
	
 -225%AJ	19#hh1155!YQO TAX#hh1155)QQO $hh1155)Y!7QO
 "&&7!8$3q!$-q! vs{!35KL!2J6IJ 'q)2BD%t
 ]+)aU63;/?r   argskwargsc                    t         j                  j                  j                  | ||      }t         j                  j                  j                  j                  |       |j                  }|J d       t        | t        |j                        |j                        }t         j                  j                  j                  ||j                        S )N"output sharding should not be None)dtensorDTensor_op_dispatcherunwrap_to_op_infosharding_propagator	propagateoutput_shardingrR   tuple
local_argslocal_kwargswrapoutput_specr:   rb   rc   op_inforl   rM   s         r   convolution_handlerrt      s     oo,,>>wfUG OO""66@@I--O&L(LL& #w))*G,@,@M ??))..22 r   c                    t        |      }t        |d   t        j                        rt        |d   t        j                        sJ |d   j	                  |d   j
                  |d   j                        |d<   t        |      }t        j                  j                  j                  | ||      }t        j                  j                  j                  j                  |       |j                  }|J d       t        | t        |j                        |j                        }t        j                  j                  j!                  ||j"                        S )Nr   r   re   )rJ   rI   rf   rg   redistributedevice_mesh
placementsrm   rh   ri   rj   rk   rl   ra   rn   ro   rp   rq   rr   s         r   convolution_backward_handlerry      s    :Dd1gw/JtAw4XXX1g""47#6#6Q8J8JKDG;D oo,,>>wfUG OO""66@@I--O&L(LL& ,w))*G,@,@M ??))..22 r   )typingr   r   r   r   r   torch.distributeddistributedr   torch.distributed._tensor.api_tensorapirf   opsrA   r   r   r5   r9   _ops
OpOverloadrK   strrR   ra   rt   ry   r	   r   r   <module>r      sV   + *    / /yy~~
*8#
L2ZZ""2VS[)2 c6k*2 	2jDZZ""DVS[)D c6k*D 	DNZZ""

 f 	.ZZ""

 f 	r   