
    Ph>T                     	   U d dl mZmZ d dlmZmZ d dlmZ d dlmZ d dl	m
Z
mZ d dlmZmZmZmZmZmZmZmZmZ d dlmZ d dlZd dlmZ d dlZd dlmZ d dlm c m!Z" d d	lm#Z# d d
l$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+m,Z,m-Z- d dl.m/Z/ d dl0m1Z1m2Z2m3Z3 d dl4m5Z5 d dl6m7Z7  G d de      Z8 G d de1      Z9dejF                  jt                  dejF                  jt                  fdZ; e,       a<dejz                  dee/   fdZ>edej~                  j                  deeAef   deeAej                  f   fd       ZCej                  j                  ZEed         ZFd7d"ZGd# ZHd$ ZId7d%ZJd7d&ZKd!d!d!d!d!d'd'ddd(	d)ZLeEj                  j"                  eGeEj                  j                   e
eJeEj                  j                        eEj                  j                   e
eKeEj                  j                        eEj                  j                   e
eKeEj                  j                        eEj                  j"                   e
eIeEj                  j"                        eEj                  j                   e
eJeEj                  j                        eEj                  j                   e
eJeEj                  j                        eEj                  j                   e
eHeEj                  j                        eEj                  j                   e
eHeEj                  j                        eEj                  j                   e
eHeEj                  j                        eEj                  j                   e
eJeEj                  j                        eEj                  j                  eLeEjJ                  j                  e%iZbej                  j                  j                  j                  ej                  j                  j                  j                  hZfeej                  j                     eid*<   de#jt                  de#jt                  fd+Zje G d, d-             Zkd.ed/eee8      d0e-d1ed2edekfd3Zld4Zm	 	 	 d8d/eee8      d5eee#jt                  ge#jt                  f      d0ee-   fd6Zny)9    )ABCabstractmethod)contextmanagernullcontext)copy)	dataclass)partialwraps)	AnyCallablecastDictListOptionalSetTupleUnion)make_fxN)fx)native_layer_norm_backward)FakeTensorMode)gradients_tagging)DataParallelDTensorExpandModeParallelMode)	Placement)_PyTreeCodeGen_PyTreeInfoCodeGen)	stateless)NamedMemberAccessorc                       e Zd ZdZededej                  j                  dej                  j                  fd       Z	ede
j                  deej                     de
j                  fd       Zy	)
Overridea  Override the tracing and transformation behavior of :meth:`~torch.distributed._spmd.compile`.

    This is useful when any part of the model is not traceable or if you prefer
    to not trace it due to any reason. More specifically, users can implement
    :meth:`torch.distributed._spmd.Override.replacement` to replace an original
    submodule with the return new submodule. The new submodule contains
    operations that users preferred to be traced, which simply be a dummy
    placeholder operator. After tracing, users can implement
    :meth:`torch.distributed._spmd.Override.transform` to transform the traced
    graph, where the dummy placeholder operator serves as an anchor to insert
    new sub-graphs.
    fqnorig_submodulereturnc                      y)a  Implement this method to return a new :class:`nn.Module` instance to replace the ``orig_submodule``
        argument in the model.

        This helps if ``orig_submodule`` is not traceable or should not be traced.

        Args:
            fqn (str): fully quantified name of the submodule.
            orig_submodule (class:`nn.Module`): original submodule instance to replace.

        Returns:
            A new :class:`nn.Module` instance to replace the original one.

        N )selfr$   r%   s      fC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/distributed/_spmd/api.pyreplacementzOverride.replacement0   s     	    gm
flat_statec                      y)a/  
        Given a DTensor-expanded graph and sharding schema for every node,
        conduct additional transformation for the sub-graph from the :class:`nn.Module`
        returned by :meth:`torch.distributed._spmd.Override.replacement` if
        necessary.

        Args:
            gm (:class:`fx.Graph`): a DTensor-expanded graph.
            flat_state (List[str, :class:`Tensor`]): a reference to the list of
                flattened state. The elements in ``flat_state`` map to the first
                ``len(flat_state)`` placeholders in the graph. The transformation
                can add state to or remove state from ``flat_state`` as long as
                it keeps ``flat_state`` and the placeholders consistent.

        Returns:
            The :class:`fx.Graph` after transformation.

        Nr(   )r)   r-   r.   s      r*   	transformzOverride.transformA   s    0 	r,   N)__name__
__module____qualname____doc__r   strtorchnnModuler+   r   GraphModuler   Tensorr0   r(   r,   r*   r#   r#   "   s     s EHHOO     NN & 
	 r,   r#   c                   "    e Zd ZdedefdZd Zy)_PyTreeCodeGenOutputsOnlyargsr&   c                     |S Nr(   )r)   r=   s     r*   process_inputsz(_PyTreeCodeGenOutputsOnly.process_inputs^   s    r,   c                 0    t        j                  | ||      S r?   )r   
gen_fn_def)r)   	free_varsmaybe_return_annotations      r*   rB   z$_PyTreeCodeGenOutputsOnly.gen_fn_defb   s    !!$	3JKKr,   N)r1   r2   r3   r   r@   rB   r(   r,   r*   r<   r<   \   s    C C Lr,   r<   r-   r&   c                     t        t        dd| j                  j                  j                  j
                              | j                  _        | j                          | S )zMove the responsibility of flattening the input arguments from the graph module to the caller.

    Example:

        output = gm(my_struct)

        gm = gm(to_caller_flattened_graph_module)

        output = gm(*pytree.flatten(my_struct)[0])

    N)	orig_argsin_specout_spec)pytree_info)r<   r   _graph_codegenrI   rH   	recompile)r-   s    r*   !_to_caller_flattened_graph_modulerM   f   sL     3YY''33<<
	BII LLNIr,   t
placementsc                 <    |t         j                  t        |       <   y r?   )dtensor_expand_mode_placements_overrideid)rN   rO   s     r*   _override_placementsrT      s    6@,,RU3r,   optnamed_statesparamsc              #     K   | J t        | j                        }|D ]  }||   | j                  ||   <    | j                  d   }|d   }|j                         |d<   	 d  ||d<   || _        y # ||d<   || _        w xY ww)Nr   rW   )r   stateparam_groupsvalues)rU   rV   rW   orig_statesnparam_grouporig_paramss          r*   _rematerialize_optimizerr`      s      ?? syy/K+A		&) 
 ""1%Kh'K"MMOK  +H	 !,H	s   AB A1 $B1A??Bc               #   <  K   d } t         j                  j                  j                  }| j                  t         j                  j                  _        	 d  |t         j                  j                  _        y # |t         j                  j                  _        w xY ww)Nc                       y)NTr(   r(   r,   r*   f_truez_enable_compile.<locals>.f_true   s    r,   )r6   _utilsis_compiling__code__)rc   orig_is_compiling_codes     r*   _enable_compilerh      sg     
 #\\66??)/ELL&D-C!!*-C!!*s   ABA8  B8!BB   c                     t         j                  j                  | ||      }t        | |      D ]  \  }}|j	                  |        y )N)alpha)aten_foreach_addr   zipcopy_)r)   otherrk   self_updatedss_us         r*   _foreach_add_decomprt      s?    $$))$U)CLdL)3	 *r,   c                 ^     | |      }t        ||      D ]  \  }}|j                  |        y r?   rn   ro   )opr)   rq   rr   rs   s        r*   _foreach_unaop_decomprx      s+    d8LdL)3	 *r,   c                 `     | ||      }t        ||      D ]  \  }}|j                  |        y r?   rv   )rw   r)   rp   rq   rr   rs   s         r*   _foreach_binop_list_decomprz      s-    dE?LdL)3	 *r,   c                 `     | ||      }t        ||      D ]  \  }}|j                  |        y r?   rv   )rw   r)   scalarrq   rr   rs   s         r*   _foreach_binop_scalar_decompr}      s.    dF#LdL)3	 *r,   c                 d     | ||||      }t        ||      D ]  \  }}|j                  |        y r?   rv   )rw   r)   tensor1tensor2r|   rq   rr   rs   s           r*   _foreach_addcop_scalar_decompr      s2    dGWf5LdL)3	 *r,   T	lrbeta1beta2weight_decayepsamsgradmaximize
grad_scale	found_infc       	            | ||||f}t         j                  j                  | |||||||||	|
||||      }t        t	        ||            D ]3  \  }\  }}|dk(  rt	        ||      D ]  \  }}|j                  |        5 y )Nr   ri   )rl   _fused_adamdefault	enumeratern   ro   )r)   gradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepsr   r   r   r   r   r   r   r   r   
orig_tupleupdated_tupleidxorigupdatedous                         r*   _fused_adam_decompr      s    $ xoFJ$$,,! - M$ !*#j-*H I_dG!8g&DAqGGAJ '	 !Jr,   DEDUP_TARGETSc                 j   i }| j                   j                  D ]  }t        j                  |j                   }|j
                  t        v s2|j
                  g|}|j                  |d       }||||<   \|j                  |       | j                   j                  |        | j                          | S r?   )graphnodespytreearg_tree_leavesr=   targetr   getreplace_all_uses_with
erase_noderL   )r-   args_to_nodenoder=   args_keyunique_nodes         r*   _dedup_collectivesr   .  s    35L%%tyy1;;-'+d+H&**8T:K")-X& **;7##D)  LLNIr,   c                       e Zd ZU ej                  ed<   ej                  ed<   ee	j                  j                     ed<   ee	j                     ed<   y)_CompiledResultr-   modrU   r.   N)r1   r2   r3   r   r9   __annotations__r7   r8   r   r6   optim	Optimizerr   r:   r(   r,   r*   r   r   E  s9    
	N	%++''	((U\\""r,   r   funcmodule_overrideparallel_moder=   kwargsc           	         d\  t        j                  |i |D ]W  }t        |t        j                        rJ d       |t        |t
        j                  j                        sMJ d       |Y J d       r>t              dt        dt
        j                  j                  dd ffd d	       t        j                  d
            }t        j                  d
            }i }9|j                         D ]&  \  }	}
|
j                  v sj                  |
   ||	<   ( t        |t              fd}rdnd}rt!               t#        t        |      dt
        j$                  dt
        j$                  ffd}t        j&                  t
        j$                  ||      }t        j&                  t
        j$                  ||      }t)               5  t
        j*                  j-                  d
      5   t/        t1        ||       |t2        d
      |||||      }d d d        d d d        i ||}|j5                  ||||      }t        j6                  ||g      }t9        |      }t;        |      }rD ]  }|j=                  ||      } t?        ||      S # 1 sw Y   xY w# 1 sw Y   xY w)N)NNz%Only support single nn.Module for nowz%Only support single Optimizer for nowz5Couldn't find nn.Module instances from the arguments.
fqn_prefixmoduler&   c                 
   D ]}  }|j                         D ]h  \  }}t        |      dk(  r| dk7  r| dz   |z   n|}|j                  ||      }t        |      t        |      k(  r
 	||       Wj	                  ||       j  y )Nr    .)named_childrenlenr+   rS   swap_submodule)
r   r   overridenamechildr$   	new_childaccessorr   swaps
          r*   r   z_compile.<locals>.swapg  s    +#)#8#8#:KD%4yA~ 5?25E*s*T14C ( 4 4S% @I)}5	1S), //Y? $; ,r,   r   F)remove_duplicatec                    t        j                  i ||      5  	rt        	||      n	t               5  rt	        |      n	t               5   | |i |}d d d        t        j                               t        |j                               fcd d d        cd d d        S # 1 sw Y   NxY w# 1 sw Y   nxY w	 d d d        y # 1 sw Y   y xY wr?   )r    _reparametrize_moduler`   r   r   list
parametersr[   )
r   rW   buffersrV   r=   r   retis_data_parallel_moder   rU   s
          r*   stateless_funcz _compile.<locals>.stateless_func  s    ,,&F&g&
  $v
!m$ /D"6*VD+F+ W S^^-.\5H5H5J0KK$ $
 
 WV	$ $ $
 
 
s:   CB0	B$:B0	C$B-)B00B9	5CCfakesymbolicargc                     j                  |       }dg| j                  z  }|j                  xx   t        j                         z  cc<   |j                  |      S )Nri   )from_tensorndiminput_batch_dimdistget_world_sizerepeat)r   fake_argarg_dimsdata_parallel_mode	fake_modes      r*   _get_full_batch_argz%_compile.<locals>._get_full_batch_arg  sS    
 !,,S1HsSXX~H'778D<O<O<QQ8??8,,r,   )	check_nan)tracing_modedecomposition_table_allow_non_fake_inputs) r   r   
isinstancer7   r8   r6   r   r   r!   r5   dictnamed_parametersnamed_buffersitemsrY   r   r   r   r:   tree_map_onlyrh   autograddetect_anomalyr   r	   SPMD_DECOMP_TABLE	partitiontree_leavesrM   r   r0   r   )r   r   r   r=   r   r   rW   r   rV   r]   pr   r   r   r-   params_and_buffersr.   r   r   r   r   r   r   rU   r   s    `                @@@@@@@r*   _compiler   M  s    HC%%t6v6c299%;G GG;Cc5;;001;G GG;C 7 ?SSS? &s+
	@S 
	@%((// 
	@d 
	@ 	R #&&&>?F3$$e$<=GL
 LLNDAqCII~ #&))A,Q	 # '}lCL" 36
L"$	!,>		-U\\ 		-ell 		- ##LL

 %%LL
 
	ENN99E9J

WND)% 1#(	

 '<v7 K	H
H
H 
	 	 

B  ##%7$FGJ	*2	.B 
B	B 'H##B
3B ( 2sC44o KJ		s$   -!K(J96K9K	>KK_compiled_objgm_transformationc                 (     dt         f fd}|S )a  Compile and optimize a callable, which can be a train step within a training loop.

    This method will extract :class:`nn.Module` and :class:`torch.optim.Optimizer`
    instances from the input arguments and trace operations applied to their
    parameters and states.

    Args:
        module_override (Optional[List[Override]]): a list of Override instances
            that will be applied to the module in order. The :class:`Override`
            objects provide :class:`nn.Module` replacements during tracing and a
            graph transformation function after tracing. (Default: ``None``)
        gm_transformation (Optional[Callable[fx.GraphModule, fx.GraphModule]]):
            a callback that will be called after the original callable is
            compiled and distributed (usually after the first iteration) to
            transform the compiled GraphModule into a new optimized one.
        parallel_mode (Optional[ParallelMode]): a :class:`ParallelMode` object
            that specifies how to parallelize the callable. Each ParallelMode
            would have its own strategy to partition the model and the captured
            graph (Default: ``None``)

    r   c                 :     t                fd       S )Nc                  n   |r|j                  dd      nd}d}j                  j                  t        d       }|0d}t        n}t        	|g| i |}|j                  t        <   |j                  t        j                  | i |z   }t        j                         5  |r
r 
|j                        |_        |s |j                  | d   }n	  |j                  |d|id   }|cd d d        S # t        $ r+}dt        |      vr| |j                  | d   }Y d }~:d }~ww xY w# 1 sw Y   y xY w)Nlast_train_stepFTr   	last_iter)pop__dict__r   COMPILED_OBJECT_KEYrQ   r   r.   r   r   r6   no_gradr-   	TypeErrorr5   )r=   r   r   
first_itercompiled_objmode	flat_inpsoutputer   r   r   r   wrappers            r*   r  z'compile.<locals>.inner.<locals>.wrapper  sW   FLfjj):EBRWOJ #++//0CTJL#!
 ,9+@'m   (otUdUfU8D  !45$//&2H2H33 I  "3 '8&HLO&,\__i8;F
@!0)!W!W" - !" % @&c!f4"#G!0)!<Q!?@# !s6   "1D+C4)D+4	D(=!D#D+#D((D++D4)r
   )r   r  r   r   r   s   `@r*   innerzcompile.<locals>.inner  s!    	t*	 
*	X r,   )r   )r   r   r   r  s   ``` r*   compiler    s    6.H .` Lr,   )ri   )NNN)oabcr   r   
contextlibr   r   r   dataclassesr   	functoolsr	   r
   typingr   r   r   r   r   r   r   r   r   	functorchr   r6   torch.distributeddistributedr   )torch.distributed._functional_collectivestorch.nnr7   torch.utils._pytreeutils_pytreer   r   torch._decomp.decompositionsr   torch._subclasses.fake_tensorr   %torch.distributed._spmd.data_parallelr   %torch.distributed._spmd.parallel_moder   r   r   torch.distributed._tensorr   torch.fx.graphr   r   r   torch.nn.utilsr    %torch.nn.utils._named_member_accessorr!   r#   r<   r9   rM   rQ   r:   rT   r   r   r5   	Parameterr`   opsrl   rh   rt   rx   rz   r}   r   r   _foreach_add_Scalarrm   _foreach_addcdiv__foreach_addcdiv_foreach_addcmul__foreach_addcmul_foreach_div__foreach_div_foreach_mul__foreach_mul_foreach_neg_r   _foreach_neg_foreach_reciprocal__foreach_reciprocal_foreach_sqrt__foreach_sqrt_foreach_sub__foreach_sub_fused_adam_r   c10d_functional
all_reducewait_tensorr   _ops
OpOverloadr   r   r   r   r   r  r(   r,   r*   <module>r3     s   # # 2  ! $ O O O     1  $ $  C 8 C 
 0 ? ? $ E7s 7tL L%((*>*> 588CWCW : () AELL Ad9o A
  			 sCx.  bll"#   2 yy~~ D D 	

	!*\ 	0w$d&7&7&>&>  	!!7%t'<'<'C'C$ 	!!7%t'<'<'C'C$ 	W"D$5$5$:$: 	w$d&7&7&>&>  	w$d&7&7&>&>  	t0088! 	%%wt77??( 	t1199" 	w$d&7&7&>&>  	1##++-GC" L 
II((00	II))11-s5::(() 2>> bnn . # # #_5
_5d8n-_5  _5 	_5
 _5 _5H &  15NR,0Kd8n-K"..)92>>)I JKK L)Kr,   