
    Ph                         d dl mZmZ d dlmZmZmZmZmZm	Z	 d dl
Z
d dlmZ d dlmc mZ d dlmZ d dlmZmZ d dlmZmZ d dlmZmZmZmZ d dlm Z   G d	 d
e      Z! G d de!      Z" G d de!      Z#y)    )ABCabstractmethod)AnyCallableDictListOptionalTupleN)FakeTensorMode)DataParallelStylepartition_data_parallel)_convert_to_distributedSchema)
DeviceMesh	Placement	ReplicateShard)GraphModulec                       e Zd ZdZededej                  j                  de	ej                  j                     deeef   deeef   deedf   d	eeef   d
efd       Zeded
efd       Zy)ParallelModez
    Basic Parallel Mode interface. Each parallelism pattern should implement
    this interface to describe how to partition and compile the graph in the
    spmd compiler.
    gmmodel	optimizerparams_and_buffersnamed_statesargs.kwargsreturnc                     t               )z
        Partition a single device graph to a distributed graph.

        TODO(@wanchaol): some of these arguments are not necessary for
        partitioning, remove the unnecessary ones later.
        NotImplementedError)selfr   r   r   r   r   r   r   s           pC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/distributed/_spmd/parallel_mode.py	partitionzParallelMode.partition   s    " "##    c                     t               )a  
        Transform and compile a distributed graph with a set of graph
        transformation and optimization passes for each parallel mode.

        The returned result should be a compiled executable graph in
        the distributed environment.
        r    r"   r   s     r#   transform_and_compilez"ParallelMode.transform_and_compile,   s     "##r%   N)__name__
__module____qualname____doc__r   r   torchnnModuler	   optim	Optimizerr   strr   r
   r$   r(    r%   r#   r   r      s     $$ xx$ EKK112	$
 !cN$ 38n$ CHo$ S#X$ 
$ $$ 	$ 	$ 	$ 	$r%   r   c                       e Zd ZdZ	 dddddededeeegef      fdZ	d	ed
e
j                  j                  dee
j                  j                     deeef   deeef   deedf   deeef   defdZd	edefdZy)DataParallelzData Parallelism mode.r   N)input_batch_dimcustom_passesparallel_styler6   r7   c                    |dk(  rt         j                  | _        nD|dk(  rt         j                  | _        n)|dk(  rt         j                  | _        nt        d|       || _        ||| _        yd | _        y)a  
        DataParallel Mode that partition the model and graph to data parallel style
        parallelism (i.e. DDP/FSDP/ZERO-3). It currently supports three different
        parallel styles: "replicate", "fully_shard", and "default". See
        :class:`DataParallelStyle` for more details.

        Args:
            parallel_style (str): parallel style to use. Currently supports
                "replicate", "fully_shard", and "default".

        Keyword args:
            input_batch_dim (int): the batch dimension of the input tensor.
                 default: 0
            custom_passes (Callable[[GraphModule], GraphModule], optional):
                A custom callable that overrides the default graph transformation
                and optimization passes.
        	replicatefully_sharddefaultzUnknown parallel style: Nc                     | S Nr3   r   s    r#   <lambda>z'DataParallel.__init__.<locals>.<lambda>e       r%   )r   	REPLICATEr8   FULLY_SHARDDEFAULTRuntimeErrorr6   
_gm_passes)r"   r8   r6   r7   s       r#   __init__zDataParallel.__init__<   sz    0 [("3"="=D},"3"?"?Dy("3";";D!9.9IJKK  /$DQDO ,DOr%   r   r   r   r   r   r   .r   r   c                     t        dt        j                  t        j                                     }t        ||||||||| j                  | j                  
      }|S )Ncuda)r   r-   arangedistget_world_sizer   r8   r6   )	r"   r   r   r   r   r   r   r   meshs	            r#   r$   zDataParallel.partitiong   s]     &%,,t/B/B/D"EF$  
 	r%   c                 $    | j                  |      S )z>optimize a distributed graph with a set of optimization passesrF   r'   s     r#   r(   z"DataParallel.transform_and_compile   s     r""r%   )r:   )r)   r*   r+   r,   r2   intr	   r   r   rG   r-   r.   r/   r0   r1   r   r   r
   r$   r(   r3   r%   r#   r5   r5   9   s      *),  !HL),), 	),
  +)C DE),V xx EKK112	
 !cN 38n CHo S#X 
6# # #r%   r5   c                       e Zd ZdZ	 ddeeegef      fdZdedej                  j                  deej                  j                     deeef   d	eeef   d
eedf   deeef   defdZdedefdZy)DTensorExpandModez
    The DTensor Expand mode. It's replicating the parameters and
    shard the inputs to represent DDP like behavior, it's currently
    a transitent mode before we move to the new data parallel expansion.
    Nr7   c                 6    i | _         ||| _        y d | _        y )Nc                     | S r>   r3   r?   s    r#   r@   z,DTensorExpandMode.__init__.<locals>.<lambda>   rA   r%   )_placements_overriderF   )r"   r7   s     r#   rG   zDTensorExpandMode.__init__   s$     AC!$DQDO ,DOr%   r   r   r   r   r   r   .r   r   c           
      2   t        j                  |i |}t        dt        j                  t        j                               j                               }	t        |	t        d      g      }
t        |	t               g      }g g }}t        j                  |      D ]Q  }t        |t        j                        sJ dt        |              |j                  |       |j                  |       S t        j                  |      D ]t  }t        |t        j                        r#|j                  |       |j                  |       @|j                  t        j                   d             |j                  |       v |D ]  }t        |t        j                        rm|j                  |       t#        |      | j$                  v r3|j                  t        |	| j$                  t#        |                      x|j                  |
       |j                  t        j                   d             |j                  |
        t'        d      5  |D cg c]  }t        j(                  |       }}d d d        t+        |||	d      d   S c c}w # 1 sw Y   !xY w)	NrI   r   )rM   
placementszexpecting Tensor but got T)allow_non_fake_inputsF)default_mesh_allow_partial)pytreearg_tree_leavesr   r-   rJ   rK   rL   rI   r   r   r   tree_leaves
isinstanceTensortypeappendemptyidrU   r   
empty_liker   )r"   r   r   r   r   r   r   r   	flat_argsrM   shard_schemareplicate_schemainpsschemaspoainp	fake_inpss                      r#   r$   zDTensorExpandMode.partition   s
    **D;F;	&%,,t/B/B/D"E"J"J"LM%4U1XJG#)t#NBg##$67Aa.U2KDQRG90UU.KKNNN+, 8
 ##L1A!U\\*A/0EKKN+/0 2 A!U\\*Aa5D555NNDT5N5NrRSu5UV NN<0 EKKN+|,! $ $7:>?$3))#.$I? 8 '	7e

 	 @ 87s   
JJ+JJJc                 $    | j                  |      S )z
        Transform and compile a distributed graph with a set of graph transformation
        and optimization passes for the dtensor fallback parallel mode.
        rO   r'   s     r#   r(   z'DTensorExpandMode.transform_and_compile   s     r""r%   r>   )r)   r*   r+   r,   r	   r   r   rG   r-   r.   r/   r0   r1   r   r2   r   r
   r$   r(   r3   r%   r#   rR   rR      s     OS,%h}k/I&JK,77 xx7 EKK112	7
 !cN7 38n7 CHo7 S#X7 
7r# # #r%   rR   )$abcr   r   typingr   r   r   r   r	   r
   r-   torch.distributeddistributedrK   torch.utils._pytreeutils_pytreer[   torch._subclassesr   %torch.distributed._spmd.data_parallelr   r   "torch.distributed._spmd.distributer   r   torch.distributed._tensorr   r   r   r   torch.fxr   r   r5   rR   r3   r%   r#   <module>r|      s[    # = =    $ $ , O M M  $$3 $$NL#< L#^P# P#r%   