
    Phi                     F   d dl Z d dlZd dlZd dlZd dlmZmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZ d dlZd dlmZ d dlmc mc mZ d dlmc mc mZ d dlmc mc mZ d dl m!Z! d dl"m#Z# d dl$m%Z%m&Z& d dl'm(Z( d dl)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0 d dl1m2Z2m3Z3m4Z4m5Z5 d dl6m7Z7 d d	l8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@ d d
lAmBZB d dlCmDZD d dlEmFZF d dlGmHZH dZI	 d dlJmKZKmLZL  eNd      ZOdZPeej                  ej                  f   ZReeej                  eRf      ZSe>j                  e5j                  e>j                  e5j                  e>j                  e5j                  e>j                  e5j                  e>j                  e5j                  iZYe>j                  e>j                  gZZe>j                  e>j                  fZ[e	 d\de+deSde>deeB   dee&   de+fd       Z\ede+deSde&de+fd       Z]edede^fd       Z_ede&de^fd       Z`edeNdej                  fd       Zaedej                  deNdej                  fd        Zbdej                  deNdeej                  ej                  f   fd!Zce	 d\de+d"e!j                  d#ee
ejB                  j                        d$eee
ejB                  j                        ee
ejB                  j                        f   de+f
d%       Zfd$ee   d&e^ddfd'Zgede+d"e!j                  d(ee!j                     d)eeeNej                  f      de+f
d*       Ziede+d"e!j                  de+fd+       Zjede+dee>   d,ee=   d-ee:   d.e^d/e^d0eNd1eNde+fd2       Zkede+de+fd3       Zlede+d4e9d5e^de+fd6       Zmed\de+de&de+fd7       Znede+de+fd8       Zoede+d9e!j                  d)eeeNej                  f      d:eee!j                  gdf      d;e^de+fd<       Zpede+d=ee!j                     d9e!j                  fd>       Zqd?e!j                  d@ee
ejB                  j                        dee!j                     fdAZr	 d\d?ejB                  j                  d#eejB                  j                     dBee
ejB                  j                        deejB                  j                     fdCZsd?ejB                  j                  d#eejB                  j                     deet   fdDZud?e!j                  deet   fdEZvd"e!j                  d(ee!j                     d)eeeNej                  f      ddfdFZwd)eeeNej                  f      dGeNdeej                     fdHZxd"e!j                  d(ee!j                     d#ee!j                     dee^e^f   fdIZyd?e!j                  d:ee!j                  gdf   d#ee!j                     ddfdJZzd?e!j                  dKeej                     d#ee!j                     fdLZ{d?e!j                  d#ee!j                     dee!j                     fdMZ|d"e!j                  d(ee!j                     dNeej                     dKeej                     ddf
dOZ~d=ee!j                     dPeej                     dKeej                     ddfdQZdR Zd"e!j                  d(ee!j                     dKeej                     dGeNdej                  f
dSZd"e!j                  d=ee!j                     dej                  ddfdTZd=ee!j                     dPeej                     dej                  ddfdUZdVeej                     ddfdWZd"e!j                  d(ee!j                     dee!j                     fdXZd(ee!j                     ddfdYZde>fdZZdej                  de#j                  fd[Zy# eM$ r dZIY w xY w)]    N)AnyCallableDequeDict	GeneratorIterableIteratorListno_type_checkOptionalSetTupleUnion)default_hooks)_mesh_resources
DeviceMesh)_get_default_group)_FSDPDeviceHandle
_FSDPState_get_module_fsdp_state_is_fsdp_flattened!_named_parameters_with_duplicatesclean_tensor_nameTrainingState)_FSDP_USE_FULL_PREC_IN_EVALFlatParameterFlatParamHandleHandleShardingStrategy)_FreeEventQueue)BackwardPrefetch
CPUOffloadFullOptimStateDictConfigFullStateDictConfigMixedPrecisionShardingStrategyStateDictConfigStateDictType)_Policy)DTensorExtensions)_sync_params_and_buffers)RemovableHandleT)deferred_initfakeFi  _fsdp_syncedstateprocess_groupsharding_strategypolicydevice_meshreturnc                 (   ||t        d      |t        v }|r#|||t        d| d      t        | ||      } n4|r|| _        |j	                  d      | _        n||n	t               | _        | j
                  j                         | _        | j
                  j                         | _	        | j                  }|r|| j                  j                         z  }t        j                  j                  |      | _        || j                  z  | _        | S )NzcCannot pass both process_group and device_mesh at the same time. Please just pass only one of them.zManual wrapping with z@requires explicit specification of process group or device_mesh.r   mesh_dim)
ValueErrorHYBRID_SHARDING_STRATEGIES*_init_process_group_state_for_hybrid_shard_device_mesh	get_groupr0   r   ranksize
world_size_inter_node_pgr   DefaultState_get_gradient_predivide_factor_gradient_predivide_factor_gradient_postdivide_factor)r/   r0   r1   r2   r3   is_hybrid_strategydata_parallel_world_sizes          mC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/distributed/fsdp/_init_utils.py_init_process_group_staterH   a   sC     [%<<
 	
 +.HH V^8K '(9':;R 
 ?}kE !,E"-"7"7"7"CE "/!:@R@T  $$))+EJ**//1E$// E$8$8$=$=$?? ""AA$	
 
$ 	!5#C#CC 
% L    c                    |rft        |      r6|| _        |j                  d      | _        |j                  d      | _        nt        dt        |j                                      |@t               }t        || j                  j                               \  }}|| _        || _        n2t        |      r|\  | _        | _        nt        dt        |             t        | j                        | _        | S )Nr   r6      z,Expected device_mesh to have ndim=2 but got zmExpected process_group to be passed in as either None or Tuple[dist.ProcessGroup, dist.ProcessGroup] but got r0   )"_is_valid_hybrid_shard_device_meshr;   r<   r@   r0   r8   lenr   !_init_intra_and_inter_node_groups_device_handledevice_count_is_valid_hybrid_shard_pg_typetype_get_default_comm_hook_state_inter_node_state)r/   r0   r3   default_groupintra_node_groupinter_node_groups         rG   r:   r:      s    -k:!,E $/#8#8!#8#DE "-"7"7"7"CE{44678:  
	*,-N5//<<>.
** // *-8 9F5E!5GGKMGZF[] 
 ;**E LrI   c                 j    t        | t              xr" t        |       dk(  xr t        d | D              S )N   c              3   P   K   | ]  }t        |t        j                           y wN)
isinstancedistProcessGroup).0pgs     rG   	<genexpr>z1_is_valid_hybrid_shard_pg_type.<locals>.<genexpr>   s     JMb
2t001M   $&)r]   tuplerN   allrL   s    rG   rR   rR      s:     	=%( 	K!#	KJMJJrI   c                     t        j                  |       }|t        d|  d| dd      t        | t              xr | j
                  dk(  S )NzFound device_mesh z$ passed in has a parent device_mesh .z*Hybrid sharding + TP is not supported yet.rZ   )r   get_parent_meshRuntimeErrorr]   r   ndim)r3   parent_meshs     rG   rM   rM      s]    !11+>K -QR]Q^^_`8
 	
 k:.H;3C3Cq3HHrI   num_devices_per_nodec                 6    t        j                  |       \  }}|S )a5  
    Return a process group across the current node.

    For example, given each row is a distinct node:
    0 1 2 3 4 5 6 7 8
    9 10 11 12 13 14 15
    This API would return an intra-node subgroup across
    [0, 7] or [8, 15] depending on the process's rank.
    For example, rank 3 would get [0, 7].
    )r^   new_subgroups)rl   intra_node_subgroup_s      rG   _init_intra_node_process_grouprq      s!     "//0DErI   global_process_groupc                 T   d}t        j                  |       }t        j                  |       }||z  }t        j                  |       |z  }t	        |      D ]?  }t	        |      D cg c]
  }|||z  z    }	}t        j
                  |	|      }
||k(  s>|
}A |
J | d       |S c c}w )a  
    Return an inter-node process group where each contained rank has the same local rank.

    For example, given each row is a distinct node:
    0 1 2 3 4 5 6 7 8
    9 10 11 12 13 14 15
    This API would return inter-node process group {0, 8}, {1, 9}, {2, 10}, and so forth
    depending on the process's rank. For example, rank 1 would get {1, 9}, rank 5
    would get {5, 13}.
    N)ranksbackendz. expected to assign inter-node pg, but did not)r^   get_backendget_world_sizeget_rankrange	new_group)rr   rl   inter_node_pgsharding_backendr?   	num_nodesmy_local_rank
local_rankiranks_for_inter_groupgrps              rG   _init_inter_node_process_groupr      s      M''(<=$$%9:J22IMM"67:NNM01
=B9=M!
=MJ!223=M 	 !
 nn#8BRS&M 2 	!H
FGH!!
s   %B%c                 0    t        |      t        | |      fS )a  
    Initialize intra and inter-node process groups and return the ones corresponding to this process's rank.

    This function can be used to initialize process groups for ``HYBRID_SHARD`` or
    ``_HYBRID_SHARD_ZERO2`` in FSDP.
    This function assumes each node has an equal number of CUDA-enabled devices.
    Returns:
        Tuple[dist.ProcessGroup, dist.ProcessGroup]: Intra and inter-node process group.
    )rq   r   )rr   rl   s     rG   rO   rO     s#     	'';<&';=QR rI   moduleignored_modulesignored_statesc                    ||t        d      d }|d u}|rt        |      }t        |d       ng }t        |t        |      ng d       t        |      dkD  r"t	        |d   t
        j                        r|}n|}t        ||      | _        t        || j                  |      | _
        t        || j                        | _        | S )NzfCannot pass both ignored_modules and ignored_states at the same time. Please just pass ignored_states.TFr   )r8   list_check_ignored_statesrN   r]   nn	Parameter_get_ignored_modules_ignored_modules_get_ignored_params_ignored_params_get_ignored_buffer_names_ignored_buffer_names)r/   r   r   r   ignored_parameterspassed_as_ignored_statesignored_states_lists          rG   _init_ignored_module_statesr     s     "~'A:
 	
 -T9">2148 %4%@D!b%	
 !#)!,bll;!41O1&/JE/E
 #<#E LrI   r   c                    t        |       dk(  ry|r`t        d | D              }t        d | D              }|s9|s6t        | D ch c]  }t        |       c}t              }t        d|       yyt        d | D              s6t        | D ch c]  }t        |       c}t              }t        d|       yc c}w c c}w )	z
    Check that the ignored states are uniformly parameters or uniformly modules.

    We may remove this check in the future if we permit mixing.
    r   Nc              3   P   K   | ]  }t        |t        j                           y wr\   )r]   r   r   r`   r/   s     rG   rb   z(_check_ignored_states.<locals>.<genexpr>X  s     UnUE2<<8nrc   c              3   P   K   | ]  }t        |t        j                           y wr\   r]   r   Moduler   s     rG   rb   z(_check_ignored_states.<locals>.<genexpr>Y  s     SN5*UBII6Nrc   )keyzUignored_states expects all nn.Parameter or all nn.Module list elements but got types c              3   P   K   | ]  }t        |t        j                           y wr\   r   r   s     rG   rb   z(_check_ignored_states.<locals>.<genexpr>b  s     L^E:eRYY/^rc   z>ignored_modules expects nn.Module list elements but got types )rN   re   sortedrS   reprr8   )r   r   
all_paramsall_modulesr/   sorted_typess         rG   r   r   M  s     >aUnUU
SNSS+!N"KN54;N"KQUVL**69  #.z L^LL!N"KN54;N"KQUVL%(  M #L #Ls   B;C ignored_params	device_idc                 *   d}|1t        |t        j                        r|nt        j                  |      }|t        ||      D ]|  }|j                  j                  dv r||j                  }+|j                  j                  |j                  k7  sOt        d|j                   d|j                  j                          |xs2 t        j                  dt        j                  j                               }t        j                  |      | _
        | S )a  
    Determine device handle used for initializing FSDP.

    If a device is specified by ``device_id``,
    then returns device handle corresponds to that device type. Otherwise, If the
    module is already on a non-CPU device, then the device type is that non-CPU device type.
    If the module is on CPU or meta, then the device type is the current cuda device.

    This method will be called once ignored paramters was determined, as the device handle maybe needed
    for other initialization.
    N>   cpumetazLFSDP does not support modules with different device types but got params on z and cuda)r]   torchdevice_get_orig_paramsrS   ri   r   current_devicer   from_devicerP   )r/   r   r   r   determined_deviceparams         rG   _init_device_handler   j  s   $  )U\\2 i( 	
  %fn=E||  O3 ($)LL!<<$$(9(>(>>&-->-C-C,DE%,,J[J[I\^  > . 
EJJ--/2
 -889JKELrI   c                     t        |      | _        i }|j                         D ]  \  }}t        |      }|j                  ||<   ! || _        | S r\   )_get_buffer_names_buffer_namesnamed_buffersr   dtype_buffer_name_to_orig_dtype)r/   r   r   buffer_namebuffers        rG   _init_buffer_stater     sZ    
 ,F3E
 :<%335V'428,,";/  6 (BE$LrI   mixed_precisioncpu_offloadlimit_all_gathersuse_orig_paramsbackward_prefetch_limitforward_prefetch_limitc                 :   | j                   dk(  rN|t        j                  k7  r+t        j                  d|xs t        j
                   d       t        j                  }|xs t        j
                  | _        |xs
 t               | _        |5t        j                  j                  dt        | j                                t        j                  j                  t         d      dk(  | _        |xs
 t%               | _        || _        || _        t,        j.                  | _        d | _        t5               | _        t9        j:                         | _        t?        j@                  | j<                  ||      | _!        tE               }|| _#        d }	|	| _$        g }
|
| _%        | S )NrK   z/FSDP is switching to use `NO_SHARD` instead of z since the world size is 1.z'torch.distributed.fsdp.mixed_precision. 1)&r?   r%   NO_SHARDwarningswarn
FULL_SHARDr1   r$   r   r   _C_log_api_usage_oncestrosenvirongetr   _use_full_prec_in_evalr!   r   r   _use_orig_paramsr   IDLEtraining_state_is_rootr   _free_event_queuer^   get_debug_level_debug_levelexec_order_utils_ExecOrderData_exec_order_datadict_fully_sharded_module_to_handle_handleparams)r/   r1   r   r   r   r   r   r   r   r   r   s              rG   _init_core_stater     sv    1 0 9 99MMA$C(8(C(CD E''
 -55/N3C3N3NE+?~/?E"$$5c%:O:O6P5QR	
 	

2B73> 
  $3z|E/E,E(--EEN-/E--/E-<<E IM#,KE)  $GEM"$FELLrI   c                 f    g }|| _         g }|| _        g }|| _        d| _        d | _        d | _        | S )NT)_root_pre_forward_handles_pre_forward_handles_post_forward_handles_sync_gradients
_comm_hook_comm_hook_state)r/   r   r   r   s       rG   _init_runtime_stater     sK     8:&?E#24!5E35"7E EE!ELrI   backward_prefetchforward_prefetchc                 "    || _         || _        | S r\   )r   r   )r/   r   r   s      rG   _init_prefetching_stater     s     0E-E LrI   c                 x    |r0t        j                  | j                        t               | _        | S d | _        | S r\   )r   rh   r;   r)   _fsdp_extension)r/   r3   s     rG   _init_extensionr     s>     66u7I7IJV 1 3
 L !%LrI   c                     t         j                  | _        t               }t	               | _        || _        i }|| _        | S r\   )r'   FULL_STATE_DICT_state_dict_typer#   r"   _optim_state_dict_config_state_dict_config_unshard_params_ctx)r/   state_dict_configunshard_params_ctxs      rG   _init_state_dict_stater     s?    *::E)<)>%=%?E"0E57 2ELrI   fully_sharded_moduleparam_init_fnsync_module_statesc                     t        | j                  |       t        | j                        }t	        | j                   j
                        \  }}|s|r|t        || j
                         n6|rt        || j
                         n|rt        j                  | fd        j
                  D 	ch c]  }|j                         D ]  }	|	  }
}}	t        | j                  |
|       t        | j                  | j                         _        t        t        | j                              }|r@t!        || j"                          j$                  t&        v rt!        || j(                         t+         ||        S c c}	}w )zHInitialize a ``FlatParamHandle`` from a module ``fully_sharded_module``.c                 >    t        |       d u xr | j                  vS r\   )r   r   )	submoduler/   s    rG   <lambda>z0_init_param_handle_from_module.<locals>.<lambda>2  s(    '=i'HD'P (8!7!77(8rI   )check_fn)_check_single_device_moduler   _get_device_from_device_idr=   _need_to_materialize_moduler   _materialize_with_param_init_fn_materialize_meta_moduler,   materialize_modulebuffers_move_module_to_device_get_compute_devicecompute_devicer   r   _sync_module_params_and_buffersr0   r1   r9   r@   _init_param_handle_from_params)r/   r   r   r   r   device_from_device_idis_meta_moduleis_torchdistX_deferred_initignored_moduler   ignored_buffersmanaged_paramss   `           rG   _init_param_handle_from_moduler    s      4e6K6KYW6y%**M2Me33U5K5K3/N/ 	5=;T' -1G1G	
 
  )U-C-C	
 
%(( 8	
 $444N$,,.F 	. 	4   	 /

	E *+?AVAVWXN' .%2E2E	
 ""&@@+$ne6J6J #5.:NOL;s   6Fr   c                    t        |      dk(  ry t        ||| j                  t        | j                     | j
                  j                  | j                  j                  | j                  j                  | j                  j                  | j                  | j                  | j                        }|j                          | j                  rJ | j                   j#                  |j$                         || _        || j&                  |j(                  <   t+        j,                  d      }| j
                  j                  r,|j$                  j,                  |k7  r|j/                  |       y y y )Nr   )fsdp_extensionr   )rN   r   r  SHARDING_STRATEGY_MAPr1   r   offload_paramsr   param_dtypereduce_dtypekeep_low_precision_gradsr0   r   r   shardr   r   append
flat_paramr   _fully_sharded_moduler   r   flat_param_to)r/   r   r   handle
cpu_devices        rG   r	  r	  V  s%    6{ae556(())**66,,F LLN}}	LL))*EMJPE))&*F*FGe$J''F,=,=,D,D
,RZ( -S'rI   root_moduler   c           	         d}	 |t        |      n	t               }|D ]V  }t        |t        j
                  j                        st        |dt        |       z         t        |      sMt        d       | j                         D ])  }t        j                  |      r|j                  |       + |D ch c]3  }|j                         D ]  }t        |t        j                        s|  5 }}}| |v rt        j                   d        | j                         D ]9  }t        |      }	|	t#        |	d      sJ |j%                  |	j&                         ; |S # t        $ r }t        |dt        |       z         |d}~ww xY wc c}}w )ah  
    Check that ``_ignored_modules`` is an iterable of ``nn.Module`` s without any FSDP instances.

    Return the modules contained in their module
    subtrees as a :class:`set`. Nested FSDP instances are excluded, but their
    already-computed ignored modules are included.

    ``_ignored_modules`` represents the argument passed by the user to FSDP.
    z>`ignored_modules` should be an iterable of `torch.nn.Module`s Nzbut got zbut got an iterable with z1`ignored_modules` should not include FSDP moduleszTrying to ignore the top-level module passed into the FSDP constructor itself will result in all parameters being ignored and is not well-supported: r   )set	TypeErrorrS   r]   r   r   r   r   r8   modulestraversal_utils_composableadd	fsdp_fileFullyShardedDataParallelr   r   hasattrupdater   )
r  r   
msg_prefixignored_root_moduleser   childr   r   optional_fsdp_states
             rG   r   r   u  s    RJQ%5%AC !su 	
 '&%((//2J+DT&\N)SSTT!&) PQQ ' %%'**62 $$V, ( +*F^^%E%!C!CD 	% 	*   o%228;	
 !((*	4Y?*.0BCCC""#6#G#GH	 +
 I  Q
x5E0F/G%HHIqPQ$s   E 78F	F%F  Fr   c                    t               }|D ch c]%  }|j                         D ]  }t        |      r| ' }}}|j                  |       |,|D ch c]  }t        |      r| }}|j                  |       | j	                         D ]9  }t        |      }	|	t        |	d      sJ |j                  |	j                         ; |S c c}}w c c}w )z
    Return the parameters of the modules in ``ignored_modules`` and the parameters in ``ignored_parameters``.

    :class:`FlatParameter` s are excluded from the result.
    r   )r!  
parametersr   r*  r#  r   r)  r   )
r  r   r   all_ignored_paramsmpparams_in_ignored_modulesparams_in_ignored_parametersr   r/  s
             rG   r   r     s     36% #!"aALLNqBTUVBWN?  ! 78%)(
)!1CA1FA) 	% (
 	!!">? !((*	4Y?*.0ABBB%%&9&I&IJ	 + '!(
s   #C
C
C%Cc           	         t               }|D ch c]  }|j                         D ]  }|  }}}|j                  | j                         D ch c]  \  }}||v rt	        |       c}}       | j                         D ]9  }t        |      }|t        |d      sJ |j                  |j                         ; |S c c}}w c c}}w )z6Return the cleaned buffer FQNs in ``ignored_modules``.r   )	r!  r  r*  r   r   r#  r   r)  r   )	r  r   all_ignored_buffer_namesr3  r   buffers_in_ignored_modulesr   r   r/  s	            rG   r   r     s    
 *- ("'1aiikFk  " ## (3'@'@'B	
'B#V33 k*'B	
 !((*	4Y?*.0GHHH$++,?,U,UV	 + $#'"
	
s   B>C
c                 f    | j                         D ch c]  \  }}t        |       c}}S c c}}w )zrReturn the fully prefixed names of all buffers in the module hierarchy rooted at ``root_module`` as a class:`set`.)r   r   )r  r   rp   s      rG   r   r     s:     >I=V=V=X=X>;+&=X  s   -c                     t        | |      D ch c]  }|j                   }}t        |      dk(  r%t        j                  d      |v r|t	        d      yt        |      dkD  rt	        d|       yc c}w )z
    Raise an error if ``module`` has original parameters on multiple devices, ignoring the parameters in ``ignored_params``.

    Thus, after this method, the
    module must be either fully on the CPU or fully on a non-CPU device.
    rZ   r   NzTTo support a module with both CPU and GPU params, please pass in device_id argument.rK   z;FSDP only supports single device modules but got params on )r   r   rN   r   ri   )r   r   r   r   devicess        rG   r   r     s     *:&.)QR)Qu||)QGR 7|qU\\%0G;5  
 
W	I'S
 	
 
 Ss   A4r=   c           	      x   | yt        | t        j                        r| nt        j                  |       }|t        j                  d      k(  rmt        j                  d|  d| dt        j
                  j                          d       t        j                  dt        j
                  j                               }|S )z
    Return a ``torch.device`` for the specified ``device_id``.

    Processes ``device_id`` and returns either the corresponding device or
    ``None`` if ``device_id`` is ``None``.
    Nr   z"FSDP got the argument `device_id` z	 on rank zJ, which does not have an explicit index. FSDP will use the current device z. If this is incorrect, please explicitly call `torch.cuda.set_device()` before FSDP initialization or pass in the explicit device index as the `device_id` argument.)r]   r   r   r   r   r   r   )r   r=   r   s      rG   r   r     s     	5<<8	ell9>U  f%%09f 005

0I0I0K/L M11	
 fejj&?&?&ABMrI   c                    t        t        | |            }t        d |D              }| j                         D ]-  }||v r|j	                  d      D ]  }||j
                  z  } / | xr t        xr t        d |D              }||fS )z
    Return if ``module`` has parameters on meta device and if ``module`` is using torchdistX deferred initialization.

    At most of the returned bools can
    be ``True``. If either is ``True``, then ``module`` needs to be
    materialized.
    c              3   4   K   | ]  }|j                     y wr\   )is_metar`   r   s     rG   rb   z._need_to_materialize_module.<locals>.<genexpr>;  s     CN5Ns   Frecursec              3   F   K   | ]  }t        j                  |        y wr\   )r-   is_fakerA  s     rG   rb   z._need_to_materialize_module.<locals>.<genexpr>G  s     @U#s   !)r   r   anyr#  r  r@  _TORCHDISTX_AVAIL)r   r   r   r  r  r   bufr  s           rG   r   r   .  s     *6>BCNCNCCN ^^%	'$$U$3Cckk)N 4 &  	A	A@@@  
 666rI   c                     t        |      st        d| dt        |             t        | |      }|D ]
  } ||        y )Nz	Expected z to be callable but got )callabler8   rS   _get_modules_to_materialize)r  r   r   modules_to_materializer   s        rG   r  r  L  sQ    
 M"&>tM?R>ST
 	
 9oV(f )rI   r
  c           	      R   |xs1 t        j                  t         j                  j                               }t	        | |      }	 t        j
                         5  |D ]u  }t        j                  |j                  d      |j                  d            }t        t        |            dkD  }|sS|j                  |d       |j                          w 	 d d d        y # 1 sw Y   y xY w# t        $ r5}t        j                   dt#        |       dt%               d       |d }~ww xY w)NFrB  r   )r   rC  zIUnable to call `reset_parameters()` for module on meta device with error z(. Please ensure that your module oftype z* implements a `reset_parameters()` method.)r   r   r   r   rK  no_grad	itertoolschainr1  r  rN   r   to_emptyreset_parametersBaseExceptionr   r   r   rS   )	r  r
  r   materialization_devicerL  r   module_state_iterhas_module_statesr-  s	            rG   r  r  Z  s    3 ell

!!#7 9oV ]]_0 %.OO%%e%4fnnUn6S%! %(->(?$@1$D!$OO+A5OQ++- 1 __  !!$Q )L>!KM	

 s=   C( AC-%CC( C%!C( %C( (	D&10D!!D&c                 "   g }t        j                  | g      }| h}|rq|j                         }|j                  |       |j	                         D ]:  }||vst        |      ||vs|j                  |       |j                  |       < |rq|S r\   )collectionsdequepopleftr  childrenr   r&  )r  r   rL  queuevisited_modulesr   child_modules          rG   rK  rK  {  s    
 /1{m,E'2mO
%%f-"OO-LO3*<8@ 7##L1\* .  "!rI   r  c                    t        j                  d      |	t        j                         }|j	                  |        g }g }|r|j                         }|j                  fd|j                  d      D               |j                  fd|j                  d      D               |j                         D ].  }t        |t        j                        r|j	                  |       0 |r|D 	cg c]	  }	|	|vs|	 }
}	|D 	cg c]	  }	|	|vs|	 }}	t        |
||       yt        t        | |      d      }||j                  k(  rt!                yyyc c}	w c c}	w )a  
    Move ``module`` depending on ``device_from_device_id`` and its current device.

    This includes moving ignored modules' parameters.

    - If ``device_from_device_id`` is not ``None``, then this moves
    ``module`` to the device.
    - If ``device_from_device_id`` is ``None``, then this does not move
    ``module`` but warns the user if it is on CPU.

    Precondition: ``_check_single_device_module()``.
    r   Nc              3   @   K   | ]  }|j                   k(  r|  y wr\   r   )r`   r   r  s     rG   rb   z)_move_module_to_device.<locals>.<genexpr>  s%      BE<<:- B   FrB  c              3   @   K   | ]  }|j                   k(  r|  y wr\   ra  )r`   r   r  s     rG   rb   z)_move_module_to_device.<locals>.<genexpr>  s%      @F==J. @rb  )r   r   rX  rY  r  rZ  extendr1  r  r[  r]   r'  r(  _move_states_to_devicenextr   _warn_cpu_init)r   r   r  r
  r\  r   r  curr_moduler   r4  params_to_movebufs_to_mover   r  s                @rG   r  r    sY   $ e$J( #."3"3"5V%'&(--/K
 MM (33E3B 
 NN )11%1@ 
 )113	!)Y-O-OPLL+ 4! & &,GVq/F!VG#*G7aa.F7G~|=RS!&.94@EU\\Z7 8 HGs   1	E;E	EEr  c                 6   t        |       dk(  rt        |      dk(  ryt        |       dkD  r| d   j                  }nt        |      dkD  r|d   j                  }t        j                  d      }|| D ]k  }t        j                         5  |j	                  |      |_        |j                  *|j                  j	                  |      |j                  _        ddd       m |D ]  }|j	                  |      |_         y|k(  rt                yy# 1 sw Y   xY w)z
    Move states to the specified device.

    Precondition: ``_check_single_device_module()`` and module's parameters and
    buffers have been materialized if needed.
    r   Nr   )rN   r   r   rN  todatagradrg  )r   r  r
  r   r  r   r   s          rG   re  re    s     6{aCLA-
6{Q))	W	 **e$J( E"XX&;<
::)&+jjmm4I&JEJJO ! 
 F ))$9:FK 	:	% 
& !s   	ADD	c                  .    t        j                  d       y )Nam  The passed-in `module` is on CPU and will thus have FSDP's sharding initialization run on CPU, which may be slower than on GPU. We recommend passing in the `device_id` argument for FSDP to move `module` to GPU for the sharding initialization. `module` must also be on GPU device to work with the `sync_module_states=True` flag since that requires GPU communication.)r   r    rI   rG   rg  rg    s    MM	1rI   c                 D   t        t        | |      d      }|&|j                  j                  dk7  r|j                  }nF||j                  dk7  r|}n2t	        j                  dt        j
                  j                               }|||k7  rt        d| d| d|       |S )ax  
    Determine and return this FSDP instance's compute device.

    If a device is
    specified by ``device_id``, then returns that device. Otherwise, If the
    module is already on a non-CPU device, then the compute device is that non-CPU
    device. If the module is on CPU, then the compute device is the current
    device.

    Since this method should be called after materializing the module, any
    non-CPU device should not be meta device. For now, the compute device is
    always a CUDA GPU device with its explicit index.

    Precondition: ``_check_single_device_module()`` and
    ``_move_module_to_device()``.
    Nr   r   z4Inconsistent compute device and `device_id` on rank z: z vs )rf  r   r   rS   r   r   r   r8   )r   r   r
  r=   r   r  s         rG   r  r    s    , !&.94@EU\\..%7 ,1F1K1Kv1U2N"\\&%**2K2K2MNN(^?T-TB4&d#8"9;
 	
 rI   c                 "   g }| j                         D ]D  }t        |t        d      rt        |t        d       |j	                  |j                                F |j                  d |D               t        |       t        ||t        d       y)z
    Synchronize module states (i.e. parameters ``params`` and all not-yet-synced buffers) by broadcasting from rank 0 to all ranks.

    Precondition: ``sync_module_states == True`` and ``self.process_group`` has
    been set.
    FTc              3   <   K   | ]  }|j                           y wr\   )detachrA  s     rG   rb   z2_sync_module_params_and_buffers.<locals>.<genexpr>*  s     <VEVs   r   srcN)
r  getattrFSDP_SYNCEDsetattrr  rt  rd  +_check_module_states_for_sync_module_statesr*   PARAM_BROADCAST_BUCKET_SIZE)r   r   r0   module_statesr   s        rG   r  r    sx     )+M.."v{E2FK.  1	 #
 <V<</>#	rI   c                     | D cg c]  }|j                          c}|D cg c]  }|j                          c}z   }t        |       t        ||t        d       y c c}w c c}w )Nr   ru  )rt  rz  r*   r{  )r   r  r0   r   r   params_and_bufferss         rG   _sync_module_statesr  4  sm     7==fU%,,.f=&-A&-FgA  00BC#		 > As
   AA"r|  c                 D    | rt        d | D              rt        d      y y )Nc              3   `   K   | ]&  }|j                   t        j                   d       k(   ( yw)r   N)r   r   )r`   tensors     rG   rb   z>_check_module_states_for_sync_module_states.<locals>.<genexpr>J  s&      ;He,,=s   ,.zThe module has CPU parameters or buffers when `sync_module_states=True`, which requires them to be on GPU. Please specify the `device_id` argument or move the module to GPU before passing it to FSDP.)rF  r8   )r|  s    rG   rz  rz  G  s7      ;H  C
 	
}rI   c              #      K   | j                         }	 	 t        |      }||vrt        |      s| # t        $ r Y yw xY ww)aD  
    Return an iterator over the original parameters in ``module``.

    The iterator does not return
    the parameters in ``ignored_params``, any ``FlatParameter`` s (which may be
    present due to nested FSDP wrapping), or any original parameters already
    flattened (only relevant when ``use_orig_params=True``).
    N)r1  rf  r   StopIteration)r   r   	param_genr   s       rG   r   r   T  sT      !!#IOEN*3Ee3L   s   A 4 	A AA  Ac           	          t        |       D ]A  \  }}||vst        |      rt        d| d|j                          d|j                          y)a5  
    Check that original parameters in ``fsdp_module`` have been flattened.

    The flattened parameters are made
    invisible to ``named_parameters()`` for the module hierarchy rooted at
    ``fsdp_module``. This should be called as a sanity check after flattening
    the wrapped module's parameters.
    z Found an unflattened parameter: z;  N)r   r   ri   r>   	__class__)fsdp_moduler   
param_namer   s       rG   _check_orig_params_flattenedr  j  sZ     ?{K
E&/A%/H2:,b::<.%//!24  LrI   c                 h    | t         j                  k(  rt        j                  S t        j                  S r\   )r%   r   r   allreduce_hookreduce_scatter_hook)r1   s    rG   _get_default_comm_hookr  ~  s3      0 9 99 	$$ ..rI   c                 .    t        j                  |       S )NrL   )r   rA   rL   s    rG   rT   rT     s     %%MBBrI   r\   )rX  rO  r   r   typingr   r   r   r   r   r   r	   r
   r   r   r   r   r   r   torch.distributeddistributedr^   (torch.distributed.fsdp._exec_order_utilsfsdp_exec_order_utilsr   'torch.distributed.fsdp._traversal_utils_traversal_utilsr$  2torch.distributed.fsdp.fully_sharded_data_parallelfully_sharded_data_parallelr'  torch.nnr   (torch.distributed.algorithms._comm_hooksr   torch.distributed.device_meshr   r   "torch.distributed.distributed_c10dr   $torch.distributed.fsdp._common_utilsr   r   r   r   r   r   r   "torch.distributed.fsdp._flat_paramr   r   r   r   %torch.distributed.fsdp._limiter_utilsr   torch.distributed.fsdp.apir    r!   r"   r#   r$   r%   r&   r'   torch.distributed.fsdp.wrapr(   &torch.distributed.tensor.parallel.fsdpr)   torch.distributed.utilsr*   torch.utils.hooksr+   rG  
torchdistxr,   r-   ImportErrorintr{  rx  r_   HybridShardProcessGroupTypeProcessGroupTyper   r   SHARD_GRAD_OPHYBRID_SHARD_HYBRID_SHARD_ZERO2r  r9   #NO_RESHARD_AFTER_FORWARD_STRATEGIESrH   r:   boolrR   rM   rq   r   rO   r   r   r   r   r   r   r   r   r   r   r   r   r  r	  r   r   r   r   r   r   r   r   r  r  rK  Tensorr  re  rg  r  r  r  rz  r   r  r  rA   rT   rp  rI   rG   <module>r     s
     	         C C A A F F  B E A    B	 	 	 0 D < - . ""34 #D$5$5t7H7H$HI E$"3"35P"PQR 
 5>>!7!B!B""$:$H$H!!#9#F#F((*@*T*T  !!(( 
 ""((' #  )-00#0 (0 W	0
 *%0 0 0f ))#) ) 	) )X # $   IJ I4 I I  ARAR   !++!! 
! !H++ 4d///0&  	++II+ huxx78+ %((,,-.%((//9R0SS	+ + +\I9=	: ))II) %) c5<</01	)
 ) )X II  " 44 014 n-4 *%	4
 4 4 !4  4 4 4n    		'	 	 		 	 	: 	J 	* 	 	 *    ::)): c5<</01: Hbii[$%678	:
 : : :z ))) ))) )<66x896 	^6x BF) !%((*<*<!=> 				D$$)$ 	X$:299 S 
II
%
 c5<</01
 
	
<c5<</01
 ell87II7%7 ^7 4:	7<RYYK-. ^ 
	#ELL1 ^B""-0^"	"))_",3II3%3 &3 $ELL1	3
 
3l%,, $ELL1 
	@#II#%# $ELL1# 	#
 \\#NII $$ 
	6%,, $$ 
	&

%

	

II% bll,% 
(.> C$$CCM"  s   
^ ^ ^ 