
    Ph2                     l   d dl Z d dlmZmZmZmZmZ d dlZd dlm	Z
 d dlmc mZ d dlmZ d dlmZ d dlmZmZ 	 	 ddedee
j*                     deej,                     d	ej.                  fd
Zddd e       ddedededee
j*                     deej,                     dedeedf   d	eeef   fdZddd e       ddeeef   dee
j*                     deej,                     dedeedf   d	eeef   fdZdd e       ddeeef   dee
j*                     deej,                     deedf   d	eeef   f
dZy)    N)AnyCallableDictOptionalTuple)distributed_c10d)ShardedTensor)DTensor	Replicatesharded_tensorpgdevicereturnc                 r   |t        j                         }t        j                  |      }| j	                         }| j                         d   }| j                         j                         }t        j                  ||z        |z  |z  }|t        j                  |      n|}|r|d   j                  j                         }	|	j                  j                  |j                  k7  r|	j                  |      }	||	j                         z
  }
|
dkD  r;t        j                   |	d|
g      }	n"t#        j$                  || j&                  |      }	t#        j(                  ||z  |	j&                  |      }t        j*                  ||	|       |j-                  dd|      j/                  | j                               }|S )Nr   )dtyper   )group)r   _get_default_groupdistget_world_sizelocal_shardssizenumelmathceil_get_pg_default_devicetensorflattenr   typetoFpadtorchzerosr   emptyall_gather_into_tensornarrowreshape)r   r   r   
world_sizeshards
dim_0_sizetensor_numel
chunk_size	pg_devicelocal_tensornum_paddingr   s               yC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/distributed/checkpoint/_state_dict_utils.py_all_gather_sharded_tensorr1      s   
 
z002$$R(J((*F$$&q)J!&&(..0L:
23lBjPJ7=~//36  ay''//1##y~~5'??95L <#5#5#77?55;/?@L{{n229
 [[Z  F
 	B?]]1a.66~7J7J7LMFM    Fr   r   cpu_offload
ranks_onlyiter_objectsharded_tensor_funcdtensor_funcr4   r5   .c                    t        j                  d      }t        | t              r || ||      }nt        | t              r || ||      }nt        | t         j
                  t        t        t        f      s| | }nt        | t              r4| j                         D 	
ci c]  \  }	}
|	t        |
||||||       }}	}
nht        | t        t        f      r;| D cg c]  }t        |||||||       }}t        | t              r#t        |      }nt        dt        |              |rt!        j"                  |      |v r/t        |t         j
                        r|r|j%                  |      }|S t        |t              ri nd }|S c c}
}	w c c}w )Ncpur3   zUnexpected value type )r"   r   
isinstancer	   r
   Tensorintfloatstrdictitems_iterate_state_dictlisttuple
ValueErrorr   r   get_rankr   )r6   r7   r8   r   r   r4   r5   
cpu_deviceretkeyvaluevs               r0   rB   rB   2   s    e$J+}-!+r6:	K	);F3;sE3 ?@KDW	K	& *//1
 2
U $#'%  2 	 
 
K$	/ !
 !  #'% ! 	 
 k5)*C1${2C1DEFFr*j8c5<<([&&$C J sD)btJI

s   FF
state_dictc          	      2    d }d }t        | ||||||      S )aG  
    Given a state_dict, this API gathers all the ShardedTensors or DTensors in
    the state_dict.


    Args:
        state_dict (Dict[str, Any]): the target sharded state_dict.
        pg (Optional[dist.ProcessGroup]): the process group that is used to
            gather ShardedTensor. Note that gathering a DTensor will use
            the DeviceMesh. So this argument will be ignored when gathering a
            DTensor.
        device: (Optional[torch.device]): the device that is used to
            perform allgather for ShardedTensor. Note that gathering a DTensor
            will use the DeviceMesh. So this argument will be ignored when
            gathering a DTensor.
        cpu_offload (bool): whether to offload the tensors to CPU memory. The
            default value is False.
        ranks_only: (Tuple[int, ...]): if this tuple is empty, all ranks will
            have the same state_dicts. Otherwise only ranks that in ``ranks_only``
            have the same state_dicts. Other ranks will get empty state_dicts.

    Returns:
        The gathered state dictionary.
    c                    t        j                  d      }t        | ||      }| j                         r'| j                         d   j                  j                  n|}|j                  |k7  r|j                  |      } | S |} | S )Nr:   r   )r"   r   r1   r   r   r   )rJ   r   r   rG   output_tensorlocal_shard_devices         r0   r7   z/_gather_state_dict.<locals>.sharded_tensor_func   s     \\%(
25"fE !!#  #**11 	
 #55!$$%78E  "Er2   c                 <   | j                   | j                  j                  k7  r%| j                  | j                  j                        } | j                  D cg c]  }t                }}| j                  | j                  |      } | j                         } | S c c}w )N)device_mesh
placements)r   rR   device_typer   rS   r   redistributeto_local)rJ   r   r   _rS   s        r0   r8   z(_gather_state_dict.<locals>.dtensor_func   s    <<5,,888HHU..::;E ,1+;+;<+;aik+;
<""))! # 
   =s   Br3   rB   )rL   r   r   r4   r5   r7   r8   s          r0   _gather_state_dictrY   n   s1    B"   r2   )r   r   r5   c          	      *    t        | d d ||d|      S )Nc                     | S N rJ   r   r   s      r0   <lambda>z,_offload_state_dict_to_cpu.<locals>.<lambda>       %r2   c                     | S r\   r]   r^   s      r0   r_   z,_offload_state_dict_to_cpu.<locals>.<lambda>   r`   r2   Tr3   rX   )rL   r   r   r5   s       r0   _offload_state_dict_to_cpurb      s&     '' r2   )NN) r   typingr   r   r   r   r   r"   torch.distributeddistributedr   torch.nn.functionalnn
functionalr    r   'torch.distributed._shard.sharded_tensorr	   torch.distributed._tensorr
   r   ProcessGroupr   r<   r1   rD   boolr=   r?   rB   rY   rb   r]   r2   r0   <module>rm      s    7 7      . A 8
 '+%)#!#""## U\\"# \\	#V '+%)"''99!9 9
 	""#9 U\\"9 9 c3h9 
#s(^9~ '+%)"''JS#XJ 	""#J U\\"	J
 J c3hJ 
#s(^J` '+%)"''S#X 	""# U\\"	
 c3h 
#s(^r2   