o
    Hh                     @   s  d dl Z d dlZd dlZd dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZ d dlmZ d dlmZmZmZmZ dd	 Z	dd
ejdedededejdeej defddZd
ejdededefddZd
edee dejfddZdS )    N)Optional)_get_device_module)distributed_c10d)ShardShardedTensorShardedTensorMetadataTensorPropertiesShardMetadata)
DeviceMeshDTensor	Replicater   c                 C   s`   |  dkrd|  d| S |  dkr#d|  d| dt|  S d|  d| d| |  S )Ncpuzrank:/hpu:)lowerr   current_device)rankdevice_typenum_devices_per_node r   W/var/www/vscode/kcb/lib/python3.10/site-packages/torch/distributed/fsdp/_shard_utils.py_get_remote_device_str   s
   r   tensorr   
world_sizer   pgdevicereturnc              
      sx  | j |dd}t||kr4||  }dd |  D t|  d | | d< t||g}ng }dd |D }	dgtt	
dd |	D dd  }
dgt|	d d	  fd
d|
D }|du rntjn|j  fddtt|	D }t|	t|  krt|ksJ  J dd t||	|D }t||  t| j| jdtj|  dd}tj||dS )z
    Shard a tensor to chunks along the first dimension. The local rank will gets its
    corresponding chunk as the local shard to create a ShardedTensor.
    r   )dimc                 S   s   g | ]}d qS r   r   .0_r   r   r   
<listcomp>-   s    z0_create_chunk_sharded_tensor.<locals>.<listcomp>c                 S   s   g | ]}t | qS r   )listsize)r"   chunkr   r   r   r$   4   s    c                 S   s   g | ]}|d  qS r    r   )r"   
chunk_sizer   r   r   r$   6   s    N   c                    s   g | ]}|g  qS r   r   )r"   d0)offsetsr   r   r$   9   s    c                    s    g | ]}t t| qS r   )r   distget_global_rank)r"   r)r   r   r   r   r   r$   ?   s    
c                 S   s   g | ]\}}}t |||qS r   r	   )r"   offsetr&   	placementr   r   r   r$   H   s    
F)dtypelayoutrequires_gradmemory_format
pin_memory)shards_metadatar&   tensor_properties)sharded_tensor_metadataprocess_group)r'   lencloner&   mathceilr   from_tensor_and_offsetsr%   	itertools
accumulater   _get_pg_default_devicetyperangezipr   r   r2   r3   torchcontiguous_format	is_pinnedr   +_init_from_local_shards_and_global_metadata)r   r   r   r   r   r   chunkslocal_shardlocal_shardschunk_sizesdim0_offsetschunk_offsets
placementsshard_metadatar9   r   )r   r   r,   r   r   _create_chunk_sharded_tensor   sP   
(
rR   device_meshc                 C   sZ   |    } dd t|jD }dd t|jD }td|d< tj| ||ddj|dS )	z
    Shard a tensor to chunks along the first dimension. The local rank will gets its
    corresponding chunk as the local tensor to create a DTensor.
    c                 S      g | ]}t  qS r   r   r!   r   r   r   r$   j       z)_create_chunk_dtensor.<locals>.<listcomp>c                 S   rT   r   rU   r!   r   r   r   r$   k   rV   r   r)   F)	run_check)rP   )detachr<   rD   ndimDShardr   
from_localredistribute)r   r   rS   replicate_placementsshard_placementsr   r   r   _create_chunk_dtensor\   s   
r_   	root_meshc                 C   sD   || j ks	J dtt| j}t |d< | j| j |d} |  S )zT
    All gather a DTensor in its sharded dimension and return the local tensor.
    z2The device mesh of a tensor should be a root mesh.r)   )rS   rP   )rS   r%   copydeepcopyrP   r   r\   to_local)r   r`   rP   r   r   r   _all_gather_dtensoru   s   
rd   )N) ra   r@   r=   typingr   rF   torch.distributeddistributedr-   torch._utilsr   r   'torch.distributed._shard.sharded_tensorr   r   r   r   &torch.distributed._shard.sharding_specr
   torch.distributed.tensorr   r   r   rZ   r   TensorintProcessGroupr   rR   r_   rd   r   r   r   r   <module>   sX   
>
