
    qi                     Z   d dl Z d dlZd dlZd dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZ d dlmZ d dlmZmZmZmZ d Z	 dd	ej2                  d
edededej6                  deej8                     defdZd	ej2                  d
ededefdZd	edee   dej2                  fdZy)    N)Optional)_get_device_module)distributed_c10d)ShardShardedTensorShardedTensorMetadataTensorProperties)ShardMetadata)
DeviceMeshDTensor	Replicater   c                     |j                         dk(  rd|  d| S |j                         dk(  r"d|  d| dt        |      j                          S d|  d| d| |z   S )Ncpuzrank:/hpu:)lowerr   current_device)rankdevice_typenum_devices_per_nodes      i/home/ubuntu/crypto_trading_bot/.venv/lib/python3.12/site-packages/torch/distributed/fsdp/_shard_utils.py_get_remote_device_strr      s    e#tfAk]++					%tfAk]!,>{,K,Z,Z,\+]^^tfAk]!D3G,G+HII    tensorr   
world_sizer   pgdevicereturnc                 R   | j                  |d      }t        |      |kD  rx||   j                         }| j                         D cg c]  }d }	}t	        j
                  | j                         d   |z        |z  |	d<   t        j                  ||	|      g}
ng }
|D cg c]  }t        |j                                }}dgt        t        j                  |D cg c]  }|d   	 c}            dd z   }dgt        |d         dz
  z  }	|D cg c]  }|g|	z   
 }}|t        j                  |      j                  n|j                  }t        t        |            D cg c]#  }t        t!        j"                  ||      ||      % }}t        |      t        |      k7  st        |      t        |      k7  r/t%        dt        |       dt        |       dt        |             t'        |||      D cg c]  \  }}}t)        |||       }}}}t+        || j                         t-        | j.                  | j0                  dt2        j4                  | j7                         	      
      }t9        j:                  |
||      S c c}w c c}w c c}w c c}w c c}w c c}}}w )z
    Shard a tensor to chunks along the first dimension. The local rank will gets its
    corresponding chunk as the local shard to create a ShardedTensor.
    r   )dimN   zQExpected chunk_sizes, chunk_offsets, and placements to have the same length, got z, F)dtypelayoutrequires_gradmemory_format
pin_memory)shards_metadatasizetensor_properties)sharded_tensor_metadataprocess_group)chunklencloner*   mathceilr   from_tensor_and_offsetslist	itertools
accumulater   _get_pg_default_devicetyperanger   distget_global_rankAssertionErrorzipr
   r   r	   r$   r%   torchcontiguous_format	is_pinnedr   +_init_from_local_shards_and_global_metadata)r   r   r   r   r   r   chunkslocal_shard_offsetslocal_shardsr.   chunk_sizes
chunk_sizedim0_offsetsd0chunk_offsetsr   r
placementsoffsetr*   	placementshard_metadatar,   s                            r   _create_chunk_sharded_tensorrQ      s    \\*!\,F
6{TTl((*$kkm,1,,YYv{{}Q/*<=D
55k7DQR 4::%4

%:K:3kJ
jmJK	r L cSQ(1,-G.:;bTG^;M; > 	//388[[  s;'(  	  Q' 	
J  ;3}--[1AS_1T{#$Bs='9&:"S_<MO
 	
 (+=+z'R #FD) 	fdI.N  4&[[]*,,==11'')

 DD.EUW ] - ;J <s$   	J	  JJJ#(J;J"device_meshc                 h   | j                         j                         } t        |j                        D cg c]  }t	                }}t        |j                        D cg c]  }t	                }}t        d      |d<   t        j                  | ||d      j                  |      S c c}w c c}w )z
    Shard a tensor to chunks along the first dimension. The local rank will gets its
    corresponding chunk as the local tensor to create a DTensor.
    r   r"   F)	run_check)rM   )	detachr0   r9   ndimr   DShardr   
from_localredistribute)r   r   rR   rD   replicate_placementsshard_placementss         r   _create_chunk_dtensorr\   `   s     ]]_""$F 27{7G7G1HIAIKII-2;3C3C-DE	EE!!9R1Ul#  	 JEs   B* B/	root_meshc                     || j                   k7  rt        d      t        t        j                  | j
                              }t               |d<   | j                  | j                   |      } | j                         S )zT
    All gather a DTensor in its sharded dimension and return the local tensor.
    z2The device mesh of a tensor should be a root mesh.r"   )rR   rM   )	rR   r<   r4   copydeepcopyrM   r   rY   to_local)r   r]   rM   s      r   _all_gather_dtensorrb   y   st     F&&&QRRdmmF$5$567J [JrN  && ! F
 ??r   )N) r_   r5   r1   typingr   r>   torch.distributeddistributedr:   torch._utilsr   r   'torch.distributed._shard.sharded_tensorr   r   r   r	   &torch.distributed._shard.sharding_specr
   torch.distributed.tensorr   r   r   rW   r   TensorintProcessGroupr   rQ   r\   rb    r   r   <module>rn      s           + .  A T TJ &*?LL?
? ? 	?
 	? U\\"? ?DLL
  	2
# \\r   