
    qi5                        d dl Z d dlZd dlmc mZ d dlmZ d dlm	Z	 ddl
mZmZ ddlmZ  e j                  e      Zdej$                  dej&                  d	efd
Zdej$                  dej&                  ddd	dfdZdej$                  d	ej.                  fdZ e	eeef             Zdej.                  d	dfdZdej.                  d	efdZdej$                  deddfdZdej$                  fdZ d Z!y)    N)is_symbolic)
OrderedSet   )configir)Vxcomm_buffer_typereturnc                    t        |       }t        |t        j                        ry|j	                         }t        |t        j
                        ryt        |t        j                        rt        |j                               syy)ze
    Check if an input can be realized as a comm buffer of the specified
    `comm_buffer_type`.
    TF)		_get_data
isinstancer   Loopsget_output_specCommBufferLayoutFlexibleLayoutr   	get_numel)r	   r
   datalayouts       c/home/ubuntu/crypto_trading_bot/.venv/lib/python3.12/site-packages/torch/_inductor/comm_lowering.pycan_realize_as_comm_bufferr   7   sc     Q<D$!!!#F&"--.&"++,[AQ5R    
group_name,torch.distributed.distributed_c10d.GroupNamec                    | j                          t        |       }t        |t        j                        sJ |j                         }t        |t        j                        ryt        |t        j                        st        d| d      t        |j                               rt        d| d      t        j                  |||      |_        y)z
    Realize an input as a comm buffer of the specified `comm_buffer_type`.

    Specifically, this realizes the underlying buffer if it's still unrealized
    and changes the layout of the buffer to `ir.CommBufferLayout`.
    NzOA buffer can only be realized as a comm buffer if it has `FlexibleLayout` (got ).zGA buffer with symbolic shape cannot be converted to a comm buffer (got )r   r
   r   )realizer   r   r   Bufferr   r   r   AssertionErrorr   r   r   )r	   r
   r   bufferr   s        r   realize_as_comm_bufferr!   M   s     IIKq\Ffbii(((##%F&"--.fb//0))/4
 	

 6##%&""(-
 	

 '')FMr   c                    t        | j                  t        j                        rR| j                  j	                         }t        |t        j                  t        j
                  f      sJ |j                  S t        | j                  t        j                        r| j                  j                  S t        d| j                   d      )Nz\Expect the data attr of a `TensorBox` to be either an `ir.BaseView` or `ir.StorageBox` (got r   )r   r   r   BaseViewunwrap_view
MutableBox
StorageBoxr   )r	   nodes     r   r   r   s   s    !&&"++&vv!!#$bmm <===yy	AFFBMM	*vv{{889xrC
 	
r   c                 |    t         j                  t        t        j                        | j                         f       y)z
    If a non-blocking collective is lowered as a blocking collective, the wait
    node in the original graph becomes useless and we can skip the lowering it.
    N)_bufs_to_skip_waitaddidr   graphget_namer	   s    r   mark_as_skip_waitr/      s%    
 BqwwK67r   c                 `    t        t        j                        | j                         ft        v S N)r+   r   r,   r-   r)   r.   s    r   should_skip_waitr2      s!    qwwK&*<<<r   inp	reduce_opc                 D   ddl m} | j                         | j                         j                  z  }t
        j                  j                  xrT  ||      xrJ t        | t        j                  j                        xr$ |dk(  xr |t
        j                  j                  k  S )Nr   )is_symm_mem_enabled_for_groupsum)#torch.distributed._symmetric_memoryr6   r   	get_dtypeitemsizer   _collectiveauto_selectr   r   CommBufferTypeSYMM_MEM#one_shot_all_reduce_threshold_bytes)r3   r4   r   r6   inp_sizes        r   $_should_lower_as_one_shot_all_reducerA      s    
 R}}!9!99H&& 	O)*5	O&sB,=,=,F,FG	O 	O **NNNr   c           	      @   t        | t        j                  j                  |       t	        j
                  t        j                  j                  t        j                  j                  t        j                  j                  j                  j                  | ||            S r1   )r!   r   r=   r>   pytreetree_map	TensorBoxcreateFallbackKerneltorchopssymm_memone_shot_all_reducedefault)r3   r4   r   s      r   _one_shot_all_reducerM      sn    3 1 1 : :JG??

  II22::		
 r   c            	         	 t         j                  j                  j                   ddlmm	m
mm fd} t         j                  j                   | j                        dt        j                  dt        dd	d
t        j                  ffd       } | j                         dt        j                  dt        dd	d
t        j                  ffd       } | j"                        fd       } | j$                        fd       }d
t        j&                  fd | j(                        fd       } | j*                        fd       } | j,                        fd       } | j.                        fd       } | j0                        fd       }	 | j2                        fd       }
 | j4                        fd       } | j6                        fd       } | j8                        fd       } | t         j                  j:                  j<                        fd       } | j>                        fd       }y# t        $ r t
        j                  d       Y yw xY w)z4
    Register lowerings for the comm subsystem.
    zRInductor support for distributed collectives depends on building torch.distributedNr   )add_layout_constraintcloneconstrain_to_fx_stridescopy_register_loweringc                 &     |         |       S r1    )fnrO   rQ   rS   s    r   register_comm_loweringz7register_comm_lowerings.<locals>.register_comm_lowering   s    b"9: $$r   r3   r4   r   r   r   c                    t        | ||      rt        | ||      S  |       } t        j                  rG| j	                          t
        j                  j                  j                  | j                                t        j                  j                  |       } t        j                  j                  j                  j                   | ||       | S r1   )rA   rM   r    reorder_for_compute_comm_overlapr   r   r,   no_fuse_buffer_namesr*   r-   r   ExternKernelrequire_contiguous_AllReduce_Kernelcreate_inplaceall_reduce_rL   )r3   r4   r   c10drP   s      r   _all_reducez,register_comm_lowerings.<locals>._all_reduce   s     0Y
K'Y
CC Cj22 KKMGG((,,S\\^<oo005 	++$$		
 
r   c                    t        | ||      r! | t        | ||            }t        |       | S t        j                  j                  |       } t        j                  j                  j                  j                  | ||       | S r1   )
rA   rM   r/   r   r[   r\   r]   r^   r_   rL   )r3   r4   r   retr`   rR   s       r   _all_reduce_z-register_comm_lowerings.<locals>._all_reduce_   s     0Y
K$S)Z@C c"J oo005
++$$		
 
r   c                     | D cg c]
  } |       } }t         j                  j                  j                  j                  | ||       | S c c}w r1   r   _CollectiveKernelr^   all_reduce_coalesced_rL   )inputsr4   r   r3   r`   rP   s       r   _all_reduce_coalescedz6register_comm_lowerings.<locals>._all_reduce_coalesced  sR    (./%*//
++&&..		
  0s   Ac                 t    t         j                  j                  j                  j                  | ||       | S r1   rf   )ri   r4   r   r`   s      r   _all_reduce_coalesced_z7register_comm_lowerings.<locals>._all_reduce_coalesced_  s6    
++&&..		
 r   c                     t        j                  j                  | |g| }t        |t         j                        sJ t         j
                  j                  |      S r1   )r   rg   create_out_of_placer   IRNoderE   rF   )kernelri   argsr'   s       r   _create_out_of_placez5register_comm_lowerings.<locals>._create_out_of_place  sH    ##77NN$		***||""4((r   c                 B     j                   j                  | ||      S r1   )all_gather_into_tensorrL   )r3   
group_sizer   rr   r`   s      r   _all_gather_into_tensorz8register_comm_lowerings.<locals>._all_gather_into_tensor  s(    #''//	
 	
r   c           	          t        j                  t        j                  j                  t        j
                  j                  j                  j                  | ||            S r1   )	rC   rD   r   rE   rF   rg   rn    all_gather_into_tensor_coalescedrL   )ri   ru   r   r`   s      r   !_all_gather_into_tensor_coalescedzBregister_comm_lowerings.<locals>._all_gather_into_tensor_coalesced&  sM    LL  4455==	
 	
r   c                x    t         j                  j                  j                  j                  | |||       |S N)out)r   rg   r^   all_gather_into_tensor_outrL   )r3   ru   r   r|   r`   s       r   _all_gather_into_tensor_outz<register_comm_lowerings.<locals>._all_gather_into_tensor_out2  s>    
++++33 	, 	
 
r   c                 D     j                   j                  | |||      S r1   )reduce_scatter_tensorrL   )r3   r4   ru   r   rr   r`   s       r   _reduce_scatter_tensorz7register_comm_lowerings.<locals>._reduce_scatter_tensor=  s+    #&&..
 	
r   c                z    t         j                  j                  j                  j                  | ||||       |S r{   )r   rg   r^   reduce_scatter_tensor_outrL   )r3   r4   ru   r   r|   r`   s        r   _reduce_scatter_tensor_outz;register_comm_lowerings.<locals>._reduce_scatter_tensor_outG  sA    
++**22 	, 	
 
r   c           
          t        j                  t        j                  j                  t        j
                  j                  j                  j                  | |||            S r1   )	rC   rD   r   rE   rF   rg   rn   reduce_scatter_tensor_coalescedrL   )ri   r4   ru   r   r`   s       r    _reduce_scatter_tensor_coalescedzAregister_comm_lowerings.<locals>._reduce_scatter_tensor_coalescedS  sP    LL  4444<<	
 		
r   c                 D     j                   j                  | |||      S r1   )all_to_all_singlerL   )r3   output_split_sizesinput_split_sizesr   rr   r`   s       r   _all_to_all_singlez3register_comm_lowerings.<locals>._all_to_all_single`  s+    #""**
 	
r   c                      |       } t         j                  j                  j                  j                  | ||       | S r1   r   rg   r^   
broadcast_rL   )r3   srcr   r`   rP   s      r   
_broadcastz+register_comm_lowerings.<locals>._broadcastj  s:    Cj
++OO##S#z	
 
r   c                 t    t         j                  j                  j                  j                  | ||       | S r1   r   )r3   r   r   r`   s      r   _broadcast_z,register_comm_lowerings.<locals>._broadcast_r  s1    
++OO##S#z	
 
r   c                 t     t         j                  j                  j                  j                  | |||      S r1   )rH   rI   _dtensorshard_dim_alltoallrL   )r3   
gather_dim	shard_dimr   rr   s       r   _shard_dim_alltoallz4register_comm_lowerings.<locals>._shard_dim_alltoally  s5    #II1199
 	
r   c                     t        |       r| S t        j                  j                  j                  j
                  |        | S r1   )r2   r   _WaitKernelcreate_waitwait_tensorrL   )r3   r`   s    r   _wait_tensorz-register_comm_lowerings.<locals>._wait_tensor  s5    C J
""4#3#3#;#;SA
r   ) rH   rI   _c10d_functional
all_reduceAttributeErrorloginfoloweringrO   rP   rQ   rR   rS   r   rE   strr_   all_reduce_coalescedrh   ro   rt   rx   r}   r   r   r   r   	broadcastr   r   r   r   )rW   ra   rd   rj   rl   rv   ry   r~   r   r   r   r   r   r   r   r   rr   rO   r`   rP   rQ   rR   rS   s                   @@@@@@@r   register_comm_loweringsr      s   		""-- % 99%%DDOO,\\ C 
	 -< D,,-\\ C 
	 .0 D556 7 D667 8)ryy )
 D778
 9
 DAAB	
 C	
 D;;< = D667
 8
 D::;	 <	 D@@A

 B

 D223
 4
 DNN+ , DOO, - EII..AAB
 C
 D,,- .]   	
 	s   $I( (J	J	)"loggingrH   torch.utils._pytreeutils_pytreerC   torch._inductor.utilsr   torch.utils._ordered_setr    r   r   virtualizedr   	getLogger__name__r   rE   r=   boolr   r!   ro   r   tupleintr   r)   r/   r2   rA   rM   r   rU   r   r   <module>r      s)     $ $ - /   g!T	||')'8'8	,#	||#''# ?# 
	#L
 
")) 
  1Zc3h02 8 8t 8=		 =d =	 ?"
bll 
Yr   