
    qii                       d dl mZ d dlZd dlZd dlZd dlZd dlmZmZ d dl	m
Z
mZmZmZmZ d dlZd dlZd dlmZ d dlmZmZ d dlmZ dd	lmZmZ dd
lmZ ddlmZm Z m!Z! ddl"m#Z#m$Z$m%Z%m&Z& ddl'm(Z(m)Z) erd dl*m+Z+m,Z,  ed      Z- G d dej\                  j^                        Z0 G d de      Z1 G d de      Z2 G d de      Z3 G d d      Z4 G d d      Z5 G d de      Z6 G d de!      Z7y)     )annotationsN)autoEnum)Any
NamedTupleOptionalTYPE_CHECKINGTypeVar)identity)Scope
TracerBase)SymT   )configdependencies)index_prevent_reordering)DefaultHandler
OpsHandlerWrapperHandler)cache_on_selfreduction_num_outputssympy_index_symbol_with_prefix
sympy_subs)opsV)CallableSequenceTc                  b     e Zd Zeej
                  d               Z fdZd fdZ fdZ	 xZ
S )InterpreterShimc                 H    t         j                  j                  t              S N)torchfxsymbolic_tracer        _/home/ubuntu/crypto_trading_bot/.venv/lib/python3.12/site-packages/torch/_inductor/loop_body.py	_dummy_gmzInterpreterShim._dummy_gm&   s     xx&&x00r'   c                    t         |   | j                         d       | | _        || _        || _        d| _        |j                  | _        d | _	        y )NF)garbage_collect_values)
super__init__r)   modulegraph
submodulesextra_traceback__getitem__
fetch_attrcurrent_node)selfr/   r0   	__class__s      r(   r-   zInterpreterShim.__init__+   sP     	)%H
$$$00 r'   c                0    || _         t        | 	  |      S r"   )r4   r,   run_node)r5   nr6   s     r(   r8   zInterpreterShim.run_node6   s    w""r'   c                x    t        j                  |       5  t        |   |i |cd d d        S # 1 sw Y   y xY wr"   )r   set_interpreter_handlerr,   run)r5   argskwargsr6   s      r(   r<   zInterpreterShim.run;   s7    &&t, 	07;//	0 	0 	0s   09)r9   ztorch.fx.Nodereturnr   )__name__
__module____qualname__staticmethod	functoolscacher)   r-   r8   r<   __classcell__r6   s   @r(   r    r    %   s5    __1  1	!#
0 0r'   r    c                       e Zd Z fdZ xZS )LightTracerc                    t         |           t        j                  j	                  | j
                        | _        t        dd       | _        i | _	        i | _
        y )N)
tracer_cls )r,   r-   r#   r$   Graphr6   r/   r   scopemodule_stacknode_name_to_scope)r5   r6   s    r(   r-   zLightTracer.__init__B   sE    XX^^t~~^>
2t_
"$r'   )r@   rA   rB   r-   rF   rG   s   @r(   rI   rI   A   s    % %r'   rI   c                  ,    e Zd ZU ded<   ded<   ded<   y)MemoryEntrystr
index_nameOptional[str]buffer_namemodeN)r@   rA   rB   __annotations__r&   r'   r(   rR   rR   J   s    O
r'   rR   c                  n    e Zd Z e       Z e       Z e       Z e       Z e       Z e       Z	 e       Z
y)MemoryUsageTypeN)r@   rA   rB   r   LOAD	LOAD_SEEDSTORESTORE_REDUCTION
INDEX_EXPRCHECK_BOUNDS	BUCKETIZEr&   r'   r(   rZ   rZ   P   s3    6DIFEfOJ6LIr'   rZ   c                      e Zd ZU dZded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   ded<   	 d4 fd	Zd5dZd Zd Zd6dZ	d7dZ
d8dZ	 	 	 	 	 	 d9dZd8dZed        Zed        Zed         Zd! Zd" Zd# Zd$ Zd% Zd& Zd' Zd:d(ZeZ	 	 d;	 	 	 	 	 	 	 d<d)Zd* Zd+ Zd, Zd- Z d4d.Z!dd/d0Z"d1 Z#d2 Z$d3 Z% xZ&S )=LoopBodyz
    Captures the body of a Loops subclass into an FX graph.  Persists any
    indexing simplifications and makes it easier to analyze loop bodies.
    zdict[str, sympy.Expr]indexing_exprsdict[str, Any]r0   zdict[str, LoopBodyBlock]	subblockszlist[sympy.Symbol]indirect_varszdict[sympy.Symbol, sympy.Expr]indirect_var_rangesLoopBodyBlock
root_blockz(dict[MemoryUsageType, list[MemoryEntry]]memory_usagecollections.Counter[str]	op_countszdict[sympy.Expr, str]indexing_exprs_nameFc                H   t         |           t        |j                               }|d t	        |       |t	        |      d  f| _        || _        || _        || _        t        |t              r| j                  |||       d | _        y | j                  ||       d | _        y r"   )r,   r-   tuplevalueslensizes	iter_varsreduce_vars
var_ranges
isinstancerc   _init_with_copy_init_with_tracingindexing)	r5   fnr=   rv   rt   ru   allow_same_symbol_in_index_flat_sizesr6   s	           r(   r-   zLoopBody.__init__m   s     	J--/0(#i.)I()


 #&$b(#  T+EF  ##B-r'   c                    | j                   sJ | j                  j                  j                  dd      d   }|j                  d   }|d   S )Ncall_methodpartial_accumulateoptargetr   num_reduction_dims)has_partial_accumulaterj   r/   
find_nodesr=   )r5   nodemetas      r(   get_original_num_rdimszLoopBody.get_original_num_rdims   sY    ****$$//%9 0 

 yy}())r'   c                    | j                   j                         | _         d| _        | j                  | j                  z   | _        g | _        | j
                  d   | j
                  d   z   t               f| _        | S )NTr   r   )rj   extract_pw_from_reductionr   rt   ru   rs   rp   r5   s    r(   r   z"LoopBody.extract_pw_from_reduction   sd    //CCE&*#$*:*::jjmdjjm3UW=
r'   c                `   i | _         i | _        d| j                  i| _        i | _        g | _        i | _        t        D ci c]  }|g  c}| _        t        j                         | _        t        | ||      | _        | j                  j                  j                  dd      | _        | `yc c}w )z9Do an FX trace of an arbitrary callable to construct self	get_indexr   r   r   N)rd   rn   r   r0   rf   rg   rh   rZ   rk   collectionsCounterrm   ri   rj   r/   r   r   )r5   r{   r=   ts       r(   ry   zLoopBody._init_with_tracing   s     #% &7CE ,;<qQU<$,,.'b$7&*oo&;&;&F&F%9 'G '
# $ =s   
B+c           	     :   |j                  ||      }|j                         D ci c]:  \  }}|t        j                  j                  j                  || j                        < c}}| _        |j                  j                         D ci c]  \  }}||j                  |        c}}| _        |j                  | _
        |j                  | _        |j                  | _        |j                  | _        |j                  j                  |       | _        |j                  | _        i |j                   }	|	j#                  d       d| j$                  i|	j                         D ci c]  \  }}||j                  |        c}}| _        yc c}}w c c}}w c c}}w )z
        _init_with_tracing() is slow, so this is a fast path in the case
        where we are just reordering/merging/splitting the args of an
        existing LoopBody.
        r   N)indexing_from_argsitemsr   r/   sizevarssimplify_with_rangesrv   rd   rf   clonerg   rh   rk   rm   rj   r   r0   popr   )
r5   otherr=   r|   rd   nameexprkvr0   s
             r(   rx   zLoopBody._init_with_copy   sZ    11$8RS -224
d !''""77dooNN
 8=7L7L7NOtq!!QWWT]*O"00#(#<#< !..**006&+&B&B#)(()
{#
,6,<,<,>?DAqq!''$-?

 P @s   ?FF$Fc                @    | j                   j                  |d      dkD  S )Nr   )rm   getr5   r   s     r(   has_opzLoopBody.has_op   s    ~~!!$*Q..r'   c           	        | }| j                   }|j                  \  }}|\  }}g |j                  j                         }t        j
                  j                  j                  ||t        |||            \  }}	}
t        j
                  j                  j                  ||t        |||            \  }}}
||k(  r||k(  r|S t        j                  ||d      \  \  }}}t        | |	|       ||      g|||d      }|S )zU
        Merge both iteration and reduction loops and return a new LoopBody.
        p)prefixTr|   )rs   varsrd   rq   r   r/   r   _simplify_loopsr   r   index_vars_no_squeezerc   )r5   old_body	old_sizesold_iter_varsold_reduce_varsold_iter_sizesold_reduce_sizesindex_exprs
iter_sizesiter_reindex_reduce_sizesreduce_reindexrt   ru   rv   new_bodys                    r(   merge_loopszLoopBody.merge_loops   s    JJ	)1&+4((9//6689&'gg&6&6&F&F$[-P'
#
L! +,''*:*:*J*J$[/CST+
'na 'L<L,LO ..z<PST	
)$n[&AB'+
 r'   c                ,   | | j                   }|\     t              }||<   |f}t        j                  |ddi\  \  }}}dfd}	t	        |	||f|||      }
t        j                  |ddi\  \  }}}t	        |
||f|||      }	|	S )z~
        Expand node on `dimension` to `new_range` and rely on index modular to avoid
        out-of-boundary access.
        r   r   c                    g t         j                  j                  |       }t        |      t              t        	      z   k(  sJ |d t               }|t              d  }t	        |      }|   z  |<    ||      S r"   )	itertoolschainfrom_iterablerr   list)
indicesindexiter_idx
reduce_idxnew_iter_idx	dimension	iter_sizer   original_rangereduce_sizes
        r(   r   z>LoopBody.expand_dimension_for_pointwise_node.<locals>.new_body  s    =ioo33G<=Eu:Y#k2B!BBBB-s9~.Hs9~/0J>L&.y&9N&JL#L*55r'   r   r   zSequence[sympy.Expr]r?   r   )rs   r   r   r   rc   )r5   r   	new_ranger   new_iter_size	new_sizesrt   ru   rv   r   	loop_body
iter_vars2reduce_vars2var_ranges2r   r   r   r   s    `            @@@@r(   #expand_dimension_for_pointwise_nodez,LoopBody.expand_dimension_for_pointwise_node   s     JJ	!*	;"9-Y#,i "K0	/;/Q/Q0
0
, K*
		6 		6 y+.
I{
	
 3?2T2T3
3
/"\K 
L1;
L
 r'   c                   ddl m} | | j                  }t        |d         t        |      k(  sJ  ||      }|\   |      }|f}t	        j
                  |ddi\  \  }}}	t        |      D 
ci c]  \  }
}||

 c}}
t        t        |            D cg c]  }|   	 c}dfd}t        |||f|	||      S c c}}
w c c}w )zD
        Reorder iteration loops and return a new LoopBody.
        r   )same_reorderr   r   r   c                    g t         j                  j                  |       }t        |      t              t              z   k(  sJ |d t               }|t              d  }D cg c]  }||   	 }} ||d      S c c}w )NTr   )r   r   r   rr   )	r   r   r   r   iinverse_orderr   r   r   s	        r(   r   z-LoopBody.reorder_iter_loops.<locals>.new_body9  s    =ioo33G<=Eu:Y#k2B!BBBB-s9~.Hs9~/0J-:;;H;HjTRR <s   (Br   )	irr   rs   rr   r   r   	enumeraterangerc   )r5   	new_orderr   r   
reorder_fnr   r   rt   ru   rv   abr   r   r   r   r   r   s                 @@@@r(   reorder_iter_loopszLoopBody.reorder_iter_loops!  s     	%JJ	9Q< C	N222!),
!*	;"9-"K0	/;/Q/Q0
0
, K*
 +4I*>?$!QA?38Y3HIaq)I	S 	S $
 	
 @Is   7CCc                j    | j                   J | j                  J | j                   | j                  fS r"   )rt   ru   r   s    r(   r   zLoopBody.varsI  s9    ~~)))+++~~t////r'   c                    t        j                  | j                  j                  fd | j                  j                         D              }|D cg c]  }|j                  D ]  }|  c}}S c c}}w )Nc              3  4   K   | ]  }|j                     y wr"   )r/   ).0blocks     r(   	<genexpr>z%LoopBody.get_nodes.<locals>.<genexpr>S  s     >UU[[>s   )r   r   rj   r/   rf   rq   nodes)r5   
all_graphsr/   r   s       r(   	get_nodeszLoopBody.get_nodesO  s[    ____""$>dnn&;&;&=>

 #-EEEEEEs   A.c                    ddl m}  ||       S )Nr   )	BoundVars)boundsr   )r5   r   s     r(   r   zLoopBody.boundsW  s     	&r'   c                    t        | j                  t        j                           D ],  }|j                  |k(  s| j
                  |j                     c S  t        |      r"   )reversedrk   rZ   r[   rV   rd   rT   KeyErrorr5   rV   entrys      r(   get_read_exprzLoopBody.get_read_expr^  sY    d//0D0DEF 	=E  K/**5+;+;<<	= {##r'   c                   t        j                  | j                  t        j                     | j                  t        j
                           D ],  }|j                  |k(  s| j                  |j                     c S  t        |      r"   )
r   r   rk   rZ   r]   r^   rV   rd   rT   r   r   s      r(   get_write_exprzLoopBody.get_write_expre  sv    __o334o==>
 	=E   K/**5+;+;<<	= {##r'   c                    | j                   t        j                     D cg c]  }| j                  |j                      c}S c c}w r"   )rk   rZ   r[   rd   rT   r5   r   s     r(   get_read_exprszLoopBody.get_read_exprsn  sF     **?+?+?@
  0 01
 	
 
s    Ac                    g }t        | j                  t        j                           D ]:  }|j                  |k(  s|j                  | j                  |j                            < |S r"   )r   rk   rZ   r[   rV   appendrd   rT   r5   rV   outr   s       r(   get_all_read_exprzLoopBody.get_all_read_exprt  s`    d//0D0DEF 	BE  K/

4..u/?/?@A	B 
r'   c                    t        j                  | j                  t        j                     | j                  t        j
                           D cg c]  }| j                  |j                      c}S c c}w r"   )r   r   rk   rZ   r]   r^   rd   rT   r   s     r(   get_write_exprszLoopBody.get_write_exprs|  sf     #!!/"7"78!!/"A"AB
  0 01
 	
 
s    A2c                   g }t        j                  | j                  t        j                     | j                  t        j
                           D ]:  }|j                  |k(  s|j                  | j                  |j                            < |S r"   )
r   r   rk   rZ   r]   r^   rV   r   rd   rT   r   s       r(   get_all_write_exprzLoopBody.get_all_write_expr  s}    __o334o==>
 	BE   K/

4..u/?/?@A	B 
r'   c           	        dt        | j                         g}|j                  | j                  j	                         D cg c]  \  }}| d|  c}}       |j                  t        j                  d| j                  fg| j                  j	                               D cg c]  \  }}|j                  |       c}}       dj                  |      S c c}}w c c}}w )Nzvar_ranges = z = body
)dictrv   extendrd   r   r   r   rj   rf   	debug_strjoin)r5   linesr   valr   s        r(   r  zLoopBody.debug_str  s     doo!6 7899L9L9R9R9TUID#c#'UV $-??doo./1E1E1G$D% %	
 yy Vs   C
$C
c                   t        | j                  t        j                           dk(  xr` t        | j                  t        j                           dk(  xr5 t        | j
                        dk(  xr | j                  j                  d      S )zx
        True of this contains only a single loads and store.
        Note, this could involve a layout change.
        r   )loadstore)rr   rk   rZ   r[   r]   r0   rj   contains_only_opsr   s    r(   is_memory_copyzLoopBody.is_memory_copy  s}     !!/"6"678A= ED%%o&;&;<=BEDOO$)E 112CD		
r'   c                    | j                   j                  |      }|s6dt        | j                         }|| j                   |<   || j                  |<   | j                  |   j                  t        |||             |S )Nr   )rn   r   rr   rd   rk   r   rR   )r5   r   mtyperV   rW   r   s         r(   add_index_exprzLoopBody.add_index_expr  s|     ''++D13t22345D-1D$$T*(,D%% ''D+t(LMr'   c                    |d   j                         r|| j                  vr|}n| t        | j                         }|| j                  |<   |S )zaNot actually for nn.Modules, but subblocks in generated code are mapped to FX call_module opcodesr   )	isnumericr0   rr   )r5   r   r   r   s       r(   add_submodulezLoopBody.add_submodule  sM    ":!fDOO&CDXc$//234D %r'   c                    t        t        j                  t        | j                              }|| j
                  vsJ | j                  j                  |       || j
                  |<   |S r"   )r   r   INDIRECTrr   rg   rh   r   )r5   sizevars      r(   add_indirectzLoopBody.add_indirect  sZ    ,T]]C@R@R<ST$22222!!#&(,  %
r'   c           
         t        |      t        |      k(  ry| j                  J | j                  j                         D ci c]  \  }}|t        |||i       c}}| _        yc c}}w )z,Swap in a variable used in indirect indexingN)rS   rz   r   r   )r5   oldnewr   r   s        r(   replace_indirectzLoopBody.replace_indirect  s[    s8s3x}}(((BF--BUBUBWX$!QJq3*55XXs   A'c                <    | j                   J | j                   |   S r"   )rz   r   s     r(   r   zLoopBody.get_index  s!    }}(((}}T""r'   c           	         g t         j                  j                  |      }t        |      t         j                        k(  sJ | j                  f       |s+t         fd|D              sJ d j                  d|       t        t         j                  j                         |            } j                  j                         D ci c]  \  }}|t        ||       c}}S c c}}w )Nc              3  :   K   | ]  }|j                   v  y wr"   )rv   )r   r   r5   s     r(   r   z.LoopBody.indexing_from_args.<locals>.<genexpr>  s      1
)*AT__$1
   zself.var_ranges=z
, indices=)r   r   r   rr   rv   allr   zipkeysrd   r   r   )r5   r   r|   r   replacementsr   r   s   `      r(   r   zLoopBody.indexing_from_args  s    9)////895zS11KE4??3KK1)S 1
.31
 .
 	-doo{',	- 
 C 4 4 6>? #11779
d *T<00
 	
 
s   C)r   c               b    | j                  ||      | _        | j                         }d | _        |S r"   )r   rz   rj   )r5   r|   r   results       r(   __call__zLoopBody.__call__  s/    //9ST"r'   c                |      fd}t        j                  t        j                        |_        |S )Nc           	     j    j                  t        j                  j                  |              y r"   )r  r   r   indirect_indexing)new_varcheckr5   r  r  wrap_negs    r(   set_indirectz5LoopBody.bind_set_indirect_shim.<locals>.set_indirect  s+    !!QUU,,WdE8Lr'   )r  r  r)  r*  )rD   partialrc   bind_set_indirect_shimr   )r5   r  r  r)  r*  r+  s   ````` r(   r-  zLoopBody.bind_set_indirect_shim  s?    	 	
 '..++
 r'   c                f    fd}t        j                  t        j                        |_        |S )Nc                F    t         j                  j                  | |      S r"   )r   r   scan)dtypesrq   
combine_fns     r(   shimz%LoopBody.bind_scan_shim.<locals>.shim  s    55::fj&99r'   )r2  )rD   r,  rc   bind_scan_shimr   )r5   r2  r3  s    ` r(   r4  zLoopBody.bind_scan_shim  s)    	: &&x'>'>:V
r'   c                j      fd}t        j                  t        j                        |_        |S )Nc                `    t         j                  j                  | j                     |      S r"   )r   r   maskedrf   )maskr   r   r5   s     r(   r3  z'LoopBody.bind_masked_shim.<locals>.shim  s#    55<<dnnT&:EBBr'   )r   )rD   r,  rc   bind_masked_shimr   )r5   r   r3  s   `` r(   r9  zLoopBody.bind_masked_shim  s*    	C &&x'@'@tL
r'   )F)r?   int)r   rc   )r   rS   )r?   rc   )r   r:  r   r:  r?   rc   r?   boolNN)r   
sympy.Exprr  rZ   rV   rU   rW   rU   )'r@   rA   rB   __doc__rX   r-   r   r   ry   rx   r   r   r   r   propertyr   r   r   r   r   r   r   r   r   r   r  r
  __repr__r  r  r  r  r   r   r$  r-  r4  r9  rF   rG   s   @r(   rc   rc   [   sc   
 *)''%%77::'' /. $)8*% 
4/*X--),-	-^&
P 0 0
 F F  $$

 

 H &*"  #	
 Y#
 =B r'   rc   c                  <    e Zd ZdZd	dZd Zd Zd
dZddZddZ	y)ri   a  
    Captures the body of a Loops subclass into an FX graph.
    In normal cases there will be a 1:1 mapping between LoopBody and
    LoopBodyBlock, however in the case of ops.masked() the masked out
    operations will manifest as an extra LoopBodyBlock.
    c                   || _         t               }|j                  dddi       }ddlm} t        t        |||      |j                        }t        j                  r2 ||| j                   j                  | j                   j                        }t        j                  |      5  t        j                   ||        d d d        |j                   | _        y # 1 sw Y   xY w)Nplaceholderr   r&   r   )IndexPropagation)r   rI   create_proxyindex_propagationrE  CountOpsCaptureIndexingrm   r   constant_and_index_propagationrv   rh   r   set_ops_handlerr   outputr/   )r5   r   r{   r=   tracer	proxy_opsrE  handlers           r(   r-   zLoopBodyBlock.__init__  s    	''ub"E	7ItV4NN
 00&--tyy/L/LG w' 	" JJr4y!	" \\
		" 	"s   $CC c           	     r   d }d }| j                   j                  D ]-  }|j                  dk(  r|rJ |}|j                  dk(  s(|rJ |}/ |sJ |sJ |j                  d   }|j                  d   }|j                  d   }|j                  d   }dt	        | j
                  j                        i}| j                   j                  |      5  | j                   j                  d|||||f       d d d        | j                   j                  |       | j                   j                  |       | S # 1 sw Y   AxY w)	N	reductionstore_reductionr   r   r   r   r   )
r/   r   r   r=   rr   r   ru   inserting_afterr   
erase_node)	r5   redr  r   reduction_typered_argbufr   
extra_metas	            r(   r   z'LoopBodyBlock.extract_pw_from_reduction%  s.   JJ$$ 	D{{k)w{{//  y	 
su"((2,jjmjjm !#dii&;&;"<

 ZZ''. 	JJ""$sC*&U	 	

e$

c"	 	s   "D--D6c                    | j                   }| j                  j                  }t        ||      j	                  t        j                               S r"   )r/   r   r0   r    r<   r   get_ops_handler)r5   r/   r0   s      r(   r$  zLoopBodyBlock.__call__A  s;    

YY))
uj155a6G6G6IJJr'   c           
        t         j                  j                  | j                  j                  | j
                        j                  }t        j                  dd|j                         j                  dd| d            S )Nz;[^\n]*rL   zdef forward(zdef ()r#   r$   GraphModuler   r0   r/   coderesubstripreplace)r5   r   r`  s      r(   r  zLoopBodyBlock.debug_strG  sa    xx##DII$8$8$**EJJvvJJL  4vQ@	
 	
r'   c                `    t        fd| j                  j                  d      D              S )Nc              3  :   K   | ]  }|j                   v   y wr"   )r   )r   r   allowed_opss     r(   r   z2LoopBodyBlock.contains_only_ops.<locals>.<genexpr>Q  s"      
 KK;&
r  r   )r   )r  r/   r   )r5   rg  s    `r(   r	  zLoopBodyBlock.contains_only_opsP  s/     


---?
 
 	
r'   c                    t         j                  t               }|j                  j                  i | j                  d|i       |S )z'Shallow copy with a new parent LoopBodyr   )ri   __new____dict__update)r5   r   copys      r(   r   zLoopBodyBlock.cloneV  s:    $$]3<<vt<=r'   N)r   rc   r{   Callable[..., Any]r=   z	list[Any])r   r;  )r   rc   )
r@   rA   rB   r?  r-   r   r$  r  r	  r   r&   r'   r(   ri   ri     s&    ".8K

r'   ri   c                      e Zd ZddZddZy)rH  c                     || _         || _        y r"   )_inner_counts)r5   innercountss      r(   r-   zCountOps.__init__^  s    r'   c                h    | j                   |xx   dz  cc<    t        | j                  |      |i |S )Nr   )rq  getattrrp  )r5   r   r=   r>   s       r(   _defaultzCountOps._defaultb  s3    Ta)wt{{D)4:6::r'   N)rr  OpsHandler[Any]rs  rl   )r   rS   r=   ztuple[Any, ...]r>   re   r?   r   )r@   rA   rB   r-   rv  r&   r'   r(   rH  rH  ]  s    ;r'   rH  c                       e Zd Zd Z	 	 	 	 	 	 d fdZddZddZddZddZddZ	d Z
d Zd	 Zd
 Z	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZ	 	 ddZd Zd ZddZd Z xZS )rI  c                @    t         |   |       || _        || _        y r"   )r,   r-   r   rM  )r5   rr  r   rM  r6   s       r(   r-   zCaptureIndexing.__init__j  s      		r'   c           	     x    | j                   j                  dd | j                  j                  ||fi |fi       S )Ncall_moduler   )rM  rF  r   r  )r5   r   r  r>   s       r(   
_add_indexzCaptureIndexing._add_indext  s@    {{''%TYY%%dE<V<>	
 	
r'   c                ~    t         j                  j                  j                  || j                  j
                        S r"   )r   r/   r   r   r   rv   )r5   r   s     r(   	_simplifyzCaptureIndexing._simplify|  s)    ww44T499;O;OPPr'   c                    | j                  |      }| j                  |t        j                  |      }| j                  j                  ||      S NrV   )r~  r|  rZ   r[   rp  r  r5   r   r   s      r(   r  zCaptureIndexing.load  sA    u%';';N{{e,,r'   c                    t        |t              sJ | j                  j                  t	        j
                  |      t        j                  |       | j                  j                  ||      S r  )
rw   r:  r   r  sympyIntegerrZ   r\   rp  	load_seedr  s      r(   r  zCaptureIndexing.load_seed  sX    %%%%		  MM% /";"; 	! 	
 {{$$T511r'   c                    | j                  |      }| j                  |t        j                  ||      }| j                  j                  ||||      S )N)rV   rW   )r~  r|  rZ   r]   rp  r  )r5   r   r   valuerW   s        r(   r  zCaptureIndexing.store  sO    u%?((d   
 {{  ueT::r'   c                    | j                  |      }| j                  |t        j                  |      }| j                  j                  |||      S r  )r~  r|  rZ   r^   rp  rR  )r5   r   r   r  s       r(   rR  zCaptureIndexing.store_reduction  sK    u%?22   
 {{**4>>r'   c                    | j                   j                  ||||      t        |      }|dkD  rt        fdt	        |      D              S S )Nr   c              3  (   K   | ]	  }|     y wr"   r&   r   r   r#  s     r(   r   z,CaptureIndexing.reduction.<locals>.<genexpr>  s     ?q?   )rp  rQ  r   rp   r   )r5   dtype	src_dtyperW  r  num_outputsr#  s         @r(   rQ  zCaptureIndexing.reduction  sI    &&uiO+N;??E+,>???r'   c                &   | j                  |      }t        |t        t        j                  f      r%| j
                  j                  t        |      |      S | j                  |t        j                        }| j
                  j                  ||      S r"   )r~  rw   r:  r  r  rp  constantr|  rZ   r_   
index_expr)r5   r   r  s      r(   r  zCaptureIndexing.index_expr  sk    u%ec5==12;;''E
E::'A'AB{{%%eU33r'   c                    | j                  |      }| j                  |t        j                        }| j                  |t        j                        }| j                  j                  ||||      S r"   )r~  r|  rZ   r`   rp  check_bounds)r5   r   r  loweruppers        r(   r  zCaptureIndexing.check_bounds  sW    u%'C'CDt_%A%AB{{''tUEBBr'   c           	        |d   | j                  |d   t        j                  |d         | j                  |d   t        j                  |d         | j                  |d   t        j                  |d         f}|-|d   | j                  |d   t        j                  |d         f}| j                  j	                  |||||||      S )z3
        See [Note: Inductor bucketize op]
        r   r   r        )r|  rZ   ra   rp  	bucketize)r5   rq   
boundariesboundary_indicesindexing_dtyperightsortersorter_indicess           r(   r  zCaptureIndexing.bucketize  s     qMOO1))&qM  
 OO1))&qM  
 OO1))&qM  

$ q	1I88fQi   F {{$$
 	
r'   c                :   | j                   j                  dd      }| j                   j                  |      | j                   j                  |<   t	        | j                   |g       | j                   j
                  |<   | j                  j                  d|||fi       S )zR
        Recursively capture the masked out body in another LoopBodyBlock
        Nmasked_subblockr{  )r   r  r9  r0   ri   rf   rM  rF  )r5   
mask_proxymasked_bodyother_proxyr   s        r(   r7  zCaptureIndexing.masked  s     yy&&t->?%)YY%?%?%E		T"$1$))["$M		D!{{''4*k!:B
 	
r'   c                    | j                   j                  |      }| j                   j                  |d      }| j                  j	                  d|||fi       t        fdt        t        |            D              S )Nr0  r{  c              3  (   K   | ]	  }|     y wr"   r&   r  s     r(   r   z'CaptureIndexing.scan.<locals>.<genexpr>  s     @1VAY@r  )r   r4  r  rM  rF  rp   r   rr   )r5   dtype_proxyr2  value_proxyr3  r   r#  s         @r(   r0  zCaptureIndexing.scan  sq     yy''
3yy&&tV4))+&	
 @c+.>(?@@@r'   c                    | j                   j                  ||||      t        fdt        t	        |            D              S )Nc              3  (   K   | ]	  }|     y wr"   r&   r  s     r(   r   z'CaptureIndexing.sort.<locals>.<genexpr>  s     ;1VAY;r  )rp  sortrp   r   rr   )r5   r1  rq   stable
descendingr#  s        @r(   r  zCaptureIndexing.sort  s7    !!&&&*E;c&k(:;;;r'   c                L    | j                   j                  |      }|d   |d   fS )Nr   r   )rp  frexp)r5   r  r#  s      r(   r  zCaptureIndexing.frexp  s)    "";/q	6!9%%r'   c                    | j                   j                  |      }| j                   j                  ||||      }| j                  j	                  d| j                   j                  |d|       |fi        |S )zx
        Flow data from tensors into indexing formulas.
        Introduce a call_module to update the indexing.
        r{  set_)r   r  r-  rM  rF  r  )r5   index_proxyr  r)  r*  r  r+  s          r(   r'  z!CaptureIndexing.indirect_indexing  sp     ii$$T*yy77T5(S  II##LD,?N		
 
r'   c                @    | j                   j                  dd|i        y )NrL  )rM  rF  )r5   r#  s     r(   rL  zCaptureIndexing.output  s      8VR@r'   )rr  rw  r   rc   rM  rI   )r   r>  r  rZ   r>   r   )r   r>  r?   r>  )r   rS   r   r>  )r   rS   r   r:  r"   r=  )rq   r   r  z.tuple[str, sympy.Expr, sympy.Expr, sympy.Expr]r  r   r  ztorch.dtyper  r<  r  z Optional[tuple[str, sympy.Expr]]r  zOptional[T]r?   r   )r  rm  )r2  z=Callable[[tuple[Any, ...], tuple[Any, ...]], tuple[Any, ...]])TT)r@   rA   rB   r   r-   r|  r~  r  r  r  rR  rQ  r  r  r  r7  r0  r  r  r'  rL  rF   rG   s   @r(   rI  rI  g  s    D  	
Q-
2;?4C 48&*/
/
 C/
 	/

 $/
 /
 1/
 $/
 
/
b	
A RA"<
&
 Ar'   rI  )8
__future__r   r   rD   r   ra  enumr   r   typingr   r   r   r	   r
   r  torch.fxr#   torch._dynamo.utilsr   torch.fx.proxyr   r   torch.utils._sympy.symbolr   rL   r   r   codegen.commonr   ops_handlerr   r   r   utilsr   r   r   r   virtualizedr   r   collections.abcr   r   r   r$   Interpreterr    rI   rR   rZ   rc   ri   rH  rI  r&   r'   r(   <module>r     s    "    	  D D   ( , * " 4 C C    2 CL0ehh** 08%* %* d h hVT Tn;~ ;nAn nAr'   