
    qi                   `   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZm Z  d dl!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' d dl(m)Z) d dl*Z+d dl,Z+d dl-m.c m/Z0 d dl1m2Z2 d dl+m3Z3 d dl4m5Z5 d dl6m7Z7m8Z9m	Z:m.Z; d dl<m=Z= d dl>m?Z? d dl@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZI d dlJm8ZK d dlLmMZM d dlNmOZOmPZPmQZQmRZR d dlSmTZTmUZUmVZV d dlWmXZXmYZYmZZZm[Z[ d dl\m]Z] d dl^m_Z_m`Z` d dlambZbmcZcmdZdmeZemfZfmgZg d dlhmiZi d dljmkZkmlZlmmZmmnZnmoZompZpmqZqmrZrmsZs d dltmuZu d dlvmwZw d d lxmyZy d d!lzm{Z{ d d"l,m|Z| d d#l}m~Z~mZ d d$lmZ d d%lmZ d d&lmZ d'd(lmZ d'd)lmZmZ d'd*lmZ d'd+lmZ d'd,lmZ d-d.lm8Z8mZmZ d-d/lmZmZ d-d0lmZ d-d1lmZ d-d2lmZ d-d3lmZ d-d4lmZmZ d-d5lmZ d-d6lmZ d-d7lmZmZ d-d8lmZ d-d9lmZ d-d:l.mZmZmZmZmZmZmZmZmZ d-d;lmZ er"d d<lmZmZmZ d d=lamZ d d>lmZ d d?lmZ d-d@lmZ  e$dA      Z edB      Zes e8j                         s	ddCZddDZnd dElmZmZ er1d dlZd dFlmZmZmZ e eee   gee+j                     f   eee   ef   Z G dG dHej                        Ze G dI dJ             ZddKZddLZ eի       Zeאj                  Zeאj                  Zeאj                  Z e	j                  e߫      Ze+j                  j                  edM      Ze+j                  j                  edN      Ze+j                  j                  edO      Ze+j                  j                  edP      Ze+j                  j                  edQ      ZddRZddSZddTZddUZ ej                  d      ddV       Zej                  ddW       ZddXZ	 	 	 	 	 	 	 	 ddYZ	 d	 	 	 	 	 dd[Z	 	 	 	 	 	 dd\Z	 d	 	 	 	 	 dd]Zddd^Z	 	 	 d	 	 	 	 	 	 	 	 	 dd`ZddaZ	 	 	 	 ddbZ	 	 	 	 	 	 ddcZ	 d	 	 	 	 	 	 	 dddZ	 d	 	 	 ddeZej                  ddf       Z G dg dhe&dZi      Z G dj dke%      Z	 	 	 	 	 	 	 	 ddlZ  edmn      	 	 	 	 	 	 	 	 ddo       Z G dp dq      Z G dr dse      Z G dt due      Z	 	 	 	 	 	 	 	 	 	 ddvZ	 	 	 	 	 	 ddwZ	 ddxdxdxdy	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddzZdd{Z	 	 	 	 	 	 	 	 dd|Z		 d	 	 	 	 	 	 	 dd}Z
e df	 	 	 	 	 	 	 	 	 dd~Z ed       Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZddZ	 	 	 	 	 	 	 	 ddZddZ ed_       G d d             ZddZe dZf	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZe f	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 ddZe dddZf	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 ddZ	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 	 	 	 	 	 	 ddZddZ	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 	 	 ddZddZ	 ddd	 	 	 	 	 	 	 	 	 ddZy)    )annotationsN)ABCabstractmethod)defaultdict)AbstractContextManager)	dataclass)currentframe)count)
attrgetter)AnyOptionalTYPE_CHECKINGTypeVarUnion)Neveroverride	ParamSpecProtocol	TypedDictUnpack)mock)#min_cut_rematerialization_partition)fx)enable_python_dispatcher)compiled_autogradconfigloggingutils)get_interface_for_device)wrap_compiler_debug)	chromium_event_timedCompileEventLoggercountersdetect_fake_modedynamo_timedflatten_graph_inputsget_metrics_contextlazy_format_graph_codeset_feature_use)r   )!unwrap_tensor_subclass_parameters)aot_export_moduleGraphOutputNamemake_boxed_funcSerializableAOTDispatchCompiler)	code_hashFxGraphCacheoutput_code_log)BoxedDeviceIndexformat_default_skip_message#log_cudagraph_skip_and_bump_counterPlaceholderInfo)CustomPartitionerFn)"create_mapping_pre_post_grad_nodessave_args_for_compile_fx_inner)CompiledAOTICompiledFxGraphCompiledFxGraphConstantsWithGmget_expanded_dimsindex_expanded_dims
OutputCode)	cache_dir)		BoxedBoolcount_tangentsfresh_cacheget_all_devices	InputTypeis_gpushould_assume_input_aligned should_use_remote_fx_graph_cachetensor_is_aligned)FakeScriptObject)is_opaque_type)trace_structured)compile_time_strobelight_meta)GraphModule)free_unbacked_symbolsSymExprPrinter)FakeTensorProp)_WaitCounter)
OrderedSet   )aot_autograd)ShortenTraceback	SkipFrame)_use_lazy_graph_module)_PyTreeCodeGen)
has_triton   )r   distributed_autotunemetrics)get_wrapper_codegen_for_deviceinit_backend_registration)DebugContext)select_decomp_table)InductorError)joint_graph_passes)post_grad_passesview_to_reshape)pre_grad_passes)GraphLowering)get_device_typeIRNode)complex_memory_overlap)TritonBundler)	align_inputs_from_check_idxsclone_preserve_stridescopy_misaligned_inputs get_cloned_parameter_buffer_name%get_first_incompatible_cudagraph_node#maybe_get_suppress_shape_guards_ctxoutput_noderemove_unaligned_input_idxsshape_env_from_inputs)V)Callable	GeneratorSequence)_StrideExprStr)
OpOverload)Weights)ExternKernelNode_P_Tc                "    t         j                  S N)dynamo_utilsidentityattrs    `/home/ubuntu/crypto_trading_bot/.venv/lib/python3.12/site-packages/torch/_inductor/compile_fx.pytime_and_logr      s    $$$    c                      y r    )argskwargss     r   log_optimus_to_scubar      s    r   )r   r   )FQNGraphInputNameGraphSignaturec                      e Zd ZdZdZdZy)FxCompileModer   rZ   rS   N)__name__
__module____qualname__NORMAL	SERIALIZE
SUBPROCESSr   r   r   r   r      s    F IJr   r   c                  ,    e Zd ZU ded<   ded<   ded<   y)FxCompileConfigr   modebool	use_asyncuse_progressiveNr   r   r   __annotations__r   r   r   r   r      s    
Or   r   c                    d} t         j                  j                  |       }|t        t        j
                  dd      S d}d}|j                         j                  d      rd}|dd  }|j                         j                  d      rd}|dd  }	 |j                         }t        t        |   ||      S # t        $ r dd l
} |j                  t              }|j                  d	|| d
j                  t        d t        j                   D                           t         j                  j#                  |        t        t        j
                  dd      cY S w xY w)NTORCHINDUCTOR_FX_COMPILE_MODEFzprogressive+T   zasync+   r   z>Invalid value of %s for %s. Expected one of %s. Using default.z, c              3  2   K   | ]  }t        |        y wr   )repr.0xs     r   	<genexpr>z+_fx_compile_mode_default.<locals>.<genexpr>   s     HT!WHs   )osenvirongetr   r   r   lower
startswithupperKeyErrorr   	getLoggerr   errorjoinsorted__members__pop)namevaluer   r   r   logs         r   _fx_compile_mode_defaultr      s)   *DJJNN4 E}}33UEBBIO{{}/bc
{{})	ab	C}U3YPP Cg)		LIIfHm.G.GHHI		
 	

t}33UEBBCs   #B4 4BEEc                     ddigS )Nmax_autotuneTr   r   r   r   _get_progression_configsr      s     
 r   
perf_hintspre_grad_graphspost_grad_graphscudagraph_static_inputsinductor_metricsc                    t         j                  j                  j                         }t	        t        |             }|r|j                  s|S |j                  j                  S r   )torch_guardsTracingContexttry_getlistrangefw_metadatastatic_input_indices)	num_fixedcontextfixeds      r   get_static_input_idxsr      sM    
 mm**224Gy!"E'--333r   c                $   | j                   j                  d      d   }g }t        |j                  d   t        j
                  j                        s|j                  d   }n|j                  }|D ]  }t        |t        j
                  j                        rW|j                  j                  d      x}:t        |t        j                        r |j                  |j                                ~|j                  d         ||j                  d<   y )Noutputopr   valoriginal_output_strides)graph
find_nodes
isinstancer   r   r   Nodemetar   Tensorappendstride)gmrq   output_stridesoutput_node_argsr   r   s         r   record_original_output_stridesr     s    ((%%%215KNk&&q)588==9&++A.&++" 	(vuxx}}-..;3-!!#**,/ !!$'	( 3AK./r   c                    | j                   j                  dt        j                  j                  j
                        D ]0  }t        | |j                  d   j                        }t        |       2 t        |        y )Ncall_functionr   targetr   )r   r   r   opshigher_orderinvoke_subgraphgetattrr   r   )_recursive_record_original_output_stridesr   )r   nodesubgraphs      r   r   r     sh    ##599#9#9#I#I $  < 2tyy|2231(;	< #2&r   c           	        | j                   j                  dt        j                  j                  j
                        D ]  }t        | |j                  d   j                        }|j                   j                  d      D ]r  }t        t        |j                  d               D cg c]8  }t        |j                  d   |   t        j                  j                        r|: c}|j                  d<   t t        |        y c c}w )Nr   r   r   r   r   user_visible_output_idxs)r   r   r   r   r   r   r   r   r   r   lenr   r   r   r   *_recursive_record_user_visible_output_idxs)r   r   r   idxs       r   r   r   %  s    ##599#9#9#I#I $  = 2tyy|223NN---: 	D !TYYq\!235diil3/? 5DII01	 	38<=5s   (=Dc                 4    t        j                  t              S r   )dynamo_loggingget_step_loggerr   r   r   r   _step_loggerr   5  s    ))#..r   c                    t         j                  j                         rgt         j                  j                  j                  j
                  s8t         j                  j                         dk\  rt        j                  d       y y y y )N)   r   zTensorFloat32 tensor cores for float32 matrix multiplication available but not enabled. Consider setting `torch.set_float32_matmul_precision('high')` for better performance.)	r   cudais_availablebackendsmatmul
allow_tf32get_device_capabilitywarningswarnr   r   r   _warn_tf32_disabledr   :  sc     	

!##**55JJ,,.&8d	
 9 6 	"r   c           
        t        | j                  d      D cg c]  \  }}|	 c}}      j                  t        | j                  d      D cg c]  \  }}|	 c}}             dfd}|j                  j
                  D ]F  }|j                  dk(  s|j                  }|j                  d      s|j                  d      sCt        | |      sP t        |      |      } t        |      |       }t        |t              r)t        |t              rb|j                  |j                  u rJ|j                  |j                  k(  r0|j                  |j                  k(  rt!        j"                  ||      r|j                  d      rdnd}	 ||j                  |	      }
|	 |
 }||_        t%        |||       j'                  |       I yc c}}w c c}}w )	a  
    In aot_export_module (make_fx), we create get_attr nodes with name prefix
    "_tensor_constant" and "_torchbind_obj". See Tracer.create_arg() in
    torch/fx/_symbolic_trace.py

    However, this might result in name collision if the original mod already
    has a different buffer with the same name.

    We resolve this potential name collision here by changing the target name
    with a new number post fix.
    Fremove_duplicatec                .   d}| j                   D ]  }|j                  dk(  s|j                  j                  |      s/t	        |j                        t	        |      kD  sQ|j                  j                  |      d   }|j                         st        |t        |            } D ]f  }|j                  |      st	        |      t	        |      kD  s-|j                  |      d   }|j                         sRt        |t        |            }h |dz   S )Nr   get_attrrZ   )	nodesr   r   r   r   splitisdigitmaxint)r   prefixir   post_fixkeyexisting_keyss         r   find_smallest_iz0_resolve_name_collision.<locals>.find_smallest_i[  s    KK 	2Dww*$)?)?)Gt{{#c&k1#{{008<H'')3x=1	2 ! 	2C~~f%s8c&k)"yy04H'')3x=1	2 1ur   r  _tensor_constant_torchbind_objN)r   zfx.Graphr  strreturnr
  )rR   named_parametersupdatenamed_buffersr   r  r   r   r   hasattrr   r   rI   real_objdevicedtyper   equalsetattradd)modr   r   r   r  r   target_name	gm_targetmodel_targetr  new_idnew_target_namer  s               @r   _resolve_name_collisionr%  G  s    "33U3KL)$LM #*;*;U*;*STYT3DTU   $/77j ++K))"!,,-=>3,/
;/3I2:k237L)%56|-=>!**l.C.CC  L$7$77OO|'9'99KK	<8  ))*<= #% 
 %RXXv6F!'1O)DKB3o.I$/- 	M Us   G
Gc                   ddl m}m} t        | |       i }| j	                  d      D ]   \  }}|||<    |||||j
                         " | j                  d      D ]   \  }}|||<    |||||j                         " |j                  j                  d      }	g }
|	D ]  }|j                  }||j                  v r!|j                  |   }|
j                  |       >||j                  v rE|j                  |   }|
j                  |       t        ||         |j                  t!        |      <   ||j"                  v sJ |
j                  d         ddlm} t)        |j                  j+                         j,                  d         }g }|j.                  }|j0                  }|j2                  }t5        |      D ]f  \  }}d }|t7        |      t7        |      z   t7        |      z   k  r(t9        |j                        }||v r||   }n	||v r||   }|j                  |       h  |||
|t;        j<                         d       }t?        |jA                               |j                  d	<   |S )
Nr   )_assign_attr	_AttrKindFr  )	attr_kindplaceholderr   )_unliftmutated_named_buffers)!torch.export.unflattenr'  r(  r%  r  	PARAMETERr  BUFFERr   r   r   inputs_to_parametersr   inputs_to_buffersrl   r   rn   user_inputstorch.export._unliftr+  tuplerq   r   buffers_to_mutateuser_inputs_to_mutateoutput_tokens	enumerater   r,   pytreetreespec_leafrR   values)r  r   graph_signaturer'  r(  
state_dictr   parambufferplaceholder_nodeslifted_inputsr   	node_nameparameter_namebuffer_namer+  outputsmutated_outputsbuffer_mutationsuser_input_mutationsr7  r   outr   unlifted_gms                            r   _unlift_graphrK    s    ?C$OQJ++U+C 
e 
4))		

 ))5)A 
f!
4&&		

 ++}+=)+M " 'II	<<<,AA)LN  0/;;;);;IFK  -&z+'>? GG4[AB  ; ;;;;  &' -).rxx/C/C/E/J/J1/M)NGO&88*@@#11Mg& 
&S6:%&-A)BBSEWWW"388,D''(.--,T2u%
& 
K 1;;K;R;R;T0UK,-r   Fc              #    K   t        d | j                  j                  d      D              }t               }| j                         D ]@  \  }}||v st	        |t
        j                  j                        s0|j                  |       B |rl| j                  j                  dt
        j                  j                  j                        D ]*  }|j                  |j                  d   j                         , |E d {    y 7 w)Nc              3  4   K   | ]  }|j                     y wr   )r   r   s     r   r   z&_get_subgraph_names.<locals>.<genexpr>  s      55s   r  r   r   r   r   )rR   r   r   named_childrenr   r   r   rM   r  r   r   r   discardr   r   )r   skip_invoke_subgraphall_subgraph_namesfx_subgraph_names
child_namechild_moduler   s          r   _get_subgraph_namesrU    s      +5 5((---<5 + *4$&$5$5$7 . 
L ++
%((..1
 !!*-. HH''uyy'='='M'M ( 
 	;D %%diil&9&9:	;
 !  s   AD$D9BD?D Dc                F   t        ddd      5  t        j                  s| cd d d        S t        j                  }t        j                  }t        |       D ]'  }t        | |      }t        |d      }t        | ||       ) t        | |||      cd d d        S # 1 sw Y   y xY w)N_recursive_pre_grad_passesTpre_grad_pass_time_uslog_pt2_compile_eventdynamo_compile_column_usr   )
r%   r   use_pre_grad_passesadd_pre_grad_passesremove_pre_grad_passesrU  r   rW  r  re   )r   example_inputs
add_passesremove_passessubgraph_namer   new_subgraphs          r   rW  rW    s     
$"!8
 N
 ))N N //
5504 	5Mr=1H5hCLB|4		5
 r>:}MN N Ns   BA"BB c                    t        ddd      5  t        j                  s
	 d d d        y t        | |      D ]  }t	        | |      }t        ||        t        |        d d d        y # 1 sw Y   y xY w)N_recursive_joint_graph_passesTjoint_graph_pass_time_usrY  )r%   r   use_joint_graph_passesrU  r   re  rb   )r   rP  rb  r   s       r   re  re    s~     
'"!;
 
 ,,  15IJ 	JMr=1H)(4HI	J 	2#     A'4A''A0c                    t        ddd      5  t        j                  s
	 d d d        y t        |       D ]  }t	        | |      }t        ||        t        | |       d d d        y # 1 sw Y   y xY w)N_recursive_post_grad_passesTpost_grad_pass_time_usrY  )r%   r   use_post_grad_passesrU  r   rj  rc   )r   is_inferencerb  r   s       r   rj  rj  '  sz    	%"!9
 +
 **+ + 14 	@Mr=1H',?	@ 	\*+ + +rh  Tc                f   ddl m}m}m}m}m}  || |||      }	| |	       nd}
t        t        |	j                  j                        d   j                  d         D ci c]  \  }}|j                  | }}}g }g }i }| j                  j                  D ]V  }|j                  |v r|j                  |       #|j                  |   |k(  s6|j                  dk7  sF|j                  |       X |D ]B  }d|j                  z   } || |||
||j                        nd|       ||j                     ||<   D |ddd   D ]X  }|j                  r/|j                  D ]  }|j                  |   |k(  rJ d| d        >| j                  j!                  |       Z | j#                          |	|fS c c}}w )	a  
    This function takes an GraphModule input "gm".
    The gm will be split into 2 components,
      1) const_gm, which consists the subgraph of gm that can be constant folded.
      2) gm (being inplace modified,) which returns the graph after constant folding.

    If an additional "lifted_constants" argument is passed in, we will assume the gm has
    been lifted and run the transformation accordingly.

    When a "skip_folding_node_fn" callback is passed, we will skip constant folding on
    the nodes for which the callback returns True.

    const_output_index is a mapping of corresponding node name from gm to the
    output index of const_gm.
    Returns (const_gm, const_output_index)
    r   )CONST_MODULE_TAGMETA_TAG
MODULE_TAGreplace_node_with_constantrun_and_get_constant_graphNr  r*  _FOLDED_CONST_znode: z user not empty.) torch._inductor.constant_foldingro  rp  rq  rr  rs  r8  r4  r   r  r   r   r   r   r   users
erase_node	recompile)r   skip_constructorlifted_constant_namesskip_folding_node_fnro  rp  rq  rr  rs  const_gmconst_resultr   r   const_outputsto_erase_nodeto_replace_nodeconst_output_indexr   new_const_namens                       r   split_const_gmr  6  s   ,  *
35IH "7!>8:DL #,E(..2F2F,G,K,P,PQR,S"TQM  MO '99%""4(YYx $44M9Q  &	'   F)DII5" )0 ]49956		
 .;499-E>*F dd# &::ZZ Wvvh':5VvEU7VV5W HH%& LLN'''Es    F-c                Z   t         j                  j                  }t        |j                  j
                  |j                  j
                  |j                  j
                  |j                  j
                  g      }|D ]  }| j                  j                  d|      D ]  }t        |j                  j                  dd       t         j                        s8|j                  d   j                  t         j                   k(  sc|j                  d   j"                  j$                  dk(  s  y  y)Nr   r   r   r   TF)r   r   atenrR   mmdefaultaddmmbmmbaddbmmr   r   r   r   r   r   r  float32r  type)r   r  tf32_opsr   r   s        r   is_tf32_warning_applicabler  ~  s    99>>DGGOOJJHHLL  		
H  HH''?6'J 	D499==5u||DIIe$**emm;IIe$++00F:	 r   c                r   t        d | D              }t        j                  r=t        j                  r-|s+t        j                  d       t        j                  d      S t        j                  j                  r+t        j                  d       t        j                  d      S t        j                         S )z
    For CPU backend, enable comprehensive padding causes some unit tests
    fail due to changing number of generated kernels. Skip for now.
    c              3     K   | ]>  }t        |t        j                        st        |j                  j
                         @ y wr   )r   r   r   rE   r  r  )r   ts     r   r   z6maybe_disable_comprehensive_padding.<locals>.<genexpr>  s/      "#Au||9Tqxx}}s
   A$Az!Skip comprehensive padding on CPUF)comprehensive_paddingz;Skip comprehensive padding for use_runtime_constant_folding)anyr   disable_padding_cpur  perf_hint_loginfopatchaot_inductoruse_runtime_constant_folding
contextlibnullcontext)r_  has_gpus     r   #maybe_disable_comprehensive_paddingr    s      '5 G !!f&B&B7>?||%88				9	9I	
 ||%88%%''r   c                ^    | s|rt        j                  d      S t        j                         S )zH
    graph partition does not support cpp_wrapper and aot_mode yet.
    F)graph_partition)r   r  r  r  )cpp_wrapperaot_modes     r   maybe_disable_graph_partitionr    s'     h||E22%%''r   c                   t               5  t        |      }|s;t        j                  j	                  d      } t        | |      j                  |  n\|st        j                         n t        j                  j                  |dd      }|5   t        | |      j                  |  ddd       ddd       |S # 1 sw Y   xY w# 1 sw Y   S xY w)z}
    If we can not detect fake mode from the context of inputs, create one.

    The created fake mode will be returned.
    Tallow_non_fake_inputs)r   r  N)r   r$   r   _subclassesFakeTensorModerP   	propagater  r  r   r  objectpropagate_dont_convert_inputs)r   r_  force_allow_non_fake_inputs	fake_modectxs        r   fake_tensor_propr    s     
"	# $^4	))88t8TI8N2I.88.I 3 &&(ZZ&&y2I4P 
  Pr	2PP#     s$   BCB:(C:C	?CCc                    t        j                  |       5  t        j                         cd d d        S # 1 sw Y   y xY wr   )r   r  get_config_copy)config_patchess    r   get_patched_config_dictr    s1     
n	% (%%'( ( (s   4=c               #     K   t         j                  r#t        t               d      5  d  d d d        y d  y # 1 sw Y   y xY ww)NF)dirdelete)r   force_disable_cachesrB   r?   r   r   r   with_fresh_cache_if_configr    s>     "" Y[7 		 	 		 	s   &A;AA Ac                  |    e Zd ZU ded<   ded<   ded<   ded<   ded	<   ded
<   ded<   ded<   ded<   ded<   ded<   y)_CompileFxKwargszOptional[BoxedBool]
cudagraphsSequence[int]static_input_idxsr   is_backwardzOptional[int]graph_idr  r  rm  zOptional[bool]
layout_optz1Optional[Callable[[list[ExternKernelNode]], Any]]extern_node_serializerzOptional[BoxedDeviceIndex]boxed_forward_device_index
fx_wrapperNr   r   r   r   r  r    sC    ##$$NMM ::r   r  )totalc                  $    e Zd Z	 	 	 	 	 	 	 	 ddZy)_CompileFxCallablec                     y r   r   )selfr   r_  r   s       r   __call__z_CompileFxCallable.__call__  s    
 r   Nr   rM   r_  Sequence[InputType]r   Unpack[_CompileFxKwargs]r  r>   )r   r   r   r  r   r   r   r  r    s-     , +	
 
r   r  c                   |j                  dd        |j                  dd       |j                  dd       |j                  dd        |j                  dd       |j                  dd       |j                  d	d       |j                  d
d        |j                  dd        |j                  dd        t        j                         5 }|j                  t        j
                  j                  j                                |j                  t        t        j                               |j                  t        j                  dddddd             |j                  t                      |j                  t                      t        j                   d|d           t#        t$        d      | |fi |cd d d        S # 1 sw Y   y xY w)Nr  r  r   r  Fr  r  r  rm  r  r  r  compile_fx_innerinductor_compileTcompile_inductor#inductor_cumulative_compile_time_us)
phase_namerZ  log_waitcounterwaitcounter_name_overrider[  )r  inductor)compiler_name)
setdefaultr  	ExitStackenter_contextr   r   _python_dispatch_disable_current_modesrW   dynamo_configuse_lazy_graph_moduler   r%   r  r_   r"   pt2_compiler    _compile_fx_inner)r   r_  r   stacks       r   r  r    s   
 lD)
)2.
mU+
j$'
mU+
lE*
ne,
2D9
lD)
.5 
			 
5EKK88OOQR2=3V3VWX%%"-&* $*<)N		
 	689LN+&&}-	
 P"#4JO
 
'
 
 
s   	C.GG
zcompilation time (in seconds)r   c                ~  '( t         j                  }t        j                  j                  j
                  j                          t        j                  | j                        dk(  r|s~ddl
m} ddlm} |j                  |        t        j                  j                   j#                         }t%        j&                  dd|i|j(                         t+        | j,                        S |j/                  dd      }t0        j3                  d	|       t5        ||      }t7        t9        t;        t=        | j                  j>                                    j@                  d   tB        tD        f      sJ d
| j                          |jG                  d      &tI        tJ        jL                  jN                        |d<   tJ        jP                  rtS        | |fi | tU        jT                         }	tW               }
tY                t[        d d t]        |       D        D              }t_        ddd      5  tJ        j`                   xrD tJ        jb                  xs |
xr. | xr) |xr% t        jd                  jJ                  jf                   }tJ        jb                  }|
}ti        d|       tj        j3                  d||||tJ        j`                         tm        |      D ]L  \  }}t7        |t        jn                        s!tq        |jr                  jt                        sA||v sFd|_;        N d}d}d'd}ty        |       }tU        jz                         }|rt}        j~                  | ||||      \  }'|v|\  }}tj        j3                  d|       |r)t}        j                         }tj        j3                  d       t}        j                  ||||||jG                  dd      |      \  }'ntj        j3                  d       t        jd                  jJ                  jf                  rn|J 'J t        j                          	 t        | ||fi |}|J t        j                         \  }}|j                  |       	 t        j                          n''d   dk(  r?|J tj        j3                  d''jG                  dd      nd       	 t        | ||fi |}nB'd   d k(  r|J |J tj        j3                  d!       t        j                          	 t        | ||fi |}|J tU        jz                         |z
  |_O        |\  }}||_P        ||_Q        t        j                         \  }}|j                  |       	 t        j                          |t        |      'd"<   |j                  'd#<   tj        j3                  d$|       t}        j                  |||||       n;'d   d%k(  sJ |J |J |\  }}tj        j3                  d&|       ||_P        ||_Q        |J |}''d   nd'(t%        j                  d(( 'xs i |)       t%        j                  d*(|'r'jG                  d+      nd'r'jG                  d,      nd'r'jG                  d      nd-||.       't        d/(fd0'fd12       |j                  |||       ddd       tj        j3                  d3tU        jT                         |	z
         tj        j                  t        j                        rg }t        d4   j                         D ]  \  }}|j                  d5      }t        |      d6k  r|j                  |d7d8d8d8|g       =t        |      d9k\  rd5j                  |dd:       nd5j                  |dd;       } | j                  d<      }!|!rDt        |      d9k\  r6|d:d \  }"}#}$}%d5j                  |dd:       } |j                  | |"|#|$|%|g       |d;d \  }#}$}%d5j                  |dd;       } |j                  | d7|#|$|%|g        tj        j                  d=       tj        j                  d>j                  d?d@dAdBdCdD             tj        j                  dE       |D ]9  }&tj        j                   d>j                  |&        tj        j                  dE       ; t        j                  j                  j
                  j                           t               t        j                  dF|d   rdGndH dI|dJ           S # t        t        f$ r  t        $ r3}t        |t                     j                  |j                        dd}~ww xY w# t        j                          w xY w# t        $ r3}t        |t                     j                  |j                        dd}~ww xY w# t        t        f$ r  t        $ r3}t        |t                     j                  |j                        dd}~ww xY w# t        j                          w xY w# 1 sw Y   sxY w)Kz
    Inductor API that compiles a single graph.

    If you change the argument list for this function, make sure you
    also update the call to save_args_for_compile_fx_inner below accordingly.
    r   )CompileEventLogLevel)_LazyGraphModulezbackward no-op
compile_id)metadata	log_levelr  r   z&static input idxs compile_fx_inner: %szGinductor can only compile FX graphs which return a tuple/list, but got r  Nc              3  8   K   | ]  }||j                     y wr   )supports_caching)r   backends     r   r   z$_compile_fx_inner.<locals>.<genexpr>f  s&      	#  	  	#s   c              3     K   | ]7  }t        |j                  t        j                  t        j                         9 y wr   )r]   r  r   r  r  r   r  s     r   r   z$_compile_fx_inner.<locals>.<genexpr>h  s6      
  +V//1B1B
s   =?fx_codegen_and_compileT)rZ  r  fx_cachezXFX cache status: use_cache=%s, local=%s, remote=%s, aot_mode=%s, force_disable_caches=%szFX cache key generated: %szUsing remote FX cacher  F)r  	constantszFailed to generate FX cache keycache_statebypasszFX cache bypass reason: %scache_bypass_reasonunknownz*FX cache disabled or key generation failedmissz,FX cache miss, compiling and saving to cachetriton_bundler_metatime_taken_nsz.Saving compiled graph to FX cache with key: %shitzFX cache hit with key: %sdisabledfx_graph_cache_)r  time_nsr  r  
componentszcache not enabled)r  cache_event_timer  r  r  remote_cache_enabledlocal_cache_enabledartifactc                     d  ddS )Nr  jsonr   encodingr   )r  s   r   <lambda>z#_compile_fx_inner.<locals>.<lambda>@  s    -k]; &% r   c                 .    t        j                         S r   )r   dumps)
cache_infos   r   r  z#_compile_fx_inner.<locals>.<lambda>D  s    4::j#9 r   metadata_fn
payload_fnz%FX codegen and compilation took %.3fsaten_mm_info_   -?   )r  r  z$Overview info of inductor aten mms: z3{:<30} | {:<20} | {:<20} | {:<20} | {:<20} | {:<20}NameBMNKCountz----------------------------------------------------------------------------------------------------------------------------------ztorchinductor done compiling 	BACKWARDSFORWARDS graph r  )ert   aot_compilationr   	_inductorasync_compileCompiledTritonKernelscache_clearr   count_callsr   torch._dynamo.utilsr  torch.fx._lazy_graph_moduler  force_recompiler   CompileContextcurrent_compile_idr"   log_instant_eventPT2_COMPILEr-   forwardr  static_inputs_logdebugget_input_idxs_to_checkr   nextiterreversedr  r   r4  r   r   r@   r   tritonr  	save_argsr8   timerG   r^   allrC   r%   r  fx_graph_cache
_functorchbundled_autograd_cacher)   r   r8  r   rE   r  r  _is_inductor_staticr;   r  r0   prepare_keyget_remote_cacheload_with_keyrj   begin_compiler  collectset_triton_bundlerU   rV   	Exceptionra   r	   with_traceback__traceback__end_compile_time_taken_ns_fx_graph_cache_key_fx_graph_cache_debug_linesr  _save_graphinstantr  rK   post_compileisEnabledForr   INFOr#   itemsr  r   r   r   endswithr  formatr   ))r   r_  graph_kwargsr  r  r  r  r  inputs_to_checkstartfx_graph_remote_cachebackends_support_caching	use_cachelocalremoter  inputmb_compiled_graphkey_inforemote_cacher  
start_timer  debug_linestriton_bundler  e	cache_keycompiled_graphmm_table_datar   partsr   
is_batchedbatchmr  krowr  r  s)                                          @@r   r  r  -  s(
    &&H 
OO!!77CCE)Q.x 	=@((,]]11DDF
,,"J/*66	
 rzz**'3'>'>?RTV'WDFWX-n>OPOd4 89:??BUDMR 
QRTRZRZQ[\R %-%.v}}/G/G%H\"&	
 	
 IIKE<> " 	#
 *"-	
	# 	  
 d
 UM +++ C&&?*?CC )C $$++BBB 	 %%&
I.		f''	
 ".1 	1HAu5%,,/5<<,,-**,0)	1 37
226	 \\^
%1%=%=NL/6&"Xz
 ##+ [		6<#/#@#@#BLII560<0J0J"  , 0 0 F'1-!: 		;<""99$,,,%%%
 ''),$:%;G%! )444 "))+!'!33MB ))+
 :m#<#H$,,,II, "- NN#8)DE$:%;G%! &&0$,,,'''IIDE''),$:%;G%! )44437<<>J3N!0)1&	;8A!5@K!= "))+!'!33MB ))+".478K4L
01*;*J*JJ'IIF	R$$! m,555$000''''/$YII19=4=1<G9 ,,,* *4)?J}%Z 	 	""k]+%2	
 	&&#')3
u%7Az~~l3t  45(!' %	
  ! : 	##NI|LkUMn II5tyy{U7JK %">288: 	BJCIIcNE5zA~$$c3S#u%EF ,/u:?388E#2J'sPR@TD'9:Jc%jAo!&rsq!Qxxcr
+$$dE1aE%BC  *1axxcr
+$$dCAq%%@A)	B, 	78AHHS#sG	

 	  	 CHHQJQQSVWXHHY	  
OO!!77CCELN'&}5;:
F Gj)*	, i %i0  #A|~6EEOO
 ))+&  #A|~6EEOO2 %i0  #A|~6EEOO
 ))+]UM UMs   -C	h27h2h2Dh28:d%3Ah2f;h2A)g6Eh2%e0=.e++e00e33f		h2	g.ggh2h#.hhhh//h22h<c                  $    e Zd ZU dZded<   ddZy)_FxCompileStatr   r
  codegen_and_compilec                     d| j                    S )Nzcodegen_and_compile: )rg  )r  s    r   __repr__z_FxCompileStat.__repr__  s    &t'?'?&@AAr   N)r  r  )r   r   r   rg  r   ri  r   r   r   rf  rf  |  s      Br   rf  c                  d    e Zd ZU dZ ee      Zded<   e	 	 	 	 	 	 	 	 	 	 dd       Z	e
dd       Zy)		FxCompileza
    An FxCompile represents a mechanism that can turn a GraphModule into an
    OutputCode.
    z%dict[type[FxCompile], _FxCompileStat]_compile_statsc                     y r   r   )r  r   r_  rM  rL  s        r   rg  zFxCompile.codegen_and_compile  s     r   c                8    | j                   j                          y r   )rl  clear)clss    r   _reset_statszFxCompile._reset_stats  s      "r   N
r   rM   r_  r  rM  r  rL  r  r  r>   r  None)r   r   r   __doc__r   rf  rl  r   r   rg  classmethodrq  r   r   r   rk  rk    sr     =H<WN9W
  , '	
 ' 
  # #r   rk  c                  2    e Zd Ze	 	 	 	 	 	 	 	 	 	 dd       Zy)_InProcessFxCompilec                   34567 d|v r|d   J |d   }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }	|j                  d	d      }
t        j                  }|j                  d
d      }|j                  dd      }t        d      j	                         5  t        j                         5  t        j                  x},ddl	}t        j                  d|        |j                  |       t              r
t                t        d   j!                         }t#        j$                  t'        t#        j(                         d              t+               t,        j.                  d|rdnd d|        t1        j2                         }t4        j6                  j8                  j:                  j=                  ||dd       |j?                         7tA        dd 7fd       t        jB                  jE                  |       jF                  }|tI        |      }tK               tM        dd      5  t5        jN                         5  tQ        |      }ddd       ddd       tS               tA        dd fd       t        jT                        5  tW              }|5  tY        |       ddd       t        jB                  j[                  |       t\        jC                  d t_        d!ddd"             ja                  dddd#      4tA        dd$ 4fd%       t        jb                  jd                  dk7  r~t4        jf                  jh                  jk                  jl                        }to        t4        jp                  jB                  jr                  |      t4        jp                  jB                  _:        tw               }|jy                         r.t        d&   j{                         }t}        j~                  d|'       t        j                         r 	 t        d(t        t                     i)       ddd       t        jT                  |      5  t        |      5  t        |	|      5  d}d}d}d}|rt        j                  j                  rt        d+ ,      \  }}t        |g |||	||||d|
-      }t        j                  |      5  t        j                  g       5  |	sJ d.       |j                          |j                         \  }}ddd       ddd       t        ||||	||||||r|j                  nd|r|j                  nd|||
/      }t        j                         }|j                          t        j                  |      5  t        j                  g       5  t        j                         5   |j                  |  g }|j                  t               6|j                  D ]  } t        | t              rq| j                         rat        t        | j                                     dk(  r<|j                  t        6fd0| j                         j                  D                     |j                  d        t        |       d}!tM        d1d      5  |j                  r_|j                  rS|j                  rJ |j                         d   j                  }"t        jB                  d2|"ja                  d3             n|j                  rOd4d5lhmi}# |j                  sJ d.       |j                         \  }$}%t        jB                  d6|$j                         |%j                  r t        jB                  d7|%j                         d}&t        j                  r5|j                  t        j                        }&t        jB                  d8|&       tM        d9d      5  |#j                  ||$j                  |%j                  |&|j                  g t        j                  |j                  j                  |r|j                  j                  ng z         :      }"ddd       n)|j                         }'|'j                  }"t        |'d;d      }!ddd       d5d3t        jb                  jd                  dk7  rt        j                  t4        jp                  jB                  j                               5t        j                  t4        jp                  jB                  j                        3tA        dd< 5fd=       tA        dd> 3fd?       3r,tw               }|jy                         r|j                  d@3       d}(t        j                  t,        j.                        rz|j                         \  })}*}(t        xj                  |)z  c_}        t        xj                  |(z  c_~        t        xj                  |*z  c_        t        j                  dA|)|*|(dB       t        j                  r>|j                         \  }+}+}(t4        jp                  jB                  j                  |(       t4        jp                  jB                  j                  |j                  j
                         |rMt        j                  j                  r0t        j                  st        jl                  j                  st5        jp                  j                  j                  | rd},jl                  j
                  D ]  }-|-j                  j                  dCd      }.|-j                  dDk(  sFt        |.t4        j                        r+t4        jp                  j                  j                  |.      sw|-j                  j                  dEd      x},s n dF}/|,r	|/ dG|, dH}/n|/ dH}/|/t        jl                  _        |rt        j                  sut        jl                  j                  sZt              }0|0rLdI|0j                    }/|0j                  j                  dEd      x},r|/ dG|, dH}/|/t        jl                  _        t        j                  rt        "t        t"        t4        jf                  j$                  f      sJ t'        |"             t)        |"|j                  J      cddd       cddd       cddd       cddd       cddd       cddd       cddd       cddd       S |rUt        jl                  j                  s:ddKlm}1  |1t        jl                  j.                        t        jl                  _        | j0                  t'        |          xj2                  d4z  c_        t4        jp                  jB                  j4                  rt4        jp                  jB                  j6                  ot        t4        j8                  j:                  j=                               }2|j                  d      }|(|2t4        jp                  jB                  j6                  |<   t?        "||t        jl                  j                  |jA                         t        d   |z
  |||||74|!53      cddd       cddd       cddd       cddd       cddd       cddd       cddd       cddd       S # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   yxY w# t        $ r t        j                  d*       Y w xY w# 1 sw Y   !xY w# 1 sw Y   
TxY w# 1 sw Y   
YxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       y# 1 sw Y   yxY w)LzS
        Generates the OutputCode from the GraphModule and example_inputs.
        r  Nr  r   r  Fr  r  r  rm  r  z/pytorch.wait_counter.actual_codegen_and_compiler   z3Sleeping for %s since sleep_sec_TESTING_ONLY is setr  i  ztorchinductor compiling r  r  r  )save_dirr  c                     dddS )Nfx_graph_runnablestringr  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    / (% r   c                      S r   r   )runnable_graph_strs   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    #5 r   r  additional_fake_tensor_propTrZ  c                     dddS )Nbefore_post_grad_graphr}  r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    4 (% r   c                 ,     j                  ddd      S NFTprint_outputinclude_strideinclude_deviceprint_readabler   s   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    2#4#4!&tD $5 $ r   rm  %szAFTER POST GRADr  r  colored)r  r  r  fast_sympy_printc                     dddS )Ninductor_post_grad_graphr}  r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>9  s     :$,) r   c                      S r   r   )inductor_post_grad_graph_strs   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>=  s    'C r   graph_break)	overwritenum_graph_breakspt2_configs)extra_loggingzfailed to log pt2_configsc                    | j                   dk(  xrc t        | j                  t              xrG | j                  j	                  d      xs* t        | j
                  j                  dd       t              S )Nr  r  r   )r   r   r   r  r   r   r   rI   )r   s    r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>k  s_    $''Z:O ;&t{{C8; !KK223CD X)$))--t*DFVW	 r   )r{  )
r_  	shape_envr  r  r  r  rm  r  is_const_graphr  z"AOT mode only supports C++ wrapper)r_  r  r  r  r  r  rm  r  r  const_wrapper_codeconst_kernel_codeconst_modulerM  r  c              3  @   K   | ]  }j                  |        y wr   )doprint)r   sps     r   r   z:_InProcessFxCompile.codegen_and_compile.<locals>.<genexpr>  s     )X1!))A,)Xs   zGraphLowering.compile_to_fnzOutput graph module: 
%s)r  rZ   )AotCodeCompilerzOutput wrapper code: 
%szOutput kernel code:
%sz#Serialized Extern Kernel Nodes: 
%szAotCodeCompiler.compile)device_typeadditional_filesrunnerc                     dddS )N*inductor_provenance_tracking_node_mappingsr   r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    (T,21 r   c                      S r   r   )r  s   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    /Y r   c                     dddS )N0inductor_provenance_tracking_kernel_stack_tracesr   r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    (Z,21 r   c                      S r   r   )inductor_kernel_stack_trace_strs   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    /N r   inductor_provenancezGraph Metrics:
%s)num_bytes_accessednodes_num_elemnode_runtimesr   r*  stack_tracezWgraph with symbolic shapes inputs and config.triton.cudagraph_skip_dynamic_graphs=True.z Found from 
z,disabling cudagraphs due to incompatible op )filenamer  ) check_lowering_disable_cudagraph)r   rt   r  rQ   guardr   preserve_rng_stater   sleep_sec_TESTING_ONLYr1  r   warningsleepr  r   r#   copysyssetrecursionlimitr	  getrecursionlimitr   r   rH  ioStringIOr   _dynamorepro	after_aotsave_graph_reprogetvaluerK   r*  fx_graphr  rs   rd   r%   no_gradr  r   set_fake_modeget_cuda_device_contextrj  fx_graph_transformedpost_grad_graphs_logr(   r  traceprovenance_tracking_levelr   	tracebackget_graph_provenance_jsonr   r7   r  _pre_grad_graph_id _inductor_post_to_pre_grad_nodesr'   in_progressr  r"   compilation_metric	is_fbcoder   r  r  r=  r  r  r  r  r  rf   set_graph_handlerset_extern_kernel_nodesruncodegen_with_cpp_wrapperr   r\   CachedMetricsHelperfreeze_runtime_assertsr[   graph_contextgraph_outputsrO   r   rh   has_tensor_outputr   rN   
get_strider   r4  
get_layoutr   _check_triton_bf16_supportr  r  r  codegenr   r1   	codecacher  extern_kernel_nodesr  compiler  dictfromkeyswrapper_coder  compile_to_modulecallr   r   r  dump_inductor_provenance_info_inductor_kernel_stack_trace
add_to_setinductor_metrics_logrG  count_bytesr  r  r  r  log_tlparselog_runtime_and_tensor_metalog_collective_schedule	schedulerr  r/  cudagraph_skip_dynamic_graphsr  disable_cudagraphs_reasonr   any_is_symbolicr   r   r   ro   r   r   rM   r  r9   torch._inductor.cudagraph_utilsr  device_node_mappingrl  rg  RECORD_GRAPH_EXECUTIONGRAPH_COMPILE_IDSr   r$  r%  r:   
get_deltas)8r  r   r_  rM  rL  r  r  r  r  r  r  r  rm  r  	sleep_secr1  inductor_countersfdr  r  cuda_contextprovenance_tracking_jsonmetrics_contextr  r  const_graphr  r  r|  r   metrics_helperr   rI  compiled_fn_runnercompiled_fnr  r  kernel_codeserialized_extern_kernel_nodescompiled_moduler  	num_bytesr  r  r  r   meta_valdisablemaybe_incompat_noder  r  r  r  r  r  r  s8    `                                                 @@@@@r   rg  z'_InProcessFxCompile.codegen_and_compile  s     |+\0J0VVV ,\ :
+7+;+;<OQS+T(,,]EB"."2"2:t"D(,,]EB'++L%@
**)--neD5t< 	
 JKQQSu	++-u	 $:::	GI9 

9%)"-#% ( 4 9 9 ; !!#c&;&;&=t"DELN*"-;:> ?!
$ BMM))::B
T ;  "$ 6 GGR0
 I 1.A	$ B-T E ]]_ E 0^ DIEE 6b9
 	 + ?A6r:! O/NO,,R@$***)'+'+ $	 02/@/@!&#'#'%)	 0A 0, !!  D <<99Q>**DDRXXN - ;!OO11DD4 OO))J #6"7"..0'/'>'D'D'F$&99"&9I ##%	A, -s3J3L/M+o?AD 	*U3NCU .k8DU
 &*""%)"$(! 3 3 P P 4B.40H0 #0 ')"+!)$/!)/E%1$/'+#-#K ++K811"5  +P,PP{#)'@@B >*,=  & $2'% +%+A!- +'94F*00D 4E)//$!,$3)-0 ")!<!<!> ,,.''.J--b1J )668J
 EII~.QSN**6 +,#(#6#6 <C *3 7$'$9$9$;$'(=cnn>N(O$PTU$U !/ 5 5$))X@P@W@W)X$X!" !/ 5 5d ;< /u5 *.&%5T > !>>e.>.>','8'88#8*/--/!*<*?*?K+11 ; + : : : N
 #^^B#(#4#4  D#4 9>8V8V8X5L++11 ;\=O=O  +00 / 5 5$={?P?P!" >B: 44$)$@$@AVAV$W !? !0 5 5$J$B!"
 ". 9QU" " />.E.E$)$0$6$6$/$5$5$B050A0A	6&)-,1,>,>,O,O 4? 1<0H0H0Y0Y57	-.**	6& /F /"	" ", /4.E.E.GO*9*>*>K18 /42.y>B BF>6:3||==BEIZZ!OO11OOQFB ;?**!OO11NN;7 )&) (Z )&) (O ;.A.CO.::< / : :$9$C!"
 %)M+88FCHCTCTCV@	>=22i?2-->-...@.,1106?2@1> )).3.?.?.A+1m--II-X OO))AA%//BWBWX #"MMGG & 6 6 ! A A!OO11AA>R&*$&HHNN 
&D'+yy}}UD'AH $= 8'1(ELL'I','<'<'L'LX'V (.2iimmM4.PP{P %
& #|&)0	k]"&MG)0	nG<C9
 # & 6 6 ! A A.STV.W+.(TUhUoUoTp&qG.A.F.F.J.J -t/  {   .5I\+b*Q@GAGG= (()' $(<(<=  -  ,	-  
  ,%0e>O>O aJ J J JWU U U UAu	 u	 u	B "!''*K*K = ! ; ; 9 ''T
3GG1LG --DD!OO11CCO%(!MM88KKM&
 $0#3#3J#?#/ !+ "OO11CCHM +#&99&113 ,/@@"&)$'*4*B7'mJ J J JWU U U UAu	 u	 u	ZE EE E8O Or % A $?@Ay?A ?AJ   V" "I> >AJ J J J J J J JWU U U U U U U U UAu	 u	 u	 u	 u	 u	s&  >AAFA@?},|4	9}:A@?;~ 	}	E*~ }!A@?>A@*
A@	A!A@ 8~-~;~A7A@ :+%C%
E~4A:~'	2~4;M <D	 	+)	A@ 2	A@	;	A@*	A@?	AA F+	4	+=	A@ 	A@		A@*	A@?!	AA4|>9}}A@?}~ }=	9~ <}=	=~  ~
A@?~~~$A@ '~1,~44~>9
	++"	A@ +40A@ 7	A@	@ A@	@A@	@	A@*@A@@A@*@!	A@?@*A@3@/A@?@6	AA@?AA	AAAAAANrr  )r   r   r   r   rg  r   r   r   rx  rx    sK    QQ ,Q '	Q
 'Q 
Q Qr   rx  c                   t         t        j                  k(  rt               }nIt         t        j                  k(  rddlm}  |       }n$t         t        j                  k(  rddlm	}  |       }t        r'ddlm} ddlm} t        |      sJ d        ||      }t        r=ddlm}	 ddlm} t        |      sJ d       t#               }
t               } |	|||
      }j%                  | |||      S )	NrZ   )_DebugSerdeFxCompile)_SubprocessFxCompile)_AsyncFxCompile)_OutOfProcessFxCompilez7async is only valid with an out-of-process compile mode)_ProgressiveFxCompilez=progressive is only valid with an out-of-process compile mode)fx_compile_moder   r   rx  r   compile_fx_extr  r   compile_fx_subprocr  fx_compile_asynccompile_fx_asyncr  r  r   fx_compile_progressiver  r   rg  )r   r_  rM  rL  schemer  r  r  r  r  progression_configsfast_schemes               r   r  r    s     -...$&	M33	38%'	M44	4<%'5: &"89 	
E	
9 !(;: &"89 	
K	
9 78 *+ '{F<OP %%b./<XXr   c                d   g }t        |       D ]  \  }}t        |t        j                        s!t	        |j
                  j                        sAt               5  ||v rt        |      r
	 ddd       et        |      s
	 ddd       z	 ddd       |j                  |        |S # 1 sw Y   xY w)z
    This function runs at compile time, and generates a list of indices for which we
    might need to do a copy to preserve alignment requirements.
    N)r8  r   r   r   rE   r  r  rp   rH   rF   r   )inputsr  ids_to_checkr  rT  s        r   r+  r+    s     Lf% 5%.ell''(02 	 %%*;E*B		 	
 /u5	 	
 6	 	A), 	 	s   B&3B&&B/	r   )r  placeholdersmutated_input_idxsc                    ddl m}	 t        j                  j                  rEt        j                  |	|||||||t        j                  j                  j                         	      nt        d d fd}
|
S )Nr   )cudagraphify_impl)device_indexstack_tracesr  rm  r  r   r!  r  c                ~    't        j                         5   |       d d d         |       S # 1 sw Y   xY wr   )r   r  )
new_inputsr  cudagraphify_fnmodelr  s    r   r  zcudagraphify.<locals>.run.  sH    002 T-eZARST:&&T Ts   3<)r'  r  r  r   )torch._inductor.cudagraph_treesr#  r   r/  cudagraph_trees	functoolspartialr   r   r$  r%  )r)  r  r$  r%  r  rm  r  r   r!  new_cudagraphify_implr  r  r(  s   ``         @@r   cudagraphifyr/    sr    
 }}$$#++!%%#%%1}}33FFH

 ,K' ' Jr   c                    t        j                  | j                         | j                         | j                  | j
                        S )z1
    Copy and input while preserving strides
    )r  r  )r   empty_stridedsizer   r  r  )r   s    r   static_inputr3  8  s/     qvvx177188TTr   c                V    t        | |      } t        ||      }| j                  |       y)z=Index into expanded dimensions of both dst and src then copy_N)r=   copy_)dstsrcexpanded_dimss      r   index_expanded_dims_and_copy_r9  ?  s'     c=
1C
c=
1CIIcNr   c                *  	
 t        |      }t        t        |            t        ||       t	        |t
              sJ t        |      D cg c]  \  }}|vrt        |      ng  c}}t        |      D cg c]@  \  }}t	        |t        j                        s|n|vrt        |      n|j                         B c}}t        t        |            D ]8  \  }\  }}t	        |t        j                        s$|vs)t        |   ||       : t        j                  j                          t        j                  j!                         }|j#                  t        j                  j%                                t        j                  j'                  |      5   | t                     ddd       |j                          t        j                  j%                         j#                  |       t        j                  j                          t        j                  j)                         
t        j                  j+                  
|d      5   | t                    ddd       t	        t
        t,        f      sft.        j0                  rd
fd}n1t3        t5                    D cg c]	  }|vs| c}	d	
fd}t7        ||t                     S c c}}w c c}}w # 1 sw Y   ExY w# 1 sw Y   xY wc c}w )zQ
    Assumes inputs[static_input_idxs[i]] are always the same memory address
    Nthread_local)streamcapture_error_modec                   t              t        |       k(  sJ t        t        |             D ]u  \  }\  }}}t        |t        j
                        s%t        |t        j
                        sJ |v r$|j                         |j                         k(  rgJ t        |||       w | j                          j                          	S r   )
r   r8  zipr   r   r   data_ptrr9  ro  replay)
r'  r   r6  r7  r8  r   inps_expanded_dimsr  static_inputsstatic_outputss
        r   r  zcudagraphify_impl.<locals>.run  s    }%Z8882;M:/AB3 K..c3 "#u||4!#u||444++<<>S\\^;;;
 2#sMJK LLN!!r   c                    D ]8  }|   }| |   }t        |t        j                        sJ t        |   ||       : | j	                          j                          S r   )r   r   r   r9  ro  rA  )	r'  r   r8  r7  copy_indicesr   rB  rC  rD  s	       r   r  zcudagraphify_impl.<locals>.run  si    # V 23 7 o!#u||444-mC.@#}U	V
 LLN!!r   )r'  list[InputType]r   Callable[[list[InputType]], Any])r+  rR   rr   rm   r   r   r8  r<   r   r   r3  detachr?  r9  r   synchronizeStreamwait_streamcurrent_streamr<  	CUDAGraphr   r4  r   size_assertsr   r   rk   )r)  r  r  check_input_idxsr   r   r8  r<  r  rF  r   rB  rC  rD  s     `      @@@@@r   r#  r#  J  s    /v7HI)3#F,=>* 6#34fd###  'C !$+< <!"D  '	 C a.  ++ a		M $-S9K-L#M Paa&36G+G)-*<aOP
 
JJZZ F
uzz0023			6	" #d=!"#
	JJ++F3	JJ JJ  "E			%>		R 4tM234ntUm4(*	" 	", !]!34
CT8TC
		" 		" (-=z|LL]	*# #4 48
s1   K+AK1"K7L;	LL7LLc                <   t        | t              sJ |        t        |        t        j                  |xs i       }|j                  dd      st        j                  sd|d<   |j                  dt        j                  j                        }|r|j                  d      r"J d       i |dt        | j                        i}dd	lm}  ||      }|j                  d
d       }| j                   j                  dd       }t"        j$                  j'                  |      }t)        j*                  d      5  t"        j$                  j-                  |      5  t/        ddd      5  t1               5  t3        | |t5        j6                  ||      |      }	t        |	t8              sJ |	j:                  cd d d        cd d d        cd d d        cd d d        S # 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        y # 1 sw Y   y xY w)Nr  FTr  zaot_inductor.output_pathz.pt2a
  The output path for aot_compile should not have an extension with .pt2 this is for specifying the output path for the .so in AOTInductor. If you would like to package the AOTInductor generated files into a pt2, please call `torch._inductor.aoti_compile_and_package`.rZ   )maybe_aoti_standalone_configr  dynamo_compile_idcompile_fx_aot)rZ  reset_event_log_on_exit)r  )inner_compiler  )r   rM   r*   r  deepcopyr   r   r  r  output_pathrJ  r/   coder   rR  r   r   r   r   r$  rt   set_aot_compilationcompile_contextr!   r'   
compile_fxr,  r-  r9   r  )
model_example_inputs_rV  r  rX  rR  r  saved_compile_idsaved_compile_contextcompiled_artifactss
             r   rT  rT    s    fk*2F2* &f- &*]]>3GR%HN|U3v7H7H(,}% $$"F$7$7$C$CK ''/ 	
R	
/

&	&++(>

 41.AN+//0H$O{{':DA!MM889IJ	d#+%%&;<+ 	"&$(	
+ 	+ (#++'= *
 ,l;;;!**-+ + + + + + + + + + + + + + +sa   2 HG=!G(,AG	.	G(7	G= 	HGG(	G=(G1-G=4	H=H	HHc                v   ddl m}m}	 t        |        t	        j
                  | d      }
|
rt        | |d        ||         |	|| |      \  }D cg c]  }||   	 }}t        |      }|j                  j                  ^ }}|j                  d   }t        |      D cg c],  \  }}t        |t        j                  j                        s+|. c}}|j                   d<   g }t        j"                  j$                  j'                         }dgd|,|j(                  J |j(                  }t+        dt-        |      dz
        t/        t0                  }|j2                  }|J d}t-        |      dkD  rg t5        t-        |            D ]I  }|vrd ||<   |dkD  r(||   ||dz
     k(  r|dz  }n|j7                  ||          j9                  |       K |j:                  J t5        t-        |j:                              D ]  }||vsd |j:                  |<    |j<                  r|j<                  j>                  }t@        jB                  jE                  |dd      5   ||||||d||
      d d d        tF        jH                  rS d
fd	}d|_%        |S c c}w c c}}w # 1 sw Y   8xY w)Nr   )%convert_conv_weights_to_channels_lastfreezeTr  r   rZ   r  )r  r  r  rm  r  r  c           
         D cg c]  }| |t        |         z
      }}| j                           |      S c c}w r   )minro  )r   r  args_newmax_offset_idxoptimized_functionpreserved_arg_indicesunwrapped_args_offsetss      r   wrapperz%fw_compiler_freezing.<locals>.wrapperK  sT     +
 +C>,BCCD
 
 	

!(++
s   <)r   zlist[object]r  zSequence[torch.Tensor])&torch._inductor.freezingrc  rd  re  rf   decide_layout_optr  r$   r   r  r   r8  r   r   r   r   r   r   r   r   params_flat_unwrap_subclassesr	  r   rR   r
  params_unwrapped_to_flat_indexr   r  r   params_flatr   r   r   r  r  rt   r  _boxed_call)aot_autograd_modelaot_example_inputsdynamo_modelnum_example_inputsrV  r  r  forward_devicerc  rd  r  	opt_modelindr  r  model_outputs_nodemodel_outputsr   r  r  tracing_contextparams_flat_unwrappreserved_indices_params_flatunwrapped_idxscurrent_offsetr  rl  rh  ri  rj  rk  s                              @@@@r   fw_compiler_freezingr    s    W ""45001CRVWJ+-?F-.@A'-($I$ >SSc,S1SS !34I '__22Q&++A.M#M2;QjEHHMM6R;67 $&mm22::<OSN"<<HHH,JJQ$6 7! ;<(23(9%(GG)))!"Q&%'"s-./ 	:A--(,"1%q5^A..Q2GG"a'N-11.2CD")).9	: **666s?6678 	6A5515++A.	6 && / ; ; P P			9&=t	D 

*/!'5!	


 	!!, , GNQ T;L

 

s   J$&,J)J)'J//J8c                     t         j                  j                  rt        t	        d             t         j                  j
                  t         j                  j
                  n	t               ddddS )Nzcpp wrapper enabledFT)ztriton.autotune_at_compile_timeztriton.autotune_cublasLtztriton.cudagraphsztriton.store_cubin)r   r/  r  r4   r3   autotune_at_compile_timerY   r   r   r   get_cpp_wrapper_configr  X  sY    }}+'(=>	
 }}55A MM22$)""
 
r   c                B   t         j                  j                         st        j                         S t        d t        |       D              }t        |      dk(  r1t         j                  j                  t        t        |                  S t        j                         S )zX
    Returns a cuda device context manager if there is a single device in the graph
    c              3  @   K   | ]  }|j                   d k(  s|  yw)r   N)r  r  s     r   r   z*get_cuda_device_context.<locals>.<genexpr>r  s       8FKK64I8s   rZ   )r   r   r   r  r  rR   rC   r   r  r,  r-  )r   cuda_devicess     r   r  r  k  s     ::""$%%''-7 8,R08 .L |! 	

$tL123 ##%r   c                0   t        |       }|5  t        | d       d d d        |j                  dd       }t        j                  1t        j                  dd      5  t        | |fd|d|cd d d        S t        t        j                  t              sJ t        j                  t        j                  j                  j                  d      5  t        j                  | |fd|d|cd d d        S # 1 sw Y   xY w# 1 sw Y   y xY w# 1 sw Y   y xY w)NT)rP  static_lifetime_input_indicesr   r  r  )compilerr  )r  re  r   r   custom_partitioner_fnr   r%   r   r   r6   	__class__r   )r   joint_inputsr   r   r  s        r   partition_fnr  }  s5   
 +2.L	 E 	&btD	E :@':! ##+&&1
 		 7 $.K	
 		 		 &668KLLL&&((22;;"&
 
	 // $.K	
 	
	 
	/E E		 		
	 
	s#   C4D D4C= D	Dc                f    t        |       }t        j                  |j                   }t	        |      S r   )rq   r9  arg_tree_leavesr   r   )r)  rz  r{  s      r   get_num_model_outputsr    s/    $U+**,>,C,CDM}r   )frozenc                  ,    e Zd ZU ded<   ded<   ded<   y)CompilerConfigExtrar@   r  r
  r  r2   rw  Nr   r   r   r   r  r    s    M$$r   r  c                    t        | j                  j                        }t        t              }t        d       }t        |||      S )N)r  r  rw  )r@   r/  r  r,  _graph_counterr2   r  )r   r  r  rw  s       r   create_compiler_config_extrar    sF    
 6==334J N#H &d+N% r   c           	     `    |r/t        dd  fd       t                t        dd  fd       t        j                  j                  j                  |t        |            }t               }t        j                  rt        j                  |j                   }	t        |	      }
t        j                  j                  j                         }|%|j                   r|s|j                   j"                  }nd}||
k  sJ ||z   }||
k  sJ t%        ||      D cg c]+  }t'        |	|   t        j(                  j*                        r|- c}|j,                  d<   ng |j,                  d<   t/                 | |t1        |      |j2                  |j4                  ||j6                  	      S c c}w )
a#  
    Compile the forward graph of the given graph module.

    Args:
        gm: The graph module to compile.
        example_inputs: The example inputs to use for compilation.
        num_orig_model_outputs: The number of model outputs from the original dynamo graph.
        num_example_inputs: The number of example inputs from the original dynamo graph.
        compiler_config_extra: Extra configuration for the compiler.
        inner_compile: The inner compile function to use.
        is_inference: Whether this is an inference graph.
    r  c                     dddS )Nbefore_joint_graphr}  r  r   r   r   r   r  z$compile_fx_forward.<locals>.<lambda>  s    ,$! r   c                 ,     j                  ddd      S r  r  r  s   r   r  z$compile_fx_forward.<locals>.<lambda>      r00"4  1   r   r  c                     dddS )Nafter_joint_graphr}  r  r   r   r   r   r  z$compile_fx_forward.<locals>.<lambda>  s    +$! r   c                 ,     j                  ddd      S r  r  r  s   r   r  z$compile_fx_forward.<locals>.<lambda>  r  r   r   r   )r  r  r  rm  r  )rK   re  r   r  r   num_fw_fixed_argumentsr   rq   r   keep_output_strider9  r  r   r   r   r   r   num_mutated_inp_runtime_indicesr   r   r   r   r   r   r   r  r  rw  )r   r_  num_orig_model_outputsrv  compiler_config_extrarV  rm  r   rz  r{  num_model_outputsr   original_output_start_indexorig_output_end_idxr   s   `              r   compile_fx_forwardr    s   , 		
 	&b)		
 OO!!88C/E %R  ..0B0G0GH.--..6687#6#6|##CC ( +,'%):::: :<RR #&7777 8:MN?
-,ehhmm< ?
 :; ?A :;
 /r2
/6(33&//!#8#G#G ?
s   0F+c                   ddl m} |5  t        |       }t        j                  rlt        j                  |j                   }t        |      D cg c]+  \  }}t        |t        j                  j                        r|- c}}|j                  d<   ng |j                  d<   t        |       }	t        j                  rt        j                   t#                     nt%        j&                         5   || |t)        t+        |	            |j,                  d|j.                  |j0                        cddd       cddd       S c c}}w # 1 sw Y   nxY w	 ddd       y# 1 sw Y   yxY w)a5  
    Compile the backward graph of the given graph module.

    Args:
        gm: The graph module to compile.
        example_inputs: The example inputs to use for compilation.
        compiler_config_extra: Extra configuration for the compiler.
        inner_compile: The inner compile function to use.
    r   )compile_lockr   T)r  r  r  r  r  N)torch._dynamo.convert_framer  rq   r   bw_outputs_user_visibler9  r  r   r8  r   r   r   r   r   rA   r  r  r  r  r  r   r   r  r  rw  )
r   r_  r  rV  r  rz  r{  r   r  r   s
             r   compile_fx_backwardr  9	  s?    9	 (_))"224F4K4KLM (6CCa/ C##$>? CE##$>?r" !! LL/12'')	
 !"&uU|"40;; .77+@+O+O	 	 C	 	 	  s7   AE0D>?A-E,?E+	E>EE		EE#c           
         t        dd  fd       t        j                  dt        d ddd             t	         j
                        t        j                  j                  _        t        j                  j                  d	k(  rc j
                  j                  D ]J  }|j                  s|j                  t        j                  j                  j                  |j                  <   L t!         |       t        dd
  fd        S )Nr  c                     dddS )Nbefore_pre_grad_graphr}  r  r   r   r   r   r  z%run_pre_grad_passes.<locals>.<lambda>n	  s    + 
 r   c                 ^     j                  ddd      dt         j                         z   S NFTr  z

 # graph id: r  idr   r]  s   r   r  z%run_pre_grad_passes.<locals>.<lambda>r	  9    600tD 1 
 b./
01 r   r  r  zBEFORE PRE GRADTr  rZ   c                     dddS )Nafter_pre_grad_graphr}  r  r   r   r   r   r  z%run_pre_grad_passes.<locals>.<lambda>	  s    * 
 r   c                 ^     j                  ddd      dt         j                         z   S r  r  r  s   r   r  z%run_pre_grad_passes.<locals>.<lambda>	  r  r   )rK   pre_grad_graphs_logr*  r(   r  r   r   r  r  r   r  r  r  r  #_inductor_pre_grad_node_stack_tracer   rW  )r]  r^  r   s   `  r   run_pre_grad_passesr  g	  s    
 
1
 	
	 02&,,/?EOO,||--2LL&& 	D$$ %%II$))T	 (@F
1
 Mr   c                   ddl m} |j                  dd      r| S |rHt        j                  |      5  t        | | t        j                  |      |      ||      cddd       S t        d |D              r2t        j                  j                  j                  j                          t        j                  st        j                  rddlm} t        j                  }t        j                  }	t        j                  t!                     5  t#        j$                  |      5  t'        | t(              rt+        | |      n|}
t-        |
      } || |
i |      5 \  }}}}}t/        ||t1        j2                  |||		      ||      cddd       cddd       cddd       S t/        | ||||      S # 1 sw Y   SxY w# 1 sw Y   nxY w	 ddd       n# 1 sw Y   nxY wddd       G# 1 sw Y   PxY w)
a@  
    Main entry point for compiling given FX graph.  Despite the fact that this
    lives in :mod:`torch._inductor`, this function is responsible for calling
    into AOT Autograd (and we will eventually get a callback to
    ``inner_compile`` to perform actual compilation.  In other words, this
    function orchestrates end-to-end compilation for the inductor backend when
    you use :func:`torch.compile`.

    NB: This function TAKES OWNERSHIP of the input ``model_`` and can potentially
    mutate it!  Make a copy if you need to preserve the original GraphModule.
    r   )CompilerBisectorr  pre_grad_graphrV  decompositionsignore_shape_envNc              3     K   | ]8  }t        |t        j                        xr |j                  j                  d v  : yw))r   xpuN)r   r   r   r  r  )r   r[  s     r   r   zcompile_fx.<locals>.<genexpr>	  s8       	1ell#H(HHs   >A )_fakify_script_objects)r  r  )!torch._inductor.compiler_bisectorr  disable_subsystemr   r  r\  r  r   r  r  AsyncCompilewakeupr  r  torch._export.non_strict_utilsr  r  rt   set_real_inputsr   rM    _extract_inputs_from_exported_gmr$   _maybe_wrap_and_compile_fx_mainr,  r-  )r]  r^  rV  r  r  r  r  r  cpp_wrapper_configfx_wrapper_configinputs_r  patched_mod	fake_argsr  s                  r   r\  r\  	  s   , C))*6FG\\.) 	:fll>:=I-!1	 	     	%%2299;V..I#//"-- LL/12	o.	 fk2 1I$ 
 )1I'YG  L6"+"3"3%$6#4#
 $2%5
 	 	 	: + i	 	B  	 	 	 	 	 	sM   )F.G&"5G,F;	G	G&.F8;G G	G&G	G&&G/c           
        | j                   j                  D cg c]-  }|j                  dk(  s|j                  j	                  d      / }}t
        j                  s+|D cg c]   }t        |t        j                        r|nd " }}t        d |D              rt        t               ||      D ]  \  }}}|
t        |t        j                        s%t        |t        j                        sJ |j                  |j                  k7  s[t        d| d|j                   d|j                   d       |S |S c c}w c c}w )Nr*  r   c              3  $   K   | ]  }|d u 
 y wr   r   )r   vs     r   r   z3_extract_inputs_from_exported_gm.<locals>.<genexpr>
  s     
.Q1D=
.s   zBDevice mismatch between fake input and example input at position #z: z vs zx. If the model was exported via torch.export(), make sure torch.export() and torch.aot_compile() run on the same device.)r   r  r   r   r   r   r  r   r   r   r  r?  r
   r  
ValueError)r   r^  r   fake_inputsinpr   fir  s           r   r  r  	  s/    *,!%477m;S		eK  
 GR
?B:c5<<0Cd:
 
 
.+
..eg{OD 	JCQ~*R">!!U\\22299($\]`\aac99+T!(( 4cc 		 3
s   D8D8!%D=c                J   t        j                  t        |||      }t        |       st	        | ||      S t        | t              r1t        | j                  j                  t              rt        | ||      S t        d |D              rt        | ||      S t        | ||||      S )z
    Part of compile_fx, called after patching configs.

    Ultimately we want to call _compile_fx_main, where the actual work happens.
    But under various conditions, various forms of wrapping might be needed
    around _compile_fx_main.
    r  c              3  R   K   | ]  }t        |t        t        t        f       ! y wr   )r   r   r4  r  r   s     r   r   z2_maybe_wrap_and_compile_fx_main.<locals>.<genexpr>0
  s     
G!:a$t,-
Gs   %')r,  r-  r  graph_returns_tuplemake_graph_return_tupler   rM   r   _codegenrX   handle_dynamo_export_graphr  r&   _compile_fx_main)r]  r^  rV  r  r  
compile_gms         r   r  r  
  s      ""'#%)	J v&&v
KK&+&:~, *&/:NN

G
GG $FOZHH  r   c                
    t        t        j                        5  t               5  t        j
                  j                  j                  t        j                  j                  dk(        5  t        j                  j                  j                         5  t         t              rt!         |       t        j"                  rJ t%        |      t'        t              ||n	t)               }	 	 	 	 	 	 	 	 d fd}t+        j,                  |d      }t/        t0        |      }t        j2                  rSt	        j4                         s?t+        j,                  t6         j8                  j:                  j<                        }n't+        j,                  |d      }t/        t0        |      }t?        d	      	 	 	 	 	 	 dfd
       }t/        t0        |      }tA        |      xs  t        jB                  jE                  d      }	t        jF                  jH                  jK                         xs t        jF                  jI                  |	      }
tL        jN                  rt        jP                  sddl)m*}  |        tW        jX                  dt        jZ                        5  t]         |d|      \  }}ddl/m0}  ||      }	|jb                  jd                  D ]  }|jf                  dk(  sd|jh                  vs# tk        |jl                        |      }t        |t        jn                        r%|	J |	jq                  |d      |jh                  d<   }t        |t        jr                        stu        tw        |            r8t        jx                  jz                  j}                  |	|      |jh                  d<   t        |t~              s||jh                  d<    	 ddd       t               }d jh                  v r jh                  d   |jh                  d<   d jh                  v r jh                  d   |jh                  d<   t        j                  j                         }|rt        j                  j                  nt        j                  }tM        j                  |	      5  t        j                         5   |       5   |||      cddd       cddd       cddd       cddd       cddd       cddd       cddd       S tM        j                  |	      5  t        jF                  j                  |
      5  t        j                         5  tW        jX                  dt        jZ                        5  	  t        ||||t        dj8                  j<                  |	       |      cddd       cddd       cddd       cddd       cddd       cddd       cddd       cddd       S # 1 sw Y   xY w# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       %# 1 sw Y   /xY w# t        $ r}|j                         dd}~ww xY w# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       y# 1 sw Y   yxY w)aQ  
    Main part of compile_fx, called after wrapping is done.

    Roughly speaking, here the steps will be:
    (1) apply pre-grad passes
    (2) create `fw_compiler` and `bw_compiler` functions out of `inner_compile`
    (3) call aot_autograd, which:
    - (3a) creates a joint graph with `decompositions`,
    - (3b) partitions it with `partition_fn` into fw and bw graphs (applying joint-graph passes),
    - (3c) calls `fw_compiler` and `bw_compiler` on those graphs (applying post-grad passes)
    - (3d) finally, assembles the fw and bw compiled functions back together and returns.
    rZ   Nc           
         t        j                  d      5  t        t              rt	              }nt	        |       }t        | |||      cd d d        S # 1 sw Y   y xY w)Nz$compile_fx.<locals>.fw_compiler_base)r  rv  r  rV  rm  )r   r%   r   rM   r  r  )r   r_  rm  r  r  rV  r]  rv  s       r   fw_compiler_basez*_compile_fx_main.<locals>.fw_compiler_basel
  sf    
 **+QR fk2-B6-J*-B2-F*)"+A'9*?"/!-  s   9AA#Fr  )ru  rv  rV  r  r  rw  Tbackward)r  c                x    t        j                  d      5  t        | |      cd d d        S # 1 sw Y   y xY w)Nzcompile_fx.<locals>.bw_compiler)r  rV  )r   r%   r  )r   r_  r  rV  s     r   bw_compilerz%_compile_fx_main.<locals>.bw_compiler
  s?    
 ))*KL +"*?"/	  s   09r  )is_valid_aoti_model_name)unlift_effect_tokensselective_decompose)trace_jointr  r   )_detect_fake_mode_from_gmr  r   )static_shapes dynamo_flat_name_to_original_fqnrS  )	fw_compilerr  inference_compilerr  r  keep_inference_input_mutationsr  r  r  )r   rM   r_  r  rm  r   r  r>   )r   rM   r_  r  r  r>   )NrW   r  r  r   r   r   r  preserve_node_metar   r  r  r  r*  reset_provenance_globalsr   rM   r  _raise_error_for_testingr   r  r`   r,  r-  r.   r>   freezingis_grad_enabledr  r  r  rw  rL   r$   r  r  r   r   r   rt   r  enable_autograd_for_aotr   r  functorch_configr  r  r+   torch._export.utilsr  r   r  r   r   r   r   r   from_tensorScriptObjectrJ   r  _libraryfake_class_registrymaybe_to_fake_objrI   rK  _C_is_any_autocast_enabled_DisableAutocastr  r  r  r   _disabletracingrT   r  rU   remove_dynamo_frames)r]  r^  rV  r  r  r  r  r  r  r  r|  r  r   r<  r  r   r   rJ  disable_ampr   r[  r  rv  s   ` `                  @@r   r  r  ?
  sR   ( 	}BBCu9 "u9 	--LL22a7	
u9 	668u9 fk*(AF2222 1 <V D -8N>Q>S 			/	 	 		 	* .UC 	 6j+N??5#8#8#:5>5F5F$##5+0;;.774CC6 "+!2!23CRV!W!@." 
'*	=		-@		 
>	 6j+N$
 J--D-I 	 MM((002 7}}++I6 	
 V%C%C7$&!''%)$*$>$> %6 '8# %#1	'#O J5b9	 HHNN 6Dww*,dii1G!8DKK!8!<%fell;#,#88#8/8/D/D &d 0E 0DIIe, (0B0BC~ LH !& B B T T$-v!" !IIe,
 (0@A/5DIIe,#6)%6N (ODK1V[[@GM{{6H  !CD #fkk18>DW8X  !45  ((;;=K-8))j>T>T  + H->-G-G-I H79 H)+GH H H Huu9 u9 u9 u9 u9| OOI&	9MM!!/2	9 &&(	9 ""%)$*$>$>		99
| + +'9#1!-374??/D/S/S%5
 /
+	9 	9 	9 	9 	9{u9 u9 u9 u9 u9@%6 %6tH H H H H H H H0 $ 9 ,,.D89+	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9{u9 u9 u9 u9 u9 u9 u9 u9 u9 u9 u9 u9s  [9A[$.)[HZ:	&A
W?1W? CW?W?$CZ:	,X7X!		X	X!	X7$	Z:	-	[6	[$?	[9Z:	' Z%Z&Y;Y&2Y6	Y;?	Z	Z%	Z:		[#	[$,	[9?X	Z:	XX!	X7!X*&X7-
Z:	7Y<Z:		Y#YY##Y&&Y/+Y;2	Z;Z Z	Z%ZZ%	Z:	%Z.*Z:	1	[:[?[	[$[[$	[9$[-	)[99\c                   t        | t              syt        |       j                  \  }t        |t        t
        f      ryt        |t        j                  j                  j                        rst        |j                  d      r]t        |j                  j                  j                        dkD  r1t        d |j                  j                  j                  D              ryy)z"True if a FX graph returns a tupleT_schemarZ   c              3  L   K   | ]  }t        |j                        d k(    yw)r   N)r  r  )r   rets     r   r   z&graph_returns_tuple.<locals>.<genexpr>  s     OcCHH)Os   "$F)r   rM   rq   r   r   r4  r   r   r   r   r  r   r   r  returnsr2  )r   rvs     r   r  r  
  s    b+&O  ER"tUm$2uxx}}))*BIIy)		!!))*Q.ORYY5F5F5N5NOO r   c                   t        |       }|j                  \  }t        j                  |      \  }| j                  j                  |      5  | j                  j                  |       ddd       | j                  j                  |       t        |       sJ  || |      t        j                        dfd       }|S # 1 sw Y   [xY w)z
    Mutate gm so it returns a tuple.  This is only needed for graphs
    not created by torchdynamo that return non-tuples.
    Nc                 <    t        j                   | i |      S r   )r9  tree_unflatten)r   r   r  specs     r   rl  z(make_graph_return_tuple.<locals>.wrapper/  s     $$[$%A&%A4HHr   )r   r   r   r   r  r   )rq   r   r9  tree_flattenr   inserting_beforer   rw  r  r,  wraps)r   r  r  r   r  rl  r  r  s         @@r   r  r    s     r?DIIER""2&HB		"	"4	( 
HHr"""R(K__[!I "I N s   CCc                .   | j                   j                  t        j                  j                   j	                         | j                   _        | j                           ||  j                  |       t        j                        dfd       }|S )z
    `torch._dynamo.export` embeds pytrees in the FX graph codegen object,
    convert that to a normal FX graph so inductor can compile it.
    c                 F    j                    j                  |         S r   )process_outputsprocess_inputs)r   r  r  s    r   rl  z+handle_dynamo_export_graph.<locals>.wrapperE  s'    &&{4JG4J4JD4Q'RSSr   )r   r   r  r   )	r   r  r   r   CodeGenrx  r  r,  r  )r   r  r  rl  r  r  s       @@r   r  r  6  sx     hhG..0BHHLLNR!7!7!7!@AK__[!T "T Nr   c                   dd}t        j                  | j                  j                         | j                        D ]  }t        |t              st        |      }|r,t        |      r!|j                         t        j                  k7  rNt        |      }|j                  d      r y  ||j                                 y )Nc                    ddl m} | J t        | j                        }|j	                  |       }t        j                  |j                   d        |d      )Nr   )rV   z9 does not support bfloat16 compilation natively, skippingzBF16 is not supported)torch._dynamo.excrV   r   r  get_device_propertiesr   r   r   )r  rV   device_interfacedevice_propss       r   warn_and_skipz1_check_triton_bf16_support.<locals>.warn_and_skipM  s\    /!!!3FKK@'==fE  !!Z[	
 /00r   F)including_emulation)r  zOptional[torch.device]r  r   )	itertoolschaingraph_inputsr;  r  r   rh   rg   rE   	get_dtyper   bfloat16r   is_bf16_supported
get_device)r   r  r   r  r  s        r   r  r  L  s    
1  2 2 9 9 ;U=P=PQ )$'%d++&~~5>>1 4K@--%-Hdoo'()r   )optionsc               B   ddl m}  ||       sJ d       d}d}t        | j                  j                  t
        j                  j                  j                        r| j                  j                  }t
        j                  j                  j                         | j                  _        | j                          |j                  j                  |j                  j                  }|j                  j                  G|j                  j                  }n0t        | d      r| j                  }t        | d      r| j                  }|t!        j"                  |      nd}|t!        j"                  |      nd}	t!        j$                  ||xs i f      \  }
}t'        d |
D              rd	d
lm}m}  ||j.                  d      |
D cg c]&  }t        |d   t
        j0                        r|d   nd( }}|||k7  rt3        d| d|       |||	dni |||	d}||fS c c}w )z
    Flatten the inputs to the graph module and return the flat inputs and options.
    Add "aot_inductor.serialized_in_spec" and "aot_inductor.serialized_out_spec" to the options.
    rZ   )r  zGraph output must be a tuple(). This is so that we can avoid pytree processing of the outputs. Please change the module to have tuple outputs.N_in_spec	_out_spec c              3  V   K   | ]!  }t        |d    t        j                         # yw)rZ   N)r   r   r  r   s     r   r   z'_aoti_flatten_inputs.<locals>.<genexpr>  s!     
MA:adE../
Ms   ')r   )	UserErrorUserErrorTypezTorchBind objects found in inputs. TorchBind object inputs are not supported in AOTInductor. TorchBind objects can only be attributes.z>Trying to flatten user inputs with exported input tree spec: 
z-
but actually got inputs with tree spec of: 
)zaot_inductor.serialized_in_specz aot_inductor.serialized_out_spec)r\  r  r   r   r  r   r   rX   r  rx  pytree_infoin_specout_specr  r)  r*  r9  treespec_dumpstree_flatten_with_pathr  r  r-  r.  INVALID_INPUTr   r  )r   r   r   r'  r  r0  r1  r  serialized_in_specserialized_out_specflat_args_with_pathreceived_specr-  r.  r   flat_example_inputss                   r   _aoti_flatten_inputsr:  k  s(    0r" 	" GH"((##UXX^^%B%BC((##!HHNN224
&&2))11G''3**33H 2z"kkG2{#||H;B;N..w7TV+3+?h'R  *0)F)F	v|*& 
M9L
MM>''8
 	
 CV=>
1Q4.!D8  }7Mi <o
 	
 ? 0B0C	



/A0C
  ''1s   +H)r   r  r  z.Callable[[Callable[_P, _T]], Callable[_P, _T]])r   r  r   r  r  rt  )r  r   )r  zlist[dict[str, Any]])r   r
  r  	list[int])r   rM   r  rt  )r  zCallable[..., None]rs  )r  rM   r   rM   r  rt  )r  rM   r   rM   r<  r   r  rM   )F)r   rM   rP  r   r  zGenerator[str, None, None])r   rM   r_  r  r  rM   )r   rM   rP  r   r  rt  )r   rM   rm  r   r  rt  )TNN)
r   rM   ry  r   rz  zOptional[list[str]]r{  z)Optional[Callable[[torch.fx.Node], bool]]r  z"tuple[GraphModule, dict[str, int]])r   rM   r  r   )r_  r  r  "AbstractContextManager[None, None])r  r   r  r   r  r<  )r   rM   r_  r  r  r   r  z torch._subclasses.FakeTensorModer   )r  z$Optional[Union[str, dict[str, Any]]]r  zdict[str, Any])r  zGenerator[None, None, None]r  )r   rM   r_  r  rL  r  r  r>   )
r   rM   r_  r  rM  r  rL  r  r  r>   )r  r  r  r  r  r  )r   )r)  Callable[..., Any]r  r  r$  r
  r%  zlist[Optional[str]]r  r   rm  r   r  ztuple[torch.Tensor, ...]r   zSequence[PlaceholderInfo]r!  ztuple[int, ...]r  r=  )r   torch.Tensorr  r>  )r6  r>  r7  r>  r8  r;  r  rt  )r)  r=  r  zlist[torch.Tensor]r  r  r  rH  )
r]  rM   r^  rG  rV  r  r  Optional[dict[str, Any]]r  z2Union[list[Union[str, Weights]], str, GraphModule])rs  rM   rt  r  ru  rM   rv  r
  rV  r=  r  r@   r  r
  rw  r2   r  z0Callable[[list[object]], Sequence[torch.Tensor]])r  zdict[str, object])r   torch.fx.GraphModuler  zAbstractContextManager[None])r   rM   r  zSequence[object]r   r  r  ztuple[GraphModule, GraphModule])r)  rM   r  r
  )r   ztypes.ModuleTyper  r  )r   rM   r_  r  r  r
  rv  r
  r  r  rV  Callable[..., OutputCode]rm  r   r  r>   )
r   rM   r_  r  r  r  rV  rA  r  r>   )r]  rM   r^  r  r  rM   )r]  rM   r^  r  rV  rA  r  r?  r  .Optional[dict[OpOverload, Callable[..., Any]]]r  r   r  CompileFxOutput)r   rM   r^  r  r  r  )r]  rM   r^  r  rV  rA  r  rB  r  r   r  rC  )r   rM   r  r  r  r=  r  r=  )r   rf   r  rt  )
r   r@  r   z!Union[list[Any], tuple[Any, ...]]r   r?  r'  r?  r  z tuple[list[Any], dict[str, Any]](   
__future__r   r  r  enumr,  r  r   r   r   r   r  r1  r   abcr   r   collectionsr   r   dataclassesr   inspectr	   r
   operatorr   typingr   r   r   r   r   typing_extensionsr   r   r   r   r   r   unittestr   torch._inductor.async_compiler   torch.fxtorch.utils._pytreer   _pytreer9  functorch.compiler   r   torch._dispatch.pythonr   torch._dynamor   r   r  r   r   torch._dynamo.device_interfacer   torch._dynamo.repro.after_aotr    r!  r!   r"   r#   r$   r%   r&   r'   r(   r)   torch._functorchr  7torch._functorch._aot_autograd.subclass_parametrizationr*   torch._functorch.aot_autogradr+   r,   r-   r.   torch._inductor.codecacher/   r0   r1   r  r2   r3   r4   r5   !torch._inductor.custom_graph_passr6   torch._inductor.debugr7   r8   torch._inductor.output_coder9   r:   r;   r<   r=   r>   'torch._inductor.runtime.cache_dir_utilsr?   torch._inductor.utilsr@   rA   rB   rC   rD   rE   rF   rG   rH   "torch._library.fake_class_registryrI   torch._library.opaque_objectrJ   torch._loggingrK   torch._utils_internalrL   rM   %torch.fx.experimental.symbolic_shapesrN   rO    torch.fx.passes.fake_tensor_proprP   torch.monitorrQ   torch.utils._ordered_setrR   _dynamo.backends.commonrT   _dynamo.excrU   rV   fx._lazy_graph_modulerW   fx.graphrX   utils._tritonrY   r+  r[   r\   codegen.commonr]   r^   r*  r_   decompositionr`   excra   fx_passes.joint_graphrb   fx_passes.post_gradrc   rd   fx_passes.pre_gradre   r   rf   irrg   rh   output_coderi   triton_bundlerrj   rk   rl   rm   rn   ro   rp   rq   rr   rs   virtualizedrt   collections.abcru   rv   rw   rx   
torch._opsry   )torch.export.pt2_archive._package_weightsrz   r{   r|   r}   r  r   r   torch._inductor.fb.utilstypes&torch._functorch._aot_autograd.schemasr   r   r   r   r  r   r  rC  Enumr   r   r   r   _fx_compile_configr   r  r   r  r   r  r   r   r   _logginggetArtifactLoggerr  r  r  r)  r  r   r   r   r   	lru_cacher   cacher   r%  rK  rU  rW  re  rj  r  r  r  r  r  r  contextmanagerr  r  r  r  r  rf  rk  rx  r  r+  r/  r3  r9  r#  rT  r  r  r  r  r  r  r  r  r  r  r  r\  r  r  r  r  r  r  r  r:  r   r   r   <module>r     s	   "     	    	 
   # # - !     ? ? U U  $  $ $ A  ;  D =
 
 
 8  O N  B  >
 
 
 @ 7 + ?   W ; & / 2 5 : % & 3 3 U  .  5 B /   ' / )
 
 
  ==:%A$ t_T](((*% L  $v,%,,!778S		ODII    CD ./ $))%// +;; g!00<Hnn66xARS ~~77BTU NN44'  ~~77BTU 
4A,'=  T/ / 	
 	
H/VM	M%M8FMMb 38!!+/!!0NN'N N. 38+/	.+" "15FJ	E(E(E( /E( D	E(
 (E(P*('('(.	(	(!%	('	( ).' "& &	@ <@(8((  y  +
+
'+
 '+
 	+
\ 23KK'K -K 	K 4K\
B B# #4S) Sl2Y2Y'2Y
 #2Y -2Y 2Yj  $   J (*) +-.0*,))$) 	)
 &) ) ) () ,) () )XU		  
	 (*_M_M_M %_M &	_MJ )9/3	@+@+$@+ &@+ -	@+
 8@+F qc#c+c c 	c
 &c c c %c 6cL&$''"' ' %	'T $% % %: 0@kk'k  k 	k
 /k -k k kd 0@	++'+ /+ -	+
 +\//*=//j 0@/3EI"VV(V -V -	V
 CV V Vr&9>++(+ -+ C	+
 + +\H9H9(H9 -H9 C	H9
 H9 H9V$ # 	4 # 	,)D (,S(
 )-S(S(
+S( %S(
 &S( &S(r   