o
    Ih                     @   sD  d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZmZm	Z	 d dl
mZ d dlZd dlmZ d dlmZ d dlZd dlmZmZmZ d dlmZmZmZ d dlmZ d dlmZ d dl m!Z! d d	l"m#Z# d d
l$m%Z% d dlm&Z&m'Z'm(Z( d dl)m*Z*m+Z+ ddl,m-Z-m.Z. ddlm/Z/m0Z0m1Z1 edZ2edZ3e4e5Z6ej7j8Z8ej7j9Z9ej7j:Z:ej7j;Z;ej7j<Z<eg e8j=e8j>e8j?e8j@e8jAe8jBe8jCe8jDe8jEe8jFe8jGe8jHe8jIe8jJe8jKe8jLe8jMe8jNe8jOe8jPe8jQe8jRe8jSe8jTe8jUe8jVe8jWe8jXe8jYe8jZe8j[e8j\e8j]e#e8j^e8j_e8j`e8jajbe8jcjde:jee;jfZgi e egZhe8jie8jje8jke8jljme8jne8joe8jpe8jqe8jre8jse8jte8jujve8jwe8jxe8jye8jzgZ{eehe{ de|e	ej}j~ej}jf  deee3e2f gee3e2f f fddZee8jjgdejvdeddfddZee8jjgdejvdeddfddZee8jjmgddddejdeejj deejj ddfd d!Zee8jge		dd"ejvdeejj deejj dejvfd#d$Zee8jgd%e|e	ebejf  d&ejjd'edejvfd(d)Zee8jqgdd*d"ejvd+ebd,ejvdejvd-ejjdejvfd.d/Zqee8jjmgd%e|e	ebejf  d0e|eb d'edejvfd1d2Zee8jgd3ejvd4ejvd5ejvd6e|eb d7e	ebe|eb f d8e	ebe|eb f d9e	ebe|eb f d:ed;e|eb d<ebd=e|e deejvejvejvf fd>d?Zee8jjgdd"ejvd@ebdejvfdAdBZee8jgedCejvdDejvdejvfdEdFZee8jge		ddCejvdGejvdHejvdIejjd-ejjdejvfdJdKZee8jgedCejvdLejvdejvfdMdNZee8jjmg	 ddOe|ejv d+ebdejvfdPdQZee8jgd"ejvdejvfdRdSZee8jgdd*d"ejvdTejvd-eejj dejvfdUdVZee8jgdCejvdejvfdWdXZee8je8jgdCejvdejvfdYdZZee8je9jgdCejvd[ejvdejvfd\d]Zee8je9jgdCejvd[ejvdejvfd^d_Zee8j		`ddCejvd+eeb daedejvfdbdcZee8j		`ddCejvd+eeb daedejvfdddeZee8jgdCejvd+ebdfebdgebdejvf
dhdiZee8jjmgdCejvd%e|e	ebejf  dejvfdjdkZee8jjgdCejvdlejdejvfdmdnZ	ddejvdoeej dejfdpdqZee8jddddrdCejvdleej dseej doeej d'edejvfdtduZee8jddddrdCejvdleej dseej doeej d'edejvfdvdwZee8jdddd`d`ejdxdCejvd&e	ebef dleej dyeej dseej dzed{edoejdejvfd|d}Zee8jjmddddrdCejvd~ebdleej dseej doeej d'edejvfddZee8jjddddrdCejvdebd~ebdleej dseej doeej d'edejvfddZee8jjmd~ebd%e|e	ebejf  d'edejvfddZee:jejmd4ejvd5ejvdejvdejvfddZeee;jfjmd4ejvdejvdejvd5ejvdejvdejvdejvdejvdejvdebdejvfddZfeej7j:jdejvdejvfddZee8jge	 	 	`ddejvdejvdebdebdedejvfddZee8jj	ddCe|ejv de|ejv de|ejv dede|ejv f
ddZee8jj	ddCe|ejv de|ejv de|ejv dede|ejv f
ddZee8jjde|ejv de|ejv d5ejjde|ejv fddZee8jjde|ejv de|ejv de|ejj de|ejv fddZe8jjmejjjáee8jd4ejvd5ejvdejejv dejejv dejejv dedededeejvejvejvf fddZe ddeeedef f fddZdeeedef f fddZee8jȃdCejvdejvdejvdejvfddZee<jjʃd4ejvdebdebdedlejdeejvejvf fddZee8j̃	`ddCejvd,ejvdejvdedejvf
ddZee8j̓	`ddCejvd,ejvdejvdedejvf
ddÄZee8jnjmed3ejvdejvd+ebdejdejvf
ddǄZnee8j΃ddɜdCejvd+ebd,ejvdejvdededejvfdd΄Zee8jσ		 		`dd"ejvde|eb d7ee	ebe|eb f  d8e	ebe|eb f d9e	ebe|eb f dedeejvejvf fdd҄Zee8jЃd"ejvde|eb deejvejvf fddՄZee8jjd`d`ddd֜dejvdCejjdededee deejv dejvfdd݄Zee8jӃ			`	ddCejvdejvdedededeej deejvejvf fddZdS )    N)AnyCallableOptionalTypeVarUnion)	ParamSpec)core_aten_decompositionsget_decompositionsremove_decompositions)_grid_sampler_2d
_index_addpw_cast_for_opmath)extra_random_decomps)counters)	is_fbcode)	out_dtype)pad_listlike)elementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDtype_to_dtype)definitely_trueguard_size_oblivious   )configinductor_prims)is_gpu,needs_fallback_due_to_atomic_add_limitationsuse_scatter_fallback_T_Popsreturnc                 C   s8   t | r| gn| D ]}|tv rtd|  q	t| tS )Nzduplicate decomp: %s)callabledecompositionslogwarningdecompregister_decomposition)r    op r)   Q/var/www/vscode/kcb/lib/python3.10/site-packages/torch/_inductor/decomposition.pyr'   }   s
   r'   tensormsgc                 C      d S Nr)   r+   r,   r)   r)   r*   assert_async_msg_decomp      r0   c                 C   r-   r.   r)   r/   r)   r)   r*   "functional_assert_async_msg_decomp   r1   r2   )minmaxsymbolr3   r4   c                C   r-   r.   r)   )r5   r3   r4   r)   r)   r*   sym_constrain_range_for_size   s   r6   xc                 C   s(   |d ur	|  |} |d ur| |} | S r.   )	clamp_min	clamp_max)r7   r3   r4   r)   r)   r*   clamp   s
   

r:   size
fill_valuekwargsc                 K   s:   | d}|d u rtt||d< tj| |fi |S tS )Ndtype)getr   typetorchfullNotImplemented)r;   r<   r=   r>   r)   r)   r*   rB      s
   
rB   alphadimindexrE   c                C   s*   t  s| jtjkrtS t| |||d|dS )NF)inplacerE   )r   r>   rA   bfloat16rC   r   )r7   rF   rG   r+   rE   r)   r)   r*   	index_add   s   rJ   physical_layoutc                    sN   dgt   }t|D ]\}}|||< qtj fdd|D fi ||S )Nr   c                    s   g | ]} | qS r)   r)   ).0lr;   r)   r*   
<listcomp>   s    z"empty_permuted.<locals>.<listcomp>)len	enumeraterA   emptypermute)r;   rK   r=   permprM   r)   rN   r*   empty_permuted   s   
&rV   grad_outputinputweight
bias_sizesstridepaddingdilation
transposedoutput_paddinggroupsoutput_maskc                 C   st   |
d r
t | jjstS t| dgttd|   }t	| |||||||||	|
d |
d dg\}}}|||fS )N   r   r   F)
r   devicer@   rC   atensumlistrangerF   convolution_backward)rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   	grad_biasgrad_inpgrad_weight_r)   r)   r*   rh      s"    

rh   decimalsc                 C   s   d| }t | | d|  S )Ng      $@g      ?)rd   round)r7   rm   ten_pow_decimalsr)   r)   r*   	round_dec   s   rp   selfbatch2c                 C   s   t jr+| jjdkr+t| jd dkst|jd dkr+| d|d jdd}|S | jjdkr`t| ddkr`t|ddkr`t	d d  d7  < t
j| d|d ddd	dS tS )
Ncpur   rb   rF   inductordecompose_bmmTrF   keepdim)r   coordinate_descent_tuningrc   r@   r   shape	unsqueezere   r;   r   rA   squeezerC   )rq   rr   outr)   r)   r*   bmm  s"   r   mat1mat2betac                 C   s   | j jdkrtt|ddkr=t|ddkr=td d  d7  < tj|d|d dddd}|| ||   S t|ddkrtt	|dd	krtt	|dd	krttd d  d7  < |j
| jddd}|| ||   S tS )
Nrs   r   r   rt   rv   decompose_addmmTrx      )rc   r@   r   r;   r   rA   re   r}   r|   r   TrC   )rq   r   r   r   rE   r~   r)   r)   r*   addmm  s*   	r   input2c                    sV  t jr)jjdkr)tjd dkst jd dkr)d d jddS jjdkrtddkrztddkrzt ddkrzj	 j	krzt
tt  dkrztd d	  d7  < t fd
dtdD S tddkrt ddkrtd d	  d7  < tjd d ddddS tS )Nrs   r   r   rb   ru   rt       rv   decompose_mmc                    s    g | ]}|d d f   qS r.   r)   )rL   ir   rq   r)   r*   rO   K  s     zmm.<locals>.<listcomp>Trx   )r   rz   rc   r@   r   r{   r|   re   r;   r>   r   rA   numelr   catrg   r}   rC   )rq   r   r)   r   r*   mm5  s6   $r   tensorsc                    s   ddl m dtjdtf fdd}tt|| tdkr&d  S dt  k r4t| k r=n nt	j
 S tdkr|tfdd	D r|d }t|j} dk rb t|j n  | t | j|   d  S tS )
Nr   )r   r7   r!   c                    sL   t | jdkr| jd dkrdS  t | jk r$| j  dkr$dS dS )Nr   r   FT)rP   r{   )r7   )rF   r   r)   r*   non_empty_tensora  s
     zcat.<locals>.non_empty_tensorr   c                 3   s    | ]	}| d  u V  qdS )r   Nr)   )rL   t)filtered_tensorsr)   r*   	<genexpr>  s    
zcat.<locals>.<genexpr>)%torch.fx.experimental.symbolic_shapesr   rA   Tensorboolrf   filterrP   clonerd   r   defaultallr{   insertr|   expandflattenrC   )r   rF   r   inpr{   r)   )rF   r   r   r*   r   Z  s     
 r   c                 C   s~   |   rtt| jtdt| j| jS t| t	j
d\}}tjtj|| jd}t| dk |d}tt| td|S )Nnan)type_promotion_kindr>   rc   r   g        )
is_complexrA   whereisnanrealfloatatan2imagr   r   INT_TO_FLOATscalar_tensormathpirc   )r7   rl   r>   r   retr)   r)   r*   angle  s   
r   yc                C   s   t | o|  }t |o| }|r|stS |}|d ur"|| }t | j|j}dt jdt jfdd}|| | jj}|||jj}	t j	||	 dd|}
|
S )Nr+   r!   c                 S   sD   | j ^ }}|d dkrtdg ||d dR }| |}|S )zNReshape tensor from [*initial_dims, last_dim] to *initial_dims, last_dim/2, 2]rb   r   zQThe size of the last dimension must be even to reshape it to [..., last_dim/2, 2])r{   AssertionErrorview)r+   initial_dimslast_dim	new_shapereshaped_tensorr)   r)   r*   reshape_tensor_complex  s   
z#add.<locals>.reshape_tensor_complex)	start_dim)
rA   	is_tensorr   rC   promote_typesr>   r   r   r   r   )r7   r   rE   x_is_complex_tensory_is_complex_tensorzcomplex_typer   
x_reshaped
z_reshapedresultr)   r)   r*   add  s   r   c                 C   s   |   rJ d| S )NzTODO: implement this)r   rq   r)   r)   r*   conj_physical  s   r   c                 C   s   | S r.   r)   r   r)   r)   r*   lift  r1   r   otherc                 C   s   t t ||| kB | |S r.   rA   r   r   rq   r   r)   r)   r*   fmin     r   c                 C   s   t t ||| k B | |S r.   r   r   r)   r)   r*   fmax  r   r   Fry   c                 C       | j tjkrtj| ||dS tS Nrx   )r>   rA   r   anyrC   rq   rF   ry   r)   r)   r*   amax     r   c                 C   r   r   )r>   rA   r   r   rC   r   r)   r)   r*   amin  r   r   startlengthc                 C   s   t | ||| S r.   )rA   narrowr   )rq   rF   r   r   r)   r)   r*   narrow_copy  s   r   c                 C   s   t | | S r.   )rd   r   r   )rq   r;   r)   r)   r*   view_copy_default  s   r   r>   c                 C   s   |  | S r.   )tor   )rq   r>   r)   r)   r*   view_copy_dtype  s   r   memory_formatc                 C   s    |t ju s	|d u rt| S |S r.   )rA   preserve_formatutilssuggest_memory_format)r+   r   r)   r)   r*   get_like_layout  s   
r   )r>   rc   r   rc   c                K   :   t jg |  f|p| j|p| jd|jt| |dS Nr   r   )rA   randr;   r>   rc   r   r   rq   r>   rc   r   r=   r)   r)   r*   	rand_like     	

r   c                K   r   r   )rA   randnr;   r>   rc   r   r   r   r)   r)   r*   
randn_like/  r   r   )r>   layoutrc   
pin_memoryrequires_gradr   r   r   r   c                C   s>   t jg |  ||p| j|p| j|p| j|djt| |dS )N)r>   r   rc   r   r   )rA   rB   r;   r>   r   rc   r   r   )rq   r<   r>   r   rc   r   r   r   r)   r)   r*   	full_like@  s   

r   highc                K   s@   t jjd|g |  f|p| j|p| jd|jt| |dS )Nr   r   r   rd   randintlowr;   r>   rc   r   r   )rq   r   r>   rc   r   r=   r)   r)   r*   randint_likeV  s   


r   r   c                K   s@   t jj||g |  f|p| j|p| jd|jt| |dS r   r   )rq   r   r   r>   rc   r   r=   r)   r)   r*   randint_like_lowj  s   

r   c                 K   s   t jjd| |fi |S Nr   )rd   r   r   )r   r;   r=   r)   r)   r*   r     s   r   biasc                 C   s*   t jj|}t jj| ||| d S r   )rA   r    
_quantized$wrapped_fbgemm_pack_gemm_matrix_fp16!wrapped_fbgemm_linear_fp16_weightr;   )rX   rY   r   packed_weightr)   r)   r*   #linear_dynamic_fp16_unpacked_weight  s   r   input_scaleinput_zero_pointweight_scaleweight_zero_point	out_scaleout_zero_pointout_channelc
              	   C   s.   t jj||||}
t jj| |||
|||	S r.   )rA   r    r   _wrapped_linear_prepack#_wrapped_quantized_linear_prepacked)rX   r   r   rY   r   r   r   r   r   r   r   r)   r)   r*   wrapped_quantized_linear  s   r  packedc                 C   s^   dt jdt jfdd}|| dddf }|| ddd f }| dd df t j| | S )Nu8r!   c                    st    fdddD \}}}}t jdkr&||d>  |d>  |d>  tjd S |d> |d>  |d>  | tjd S )	Nc                 3   s$    | ]} d |f  tjV  qdS .N)r   rA   int32)rL   nr  r)   r*   r     s   " zPq_embedding_bag_byte_unpack_decomp.<locals>.bitcast_u8_to_f32.<locals>.<genexpr>)r   r   rb      little   r      r  )sys	byteorderr   rA   float32)r  r7   r   r   wr)   r  r*   bitcast_u8_to_f32  s   
((z=q_embedding_bag_byte_unpack_decomp.<locals>.bitcast_u8_to_f32.i)rA   r   r   r  )r  r  scalesoffsetsr)   r)   r*   "q_embedding_bag_byte_unpack_decomp  s    r  agridinterpolation_modepadding_modealign_cornersc                 C   s@   | j t dko|dko| jtjd }t| |||||d}|S )Nrs   r   r   )r  r  r  r  _expand_grid)rc   rA   is_contiguouscontiguous_formatdecomp_grid_sampler_2d)r  r  r  r  r  r  outputr)   r)   r*   grid_sampler_2d  s   r   left_tensorsright_tensorsscalarc                 C      t jj| t j|||dS NrD   )rd   _foreach_addList_foreach_mulrq   r!  r"  r#  r)   r)   r*   _foreach_addcmul_scalar     r*  c                 C   r$  r%  )rd   r&  r'  _foreach_divr)  r)   r)   r*   _foreach_addcdiv_scalar  r+  r-  start_tensorsend_tensorsc              	   C   "   t j| t jt j|| |S r.   )rd   r&  r'  r(  Scalar_foreach_sub)r.  r/  rY   r)   r)   r*   _foreach_lerp_scalar     r3  scalarsc              	   C   r0  r.   )rd   r&  r'  r(  
ScalarListr2  )r.  r/  r5  r)   r)   r*   _foreach_lerp_scalarlist  r4  r7  running_meanrunning_vartrainingexponential_average_factorepsilonc              
   C   sB   t | |||||||\}}	}
|r||	|
fS ||d|dfS )Nr   )rd   native_batch_norm	new_zeros)rX   rY   r   r8  r9  r:  r;  r<  r  bcr)   r)   r*   miopen_batch_norm  s    

rB  .c                   C   s   i t tS r.   )r#   r   r)   r)   r)   r*   fast_random_decomps0  s   rC  c                   C   s   t jrtS t S )z"decomps can change based on config)r   fallback_randomr#   rC  r)   r)   r)   r*   select_decomp_table7  s   rE  masksourcec           
      C   s   ddl m}m} || j|jrCt| |g\} }|ddd }dd | ||fD \}}}t	|||gd}	t
||	|| jS tS )Nr   )BackendFeaturehas_backend_featurert   r   c                 s   s    | ]}|  V  qd S r.   )r   )rL   r7   r)   r)   r*   r   K  s    z!masked_scatter.<locals>.<genexpr>)codegen.commonrH  rI  rc   MASKED_SCATTER_WITH_INDEXrd   broadcast_tensorsreshapecumsum_unsafe_masked_indexrA   r   r   r{   rC   )
rq   rF  rG  rH  rI  
source_idx	self_flat	mask_flatsource_flatr   r)   r)   r*   masked_scatter>  s   rT  	quant_min	quant_maxepsc           	      C   sv   t | \}}|| t||  }t |t |g}|t || t j }t |||}|t j	|t j
fS r.   )rA   aminmaxr   r4   r   rn   r   intr:   float64int64)	rX   rU  rV  rW  r>   min_valmax_valscale
zero_pointr)   r)   r*   choose_qparams_tensorQ  s   r`  
accumulatec                 C   s.   |   }t||g||j|}|| jS r.   )r   rA   	index_putrM  r{   )rq   rG   rG  ra  	flattenedr)   r)   r*   puta  s
   rd  c                 C   s   t j| |||d}| |S )N)ra  )rd   rd  copy_)rq   rG   rG  ra  r~   r)   r)   r*   put_o  s   
rf  r  input_dtypec                 C   sD   | | }t j||dd}t| ||}| j|kr||}| S )NTrx   )rA   re   r   fmar>   r   
contiguous)rW   r  rF   rg  new_grad_outputsum_new_grad
grad_inputr)   r)   r*   _softmax_backward_dataz  s   

rm  Tinclude_selfsrcreduction_typero  c                C   sr  |dkrOt | jsO| jjp| jj}t|}|r%| }t| |||}	n| ||d}t| |||}	|		|	dk d}	||||}|rK||	 S ||	 S t
tjj|| j|j|jjdr`tS | j|d d   | jd |   }
| g| j|d d  | jd | R }g t| j| | jdtd| j| R }|tj|
||}| j|||||dS )Nmeanr   r   Trn  )r   r>   is_floating_pointr   rA   	ones_likerJ   
index_fill
zeros_likemasked_fillr   rd   scatter_reduce_tworc   r@   rC   r{   r   rg   ndimr   r[  repeat_interleaverM  rS   scatter_reduce)rq   rF   rG   rp  rq  ro  true_divisiononesr~   countsrepeatsindex_shaperT   scatter_indexr)   r)   r*   index_reduce  sJ   


(,,
r  kernel_size	ceil_modec           
      C   s   |dkrddg}|dkrddg}|s|}t |d}t |d}t |d}t |d}|d |d  }tjj||sA|ttjjkrCtS t	
| |||||\}}t	||d | d||}	||	fS )Nr   r   rb   rt   )r   rA   	_inductorlowering'should_fallback_max_pool2d_with_indicesiinfoint8r4   rC   prims#_low_memory_max_pool2d_with_offsets)_low_memory_max_pool2d_offsets_to_indicesr;   )
r7   r  r[   r\   r]   r  window_sizevalsr  indicesr)   r)   r*   max_pool2d_with_indices  sB   	



r  output_sizec           	      C   s   | j ^ }}}|\}}|dks|dkr(g |||}| || j|tjdfS || dkrB|| dkrB|| || g}t| |S tS )Nr   )r>   )r{   	new_emptyrA   r[  rd   r  rC   )	r7   r  batchh_inw_inh_outw_outo_sizer  r)   r)   r*   adaptive_max_pool2d  s   r  	out_int32rightsidesortersorted_sequencer  r  r  r  c                C   s(   t j| tj|g| jd||||dd S )N)rc   r  r   )rd   searchsortedrA   r+   rc   )r  rq   r  r  r  r  r)   r)   r*   searchsorted_scalar
  s   
r        ?UUUUUU?noiselowerupper	generatorc                 C   sf   |r#| dk}t j| |||d}t|| | | }t||d}	||	fS || d }
t | |
t fS )Nr   )r  r   rb   )rd   uniformrA   r   
leaky_relur   )rq   r  r  r  r:  r  not_positiverr  	noise_outnegative_sloper)   r)   r*   rrelu_with_noise_functional  s   	r  )NNr=  )r   r   )NFr.   )r   r   F)r   )F)Nr   r   F)r  r  FN)	functoolsloggingr   r  typingr   r   r   r   r   typing_extensionsr   rA   torch._decomp_decompr&   torch._prims_common_prims_commonr   $torch.ao.quantization.fx._decomposedr   r	   r
   torch._decomp.decompositionsr   r  r   r   $torch._decomp.decompositions_for_rngr   torch._dynamo.utilsr   torch._environmentr   !torch._higher_order_ops.out_dtyper   torch._inductor.utilsr   r   r   r   r   r   r    r   r   r   r   r   r   r   	getLogger__name__r$   r    rd   r  	quantizedr   quantized_decomposed_adaptive_avg_pool2d_backwardindex_selectaddmvarangebitwise_and_bitwise_or_
clamp_min_dist
empty_likeflipgeluhardtanhlcmr  linalg_vector_norm_log_softmax max_pool2d_with_indices_backward_native_batch_norm_legit#_native_batch_norm_legit_functional$_native_batch_norm_legit_no_training_batch_norm_with_update"_batch_norm_with_update_functional_batch_norm_no_updatebatch_norm_backwardr>  native_group_normnative_layer_normnll_loss2d_backwardpermute_copyrrelu_with_noise_backward_softmaxsin_sqrt__to_copytril_indicestriu_indicesunbind_copyrY  upsample_bilinear2dvecr   r  inductor_decompositionsr#   _unsafe_indexrO  #_unsafe_masked_index_put_accumulate+_scaled_dot_product_flash_attention_for_cpur   rm  r9   r8   rJ   gluselect_scatterslice_scattersplitr   r}   re   unbindbaddbmmdecomps_to_excluderf   _opsOperatorBaseOpOverloadPacketr'   _assert_asyncr,   strr0   _functional_assert_asyncr2   r6   SymInttypesNumberr:   rB   rV   rh   r   tuplern   rm   rp   r   r   r   r   r   r   r   r   detach_r   r   r   r   r   	view_copyr   r>   r   r   r   r   rc   r   r   r   r   r   r   	low_dtyper   r   embedding_bag_byte_unpackr  r   _foreach_addcmulr1  r*  _foreach_addcdivr-  _foreach_lerpr3  r6  r7  rB  py_impl_CDispatchKeyAutograd	lru_cachedictrC  rE  rT  choose_qparamsr+   r`  rd  rf  r  r  r  r  r  r  	Generatorr)   r)   r)   r*   <module>   s  
	
 !"#$%&'()-




	




	
 


#
5


)


		

	


	


	









	 


42
