o
    Ihi                     @  s  d dl mZ d dlZd dlZd dlmZmZmZ d dlZd dl	m
Z
 d dlZd dlmZ d dlmZ ddlmZmZ dd	lmZmZmZ d
dlmZmZmZmZmZmZ d
dlm Z m!Z!m"Z" er|d dlm#Z# ddl$m%Z%m&Z& ddl'm(Z(m)Z) d
dlm*Z* ej+dej,dej-dej.dej/dej0dej1dej2dej3di	Z4d,ddZ5G d d! d!eZ6G d"d# d#eZ7e78d$ G d%d& d&e!Z9ej:d-d(d)Z;G d*d+ d+e"Z<dS ).    )annotationsN)AnyOptionalTYPE_CHECKING)
PRECEDENCE)ExprPrinter)ValueRanges   )get_bounds_index_exprget_kernel_metadata)ops
OpsWrapperV   )CSEVariableDeferredLineDTYPE_TO_COMPUTATION_DTYPEIndentedBufferOpOverridesPythonPrinter)IterationRangesEntry
SIMDKernelSIMDScheduling)Union)ReductionType	StoreMode)	SchedulerSchedulerNode)OpVarTboolcharshortintlongucharfloathalfbfloatval)Union[float, int, bool, str, CSEVariable]returnstrc                 C  sZ   t | tr| tjkrdS | tj krdS | | krdS t| S t | tr)| r'dS dS t| S )N	HUGE_VALFz
-HUGE_VALFNANtruefalse)
isinstancer%   torchinfr+   r   )r(    r3   O/var/www/vscode/kcb/lib/python3.10/site-packages/torch/_inductor/codegen/mps.pyvalue_to_metal2   s   


r5   c                   @  s\   e Zd ZdddZdddZdd	d
ZdddZdddZdddZdddZ	dddZ
dS )MetalExprPrinterexpr
sympy.Exprr*   r+   c                 C  sH   |j \}}| |}| |}|jrd| d| dS d| d| dS )N() / ()metal::floor(argsdoprint
is_integer)selfr7   xdivr3   r3   r4   _print_FloorDivA   s   


z MetalExprPrinter._print_FloorDivc                 C  sp   |j \}}}| |}|dkr*| |}|jr!d| d| d}n	d| d| d}| |}d| d| dS )Nr   r9   r:   r;   r<   z) % (r=   )rA   r7   rB   rC   modr3   r3   r4   _print_ModularIndexingI   s   


z'MetalExprPrinter._print_ModularIndexingc                 C  2   t |jdkrtdddt| j|j dS )Nr	   z$metal::min only supported for 2 argszmetal::min(, r;   lenr>   RuntimeErrorjoinmap_printrA   r7   r3   r3   r4   
_print_MinU      zMetalExprPrinter._print_Minc                 C  rG   )Nr	   z$metal::max only supported for 2 argszmetal::max(rH   r;   rI   rO   r3   r3   r4   
_print_MaxZ   rQ   zMetalExprPrinter._print_Maxc                 C  *   t |jdks	J d| |jd  dS )Nr   metal::abs(r   r;   rJ   r>   rN   rO   r3   r3   r4   
_print_Abs_      zMetalExprPrinter._print_Absc                 C  rS   )Nr   zstatic_cast<long>(metal::rint(r   z))rU   rO   r3   r3   r4   _print_RoundToIntc   rW   z"MetalExprPrinter._print_RoundToIntc                 C  sh   t |jdks	J |j\}}|jr|dk sJ td| d| |td }d| d| d|  d	S )
Nr	   r   zOFor integer inputs, only non-negative ndigits are currently supported, but got .Mulz!static_cast<float>(metal::rint(1e * z) * 1er;   )rJ   r>   r@   
ValueErrorparenthesizer   )rA   r7   numberndigits
number_strr3   r3   r4   _print_RoundDecimalg   s   

z$MetalExprPrinter._print_RoundDecimalc                 C  s(   |j \}}d| | d| | dS )Nstatic_cast<float>(z) / static_cast<float>(r;   )r>   rN   )rA   r7   lhsrhsr3   r3   r4   _print_IntTrueDivs   s   
z"MetalExprPrinter._print_IntTrueDivN)r7   r8   r*   r+   )__name__
__module____qualname__rD   rF   rP   rR   rV   rX   ra   re   r3   r3   r3   r4   r6   @   s    






r6   c                   @  s  e Zd Ze		ddddZedddZedddZedddZedddZedd"d#Z	edd$d%Z
edd&d'Zedd(d)Zedd*d+Zedd,d-Zedd.d/Zedd0d1Zedd2d3Zedd4d5Zedd6d7Zedd8d9Zedd:d;Zedd<d=Zedd>d?Zedd@dAZeddBdCZeddDdEZeddFdGZeddHdIZeddKdLZeddMdNZeddOdPZeddQdRZ eddSdTZ!eddUdVZ"eddWdXZ#eddYdZZ$edd[d\Z%edd]d^Z&edd_d`Z'eddadbZ(eddcddZ)eddedfZ*eddgdhZ+eddidjZ,eddkdlZ-eddodpZ.eddqdrZ/eddudvZ0eddwdxZ1eddydzZ2edd{d|Z3edd}d~Z4edddZ5edddZ6dS )MetalOverridesNTrB   r   dtypetorch.dtype	src_dtypeOptional[torch.dtype]use_compute_typesr   r*   r+   c                 C     dt |  d|  dS )Nzstatic_cast<>(r;   DTYPE_TO_METAL)rB   rj   rl   rn   r3   r3   r4   to_dtypez   s   zMetalOverrides.to_dtypec                 C  ro   )Nz*reinterpret_cast<thread z*>(&r;   rq   )rB   rj   rl   r3   r3   r4   to_dtype_bitcast   s   zMetalOverrides.to_dtype_bitcastr(   Union[bool, float, int]c                 C  s   t | S Nr5   )r(   rj   r3   r3   r4   constant   s   zMetalOverrides.constantr7   r8   c                 C  s<   t jt j| }t jjjt jj|t| d}t	||S )N)bounds)
r   kernelindex_to_strprepare_indexingcsegeneratecomputer
   r   rs   )r7   rj   idx_strvarr3   r3   r4   
index_expr   s
   zMetalOverrides.index_exprmaskbodyotherc                 C  sR   t j| |}| }W d    n1 sw   Y  |jjr"t|}t|||S rv   )r   rz   
mask_loadsry   is_boolr   r   where)r   r   r   new_maskresultr3   r3   r4   masked   s   zMetalOverrides.maskedar   bcc                 C  s   |  d| dt | S )Nz ? z : rw   )r   r   r   r3   r3   r4   r      s   zMetalOverrides.wherec                 C  s   t |tr|jd ur|jjs|  d| S t | tr&| jtjkr&d|  dn| }t |tr9|jtjkr9d| dn|}| d| d| d| dS )N % rb   r;   z - z * metal::floor( / )r0   r   rj   is_floating_pointr1   r%   r   r   float_afloat_br3   r3   r4   	remainder   s&   
zMetalOverrides.remainderc                 C  B   d|  d| d|  d}d|  d| d| d}d| d| dS )Nstatic_cast<decltype(+)>(r;   zc10::metal::max(rH   r3   r   r   
typecast_a
typecast_br3   r3   r4   maximum      zMetalOverrides.maximumc                 C  r   )Nr   r   r   r;   zc10::metal::min(rH   r3   r   r3   r3   r4   minimum   r   zMetalOverrides.minimumc                 C     |  d| S )Nz || r3   r   r   r3   r3   r4   
logical_or      zMetalOverrides.logical_orc                 C  r   )Nz && r3   r   r3   r3   r4   logical_and   r   zMetalOverrides.logical_andc                 C     d|  dS )Nzmetal::isnan(r;   r3   rB   r3   r3   r4   isnan      zMetalOverrides.isnanc                 C  r   )Nzmetal::isinf(r;   r3   r   r3   r3   r4   isinf   r   zMetalOverrides.isinfc                 C  r   )Nzmetal::log(r;   r3   r   r3   r3   r4   log   r   zMetalOverrides.logc                 C  r   )Nzmetal::exp(r;   r3   r   r3   r3   r4   exp   r   zMetalOverrides.expc                 C  r   )NrT   r;   r3   r   r3   r3   r4   abs   r   zMetalOverrides.absc                 C  r   )Nzmetal::signbit(r;   r3   r   r3   r3   r4   signbit   r   zMetalOverrides.signbitc                 C  r   )Nzmetal::precise::sin(r;   r3   r   r3   r3   r4   sin   r   zMetalOverrides.sinc                 C  r   )Nzc10::metal::sinc(r;   r3   r   r3   r3   r4   sinc   r   zMetalOverrides.sincc                 C  r   )Nzmetal::precise::cos(r;   r3   r   r3   r3   r4   cos   r   zMetalOverrides.cosc                 C  r   )Nzc10::metal::i0(r;   r3   r   r3   r3   r4   i0   r   zMetalOverrides.i0c                 C  r   )Nzc10::metal::i1(r;   r3   r   r3   r3   r4   i1   r   zMetalOverrides.i1c                 C  r   )Nzc10::metal::erf(r;   r3   r   r3   r3   r4   erf   r   zMetalOverrides.erfc                 C  r   )Nzc10::metal::erfinv(r;   r3   r   r3   r3   r4   erfinv   r   zMetalOverrides.erfinvc                 C  r   )Nzc10::metal::log_gamma(r;   r3   r   r3   r3   r4   lgamma  r   zMetalOverrides.lgammayc                 C     d|  d| dS )Nzc10::metal::polygamma(rH   r;   r3   )rB   r   r3   r3   r4   	polygamma     zMetalOverrides.polygammac                 C  r   )Nzc10::metal::digamma(r;   r3   r   r3   r3   r4   digamma	  r   zMetalOverrides.digammac                 C  r   )Nzmetal::tan(r;   r3   r   r3   r3   r4   tan  r   zMetalOverrides.tanc                 C  r   )Nzmetal::asin(r;   r3   r   r3   r3   r4   asin  r   zMetalOverrides.asinc                 C  r   )Nzmetal::acos(r;   r3   r   r3   r3   r4   acos  r   zMetalOverrides.acosc                 C  r   )Nzmetal::atan(r;   r3   r   r3   r3   r4   atan  r   zMetalOverrides.atanc                 C  r   )Nzmetal::sqrt(r;   r3   r   r3   r3   r4   sqrt  r   zMetalOverrides.sqrtc                 C  r   )Nzmetal::rsqrt(r;   r3   r   r3   r3   r4   rsqrt!  r   zMetalOverrides.rsqrtc                 C  r   )Nzmetal::tanh(r;   r3   r   r3   r3   r4   tanh%  r   zMetalOverrides.tanhc                 C  r   )Nzmetal::atanh(r;   r3   r   r3   r3   r4   atanh)  r   zMetalOverrides.atanhc                 C  sF   |  d| }|  d| }d|  d| d| d| d| d| d	S )
Nr   r   z((z
 < 0) != (z	 < 0) ? (z != 0 ? z - 1 : z) : r;   r3   )r   r   quotremr3   r3   r4   floordiv-  s   *zMetalOverrides.floordivc                 C  r   )Nr<   r;   r3   r   r3   r3   r4   floor4  r   zMetalOverrides.floorc                 C  r   )Nzmetal::sign(r;   r3   r   r3   r3   r4   sign8  r   zMetalOverrides.signc                 C  r   )Nr   r   r   r;   zmetal::fmod(rH   r3   r   r3   r3   r4   fmod<  r   zMetalOverrides.fmodc                 C  r   )Nmetal::trunc(r;   r3   r   r3   r3   r4   truncB  r   zMetalOverrides.truncc                 C  sJ   | j tjkrd|  dn| }|j tjkrd| dn|}d| d| dS )Nrb   r;   r   /)rj   r1   r%   r   r3   r3   r4   truncdivF  s   zMetalOverrides.truncdivc                 C  r   )Nzmetal::ceil(r;   r3   r   r3   r3   r4   ceilN  r   zMetalOverrides.ceilseedoffsetc                 C  r   )Nzc10::metal::rand(rH   r;   r3   r   r   r3   r3   r4   randR  r   zMetalOverrides.randc                 C  r   )Nzc10::metal::randn(rH   r;   r3   r   r3   r3   r4   randnV  r   zMetalOverrides.randnlowhighc              	   C  s   d|  d| d| d| d	S )Nzc10::metal::randint64(rH   r;   r3   )r   r   r   r   r3   r3   r4   	randint64Z  s   zMetalOverrides.randint64c                 C  r   )Nzmetal::round(r;   r3   r   r3   r3   r4   round`  r   zMetalOverrides.roundc                 C  r   )Nr   r   r   r;   zmetal::pow(rH   r3   )r   r   cast_acast_br3   r3   r4   powd  r   zMetalOverrides.powc                 C  r   )Nzc10::metal::zeta(rH   r;   r3   r   r3   r3   r4   zetaj  r   zMetalOverrides.zetac                 C  r   )Nz c10::metal::spherical_bessel_j0(r;   r3   r   r3   r3   r4   spherical_bessel_j0n  r   z"MetalOverrides.spherical_bessel_j0c                 C  r   )Nzc10::metal::xlog1py(r;   r3   r   r3   r3   r4   xlog1pyr  r   zMetalOverrides.xlog1pyc                 C  r   )Nzc10::metal::entr(r;   r3   r   r3   r3   r4   entrv  r   zMetalOverrides.entr)NT)
rB   r   rj   rk   rl   rm   rn   r   r*   r+   )rB   r   rj   rk   rl   rk   r*   r+   )r(   ru   rj   rk   r*   r+   )r7   r8   rj   rk   r*   r+   )r   r   r   r8   r   r   r*   r+   )r   r   r   r   r   r   r*   r+   )r   r   r   r   r*   r+   )r   r   r   r   r*   r+   )rB   r   r*   r+   )rB   r   r   r   r*   r+   )r   r   r   r   r*   r+   )
r   r   r   r   r   r   r   r   r*   r+   )7rf   rg   rh   staticmethodrs   rt   rx   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r3   r3   r3   r4   ri   y   s    
ri   mpsc                      s   e Zd ZeZdZdZdZe j	Z
e j	ZeZd< fd
dZd=ddZd>ddZ	d?d@ddZde fdAd!d"ZdBd'd(ZdCd+d,ZdDd-d.Zd?dEd0d1Zd?dFd3d4ZdGd:d;Z  ZS )HMetalKernel;auto i   tilingdict[str, sympy.Expr]kwargsr   r*   Nonec                   s(   t  j|fi | t | _d| _d S )NF)super__init__	itertoolscountacc_var_idsmultistage_reduction)rA   r   r   	__class__r3   r4   r     s   

zMetalKernel.__init__rj   rk   r+   c                 C  s   t | S rv   rq   )rA   rj   r3   r3   r4   dtype_to_str  s   zMetalKernel.dtype_to_strnameindexr8   r   c                 C  sH   | j |}| |}| d| | d}| jj| j|tj	|dS )z"Codegen a load from an InputBuffer[]rj   )
r>   inputr|   r{   r}   r~   loadsr   graph	get_dtype)rA   r   r   r   liner3   r3   r4   load  s   
zMetalKernel.loadNvaluemoder   c                 C  s|   | j |}| |}| tj|}| d| | d| d| d}| jr3| j	
t|| d S | j
t|| d S )Nr   z] = static_cast<rp   );)r>   outputr|   r   r   r   r   r{   inside_reductionr   	writeliner   stores)rA   r   r   r   r   r   	dtype_strr   r3   r3   r4   store  s   
"zMetalKernel.store
elem_countOptional[int]ry   ValueRanges[Any]c              	   C  sr   dt | j }tj|||}|r'| jd| | d| d| d |S | jd| | d| d |S )Ntmp_acc_zthreadgroup  r   z];r   )nextr   r   rz   create_cse_varindexing_coder   r   )rA   rj   r  ry   var_namer   r3   r3   r4   _new_accvar  s   zMetalKernel._new_accvarrl   reduction_typer   +Union[CSEVariable, tuple[CSEVariable, ...]]c                 C  s  t dd | jD }t|j| j}|dkr>| |}| j| d | jd | j	d| d| d | j
d |S |d	v r| ||}| jry|d
krQdnd\}	}
| j| d|j d|	 d | j	| d|j d|
 d| d n| j	| d|j d| d | jj| j
d| d| d| dt| dS |dv r| ||}| d|j d}t| }| js| j	| d| d| d | jj| j
d| d| d| d|dS |drdnd}| j| d| d| d  |d!r\t d"d | j D }| tj|}|d#krd$nd%}| d|j d}| j	| d& | j	d| d'| d'| d| d(| d)| d(|j d* | jj| j
| d+| d| d| d,|dS | j| d-| d| d| d | jj| j
d| d| d| d|dS |d.kr| jrJ d/| | ||}| j	| d|j d| d | j| jd| d| d| d}t| d0| d1| jjfS t|)2zCodegen a reduction operationc                 s      | ]}|j r|V  qd S rv   is_reduction.0tr3   r3   r4   	<genexpr>  s    z(MetalKernel.reduction.<locals>.<genexpr>anyz	 = false;z7threadgroup_barrier(metal::mem_flags::mem_threadgroup);z
                if (z) {
                    z' = true;
                }
            )prodsumr  )r   r   )r   *r   z] = r   z] z= zc10::metal::threadgroup_r9   rH   r;   r   )maxminargminargmaxr   z = static_cast<rp   r   r  lowestz = ::metal::numeric_limits<z>::z();argc                 s  r  rv   r  r  r3   r3   r4   r    s    
r  ><z = -1;r   = z;
                    z$;
                }
                z[c10::metal::threadgroup_z)]z = ::c10::metal::welford_reducez+Multistage reduction not yet supported for z.xz.y)r	  range_treesr  numelmax_threadgroup_sizer  r  r   r   splicer  r   r   r}   r~   r   rr   endswith
startswithrange_tree_nodesvaluesr1   r#   r   _unwrapfeaturesreduction_numelNotImplementedError)rA   rj   rl   r  r   reduction_dimacc_buf_sizeaccacc_bufdefault_valreduction_opacc_thread_varsrc_metal_typelim_fnidx_varidx_acc_bufcmp_opidx_thread_varwf_resr3   r3   r4   	reduction  s   
 



 zMetalKernel.reductionentryr   c                 C  sD  |  |j}| |}|jr|jj| jk| _|jr| js/| j	| j
 d|j d| d d S |jj| j d | j }| j	d|j d|j d| d|j d		 | j B | j	| j
 d|j d| d
| d|j d
 || j |jjkr| j	d|j d|jj d W d    d S W d    d S 1 sw   Y  d S )Nr  r#  r   r   z	for(auto z
_cnt = 0; z_cnt < z; ++z_cnt) {r[   z + z_cnt;if ( >= z) break;)rename_indexingr7   sexprr  rootr&  r'  r   r  r   index_dtyper   r   indent)rA   r@  r   	index_str	loop_sizer3   r3   r4   codegen_iteration_ranges_entry!  s0   
"&""z*MetalKernel.codegen_iteration_ranges_entryc                 C  s   | j r0| j  | j| j | j| j W d   n1 s!w   Y  | jd d| _ n| j| j | j| j | j| j | j  | j  | j  dS )a  
        Concat output code from index_code, loads, compute, stores,
        suffix into self.body.

        For pointwise kernels, this is called just once at the end.

        For reduction kernels, this generates a loop over the reduction
        axis.
        N}F)	r   r   rG  r(  r   r   r   r  clearrA   r3   r3   r4   codegen_body<  s   


zMetalKernel.codegen_bodyOptional[str]c                 C  s  |    t }|d |  }|  |jddd | jr%|d |d |  | jj	 D ]\}}|| j
v r?q5| tj|}|d| d| d	 q5| jj	 D ]\}}| tj|}|d
| d| d	 q[| jj	 D ]\}}|d| d	 q{t|dk sJ dt|dkrdt| nd}t|dkr|d jnd}| jrd	nd}	|| d| d|	  | jr|| d W d   n1 sw   Y  |d | 5 t|dkrt|D ]\}
}|d|j dtd|
  d q|| j || j W d   n	1 sw   Y  |d W d   n	1 s4w   Y  |d | S )z3Called at the end to generate a final kernel stringzcompile_mps_shader("""z
            #include <c10/metal/random.h>
            #include <c10/metal/special_math.h>
            #include <c10/metal/utils.h>
            T)stripz&#include <c10/metal/reduction_utils.h>zkernel void generated_kernel(zdevice z* ,z	constant zconstant long&    z%Up to 3 index variables are supportedr   uintr   
thread_pos r  z [[thread_position_in_grid]]z- group_pos [[thread_position_in_threadgroup]]Nz) {r   z = thread_pos.x   r   rK  z"""))rN  r   r   active_range_treesrG  r(  r   r>   output_buffersitemsremoved_buffersr   r   r   r   input_bufferssizevarsrJ   r   	enumeratechrr  r   getvalue)rA   r   codeidx_varsouterinnerr  thread_pos_dtypethread_pos_var_namethread_pos_suffixidxr   r3   r3   r4   codegen_kernelT  sh   







0zMetalKernel.codegen_kernelnodec                   s   t jj}g  jj  jj } fdd|D }|dd  jj D 7 }t 	 dkrG fdd 	 D }|dd
| dg7 } jra fd	d 	 D }|d
d
| dg7 }|j||tddd dS )zCodegen a call to this kernelc                   s   g | ]	}| j vr|qS r3   )rZ  )r  r   rM  r3   r4   
<listcomp>  s    z+MetalKernel.call_kernel.<locals>.<listcomp>c                 S  s   g | ]}t |qS r3   )r+   r  vr3   r3   r4   rj    s    r   c                   s.   g | ]}  |jrt|j jn|jqS r3   )pexprr  sympyMinr&  r'  rk  rM  r3   r4   rj    s    z	threads=[rH   r   c                   s,   g | ]}|j r t|j jnd qS )1)r  rm  rn  ro  r&  r'  rk  rM  r3   r4   rj    s    zgroup_size=[cpuF)devicetritonN)r   r   wrapper_coder>   rX  keysr[  r\  rJ   rW  rL   r   generate_kernel_callr1   rr  )rA   r   ri  wrapperr>   threadsr3   rM  r4   call_kernel  s(   


zMetalKernel.call_kernelr7   sizelowerr   upperc           	      C  s   |s|sd S |  |}|r| dnd}|r | d|  | nd}|r0|r0d| d| d}nd| | d}| jj| j|d	d
 d S )Nz < 0rU  rB  zif ((z) && (z	)) returnrA  z) returnF)
assignment)r{   r}   r~   r   )	rA   r7   rz  r{  r|  expr_str
lower_expr
upper_exprr   r3   r3   r4   check_bounds  s   
zMetalKernel.check_bounds)r   r   r   r   r*   r   )rj   rk   r*   r+   )r   r+   r   r8   r*   r   rv   )
r   r+   r   r8   r   r   r   r   r*   r   )rj   rk   r  r  ry   r  r*   r   )
rj   rk   rl   rk   r  r   r   r  r*   r  )r@  r   r*   r   r*   r   )r   rO  r*   r+   )r   r+   ri  r   r*   r   )
r7   r8   rz  r8   r{  r   r|  r   r*   r   )rf   rg   rh   ri   	overridessuffixnewvar_prefixr'  r   r?   rm  r6   rD  kexprr   r   r   r  r   unknownr  r?  rJ  rN  rh  ry  r  __classcell__r3   r3   r   r4   r   ~  s,    
	


i
:"r   r   c                  C  s   dd l } | jddd d S )Nr   ztorch.compile for Metal is an early protoype and might not work as expected. For details see https://github.com/pytorch/pytorch/issues/150121r	   )
stacklevel)warningswarn)r  r3   r3   r4   _warn_prototype  s
   
r  c                      s,   e Zd ZeZd fddZdddZ  ZS )MetalScheduling	schedulerOptional[Scheduler]r*   r   c                   s6   t  | t  tjj}|d ur|jd d S d S )NzDfrom torch._inductor.runtime.runtime_utils import compile_mps_shader)r   r   r  r   r   rt  headerr(  )rA   r  rw  r   r3   r4   r     s   zMetalScheduling.__init__src_coder+   node_schedulelist[SchedulerNode]rz   r   c           
      C  sp   t jj}||jv r|j| }|S d|  }| d}||j|< t||\}}| d| }	||||	 |S )Nmps_lib_z.generated_kernel
)r   r   rt  src_to_kernelnext_kernel_suffixr   define_kernel)
rA   r  r  rz   rw  kernel_namemps_lib_nameoriginsdetailed_originsmetadata_commentr3   r3   r4   r    s   



zMetalScheduling.define_kernel)r  r  r*   r   )r  r+   r  r  rz   r   r*   r+   )rf   rg   rh   r   kernel_typer   r  r  r3   r3   r   r4   r    s    	r  )r(   r)   r*   r+   r  )=
__future__r   	functoolsr   typingr   r   r   rn  sympy.printing.precedencer   r1   torch.utils._sympy.printersr   ExprPrinter_torch.utils._sympy.value_rangesr   utilsr
   r   virtualizedr   r   r   commonr   r   r   r   r   r   simdr   r   r   r   ops_handlerr   r   r  r   r   r   r   int8int16int32int64uint8r%   r&   bfloat16rr   r5   r6   ri   _initialize_pointwise_overridesr   cacher  r  r3   r3   r3   r4   <module>   sP    
9  
  E
