o
    Ih                     @   s  d dl Z d dlmZ d dlZd dlmZ ddlmZm	Z	 ddl
mZmZ ddlmZ ddl	mZmZ dd	lmZ dd
lmZmZmZ ddlmZmZmZ ddlmZ e eZeej dddZ!eej"j#j$ddej%dZ&ej"j#Z#ej"j'Z'ej"j(Z(dddZ)dddZ*dS )    N)Any)mm_args   )configlowering)CppGemmTemplateCppWoqInt4GemmTemplate)create_epilogue_with_attr)expandregister_lowering)WeightInt4PackMatmul)autotune_select_algorithmExternKernelChoicerealize_inputs)use_aten_gemm_kernelsuse_cpp_gemm_templateuse_max_autotune)Vzat::_weight_int8pack_mmF)has_out_variantz*at::native::_weight_int4pack_mm_cpu_tensor)r   kernel_creatorreturnc                   C   s>   t tjtjtjg t tj t tj t tj d S N)r   add_needs_realized_inputs	quantized
max_pool2d
_quantized$wrapped_fbgemm_pack_gemm_matrix_fp16!wrapped_fbgemm_linear_fp16_weightmake_fallback r   r   W/var/www/vscode/kcb/lib/python3.10/site-packages/torch/_inductor/quantized_lowerings.pyregister_quantized_ops'   s   r!   c                  C   s   t tjd dd ddtjdtjdtjdtdtf
dd	} t tjd dd ddtjdtjd
tdtjdtdtfdd}t	tj
 t	tj d S )N)type_promotion_kind)layoutinputweightscaler#   r   c          
         s   t | | dd\}}} }}| tjtjtjfv r!| tjks#J  }t r2t	||f|gng }dtj
dtf fdd}	t|||ddrVtj||||gd|	d t|d	krrtjrrt srtd
 t	||f| S td|||g|S )NT)r#   mat2_transposedbufr   c                    s   t | dtt jdS )Nmul)other)r	   r   r
   size)r(   r#   r&   r   r    _mul_epilogueO   s   z?register_woq_mm_ops.<locals>.int8pack_mm.<locals>._mul_epilogue)r'   )trans_wepilogue_creatorr   3No choices for GEMM, using ATen backend as fallback_weight_int8pack_mm)r   	get_dtypetorchbfloat16float16floatint8r   aten__weight_int8pack_mmbindTensorr   r   r   add_choicesleninductor_configautotune_fallback_to_atenlogwarningoutput_noder   )
r$   r%   r&   r#   _mat1mat2aten_layoutchoicesr-   r   r,   r    int8pack_mm5   sB   	

z(register_woq_mm_ops.<locals>.int8pack_mm
qGroupSizeqScaleAndZerosc                S   sD  t | ||ddd\}}}}}}| tjtjtjfv r"| tjks$J tjj	tj
|tjdd d}|}	t rBt||||f|	gng }
t rdt|	||dd|drd|  rdt| |
|	||||g t|
dkrtjrt std t||||f|	 S dtjjjd	tjfd
d}|dd d}td|
||||g|	|dS )NT)r#   use_4x2_dimr'   )dtype)name)r'   is_woq_int4q_group_sizer   r0   xr   c                 S   s6   |    sJ |  }|  }tjdd|tj|dS )Nr      )rK   device)
get_layoutis_contiguousget_size
get_devicer3   randintuint8)rO   shaperQ   r   r   r    get_example_weight   s   zHregister_woq_mm_ops.<locals>.int4pack_mm_cpu.<locals>.get_example_weightc                 S   s   t jj|   S r   )r   graph	constantsget_name)rO   r   r   r    <lambda>   s    z>register_woq_mm_ops.<locals>.int4pack_mm_cpu.<locals>.<lambda>)r      _weight_int4pack_mm_for_cpu)input_gen_fns) r   r2   r3   r4   r5   r6   rW   r   rZ   add_tensor_constanttensorint64r   aten__weight_int4pack_mm_cpur9   r   r   rR   rS   r   r;   r<   r=   r>   r?   r@   rA   	_inductorirIRNoder:   r   )r$   r%   rH   rI   r#   rB   rC   rD   
group_sizerE   rF   rY   r`   r   r   r    int4pack_mm_cpuk   sr   	
	





z,register_woq_mm_ops.<locals>.int4pack_mm_cpu)r   atenr1   r3   r:   r   r_   intr   r   _dyn_quant_matmul_4bit_dyn_quant_pack_4bit_weight)rG   ri   r   r   r    register_woq_mm_ops4   s<   5Orn   )r   N)+loggingtypingr   r3    torch._inductor.kernel.mm_commonr    r   r=   r   codegen.cpp_gemm_templater   r   codegen.cpp_utilsr	   r
   r   	mkldnn_irr   select_algorithmr   r   r   utilsr   r   r   virtualizedr   	getLogger__name__r?   r1   r8   opsr   int4mm_packed_weight_cpucreaterd   r   rj   r!   rn   r   r   r   r    <module>   s6    

