o
    Ih-                     @   s  d dl mZ d dlmZmZ d dlZd dlZd dlmZ d dl	m
Z
 ddlmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZmZ dd	lmZ 	
			d8dddddddee dee dee dededeee  dee d  ded fddZ!			
d9dddddddee d  ded defddZ"dd Z#G dd deZ$G d d! d!eZ%G d"d# d#eZ&G d$d% d%eZ'G d&d' d'eZ(G d(d) d)eZ)G d*d+ d+eZ*G d,d- d-eZ+G d.d/ d/eZ,G d0d1 d1eZ-G d2d3 d3eZ.G d4d5 d5eZ/G d6d7 d7eZ0dS ):    )Sequence)AnyOptionalN)make_channels_last_strides_for
OrderedSet   )ExternKernelAllocFixedLayoutFlexibleLayoutget_device_typeir_node_to_tensor is_contiguous_storage_and_layoutLayoutmay_convert_to_optionalMultiOutputMultiOutputLayoutMutationOutput
NoneLayout	TensorBox)convert_shape_to_inductorpad_listlike)VFxr   weightbiaspaddingstridedilationgroups
transposedoutput_paddingquantize_argsotherc                  C   sX  dd }dd }|   |   |dur|   tjj t|dd}t|dd}t| d }d	t|  k r>|ksAJ  J d	t|  k rN|ksQJ  J d	t|  k r^|ksaJ  J t||}t||}t||}|	du r{td	g|}	nd	t|	  k r|ksJ  J t|	|}	t|t	t
jjjfsJ |r|||}| }|||||	|||}n|durt|ddn|}tjj||||||||	|	}| }d	gtttd
t|d
  }t|g| }W d   n1 sw   Y  | ||}tdd |D  }|rt|rt|}nt|}t|t|ks$J t|dv s-J |g}|
durY|
\}}}}|   |   |   |   |||g |g ||g }n||g7 }|durv| ||}t|tsqJ ||g7 }t| | t |t |}||||g}|r|!d
|	 |dur|"| n|!d	| |||||fS )a}  
    This function is a helper function to prepare inputs, layout and constant args
    for convolution post-op fusion's create function, including deciding the output
    layout (channels first or channels last), realizing inputs and make them etc. The
    function only supports the CPU/XPU device since conv post-op fusion kernel is only
    supported on CPU/XPU right now.
    c                 S   s   t | t |ksJ dt | }|dksJ dd}d}	g }
|
| |  |
||	 |  td|D ]1}|| d ||d   d }| | d ||d   ||d  d  | ||d   }|
| q3ttt|
S )NzExpect input dim == weight dim   zExpect input dim > 2r   r   )lenappendrangelistmapint)output_sizeweight_sizer   r!   r   r   r   dim	BATCH_DIMWEIGHT_INPUT_CHANNELS_DIM
input_sizedkernelinput_size_d r4   M/var/www/vscode/kcb/lib/python3.10/site-packages/torch/_inductor/mkldnn_ir.py_conv_input_size5   s(   
z<_prepare_convolution_fusion_create.<locals>._conv_input_sizec                    s   |    t }|dksJ d|dkr9g }| d |  | d |  | fddtd|D  |S | dd  }|S )Nr$   zExpect weight dim > 2r   r   c                 3   s    | ]} | V  qd S Nr4   ).0r1   prepacked_weight_sizer4   r5   	<genexpr>[   s    z[_prepare_convolution_fusion_create.<locals>._original_deconv_weight_size.<locals>.<genexpr>)sizer%   r&   extendr'   	transpose)prepacked_weightr   r-   r,   r4   r9   r5   _original_deconv_weight_sizeP   s   zH_prepare_convolution_fusion_create.<locals>._original_deconv_weight_sizeNT)guard_shaper$   r   r   c                 s   s    | ]}t |tV  qd S r7   )
isinstancer*   )r8   ir4   r4   r5   r;      s    z5_prepare_convolution_fusion_create.<locals>.<genexpr>cpuxpu)#realizer   graph	fake_moder   r%   r<   r   rB   r*   sympycorenumbersIntegertorchopsatenconvolutionr(   reversedr'   require_stride_orderallr   r   contiguous_stridesr   r   r   r
   get_device_or_error	get_dtyper   insertr&   ) clsr   r   r   r   r   r   r   r    r!   r"   r#   r6   r@   x_fakeweight_fakedimsr,   r0   r+   	bias_fakeoutputreq_stride_orderdynamic_shapesoutput_strideinputsx_scalex_zero_pointw_scalew_zero_pointkernel_layoutconstant_argsr4   r4   r5   "_prepare_convolution_fusion_create   s   
   


 

 4	




ri   
binary_sumc                 C   sd  |   |   |dur|   | ^ }}| \}}	t||	g }
tttt| }| ||}t|t|ks@J t|dv sHJ |g}|durs|\}}}}|   |   |   |   |||g |g ||g }n||g7 }|dur|r| ||}||g }t	|
}t
| | |
|}g }|dur|| n|d| |||||fS )z
    This function is a helper function to prepare inputs, layout and constant args
    for linear post-op fusion's create function. The function only supports the CPU device
    since linear post-op fusion kernel is only supported on CPU right now.
    NrD   r   )rG   get_sizer(   rR   r'   r%   rS   r   r   rU   r
   
get_devicerW   r&   rX   )rY   r   r   r   r"   r#   rj   m_ocr+   r_   rb   rc   rd   re   rf   ra   rg   rh   r4   r4   r5   _prepare_linear_fusion_create   sH   


rp   c                 C   s,   t |  | g }t|  d| _|g| _|S )Ndevice)r   
get_layoutr   rl   layoutoutputs)packed	output_irr4   r4   r5   _create_output_node
  s   rx   c                       sr   e Zd Z	d	d fddZ fddZedd	d
d	dd	dee dee dee dedeee	  fddZ
  ZS )ConvolutionUnaryr4   returnNc                    $   t  j|||d tjjjjdd d S )N,aoti_torch_cpu_mkldnn__convolution_pointwiseop_overloadcpp_kernel_name)super__init__rN   rO   mkldnn_convolution_pointwisedefaultselfrt   rb   rh   	__class__r4   r5   r        

zConvolutionUnary.__init__c                       | d t | d S Nz.torch/csrc/inductor/aoti_torch/c/shim_mkldnn.hinclude_extra_headerr   codegenr   wrapperr   r4   r5   r   %     
zConvolutionUnary.codegenr   r   r   r   padding_stride_	dilation_r   scalarsc              	   C   sH   t | |||||||\}}}}}||t|	|
g }t|||d}t|S )Nrt   rb   rh   )ri   r   ry   rx   )rY   r   r   r   r   r   r   r   attrr   	algorithmrb   rh   rg   rn   rv   r4   r4   r5   create)  s(   zConvolutionUnary.creater4   rz   N__name__
__module____qualname__r   r   classmethodr(   r*   r   r   r   __classcell__r4   r4   r   r5   ry     s0    

ry   c                       s   e Zd Z		d	d fddZ fddZedd	d
d	dd	dd	dee dee dee dedede	e
 de	e de	ee  de	e fddZ  ZS )ConvolutionBinaryr4   rz   Nc                    s*   t  j|||d tjjjjdd || _d S )N3aoti_torch_cpu_mkldnn__convolution_pointwise_binaryr}   )r   r   rN   rO   r   r   binarycpp_constant_args)r   rt   rb   rh   r   r   r4   r5   r   N  s   

zConvolutionBinary.__init__c                    r   r   r   r   r   r4   r5   r   _  r   zConvolutionBinary.codegenr   r   r#   r   r   r   r   r   r   binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmc              	   C   sd   t | |||||||\}}}}}| ||}|d| ||	|
|t||g }t|||d}t|S )Nr   r   )ri   rS   rX   r   r   rx   )rY   r   r#   r   r   r   r   r   r   r   r   r   r   r   rb   rh   rg   r_   rn   rv   r4   r4   r5   r   c  s0   zConvolutionBinary.create)r4   r4   r   )r   r   r   r   r   r   r(   r*   strr   floatr   r   r   r4   r4   r   r5   r   M  sF    	

r   c                       s   e Zd Z	d	d fddZ fddZdeej fdd	Ze	d
dddddddde
e de
e de
e dededee dee dee
e  dee fddZ  ZS )ConvolutionBinaryInplacer4   rz   Nc                    s~   |d |d g|dd   }t  j|||d tjjjjdd tt|d 	 d|d | tt|d 	 d|d | g| _
d S )Nr   r   r$   4aoti_torch_cpu_mkldnn__convolution_pointwise_binary_r}   rq   )r   r   rN   rO   r   _convolution_pointwise_r   r   r   rl   mutation_outputs)r   rg   rb   rh   reordered_inputsr   r4   r5   r     s   


z!ConvolutionBinaryInplace.__init__c                    r   r   r   r   r   r4   r5   r     r   z ConvolutionBinaryInplace.codegenc                 C      t  S r7   r   r   r4   r4   r5   get_unbacked_symbol_defs     z1ConvolutionBinaryInplace.get_unbacked_symbol_defsr   r   r#   r   r   r   r   r   r   r   r   r   r   r   c              	   C   st   t | |||||||\}}}}}| ||}|d| ||	|
|t||g }tt|d  d||d}|jd S )Nr   rq   )rg   rb   rh   r   )ri   rS   rX   r   r   r   rl   rb   )rY   r   r#   r   r   r   r   r   r   r   r   r   r   r   rb   rh   rn   r_   rv   r4   r4   r5   r     s0   
zConvolutionBinaryInplace.creater   r   )r   r   r   r   r   r   rJ   Symbolr   r   r(   r*   r   r   r   r   r   r   r4   r4   r   r5   r     sF    	

r   c                       sz   e Zd Z	d	d fddZ fddZedd	d
d	dd	dee dee dee dee dedeee	  fddZ
  ZS )ConvolutionTransposeUnaryr4   rz   Nc                    r{   )N6aoti_torch_cpu_mkldnn__convolution_transpose_pointwiser}   )r   r   rN   rO   r    _convolution_transpose_pointwiser   r   r   r4   r5   r     r   z"ConvolutionTransposeUnary.__init__c                    r   r   r   r   r   r4   r5   r     r   z!ConvolutionTransposeUnary.codegenr   r   r   r   r   output_padding_r   r   groups_r   c                 C   sP   d}t | |||||||||
\}}}}}||	t|
|g }t|||d}t|S )NTr   )ri   r   r   rx   )rY   r   r   r   r   r   r   r   r   r   r   r   r    rb   rh   rg   rn   rv   r4   r4   r5   r     s<   z ConvolutionTransposeUnary.creater   r   r   r4   r4   r   r5   r     s4    	
r   c                       s~   e Zd Z	d	d fddZ fddZedd	d
d	dd	dd	dd	dd	dd	dee dee dee dededefddZ	  Z
S )QConvPointWisePT2Er4   rz   Nc                    s2   t |dk| _t j|||dtjjjjdd dS )a  
        if bias is not None
            - inputs = [x, w, b, weight_scale, weight_zp]
            - const_args is: [stride, padding, dilation, groups, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, weight_scale, weight_zp]
            - const_args is: [bias, stride, padding, dilation, groups, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
           N(aoti_torch_cpu__qconv2d_pointwise_tensorr}   )	r%   has_biasr   r   rN   rO   onednnqconv2d_pointwiser   r   r   r4   r5   r     s   

zQConvPointWisePT2E.__init__c                    4   | d t | t| jtr| | d S d S r   r   r   r   rB   rt   r   codegen_size_assertsr   r   r4   r5   r   9  
   
zQConvPointWisePT2E.codegenqxr   rc   rd   qwre   rf   r   r   r   r   r   output_scaleoutput_zero_pointc                 C   s   d}d }t | ||||	||
|||||||g\}}}}}|d u r-|d |d |d< |d< n|d |d |d< |d< |||||t||g }|d usLJ |tjtjfv rW||_t|||dS )NFr$   r   r   r   )ri   r   rN   float32bfloat16dtyper   )rY   r   rc   rd   r   re   rf   r   r   r   r   r   r   r   output_dtyper   r   r   r    r!   rb   rh   rg   rn   r4   r4   r5   r   ?  sP   
	zQConvPointWisePT2E.creater   r   )r   r   r   r   r   r   r(   r*   r   r   r   r4   r4   r   r5   r     sD    	
r   c                       s   e Zd Z	d	d fddZ fddZdd	 Zdeej fd
dZ	e
dddddddddddddee dee dee deddddfddZ  ZS )QConvPointWiseBinaryPT2Er4   rz   Nc                    s8   t |dk| _d| _t j|||dtjjjj	dd dS )ag  
        Needs input/weight/output qparams
        if bias is not None
            - inputs = [x, x_scale, x_zp, w,  w_scale, w_zp, accum, b]
            - const_args = [stride, padding, dilation, groups, o_scale, o_zp,
            output_dtype, accum_scale, accum_zp, binary_attr, aplha, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, x_scale, x_zp, w,  w_scale, w_zp, accum]
            - const_args [b, stride, padding, dilation, groups, o_scale, o_zp,
             output_dtype, accum_scale, accum_zp, binary_attr, aplha, unary_attr, unary_scalars, unary_algorithm]
              N/aoti_torch_cpu__qconv2d_pointwise_binary_tensorr}   )
r%   r   idx_for_inplace_sumr   r   rN   rO   r   r   r   r   r   r4   r5   r     s   

z!QConvPointWiseBinaryPT2E.__init__c                    r   r   r   r   r   r4   r5   r     r   z QConvPointWiseBinaryPT2E.codegenc                 C   s   | j | j  gS r7   )rb   r   get_namer   r4   r4   r5   get_mutation_names  s   z+QConvPointWiseBinaryPT2E.get_mutation_namesc                 C   r   r7   r   r   r4   r4   r5   r     r   z1QConvPointWiseBinaryPT2E.get_unbacked_symbol_defsr   r   rc   rd   r   qaccumr   r   r   r   r   r   r   c                 C   s   d}d }t | ||||
|	||||||||g|\}}}}}|d u r.|d |d |d< |d< n|d |d |d< |d< |||||||||t||g
 }|dksSJ dtj|  tt| d||d}|j	|j
 S )	NFr$   r   r   sumzCFor now, only post op sum is supported in QConvPointWiseBinaryPT2E.rq   r   )ri   r   r   rH   mark_buffer_mutatedr   r   r   rl   rb   r   )rY   r   rc   rd   r   re   rf   r   r   r   r   r   r   r   r   r   accum_scaleaccum_zero_pointr   alphar   r   r   r    r!   rb   rh   _kernel_layoutr_   rv   r4   r4   r5   r     s^   

zQConvPointWiseBinaryPT2E.creater   r   )r   r   r   r   r   r   r   rJ   r   r   r   r(   r*   r   r   r4   r4   r   r5   r     sD    	
r   c                       s<   e Zd Z	d
	d fddZ fddZedd	 Z  ZS )MKLPackedLinearr4   rz   Nc                    "   t  j|||d tjjjjd d S N)r~   )r   r   rN   rO   mkl_mkl_linearr   r   r   r4   r5   r        

zMKLPackedLinear.__init__c                    r   r   r   r   r   r4   r5   r     r   zMKLPackedLinear.codegenc                 C   s   |  | |}|  | |}| ^ }}| \}}t||g }	t|	}
|||g}|g}|d ur;||g7 }n|dd  tt|	 |
 |	|
||dS )Nr   r   )require_stride1realize_inputrk   r(   r   rU   rX   r   r
   rl   rW   )rY   r   packed_worig_wB
batch_sizerm   rn   ro   r+   ra   rb   rh   r4   r4   r5   r     s$   

zMKLPackedLinear.creater   r   r   r   r   r   r   r   r   r   r4   r4   r   r5   r     s    r   c                       sD   e Zd Z	d	d fddZ fddZedd	 Zd
d Z  ZS )LinearUnaryr4   rz   Nc                    r{   )N aoti_torch_cpu__linear_pointwiser}   )r   r   rN   rO   r   _linear_pointwiser   r   r   r4   r5   r   -  r   zLinearUnary.__init__c                    r   r   r   r   r   r4   r5   r   <  r   zLinearUnary.codegenc                 C   s   |  | |}|  | |}| ^ }}| \}	}t||	g }
||g}||r-|ndg|g}|d urD|  | |}|| n|dd  tt| |	 |
d||d}t
|S )Nr   rr   r   r<   r   )require_contiguousr   rk   r(   r&   rX   r   r
   rl   rW   rx   )rY   r   wr   r   r   r   rm   _icro   r+   rb   rh   rv   r4   r4   r5   r   @  s*   	zLinearUnary.createc                 C      d S r7   r4   r   r4   r4   r5   apply_constraint[     zLinearUnary.apply_constraintr   r   )	r   r   r   r   r   r   r   r   r   r4   r4   r   r5   r   ,  s    
r   c                       sH   e Zd ZdZ	d	d fddZ fddZed	d
 Zdd Z  Z	S )LinearBinaryz)torch.ops.mkldnn._linear_pointwise.binaryr4   rz   Nc                    r{   )N'aoti_torch_cpu__linear_pointwise_binaryr}   )r   r   rN   rO   r   r   r   r   r   r4   r5   r   b  r   zLinearBinary.__init__c                    r   r   r   r   r   r4   r5   r   q  r   zLinearBinary.codegenc                 C   s   |  | |}|  | |}|  | |}| ^ }}| \}}t||g }	|||g}
|g}|d urF|  | |}|
| n|d| tt| |	 |	d|
|d}t
|S )Nr   r   r   )r   r   rk   r(   r&   rX   r   r
   rl   rW   rx   )rY   r   yr   r   r   rm   r   ro   r+   rb   rh   rv   r4   r4   r5   r   u  s,   
	zLinearBinary.createc                 C   r   r7   r4   r   r4   r4   r5   r     r   zLinearBinary.apply_constraintr   r   )
r   r   r   r2   r   r   r   r   r   r   r4   r4   r   r5   r   _  s    
r   c                       sd   e Zd Z		d	d fddZ fddZed	d
dd
dd
dd
dd
dd
dd
dedefddZ  Z	S )QLinearPointwisePT2Er4   Trz   Nc                    s*   || _ t j|||dtjjjjdd dS )a  
        if bias is not None
            - inputs = [x, w, b, weight_scale, weight_zp]
            - const_args is: [x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, weight_scale, weight_zp]
            - const_args is: [bias, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        N(aoti_torch_cpu__qlinear_pointwise_tensorr}   )r   r   r   rN   rO   r   qlinear_pointwisetensorr   rt   rb   rh   r   r   r4   r5   r     s   

zQLinearPointwisePT2E.__init__c                    r   r   r   r   r   r4   r5   r     
   
zQLinearPointwisePT2E.codegenr   r   rc   rd   r   re   rf   r   r   r   c              	   C   sp   t | |||||||g\}}}}}|||	|
|t||g }|
d us#J |
tjtjfv r.|
|_t||||d udS )Nrt   rb   rh   r   )rp   r   rN   r   r   r   r   )rY   r   rc   rd   r   re   rf   r   r   r   r   post_op_namepost_op_argspost_op_algorithmrb   rh   rg   rn   r4   r4   r5   r     s0   
	zQLinearPointwisePT2E.creater4   Tr   )
r   r   r   r   r   r   r   r*   r   r   r4   r4   r   r5   r     s6    	
r   c                       sp   e Zd Z		d	d fddZ fddZd	d
 ZedddddddddddddddddedefddZ	  Z
S )QLinearPointwiseBinaryPT2Er4   Trz   Nc                    s0   || _ d| _t j|||dtjjjjdd dS )a  
        if bias is not None
            - inputs = [x, w, x_scale, x_zp, weight_scale, weight_zp, x2, bias]
            - const_args is: [o_scale, o_zp,
              fp32_output, binary_attr, aplha, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, x_scale, x_zp, weight_scale, weight_zp, x2]
            - const_args is: [bias, o_scale, o_zp,
              fp32_output, binary_attr, aplha, unary_attr, unary_scalars, unary_algorithm]
        r   N/aoti_torch_cpu__qlinear_pointwise_binary_tensorr}   )	r   r   r   r   rN   rO   r   r   binary_tensorr   r   r4   r5   r     s   

z#QLinearPointwiseBinaryPT2E.__init__c                    r   r   r   r   r   r4   r5   r     r   z"QLinearPointwiseBinaryPT2E.codegenc                 C   s(   | j d }|dkr| j| j  gS g S )Nr   )rh   rb   r   r   )r   binary_post_opr4   r4   r5   r     s   
z-QLinearPointwiseBinaryPT2E.get_mutation_namesr   r   rc   rd   r   re   rf   r#   r   r   r   c                 C   s   t | |||||||g||dk\}}}}}||	|
||||||t||g
 }|dkrFtj|  tt| d|||d ud}|j	|j
 S |d usLJ |tjtjfv rW||_t||||d udS )Nr   rq   r   )rp   r   r   rH   r   r   r  r   rl   rb   r   rN   r   r   r   )rY   r   rc   rd   r   re   rf   r#   r   r   r   r   other_scaleother_zpr  r   unary_post_opunary_post_op_argsunary_post_op_algorithmrb   rh   rg   r_   rv   r4   r4   r5   r     sZ   
z!QLinearPointwiseBinaryPT2E.creater  r   )r   r   r   r   r   r   r   r   r*   r   r   r4   r4   r   r5   r    s<    	
r  c                !       s   e Zd Z	d	d fddZeddddd	dd
ddddddddedee dededededededef ddZ fddZ	  Z
S )MkldnnRnnLayerr4   rz   Nc                    r   r   )r   r   rN   rO   rP   mkldnn_rnn_layerr   r   r   r4   r5   r   _  r   zMkldnnRnnLayer.__init__r   r   w0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc              	      sf  |  |   |  | |}|  | |}|  | |}|  | |}|  | |}|  |  | |}|   }t|dksRJ d|\}}}|||g}| }| }||||||g}||	|
||||||g	}tt d||d dd }|||dgg}|||t	|t	|dgg} fdd	t
t||D }| _|S )
N   zExpect lstm input to be 3Drq   )rb   rh   c                 S   s   t | dks
J dt| S )Nr  zExpect output_shape to be 3D)r%   r   rU   )output_shaper  r4   r4   r5   get_strides_of_lstm_output  s   
z9MkldnnRnnLayer.create.<locals>.get_strides_of_lstm_outputr   c                    s8   g | ]\}\}}t t  || t|fgqS r4   )r   r
   rl   rW   tuple)r8   rC   r+   ra   rv   r   r4   r5   
<listcomp>  s    
z)MkldnnRnnLayer.create.<locals>.<listcomp>)r   r   freeze_layoutrk   r%   r  r   rl   r   rU   	enumeratezipru   )rY   r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r0   
seq_length
mini_batchr  hy_shapecy_shaperb   rh   r  output_sizesoutput_stridesrw   r4   r!  r5   r   m  s\   

zMkldnnRnnLayer.createc                    s   | d t |S r   r   r   r   r4   r5   r     s   
zMkldnnRnnLayer.codegenr   r   )r   r   r   r   r   boolr(   r*   r   r   r   r4   r4   r   r5   r  ^  sP    	
]r  c                       sN   e Zd Z	d	d fddZ fddZe				
								dddZ  ZS )WeightInt4PackMatmulr4   rz   Nc                    sD   t |dksJ t |dksJ t j|||dtjjjjdd dS )zY
        inputs = [x, w, qGroupSize, qScalesAndZeros]
        constant_args = ()
           r   N-aoti_torch_cpu__weight_int4pack_mm_cpu_tensorr}   )r%   r   r   rN   rO   	quantizedint4mm_packed_weight_cpur   r   r   r4   r5   r     s   


zWeightInt4PackMatmul.__init__c                    r   r   r   r   r   r4   r5   r     r   zWeightInt4PackMatmul.codegenr   r   r   
qGroupSizeqScalesAndZerosc                 C   s`   ||||g}|  ^ }}|  \}}t||g }	t|	}
t| | |	|
}t||dS )N)rt   rb   )rk   r(   r   rU   r
   rl   rW   r-  )rY   r   r   r2  r3  rb   rm   rn   nr+   ra   rg   r4   r4   r5   r     s   
zWeightInt4PackMatmul.creater   r   )r   r   r   r   r2  r   r3  r   r   r4   r4   r   r5   r-    s     r-  )FNNN)NNF)1collections.abcr   typingr   r   rJ   rN   torch._prims_commonr   torch.utils._ordered_setr   irr	   r
   r   r   r   r   r   r   r   r   r   r   r   utilsr   r   virtualizedr   r*   r,  r(   ri   rp   rx   ry   r   r   r   r   r   r   r   r   r   r  r  r-  r4   r4   r4   r5   <module>   s   <	



 3

?8AMCg|+36Svs