o
    Ih                     @   s$
  d dl Z d dlmZ d dlZd dlmZ d dlmZmZ d dl	m
Z
mZ eddZejejejejejgZejejgZdd	 eD Zed
d	 eD  dd Zed e
edddejdededededejdejfddZe
edddejdededededejdejfddZed e
edddejdejdejdededejdejfddZ e
edddejdejdejdededejdejfdd Z!ed! e
ed"ddejdejdejdejdejdejdejfd#d$Z"e
ed"ddejdejdejdejdejdejdejfd%d&Z#ed' e
ed(ddd)dejdededededejd*eej dejfd+d(Z$e
ed(ddd)dejdejdejdededejd*eej dejfd,d-Z%ed. e
ed/ddd)dejdejdejdededejd*eej dejfd0d1Z&e
ed/ddd)dejdejdejdededejd*eej dejfd2d3Z'ed4 e
ed5ddd)dejdejdejdejdejdejd*eej dejfd6d7Z(e
ed5ddd)d*eej dejfd8d9Z)ed: e
ed;ddejd<ed=ed>edejde*ejejf fd?d@Z+edA e
edBddejd<ed=ed>edejde*ejejf fdCdDZ,e
ed;ddejdeded>edejde*ejejf fdEdFZ-e
edBddejdeded>edejde*ejejf fdGdHZ.dIdJ Z/edK e
edLddejdMejdNejdOedededejdejfdPdLZ0e
edLddejdMejdNejdOedededejdejfdQdRZ1edS e
edTddd)dejdMejdNeej dOedededejd*eej dejfdUdTZ2e
edTddd)dejdMejdNeej dOedededejd*eej dejfdVdWZ3edX e
edYddejdejde*ejejf fdZdYZ4e
edYddejdejde*ejejf fd[d\Z5ed] e
ed^d_dejdejde*ejejf fd`d^Z6eda e
edbddejdejde*ejejf fdcdbZ7e
edbddejdejde*ejejf fdddeZ8dfdg Z9edh e
ediddejdMejdNejdededejfdjdiZ:e
ediddejdMejdNejdededejfdkdlZ;edm e
edndej<fdejdMejdNejdededejdoejfdpdnZ=e
edndej<fdejdMejdNejdededejdoejfdqdrZ>eds e
edtd	uddejdMejdNejdededejfdvdtZ?e
edtd	uddejdMejdNejdededejfdwdxZ@edy e
edzdduej<fd{ejdMejdNeej dededejd|edoejfd}dzZAed~ G dd dejBjCZDe
edddejdMejdNejdOedededejfddZEe
edddejdMejdNejdOedededejfddZFed e
edddejdejdejfddZGe
edddejdejdejfddZHdS )    N)Optional)_unsqueeze_multiple)determine_qparamsvalidate_qmin_qmax)implLibraryquantized_decomposedDEFc                 C   s&   i | ]}|t |jt |jfqS  )torchiinfominmax.0kr
   r
   X/var/www/vscode/kcb/lib/python3.10/site-packages/torch/ao/quantization/fx/_decomposed.py
<dictcomp>   s    r   c                 C   s.   i | ]}|t t|jt t|jfqS r
   )intr   finfor   r   r   r
   r
   r   r      s   . c                 C   s^   |t vrtd| t | \}}| |ksJ d| d|  ||ks-J d| d| d S )NzUnsupported dtype: z9quant_min out of bound for dtype, quant_min_lower_bound: z quant_min: z9quant_max out of bound for dtype, quant_max_upper_bound: z quant_max: )_DTYPE_TO_QVALUE_BOUNDS
ValueError)	quant_min	quant_maxdtypequant_min_lower_boundquant_max_upper_boundr
   r
   r   _quant_min_max_bounds_check   s"   

r   zxquantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> Tensorquantize_per_tensorCompositeExplicitAutogradinputscale
zero_pointr   r   r   returnc                 C   sp   | j tjtjfv r| tj} | j tjksJ d| j  t||| d| }tt| | | |||S )a  Affine quantization for the Tensor using the same quantization parameters to map
    from floating point to quantized values

    Args:
       input (torch.Tensor): original float32 or bfloat16 Tensor
       scale (float): quantization parameter for affine quantization
       zero_point (int): quantization parameter for affine quantization
       quant_min (int): minimum quantized value for output Tensor
       quant_max (int): maximum quantized value for output Tensor
       dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

    Returns:
       Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
       are not stored in the Tensor, we are storing them in function arguments instead
    <Expecting input to have dtype torch.float32, but got dtype:       ?)	r   r   float16bfloat16tofloat32r   clampround)r    r!   r"   r   r   r   	inv_scaler
   r
   r   r   1   s   
Metac                 C   sH   | j tjtjfv r| tj} | j tjksJ d| j  tj| |dS )Nr$   r   )r   r   r&   r'   r(   r)   
empty_liker    r!   r"   r   r   r   r
   r
   r   quantize_per_tensor_metaV   s   	
r1   zquantize_per_tensor.tensor(Tensor input, Tensor scale, Tensor zero_point, int quant_min, int quant_max, ScalarType dtype) -> Tensorzquantize_per_tensor.tensorc                 C   sV   |  dksJ d|   |  dksJ d|   t| | | |||S zAffine quantization for the Tensor using the same quantization parameters to map
    from floating point to quantized values
    Same as `quantize_per_tensor` but scale and zero_point are Scalar Tensor instead of
    scalar values
       >Expecting zero_point tensor to be one element, but received : 9Expecting scale tensor to be one element, but received : numelr   itemr0   r
   r
   r   quantize_per_tensor_tensorm   s   r9   c                 C   s   | j tjtjfv r| tj} | dksJ d|  | dks-J d|  | j tjks;J d| j  tj| |dS )Nr3   r4   r5   r$   r.   )r   r   r&   r'   r(   r)   r7   r/   r0   r
   r
   r   quantize_per_tensor_tensor_meta   s   	
r:   zquantize_per_tensor.tensor2(Tensor input, Tensor scale, Tensor zero_point, Tensor quant_min, Tensor quant_max, ScalarType dtype) -> Tensorzquantize_per_tensor.tensor2c                 C   s^   |  dksJ d|   |  dksJ d|   t| | | | | |S r2   r6   r0   r
   r
   r   quantize_per_tensor_tensor2   s   r;   c                 C   s   t | |||||S N)r:   r0   r
   r
   r    quantize_per_tensor_tensor2_meta   s   	r=   zdequantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensordequantize_per_tensor	out_dtyper@   c                C   sV   | j |ksJ d| d| j  |du rtj}|tv r$| || | S td| )a  Affine dequantization for the Tensor using the same quantization parameters to map
    from quantized values to floating point values

    Args:
       input (torch.Tensor): Tensor with dtype matching `dtype` argument,
       e.g. (`torch.uint8`), it is a per tensor quantized Tensor if combined with
       quantization parameters in the argument of this function (scale/zero_point)

       scale (float): quantization parameter for affine quantization

       zero_point (int): quantization parameter for affine quantization

       quant_min (int): minimum quantized value for input Tensor (not used in computation,
       reserved for pattern matching)

       quant_max (int): maximum quantized value for input Tensor (not used in computation,
       reserved for pattern matching)

       dtype (torch.dtype): dtype for input Tensor (not used in computation,
       reserved for pattern matching)

       out_dtype (torch.dtype?): optional dtype for output Tensor

    Returns:
       dequantized float32 Tensor
    Expecting input to have dtype: z
, but got N,Unsupported dtype in dequantize_per_tensor: )r   r   r)   r   r(   r   r    r!   r"   r   r   r   r@   r
   r
   r   r>      s   &c                C   s   |d u rt j}t j| |dS Nr.   )r   r)   r/   rC   r
   r
   r   dequantize_per_tensor_meta  s   rE   zdequantize_per_tensor.tensor(Tensor input, Tensor scale, Tensor zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensorzdequantize_per_tensor.tensorc             	   C   sZ   |  dksJ d|   |  dksJ d|   t| | | ||||dS zAffine dequantization for the Tensor using the same quantization parameters to map
    from quantized values to floating point values
    Same as `dequantize_per_tensor` but scale and zero_point are Scalar Tensor instead of
    scalar values
    r3   r4   r5   r?   r7   r>   r8   rC   r
   r
   r   dequantize_per_tensor_tensor'  s   rH   c                C   s   |d u rt j}| dksJ d|  | dks%J d|  | j|ks1J d| |tv r<t j| |dS td| )Nr3   r4   r5   rA   r.   rB   )r   r)   r7   r   r   r/   r   rC   r
   r
   r   !dequantize_per_tensor_tensor_metaL  s   rI   zdequantize_per_tensor.tensor2(Tensor input, Tensor scale, Tensor zero_point, Tensor quant_min, Tensor quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensorzdequantize_per_tensor.tensor2c             	   C   sb   |  dksJ d|   |  dksJ d|   t| | | | | ||dS rF   rG   rC   r
   r
   r   dequantize_per_tensor_tensor2m  s   rJ   c             	   C   s   t | ||||||dS )Nr?   )rI   rC   r
   r
   r   "dequantize_per_tensor_tensor2_meta  s   rK   zrchoose_qparams.tensor(Tensor input, int quant_min, int quant_max, float eps, ScalarType dtype) -> (Tensor, Tensor)zchoose_qparams.tensorqminqmaxepsc              	   C   s|   | j tjtjtjfv sJ d| j  |tv s#J dt  d| t|| t| \}}t	|||||t
|gddS )[  Given an input Tensor, derive the per tensor affine quantization parameter
    (scale and zero_point) for target quantized Tensor from the Tensor

    Args:
       input (torch.Tensor): floating point input Tensor
       quant_min (int): minimum quantized value for target quantized Tensor
       quant_max (int): maximum quantized value for target quantized Tensor
       dtype (torch.dtype): dtype for target quantized Tensor

    Returns:
       scale (float): quantization parameter for the target quantized Tensor
       zero_point (int): quantization parameter for the target quantized Tensor
    CExpecting input to have dtype torch.float32/16/b16, but got dtype: $Expecting target dtype to be one of , but got: F)has_customized_qrange)r   r   r)   r&   r'   r   keysr   aminmaxr   Tensorr    rL   rM   rN   r   min_valmax_valr
   r
   r   choose_qparams_tensor  s*   



rZ   z|choose_qparams_symmetric.tensor(Tensor input, int quant_min, int quant_max, float eps, ScalarType dtype) -> (Tensor, Tensor)zchoose_qparams_symmetric.tensorc              
   C   s   | j tjtjtjfv sJ d| j  |tv s#J dt  d| t|| t| \}}t	|||||t
|gdtjdS )rO   rP   rQ   rR   F)rS   qscheme)r   r   r)   r&   r'   r   rT   r   rU   r   rV   per_tensor_symmetricrW   r
   r
   r   choose_qparams_symmetric_tensor  s,   



r]   c                 C   sj   | j tjtjtjfv sJ d| j  ||k s!J d| d| tjdtj| jdtjdtj| jdfS )NrP   zKExpecting quant_min to be smaller than quant_max but received min:         z max: r3   r   device)	r   r   r)   r&   r'   emptydoubler_   int64r    r   r   rN   r   r
   r
   r   choose_qparams_tensor_meta  s"   


rd   c                 C   s(   t jdt j| jdt jdt j| jdfS )Nr3   r^   )r   r`   ra   r_   rb   rc   r
   r
   r   $choose_qparams_symmetric_tensor_meta  s   
re   c                 C   s6   t t|  }d||< ||d< | t|}||fS )Nr   )listrangedimpermutetuple)xaxisnew_axis_listyr
   r
   r   _permute_to_axis_zero  s
   ro   zquantize_per_channel(Tensor input, Tensor scales, Tensor zero_points, int axis, int quant_min, int quant_max, ScalarType dtype) -> Tensorquantize_per_channelscaleszero_pointsrl   c                 C   s   | j tjtjfv r| tj} | j tjksJ d| j  ||  k s,J d|   t||| t| |\} }dg|   }|j	d |d< |
|}|
|}tt| d|  | ||}	|	t|}
|
|S )at  Affine per channel quantization for the Tensor using the same quantization
    parameters for each channel/axis to map from floating point to quantized values

    Args:
       input (torch.Tensor): original float32 or bfloat16 Tensor
       scales (torch.Tensor): a list of scale quantization parameter for
       affine quantization, one per channel
       zero_point (torch.Tensor): a list of zero_point quantization parameter for
       affine quantization, one per channel
       quant_min (int): minimum quantized value for output Tensor
       quant_max (int): maximum quantized value for output Tensor
       dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

    Returns:
       Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
       are not stored in the Tensor, we are storing them in function arguments instead
    r$   Expecting axis to be < r3   r   r%   )r   r   r&   r'   r(   r)   rh   r   ro   shapeviewr*   r+   ri   rj   )r    rq   rr   rl   r   r   r   permute_axis_list	new_shaperesoutr
   r
   r   rp   ,  s"   



c                 C   sr   | j tjtjfv r| tj} | j tjksJ d| j  ||  k s,J d|   t||| tj| |dS )Nr$   rs   r.   )	r   r   r&   r'   r(   r)   rh   r   r/   )r    rq   rr   rl   r   r   r   r
   r
   r   quantize_per_channel_meta\  s   

rz   zdequantize_per_channel(Tensor input, Tensor scales, Tensor? zero_points, int axis, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensordequantize_per_channelc                C   s   | j |ksJ d| d| j  |du rtj}||  k s&J d|   t||| t| |\} }dg|   }	|jd |	d< ||	}|durT| ||	 | }
n| | }
|
|}
|
	t
|}|S )a  Affine per channel dequantization for the Tensor using the same quantization
    parameters for each channel/axis to map from quantized values to floating point values

    Args:
       input (torch.Tensor): Tensor with dtype matching `dtype` argument,
       e.g. (`torch.uint8`), it is a per channel quantized Tensor if combined with
       quantization parameter in the argument of this function (scales/zero_points/axis)

       scales (torch.Tensor): a list of scale quantization parameter for
       affine quantization, one per channel

       zero_points (torch.Tensor): a list of zero_point quantization parameter for
       affine quantization, one per channel

       quant_min (int): minimum quantized value for output Tensor (not used in computation,
       reserved for pattern matching)

       quant_max (int): maximum quantized value for output Tensor (not used in computation,
       reserved for pattern matching)

       dtype (torch.dtype): requested dtype for output Tensor (not used in computation,
       reserved for pattern matching)

       out_dtype (torch.dtype?): optional dtype for output Tensor

    Returns:
       dequantized float32 Tensor
    Expecting input to have dtype , but got dtype: Nrs   r3   r   )r   r   r)   rh   r   ro   rt   ru   r(   ri   rj   )r    rq   rr   rl   r   r   r   r@   rv   rw   rx   ry   r
   r
   r   r{   z  s"   )

c                C   sf   | j |ksJ d| d| j  |d u rtj}||  k s&J d|   t||| tj| |dS )Nr|   r}   rs   r.   )r   r   r)   rh   r   r/   )r    rq   rr   rl   r   r   r   r@   r
   r
   r   dequantize_per_channel_meta  s   r~   zLchoose_qparams_per_token(Tensor input, ScalarType dtype) -> (Tensor, Tensor)choose_qparams_per_tokenc                 C   sx   |   jddd}|jtjkr| }|tjkr#d}d|d  d }ntd| |jdd		|}t
|}||fS )
  Choose quantization parameters for per token quantization. This means for a N dimension Tensor
    (M1, M2, ...Mn, N), we calculate scales/zero_points for each N elements and quantize
    every N elements with the same quantization parameter. The dimension for scales/zero_points
    will be (M1 * M2 ... * Mn)

    Args:
       input (torch.Tensor): original float32/float16 Tensor
       dtype (torch.dtype): dtype (e.g. torch.uint8) for input Tensor

    Returns:
        scales and zero_points, both float32 Tensors
    Trh   keepdim      r3   z/unsupported dtype in choose_qparams_per_token: gh㈵>r   )absamaxr   r   r&   floatint8	Exceptionr*   div
zeros_like)r    r   rq   n_bitsr   rr   r
   r
   r   r     s   

c                 C   @   t | jd d dg }tj|tj| jdtj|tj| jdfS Nr   r3   r^   rf   rt   r   r`   ra   r_   rb   r    r   sizer
   r
   r   choose_qparams_per_token_meta     	
r   z]_choose_qparams_per_token_asymmetric_impl(Tensor input, ScalarType dtype) -> (Tensor, Tensor))_choose_qparams_per_token_asymmetric_implCompositeImplicitAutogradc                 C   s   d\}}t j| ddd}t j| ddd}t |t |}t |t |}t t jj}|| t	||  }	|	j
|d}	||	 }
||	 }||
 }|| }t || dk||
 || }t 
||| }|	t j|t jfS )r   )i   r   Tr   r   r   )r   aminr   r   r   r   r   r)   rN   r   r*   wherer+   r(   float64rb   )r    r   rL   rM   rX   rY   min_val_negmax_val_posrN   r!   descaled_mindescaled_maxzero_point_from_min_errorzero_point_from_max_errorr"   r
   r
   r   r     s&   
zWchoose_qparams_per_token_asymmetric(Tensor input, ScalarType dtype) -> (Tensor, Tensor)#choose_qparams_per_token_asymmetricc                 C   s
   t | |S r<   )r   r    r   r
   r
   r   r   E     
	c                 C   r   r   r   r   r
   r
   r   (choose_qparams_per_token_asymmetric_metaQ  r   r   c                 C   sf   t t|  d d }|| ksJ d| d|  || ks1J d| d|  d S )Nr   znum_tokens: z	 scales: z zero_points: )mathprodrf   r   r7   )r    rq   rr   
num_tokensr
   r
   r   !_per_token_quant_qparam_dim_check`  s   r   z}quantize_per_token(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype) -> Tensorquantize_per_tokenc                 C   sB   t ||| t| || | d| | |||} | S )a  Per token quantization for the Tensor using the quantization parameters to map
    from floating point to quantized values. This means for a N dimension Tensor
    (M1, M2, ...Mn, N), we calculate scales/zero_points for each N elements and quantize
    every N elements with the same quantization parameter. The dimension for scales/zero_points
    will be (M1 * M2 ... * Mn)

    Args:
       input (torch.Tensor): original float32 or bfloat16 Tensor
       scales (float32 torch.Tensor): quantization parameter for per token affine quantization
       zero_points (int32 torch.Tensor): quantization parameter for per token affine quantization
       quant_min (int): minimum quantized value for output Tensor
       quant_max (int): maximum quantized value for output Tensor
       dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

    Returns:
       Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
       are not stored in the Tensor, we are storing them in function arguments instead
    r%   )r   r   muladdr+   r*   r(   r    rq   rr   r   r   r   r
   r
   r   r   p  s   c                 C   s   t ||| tj| |dS rD   r   r   r/   r   r
   r
   r   quantize_per_token_meta  s   	r   zdequantize_per_token(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype, ScalarType output_dtype) -> Tensordequantize_per_tokenoutput_dtypec                 C   s   | | } | | } |  |S )a  Per token dequantization for the Tensor using the quantization parameters to map
    from floating point to quantized values. This means for a N dimension Tensor
    (M1, M2, ...Mn, N), we calculate scales/zero_points for each N elements and quantize
    every N elements with the same quantization parameter. The dimension for scales/zero_points
    will be (M1 * M2 ... * Mn)

    Args:
       input (torch.Tensor): quantized Tensor (uint8, int8 etc.)
       scales (float64 torch.Tensor): quantization parameter for per token affine quantization
       zero_points (int64 torch.Tensor): quantization parameter for per token affine quantization
       quant_min (int): minimum quantized value for input Tensor
       quant_max (int): maximum quantized value for input Tensor
       dtype (torch.dtype): dtype (e.g. torch.uint8) for input Tensor
       output_dtype (torch.dtype): dtype (e.g. torch.float32) for output Tensor

    Returns:
       dequantized Tensor with dtype `output_dtype`
    )r(   r    rq   rr   r   r   r   r   r
   r
   r   r     s   
c                 C   s   t ||| tj| |dS rD   r   r   r
   r
   r   dequantize_per_token_meta  s   
r   zquantize_per_channel_group(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype, int group_size) -> Tensorquantize_per_channel_group   c           	      C   s   |dksJ || j d kr|j d dkr| j d }| j d | dks$J |  dks,J | d|}t| dks=J |dd}|dd}|d| | 	||
|| }|S )Nr3   r   r   r   r%   )rt   rh   reshaper   isnansumr   r   r+   clamp_r(   
reshape_as)	r    rq   rr   r   r   r   
group_sizeto_quant
input_int8r
   r
   r   r     s"   
	c                 C   sf   |dksJ || j d kr|j d dkr| j d }| j d | dks$J |  dks,J tj| |dS )aX  Groupwise quantization within each channel for an 2-d Tensor using the quantization parameters
    to map from floating point to quantized values. This means for each row of a 2-d Tensor
    (M, N), we calculate scales/zero_points for each `group_size` elements
    and quantize every `group_size` elements with the same quantization parameter.
    The dimension for scales/zero_points will be (M * ceil(N, group_size),)

    Args:
       input (torch.Tensor): original float32 or bfloat16 Tensor
       scales (float32 torch.Tensor): quantization parameter for per channel group affine quantization
       zero_points (int32 torch.Tensor): quantization parameter for per channel group affine quantization
       quant_min (int): minimum quantized value for output Tensor
       quant_max (int): maximum quantized value for output Tensor
       dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

    Returns:
       Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
       are not stored in the Tensor, we are storing them in function arguments instead
    r3   r   r   r   r.   )rt   rh   r   r/   )r    rq   rr   r   r   r   r   r
   r
   r   quantize_per_channel_group_meta	  s   
r   zdequantize_per_channel_group(Tensor input, Tensor scales, Tensor? zero_points, int quant_min, int quant_max, ScalarType dtype, int group_size, ScalarType output_dtype) -> Tensordequantize_per_channel_groupw_int8r   c                 C   s   |dksJ || j d kr|j d dkr| j d }| j d | dks$J |  dks,J | d|}|dd}|durC|dd}	n
tjg tj|jd}	||	|	| 
|}
|
S )a!  Groupwise dequantization within each channel for an 2-d Tensor using the quantization parameters
    to map from floating point to quantized values. This means for each row of a 2-d Tensor
    (M, N), we calculate scales/zero_points for each `group_size` elements
    and quantize every `group_size` elements with the same quantization parameter.
    The dimension for scales/zero_points will be (M * ceil(N, group_size),)

    Args:
       input (torch.Tensor): quantized Tensor (uint8/int8 etc.)
       scales (float32 torch.Tensor): quantization parameter for per channel group affine quantization
       zero_points (int32 torch.Tensor): quantization parameter for per channel group affine quantization
       quant_min (int): minimum quantized value for input Tensor
       quant_max (int): maximum quantized value for input Tensor
       dtype (torch.dtype): dtype (e.g. torch.uint8) for input Tensor
       output_dtype (torch.dtype): dtype (e.g. torch.float32) for output Tensor

    Returns:
       dequantized Tensor with dtype `output_dtype`
    r3   r   r   r   Nr^   )rt   rh   r   r   zerosint32r_   subr   r   r(   )r   rq   rr   r   r   r   r   r   w_int8_groupedzpw_dqr
   r
   r   r   5  s   "
zyfake_quant_per_channel(Tensor input, Tensor scales, Tensor zero_points, int axis, int quant_min, int quant_max) -> Tensorc                   @   s$   e Zd Zedd Zedd ZdS )FakeQuantPerChannelc                 C   s   |j tjkr|tj}|j tjkr|tj}|j tjks&J d|j  || k s5J d|  ttd|tt|d |j }t	||}t	||}	t
|d|  |	 }
t|
|||	 | }t|
|k|
|k}| | |S )Nr$   rs   r   r3   r%   )r   r   r)   r(   r   rh   rf   rg   ndimr   r+   r*   logical_andsave_for_backward)ctxr    rq   rr   rl   r   r   broadcast_dimsunsqueeze_scalesunsqueeze_zero_pointstempry   maskr
   r
   r   forwardo  s$   
"


zFakeQuantPerChannel.forwardc                 C   s   | j \}|| d d d d d fS r<   )saved_tensors)r   gyr   r
   r
   r   backward  s   zFakeQuantPerChannel.backwardN)__name__
__module____qualname__staticmethodr   r   r
   r
   r
   r   r   n  s
    
r   fake_quant_per_channelAutogradc                 C   s   t | |||||S r<   )r   applyr    rq   rr   rl   r   r   r
   r
   r   r     s   	c                 C   s
   t | S r<   r   r/   r   r
   r
   r   fake_quant_per_channel_meta  r   r   zFconvert_element_type.no_fuse(Tensor input, ScalarType dtype) -> Tensorzconvert_element_type.no_fusec                 C   s   t jjj| |S r<   )r   opsprimsconvert_element_typedefaultr   r
   r
   r   r     s   r   c                 C   s   t j| |dS rD   r   r   r
   r
   r   convert_element_type_meta  s   r   )r   )Ir   typingr   r   torch._refsr   torch.ao.quantization.utilsr   r   torch.libraryr   r   quantized_decomposed_libuint8r   uint16int16r   _INTEGER_DTYPESfloat8_e5m2float8_e4m3fn_FLOAT_DTYPESr   updater   definerV   r   r   r   r   r1   r9   r:   r;   r=   r>   rE   rH   rI   rJ   rK   rj   rZ   r]   rd   re   ro   rp   rz   r{   r~   r   r   r   r   r   r   r   r   r)   r   r   r   r   r   autogradFunctionr   r   r   r   r   r
   r
   r
   r   <module>   s  


$


		
2			
 			
 		
'
'
	

/

	

>
	
#
+



&

!

$%.


"