o
    0h.                     @   sX   d dl Z d dlmZ d dlmZ G dd deZG dd deZG dd de jjZ	dS )	    N)Functionc                   @   $   e Zd Zedd Zedd ZdS )SyncBatchNormc
              
      sd  |j tjds|j tjds| }|d ur| }t| |d }
|
dkr4|	dk r4td|
 |j	d }| dkrdt
||\}}tjd| |d |j|jd}tj|||gdd ntjd| d |j|jd | d	kr  }tjd||	  j jd}tj| |d
d t||	|f tj |dd\}}}n& fddt|	D }tj| |d
d tj|dd tj |dd\}}}tj rtj s|ddk}|| }|| }|| }|d}|d ur|j|jkr||j}t||||||||\}}|  |||||tj! || _"| dkr-t#||||||S t$|S )Nmemory_format      zEExpected more than 1 value per channel when training, got input size r   )r   dtypedevicedimglooFasync_opc                    s   g | ]}t  qS  )torch
empty_like).0_combinedr   O/var/www/vscode/kcb/lib/python3.10/site-packages/torch/nn/modules/_functions.py
<listcomp>Q       z)SyncBatchNorm.forward.<locals>.<listcomp>)%is_contiguousr   channels_lastchannels_last_3d
contiguousintnumelsize
ValueErrorshapebatch_norm_statsfullr
   r   catzeros_get_backend_nameemptydistall_gather_into_tensorreshapesplitrange
all_gatherstackcudais_availableis_current_stream_capturingsqueezeviewto#batch_norm_gather_stats_with_countssave_for_backwardint32process_groupbatch_norm_elemtr   )selfinputweightbiasrunning_meanrunning_varepsmomentumr;   
world_sizer"   num_channelsmeaninvstdcountcombined_sizecombined_flatmean_all
invstd_all	count_allcombined_listmaskcountsr   r   r   forward   s   

	

zSyncBatchNorm.forwardc                 C   s  |j tjds|j tjds| }| j\}}}}}d  } }}	| j}
| dkrt|||||| j	d | j	d | j	d \}}}}	| j	d r|j
d }tj||gdd}tjj|tjjj|
dd t||\}}|d urz|j|jkrz||j}t||||||||}|d u s| j	d sd }|d u s| j	d sd }	n#|j
d }| j	d rtjd| |j|jd}tjj|tjjj|
dd |||	d d d d d d f	S )	Nr   r   r   r   r   Fr   r	   )r   r   r   r   r   saved_tensorsr;   r!   batch_norm_backward_reduceneeds_input_gradr$   r'   distributed
all_reduceReduceOpSUMr.   r
   r7   batch_norm_backward_elemtr(   r   )r=   grad_outputsaved_inputr?   rG   rH   count_tensor
grad_inputgrad_weight	grad_biasr;   sum_dy
sum_dy_xmurF   r   r   r   r   backward}   s~   




zSyncBatchNorm.backwardN__name__
__module____qualname__staticmethodrR   rc   r   r   r   r   r      s
    
tr   c                   @   s&   e Zd Zed	ddZedd ZdS )
CrossMapLRN2d-C6?      ?r   c                 C   s  || _ || _|| _|| _d | _| dkrtd|  d| jp%| | _| }| d}|| | j| |}t	j
|d|d t| j d d d }	t|	|}
| jdd}|  t|
D ]}||d| qdtd|D ]C}| jd|d }| jd|}|| |||	 d k r|d||	 d }|j|dd ||	kr|d||	 }|j|d	d qu| j| j| j  | j t	j
| j| j |d || | || |S )
N   z,CrossMapLRN2d: Expected input to be 4D, got z
D instead.r   r   outr   alphar   )r"   rp   betakscaler   r#   new
resize_as_r   powr    minselectzero_r/   add_copy_mul_r9   )ctxr>   r"   rp   rq   rr   outputchannelsinput_squarepre_padpre_pad_cropscale_firstcscale_previousscale_currentsquare_nextsquare_previousr   r   r   rR      sL   




zCrossMapLRN2d.forwardc                 C   s  | j \}}| }|d}|d}|d}|d}||| j d ||}	|||}
d| j | j | j }t| j| jd d  }|| tj| j	| j |d
| |	  |	d||}t|D ]T}tj|| || |d || j	|  tj|	dd| jd dd|
d t|D ](}|
|	|| j d   || | j|| | |
| d |
j|	| d	d
 qqh|d d d d fS )Nr   r   r      rm   F)keepdimrn   )valuer   ro   )rS   rt   r"   rp   rq   r    ru   r   rv   rs   r|   ry   narrowr/   muldiv_sumrz   addcmul_)r}   r[   r>   r~   r^   
batch_sizer   input_heightinput_widthpaddded_ratioaccum_ratiocache_ratio_valueinversePrePadpadded_ratio_centernr   r   r   r   rc     s>   





zCrossMapLRN2d.backwardN)rj   rk   r   rd   r   r   r   r   ri      s
    6ri   c                   @   r   )BackwardHookFunctionc                 G   s   | j dd |D   |S )Nc                 S   s   g | ]}|j s|qS r   )requires_grad)r   argr   r   r   r   6  r   z0BackwardHookFunction.forward.<locals>.<listcomp>)mark_non_differentiabler}   argsr   r   r   rR   4  s   zBackwardHookFunction.forwardc                 G   s   |S )Nr   r   r   r   r   rc   9  s   zBackwardHookFunction.backwardNrd   r   r   r   r   r   3  s
    
r   )
r   torch.distributedrV   r+   torch.autograd.functionr   r   ri   autogradr   r   r   r   r   <module>   s    M`