o
    Vh                     @   s&  d dl mZmZmZmZ d dlZd dlm  mZ	 d dl
Z
d dlmZmZ d dlmZmZ ddlmZ dd Zd	d
 Zdd Zdd Zdd Zdd Zejjdd Zdd Zdd Zdd Zdd Zdd Z ejj!dd  Z"d!d" Z#d#d$ Z$d%d& Z%ejjd'd( Z&d-d)d*Z'G d+d, d,ej(Z)dS ).    )DictListOptionalTupleN)nnTensor)boxes	roi_align   )_utilsc           
      C   s   t j|dd}t j|dd}t| |}t |dkd }|| }| j\}}|||dd d}tj|||f || ddd}	|	|	  }	||	fS )a  
    Computes the loss for Faster R-CNN.

    Args:
        class_logits (Tensor)
        box_regression (Tensor)
        labels (list[BoxList])
        regression_targets (Tensor)

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    r   dim   gqq?sum)beta	reduction)
torchcatFcross_entropywhereshapereshapesizesmooth_l1_lossnumel)
class_logitsbox_regressionlabelsregression_targetsclassification_losssampled_pos_inds_subset
labels_posNnum_classesbox_loss r'   Z/var/www/vscode/kcb/lib/python3.10/site-packages/torchvision/models/detection/roi_heads.pyfastrcnn_loss   s   

r)   c                 C   sd   |   }| jd }dd |D }t|}tj||jd}|||f dddf }|j|dd}|S )a  
    From the results of the CNN, post process the masks
    by taking the mask corresponding to the class with max
    probability (which are of fixed size and directly output
    by the CNN) and return the masks in the mask field of the BoxList.

    Args:
        x (Tensor): the mask logits
        labels (list[BoxList]): bounding boxes that are used as
            reference, one for ech image

    Returns:
        results (list[BoxList]): one BoxList for each image, containing
            the extra field mask
    r   c                 S      g | ]}|j d  qS r   r   ).0labelr'   r'   r(   
<listcomp>I       z&maskrcnn_inference.<locals>.<listcomp>deviceNr   )sigmoidr   r   r   aranger2   split)xr   	mask_prob	num_masksboxes_per_imageindexr'   r'   r(   maskrcnn_inference4   s   

r;   c                 C   s\   | |}tj|dddf |gdd}| dddf  |} t| |||fddddf S )a%  
    Given segmentation masks and the bounding boxes corresponding
    to the location of the masks in the image, this function
    crops and resizes the masks in the position defined by the
    boxes. This prepares the masks for them to be fed to the
    loss computation as the targets.
    Nr
   r   g      ?r   )tor   r   r	   )gt_masksr   matched_idxsMroisr'   r'   r(   project_masks_on_boxesR   s   
	rA   c                    s   | j d  dd t||D } fddt|||D }tj|dd}tj|dd}| dkr6|  d S t| tj|j d |j	d|f |}|S )z
    Args:
        proposals (list[BoxList])
        mask_logits (Tensor)
        targets (list[BoxList])

    Return:
        mask_loss (Tensor): scalar tensor containing the loss
    r   c                 S   s   g | ]\}}|| qS r'   r'   )r-   gt_labelidxsr'   r'   r(   r/   n   s    z!maskrcnn_loss.<locals>.<listcomp>c                    s    g | ]\}}}t ||| qS r'   )rA   )r-   mpidiscretization_sizer'   r(   r/   o   s    r   r   r1   )
r   zipr   r   r   r   r    binary_cross_entropy_with_logitsr4   r2   )mask_logits	proposalsr=   	gt_labelsmask_matched_idxsr   mask_targets	mask_lossr'   rG   r(   maskrcnn_lossa   s   


rQ   c                 C   s  |d d df }|d d df }||d d df |d d df   }||d d df |d d df   }|d d d f }|d d d f }|d d d f }|d d d f }| d }| d }||d d df d d d f k}	||d d df d d d f k}
|| | }|   }|| | }|   }|d ||	< |d ||
< |dk|dk@ ||k @ ||k @ }| d dk}||@  }|| | }|| }||fS )Nr   r
         ).r   ).r
   ).rR   )floorlong)	keypointsr@   heatmap_sizeoffset_xoffset_yscale_xscale_yr6   yx_boundary_indsy_boundary_inds	valid_locvisvalidlin_indheatmapsr'   r'   r(   keypoints_to_heatmap   s0   $$   rd   c                 C   s  t j| dt jd}|| }	|| }
tj|d d d f t|t|fdddd d df }t j|dt jd}||djdd	}|| }|| | }t j	d
t j
d|jt j
d |	jt j
d }t j	d
t j
d|jt j
d |
jt j
d }||jt j
d }||jt j
d }t j|jt j
d}t |jt j
d|jt j
d|jt j
dgd}|| | d }t |}|jt jd| }|d|jt jdd|jt jddd|jt jd}||fS )Nr
   dtypebicubicFr   modealign_cornersr   rR   r   r         ?)r   scalar_tensorr   int64r   interpolateintr   argmaxtensorfloat32r<   onesr   stackr4   index_selectview)mapsmaps_iroi_map_widthroi_map_heightwidths_i	heights_i
offset_x_i
offset_y_inum_keypointswidth_correctionheight_correctionroi_mapwposx_inty_intr6   r\   xy_preds_i_0xy_preds_i_1xy_preds_i_2
xy_preds_ibaseindend_scores_ir'   r'   r(   _onnx_heatmaps_to_keypoints   sJ    
  

r   c	                 C   s   t jddt|ft j| jd}	t jdt|ft j| jd}
tt|dD ]G}t| | | || || || || || || \}}t |	j	t jd|
dj	t jdfd}	t |
j	t jd|j	t jd
dfd}
q&|	|
fS )Nr   rS   rf   r2   re   )r   zerosro   rr   r2   ranger   r   r   r<   	unsqueeze)rw   r@   widths_ceilheights_ceilwidthsheightsrX   rY   r   xy_preds
end_scoresrF   r   r   r'   r'   r(    _onnx_heatmaps_to_keypoints_loop   s   ,*"r   c                 C   sb  |dddf }|dddf }|dddf |dddf  }|dddf |dddf  }|j dd}|j dd}| }| }| jd }t rjt| |||||||tj|tjd	\}	}
|		ddd|
fS tj
t|d|ftj| jd}	tj
t||ftj| jd}
tt|D ]}t||  }t||  }|| | }|| | }tj| | dddf ||fd	d
ddddf }|jd }||djdd}|| }tj|| |dd}| d | }| d | }|||  |	|dddf< |||  |	|dddf< d|	|dddf< |tj||jd||f |
|ddf< q|		ddd|
fS )zExtract predicted keypoint locations from heatmaps. Output has shape
    (#rois, 4, #keypoints) with the 4 rows corresponding to (x, y, logit, prob)
    for each keypoint.
    Nr   r
   rR   rS   minre   r   rg   Frh   r   r   rT   )rounding_moderk   r1   )clampceilr   torchvision_is_tracingr   r   rl   rm   permuter   lenrr   r2   r   ro   itemr   rn   r   rp   divfloatr4   )rw   r@   rX   rY   r   r   r   r   r   r   r   rF   ry   rz   r   r   r   r   r   r   r   r6   r\   r'   r'   r(   heatmaps_to_keypoints   sZ   
  


(r   c                 C   s
  | j \}}}}||krtd| d| |}g }	g }
t|||D ]!\}}}|| }t|||\}}|	|d |
|d q!tj|	dd}tj|
ddjtj	d}
t
|
d }
| dksit|
dkro|  d S | || || } t| |
 ||
 }|S )Nz_keypoint_logits height and width (last two elements of shape) should be equal. Instead got H = z	 and W = r   r   r   re   )r   
ValueErrorrI   rd   appendrv   r   r   r<   uint8r   r   r   r   r   r   )keypoint_logitsrL   gt_keypointskeypoint_matched_idxsr$   KHWrH   rc   ra   proposals_per_imagegt_kp_in_imagemidxkpheatmaps_per_imagevalid_per_imagekeypoint_targetskeypoint_lossr'   r'   r(   keypointrcnn_loss*  s*   r   c           
      C   sb   g }g }dd |D }| j |dd}t||D ]\}}t||\}}	|| ||	 q||fS )Nc                 S   s   g | ]}| d qS r+   )r   )r-   boxr'   r'   r(   r/   N  r0   z*keypointrcnn_inference.<locals>.<listcomp>r   r   )r5   rI   r   r   )
r6   r   kp_probs	kp_scoresr9   x2xxbbkp_probscoresr'   r'   r(   keypointrcnn_inferenceI  s   
r   c                 C   s   | d d df | d d df  d }| d d df | d d df  d }| d d df | d d df  d }| d d df | d d df  d }|j tjd| }|j tjd| }|| }|| }|| }|| }	t||||	fd}
|
S )NrR   r   rk   rS   r
   re   )r<   r   rr   rt   )r   scalew_halfh_halfx_cy_c
boxes_exp0
boxes_exp1
boxes_exp2
boxes_exp3	boxes_expr'   r'   r(   _onnx_expand_boxesY  s   $$$$r   c                 C   s  t  r	t| |S | d d df | d d df  d }| d d df | d d df  d }| d d df | d d df  d }| d d df | d d df  d }||9 }||9 }t| }|| |d d df< || |d d df< || |d d df< || |d d df< |S )NrR   r   rk   rS   r
   )r   r   r   r   
zeros_like)r   r   r   r   r   r   r   r'   r'   r(   expand_boxesn  s   
$$$$
r   c                 C   s,   t | d|  t jt | t j S )NrR   )r   rq   r<   rr   )r?   paddingr'   r'   r(   expand_masks_tracing_scale  s   ,r   c                 C   sN   | j d }tj rt||}n
t|d|  | }t| |fd }||fS )Nr   rR   r   )r   r   _C_get_tracing_stater   r   r   pad)maskr   r?   r   padded_maskr'   r'   r(   expand_masks  s   

r   c                 C   s  d}t |d |d  | }t |d |d  | }t|d}t|d}| d} tj| ||fddd} | d d } tj||f| j| jd	}t|d d}t	|d d |}	t|d d}
t	|d d |}| |
|d  ||d  ||d  |	|d  f ||
|||	f< |S )
Nr
   rR   r   rS   )r
   r
   r   r   bilinearFrh   r   )
ro   maxexpandr   rn   r   r   rf   r2   r   )r   r   im_him_w	TO_REMOVEr   him_maskx_0x_1y_0y_1r'   r'   r(   paste_mask_in_image  s   


Dr   c                 C   s  t jdt jd}t jdt jd}|d |d  | }|d |d  | }t t ||f}t t ||f}| dd| d| df} tj	| t
|t
|fddd} | d d } t t |d d|f}t t |d d| |df}	t t |d d|f}
t t |d d| |df}| |
|d  ||d  ||d  |	|d  f }t |
|d}t || |d}t ||jt jd|fdd|d d f }t |d|}t |d||	 }t |||fdd d d |f }|S )	Nr
   re   rR   r   rS   r   Frh   )r   rs   rm   r   r   r   r   r   r   rn   ro   r   r   r<   rr   )r   r   r   r   onezeror   r   r   r   r   r   unpaded_im_maskzeros_y0zeros_y1concat_0zeros_x0zeros_x1r   r'   r'   r(   _onnx_paste_mask_in_image  s*   ((4,"r   c                 C   sX   t d||}t| dD ]}t| | d || ||}|d}t ||f}q|S Nr   )r   r   r   r   r   r   r   )masksr   r   r   
res_appendrF   mask_resr'   r'   r(   _onnx_paste_masks_in_image_loop  s   
r   c                    s   t | |d\} }t||jtjd}|\ t r3t| |tj tjdtjtjdd d d f S  fddt	| |D }t
|dkrUtj|ddd d d f }|S | dd f}|S )N)r   re   c                    s"   g | ]\}}t |d  | qS r+   )r   )r-   rD   br   r   r'   r(   r/     s   " z(paste_masks_in_image.<locals>.<listcomp>r   r   r
   )r   r   r<   r   rm   r   r   r   rl   rI   r   rt   	new_empty)r   r   	img_shaper   r   resretr'   r   r(   paste_masks_in_image  s    
r   c                       s   e Zd ZejejejdZ						d fdd	Zdd Z	dd Z
d	d
 Zdd Zdd Zdd Zdd Zdd Z	dddZ  ZS )RoIHeads)	box_coderproposal_matcherfg_bg_samplerNc                    s   t    tj| _tj||dd| _t||| _	|d u rd}t
|| _|| _|| _|| _|	| _|
| _|| _|| _|| _|| _|| _|| _|| _d S )NF)allow_low_quality_matches)      $@r         @r   )super__init__box_opsbox_ioubox_similarity	det_utilsMatcherr   BalancedPositiveNegativeSamplerr   BoxCoderr   box_roi_poolbox_headbox_predictorscore_thresh
nms_threshdetections_per_imgmask_roi_pool	mask_headmask_predictorkeypoint_roi_poolkeypoint_headkeypoint_predictor)selfr	  r
  r  fg_iou_threshbg_iou_threshbatch_size_per_imagepositive_fractionbbox_reg_weightsr  r  r  r  r  r  r  r  r  	__class__r'   r(   r    s&   

zRoIHeads.__init__c                 C   .   | j d u rdS | jd u rdS | jd u rdS dS NFT)r  r  r  r  r'   r'   r(   has_mask&     


zRoIHeads.has_maskc                 C   r  r  )r  r  r  r  r'   r'   r(   has_keypoint/  r!  zRoIHeads.has_keypointc                 C   s   g }g }t |||D ]c\}}}| dkr3|j}	tj|jd ftj|	d}
tj|jd ftj|	d}n0t||}| 	|}|j
dd}
||
 }|jtjd}|| j	jk}d||< || j	jk}d||< ||
 || q
||fS )Nr   r   r   re   r   )rI   r   r2   r   r   r   rm   r  r  r   r   r<   BELOW_LOW_THRESHOLDBETWEEN_THRESHOLDSr   )r  rL   gt_boxesrM   r>   r   proposals_in_imagegt_boxes_in_imagegt_labels_in_imager2   clamped_matched_idxs_in_imagelabels_in_imagematch_quality_matrixmatched_idxs_in_imagebg_indsignore_indsr'   r'   r(   assign_targets_to_proposals8  s*   

z$RoIHeads.assign_targets_to_proposalsc           	      C   sN   |  |\}}g }tt||D ]\}\}}t||B d }|| q|S r   )r   	enumeraterI   r   r   r   )	r  r   sampled_pos_indssampled_neg_indssampled_indsimg_idxpos_inds_imgneg_inds_imgimg_sampled_indsr'   r'   r(   	subsample[  s   zRoIHeads.subsamplec                 C   s   dd t ||D }|S )Nc                 S   s   g | ]\}}t ||fqS r'   )r   r   )r-   proposalgt_boxr'   r'   r(   r/   f  s    z-RoIHeads.add_gt_proposals.<locals>.<listcomp>)rI   )r  rL   r%  r'   r'   r(   add_gt_proposalsd  s   zRoIHeads.add_gt_proposalsc                 C   sn   |d u rt dtdd |D st dtdd |D s"t d|  r3tdd |D s5t dd S d S )	Ntargets should not be Nonec                 S      g | ]}d |v qS r   r'   r-   tr'   r'   r(   r/   n      z*RoIHeads.check_targets.<locals>.<listcomp>z0Every element of targets should have a boxes keyc                 S   r=  r   r'   r?  r'   r'   r(   r/   p  rA  z1Every element of targets should have a labels keyc                 S   r=  r   r'   r?  r'   r'   r(   r/   s  rA  z0Every element of targets should have a masks key)r   allr   )r  targetsr'   r'   r(   check_targetsj  s   zRoIHeads.check_targetsc                    s  |  | |d u rtd|d j |d j} fdd|D }dd |D }| ||}| |||\}}| |}g }	t|}
t|
D ]9}|| }|| | ||< || | ||< || | ||< || }|	 dkrut
jd |d}|	|||   qE| j|	|}||||fS )Nr<  r   c                    s   g | ]	}|d    qS r>  )r<   r?  re   r'   r(   r/     s    z4RoIHeads.select_training_samples.<locals>.<listcomp>c                 S      g | ]}|d  qS rB  r'   r?  r'   r'   r(   r/     rA  )r
   r   r   )rF  r   rf   r2   r;  r/  r8  r   r   r   r   r   r   r   encode)r  rL   rE  r2   r%  rM   r>   r   r3  matched_gt_boxes
num_imagesimg_idr7  r'  r    r'   re   r(   select_training_samplesv  s.   



z RoIHeads.select_training_samplesc                 C   s  |j }|jd }dd |D }| j||}t|d}	||d}
|	|d}g }g }g }t|
||D ]\}}}t	||}t
j||d}|dd|}|d d dd f }|d d dd f }|d d dd f }|dd}|d}|d}t
|| jkd }|| || || }}}tj|dd	}|| || || }}}t|||| j}|d | j }|| || || }}}|| || || q4|||fS )
Nr   c                 S   r*   r+   r,   )r-   boxes_in_imager'   r'   r(   r/     r0   z3RoIHeads.postprocess_detections.<locals>.<listcomp>r   r1   r
   r   g{Gz?)min_size)r2   r   r   decoder   softmaxr5   rI   r  clip_boxes_to_imager   r4   rv   	expand_asr   r   r  remove_small_boxesbatched_nmsr  r  r   )r  r   r   rL   image_shapesr2   r%   r9   
pred_boxespred_scorespred_boxes_listpred_scores_list	all_boxes
all_scores
all_labelsr   r   image_shaper   indskeepr'   r'   r(   postprocess_detections  s>   





zRoIHeads.postprocess_detectionsc           ,      C   sh  |durJ|D ]C}t jt jt jf}|d j|vr!td|d j |d jt jks3td|d j |  rI|d jt jksItd|d j q| j	rX| 
||\}}}}	nd}d}	d}| |||}
| |
}
| |
\}}g }i }| j	r|du rtd|	du rtd	t||||	\}}||d
}n%| ||||\}}}t|}t|D ]}||| || || d q|  rsdd |D }| j	r|du rtdt|}g }g }t|D ]}t || dkd }||| |  ||| |  qnd}| jdur| |||}| |}| |}ntdi }| j	rS|du s4|du s4|du r8tddd |D }dd |D }t|||||}d|i}ndd |D }t||}t||D ]	\} }!| |!d< qd|| | jdur0| jdur0| j dur0dd |D }"| j	rt|}g }"g }|du rtdt|D ] }t || dkd }|"|| |  ||| |  qnd}| ||"|}#| |#}#|  |#}$i }%| j	r|du s|du rtddd |D }&t!|$|"|&|}'d|'i}%n*|$du s|"du rtdt"|$|"\}(})t|(|)|D ]\}*}+}!|*|!d< |+|!d< q||% ||fS )z
        Args:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
        Nr   z-target boxes must of float type, instead got r   z.target labels must of int64 type, instead got rV   z1target keypoints must of float type, instead got zlabels cannot be Nonez!regression_targets cannot be None)loss_classifierloss_box_reg)r   r   r   c                 S   rG  r>  r'   r-   rE   r'   r'   r(   r/     rA  z$RoIHeads.forward.<locals>.<listcomp>z/if in training, matched_idxs should not be Noner   z%Expected mask_roi_pool to be not NonezCtargets, pos_matched_idxs, mask_logits cannot be None when trainingc                 S   rG  rC  r'   r?  r'   r'   r(   r/   /  rA  c                 S   rG  rB  r'   r?  r'   r'   r(   r/   0  rA  	loss_maskc                 S   rG  rB  r'   )r-   rr'   r'   r(   r/   4  rA  r   c                 S   rG  r>  r'   rc  r'   r'   r(   r/   B  rA  z0if in trainning, matched_idxs should not be NonezJboth targets and pos_matched_idxs should not be None when in training modec                 S   rG  )rV   r'   r?  r'   r'   r(   r/   [  rA  loss_keypointzXboth keypoint_logits and keypoint_proposals should not be None when not in training modekeypoints_scores)#r   r   doublehalfrf   	TypeErrorrm   r"  rr   trainingrL  r	  r
  r  r   r)   r`  r   r   r   r   r   r  r  r  	ExceptionrQ   r;   rI   updater  r  r  r   r   ),r  featuresrL   rU  rE  r@  floating_point_typesr>   r   r    box_featuresr   r   resultlossesra  rb  r   r   rJ  rF   mask_proposalspos_matched_idxsrK  r   mask_featuresrK   rd  r=   rM   rcnn_loss_maskmasks_probsr7   re  keypoint_proposalskeypoint_featuresr   rf  r   rcnn_loss_keypointkeypoints_probsr   keypoint_probkpsr'   r'   r(   forward  s   










zRoIHeads.forward)NNNNNN)N)__name__
__module____qualname__r  r  r  r  __annotations__r  r   r"  r/  r8  r;  rF  rL  r`  r~  __classcell__r'   r'   r  r(   r     s,    3		#	&@r   )r
   )*typingr   r   r   r   r   torch.nn.functionalr   
functionalr   r   r   torchvision.opsr   r  r	    r   r  r)   r;   rA   rQ   rd   r   jit_script_if_tracingr   r   r   r   r   r   unusedr   r   r   r   r   r   Moduler   r'   r'   r'   r(   <module>   s:    ( $3
@
$

	