o
    Wh-5                     @   sp   d dl Z d dlmZ d dlm  mZ d dlmZ d dlm	Z	 d dl
mZmZ G dd dejZ	
dddZdS )    Nlinear_sum_assignment)bbox_iou)	xywh2xyxy	xyxy2xywhc                       s,   e Zd ZdZd fdd		Zdd
dZ  ZS )HungarianMatchera=  
    A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in an
    end-to-end fashion.

    HungarianMatcher performs optimal assignment over the predicted and ground truth bounding boxes using a cost
    function that considers classification scores, bounding box coordinates, and optionally, mask predictions.

    Attributes:
        cost_gain (dict): Dictionary of cost coefficients: 'class', 'bbox', 'giou', 'mask', and 'dice'.
        use_fl (bool): Indicates whether to use Focal Loss for the classification cost calculation.
        with_mask (bool): Indicates whether the model makes mask predictions.
        num_sample_points (int): The number of sample points used in mask cost calculation.
        alpha (float): The alpha factor in Focal Loss calculation.
        gamma (float): The gamma factor in Focal Loss calculation.

    Methods:
        forward: Computes the assignment between predictions and ground truths for a batch.
        _cost_mask: Computes the mask cost and dice cost if masks are predicted.
    NTF 1        ?       @c                    sJ   t    |du rdddddd}|| _|| _|| _|| _|| _|| _dS )a  
        Initialize a HungarianMatcher module for optimal assignment of predicted and ground truth bounding boxes.

        The HungarianMatcher uses a cost function that considers classification scores, bounding box coordinates,
        and optionally mask predictions to perform optimal bipartite matching between predictions and ground truths.

        Args:
            cost_gain (dict, optional): Dictionary of cost coefficients for different components of the matching cost.
                Should contain keys 'class', 'bbox', 'giou', 'mask', and 'dice'.
            use_fl (bool, optional): Whether to use Focal Loss for the classification cost calculation.
            with_mask (bool, optional): Whether the model makes mask predictions.
            num_sample_points (int, optional): Number of sample points used in mask cost calculation.
            alpha (float, optional): Alpha factor in Focal Loss calculation.
            gamma (float, optional): Gamma factor in Focal Loss calculation.
        N         )classbboxgioumaskdice)super__init__	cost_gainuse_fl	with_masknum_sample_pointsalphagamma)selfr   r   r   r   r   r   	__class__ P/var/www/vscode/kcb/lib/python3.10/site-packages/ultralytics/models/utils/ops.pyr   !   s   

zHungarianMatcher.__init__c                    s  |j \}}	}
t dkrdd t|D S | d|
}| jr%t|ntj|dd}| dd}|dd|f }| jrhd| j	 || j
  d| d	    }| j	d| | j
  |d	    }|| }n| }|d|d  d}d
t|d|ddddd }| jd | | jd |  | jd |  }| jr|| | ||7 }d|| | B < |||	d }dd t| dD }tdg dd d  fddt|D S )a  
        Forward pass for HungarianMatcher. Computes costs based on prediction and ground truth and finds the optimal
        matching between predictions and ground truth based on these costs.

        Args:
            pred_bboxes (torch.Tensor): Predicted bounding boxes with shape (batch_size, num_queries, 4).
            pred_scores (torch.Tensor): Predicted scores with shape (batch_size, num_queries, num_classes).
            gt_cls (torch.Tensor): Ground truth classes with shape (num_gts, ).
            gt_bboxes (torch.Tensor): Ground truth bounding boxes with shape (num_gts, 4).
            gt_groups (List[int]): List of length equal to batch size, containing the number of ground truths for
                each image.
            masks (torch.Tensor, optional): Predicted masks with shape (batch_size, num_queries, height, width).
            gt_mask (List[torch.Tensor], optional): List of ground truth masks, each with shape (num_masks, Height, Width).

        Returns:
            (List[Tuple[torch.Tensor, torch.Tensor]]): A list of size batch_size, each element is a tuple (index_i, index_j), where:
                - index_i is the tensor of indices of the selected predictions (in order)
                - index_j is the tensor of indices of the corresponding selected ground truth targets (in order)
                For each batch element, it holds:
                    len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        r   c                 S   s,   g | ]}t jg t jd t jg t jd fqS dtypetorchtensorlong).0_r   r   r   
<listcomp>T   s   , z,HungarianMatcher.forward.<locals>.<listcomp>dim   Nr   g:0yE>      ?T)xywhGIoUr   r   r           c                 S   s   g | ]
\}}t || qS r   r   )r'   icr   r   r   r)   z   s    c                    s<   g | ]\}\}}t j|t jd t j|t jd  |  fqS r    r#   )r'   kr2   j	gt_groupsr   r   r)   |   s    
&)shapesumrangedetachviewr   Fsigmoidsoftmaxr   r   log	unsqueezeabsr   squeezer   r   
_cost_maskisnanisinfcpu	enumeratesplitr$   	as_tensorcumsum_)r   pred_bboxespred_scores	gt_bboxesgt_clsr7   masksgt_maskbsnqncneg_cost_classpos_cost_class
cost_class	cost_bbox	cost_giouCindicesr   r6   r   forward;   s:   &"
&
zHungarianMatcher.forward)NTFr   r	   r
   )NN)__name__
__module____qualname____doc__r   r\   __classcell__r   r   r   r   r      s    r   d         ?r.   Fc           "         s  |r
|dks
| du rdS | d }t |}	t|dkrdS | }
|
dkr(dn|
}
t|}| d }| d }| d }|d	|
 }|d	|
 d}|d	|
 d
}tj|	|
 tj|jd|
|	  }|dkrt	|j
|d k }t|d
}tj|d||j|jd}|||< |dkrt|}|dd	df d dd	| }t|dd	d d }t|}||  d7  < ||9 }||| 7 }|jddd t|}tj|dd}td	 |
 }|| }tj|||j
d
 |jd}tj||d|jd}tdd |D  tj fddt|
D dd}t fddtd	|
 D  ||| f< ||| f< || }tj||gtjd}d||dd|f< t|
D ]u} | dkrmd|d	 |  d	 | d  d	 | d  |f< | |
d krd|d	 |  d	 | d  d|  d	 f< qLd|d	 |  d	 | d  d	 | d  |f< d|d	 |  d	 | d  dd	 |  f< qLdd | jt|ddD |
||gd}!||j||j||j|!fS )a  
    Get contrastive denoising training group with positive and negative samples from ground truths.

    Args:
        batch (dict): A dict that includes 'gt_cls' (torch.Tensor with shape (num_gts, )), 'gt_bboxes'
            (torch.Tensor with shape (num_gts, 4)), 'gt_groups' (List[int]) which is a list of batch size length
            indicating the number of gts of each image.
        num_classes (int): Number of classes.
        num_queries (int): Number of queries.
        class_embed (torch.Tensor): Embedding weights to map class labels to embedding space.
        num_dn (int, optional): Number of denoising queries.
        cls_noise_ratio (float, optional): Noise ratio for class labels.
        box_noise_scale (float, optional): Noise scale for bounding box coordinates.
        training (bool, optional): If it's in training mode.

    Returns:
        padding_cls (Optional[torch.Tensor]): The modified class embeddings for denoising.
        padding_bbox (Optional[torch.Tensor]): The modified bounding boxes for denoising.
        attn_mask (Optional[torch.Tensor]): The attention mask for denoising.
        dn_meta (Optional[Dict]): Meta information for denoising.
    r   N)NNNNr7   r   clsbboxes	batch_idxr   r*   )r"   devicerc   .r
   r.   r1   )minmaxgư>)eps)rg   r-   c                 S   s    g | ]}t jt|t jd qS r    )r$   r%   r:   r&   )r'   numr   r   r   r)      s     z!get_cdn_group.<locals>.<listcomp>c                       g | ]} |  qS r   r   r'   r2   map_indicesmax_numsr   r   r)          r+   c                    rl   r   r   rm   rn   r   r   r)      rq   r!   Tc                 S   s   g | ]}| d qS )r*   )reshape)r'   pr   r   r   r)     s    )
dn_pos_idxdn_num_groupdn_num_split)r9   ri   lenrepeatr<   r$   aranger&   rg   randr8   nonzerorC   randint_liker"   r   	rand_likeclip_r   logitintzeroscatstackr:   boolrG   rI   listto)"batchnum_classesnum_queriesclass_embednum_dncls_noise_ratiobox_noise_scaletrainingr7   	total_num	num_grouprR   rO   gt_bboxb_idxdn_clsdn_bboxdn_b_idxneg_idxr   idx	new_label
known_bboxdiff	rand_sign	rand_partdn_cls_embedpadding_clspadding_bboxpos_idxtgt_size	attn_maskr2   dn_metar   rn   r   get_cdn_group   sz     
""
4444


r   )rb   rc   r.   F)r$   torch.nnnntorch.nn.functional
functionalr=   scipy.optimizer   ultralytics.utils.metricsr   ultralytics.utils.opsr   r   Moduler   r   r   r   r   r   <module>   s    