o
    Wh                      @   s   d dl mZ d dlZd dlmZ d dlmZmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZmZ G dd deZG dd deeZdS )    )deepcopyN)
functional)YOLOConcatDatasetbuild_dataloaderbuild_yolo_dataset)LoadVisualPrompt)check_det_dataset)DetectionValidator)SegmentationValidator)YOLOEDetect)
YOLOEModel)LOGGERTQDM)select_devicesmart_inference_modec                       sJ   e Zd ZdZe dd Z fddZdd Ze d fd
d	Z  Z	S )YOLOEDetectValidatoraS  
    A mixin class for YOLOE model validation that handles both text and visual prompt embeddings.

    This mixin provides functionality to validate YOLOE models using either text or visual prompt embeddings.
    It includes methods for extracting visual prompt embeddings from samples, preprocessing batches, and
    running validation with different prompt types.

    Attributes:
        device (torch.device): The device on which validation is performed.
        args (namespace): Configuration arguments for validation.
        dataloader (DataLoader): DataLoader for validation data.
    c              	   C   s  t |tsJ dd t|jjd  D }tjt||j	d j
| jd}tt|}d}|D ]}|d dtj }tj|t|d}	||	7 }q0|| j}t|t||d	}
|
D ]b}| |}|j|d
 |d d}|d }t|jd D ]D}|d ||k dtjjdd}tj|jd | jdd }||dt|< |D ]}||  || ||k d||  7  < qq{q^tj||dk ddd||dk< d||dk< |dS )aa  
        Extract visual prompt embeddings from training samples.

        This function processes a dataloader to compute visual prompt embeddings for each class
        using a YOLOE model. It normalizes the embeddings and handles cases where no samples
        exist for a class.

        Args:
            dataloader (torch.utils.data.DataLoader): The dataloader providing training samples.
            model (YOLOEModel): The YOLOE model from which to extract visual prompt embeddings.

        Returns:
            (torch.Tensor): Visual prompt embeddings with shape (1, num_classes, embed_dim).
        c                 S      g | ]	}| d d qS /r   split.0name r   U/var/www/vscode/kcb/lib/python3.10/site-packages/ultralytics/models/yolo/yoloe/val.py
<listcomp>2       z6YOLOEDetectValidator.get_visual_pe.<locals>.<listcomp>names)devicez)Get visual prompt embeddings from samplescls)	minlength)totaldescimgvisuals)visual	batch_idxr   T)sorted   N   )dimp)
isinstancer   listdatasetdatavaluestorchzeroslenmodelembedr    squeezetointuniquebincountr   
preprocessget_visual_perangeshapeonessumF	normalize	unsqueeze)self
dataloaderr6   r   	visual_pecls_visual_numr$   batchr!   countpbarpredsr(   ipad_clscr   r   r   r>   !   s4   

&, 
z"YOLOEDetectValidator.get_visual_pec                    s0   t  |}d|v r|d |d j|d< |S )zIPreprocess batch data, ensuring visuals are on the same device as images.r&   r%   )superr=   r9   r    )rF   rJ   	__class__r   r   r=   P   s   zYOLOEDetectValidator.preprocessc                 C   sz   t | j|| jj|d| jj|ddd}t|tr)|jD ]	}|j	t
  qn|j	t
  t|| jj| jjdddS )a  
        Create a dataloader for LVIS training visual prompt samples.

        This function prepares a dataloader for visual prompt embeddings (VPE) using the LVIS dataset.
        It applies necessary transformations and configurations to the dataset and returns a dataloader
        for validation purposes.

        Args:
            data (dict): Dataset configuration dictionary containing paths and settings.

        Returns:
            (torch.utils.data.DataLoader): The dataLoader for visual prompt samples.
        valF)moderectr   )shufflerank)r   argsgetr   rJ   r.   r   datasets
transformsappendr   r   workers)rF   r1   r0   dr   r   r   get_vpe_dataloaderW   s(   

z'YOLOEDetectValidator.get_vpe_dataloaderNFc                    s  |durN|j | _ |jj}dd t| jjjd  D }|r5td d| j	_
| | j|}||| ntd ||}||| t ||}|S |durX|sXJ dt| j	j | _ t|trrd	d
lm}	 |	|| j dd}| | j  t|p| j	j}
dd t|
d  D }|rtd d| j	_
| |
}| ||}||| t jt|d}|S t|jd trt|jd drt ||S td ||}||| t jt|d}|S )a9  
        Run validation on the model using either text or visual prompt embeddings.

        This method validates the model using either text prompts or visual prompts, depending
        on the `load_vp` flag. It supports validation during training (using a trainer object)
        or standalone validation with a provided model.

        Args:
            trainer (object, optional): Trainer object containing the model and device.
            model (YOLOEModel, optional): Model to validate. Required if `trainer` is not provided.
            refer_data (str, optional): Path to reference data for visual prompts.
            load_vp (bool): Whether to load visual prompts. If False, text prompts are used.

        Returns:
            (dict): Validation statistics containing metrics computed during validation.
        Nc                 S   r   r   r   r   r   r   r   r      r   z1YOLOEDetectValidator.__call__.<locals>.<listcomp>r   z!Validate using the visual prompt.FzValidate using the text prompt.z5Refer data is only used for visual prompt validation.r   )attempt_load_weightsT)r    inplacec                 S   r   r   r   r   r   r   r   r      r   )r6   r   lrpc)r    emar/   rG   r0   r1   r2   r   inforY   halfr>   set_classesget_text_perQ   __call__r   r.   strultralytics.nn.tasksra   evalr9   r   r`   r   r6   r   hasattr)rF   trainerr6   
refer_dataload_vpr   vpetpestatsra   r1   rG   rR   r   r   ri   z   sL    





 

zYOLOEDetectValidator.__call__)NNNF)
__name__
__module____qualname____doc__r   r>   r=   r`   ri   __classcell__r   r   rR   r   r      s    
.#r   c                   @   s   e Zd ZdZdS )YOLOESegValidatorzRYOLOE segmentation validator that supports both text and visual prompt embeddings.N)rt   ru   rv   rw   r   r   r   r   ry      s    ry   )copyr   r3   torch.nnr   rC   ultralytics.datar   r   r   ultralytics.data.augmentr   ultralytics.data.utilsr   ultralytics.models.yolo.detectr	   ultralytics.models.yolo.segmentr
   ultralytics.nn.modules.headr   rk   r   ultralytics.utilsr   r   ultralytics.utils.torch_utilsr   r   r   ry   r   r   r   r   <module>   s    *