o
    WhE                     @   s   d dl Z d dlmZmZ d dlmZ d dlZd dlmZmZm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZ d d
lmZ ddlmZ G dd deZG dd deZG dd deZG dd deeZG dd deZdS )    N)copydeepcopy)Path)YOLOConcatDatasetbuild_groundingbuild_yolo_dataset)LoadVisualPromptcheck_det_dataset)DetectionTrainerDetectionValidator)
YOLOEModel)DEFAULT_CFGLOGGERRANK)de_parallel   )YOLOEDetectValidatorc                       sP   e Zd ZdZeddf fdd	ZdddZdd	 ZdddZ fddZ	  Z
S )YOLOETrainerz"A base trainer for YOLOE training.Nc                    s(   |du ri }d|d< t  ||| dS )aQ  
        Initialize the YOLOE Trainer with specified configurations.

        This method sets up the YOLOE trainer with the provided configuration and overrides, initializing
        the training environment, model, and callbacks for YOLOE object detection training.

        Args:
            cfg (dict): Configuration dictionary with default training settings from DEFAULT_CFG.
            overrides (dict, optional): Dictionary of parameter overrides for the default configuration.
            _callbacks (list, optional): List of callback functions to be applied during training.
        NFoverlap_masksuper__init__selfcfg	overrides
_callbacks	__class__ W/var/www/vscode/kcb/lib/python3.10/site-packages/ultralytics/models/yolo/yoloe/train.pyr      s   zYOLOETrainer.__init__Tc                 C   sN   t t|tr
|d n|| jd t| jd d|otdkd}|r%|| |S )aR  
        Return a YOLOEModel initialized with the specified configuration and weights.

        Args:
            cfg (dict | str | None): Model configuration. Can be a dictionary containing a 'yaml_file' key,
                a direct path to a YAML file, or None to use default configuration.
            weights (str | Path | None): Path to pretrained weights file to load into the model.
            verbose (bool): Whether to display model information during initialization.

        Returns:
            (YOLOEModel): The initialized YOLOE model.

        Notes:
            - The number of classes (nc) is hard-coded to a maximum of 80 following the official configuration.
            - The nc parameter here represents the maximum number of different text samples in one image,
              rather than the actual number of classes.
        	yaml_filechannelsncP   chr$   verbose)r   
isinstancedictdataminr   load)r   r   weightsr)   modelr    r    r!   	get_model(   s   

zYOLOETrainer.get_modelc                 C   "   d| _ t| j| jt| j| jdS z7Returns a DetectionValidator for YOLO model validation.)boxclsdfl)save_dirargsr   )
loss_namesr   test_loaderr7   r   r8   	callbacksr   r    r    r!   get_validatorG      zYOLOETrainer.get_validatortrainc              
   C   sH   t t| jrt| jj  ndd}t| j||| j||dk||dkdS )a  
        Build YOLO Dataset.

        Args:
            img_path (str): Path to the folder containing images.
            mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
            batch (int, optional): Size of batches, this is for `rect`.

        Returns:
            (Dataset): YOLO dataset configured for training or validation.
        r       valr?   )moderectstridemulti_modal)maxintr0   r   rD   r   r8   r,   )r   img_pathrB   batchgsr    r    r!   build_datasetN   s   $zYOLOETrainer.build_datasetc                    s   t  |}|S )KProcess batch for training, moving text features to the appropriate device.)r   preprocess_batchr   rI   r   r    r!   rM   _   s   zYOLOETrainer.preprocess_batchNNTr?   N)__name__
__module____qualname____doc__r   r   r1   r=   rK   rM   __classcell__r    r    r   r!   r      s    

r   c                   @   s   e Zd ZdZdddZdS )YOLOEPETrainerz,Fine-tune YOLOE model in linear probing way.NTc                 C   s:  t t|tr
|d n|| jd | jd |otdkd}|jd `|dus(J d|r/|| |  t	| jd 
 }||}||| |jd |j t|jd jd	 d
 d|jd jd	 d
< t|jd jd d
 d|jd jd d
< t|jd jd
 d
 d|jd jd
 d
< |`|  |S )a  
        Return YOLOEModel initialized with specified config and weights.

        Args:
            cfg (dict | str, optional): Model configuration.
            weights (str, optional): Path to pretrained weights.
            verbose (bool): Whether to display model information.

        Returns:
            (YOLOEModel): Initialized model with frozen layers except for specific projection layers.
        r"   r#   r$   r&   r'   Nz7Pretrained weights must be provided for linear probing.namesr      Tr   )r   r*   r+   r,   r   r0   savper.   evallistvaluesget_text_peset_classesfuseper   cv3requires_grad_r?   )r   r   r/   r)   r0   rW   tper    r    r!   r1   h   s*   



...zYOLOEPETrainer.get_modelrO   )rQ   rR   rS   rT   r1   r    r    r    r!   rV   e   s    rV   c                       sl   e Zd ZdZeddf fdd	ZdddZdd	 Z fd
dZdddZ	dd Z
dd Z fddZ  ZS )YOLOETrainerFromScratchz Train YOLOE models from scratch.Nc                    s    |du ri }t  ||| dS )a  
        Initialize the YOLOETrainerFromScratch class.

        This class extends YOLOETrainer to train YOLOE models from scratch. It inherits all functionality from
        the parent class while providing specialized initialization for training without pre-trained weights.

        Args:
            cfg (dict, optional): Configuration dictionary with training parameters. Defaults to DEFAULT_CFG.
            overrides (dict, optional): Dictionary of parameter overrides for configuration.
            _callbacks (list, optional): List of callback functions to be executed during training.

        Examples:
            >>> from ultralytics.models.yoloe.train import YOLOETrainerFromScratch
            >>> trainer = YOLOETrainerFromScratch()
            >>> trainer.train()
        Nr   r   r   r    r!   r      s   z YOLOETrainerFromScratch.__init__r?   c              	      s   t tjrtjj  ndd|dkr#tj| j|ddS  fdd|D }|  t	|dkr>t
|S |d S )	a  
        Build YOLO Dataset for training or validation.

        This method constructs appropriate datasets based on the mode and input paths, handling both
        standard YOLO datasets and grounding datasets with different formats.

        Args:
            img_path (List[str] | str): Path to the folder containing images or list of paths.
            mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
            batch (int, optional): Size of batches, used for rectangular training/validation.

        Returns:
            (YOLOConcatDataset | Dataset): The constructed dataset for training or validation.
        r   r@   r?   F)rB   rC   rD   c              
      sN   g | ]#}t |trtj| j| d dntj|d |d  dqS )T)rD   rE   rH   	json_file)rD   )r*   strr   r8   training_datar   ).0im_pathrI   rJ   r   r    r!   
<listcomp>   s    z9YOLOETrainerFromScratch.build_dataset.<locals>.<listcomp>r   )rF   rG   r0   r   rD   r   r8   r,   set_text_embeddingslenr   )r   rH   rB   rI   datasetsr    rj   r!   rK      s   $z%YOLOETrainerFromScratch.build_datasetc                 C   sP   t  }|D ]}t|dsq||jO }q|d j}| j||t|jd d| _dS )a  
        Set text embeddings for datasets to accelerate training by caching category names.

        This method collects unique category names from all datasets, then generates and caches text embeddings
        for these categories to improve training efficiency.

        Args:
            datasets (List[Dataset]): List of datasets from which to extract category names.
            batch (int | None): Batch size used for processing.

        Notes:
            This method collects category names from datasets that have the 'category_names' attribute,
            then uses the first dataset's image path to determine where to cache the generated text embeddings.
        category_namesr   ztext_embeddings.pt)
cache_pathN)sethasattrro   rH   generate_text_embeddingsr   parenttext_embeddings)r   rn   rI   ro   datasetrH   r    r    r!   rl      s   

z+YOLOETrainerFromScratch.set_text_embeddingsc                    sf   t  |}ttj|d  }t fdd|D  j}|	t
|d d|jd }||d< |S )rL   textsc                    s   g | ]} j | qS r    )ru   )rh   textr<   r    r!   rk      s    z<YOLOETrainerFromScratch.preprocess_batch.<locals>.<listcomp>r&   	txt_feats)r   rM   r[   	itertoolschaintorchstacktodevicereshaperm   shape)r   rI   rw   ry   r   r<   r!   rM      s    z(YOLOETrainerFromScratch.preprocess_batchembeddings.ptc                 C   sV   |  r	t|S | jdusJ | jj||dd}tt||d}t|| |S )am  
        Generate text embeddings for a list of text samples.

        Args:
            texts (List[str]): List of text samples to encode.
            batch (int): Batch size for processing.
            cache_path (str | Path): Path to save/load cached embeddings.

        Returns:
            (dict): Dictionary mapping text samples to their embeddings.
        NT)without_reprtar   )	existsr|   r.   r0   r]   r+   zipsqueezesave)r   rw   rI   rp   ry   txt_mapr    r    r!   rs      s   
z0YOLOETrainerFromScratch.generate_text_embeddingsc                    s  i }| j j}|ddsJ d|ddsJ ddd | D }t|d dks7J d	t|d  d
d|d d d v rCdnd|d D ]}|ddu rSqIt|d |d  |d< qIdD ]A  fdd|  D | < |  d}|du r~qbt|tr|n|g}|D ]}t|tsJ dt	| q|   |7  < qb|d d d |d< |d d d |d< |d d d |d< || _| j j
rtd ddi| jd< d| jd< i | _|d D ]}| j j
rddi|d< d|d< || j|d < q|d |d d fS )a  
        Get train and validation paths from data dictionary.

        Processes the data configuration to extract paths for training and validation datasets,
        handling both YOLO detection datasets and grounding datasets.

        Returns:
            (str): Train dataset path.
            (str): Validation dataset path.

        Raises:
            AssertionError: If train or validation datasets are not found, or if validation has multiple datasets.
        r?   Fztrain dataset not foundrA   zvalidation dataset not foundc                 S   s(   i | ]\}}|d d | dg D qS )c                 S   s   g | ]}t |qS r    r	   rh   dr    r    r!   rk     s    zBYOLOETrainerFromScratch.get_dataset.<locals>.<dictcomp>.<listcomp>	yolo_data)get)rh   kvr    r    r!   
<dictcomp>  s   ( z7YOLOETrainerFromScratch.get_dataset.<locals>.<dictcomp>r   z6Only support validating on 1 dataset for now, but got .lvisr   minivalNpath)r?   rA   c                    s    g | ]}| d krd n qS )r?   r    r   s	val_splitr    r!   rk     s     z7YOLOETrainerFromScratch.get_dataset.<locals>.<listcomp>grounding_dataz:Grounding data should be provided in dict format, but got r$   rW   z)Overriding class names with single class.object)r8   r,   r   itemsrm   rf   r*   r[   r+   type
single_clsr   inforg   )r   
final_data	data_yamlr,   r   r   gr    r   r!   get_dataset   sF   (

z#YOLOETrainerFromScratch.get_datasetc                 C      dS )z+Do not plot labels for YOLO-World training.Nr    r<   r    r    r!   plot_training_labels5  s   z,YOLOETrainerFromScratch.plot_training_labelsc                    sH   | j jd d d }|| jj _t|trd|v rdnd| jj _t  S )z
        Perform final evaluation on the validation dataset.

        Configures the validator with the appropriate dataset and split before running evaluation.

        Returns:
            (dict): Evaluation metrics.
        rA   r   r   r   r   )r8   r,   	validatorr*   rf   splitr   
final_eval)r   rA   r   r    r!   r   9  s   	
 
z"YOLOETrainerFromScratch.final_evalrP   )r   )rQ   rR   rS   rT   r   r   rK   rl   rM   rs   r   r   r   rU   r    r    r   r!   rd      s    


5rd   c                       s0   e Zd ZdZdd Z fddZdd Z  ZS )YOLOEPEFreeTrainerzTrain prompt-free YOLOE model.c                 C   r2   r3   )r9   r   r:   r7   r   r8   r;   r<   r    r    r!   r=   K  r>   z YOLOEPEFreeTrainer.get_validatorc                    s   t t| |}|S )zaPreprocesses a batch of images for YOLOE training, adjusting formatting and dimensions as needed.)r   r   rM   rN   r   r    r!   rM   R  s   z#YOLOEPEFreeTrainer.preprocess_batchc                 C   r   )a8  
        Set text embeddings for datasets to accelerate training by caching category names.

        This method collects unique category names from all datasets, generates text embeddings for them,
        and caches these embeddings to improve training efficiency. The embeddings are stored in a file
        in the parent directory of the first dataset's image path.

        Args:
            datasets (List[Dataset]): List of datasets containing category names to process.
            batch (int): Batch size for processing text embeddings.

        Notes:
            The method creates a dictionary mapping text samples to their embeddings and stores it
            at the path specified by 'cache_path'. If the cache file already exists, it will be loaded
            instead of regenerating the embeddings.
        Nr    )r   rn   rI   r    r    r!   rl   W  s   z&YOLOEPEFreeTrainer.set_text_embeddings)rQ   rR   rS   rT   r=   rM   rl   rU   r    r    r   r!   r   H  s
    r   c                       s:   e Zd ZdZd
 fdd	Z fddZ fdd	Z  ZS )YOLOEVPTrainerz&Train YOLOE model with visual prompts.r?   Nc                    sJ   t  |||}t|tr|jD ]	}|jt  q|S |jt  |S )a	  
        Build YOLO Dataset for training or validation with visual prompts.

        Args:
            img_path (List[str] | str): Path to the folder containing images or list of paths.
            mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
            batch (int, optional): Size of batches, used for rectangular training/validation.

        Returns:
            (Dataset): YOLO dataset configured for training or validation, with visual prompts for training mode.
        )r   rK   r*   r   rn   
transformsappendr   )r   rH   rB   rI   rv   r   r   r    r!   rK   n  s   

zYOLOEVPTrainer.build_datasetc                    sP   t    t| jjtr| jjjD ]	}|jt	  qdS | jjjt	  dS )zPClose mosaic augmentation and add visual prompt loading to the training dataset.N)
r   _close_dataloader_mosaicr*   train_loaderrv   r   rn   r   r   r   )r   r   r   r    r!   r     s   
z'YOLOEVPTrainer._close_dataloader_mosaicc                    s$   t  |}|d | j|d< |S )zcPreprocesses a batch of images for YOLOE training, moving visual prompts to the appropriate device.visuals)r   rM   r~   r   rN   r   r    r!   rM     s   zYOLOEVPTrainer.preprocess_batchrP   )rQ   rR   rS   rT   rK   r   rM   rU   r    r    r   r!   r   k  s
    	r   ) rz   r   r   pathlibr   r|   ultralytics.datar   r   r   ultralytics.data.augmentr   ultralytics.data.utilsr
   ultralytics.models.yolo.detectr   r   ultralytics.nn.tasksr   ultralytics.utilsr   r   r   ultralytics.utils.torch_utilsr   rA   r   r   rV   rd   r   r   r    r    r    r!   <module>   s$   Q. 6#