o
    VhE                     @   sv   d dl Z d dlZd dlmZ d dlmZmZ d dlmZm	Z	m
Z
 dddZed d	d
fddZedkr9ed dS dS )    N)Path)IMG_FORMATSimg2label_paths)DATASETS_DIRLOGGERTQDM皙?c                 C   sp  t | }t | d}|d |d }}|jdd |jdd |jdd dd | D }tdd	 |D }t| d
| d}td| d| d|ddd| dd	 |D ]R}	||	j jdd ||	j jdd t|		d}
t
|
 tt|
| }|
d| D ]}t|||	j |j  q|
|d D ]}t|||	j |j  qqZtd| d |S )u  
    Split dataset into train and val directories in a new directory.

    Creates a new directory '{source_dir}_split' with train/val subdirectories, preserving the original class
    structure with an 80/20 split by default.

    Directory structure:
        Before:
            caltech/
            ├── class1/
            │   ├── img1.jpg
            │   ├── img2.jpg
            │   └── ...
            ├── class2/
            │   ├── img1.jpg
            │   └── ...
            └── ...

        After:
            caltech_split/
            ├── train/
            │   ├── class1/
            │   │   ├── img1.jpg
            │   │   └── ...
            │   ├── class2/
            │   │   ├── img1.jpg
            │   │   └── ...
            │   └── ...
            └── val/
                ├── class1/
                │   ├── img2.jpg
                │   └── ...
                ├── class2/
                │   └── ...
                └── ...

    Args:
        source_dir (str | Path): Path to Caltech dataset root directory.
        train_ratio (float): Ratio for train split, between 0 and 1.

    Examples:
        >>> # Split dataset with default 80/20 ratio
        >>> split_classify_dataset("path/to/caltech")
        >>> # Split with custom ratio
        >>> split_classify_dataset("path/to/caltech", 0.75)
    _splittrainvalT)exist_okc                 S   s   g | ]}|  r|qS  )is_dir.0dr   r   J/var/www/vscode/kcb/lib/python3.10/site-packages/ultralytics/data/split.py
<listcomp>B   s    z*split_classify_dataset.<locals>.<listcomp>c                 s   s"    | ]}t t|d V  qdS )*.*N)lenlistglobr   r   r   r   	<genexpr>C   s     z)split_classify_dataset.<locals>.<genexpr>z
 classes, z imagesz
Splitting z (z) into z.0%z train,    z val...r   NzSplit complete in u    ✅)r   mkdiriterdirsumr   r   infonamer   r   randomshuffleintshutilcopy2)
source_dirtrain_ratiosource_path
split_path
train_pathval_path
class_dirstotal_imagesstats	class_dirimage_files	split_idximgr   r   r   split_classify_dataset	   s.   /,
r1   zcoco8/images)g?g?g        Fc              	   C   s"  t | } tdd | dD }t|}td tjg d||d}g d}|D ]}| j|  r9| j| 	  q)t
d|  d	|   tt|||d
D ]?\}}	|rbt tt|	gd  rt| j||  ddd}
|
d|	| j  d  W d   n1 sw   Y  qOdS )a  
    Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.

    Args:
        path (Path, optional): Path to images directory.
        weights (list | tuple, optional): Train, validation, and test split fractions.
        annotated_only (bool, optional): If True, only images with an associated txt file are used.

    Examples:
        >>> from ultralytics.data.split import autosplit
        >>> autosplit()
    c                 s   s*    | ]}|j d d  tv r|V  qdS )r   N)suffixlowerr   )r   xr   r   r   r   i   s   ( zautosplit.<locals>.<genexpr>r   r   )r   r      )weightsk)zautosplit_train.txtzautosplit_val.txtzautosplit_test.txtzAutosplitting images from z!, using *.txt labeled images only)totalazutf-8)encodingz./
N)r   sortedrglobr   r   seedchoicesparentexistsunlinkr   r   r   zipr   stropenwriterelative_toas_posix)pathr6   annotated_onlyfilesnindicestxtr4   ir0   fr   r   r   	autosplit[   s$   
"rQ   __main__z../datasets/caltech101)r   )r   r"   pathlibr   ultralytics.data.utilsr   r   ultralytics.utilsr   r   r   r1   rQ   __name__r   r   r   r   <module>   s    
R