o
    Vhŉ                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlmZ d dlZd dlZd dlmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZm Z m!Z! d dl"m#Z#m$Z$m%Z% d d	l&m'Z'm(Z(m)Z) d d
l*m+Z+ dZ,h dZ-h dZ.e/e0dd1 dkZ2de- de. Z3dd Z4d>ddZ5dd Z6dejfddZ7d d! Z8d"d# Z9d$d% Z:d?d'd(Z;d@d)d*Z<d@d+d,Z=d-e
d.e
fd/d0Z>dAd1d2Z?dBd3d4Z@G d5d6 d6ZAdCd8d9ZBd:d; ZCd<d= ZDdS )D    N)
ThreadPool)Path)
is_tarfile)ImageImageOps)check_class_names)DATASETS_DIRLOGGERNUM_THREADSROOTSETTINGS_FILETQDM	clean_urlcolorstremojisis_dir_writeable	yaml_load	yaml_save)
check_file
check_fontis_ascii)downloadsafe_download
unzip_file)segments2boxeszJSee https://docs.ultralytics.com/datasets for dataset formatting guidance.>   bmpdngjpgmpopfmpngtifheicjpegtiffwebp>   tsasfavigifm4vmkvmovmp4mpgwmvmpegwebm
PIN_MEMORYTtruezSupported formats are:
images: z	
videos: c                    s:   t j dt j t j dt j   fdd| D S )z0Define label paths as a function of image paths.imageslabelsc                    s.   g | ]} | d dd d d qS )   .r   z.txt)joinrsplit.0xsasb J/var/www/vscode/kcb/lib/python3.10/site-packages/ultralytics/data/utils.py
<listcomp>/   s   . z#img2label_paths.<locals>.<listcomp>)ossep)	img_pathsr@   r=   rA   img2label_paths,   s   &rF   
   2       c              	   C   s  | rt | dkrt| d dS t| t|t | } g }g }g }| D ]W}zLt }	t	|j
}
|t |	 d  ||
 t }	t|d}| }W d   n1 sZw   Y  t |	 }|dkrr||
d |  W q% ty|   Y q%w |st| d dS t|}t |dkrtj|dd	nd}d
t|d dd}d|dd|dd}|rt|}t |dkrtj|dd	nd}d|dd|dd}nd}||k s||k rt| d| | | d dS t| d| | | d dS )am  
    Check dataset file access speed and provide performance feedback.

    This function tests the access speed of dataset files by measuring ping (stat call) time and read speed.
    It samples up to 5 files from the provided list and warns if access times exceed the threshold.

    Args:
        files (list): List of file paths to check for access speed.
        threshold_ms (float, optional): Threshold in milliseconds for ping time warnings.
        threshold_mb (float, optional): Threshold in megabytes per second for read speed warnings.
        max_files (int, optional): The maximum number of files to check.
        prefix (str, optional): Prefix string to add to log messages.

    Examples:
        >>> from pathlib import Path
        >>> image_files = list(Path("dataset/images").glob("*.jpg"))
        >>> check_file_speeds(image_files, threshold_ms=15)
    r   z%Image speed checks: No files to checkNi  rbi   z*Image speed checks: failed to access filesr6   )ddofz, size: i   .1fz KBzping:    ±z msz, read: z MB/srJ   u   Fast image access ✅ ()zSlow image access detected (z). Use local storage instead of remote/mounted storage for better performance. See https://docs.ultralytics.com/guides/model-training-tips/)lenr	   warningrandomsamplemintimeperf_counterrC   statst_sizeappendopenread	Exceptionnpmeanstdinfo)filesthreshold_msthreshold_mb	max_filesprefix
ping_times
file_sizesread_speedsfstart	file_sizefile_obj_	read_timeavg_pingstd_pingsize_msgping_msg	avg_speed	std_speed	speed_msgr@   r@   rA   check_file_speeds2   sR   



"rv   c              	   C   sb   d}| D ]}z
|t |j7 }W q ty   Y qw tt| }|d	|   |
 S )z?Returns a single hash value of a list of paths (files or dirs).r   rJ   )rC   rW   rX   OSErrorhashlibsha256strencodeupdater8   	hexdigest)pathssizephr@   r@   rA   get_hash~   s   r   imgc                 C   sl   | j }| jdkr4z!|   }r$|dd}|dv r'|d |d f}W |S W |S W |S  ty3   Y |S w |S )z Returns exif-corrected PIL size.JPEGi  N>         r6   r   )r   formatgetexifgetr\   )r   sexifrotationr@   r@   rA   	exif_size   s"   
r   c              
   C   s\  | \\}}}d\}}}z}t |}|  t|}|d |d f}|d dk|d dk@ s5J d| d|j tv sGJ d|j dt |j d	v rt|d
*}	|	dd |		 dkrvt
t |j|dddd | | d}W d   n1 sw   Y  d}W n ty }
 zd}| | d|
 }W Y d}
~
nd}
~
ww ||f|||fS )zVerify one image.)r   r   rJ   r6   r   	   image size  <10 pixelszInvalid image format .    r   r#   rK         r   d   subsamplingquality!: corrupt JPEG restored and savedN : ignoring corrupt image/label: )r   rZ   verifyr   r   lowerIMG_FORMATSFORMATS_HELP_MSGseekr[   r   exif_transposesaver\   )argsim_fileclsre   nfncmsgimshaperi   er@   r@   rA   verify_image   s0   

($r   c                    s  | \}}}}}}}}dddddg df\}	}
}}} }z(t |}|  t|}|d |d f}|d dk|d dk@ sEJ d| d|j tv sWJ d|j d	t |j d
v rt|d*}|dd |	 dkrt
t |j|dddd | | d}W d   n1 sw   Y  tj|rd}
t|ddJ}dd |	   D }tdd |D r|stjdd |D tjd}dd |D  t|ddt fd}tj|tjd}W d   n1 sw   Y  t| }r|r/|jd d||  ksJ dd||   d|ddddf d|ddddf }n|jd dksBJ d |jd  d!|ddddf }| dks^J d"||dk  | dkspJ d#||dk   |r{d|dddf< |dddf  }||k sJ d$t| d%| d&|d  tj|dd'd(\}}t||k r|| } r fd)d|D  | | d*|t|  d+}n+d}tjd|rd||  ndftjd}nd}	tjd|rd||  ndftjd}|r-|ddddf d||}|dkr-t |d, dk |d- dk B d.d/!tj}tj||d0 gdd1}|ddddf }||| ||	|
|||f
W S  t"ym } zd}| | d2| }ddddd|	|
|||g
W  Y d}~S d}~ww )3zVerify one image-label pair.r   rJ   Nr6   r   r   r   zinvalid image format r   r   rK   r   r   r   r   r   r   r   utf-8encodingc                 S   s   g | ]
}t |r| qS r@   )rP   splitr:   r@   r@   rA   rB          z&verify_image_label.<locals>.<listcomp>c                 s   s    | ]	}t |d kV  qdS )r   N)rP   r:   r@   r@   rA   	<genexpr>   s    z%verify_image_label.<locals>.<genexpr>c                 S   s   g | ]}|d  qS )r   r@   r:   r@   r@   rA   rB          dtypec                 S   s,   g | ]}t j|d d t jdddqS )r6   Nr   r   )r]   arrayfloat32reshaper:   r@   r@   rA   rB         , r   rI   zlabels require z columns eachzlabels require 5 columns, z columns detectedz,non-normalized or out of bounds coordinates znegative label values zLabel class z exceeds dataset class count z. Possible class labels are 0-T)axisreturn_indexc                    s   g | ]} | qS r@   r@   r:   segmentsr@   rA   rB      r   : z duplicate labels removed).r   ).r6   g              ?).N)r   r   )#r   rZ   r   r   r   r   r   r   r   r[   r   r   r   rC   pathisfilestrip
splitlinesanyr]   r   r   concatenater   r   rP   r   maxrT   intuniquezeroswhereastyper\   )r   r   lb_filere   keypointnum_clsnkptndim
single_clsnmr   ner   r   	keypointsr   r   ri   lbclassesnlpointsmax_clsrm   ikpt_maskr   r@   r   rA   verify_image_label   s    
($0.&$$(&
*$r   c              
   C   s  ddl m} ddlm} tt| }|jdd \}}g }t|dd=}	|	D ]2}
t	t
|
 \}}}}}||d  | }||d  | }|| }|| }|||||t|f q(W d   n1 sew   Y  |d\}}|D ]K\}}}}}tdd	 ||d
D }|j||f||d|dd}|| d|d  d|d   d|d   }|j||d || |dk rdnd|d qs|| |  dS )a  
    Visualizes YOLO annotations (bounding boxes and class labels) on an image.

    This function reads an image and its corresponding annotation file in YOLO format, then
    draws bounding boxes around detected objects and labels them with their respective class names.
    The bounding box colors are assigned based on the class ID, and the text color is dynamically
    adjusted for readability, depending on the background color's luminance.

    Args:
        image_path (str): The path to the image file to annotate, and it can be in formats supported by PIL.
        txt_path (str): The path to the annotation file in YOLO format, that should contain one line per object.
        label_map (dict): A dictionary that maps class IDs (integers) to class labels (strings).

    Examples:
        >>> label_map = {0: "cat", 1: "dog", 2: "bird"}  # It should include all annotated classes details
        >>> visualize_image_annotations("path/to/image.jpg", "path/to/annotations.txt", label_map)
    r   N)colorsr   r   r   r6   c                 s   s    | ]}|d  V  qdS )   Nr@   )r;   cr@   r@   rA   r         z.visualize_image_annotations.<locals>.<genexpr>Tnone)	linewidth	edgecolor	facecolorgz6?g,C?g]m{?rI   g      ?whiteblack)colorbackgroundcolor)matplotlib.pyplotpyplotultralytics.utils.plottingr   r]   r   r   rZ   r   mapfloatr   rY   r   subplotstuple	Rectangle	add_patchtextimshowshow)
image_pathtxt_path	label_mappltr   r   
img_height	img_widthannotationsfilelineclass_idx_centery_centerwidthheightr<   ywr   figaxlabelr   rect	luminancer@   r@   rA   visualize_image_annotations   s0   
$*
r  r6   c                 C   sp   t j| t jd}t j|t jd}||jd ddf}tj|||d | d | | d | }}t	|||fS )a  
    Convert a list of polygons to a binary mask of the specified image size.

    Args:
        imgsz (tuple): The size of the image as (height, width).
        polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where
                                     N is the number of polygons, and M is the number of points such that M % 2 = 0.
        color (int, optional): The color value to fill in the polygons on the mask.
        downsample_ratio (int, optional): Factor by which to downsample the mask.

    Returns:
        (np.ndarray): A binary mask of the specified image size with the polygons filled in.
    r   r   r   r   )r   r6   )
r]   r   uint8asarrayint32r   r   cv2fillPolyresize)imgszpolygonsr   downsample_ratiomasknhnwr@   r@   rA   polygon2mask'  s   r  c                    s   t  fdd|D S )a  
    Convert a list of polygons to a set of binary masks of the specified image size.

    Args:
        imgsz (tuple): The size of the image as (height, width).
        polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where
                                     N is the number of polygons, and M is the number of points such that M % 2 = 0.
        color (int): The color value to fill in the polygons on the masks.
        downsample_ratio (int, optional): Factor by which to downsample each mask.

    Returns:
        (np.ndarray): A set of binary masks of the specified image size with the polygons filled in.
    c                    s"   g | ]}t |d g qS )r   )r  r   r:   r   r  r  r@   rA   rB   L     " z"polygons2masks.<locals>.<listcomp>)r]   r   )r  r  r   r  r@   r  rA   polygons2masks>  s   r  c           
      C   s   t j| d | | d | ft|dkrt jnt jd}g }g }tt|D ] }t| || dg|dd}||	|j
 ||  q%t |}t | }t || }tt|D ]}	||	 |	d  }|| }t j|d|	d d}q^||fS )z!Return a (640, 640) overlap mask.r   r6   r   r   r   )r  r   )a_mina_max)r]   r   rP   r  r  ranger  r   rY   r   r   sumr  argsortr   clip)
r  r   r  masksareasmssir  indexr   r@   r@   rA   polygons2masks_overlapO  s$   
r'  r   returnc                    s   t  dpt  d}|sJ d   dt|dkr) fdd|D }t|dks@J d   dt| d	| |d
 S )a  
    Find and return the YAML file associated with a Detect, Segment or Pose dataset.

    This function searches for a YAML file at the root level of the provided directory first, and if not found, it
    performs a recursive search. It prefers YAML files that have the same stem as the provided path.

    Args:
        path (Path): The directory path to search for the YAML file.

    Returns:
        (Path): The path of the found YAML file.
    z*.yamlzNo YAML file found in ''r6   c                    s   g | ]
}|j  j kr|qS r@   )stemr;   ri   r   r@   rA   rB   u  r   z%find_dataset_yaml.<locals>.<listcomp>zExpected 1 YAML file in 'z', but found z.
r   )listglobrglobresolverP   )r   ra   r@   r,  rA   find_dataset_yamle  s   .r1  c                    s  t | }d}t|st|r#t|tddd}tt| }|jd}}t|dd dD ]&}| vrQ|dks9d vrEt	t
|  d	| d
td  d d< q+d vrcd vrct	t
|  dd v rd v rt d  d krt	t
|  dt d  d d  dd vrdd t d D  d< nt d  d< t d  d<  dd d< t|pĈ dpt ddj st   d< dD ]E} |rt | tr |   }| s | dr | dd   }t| |< qևfdd | D  |< qև fdddD \}}|rdd t|tr5|n|gD }td d |D st| }	td d!|	 d"d#d |D d$  d%}
|ri|rit|
 n|
d&t d't d%7 }
t|
t }d}|d(r| d)rt|tdd* n|d+rtd,| d- t!"|}nt#|d. i d/t$t | d0 d1}|d2v rd3| d4t%d5t nd6| d7}td8| d9 t&t' d rd:  S d;  S )<ah  
    Download, verify, and/or unzip a dataset if not found locally.

    This function checks the availability of a specified dataset, and if not found, it has the option to download and
    unzip the dataset. It then reads and parses the accompanying YAML data, ensuring key requirements are met and also
    resolves paths related to the dataset.

    Args:
        dataset (str): Path to the dataset or dataset descriptor (like a YAML file).
        autodownload (bool, optional): Whether to automatically download the dataset if not found.

    Returns:
        (dict): Parsed dataset information and paths.
    rJ   TFdirunzipdelete)append_filename)trainvalr8  
validation 'uE   :' key missing ❌.
'train' and 'val' are required in all data YAMLs.zBrenaming data YAML 'validation' key to 'val' to match YOLO format.namesr   uI    key missing ❌.
 either 'names' or 'nc' are required in all data YAMLs.z 'names' length z
 and 'nc: z' must match.c                 S   s   g | ]}d | qS )class_r@   )r;   r   r@   r@   rA   rB     s    z%check_det_dataset.<locals>.<listcomp>channels   r   	yaml_file)r7  r8  testminivalz../Nc                    s   g | ]
}t  |  qS r@   )rz   r0  r:   r,  r@   rA   rB     r   c                 3   s    | ]}  |V  qd S N)r   r:   )datar@   rA   r     s    z$check_det_dataset.<locals>.<genexpr>)r8  r   c                 S   s   g | ]}t | qS r@   )r   r0  r:   r@   r@   rA   rB         c                 s   s    | ]}|  V  qd S rB  existsr:   r@   r@   rA   r     r   z	Dataset 'z"' images not found, missing path 'c                 S   s   g | ]}|  s|qS r@   rE  r:   r@   r@   rA   rB     rD  r   r)  z%
Note dataset download directory is 'z'. You can update this in 'http.zip)urlr3  r5  bash zRunning z ...yaml(r6   zs)>   Nr   u   success ✅ z, saved to boldzfailure u    ❌zDataset download 
z	Arial.ttfzArial.Unicode.ttf)(r   zipfile
is_zipfiler   r   r   r1  parentr   SyntaxErrorr   r	   rQ   poprP   r  r   r   r   is_absoluter0  
isinstancerz   rF  
startswithr-  allr   r`   r   FileNotFoundErrorrU   endswithrC   systemexecroundr   r   r   )datasetautodownloadr   extract_dirnew_dirkr<   r8  r   namemtrdtr@   )rC  r   rA   check_det_datasetz  s   
$*$ 
 ,rg  c                 C   sn  t | drt| tddd} nt| jdv r#t| }t|tddd} t| } |  r-| nt|   }| s~t	
d t	d| d t }t | d	kr]tjd
td  ddd nd|  d}t||jd t	
dt | ddtd| d |d }| st	d|  t|dt|d }|rddlm} t	
dt| d ||dd}|d }n	t	d| d  |d!  r|d! n|d"  r|d" nd#}	|d$  r|d$ nd#}
|d!kr|	st	d% |
}	n|d$kr|
st	d& |	}
td'd( |d d)D }d*d( |d  D }ttt|}||	|
d+ D ]\}}t| d, d-| d.}|d#u rHt	
| q+d/d( |d0D }t|}td1d2 |D }|dkr|dkrst |  d3| d4t	| d5| d6| d7 q+||krt	| d5| d6| d8| d9| d:
 q+t	
| d5| d6| d; q+||	|
||d<d=S )>a  
    Checks a classification dataset such as Imagenet.

    This function accepts a `dataset` name and attempts to retrieve the corresponding dataset information.
    If the dataset is not found locally, it attempts to download the dataset from the internet and save it locally.

    Args:
        dataset (str | Path): The name of the dataset.
        split (str, optional): The split of the dataset. Either 'val', 'test', or ''.

    Returns:
        (dict): A dictionary containing the following keys:

            - 'train' (Path): The directory path containing the training set of the dataset.
            - 'val' (Path): The directory path containing the validation set of the dataset.
            - 'test' (Path): The directory path containing the test set of the dataset.
            - 'nc' (int): The number of classes in the dataset.
            - 'names' (dict): A dictionary of class names in the dataset.
    )zhttp:/zhttps:/TFr2  >   .gz.tarrH  rJ   z Dataset not found, missing path z, attempting download...imagenetrJ  zdata/scripts/get_imagenet.sh)shellcheckz?https://github.com/ultralytics/assets/releases/download/v0.0.0/rH  )r3  u   Dataset download success ✅ (rM   zs), saved to rM  rN  r7  z#Dataset 'split=train' not found at z*.jpgz*.pngr   )split_classify_datasetzFound z1 images in subdirectories. Attempting to split...g?)train_ratiozNo images found in z or its subdirectories.r8  r9  Nr@  z:Dataset 'split=val' not found, using 'split=test' instead.z:Dataset 'split=test' not found, using 'split=val' instead.c                 S   s   g | ]}|  r|qS r@   )is_dirr:   r@   r@   rA   rB     rD  z%check_cls_dataset.<locals>.<listcomp>*c                 S   s   g | ]	}|  r|jqS r@   )ro  rb  r:   r@   r@   rA   rB     s    r7  r8  r@  : ...c                 S   &   g | ]}|j d d  tv r|qS r6   Nsuffixr   r   )r;   r   r@   r@   rA   rB   %     & *.*c                 S   s   h | ]}|j qS r@   )rQ  )r;   r   r@   r@   rA   	<setcomp>'  s    z$check_cls_dataset.<locals>.<setcomp>r:  z:' no training images foundz found z images in z classes (no images found)z classes (requires z classes, not rO   u    classes ✅ r>  )r7  r8  r@  r   r;  r=  )!rz   rV  r   r   r   rx  r   ro  r0  r	   r`   rQ   rU   
subprocessrunr   r   rQ  r   r-  r/  ultralytics.data.splitrm  rP   errorrF  r.  iterdirdict	enumeratesorteditemsrX  )r]  r   r   data_dirrd  rI  	train_setimage_filesrm  val_settest_setr   r;  ra  vre   ra   r   ndr@   r@   rA   check_cls_dataset  sr   
(









 
, r  c                   @   s@   e Zd ZdZdddZedd Zd	d
 ZdddZdd Z	dS )HUBDatasetStatsa  
    A class for generating HUB dataset JSON and `-hub` dataset directory.

    Args:
        path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco8.yaml'.
        task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'.
        autodownload (bool): Attempt to download dataset if not found locally. Default is False.

    Note:
        Download *.zip files from https://github.com/ultralytics/hub/tree/main/example_datasets
        i.e. https://github.com/ultralytics/hub/raw/main/example_datasets/coco8.zip for coco8.zip.

    Examples:
        >>> from ultralytics.data.utils import HUBDatasetStats
        >>> stats = HUBDatasetStats("path/to/coco8.zip", task="detect")  # detect dataset
        >>> stats = HUBDatasetStats("path/to/coco8-seg.zip", task="segment")  # segment dataset
        >>> stats = HUBDatasetStats("path/to/coco8-pose.zip", task="pose")  # pose dataset
        >>> stats = HUBDatasetStats("path/to/dota8.zip", task="obb")  # OBB dataset
        >>> stats = HUBDatasetStats("path/to/imagenet10.zip", task="classify")  # classification dataset
        >>> stats.get_json(save=True)
        >>> stats.process_images()
    
coco8.yamldetectFc           
   
   C   s   t | }td| d || _| jdkr$t|}t|}||d< n4| t |\}}}zt|}d|d< t	|| t
||}||d< W n tyW }	 ztd|	d}	~	ww t |d  d| _| jd	 | _t|d
 t|d
  d| _|| _dS )zInitialize class.z Starting HUB dataset checks for z....classifyr   rJ   zerror/HUB/dataset_stats/initNz-hubr4   r;  )r   r;  )r   r0  r	   r`   taskr   r  _unzipr   r   rg  r\   hub_dirim_dirrP   r-  valuesstatsrC  )
selfr   r  r^  	unzip_dirrC  rm   r  	yaml_pathr   r@   r@   rA   __init__M  s,   




 
zHUBDatasetStats.__init__c                 C   sV   t | dsdd| fS t| | jd}| s"J d|  d| ddt |t|fS )	zUnzip data.zip.rH  FNr,  zError unzipping z, z6 not found. path/to/abc.zip MUST unzip to path/to/abc/T)rz   rY  r   rQ  ro  r1  )r   r  r@   r@   rA   r  h  s   

zHUBDatasetStats._unzipc                 C   s   t || jt|j  dS )z*Saves a compressed image for HUB previews.N)compress_one_imager  r   rb  )r  ri   r@   r@   rA   _hub_opss  s   zHUBDatasetStats._hub_opsc              	      s@  fdd dD ]}dj |< j|}|du rqdd t|dD }|s)qjdkrud	d
lm} |j| }t	t
|jt}|jD ]}	||	d   d7  < qIt
|| dt
|d	| ddd |jD dj |< qd	dlm}
 |
j| jjd}tfddt|jt
|ddD }t| |d	 dt
|tt|d	kd |d	kd	 d fddt|j|jD dj |< q|rjjddd jd }td|  d t|ddd}t j | W d   n	1 s	w   Y  |rttj!j ddd  j S )!z(Return dataset JSON for Ultralytics HUB.c                    s    j dkr
| d }n8 j dv rdd | d D }n) j dkr9| d j\}}}t| d | d ||| fd	}n	td
 j  dt| d |}dd |D S )z:Update labels to integer class and 4 decimal place floats.r  bboxes>   obbsegmentc                 S   s   g | ]}|  qS r@   )flattenr:   r@   r@   rA   rB     r   z<HUBDatasetStats.get_json.<locals>._round.<locals>.<listcomp>r   poser   r6   zUndefined dataset task=r7   r   c                 S   s,   g | ]\}}t |d  gdd |D qS )r   c                 s   s    | ]
}t t|d V  qdS )   N)r\  r   r:   r@   r@   rA   r     s    zFHUBDatasetStats.get_json.<locals>._round.<locals>.<listcomp>.<genexpr>)r   )r;   r   r   r@   r@   rA   rB     r   )r  r   r]   r   r   
ValueErrorzip)r5   coordinatesnnkr  zippedr  r@   rA   _roundz  s   



&z(HUBDatasetStats.get_json.<locals>._roundrq  Nc                 S   ru  rv  rw  r+  r@   r@   rA   rB     ry  z,HUBDatasetStats.get_json.<locals>.<listcomp>rz  r  r   )ImageFolderr6   )total	per_class)r  
unlabelledr  c                 S   s   g | ]\}}t |j|iqS r@   r   rb  r;   ra  r  r@   r@   rA   rB     s    )instance_statsimage_statsr5   YOLODataset)img_pathrC  r  c                    s.   g | ]}t j|d  t  jd dqS )r   r   )	minlength)r]   bincountr   r   r  rC  )r;   r  r  r@   rA   rB     s     
Statisticsr  descc                    s"   g | ]\}}t |j |iqS r@   r  r  )r  r@   rA   rB     r  Tparentsexist_okz
stats.jsonzSaving rt  r  r   r   r   F)indent	sort_keys)"r  rC  r   r   r/  r  torchvision.datasetsr  r]   r   rP   r   r   r   imgstolistultralytics.datar  r   r   r5   r  rW  r  im_filesr  mkdirr	   r`   r0  rZ   jsondumpdumps)r  r   verboser   r   ra   r  r]  r<   r   r  
stats_pathri   r@   )r  r  rA   get_jsonw  sV   




zHUBDatasetStats.get_jsonc              	   C   s   ddl m} | jjddd dD ]>}| j|du rq|| j| | jd}tt}t|	| j
|jt|| dd	D ]}q<W d   n1 sIw   Y  qtd
| j  | jS )z$Compress images for Ultralytics HUB.r   r  Tr  rq  N)r  rC  z imagesr  zDone. All images saved to )r  r  r  r  rC  r   r   r
   r   imapr  r  rP   r	   r`   )r  r  r   r]  poolrm   r@   r@   rA   process_images  s   
(zHUBDatasetStats.process_imagesN)r  r  F)FF)
__name__
__module____qualname____doc__r  staticmethodr  r  r  r  r@   r@   r@   rA   r  5  s    



Jr    c           	   
   C   s  z1t | }|t|j|j }|dk r$|t|j| t|j| f}|j|p)| d|dd W dS  ty } zHt	
d|  d|  t| }|jdd \}}|t|| }|dk rptj|t|| t|| ftjd	}tt|pv| | W Y d}~dS d}~ww )
a=  
    Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the Python
    Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will not be
    resized.

    Args:
        f (str): The path to the input image file.
        f_new (str, optional): The path to the output image file. If not specified, the input file will be overwritten.
        max_dim (int, optional): The maximum dimension (width or height) of the output image.
        quality (int, optional): The image compression quality as a percentage.

    Examples:
        >>> from pathlib import Path
        >>> from ultralytics.data.utils import compress_one_image
        >>> for f in Path("path/to/dataset").rglob("*.jpg"):
        >>>    compress_one_image(f)
    r   r   T)r   optimizezHUB ops PIL failure r   Nr   )interpolation)r   rZ   r   r  r  r  r   r   r\   r	   rQ   r  imreadr   
INTER_AREAimwriterz   )	ri   f_newmax_dimr   r   re  r   	im_heightim_widthr@   r@   rA   r    s    
"
&"r  c                 C   s2   ddl }|  tjt| dd }|  |S )z1Load an Ultralytics *.cache dictionary from path.r   NT)allow_pickle)gcdisabler]   loadrz   itemenable)r   r  cacher@   r@   rA   load_dataset_cache_file  s
   r  c                 C   s   ||d< t |jr:| r|  tt|d}t|| W d   n1 s)w   Y  t	|  d|  dS t
|  d|j d dS )z9Save an Ultralytics dataset *.cache dictionary x to path.versionwbNzNew cache created: zCache directory z# is not writeable, cache not saved.)r   rQ  rF  unlinkrZ   rz   r]   r   r	   r`   rQ   )re   r   r<   r  r   r@   r@   rA   save_dataset_cache_file  s   
r  )rG   rH   rI   rJ   )r6   r6   )r6   )T)rJ   )Nr  rH   )Erx   r  rC   rR   r|  rU   rO  multiprocessing.poolr   pathlibr   tarfiler   r  numpyr]   PILr   r   ultralytics.nn.autobackendr   ultralytics.utilsr   r	   r
   r   r   r   r   r   r   r   r   r   ultralytics.utils.checksr   r   r   ultralytics.utils.downloadsr   r   r   ultralytics.utils.opsr   HELP_URLr   VID_FORMATSrz   getenvr   r2   r   rF   rv   r   r   r   r   r  r  r  r'  r1  rg  r  r  r  r  r  r@   r@   r@   rA   <module>   sR   8
LI
,



_\ 
"
