o
    Vh&                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZmZmZmZ d dlmZ dd	lmZmZ dd
lmZmZmZmZ ddlmZ ddlmZ deee	f deee	f deddfddZ G dd deZ!dS )    N)partial)Pool)path)Path)AnyCallableDictOptionalTupleUnion)Tensor   )find_classesmake_dataset)check_integritydownload_and_extract_archivedownload_urlverify_str_arg)
VideoClips)VisionDatasettarpath	videopathlinereturnc                 C   s   t || | d S N)r   )r   r   r    r   Q/var/www/vscode/kcb/lib/python3.10/site-packages/torchvision/datasets/kinetics.py_dl_wrap   s   r   c                )       s4  e Zd ZdZddddZddddZ			
															d5deeef de	dedede
e	 de	de
e deedf dede	de	de
eeef  de	de	d e	d!e	d"e	d#ed$ed%df( fd&d'Zd6d(d)Zd6d*d+Zd6d,d-Zed%eeef fd.d/Zd%e	fd0d1Zd2e	d%eeee	f fd3d4Z  ZS )7Kineticsu  `Generic Kinetics <https://www.deepmind.com/open-source/kinetics>`_
    dataset.

    Kinetics-400/600/700 are action recognition video datasets.
    This dataset consider every video as a collection of video clips of fixed size, specified
    by ``frames_per_clip``, where the step in frames between each clip is given by
    ``step_between_clips``.

    To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
    and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
    elements will come from video 1, and the next three elements from video 2.
    Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
    frames in a video might be present.

    Args:
        root (str or ``pathlib.Path``): Root directory of the Kinetics Dataset.
            Directory should be structured as follows:
            .. code::

                root/
                ├── split
                │   ├──  class1
                │   │   ├──  vid1.mp4
                │   │   ├──  vid2.mp4
                │   │   ├──  vid3.mp4
                │   │   ├──  ...
                │   ├──  class2
                │   │   ├──   vidx.mp4
                │   │    └── ...

            Note: split is appended automatically using the split argument.
        frames_per_clip (int): number of frames in a clip
        num_classes (int): select between Kinetics-400 (default), Kinetics-600, and Kinetics-700
        split (str): split of the dataset to consider; supports ``"train"`` (default) ``"val"`` ``"test"``
        frame_rate (float): If omitted, interpolate different frame rate for each clip.
        step_between_clips (int): number of frames between each clip
        transform (callable, optional): A function/transform that takes in a TxHxWxC video
            and returns a transformed version.
        download (bool): Download the official version of the dataset to root folder.
        num_workers (int): Use multiple workers for VideoClips creation
        num_download_workers (int): Use multiprocessing in order to speed up download.
        output_format (str, optional): The format of the output video tensors (before transforms).
            Can be either "THWC" or "TCHW" (default).
            Note that in most other utils and datasets, the default is actually "THWC".

    Returns:
        tuple: A 3-tuple with the following entries:

            - video (Tensor[T, C, H, W] or Tensor[T, H, W, C]): the `T` video frames in torch.uint8 tensor
            - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
              and `L` is the number of points in torch.float tensor
            - label (int): class of the video clip

    Raises:
        RuntimeError: If ``download is True`` and the video archives are already extracted.
    zChttps://s3.amazonaws.com/kinetics/400/{split}/k400_{split}_path.txtzChttps://s3.amazonaws.com/kinetics/600/{split}/k600_{split}_path.txtzMhttps://s3.amazonaws.com/kinetics/700_2020/{split}/k700_2020_{split}_path.txt400600700z=https://s3.amazonaws.com/kinetics/400/annotations/{split}.csvz=https://s3.amazonaws.com/kinetics/600/annotations/{split}.csvzBhttps://s3.amazonaws.com/kinetics/700_2020/annotations/{split}.csvr    trainNr   avimp4Fr   TCHWrootframes_per_clipnum_classessplit
frame_ratestep_between_clips	transform
extensions.downloadnum_download_workersnum_workers_precomputed_metadata_video_width_video_height_video_min_dimension_audio_samples_audio_channels_legacyoutput_formatr   c                    s   t |dg dd| _|| _|
| _|| _|| _|r'|| _d| _d}|	r&tdnt	
||| _t |dg dd| _|	r>|   t | j t| j\| _}t| j||d d	| _d
d | jD }t||||||||||||d| _|| _d S )Nr*   r   )argvalid_valuesunknownTHWCz2Cannot download the videos using legacy_structure.r+   )r#   valtest)is_valid_filec                 S   s   g | ]}|d  qS )r   r   ).0xr   r   r   
<listcomp>   s    z%Kinetics.__init__.<locals>.<listcomp>)r2   r4   r5   r6   r7   r8   r:   )r   r*   r/   r1   r(   r9   split_folderr+   
ValueErrorr   joindownload_and_process_videossuper__init__r   classesr   samplesr   video_clipsr.   )selfr(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   class_to_idx
video_list	__class__r   r   rJ   [   sF   
zKinetics.__init__c                 C   s   |    |   dS )zEDownloads all the videos to the _root_ folder in the expected format.N)_download_videos_make_ds_structurerN   r   r   r   rH      s   z$Kinetics.download_and_process_videosc           
      C   s   t | jrdS t | jd}t | jd}| j| j j| jd}t |t 	|}t
|s3t|| t|}dd |  D }W d   n1 sMw   Y  | jdkre|D ]	}t||| j qYdS tt|| j}t| j}	|	|| dS )a  download tarballs containing the video to "tars" folder and extract them into the _split_ folder where
        split is one of the official dataset splits.

        Raises:
            RuntimeError: if download folder exists, break to prevent downloading entire dataset again.
        Ntarsfilesr+   c                 S   s   g | ]
}t jj|d dqS )z/,:)safe)urllibparsequote)rB   r   r   r   r   rD      s    z-Kinetics._download_videos.<locals>.<listcomp>r   )r   existsrE   rG   r(   	_TAR_URLSr*   formatr+   basenamer   r   openread
splitlinesr1   r   r   r   r   map)
rN   tar_pathfile_list_path	split_urlsplit_url_filepathfilelist_video_urlsr   partpoolprocr   r   r   rS      s$   



zKinetics._download_videosc           
   
   C   s0  t | jd}tt || j ds!t| j| j j| jd| t || j d}d}t	|]}t
|}|D ]L}|j|d t|d t|d d}|d	 d
ddddddd}tjt | j|dd t | j|}	t |	rt|	t | j|| q9W d   dS 1 sw   Y  dS )u   move videos from
        split_folder/
            ├── clip1.avi
            ├── clip2.avi

        to the correct format as described below:
        split_folder/
            ├── class1
            │   ├── clip1.avi

        annotationsz.csvrX   z{ytid}_{start:06}_{end:06}.mp4
youtube_id
time_starttime_end)ytidstartendlabel _' ()T)exist_okN)r   rG   r(   r   r+   r   _ANNOTATION_URLSr*   r_   ra   csv
DictReaderintreplaceosmakedirsrE   isfile)
rN   annotation_pathrm   file_fmtstrcsvfilereaderrowfrt   downloaded_filer   r   r   rT      s0   



(
"zKinetics._make_ds_structurec                 C   s   | j jS r   )rM   metadatarU   r   r   r   r      s   zKinetics.metadatac                 C   s
   | j  S r   )rM   	num_clipsrU   r   r   r   __len__   s   
zKinetics.__len__idxc                 C   s@   | j |\}}}}| j| d }| jd ur| |}|||fS )Nr   )rM   get_cliprL   r.   )rN   r   videoaudioinfo	video_idxrt   r   r   r   __getitem__   s
   


zKinetics.__getitem__)r    r#   Nr   Nr$   Fr   r   Nr   r   r   r   r   Fr'   )r   N)__name__
__module____qualname____doc__r^   r|   r   strr   r   r	   r   r
   boolr   r   rJ   rH   rS   rT   propertyr   r   r   r   __classcell__r   r   rQ   r   r      s    :


	

A

#$r   )"r}   r   rZ   	functoolsr   multiprocessingr   r   pathlibr   typingr   r   r   r	   r
   r   torchr   folderr   r   utilsr   r   r   r   video_utilsr   visionr   r   r   r   r   r   r   r   <module>   s     *