o
    VhBQ                  %   @   s(  d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	 d dl
Z
ddlmZ ddlmZ zed d	ZW n eefyA   d
ZY nw eZed dZG dd dZG dd dZdeeef ddfddZde
jde
jde
jde
jde
jde
jdefddZde
jde
jdeeef de
jfddZd d	d d d d d!ed	d d d!efd"ed#ed$ed%ed&ed'ed(ed)eeef d*ed+ed,ed-edeeef d.edee
je
jef fd/d0Zd"edeee ee ef fd1d2Zd"edefd3d4Z 	 		 	 	 	 	!	 			 	 	!	 	dHd5e
jd#ed$ed%ed&ed'ed(ed)eeef d6ed7ed+ed,ed-edeeef d8ed9edee
je
jf f"d:d;Z!d5e
jdeee ee ef fd<d=Z"d5e
jdefd>d?Z#	 		@dId"edAe	eef dBee	eef  dCedee
je
jeeef f f
dDdEZ$	@dJd"edCedee	ee ee f ee f fdFdGZ%dS )K    N)Fraction)DictListOptionalTupleUnion   )_load_library   ) _raise_video_deprecation_warningvideo_readerTFc                   @   s4   e Zd ZeedZddgZdededdfddZdS )Timebase	numeratordenominatorr   r   returnNc                 C   s   || _ || _d S )Nr   )selfr   r    r   M/var/www/vscode/kcb/lib/python3.10/site-packages/torchvision/io/_video_opt.py__init__   s   
zTimebase.__init__)__name__
__module____qualname__int__annotations__	__slots__r   r   r   r   r   r      s    
r   c                	   @   s4   e Zd ZeeeeeeeedZg dZdddZdS )VideoMetaData)	has_videovideo_timebasevideo_duration	video_fps	has_audioaudio_timebaseaudio_durationaudio_sample_rater   Nc                 C   s@   d| _ tdd| _d| _d| _d| _tdd| _d| _d| _d S )NFr   r
   g        )	r   r   r   r   r    r!   r"   r#   r$   )r   r   r   r   r   ;   s   
zVideoMetaData.__init__)r   N)	r   r   r   boolr   floatr   r   r   r   r   r   r   r   %   s    
r   	pts_ranger   c                 C   sB   | d | d   krdkrn d S t d| d  d| d  d S )Nr   r
   z=Start pts should not be smaller than end pts, got start pts: z and end pts: )
ValueError)r'   r   r   r   _validate_ptsF   s
   "r)   	vtimebasevfps	vduration	atimebaseasample_rate	adurationc                 C   s  t  }|  dkr;tt| d  t| d  |_| d  t| d   }| dkr;d|_t| | |_| dkrHt| |_	| dkrtt|d  t|d  |_
|d  t|d   }| dkrd|_t| | |_| dkrt| |_|S )zE
    Build update VideoMetaData struct with info about the video
    r   r
   T)r   numelr   r   itemr   r&   r   r   r    r"   r!   r#   r$   )r*   r+   r,   r-   r.   r/   metatimebaser   r   r   
_fill_infoN   s$   $$r4   aframes
aframe_ptsaudio_pts_rangec           	      C   s   |d |d }}|  d}t|| d t| }d}|}||d k r.t|d | | }|d dkrD||d krDt|d | | }| ||d d f S )Nr   r
   )sizer&   r   )	r5   r6   r7   startendnum_samplesstep_per_aframes_idxe_idxr   r   r   _align_audio_framesn   s   
r@         ?r   r8   filenameseek_frame_marginread_video_streamvideo_widthvideo_heightvideo_min_dimensionvideo_max_dimensionvideo_pts_ranger   read_audio_streamaudio_samplesaudio_channelsr"   c                 C   s   t   t| t| tjj| |d||||||d |d |j|j|	|
||d |d |j|j}|\
}}}}}}}}}}t||||||}|	 dkrQt
|||}|||fS )ab  
    Reads a video from a file, returning both the video frames and the audio frames

    Args:
    filename (str): path to the video file
    seek_frame_margin (double, optional): seeking frame in the stream is imprecise. Thus,
        when video_start_pts is specified, we seek the pts earlier by seek_frame_margin seconds
    read_video_stream (int, optional): whether read video stream. If yes, set to 1. Otherwise, 0
    video_width/video_height/video_min_dimension/video_max_dimension (int): together decide
        the size of decoded frames:

            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the original frame resolution
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension = 0, keep the aspect ratio and resize the
                frame so that shorter edge size is video_min_dimension
            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension != 0, keep the aspect ratio and resize
                the frame so that longer edge size is video_max_dimension
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension != 0, resize the frame so that shorter
                edge size is video_min_dimension, and longer edge size is
                video_max_dimension. The aspect ratio may not be preserved
            - When video_width = 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_height is $video_height
            - When video_width != 0, video_height == 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_width is $video_width
            - When video_width != 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, resize the frame so that frame
                video_width and  video_height are set to $video_width and
                $video_height, respectively
    video_pts_range (list(int), optional): the start and end presentation timestamp of video stream
    video_timebase (Fraction, optional): a Fraction rational number which denotes timebase in video stream
    read_audio_stream (int, optional): whether read audio stream. If yes, set to 1. Otherwise, 0
    audio_samples (int, optional): audio sampling rate
    audio_channels (int optional): audio channels
    audio_pts_range (list(int), optional): the start and end presentation timestamp of audio stream
    audio_timebase (Fraction, optional): a Fraction rational number which denotes time base in audio stream

    Returns
        vframes (Tensor[T, H, W, C]): the `T` video frames
        aframes (Tensor[L, K]): the audio frames, where `L` is the number of points and
            `K` is the number of audio_channels
        info (Dict): metadata for the video and audio. Can contain the fields video_fps (float)
            and audio_fps (int)
    r   r
   )r   r)   torchopsr   read_video_from_filer   r   r4   r0   r@   )rC   rD   rE   rF   rG   rH   rI   rJ   r   rK   rL   rM   r7   r"   resultvframes_vframe_ptsr*   r+   r,   r5   r6   r-   r.   r/   infor   r   r   _read_video_from_file}   s:   @
rU   c                 C   s~   t jj| dddddddddddddddddd}|\
}}}}}}}}	}
}t||||	|
|}|  }|  }|||fS )z
    Decode all video- and audio frames in the video. Only pts
    (presentation timestamp) is returned. The actual frame pixel data is not
    copied. Thus, it is much faster than read_video(...)
    r   r
   r8   )rN   rO   r   rP   r4   numpytolist)rC   rQ   _vframes
vframe_ptsr*   r+   r,   _aframesr6   r-   r.   r/   rT   r   r   r    _read_video_timestamps_from_file   s4   
r[   c           	      C   s:   t   tjj| }|\}}}}}}t||||||}|S )zO
    Probe a video file and return VideoMetaData with info about the video
    )r   rN   rO   r   probe_video_from_filer4   )	rC   rQ   r*   r+   r,   r-   r.   r/   rT   r   r   r   _probe_video_from_file  s
   r]   
video_datavideo_timebase_numeratorvideo_timebase_denominatoraudio_timebase_numeratoraudio_timebase_denominatorc                 C   s   t   t| t| t| tjs4t  tjddd tj| tj	d} W d   n1 s/w   Y  tj
j| |d||||||d |d ||	|
|||d |d ||}|\
}}}}}}}}}}| dkrmt|||}||fS )a  
    Reads a video from memory, returning both the video frames as the audio frames
    This function is torchscriptable.

    Args:
    video_data (data type could be 1) torch.Tensor, dtype=torch.int8 or 2) python bytes):
        compressed video content stored in either 1) torch.Tensor 2) python bytes
    seek_frame_margin (double, optional): seeking frame in the stream is imprecise.
        Thus, when video_start_pts is specified, we seek the pts earlier by seek_frame_margin seconds
    read_video_stream (int, optional): whether read video stream. If yes, set to 1. Otherwise, 0
    video_width/video_height/video_min_dimension/video_max_dimension (int): together decide
        the size of decoded frames:

            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the original frame resolution
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension = 0, keep the aspect ratio and resize the
                frame so that shorter edge size is video_min_dimension
            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension != 0, keep the aspect ratio and resize
                the frame so that longer edge size is video_max_dimension
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension != 0, resize the frame so that shorter
                edge size is video_min_dimension, and longer edge size is
                video_max_dimension. The aspect ratio may not be preserved
            - When video_width = 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_height is $video_height
            - When video_width != 0, video_height == 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that frame video_width is $video_width
            - When video_width != 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, resize the frame so that frame
                video_width and  video_height are set to $video_width and
                $video_height, respectively
    video_pts_range (list(int), optional): the start and end presentation timestamp of video stream
    video_timebase_numerator / video_timebase_denominator (float, optional): a rational
        number which denotes timebase in video stream
    read_audio_stream (int, optional): whether read audio stream. If yes, set to 1. Otherwise, 0
    audio_samples (int, optional): audio sampling rate
    audio_channels (int optional): audio audio_channels
    audio_pts_range (list(int), optional): the start and end presentation timestamp of audio stream
    audio_timebase_numerator / audio_timebase_denominator (float, optional):
        a rational number which denotes time base in audio stream

    Returns:
        vframes (Tensor[T, H, W, C]): the `T` video frames
        aframes (Tensor[L, K]): the audio frames, where `L` is the number of points and
            `K` is the number of channels
    ignore The given buffer is not writablemessagedtypeNr   r
   )r   r)   
isinstancerN   Tensorwarningscatch_warningsfilterwarnings
frombufferuint8rO   r   read_video_from_memoryr0   r@   )r^   rD   rE   rF   rG   rH   rI   rJ   r_   r`   rK   rL   rM   r7   ra   rb   rQ   rR   rS   r*   r+   r,   r5   r6   r-   r.   r/   r   r   r   _read_video_from_memory  sB   E
rq   c                 C   s   t | tjs)t  tjddd tj| tjd} W d   n1 s$w   Y  tjj	
| dddddddddddddddddd}t  |\
}}}}}}}}	}
}t||||	|
|}|  }|  }|||fS )	z
    Decode all frames in the video. Only pts (presentation timestamp) is returned.
    The actual frame pixel data is not copied. Thus, read_video_timestamps(...)
    is much faster than read_video(...)
    rc   rd   re   rg   Nr   r
   r8   )ri   rN   rj   rk   rl   rm   rn   ro   rO   r   rp   r   r4   rV   rW   )r^   rQ   rX   rY   r*   r+   r,   rZ   r6   r-   r.   r/   rT   r   r   r   "_read_video_timestamps_from_memoryz  s@   

rr   c           	      C   s   t   t| tjs,t  tjddd tj| tjd} W d   n1 s'w   Y  tj	j
| }|\}}}}}}t||||||}|S )zy
    Probe a video in memory and return VideoMetaData with info about the video
    This function is torchscriptable
    rc   rd   re   rg   N)r   ri   rN   rj   rk   rl   rm   rn   ro   rO   r   probe_video_from_memoryr4   )	r^   rQ   r*   r+   r,   r-   r.   r/   rT   r   r   r   _probe_video_from_memory  s   
rt   pts	start_ptsend_ptspts_unitc              	      s   t    d u rtd dkrtd t| }|j}|j} fdd}d}t}	|r9t|j	j
|j	j}	||	}d}
t}|rLt|jj
|jj}||}
t| d||	d|
|d\}}}i }|rc|j|d	< |rj|j|d
< |||fS )Ninfru   mThe pts_unit 'pts' gives wrong results and will be removed in a follow-up version. Please use pts_unit 'sec'.c                    s`   } }dkr$t td|   }|tdkr$t t d|   }|tdkr,d}||fS )Nsecr
   ry   r8   )r   mathfloorr&   ceil)	time_basestart_offset
end_offsetrw   rx   rv   r   r   get_pts  s   z_read_video.<locals>.get_ptsrB   T)rE   rJ   r   rK   r7   r"   r    	audio_fps)r   r&   rk   warnr]   r   r!   default_timebaser   r   r   r   r"   rU   r    r$   )rC   rv   rw   rx   rT   r   r!   r   rJ   r   r7   r"   rR   r5   _infor   r   r   _read_video  sH   	


r   c                    sj   t   |dkrtd t| \}}}|dkr)t|jj|jj  fdd|D }|jr/|j	nd }||fS )Nru   rz   r{   c                    s   g | ]}|  qS r   r   ).0xvideo_time_baser   r   
<listcomp>  s    z*_read_video_timestamps.<locals>.<listcomp>)
r   rk   r   r[   r   r   r   r   r   r    )rC   rx   ru   _rT   r    r   r   r   _read_video_timestamps  s   r   )rA   r
   r   r   r   r   rB   r   r
   r
   r   r   rB   r   r
   )r   Nru   )ru   )&r|   rk   	fractionsr   typingr   r   r   r   r   rN   	extensionr	   _video_deprecation_warningr   _HAS_CPU_VIDEO_DECODERImportErrorOSError_HAS_VIDEO_OPTr   r   r   r   r)   rj   r4   r@   strr&   r%   rU   r[   r]   rq   rr   rt   r   r   r   r   r   r   <module>   sT   
!
 


	


$a#
	


n
+


?