o
    Vh                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZmZmZ d dlZd dlmZ ddlmZmZmZ dd	lmZ eejejeej ejf Zeejejeej f Zd
Ze j eddZ!G dd deeZ"G dd de"Z#G dd de"Z$G dd de"Z%G dd de"Z&G dd de"Z'G dd de"Z(G dd de"Z)G dd de"Z*G dd de"Z+G d d! d!e"Z,dS )"    N)ABCabstractmethod)glob)Path)CallablecastListOptionalTupleUnion)Image   )	_read_pfmdownload_and_extract_archiveverify_str_arg)VisionDataset )slice_channelsc                       s   e Zd ZdZdZddeeef dee	 ddf fddZ
d	eeef dejfd
dZ	ddedee deeeee f  fddZed	edeeej eej f fddZdedeeef fddZdefddZ  ZS )StereoMatchingDatasetz+Base interface for Stereo matching datasetsFNroot
transformsreturnc                    s$   t  j|d || _g | _g | _dS )a}  
        Args:
            root(str): Root directory of the dataset.
            transforms(callable, optional): A function/transform that takes in Tuples of
                (images, disparities, valid_masks) and returns a transformed version of each of them.
                images is a Tuple of (``PIL.Image``, ``PIL.Image``)
                disparities is a Tuple of (``np.ndarray``, ``np.ndarray``) with shape (1, H, W)
                valid_masks is a Tuple of (``np.ndarray``, ``np.ndarray``) with shape (H, W)
                In some cases, when a dataset does not provide disparities, the ``disparities`` and
                ``valid_masks`` can be Tuples containing None values.
                For training splits generally the datasets provide a minimal guarantee of
                images: (``PIL.Image``, ``PIL.Image``)
                disparities: (``np.ndarray``, ``None``) with shape (1, H, W)
                Optionally, based on the dataset, it can return a ``mask`` as well:
                valid_masks: (``np.ndarray | None``, ``None``) with shape (H, W)
                For some test splits, the datasets provides outputs that look like:
                imgaes: (``PIL.Image``, ``PIL.Image``)
                disparities: (``None``, ``None``)
                Optionally, based on the dataset, it can return a ``mask`` as well:
                valid_masks: (``None``, ``None``)
        r   N)super__init__r   _images_disparities)selfr   r   	__class__r   Y/var/www/vscode/kcb/lib/python3.10/site-packages/torchvision/datasets/_stereo_matching.pyr      s   
zStereoMatchingDataset.__init__	file_pathc                 C   s"   t |}|jdkr|d}|S )NRGB)r   openmodeconvert)r   r!   imgr   r   r    	_read_img:   s   


zStereoMatchingDataset._read_imgpaths_left_patternpaths_right_patternc              
   C   s   t tt|}|rt tt|}n	t dd |D }|s%td| |s.td| t|t|krKtdt| dt| d| d| d	t d	d t||D }|S )
Nc                 s   s    | ]}d V  qd S Nr   .0_r   r   r    	<genexpr>L       z4StereoMatchingDataset._scan_pairs.<locals>.<genexpr>z0Could not find any files matching the patterns: zFound z left files but z# right files using:
 left pattern: z
right pattern: 
c                 s   s    | ]	\}}||fV  qd S r*   r   )r,   leftrightr   r   r    r.   [   s    )listsortedr   FileNotFoundErrorlen
ValueErrorzip)r   r(   r)   
left_pathsright_pathspathsr   r   r    _scan_pairs@   s$   z!StereoMatchingDataset._scan_pairsc                 C   s   d S r*   r   )r   r!   r   r   r    _read_disparity^   s   z%StereoMatchingDataset._read_disparityindexc                 C   s   |  | j| d }|  | j| d }| | j| d \}}| | j| d \}}||f}||f}	||f}
| jdurG| ||	|
\}}	}
| jsP|
d durb|d |d |	d ttj|
d fS |d |d |	d fS )ao  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3 or 4-tuple with ``(img_left, img_right, disparity, Optional[valid_mask])`` where ``valid_mask``
                can be a numpy boolean mask of shape (H, W) if the dataset provides a file
                indicating which disparity pixels are valid. The disparity is a numpy array of
                shape (1, H, W) and the images are PIL images. ``disparity`` is None for
                datasets on which for ``split="test"`` the authors did not provide annotations.
        r   r   N)	r'   r   r=   r   r   _has_built_in_disparity_maskr   npndarray)r   r>   img_left	img_rightdsp_map_leftvalid_mask_leftdsp_map_rightvalid_mask_rightimgsdsp_mapsvalid_masksr   r   r    __getitem__c   s    
$z!StereoMatchingDataset.__getitem__c                 C   s
   t | jS r*   )r6   r   )r   r   r   r    __len__   s   
zStereoMatchingDataset.__len__r*   )__name__
__module____qualname____doc__r?   r   strr   r	   r   r   r   r'   r   r
   r<   r   r@   rA   r=   intT1T2rK   rL   __classcell__r   r   r   r    r      s"    (	
(#r   c                       sn   e Zd ZdZddeeef dee ddf fddZ	dede
ejdf fd	d
Zdedef fddZ  ZS )CarlaStereoaz  
    Carla simulator data linked in the `CREStereo github repo <https://github.com/megvii-research/CREStereo>`_.

    The dataset is expected to have the following structure: ::

        root
            carla-highres
                trainingF
                    scene1
                        img0.png
                        img1.png
                        disp0GT.pfm
                        disp1GT.pfm
                        calib.txt
                    scene2
                        img0.png
                        img1.png
                        disp0GT.pfm
                        disp1GT.pfm
                        calib.txt
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory where `carla-highres` is located.
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    Nr   r   r   c           	         s   t  || t|d }t|d d d }t|d d d }| ||}|| _t|d d d }t|d d d }| ||}|| _d S )Nzcarla-highres	trainingF*im0.pngim1.pngdisp0GT.pfmzdisp1GT.pfmr   r   r   rQ   r<   r   r   )	r   r   r   left_image_patternright_image_patternrH   left_disparity_patternright_disparity_patterndisparitiesr   r   r    r      s   
zCarlaStereo.__init__r!   c                 C      t |}t|}d }||fS r*   _read_pfm_filer@   absr   r!   disparity_map
valid_maskr   r   r    r=         
zCarlaStereo._read_disparityr>   c                       t tt |S a  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3-tuple with ``(img_left, img_right, disparity)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            If a ``valid_mask`` is generated within the ``transforms`` parameter,
            a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
        r   rS   r   rK   r   r>   r   r   r    rK         zCarlaStereo.__getitem__r*   rM   rN   rO   rP   r   rQ   r   r	   r   r   r
   r@   rA   r=   rR   rS   rK   rU   r   r   r   r    rV      s
    (rV   c                	       z   e Zd ZdZdZddeeef dedee	 ddf fd	d
Z
dedeeej df fddZdedef fddZ  ZS )Kitti2012Stereoa
  
    KITTI dataset from the `2012 stereo evaluation benchmark <http://www.cvlibs.net/datasets/kitti/eval_stereo_flow.php>`_.
    Uses the RGB images for consistency with KITTI 2015.

    The dataset is expected to have the following structure: ::

        root
            Kitti2012
                testing
                    colored_0
                        1_10.png
                        2_10.png
                        ...
                    colored_1
                        1_10.png
                        2_10.png
                        ...
                training
                    colored_0
                        1_10.png
                        2_10.png
                        ...
                    colored_1
                        1_10.png
                        2_10.png
                        ...
                    disp_noc
                        1.png
                        2.png
                        ...
                    calib

    Args:
        root (str or ``pathlib.Path``): Root directory where `Kitti2012` is located.
        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    TtrainNr   splitr   r   c                    s   t  || t|ddd t|d |d  }t|d d }t|d d }| ||| _|d	krDt|d
 d }| |d | _d S tdd | jD | _d S )Nrs   rr   testvalid_values	Kitti2012ing	colored_0z*_10.png	colored_1rr   disp_noc*.pngc                 s       | ]}d V  qdS NNNr   r+   r   r   r    r.     r/   z+Kitti2012Stereo.__init__.<locals>.<genexpr>	r   r   r   r   rQ   r<   r   r   r3   )r   r   rs   r   left_img_patternright_img_patterndisparity_patternr   r   r    r      s   zKitti2012Stereo.__init__r!   c                 C   B   |d u rdS t t|d }|d d d d d f }d }||fS Nr   g      p@r@   asarrayr   r#   rf   r   r   r    r=        zKitti2012Stereo._read_disparityr>   c                    rj   a  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not
            generate a valid mask.
            Both ``disparity`` and ``valid_mask`` are ``None`` if the dataset split is test.
        rl   rm   r   r   r    rK        zKitti2012Stereo.__getitem__rr   NrM   rN   rO   rP   r?   r   rQ   r   r	   r   r   r
   r@   rA   r=   rR   rS   rK   rU   r   r   r   r    rq      s    &, rq   c                	       rp   )Kitti2015StereoaM  
    KITTI dataset from the `2015 stereo evaluation benchmark <http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php>`_.

    The dataset is expected to have the following structure: ::

        root
            Kitti2015
                testing
                    image_2
                        img1.png
                        img2.png
                        ...
                    image_3
                        img1.png
                        img2.png
                        ...
                training
                    image_2
                        img1.png
                        img2.png
                        ...
                    image_3
                        img1.png
                        img2.png
                        ...
                    disp_occ_0
                        img1.png
                        img2.png
                        ...
                    disp_occ_1
                        img1.png
                        img2.png
                        ...
                    calib

    Args:
        root (str or ``pathlib.Path``): Root directory where `Kitti2015` is located.
        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    Trr   Nr   rs   r   r   c                    s   t  || t|ddd t|d |d  }t|d d }t|d d }| ||| _|d	krLt|d
 d }t|d d }| ||| _d S tdd | jD | _d S )Nrs   rt   rv   	Kitti2015ry   image_2r}   image_3rr   
disp_occ_0
disp_occ_1c                 s   r~   r   r   r+   r   r   r    r.   Z  r/   z+Kitti2015Stereo.__init__.<locals>.<genexpr>r   r   r   rs   r   r   r   r_   r`   r   r   r    r   K  s   zKitti2015Stereo.__init__r!   c                 C   r   r   r   rf   r   r   r    r=   \  r   zKitti2015Stereo._read_disparityr>   c                    rj   r   rl   rm   r   r   r    rK   g  r   zKitti2015Stereo.__getitem__r   r   r   r   r   r    r     s    ), r   c                       s   e Zd ZdZg dg dg ddZdZ					
		ddeeef dede	e de
de	e de
dd
f fddZdeeef dejf fddZdedeed eejejf f fddZdeeef dd
fddZdedef fddZ  ZS ) Middlebury2014StereoaZ	  Publicly available scenes from the Middlebury dataset `2014 version <https://vision.middlebury.edu/stereo/data/scenes2014/>`.

    The dataset mostly follows the original format, without containing the ambient subdirectories.  : ::

        root
            Middlebury2014
                train
                    scene1-{perfect,imperfect}
                        calib.txt
                        im{0,1}.png
                        im1E.png
                        im1L.png
                        disp{0,1}.pfm
                        disp{0,1}-n.png
                        disp{0,1}-sd.pfm
                        disp{0,1}y.pfm
                    scene2-{perfect,imperfect}
                        calib.txt
                        im{0,1}.png
                        im1E.png
                        im1L.png
                        disp{0,1}.pfm
                        disp{0,1}-n.png
                        disp{0,1}-sd.pfm
                        disp{0,1}y.pfm
                    ...
                additional
                    scene1-{perfect,imperfect}
                        calib.txt
                        im{0,1}.png
                        im1E.png
                        im1L.png
                        disp{0,1}.pfm
                        disp{0,1}-n.png
                        disp{0,1}-sd.pfm
                        disp{0,1}y.pfm
                    ...
                test
                    scene1
                        calib.txt
                        im{0,1}.png
                    scene2
                        calib.txt
                        im{0,1}.png
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory of the Middleburry 2014 Dataset.
        split (string, optional): The dataset split of scenes, either "train" (default), "test", or "additional"
        use_ambient_views (boolean, optional): Whether to use different expose or lightning views when possible.
            The dataset samples with equal probability between ``[im1.png, im1E.png, im1L.png]``.
        calibration (string, optional): Whether or not to use the calibrated (default) or uncalibrated scenes.
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
        download (boolean, optional): Whether or not to download the dataset in the ``root`` directory.
    )

Adirondack	Jadeplant
MotorcyclePianoPipesPlayroom	PlaytableRecycleShelvesVintage)BackpackBicycle1Cable
Classroom1CouchFlowersMaskShopvacSticksStorageSword1Sword2Umbrella)PlantsClassroom2E
Classroom2	AustraliaDjembeLCrusadePCrusadeHoopsBicycle2	StaircaseNewkuba
AustraliaPDjembe
LivingroomComputer)rr   
additionalru   Trr   perfectFNr   rs   calibrationuse_ambient_viewsr   downloadr   c                    s  t  || t|ddd || _|r#t|ddd |dkr"tdn|dkr2td| d	| d
|r9| | t|d }tj	|| sOt
d| d| j|  t fddt|| D slt
d| ddgdgdgddgd| }|D ]T}d| }	t|| |	 d }
t|| |	 d }|  j| |
|7  _|dkrtdd | jD | _q|t|| |	 d }t|| |	 d }|  j| ||7  _q||| _d S )Nrs   )rr   ru   r   rv   r   )r   	imperfectbothNru   zMSplit 'test' has only no calibration settings, please set `calibration=None`.zSplit 'zr' has calibration settings, however None was provided as an argument.
Setting calibration to 'perfect' for split 'zF'. Available calibration settings are: 'perfect', 'imperfect', 'both'.Middlebury2014zThe z7 directory was not found in the provided root directoryc                 3   s$    | ]} D ]}| |V  qqd S r*   )
startswith)r,   scenessplit_scenesr   r    r.     s    z0Middlebury2014Stereo.__init__.<locals>.<genexpr>z:Provided root folder does not contain any scenes from the z split. z-perfectz
-imperfect)Nr   r   r   rX   rY   rZ   c                 s   r~   r   r   r+   r   r   r    r.     r/   z	disp0.pfmz	disp1.pfm)r   r   r   rs   r7   _download_datasetr   ospathexistsr5   splitsanylistdirrQ   r   r<   r3   r   r   )r   r   rs   r   r   r   r   calibrartion_suffixescalibration_suffixscene_patternr   r   left_dispartity_patternright_dispartity_patternr   r   r    r     sT   	


zMiddlebury2014Stereo.__init__r!   c                    sp   t |ts	t|}|jdkr2| jr2|j t fdddD }ttdd |}|| t	|}t
 |S )a  
        Function that reads either the original right image or an augmented view when ``use_ambient_views`` is True.
        When ``use_ambient_views`` is True, the dataset will return at random one of ``[im1.png, im1E.png, im1L.png]``
        as the right image.
        rZ   c                 3   s    | ]} | V  qd S r*   r   )r,   	view_name	base_pathr   r    r.   0  s    z1Middlebury2014Stereo._read_img.<locals>.<genexpr>)zim1E.pngzim1L.pngc                 S   s   t j| S r*   )r   r   r   )pr   r   r    <lambda>2  s    z0Middlebury2014Stereo._read_img.<locals>.<lambda>)
isinstancer   namer   parentr3   filterappendrandomchoicer   r'   )r   r!   ambient_file_pathsr   r   r    r'   "  s   


zMiddlebury2014Stereo._read_imgr   c                 C   sB   |d u rdS t |}t|}d||tjk< |dkd}||fS )Nr   r   )rd   r@   re   infsqueezerf   r   r   r    r=   8  s   
z$Middlebury2014Stereo._read_disparityc                    s@  d}t  d  | j}|dkrD| j| D ]-} | }dD ]$}| d| }| d| d}||  s@t|| dt|dd	 qqd S t d  t fd
d| jd D rd}	t|	t dd t	t d D ]%\}
}}|D ]} d }t |
| }tj|dd t
t|t| qtqmt
t d  d S d S )Nz8https://vision.middlebury.edu/stereo/data/scenes2014/zipr   ru   )r   r   -/z.zipT)urlfilenamedownload_rootremove_finishedc                 3   s"    | ]}|t  d  vV  qdS )ru   N)r   r   )r,   r   r   r   r    r.   Y  s     z9Middlebury2014Stereo._download_dataset.<locals>.<genexpr>zEhttps://vision.middlebury.edu/stereo/submit3/zip/MiddEval3-data-F.zip)r   r   r   zMiddEval3/testF)exist_ok	MiddEval3)r   rs   r   r   r   rQ   r   makedirsr   walkshutilmovermtree)r   r   base_url
split_namesplit_scene
split_rootr   
scene_name	scene_urltest_set_url	scene_dirscene_namesr-   r   scene_dst_dirscene_src_dirr   r   r    r   C  s@   z&Middlebury2014Stereo._download_datasetr>   c                    rj   )az  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            ``valid_mask`` is implicitly ``None`` for `split=test`.
        r   rT   r   rK   rm   r   r   r    rK   i     z Middlebury2014Stereo.__getitem__)rr   r   FNF)rM   rN   rO   rP   r   r?   r   rQ   r   r	   boolr   r   r   r'   r
   r@   rA   r=   r   rR   rT   rK   rU   r   r   r   r    r   w  s>    9/
 A*&r   c                       st   e Zd ZdZdZ	ddeeef dee	 ddf fddZ
d	edeejdf fd
dZdedef fddZ  ZS )	CREStereoa  Synthetic dataset used in training the `CREStereo <https://arxiv.org/pdf/2203.11483.pdf>`_ architecture.
    Dataset details on the official paper `repo <https://github.com/megvii-research/CREStereo>`_.

    The dataset is expected to have the following structure: ::

        root
            CREStereo
                tree
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.jpg
                    img1_right.disp.jpg
                    img2_left.jpg
                    img2_right.jpg
                    img2_left.disp.jpg
                    img2_right.disp.jpg
                    ...
                shapenet
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.jpg
                    img1_right.disp.jpg
                    ...
                reflective
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.jpg
                    img1_right.disp.jpg
                    ...
                hole
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.jpg
                    img1_right.disp.jpg
                    ...

    Args:
        root (str): Root directory of the dataset.
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    TNr   r   r   c                    s   t  || t|d }g d}|D ]<}t|| d }t|| d }| ||}|  j|7  _t|| d }t|| d }	| ||	}
|  j|
7  _qd S )Nr   )shapenet
reflectivetreeholez
*_left.jpgz*_right.jpgz*_left.disp.pngz*_right.disp.pngr\   )r   r   r   dirsr   r]   r^   rH   r_   r`   ra   r   r   r    r     s   zCREStereo.__init__r!   c                 C   <   t jt|t jd}|d d d d d f d }d }||fS )Ndtypeg      @@r@   r   r   r#   float32rf   r   r   r    r=        zCREStereo._read_disparityr>   c                    rj   )a  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not
            generate a valid mask.
        rl   rm   r   r   r    rK     rn   zCREStereo.__getitem__r*   r   r   r   r   r    r   w  s    )
r   c                	       r   e Zd ZdZddeeef dedee ddf fdd	Z	d
ede
ejdf fddZdedef fddZ  ZS )FallingThingsStereoa  `FallingThings <https://research.nvidia.com/publication/2018-06_falling-things-synthetic-dataset-3d-object-detection-and-pose-estimation>`_ dataset.

    The dataset is expected to have the following structure: ::

        root
            FallingThings
                single
                    dir1
                        scene1
                            _object_settings.json
                            _camera_settings.json
                            image1.left.depth.png
                            image1.right.depth.png
                            image1.left.jpg
                            image1.right.jpg
                            image2.left.depth.png
                            image2.right.depth.png
                            image2.left.jpg
                            image2.right
                            ...
                        scene2
                    ...
                mixed
                    scene1
                        _object_settings.json
                        _camera_settings.json
                        image1.left.depth.png
                        image1.right.depth.png
                        image1.left.jpg
                        image1.right.jpg
                        image2.left.depth.png
                        image2.right.depth.png
                        image2.left.jpg
                        image2.right
                        ...
                    scene2
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory where FallingThings is located.
        variant (string): Which variant to use. Either "single", "mixed", or "both".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    singleNr   variantr   r   c                    s   t  || t|d }t|ddd dgdgddgd| }tdd tdd}|D ]H}t|| ||  d	 }t|| ||  d
 }|  j| ||7  _t|| ||  d }	t|| ||  d }
|  j| |	|
7  _q-d S )NFallingThingsr  )r  mixedr   rv   r  r  rX   )r  r  z
*.left.jpgz*.right.jpgz*.left.depth.pngz*.right.depth.pngr   r   r   r   rQ   r   r<   r   )r   r   r  r   variantssplit_prefixr   r   r   r_   r`   r   r   r    r     s(   
zFallingThingsStereo.__init__r!   c                 C   s   t t|}t|jd }t|d8}t|}|d d d d }d\}}|| | |t j	 }	|	d d d d d f }	d }
|	|
fW  d    S 1 sOw   Y  d S )Nz_camera_settings.jsonrcamera_settingsr   intrinsic_settingsfx)   d   )
r@   r   r   r#   r   r   jsonloadastyper
  )r   r!   depthcamera_settings_pathf
intrinsicsfocalbaselinepixel_constantrg   rh   r   r   r    r=     s   
$z#FallingThingsStereo._read_disparityr>   c                    rj   rk   rl   rm   r   r   r    rK   (  rn   zFallingThingsStereo.__getitem__)r  Nro   r   r   r   r    r    s
    ,,r  c                       s|   e Zd ZdZ			ddeeef dededee d	df
 fd
dZ	ded	e
ejdf fddZded	ef fddZ  ZS )SceneFlowStereoa  Dataset interface for `Scene Flow <https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html>`_ datasets.
    This interface provides access to the `FlyingThings3D, `Monkaa` and `Driving` datasets.

    The dataset is expected to have the following structure: ::

        root
            SceneFlow
                Monkaa
                    frames_cleanpass
                        scene1
                            left
                                img1.png
                                img2.png
                            right
                                img1.png
                                img2.png
                        scene2
                            left
                                img1.png
                                img2.png
                            right
                                img1.png
                                img2.png
                    frames_finalpass
                        scene1
                            left
                                img1.png
                                img2.png
                            right
                                img1.png
                                img2.png
                        ...
                        ...
                    disparity
                        scene1
                            left
                                img1.pfm
                                img2.pfm
                            right
                                img1.pfm
                                img2.pfm
                FlyingThings3D
                    ...
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory where SceneFlow is located.
        variant (string): Which dataset variant to user, "FlyingThings3D" (default), "Monkaa" or "Driving".
        pass_name (string): Which pass to use, "clean" (default), "final" or "both".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.

    FlyingThings3DcleanNr   r  	pass_namer   r   c                    s(  t  || t|d }t|ddd t|ddd dgdgddgd| }|| }td	td	d	 d	 td	d	 d	 d
}|D ]P}t|| ||  d d }t|| ||  d d }	|  j| ||	7  _t|d ||  d d }
t|d ||  d d }|  j| |
|7  _qAd S )N	SceneFlowr  )r&  DrivingMonkaarv   r(  )r'  finalr   frames_cleanpassframes_finalpassrX   )r+  r&  r*  r1   r}   r2   	disparityz*.pfmr  )r   r   r  r(  r   passesprefix_directoriesr   r]   r^   r_   r`   r   r   r    r   m  s.   zSceneFlowStereo.__init__r!   c                 C   rb   r*   rc   rf   r   r   r    r=     ri   zSceneFlowStereo._read_disparityr>   c                    rj   rk   rl   rm   r   r   r    rK     rn   zSceneFlowStereo.__getitem__)r&  r'  Nro   r   r   r   r    r%  7  s$    8
%r%  c                	       s   e Zd ZdZdZddeeef dedee	 ddf fd	d
Z
dedeeef fddZdedeed eejejf f fddZdedef fddZ  ZS )SintelStereoa  Sintel `Stereo Dataset <http://sintel.is.tue.mpg.de/stereo>`_.

    The dataset is expected to have the following structure: ::

        root
            Sintel
                training
                    final_left
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...
                    final_right
                        scene2
                            img1.png
                            img2.png
                            ...
                        ...
                    disparities
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...
                    occlusions
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...
                    outofframe
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...

    Args:
        root (str or ``pathlib.Path``): Root directory where Sintel Stereo is located.
        pass_name (string): The name of the pass to use, either "final", "clean" or "both".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    Tr,  Nr   r(  r   r   c           	         s   t  || t|ddd t|d }dgdgddgd| }|D ]B}t|d | d d	 d
 }t|d | d d	 d
 }|  j| ||7  _t|d d d	 d
 }|  j| |d 7  _q"d S )Nr(  )r,  r'  r   rv   Sintelr,  r'  training_leftrX   r}   _rightra   )r   r   r   r   rQ   r   r<   r   )	r   r   r(  r   
pass_namesr   r   r   r   r   r   r    r     s    zSintelStereo.__init__r!   c                 C   s   t |}|j}|j}|jj}t|d |j | }t|d |j | }tj|s2td| dtj|s@td| d||fS )N
occlusions
outofframezOcclusion mask z does not existzOut of frame mask )r   r   r   rQ   r   r   r   r5   )r   r!   fpathbasenamescenedir	sampledirocclusion_pathoutofframe_pathr   r   r    _get_occlussion_mask_paths  s   z'SintelStereo._get_occlussion_mask_pathsr   c           
      C   s   |d u rdS t jt|t jd}t j|ddd\}}}|d |d  |d  }t |d	}| |\}}t t|d
k}t t|d
k}	t |	|}||fS )Nr   r     )axis   @   i @  )   r   r   r   )	r@   r   r   r#   r
  rs   	transposer@  logical_and)
r   r!   rg   r  gbocclued_mask_pathout_of_frame_mask_pathrh   off_maskr   r   r    r=     s   zSintelStereo._read_disparityr>   c                    rj   )a  Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images whilst
            the valid_mask is a numpy array of shape (H, W).
        r   rm   r   r   r    rK     r   zSintelStereo.__getitem__)r,  N)rM   rN   rO   rP   r?   r   rQ   r   r	   r   r   r
   r@  r@   rA   r=   rR   rT   rK   rU   r   r   r   r    r2    s    ,,*r2  c                	       r  )
InStereo2ka  `InStereo2k <https://github.com/YuhuaXu/StereoDataset>`_ dataset.

    The dataset is expected to have the following structure: ::

        root
            InStereo2k
                train
                    scene1
                        left.png
                        right.png
                        left_disp.png
                        right_disp.png
                        ...
                    scene2
                    ...
                test
                    scene1
                        left.png
                        right.png
                        left_disp.png
                        right_disp.png
                        ...
                    scene2
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory where InStereo2k is located.
        split (string): Either "train" or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    rr   Nr   rs   r   r   c                    s   t  || t|d | }t|ddd t|d d }t|d d }| ||| _t|d d }t|d d	 }| ||| _d S )
NrN  rs   rt   rv   rX   zleft.pngz	right.pngzleft_disp.pngzright_disp.png)r   r   r   r   rQ   r<   r   r   r   r   r   r    r   A  s   zInStereo2k.__init__r!   c                 C   r  )Nr  g      @r	  rf   r   r   r    r=   P  r  zInStereo2k._read_disparityr>   c                    rj   rk   rl   rm   r   r   r    rK   W  rn   zInStereo2k.__getitem__r   ro   r   r   r   r    rN  !  s
    ,rN  c                	       s   e Zd ZdZdZddeeef dedee	 ddf fd	d
Z
dedeed eejejf f fddZdedef fddZ  ZS )ETH3DStereoaf  ETH3D `Low-Res Two-View <https://www.eth3d.net/datasets>`_ dataset.

    The dataset is expected to have the following structure: ::

        root
            ETH3D
                two_view_training
                    scene1
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    scene2
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    ...
                two_view_training_gt
                    scene1
                        disp0GT.pfm
                        mask0nocc.png
                    scene2
                        disp0GT.pfm
                        mask0nocc.png
                    ...
                two_view_testing
                    scene1
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    scene2
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory of the ETH3D Dataset.
        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    Trr   Nr   rs   r   r   c           	         s   t  || t|ddd t|d }|dkrdnd}d}t|| d	 d
 }t|| d	 d }| ||| _|dkrJtdd | jD | _d S t|| d	 d }| |d | _d S )Nrs   rt   rv   ETH3Drr   two_view_trainingtwo_view_testtwo_view_training_gtrX   rY   rZ   ru   c                 s   r~   r   r   r+   r   r   r    r.     r/   z'ETH3DStereo.__init__.<locals>.<genexpr>r[   )	r   r   r   r   rQ   r<   r   r3   r   )	r   r   rs   r   img_diranot_dirr   r   r   r   r   r    r     s   zETH3DStereo.__init__r!   r   c                 C   sN   |d u rdS t |}t|}t|jd }t|}t|t	}||fS )Nr   zmask0nocc.png)
rd   r@   re   r   r   r   r#   r   r  r   )r   r!   rg   	mask_pathrh   r   r   r    r=     s   

zETH3DStereo._read_disparityr>   c                    rj   r   r   rm   r   r   r    rK     r   zETH3DStereo.__getitem__r   )rM   rN   rO   rP   r?   r   rQ   r   r	   r   r   r
   r@   rA   r=   rR   rT   rK   rU   r   r   r   r    rO  f  s    1,*rO  )-	functoolsr  r   r   r   abcr   r   r   pathlibr   typingr   r   r   r	   r
   r   numpyr@   PILr   utilsr   r   r   visionr   rA   rS   rT   __all__partialrd   r   rV   rq   r   r   r   r  r%  r2  rN  rO  r   r   r   r    <module>   s<     q@UX  XhpzE