
from __future__ import annotations

import collections.abc
import contextlib
from collections import defaultdict
from copy import copy

import torch

from torchvision import datasets, tv_tensors
from torchvision.transforms.v2 import functional as F

__all__ = ["wrap_dataset_for_transforms_v2"]


def wrap_dataset_for_transforms_v2(dataset, target_keys=None):
    """Wrap a ``torchvision.dataset`` for usage with :mod:`torchvision.transforms.v2`.

    Example:
        >>> dataset = torchvision.datasets.CocoDetection(...)
        >>> dataset = wrap_dataset_for_transforms_v2(dataset)

    .. note::

       For now, only the most popular datasets are supported. Furthermore, the wrapper only supports dataset
       configurations that are fully supported by ``torchvision.transforms.v2``. If you encounter an error prompting you
       to raise an issue to ``torchvision`` for a dataset or configuration that you need, please do so.

    The dataset samples are wrapped according to the description below.

    Special cases:

        * :class:`~torchvision.datasets.CocoDetection`: Instead of returning the target as list of dicts, the wrapper
          returns a dict of lists. In addition, the key-value-pairs ``"boxes"`` (in ``XYXY`` coordinate format),
          ``"masks"`` and ``"labels"`` are added and wrap the data in the corresponding ``torchvision.tv_tensors``.
          The original keys are preserved. If ``target_keys`` is omitted, returns only the values for the
          ``"image_id"``, ``"boxes"``, and ``"labels"``.
        * :class:`~torchvision.datasets.VOCDetection`: The key-value-pairs ``"boxes"`` and ``"labels"`` are added to
          the target and wrap the data in the corresponding ``torchvision.tv_tensors``. The original keys are
          preserved. If ``target_keys`` is omitted, returns only the values for the ``"boxes"`` and ``"labels"``.
        * :class:`~torchvision.datasets.CelebA`: The target for ``target_type="bbox"`` is converted to the ``XYXY``
          coordinate format and wrapped into a :class:`~torchvision.tv_tensors.BoundingBoxes` tv_tensor.
        * :class:`~torchvision.datasets.Kitti`: Instead of returning the target as list of dicts, the wrapper returns a
          dict of lists. In addition, the key-value-pairs ``"boxes"`` and ``"labels"`` are added and wrap the data
          in the corresponding ``torchvision.tv_tensors``. The original keys are preserved. If ``target_keys`` is
          omitted, returns only the values for the ``"boxes"`` and ``"labels"``.
        * :class:`~torchvision.datasets.OxfordIIITPet`: The target for ``target_type="segmentation"`` is wrapped into a
          :class:`~torchvision.tv_tensors.Mask` tv_tensor.
        * :class:`~torchvision.datasets.Cityscapes`: The target for ``target_type="semantic"`` is wrapped into a
          :class:`~torchvision.tv_tensors.Mask` tv_tensor. The target for ``target_type="instance"`` is *replaced* by
          a dictionary with the key-value-pairs ``"masks"`` (as :class:`~torchvision.tv_tensors.Mask` tv_tensor) and
          ``"labels"``.
        * :class:`~torchvision.datasets.WIDERFace`: The value for key ``"bbox"`` in the target is converted to ``XYXY``
          coordinate format and wrapped into a :class:`~torchvision.tv_tensors.BoundingBoxes` tv_tensor.
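
    For illustration, a sample of a wrapped :class:`~torchvision.datasets.CocoDetection` could then be consumed as
    follows (a minimal sketch, assuming the default ``target_keys``):

        >>> image, target = dataset[0]
        >>> sorted(target.keys())
        ['boxes', 'image_id', 'labels']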

    Image classification datasets

        This wrapper is a no-op for image classification datasets, since they were already fully supported by
        :mod:`torchvision.transforms` and thus no change is needed for :mod:`torchvision.transforms.v2`.

    Segmentation datasets

        Segmentation datasets, e.g. :class:`~torchvision.datasets.VOCSegmentation`, return a two-tuple of
        :class:`PIL.Image.Image`'s. This wrapper leaves the image as is (first item), while wrapping the
        segmentation mask into a :class:`~torchvision.tv_tensors.Mask` (second item).
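
        For example, a minimal sketch (assuming ``dataset`` is an already wrapped ``VOCSegmentation``):

            >>> image, mask = dataset[0]
            >>> isinstance(mask, tv_tensors.Mask)
            True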

    Video classification datasets

        Video classification datasets, e.g. :class:`~torchvision.datasets.Kinetics`, return a three-tuple containing a
        :class:`torch.Tensor` for the video and audio, and an :class:`int` as label. This wrapper wraps the video into a
        :class:`~torchvision.tv_tensors.Video` while leaving the other items as is.

        .. note::

            Only datasets constructed with ``output_format="TCHW"`` are supported, since the alternative
            ``output_format="THWC"`` is not supported by :mod:`torchvision.transforms.v2`.
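
        For example, a minimal sketch (assuming ``dataset`` is an already wrapped ``Kinetics``):

            >>> video, audio, label = dataset[0]
            >>> isinstance(video, tv_tensors.Video)
            True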

    Args:
        dataset: the dataset instance to wrap for compatibility with transforms v2.
        target_keys: Target keys to return in case the target is a dictionary. If ``None`` (default), selected keys are
            specific to the dataset. If ``"all"``, returns the full target. Can also be a collection of strings for
            fine-grained access. Currently only supported for :class:`~torchvision.datasets.CocoDetection`,
            :class:`~torchvision.datasets.VOCDetection`, :class:`~torchvision.datasets.Kitti`, and
            :class:`~torchvision.datasets.WIDERFace`. See above for details.
    """
    if not (
        target_keys is None
        or target_keys == "all"
        or (isinstance(target_keys, collections.abc.Collection) and all(isinstance(key, str) for key in target_keys))
    ):
        raise ValueError(
            f"`target_keys` can be None, 'all', or a collection of strings denoting the keys to be returned, "
            f"but got {target_keys}"
        )

    # Dynamically create a "Wrapped<Dataset>" class that inherits from both VisionDatasetTVTensorWrapper and the
    # original dataset class, so that `isinstance` checks against the original class keep working.
    wrapped_dataset_cls = type(f"Wrapped{type(dataset).__name__}", (VisionDatasetTVTensorWrapper, type(dataset)), {})
    return wrapped_dataset_cls(dataset, target_keys)
 w44    c                      e Zd Zd Zy)WrapperFactoriesc                      fd}|S )Nc                    | <   | S r    )wrapper_factorydataset_clsselfs    r   	decoratorz,WrapperFactories.register.<locals>.decoratorq   s     /D""r   r$   )r'   r&   r(   s   `` r   registerzWrapperFactories.registerp   s    	# r   N)r   
__module____qualname__r)   r$   r   r   r!   r!   o   s    r   r!   c                  *    e Zd Zd Zd Zd Zd Zd Zy)r   c                   t        |      }t        |t        j                        st	        d|j
                   d      |j                         D ]  }|t        v rft        |   }|Y|t        j                  t        j                  t        j                  t        j                  hvrt        d|j
                   d       nT|t        j                  u sd|j
                   d}|t        j                  j                         v r| d}t	        |       || _        || _         ||      | _        |j$                  d c| _        |_        |j&                  d c| _        |_        |j(                  d c| _        |_        y )NzYThis wrapper is meant for subclasses of `torchvision.datasets.VisionDataset`, but got a 'z' instead.
For an example of how to perform the wrapping for custom datasets, see

https://pytorch.org/vision/main/auto_examples/plot_tv_tensors.html#do-i-have-to-wrap-the-output-of-the-datasets-myselfzq`target_keys` is currently only supported for `CocoDetection`, `VOCDetection`, `Kitti`, and `WIDERFace`, but got .z$No wrapper exists for dataset class z". Please wrap the output yourself.z If an automated wrapper for this dataset would be useful for you, please open an issue at https://github.com/pytorch/vision/issues.)r   r   r   VisionDataset	TypeErrorr   mroWRAPPER_FACTORIESCocoDetectionVOCDetectionKitti	WIDERFacer   __dict__values_dataset_target_keys_wrapper	transformtarget_transform
transforms)r'   r   r   r&   clsr%   msgs          r   __init__z%VisionDatasetTVTensorWrapper.__init__   s   7m'8#9#9:)223 4II  ??$C''"3C"8*s**))NN&&	; 0 %447LL>D  ...<[=Q=Q<RRtu("3"3":":"<<%  \ ]   n$- %0  ''= -4,=,=t)):A:R:RTX7w7.5.@.@$++r   c                    t        j                  t              5  t        j	                  | |      cd d d        S # 1 sw Y   nxY wt        | j                  |      S r   )
contextlibsuppressAttributeErrorobject__getattribute__getattrr9   )r'   items     r   __getattr__z(VisionDatasetTVTensorWrapper.__getattr__   s?      0**46 100 t}}d++s	   :Ac                ~    | j                   |   }| j                  ||      }| j                   | j                  | }|S r   )r9   r;   r>   )r'   idxsamples      r   __getitem__z(VisionDatasetTVTensorWrapper.__getitem__   sC     s#sF+ ??&$T__f-Fr   c                ,    t        | j                        S r   )lenr9   )r'   s    r   __len__z$VisionDatasetTVTensorWrapper.__len__   s    4==!!r   c                    t        | j                        }| j                  |_        | j                  |_        | j                  |_        t
        || j                  ffS r   )r   r9   r<   r>   r=   r	   r:   )r'   r   s     r   
__reduce__z'VisionDatasetTVTensorWrapper.__reduce__   sN     t}}% NN!__#'#8#8 -9J9J/KKKr   N)r   r*   r+   rA   rJ   rN   rQ   rS   r$   r   r   r   r      s    /Gb,"Lr   r   c                    t        |  d      )Nz is currently not supported by this wrapper. If this would be helpful for you, please open an issue at https://github.com/pytorch/vision/issues.)RuntimeError)descriptions    r   raise_not_supportedrW      s     
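

# A sketch of how a factory for a custom dataset class could be registered (`MyMaskDataset` is a hypothetical
# subclass of `datasets.VisionDataset` returning `(PIL image, PIL mask)` samples, shown for illustration only):
#
#     @WRAPPER_FACTORIES.register(MyMaskDataset)
#     def my_mask_dataset_wrapper_factory(dataset, target_keys):
#         def wrapper(idx, sample):
#             image, mask = sample
#             return image, tv_tensors.Mask(mask)
#
#         return wrapper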


def identity(item):
    return item


def identity_wrapper_factory(dataset, target_keys):
    def wrapper(idx, sample):
        return sample

    return wrapper


def pil_image_to_mask(pil_image):
    return tv_tensors.Mask(pil_image)


def parse_target_keys(target_keys, *, available, default):
    if target_keys is None:
        target_keys = default
    if target_keys == "all":
        target_keys = available
    else:
        target_keys = set(target_keys)
        extra = target_keys - available
        if extra:
            raise ValueError(f"Target keys {sorted(extra)} are not available")

    return target_keys


def list_of_dicts_to_dict_of_lists(list_of_dicts):
    dict_of_lists = defaultdict(list)
    for dct in list_of_dicts:
        for key, value in dct.items():
            dict_of_lists[key].append(value)
    return dict(dict_of_lists)


def wrap_target_by_type(target, *, target_types, type_wrappers):
    if not isinstance(target, (tuple, list)):
        target = [target]

    wrapped_target = tuple(
        type_wrappers.get(target_type, identity)(item) for target_type, item in zip(target_types, target)
    )

    if len(wrapped_target) == 1:
        wrapped_target = wrapped_target[0]

    return wrapped_target


def classification_wrapper_factory(dataset, target_keys):
    return identity_wrapper_factory(dataset, target_keys)


for dataset_cls in [
    datasets.Caltech256,
    datasets.CIFAR10,
    datasets.CIFAR100,
    datasets.ImageNet,
    datasets.MNIST,
    datasets.FashionMNIST,
    datasets.GTSRB,
    datasets.DatasetFolder,
    datasets.ImageFolder,
    datasets.Imagenette,
]:
    WRAPPER_FACTORIES.register(dataset_cls)(classification_wrapper_factory)


def segmentation_wrapper_factory(dataset, target_keys):
    def wrapper(idx, sample):
        image, mask = sample
        return image, pil_image_to_mask(mask)

    return wrapper


for dataset_cls in [
    datasets.VOCSegmentation,
]:
    WRAPPER_FACTORIES.register(dataset_cls)(segmentation_wrapper_factory)


def video_classification_wrapper_factory(dataset, target_keys):
    if dataset.video_clips.output_format == "THWC":
        raise RuntimeError(
            f"{type(dataset).__name__} with `output_format='THWC'` is not supported by this wrapper, "
            f"since it is not compatible with the transformations. Please use `output_format='TCHW'` instead."
        )

    def wrapper(idx, sample):
        video, audio, label = sample

        video = tv_tensors.Video(video)

        return video, audio, label

    return wrapper


for dataset_cls in [
    datasets.HMDB51,
    datasets.Kinetics,
    datasets.UCF101,
]:
    WRAPPER_FACTORIES.register(dataset_cls)(video_classification_wrapper_factory)
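

# For example, `list_of_dicts_to_dict_of_lists` above transposes COCO-style annotations
# (`b0` and `b1` stand in for arbitrary bbox values):
#
#     list_of_dicts_to_dict_of_lists([{"bbox": b0, "category_id": 1}, {"bbox": b1, "category_id": 3}])
#     # -> {"bbox": [b0, b1], "category_id": [1, 3]}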


@WRAPPER_FACTORIES.register(datasets.Caltech101)
def caltech101_wrapper_factory(dataset, target_keys):
    if "annotation" in dataset.target_type:
        raise_not_supported("Caltech101 dataset with `target_type=['annotation', ...]`")

    return classification_wrapper_factory(dataset, target_keys)


@WRAPPER_FACTORIES.register(datasets.CocoDetection)
def coco_dectection_wrapper_factory(dataset, target_keys):
    target_keys = parse_target_keys(
        target_keys,
        available={
            # native
            "segmentation",
            "area",
            "iscrowd",
            "image_id",
            "bbox",
            "category_id",
            # added by the wrapper
            "boxes",
            "masks",
            "labels",
        },
        default={"image_id", "boxes", "labels"},
    )

    def segmentation_to_mask(segmentation, *, canvas_size):
        from pycocotools import mask

        segmentation = (
            mask.frPyObjects(segmentation, *canvas_size)
            if isinstance(segmentation, dict)
            else mask.merge(mask.frPyObjects(segmentation, *canvas_size))
        )
        return torch.from_numpy(mask.decode(segmentation))

    def wrapper(idx, sample):
        image_id = dataset.ids[idx]

        image, target = sample

        if not target:
            return image, dict(image_id=image_id)

        canvas_size = tuple(F.get_size(image))

        batched_target = list_of_dicts_to_dict_of_lists(target)
        target = {}

        if "image_id" in target_keys:
            target["image_id"] = image_id

        if "boxes" in target_keys:
            target["boxes"] = F.convert_bounding_box_format(
                tv_tensors.BoundingBoxes(
                    batched_target["bbox"],
                    format=tv_tensors.BoundingBoxFormat.XYWH,
                    canvas_size=canvas_size,
                ),
                new_format=tv_tensors.BoundingBoxFormat.XYXY,
            )

        if "masks" in target_keys:
            target["masks"] = tv_tensors.Mask(
                torch.stack(
                    [
                        segmentation_to_mask(segmentation, canvas_size=canvas_size)
                        for segmentation in batched_target["segmentation"]
                    ]
                ),
            )

        if "labels" in target_keys:
            target["labels"] = torch.tensor(batched_target["category_id"])

        for target_key in target_keys - {"image_id", "boxes", "masks", "labels"}:
            target[target_key] = batched_target[target_key]

        return image, target

    return wrapper


WRAPPER_FACTORIES.register(datasets.CocoCaptions)(identity_wrapper_factory)


VOC_DETECTION_CATEGORIES = [
    "__background__",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]
VOC_DETECTION_CATEGORY_TO_IDX = dict(zip(VOC_DETECTION_CATEGORIES, range(len(VOC_DETECTION_CATEGORIES))))


@WRAPPER_FACTORIES.register(datasets.VOCDetection)
def voc_detection_wrapper_factory(dataset, target_keys):
    target_keys = parse_target_keys(
        target_keys,
        available={
            # native
            "annotation",
            # added by the wrapper
            "boxes",
            "labels",
        },
        default={"boxes", "labels"},
    )

    def wrapper(idx, sample):
        image, target = sample

        batched_instances = list_of_dicts_to_dict_of_lists(target["annotation"]["object"])

        if "annotation" not in target_keys:
            target = {}

        if "boxes" in target_keys:
            target["boxes"] = tv_tensors.BoundingBoxes(
                [
                    [int(bndbox[part]) for part in ("xmin", "ymin", "xmax", "ymax")]
                    for bndbox in batched_instances["bndbox"]
                ],
                format=tv_tensors.BoundingBoxFormat.XYXY,
                canvas_size=(image.height, image.width),
            )

        if "labels" in target_keys:
            target["labels"] = torch.tensor(
                [VOC_DETECTION_CATEGORY_TO_IDX[category] for category in batched_instances["name"]]
            )

        return image, target

    return wrapper


@WRAPPER_FACTORIES.register(datasets.SBDataset)
def sbd_wrapper(dataset, target_keys):
    if dataset.mode == "boundaries":
        raise_not_supported("SBDataset with mode='boundaries'")

    return segmentation_wrapper_factory(dataset, target_keys)


@WRAPPER_FACTORIES.register(datasets.CelebA)
def celeba_wrapper_factory(dataset, target_keys):
    if any(target_type in dataset.target_type for target_type in ["attr", "landmarks"]):
        raise_not_supported("`CelebA` dataset with `target_type=['attr', 'landmarks', ...]`")

    def wrapper(idx, sample):
        image, target = sample

        target = wrap_target_by_type(
            target,
            target_types=dataset.target_type,
            type_wrappers={
                "bbox": lambda item: F.convert_bounding_box_format(
                    tv_tensors.BoundingBoxes(
                        item, format=tv_tensors.BoundingBoxFormat.XYWH, canvas_size=(image.height, image.width)
                    ),
                    new_format=tv_tensors.BoundingBoxFormat.XYXY,
                ),
            },
        )

        return image, target

    return wrapper


KITTI_CATEGORIES = ["Car", "Van", "Truck", "Pedestrian", "Person_sitting", "Cyclist", "Tram", "Misc", "DontCare"]
KITTI_CATEGORY_TO_IDX = dict(zip(KITTI_CATEGORIES, range(len(KITTI_CATEGORIES))))


@WRAPPER_FACTORIES.register(datasets.Kitti)
def kitti_wrapper_factory(dataset, target_keys):
    target_keys = parse_target_keys(
        target_keys,
        available={
            # native
            "type",
            "truncated",
            "occluded",
            "alpha",
            "bbox",
            "dimensions",
            "location",
            "rotation_y",
            # added by the wrapper
            "boxes",
            "labels",
        },
        default={"boxes", "labels"},
    )

    def wrapper(idx, sample):
        image, target = sample

        if target is None:
            return image, target

        batched_target = list_of_dicts_to_dict_of_lists(target)

        target = {}

        if "boxes" in target_keys:
            target["boxes"] = tv_tensors.BoundingBoxes(
                batched_target["bbox"],
                format=tv_tensors.BoundingBoxFormat.XYXY,
                canvas_size=(image.height, image.width),
            )

        if "labels" in target_keys:
            target["labels"] = torch.tensor([KITTI_CATEGORY_TO_IDX[category] for category in batched_target["type"]])

        for target_key in target_keys - {"boxes", "labels"}:
            target[target_key] = batched_target[target_key]

        return image, target

    return wrapper


@WRAPPER_FACTORIES.register(datasets.OxfordIIITPet)
def oxford_iiit_pet_wrapper_factor(dataset, target_keys):
    def wrapper(idx, sample):
        image, target = sample

        if target is not None:
            target = wrap_target_by_type(
                target,
                target_types=dataset._target_types,
                type_wrappers={
                    "segmentation": pil_image_to_mask,
                },
            )

        return image, target

    return wrapper


@WRAPPER_FACTORIES.register(datasets.Cityscapes)
def cityscapes_wrapper_factory(dataset, target_keys):
    if any(target_type in dataset.target_type for target_type in ["polygon", "color"]):
        raise_not_supported("`Cityscapes` dataset with `target_type=['polygon', 'color', ...]`")

    def instance_segmentation_wrapper(mask):
        # Cityscapes encodes instances in the id: ids >= 1_000 encode `category * 1_000 + instance`,
        # so integer division recovers the category label for each instance mask.
        data = pil_image_to_mask(mask)
        masks = []
        labels = []
        for id in data.unique():
            masks.append(data == id)
            label = id
            if label >= 1_000:
                label //= 1_000
            labels.append(label)
        return dict(masks=tv_tensors.Mask(torch.stack(masks)), labels=torch.stack(labels))

    def wrapper(idx, sample):
        image, target = sample

        target = wrap_target_by_type(
            target,
            target_types=dataset.target_type,
            type_wrappers={
                "instance": instance_segmentation_wrapper,
                "semantic": pil_image_to_mask,
            },
        )

        return image, target

    return wrapper


@WRAPPER_FACTORIES.register(datasets.WIDERFace)
def widerface_wrapper(dataset, target_keys):
    target_keys = parse_target_keys(
        target_keys,
        available={
            "bbox",
            "blur",
            "expression",
            "illumination",
            "occlusion",
            "pose",
            "invalid",
        },
        default="all",
    )

    def wrapper(idx, sample):
        image, target = sample

        if target is None:
            return image, target

        target = {key: target[key] for key in target_keys}

        if "bbox" in target_keys:
            target["bbox"] = F.convert_bounding_box_format(
                tv_tensors.BoundingBoxes(
                    target["bbox"], format=tv_tensors.BoundingBoxFormat.XYWH, canvas_size=(image.height, image.width)
                ),
                new_format=tv_tensors.BoundingBoxFormat.XYXY,
            )

        return image, target

    return wrapper