
    PhU]             !       ^   U d dl Z d dlZd dlZd dlZd dlmZ d dl mZmZ d dlm	Z	m
Z
 d dlmZmZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlmc mZ d dlmZmZm Z  d dl!m"Z" d dl#m$Z$ d d	lm%Z%m&Z&m'Z'm(Z( d d
l)m*Z*m+Z+m,Z,m-Z- d dl.m/Z0 d dl1m2Z2 ejf                  jh                  Z4g Z5ee6   e7d<   ejp                  jr                  jt                  Z: G d de      Z;	 dodedejx                  de=fdZ> ee>ejx                  j~                  d      Z@ ee>ejx                  j~                        ZA ee>ejx                  j                        ZCde deDde fdZE e"e:j                         e-d      eAde de fd                     ZF e"e:j                         e-d      eAde de fd                      ZG e"e:j                         e-d      eAde de d!eId"eIfd#                     ZH e"e:j                         e-d      eAd$e d%eId&eId'eId(e=d)e fd*                     ZJ e"e:j                  j                  g      d+        ZM e"e:j                  j@                  g      d,e fd-       ZN e"e:j                         e-       eAd.e de fd/                     ZO e"e:j                         e-d      eAd$e d.e fd0                     ZP e"e:j                         e-d      d$e d.e d1eId2eIfd3              ZQ e"e:j                         e-       eAd.e de fd4                     ZR e"e:j                         e-       eAd$e d.e de fd5                     ZS e"e:j                         e-d      d$e d.e d"eIfd6              ZT e"e:j                         e-d      eAd$e d.e d7eId8e=fd9                     ZU e"e:j                         e-d      eAdpd:e d.e d;e6fd<                     ZV e"e:j                        eAd$e d=e fd>              ZW e"e:j                         e-       eAd.e de fd?                     ZX e"e:j                         e-d      eAd$e d.e de fd@                     ZY e"e:j                        d.e dAe de fdB       ZZ e"e:j                        d$e d.e dAe dee e f   fdC       Z[ e"e:j                        e:j                  j                  j                  e4j                         e-       eA	 	 dqd.e dDe dEeIdFeIdGe=dHeej                     de fdI                            Z\ e"e:j                        e:j                  j                  j                  e4j                        eA	 	 dqd.e dDe dEeIdFeIdGe=dHeej                     de fdJ                     Za e"e:j                         e-       eAd$e d.e dDe dEeIdFeIdGe=d8e=de fdK                     Zb e"e:j                         e-d      eAd$e d.e dLe de fdM                     ZcdNe dOeDfdPZddQej                  fdRZf e"e:j                         e-       eAe;j                  j                  fd.e dSe dOeDde fdT                     Zg e"e:j                         e-d      eAd$e d=e dSe dOeDfdU                     Zj e"e:j                         e-       eAe;j                  j                  dVfd.e dSe dOeDd!eIfdW                     Zk e"e:j                  j                        eAd$e d.e dSe dOeDd!eIf
dX              Zl e"e:j                  j                        eAd$e d.e dSe dOeDd!eIde fdY              Zn e"e:j                  j                        eAd$e d.e dSe dOeDdZeIf
d[              Zo e"e:j                  j                        eAd$e d.e dSe dOeDdZeIde fd\              Zqd$e d.e dSe dAee    dOeDd]eDd^e de fd_Zr e"e:j                         e-d      eAd$e d.e deDde fd`                     Zs e"e:j                         e-d      d$e d.e dSe dAee    dOeDd]eDd^e de fda              Zt e"e:j                         e-d      d$e d.e dSe dAee    dOeDd]eDd^e de fdb              Zu e"e:j                         e-       eAde;j                  j                  fd.e dSe dAee    dOeDde f
dc                     Zv e"e:j                         e-d      eAde;j                  j                  fd$e d.e dSe dAee    dOeDde fdd                     Zw e"e:j                         e-       eAe;j                  j                  fd=e dSe dOeDde fde                     Zx e"e:j                         e-d      eAe;j                  j                  fd$e d.e dSe dOeDde f
df                     Zy e"e:j                         e-       drd=e dge dheIfdi              Zz e"e:j                         e-       dje dke de fdl              Z{ e"e:j                         e-       d$e dmeeD   deDdneDdoeDdpeDfdq              Z| e"e:j                  j@                        	 	 	 	 dsd.e deDdneeD   doeeD   dpeDf
ds       Z~ e"e:j                         e-       d$e dmeeD   deDdteDfdu              Z e"e:j                          e-       d$e dmeeD   dveDdweDdxeDf
dy              Zd$e de dzej                  fd{Z e"e:j                         e-d      e@d$e d|e deDdzej                  fd}                     Z e"e:j                         e-       e@d$e d|e deDdzej                  fd~                     Zd Z e"e:j
                         e-       eAd=e deeD   deeD   deeD   deeD   de fd                     Z e"e:j                         e-       eAd=e deeD   deeD   deeD   deeD   deeD   de fd                     Z e"e:j                         e-       d$e de d&eIfd              Z e"e:j                         e-       d:e deeD   deDdeDdpeDde fd              Z e"e:j                  j                        eA	 dtd$e d.e deeI   de fd              Z e"e:j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d=e dheIdee=   fd                     Z e"e:j                         e-dd      d=e dheIdee=   fd              Z e"e:j                         e-       de deDde=fd              Z e"e:j                         e-       de deDde=fd              Z e"e:j                   j@                        dud.e dge d%eIde fd       Z e"e:j                   j                        dud.e dgeId%eIde fd       Z e"e:j&                         e-       	 	 	 dvdAe de deDde=de=de fd              Z e"e:j(                         e-       d$e de deDdeDde=f
d              ZdeeD   fdZ e"e:j,                        	 dwd.e deeD   deDdee    fd       Z e"e:j.                  j@                        dwd=e deDdeDdee df   fd       Z e"e:j0                  j                        	 dwd=e deeD   deDdee df   fd       Z e"e:j2                  j@                        dwd.e deDdeDdee df   fd       Ze:j4                  j6                  j                  e4j                        	 dwd.e de deDdee    fd       Z e"e:j:                         e-       eAdxd.e de de d!eDd%eDf
d                     Z e"e:j<                         e-       eA	 	 	 dyd.e de de d!eDd%eDde=fd                     Z e"e:j>                         e-       eAdxd.e de de d!eDd%eDf
d                     Z e"e:j@                  j                        eAd$e d=e de de dee    deDdeDdeDdeDdee=   deee    ee    ee    f   fd              Z e"e:j@                  j                        d$e d=e de de dee    deDdeDdeDdeDdee=   dej@                  dej@                  dej@                  deee    ee    ee    f   fd       Zdee    dee    fdZ e"e:jF                  j                        de d=e deeD   de de dAee    dee    dee=   deee    ee    ee    f   fd       Z e"e:jF                  j                        de d=e deeD   de de dAee    dee    dee=   dej@                  dej@                  dej@                  deee    ee    ee    f   fd       Zd=e dAee    dee    dee    dee    dGe=deIdeIde=dee e e ee    ee    f   fdƄZ e"e:jL                         e-dddɫ      d=e dAee    dee    dee    dee    dGe=deIdeIdee e e f   fdʄ              Ze:jL                  j                  j                  e4j                        e:jL                  j                  j                  e4j                        d=e dAee    dee    dee    dee    dGe=deIdeIdee e e f   fd˄              Ze:jP                  j                  j                  e4j                        dwdee    fd̄       Z e"e:jT                  j                        d=e dAee    dee    de de deIdeIdee e e f   fd̈́       Z e"e:jV                  j                        d=e dAee    dee    de de dGe=deIdeIdee e e f   fd΄       Z e"e:jV                  jX                        d=e dAee    dee    dGe=deIdeIdee e e f   fdτ       Z e"e:j\                  j                        d=e dAee    dee    de de dGe=deIdeIdee e e e e f   fdЄ       Z e"e:j^                         e-dd      eAdtdф                     Zd҄ Zdӄ Z e"e:jf                         e-       dddddddԜde dQeej                     deejh                     de=de=deejj                     fdل              Z e"e:jl                  e:jn                  e:jp                  g       e-       dڄ               Ze:jt                  j                  j                  e4j                         e"e:jt                         e-dddd۫      d=e dAe dee    dee    dee    dGe=deIdeIfdބ                     Zd߄ Z e"e:jx                  j                        de d=e dAee    dee    dee    dee    dee    de=deIdee=   dee ee    ee    f   fd       Z e"e:jx                  j                        de d=e dAee    dee    dee    dee    dee    de=deIdee=   dej@                  dej@                  dej@                  dee ee    ee    f   fd       Z e"e:j|                         e-ddd      d=e d$e dAe dee    dee    dee    dee    deIde fd              Z e"e:j~                         e-       eAd=e deeDeDf   fd                     Z e"e:j                        drdde'deDdte'de'd%e&f
d       Z e"e:j                         e-       drdde'deDdte'de'd%e&f
d              Zdrdde'deDdte'de'de=d%e&fdZ e"e:j                        de'deDdte'de'fd       Z e"e:j                         e-       de'deDdte'de'fd              Zde'deDdte'de'de=f
dZ e"e:j                         e-d|dL      eAd.e dee e f   fd                     Z e"e:j                         e-       	 	 	 dzde dee=eDeIf   dee=eDeIf   dHeej                     fd              Z e"e:j                        d{d       Zd Zd Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                      Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                      Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                      Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                      Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                      Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                      ZؐdodZ e"e:j                  j                        e:j                  j                  j                  e4j                        eA	 dtd=e deeD   deeI   de fd                     Z e"e:j                  j                        e:j                  j                  j                  e4j                        eA	 dtd=e deeD   deeI   de fd                     Zϐd  Z e"e:j                  j                        e:j                  j                  j                  e4j                        eA	 	 d|d=e deeD   deeI   deeI   de f
d                     Z e"e:j                  j                        e:j                  j                  j                  e4j                        eA	 	 d|d=e deeD   deeI   deeI   de f
d                     Z e"e:j                  j                        e:j                  j                  j                  e4j                        eA	 	 	 d}d=e deeD   deeI   deeI   deeI   de fd                     Z e"e:j                  j                        e:j                  j                  j                  e4j                        eA	 	 	 d}d=e deeD   deeI   deeI   deeI   de fd                     Zאd Zېd	 Zܐd
 Zݐd Z	 dodZߐd Zd ZdodZdodZd Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                      Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                      Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                      Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                      Zd ZdodZdodZd Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                      Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                      Zd Zd Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                      Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                      Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d                       Z e"e:j                  j                        e:j                  j                  j                  e4j                        e:j                  j                  j                  e4j                        d!                      Z e"e:j                  j                        e:j                  j                  j                  e4j                        eA	 	 d|d=e deeD   d"e=deeI   deeI   de fd#                     Z e"e:j                  j                        eAd=e deeD   de fd$              Z e"e:j                  j                        d%e d&e de=fd'       Z e"e:j                  e:j                   g       e-       d(               Z e"e:j                  g      d)        Zd.e dSe dAee    dOeDd]eDdee e f   fd*Z e"e:j                         e-d|d^      d.e dSe dAee    dOeDd]eDdee e f   fd+              Z e"e:j
                         e-d|d^      d.e dSe dAee    dOeDd]eDdee e f   fd,              Zde d-eIde fd.Zde d-eIde fd/Zd0e de(fd1Zd2e(d3e de fd4Z	d3ee    de fd5Z
d6eDd"e=dQej                  dejh                  fd7Zd8e d9eDd:eDd"e=fd;Zd8e d<eDd9eDd:eDd"e=f
d=Zd8e deeD   d"e=fd>Zd8e deeD   d"e=fd?Z e"e:j                          e-       eAd8e deeD   d"e=fd@                     Z	 	 	 	 d~d%e dAe dBeDdCeDd"e=dDe=de fdEZ e"e:j$                         e-       eA	 	 	 dd%e dAe dBeDdCeDd"e=de fdF                     Z e"e:j&                         e-       eAdG                      Z e"e:j(                         e-       dde;j                  j                  fdH              ZdIej@                  dJej@                  de=fdKZe:j,                  j                  j                  e4j                         e-       dL               Z e"e:j.                  j                        eA	 	 d|d%e deeDeDf   d"e=dMeeI   dNeeI   de fdO              Z e"e:j.                  j                        e:j.                  j                  j                  e4j                        e:j.                  j                  j                  e4j                         e-       eA	 dtd%e deeeDeDf      d"e=dPeeeIeIf      de f
dQ                                   Z e"e:j4                         e-dRdS      dddTdU              Z e"e:j6                         e-       dddVdW              Z e"e:j8                  j                  e:j8                  j                  g       e-       dej:                  dddXdoe&dQeej                     dYej<                  deejh                     de=f
dZ              Z e"e:j8                  j@                  g      dej:                  dddXdne&doe&dQeej                     dYej<                  deejh                     de=fd[       Z! e"e$      d\        Z" e"e:jF                        e:jF                  j                  j                  e4j                         e-       drdrde;j                  j                  fd=e dSe dhe&d]e&dAee    dOeDde fd^                     Z# e"e:jH                        e:jH                  j                  j                  e4j                         e-d|d_      d=e dSe dOeDdee e f   fd`                     Z$ e"e:jJ                  j                        	 	 	 dddadbe dce d,e ddeIdee=dfe=d&eeI   dee e e e eDeDe e e f	   fdg       Z&dh Z' e"e:jP                  g       e-       eAdxdi                     Z( e"e:jR                         e-       dj               Z) e"e:jT                  j                  e:jT                  j                  g      dddkd.e dQeej                     dee    de fdl       Z+ e"e:jX                  j                  e:jX                  jZ                  g      dtd.e deeD   fdm       Z. e"ejr                  jt                  j^                        dn        Z/ e'e:j`                  e:jb                          e'e:jd                  e:j:                          e'e:jf                  e:j>                          e'e:jh                  e:jP                          e'e:jj                  e:j                          e'e:jl                  e:jn                          e'e:jp                  e:j                          e'e:jr                  e:jt                          e'e:jv                  e:j                          e'e:jx                  e:jz                          e'e:j|                  e:j~                          e'e:j                  e:j                          e'e:j                  e:j                          e'e:j                  e:j                          e'e:j                  e:j                          e'e:j                  e:j                          e'e:j                  e:j                          e'e:j                  e:j                          e'e:j                  e:j                          e'e:j                  e:j                          e'e:j                  e:j                          e'e:j                  e:j                          e'e:j                  e:j                          e'e:j                  e:j                          e'e:j                  e:j                         y(      N)Enum)partialreduce)chainproduct)CallablecastIterableListOptionalTupleUnion)	sym_floatsym_intTensorregister_decomposition)	out_dtype)IntLike
NumberType
TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_outout_wrapper)_pytree)tree_map__all__c                       e Zd ZdZdZdZy)	Reductionr         N)__name__
__module____qualname__NONEMEANSUM     gC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/_decomp/decompositions.pyr!   r!   $   s    DD
Cr+   r!   Fftype_promotioncompute_dtype_onlyc                 J     t        j                          fd       }|S )Nc                     t        j                  | i |D cg c]  }t        |t              s| }}t	        j
                  |di\  fd}fd} 
t        ||       i t        ||      }	r|S t        ||      S c c}w )Ntype_promotion_kindc                 J    t        | t              r| j                        S | S N
isinstancer   to)xcomputation_dtypes    r,   increase_precz0type_casts.<locals>.inner.<locals>.increase_prec<   s"    !V$tt-..r+   c                 J    t        | t              r| j                        S | S r4   r5   )r8   result_dtypes    r,   decrease_precz0type_casts.<locals>.inner.<locals>.decrease_precB   s!    !V$ttL))r+   )pytreearg_tree_leavesr6   r   utilselementwise_dtypesr   )argskwargsr8   	flat_argsr:   r=   rr9   r<   r/   r-   r.   s          @@r,   innerztype_casts.<locals>.inner2   s     --t>v>
>!*QPVBWA> 	 
 +0*B*B+
,:+
'<
		 xt,P0OPHM1--1
s
   BB)	functoolswraps)r-   r.   r/   rF   s   ``` r,   
type_castsrI   -   s'    
 __Q. .6 Lr+   T)r.   r/   )r.   r8   dimreturnc                 j    t        || j                         z
        D ]  }| j                  d      }  | S )N)rangerJ   	unsqueeze)r8   rJ   _s      r,   _unsqueeze_to_dimrQ   _   s-    3=!KKO "Hr+   
grad_inputout_gradyc                 4    | d||z  z
  j                         z  S Nr"   conj_physicalrS   rT   s     r,   tanh_backwardrZ   e   s      q1q5y//111r+   c                 4    | |d|z
  z  j                         z  S rV   rW   rY   s     r,   sigmoid_backwardr\   l   s      qAE{11333r+   beta	thresholdc                 t    ||z  j                         }t        j                  ||z  |kD  | | |z  |dz   z        S N      ?)exptorchwhere)rS   r8   r]   r^   zs        r,   softplus_backwardrf   s   s=     
TA;;DI-xAS9QRRr+   grad_outputalphascaleinput_scale	is_resultself_or_resultc                     ||z  }|}|}|r&t        j                  |dk  | |z  ||z   z  | |z        S t        j                  |dk  | |z  |z  t        j                  ||z        z  | |z        S Nr   )rc   rd   rb   )	rg   rh   ri   rj   rk   rl   negcoefposcoef
negiptcoefs	            r,   elu_backwardrr   {   s     emGGJ{{a*$(@A'!
 	
 {{a*$w.>J;V1WW'!
 	
r+   c                 .    t        j                  | |      S r4   )rc   	full_likeselfvalues     r,   fill_scalarrx      s    ??4''r+   rw   c                     t        j                  j                         dk(  fd       t        j	                  |       S )Nr   c                  ,    d j                          dS )Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrJ   )rw   s   r,   <lambda>zfill_tensor.<locals>.<lambda>   s    RSXS\S\S^R__jkr+   )rc   _checkrJ   atencopyru   s    `r,   fill_tensorr      s3    	LL		qk 99T5!!r+   rv   c                 f    t        j                  t        j                  | dz   d      d      dz  S N   r   min   maxrc   clamprv   s    r,   hardsigmoidr      s)     ;;u{{4!83;a??r+   c                 H    t        j                  |dkD  |dk  z  | dz  d      S )Ng      g      @gUUUUUU?        rc   rd   rg   rv   s     r,   hardsigmoid_backwardr      s0     ;;	$y! r+   min_valmax_valc                 B    t        j                  ||k  ||k\  z  d|       S )Nr   r   )rg   rv   r   r   s       r,   hardtanh_backwardr      s$    
 ;;DGO<c;OOr+   c                 l    | t        j                  t        j                  | dz   d      d      z  dz  S r   r   r   s    r,   	hardswishr      s.     %++ekk$(:BBQFFr+   c           
      x    t        j                  |dk  dt        j                  |dk  | |dz  dz   z  |             S )Nr   r         ?r   r   s     r,   hardswish_backwardr      sA     ;;r	DAI{tax3.>?M r+   c                 6    t        j                  ||k  d|       S rn   r   )rg   rv   r^   s      r,   threshold_backwardr      s     ;;ty(![99r+   negative_slopeself_is_resultc                 <    t        j                  |dkD  | | |z        S rn   r   )rg   rv   r   r   s       r,   leaky_relu_backwardr      s      ;;taxkN.JKKr+   gradapproximatec                    d}d}d}|dk(  ri||z  dz  }d}||z  }||z  }	||||	z  z   z  }
t        j                  |
      }d|z  }d|z   }d|z  }d||z  z
  }|dd|z  |z  z   z  }||z  |z  }| ||z   z  S |}||z  dz  }ddt        j                  ||z        z   z  }|t        j                  ||z  d	z        z  }| |||z  z   z  S )
Ng;f?g;f?gmBP?tanhr   gHm?r"   r   g      )rc   r   erfrb   )r   rv   r   M_SQRT2	M_SQRT1_2
M_2_SQRTPIkBetakKappax_sqx_cuberF   
tanh_innerleftrightleft_derivativetanh_derivativeinner_derivativeright_derivativekAlphacdfpdfs                        r,   gelu_backwardr      s'    %G&I'Jf*$s*d{/0ZZ&
TzJ+j:55 AF
T(9$9:/14DD)99::Y&,Q4&=112eiitd 233sTCZ'((r+   inputc                     t        j                  t        j                  |            }t        j                  |      }||z  d||z  z
  z  }| ||z   z  S rV   )rc   r   Fsoftplussigmoid)rg   r   input_tanh_softplusinput_sigmoidouts        r,   mish_backwardr     sV      **QZZ%67MM%(M
-
1':=P'P#P
QC-344r+   c                 2    | t        j                  |       z  S r4   )rc   r   r   s    r,   silur   
  s     %--%%%r+   c                 \    ddt        j                  |       z   z  }| |z  d|d|z
  z  z   z  S rV   )rc   rb   )rg   rv   r   s      r,   silu_backwardr     s<     1uyy$''(G AG(<$<==r+   weightc                 <    t        j                  | dkD  | || z        S rn   r   )rv   r   s     r,   _prelu_kernelr     s    ;;taxv}55r+   c                 ~    t        j                  |dkD  | || z        }t        j                  |dkD  d|| z        }||fS )Nr   r   r   )rg   rv   r   
input_gradweight_grads        r,   _prelu_kernel_backwardr     sE     TAX{F[4HIJ++dQhTK-?@K$$r+   noiseloweruppertraining	generatorc                    |J |r^| dk  }t         j                  | ||      }t        j                  || |z  |       }|j	                  t        j                  ||d             |S ||z   dz  }	t         j                  | |	      S Nr   r"   r#   )r~   uniformrc   rd   copy_
leaky_relu)
rv   r   r   r   r   r   not_positiverE   outputr   s
             r,   rrelu_with_noiser   )  s     qyLLue,\4!8T:EKKa34%-1,t^44r+   c           
      @    | j                  t        | |||||            S r4   )r   r   )rv   r   r   r   r   r   s         r,   rrelu_with_noise_r   A  s#     ::&tUE5(IVWWr+   c                 x    |r||z
  dkD  r| j                  |      S ||z   dz  }t        j                  | |||      S )Ngư>r#   )mulr~   r   )rg   rv   r   r   r   r   r   r   s           r,   rrelu_with_noise_backwardr   O  sK     EEMD(u%%%-1,''~~
 	
r+   bufferc                     |dk  }t        j                  |dd      }t        j                  |dd      }t        j                  t        j                  |             }| |||d|z   z  z  z
  z  S )Nr   r"   rM   )rc   rd   rb   abs)rg   rv   r   in_negative	max_derivsignre   s          r,   log_sigmoid_backwardr   d  sg     (KKA.I;;{Ar*D		599T?"#A)da1q5k&::;;r+   loss	reductionc                     |t         j                  j                  k(  rt        j                  |       S |t         j
                  j                  k(  rt        j                  |       S | S r4   )r!   r(   rw   rc   meanr)   sum)r   r   s     r,   apply_loss_reductionr   q  sH    INN(((zz$	imm))	)yyr+   dtypec                     | t         j                  k(  rt         j                  S | t         j                  k(  rt         j                  S | t         j
                  k(  rt         j                  S y r4   )rc   	complex32float16	complex64float32
complex128float64r   s    r,   to_real_dtyper   z  sK    }}	%//	!}}	%""	"}} 
#r+   targetc                 *    | |z
  dz  }t        ||      S )Nr#   )r   )rv   r   r   r   s       r,   mse_lossr     s     6MaDi00r+   c                 |    |t         j                  j                  k(  rd|j                         z  nd}|||z
  z  | z  S )N       @)r!   r(   rw   numel)rg   r   r   r   norms        r,   mse_loss_backwardr     s;     #,y~~/C/C"C3D56>"[00r+   ra   c                     | |z
  j                         }t        j                  ||k  d|dz  z  |z  |d|z  z
        }t        ||      S )Nr   r#   )r   rc   rd   r   )rv   r   r   r]   r   s        r,   smooth_l1_lossr     sO     6M D;;td{C$'MD$8$t:KLDi00r+   c                    |t         j                  j                  k(  rd|j                         z  nd}||z
  }t	        j
                  |      }|| z  }t	        j                  ||k  ||z  |z  |t	        j                  |      z        S r`   )r!   r(   rw   r   rc   r   rd   r   )	rg   rv   r   r   r]   r   r8   abs_x	norm_grads	            r,   smooth_l1_loss_backwardr    s{    
 "+inn.B.B!B3DvAIIaLE{"I;;AEJJqM! r+   c                 h    t        | ||||      }t        ||j                         t        ||d      S NT)	copy_fromcopy_toexact_dtype)r  r   shaper   )rg   rv   r   r   r]   rR   results          r,   smooth_l1_loss_backward_outr    s3     %[$	4PFj&,,/FJDQQr+   deltac           
          |t         j                  j                  k(  rd|j                         z  nd}||z
  }t	        j
                  || k  | | z  |z  t	        j
                  ||kD  || z  |z  ||z  | z              S r`   )r!   r(   rw   r   rc   rd   )rg   rv   r   r   r  r   r8   s          r,   huber_loss_backwardr    s    
 "+inn.B.B!B3DvA;;	UF
	e#AItk1E94!8k;QR r+   c                 h    t        | ||||      }t        ||j                         t        ||d      S r  )r  r   r	  r   )rg   rv   r   r   r  rR   r
  s          r,   huber_loss_backward_outr    s3     !dFIuMFj&,,/FJDQQr+   ignore_indextotal_weightc                    |j                         dk  rdnd}|t        j                  j                  k(  r| |z  } |j	                  |      }t        j                  ||k7  |d      }t        j                  |      }	t        j                  |	||d      }	|	j                         | j                         cxkD  rdkD  rn n| j	                  |      } |Nt        |j                               D 
cg c]  }
d }}
|j                  d   ||<   |j                  |      }| |z  } t        j                  ||k7  | d      } |	| z  S c c}
w )Nr#   r   r"   g      )rJ   r!   r(   rw   rO   rc   rd   
zeros_likescatterrN   r	  reshape)rg   rv   r   r   r   r  r  channel_dimsafe_targetrR   rP   	new_shapes               r,   _nll_loss_backwardr    s!    xxzA~!1KINN(((!L0k*F++f4fa@K!!$'Jz;TJJ~~+//+/a/!++K8 %dhhj 12 11Q 1	2!'a	+	*!F*++f4k1EK## 3s   *	D<c                    |j                         dkD  sJ d       t        j                  |j                         |      }|j                  |      }|dz  dk(  sJ d| d|        |dz  }|j	                  |d|      }|j	                  |||      }t        j                  |      }d|z
  |z  |z  | z  }	|| z  }t        j                  ||	g|      S )Nr   z*glu does not support 0-dimensional tensorsr#   z.Halving dimension must be even, but dimension z	 is size ra   r{   )rJ   r@   canonicalize_dimsizenarrowrc   r   cat)
rg   rv   rJ   wrap_dimnIn	inputSize	firstHalf
secondHalfgradInputFirstHalfgradInputSecondHalfs
             r,   glu_backwardr'    s     88:>GGG>%%dhhj#6H
))H
Ca1Q	7z3%PQqIHa3IXy)<Jz2	!	!%77)CkQ  ,k999(*=>HMMr+   c           	         d|j                         cxk  rdk  sJ d        J d       |j                         dk  sJ d       |j                         dk(  xr |j                         dk(  }|sA|j                  d   |j                  d   k(  s"J d|j                   d|j                   d       |j                         dk(  s'J d	|j                   d
|j                          df       |'|j                         |j                  d   k(  sJ d       |t        j                  j
                  k(  r}|j                         dk(  rj| j                         dk(  r| j                  d   |j                  d   k(  srJ d|j                  d    d| j                          d| j                  d           | j                         dk  r| j                         dk(  sJ d| j                          t        | ||||||      S )Nr   r#   input tensor should be 1D or 2Dr"   ;0D or 1D target tensor expected, multi-target not supportedsize mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)rM   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )rJ   r	  r   r!   r'   rw   r  )rg   rv   r   r   r   r  r  no_batch_dims           r,   nll_loss_backwardr0    s    
aB!BBB!BB

EDE 88:?8vzz|q'8L

1a(J	#DJJ<z&,,qIJ  1$ D
b!3!3!5 6jA' $ 	&,,.DJJrN:FEF: INN(((TXXZ1_ A%+*;*;A*>$**Q-*O 	
EdjjQRm_ U)oo/00GHYHYZ[H\G]_	
O OO"{'8'8':a'?	YD[EVEVDWX	Y? T669lL r+   c           	      >   |j                         dk(  sJ d|j                                 |j                         dk(  sJ d|j                                 |j                  d   |j                  d   k(  r>|j                  d   |j                  d   k(  r|j                  d   |j                  d   k(  s!J d|j                   d	|j                          |j                         dk(  s&J d
|j                   d|j                          d       t        | ||||||      S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: r   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r#   r"   r+  r,  r.  z ( z, elements))rJ   r	  r   r  )rg   rv   r   r   r   r  r  s          r,   nll_loss2d_backwardr3  I  sE    	
aj	\]a]e]e]g\hij 	

n	^_e_i_i_k^lmn 	

1a(JJqMV\\!_,JJqMV\\!_,I 
$DJJ<z&,,H	I	-
 1$ 	""#3|'9'9';&<K	I$
 T669lL r+   c           	      "   |dz
  t        j                  t        j                  |        | j                  dd            z  |t        j                  t        j                  |       | j                  dd            z  z
  }|||z  }t        ||      S )Nr"   r*   i)rc   maximumlog1pnew_fulllogr   )rv   r   r   r   r   s        r,   binary_cross_entropyr9  l  s     QJ%--TEDMM"d3 uyyb$0GHHID f}i00r+   c                     d}| ||z
  z  t        j                  |d|z
  z  |      z  }|||z  }|t        j                  j                  k(  r||j                         z  }|S )Ng-q=r"   r   )rc   r   r!   r(   rw   r   )rg   rv   r   r   r   EPSILONr
  s          r,   binary_cross_entropy_backwardr<    sg     GD6M*U[[T9JPW-XXF&INN((($**,&Mr+   c                 r    t        j                  t        j                  |  |z              }t        ||      S r4   )rc   r6  rb   r   )r   r   r   r   s       r,   soft_margin_lossr>    s.     ;;uyy%&12Di00r+   c                     || z  t        j                  ||z        dz
  z  }|t        j                  j                  k(  r||j                         z  }|S rV   )rc   r   r!   r(   rw   r   )rg   rv   r   r   rR   s        r,   soft_margin_loss_backwardr@    sM     +%v})E)IJJINN((($**,.
r+   otherpc                 6    t         j                  | |z
  |      S )N)rB  )r~   r   )r   rA  rB  s      r,   distrD    s     99UU]a9((r+   x1x2c                    | j                  d      j                  dd      }t        j                  |t        j                        }|j                  d      j                  dd      }t        j                  |t        j                        }t        j
                  | j                  d      ||gd      }t        j
                  |||gd      }|j                  |j                        }|j                  d      j                         S )Nr#   rM   Tmemory_formatr   )powr   rc   	ones_likecontiguous_formatr  r   matmulmT	clamp_minsqrt)	rE  rF  x1_normx1_padx2_normx2_padx1_x2_r
  s	            r,   _euclidean_distrX    s     ffQimmB%G__WE4K4KLFffQimmB%G__WE4K4KLF
))RVVBZ&12
6C
))R)2
.CZZFA##%%r+   input_sizesstartendstepc                 X    | j                  |      }t        j                  || ||||      S r4   )	new_zerosrc   slice_scatter)rg   rY  rJ   rZ  r[  r\  rR   s          r,   slice_backwardr`    s/     &&{3Jz;UCNNr+   r"   c                    | j                         }|dk(  rt        d      t        j                  | j                         |      }t	        | j                               }t	        | j                               }|dk  rt        d      ||nd}||nt        j                  }	|dk  r|||   z  }|	dk  r|	||   z  }	|dk  rd}n|||   kD  r||   }|	|k  r|}	n|	||   kD  r||   }	| j                         |||   z  z   }
|	|z
  }||z   dz
  |z  ||<   ||xx   |z  cc<   | j                  rt        d      | j                  |||
      S )Nr   z,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver"   z<Slice decomposition for quantized tensors aren't implemented)rJ   RuntimeErrorr@   r  listr  stridesysmaxsizestorage_offsetis_quantizedNotImplementedError
as_strided)rv   rJ   rZ  r[  r\  ndimsizesstrides	start_valend_valrg  lens               r,   slice_forwardrq    sm    88:DqyIJJ

 
 S
1CE4;;=!Gqy899*I_c#++G1}U3Z	{5:1}		U3Z	#J		5:	*((*Y-EEN
I
C*q.T)E#JCLDL!J
 	
 ug~>>r+   indexc                 T    | j                  |      }t        j                  || ||      S r4   )r^  rc   select_scatter)rg   rY  rJ   rr  rR   s        r,   select_backwardru    s+     &&{3J
KeDDr+   offsetdim1dim2c                 V    | j                  |      }t        j                  || |||      S r4   )r^  rc   diagonal_scatter)rg   rY  rv  rw  rx  rR   s         r,   diagonal_backwardr{    s-    
 &&{3J!!*k64NNr+   input_dtypec                 F    | j                   |k7  r|j                  |      }|S r4   )r   r7   )rg   rR   r|  s      r,   _cast_grad_to_input_dtyper~    s&     K']];/
r+   r   c                 ~    | |z  }||t        j                  ||d      z  z
  }t        | ||      j                         S NTrJ   keepdim)rc   r   r~  
contiguous)rg   r   rJ   r|  new_grad_outputrR   s         r,   _softmax_backward_datar    sK     "F*O 6EIIS$- $ J %[*kJUUWWr+   c                 ~    | t        j                  |      t        j                  | |d      z  z
  }t        | ||      S r  )rc   rb   r   r~  )rg   r   rJ   r|  rR   s        r,   _log_softmax_backward_datar  .  sA     uyy0599d4   J %[*kJJr+   c                     | |dz  z   ||dz
  z  z
  }t        t        j                  t        j                  |      } |d||      j	                  d      } |d||z  |      j	                  d      }	||	z   S )z/Utility function to implement im2col and col2imr#   r"   r   devicer   rM   )r   rc   arangeint64rO   )
input_dkernel_d
dilation_d	padding_dstride_dr  blocks_d	arange_kwblocks_d_indiceskernel_grids
             r,    _im2col_col2im_indices_along_dimr  :  s     Q&x!|)DDHEKKGI !Hh7AA!D Ax*4jAKKBOK k))r+   kernel_sizedilationpaddingrd  c           
         t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        dd} |d        |d        |d	d
        |d       | j                  t              }t        j                  |dv xr t	        d dd  D              fd       t        d t        dd        D              t        j                  t	        d D              fd       |dk(  }|s| j                  d      } | j                  \  }}	}
}\  }}\  }}\  }}\  }}t        |
||||| j                        }t        |||||| j                        }t        j                  | ||||f      }|j                  d      j                  d      }|d d d d ||f   }|j                  dddddd      }|j                  d      }|j                  d      }|j                  ||	|z  |z  ||z        }|s|j                  d      }|S )Nr#   c                       y)Nz"im2col(): only 2D kernel supportedr*   r*   r+   r,   r|   zim2col.<locals>.<lambda>W  s    0Tr+   c                       y)Nz$im2col(): only 2D dilation supportedr*   r*   r+   r,   r|   zim2col.<locals>.<lambda>X  s    -Sr+   c                       y)Nz#im2col(): only 2D padding supportedr*   r*   r+   r,   r|   zim2col.<locals>.<lambda>Y      ,Qr+   c                       y)Nz"im2col(): only 2D stride supportedr*   r*   r+   r,   r|   zim2col.<locals>.<lambda>Z  s    +Or+   c                 ~    |rt        d | D              nt        d | D              }t        j                  |d        y )Nc              3   &   K   | ]	  }|d kD    ywr   Nr*   .0rB  s     r,   	<genexpr>z1im2col.<locals>.check_positive.<locals>.<genexpr>]       (%Q1q5%   c              3   &   K   | ]	  }|d k\    ywr  r*   r  s     r,   r  z1im2col.<locals>.check_positive.<locals>.<genexpr>]       ;REqAFEr  c                       y)Nz<{param_name} should be greater {'than' zero, but got {param}r*   r*   r+   r,   r|   z0im2col.<locals>.check_positive.<locals>.<lambda>_  s    Xr+   allrc   r}   param
param_namestrictconds       r,   check_positivezim2col.<locals>.check_positive\  s3    ,2s(%((;RE;R8RX	
r+   r  r  r  Fr  rd  r   r2  c              3   &   K   | ]	  }|d k7    ywr  r*   r  ds     r,   r  zim2col.<locals>.<genexpr>j       :z!qAvzr  r   c                       dt                S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler	  s   r,   r|   zim2col.<locals>.<lambda>k       --25\N<r+   c              3   \   K   | ]$  \  }}}}}d |d|z  z   ||d z
  z  z
  d z
  |z  z    & yw)r"   r#   Nr*   )r  r   paddilkersts         r,   r  zim2col.<locals>.<genexpr>n  sK      '
"Cc3 	
S1s7]SC!G_,q0R77'
s   *,rJ  c              3   &   K   | ]	  }|d kD    ywr  r*   )r  cs     r,   r  zim2col.<locals>.<genexpr>u  s     ';aAE;r  c                  F    dt        dd         d d  d d d dS )	Nz!Given an input with spacial size rJ  , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.r  )r  r  output_sizer  r	  rd  s   r,   r|   zim2col.<locals>.<lambda>v  sJ    3E%*4E3F G"m;xj 9)9VH -]D	Fr+   r2  r   rM   r"   r      T)rc   r}   rp  r	  r  r  ziprO   r  r  r   r  permuter  r  squeeze)r   r  r  r  rd  r  rk  batched_input	batch_dimr  input_hinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wblocks_row_indicesblocks_col_indicespadded_inputr   num_blocks_rownum_blocks_colr  r	  s    ````                     @@r,   im2colr  M  s~    
LL[!Q&(TU	LLX!#%ST	LLW"$QR	LLV!#OP
 ;.8Z(8Yu568$KKEu:D	LL:3:uRSz::	<
  &)"#J;'
 K 
LL';''	F 	F AIM"/4{{,I{GWHh"Iy%J
$Hh9:y(ELL ::y(ELL 55Iy) LML+55b9CCBG!Q 24FFGF^^Aq!Q1-F',,Q/N',,Q/N^^;)H4n~6UF "Mr+   r  c                   !" t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        dd} |d	        |d
        |dd        |d        |d       | j                  "t        "      }t        j                  |dv xr t	        d "dd  D              "fd       d   d   z  }t        j                  "d   |z  dk(  "fd       t              D 	
cg c]"  \  }	}
}}}d|	d|
z  z   ||dz
  z  z
  dz
  |z  z   $ }}}}
}	}|d   |d   z  !t        j                  "d   !k(  !"fd       t        j                  !dkD  !"fd       |dk(  }|s| j                  d      } | j                  "\  }}\  }}\  }}\  }}\  }}| j                  "d   "d   |z  gz   |z         } | j                  dddddd      } t        |||||| j                        }t        |d      }t        |||||| j                        }t              D cg c]  \  }}|d|z  z    }}}| j                  "d   "d   t              z  g|z         }d d ||f} t        j                  || | d      }t!        j"                  || | | | f      }|s|j%                  d      }|S c c}}}}
}	w c c}}w )Nr#   c                       y)Nzonly 2D output_size supportedr*   r*   r+   r,   r|   zcol2im.<locals>.<lambda>  s    0Or+   c                       y)Nzonly 2D kernel supportedr*   r*   r+   r,   r|   zcol2im.<locals>.<lambda>  s    0Jr+   c                       y)Nzonly 2D dilation supportedr*   r*   r+   r,   r|   zcol2im.<locals>.<lambda>  s    -Ir+   c                       y)Nzonly 2D padding supportedr*   r*   r+   r,   r|   zcol2im.<locals>.<lambda>  s    ,Gr+   c                       y)Nzonly 2D stride supportedr*   r*   r+   r,   r|   zcol2im.<locals>.<lambda>  s    +Er+   Tc                 ~    |rt        d | D              nt        d | D              }t        j                  |d        y )Nc              3   &   K   | ]	  }|d kD    ywr  r*   r  s     r,   r  z1col2im.<locals>.check_positive.<locals>.<genexpr>  r  r  c              3   &   K   | ]	  }|d k\    ywr  r*   r  s     r,   r  z1col2im.<locals>.check_positive.<locals>.<genexpr>  r  r  c                       y)Nz9{param_name} should be greater than zero, but got {param}r*   r*   r+   r,   r|   z0col2im.<locals>.check_positive.<locals>.<lambda>  s    Ur+   r  r  s       r,   r  zcol2im.<locals>.check_positive  s3    ,2s(%((;RE;R8RU	
r+   r  r  r  Fr  rd  r  )r#   r   c              3   &   K   | ]	  }|d k7    ywr  r*   r  s     r,   r  zcol2im.<locals>.<genexpr>  r  r  rJ  c                       dt                S )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: r  r  s   r,   r|   zcol2im.<locals>.<lambda>  r  r+   r   r"   c                      dd    d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = rJ  z and kernel_size=r*   )r  r	  s   r,   r|   zcol2im.<locals>.<lambda>  s!     ==B2YK H"m%r+   rM   c                  :    d d d d d d  dd    d	S 
NzGiven output_size=r  r  r  r  z , expected input.size(-1) to be 	 but got rM   .r*   Lr  r  r  r  r	  rd  s   r,   r|   zcol2im.<locals>.<lambda>  D    $[M} M:Zy	& B))*9U2YKqBr+   c                  :    d d d d d d  dd    d	S r  r*   r  s   r,   r|   zcol2im.<locals>.<lambda>  r  r+   r   r2  r  
accumulater  )rc   r}   rp  r	  r  r  rO   r  r  r  r  rQ   r^  prodr~   _unsafe_index_putr   r  r  )#r   r  r  r  r  rd  r  rk  prod_kernel_sizer   r  r  r  r  colr  out_hout_wr  r  r  r  r  r  r  r  indices_rowindices_colorB  output_padded_sizer   idxr  r	  s#    `````                           @@r,   col2imr    s    
LL[!Q&(OP	LL[!Q&(JK	LLX!#%IJ	LLW"$GH	LLV!#EF
 ;.8Z(7Ie468$;.KKEu:D	LL:3:uRSz::	<
 #1~A6	LLb	$$)	% '*(K'
'
"Cc3 	
S1s7]SC!G_,q0R77'
   	AQA	LLb	Q	B 	B 
LL	A	B 	B AIM"KKELE5Hh"Iy%J
$Hh MM58U1X1A%AB[PSVVWEMM!Q1a+E2xY%,,K $K3K2xY%,,K 14K0IJ0I1!a!e)0IJ__	q58tK0014FFF {K
0C##FC4#HFUU6YJ
YJ
KLF"MkV Ks   6'M9Mmaskc                 z    | |j                  |       |z  z  j                  t        j                  |             }|S )NrH  )type_ascloner@   suggest_memory_format)rg   r  ri   rE   s       r,   native_dropout_backwardr
    sB     
[1E9	:AA11+> 	B 	A Hr+   
input_size	dimensionr  c                    t        |      dk(  rt        j                  | d      S t        j                  t        |      |      }t        j
                  ||   | j                  t        j                        }|j                  d||      j                         }| j                  d|dz         j                  ||dz         } | j                  |      }d|z  |fz   }t        j                  ||| d      j                         S )Nr   r  r   rM   r"   r4   Tr  )rp  rc   squeeze_copyr@   r  r  r  int32unfoldflattenmovedimr^  r~   r  r  )	r   r  r  r  r\  rJ   r  rR   rr  s	            r,   unfold_backwardr    s    
 :!!!$**

 
 Z)
<C
,,z#t{{%++
NC
**Qd
#
+
+
-C<<C!G$,,S#':D 
+JcMSF"E!!*eTd!KVVXXr+   epsc           
      .   |A|}d|z
  }t        j                  t        j                  ||k\  ||k        | |d|z
  z  z  d      S t        j                  t        j                  |dk\  |dk        | |d|z
  z  z  |j                  dt	        d                  S )Nra   r   r*   nan)rc   rd   logical_andr7  float)rg   rv   r  lohis        r,   logit_backwardr  %  s    
 2X{{dbj$"*543:./
 	
 {{dck43;743:./MM"eEl+
 	
r+   trainc                 d    |r|dk7  rt         j                  | ||      d   S | j                         S rn   )r~   native_dropoutr  )r   rB  r  s      r,   dropoutr   :  s3     a""5!U3A66{{}r+   out0out1c                    |r|dk7  r|dk(  r:t        j                  |       t        j                  | t         j                        fS | j                  j                  st        d      t        j                  |       |kD  }|| z  t        dd|z
  z        z  }||fS | t        j                  | t         j                        fS )Nr   r"   r   z?result type Float can't be cast to the desired output type Longra   )	rc   r  boolr   is_floating_pointrb  	rand_liker  rL  )r   rB  r  	bool_maskress        r,   r  r  D  s     a6$$U+U-=-=e5::-VWW{{,,Q  OOE*Q.	%%sQw"88YuuEJJ?@@r+   half_to_floatc                    | j                         } |r| j                  t        j                  k(  sJ t	        j
                  | t        j                  j                        \  }}| j                  |      } | j                         dk(  rt        j                  |       }n0t        j                  | |d      }t        j                  | |z
        }|t        j                  ||d      z  }|s|j                  |      }|S Nr2   r   Tr  )r  r   rc   halfr@   rA   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr7   r   rb   amaxr   )r8   rJ   r)  r9   r<   unnormalizedx_maxr
  s           r,   _softmaxr4  U  s    
 	
Aww%**$$$&+&>&>	uDDLL'#| 	
AwwyA~yy|

1c40yyU+EIIlCFFF<(Mr+   c                     | j                         } |r| j                  t        j                  k(  sJ t	        j
                  | t        j                  j                        \  }}| j                  |      } | j                         dk(  r| }nt        j                  | |d      }| |z
  }t        j                  t        j                  t        j                  |      |d            }||z
  }|s|j                  |      }|S r+  )r  r   rc   r.  r@   rA   r/  r0  r7   r   r1  r8  r   rb   )	r8   rJ   r)  r9   r<   shiftedr3  shifted_logsumexpr
  s	            r,   _log_softmaxr8  l  s    
 	
Aww%**$$$&+&>&>	uDDLL'#| 	
AwwyA~

1c40e)		%))EIIg,>T"RS((F<(Mr+   c                 2    t        j                  || |      S Nrh   rc   subrv   rA  rh   s      r,   rsub_Tensorr?        99UD..r+   c                 2    t        j                  || |      S r:  r<  r>  s      r,   rsub_ScalarrB    r@  r+   indicespadding_idxscale_grad_by_freqsparsec                     | j                         dk(  sJ d       |j                  dk  r4| j                  d|      }|j                  dk(  r|j                  d      }|S | |   S )Nr#   z'weight' must be 2-Dr"   r   )rJ   rk  index_selectr  )r   rC  rD  rE  rF  r   s         r,   	embeddingrI    sd     ::<1444||q!!!W-<<1++a.C
gr+   num_weightsc                 t   t        j                  | t         j                  j                        \  }}| j	                  |      } t        |t        j                        }|rZ|j                  |f      }t        j                  |      }t        j                  ||g|d      }||   }	| |	j                  d      z  } t        ||k(  | j                        }
| j                  |
d      }| j                  |f| j                   |j                  d  z         }t        j                  ||g|d      j	                  |      S )Nr,  Tr  rM   r   )r@   rA   r/  r0  r7   r   rc   longr^  rL  r~   r  rO   rQ   rk  masked_fillr	  )rg   rC  rJ  rD  rE  r9   r<   countsonesgrad_weights_scaler  r   grad_weights                r,   embedding_dense_backwardrR    s,    ',&>&>)N)N)V)V'#| ..!23K%guzz:G""K>2w'''	4D'Q#G_!$6$@$@$DDW3[5E5EFD""4+D''	**7<<>::K !!+y$4!PSS r+   c                 "    d}| D ]  }||z  }	 |S rV   r*   )r8   rE   is      r,   r  r    s    	A	Q Hr+   split_sizesc                     t        j                  t        t               j                     k(   fd       t              }g }d}ddlm} t        |      D ]`  }|   }t        j                  |d         |||z    j                     k         |j                   j                  ||             ||z  }b |S )Nc                  @    dt               dj                       S )NzSplit sizes add up to z but got the tensor's size of )r   r	  )rJ   rv   rU  s   r,   r|   z"split_with_sizes.<locals>.<lambda>  s(    ([)9(::XY]YcYcdgYhXijr+   r   )expect_truec                       y)NzCsplit_with_sizes expects split_sizes have only non-negative entriesr*   r*   r+   r,   r|   z"split_with_sizes.<locals>.<lambda>  s    Yr+   )rc   _check_with
ValueErrorr   r	  rp  %torch.fx.experimental.symbolic_shapesrX  rN   _check_is_sizeappendr  )	rv   rU  rJ   
num_splitssplits	start_idxrX  rT  lengths	   ```      r,   split_with_sizesrc    s     
KDJJsO+j
 [!JFI B:QY	
 	I&$**S/9:dkk#y&9:V	  Mr+   
split_size.c                 D    t         j                  j                  | ||      S r4   )r~   splitr   )r   rd  rJ   s      r,   unsafe_splitrg    s    ::UJ44r+   c                 D    t         j                  j                  | ||      S r4   )r~   rc  default)r   rU  rJ   s      r,   unsafe_split_with_sizesrj    s       ((SAAr+   c                     | j                   }||   }|dk(  r
|dk(  sJ | fS ||z   dz
  |z  }ddlm}  ||      }t        |      D cg c]  }| }}|||z  |z
  z
  |d<   t	        j
                  | ||      S c c}w )Nr   r"   )	guard_intrM   )r	  r\  rl  rN   rc   rf  )	rv   rd  rJ   rY  dim_sizechunksrl  rT  rU  s	            r,   rf  rf    s    **K3HQ1}}w#a'J6F @vF',V}5}!:}K5 J$7($BCKO;;t[#.. 6s   	A7tensor_indices_or_sectionsc                    |j                   j                  dk(  sJ |j                  t        j                  k(  sJ |j                         t        j                  dk(  xs dk(  fd       dk(  r4|j                         }t        |t              sJ | j                  ||      S |D cg c]  }|j                          }}| j                  ||      S c c}w )Ncpur"   r   c                      d  dS )Nz{tensor_split expected tensor_indices_or_sections to be a zero-dimensional or one-dimensional tensor, but got a tensor with z dimsr*   )	split_dims   r,   r|   zAtensor_split_tensor_indices_or_sections_py_impl.<locals>.<lambda>  s     <<E;eMr+   )r  typer   rc   r  rJ   r}   itemr6   r   tensor_split)rv   ro  rJ   sectionsrT  rC  rs  s         @r,   /tensor_split_tensor_indices_or_sections_py_implrx    s     &,,11U:::%++u{{:::*..0I	LLQ()q.	M
 A~-224(G,,,  3//%?@%?1668%?@  #.. As   ,Cmat1mat2c                     | j                         s&| j                         st        |      }t        |      }|t        j                  ||      z  }|dk(  r|S ||| z  z   S rn   )r%  
is_complexintrc   mm)rv   ry  rz  r]   rh   r   s         r,   addmmr    s]     !!#DOO,=4yE

%((4&
&Cqy
 r+   use_geluc                     t        | ||||      }|r8| j                  rt        j                  |d      S t        j                  |      S t        j	                  |      S )Nr   )r   )r  is_cudar~   gelurelu)rv   ry  rz  r]   rh   r  r   s          r,   _addmm_activationr  3  sO     dD$
.C<<99Sf95599S>!99S>r+   vecc                     | j                         s&| j                         st        |      }t        |      }|t        j                  ||      z  }|dk(  r|S ||| z  z   S rn   )r%  r|  r}  rc   mv)rv   ry  r  r]   rh   r   s         r,   addmvr  G  s[     !!#DOO,=4yE

%((4%
%Cqy
r+   r   rstdgammaNCHxWgroupoutput_maskc
           	      R   t        j                  | ||d       t        j                  || d       t        j                  |d       t        j                  |j                         z  z  k(  fd       t        j                  j                  fk(  fd       t        j                  d u xs j                         k(  fd       t              \  }
}t        j                  |dk(  fd       t        j                  | |      j                        j                  dg	      }| j                        j                  dg	      }d }d }d }|	d   r*d
|
z  z  }t        j                  |j                  d            j                  |
      j                  d      }t        j                  |j                  d            j                  |
      j                  d      }t        j                  |j                  d      j                  d|
            }n|j                  |
      j                  d      }|j                  |
      j                  d      }t        j                  |j                  d      t        j                  d|
f|j                              }|z  |z
  |z  |z  |z  |z  }| z  ||z  |z  z
  }|j                  d      }t        |d      }t        |d      }t        j                  | j                  |
      |      t        j                  |j                  |
      |      z   |z   }|j                  |j                        j!                  |j"                        }|	d   rk|j                  |
      |j                  |
      j                  d      z  z
  |j                  d      z  j                  dg	      j                        }|	d   r|j                  dg	      }|||fS )NF)allow_cpu_scalar_tensorsc                      d z  z   dS )NzExpect input to have z	 elementsr*   )r  r  r  s   r,   r|   z,native_group_norm_backward.<locals>.<lambda>i  s    'Q~Y?r+   c                  .    d  d dj                    S )NzExpect mean to have shape (, z
, but got r  )r  r  r   s   r,   r|   z,native_group_norm_backward.<locals>.<lambda>m  s    -aS5'DJJ<Pr+   c                  <    d  dj                          S d S )NzExpect gamma to have z elements but got rM   )r   )r  r  s   r,   r|   z,native_group_norm_backward.<locals>.<lambda>q  s)    's*<eN_U[[]<hieg<hir+   r   c                      d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r*   )r  r  s   r,   r|   z,native_group_norm_backward.<locals>.<lambda>w  s    ,QC/[\a[bcr+   r#   r{   ra   rM   r"   r  r2  )r@   check_same_devicecheck_same_shaperc   r}   r   r	  divmodr   viewr   rO   r  rO  r  rQ   r7   r   )rg   r   r   r  r  r  r  r  r  r  cpg_remdsdbd_inputd_gammad_biassds_valdb_valc1c2c3s     ` `````              r,   native_group_norm_backwardr  T  s    
UD$ 
5+N	4F	LLQ$? 
LL

q%j P 
LL+!+i
 q% IC	LL	c 
;	&	+	+Aq#	6	:	:s	:	CB			!Q	$	(	(aS	(	1B $G $G#F1~39YYr5??1#56>>q%MQQRSTFYYr5??1#56>>q%MQQRSTFr"a,B
 ZZ5#.2215FZZ5#.2215Fr"

Auc?4;;?B tmf$,t3d:Q>S4Z&4-!++\\"r1%r1%IIk))!UC=rBiiaS92>? 	
 //%++.11%++>1~ E3'"''!UC*@4>>RTCU*UU..$% SaSS\WQZ 	 1~QCWf%%r+   out2c
                    t        | |||||||||	
      }|
||f}t        |      D ]2  \  }}|	t        ||   |j                         t	        |||   d       4 |S r  )r  	enumerater   r	  r   )rg   r   r   r  r  r  r  r  r  r  r!  r"  r  r
  rR   rT  rE   s                    r,   native_group_norm_backward_outr    ss    " (UD$q!S%F d#J&!1=jmQWW5Q
14P "
 r+   c                 ,    | | j                  |      S | S r4   r7   )r8   r   s     r,   _maybe_castr    s    }ttE{Hr+   grad_outnormalized_shapebiasc                   ! |j                   }|j                         }	t        j                  |j                        !!fd| |||fD        \  }
}}}|
J |	t        |      z
  }||d  }|d | }g }g }t        |	      D ]*  }||k\  r|j                  |       |j                  |       , t        |      }t        |      }|dk  s|dk  rN|d   r|j                  |      nd |d   r|j                  ||d        nd |d   r|j                  ||d        fS d fS t        ||j                               }t        ||j                               }||z
  |z  }||
|z  }n|
}||z  }t        j                  ||d      }t        j                  ||      }t        j                  ||d      }t        j                  ||      }||z
  |z
  }d }d }d } |d   r||z  |z  }|d   r0|.t        |      dkD  rt        j                  |
|z  |d      }n|
|z  }|d   r8|6t        |      dkD  rt        j                  |
|d      } n|
j                         } t        ||j                        t        ||j                        t        | |j                        fS )Nc              3   d   K   | ]'  }||j                        j                         n| ) y wr4   )r7   r  r  r8   r9   s     r,   r  z-native_layer_norm_backward.<locals>.<genexpr>  s5      90A 12**,1D0s   -0r   r"   r#   TF)r	  rJ   r@   get_computation_dtyper   rp  rN   r^  r  r^  rQ   rc   r   r   r  r  )"r  r   r  r   r  r   r  r  input_shape
input_ndimgrad_out_cast
input_castweight_cast	bias_castaxis
inner_dims
outer_dimsinner_dim_indicesouter_dim_indicesrT  r  Mx_hat
grad_x_hatabr  r  r  rF   r  d_weightr  r9   s"                                    @r,   native_layer_norm_backwardr    s    ++KJ33EKK@9E64095M:{I $$$,--DTU#JUd#J#%#%:9$$Q'$$Q'	  	ZAZAAva,7NEOOK(3>q>EOOK./t3>q>EOOK./
 	
 HL
 	

 T:>>#34DT:>>#34D$$&E"[0
"
QA		*/6A	:u	%B	2($	/B	5"	BEBJE $G!%H#F1~!8u$1~+1 !A%yy!68I5QH$u,H1~)/ !A%YY}.?GF"((*F 	GU[[)Hekk*FEKK( r+   c          
          t        | |||||||      }||	|
f}t        |      D ]2  \  }}|	t        ||   |j                         t	        |||   d       4 |S r  )r  r  r   r	  r   )r  r   r  r   r  r   r  r  r!  r"  r  r
  rR   rT  rE   s                  r,   native_layer_norm_backward_outr     sp     (%)4vt[F d#J&!1=jmQWW5Q
14P "
 r+   running_meanrunning_varmomentum
functionalc	                 2   dgt        t        d| j                                     z   }	t        j                  | j
                        }
|}|}|r"t        j                  | j
                        }
| j                  |
      }t        j                  ||	dd      \  }}t        j                  ||z         }| |z
  |z  }t        j                  ||	      }t        j                  ||	      }|!||z  d|z
  |z  z   }|s|j                  |       |2| j                         | j                  d   z  }t        j                  ||	      }|||dz
  z  z  }||z  d|z
  |z  z   }|s|j                  |       n||J |j                  |
d      }|}|j                  |
d      }|}|}dt        j                  ||z         z  }| j                  j                   dk7  r|}|}n"| j#                  d	      }| j#                  d	      }t%        || j                         dz
        }t%        || j                         dz
        }| |z
  |z  }|2|j'                         }t%        || j                         dz
        }||z  }|2|j'                         }t%        || j                         dz
        }||z   }| j                  j                   dk(  r8|j                  | j
                        }|j                  | j
                        }|j                  | j
                        ||||fS )
Nr   r#   r   T)rJ   
correctionr  r"   )r   r   rq  r   )rc  rN   rJ   r@   r  r   r7   rc   var_meanrsqrtr  r   r   r	  rQ  r  rt  r^  rQ   r  )r   r   r  r  r  r   r  r  r  reduction_dimsr9   new_running_meannew_running_var	input_acc
biased_varr   r  r   	save_mean	save_rstdnsqueezed_varunbiased_varinvstds                           r,   native_batch_norm_helperr  ;  s    S4a 566N33EKK@#!O!77DHH#4H5	 >>>a

D {{:+,$,$&MM$7	MM$7	#')3q8||6SS""#34"A.A !==^DL'1A;7L&5X8TTO!!/2'K,CCC#->TJ'!nn+<4nH%ejjs!234<<%$II-I-I uyy{Q7"6599;?;$,&(!"6599;?;&||~ uyy{Q7$||E!LLu{{L3	LLu{{L3				$ r+   r   r  save_invstdc                 >    t        | |||||||d	      \  }}	}
}}||	|
fS NFr  r   r   r  r  r  r   r  r  r   r  r  rP   s               r,   native_batch_normr    s=     *Bvt\;(CQV*&FIy!Q 9i''r+   c           
          ||t         j                  | |||||      S |t        d      |t        d      |rt         j                  | |||||||      S t         j                  | ||||||      S )Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)r~   _native_batch_norm_legitrb  $_native_batch_norm_legit_no_training)r   r   r  r  r  r   r  r  s           r,   native_batch_norm_decompositionr    s      3,,648S
 	
 <
 	
 <
 	
 ,,64{HhPS
 	
 8864{Hc
 	
r+   c                 l   | j                  |      }||z   dz
  |z  }|dk(  rZ|dk(  rU|D cg c]  }| }}|||z  |z
  z
  ||dz
  <   t        j                  j                  j                  j                  | ||      S t        j                  j                  j                  j                  | ||      S c c}w Nr"   r   )r  rc   opsr~   rj  ri  rg  r   )tensorrn  rJ   rm  rd  rP   rU  s          r,   unsafe_chunk_py_implr    s    {{3HV#a'F2JQ8q=+126az62",
V0Ch0N"OFQJyy~~55==fkSVWW99>>&&--fj#FF 3s   	B1c           
      N    t         j                  j                  | ||||d||      S r  )r~   r  ri  )r   r   r  r  r  r  r  s          r,   r  r    s5     ((00	 	r+   c                 >    t        | |||||||d	      \  }}	}
}}||	|
fS r  r  r  s               r,   r  r    s=     *Bvt\;(CQV*&FIy!Q 9i''r+   c                 >    t        | ||d d |||d	      \  }}}}	}	|||fS r  r  )
r   r   r  r   r  r  r   r  r  rP   s
             r,   !_native_batch_norm_legit_no_statsr    s<     *BvtT48S%*&FIy!Q 9i''r+   c                 f    t        | |||||||d	      \  }}	}
}}|J d       |J d       ||	|
||fS )NTz#new_running_mean should not be Nonez"new_running_var should not be Noner  )r   r   r  r  r  r   r  r  r   r  r  r  r  s                r,   #_native_batch_norm_legit_functionalr    sl    " 	!vt\;(CQU	 'N)NN'&L(LL&9i)9?JJr+   c                     |J t        j                  |       |k  j                  t         j                        }|j	                  |       | z  d|z  z  }||fS )Nr   ra   )rc   r&  r7   uint8r  )r   rB  r   r  r(  s        r,   _fused_dropout_decompositionr  )  s[     OOE"Q&***=D
,,u

%q
1C;r+   c                 d    t        | t        j                  j                        r| j                  S y r4   )r6   rc   _subclasses
FakeTensorfake_device)r  s    r,   device_hintr  3  s'    &%++667!!!r+   c                     |L| j                   j                  dk(  r3ddlm}  |       }d|_        |j
                  }|j                  || |      S | S )Nmetar   )FakeTensorModeT)r  rt  torch._subclasses.fake_tensorr  in_kernel_invocationfake_tensor_converterfrom_meta_and_device)r8   common_devicer  	fake_mode	converters        r,   wrap_output_with_input_device_r	  :  sO     QXX]]f%<@"$	)-	&33	--iMJJHr+   )r   layoutr  
pin_memorynon_blockingrI  r  r  r  rI  c                   |r|t         j                  k(  sJ d       |rJ d       |||| j                         S d}t        |       }|b|| j                  k7  rS|1|j
                  dk(  r"t         j                  j                  | |      } d}t         j                  j                  | |      } |$|s"t         j                  j                  | |      } d}|rt        | |      } |t        j                  | |      S | S )NTODOFrq  TrH  )
rc   stridedr  r  r  rt  _primsconvert_element_type
device_putr	  )	r8   r   r
  r  r  r  rI  dtype_convertedr  s	            r,   _to_copyr  G  s     5==08&80!6!>~%-M,AwwyONMf0!511!U;A"OLL##Av.LL--a7 *1m< {{1M::Hr+   c                 ,    t         j                  |       S r4   )r~   alias)r8   s    r,   nop_decompositionr  n  s     ::a=r+   out3exponential_average_factorepsilonc           
         t         j                  | |||||||      \  }}	}
|r%||	|
| j                  dt        j                        fS ||j                  d      |j                  d      | j                  dt        j                        fS )Nr  r   )r~   r  r^  rc   r  )r   r   r  r  r  r   r  r  r  r  r  s              r,   cudnn_batch_normr  v  s     $$"	GAq! 1aU[[ABB	EKK0	 r+   c                     t        |      D ]A  \  }}|dk(  s|| j                  k  r| j                  |   ||   k(  r1| j                  |      } C | S rV   )r  rk  r	  rO   )r8   broadcast_maskr  r  s       r,   _broadcast_batch_norm_backwardr    sP    /
d19dQVVmPTAU0UD!A 0 Hr+   c
                   & |j                   }
||j                   }n|
}t        j                  |j                         &&fd| ||||||fD        \  }}}}}}}|j                  }|j	                         }|dk\  sJ d       d}t        t        |            ||   z  }|}|}|r||"J ||J |}t        j                  ||z         }dg|z  }||   ||<   g }t        |      D ]  }||k7  s	|j                  |        t        ||      }d|z  }t        j                  ||      }t        j                  |||z
  z  |      }t        ||z  |      }t        t        j                  ||z  ||z        |      } |t        ||      dz  }!nt        ||z  |      }!|r||z
  | z  }"||"z
  |z
  |!z  }#n||!z  }#|	d   r||z  }$nd }$|	d   r|}%nd }%|#j                  |
      t        |$|      t        |%|      fS )Nc              3   H   K   | ]  }||j                        n|  y wr4   r  r  s     r,   r  z-native_batch_norm_backward.<locals>.<genexpr>  s0      	
A $%=a7
s   "r#   z$rank of the input must be at least 2r"   ra   )r   r@   r  r	  rJ   r  rc  rc   r  rN   r^  r  r   r   r7   r  )'r  r   r   r  r  r  r  r  r  r  r|  weight_dtyper  r  r  running_mean_castrunning_var_castsave_mean_castsave_invstd_castr  
input_rankr  num_featuresr   r  r  reduction_axesrT  r   grad_output_sumdot_p	grad_mean
proj_scale
grad_scaleprojrR   rQ  	grad_biasr9   s'                                         @r,   native_batch_norm_backwardr1    sn    ++K||"33EKK@	 
	 ++KJ?BBB?D[)*[->>LDF).>.JJJ ,1A1MMM -34!"j 0N&t,N4 "N:9!!!$  *$?DDii~>OIImzD'89>JE./E~VI/		%$,QW0XZhiJ3FNKcQ
3[ .

 T!Z/$t+y8JF
"Z/
1~fn1~#		 	k"K.I|, r+   c
                    t        | |||||||||	
      }|
||f}t        |      D ]2  \  }}|	t        ||   |j                         t	        |||   d       4 |S r  )r1  r  r   r	  r   )r  r   r   r  r  r  r  r  r  r  r!  r"  r  r
  rR   rT  rE   s                    r,   native_batch_norm_backward_outr3    s|    " (F d#J&!1=jmQWW5Q
14P "
 r+   save_varreserveSpacec	                 B    t         j                  || |||||d|g d
      S )NT)TTT)r~   r1  )	r   rg   r   r  r  r  r4  r  r5  s	            r,   cudnn_batch_norm_backwardr7  *  s5     ** r+   c                    | j                   | j                  t              t        j                  dv fd       | j                  dd  D ]  }t        j                  |dk7  fd         d   |d   z  dk(  rxd   |d   z  dk(  rjt        d t        dd  |      D              }t        d t        dd  ||      D              }t        j                  j                  j                  | ||      S d	 d
 fd} |d   |d         \  }}}}	 |d   |d         \  }
}}}| dt        |d      |
f   }|	s|st        j                  |d      S d } |||||	d      \  }} |||||d      \  }}d }t        t        |j                  d         t        |j                  d               D ]!  \  }}||d|d d |f   }||d|d d |f   z   }# |||z  z  S )Nr  c                      d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r*   rk  s   r,   r|   z%adaptive_avg_pool2d.<locals>.<lambda>O  s    KD6Rr+   rJ  r   c                  "    dt                dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape r  r  r  s   r,   r|   z%adaptive_avg_pool2d.<locals>.<lambda>T  s     99>uaIr+   rM   c              3   ,   K   | ]  \  }}||z    y wr4   r*   )r  rT  r  s      r,   r  z&adaptive_avg_pool2d.<locals>.<genexpr>Z  s     G*F$!QqAv*F   c              3   :   K   | ]  \  }}}||d z
  |z  z
    yw)r"   Nr*   )r  rT  r  r  s       r,   r  z&adaptive_avg_pool2d.<locals>.<genexpr>[  s'      
+O1aAQ!O+Os   c                 8    t        j                  | |z  |d      S )Ntruncrounding_moderc   divr  r  r  s      r,   start_indexz(adaptive_avg_pool2d.<locals>.start_index`  s    yyQ99r+   c                 J    t        j                  | dz   |z  |z   dz
  |d      S )Nr"   r@  rA  rC  rE  s      r,   	end_indexz&adaptive_avg_pool2d.<locals>.end_indexc  s&    yy!a%1q1,awGGr+   c                    t        j                  |t         j                        } |||       }| |z  dz   }| |z  }|dk(  xs ||z  dk(   }|r|dz  }n
|dk(  r|dz  }t        j                  |t         j                        }|j                  d      |z   }|rUt        j                  | dz
  |j
                  |j                        }	t        j                  ||	      } |||       }
|
|z
  }n|}||||fS )Nr  r"   r   rM   r  )rc   r  r  rO   scalar_tensorr   r  minimum)in_sizeout_sizeorangei0	maxlengthin_size_modadaptive	range_maxr  maxvali1rb  r  rH  rF  s               r,   compute_idxz(adaptive_avg_pool2d.<locals>.compute_idxf  s   hvU[[I73 x'!+	(#q(GH{,Ba,GHNIANILL6M	ll2* ((!399SZZF --V,C 68W5B"WFFFIx//r+   .r2  )r   rM   r{   c                     t        |t              r| |fS |dk  sJ ||j                  d      k\  }|dk(  rt        |d      }t	        j
                  | |d      } t        ||       }| |fS )Nr   rM   rJ  r2  r   )r6   r   rO   rQ   rc   rM  )valsrb  rS  rR  rJ   r  s         r,   
maybe_maskz'adaptive_avg_pool2d.<locals>.maybe_mask  sw    fg&< 7N7 0 0 44Dby(q1$$T45D&vt4F<r+   )rR  rJ   r   )r  r	  rp  rc   r}   r  r  nnr  
avg_pool2drQ   r   r   rN   )r   r  r  rd  kernelrV  idxhlength_hrange_max_h
adaptive_hidxwlength_wrange_max_w
adaptive_wrX  rY  retrT  jr  rH  rk  r	  rF  s                      @@@@@r,   adaptive_avg_pool2drg  E  s%   
 \\FKKEu:D	LLR [[FI	
  Ry;r?"a'E"IB,G1,LG#eBCj+*FGG 
+.uRSz;+O
 
 xx""--eVVDD:H0@ /:%)[QS_.U+D(K.9%)[QS_.U+D(K'a0$67Djzz$H--   hjbND(  hjbND(
 Cdjjn-uTZZ^/DE1;sAq!|$CS!Q\**C	 F
 (X%&&r+   r;  r  c                $    t        | |||d|      S )NTinplacerh   
_index_addr8   rJ   rr  r  rh   s        r,   
index_add_rn    s     aeVTGGr+   c                $    t        | |||d|      S )NFri  rk  rm  s        r,   	index_addrp    s     aeVU%HHr+   rj  c                8   t        j                  | j                        t        j                  j                  dk  fd       j                  dk(  rj                  d      nd|j                  dkD  r|j                        ndt        j                  k(  fd       dk7  rft        j                  | j                        t        j                  t        k(  xs t        j                  t                    fd       |z  }| j                  dk(  }|r| j                  d      n| }dz  fz   }|rt        j                  nt        j                  }	 |	|||d      }
|r| S |r|
j                  d      S |
j!                         S )	Nr"   c                  $    d j                    dS Nz(Index should have dimension 1 or 0 (got r-  r:  rr  s   r,   r|   z_index_add.<locals>.<lambda>      :5::,aHr+   r   c                      d d d S )NzNumber of indices (z') should be equal to tensor.size(dim) (z), for dim=r*   )rJ   
index_sizetensor_sizes   r,   r|   z_index_add.<locals>.<lambda>  s    %j\1XYdXeeqmplrsr+   c                  (    dt                d dS )Nzalpha argument of type z cannot be safely cast to type !)rt  )rh   python_types   r,   r|   z_index_add.<locals>.<lambda>  s    -d5k]:YZeYffghr+   r4   Tr  )r@   canonicalize_dimsrk  rc   r}   r  dtype_to_typer   r$  is_weakly_lesser_typert  rO   r~   
index_put_	index_putr  r  )r8   rJ   rr  r  rj  rh   zero_dimrE  r  r  r   rw  r{  rx  s    ``  `     @@@r,   rl  rl    sM    
!
!!&&#
.C	LL

aH #(**/AqJ&,kkAo&++c"1K	LLz!s z))!''24 E**4;Dh	

 %vv{H#QB
C-5(
"C#*I
BV
5C!)s{{1~?s~~/??r+   c                 "    t        | |||d      S )NTrj  _index_copyr8   rJ   rr  r  s       r,   index_copy_r    s    q#ufd;;r+   c                 "    t        | |||d      S )NFr  r  r  s       r,   
index_copyr    s     q#ufe<<r+   c                   t        j                  | j                  |      }t        j                  j                  dk  fd       | j                  dk(  }|r| j                  d      n| }d|z  fz   }|rt        j                  nt        j                  } ||||      }	|r| S |r|	j                  d      S |	j                         S )Nr"   c                  $    d j                    dS rs  r:  rt  s   r,   r|   z_index_copy.<locals>.<lambda> 	  ru  r+   r   r4   )r@   r|  rk  rc   r}   rO   r~   r  r  r  r  )
r8   rJ   rr  r  rj  r  rE  r  r  r   s
     `       r,   r  r    s     
!
!!&&#
.C	LL

aH
 vv{H#QB
C-5(
"C#*I
BV
$C!)s{{1~?s~~/??r+   c                    t        j                  | j                  d      |       }t        j                  t        j                  |              }| j
                  r| j                  d      }n|}|t        j                  |      z
  |fS )Nr*   r  )rc   rK  r^  rb   r   r  r6  )rv   r   re   r   s       r,   log_sigmoid_forwardr  	  sh     --r*D
1C		599T?"#A||%Q''r+   lowhighc                     t        j                  | j                  t        |      t        |      | j                  | j
                  |      S )N)r  r  r   r  r   )prims_uniform_helperr	  r   r   r  )r8   r  r  r   s       r,   r   r   	  s=       	cNt_ggxx r+   c                 <    | j                  t        | |||            S r4   )r   r   )rv   r  r  r   s       r,   uniform_r  .	  s    ::gdCy9::r+   c                 "   t        |       dz
  }|>t        j                  |d u d        t        j                  t        |      |k(  d        |S |t        j                  |d u d        t        j                  t        |      |k(  d        g }t        |      D ]Z  \  }}t	        |      |k(  r$|j                  | |dz      t	        |      z         8|j                  t        | |dz      |z               \ |S t        j                  dd        y )Nr#   c                       yNz9Must specify exactly one of output_size and scale_factorsr*   r*   r+   r,   r|   z.upsample_compute_output_size.<locals>.<lambda>9	      Or+   c                       yN r*   r*   r+   r,   r|   z.upsample_compute_output_size.<locals>.<lambda>;	  s    Rr+   c                       yr  r*   r*   r+   r,   r|   z.upsample_compute_output_size.<locals>.<lambda>A	  r  r+   c                       yr  r*   r*   r+   r,   r|   z.upsample_compute_output_size.<locals>.<lambda>C	  s    rr+   Fc                       yr  r*   r*   r+   r,   r|   z.upsample_compute_output_size.<locals>.<lambda>L	  s    Rr+   )rp  rc   r}   r  r}  r^  r   )r  r  scale_factorsspatial_dimensionsrT  r  s         r,   upsample_compute_output_sizer  4	  s   Z1,T!O	
 	S%);;ZH 4O	
 	S'+==zJm,DAq1v{"":a!e#4s1v#=>""7:a!e+<q+@#AB	 -
 	LLRr+   c                     | y | |   S r4   r*   )scalesr  s     r,   get_scale_valuer  P	  s    ~#;r+   c                     t        | j                         ||      }t        |d      }t        j                  j                  | ||      S rn   )r  r  r  r~   upsample_nearest1dri  r   r  r  osizeri   s        r,   upsample_nearest1d_vecr  V	  s?     ){MREM1-E""**5%??r+   c                     t        | j                         ||      }t        |d      }t        j                  j                  | ||      S rn   )r  r  r  r~   _upsample_nearest_exact1dri  r  s        r,   _upsample_nearest_exact1d_vecr  `	  s?     ){MREM1-E))11%FFr+   c                     t        | j                         ||      }t        |d      }t        |d      }t        j                  j                  | |||      S Nr   r"   )r  r  r  r~   upsample_nearest2dri  r   r  r  r  scale_hscale_ws         r,   upsample_nearest2d_vecr  j	  sM     ){MREmQ/GmQ/G""**5%'JJr+   c                     t        | j                         ||      }t        |d      }t        |d      }t        j                  j                  | |||      S r  )r  r  r  r~   _upsample_nearest_exact2dri  r  s         r,   _upsample_nearest_exact2d_vecr  u	  sM     ){MREmQ/GmQ/G))11%QQr+   c                     t        | j                         ||      }t        |d      }t        |d      }t        |d      }t        j                  j                  | ||||      S r   )r  r  r  r~   upsample_nearest3dri  r   r  r  r  scale_dr  r  s          r,   upsample_nearest3d_vecr  	  s[     ){MREmQ/GmQ/GmQ/G""**5%'7SSr+   c                     t        | j                         ||      }t        |d      }t        |d      }t        |d      }t        j                  j                  | ||||      S r   )r  r  r  r~   _upsample_nearest_exact3dri  r  s          r,   _upsample_nearest_exact3d_vecr  	  s`     ){MREmQ/GmQ/GmQ/G))11ugw r+   c                    g }t        |      }|rdnd}t        |      D ]  }||   }| j                  | |z      }	||   |	|	||   z  z  n|	|z  }
t        j                  |t        j
                  | j                        }||z   |
z  j                  t        j                        }t        |dz
  |z
        D ]  }|j                  d      } |j                  |        t        |      S )Nr   r   r  r"   rM   )rp  rN   r	  rc   r  r   r  r7   r  rO   r^  r  )r   r  r  exactrC  num_spatial_dimsrv  r  r  isizeri   output_indicesinput_indicesrP   s                 r,   !_compute_upsample_nearest_indicesr  	  s     G;'SsF#$ A--12/5ay/D*+%RW-e5==V(61U:>>u{{K'!+a/0A)33B7M 1}%+ %, >r+   r  c                 T    t        | ||f      \  }t        j                  | d d |f      S r4   r  r~   _unsafe_indexr   r  r  	l_indicess       r,   r  r  	  s0     5UK&SLYedD)%<==r+   c                 X    t        | ||fd      \  }t        j                  | d d |f      S NT)r  r  r  s       r,   r  r  	  s7     5{VITLY edD)%<==r+   c                    t         j                  | d d ||f      }t        j                  |       }| j                  \  }}}}| j
                  j                  dk(  r|dk  rt        j                  }|j                  |      }|S )Ncudar2  rH  )
r~   r  r@   r	  r	  r  rt  rc   rM  r  )r   	h_indices	w_indicesr
  rI  rP   
n_channelss          r,   _upsample_nearest2d_commonr  	  s~    dIy'IJF //6M  ++Az1a||F"zA~//];FMr+   scales_hscales_wc                 @    t        | |||f      \  }}t        | ||      S r4   r  r  r   r  r  r  r  r  s         r,   r  r  	  s0     ={Xx0Iy &eY	BBr+   c                 D    t        | |||fd      \  }}t        | ||      S r  r  r  s         r,   r  r  	  s2     ={Xx0Iy &eY	BBr+   scales_dc                 d    t        | ||||f      \  }}}t        j                  | d d |||f      }|S r4   r  	r   r  r  r  r  	d_indicesr  r  r
  s	            r,   r  r  
  sH     'H{Xx:'#Iy) dIy)'TUFMr+   c                 h    t        | ||||fd      \  }}}t        j                  | d d |||f      }|S r  r  r  s	            r,   r  r  
  sJ     'H{Xx:$'#Iy) dIy)'TUFMr+   c           	          |r|rd}n|rd}n|rd}nd}t        |       |z  dk(  sJ t        |              t        dt        |       |      D cg c]  }t        | |||z           c}S c c}w )Nr  r2  r   r#   r   )rp  rN   r  )params
has_biaseshas_projections
group_sizerT  s        r,   gather_paramsr  %
  s    o
	
	

v;#q(5#f+5(38CK3T3TafQZ()3T  s   A(c                 ~    |r'| d|z     |d|z     }}| d|z  dz      |d|z  dz      }}n| |   ||   }}d\  }}||||fS )Nr#   r"   NNr*   )r  hiddensrT  bidirectional
cur_params
cur_hiddenbidir_paramsbidir_hiddens           r,   params_hiddensr  5
  sk    !'AAJ
%+AEAI%6A	8Jl!'GAJJ
%/"lz<==r+   c                     ||kD  sJ |j                  | j                  d|||z
               | j                  dd|      S rn   )r^  r  )r  last_batch_size
batch_sizer  s       r,   update_hidden_for_packedr  @
  sE    Z'''NN:$$Q
Oj4PQRQ:..r+   c           	      t    ||k(  r| S ||k  sJ t        j                  | |j                  d|||z
        f      S rn   )rc   concatr  )r  r  r  
inp_hiddens       r,    update_hidden_for_packed_reverser  F
  sP     *$Z'''<<a*2NO	
 r+   c           	      X   |d   }|d   }|r|d   nd }	|r|d   nd }
g }g }|r|d   n|d   }|j                  dd|      }t        j                  | t        |            }|r|d d d   }|D ]V  } | j                  d   }||k(  rn|rt        ||||      }nt        ||||      } || |||	||
      }|}|j                  |       X |r|j                          n!|j                  |       |j                          t        j                  |d      }|st        j                  |d      n|}||fS )Nr   r"   r#   r   rM   )
r  rc   rf  rc  r	  r  r  r^  reverser  )inphiddenr  r  	hidden_fnbatch_sizesr  	ih_weight	hh_weightih_biashh_biasstep_outputr  r  r  	split_inprT  r   
hidden_outs                      r,   one_layer_rnn_datar  T
  sM    q	Iq	I%fQi4G%fQi4GK"$G)0k"ok!nOq!_5JCk!23IddO	IIaLa9OQJ 2OQJ sJ	7IwW
:&# & z"
))K
#C.57A&:J
?r+   c                       fd}|S )Nc                 D     t        j                  |||      | z         S r4   r   linearrT  r  r  r  r  r  nonlinearitys         r,   rF   zrnn_cell.<locals>.inner
  s     AHHZGDqHIIr+   r*   r  rF   s   ` r,   rnn_cellr
  
  s    J Lr+   c                       fd}|S )Nc                 r    t        j                  | ||      }  t        j                  |||      | z         S r4   r  r  s         r,   rF   zrnn_cell_data.<locals>.inner
  s2    HHQ	7+AHHZGDqHIIr+   r*   r	  s   ` r,   rnn_cell_datar  
  s    J Lr+   c           	      x   |d   }|d   }|r|d   nd }|r|d   nd }	t        j                  | ||      }
|r|
j                  d      n|
}
|j                  d      }g }|
D ]   } |||||||	      }|j	                  |       " |r|j                          t        j                  |d      }||j                  d      fS )Nr   r"   r#   r   )	r   r  fliprO   r^  r  rc   r  r  )r  r  r  r  r  r  r  r  r  r  precomputed_inputr  r  rT  r   s                  r,   one_layer_rnnr  
  s    q	Iq	I%fQi4G%fQi4Gi95<)..q1BS!!!$JKq*i)WU
:&  
))K
#C
""1%%%r+   c                    |d   }|d   }|r|d   }|d   }nFt        j                  |j                               }t        j                  |j                               }|d   j                  d      }	|d   j                  d      }
g }d}|	j                  d      }d}d}d}d}| j	                         } |	j	                         }	|
j	                         }
t         j
                  j                  j                  j                  | |||||	|
|||||||||      }|d   |d   |d   }}}||j                  d      |j                  d      ffS )Nr   r"   r#   r   F)
rc   zerosr  rO   r  r  r~   mkldnn_rnn_layerri  r  )r  r  r  r  r  w0w1w2w3hxcxr  modehidden_size
num_layersr  batch_firstr  outputsrT   hycys                         r,   mkldnn_one_layer_lstmr"  
  sS   	B	BAYAY[[#[[#			Q	B			Q	BKD''!*KJ MKE ..
C	B	Biinn--55





!G$ 
GAJ
2rArzz!}bjjm,,,r+   c
                    |r| j                  dd      n| } g }
t        |      D ]  }t        ||||      \  }}}}|r
||dz
  k  r|nd} |	| |||      \  }}|
j                  |       |r! |	| |||d      \  }}|
j                  |       |r*t	        j
                  |g|j                         dz
        } n|} |dk7  s|s||dz
  k  st	        j                  | |d      }  |r| j                  dd      n| } | |
fS )Nr   r"   r   T)r  )r  )	transposerN   r  r^  rc   r  rJ   r   )r   r  r  r  r  r   r  r  r  layer_fnfinal_hiddensrT  r  r  r  r  fwd_inp
fwd_hiddenbwd_inp
bwd_hiddens                       r,   _rnn_helperr+  
  s    &1EOOAq!eEM:=KFA}>
:
Jl $
QU(:'&uj*jQZ("*|\:t#GZ   ,IIw0'++-!2CDEEa<Ea*q.&8MM%=E) , &1EOOAq!eE-r+   c	                     |j                  d      }	t        ||d      }t        | |	|||||||t        t        t        t        j                              
      \  }
}|
t        j                  |d      fS Nr   Fr  )	unbindr  r+  r   r  r
  rc   r   stackr   r  r  r  r  r   r  r  r  r  r   r&  s               r,   rnn_tanh_inputr2    t     YYq\F6:u5F$%**)=>C M1---r+   c	                     |j                  d      }	t        ||d      }t        | |	|||||||t        t        t        t        j                              
      \  }
}|
t        j                  |d      fS r-  )	r/  r  r+  r   r  r
  rc   r  r0  r1  s               r,   rnn_relu_inputr5  !  r3  r+   c	                     |j                  d      }	t        ||d      }t        | |	||||||dt        t        |t        t        j                              
      \  }
}|
t        j                  |d      fS Nr   Fr  r  )	r/  r  r+  r   r  r  rc   r  r0  datar  r  r  r  r  r   r  r  r  r   r&  s               r,   rnn_relu_datar;  @  {     YYq\F6:u5F$##EJJ/	
C  M1---r+   c	                     |j                  d      }	t        ||d      }t        | |	||||||dt        t        |t        t        j                              
      \  }
}|
t        j                  |d      fS r7  )	r/  r  r+  r   r  r  rc   r   r0  r9  s               r,   rnn_tanh_datar>  c  r<  r+   c                 l   t        j                  |||      | z   }|j                  d|      }|d   j                         }	|d   j                         }
|d   j	                         }|d   j                         }|
|z  |	|z  z   }||j	                         z  }||nt        j                  ||d       }||fS )Nr2  r   r"   r#   r   r   r  chunkr   r   )r  r  r  r  r  	hr_weight	chunk_dimgateschunked_gatesin_gateforget_gate	cell_gateout_gater!  r   s                  r,   	lstm_cellrJ    s    HHRG,s2EKK9-MA&&(G"**,Ka %%'IQ'')H	r	Wy0	1B	BGGI	B ahhr9d&CBr6Mr+   c           
      (   |d   }|d   }|r|d   nd }|r|d   nd }t        |      dk(  r|d   nt        |      dk(  r|d   nd }	|d   j                  d      }
|d   j                  d      }t        j                  | ||      }|r|j	                  d      n|}g }|D ](  } t        | |
||||	d      \  }
}|j                  |
       * |r|j                          t        j                  |d      }||
j                  d      |j                  d      ffS )Nr   r"   r#   r   r  r2  rC  )rp  rO   r   r  r  rJ  r^  r  rc   r  r  )r  r  r  r  r  r  r  r  r  rB  r  r  r  r  r   s                  r,   one_layer_lstmrM    s%   q	Iq	I%fQi4G%fQi4G[A%q	Fq8H6!9d  
		Q	B			Q	Bi95<)..q1BSK 3B	7IQRSB2 ! 
))K
#CA

1...r+   c           
         |d   }|d   }|r|d   nd }|r|d   nd }	t        |      dk(  r|d   nt        |      dk(  r|d   nd }
g }g }|r|d   n|d   }t        j                  | t        |            }|r|d d d   }|d   }|d   }|j	                  dd|      |j	                  dd|      }}|D ]  } | j
                  d   }t        j                  | ||      } ||k  ra|j                  |j	                  d|||z
        |j	                  d|||z
        f       |j	                  dd|      |j	                  dd|      }}||kD  rXt        j                  ||j	                  d|||z
        fd      }t        j                  ||j	                  d|||z
        fd      }t        | ||||	|
d      \  }}|}|j                  |        |r|j                          ||f}nZ|j                  ||f       |j                          t        | \  }}t        j                  |d      t        j                  |d      f}t        j                  |d      }||fS )	Nr   r"   r#   r   r  r2  rM   rL  )rp  rc   rf  rc  r  r	  r   r  r^  r  rJ  r  r  r  )r  r  r  r  r  r  r  r  r  r  rB  r  r  r  r   orig_hxorig_cxr  r  rT  r  hidden0hidden1r   s                           r,   one_layer_lstm_datarS    ss   q	Iq	I%fQi4G%fQi4G[A%q	Fq8H6!9d  KG)0k"ok!nOCk!23IddO	QiGQiG^^Aq/2GNN	1o5B IIaLhhsIw/ NNIIaOa$78IIaOa$78 YYq!Q'1a);B W^^AO8KLMqB W^^AO8KLMqB 3B	7IQRSB23 6 "X
Bx =YYw*EIIgq,AA

))K
#C
?r+   c                 4    d } || ||      rt         S t        S )a   Check whether we could use decompose lstm with mkldnn_rnn_layer.
    All the below conditions need to be met:
        * ``torch._C._has_mkldnn`` returns ``True``.
        * All the input args are on CPU.
        * The dtypes of args are either torch.float or torch.bfloat16.
        * Inference.
        * ``has_projections`` returns ``False``.

    Args:
        * input: the input sequence to LSTM
        * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM
        * params: the weight and bias tensors of LSTM
    c                 F   t         j                  j                  sy| gt        |      z   t        t	        j
                  |            z   }|D ch c]  }|j                   }}t        |      dk7  ry|j                         }|t        j                  d      k7  ry|D ch c]  }|j                   }}|D ]&  }|t         j                  t         j                  fvs& y | j                  ry|d   j                  d      |d   j                  d      k7  }	|	ryyc c}w c c}w )NFr"   rq  r   r#   T)rc   _C_has_mkldnnrc  r   from_iterabler  rp  popr   r  bfloat16requires_gradr  )
r   r  r  tensorstdevicesr  dtypesr   r  s
             r,   
use_mkldnnz2select_one_layer_lstm_function.<locals>.use_mkldnn  s    xx##'DH$tE,?,?,G'HH%,-W188W-w<1U\\%((#*+7a!''7+EU[[%..99  Q%**Q-2a5::a=8) . ,s   DD)r"  rM  )r   r  r  r`  s       r,   select_one_layer_lstm_functionra    s!    : %V$$$r+   c	                    t        |      dk(  sJ d       t        |||d   j                  d      |d   j                  d      k7        }t        t	        |d   |d               }	t        | ||      }
t        | |	||||||||

      \  }}t        t	        |       }|t        j                  |d   d      t        j                  |d   d      fS )Nr#   lstm expects two hidden statesr   r"   )	rp  r  r  rc  r  ra  r+  rc   r0  )r   r  r  r  r  r   r  r  r  r  r%  r   r&  s                r,   	lstm_implrd     s     r7a<999<6:r!uzz!}1

1/MNF#beRU#$F-eR@H$C m,-MM!,a0%++mA>NPQ2RRRr+   c	                    t        |      dk(  sJ d       t        |||d   j                  d      |d   j                  d      k7        }t        t	        |d   |d               }	t        | |	||||||dt        t        |      
      \  }
}t        t	        |       }|
t        j                  |d   d      t        j                  |d   d      fS )Nr#   rc  r   r"   F)r  )
rp  r  r  rc  r  r+  r   rS  rc   r0  r9  s               r,   lstm_data_implrf  B  s     r7a<999<6:r!uzz!}1

1/MNF#beRU#$F$#=C m,-MM!,a0%++mA>NPQ2RRRr+   c                 &   | j                  dd      }t        j                  |||      j                  dd      }|d   |d   z   j                         }|d   |d   z   j                         }	|d   |d   |z  z   j	                         }
||
z
  |	z  |
z   S )Nr   r"   r#   r   )rA  r   r  r   r   r  r  r  r  r  r  chunked_igateschunked_hgates
reset_gate
input_gatenew_gates              r,   gru_cellrn  c  s    YYq!_NXXj)W=CCAqIN #nQ&77@@BJ #nQ&77@@BJq!^A%6%CDJJLH!Z/(::r+   c                 P   t        j                  | ||      j                  dd      }t        j                  |||      j                  dd      }|d   |d   z   j                         }|d   |d   z   j                         }	|d   |d   |z  z   j	                         }
||
z
  |	z  |
z   S )Nr   r"   r   r#   r@  rh  s              r,   gru_cell_datarp  l  s    XXc9g6<<QBNXXj)W=CCAqIN #nQ&77@@BJ #nQ&77@@BJq!^A%6%CDJJLH!Z/(::r+   c	                     t        ||d      }t        | |j                  d      ||||||dt        t        |t
              
      \  }	}
|	t        j                  |
d      fS )NFr   r8  )r  r+  r/  r   r  rp  rc   r0  )r:  r  r  r  r  r  r   r  r  r   r&  s              r,   gru_impl_datarr  u  si     6:u5F$
		!"}UC M1---r+   c	                     t        ||d      }t        | |j                  d      |||||||t        t        t
              
      \  }	}
|	t        j                  |
d      fS )NFr   r.  )r  r+  r/  r   r  rn  rc   r0  )r   r  r  r  r  r   r  r  r  r   r&  s              r,   gru_implrt    sf     6:u5F$
		!2C M1---r+   c                     t        | j                         ||      }t        |d      }t        |d      }t        j                  j
                  j                  | ||||      S r  )r  r  r  rc   r  r~   _upsample_bilinear2d_aar   r  align_cornersr  r  r  r  s          r,   upsample_bilinear2d_aa_vecry    sV     ){MREmQ/GmQ/G99>>11umWg r+   c                     t        | j                         ||      }t        |d      }t        |d      }t        | ||||      S r  )r  r  r  upsample_bilinear2drw  s          r,   upsample_bilinear2d_vecr|    sC     ){MREmQ/GmQ/Gue]GWMMr+   rx  c                    | j                   \  }}}}|d   }	|d   }
|	dkD  r|r|dz
  |	dz
  z  }n|d|z  n||	z  }nd}|
dkD  r|r|dz
  |
dz
  z  }n|d|z  n||
z  }nd}t        j                  |	| j                  | j                        }t        j                  |
| j                  | j                        }|r||z  }||z  }n6||dz   z  dz
  j                  d      }||dz   z  dz
  j                  d      }|j                  t        j                        }t        j                  |      j                  |dz
        j                  t        j                        }|j                  t        j                        }t        j                  |      j                  |dz
        j                  t        j                        }|j                  d      }|j                  d      }|j                  d      }t        j                  | d d ||g      }t        j                  | d d ||g      }t        j                  | d d ||g      }t        j                  | d d ||g      }||z
  }d|z
  }||z
  }d|z
  }t        j                  ||      t        j                  ||      z   } t        j                  ||      t        j                  ||      z   }!t        j                  | |      t        j                  |!|      z   }"t        j                  |       }#| j                  j                  d	k(  r|d
k  rt        j                   }#|"j#                  |#      }"|"S )Nr   r"   ra   r   r  r   r   r   r     rH  )r	  rc   r  r   r  r   r7   r  ceilrO   r~   r  r   r@   r	  rt  rM  r  )$r   r  rx  r  r  n_batchr  in_hin_wr  r  h_scale_factorw_scale_factorrT  rf  r8   rT   x_floorx_ceily_floory_ceilx_viewx_floor_viewx_ceil_viewv1v2v3v4xscale2xscale1yscale2yscale1q1q2r
  rI  s$                                       r,   r{  r{    s    ',kk#GZtNENE qy"Qh5195N/7/CS8^PUNqy"Qh5195N/7/CS8^PUNU%++ellCAU%++ellCAQQq3w'#-444=q3w'#-444=dd5;;GZZ]  TAX .11%++>Fdd5;;GZZ]  TAX .11%++>F[[^F$$Q'L""1%K			ED$g#F	GB			ED$W#E	FB			ED$f#E	FB			ED$V#D	EB|#GGmG'kGGmG	2w	%))B"8	8B	2w	%))B"8	8BYYr7#eiiG&<<F //6M ||F"zB//];FMr+   c           	      .   |d   }|d   }|d   }|d   }| }| }| }|dk  r|d| d f   }|d| d f   }d}|dk  r|dd |f   }|dd |f   }d}|dk  r|d| d d d f   }|d| d d d f   }d}|dk  r|dd |d d f   }|dd |d d f   }d}d| j                         dz
  z  }	|	||fz   }
|	|dfz   }|	||fz   }t        j                  | ddgd d f   ddgf   j                  |
      |ddgd d f   j                  |      | ddgd d f   ddgf   j                  |      gd      }|	d|fz   }|	d|fz   }t        j                  |ddgf   j                  |      ||ddgf   j                  |      gd      }|	||fz   }|	|dfz   }|	||fz   }t        j                  | ddgd d f   ddgf   j                  |      |ddgd d f   j                  |      | ddgd d f   ddgf   j                  |      gd      }t        j                  |||gd	      S )
Nr   r"   r#   r   .r"   rM   r{   rJ  )rJ   rc   r  repeat)r   r  pad_left	pad_rightpad_top
pad_bottom	input_midinput_mid_tbinput_mid_lrbatch_dims_no_repeatrepeat_top_leftrepeat_top_middlerepeat_top_righttop_rowsrepeat_middle_leftrepeat_middle_rightmiddle_rowsrepeat_bottom_leftrepeat_bottom_middlerepeat_bottom_rightbottom_rowss                        r,   replication_pad2dr    s    qzH
IajGJ ILL!|cH9:o.	#C(O41}c:I:o.	#C)O4	{cG89a/0	#C'A$56A~c;J;12	#C*a$78
599;?3*gx-@@O,|;+w	.BByy #sA+sQCx(//@qc1%,,->?#sA+sRDy)001AB	
 
H .H=.!Y?)) qc"))*<=rd#**+>?	
 
K .X0FF/:q/A.*i1HH)) #tQ,aS)001CDrdA&--.BC#tQ,bT	*112EF	
 
K 99h[9rBBr+   r  r  c                 4    | j                   |j                   k(  S r4   r  )r  r  s     r,   is_same_sizer  r  s    77aggr+   c                 .    t         j                  | |      S r4   )r~   r  )r8   r	  rB   s      r,   _reshape_aliasr  w  s     99Qr+   c                 .    t         j                  | |      S r4   )r~   rr  )r8   rC  s     r,   _indexr  }  s    ::a!!r+   c                    | j                         }d}|dk  rd}|6|dkD  r*dg|z  }|j                  d   ||<   |j                  |      }n|}| |z  } t        j                  ||k7  |d      }	|	j                  |      }
t        j                  | ||
      j                  |       }t        j                  ||k7  |d      }|t        j                  j                  k(  r|dkD  r| j                  dd      }||fS |lj                  | j                        }t        j                  |||
      j                  |      }t        j                  ||k7  |d      }|j                         }n"||k7  j                         j                  |       }|t        j                  j                  k(  r|j                         }||fS |t        j                   j                  k(  r|j                         |z  }||fS )Nr"   r#   r   r*   r   )rJ   r	  r  rc   rd   rO   gatherr  r!   r'   rw   r7  expandr   r7   r)   r(   )rv   r   r   r   r  n_dimsr  r	  wr  safe_target_r
  r  wsums                 r,   _nll_loss_forwardr    s    XXZFKzA:E "(aE+E"AAax++f4fa@K((5L ll4l;CCKPPF[[</;FINN(((VaZ}}R-|##HHTZZ ||A{L9AA+N{{6\14;xxz,.33588>IMM''' < 
inn**	*,<r+   c                     | j                         dkD  r| j                         dk  sJ d       |j                         dk  sJ d       | j                         dk(  xr |j                         dk(  }|sA| j                  d   |j                  d   k(  s"J d| j                   d|j                   d       | j                  d	   }|=|j                         dk(  r|j                         |k(  sJ d
| d|j                          t        | ||||      S )Nr   r#   r)  r"   r*  r+  r,  r-  rM   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rJ   r	  r   r  )rv   r   r   r   r  r/  	n_classess          r,   nll_loss_forwardr    s1    88:>dhhjAoP/PP-

EDE 88:?8vzz|q'8L

1a(J	#DJJ<z&,,qIJ  

2I>

flln	9J	8Cz  |B  |H  |H  {I  JJ  T669lKKr+   c                      t        | ||||      S r4   )r  )rv   r   r   r   r  s        r,   nll_loss2d_forwardr    s     T669lKKr+   Ac                 0    |dz   | z  |dz   z
  | z  | z  dz   S )Nr#   r   r"   r*   r8   r  s     r,   _upsample_cubic_convolution1r    s(    UaK1q5!Q&*Q..r+   c                 <    || z  d|z  z
  | z  d|z  z   | z  d|z  z
  S )Nr     r2  r*   r  s     r,   _upsample_cubic_convolution2r    s0    UQU]a!a%'1,q1u44r+   r]  c                 t    d}t        | dz   |      t        | |      t        d| z
  |      t        d| z
  |      fS )Ng      ra   r   )r  r  )r]  r  s     r,    _upsample_get_cubic_coefficientsr    sE    A$QWa0$Q*$S1Wa0$S1Wa0	 r+   coeffstsc                 P    t        |      }t        d t        | |      D              S )Nc              3   ,   K   | ]  \  }}||z    y wr4   r*   )r  r  r  s      r,   r  z+_upsample_cubic_interp1d.<locals>.<genexpr>  s     E0DHRR0Dr=  )r  _sum_tensorsr  )r  r  coeffs2s      r,   _upsample_cubic_interp1dr    s$    .r2GEFG0DEEEr+   c                 6    t        t        j                  |       S r4   )r   rc   add)r  s    r,   r  r    s    %))R  r+   	num_stepsc                     | dk  rt        j                  d||      S |s| dz
  | z  nd}t        j                  | || ||      S )Nr"   r   r  )stepsr  r   )rc   r  linspace)r  rx  r   r  r  s        r,   _linspace_from_neg_oner    sI     A~||AfE::-:)a-9	$A>>1"ayuMMr+   thetahr  c                    | j                   }| j                  }t        ||||      j                  d|d      }t        ||||      j                  |dd      }t	        j
                  d||      }t        j                  j                  j                  |ddd      }t        j                  j                  j                  |ddd      }t        j                  j                  j                  |d	dd      }||z   |z   S )
Nr"   )r"   r"   r"   r  )r   r#   constantr   r  r  rw   r"   r"   )r#   r   	r   r  r  r  rc   rO  rZ  r  r  )	r  r  r  rx  r   r  grid_xgrid_ygrid_ones	            r,   _make_base_grid_4dr    s    KKE\\F $A}eVDII!QPQRF#A}eVDII!QPQRFzz)5@H XX  $$VjPQ$RFXX  $$VjPQ$RFxx""&&xV*TU&VHF?X%%r+   r  c                    | j                   }| j                  }t        ||||      j                  dd|d      }t        ||||      j                  d|dd      }t        ||||      j                  |ddd      }	t	        j
                  d||      }
t        j                  j                  j                  |ddd      }t        j                  j                  j                  |ddd      }t        j                  j                  j                  |	d	dd      }	t        j                  j                  j                  |
d
dd      }
||z   |	z   |
z   S )Nr"   )r"   r"   r"   r"   r  )r   r   r  r   r  )r"   r#   )r#   r"   )r   r   r  )r  r  r  r  rx  r   r  r  r  grid_zr  s              r,   _make_base_grid_5dr    s5   KKE\\F#A}eVDII!QPQSTUF#A}eVDII!QPQSTUF#A}eVDII!QPQSTUFzz,eFCH XX  $$VjPQ$RFXX  $$VjPQ$RFXX  $$VjPQ$RFxx""&&xV*TU&VHF?V#h..r+   c                     |\  }}}}t        | |||      }|j                  ddd      | j                  j                  d      z  j	                  d      }|j                  |||d      S )Nrx  rM   r   r"   rJ  r#   )r  r  rO  rO   r   )	r  r  rx  r  rP   r  r  	base_gridgrids	            r,   _affine_grid_generator_4dr  )  sg    JAq!Q"5!QmLI NN2q!$uxx'9'9!'<<AA"ED99Q1a  r+   c                     |\  }}}}}t        | ||||      }|j                  ddd      | j                  j                  d      z  j	                  d      }	|	j                  ||||d      S )Nr  rM   r2  r"   rJ  r   )r  r  rO  rO   r   )
r  r  rx  r  rP   r  r  r  r  r  s
             r,   _affine_grid_generator_5dr  3  sm    MAq!Q"5!QOI NN2q!$uxx'9'9!'<<AA"ED99Q1a##r+   c                     t        j                  t        |      dv d        t        |      dk(  rt        | ||      S t	        | ||      S )N)r2  r  c                       y)NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r*   r*   r+   r,   r|   z'affine_grid_generator.<locals>.<lambda>C  s    Ur+   r2  r  )rc   r}   rp  r  r  )r  r  rx  s      r,   affine_grid_generatorr  =  sJ     
LLD	VU 4yA~(MRR(MRRr+   r  interpolation_modepadding_mode_expand_gridc           	          !"#$%&'()*+,- t        j                  dv fd       t        j                  dv fd       dt        dt        dt        ffd-dt        dt        d	t        dt        fd
+dt        dt        dt        f+fd dt        dt        dt        f -fd} j                  \  $%|j                  \  })*}|dk(  sJ r(|j                  d)*|      j                  )*d      }dt        dt        dt        f$%fd&t        j                   j                        j                  ddd      t        j                   j                        j                  ddd      dt        dt        dt        dt        f&)*fddt        dt        dt        f fd"|d   }	|d   }
dk(  r ||	%      } ||
$      }|j                         |j                         c'('dz   (}}'(dz   }}||}}||z
  ||z
  z  }||z
  ||z
  z  }||z
  ||z
  z  }|'z
  |(z
  z  }t        "fd'(|f|||f|||f|||ffD              S dk(  r< ||	%      } ||
$      }|j                         }|j                         } "||d      S  -|	%      } -|
$      }|j                         '|j                         (|'z
  ,|(z
  }s",j                  d      ,|j                  d      }dt        dt        dt        f "$%fd#dt        dt        f#'(,fd!t        !fdt        d       D              }t!        ||      S )!N)r   r"   r#   c                      d  S )NzInvalid interpolation mode r*   )r  s   r,   r|   z"_grid_sampler_2d.<locals>.<lambda>\  s    -.@-ABr+   c                      d  S )NzInvalid padding mode r*   )r  s   r,   r|   z"_grid_sampler_2d.<locals>.<lambda>_  s    -B<.+Qr+   coordsr  rK   c                 B    r|dz  dz
  n|dz  }|dz  dz
  }| |z  |z   S Nr   r*   )r  r  r   ofsrx  s       r,   unnormalizez%_grid_sampler_2d.<locals>.unnormalizeb  s8     %2tczCs
Sj3|c!!r+   	twice_low
twice_highc                 P   ||k(  rt        j                  |       S |dz  }||z
  dz  }| |z
  j                         }t        j                  ||      }||z  j	                         j                  t         j                        }t        j                  |dz  dk(  ||z   ||z   |z
        S )Nr#   r   r"   r   )rc   r  r   fmodfloorr7   int8rd   )r  r  r  
coords_mincoords_spancoords2extraflipss           r,   reflect_coordinatesz-_grid_sampler_2d.<locals>.reflect_coordinatesm  s    
"##F++]
!I-2J&++-

7K0;&--/222D{{AINEJ.j0H50P
 	
r+   c                     dk(  r| S dk(  rt        j                  | d|dz
        S r | dd|dz
  z        }n | dd|z  dz
        }t        j                  |d|dz
        S )Nr   r"   r#   rM   r   )r  r  coords_reflectedrx  r  r  s      r,   compute_coordinatesz-_grid_sampler_2d.<locals>.compute_coordinatesy  sx    1MQ;;vq$(33#6vq!tax.#Q #6vr1t8a<#P ;;/D1H==r+   c                 (     | |      } ||      S r4   r*   )r  r  	coords_unr  r  s      r,   compute_source_indexz._grid_sampler_2d.<locals>.compute_source_index  s    -	"9d33r+   r#   r"   xsysc                     t        j                  d| k  t        j                  | k  t        j                  d|k  |k                    S rn   )rc   r  )r  r  iHiWs     r,   in_bounds_condz(_grid_sampler_2d.<locals>.in_bounds_cond  sF      GU&&rBw0A0A!r'2PR70ST
 	
r+   r  wsc                      | |      rndt        	
fd| j                  t        j                        |j                  t        j                        |fD              S )Nr"   c              3   p   K   | ]-  }t        j                  |d       j                         / ywr  )rc   rd   r  )r  r]  r  r  r  oHoWs     r,   r  z1_grid_sampler_2d.<locals>.clip.<locals>.<genexpr>  s7      
M KKa#((Ar26Ms   36r   )r  r7   rc   r  )r  r  r  r  r  r  r  r  r  r  r  s      @@r,   clipz_grid_sampler_2d.<locals>.clip  sY    b"%
 A1 
ee%++e.EKK0H"M
 
 	
r+   ixiyc                 8     	| ||      \  }}}||f   |z  S r4   r*   )
r  r  r  idx_xidx_yw_C_idxN_idxr  r  s
         r,   get_summandz%_grid_sampler_2d.<locals>.get_summand  s0    B?ubue+,r11r+   ).r   ).r"   r   c              3   :   K   | ]  \  }}} |||        y wr4   r*   )r  r  r  r  r   s       r,   r  z#_grid_sampler_2d.<locals>.<genexpr>  s,      
 R B" s   c                 <     |       } |      } ||d      S rV   r*   )r  r  r8   rT   r  r   r  r  s       r,   get_value_boundedz+_grid_sampler_2d.<locals>.get_value_bounded  s*    #B+A#B+Aq!Q''r+   r  c                     | dz
  z   } dz
  |       |       dz   |       dz   |      f}t        |      S )Nr"   r#   )r  )r  iy_ofscsr#  ix_nwiy_nwtxs      r,   	get_coeffz#_grid_sampler_2d.<locals>.get_coeff  s[    cAg&F!%!)V4!%0!%!)V4!%!)V4	B ,B33r+   c              3   .   K   | ]  } |        y wr4   r*   )r  r  r*  s     r,   r  z#_grid_sampler_2d.<locals>.<genexpr>  s     :#y~   r2  )rc   r}   r   r}  r	  r  r  r  r  r   r  r  roundrO   r  rN   r  ).r  r  r  r  rx  r  r  rP   twor8   rT   r  r  ix_neiy_neix_swiy_swix_seiy_sew_nww_new_sww_se
ix_nearest
iy_nearesttyr  r  r  r  r  r  r  r*  r   r#  r  r  r  r'  r(  r  r  r  r)  r  s.   ` ````                     @@@@@@@@@@@@@@@@@@@r,   _grid_sampler_2dr<  K  s    
LLi'B 
LL	!#Q"F "# "& "

F 

s 

 

PV 


>F 
># 
>& 
>4V 43 46 4 77LAq"bZZNAr2s!8O8 yyAr2s+221aRC
6 
v 
& 

 LL188,11!Q1=ELL188,11!Q1=E

 

V 

 

4F 

 

2 2F 2& 2 2
 	VAVAQ!!R(!!R(xxz288:uqy%ueaiueu
urz*U
urz*
rEz*U
rEz* 
 t$t$t$t$	 
 
 	
 
q	 !!R(!!R(XXZ
XXZ
:z155BB

%Z%ZaBaB	(& 	(f 	( 	( 	(
	43 	46 	4 	4 :q::'33r+   c                 "    t        | ||||      S )N)r  r  r  rx  )r<  )r  r  r  r  rx  s        r,   grid_sampler_2dr>    s      	-!# r+   c                 &    t        j                   j                         dk(  xr j                         dk(   fd       t        j                   j                  d      j                  d      k(   fd        z  j	                  d      S )Nr#   r"   c                  L    d j                          dj                          S )Nzmatrix @ vector expected, got r  r{   rv   r  s   r,   r|   zmv.<locals>.<lambda>
  s    0BswwykJr+   r   c                  v    d j                  d       d j                  d       dj                  d       dS )Nzsize mismatch, got input (r   r8   r"   z), vec (r-  )r  rA  s   r,   r|   zmv.<locals>.<lambda>  s:    ,TYYq\N!DIIaL>RURZRZ[\R]Q^^_`r+   r{   )rc   r}   rJ   r  r   rA  s   ``r,   r  r    ss     
LL
a*CGGINJ 
LL		!#` 3J""r+   c                 n   |  j                  d      }|O|dz
  |z  dz   }d|z
  | z  || j                         |  |z
  j                         z   j                         |z   z  z   }n@d|z
  | z  |z   | j                         |  |z
  j                         z   j                         z   }|||z  }t        ||      S r  )rP  rb   r8  r   )rv   r   r   
pos_weightr   r   
log_weightr   s           r,    binary_cross_entropy_with_logitsrF    s    
 u"G 1n.2
F
d"Zh^^$ 5 5 77<<>H&
 

 Z4~~D57?"7"7"99>>@A 	 f}i00r+   tensor1tensor2c           
      n   | j                   |j                   k\  r| |fn|| f\  }}|j                   dk\  r|j                   dk  sy|j                  ry| j                   dk(  ry|j                         dk(  ry|j                  }|j	                         }t        d t        |d d |dd	 |dd	       D              S )
Nr   r#   FTr   c              3   4   K   | ]  \  }}}|||z  k(    y wr4   r*   )r  st1st2s2s       r,   r  zshould_fold.<locals>.<genexpr>;  s&      RNS#r 	sRxRs   rJ  r"   rM   )rk  r[  r   r	  rd  r  r  )rG  rH  t1t2t1_shape	t1_strides         r,   should_foldrR  +  s     $+<<7<<#?gwgwEWFBGGqLRWW\	||q	xxzQxxH		I !)CR.)Ab/8Ab>R  r+   c                    | j                         }|j                         }|dk7  r|dk7  sJ |dk(  r|dk(  rt        j                  | |      S |dk(  r|dk(  rt        j                  | |      S |dk(  rC|dk(  r>t        j                  t        j
                  t        j                  | d      |      d      S |dk(  r|dk(  rt        j
                  | |      S t        | |      r||kD  }|r|j                  n| }|s|n|dk(  r| j                         n| }|j                  }t        |d d       }t        t        j                  |      }	|j                         dk(  }
|
r|j                  |j                  d          |j!                  |	|d         }|
r>|j                  |      j#                  |      }|r|j                  j%                         S |S |j                  |      j#                  |      S |dk\  r^|dk\  rX|dkD  r| j'                  d      nd}| j'                  d      }| j                  d d }|dkD  r|j'                  d      n|j'                  d      }|dkD  r|j'                  d      nd}g }t)        |dz
        D ]"  }|j                  |j'                  |             $ |dk(  rn|dk(  ri|d   |d   k7  r^|d   dk(  r'| j*                  rt-        | j	                  d      |      S |d   dk(  r'|j*                  rt-        | |j	                  d            S t        t        j.                  ||            }|||gz   }t1        |      }| j3                  |      j!                  |||      }|dk(  }|r7||gz   }|j3                  |      j!                  ||      j                  d      }n)|||gz   }|j3                  |      j!                  |||      }|}|dkD  r|j                  |       |dkD  r|j                  |       |r/|j5                  |      j	                  d      j#                  |      S |j5                  |      j#                  |      S t        j6                  dd        y )	Nr   r"   r#   rM   rJ  r   Fc                       y)Nz/both arguments to matmul need to be at least 1Dr*   r*   r+   r,   r|   zmatmul.<locals>.<lambda>  s    $Ur+   )rJ   rc   dotr  r  r~  rO   rR  rO  r]  r	  rc  r   operatorr   r^  r  r  r  r  rN   r[  rN  broadcast_shapesr  r  bmmr}   )rG  rH  dim_tensor1dim_tensor2r$  rN  rO  sizes_1output_shapefolded_dim1t2_is_matrix	t1_foldedr   r  m1batch_tensor1m2rB  batch_tensor2rT  expand_batch_portiontensor1_expand_sizeexpand_batch_producttensor1_expanded
vector_rhstensor2_expand_sizetensor2_expandeds                              r,   rN  rN  A  s@    ++-K++-K!q 000aK1,yy'**		kQ.xx))		kQ.}}UXXeoogq&A7KQOO		kQ.xx))	Wg	&  +-	$WZZ'$G+:J799;PW 	 ((GCRL)X\\<8 vvx1}, JJ{GBK8	 \\"%**<8F-6699'')BFB<<#((66		kQ. !,aGLLQ\\"cr*!,qW\\"gll26F +aGLLQ#%{Q'A  a1 ( 1q a M!$44Q1$)>)>gooa0'::Q1$)>)>gwq'9::  $""=-@ 
 3aW<#$89 #>>*=>FF !R
 !A%
"6""=23-r21  #7"a"@&~~.ABJJ$b!  ,?"?"#''(89AA"EJJ<XX#''(89>>|LLUUVr+   r  r  c                      !  j                   \  }}|\  }}dd}	d }
 |	|||      } |	|||      }t        j                  | j                        j	                  |ddd      t        j                  | j                        j	                  d|dd      t        j                  | j                        j	                  dd|df      }t        j                  | j                        j	                  ddd|f      } |
|||      }|j                         }||z
  !|j                  t        j                        } |
|||      }|j                         }||z
  }|j                  t        j                        }|dz
  ||dz   |dz   f}|dz
  ||dz   |dz   f fd  !fdt        fd	|D              }t        ||      }t        j                         }|j                  |
      }|S )Nc                 L    |r|dkD  r| dz
  |dz
  z  S dS |
|dkD  rd|z  S | |z  S r  r*   )rL  rM  rx  ri   s       r,   compute_scalez1upsample_bicubic2d_default.<locals>.compute_scale  sB    5=\GaKHqL1HqH % 1eai1u9WWxEWWr+   c                 &    |r| |z  S | |dz   z  dz
  S r  r*   )ri   	dst_indexrx  s      r,   r  z8upsample_bicubic2d_default.<locals>.compute_source_index  s$    9$$IO,s22r+   r  r"   r   r#   c                     t        j                  | ddz
        }t        j                  |ddz
        }t        j                  ||g      S r  )rc   r   r~   r  )	r  r  y_idxx_idxr  r  r  r  r  s	       r,   load_boundedz0upsample_bicubic2d_default.<locals>.load_bounded  sK    B26*B26*!!!eUE5%ABBr+   c                 H     t         fdD              }t        |      S )Nc              3   0   K   | ]  } |        y wr4   r*   )r  x_ofsrs  rT   s     r,   r  zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>  s     EWEa/Ws   )r  r  )rT   coeffs_xixs_ofsrs  t_xs   ` r,   get_x_interpz0upsample_bicubic2d_default.<locals>.get_x_interp  s     EWEE'#66r+   c              3   .   K   | ]  } |        y wr4   r*   )r  y_ofsrz  s     r,   r  z-upsample_bicubic2d_default.<locals>.<genexpr>  s     >gU\%(gr,  rH  r4   )r	  rc   r  r  r  r  r7   r  r  r  r@   r	  r  )"r  r  rx  r  r  r  r  r  r  rm  r  height_scalewidth_scaleout_yout_xreal_xin_xr  real_yin_yt_yr  iys_ofscoeffs_yr
  rI  r  r  rz  r  r  rx  rs  ry  s"   `                         @@@@@@@@r,   upsample_bicubic2d_defaultr    s    77LAq"bFBX3 !R@LBw?KLL188,11!Q1=ELL188,11!Q1=ELLAHH-22Aq"a=AELLAHH-22Aq!R=AE!+umDF<<>D
4-C	u{{	#B!,}EF<<>D
4-C	u{{	#BAvr2626*GAvr2626*GC C
7 >g>>H%h4F //2M];FMr+   r  c                 $   t        j                  t        |      t        |      z   dk(  d        |H|J t        t        t
        t
        f   t        d t        | j                  dd  |      D                    }|r|nd\  }}t        | ||||      S )Nr"   c                       y)Nz:Must specify exactly one of output_size and scale_factors.r*   r*   r+   r,   r|   z(upsample_bicubic2d_vec.<locals>.<lambda>  s    Lr+   c              3   P   K   | ]  \  }}t        t        |      |z           y wr4   )r   r   )r  r  ri   s      r,   r  z)upsample_bicubic2d_vec.<locals>.<genexpr>  s*       ?HAu 	!u,- ?s   $&r#   r  )
rc   r}   r$  r	   r   r}  r  r  r	  r  )r  r  rx  r  r  r  s         r,   upsample_bicubic2d_vecr    s     
LL[D//14L (((#s(O  #AGGABK ? 
 )6}<GW%amWgVVr+   r   r   r  c                j    t        j                  | ||      }t        j                  | ||      }||fS )Nr  )rc   aminr1  )rv   rJ   r  r  r1  s        r,   aminmaxr    s2     ::dW5D::dW5D:r+   r   c                    t         j                  t        j                  t        j                  |       d|       |||      S )Nr   r   )r~   r   rc   rd   isnan)rv   rJ   r  r   s       r,   nansumr    s2     88EKKD 11d;S'QV8WWr+   r   r
  r  r  r
  c          	      N    t         j                  j                  d| d||||      S )Nr   r"   r  r~   r  
start_step)r[  r   r
  r  r  s        r,   arange_defaultr  #  s/     ;;!!	3vf "  r+   c          	      N    t         j                  j                  | |d||||      S )Nr"   r  r  )rZ  r[  r   r
  r  r  s         r,   arange_startr  2  s/     ;;!!sAU6&Z "  r+   c                      ddl m}  || i |S )Nr   )out_dtype_dense)!torch._higher_order_ops.out_dtyper  )rB   rC   r  s      r,   out_dtype_decompr  A  s    AD+F++r+   marginc                 t   	
 t        j                          t        j                         j                  d   
 j                  d   	t        j                  |dk(  xs |dk(  d        t        j                   j
                  dk(  xr 	dk7   fd       t        j                  j
                  dk(  xr j                         
k(  
fd       Qt        j                        t        j                  j
                  dk(  xr j                         	k(  	fd       j                  d      t        j                   d      }||z
   z   }|j                  d      }|dk(  r|n||z  }|   z  }t        j                  	 j                  	      }t        j                  |k7  |d      }|t        j                  j                  k(  r|j!                         S |t        j"                  j                  k(  r |j%                         |j                  d   z  S |j!                  d
      S )Nr   r"   r#   c                       y)Nz only p == 1 and p == 2 supportedr*   r*   r+   r,   r|   z#multi_margin_loss.<locals>.<lambda>W  s    +Mr+   c                  "    d j                    S NzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: r  )r   s   r,   r|   z#multi_margin_loss.<locals>.<lambda>Z  s    _`e`k`k_lmr+   c                  (    d  dj                    S )Nz#inconsistent target size, expected r  r  )nframer   s   r,   r|   z#multi_margin_loss.<locals>.<lambda>^  s    5fXYv||nUr+   c                  (    d  dj                    S )Nz#inconsistent weight size, expected r  r  )rJ   r   s   r,   r|   z#multi_margin_loss.<locals>.<lambda>d  s    9#i~Vr+   rJ   rr  r  r{   )rc   
atleast_2d
atleast_1dr	  r}   rk  r   rO   r  rP  r  r  rd   r!   r(   rw   r   r)   r   )r   r   rB  r  r   r   ure   r  rJ   r  s   ``  `    @@r,   multi_margin_lossr  H  s    U#Ef%F[[^F
++a.C	LLa!16#MN	LL

a$C1Hm 
LLq5V\\^v5U !!&)KK163!6V	
 a FU0A
UA	AA!VQAv
,,s5<<
0CC6M1a(AINN(((vvx	imm))	)uuw##vv!v}r+   	is_targetc                    | j                   |j                   t        j                  |       } t        j                  |      }| j                   d   }t        j                  t	              dk  xr |dk7  fd       t        j                  t	              dk  xr k(  fd       t        j
                  ||j                        }|dk(  }t        j                  t        j                  |||      dd	      }||k  }t        j                  ||d      }t        j                  | d|
      }	t        j                  ||d      }
t        j                  ||
j                  d      k(  d      }d|	j                  j                  d      z
  | z   }|j                  d      }||z  }t        j                  |d|      }|t        j                  j                   k(  r!|j#                  d      j%                         }n@|t        j&                  j                   k(  r|j#                         }n|j#                  d      }|j)                  | j*                        j-                        }||fS )Nr"   r#   r   c                      d  S r  r*   )orig_input_shapes   r,   r|   z0multilabel_margin_loss_forward.<locals>.<lambda>  s    _`p_qrr+   c                      d d  S )Nzinconsistent target size: z for input of size: r*   )r  orig_target_shapes   r,   r|   z0multilabel_margin_loss_forward.<locals>.<lambda>  s    ,->,??STdSefr+   r  rM   Tr  r  r{   ra   )r   rM   )r	  rc   r  r}   rp  r  r  r  rd   r  anyrO   TrP  r!   r(   rw   r   r   r)   r7   r   r  )r   r   r   rJ   r  is_endend_idxtarget_masktidx0r  tidx1r  re   r  r  s                @@r,   multilabel_margin_loss_forwardr  w  s    {{U#Ef%F
++a.C	LL"/saxr 
LL!#M(9=M(Mf
 ,,s6==
1Cr\FjjVS#6BMG-KKKVQ/EU%0AKKVR0E		#R!88a@Iaccmmm##e+A	AA	CAIq!$AINN(((EEgE##%	imm))	)EEGEEgEU[[)112CDIi<r+   ri   querykey	dropout_p	is_causalreturn_debug_maskc          
           j                   } j                  d    j                  d    j                  d    j                  d   f\  }}	}
}t        j                  t        j                         xr |t        j
                  u fd       t        j                   j                         dk(  xr( j                         dk(  xr j                         dk(   fd       t        j                  dk(  fd	       t        j                   j                  d   j                  d   k(  xr j                  d   j                  d   k(  d
        t        j                  |du d        t        j                  ||
|	|gt        j                        }t        j                  g t        j                        t        j                  g t        j                        }}d\  }}t        j                  g t        j                        t        j                  g t        j                        }}t        j                  g  j                    j                   j                        }t        j                  j                   d |d |      \  }}|j                  dd      j!                  t        j"                        }|j                  dd      ||||||||f	S )Nr   r"   r#   r   c                  "    d j                    S )Nz'query must be FP32, FP64, BF16 but got r   )r  s   r,   r|   z4scaled_dot_product_flash_attention.<locals>.<lambda>  s    9%++Gr+   r2  c                  n    dj                          d j                          dj                          S )Nz,q, k, v must be a 4 dimensional tensor, got r  r{   )r  r  rw   s   r,   r|   z4scaled_dot_product_flash_attention.<locals>.<lambda>  s1    >uyy{m2cggi[XZ[`[d[d[fZghr+   r   c                      d  S )Nz&dropout probability must be zero, got r*   )r  s   r,   r|   z4scaled_dot_product_flash_attention.<locals>.<lambda>  s    $J9+"Vr+   c                       y)Nz&q, k, v should have the same head sizer*   r*   r+   r,   r|   z4scaled_dot_product_flash_attention.<locals>.<lambda>  s    8r+   Fc                       y)Nz#return_debug_mask is not supported.r*   r*   r+   r,   r|   z4scaled_dot_product_flash_attention.<locals>.<lambda>  r  r+   r   )r   r   )r   r  r[  r  rH  )r   r	  rc   r}   r%  r.  rJ   emptyr  rL  r  r[  r~   "_scaled_dot_product_attention_mathri  r$  r  rM  )r  r  rw   r  r  r  ri   r   	batchSizenum_headqSizeheadSize	logsumexp	cum_seq_q	cum_seq_kmax_qmax_kphilox_seedphilox_offsetdebug_attn_maskr   rP   s   ````                  r,   "scaled_dot_product_flash_attentionr    sa    KKEAAAA	,(Ix 
LL&B5

+BG 
LL		q@SWWY!^@		q0@h 
LLSV 
LLA%++a.(KSYYq\U[[^-K8 
LLU"$Q YxB%++VI ;;r<ekk
%**?yI LE5!&Ruzz!BEKK
%**EK kk
kk||))	O 77??sE4It5 @ IFAH a#..U=T=T.UFA
 
r+   c                 .    t        |       fd       }|S )Nc                  <     | i |}| d   j                  |      S rn   )r   )rB   rC   r   outplace_ops      r,   
inplace_opz$register_inplace.<locals>.inplace_op  s%    4*6*Aw}}S!!r+   r   )aten_opr  r  s    ` r,   register_inplacer    s"    G$" %" r+   c                 B   | j                         s&| j                         st        |      }t        |      }t        j                  ||      }t        |t        j                        r|dk7  r||z  }|dk(  r|S t        |t        j                        r|dk7  r| |z  } | |z   S r  )r%  r|  r}  rc   rX  r6   numbersNumber)rv   batch1batch2r]   rh   r
  s         r,   baddbmmr  %  s     !!#DOO,=4yE
YYvv&FeW^^,
%qydGNN+tqyd{&=r+   c                 2    t        j                  | |d      S )Nr  rA  rC  )rv   rA  s     r,   floor_divider  6  s     99T588r+   r   r   c                    |"t         j                  j                  | g |      S t         j                  j                  | g ||      S )Nr   r  )r~   r   dim_IntListIntList_out)rv   r   r   s      r,   sum_defaultr  <  sC     {xx##D"E#::xx##D"Es#CCr+   c           	          |@t         j                  j                  | t        t	        | j                                           S t         j                  j                  | |g      S r4   )r~   r  dimsrc  rN   rJ   )rv   rJ   s     r,   squeeze_defaultr  I  sG    
{||  tE$((*,='>??||  u--r+   c                     t        fdt        t        | j                              D              }| j	                  d|d      }| ||z  z  |fS )Nc              3   .   K   | ]  }|k7  s	|  y wr4   r*   )r  rT  rJ   s     r,   r  z)_weight_norm_interface.<locals>.<genexpr>T  s     @ 31qCxQ 3s   
r#   Tr-  )r  rN   rp  r	  r   )r8   rT   rJ   keep_dimr   s     `  r,   _weight_norm_interfacer  Q  sI     @c!''l 3@@H66!Xt6,DD>4r+   )F)none)FN)r#   )r   NNr"   r4   r  )rM   FFr  r  )r"   r"   F)r   ra   N)r   r"   Nr  )NNN)r   r   FT)r   r   Fr  )r   FF([  rG   r  rV  re  enumr   r   r   	itertoolsr   r   typingr   r	   r
   r   r   r   r   rc   torch._primsr  r  torch._prims_common_prims_commonr@   torch.nn.functionalrZ  r  r   r   r   r   torch._decompr   r  r   r   r   r   r   torch._prims_common.wrappersr   r   r   r   torch.utilsr   r>   torch.utils._pytreer   rV  DispatchKeyr   str__annotations___opsr  r~   r!   r/  r$  rI   r0  compute_only_pw_cast_for_opmathpw_cast_for_opmathINT_TO_FLOATpw_cast_for_int_to_realr}  rQ   rZ   r\   rf   r  rr   fillScalarrx   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ri  py_implAutogradCUDA	Generatorr   r   r   r   r   r   r   r(   rw   r   r   r  rR   r  r  r   r  r  r'  r0  r3  r9  r<  r>  r@  rD  rX  r`  slicerq  ru  r{  r~  r  r  r  r  r  r
  r  r  r   CompositeImplicitAutogradAutogradr  r4  r8  rsubr?  rB  rI  rR  r  rc  rg  rj  rf  rv  ro  rx  r  r  r  r  r  r  r  r  r  r  r  unsafe_chunkr  r  r  no_statsr  r  _fused_dropoutr  r  r	  r  r  rI  detachlift
lift_freshr  r  r  r1  r3  r7  _adaptive_avg_pool2drg  rn  rp  rl  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r
  r  r  r"  r+  rnn_tanhr   r2  rnn_relur5  r:  r;  r>  rJ  rM  rS  ra  lstmrd  rf  rn  rp  grurr  rt  rv  ry  r{  r|  r  r  r  _unsafe_viewr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r<  r>  r  rF  rR  rN  upsample_bicubic2dr  r  r  r  r  r  r
  r  rZ  r  r  r  r  #_scaled_dot_product_flash_attentionr  r  r  r  r   r  r  rJ   r  r  addbmm_addbmmaddmm_addmv_baddbmm_fill_gelu_r  
hardswish_	hardtanh_hardtanhhardsigmoid___iand____and____ilshift__
__lshift__r  r  index_reduce_index_reduce__ior____or____irshift__
__rshift____ixor____xor__leaky_relu_r   logit_logitrelu_r  renorm_renormround_r-  scatter_r  scatter_add_scatter_addscatter_reduce_scatter_reducesilu_r*   r+   r,   <module>r=     s-@      
  % $ I I I   #   , , 0 7 S S  * (hh"" c zz~~   %!!99! !H #*88@@# 
 uDDLL  "uDDQQ  c f  **+\2F 2v 2   ,2 --.\4v 4& 4   /4 ../\S S6 S S5 S   0S
 ))*\


 
 	

 
 
   +
2 ))*+( ,( ))*+"V " ," (()@f @ @   *@ 112\f F    3 ../\PP%P05P@EP  0P 'GF Gv G   (G //0F & V    1 //0\:F :& :U :  1: 001\LL%L7<LNRL   2L **+\) )f )3 )   ,)< **+5v 5f 5  ,5 		"&v && &   #& **+\>v >V > >   ,>
 **+6 6 66 6 ,6 334%%
% % 66>	% 5% --.&&{'?'?@ +/5
55 5 	5
 5 (5 5   A /5( ../''(@(@A +/X
XX X 	X
 X (X X  B 0X 667



 
 	

 
 
 
 
   8
$ 112\<f <F <F <v <   3<v #   &3<>>3G3G1
1 1-011   '1 ../\11 &1061CF1   01 ++, ^^))	1
11 1 	1   -1 44<<=%/5BEMR  > 44??@
R
R

R 
R 	
R
 
R 
R  A
R 00889		%	/5	BE	NS	  :	 00445
R
R

R 
R 	
R
 
R 
R  6
R$$
$ $ V	$
 $ $ $ $@ ))*\Nf NF N N N   +N$ ../\''
' ' V	'
 ' ' ' '  0'T 001\
  V	
      2B 112  $^^))	1
11 V1 	1
 1   31& ::;\
  $^^))
  V	
     <  --. ^^))111 1 	1   /1 667\
 ^^))			
	 	 		
 	   8	 		") )v )% )  #) ,,-& &F &v &  .& ++,	O	Oc	O 
	O 		O
 
	O 	O  -	O 

))* /?
/? 
/? C=	/?
 
#/? /? +/?d ,,-E Ed3i Ec ERU E  .E
 ../OO&*3iO9<ODGOORO  0O%+:?++ 334\ XX!'X.1X@EX !  5X 778 KK!'K.1K@EK !  9K*& $MMcM 3iM #Y	M
 IM M   %M` $]]c] c] 3i	]
 #Y] I] ]   %]@ 445
 
v 
e 
  6
 ,,-Y
Y"3iY47Y?BYJMYY  .Y" ++334>B

%
,4UO

  5
& %kCCDk2236 e HTN  4 E & ++,VVA& AU A8D> A  -A & S    '* ))*F  T   +, 		(()/f /V /E /& / */ 		(()/f /U /5 / / */ ' $  	
    (& 556  	
   7:DI  --.56
#Cy/2	&\ /: ))0015 5C 5c 5%PSBT 5 25 44<<=67BB $S	B03B
63;B >B 

))*/ /C /c /%:L / +/" --55)) /
/ &/ 
/ 
&\	//. 

# f F # #    $" ../
 

  	
     0" 

# f 6      $ 77??@S&S&S& S& 	S&
 FS& S& S& 
S& S& dS& 8FXf-x/??@S&  AS&n 77;;<  	
 F   
  d ,, ,, ,, 8FXf-x/??@ =88F# x/?  77??@JJJ 3iJ 	J
 J VJ 6
J dJ 8FXf-x/??@J AJ\ 77;;< 3i 	
  V 6
 d ,, ,, ,, 8FXf-x/??@ =4OOVO 6
O 6"	O
 &!O O O 
O O 6668F#3Xf5EEFOd ../UK/((V( 6
( 6"	(
 &!( ( ( 
( 666!"( 0 0(4 ''(<(<=''(M(MN 
 
V 
 6
 
 6"	 

 &! 
  
  
 
 
 666!" 
 O > 
F "";#H#HIG4< G JG AAIIJV 6
 	
   
 666!" K* 55==>((V( 6
( 	(
 ( ( ( 
( 666!"( ?(  55>>?((V( 6
( 	(
 ( 
( 666!"( @( @@HHIKKVK 6
K 	K
 K K K 
K 6666612K JK0 ++,VV   -
 & $(%)37 EKK 
 U\\"   E//0  'J diiAB  C &&{';';<--.VVVV, 6
 6"	
 &!  !&  - / => 77??@``` V` 6"	`
 &!` ` &!` ` 
` d` 68F#Xf%556` A`H 77;;<""" V" 6"	"
 &!" " &!" " 
" d" ,," ,," ,," 68F#Xf%556" ="J 667VVV$  6"	
 &!  v   % 82 112c'v c'E#s(O c'   3c'L ( HH	H H 	H H )H ' II	I I 	I I  (I$ %@%@	%@ %@ 	%@ %@ %@P (()<: <C <
 <J < *< (=* =3 =z =: =  )=@@@$.@8B@PT@* 001Xx (f (vv~)> (  ! 2( % $'$'+/		tS%	  c5 
! (	  &  &; ';
8 //334$$[%J%JK$$[%9%9:@ ; L 5@ 66::;##++K,Q,QR##++K,@,@AG B S <G //334$$[%J%JK$$[%9%9:K ; L 5K 66::;##++K,Q,QR##++K,@,@AR B S <R //334$$[%J%JK$$[%9%9:T ; L 5T 66::;##++K,Q,QR##++K,@,@A B S <@ //778  (()=)=> #>>c> UO> 	>  ? 9> 66>>?''//0D0DE #>>c> UO> 	>  F @> //778  (()=)=> !% $		C	Cc	C uo	C uo		C
 	C  ? 9	C 66>>?''//0D0DE !% $		C	Cc	C uo	C uo		C
 	C  F @	C //778  (()=)=> !% $ $c uo uo	
 uo   ? 9 66>>?''//0D0DE !% $ $c uo uo	
 uo   F @ >/ FK+\&,/-d& R ++,[BBC[112. 3 D -.8 ++,[BBC[112. 3 D -.8 **+KAABK001. 2 C ,.@ **+KAABK001. 2 C ,.@/6<~/d 		(>>?--.S / @ )S> 		'==>,,-S . ? (S<;; &{<<={++,. - > '.6 '==>,,-. . ? (.6 44889!!))+*O*OP!!))+*>*>? @ Q : 00445%%k&K&KL%%k&:&:;N < M 6N 00889!!))+*>*>?
 !% $JJcJ J uo	J
 uoJ J  @ :JZ ..667UCV UCd3i UCF UC  8UCr ))112F v $  3 ,,d.?.?@A  B ++,-" ."2 
2 2  V2  	2 
 2  66>2 j --.X~&L
LL VL 	L
 L 66>L ' /L4 //0X~&L
LL VL 	L
 L 66>L ' 1L/F /u / /5F 5u 5 5 3E F%7 FV F F!Xf% !& !NN#'N05NEJ\\N&f & & &T &"/f / / / /T /"!V !49 !T !$V $49 $T $ 223S StCy S S   4S  c4c4
c4 c4 	c4
 c4 c4 c4L ,,-  
  	
     .   	#   !	# ==>$)..:N:N1  ?1,   , [BBCtW  DtWn //778
  $#::sCx: : e_	:
 e_: :  9:z //334$$[%J%JK$$[%9%9:
 48	WW%S/*W W E%,/0	W
 W   ; L 5
W. %UEu   & $X4 X  %X ,,dkkoo>? $( ==%)
	
 EKK 
 LL	

 U\\"
 
  @
 **+,
 $( ==%)	 EKK 	
 LL U\\"  - 	", #, ../''(<(<= #^^))))) ) 	)
 V) ) )  > 0)X ;;<$$,,44[5I5IJX{#--- - 66>	- $ K =-x @@HHI
 #b "bb	b b 	b
 b b E?b 66663VVVKLb JbJ '   ( ))*9  +9 ))488<<89 $( 		D
	D EKK 	D 
&			D
 	D :	D --t||/?/?@A.& .x} . B. 		==>  ?  t{{ + djj ) djj )  - TYY ' TYY ' $.. 1  / ""D$4$4 5  - !!4?? 3 $.. 1 ##T%6%6 7 t{{ + !!4?? 3  - !!4?? 3 djj ) TYY ' t{{ + djj )  - ""D$4$4 5 %%t':': ; TYY 'r+   