
    Ph                         d dl mZmZmZ d dlZd dlmZmZ d dlmZ d dl	m
Z
mZmZ d Z G d dej                        Zdd	Zd
 ZddZd Zd Zd Zd Zd Zd Zd Zy)    )ListOptionalTupleN)nnTensor)load_state_dict_from_url)wav2vec2_modelWav2Vec2Modelwavlm_modelc                     t         t        d}| |vr(t        dt        |j	                                d|        ||    } |di |S )N)Wav2Vec2WavLMzSupported model types are z	. Found:  )r	   r   
ValueErrortuplekeys)type_params	factoriesfactorys       oC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torchaudio/pipelines/_wav2vec2/utils.py
_get_modelr   
   sY    "I I5eINN<L6M5NiX]W^_``GV    c                        e Zd ZdZdedededef fdZddedee   d	e	eee   f   fd
Z
ej                  j                  	 	 ddedee   dee   d	e	ee   ee   f   fd       Z xZS )_Wav2Vec2Modelz{Wrapper class for :py:class:`~torchaudio.models.Wav2Vec2Model`.

    This is used for layer normalization at the input
    modelnormalize_waveformapply_log_softmaxappend_starc                 Z    t         |           || _        || _        || _        || _        y N)super__init__r   r   r   r   )selfr   r   r   r   	__class__s        r   r#   z_Wav2Vec2Model.__init__   s-    
"4!2&r   	waveformslengthsreturnc                    | j                   r*t        j                  j                  ||j                        }| j                  ||      \  }}| j                  r+t        j                  j                  j                  |d      }| j                  rWt        j                  d|j                  d      df|j                  |j                        }t        j                  ||fd      }||fS )N)dim   )dtypedevice)r   r   
functional
layer_normshaper   r   torchlog_softmaxr   zerossizer-   r.   cat)r$   r&   r'   outputoutput_lengthsstar_dims         r   forwardz_Wav2Vec2Model.forward"   s    ""00IOOLI!%Iw!?!!XX((44V4DF{{Av{{1~q#9V\VcVcdHYY1r:F~%%r   
num_layersc                     | j                   r*t        j                  j                  ||j                        }| j
                  j                  |||      S r!   )r   r   r/   r0   r1   r   extract_features)r$   r&   r'   r;   s       r   r=   z_Wav2Vec2Model.extract_features-   s@     ""00IOOLIzz**9gzJJr   r!   )NN)__name__
__module____qualname____doc__r
   boolr#   r   r   r   r:   r2   jitexportintr   r=   __classcell__)r%   s   @r   r   r      s    
'm ' 'Z^ 'mq '	& 	&(62B 	&eTZ\dek\lTlNm 	& YY %)$(	KK &!K SM	K
 
tF|Xf--	.K Kr   r   c                     t        | |||      S )z&Add extra transformations to the model)r   )moduler   r   r   s       r   _extend_modelrI   9   s    &"46GUUr   c                     dD ]M  }| |   }t        j                  t        |j                  d            D cg c]  }||vs||    c}      | |<   O y c c}w )N)z
aux.weightzaux.biasr   )r2   stackranger5   )
state_dictaxeskeymatis        r   _remove_aux_axesrR   >   sU     *o++uSXXa[7I&[7I!QVZ]s1v7I&[\
3 *&[s   	A
 A
c                 r    | j                  d      sd|  } |i n|}t        | fi |}|rt        ||       |S )Nhttpsz/https://download.pytorch.org/torchaudio/models/)
startswithr   rR   )url	dl_kwargsremove_axesrM   s       r   _get_state_dictrY   O   sH    >>'"?uE'YI)#;;J[1r   c                       y)N)|ETAONIHSRDLUMWCFGYPBVK'XJQZr   r   r   r   _get_en_labelsrw   Y   s    r   c                       y)N)r[   enrQ   rstadhulgcmobwfkzpv   ü   ä   öj   ßyxqr   r   r   r   _get_de_labelsr   z   s     r   c                       y)N)r[   ry   r}   r   rQ   r~   rz   r|   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   _get_vp_en_labelsr          r   c                       y)N)"r[   ry   r~   r   r|   rz   r{   rQ   r   r   r   r}   r   r   r   r   r   r   r   r   r      ór      í   ár   r      ñ   ér      úr   r   r   r   r   r   r   _get_es_labelsr      s    #r   c                       y)N)*r[   ry   r|   rz   rQ   r}   r{   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r      àr      èr      êr      ôr      çu   œ   û   ù   î   âr      ï   ër      ær   r   r   r   _get_fr_labelsr      s    +r   c                       y)N)$r[   ry   rQ   r~   r   rz   r}   r{   r   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r      ò   ìr   r   r   r   r   r   r   r   r   r   r   r   _get_it_labelsr     s    %r   c                       y)N)r~   rQ   ry   rz   r   r   r}   r|   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rr   r   r   r   r   r   r   _get_mms_labelsr   =  r   r   )FFr!   )typingr   r   r   r2   r   r   torchaudio._internalr   torchaudio.modelsr	   r
   r   r   Moduler   rI   rR   rY   rw   r   r   r   r   r   r   r   r   r   <module>r      sl    ( (   9 H H!KRYY !KHV
]"B!H@$N,^&Rr   