
    Ph                        d dl mZ d dlZd dlmZmZmZ d dlZd dlZej                  j                  d       d dlmc mZ g dZd Z G d de      Zd	Z G d
 d      Zddef	 	 	 	 	 	 	 	 	 ddZy)    )annotationsN)List
NamedTupleUnionlibctc_prefix_decoder)CUCTCHypothesisCUCTCDecodercuda_ctc_decoderc                    g }t        | dd      5 }|D ]4  }|j                         j                         }|j                  |d          6 	 d d d        |S # 1 sw Y   |S xY w)Nrzutf-8)encodingr   )openstripsplitappend)
vocab_filevocabflines       vC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torchaudio/models/decoder/_cuda_ctc_decoder.py_get_vocab_listr      s\    E	j#	0AD::<%%'DLLa!  
1 L	 
1 Ls   :AA c                  4    e Zd ZU dZded<   	 ded<   	 ded<   y)	r   zSRepresents hypothesis generated by CUCTC beam search decoder :class:`CUCTCDecoder`.z	List[int]tokens	List[str]wordsfloatscoreN)__name__
__module____qualname____doc____annotations__     r   r   r      s#    ^h L+r$   r   gffffff?c                  H    e Zd ZdZdddedf	 	 	 	 	 	 	 	 	 	 	 d	dZd Zd
dZy)r	   zCUDA CTC beam search decoder.

    .. devices:: CUDA

    Note:
        To build the decoder, please use the factory function :func:`cuda_ctc_decoder`.
    r   
      Nc                   |r9t        |t        j                  j                  j                        st        d      |r|j                  n't        j                  j                         j                  }t        j                  |      | _
        t        j                  dt        j                  t        j                  d            | _        |dk7  rt        d      || _        || _        d| _        || _        |dk\  r|dk  st        d      t'        j(                  |      | _        t-        |t/        |            | _        y)	ay  
        Args:
            blank_id (int): token id corresopnding to blank, only support 0 for now. (Default: 0)
            vocab_list (List[str]): list of vocabulary tokens
            beam_size (int, optional): max number of hypos to hold after each decode step (Default: 10)
            nbest (int): number of best decodings to return
            blank_skip_threshold (float):
                skip frames if log_prob(blank) > log(blank_skip_threshold), to speed up decoding.
                (Default: 0.95).
            cuda_stream (torch.cuda.streams.Stream): using assigned cuda stream (Default: using default stream)

        z-cuda_stream must be torch.cuda.streams.Streamr   cudadtypedevicezblank_id must be 0r'   z,blank_skip_threshold must be between 0 and 1N)
isinstancetorchr)   streamsStreamAssertionErrorcuda_streamcurrent_streamcuctcprefixCTC_allocinternal_dataemptyint8r,   memoryblank_id
vocab_listspace_idnbestmathlogblank_skip_thresholdminlen	beam_size)selfr;   r:   rC   r=   r@   r2   cuda_stream_s           r   __init__zCUCTCDecoder.__init__3   s    * k5::+=+=+D+DE$%TUU2={..5::C\C\C^CjCj"22<@kk!5::ell6>RSq= !566 $
$).Ba.G !OPP$(HH-A$B!YJ8r$   c                P    t          t        j                  | j                         y y )N)r4   prefixCTC_freer6   )rD   s    r   __del__zCUCTCDecoder.__del__Y   s!      !3!34 r$   c                   |j                   t        j                  k(  st        d      |j                   t        j                  k(  st        d      |j
                  r|j
                  st        d      |j                         r|j                         st        d      t        j                  | j                  | j                  j                         | j                  j                  d      |j                         |j                         |j                         |j                         | j                  | j                  | j                   | j"                        \  }}|dkD  r t        j$                  |t        j&                  |j(                        j+                         | _
        t        j                  | j                  | j                  j                         | j                  j                  d      |j                         |j                         |j                         |j                         | j                  | j                  | j                   | j"                        \  }}t-        |      }g }t/        |      D ]x  }|j1                  t/        | j2                        D 	
cg c]F  }	t5        ||   |	   d   ||   |	   d   D 
cg c]  }
| j6                  |
    c}
||   |	   d         H c}
}	       z |S c c}
w c c}
}	w )	a  
        Args:
            log_prob (torch.FloatTensor): GPU tensor of shape `(batch, frame, num_tokens)` storing sequences of
                probability distribution over labels; log_softmax(output of acoustic model).
            lengths (dtype torch.int32): GPU tensor of shape `(batch, )` storing the valid length of
                in time axis of the output Tensor in each batch.

        Returns:
            List[List[CUCTCHypothesis]]:
                List of sorted best hypotheses for each audio sequence in the batch.
        z$encoder_out_lens must be torch.int32zlog_prob must be torch.float32zinputs must be cuda tensorsz input tensors must be contiguousr   r*   r'   )r   r   r   )r+   r.   int32r1   float32is_cudais_contiguousr4   $ctc_beam_search_decoder_batch_gpu_v2r6   r9   data_ptrsizestriderC   r:   r<   r@   r7   r8   r,   
contiguousrB   ranger   r=   r   r;   )rD   log_probencoder_out_lensrequired_size
score_hyps_
batch_sizehyposijword_ids              r   __call__zCUCTCDecoder.__call__]   s|     %%4 !GHH~~. !ABB  %5%=%= !>??&&(-=-K-K-M !CDD$)$N$NKK  "KKQ%%'MMOOONNMMMM%%%
!z 1++m5::hoo^iikDK!FF""$$&  #!!# ))+!))MAz _
z"ALL #4::. / $)!}Q/2GQRS}UVGWXYGZ[GZGtw7GZ[(mA.q1
 /	 #  \s    !K<K7K<7K<)r;   r   r:   intrC   r`   r=   r`   r@   r   r2   ztorch.cuda.streams.Stream)rU   torch.TensorrV   ra   )r   r   r    r!   _DEFAULT_BLANK_SKIP_THREASHOLDrF   rI   r_   r#   r$   r   r	   r	   *   sb     &D15$9$9 $9 	$9
 $9 $$9 /$9L5=r$   r	   r'   r&   c                Z    t        |       t        k(  rt        |       } t        | |||      S )a  Builds an instance of :class:`CUCTCDecoder`.

    Args:
        tokens (str or List[str]): File or list containing valid tokens.
            If using a file, the expected format is for tokens mapping to the same index to be on the same line
        beam_size (int, optional): The maximum number of hypos to hold after each decode step (Default: 10)
        nbest (int): The number of best decodings to return
        blank_id (int): The token ID corresopnding to the blank symbol.
        blank_skip_threshold (float): skip frames if log_prob(blank) > log(blank_skip_threshold), to speed up decoding
            (Default: 0.95).

    Returns:
        CUCTCDecoder: decoder

    Example
        >>> decoder = cuda_ctc_decoder(
        >>>     vocab_file="tokens.txt",
        >>>     blank_skip_threshold=0.95,
        >>> )
        >>> results = decoder(log_probs, encoder_out_lens) # List of shape (B, nbest) of Hypotheses
    )r;   rC   r=   r@   )typestrr   r	   )r   r=   rC   r@   s       r   r
   r
      s,    6 F|s (6Yebvwwr$   )
r   zUnion[str, List[str]]r=   r`   rC   r`   r@   r   returnr	   )
__future__r   r>   typingr   r   r   r.   
torchaudio
_extension	_load_lib!torchaudio.lib.pybind11_prefixctclibpybind11_prefixctcr4   __all__r   r   rb   r	   r
   r#   r$   r   <module>rp      s    "  * *   
     7 8 1 1 B
,j 
, "& p pj "@	x!xx x  	x
 xr$   