
from __future__ import annotations

import itertools as it
from abc import abstractmethod
from collections import namedtuple
from typing import Dict, List, NamedTuple, Optional, Tuple, Union

import torch

from flashlight.lib.text.decoder import (
    CriterionType as _CriterionType,
    LexiconDecoder as _LexiconDecoder,
    LexiconDecoderOptions as _LexiconDecoderOptions,
    LexiconFreeDecoder as _LexiconFreeDecoder,
    LexiconFreeDecoderOptions as _LexiconFreeDecoderOptions,
    LM as _LM,
    LMState as _LMState,
    SmearingMode as _SmearingMode,
    Trie as _Trie,
    ZeroLM as _ZeroLM,
)
from flashlight.lib.text.dictionary import (
    create_word_dict as _create_word_dict,
    Dictionary as _Dictionary,
    load_words as _load_words,
)
from torchaudio.utils import download_asset

try:
    from flashlight.lib.text.decoder.kenlm import KenLM as _KenLM
except Exception:
    try:
        from flashlight.lib.text.decoder import KenLM as _KenLM
    except Exception:
        _KenLM = None

__all__ = [
    "CTCHypothesis",
    "CTCDecoder",
    "CTCDecoderLM",
    "CTCDecoderLMState",
    "ctc_decoder",
    "download_pretrained_files",
]

_PretrainedFiles = namedtuple("PretrainedFiles", ["lexicon", "tokens", "lm"])


def _construct_trie(tokens_dict, word_dict, lexicon, lm, silence):
    vocab_size = tokens_dict.index_size()
    trie = _Trie(vocab_size, silence)
    start_state = lm.start(False)

    for word, spellings in lexicon.items():
        word_idx = word_dict.get_index(word)
        _, score = lm.score(start_state, word_idx)
        for spelling in spellings:
            spelling_idx = [tokens_dict.get_index(token) for token in spelling]
            trie.insert(spelling_idx, word_idx, score)
    trie.smear(_SmearingMode.MAX)
    return trie


def _get_word_dict(lexicon, lm, lm_dict, tokens_dict, unk_word):
    word_dict = None
    if lm_dict is not None:
        word_dict = _Dictionary(lm_dict)

    if lexicon and word_dict is None:
        word_dict = _create_word_dict(lexicon)
    elif not lexicon and word_dict is None and type(lm) == str:
        d = {tokens_dict.get_entry(i): [[tokens_dict.get_entry(i)]] for i in range(tokens_dict.index_size())}
        d[unk_word] = [[unk_word]]
        word_dict = _create_word_dict(d)

    return word_dict


class CTCHypothesis(NamedTuple):
    """Represents hypothesis generated by CTC beam search decoder :class:`CTCDecoder`."""

    tokens: torch.LongTensor
    """Predicted sequence of token IDs. Shape `(L, )`, where `L` is the length of the output sequence"""

    words: List[str]
    """List of predicted words.

    Note:
        This attribute is only applicable if a lexicon is provided to the decoder. If
        decoding without a lexicon, it will be blank. Please refer to :attr:`tokens` and
        :func:`~torchaudio.models.decoder.CTCDecoder.idxs_to_tokens` instead.
    """

    score: float
    """Score corresponding to hypothesis"""

    timesteps: torch.IntTensor
    """Timesteps corresponding to the tokens. Shape `(L, )`, where `L` is the length of the output sequence"""


class CTCDecoderLMState(_LMState):
    """Language model state."""

    @property
    def children(self) -> Dict[int, CTCDecoderLMState]:
        """Map of indices to LM states"""
        return super().children

    def child(self, usr_index: int) -> CTCDecoderLMState:
        """Returns child corresponding to usr_index, or creates and returns a new state if input index
        is not found.

        Args:
            usr_index (int): index corresponding to child state

        Returns:
            CTCDecoderLMState: child state corresponding to usr_index
        """
        return super().child(usr_index)

    def compare(self, state: CTCDecoderLMState) -> CTCDecoderLMState:
        """Compare two language model states.

        Args:
            state (CTCDecoderLMState): LM state to compare against

        Returns:
            int: 0 if the states are the same, -1 if self is less, +1 if self is greater.
        """


class CTCDecoderLM(_LM):
    """Language model base class for creating custom language models to use with the decoder."""

    @abstractmethod
    def start(self, start_with_nothing: bool) -> CTCDecoderLMState:
        """Initialize or reset the language model.

        Args:
            start_with_nothing (bool): whether or not to start sentence with sil token.

        Returns:
            CTCDecoderLMState: starting state
        """
        raise NotImplementedError

    @abstractmethod
    def score(self, state: CTCDecoderLMState, usr_token_idx: int) -> Tuple[CTCDecoderLMState, float]:
        """Evaluate the language model based on the current LM state and new word.

        Args:
            state (CTCDecoderLMState): current LM state
            usr_token_idx (int): index of the word

        Returns:
            (CTCDecoderLMState, float)
                CTCDecoderLMState:
                    new LM state
                float:
                    score
        """
        raise NotImplementedError

    @abstractmethod
    def finish(self, state: CTCDecoderLMState) -> Tuple[CTCDecoderLMState, float]:
        """Evaluate end for language model based on current LM state.

        Args:
            state (CTCDecoderLMState): current LM state

        Returns:
            (CTCDecoderLMState, float)
                CTCDecoderLMState:
                    new LM state
                float:
                    score
        """
        raise NotImplementedError


class CTCDecoder:
    """CTC beam search decoder from *Flashlight* :cite:`kahn2022flashlight`.

    .. devices:: CPU

    Note:
        To build the decoder, please use the factory function :func:`ctc_decoder`.
    """

    def __init__(
        self,
        nbest: int,
        lexicon: Optional[Dict],
        word_dict: _Dictionary,
        tokens_dict: _Dictionary,
        lm: CTCDecoderLM,
        decoder_options: Union[_LexiconDecoderOptions, _LexiconFreeDecoderOptions],
        blank_token: str,
        sil_token: str,
        unk_word: str,
    ) -> None:
        """
        Args:
            nbest (int): number of best decodings to return
            lexicon (Dict or None): lexicon mapping of words to spellings, or None for lexicon-free decoder
            word_dict (_Dictionary): dictionary of words
            tokens_dict (_Dictionary): dictionary of tokens
            lm (CTCDecoderLM): language model. If using a lexicon, only word level LMs are currently supported
            decoder_options (_LexiconDecoderOptions or _LexiconFreeDecoderOptions):
                parameters used for beam search decoding
            blank_token (str): token corresponding to blank
            sil_token (str): token corresponding to silence
            unk_word (str): word corresponding to unknown
        """
        self.nbest = nbest
        self.word_dict = word_dict
        self.tokens_dict = tokens_dict
        self.blank = self.tokens_dict.get_index(blank_token)
        silence = self.tokens_dict.get_index(sil_token)
        transitions = []

        if lexicon:
            trie = _construct_trie(tokens_dict, word_dict, lexicon, lm, silence)
            unk_word = word_dict.get_index(unk_word)
            token_lm = False  # use word level LM

            self.decoder = _LexiconDecoder(
                decoder_options,
                trie,
                lm,
                silence,
                self.blank,
                unk_word,
                transitions,
                token_lm,
            )
        else:
            self.decoder = _LexiconFreeDecoder(decoder_options, lm, silence, self.blank, transitions)
        # keep a reference so the LM outlives the native decoder that uses it
        self.lm = lm

    def _get_tokens(self, idxs: torch.IntTensor) -> torch.LongTensor:
        # collapse CTC repeats, then drop blank indices
        idxs = (g[0] for g in it.groupby(idxs))
        idxs = filter(lambda x: x != self.blank, idxs)
        return torch.LongTensor(list(idxs))

    def _get_timesteps(self, idxs: torch.IntTensor) -> torch.IntTensor:
        """Returns frame numbers corresponding to non-blank tokens."""
        timesteps = []
        for i, idx in enumerate(idxs):
            if idx == self.blank:
                continue
            if i == 0 or idx != idxs[i - 1]:
                timesteps.append(i)
        return torch.IntTensor(timesteps)

    def decode_begin(self):
        """Initialize the internal state of the decoder.

        See :py:meth:`decode_step` for the usage.

        .. note::

           This method is required only when performing online decoding.
           It is not necessary when performing batch decoding with :py:meth:`__call__`.
        """
        self.decoder.decode_begin()

    def decode_end(self):
        """Finalize the internal state of the decoder.

        See :py:meth:`decode_step` for the usage.

        .. note::

           This method is required only when performing online decoding.
           It is not necessary when performing batch decoding with :py:meth:`__call__`.
        """
        self.decoder.decode_end()

    def decode_step(self, emissions: torch.FloatTensor):
        """Perform incremental decoding on top of the current internal state.

        .. note::

           This method is required only when performing online decoding.
           It is not necessary when performing batch decoding with :py:meth:`__call__`.

        Args:
            emissions (torch.FloatTensor): CPU tensor of shape `(frame, num_tokens)` storing sequences of
                probability distribution over labels; output of acoustic model.

        Example:
            >>> decoder = torchaudio.models.decoder.ctc_decoder(...)
            >>> decoder.decode_begin()
            >>> decoder.decode_step(emission1)
            >>> decoder.decode_step(emission2)
            >>> decoder.decode_end()
            >>> result = decoder.get_final_hypothesis()
        """
        if emissions.dtype != torch.float32:
            raise ValueError("emissions must be float32.")

        if not emissions.is_cpu:
            raise RuntimeError("emissions must be a CPU tensor.")

        if not emissions.is_contiguous():
            raise RuntimeError("emissions must be contiguous.")

        if emissions.ndim != 2:
            raise RuntimeError(f"emissions must be 2D. Found {emissions.shape}")

        T, N = emissions.size()
        self.decoder.decode_step(emissions.data_ptr(), T, N)

    def _to_hypo(self, results) -> List[CTCHypothesis]:
        return [
            CTCHypothesis(
                tokens=self._get_tokens(result.tokens),
                words=[self.word_dict.get_entry(x) for x in result.words if x >= 0],
                score=result.score,
                timesteps=self._get_timesteps(result.tokens),
            )
            for result in results
        ]

    def get_final_hypothesis(self) -> List[CTCHypothesis]:
        """Get the final hypothesis

        Returns:
            List[CTCHypothesis]:
                List of sorted best hypotheses.

        .. note::

           This method is required only when performing online decoding.
           It is not necessary when performing batch decoding with :py:meth:`__call__`.
        """
        results = self.decoder.get_all_final_hypothesis()
        return self._to_hypo(results[: self.nbest])

    def __call__(
        self, emissions: torch.FloatTensor, lengths: Optional[torch.Tensor] = None
    ) -> List[List[CTCHypothesis]]:
        """
        Performs batched offline decoding.

        .. note::

           This method performs offline decoding in one go. To perform incremental decoding,
           please refer to :py:meth:`decode_step`.

        Args:
            emissions (torch.FloatTensor): CPU tensor of shape `(batch, frame, num_tokens)` storing sequences of
                probability distribution over labels; output of acoustic model.
            lengths (Tensor or None, optional): CPU tensor of shape `(batch, )` storing the valid length
                of each output sequence along the time axis.

        Returns:
            List[List[CTCHypothesis]]:
                List of sorted best hypotheses for each audio sequence in the batch.
        """
        if emissions.dtype != torch.float32:
            raise ValueError("emissions must be float32.")

        if not emissions.is_cpu:
            raise RuntimeError("emissions must be a CPU tensor.")

        if not emissions.is_contiguous():
            raise RuntimeError("emissions must be contiguous.")

        if emissions.ndim != 3:
            raise RuntimeError(f"emissions must be 3D. Found {emissions.shape}")

        if lengths is not None and not lengths.is_cpu:
            raise RuntimeError("lengths must be a CPU tensor.")

        B, T, N = emissions.size()
        if lengths is None:
            lengths = torch.full((B,), T)

        float_bytes = 4
        hypos = []

        for b in range(B):
            # advance the raw data pointer to the b-th utterance in the batch
            emissions_ptr = emissions.data_ptr() + float_bytes * b * emissions.stride(0)
            results = self.decoder.decode(emissions_ptr, lengths[b], N)
            hypos.append(self._to_hypo(results[: self.nbest]))
        return hypos

    def idxs_to_tokens(self, idxs: torch.LongTensor) -> List:
        """

        Args:
            idxs (LongTensor): raw token IDs generated from decoder

        Returns:
            List: tokens corresponding to the input IDs
        )r1   rG   item)r^   r   r   s      r>   idxs_to_tokenszCTCDecoder.idxs_to_tokens  s5     CGG$3  **388:6$GGGs   08)rx   rh   r#   zOptional[Dict]r2   rB   r1   rB   r%   r   r}   z9Union[_LexiconDecoderOptions, _LexiconFreeDecoderOptions]r~   rE   r   rE   rI   rE   rg   None)r   rQ   rg   rN   )r   rQ   rg   rQ   )r   torch.FloatTensor)rg   zList[CTCHypothesis]rA   )r   r   r   zOptional[torch.Tensor]rg   zList[List[CTCHypothesis]])r   rN   rg   r   )rS   rT   rU   rV   r   r   r   r   r   r   r   r   r   r   rX   rY   r>   r   r      s    77  7 	7
 !7 7 S7 7 7 7 
7r,
	*
$
"!=F	
4  OS0*05K0	"0d
HrY   r   r   2   r   z-infF-|z<unk>c                   |t        |      t        urt        d      t        |      }| r@t	        |       } t        ||xs |j                         |||	|
||t        j                  	      }n2t        ||xs |j                         ||||t        j                        }t        | ||||      }t        |      t        k(  rt        t        d      t        ||      }n|
t               }t        || |||||||	      S )aY	  Builds an instance of :class:`CTCDecoder`.

    Args:
        lexicon (str or None): lexicon file containing the possible words and corresponding spellings.
            Each line consists of a word and its space separated spelling. If `None`, uses lexicon-free
            decoding.
        tokens (str or List[str]): file or list containing valid tokens. If using a file, the expected
            format is for tokens mapping to the same index to be on the same line
        lm (str, CTCDecoderLM, or None, optional): either a path containing KenLM language model,
            custom language model of type `CTCDecoderLM`, or `None` if not using a language model
        lm_dict (str or None, optional): file consisting of the dictionary used for the LM, with a word
            per line sorted by LM index. If decoding with a lexicon, entries in lm_dict must also occur
            in the lexicon file. If `None`, dictionary for LM is constructed using the lexicon file.
            (Default: None)
        nbest (int, optional): number of best decodings to return (Default: 1)
        beam_size (int, optional): max number of hypos to hold after each decode step (Default: 50)
        beam_size_token (int, optional): max number of tokens to consider at each decode step.
            If `None`, it is set to the total number of tokens (Default: None)
        beam_threshold (float, optional): threshold for pruning hypothesis (Default: 50)
        lm_weight (float, optional): weight of language model (Default: 2)
        word_score (float, optional): word insertion score (Default: 0)
        unk_score (float, optional): unknown word insertion score (Default: -inf)
        sil_score (float, optional): silence insertion score (Default: 0)
        log_add (bool, optional): whether or not to use logadd when merging hypotheses (Default: False)
        blank_token (str, optional): token corresponding to blank (Default: "-")
        sil_token (str, optional): token corresponding to silence (Default: "|")
        unk_word (str, optional): word corresponding to unknown (Default: "<unk>")

    Returns:
        CTCDecoder: decoder

    Example
        >>> decoder = ctc_decoder(
        >>>     lexicon="lexicon.txt",
        >>>     tokens="tokens.txt",
        >>>     lm="kenlm.bin",
        >>> )
        >>> results = decoder(emissions) # List of shape (B, nbest) of Hypotheses
    z!lm_dict must be None or str type.)		beam_sizebeam_size_tokenbeam_threshold	lm_weight
word_score	unk_score	sil_scorelog_addcriterion_type)r   r   r   r   r   r   r   zflashlight-text is installed, but KenLM is not installed. Please refer to https://github.com/kpu/kenlm#python-module for how to install it.)	rx   r#   r2   r1   r%   r}   r~   r   rI   )rD   rE   r   rB   _load_words_LexiconDecoderOptionsr'   _CriterionTypeCTC_LexiconFreeDecoderOptionsrL   _KenLMr   _ZeroLMr   )r#   r$   r%   rH   rx   r   r   r   r   r   r   r   r   r~   r   rI   r1   r}   r2   s                      r>   r   r     s"   r tG}C7<==f%K g&0+G{/E/E/G)!)--

 5+G{/E/E/G))--
 wG[(KIBx3>d  B	"	Y'
 
rY   c                v    | dvrt        |  d      d|  }t        | d| d| dk7  r
| d      S d       S )	N)librispeechzlibrispeech-3-gramzlibrispeech-4-gramzZ not supported. Must be one of ['librispeech-3-gram', 'librispeech-4-gram', 'librispeech']zdecoder-assets/z/lexicon.txtz/tokens.txtr   z/lm.binr"   )r   _PretrainedFiles)modelprefixs     r>   _get_filenamesr     ss    OOgop
 	
 ug&F(,'%!&-!7fXW  >B rY   c                    t        |       }t        |j                        }t        |j                        }|j                  t        |j                        }nd}t        |||      S )aM  
    Retrieves pretrained data files used for :func:`ctc_decoder`.

    Args:
        model (str): pretrained language model to download.
            Valid values are: ``"librispeech-3-gram"``, ``"librispeech-4-gram"`` and ``"librispeech"``.

    Returns:
        Object with the following attributes

            * ``lm``: path corresponding to downloaded language model,
              or ``None`` if the model is not associated with an lm
            * ``lexicon``: path corresponding to downloaded lexicon file
            * ``tokens``: path corresponding to downloaded tokens file
    """
    files = _get_filenames(model)
    lexicon_file = download_asset(files.lexicon)
    tokens_file = download_asset(files.tokens)
    if files.lm is not None:
        lm_file = download_asset(files.lm)
    else:
        lm_file = None

    return _PretrainedFiles(lexicon=lexicon_file, tokens=tokens_file, lm=lm_file)
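
# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of this module). It assumes `emissions`
# is a CPU float32 tensor of shape (batch, frame, num_tokens), e.g. produced
# by a Wav2Vec2 acoustic model; the beam-search parameters shown are example
# values taken from the torchaudio ASR inference tutorial, not required ones.
#
#     files = download_pretrained_files("librispeech-4-gram")
#     decoder = ctc_decoder(
#         lexicon=files.lexicon,
#         tokens=files.tokens,
#         lm=files.lm,
#         nbest=3,
#         beam_size=1500,
#         lm_weight=3.23,
#         word_score=-0.26,
#     )
#     hypotheses = decoder(emissions)      # List[List[CTCHypothesis]]
#     best = hypotheses[0][0]              # top hypothesis for first utterance
#     transcript = " ".join(best.words)
# ---------------------------------------------------------------------------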