o
    *if                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ dddZh d	Zd
ee dedee fddZG dd de
ZdS )    N)Path)IterableListTupleUnion)Dataset)download_url_to_fileZ@209a8b4cd265013e96f4658632a9878103b0c5abf62b50d4ef3ae1be226b29e4Z@408ccaae803641c6d7b626b6299949320c2dbca96b2220fd3fb17887b023b027)Bhttp://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7bJhttp://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols>8   z.DOTz"UNQUOTEz)END-PARENSz#POUND-SIGNz)PARENSz)PARENz}RIGHT-BRACEz,COMMAz)END-THE-PARENz)UN-PARENTHESESz.DECIMALz;SEMI-COLON(1)z
"END-QUOTEz)CLOSE-PARENTHESESz/SLASHz(LEFT-PARENz(OPEN-PARENTHESESz--DASHz(BEGIN-PARENSz
)END-PARENz(PARENTHESESz)RIGHT-PARENz"END-OF-QUOTEz"QUOTEz
&AMPERSANDz?QUESTION-MARKz;SEMI-COLONz(PARENz"DOUBLE-QUOTEz#SHARP-SIGNz+PLUSz"CLOSE-QUOTEz'INNER-QUOTEz'END-INNER-QUOTEz%PERCENTz{BRACEz
.FULL-STOPz{LEFT-BRACEz
"IN-QUOTESz{OPEN-BRACEz.PERIODz!EXCLAMATION-POINTz...ELLIPSISz)CLOSE-PARENz
'END-QUOTEz
#HASH-MARKz'SINGLE-QUOTEz(PARENSz)END-PARENTHESESz-DASHz.POINTz(IN-PARENTHESESz-HYPHENz'QUOTEz}CLOSE-BRACEz:COLONlinesexclude_punctuationsreturnc                 C   s   t d}g }| D ]A}|r|drq	| d\}}|tv r7|r#q	|dr+d}n|dr3d}n|d }t |d|}|d}|||f q	|S )	Nz
\([0-9]+\)z;;;z  z...z--r     )recompile
startswithstripsplit_PUNCTUATIONSsubappend)r   r   Z_alt_reZcmudictlinewordZphones r   j/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/torchaudio/datasets/cmudict.py_parse_dictionaryJ   s$   



r   c                   @   s   e Zd ZdZ	ddddddeeef ded	ed
ededdfddZde	de
eee f fddZde	fddZedee fddZdS )CMUDictaZ  *CMU Pronouncing Dictionary* :cite:`cmudict` (CMUDict) dataset.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        exclude_punctuations (bool, optional):
            When enabled, exclude the pronounciation of punctuations, such as
            `!EXCLAMATION-POINT` and `#HASH-MARK`.
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        url (str, optional):
            The URL to download the dictionary from.
            (default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b"``)
        url_symbols (str, optional):
            The URL to download the list of symbols from.
            (default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols"``)
    TFr	   r
   )downloadurlurl_symbolsrootr   r   r   r    r   Nc          
      C   sB  || _ t|| _tj| jstd| | jtj| }| jtj| }tj|sC|s7td| t	
|d }t||| tj|s^|sRtd| t	
|d }t||| t|d}	dd |	 D | _W d    n1 sxw   Y  t|ddd}	t|	 | j d	| _W d    d S 1 sw   Y  d S )
Nz#The root directory does not exist; z`The dictionary file is not found in the following location. Set `download=True` to download it. z\The symbol file is not found in the following location. Set `download=True` to download it. rc                 S   s   g | ]}|  qS r   )r   ).0r   r   r   r   
<listcomp>   s    z$CMUDict.__init__.<locals>.<listcomp>zlatin-1)encoding)r   )r   r   Z
_root_pathospathisdirRuntimeErrorbasenameexists
_CHECKSUMSgetr   open	readlines_symbolsr   _dictionary)
selfr!   r   r   r   r    Z	dict_fileZsymbol_fileZchecksumtextr   r   r   __init__{   s<   

"zCMUDict.__init__nc                 C   s
   | j | S )a  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded.

        Returns:
            Tuple of a word and its phonemes

            str:
                Word
            List[str]:
                Phonemes
        )r1   )r2   r5   r   r   r   __getitem__   s   
zCMUDict.__getitem__c                 C   s
   t | jS )N)lenr1   r2   r   r   r   __len__   s   
zCMUDict.__len__c                 C   s
   | j  S )zLlist[str]: A list of phonemes symbols, such as ``"AA"``, ``"AE"``, ``"AH"``.)r0   copyr8   r   r   r   symbols   s   
zCMUDict.symbols)T)__name__
__module____qualname____doc__r   strr   boolr4   intr   r   r6   r9   propertyr;   r   r   r   r   r   i   s0    

)r   )r&   r   pathlibr   typingr   r   r   r   Ztorch.utils.datar   Ztorchaudio._internalr   r,   r   r@   rA   r   r   r   r   r   r   <module>   s    <