o
    i.                     @   s   d dl mZ d dlmZ d dlmZ d dlZddlm	Z	m
Z
mZmZmZ ddlmZ ddd	d
dZd"ddZG dd deZdd ZG dd deZdd Zdd ZddddZd#ddZG dd deZd d! ZdS )$    )Counter)suppress)
NamedTupleN   )_isin_searchsorteddeviceget_namespacexpxis_scalar_nanFreturn_inversereturn_countsc                C   s&   | j tkrt| ||dS t| ||dS )a  Helper function to find unique values with support for python objects.

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.

    Parameters
    ----------
    values : ndarray
        Values to check for unknowns.

    return_inverse : bool, default=False
        If True, also return the indices of the unique values.

    return_counts : bool, default=False
        If True, also return the number of times each unique item appears in
        values.

    Returns
    -------
    unique : ndarray
        The sorted unique values.

    unique_inverse : ndarray
        The indices to reconstruct the original array from the unique array.
        Only provided if `return_inverse` is True.

    unique_counts : ndarray
        The number of times each of the unique values comes up in the original
        array. Only provided if `return_counts` is True.
    r   )dtypeobject_unique_python
_unique_np)valuesr   r    r   d/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/sklearn/utils/_encode.py_unique   s   
r   c           
      C   s  t | \}}d\}}|r|r|| \}}}}n|r"|| \}}n|r,|| \}}n|| }|jrgt|d rgt||j|d}|d|d  }|rR||||k< |rg|	||d ||< |d|d  }|f}	|rq|	|f7 }	|rx|	|f7 }	t
|	dkr|	d S |	S )zHelper function to find unique values for numpy arrays that correctly
    accounts for nans. See `_unique` documentation for details.)NNxpNr   r   )r	   Z
unique_allZunique_inverseZunique_countsunique_valuessizer   r   nansumlen)
r   r   r   r   _ZinversecountsuniquesZnan_idxretr   r   r   r   =   s.   


r   c                   @   s*   e Zd ZU dZeed< eed< dd ZdS )MissingValuesz'Data class for missing data informationr   nonec                 C   s*   g }| j r
|d | jr|tj |S )z3Convert tuple to a list where None is always first.N)r%   appendr   np)selfoutputr   r   r   to_listj   s   
zMissingValues.to_listN)__name__
__module____qualname____doc__bool__annotations__r*   r   r   r   r   r$   d   s
   
 r$   c                 C   sn   dd | D }|s| t dddfS d|v r)t|dkr"t ddd}nt ddd}nt ddd}| | }||fS )a.  Extract missing values from `values`.

    Parameters
    ----------
    values: set
        Set of values to extract missing from.

    Returns
    -------
    output: set
        Set with missing values extracted.

    missing_values: MissingValues
        Object with missing value information.
    c                 S   s    h | ]}|d u st |r|qS Nr   .0valuer   r   r   	<setcomp>   s    z#_extract_missing.<locals>.<setcomp>F)r   r%   Nr   T)r$   r   )r   Zmissing_values_setZoutput_missing_valuesr)   r   r   r   _extract_missingt   s   r6   c                       s(   e Zd ZdZ fddZdd Z  ZS )_nandictz!Dictionary with support for nans.c                    s6   t  | | D ]\}}t|r|| _ d S q
d S r1   )super__init__itemsr   	nan_value)r(   mappingkeyr4   	__class__r   r   r9      s   z_nandict.__init__c                 C       t | drt|r| jS t|)Nr;   )hasattrr   r;   KeyErrorr(   r=   r   r   r   __missing__      z_nandict.__missing__)r+   r,   r-   r.   r9   rD   __classcell__r   r   r>   r   r7      s    r7   c                    sD   t | |\}}tdd t|D  |j fdd| D t| dS )z,Map values based on its position in uniques.c                 S   s   i | ]\}}||qS r   r   )r3   ivalr   r   r   
<dictcomp>   s    z#_map_to_integer.<locals>.<dictcomp>c                    s   g | ]} | qS r   r   r3   vtabler   r   
<listcomp>       z#_map_to_integer.<locals>.<listcomp>)r   )r	   r7   	enumerateZasarrayr   )r   r"   r   r    r   rL   r   _map_to_integer   s    rQ   c                C   s   zt | }t|\}}t|}||  tj|| jd}W n ty=   tdd t dd | D D }td| w |f}|rK|t	| |f7 }|rU|t
| |f7 }t|dkr_|d S |S )Nr   c                 s   s    | ]}|j V  qd S r1   )r-   )r3   tr   r   r   	<genexpr>   s    z!_unique_python.<locals>.<genexpr>c                 s   s    | ]}t |V  qd S r1   )typerJ   r   r   r   rT      s    zPEncoders require their input argument must be uniformly strings or numbers. Got r   r   )setr6   sortedextendr*   r'   arrayr   	TypeErrorrQ   _get_countsr   )r   r   r   uniques_setZmissing_valuesr"   typesr#   r   r   r   r      s(    r   T)check_unknownc             
   C   s~   t | |\}}|| jds(zt| |W S  ty' } ztd| d}~ww |r8t| |}|r8td| t|| |dS )a  Helper function to encode values into [0, n_uniques - 1].

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.
    The numpy method has the limitation that the `uniques` need to
    be sorted. Importantly, this is not checked but assumed to already be
    the case. The calling method needs to ensure this for all non-object
    values.

    Parameters
    ----------
    values : ndarray
        Values to encode.
    uniques : ndarray
        The unique values in `values`. If the dtype is not object, then
        `uniques` needs to be sorted.
    check_unknown : bool, default=True
        If True, check for values in `values` that are not in `unique`
        and raise an error. This is ignored for object dtype, and treated as
        True in this case. This parameter is useful for
        _BaseEncoder._transform() to avoid calling _check_unknown()
        twice.

    Returns
    -------
    encoded : ndarray
        Encoded values
    numericz%y contains previously unseen labels: Nr   )r	   isdtyper   rQ   rB   
ValueError_check_unknownr   )r   r"   r^   r   r    ediffr   r   r   _encode   s   
re   c                    s  t | |\}}d}|| jdspt| }t|\}}t|t\| }|jo.j }	|jo5j }
fdd |r\|sE|	sE|
rR| fdd| D }n
|jt	| |j
d}t|}|
rg|d |	ro|tj nM|| }tj||d|d	}|r|jrt| ||}n
|jt	| |j
d}|||r||}||r|jr|r|| }d
||< ||  }t|}|r||fS |S )a  
    Helper function to check for unknowns in values to be encoded.

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.

    Parameters
    ----------
    values : array
        Values to check for unknowns.
    known_values : array
        Known values. Must be unique.
    return_mask : bool, default=False
        If True, return a mask of the same shape as `values` indicating
        the valid values.

    Returns
    -------
    diff : list
        The unique values present in `values` and not in `know_values`.
    valid_mask : boolean array
        Additionally returned if ``return_mask=True``.

    Nr_   c                    s$   | v p j o
| d u p jot| S r1   )r%   r   r   )r4   )missing_in_uniquesr\   r   r   is_valid  s
   z _check_unknown.<locals>.is_validc                    s   g | ]} |qS r   r   r2   )rg   r   r   rN   &  rO   z"_check_unknown.<locals>.<listcomp>rR   T)assume_uniquer   r   )r	   r`   r   rV   r6   r   r%   rY   Zonesr   r/   listr&   r'   r   r
   Z	setdiff1dr   r   anyisnan)r   Zknown_valuesZreturn_maskr   r    Z
valid_maskZ
values_setZmissing_in_valuesrd   Znan_in_diffZnone_in_diffr   Zdiff_is_nanis_nanr   )rg   rf   r\   r   rb      sL   






rb   c                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )_NaNCounterz$Counter with support for nan values.c                    s   t  | | d S r1   )r8   r9   _generate_items)r(   r:   r>   r   r   r9   M  s   z_NaNCounter.__init__c                 c   s>    |D ]}t |s|V  qt| dsd| _|  jd7  _qdS )z>Generate items without nans. Stores the nan counts separately.	nan_countr   r   N)r   rA   ro   )r(   r:   itemr   r   r   rn   P  s   
z_NaNCounter._generate_itemsc                 C   r@   )Nro   )rA   r   ro   rB   rC   r   r   r   rD   Z  rE   z_NaNCounter.__missing__)r+   r,   r-   r.   r9   rn   rD   rF   r   r   r>   r   rm   J  s
    
rm   c           
   	   C   s   | j jdv r9t| }tjt|tjd}t|D ]\}}tt	 || ||< W d   n1 s1w   Y  q|S t
| dd\}}tj||dd}t|d r[t|d r[d|d< t||| }	tj|tjd}||	 ||< |S )zGet the count of each of the `uniques` in `values`.

    The counts will use the order passed in by `uniques`. For non-object dtypes,
    `uniques` is assumed to be sorted and `np.nan` is at the end.
    ZOUrR   NT)r   )rh   r   )r   kindrm   r'   Zzerosr   Zint64rP   r   rB   r   isinrk   ZsearchsortedZ
zeros_like)
r   r"   counterr)   rG   rp   r   r!   Zuniques_in_valuesZunique_valid_indicesr   r   r   r[   `  s"   
r[   )FF)F)collectionsr   
contextlibr   typingr   numpyr'   Z
_array_apir   r   r   r	   r
   Z_missingr   r   r   r$   r6   dictr7   rQ   r   re   rb   rm   r[   r   r   r   r   <module>   s"   
)'&
+T