o
    i<X                     @   s^  d Z ddlZddlmZmZ ddlmZmZmZm	Z	 ddl
mZmZmZ ddlZddlmZ ddlmZ dd	lmZmZmZmZ dd
lmZ ddlmZmZmZmZ e e!Z"eddddZ#eddddZ$eddddeddddeddddfZ%	d<d d!Z&d"d# Z'	d=d%d&Z(ee)edgd'geeddd(d)dgeeddd*d)dgd'ge*edgd'gd'geeddd*d)geed+dd(d)gd,
dd-ddd.dd$e+d/d0e+d1d2fdd$ddd,
d3d4Z,	d>d5d6Z-eeh d7ge)edgd'geeddd(d)dgd'ge*edgd'geeddd*d)geed+dd(d)gd8	dd-d9ddd.d$e+d/d0e+d1d2fdddd8	d:d;Z.dS )?zLabeled Faces in the Wild (LFW) dataset

This dataset is a collection of JPEG pictures of famous people collected
over the internet, all details are available on the official website:

    http://vis-www.cs.umass.edu/lfw/
    N)IntegralReal)PathLikelistdirmakedirsremove)existsisdirjoin)Memory   )Bunch)HiddenInterval
StrOptionsvalidate_params)tarfile_extractall   )RemoteFileMetadata_fetch_remoteget_data_home
load_descrzlfw.tgzz.https://ndownloader.figshare.com/files/5976018Z@055f7d9c632d7370e6fb4afc7468d40f970c34a80d4c6f50ffec63f5a8d536c0)filenameurlZchecksumzlfw-funneled.tgzz.https://ndownloader.figshare.com/files/5976015Z@b47c8422c8cded889dc5a13418c4bc2abbda121092b3533a83306f90d900100apairsDevTrain.txtz.https://ndownloader.figshare.com/files/5976012Z@1d454dada7dfeca0e7eab6f65dc4e97a6312d44cf142207be28d688be92aabfapairsDevTest.txtz.https://ndownloader.figshare.com/files/5976009Z@7cb06600ea8b2814ac26e946201cdb304296262aad67d046a16a7ec85d0ff87c	pairs.txtz.https://ndownloader.figshare.com/files/5976006Z@ea42330c62c92989f9d7c03237ed5d591365e89b3e649747777b70e692dc1592T         ?c                 C   s<  t | d} t| d}t|st| tD ]$}t||j}t|s8|r2td|j t	||||d qt
d| q|rCt|d}t}	nt|d}t}	t|st||	j}
t|
sp|rjtd|	j t	|	|||d nt
d|
 d	d
l}td| ||
d}t||d W d
   n1 sw   Y  t|
 ||fS )z0Helper function to download any missing LFW data)	data_homelfw_homezDownloading LFW metadata: %s)dirname	n_retriesdelayz%s is missingZlfw_funneledZlfwz!Downloading LFW data (~200MB): %sr   Nz$Decompressing the data archive to %szr:gz)path)r   r
   r   r   TARGETSr   loggerinfor   r   OSErrorFUNNELED_ARCHIVEARCHIVEtarfiledebugopenr   r   )r   funneleddownload_if_missingr"   r#   r    targetZtarget_filepathdata_folder_patharchivearchive_pathr+   fp r5   d/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/sklearn/datasets/_lfw.py_check_fetch_lfwM   sF   

	

r7   c                 C   s  zddl m} W n ty   tdw tddtddf}|du r%|}ntdd t||D }|\}}|j|j |jp>d }|j|j |jpId }	|dur_t	|}t
|| }t
||	 }	t| }
|sqtj|
||	ftjd	}ntj|
||	d
ftjd	}t| D ]U\}}|d dkrtd|d |
 ||}||j|j|j|jf}|dur||	|f}tj|tjd	}|jdkrtd| |d }|s|jdd}|||df< q|S )zInternally used to load imagesr   )ImagezThe Python Imaging Library (PIL) is required to load data from jpeg files. Please refer to https://pillow.readthedocs.io/en/stable/installation.html for installing PIL.   Nc                 s   s    | ]	\}}|p	|V  qd S )Nr5   ).0sZdsr5   r5   r6   	<genexpr>   s    z_load_imgs.<locals>.<genexpr>r   Zdtyper   i  zLoading face #%05d / %05dzLFailed to read the image file %s, Please make sure that libjpeg is installedg     o@r   )Zaxis.)ZPILr8   ImportErrorslicetuplezipstopstartstepfloatintlennpzerosZfloat32	enumerater&   r,   r-   cropresizeZasarrayndimRuntimeErrormean)
file_pathsslice_colorrL   r8   Zdefault_sliceZh_sliceZw_slicehwn_facesfacesi	file_pathZpil_imgZfacer5   r5   r6   
_load_imgs   sV   


rY   Fc                    s   g g }}t t| D ]4}t| | t sq fddt t D }t|}	|	|kr?|dd}||g|	  || qt|}
|
dkrNtd| t	|}t
||}t||||}t|
}tjd| || || }}|||fS )z~Perform the actual data loading for the lfw people dataset

    This operation is meant to be cached by a joblib wrapper.
    c                    s   g | ]}t  |qS r5   )r
   )r:   fZfolder_pathr5   r6   
<listcomp>   s    z%_fetch_lfw_people.<locals>.<listcomp>_ r   z*min_faces_per_person=%d is too restrictive*   )sortedr   r
   r	   rG   replaceextend
ValueErrorrH   uniqueZsearchsortedrY   ZarangerandomZRandomStateshuffle)r1   rQ   rR   rL   min_faces_per_personZperson_namesrP   Zperson_namepathsZ
n_picturesrU   target_namesr0   rV   indicesr5   r[   r6   _fetch_lfw_people   s0   
	




rk   booleanZneither)closedleftg        )
r   r.   rL   rg   rR   rQ   r/   
return_X_yr"   r#   )Zprefer_skip_nested_validationg      ?F      N      c        
         C   s   t | ||||	d\}
}td|
 t|
ddd}|t}||||||d\}}}|t|d}td}|r;||fS t	|||||d	S )
a|  Load the Labeled Faces in the Wild (LFW) people dataset (classification).

    Download it if necessary.

    =================   =======================
    Classes                                5749
    Samples total                         13233
    Dimensionality                         5828
    Features            real, between 0 and 255
    =================   =======================

    For a usage example of this dataset, see
    :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py`.

    Read more in the :ref:`User Guide <labeled_faces_in_the_wild_dataset>`.

    Parameters
    ----------
    data_home : str or path-like, default=None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    funneled : bool, default=True
        Download and use the funneled variant of the dataset.

    resize : float or None, default=0.5
        Ratio used to resize the each face picture. If `None`, no resizing is
        performed.

    min_faces_per_person : int, default=None
        The extracted dataset will only retain pictures of people that have at
        least `min_faces_per_person` different pictures.

    color : bool, default=False
        Keep the 3 RGB channels instead of averaging them to a single
        gray level channel. If color is True the shape of the data has
        one more dimension than the shape with color = False.

    slice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))
        Provide a custom 2D slice (height, width) to extract the
        'interesting' part of the jpeg files and avoid use statistical
        correlation from the background.

    download_if_missing : bool, default=True
        If False, raise an OSError if the data is not locally available
        instead of trying to download the data from the source site.

    return_X_y : bool, default=False
        If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch
        object. See below for more information about the `dataset.data` and
        `dataset.target` object.

        .. versionadded:: 0.20

    n_retries : int, default=3
        Number of retries when HTTP errors are encountered.

        .. versionadded:: 1.5

    delay : float, default=1.0
        Number of seconds between retries.

        .. versionadded:: 1.5

    Returns
    -------
    dataset : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        data : numpy array of shape (13233, 2914)
            Each row corresponds to a ravelled face image
            of original size 62 x 47 pixels.
            Changing the ``slice_`` or resize parameters will change the
            shape of the output.
        images : numpy array of shape (13233, 62, 47)
            Each row is a face image corresponding to one of the 5749 people in
            the dataset. Changing the ``slice_``
            or resize parameters will change the shape of the output.
        target : numpy array of shape (13233,)
            Labels associated to each face image.
            Those labels range from 0-5748 and correspond to the person IDs.
        target_names : numpy array of shape (5749,)
            Names of all persons in the dataset.
            Position in array corresponds to the person ID in the target array.
        DESCR : str
            Description of the Labeled Faces in the Wild (LFW) dataset.

    (data, target) : tuple if ``return_X_y`` is True
        A tuple of two ndarray. The first containing a 2D array of
        shape (n_samples, n_features) with each row representing one
        sample and each column representing the features. The second
        ndarray of shape (n_samples,) containing the target samples.

        .. versionadded:: 0.20

    Examples
    --------
    >>> from sklearn.datasets import fetch_lfw_people
    >>> lfw_people = fetch_lfw_people()
    >>> lfw_people.data.shape
    (13233, 2914)
    >>> lfw_people.target.shape
    (13233,)
    >>> for name in lfw_people.target_names[:5]:
    ...    print(name)
    AJ Cook
    AJ Lamas
    Aaron Eckhart
    Aaron Guiel
    Aaron Patterson
    r   r.   r/   r"   r#   z Loading LFW people faces from %s   r   locationcompressverbose)rL   rg   rR   rQ   lfw.rst)dataZimagesr0   ri   DESCR)
r7   r&   r,   r   cacherk   reshaperG   r   r   )r   r.   rL   rg   rR   rQ   r/   ro   r"   r#   r    r1   m	load_funcrV   r0   ri   Xfdescrr5   r5   r6   fetch_lfw_people   s2    


r   c              
   C   s  t | d}dd |D }W d   n1 sw   Y  dd |D }t|}tj|td}	t }
t|D ]\}}t|dkr\d|	|< |d	 t|d d f|d	 t|d
 d ff}n-t|dkrd	|	|< |d	 t|d d f|d
 t|d d ff}n
td|d |f t|D ]3\}\}}zt||}W n t	y   t|t
|d}Y nw ttt|}t||| }|
| qq5t|
|||}t|j}|d	}|d	d
 |d	|d
  ||_||	tddgfS )z}Perform the actual data loading for the LFW pairs dataset

    This operation is meant to be cached by a joblib wrapper.
    rbc                 S   s   g | ]}|   d qS )	)decodestripsplit)r:   lnr5   r5   r6   r\     s    z$_fetch_lfw_pairs.<locals>.<listcomp>Nc                 S   s   g | ]
}t |d kr|qS )r   )rG   )r:   slr5   r5   r6   r\     s    r=   r   r   r   r      zinvalid line %d: %rzUTF-8zDifferent personszSame person)r-   rG   rH   rI   rF   listrJ   rc   r
   	TypeErrorstrr`   r   appendrY   shapepopinsertarray)index_file_pathr1   rQ   rR   rL   Z
index_filesplit_linesZ
pair_specsZn_pairsr0   rP   rW   
componentspairjnameidxZperson_folder	filenamesrX   pairsr   rU   r5   r5   r6   _fetch_lfw_pairs  sH   		

r   >   10_foldstraintest)	subsetr   r.   rL   rR   rQ   r/   r"   r#   r   c        	         C   s   t |||||d\}	}
td| |	 t|	ddd}|t}dddd	}| |vr6td
| tt|	 f t
|	||  }|||
|||d\}}}td}t|t|d||||dS )aw  Load the Labeled Faces in the Wild (LFW) pairs dataset (classification).

    Download it if necessary.

    =================   =======================
    Classes                                   2
    Samples total                         13233
    Dimensionality                         5828
    Features            real, between 0 and 255
    =================   =======================

    In the `original paper <https://people.cs.umass.edu/~elm/papers/lfw.pdf>`_
    the "pairs" version corresponds to the "restricted task", where
    the experimenter should not use the name of a person to infer
    the equivalence or non-equivalence of two face images that
    are not explicitly given in the training set.

    The original images are 250 x 250 pixels, but the default slice and resize
    arguments reduce them to 62 x 47.

    Read more in the :ref:`User Guide <labeled_faces_in_the_wild_dataset>`.

    Parameters
    ----------
    subset : {'train', 'test', '10_folds'}, default='train'
        Select the dataset to load: 'train' for the development training
        set, 'test' for the development test set, and '10_folds' for the
        official evaluation set that is meant to be used with a 10-folds
        cross validation.

    data_home : str or path-like, default=None
        Specify another download and cache folder for the datasets. By
        default all scikit-learn data is stored in '~/scikit_learn_data'
        subfolders.

    funneled : bool, default=True
        Download and use the funneled variant of the dataset.

    resize : float, default=0.5
        Ratio used to resize the each face picture.

    color : bool, default=False
        Keep the 3 RGB channels instead of averaging them to a single
        gray level channel. If color is True the shape of the data has
        one more dimension than the shape with color = False.

    slice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))
        Provide a custom 2D slice (height, width) to extract the
        'interesting' part of the jpeg files and avoid use statistical
        correlation from the background.

    download_if_missing : bool, default=True
        If False, raise an OSError if the data is not locally available
        instead of trying to download the data from the source site.

    n_retries : int, default=3
        Number of retries when HTTP errors are encountered.

        .. versionadded:: 1.5

    delay : float, default=1.0
        Number of seconds between retries.

        .. versionadded:: 1.5

    Returns
    -------
    data : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        data : ndarray of shape (2200, 5828). Shape depends on ``subset``.
            Each row corresponds to 2 ravel'd face images
            of original size 62 x 47 pixels.
            Changing the ``slice_``, ``resize`` or ``subset`` parameters
            will change the shape of the output.
        pairs : ndarray of shape (2200, 2, 62, 47). Shape depends on ``subset``
            Each row has 2 face images corresponding
            to same or different person from the dataset
            containing 5749 people. Changing the ``slice_``,
            ``resize`` or ``subset`` parameters will change the shape of the
            output.
        target : numpy array of shape (2200,). Shape depends on ``subset``.
            Labels associated to each pair of images.
            The two label values being different persons or the same person.
        target_names : numpy array of shape (2,)
            Explains the target values of the target array.
            0 corresponds to "Different person", 1 corresponds to "same person".
        DESCR : str
            Description of the Labeled Faces in the Wild (LFW) dataset.

    Examples
    --------
    >>> from sklearn.datasets import fetch_lfw_pairs
    >>> lfw_pairs_train = fetch_lfw_pairs(subset='train')
    >>> list(lfw_pairs_train.target_names)
    [np.str_('Different persons'), np.str_('Same person')]
    >>> lfw_pairs_train.pairs.shape
    (2200, 2, 62, 47)
    >>> lfw_pairs_train.data.shape
    (2200, 5828)
    >>> lfw_pairs_train.target.shape
    (2200,)
    rt   zLoading %s LFW pairs from %sru   r   rv   r   r   r   )r   r   r   z+subset='%s' is invalid: should be one of %r)rL   rR   rQ   r{   rz   )r|   r   r0   ri   r}   )r7   r&   r,   r   r~   r   rc   r   r`   keysr
   r   r   r   rG   )r   r   r.   rL   rR   rQ   r/   r"   r#   r    r1   r   r   Zlabel_filenamesr   r   r0   ri   r   r5   r5   r6   fetch_lfw_pairs  sB    


r   )NTTr   r   )NFNr   )NFN)/__doc__loggingnumbersr   r   osr   r   r   r   Zos.pathr   r	   r
   numpyrH   Zjoblibr   utilsr   Zutils._param_validationr   r   r   r   Zutils.fixesr   _baser   r   r   r   	getLogger__name__r&   r*   r)   r%   r7   rY   rk   r   r@   r?   r   r   r   r5   r5   r5   r6   <module>   s    

3K
+
 (
4
