o
    id                     @   s  d dl Z d dlmZmZ d dlZddlmZmZm	Z	m
Z
mZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZmZmZmZmZ dd	lmZmZmZmZm Z  dd
l!m"Z" ddl#m$Z$m%Z%m&Z&m'Z'm(Z( ddl)m*Z* e+dZ,dd Z-G dd dee
e	eZ.dS )    N)IntegralReal   )BaseEstimatorMetaEstimatorMixinMultiOutputMixinRegressorMixin_fit_contextclone)ConvergenceWarning)check_consistent_lengthcheck_random_stateget_tags)Bunch)
HasMethodsIntervalOptions
RealNotInt
StrOptions)MetadataRouterMethodMapping_raise_for_params_routing_enabledprocess_routing)sample_without_replacement)_check_method_params_check_sample_weightcheck_is_fittedhas_fit_parametervalidate_data   )LinearRegressionc                 C   sj   | t | }ttd| }ttd||  }|dkrdS |dkr$t dS tt tt|t| S )a  Determine number trials such that at least one outlier-free subset is
    sampled for the given inlier/outlier ratio.

    Parameters
    ----------
    n_inliers : int
        Number of inliers in the data.

    n_samples : int
        Total number of samples in the data.

    min_samples : int
        Minimum number of samples chosen randomly from original data.

    probability : float
        Probability (confidence) that one outlier-free sample is generated.

    Returns
    -------
    trials : int
        Number of trials.

    r    r   inf)floatmax_EPSILONabsnpceillog)Z	n_inliers	n_samplesmin_samplesZprobabilityZinlier_ratioZnomdenom r-   k/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/sklearn/linear_model/_ransac.py_dynamic_max_trials/   s   "r/   c                       sZ  e Zd ZU dZeg ddgeeddddeedddddgeedddddge	dge	dgeedddde
eejhgeedddde
eejhgeedddde
eejhgeeddddgeeddddged	d
he	gdgdZeed< 	ddddddejejejdd	ddddZedddddZdd Zdd Zdd Z fddZ  ZS ) RANSACRegressora  RANSAC (RANdom SAmple Consensus) algorithm.

    RANSAC is an iterative algorithm for the robust estimation of parameters
    from a subset of inliers from the complete data set.

    Read more in the :ref:`User Guide <ransac_regression>`.

    Parameters
    ----------
    estimator : object, default=None
        Base estimator object which implements the following methods:

        * `fit(X, y)`: Fit model to given training data and target values.
        * `score(X, y)`: Returns the mean accuracy on the given test data,
          which is used for the stop criterion defined by `stop_score`.
          Additionally, the score is used to decide which of two equally
          large consensus sets is chosen as the better one.
        * `predict(X)`: Returns predicted values using the linear model,
          which is used to compute residual error using loss function.

        If `estimator` is None, then
        :class:`~sklearn.linear_model.LinearRegression` is used for
        target values of dtype float.

        Note that the current implementation only supports regression
        estimators.

    min_samples : int (>= 1) or float ([0, 1]), default=None
        Minimum number of samples chosen randomly from original data. Treated
        as an absolute number of samples for `min_samples >= 1`, treated as a
        relative number `ceil(min_samples * X.shape[0])` for
        `min_samples < 1`. This is typically chosen as the minimal number of
        samples necessary to estimate the given `estimator`. By default a
        :class:`~sklearn.linear_model.LinearRegression` estimator is assumed and
        `min_samples` is chosen as ``X.shape[1] + 1``. This parameter is highly
        dependent upon the model, so if a `estimator` other than
        :class:`~sklearn.linear_model.LinearRegression` is used, the user must
        provide a value.

    residual_threshold : float, default=None
        Maximum residual for a data sample to be classified as an inlier.
        By default the threshold is chosen as the MAD (median absolute
        deviation) of the target values `y`. Points whose residuals are
        strictly equal to the threshold are considered as inliers.

    is_data_valid : callable, default=None
        This function is called with the randomly selected data before the
        model is fitted to it: `is_data_valid(X, y)`. If its return value is
        False the current randomly chosen sub-sample is skipped.

    is_model_valid : callable, default=None
        This function is called with the estimated model and the randomly
        selected data: `is_model_valid(model, X, y)`. If its return value is
        False the current randomly chosen sub-sample is skipped.
        Rejecting samples with this function is computationally costlier than
        with `is_data_valid`. `is_model_valid` should therefore only be used if
        the estimated model is needed for making the rejection decision.

    max_trials : int, default=100
        Maximum number of iterations for random sample selection.

    max_skips : int, default=np.inf
        Maximum number of iterations that can be skipped due to finding zero
        inliers or invalid data defined by ``is_data_valid`` or invalid models
        defined by ``is_model_valid``.

        .. versionadded:: 0.19

    stop_n_inliers : int, default=np.inf
        Stop iteration if at least this number of inliers are found.

    stop_score : float, default=np.inf
        Stop iteration if score is greater equal than this threshold.

    stop_probability : float in range [0, 1], default=0.99
        RANSAC iteration stops if at least one outlier-free set of the training
        data is sampled in RANSAC. This requires to generate at least N
        samples (iterations)::

            N >= log(1 - probability) / log(1 - e**m)

        where the probability (confidence) is typically set to high value such
        as 0.99 (the default) and e is the current fraction of inliers w.r.t.
        the total number of samples.

    loss : str, callable, default='absolute_error'
        String inputs, 'absolute_error' and 'squared_error' are supported which
        find the absolute error and squared error per sample respectively.

        If ``loss`` is a callable, then it should be a function that takes
        two arrays as inputs, the true and predicted value and returns a 1-D
        array with the i-th value of the array corresponding to the loss
        on ``X[i]``.

        If the loss on a sample is greater than the ``residual_threshold``,
        then this sample is classified as an outlier.

        .. versionadded:: 0.18

    random_state : int, RandomState instance, default=None
        The generator used to initialize the centers.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    estimator_ : object
        Final model fitted on the inliers predicted by the "best" model found
        during RANSAC sampling (copy of the `estimator` object).

    n_trials_ : int
        Number of random selection trials until one of the stop criteria is
        met. It is always ``<= max_trials``.

    inlier_mask_ : bool array of shape [n_samples]
        Boolean mask of inliers classified as ``True``.

    n_skips_no_inliers_ : int
        Number of iterations skipped due to finding zero inliers.

        .. versionadded:: 0.19

    n_skips_invalid_data_ : int
        Number of iterations skipped due to invalid data defined by
        ``is_data_valid``.

        .. versionadded:: 0.19

    n_skips_invalid_model_ : int
        Number of iterations skipped due to an invalid model defined by
        ``is_model_valid``.

        .. versionadded:: 0.19

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    HuberRegressor : Linear regression model that is robust to outliers.
    TheilSenRegressor : Theil-Sen Estimator robust multivariate regression model.
    SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/RANSAC
    .. [2] https://www.sri.com/wp-content/uploads/2021/12/ransac-publication.pdf
    .. [3] https://bmva-archive.org.uk/bmvc/2009/Papers/Paper355/Paper355.pdf

    Examples
    --------
    >>> from sklearn.linear_model import RANSACRegressor
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(
    ...     n_samples=200, n_features=2, noise=4.0, random_state=0)
    >>> reg = RANSACRegressor(random_state=0).fit(X, y)
    >>> reg.score(X, y)
    0.9885
    >>> reg.predict(X[:1,])
    array([-31.9417])

    For a more detailed example, see
    :ref:`sphx_glr_auto_examples_linear_model_plot_ransac.py`
    )fitscorepredictNr    left)closedr   Zbothabsolute_errorsquared_errorrandom_state)	estimatorr+   residual_thresholdis_data_validis_model_valid
max_trials	max_skipsstop_n_inliers
stop_scorestop_probabilitylossr8   _parameter_constraintsd   gGz?)r+   r:   r;   r<   r=   r>   r?   r@   rA   rB   r8   c                C   sL   || _ || _|| _|| _|| _|| _|| _|| _|	| _|
| _	|| _
|| _d S N)r9   r+   r:   r;   r<   r=   r>   r?   r@   rA   r8   rB   )selfr9   r+   r:   r;   r<   r=   r>   r?   r@   rA   rB   r8   r-   r-   r.   __init__   s   
zRANSACRegressor.__init__F)Zprefer_skip_nested_validationc           &      K   s  t || d tddd}tdd}t| ||||fd\}}t|| | jdur-t| j}nt }| jdu rFt|ts>t	d|j
d	 d	 }n!d
| j  k rQd	k r_n nt| j|j
d
  }n| jd	krg| j}||j
d
 krwt	d|j
d
  | jdu rtt|t| }	n| j}	| jdkr|jd	krdd }
n!dd }
n| jdkr|jd	krdd }
ndd }
nt| jr| j}
t| j}z|j|d W n	 t	y   Y nw t|d}t|j}|dur|st	d| |dur||d< t rt| dfi |}nt }ti i i d|_|durt||}d|i|j_d	}tj }d}d}d}d}d
| _d
| _ d
| _!|j
d
 }t"|}d
| _#| j$}| j#|k r!|  j#d	7  _#| j| j  | j! | j%kr\nt&|||d}|| }|| }| j'dur| '||s|  j d	7  _ qAt(||jj|d}|j||fi | | j)dur| )|||s|  j!d	7  _!qA|*|}|
||}||	k}t+|}||k r|  jd	7  _qA|| } ||  }!||  }"t(||jj,| d}#|j,|!|"fi |#}$||kr|$|k rqA|}|$}|}|!}|"}| }t-|t.|||| j/}|| j0ks|| j1krn| j#|k sG|du r;| j| j  | j! | j%kr7t	dt	d| j| j  | j! | j%krNt23dt4 t(||jj|d}%|j||fi |% || _5|| _6| S )a
  Fit estimator using RANSAC algorithm.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Individual weights for each sample
            raises error if sample_weight is passed and estimator
            fit method does not support it.

            .. versionadded:: 0.18

        **fit_params : dict
            Parameters routed to the `fit` method of the sub-estimator via the
            metadata routing API.

            .. versionadded:: 1.5

                Only available if
                `sklearn.set_config(enable_metadata_routing=True)` is set. See
                :ref:`Metadata Routing User Guide <metadata_routing>` for more
                details.

        Returns
        -------
        self : object
            Fitted `RANSACRegressor` estimator.

        Raises
        ------
        ValueError
            If no valid consensus set could be found. This occurs if
            `is_data_valid` and `is_model_valid` return False for all
            `max_trials` randomly chosen sub-samples.
        r1   ZcsrF)accept_sparseensure_all_finite)Z	ensure_2d)Zvalidate_separatelyNzR`min_samples` needs to be explicitly set when estimator is not a LinearRegression.r    r   zG`min_samples` may not be larger than number of samples: n_samples = %d.r6   c                 S   s   t | | S rE   )r'   r&   Zy_truey_predr-   r-   r.   <lambda>  s    z%RANSACRegressor.fit.<locals>.<lambda>c                 S   s   t jt | | ddS )Nr    Zaxis)r'   sumr&   rJ   r-   r-   r.   rL     s    r7   c                 S   s   | | d S )Nr   r-   rJ   r-   r-   r.   rL     s    c                 S   s   t j| | d ddS )Nr   r    rM   )r'   rN   rJ   r-   r-   r.   rL     s    )r8   sample_weightz[%s does not support sample_weight. Sample weights are only used for the calibration itself.)r1   r3   r2   )paramsindiceszRANSAC skipped more iterations than `max_skips` without finding a valid consensus set. Iterations were skipped because each randomly chosen sub-sample failed the passing criteria. See estimator attributes for diagnostics (n_skips*).zRANSAC could not find a valid consensus set. All `max_trials` iterations were skipped because each randomly chosen sub-sample failed the passing criteria. See estimator attributes for diagnostics (n_skips*).zRANSAC found a valid consensus set but exited early due to skipping more iterations than `max_skips`. See estimator attributes for diagnostics (n_skips*).)7r   dictr   r   r9   r
   r!   r+   
isinstance
ValueErrorshaper'   r(   r:   Zmedianr&   rB   ndimcallabler   r8   Z
set_paramsr   type__name__r   r   r   r   r1   r"   Zn_skips_no_inliers_Zn_skips_invalid_data_Zn_skips_invalid_model_ZarangeZ	n_trials_r=   r>   r   r;   r   r<   r3   rN   r2   minr/   rA   r?   r@   warningswarnr   
estimator_Zinlier_mask_)&rF   XyrO   Z
fit_paramsZcheck_X_paramsZcheck_y_paramsr9   r+   r:   Zloss_functionr8   Zestimator_fit_has_sample_weightZestimator_nameZrouted_paramsZn_inliers_bestZ
score_bestZinlier_mask_bestZX_inlier_bestZy_inlier_bestZinlier_best_idxs_subsetr*   Zsample_idxsr=   Zsubset_idxsZX_subsetZy_subsetZfit_params_subsetrK   Zresiduals_subsetZinlier_mask_subsetZn_inliers_subsetZinlier_idxs_subsetZX_inlier_subsetZy_inlier_subsetZscore_params_inlier_subsetZscore_subsetZfit_params_best_idxs_subsetr-   r-   r.   r1   =  sX  0































^
zRANSACRegressor.fitc                 K   s^   t |  t| |dddd}t|| d t r#t| dfi |jd }ni }| jj|fi |S )a   Predict using the estimated model.

        This is a wrapper for `estimator_.predict(X)`.

        Parameters
        ----------
        X : {array-like or sparse matrix} of shape (n_samples, n_features)
            Input data.

        **params : dict
            Parameters routed to the `predict` method of the sub-estimator via
            the metadata routing API.

            .. versionadded:: 1.5

                Only available if
                `sklearn.set_config(enable_metadata_routing=True)` is set. See
                :ref:`Metadata Routing User Guide <metadata_routing>` for more
                details.

        Returns
        -------
        y : array, shape = [n_samples] or [n_samples, n_targets]
            Returns predicted values.
        FTrI   rH   resetr3   )r   r   r   r   r   r9   r]   r3   )rF   r^   rP   Zpredict_paramsr-   r-   r.   r3   \  s   zRANSACRegressor.predictc                 K   s`   t |  t| |dddd}t|| d t r#t| dfi |jd }ni }| jj||fi |S )a6  Return the score of the prediction.

        This is a wrapper for `estimator_.score(X, y)`.

        Parameters
        ----------
        X : (array-like or sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values.

        **params : dict
            Parameters routed to the `score` method of the sub-estimator via
            the metadata routing API.

            .. versionadded:: 1.5

                Only available if
                `sklearn.set_config(enable_metadata_routing=True)` is set. See
                :ref:`Metadata Routing User Guide <metadata_routing>` for more
                details.

        Returns
        -------
        z : float
            Score of the prediction.
        FTr`   r2   )r   r   r   r   r   r9   r]   r2   )rF   r^   r_   rP   Zscore_paramsr-   r-   r.   r2     s   zRANSACRegressor.scorec                 C   sH   t | jjdj| jt jdddjdddjdddjdddd}|S )aj  Get metadata routing of this object.

        Please check :ref:`User Guide <metadata_routing>` on how the routing
        mechanism works.

        .. versionadded:: 1.5

        Returns
        -------
        routing : MetadataRouter
            A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
            routing information.
        )ownerr1   )ZcallerZcalleer2   r3   )r9   Zmethod_mapping)r   	__class__rY   addr9   r   )rF   Zrouterr-   r-   r.   get_metadata_routing  s   z$RANSACRegressor.get_metadata_routingc                    s6   t   }| jd u rd|j_|S t| jjj|j_|S )NT)super__sklearn_tags__r9   Z
input_tagssparser   )rF   tagsrc   r-   r.   rg     s   

z RANSACRegressor.__sklearn_tags__rE   )rY   
__module____qualname____doc__r   r   r   r   r   rW   r   r'   r"   r   rC   rR   __annotations__rG   r	   r1   r3   r2   re   rg   __classcell__r-   r-   rj   r.   r0   Q   sb   
  /  ..r0   )/r[   numbersr   r   numpyr'   baser   r   r   r   r	   r
   
exceptionsr   utilsr   r   r   Zutils._bunchr   Zutils._param_validationr   r   r   r   r   Zutils.metadata_routingr   r   r   r   r   Zutils.randomr   Zutils.validationr   r   r   r   r   _baser!   spacingr%   r/   r0   r-   r-   r-   r.   <module>   s(    

"