o
    i_                     @   s   d Z ddlZddlmZmZ ddlZddlmZ	 ddl
mZmZmZmZmZ ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZmZm Z m!Z! G dd deeeZ"G dd deeeZ#dS )z6Dummy estimators that implement simple rules of thumb.    N)IntegralReal   )BaseEstimatorClassifierMixinMultiOutputMixinRegressorMixin_fit_context)check_random_state)Interval
StrOptions)class_distribution)_random_choice_csc_weighted_percentile)_check_sample_weight_num_samplescheck_arraycheck_consistent_lengthcheck_is_fittedvalidate_datac                       s   e Zd ZU dZeh dgdgeeddgdZee	d< ddddd	d
Z
edddddZdd Zdd Zdd Z fddZd fdd	Z  ZS )DummyClassifiera]  DummyClassifier makes predictions that ignore the input features.

    This classifier serves as a simple baseline to compare against other more
    complex classifiers.

    The specific behavior of the baseline is selected with the `strategy`
    parameter.

    All strategies make predictions that ignore the input feature values passed
    as the `X` argument to `fit` and `predict`. The predictions, however,
    typically depend on values observed in the `y` parameter passed to `fit`.

    Note that the "stratified" and "uniform" strategies lead to
    non-deterministic predictions that can be rendered deterministic by setting
    the `random_state` parameter if needed. The other strategies are naturally
    deterministic and, once fit, always return the same constant prediction
    for any value of `X`.

    Read more in the :ref:`User Guide <dummy_estimators>`.

    .. versionadded:: 0.13

    Parameters
    ----------
    strategy : {"most_frequent", "prior", "stratified", "uniform",             "constant"}, default="prior"
        Strategy to use to generate predictions.

        * "most_frequent": the `predict` method always returns the most
          frequent class label in the observed `y` argument passed to `fit`.
          The `predict_proba` method returns the matching one-hot encoded
          vector.
        * "prior": the `predict` method always returns the most frequent
          class label in the observed `y` argument passed to `fit` (like
          "most_frequent"). ``predict_proba`` always returns the empirical
          class distribution of `y` also known as the empirical class prior
          distribution.
        * "stratified": the `predict_proba` method randomly samples one-hot
          vectors from a multinomial distribution parametrized by the empirical
          class prior probabilities.
          The `predict` method returns the class label which got probability
          one in the one-hot vector of `predict_proba`.
          Each sampled row of both methods is therefore independent and
          identically distributed.
        * "uniform": generates predictions uniformly at random from the list
          of unique classes observed in `y`, i.e. each class has equal
          probability.
        * "constant": always predicts a constant label that is provided by
          the user. This is useful for metrics that evaluate a non-majority
          class.

          .. versionchanged:: 0.24
             The default value of `strategy` has changed to "prior" in version
             0.24.

    random_state : int, RandomState instance or None, default=None
        Controls the randomness to generate the predictions when
        ``strategy='stratified'`` or ``strategy='uniform'``.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    constant : int or str or array-like of shape (n_outputs,), default=None
        The explicit constant as predicted by the "constant" strategy. This
        parameter is useful only for the "constant" strategy.

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,) or list of such arrays
        Unique class labels observed in `y`. For multi-output classification
        problems, this attribute is a list of arrays as each output has an
        independent set of possible classes.

    n_classes_ : int or list of int
        Number of label for each output.

    class_prior_ : ndarray of shape (n_classes,) or list of such arrays
        Frequency of each class observed in `y`. For multioutput classification
        problems, this is computed independently for each output.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X` has
        feature names that are all strings.

    n_outputs_ : int
        Number of outputs.

    sparse_output_ : bool
        True if the array returned from predict is to be in sparse CSC format.
        Is automatically set to True if the input `y` is passed in sparse
        format.

    See Also
    --------
    DummyRegressor : Regressor that makes predictions using simple rules.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.dummy import DummyClassifier
    >>> X = np.array([-1, 1, 1, 1])
    >>> y = np.array([0, 1, 1, 1])
    >>> dummy_clf = DummyClassifier(strategy="most_frequent")
    >>> dummy_clf.fit(X, y)
    DummyClassifier(strategy='most_frequent')
    >>> dummy_clf.predict(X)
    array([1, 1, 1, 1])
    >>> dummy_clf.score(X, y)
    0.75
    >   prior
stratifiedconstantmost_frequentuniformrandom_state
array-likeNstrategyr   r   _parameter_constraintsr   c                C      || _ || _|| _d S Nr   )selfr    r   r    r%   \/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/sklearn/dummy.py__init__      
zDummyClassifier.__init__TZprefer_skip_nested_validationc                    s  t | |dd | j| _| jdkrt|r| }tdt t|| _	| j	s2t
|}t
|}|jdkr=t
|d}|jd | _t|| |durQt||}| jdkrx| jdu r_td	t
t
| jd  jd
 | jkrxtd| j t||\| _| _| _| jdkrt| jD ]!t fdd| j D sd| j| j  }t|q| jdkr| jd
 | _| jd
 | _| jd
 | _| S )a  Fit the baseline classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        self : object
            Returns the instance itself.
        TZskip_check_arrayr   zA local copy of the target data has been converted to a numpy array. Predicting on sparse target data with the uniform strategy would not save memory and would be slower.r   r   Nr   MConstant target value has to be specified when the constant strategy is used.r   0Constant target value should have shape (%d, 1).c                 3   s     | ]}  d  |kV  qdS )r   Nr%   .0cr   kr%   r&   	<genexpr>   s    z&DummyClassifier.fit.<locals>.<genexpr>zrThe constant target value must be present in the training data. You provided constant={}. Possible values are: {}.)r   r    	_strategyspissparseZtoarraywarningswarnUserWarningsparse_output_npZasarrayZ
atleast_1dndimreshapeshape
n_outputs_r   r   r   
ValueErrorr   classes_
n_classes_class_prior_rangeanyformattolist)r$   Xysample_weighterr_msgr%   r2   r&   fit   s`   









zDummyClassifier.fitc                    s  t |  t|t| j| j| j| j | j}| jdkr*gg g |g}| j	dkr<| 
|| jdkr<g| jrud}| j	dv rNdd  D n| j	dkrV }n| j	dkr_td| j	d	krkd
d |D t|| j}|S | j	dv rt fddt| jD dg}n@| j	dkrtfddt| jD j}n)| j	dkrfddt| jD }t|j}n| j	d	krt| jdf}| jdkrt|}|S )a;  Perform classification on test vectors X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.

        Returns
        -------
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Predicted target values for X.
        r   r   N)r   r   c                 S   s   g | ]
}t | gqS r%   )r<   arrayargmax)r0   cpr%   r%   r&   
<listcomp>#  s    z+DummyClassifier.predict.<locals>.<listcomp>r   zCSparse target prediction is not supported with the uniform strategyr   c                 S   s   g | ]}t |gqS r%   )r<   rN   r/   r%   r%   r&   rQ   /  s    c                    s    g | ]}|  |    qS r%   rO   r0   r3   )rD   rB   r%   r&   rQ   5  s    c                    s$   g | ]} | | j d d qS )r   axisrR   rS   )rB   probar%   r&   rQ   >  s    c                    s&   g | ]} | j | d  qS )size)randintrS   )rB   rC   	n_samplesrsr%   r&   rQ   E      )r   r   r
   r   rC   rB   rD   r   r@   r5   predict_probar;   rA   r   r<   ZtilerE   ZvstackTravel)r$   rI   r   Z
class_probrJ   retr%   )rD   rB   rC   rZ   rV   r[   r&   predict   sn   








 





zDummyClassifier.predictc                 C   s  t |  t|}t| j}| j}| j}| j}| j}| jdkr*|g}|g}|g}|g}g }t	| jD ]}	| j
dkrS||	  }
tj|||	 ftjd}d|dd|
f< nc| j
dkrdt|df||	  }nR| j
dkrz|jd||	 |d}|tj}n<| j
d	krtj|||	 ftjd}|||	  }n$| j
d
krt||	 ||	 k}
tj|||	 ftjd}d|dd|
f< || q1| jdkr|d }|S )a  
        Return probability estimates for the test vectors X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.

        Returns
        -------
        P : ndarray of shape (n_samples, n_classes) or list of such arrays
            Returns the probability of the sample for each class in
            the model, where classes are ordered arithmetically, for each
            output.
        r   r   dtype      ?Nr   r   rW   r   r   r   )r   r   r
   r   rC   rB   rD   r   r@   rE   r5   rO   r<   zerosZfloat64ZonesZmultinomialZastypewhereappend)r$   rI   rZ   r[   rC   rB   rD   r   Pr3   indoutr%   r%   r&   r]   S  sD   







zDummyClassifier.predict_probac                 C   s,   |  |}| jdkrt|S dd |D S )a  
        Return log probability estimates for the test vectors X.

        Parameters
        ----------
        X : {array-like, object with finite length or shape}
            Training data.

        Returns
        -------
        P : ndarray of shape (n_samples, n_classes) or list of such arrays
            Returns the log probability of the sample for each class in
            the model, where classes are ordered arithmetically for each
            output.
        r   c                 S   s   g | ]}t |qS r%   )r<   log)r0   pr%   r%   r&   rQ     s    z5DummyClassifier.predict_log_proba.<locals>.<listcomp>)r]   r@   r<   rk   )r$   rI   rV   r%   r%   r&   predict_log_proba  s   


z!DummyClassifier.predict_log_probac                    $   t   }d|j_d|j_d|_|S NT)super__sklearn_tags__
input_tagssparseZclassifier_tags
poor_scoreno_validationr$   tags	__class__r%   r&   rq     
   
z DummyClassifier.__sklearn_tags__c                    ,   |du rt jt|dfd}t |||S )ak  Return the mean accuracy on the given test data and labels.

        In multi-label classification, this is the subset accuracy
        which is a harsh metric since you require for each sample that
        each label set be correctly predicted.

        Parameters
        ----------
        X : None or array-like of shape (n_samples, n_features)
            Test samples. Passing None as test samples gives the same result
            as passing real test samples, since DummyClassifier
            operates independently of the sampled observations.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True labels for X.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            Mean accuracy of self.predict(X) w.r.t. y.
        Nr   r?   r<   re   lenrp   scorer$   rI   rJ   rK   rx   r%   r&   r     s   zDummyClassifier.scorer#   )__name__
__module____qualname____doc__r   r   strr!   dict__annotations__r'   r	   rM   ra   r]   rm   rq   r   __classcell__r%   r%   rx   r&   r   "   s   
 
s
ZW?r   c                       s   e Zd ZU dZeh dgeedddddgeeddddd	dgd
Zee	d< ddddddZ
edddddZdddZ fddZd fdd	Z  ZS )DummyRegressora  Regressor that makes predictions using simple rules.

    This regressor is useful as a simple baseline to compare with other
    (real) regressors. Do not use it for real problems.

    Read more in the :ref:`User Guide <dummy_estimators>`.

    .. versionadded:: 0.13

    Parameters
    ----------
    strategy : {"mean", "median", "quantile", "constant"}, default="mean"
        Strategy to use to generate predictions.

        * "mean": always predicts the mean of the training set
        * "median": always predicts the median of the training set
        * "quantile": always predicts a specified quantile of the training set,
          provided with the quantile parameter.
        * "constant": always predicts a constant value that is provided by
          the user.

    constant : int or float or array-like of shape (n_outputs,), default=None
        The explicit constant as predicted by the "constant" strategy. This
        parameter is useful only for the "constant" strategy.

    quantile : float in [0.0, 1.0], default=None
        The quantile to predict using the "quantile" strategy. A quantile of
        0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the
        maximum.

    Attributes
    ----------
    constant_ : ndarray of shape (1, n_outputs)
        Mean or median or quantile of the training targets or constant value
        given by the user.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X` has
        feature names that are all strings.

    n_outputs_ : int
        Number of outputs.

    See Also
    --------
    DummyClassifier: Classifier that makes predictions using simple rules.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.dummy import DummyRegressor
    >>> X = np.array([1.0, 2.0, 3.0, 4.0])
    >>> y = np.array([2.0, 3.0, 5.0, 10.0])
    >>> dummy_regr = DummyRegressor(strategy="mean")
    >>> dummy_regr.fit(X, y)
    DummyRegressor()
    >>> dummy_regr.predict(X)
    array([5., 5., 5., 5.])
    >>> dummy_regr.score(X, y)
    0.0
    >   meanmedianr   quantileg        rd   Zboth)closedNZneitherr   )r    r   r   r!   r   r    r   r   c                C   r"   r#   r   )r$   r    r   r   r%   r%   r&   r'     r(   zDummyRegressor.__init__Tr)   c                    s  t | |dd tdddtdkrtdjdkr#td	jd | _t	| d
ur8t
|| jdkrGtjdd| _n| jdkrhd
u rYtjdd| _nsfddt| jD | _nd| jdkr| jd
u rvtd| jd  d
u rtjd d| _nC fddt| jD | _n3| jdkr| jd
u rtdt| jg dddd| _| jdkr| jjd jd krtdjd  t| jd| _| S )a  Fit the baseline regressor.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Target values.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        self : object
            Fitted estimator.
        Tr*   FrJ   )	ensure_2dZ
input_namer   zy must not be empty.r   r+   Nr   )rU   weightsr   rT   c                    s&   g | ]}t d d |f  ddqS )Ng      I@percentile_rankr   rS   )rK   rJ   r%   r&   rQ   H  r\   z&DummyRegressor.fit.<locals>.<listcomp>r   z^When using `strategy='quantile', you have to specify the desired quantile in the range [0, 1].g      Y@)rU   qc                    s&   g | ]}t d d |f  dqS )Nr   r   rS   r   rK   rJ   r%   r&   rQ   W  s    r   r-   )ZcsrZcscZcoo)Zaccept_sparser   Zensure_min_samplesr.   )r   r,   )r   r   r~   rA   r=   r<   r>   r?   r@   r   r   r    Zaverage	constant_r   rE   r   Z
percentiler   	TypeErrorr   r%   r   r&   rM     s\   










 zDummyRegressor.fitFc                 C   sp   t |  t|}tj|| jf| jt| jjd}t|| jf}| jdkr0t	|}t	|}|r6||fS |S )a  Perform classification on test vectors X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.

        return_std : bool, default=False
            Whether to return the standard deviation of posterior prediction.
            All zeros in this case.

            .. versionadded:: 0.20

        Returns
        -------
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Predicted target values for X.

        y_std : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Standard deviation of predictive distribution of query points.
        rb   r   )
r   r   r<   fullr@   r   rN   rc   re   r_   )r$   rI   Z
return_stdrZ   rJ   Zy_stdr%   r%   r&   ra   t  s   


zDummyRegressor.predictc                    rn   ro   )rp   rq   rr   rs   Zregressor_tagsrt   ru   rv   rx   r%   r&   rq     rz   zDummyRegressor.__sklearn_tags__c                    r{   )a  Return the coefficient of determination R^2 of the prediction.

        The coefficient R^2 is defined as `(1 - u/v)`, where `u` is the
        residual sum of squares `((y_true - y_pred) ** 2).sum()` and `v` is the
        total sum of squares `((y_true - y_true.mean()) ** 2).sum()`. The best
        possible score is 1.0 and it can be negative (because the model can be
        arbitrarily worse). A constant model that always predicts the expected
        value of y, disregarding the input features, would get a R^2 score of
        0.0.

        Parameters
        ----------
        X : None or array-like of shape (n_samples, n_features)
            Test samples. Passing None as test samples gives the same result
            as passing real test samples, since `DummyRegressor`
            operates independently of the sampled observations.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True values for X.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            R^2 of `self.predict(X)` w.r.t. y.
        Nr   r|   r}   r   rx   r%   r&   r     s   zDummyRegressor.scorer#   )F)r   r   r   r   r   r   r   r!   r   r   r'   r	   rM   ra   rq   r   r   r%   r%   rx   r&   r     s   
 B

U&r   )$r   r8   numbersr   r   numpyr<   Zscipy.sparsers   r6   baser   r   r   r   r	   utilsr
   Zutils._param_validationr   r   Zutils.multiclassr   Zutils.randomr   Zutils.statsr   Zutils.validationr   r   r   r   r   r   r   r   r%   r%   r%   r&   <module>   s"     
   .