o
    i                     @   sF  d Z ddlmZmZ ddlZddlZddlZddlm	Z	 ddl
mZ ddlmZmZmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZ dd	lmZ dd
lmZm Z  ddl!m"Z"m#Z#m$Z$ ddl%m&Z&m'Z' ddl(m)Z) ddl*m+Z+m,Z, ddl-m.Z. ddl/m0Z0m1Z1 ddl2m3Z3m4Z4m5Z5m6Z6m7Z7 ddl8m9Z9m:Z: ddl;m<Z< ddl=m>Z>m?Z? ddl@mAZAmBZB e<dZCe ZDeCEeDjFjGZHeDjIeH eD_IeDjFeH eD_Fe ZJeCEeJjFjGZHeJjIeH eJ_IeJjFeH eJ_Fdd ZKejLMdeeBeA dddddddddddddd dddd!gg d"d#d$ ZNd%d& ZOejLMd'eBeA d(d) ZPG d*d+ d+eZQd,d- ZRd.d/ ZSd0d1 ZTd2d3 ZUd4d5 ZVd6d7 ZWd8d9 ZXd:d; ZYd<d= ZZd>d? Z[d@dA Z\dBdC Z]G dDdE dEeZ^dFdG Z_d|dIdJZ`dKdL ZadMdN ZbdOdP ZcdQdR ZddSdT ZedUdV ZfdWdX ZgdYdZ Zhd[d\ Zid]d^ Zjd_d` Zkdadb Zldcdd Zmdedf Zndgdh ZoejLMdieedjdkdfeedjdkdfee dfee1 dfgdldm Zpe	ddnejLMdoeedjdpdjdqeedjdpdjdqgdrds ZqejLMdte5dudufe4dvdwfe3dvdufge	ddndxdy ZrejLMdoeedjdpdjdqeedjdpdjdqgdzd{ ZsdS )}zE
Testing for the bagging ensemble module (sklearn.ensemble.bagging).
    )cycleproductN)config_context)BaseEstimator)load_diabetes	load_irismake_hastie_10_2)DummyClassifierDummyRegressor)AdaBoostClassifierAdaBoostRegressorBaggingClassifierBaggingRegressorHistGradientBoostingClassifierHistGradientBoostingRegressorRandomForestClassifierRandomForestRegressor)SelectKBest)LogisticRegression
Perceptron)GridSearchCVParameterGridtrain_test_split)KNeighborsClassifierKNeighborsRegressor)make_pipeline)FunctionTransformerscale)SparseRandomProjection)SVCSVR)"ConsumingClassifierWithOnlyPredict)ConsumingClassifierWithoutPredictLogProba&ConsumingClassifierWithoutPredictProba	_Registrycheck_recorded_metadata)DecisionTreeClassifierDecisionTreeRegressor)check_random_state)assert_array_almost_equalassert_array_equal)CSC_CONTAINERSCSR_CONTAINERSc            	      C   s   t d} ttjtj| d\}}}}tddgddgddgddgd	}d t td
dtddt	 t
 g}t|t|D ]\}}td|| dd|||| q:d S )Nr   random_state      ?      ?      TFmax_samplesmax_features	bootstrapbootstrap_features   Zmax_iter   )	max_depth)	estimatorr.   n_estimators )r(   r   irisdatatargetr   r	   r   r&   r   r   zipr   r   fitpredict)	rngX_trainX_testy_trainy_testgrid
estimatorsparamsr<   r>   r>   r/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/sklearn/ensemble/tests/test_bagging.pytest_classificationA   s8   
	
rN   z sparse_container, params, methodr/   r:   Tr3   r0   r2   Fr5   r6   r7   r4   r6   r7   )rD   predict_probapredict_log_probadecision_functionc                    s   G dd dt }td}tttjtj|d\}}}}| |}	| |}
td|ddddd	||	|}t	|||
}td|ddddd	|||}t	|||}t
|| t|	 d
d |jD }t fdd|D ssJ d S )Nc                           e Zd ZdZ fddZ  ZS )z-test_sparse_classification.<locals>.CustomSVC7SVC variant that records the nature of the training setc                       t  || t|| _| S NsuperrC   type
data_type_selfXy	__class__r>   rM   rC         
z1test_sparse_classification.<locals>.CustomSVC.fit__name__
__module____qualname____doc__rC   __classcell__r>   r>   r`   rM   	CustomSVC|       ri   r   r-   Zlinearovr)Zkerneldecision_function_shaper1   r<   r.   c                 S      g | ]}|j qS r>   r[   .0ir>   r>   rM   
<listcomp>       z.test_sparse_classification.<locals>.<listcomp>c                       g | ]}| kqS r>   r>   rq   tZsparse_typer>   rM   rs          r>   )r   r(   r   r   r?   r@   rA   r   rC   getattrr)   rZ   estimators_all)sparse_containerrL   methodri   rE   rF   rG   rH   rI   X_train_sparseX_test_sparsesparse_classifiersparse_resultsZdense_classifierdense_resultstypesr>   rx   rM   test_sparse_classificationb   s:   


r   c                  C   s   t d} ttjd d tjd d | d\}}}}tddgddgddgddgd}d t t t t	 fD ]}|D ]}t
d
|| d	|||| q9q5d S )Nr   2   r-   r/   r0   TFr3   rm   r>   )r(   r   diabetesr@   rA   r   r
   r'   r   r    r   rC   rD   )rE   rF   rG   rH   rI   rJ   r<   rL   r>   r>   rM   test_regression   s0   

r   r}   c                    s"  t d}ttjd d tjd d |d\}}}}G dd dt}ddddd	d
dddd	ddddddddg}| |}| |}	|D ]K}
td| dd|
||}||	}td| dd|
|||}t	| dd |j
D }t|| t fdd|D sJ t|| qCd S )Nr   r   r-   c                       rT   )z)test_sparse_regression.<locals>.CustomSVRrU   c                    rV   rW   rX   r\   r`   r>   rM   rC      rb   z-test_sparse_regression.<locals>.CustomSVR.fitrc   r>   r>   r`   rM   	CustomSVR   rj   r   r/   r:   Tr3   r0   r2   FrO   rP   r1   rm   c                 S   rn   r>   ro   rp   r>   r>   rM   rs      rt   z*test_sparse_regression.<locals>.<listcomp>c                    ru   r>   r>   rv   rx   r>   rM   rs      ry   r>   )r(   r   r   r@   rA   r    r   rC   rD   rZ   r{   r)   r|   )r}   rE   rF   rG   rH   rI   r   Zparameter_setsr   r   rL   r   r   r   r   r>   rx   rM   test_sparse_regression   sN   




r   c                   @      e Zd Zdd Zdd ZdS )DummySizeEstimatorc                 C   s   |j d | _t|| _d S Nr   )shapetraining_size_joblibhashtraining_hash_r\   r>   r>   rM   rC      s   zDummySizeEstimator.fitc                 C   s   t |jd S r   )nponesr   r]   r^   r>   r>   rM   rD      s   zDummySizeEstimator.predictNrd   re   rf   rC   rD   r>   r>   r>   rM   r          r   c                  C   s   t d} ttjtj| d\}}}}t ||}tt dd| d||}||||||ks3J tt dd| d||}||||||ksNJ tt	 dd||}g }|j
D ]}|j|jd ksjJ ||j q^tt|t|ks}J d S )Nr   r-   r0   F)r<   r4   r6   r.   T)r<   r6   )r(   r   r   r@   rA   r'   rC   r   scorer   r{   r   r   appendr   lenset)rE   rF   rG   rH   rI   r<   ensembleZtraining_hashr>   r>   rM   test_bootstrap_samples   s>   

r   c                  C   s   t d} ttjtj| d\}}}}tt dd| d||}|jD ]}tjj	d t
|j	d ks3J q!tt dd| d||}|jD ]}tjj	d t
|j	d ksVJ qDd S )Nr   r-   r0   F)r<   r5   r7   r.   r1   T)r(   r   r   r@   rA   r   r'   rC   estimators_features_r   r   unique)rE   rF   rG   rH   rI   r   featuresr>   r>   rM   test_bootstrap_features*  s2   

"
"r   c                  C   s  t d} ttjtj| d\}}}}tjddd` tt | d	||}t
tj||ddtt| t
||t|| tt | dd		||}t
tj||ddtt| t
||t|| W d    d S 1 s{w   Y  d S )
Nr   r-   ignore)divideinvalidrm   r1   )Zaxis   )r<   r.   r4   )r(   r   r?   r@   rA   r   Zerrstater   r&   rC   r)   sumrQ   r   r   exprR   r   rE   rF   rG   rH   rI   r   r>   r>   rM   test_probabilityF  s8   
"r   c            	   	   C   s   t d} ttjtj| d\}}}}t t fD ]H}t|ddd| d||}|	||}t
||j dk s7J d}tjt|d t|d	dd| d}||| W d    n1 sZw   Y  qd S )
Nr   r-   d   Tr<   r=   r6   	oob_scorer.   皙?{Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.matchr1   )r(   r   r?   r@   rA   r&   r   r   rC   r   abs
oob_score_pytestwarnsUserWarning)	rE   rF   rG   rH   rI   r<   clf
test_scorewarn_msgr>   r>   rM   test_oob_score_classificationi  s<   
r   c            	      C   s   t d} ttjtj| d\}}}}tt ddd| d||}|||}t	||j
 dk s0J d}tjt|d tt d	dd| d}||| W d    d S 1 sUw   Y  d S )
Nr   r-   r   Tr   r   r   r   r1   )r(   r   r   r@   rA   r   r'   rC   r   r   r   r   r   r   )	rE   rF   rG   rH   rI   r   r   r   Zregrr>   r>   rM   test_oob_score_regression  s6   
"r   c                  C   sf   t d} ttjtj| d\}}}}tt ddd| d||}t ||}t|	||	| d S )Nr   r-   r1   F)r<   r=   r6   r7   r.   )
r(   r   r   r@   rA   r   r   rC   r)   rD   )rE   rF   rG   rH   rI   Zclf1Zclf2r>   r>   rM   test_single_estimator  s   
r   c                  C   s2   t jt j} }t }tt|| |drJ d S )NrS   )r?   r@   rA   r&   hasattrr   rC   )r^   r_   baser>   r>   rM   
test_error  s   r   c                  C   s  t tjtjdd\} }}}tt ddd| |}||}|jdd ||}t	|| tt ddd| |}||}t	|| tt
ddddd| |}||}|jdd ||}	t	||	 tt
ddddd| |}||}
t	||
 d S )	Nr   r-      n_jobsr.   r1   r   rk   )rl   )r   r?   r@   rA   r   r&   rC   rQ   
set_paramsr)   r   rS   )rF   rG   rH   rI   r   y1y2y3Z
decisions1Z
decisions2Z
decisions3r>   r>   rM   test_parallel_classification  sF   









r   c            	      C   s   t d} ttjtj| d\}}}}tt ddd||}|jdd |	|}|jdd |	|}t
|| tt ddd||}|	|}t
|| d S )Nr   r-   r   r   r1   r   r:   )r(   r   r   r@   rA   r   r'   rC   r   rD   r)   )	rE   rF   rG   rH   rI   r   r   r   r   r>   r>   rM   test_parallel_regression  s"   




r   c                  C   sD   t jt j} }d||dk< ddd}ttt |dd| | d S )Nr1   r:   )r1   r:   )r=   Zestimator__CZroc_auc)Zscoring)r?   r@   rA   r   r   r   rC   )r^   r_   
parametersr>   r>   rM   test_gridsearch  s   
 r   c                  C   s,  t d} ttjtj| d\}}}}td ddd||}t|jt	s$J tt	 ddd||}t|jt	s8J tt
 ddd||}t|jt
sLJ ttjtj| d\}}}}td ddd||}t|jtslJ tt ddd||}t|jtsJ tt ddd||}t|jtsJ d S )Nr   r-   r   r   )r(   r   r?   r@   rA   r   rC   
isinstanceZ
estimator_r&   r   r   r   r'   r    r   r>   r>   rM   test_estimator  s6   

r   c                  C   sL   t ttddt dd} | tjtj t| d j	d d j
ts$J d S )Nr1   )kr:   )r5   r   )r   r   r   r&   rC   r?   r@   rA   r   stepsr.   intr<   r>   r>   rM   test_bagging_with_pipelineH  s
   "r   c                   @   r   )DummyZeroEstimatorc                 C   s   t || _| S rW   )r   r   classes_r\   r>   r>   rM   rC   Q  s   zDummyZeroEstimator.fitc                 C   s   | j tj|jd td S )Nr   )dtype)r   r   Zzerosr   r   r   r>   r>   rM   rD   U  s   zDummyZeroEstimator.predictNr   r>   r>   r>   rM   r   P  r   r   c                  C   s   t t } td}| tjtjtj t	t
 | jtjtj|jdtjjd dd W d    d S 1 s9w   Y  d S )Nr   
   )size)sample_weight)r   r   r(   rC   r?   r@   rA   rD   r   raises
ValueErrorrandintr   )r<   rE   r>   r>   rM   1test_bagging_sample_weight_unsupported_but_passedY  s   
"r   *   c                 C   s   t ddd\}}d }dD ]"}|d u rt|| dd}n|j|d ||| t||ks.J qtd| d	d}||| td
d |D tdd |D ksPJ d S )Nr8   r1   Z	n_samplesr.   )r   r   T)r=   r.   
warm_startr=   r   Fc                 S   rn   r>   r-   rq   treer>   r>   rM   rs   {  rt   z#test_warm_start.<locals>.<listcomp>c                 S   rn   r>   r-   r   r>   r>   rM   rs   |  rt   )r   r   r   rC   r   r   )r.   r^   r_   clf_wsr=   Z	clf_no_wsr>   r>   rM   test_warm_startf  s"   r   c                  C   sp   t ddd\} }tddd}|| | |jdd tt || | W d    d S 1 s1w   Y  d S )	Nr8   r1   r   r   T)r=   r   r2   r   )r   r   rC   r   r   r   r   r^   r_   r   r>   r>   rM   $test_warm_start_smaller_n_estimators  s   "r   c            	      C   s   t ddd\} }t| |dd\}}}}tdddd	}||| ||}|d
7 }d}tjt|d ||| W d    n1 sCw   Y  t||| d S )Nr8   r1   r   +   r-   r   TS   r=   r   r.   r0   z;Warm-start fitting without increasing n_estimators does notr   )	r   r   r   rC   rD   r   r   r   r*   )	r^   r_   rF   rG   rH   rI   r   Zy_predr   r>   r>   rM   "test_warm_start_equal_n_estimators  s   
r   c            
      C   s   t ddd\} }t| |dd\}}}}tdddd	}||| |jd
d ||| ||}td
ddd	}||| ||}	t||	 d S )Nr8   r1   r   r   r-   r   TiE  r   r   r   F)r   r   r   rC   r   rD   r)   )
r^   r_   rF   rG   rH   rI   r   r   r   r   r>   r>   rM   test_warm_start_equivalence  s   

r   c                  C   sZ   t ddd\} }tdddd}tt || | W d    d S 1 s&w   Y  d S )Nr8   r1   r   r   T)r=   r   r   )r   r   r   r   r   rC   r   r>   r>   rM   $test_warm_start_with_oob_score_fails  s
   "r   c                  C   s~   t ddd\} }tddd}|| | |jdddd	 || | tt t|d
 W d    d S 1 s8w   Y  d S )Nr   r1   r   r   T)r=   r   Fr   )r   r   r=   r   )r   r   rC   r   r   r   AttributeErrorrz   r   r>   r>   rM   $test_oob_score_removed_on_warm_start  s   "r   c                  C   sH   t ddd\} }tt ddddd}|| |j|| |jks"J d S )N   r1   r   r/   T)r4   r5   r   r.   )r   r   r   rC   r   r^   r_   baggingr>   r>   rM   test_oob_score_consistency  s   $r   c                  C   s   t ddd\} }tt ddddd}|| | |j}|j}|j}t|t|ks+J t|d t| d ks9J |d jj	d	ksCJ d}|| }|| }|| }	| | d d |f }
|| }|	j
}|	|
| |	j
}t|| d S )
Nr   r1   r   r/   F)r4   r5   r.   r6   r   r:   rr   )r   r   r   rC   estimators_samples_r   r{   r   r   kindcoef_r)   )r^   r_   r   Zestimators_samplesZestimators_featuresrK   Zestimator_indexZestimator_samplesZestimator_featuresr<   rF   rH   Z
orig_coefsZ	new_coefsr>   r>   rM   test_estimators_samples  s2   r   c                  C   s   t  } | j| j}}ttddt }t|ddd}||| |jd j	d d j
 }|jd }|jd }|jd }|| d d |f }	|| }
||	|
 t|j	d d j
| d S )Nr:   )Zn_componentsr/   r   )r<   r4   r.   r   r1   )r   r@   rA   r   r   r   r   rC   r{   r   r   copyr   r   r*   )r?   r^   r_   Zbase_pipeliner   Zpipeline_estimator_coefr<   Zestimator_sampleZestimator_featurerF   rH   r>   r>   rM   %test_estimators_samples_deterministic  s   


r   c                  C   sH   d} t d|  dd\}}tt | ddd}||| |j| ks"J d S )Nr   r:   r1   r   r/   )r4   r5   r.   )r   r   r   rC   Z_max_samples)r4   r^   r_   r   r>   r>   rM   test_max_samples_consistency  s   r   c                  C   s   d} dgdgdggd }g dd }g dd }g dd }t d| d	||j}t d| d	||j}t d| d	||j}||g||gksIJ d S )
Nr   r   r   r1   )ABC)r   r   r1   )r   r1   r:   T)r   r.   )r   rC   r   )r.   r^   ZY1ZY2ZY3x1Zx2Zx3r>   r>   rM   !test_set_oob_score_label_encoding$  s$   


r   c                 C   s"   | j ddd} d| t|  < | S )NfloatT)r   r   )Zastyper   isfinite)r^   r>   r>   rM   replace>  s   r  c               	   C   sL  t g dg ddt jdgdt jdgdt j dgg} t g dt g dg dg dg dg dgg}|D ]k}t }ttt|}|| |	|  t
|}|| |	| }|j|jksbJ t }t|}tt || | W d    n1 sw   Y  t
|}tt || | W d    n1 sw   Y  q8d S )Nr1   r   r   r:   N   r:   r  )r:   r   r   r   r   )r:   r1   	   )r   r     )r   arraynaninfr'   r   r   r  rC   rD   r   r   r   r   r   )r^   Zy_valuesr_   Z	regressorpipelineZbagging_regressory_hatr>   r>   rM   *test_bagging_regressor_with_missing_inputsD  sH   


r  c               	   C   s4  t g dg ddt jdgdt jdgdt j dgg} t g d}t }ttt|}|| |	|  t
|}|| | |	| }|j|jksLJ ||  ||  t }t|}tt || | W d    n1 ssw   Y  t
|}tt || | W d    d S 1 sw   Y  d S )Nr  r  r:   r  )r   r  r  r  r  )r   r  r  r	  r&   r   r   r  rC   rD   r   r   rR   rQ   r   r   r   )r^   r_   Z
classifierr
  Zbagging_classifierr  r>   r>   rM   +test_bagging_classifier_with_missing_inputsm  s6   

	


"r  c                  C   sD   t ddgddgg} t ddg}tt ddd}|| | d S )Nr1   r:   r   r2   r   g333333?)r5   r.   )r   r  r   r   rC   r   r>   r>   rM   test_bagging_small_max_features  s   r  c                 C   sj   t j| }|dd}t d}G dd dt}t| ddd}||| t|j	d j
|jd  d S )N   r2   c                   @   s   e Zd ZdZdd ZdS )z8test_bagging_get_estimators_indices.<locals>.MyEstimatorz7An estimator which stores y indices information at fit.c                 S   s
   || _ d S rW   )_sample_indicesr\   r>   r>   rM   rC     s   
z<test_bagging_get_estimators_indices.<locals>.MyEstimator.fitN)rd   re   rf   rg   rC   r>   r>   r>   rM   MyEstimator  s    r  r1   r   )r<   r=   r.   )r   randomZRandomStateZrandnZaranger'   r   rC   r*   r{   r  r   )Zglobal_random_seedrE   r^   r_   r  r   r>   r>   rM   #test_bagging_get_estimators_indices  s   
r  zbagging, expected_allow_nanr1   r9   c                 C   s   |   jj|ks
J dS )z*Check that bagging inherits allow_nan tag.N)Z__sklearn_tags__Z
input_tags	allow_nan)r   Zexpected_allow_nanr>   r>   rM   test_bagging_allow_nan_tag  s   r  )Zenable_metadata_routingmodelr   )r<   r=   c                 C      |  tjtj dS )zAMake sure that metadata routing works with non-default estimator.NrC   r?   r@   rA   r  r>   r>   rM   "test_bagging_with_metadata_routing  s   r  zsub_estimator, caller, calleerD   rR   rQ   c                 C   s   t ddgddgddgg}g d}dgd}}t }| |d}d	| d
 }	t||	ddd t|d}
|
|| t|
|t ddgddgddgg||d t|sVJ |D ]}t|||||d qXdS )a  Test that metadata routing works in `BaggingClassifier` with dynamic selection of
    the sub-estimator's methods. Here we test only specific test cases, where
    sub-estimator methods are not present and are not tested with `ConsumingClassifier`
    (which possesses all the methods) in
    sklearn/tests/test_metaestimators_metadata_routing.py: `BaggingClassifier.predict()`
    dynamically routes to `predict` if the sub-estimator doesn't have `predict_proba`
    and `BaggingClassifier.predict_log_proba()` dynamically routes to `predict_proba` if
    the sub-estimator doesn't have `predict_log_proba`, or to `predict`, if it doesn't
    have it.
    r   r:   r1   r2   r  )r1   r:   r   a)registryset__requestT)r   metadatar   r   )r^   r   r  )objr~   parentr   r  N)r   r  r$   rz   r   rC   r   r%   )Zsub_estimatorZcallerZcalleer^   r_   r   r  r  r<   Zset_callee_requestr   r>   r>   rM   3test_metadata_routing_with_dynamic_method_selection  s0   

r"  c                 C   r  )z^Make sure that we still can use an estimator that does not implement the
    metadata routing.Nr  r  r>   r>   rM   -test_bagging_without_support_metadata_routing  s   r#  )r   )trg   	itertoolsr   r   r   numpyr   r   Zsklearnr   Zsklearn.baser   Zsklearn.datasetsr   r   r   Zsklearn.dummyr	   r
   Zsklearn.ensembler   r   r   r   r   r   r   r   Zsklearn.feature_selectionr   Zsklearn.linear_modelr   r   Zsklearn.model_selectionr   r   r   Zsklearn.neighborsr   r   Zsklearn.pipeliner   Zsklearn.preprocessingr   r   Zsklearn.random_projectionr   Zsklearn.svmr   r    Z%sklearn.tests.metadata_routing_commonr!   r"   r#   r$   r%   Zsklearn.treer&   r'   Zsklearn.utilsr(   Zsklearn.utils._testingr)   r*   Zsklearn.utils.fixesr+   r,   rE   r?   ZpermutationrA   r   permr@   r   rN   markZparametrizer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r"  r#  r>   r>   r>   rM   <module>   s   (
!


)
8	*#%$),	

() 

		

*
