o
    iy                     @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlZddlZddlZddlZddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlm Z m!Z!m"Z" dZ#dZ$dZ%G dd dZ&eeddZdd Z'ej()ddddidddfdddddddfdddidddfdd dddddfd!dd!id"d#dfd!d$ddd"d#dfd%dd%id&d'd(fd)dd)id*d+dfd)d,d-id*d+dfd.dd.id#d/dfd.d,d0id#d/dfd1dd1id2d&dfgej()d3d4d5gej()d6dd7gd8d9 Z*ej()ddddidddfdddddddfdddidddfdd dddddfd!dd!id"d#dfd!d$ddd"d#dfd%dd%id&d'd(fd)dd)id*d+dfd)d,d-id*d+dfd.dd.id#d/dfd.d,d0id#d/dfgej()d3d4d5gd:d; Z+ej()dg d<d=d> Z,ej()d3d4d5gd?d@ Z-ej()d3d4d5gdAdB Z.ej()d3d4d5gej()dCdDdDdEggdFdG Z/ej()dg dHej()d3d4d5gdIdJ Z0ej()dg dKej()d3d4d5gdLdM Z1dNdO Z2ej3dPdQdRdS Z4ej3dPdQdTdU Z5ej()dVg dWej()d6dd7gdXdY Z6ej()dZd3d[id\fd]d[id^fgd_d` Z7ej()daddbdcdbdbdcd7d5dcd7dbdcgddde Z8ej(9dfej()dZd3d5idgfd]didhfd5ddidhfgdjdk Z:ej(9dfej()dldmdngdodp Z;dqdr Z<ej()d6dd7gdsdt Z=ej()d6dd7gdudv Z>ej()d6dd7gej()d3d4d5gdwdx Z?ej()d6dd7gej()dyddzidd{dd|gd}d~ Z@ej()d6dd7gej()ddzd,d{ieAdfddddgdeAdfd1d1d7deAdfdddddeAdfdddd7deAdfddddeBdfddddgdeBdfgej()d3d4d5gdd ZCej()ddddd|eAdfdddeAdfdddd|eAdfi eAdfgdd ZDej()d6dd7gdd ZEej()d6dd7gdd ZFej()d6dd7gdd ZGdd ZHej()d6dd7gdd ZIej()ddd7gdd ZJdd ZKdd ZLej()d6dd7gdd ZMej()dg ddd ZNdd ZOej()d6dd7gej()d3ddd ZPdd ZQdd ZRdd ZSdS )zTest the openml loader.    N)partial)	resources)BytesIO)	HTTPError)config_context)fetch_openml)_get_local_path_open_openml_url_retry_with_clean_cache)Bunch)check_pandas_support)SkipTestassert_allcloseassert_array_equalz"sklearn.datasets.tests.data.openmlTzdata/v1/download/{}c                   @   sF   e Zd Zdd ZdddZdd Zdd	 Zd
d Zdd Zdd Z	dS )_MockHTTPResponsec                 C   s   || _ || _d S N)datais_gzip)selfr   r    r   q/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/sklearn/datasets/tests/test_openml.py__init__'   s   
z_MockHTTPResponse.__init__c                 C   s   | j |S r   )r   read)r   amtr   r   r   r   +   s   z_MockHTTPResponse.readc                 C   s   | j   d S r   )r   closer   r   r   r   r   .   s   z_MockHTTPResponse.closec                 C   s   | j rddiS i S )NzContent-Encodinggzipr   r   r   r   r   info1   s   z_MockHTTPResponse.infoc                 C   s
   t | jS r   )iterr   r   r   r   r   __iter__6   s   
z_MockHTTPResponse.__iter__c                 C   s   | S r   r   r   r   r   r   	__enter__9      z_MockHTTPResponse.__enter__c                 C   s   dS )NFr   )r   exc_typeexc_valexc_tbr   r   r   __exit__<   r#   z_MockHTTPResponse.__exit__N)r   )
__name__
__module____qualname__r   r   r   r   r!   r"   r'   r   r   r   r   r   &   s    
r   )	data_homec                    s   d
ddddt j	td d|  fdd	  	fd
d
fddfddfdd 	fdd
fdd}tr]| tjjd| d S d S )Nz(https://api.openml.org/api/v1/json/data/z1https://api.openml.org/api/v1/json/data/features/z'https://www.openml.org/data/v1/downloadz-https://api.openml.org/api/v1/json/data/list/z.gz.id_c                    s~   t dd| tdd  |   }|dddddd	d
dddddddddddddddS )Nz\W-zhttps://api.openml.org/z-json-data-listz-jdlz-json-data-featuresz-jdfz-json-data-qualitiesz-jdqz
-json-dataz-jdz
-data_namez-dnz	-downloadz-dlz-limitz-lz-data_versionz-dvz-statusz-sz-deactivatedz-dactz-activez-act)resublenreplace)urlsuffixoutput)path_suffixr   r   
_file_nameU   s$   
z4_monkey_patch_webbased_functions.<locals>._file_namec           	         s   |  |sJ |d|  | |}t| }|d.}|r6r6t| }t|dW  d    S |d}t| }t|dW  d    S 1 sPw   Y  d S )N does not match rbTF)
startswithr   filesopenr   r   r   )	r3   has_gzip_headerexpected_prefixr4   data_file_namedata_file_pathffpdecompressed_f)r7   data_modulegzip_responseread_fnr   r   _mock_urlopen_sharedk   s   

$z>_monkey_patch_webbased_functions.<locals>._mock_urlopen_sharedc                        | |ddS N.jsonr3   r=   r>   r4   r   r3   r=   )rG   url_prefix_data_descriptionr   r   _mock_urlopen_data_description|      zH_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_descriptionc                    rH   rI   r   rL   )rG   url_prefix_data_featuresr   r   _mock_urlopen_data_features   rO   zE_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_featuresc                    s    |  ddd } ||ddS )N/   r   z.arffrK   )rsplit)r3   r=   Zurl_without_filename)rG   url_prefix_download_datar   r   _mock_urlopen_download_data   s   	zE_monkey_patch_webbased_functions.<locals>._mock_urlopen_download_datac           	         s  |  sJ d|  | d}t| }|d}|d}| d}t|}W d    n1 s;w   Y  d|v rNtd ddd t	 d|d,}|rht	| }t
|d	W  d    S |d}t	| }t
|d
W  d    S 1 sw   Y  d S )Nr8   rJ   r9   zutf-8error  Simulated mock errorr3   codemsghdrsrB   TF)r:   r   r;   r<   r   decodejsonloadsr   r   r   )	r3   r=   r?   r@   rA   rC   Z	decoded_sZ	json_datarB   )r7   rD   rF   url_prefix_data_listr   r   _mock_urlopen_data_list   s.   


$zA_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_listc                    sr   |   }| ddk}|r||S |r||S |r)||S |r3 ||S td| )NzAccept-encodingr   zUnknown mocking URL pattern: %s)get_full_url
get_headerr:   
ValueError)requestargskwargsr3   r=   )rN   rQ   rb   rV   rM   rP   ra   rU   r   r   _mock_urlopen   s   







z7_monkey_patch_webbased_functions.<locals>._mock_urlopenurlopen)r   r<   OPENML_TEST_DATA_MODULEtest_offlinesetattrsklearndatasets_openml)contextdata_idrE   ri   r   )r7   rN   rQ   rb   rV   rG   rD   rE   r6   rF   rM   rP   ra   rU   r    _monkey_patch_webbased_functionsG   s"   rs   z9data_id, dataset_params, n_samples, n_features, n_targets=   rr         rS   iris)nameversion      &   Zanneal1        cpu鍞     H      _  
      rx   zadult-census  M   ZMiceProtein  i  parser	liac-arffpandasrE   Fc           
      C   s  t d}t| ||d td	dd|d|}	t|	jd |ks"J t|	ts)J t|	j|j	s2J |	jj
||| fks>J t|	j|j	sGJ |	jj
||fksQJ |dkrht|	j|js^J |	jj
|fksgJ nt|	j|j	sqJ |	jj
||fks{J |	jdu sJ dS )
zCheck the behaviour of `fetch_openml` with `as_frame=True`.

    Fetch by ID and/or name (depending if the file was previously cached).
    r   rE   TFas_framecacher   idrS   Nr   )pytestimportorskiprs   r   intdetails
isinstancer   frame	DataFrameshaper   targetSeries
categories)
monkeypatchrr   dataset_params	n_samples
n_features	n_targetsr   rE   pdbunchr   r   r   test_fetch_openml_as_frame_true   s*   
(r   c                 C   s   t d t| |dd td	dd|d|}t|jd |ks"J t|ts)J |jdu s0J t|j	t
js9J |j	j||fksCJ t|jt
jsLJ |dkrZ|jj|fksYJ n
|jj||fksdJ t|jtslJ dS )
znCheck the behaviour of `fetch_openml` with `as_frame=False`.

    Fetch both by ID and/or name + version.
    r   Tr   Fr   r   NrS   r   )r   r   rs   r   r   r   r   r   r   r   npZndarrayr   r   r   dict)r   rr   r   r   r   r   r   r   r   r   r    test_fetch_openml_as_frame_false  s&   
$r   )rt   r   r   c           
         s   t dt| |dd t|dddd}t|dddd}|j|j}  fdd}||}j|  |j|j}j|j	   fd	d
}||}	j|	 dS )z:Check the consistency of the LIAC-ARFF and pandas parsers.r   Tr   Fr   rr   r   r   r   c                    s(    | j  }jj|r| |jS | S r   )rx   apitypesis_numeric_dtypeastypedtypeZseriesZpandas_series)data_pandasr   r   r   convert_numerical_dtypesk  s   
zFtest_fetch_openml_consistency_parser.<locals>.convert_numerical_dtypesc                    sF    | j  }jj|r| |jS t|jjr!| j	|jj
S | S r   )rx   r   r   r   r   r   r   CategoricalDtypecatZrename_categoriesr   r   )frame_pandasr   r   r   (convert_numerical_and_categorical_dtypes  s   
zVtest_fetch_openml_consistency_parser.<locals>.convert_numerical_and_categorical_dtypesN)
r   r   rs   r   r   applytestingassert_frame_equalr   feature_names)
r   rr   Z
bunch_liacbunch_pandasZ	data_liacr   Zdata_liac_with_fixed_dtypesZ
frame_liacr   Zframe_liac_with_fixed_dtypesr   )r   r   r   r   $test_fetch_openml_consistency_parserS  s2   


r   c                 C   s\   t d d}t| |dd t|dd|d}t|dd|d}t|j|j t|j|j dS )z^Check the equivalence of the dataset when using `as_frame=False` and
    `as_frame=True`.
    r   rt   Tr   Fr   N)r   r   rs   r   r   r   r   r   )r   r   rr   Zbunch_as_frame_trueZbunch_as_frame_falser   r   r   -test_fetch_openml_equivalence_array_dataframe  s"   
r   c                 C   sn  t d}|jjj}d}d}d}d}|g d}tjgd }	g d}
d	}t| |d
 t|d
d|d}|j	}|j
}|j}t||jsCJ t|j|	ksMJ |j|ksTJ t|j|
ks^J t|j|
kshJ |j|gkspJ t||jsxJ |j|ksJ |j|ksJ |j|ksJ |jjsJ t||jsJ |j|ksJ t|j|	|g ksJ |jjsJ dS )z>Check fetching on a numerical only dataset with string labels.r   rt   ru   rv   )ru   )ru      )zIris-setosazIris-versicolorzIris-virginicarv   )sepallength
sepalwidthpetallength
petalwidthclassTFr   N)r   r   r   r   r   r   Zfloat64rs   r   r   r   r   r   r   alldtypesr   columnsr   Ztarget_namesr   r   rx   indexZ	is_unique)r   r   r   r   rr   Z
data_shapeZtarget_shapeZframe_shapeZtarget_dtypeZdata_dtypesZ
data_namestarget_namer   r   r   r   r   r   r   test_fetch_openml_iris_pandas  sJ   

r   target_columnr   r   c                 C   s   t d}d}t| |d t|dd||d}t|dd|d}|j|j|j t|trB|j	|j
j|| |jjdks@J d	S |j
j|ksJJ |jjdksRJ d	S )
z@Check that we can force the target to not be the default target.r   rt   TF)rr   r   r   r   r   r   )ru      r   N)r   r   rs   r   r   r   r   r   listZassert_index_equalr   r   Indexr   r   rx   )r   r   r   r   rr   Zbunch_forcing_targetZbunch_defaultr   r   r   !test_fetch_openml_forcing_targets  s0   

r   )rt   rz   r}   r   r   c                 C   s   t d}t| |dd t|ddd|d}t|ddd|d\}}|j|j| t||jr8|j	|j
| dS |j|j
| dS )z>Check the behaviour of `return_X_y=True` when `as_frame=True`.r   Tr   Frr   r   r   
return_X_yr   N)r   r   rs   r   r   r   r   r   r   assert_series_equalr   )r   rr   r   r   r   Xyr   r   r   .test_fetch_openml_equivalence_frame_return_X_y  s(   

r   )rt   r}   r   r   c                 C   s\   t d t| |dd t|ddd|d}t|ddd|d\}}t|j| t|j| dS )z?Check the behaviour of `return_X_y=True` when `as_frame=False`.r   Tr   Fr   N)r   r   rs   r   r   r   r   )r   rr   r   r   r   r   r   r   r   .test_fetch_openml_equivalence_array_return_X_y  s$   

r   c                 C   sf   t d d}t| |dd d}t||ddd}t||ddd}|jjjdks)J |jjd	ks1J d
S )z9Check the difference between liac-arff and pandas parser.r   r   Tr   Fr   r   rA   ON)r   r   rs   r   r   r   kind)r   rr   r   Zbunch_liac_arffr   r   r   r   $test_fetch_openml_difference_parsers6  s$   
r   module)scopec                   C   s0   g dg dg dg dg dg dg ddS )	z+Returns the columns names for each dataset.)r   r   r   r   r   )'familyzproduct-typeZsteelZcarbonZhardnesstemper_rolling	conditionformabilityZstrength
non-ageingsurface-finishzsurface-qualityenamelabilitybcbfbtbw%2Fmeblmchromphoscbondmarviexptlferrocorrblue%2Fbright%2Fvarn%2Fcleanlustrejurofmspr   Zthickwidthr1   oilZborepackingr   )vendorZMYCTZMMINZMMAXZCACHZCHMINZCHMAXr   )NZ Mean_Acc1298_Mean_Mem40_CentroidZMean_Acc1298_Mean_Mem40_RolloffZMean_Acc1298_Mean_Mem40_FluxZMean_Acc1298_Mean_Mem40_MFCC_0ZMean_Acc1298_Mean_Mem40_MFCC_1ZMean_Acc1298_Mean_Mem40_MFCC_2ZMean_Acc1298_Mean_Mem40_MFCC_3ZMean_Acc1298_Mean_Mem40_MFCC_4ZMean_Acc1298_Mean_Mem40_MFCC_5ZMean_Acc1298_Mean_Mem40_MFCC_6ZMean_Acc1298_Mean_Mem40_MFCC_7ZMean_Acc1298_Mean_Mem40_MFCC_8ZMean_Acc1298_Mean_Mem40_MFCC_9ZMean_Acc1298_Mean_Mem40_MFCC_10ZMean_Acc1298_Mean_Mem40_MFCC_11ZMean_Acc1298_Mean_Mem40_MFCC_12ZMean_Acc1298_Std_Mem40_CentroidZMean_Acc1298_Std_Mem40_RolloffZMean_Acc1298_Std_Mem40_FluxZMean_Acc1298_Std_Mem40_MFCC_0ZMean_Acc1298_Std_Mem40_MFCC_1ZMean_Acc1298_Std_Mem40_MFCC_2ZMean_Acc1298_Std_Mem40_MFCC_3ZMean_Acc1298_Std_Mem40_MFCC_4ZMean_Acc1298_Std_Mem40_MFCC_5ZMean_Acc1298_Std_Mem40_MFCC_6ZMean_Acc1298_Std_Mem40_MFCC_7ZMean_Acc1298_Std_Mem40_MFCC_8ZMean_Acc1298_Std_Mem40_MFCC_9ZMean_Acc1298_Std_Mem40_MFCC_10ZMean_Acc1298_Std_Mem40_MFCC_11ZMean_Acc1298_Std_Mem40_MFCC_12ZStd_Acc1298_Mean_Mem40_CentroidZStd_Acc1298_Mean_Mem40_RolloffZStd_Acc1298_Mean_Mem40_FluxZStd_Acc1298_Mean_Mem40_MFCC_0ZStd_Acc1298_Mean_Mem40_MFCC_1ZStd_Acc1298_Mean_Mem40_MFCC_2ZStd_Acc1298_Mean_Mem40_MFCC_3ZStd_Acc1298_Mean_Mem40_MFCC_4ZStd_Acc1298_Mean_Mem40_MFCC_5ZStd_Acc1298_Mean_Mem40_MFCC_6ZStd_Acc1298_Mean_Mem40_MFCC_7ZStd_Acc1298_Mean_Mem40_MFCC_8ZStd_Acc1298_Mean_Mem40_MFCC_9ZStd_Acc1298_Mean_Mem40_MFCC_10ZStd_Acc1298_Mean_Mem40_MFCC_11ZStd_Acc1298_Mean_Mem40_MFCC_12ZStd_Acc1298_Std_Mem40_CentroidZStd_Acc1298_Std_Mem40_RolloffZStd_Acc1298_Std_Mem40_FluxZStd_Acc1298_Std_Mem40_MFCC_0ZStd_Acc1298_Std_Mem40_MFCC_1ZStd_Acc1298_Std_Mem40_MFCC_2ZStd_Acc1298_Std_Mem40_MFCC_3ZStd_Acc1298_Std_Mem40_MFCC_4ZStd_Acc1298_Std_Mem40_MFCC_5ZStd_Acc1298_Std_Mem40_MFCC_6ZStd_Acc1298_Std_Mem40_MFCC_7ZStd_Acc1298_Std_Mem40_MFCC_8ZStd_Acc1298_Std_Mem40_MFCC_9ZStd_Acc1298_Std_Mem40_MFCC_10ZStd_Acc1298_Std_Mem40_MFCC_11ZStd_Acc1298_Std_Mem40_MFCC_12ZBH_LowPeakAmpZBH_LowPeakBPMZBH_HighPeakAmpZBH_HighPeakBPMZBH_HighLowRatioZBHSUM1ZBHSUM2ZBHSUM3zamazed.suprisedzhappy.pleasedzrelaxing.calmzquiet.stillz
sad.lonelyzangry.aggresive)ageZ	workclasszfnlwgt:z
education:zeducation-num:zmarital-status:zoccupation:zrelationship:zrace:zsex:zcapital-gain:zcapital-loss:zhours-per-week:znative-country:r   )NZDYRK1A_NZITSN1_NZBDNF_NZNR1_NZNR2A_NZpAKT_NZpBRAF_NZ	pCAMKII_NZpCREB_NZpELK_NZpERK_NZpJNK_NZPKCA_NZpMEK_NZpNR1_NZpNR2A_NZpNR2B_NZpPKCAB_NZpRSK_NZAKT_NZBRAF_NZCAMKII_NZCREB_NZELK_NZERK_NZGSK3B_NZJNK_NZMEK_NZTRKA_NZRSK_NZAPP_NZ
Bcatenin_NZSOD1_NZMTOR_NZP38_NZpMTOR_NZDSCR1_NZAMPKA_NZNR2B_NZpNUMB_NZRAPTOR_NZTIAM1_NZpP70S6_NNUMB_NZP70S6_NZpGSK3B_NZpPKCG_NZCDK5_NZS6_NZADARB1_NZAcetylH3K9_NZRRP1_NZBAX_NZARC_NZERBB4_NZnNOS_NZTau_NZGFAP_NZGluR3_NZGluR4_NZIL1B_NZP3525_NZpCASP9_NZPSD95_NZSNCA_NZUbiquitin_NZpGSK3B_Tyr216_NZSHH_NZBAD_NBCL2_NZpS6_NZpCFOS_NZSYP_NZ	H3AcK18_NZEGR1_NZH3MeK4_NZCaNA_Nr   )ZpclassZsurvivedrx   sexr   ZsibspZparchticketfarecabinembarkedboatbody	home.destrt   rz   r}   r   r   r   r   r   r   r   r   r   datasets_column_namesU  s   )PP r  c                   C   s   i i ddddddddd	d
dddddddd
dddddddddddddd
ddddddddddd
dd
i i i ddiddddddd d!d"S )#Nr   r{   r   	   r   rz   r   rv   r   r   r   r   r   r   r   r      r   r   r   r   r   r   )
r   r   r   r   r   r   r   r   r   r   r   r   i  rS   i  i7  i  i4  )r   r   r   r   r   r  r  r  r   r   r   r   r   datasets_missing_valuesH  sx   	
r  zJdata_id, parser, expected_n_categories, expected_n_floats, expected_n_ints))rt   r   rS   rv   r   )rt   r   rS   rv   r   )rz   r   !   r   r   )rz   r   r  rz   rv   )r}   r   rS   r   r   )r}   r   rS   r   r   )r   r   r   r   r   )r   r   r   E   r   )r   r   r  r   r   )r   r   r  r   r   )r   r   rS   r   r   )r   r   rS   r   r   )r   r   r   r   r   )r   r   r   r   r   c	                    s   t d}	|	jjj t| ||d t|dd|d}
|
j}t fdd|j	D }tdd |j	D }td	d |j	D }||ksBJ ||ksHJ ||ksNJ |j
 || ksYJ |   }| D ]\}}|| |d
}||kswJ qedS )zYCheck that `fetch_openml` infer the right number of categories, integers, and
    floats.r   r   TFr   c                    s   g | ]	}t | r|qS r   )r   .0r   r   r   r   
<listcomp>      z5test_fetch_openml_types_inference.<locals>.<listcomp>c                 S      g | ]	}|j d kr|qS )rA   r   r
  r   r   r   r    r  c                 S   r  )ir  r
  r   r   r   r    r  r   N)r   r   r   r   r   rs   r   r   r1   r   r   tolistisnasumto_dictitemsget)r   rr   r   Zexpected_n_categoriesZexpected_n_floatsZexpected_n_intsrE   r  r  r   r   r   Zn_categoriesZn_floatsZn_intsZframe_feature_to_n_nanrx   Z	n_missingZexpected_missingr   r  r   !test_fetch_openml_types_inferencey  s0   
(
r  zparams, err_msgunknownz:The 'parser' parameter of fetch_openml must be a str amongr   z<The 'as_frame' parameter of fetch_openml must be an instancec                 C   sV   d}t | |d tjt|d tdd|i| W d    d S 1 s$w   Y  d S )Nr   Tmatchrr   r   )rs   r   raisesre   r   r   paramserr_msgrr   r   r   r   &test_fetch_openml_validation_parameter  s
   "r   r  auto)r   r   c                 C   s   d}z	t d W td ty?   t| |d d}tjt|d td	d|i| W d   Y dS 1 s7w   Y  Y dS w )
z=Check that we raise the proper errors when we require pandas.r   Z!test_fetch_openml_requires_pandasTz:requires pandas to be installed. Alternatively, explicitlyr  rr   Nz.This test requires pandas to not be installed.r   )r   ImportErrorrs   r   r  r   r   )r   r  rr   r  r   r   r   'test_fetch_openml_requires_pandas_error  s   
&r#  z2ignore:Version 1 of dataset Australian is inactivez:Sparse ARFF datasets cannot be loaded with parser='pandas'z9Sparse ARFF datasets cannot be loaded with as_frame=True.)r   r   c                 C   sb   t d d}t| |d t jt|d td|dd| W d   dS 1 s*w   Y  dS )	ztCheck that we raise the expected error for sparse ARFF datasets and
    a wrong set of incompatible parameters.
    r   $  Tr  F)rr   r   Nr   )r   r   rs   r  re   r   r  r   r   r   #test_fetch_openml_sparse_arff_error  s   
"r%  zdata_id, data_type)rt   	dataframe)r$  sparsec                 C   sN   t d}t| |d t|ddd}|dkr|jntjj}t|j	|s%J dS )z&Check the auto mode of `fetch_openml`.r   Tr!  F)rr   r   r   r&  N)
r   r   rs   r   r   scipyr'  Z
csr_matrixr   r   )r   rr   Z	data_typer   r   klassr   r   r   test_fetch_openml_auto_mode  s
   

r*  c              	   C   s   t d d}t| |d d}t jt|d. tdd t|ddd	d
 W d   n1 s/w   Y  W d   dS W d   dS 1 sGw   Y  dS )z[Check that we raise a warning regarding the working memory when using
    LIAC-ARFF parser.r   r   Tz*Could not adhere to working_memory config.r  gư>)Zworking_memoryFr   r   N)r   r   rs   warnsUserWarningr   r   )r   rr   r\   r   r   r   :test_convert_arff_data_dataframe_warning_low_memory_pandas#  s    
"r-  c                 C   sb   d}d}t | || td}tjt|d t|dddd W d   dS 1 s*w   Y  dS )	z\Check that a warning is raised when multiple versions exist and no version is
    requested.rt   rw   a;  Multiple active versions of the dataset matching the name iris exist. Versions may be fundamentally different, returning version 1. Available versions:
- version 1, status: active
  url: https://www.openml.org/search?type=data&id=61
- version 3, status: active
  url: https://www.openml.org/search?type=data&id=969
r  Fr   )rx   r   r   r   N)rs   r/   escaper   r+  r,  r   )r   rE   rr   Z	data_namer\   r   r   r   ,test_fetch_openml_iris_warn_multiple_version6  s   	"r/  c                 C   sT   d}d}d}d}t | || t||dddd}|jj||fks!J |jdu s(J dS )z/Check that we can get a dataset without target.rt   Nru   r   Fr   rr   r   r   r   r   )rs   r   r   r   r   )r   rE   rr   r   Zexpected_observationsZexpected_featuresr   r   r   r   test_fetch_openml_no_targetQ  s   r1  c                 C   sb   t d d}t| ||d t|dd|d}|jjd }|jd   s'J t|j	g d d	S )
zRcheck that missing values in categories are compatible with pandas
    categoricalr   iY  r   FTrr   r   r   r   r   )ZFEMALEZMALE_N)
r   r   rs   r   r   r   r  anyr   r   )r   rE   r   rr   ZpenguinsZ	cat_dtyper   r   r   test_missing_values_pandase  s   
r5  r     glass2)rr   rx   ry   c                 C   s~   d}t | || d}tjt|d tddddd|}W d   n1 s'w   Y  |jjdks4J |jd	 d
ks=J dS )z;Check that we raise a warning when the dataset is inactive.r6  z(Version 1 of dataset glass2 is inactive,r  Fr   )r   r   r   N)   r  r   Z40675r   )rs   r   r+  r,  r   r   r   r   )r   rE   r   rr   r\   r7  r   r   r   test_fetch_openml_inactive{  s   
r9  z"data_id, params, err_type, err_msgzNo active dataset glass2 foundr   r   )rr   r   z1Can only handle homogeneous multi-target datasets)rr   r   zOSTRING attributes are not supported for array representation. Try as_frame=Truer   )rr   r   r   zTarget column 'family'	undefinedz(Could not find target_column='undefined'c                 C   sr   t | || |dds|dkrtd tj||d tdd|d| W d    d S 1 s2w   Y  d S )Nr   Tr   r  F)r   r   r   )rs   r  r   r   r  r   )r   rE   rr   r  err_typer  r   r   r   r   test_fetch_openml_error  s   2
"r<  zparams, err_type, err_msgr   ry   zCThe 'version' parameter of fetch_openml must be an int in the rangeZnAmE)rr   rx   zCThe 'data_id' parameter of fetch_openml must be an int in the rangez6The 'version' parameter of fetch_openml must be an intzFNeither name nor data_id are provided. Please provide name or data_id.c                 C   sB   t j||d tdi |  W d    d S 1 sw   Y  d S )Nr  r   )r   r  r   )r  r;  r  r   r   r   )test_fetch_openml_raises_illegal_argument  s   "r=  c                 C   s^  d}d}d}t | || d}||}tjt|d t||dddd W d    n1 s.w   Y  d	}||}tjt|d t||dddd W d    n1 sUw   Y  d}||}tjt|d t||d
gdddd W d    n1 s~w   Y  d	}||}tjt|d t||d
gdddd W d    d S 1 sw   Y  d S )Nr   z.target_column='{}' has flag is_row_identifier.z&target_column='{}' has flag is_ignore.ZMouseIDr  Fr   r0  ZGenotyper   )rs   formatr   r+  r,  r   )r   rE   rr   Zexpected_row_id_msgZexpected_ignore_msgZ
target_colr\   r   r   r   test_warn_ignore_attribute  s`   

	

"r?  c                 C   X   d}t | || d}tjt|d t|dddd W d    d S 1 s%w   Y  d S )NrS   zJOpenML registered a problem with the dataset. It might be unusable. Error:r  Fr   r2  rs   r   r+  r,  r   r   rE   rr   r\   r   r   r   test_dataset_with_openml_error     "rC  c                 C   r@  )Nr   zFOpenML raised a warning on the dataset. It might be unusable. Warning:r  Fr   r2  rA  rB  r   r   r    test_dataset_with_openml_warning"  rD  rE  c                 C   s   t d d}t| |dd |dddd}tdi |}tdi |dddii}td	d
 |jd jjD s9J tdd
 |jd jjD rIJ dS )zACheck that we can overwrite the default parameters of `read_csv`.r   6  Frr   rE   Tr   Zread_csv_kwargsskipinitialspacec                 s       | ]}| d V  qdS  Nr:   r  r   r   r   r   	<genexpr>>  s    

zFtest_fetch_openml_overwrite_default_params_read_csv.<locals>.<genexpr>r   c                 s   rI  rJ  rL  rM  r   r   r   rN  A  s
    
Nr   )	r   r   rs   r   r   r   r   r   r4  )r   rr   common_paramsZadult_without_spacesZadult_with_spacesr   r   r   3test_fetch_openml_overwrite_default_params_read_csv+  s(   
	rP  c           
      C   s|   d}t | || t|d }d| }t|d}t||}t||}tj	|s-J t||}	|
 |	
 ks<J d S )Nrt   /filename.arffhttps://www.openml.org/scikit_learn_data)rs   _MONKEY_PATCH_LOCAL_OPENML_PATHr>  strmkdirr	   r   ospathisfiler   )
r   rE   tmpdirrr   openml_pathr3   cache_directoryZ	response1locationZ	response2r   r   r   test_open_openml_url_cacheK  s   



r^  write_to_diskc                    s   d}t |d }d| }t|d}t||  fdd}| tjjd| t	j
tdd	 t|| W d    n1 sAw   Y  tj rNJ d S )
Nrt   rQ  rR  rS  c                    sF   rt  d}|d W d    td1 sw   Y  td)Nw Invalid request)r<   writere   )rf   rg   rh   rA   r]  r_  r   r   ri   e  s   
z>test_open_openml_url_unlinks_local_path.<locals>._mock_urlopenrj   rb  r  )rT  r>  rU  rV  r   rm   rn   ro   rp   r   r  re   r	   rW  rX  exists)r   rZ  r_  rr   r[  r3   r\  ri   r   rd  r   'test_open_openml_url_unlinks_local_path]  s   

rf  c                    s   d}t |}t| d}t|| ttj  t	 d}|
d W d    n1 s1w   Y  t|| fdd}d}tjt|d | }W d    n1 sXw   Y  |d	kscJ d S )
Nrt   rS  r`  ra  c                      s   t j r
tddS )NzFile exist!rS   )rW  rX  re  	Exceptionr   r]  r   r   
_load_data}  s   z/test_retry_with_clean_cache.<locals>._load_dataz!Invalid cache, redownloading filer  rS   )rT  r>  rU  rV  r   rW  makedirsrX  dirnamer<   rc  r
   r   r+  RuntimeWarning)rZ  rr   r[  r\  rA   ri  Zwarn_msgresultr   rh  r   test_retry_with_clean_caches  s   

rn  c                 C   sl   d}t |}t| d}t||dd }d}tjt|d |  W d    d S 1 s/w   Y  d S )Nrt   rS  c                   S      t d ddd t d)NrX   rY   rZ   r   r   r   r   r   r   ri    s   z:test_retry_with_clean_cache_http_error.<locals>._load_datarY   r  )rT  r>  rU  rV  r
   r   r  r   )rZ  rr   r[  r\  ri  	error_msgr   r   r   &test_retry_with_clean_cache_http_error  s   

"rr  c           
      C   s   dd }d}t |d}t| || t|d|dddd\}}| tjjd	| t|d|dddd\}}	tj	
|| tj	
||	 d S )
Nc                 _   s   t d|   )NzhThis mechanism intends to test correct cachehandling. As such, urlopen should never be accessed. URL: %s)re   rc   rf   rg   rh   r   r   r   _mock_urlopen_raise  s
   z4test_fetch_openml_cache.<locals>._mock_urlopen_raisert   rS  TFr   )rr   r   r+   r   r   r   rj   )rU  rV  rs   r   rm   rn   ro   rp   r   r   r   )
r   rE   rZ  rt  rr   r\  Z	X_fetchedZ	y_fetchedZX_cachedZy_cachedr   r   r   test_fetch_openml_cache  s.   
	
ru  zas_frame, parser))Tr   )Fr   )Tr   )Fr   c                    sT  |s|dkrt d d}t| |d td d|  }d}t|| }|d  |d}t|d}	t|		 }
d	|
t
|
d
 < W d   n1 sMw   Y  t d}||
 W d   n1 shw   Y  tjjj fdd}| tjjd| t t}tjj|d||d W d   n1 sw   Y  |dsJ dS )z/Check that the checksum is working as expected.r   rz   Tr,   r-   zdata-v1-dl-1666876.arff.gzztest_invalid_checksum.arffr9   %   rS   Nwbc                    s\   |   }|dr*t d}| }W d    n1 sw   Y  tt|ddS | S )Nz$data/v1/download/1666876/anneal.arffr9   Tr   )rc   endswithr<   r   r   r   )rf   rg   rh   r3   rA   Zcorrupted_dataZcorrupt_copy_pathZmocked_openml_urlr   r   swap_file_mock  s   

z9test_fetch_openml_verify_checksum.<locals>.swap_file_mockrj   Fr2  Z1666876)r   r   rs   rk   r   r;   r<   r   	bytearrayr   r1   GzipFilerc  rn   ro   rp   rj   rm   r  re   r   r  )r   r   rZ  r   rr   Zoriginal_data_moduleZoriginal_data_file_nameZoriginal_data_pathZ	orig_fileZ	orig_gzipr   Zmodified_gziprz  excr   ry  r   !test_fetch_openml_verify_checksum  s2   

	r~  c              	   C   s   dd }|  tjjd| d}tjttd| dd4}tj	t
dd}t|d d	d
 W d    n1 s7w   Y  t|dksDJ |j  W d    d S 1 sTw   Y  d S )Nc                 _   ro  )Ni  Simulated network errorrZ   rp  rs  r   r   r   _mock_urlopen_network_error  s   zPtest_open_openml_url_retry_on_network_error.<locals>._mock_urlopen_network_errorrj   z"https://api.openml.org/invalid-urlz+A network error occurred while downloading z. Retrying...r  r  r   )delayr   )rm   rn   ro   rp   r   r+  r,  r/   r.  r  r   r	   r1   valuer   )r   r  Zinvalid_openml_urlrecordexc_infor   r   r   +test_open_openml_url_retry_on_network_error  s(   
"r  )r   r   c                 C   sh   |dkr	t d d}t| || tjj|dd|d}|dus!J |d jdks*J d|d	 vs2J dS )
zCheck that we can load the "zoo" dataset.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/14340
    r   >   Fr2  Nr   )e      Zanimalr   )r   r   rs   rn   ro   r   r   )r   rE   r   rr   Zdatasetr   r   r   &test_fetch_openml_with_ignored_feature  s   
r  c                 C   s  t d}d}t| |dd dd|d}tdddi|}tdddi|}|j|j|j |jjd		 r:J |jj
d		 rEJ tddd
d|}tddd
d|}|j|jd |jd  |jd jd		 rsJ |jd j
d		 rJ dS )zCheck that we strip the single quotes when used as a string delimiter.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/23381
    r   r   FrG  Tr   r   rr   r   r   'r   )r   r   r   Nr   )r   r   rs   r   r   r   r   rU  r:   r4  rx  r   )r   r   rr   rO  Zmice_pandasZmice_liac_arffr   r   r   test_fetch_openml_strip_quotes(  s(   
r  c                 C   sj   t d}d}t| |dd dd|d}tdddi|}tdddi|}|j|jd	 |jd	  d
S )zCheck that we can strip leading whitespace in pandas parser.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25311
    r   rF  FrG  Tr  r   r   r   Nr   )r   r   rs   r   r   r   r   r   r   rr   rO  Zadult_pandasZadult_liac_arffr   r   r   $test_fetch_openml_leading_whitespaceE  s   
r  c                 C   sb   t d}d}t| |dd dd|d}td
ddi|}td
ddi|}|j|j|j d	S )zCheck that we can handle escapechar and single/double quotechar.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25478
    r   iZ  FrG  Tr  r   r   Nr   )r   r   rs   r   r   r   r   r  r   r   r   &test_fetch_openml_quotechar_escapecharW  s   
r  )T__doc__r   r_   rW  r/   	functoolsr   	importlibr   ior   urllib.errorr   numpyr   r   Zscipy.sparser(  rn   r   Zsklearn.datasetsr   Zfetch_openml_origZsklearn.datasets._openmlr   r	   r
   Zsklearn.utilsr   Z$sklearn.utils._optional_dependenciesr   Zsklearn.utils._testingr   r   r   rk   rl   rT  r   rs   markZparametrizer   r   r   r   r   r   r   r   r   Zfixturer  r  r  r   r#  filterwarningsr%  r*  r-  r/  r1  r5  r9  re   KeyErrorr<  r=  r?  rC  rE  rP  r^  rf  rn  rr  ru  r~  r  r  r  r  r  r   r   r   r   <module>   s    
+'
?

/

 
s
00

	








-




1

 


"
	.