o
    iY                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlZd dlZd d	lmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d d
l&m'Z'm(Z(m)Z)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 G dd dZ2dd Z3ej4dddd Z5ej4dddd Z6ej4dd Z7ej4dd Z8ej9:ddee2gdd Z;dd  Z<d!d" Z=d#d$ Z>d%d& Z?ej9:d'd(gd(d)gfd*d+ Z@ej9:d,d-d.d/g d0fd1d2d3g d4fd5d6d7d8d9gfgd:d; ZAd<d= ZBej9:d>d?i d@dAgfdBi d@gfdCdDdEidFdGgfgdHdI ZCdJdK ZDdLdM ZEdNdO ZFdPdQ ZGej9:dRedSdTdUdVdWgfe%dXdYdZdVg fe!d[d\dZdVdWgfe"d]d]dZdVd^d_gfed`daddVg fedbdcdAdVg feedddedfdgdAdVg fgdhdi ZHej9:djeejIeJfeejIejIfeejIeJfe!ejIeJfe"ejIejIfe%ejIeJfgdkdl ZKdmdn ZLdodp ZMdqdr ZNdsdt ZOdudv ZPdwdx ZQdydz ZRd{d| ZSd}d~ ZTdd ZUdS )    N)partial)	resources)Path)dumpsloadsMock)	HTTPError)urlparse)clear_data_home
fetch_fileget_data_homeload_breast_cancerload_diabetesload_digits
load_files	load_irisload_linnerudload_sample_imageload_sample_images	load_wine)RemoteFileMetadata$_derive_folder_and_filename_from_url_fetch_remoteload_csv_dataload_gzip_compressed_csv_datacheck_as_frame)scale)Bunchc                   @   s    e Zd ZdZdd Zdd ZdS )
_DummyPathz8Minimal class that implements the os.PathLike interface.c                 C   s
   || _ d S Npath)selfr#    r%   o/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/sklearn/datasets/tests/test_base.py__init__0   s   
z_DummyPath.__init__c                 C   s   | j S r!   r"   )r$   r%   r%   r&   
__fspath__3   s   z_DummyPath.__fspath__N)__name__
__module____qualname____doc__r'   r(   r%   r%   r%   r&   r    -   s    r    c                 C   s   t j| rt|  d S d S r!   )osr#   isdirshutilrmtreer"   r%   r%   r&   _remove_dir7   s   r1   module)scopec                 c   "    t | d}|V  t| d S )NZscikit_learn_data_home_teststrmktempr1   Ztmpdir_factoryZtmp_filer%   r%   r&   	data_home<      r9   c                 c   r4   )NZscikit_learn_load_files_testr5   r8   r%   r%   r&   load_files_rootC   r:   r;   c                 c   sD    t j| d}t j|dd}|d |  t|V  t| d S )NdirF)r=   delete   Hello World!
)tempfilemkdtempNamedTemporaryFilewritecloser6   r1   )r;   Ztest_category_dir1Zsample_filer%   r%   r&   test_category_dir_1J   s   

rE   c                 c   s$    t j| d}t|V  t| d S )Nr<   )r@   rA   r6   r1   )r;   Ztest_category_dir2r%   r%   r&   test_category_dir_2T   s   
rF   path_containerc                 C   s~   | d ur| |}t |d}||ksJ tj|sJ | d ur#| |}t|d tj|r0J t |d}tj|s=J d S )N)r9   )r   r-   r#   existsr   )rG   r9   r%   r%   r&   test_data_home[   s   


rI   c                 C   s>   t | }t|jdksJ t|jdksJ |jd u sJ d S )Nr   )r   len	filenamestarget_namesDESCR)r;   resr%   r%   r&   test_default_empty_load_fileso   s   rO   c                 C   sN   t |}t|jdksJ t|jdksJ |jd u sJ |jdgks%J d S )N      r?   )r   rJ   rK   rL   rM   datarE   rF   r;   rN   r%   r%   r&   test_default_load_filesv   s
   rT   c                 C   sp   t j| t j }t|d|gdd}t|jdksJ t|j	dks'J |j
dks.J |jdgks6J d S )Ntestutf-8)description
categoriesencodingrP   zHello World!
)r-   r#   abspathsplitseppopr   rJ   rK   rL   rM   rR   )rE   rF   r;   categoryrN   r%   r%   r&   .test_load_files_w_categories_desc_and_encoding~   s   
r_   c                 C   sT   t |dd}t|jdksJ t|jdksJ |jd u sJ |dd u s(J d S )NF)Zload_contentrP   rQ   rR   )r   rJ   rK   rL   rM   getrS   r%   r%   r&   test_load_files_wo_load_content   s
   ra   allowed_extensionsz.txtz.jsonc                    sn   | d    d}fdd|D }|D ]}|d qt|  d}t fdd|D t|jks5J dS )	z;Check the behaviour of `allowed_extension` in `load_files`.sub)z	file1.txtz
file2.jsonz
file3.jsonzfile4.mdc                    s   g | ]} | qS r%   r%   .0f)dr%   r&   
<listcomp>   s    z6test_load_files_allowed_extensions.<locals>.<listcomp>s   hellorb   c                    s   g | ]}|j  v rt|qS r%   )suffixr6   )re   pri   r%   r&   rh      s    N)mkdirwrite_bytesr   setrK   )Ztmp_pathrb   filespathsrk   rN   r%   )rb   rg   r&   "test_load_files_allowed_extensions   s   rq   zHfilename, expected_n_samples, expected_n_features, expected_target_nameszwine_data.csv      )Zclass_0Zclass_1Zclass_2iris.csv      )ZsetosaZ
versicolorZ	virginicazbreast_cancer.csv9     Z	malignantZbenignc                 C   sV   t | \}}}|jd |ksJ |jd |ksJ |jd |ks"J tj|| d S )Nr   rP   )r   shapenptestingassert_array_equal)filenameZexpected_n_samplesZexpected_n_featuresZexpected_target_namesactual_dataZactual_targetZactual_target_namesr%   r%   r&   test_load_csv_data   s
   r   c                  C   s   d} d}t | d}t | |d}t|dksJ t|dksJ tj|d |d  tj|d |d  tj|d	 |d	  |d
 dsIJ d S )Nrt   ziris.rstdata_file_namer   descr_file_namerv      r   rP   rQ   z.. _iris_dataset:)r   rJ   rz   r{   r|   
startswith)r   r   Zres_without_descrZres_with_descrr%   r%   r&   test_load_csv_data_with_descr   s   
r   z filename, kwargs, expected_shapezdiabetes_data_raw.csv.gz  
   diabetes_target.csv.gzzdigits.csv.gz	delimiter,  A   c                 C   s&   t | fi |}|jt|ksJ d S r!   )r   ry   tuple)r}   kwargsZexpected_shaper~   r%   r%   r&   "test_load_gzip_compressed_csv_data   s   	r   c                  C   sB   d} d}t | d}t | |d\}}tj|| |dsJ d S )Nr   zdiabetes.rstr   r   z.. _diabetes_dataset:)r   rz   r{   r|   r   )r   r   Zexpected_datar~   descrr%   r%   r&   -test_load_gzip_compressed_csv_data_with_descr   s   

r   c                  C   s   zTt  } t| jdksJ t| jdksJ | j}t|d ddd d f tjg dtjdks3J t|d ddd d f tjg dtjdksMJ | jsRJ W d S  t	yc   t
d Y d S w )NrQ   r   )         )dtyperP   )rQ      rs   3Could not load sample images, PIL is not available.)r   rJ   imagesrK   rz   allarrayuint8rM   ImportErrorwarningswarn)rN   r   r%   r%   r&   test_load_sample_images   s   44r   c                  C   sJ   zt d} | jdksJ | jdksJ W d S  ty$   td Y d S w )Nz	china.jpgr   )i  i  r   r   )r   r   ry   r   r   r   )chinar%   r%   r&   test_load_sample_image   s   r   c                  C   sn   t dd} | jjdksJ | jjdksJ t| jdksJ | js#J t  }tj	j
t| jd |jdd d	S )
zTest to check that we load a scaled version by default but that we can
    get an unscaled version when setting `scaled=False`.F)Zscaledr   r   r   r   gT5@g-C6?)ZatolN)r   rR   ry   targetsizerJ   feature_namesrM   rz   r{   Zassert_allcloser   )Zdiabetes_rawZdiabetes_defaultr%   r%   r&   test_load_diabetes_raw   s   


r   zEloader_func, data_shape, target_shape, n_target, has_descr, filenames)rw   rx   )rw   rQ   Tr}   )rr   rs   )rr   r   )ru   rv   )ru   )   r   Zdata_filenameZtarget_filenamer   )r   )r   @   )r   	   )Zn_class)Q  r   )r   c                    s   |   t  ts
J  jj|ksJ  jj|ksJ t dr*t j|d ks*J |d ur7t j|ks7J |r> j	s>J |rSd v sFJ t
 fdd|D sUJ d S d S )Nr   rP   data_modulec                    s.   g | ]}| v ot  d   |   qS )r   )r   ro   is_filerd   bunchr%   r&   rh   /  s
    ztest_loader.<locals>.<listcomp>)
isinstancer   rR   ry   r   hasattrrJ   r   rL   rM   r   )loader_funcZ
data_shapeZtarget_shapeZn_targetZ	has_descrrK   r%   r   r&   test_loader  s&   


r   z%loader_func, data_dtype, target_dtypec                 C   s   |  }t || ||d d S )N)Zexpected_data_dtypeZexpected_target_dtyper   )r   Z
data_dtypeZtarget_dtypeZdefault_resultr%   r%   r&   test_toy_dataset_frame_dtype7  s   
r   c                  C   s2   t dd} tt| }d|_|d |jksJ d S )Nx)r   y)r   r   r   r   r   Zbunch_from_pklr%   r%   r&   test_loads_dumps_bunchL  s   
r   c                  C   sf   t dd} d| jd< tt| }|jdksJ |d dksJ d|_|jdks)J |d dks1J d S )Noriginal)keyzset from __dict__r   changed)r   __dict__r   r   r   r   r%   r%   r&   8test_bunch_pickle_generated_with_0_16_and_read_with_0_17S  s   

r   c                  C   s   t  } dt| v sJ d S )NrR   )r   r=   )rR   r%   r%   r&   test_bunch_dirh  s   r   c                  C   s   d} t jt| d ddlm} W d   n1 sw   Y  d} t jt| d ddlm} W d   dS 1 s:w   Y  dS )zLCheck that we raise the ethical warning when trying to import `load_boston`.z8The Boston housing prices dataset has an ethical problemmatchr   )load_bostonNzBcannot import name 'non_existing_function' from 'sklearn.datasets')non_existing_function)pytestraisesr   sklearn.datasetsr   r   )msgr   r   r%   r%   r&   test_load_boston_errorn  s   "r   c              	   C   s   d}t d|d}tt|dddt dd}| d| tjtd	d
G}tj	tdd
 t
|ddd W d   n1 s=w   Y  |jdksIJ |D ]}t|jd| ksYJ qKt|dksbJ W d   dS 1 smw   Y  dS )z'Check retry mechanism in _fetch_remote.z8https://scikit-learn.org/this_file_does_not_exist.tar.gzZinvalid_fileN  	Not Found)urlcoder   hdrsfpZside_effect"sklearn.datasets._base.urlretrievezRetry downloadingr   zHTTP Error 404r   r   )Z	n_retriesdelayrv   zRetry downloading from url: )r   r   r	   ioBytesIOsetattrr   warnsUserWarningr   r   
call_countr6   messagerJ   )monkeypatchr   Zinvalid_remote_fileurlretrieve_mockrecordrr%   r%   r&   1test_fetch_remote_raise_warnings_with_invalid_urlz  s"   "r   c                  C   s  t d\} }| dksJ |dksJ t d\} }| dksJ |dks$J t d\} }| dks0J |dks6J t d\} }| dksBJ |d	ksHJ t d
\} }| dksTJ |d	ksZJ t d\} }| dksfJ |dkslJ t d\} }| dksxJ |dks~J t d\} }| dksJ |dksJ t d\} }| dksJ |dksJ t d\} }| dksJ |dksJ t d\} }| dksJ |dksJ t d\} }| dksJ |d	ksJ tjtdd t d W d    d S 1 sw   Y  d S )Nzhttps://example.com/file.tar.gzexample.comzfile.tar.gzu2   https://example.com/نمونه نماینده.datau   نمونه-نماینده.dataz)https://example.com/path/to-/.file.tar.gzzexample.com/path_tozhttps://example.com/Zdownloaded_filezhttps://example.comz2https://example.com/path/@to/data.json?param=valuez	data.jsonz4https://example.com/path/@@to._/-_.data.json.#anchorz"https://example.com//some_file.txtzsome_file.txtzhttp://example/../some_file.txtZexamplez'https://example.com/!.'.,/some_file.txtz+https://example.com/a/!.'.,/b/some_file.txtzexample.com/a_bzhttps://example.com/!.'.,zInvalid URLr   z
https:/../)r   r   r   
ValueError)folderr}   r%   r%   r&   (test_derive_folder_and_filename_from_url  sr   
"r   c                    s    fdd}t |dS )Nc                    sH   t  }t| jd}||  st| ddd d t|| | d S )N/r   r   )r   r
   r#   striprH   r	   r/   copy)r   Z
local_pathZserver_root	file_pathserver_sider%   r&   _urlretrieve_mock  s
   z,_mock_urlretrieve.<locals>._urlretrieve_mockr   r   )r   r   r%   r   r&   _mock_urlretrieve  s   
r   c              	   C   s  t |}|d }|  |d }d}|j|dd |d }|  |d }|jddd |d	 }|  t|}| d
| | dt|d td}	|	|d d ksUJ |	jdd|ks_J td}	|	|d d d ksoJ |	jdd|dks|J t	d}
t
t$ t
j|
d tddd W d    n1 sw   Y  W d    n1 sw   Y  |d d }t| |d gksJ d S )Nr   
data.jsonl{"a": 1, "b": 2}
rV   rY   Z	subfolderzother_file.txtzSome important text data.r9   r   z$sklearn.datasets._base.get_data_home)return_valuehttps://example.com/data.jsonlr   z,https://example.com/subfolder/other_file.txtzERetry downloading from url: https://example.com/subfolder/invalid.txtr   z)https://example.com/subfolder/invalid.txtr   )r   )r   rl   
write_textr   r   r   r   	read_textreescaper   r   r	   r   sortediterdir)r   tmpdirr   	data_fileserver_dataZserver_subfolderZother_data_filer9   r   fetched_file_pathexpected_warning_msgZlocal_subfolderr%   r%   r&   test_fetch_file_using_data_home  sV   

r   c                 C   s  | d}t|d }d}|j|dd | d}t|}| d| td|d	}||d ks1J |jdd|ks;J |jd
ksBJ td|d	}||d ksPJ |jdd|ksZJ |jd
ksaJ |  td|d	}||d kssJ |jdd|ks}J |jdksJ d S )Nr   r   r   rV   r   client_sider   r   r   rP   rQ   )	rl   r   r   r   r   r   r   r   unlink)r   r   r   r   r   r   r   r   r%   r%   r&   test_fetch_file_without_sha256  s:   

r   c              	   C   s  | d}t|d }d}|j|dd t|  }| d}t|}| d| t	d||d	}||d ks;J |j
dd|ksEJ |jd
ksLJ t	d||d	}||d ks[J |j
dd|kseJ |jd
kslJ |jddd d| d}	tj|	d( t	d||d	}||d ksJ |j
dd|ksJ |jdksJ W d    n1 sw   Y  t	d||d	}||d ksJ |j
dd|ksJ |jdksJ |  t	d||d	}||d ksJ |j
dd|ksJ |jdksJ t	d|d}||d ksJ |j
dd|ksJ |jdksJ d}
d}td| d|
 d}tjt|d/ tj|d t	d||
d	 W d    n1 sFw   Y  W d    d S W d    d S 1 s_w   Y  d S )Nr   r   r   rV   r   r   r   r   )r   sha256rP   zcorrupted contentszQSHA256 checksum of existing local file data.jsonl \(.*\) differs from expected \(z9\): re-downloading from https://example.com/data.jsonl \.r   rQ   r   r   Zdeadbabecafebeefzdiffers from expectedz#The SHA256 checksum of data.jsonl (z) differs from expected (z).)rl   r   r   hashlibr   
read_bytes	hexdigestr   r   r   r   r   r   r   r   r   r   r   OSError)r   r   r   r   r   Zexpected_sha256r   r   r   Zexpected_msgZnon_matching_sha256r   Zexpected_error_msgr%   r%   r&   test_fetch_file_with_sha256?  s   

	 $r  )Vr   r   r-   r   r/   r@   r   	functoolsr   	importlibr   pathlibr   pickler   r   Zunittest.mockr   urllib.errorr	   urllib.parser
   numpyrz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zsklearn.datasets._baser   r   r   r   r   Z"sklearn.datasets.tests.test_commonr   Zsklearn.preprocessingr   Zsklearn.utilsr   r    r1   Zfixturer9   r;   rE   rF   markZparametrizerI   rO   rT   r_   ra   rq   r   r   r   r   r   r   r   r   Zfloat64intr   r   r   r   r   r   r   r   r   r   r  r%   r%   r%   r&   <module>   s    8





	







	






G4(