o
    iv}                  	   @   s  d Z ddlZddlZddlmZ ddlmZ ddlZddl	Z	ddl
mZ ddlmZ ddlmZmZmZ ddlmZmZmZmZ dd	lmZmZmZ dd
lmZmZ ddlmZ ddl m!Z! ddl"m#Z#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1m2Z2m3Z3m4Z4m5Z5 ddl6m7Z7 dd Z8dd Z9dd Z:dd Z;dd Z<e	j=>ddd ge	j=>d!d"d#ge	j=>d$g d%d&d' Z?e	j=>d(e7d)d* Z@d+d, ZAd-d. ZBd/d0 ZCd1d2 ZDd3d4 ZEd5d6 ZFe	j=>d7e+d8d9 ZGd:d; ZHd<d= ZId>d? ZJd@dA ZKdBdC ZLdDdE ZMdFdG ZNdHdI ZOdJdK ZPdLdM ZQdNdO ZRe	j=>d$g dPdQdR ZSdSdT ZTdUdV ZUe	j=>d$g dPe	j=>dWdXdYdgfdZdYdgfd[ddgfgd\d] ZVd^d_ ZWd`da ZXdbdc ZYdS )dz=
Several basic tests for hierarchical clustering procedures

    N)partial)mkdtemp)	hierarchy)connected_components)AgglomerativeClusteringFeatureAgglomeration	ward_tree)_TREE_BUILDERS_fix_connectivity_hc_cutlinkage_tree)average_merge	max_mergemst_linkage_core)make_circles
make_moons)grid_to_graph)DistanceMetric)adjusted_rand_scorenormalized_mutual_info_score)PAIRED_DISTANCEScosine_distancesmanhattan_distancespairwise_distances)METRICS_DEFAULT_PARAMS)kneighbors_graph)IntFloatDict)assert_almost_equalassert_array_almost_equalassert_array_equalcreate_memmap_backed_dataignore_warnings)LIL_CONTAINERSc                  C   s   t jd} | jdd}tt t|dd W d    n1 s"w   Y  tt t|t dd W d    n1 s@w   Y  t	 
| t|}t|dd	}t|d
 t|dd	d
  t|td	}t|d
 t|dd	d
  d S )N*   )   r$   sizeZfoo)linkage   r)   connectivityprecomputedaffinityr   cosine	manhattan)nprandomRandomStatenormalpytestraises
ValueErrorr   onesr   fitr   r   r   )rngXdisres r>   v/var/www/html/eduruby.in/lip-sync/lip-sync-env/lib/python3.10/site-packages/sklearn/cluster/tests/test_hierarchical.pytest_linkage_misc6   s   r@   c            
   	   C   s  t jd} t jddgtd}d|ddddf< | dd}t|j }t	 D ]_}||j
|d\}}}}d	|jd
  d
 }	t|| |	ksHJ tt ||j
t dd W d    n1 sbw   Y  tt ||j
d d |d W d    n1 sw   Y  q(d S )Nr   
   Zdtyper)      2   d   r*         r(   )r1   r2   r3   r8   boolrandnr   shaper	   valuesTlenr5   r6   r7   )
r:   maskr;   r+   tree_builderchildrenn_componentsn_leavesparentn_nodesr>   r>   r?   test_structured_linkage_treeO   s&   
rU   c                  C   sf  t jd} | dd}||d fD ]H}t ( tt t|j	dd\}}}}W d    n1 s3w   Y  W d    n1 sBw   Y  d|j
d  d }t|| |ksZJ qt D ]Q}||d fD ]H}t ( tt ||j	dd\}}}}W d    n1 sw   Y  W d    n1 sw   Y  d|j
d  d }t|| |ksJ qgq_d S )Nr   rD   rE   rA   )
n_clustersrF   rG   )r1   r2   r3   rI   r!   r5   warnsUserWarningr   rL   rJ   rM   r	   rK   )r:   r;   Zthis_XrP   rT   rR   rS   rO   r>   r>   r?   test_unstructured_linkage_treef   s2   rY   c            	      C   s   t jd} t jddgtd}| dd}t|j }t	 D ] }||j
|d\}}}}d|jd  d }t|| |ks>J qd S )	Nr   rA   rB   rD   rE   r*   rF   rG   )r1   r2   r3   r8   rH   rI   r   rJ   r	   rK   rL   rM   )	r:   rN   r;   r+   linkage_funcrP   rT   rR   rS   r>   r>   r?   test_height_linkage_tree~   s   
r[   c                  C   sZ   t ddgddgg} d}tjt|d t| dd W d    d S 1 s&w   Y  d S )Nr   rG   z;Cosine affinity cannot be used when X contains zero vectorsmatchr/   r-   )r1   arrayr5   r6   r7   r   )r;   msgr>   r>   r?   test_zero_cosine_linkage_tree   s
   "r`   zn_clusters, distance_threshold)N      ?)rA   Ncompute_distancesTFr'   wardcompleteaveragesinglec                 C   s   t jd}t jddgtd}d}||d}t|j }t| ||||d}	|		| |s0|d urNt
|	ds7J |	jjd }
|
d }|	jj|d fksLJ d S t
|	drUJ d S )	Nr   rA   rB   rE   rD   )rV   r+   r'   distance_thresholdrb   
distances_rG   )r1   r2   r3   r8   rH   rI   r   rJ   r   r9   hasattr	children_ri   )rV   rb   rh   r'   r:   rN   	n_samplesr;   r+   
clusteringZ
n_childrenrT   r>   r>   r?   'test_agglomerative_clustering_distances   s&   

rn   lil_containerc              
   C   s\  t j| }t jddgtd}d}||d}t|j }dD ]}td||d}|	| z&t
 }	td||	|d}|	| |j}
t t |
dksLJ W t|	 nt|	 w td||d}d|_|	| tt|j|
d	 d |_|	| t t |jdksJ td|| d dd df |d}tt |	| W d    n1 sw   Y  qtd| d
dd}tt |	| W d    n1 sw   Y  t D ]+}tdt ||f|dd}|	| tdd |dd}|	| tt|j|jd	 qtd|dd}|	| t|}td|ddd}|	| t|j|j d S )NrA   rB   rE   rD   rc   rV   r+   r'   )rV   r+   Zmemoryr'   FrG   r0   rd   )rV   r+   metricr'   re   r,   )r1   r2   r3   r8   rH   rI   r   rJ   r   r9   r   labels_r&   uniqueshutilrmtreecompute_full_treer   r   r+   Ztoarrayr5   r6   r7   r   keysr   r   )global_random_seedro   r:   rN   rl   r;   r+   r'   rm   tempdirlabelsrq   Zclustering2X_distr>   r>   r?   test_agglomerative_clustering   s   








r|   c                  C   s2   t jd} t| dd}tddd| dS )zhAgglomerativeClustering must work on mem-mapped dataset.

    Non-regression test for issue #19875.
    r   rD   rE   	euclideanrg   rq   r'   N)r1   r2   r3   r    rI   r   r9   )r:   Xmmr>   r>   r?   +test_agglomerative_clustering_memory_mapped  s   r   c                 C   s   t j| }t jddgtd}|dd}t|j }td|d}|	| t 
t |jdks2J ||}|jd dks@J ||}t |d j
dksQJ t||| tt |	|d d  W d    d S 1 ssw   Y  d S )	NrA   rB   rD   rE   r$   rV   r+   rG   r   )r1   r2   r3   r8   rH   rI   r   rJ   r   r9   r&   rs   rr   Z	transformZinverse_transformr   r5   r6   r7   )rx   r:   rN   r;   r+   ZaggloZX_redZX_fullr>   r>   r?   test_ward_agglomeration  s   



"r   c                  C   sv   t ddd\} }tddd}||  tt|j|d tdd	dd
\}}tddd}|| tt|j|d d S )Ng?r#   )noiserandom_staterF   rg   )rV   r'   rG   ra   g?)factorr   r   )r   r   r9   r   r   rr   r   )ZmoonsZmoon_labelsrm   ZcirclesZcircle_labelsr>   r>   r?   test_single_linkage_clustering2  s   

r   c                 C   sv   g }| |fD ]&}t |}| d }t||f}d|t||f< |t||j q|d |d k s9J dS )zUtil for comparison with scipyrG   r   N)	rM   maxr1   ZzerosarangeappenddotrL   all)Zcut1Zcut2Zco_clustcutnkZecutr>   r>   r?   assess_same_labellingC  s   r   c                 C   sT  d\}}}t j| }t ||f}t D ]s}tdD ]l}d|j||fd }|dt |d d t j	f  8 }||j
ddd d t j	f 8 }tj||d}	|	d d d d	f jtd
d}
t| ||d\}}}}|jdd t||
d|  t|||}t||
|}t|| qqtt t|d || W d    d S 1 sw   Y  d S )NrA   r$      r$   皙?r%         @rG   ZaxismethodrF   Fcopyr*   z2linkage tree differs from scipy impl for linkage: )r1   r2   r3   r8   r	   rw   ranger4   r   newaxismeanr   r'   astypeintsortr   r   r   r5   r6   r7   )rx   r   pr   r:   r+   r'   ir;   outrk   rP   _rR   r   Zcut_r>   r>   r?   test_sparse_scikit_vs_scipyO  s4   
 "r   c                 C   s   d\}}}t j| }d|j||fd }|dt |d d t jf  8 }||jddd d t jf 8 }tj|dd}|d d d d	f 	t
}td |\}}	}
}	|jdd t||d
 t|||
}t|||
}t|| d S )Nr   r   r%   r   rG   r   rg   r   rF   z8linkage tree differs from scipy impl for single linkage.)r1   r2   r3   r4   r   r   r   r   r'   r   r   r	   r   r   r   r   )rx   rl   
n_featuresrV   r:   r;   r   Zchildren_scipyrP   r   rR   r   Z	cut_scipyr>   r>   r?   )test_vector_scikit_single_vs_scipy_singlew  s"   
 r   metric_param_gridc                 C   s   t jjdd}|jdd}t|}| \}}| }tj|  D ]#}t	t
||}tj|fi |}	t||	}
t||	}t j|
| q dS )zoThe MST-LINKAGE-CORE algorithm must work on mem-mapped dataset.

    Non-regression test for issue #19875.
    rG   )seed)   r)   r%   N)r1   r2   r3   r4   r    rw   	itertoolsproductrK   dictzipr   Z
get_metricr   testingZassert_equal)r   r:   r;   r   rq   Z
param_gridrw   valskwargsZdistance_metricZmstZmst_mmr>   r>   r?   #test_mst_linkage_core_memory_mapped  s   

r   c               	   C   s   t g dg dg dg dg dg dg} t g d}t| ddd}d||j  }t| |d	\}}d
D ]}td||d}||  tt|j	|d q6d S )N)r   r   r   )rG   rG   rG   )rF   rF   rF   )r   r   rG   rG   rF   rF   r   FZn_neighborsinclude_selfra   r}   )rg   rf   rf   rd   )rV   r'   r+   rG   )
r1   r^   r   rL   r
   r   r9   r   r   rr   )r;   Ztrue_labelsr+   rQ   r'   rm   r>   r>   r?   test_identical_points  s   .
r   c                  C   s8   t g d} t| ddd}td|dd}||  d S )N))y&1?gQ?)r   gMbX?)r   gEԸ?g rh?/$?r   ;On?r   r   r   r   r   r   r   )r   g~jt?)r   gOn?)r   g;On?rA   Fr   r)   rd   rp   )r1   r^   r   r   r9   )r;   r+   rd   r>   r>   r?   test_connectivity_propagation  s   r   c           	      C   s   d\}}t j| }t ||f}tdD ]>}d|j||fd }|dt |d d t jf  8 }||jddd d t jf 8 }t	|}t	||d}t
|d	 |d	  qd S )
NrA   r$   r$   r   r%   r   rG   r   r*   r   )r1   r2   r3   r8   r   r4   r   r   r   r   r   )	rx   r   r   r:   r+   r   r;   out_unstructuredout_structuredr>   r>   r?   test_ward_tree_children_order  s    r   c              	   C   s8  d\}}t j| }t ||f}tdD ]}d|j||fd }|dt |d d t jf  8 }||jddd d t jf 8 }t	|dd	}t	||dd
}|d }	|d }
t
|	|
 |d }|d }t|| dD ]/}t|||ddd }t||ddd }|d }|d }|d }|d }t|| t|| qiqt ddgddgddgddgddgddgg}t g dg dg dg dg d g}t g dg dg dg d!g d"g}t g dg dg dg d#g d$g}t |\}}t ||f}t	|dd	}t	||dd
}t
|d d d d%f |d  t
|d d d d%f |d  t|d d d%f |d&  t|d d d%f |d&  g d'}||g}t||D ]L\}}t|d|d(}t|||dd}t
|d d d d%f |d  t
|d d d d%f |d  t|d d d%f |d&  t|d d d%f |d&  qMd S ))Nr   r$   r   r%   r   rG   r   T)return_distance)r+   r   r   )rf   re   rg   )r+   r'   r   )r'   r   gя?geGgw7@g})J@gZ!E@gn]#g!܄@g,8g!Yz @gRա&<agڎF@gT!@)g      @r   g0rq5?       @)      ?g      @gAVJS?r   )g        r   gL/u@r   )      @       @g6SHD4"@r   )      @      "@gwʴG8@r   )r   r   gwfۣ@r   )r   r   g63C2@r   )r   r   go;@r   )r   r   g_ .@r   rF   r)   )re   rf   rg   )r   r'   )r1   r2   r3   r8   r   r4   r   r   r   r   r   r   r   r^   rJ   r   )rx   r   r   r:   r+   r   r;   r   r   Zchildren_unstructuredZchildren_structuredZdist_unstructuredZdist_structuredr'   Zstructured_itemsZunstructured_itemsZstructured_distZunstructured_distZstructured_childrenZunstructured_childrenZlinkage_X_wardZlinkage_X_completeZlinkage_X_averagerl   r   Zconnectivity_XZout_X_unstructuredZout_X_structuredZlinkage_optionsZX_linkage_truthZX_truthr>   r>   r?   &test_ward_linkage_tree_return_distance  s    





r   c                  C   s   t ddgddgg} t ddgddgg}tdd|d}t|dd}tt ||  W d    d S 1 s9w   Y  d S )	Nr   rG   TFrF   )n_xn_yrN   rd   r+   r'   )r1   r^   r   r   r5   rW   rX   r9   )xmcwr>   r>   r?    test_connectivity_fixing_non_lil`  s   "r   c            	      C   s   t jd} t | jdddjt jdd}| t|}t	||}t
||D ]\}}|| |ks3J q't jdt jdd d d	 }t dd
d d d	 }t	||}t||t jdt jdddd t||t jdt jdddd d S )Nr   rE   rA   r%   Fr   rD   rB   rF   ra   rG   )rN   Zn_aZn_b)r1   r2   r3   rs   randintr   ZintprandrM   r   r   r   fullr   r8   r   )	r:   rw   rK   dkeyvalueZ
other_keysZother_valuesotherr>   r>   r?   test_int_float_dictm  s    

"r   c                  C   sj   t jd} | dd}t|ddd}t|d}tttdddd}|| || t|j	|j	 d S )	Nr   r   r$   r   Fr   r*   r   )
r1   r2   r3   r   r   r   r   r9   r   rr   )r:   r;   r+   aglc1aglc2r>   r>   r?   test_connectivity_callable~  s   


r   c                  C   sn   t jd} | dd}t|ddd}t|ddd}t|d}t|d}|| || t|j|j d S )	Nr   r   r$   r   Fr   Tr*   )	r1   r2   r3   r   r   r   r9   r   rr   )r:   r;   r+   Zconnectivity_include_selfr   r   r>   r>   r?   "test_connectivity_ignores_diagonal  s   



r   c                  C   s   t jd} | dd}t|ddd}td|d}|| |jd }|jjd }||d ks1J d	}| d
d}t|ddd}t||d}|| |jd }|jjd }||| ks^J d S )Nr   rA   rF   r$   Fr   r   rG   e      )	r1   r2   r3   rI   r   r   r9   rJ   rk   )r:   r;   r+   Zagcrl   rT   rV   r>   r>   r?   test_compute_full_tree  s    



r   c                  C   sP   t jd} | dd}t d}t D ]}t|||dd dks%J qd S )Nr   r$   r*   rG   )r1   r2   r3   r   eyer	   rK   r!   )r:   r;   r+   rZ   r>   r>   r?   test_n_components  s   
r   c                  C   sr   d} t jd}|| | }t g d}t| | |t jd}G dd d}| }t|||jd |j	dks7J d S )	NrF   r   )TFFT)r   r   rN   Z	return_asc                   @   s   e Zd Zdd Zdd ZdS )z>test_affinity_passed_to_fix_connectivity.<locals>.FakeAffinityc                 S   s
   d| _ d S )Nr   counter)selfr>   r>   r?   __init__  s   
zGtest_affinity_passed_to_fix_connectivity.<locals>.FakeAffinity.__init__c                 _   s   |  j d7  _ | j S )NrG   r   )r   argsr   r>   r>   r?   	increment  s   zHtest_affinity_passed_to_fix_connectivity.<locals>.FakeAffinity.incrementN)__name__
__module____qualname__r   r   r>   r>   r>   r?   FakeAffinity  s    r   )r+   r.   r   )
r1   r2   r3   rI   r^   r   Zndarrayr   r   r   )r&   r:   r;   rN   r+   r   far>   r>   r?   (test_affinity_passed_to_fix_connectivity  s   r   )rd   re   rf   c                 C   s   t j|}t jddgtd}d}||d}t|j }d}d |fD ]I}td ||| d}	|		| |	j
}
tt |	j
}t|  }|||d dd\}}}}}t ||kd }||ks\J t|||d	}t |
|skJ q"d S )
NrA   rB   rE   rD   )rV   rh   r+   r'   T)r+   rV   r   rG   )rV   rP   rR   )r1   r2   r3   r8   rH   rI   r   rJ   r   r9   rr   rM   rs   r	   Zcount_nonzeror   Zarray_equiv)r'   rx   r:   rN   rl   r;   r+   rh   connrm   Zclusters_producedZnum_clusters_producedrO   rP   rQ   rR   rS   Z	distancesZnum_clusters_at_thresholdZclusters_at_thresholdr>   r>   r?   5test_agglomerative_clustering_with_distance_threshold  s8   

r   c                 C   sx   t j| }d}|jdd|dfd}td ddd|}t|d	d
d}t |t j t 	|dks3J |j
|ks:J d S )NrA   ii,  r   r%   r   rg   rV   rh   r'   	minkowskirF   rq   r   r   )r1   r2   r3   r   r   r9   r   fill_diagonalinfr   Zn_clusters_)rx   r:   rl   r;   rm   Zall_distancesr>   r>   r?   test_small_distance_threshold  s   r   c                 C   s   t j| }d}|jdd|dfd}d}td |dd|}|j}t|d	d
d}t |t j	 t 
|D ]9}||k}	||	 d d |	f jdd }
||	 d d |	 f jdd }|	 dkrg|
|k sgJ ||ksmJ q4d S )NrE   irA   r   r%   r)   rg   r   r   rF   r   r   r   rG   )r1   r2   r3   r   r   r9   rr   r   r   r   rs   minr   sum)rx   r:   rl   r;   rh   rm   rz   DlabelZin_cluster_maskZmax_in_cluster_distanceZmin_out_cluster_distancer>   r>   r?   .test_cluster_distances_with_distance_threshold  s,    r   )	thresholdy_truera   rG   r   g      ?c                 C   s:   dgdgg}t d || d}||}t||dksJ d S )Nr   rG   r   )r   Zfit_predictr   )r'   r   r   r;   	clustererZy_predr>   r>   r?   ?test_agglomerative_clustering_with_distance_threshold_edge_case*  s   
r   c                  C   s   dgdgg} t jtdd td d d|  W d    n1 s!w   Y  t jtdd tddd|  W d    n1 sAw   Y  dgdgg} t jtdd td ddd	|  W d    d S 1 siw   Y  d S )
Nr   rG   zExactly one of r\   )rV   rh   rF   z!compute_full_tree must be True ifF)rV   rh   rv   )r5   r6   r7   r   r9   )r;   r>   r>   r?   &test_dist_threshold_invalid_parameters:  s   
"r   c                  C   s^   t jd} | dd}tjtdd tddd| W d    d S 1 s(w   Y  d S )	Nr   r$   r   z>Distance matrix should be square, got matrix of shape \(5, 3\)r\   r,   re   r~   )	r1   r2   r3   r   r5   r6   r7   r   r9   )r:   r;   r>   r>   r?   *test_invalid_shape_precomputed_dist_matrixI  s   "r   c                  C   s
  t g dg dg dg dg dg} t| d dksJ t jd}|dd}t|}td	| d
d}d}tj	t
|d || W d   n1 sNw   Y  t| d
d}tj	t
|d || W d   n1 spw   Y  t|j|j t|j|j dS )zCheck that connecting components works when connectivity and
    affinity are both precomputed and the number of connected components is
    greater than 1. Non-regression test for #16151.
    )r   rG   rG   r   r   )r   r   rG   r   r   )r   r   r   r   r   )r   r   r   r   rG   r   rF   r$   rA   r,   re   )rq   r+   r'   z.Completing it to avoid stopping the tree earlyr\   Nr   )r1   r^   r   r2   r3   rI   r   r   r5   rW   rX   r9   r   rr   rk   )Zconnectivity_matrixr:   r;   r{   Zclusterer_precomputedr_   r   r>   r>   r?   @test_precomputed_connectivity_metric_with_2_connected_componentsU  s6   
r   )Z__doc__r   rt   	functoolsr   tempfiler   numpyr1   r5   Zscipy.clusterr   Zscipy.sparse.csgraphr   Zsklearn.clusterr   r   r   Zsklearn.cluster._agglomerativer	   r
   r   r   Z"sklearn.cluster._hierarchical_fastr   r   r   Zsklearn.datasetsr   r   Z sklearn.feature_extraction.imager   Zsklearn.metricsr   Zsklearn.metrics.clusterr   r   Zsklearn.metrics.pairwiser   r   r   r   Z'sklearn.metrics.tests.test_dist_metricsr   Zsklearn.neighborsr   Zsklearn.utils._fast_dictr   Zsklearn.utils._testingr   r   r   r    r!   Zsklearn.utils.fixesr"   r@   rU   rY   r[   r`   markZparametrizern   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r>   r>   r>   r?   <module>   s    	
^
(
v
&"