
    riG                     
   d dl Zd dlZd dlmZmZ d dlmZ d dlmZ d dl	m
Z
mZmZ d dl	mZ d dlmZ d dlmZmZ d d	lmZ ej,                  j/                  d
ddg      ej,                  j/                  dddg      d               Zej,                  j/                  d
ddg      ej,                  j/                  ddddddgg      ej,                  j/                  dddg      d                      Zej,                  j/                  dddg      d        Zd Zd Zej,                  j/                  d
ddg      ej,                  j/                  dg d      d               Zej,                  j/                  d
ddg      ej,                  j/                  dg d       d!               Zej,                  j/                  d"dd#g      ej,                  j/                  d
ddg      ej,                  j/                  dddddg d g      d$                      Zej,                  j/                  ddg d%g      ej,                  j/                  d
ddg      d&               Z ej,                  j/                  d' e             ej,                  j/                  d( ejB                  d)       ejD                  d*      dfd+  ejF                  d      jI                  ejD                        dfd, d- d.fd/ d0 d1d.gf ejJ                  g d2       ejJ                  g d3      d f ejJ                  ejL                  ejL                  d d4d5dg       ejJ                  g d6      d f ejJ                  g d2       ejJ                  g d6ejD                  7      d1d.gfg      d8               Z'ej,                  j/                  d
ddg      ej,                  j/                  d9d*d:g      d;               Z(ej,                  j/                  d<d=ejL                  dgfdd=gejL                  ejL                  gd>d?ggfg      d@        Z)ej,                  jU                  e edA      k  dBC      ej,                  j/                  dDg dE      ej,                  j/                  d
ddg      ej,                  j/                  dFddg      dG                             Z+ej,                  jU                  e edA      k  dHC      ej,                  j/                  dDg dE      ej,                  j/                  d
ddg      ej,                  j/                  dFddg      dI                             Z,y)J    N)assert_allcloseassert_array_equal)approx)config_context)_convert_to_numpyget_namespace)yield_namespace_device_dtype_combinationsdevice)_array_api_for_tests)
np_versionparse_version)_weighted_percentileaverageTFsize
      c                 
   t        j                  |       }t        j                  |      }t        ||d|      }| dz  dk(  r|du r|t        j                  |      k7  sJ yt        |      t        j                  |      k(  sJ y)au  Ensure `_weighted_percentile` matches `median` when expected.

    With unit `sample_weight`, `_weighted_percentile` should match the median except
    when `average=False` and the number of samples is even.
    For an even array and `average=False`, `percentile_rank=50` gives the lower
    of the two 'middle' values, that are averaged when calculating the `median`.
    2   r      r   FN)nparange	ones_liker   medianr   )r   r   ysample_weightscores        \/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/sklearn/utils/tests/test_stats.py'test_weighted_percentile_matches_medianr       sr     			$ALLOM M2wGE ax1}E)		!$$$e}		!,,,    percentile_rank   #   =      /   c                 
   t         j                  j                  |       }|j                  d|      }t        j                  |      }t        ||||      }|rd}nd}t        |      t        j                  |||      k(  sJ y)a  Check `_weighted_percentile` with unit weights is correct.

    `average=True` results should be the same as `np.percentile`'s
    'averaged_inverted_cdf'.
    `average=False` results should be the same as `np.percentile`'s
    'inverted_cdf'.
    Note `np.percentile` is the same as `np.quantile` except `q` is in range [0, 100].

    We parametrize through different `percentile_rank` and `size` to
    ensure we get cases where `g=0` and `g>0` (see Hyndman and Fan 1996 for details).
    r#   r   r   averaged_inverted_cdfinverted_cdf)methodN)r   randomRandomStaterandintr   r   r   
percentile)	global_random_seedr   r"   r   rngr   swr   r,   s	            r   &test_weighted_percentile_matches_numpyr4   (   st    " ))

 2
3CBT"A	aB BIE(%=BMM!_VLLLLr!   r   d   c                     t        j                  ddgddgg      }t        j                  ddgddgg      }t        ||| d      }t        d      D ]  }||   t	        d	      k(  rJ  y
)a  Check `j+1` index is clipped to max, when `average=True`.

    `percentile_plus_one_indices` can exceed max index when `percentile_indices`
    is already at max index.
    Note that when `g` (Hyndman and Fan) / `fraction_above` is greater than 0,
    `j+1` (Hyndman and Fan) / `percentile_plus_one_indices` is calculated but
    never used, so it does not matter what this value is.
    When percentile of percentile rank 100 falls exactly on the last value in the
    `weighted_cdf`, `g=0` and `percentile_indices` is at max index. In this case
    we set `percentile_plus_one_indices` to be max index as well, so the result is
    the average of 2x the max index (i.e. last value of `weighted_cdf`).
    r      g?g?r      Tr   g      ?N)r   arrayr   ranger   )r"   r   r3   r   idxs        r   *test_weighted_percentile_plus_one_clip_maxr<   G   ss      	1a&1a&!"A	C:1v&	'B BFEQx )SzVC[((()r!   c                      t        j                  dt         j                        } t        j                  dt         j                        }t	        | |d      }t        |      dk(  sJ y)zJCheck `weighted_percentile` with unit weights and all 0 values in `array`.f   dtyper   r   N)r   zerosfloat64onesr   r   )r   r3   r   s      r   test_weighted_percentile_equalrD   ^   sJ    
BJJ'A	BJJ	'B B+E%=Ar!   c                      t        j                  d      } t        j                  d      }t        | |d      }t	        |      dk(  sJ y)zKCheck `weighted_percentile` with all weights equal to 0 returns last index.r   r   g      "@N)r   r   rA   r   r   )r   r3   values      r   )test_weighted_percentile_all_zero_weightsrG   h   s<    
		"A	"B B+E%=Cr!   zpercentile_rank, expected_value))r   r   )r   r8   )r5   r&   c                 @   t        j                  g d      }t        j                  g d      }t        t        j                  ||f      j                  t        j                  ||f      j                  ||       }t        d      D ]  }t        ||         |k(  rJ  y)a  Check leading, trailing and middle 0 weights behave correctly.

    Check that leading zero-weight observations are ignored when `percentile_rank=0`.
    See #20528 for details.
    Check that when `average=True` and the `j+1` ('plus one') index has sample weight
    of 0, it is ignored. Also check that trailing zero weight observations are ignored
    (e.g., when `percentile_rank=100`).
    )r   r7   r   r8      r&      )r   r   r7   r7   r   r7   r   r   r   N)r   r9   r   vstackTr:   r   )r   r"   expected_valuer   r3   rF   r;   s          r   ,test_weighted_percentile_ignores_zero_weightrN   p   s     	&'A	'	(B 
		1a&RYYBx022OWE Qx 4eCj!^3334r!   )r#   r$   r   r%   c                    t         j                  j                  |       }|j                  dd      }|j	                  dd      }t        j
                  ||      }t        ||||      }t        |t        j                  |      ||      }|t        |      k(  sJ |dk(  r'|r$|t        t        j                  |            k(  sJ yyy)z?Check integer weights give the same result as repeating values.r#   r   r)   r&   r   r   N)
r   r-   r.   r/   choicerepeatr   r   r   r   )	r1   r"   r   r2   xweights
x_repeatedpercentile_weightspercentile_repeateds	            r   3test_weighted_percentile_frequency_weight_semanticsrW      s     ))

 2
3CBR Ajjj$G1g&J-	7OW /BLL,ow (;!<<<<"!VBIIj,A%BBBB ")r!   constant   c                     t         j                  j                  |       }|j                  dd      }|j	                  dd      }||z  }t        ||||      }t        ||||      }	|t        |	      k(  sJ y)zCheck multiplying weights by a constant does not change the result.

    Note scale invariance does not always hold when multiplying by a
    float due to cumulative sum numerical error (which grows proportional to n).
    r#   r)   r&   r   N)r   r-   r.   r/   rP   r   r   )
r1   r"   r   rX   r2   rR   rS   weights_multipliedr0   percentile_multipliers
             r   ,test_weighted_percentile_constant_multiplierr]      s     ))

 2
3CBR Ajjj$G 8+%a/7SJ0	  56666r!   )r#   r$   r   c                    t         j                  j                  |       }|j                  dd      }|j	                  dd      }|j                  dd      }t        j
                  ||f      j                  }t        ||||      }t        |t              rg }	|D ]K  }
|	j                  t        |j                  d         D cg c]  }t        |dd|f   ||
|       c}       M t        j                  |	d	      }|j                  |j                  d   t        |      fk(  sZJ t        |j                  d         D cg c]  }t        |dd|f   |||       }}|j                  |j                  d   fk(  sJ t        ||       |j	                  dd      }t        j
                  ||f      j                  }t        ||||      }t        |t              rg }	|D ]R  }
|	j                  t        |j                  d         D cg c]  }t        |dd|f   |dd|f   |
|      ! c}       T t        j                  |	d	      }|j                  |j                  d   t        |      fk(  saJ t        |j                  d         D cg c]  }t        |dd|f   |dd|f   ||      ! }}|j                  |j                  d   fk(  sJ t        ||       yc c}w c c}w c c}w c c}w )
zECheck `_weighted_percentile` behaviour is correct when `array` is 2D.r   r)   r&   r#   )r"   r   r7   N)axis)r   r-   r.   r/   rP   rK   rL   r   
isinstancelistappendr:   shapestacklenr   )r1   r"   r   r2   x1w1x2x_2dwpp_listprip_axis_0w2w_2ds                  r   test_weighted_percentile_2drr      s   
 ))

 2
3C	Rb	!B	AB	B	Rb	!B99b"X  D	b/7
B /4(! 	BMM
 #4::a=1	  )QT
BG	 88F,xxDJJqM3+?@@@@ 4::a=)	
  !QT
B
 
 xxDJJqM++++B! 
AB	B99b"X  D	dOW
B /4(! 	BMM
 #4::a=1	  )QT
DAJG	 88F,xxDJJqM3+?@@@@ 4::a=)	
  !QT
DAJQX
 
 xxDJJqM++++B!i
,
s   K'
K,$K1
$K6z#array_namespace, device, dtype_namezdata, weights, percentile*   r7   c                 $    | j                  d      S Nr   randr2   s    r   <lambda>ry   	  s    SXXb\ r!   c                 &    | j                  dd      S )Nr   r8   rv   rx   s    r   ry   ry     s    SXXb!_ r!   c                 ^    | j                  d      j                  t        j                        S ru   rw   astyper   float32rx   s    r   ry   ry     s    #((2,2E2Ebjj2Q r!   K   c                 &    | j                  dd      S Nr#   r8   rv   rx   s    r   ry   ry     s    Q r!   c                 `    | j                  dd      j                  t        j                        S r   r|   rx   s    r   ry   ry     s    Q..rzz: r!      )r   r7   r   r8   rI   r&   )r   r   r7   r7   r7   r   r8   rI   )r   r7   r7   r7   r7   r   r?   c                    t        ||      }|j                  d|      }|j                  d|      }	|dk(  r<|j                  |j	                  ||	      |k(        rt        j                  d|        t        j                  j                  |       }
t        |      r ||
      n|}t        |      r ||
      n|}|j                  |      }t        |||      }|j                  ||      }|j                  ||      }t        d      5  t        |||      }t        |      t        |      k(  sJ t!        |      d   t!        |      d   k(  sJ t#        ||      }ddd       j$                  |j$                  k(  sJ |j&                  |j&                  k(  sJ t)        ||       |d	k(  r3|j$                  |j$                  cxk(  rt        j*                  k(  sJ  J y|j$                  t        j,                  k(  sJ y# 1 sw Y   xY w)
zECheck `_weighted_percentile` gives consistent results with array API.r7   r
   r   zxp.nextafter is broken on T)array_api_dispatch)xpNr~   )r   rA   rC   all	nextafterpytestxfailr   r-   r.   callabler}   r   asarrayr   array_devicer   r   r@   rd   r   r~   rB   )r1   array_namespacer   
dtype_namedatarS   r0   r   zerooner2   X_np
weights_np	result_npX_xp
weights_xp	result_xpresult_xp_nps                     r   .test_weighted_percentile_array_api_consistencyr      s   D 
ov	6B
 88Af8%D
''!F'
#CQ266",,tS"9T"AB1&:;
))

 2
3C 49DD!)'!2J;;z"D$T:zBI::d6:*DJv6J	4	0 ;(z:F	I&,t*<<<<Y'*mD.A!.DDDD(r:	; 000000I|, Y!!Y__B

BBBBB!!RZZ///; ;s   AHHsample_weight_ndimr   c                    t         j                  j                  |       }|j                  dd      }t         j                  | |j                  |j
                   dk  <   t        j                  |      }|dk(  r|j                  ddd      }n|j                  ddd	      }t        ||d
|      }t        |j
                  d         D cg c]  }||dd|f    |f    }	}|j                  dk(  rMt        j                  ||j
                  d         j                  |j
                  d   |j
                  d         }t        |j
                  d         D cg c]  }||dd|f    |f    }
}t        j                  t        |j
                  d         D cg c]  }t        |	|   |
|   d
|       c}      }t        ||       yc c}w c c}w c c}w )a>  Test `_weighted_percentile` ignores NaNs.

    Calling `_weighted_percentile` on an array with nan values returns the same
    results as calling `_weighted_percentile` on a filtered version of the data.
    We test both with sample_weight of the same shape as the data and with
    one-dimensional sample_weight.
    r5   r         ?r   r7   rJ   )r5   r   r)   )r5      r   Nr   )r   r-   r.   rw   nanrd   isnanr/   r   r:   ndimrQ   reshaper9   r   )r1   r   r   r2   array_with_nansnan_maskr   resultscolfiltered_arrayfiltered_weightsexpected_resultss               r   %test_weighted_percentile_nan_filteredr   H  s    ))

 2
3ChhsB'O>@ffOHCHHo334s:;xx(HQAqy9Aqv6 #?M2wWG
 ..q12 	!S&))3./N  Q		-1F1Fq1IJRR!!!$o&;&;A&>
 :??T?TUV?W9X25x3'',-  xx
 _22156		
  !s#%5c%:B	
 '1+
	
s   G
GGzpercentile_rank, expectedZ   g       @g      @c           	         t        j                  t         j                  dgt         j                  dgt         j                  t         j                  gt         j                  t         j                  gt         j                  dgt         j                  t         j                  gg      }t        j                  |      }t	        |||       }t        j
                  ||d      sJ y)zCCheck that nans are ignored in general, except for all NaN columns.r&   r7   r   T)	equal_nanN)r   r9   r   r   r   array_equal)r"   expectedr9   rS   valuess        r   'test_weighted_percentile_all_nan_columnr   {  s     HHVVQKVVQKVVRVVVVRVVVVQKVVRVV	
	E ll5!G!%/BF
 >>&(d;;;r!   z2.0z2np.quantile only accepts weights since version 2.0)reasonr0   )B   r   r   uniform_weightc                    |r|st        j                  d       t        j                  j	                  |      }|j                  dd      }|r+t        j                  |      |j                  ddd      z  }n|j                  ddd      }t        ||| |      }t        j                  || dz  |s|nd	|rd
ndd      }t        ||       y	)zICheck `_weighted_percentile` is equivalent to `np.quantile` with weights.zHnp.quantile does not support weights with method='averaged_inverted_cdf'r   r5   r7   rJ   r)   r   r5   r   Nr*   r+   r   rS   r,   r`   )r   skipr   r-   r.   rw   r   r/   r   quantiler   )	r0   r   r   r1   r2   r9   r   percentile_weighted_percentilepercentile_numpy_quantiles	            r   ,test_weighted_percentile_like_numpy_quantiler     s     ~V	
 ))

 2
3CHHREU+ckk!QQk.GGAqy9%9}j'&" !#S%3*1&~! 57PQr!   z5np.nanquantile only accepts weights since version 2.0c                    |r|st        j                  d       t        j                  j	                  |      }|j                  dd      }t        j                  | |j
                  |j                   dk  <   |r+t        j                  |      |j                  ddd      z  }n|j                  ddd      }t        ||| |	      }t        j                  || dz  |s|nd
|rdndd      }t        ||       y
)zICheck `_weighted_percentile` equivalent to `np.nanquantile` with weights.zKnp.nanquantile does not support weights with method='averaged_inverted_cdf'r   r5   r   r7   rJ   r)   r   r   Nr*   r+   r   r   )r   r   r   r-   r.   rw   r   rd   r   r/   r   nanquantiler   )	r0   r   r   r1   r2   r   r   r   percentile_numpy_nanquantiles	            r   /test_weighted_percentile_like_numpy_nanquantiler     s     ~-	

 ))

 2
3Chhr3'O>@ffOHCHHo334s:;_5 9D 9
 
 Aqy9%9
G&" $&>>S%3*1&~$  57STr!   )-numpyr   r   numpy.testingr   r   r   sklearn._configr   sklearn.utils._array_apir   r   r	   r   r   sklearn.utils.estimator_checksr   sklearn.utils.fixesr   r   sklearn.utils.statsr   markparametrizer    r4   r<   rD   rG   rN   rW   r]   rr   r~   int32rC   r}   r9   r   r   r   r   skipifr   r    r!   r   <module>r      s\     =  * 
 < ? 9 4 T5M2"b*- + 3-( T5M2*RR!R,AB"b*M + C 3M8 *RI6) 7),  T5M2:<WX4 Y 34* T5M2*,<=C > 3C* aV,T5M2*RR=M,NO7 P 3 -7( *R,>?T5M2F" 3 @F"R )+T+V  
B!b)	!72772;#5#5bhh#?D	$&QSUV (:H	
 
$	%xrxx0B'CQG	2662661aA.	/:L1MqQ BHH'(BHH'rxx8H	
%6'07<'0T T5M2-1v6.2 7 3.2b 	bffa[
bRVVRVV$sCj12<<, u%%?   |4UDM2)E4=9R : 3 5	R> u%%B   |4UDM2)E4=9"U : 3 5	"Ur!   