
    ri:<              	          d dl Z d dlmZ d dlmZmZmZmZ d dlm	Z	m
Z
mZ d dlmZ ddgZd Zd	 Zd
 Zd Z eddg       ee
ddedd      d dddd              Z eddgdg       ee
ddedd      ddd dd              Zy)    N)stats)xp_capabilitiesarray_namespace
xp_promotexp_result_type)_SimpleNormalSignificanceResult_get_pvalue)_axis_nan_policy_factorychatterjeexispearmanrhoc                 P   | j                   d   }|j                  | d      }|j                  ||      \  }}|j                  ||d      }t	        j
                  |dd      }t	        j
                  | dd      }|j                  || j                        |j                  || j                        }}|j                  |j                  |j                  |d            d      }|rdd|z  |dz  dz
  z  z
  }	n'd|j                  ||z
  |z  d      z  }
d||z  |
z  z
  }	|	||fS )Naxismax)methodr            )shapeargsortbroadcast_arraystake_along_axisr   rankdataastypedtypesumabsdiff)xyy_continuousxpnjrlnum	statisticdens              V/var/www/auto_recruiter/arenv/lib/python3.12/site-packages/scipy/stats/_correlation.py_xi_statisticr-      s&    	
A 	

12
Aq!$DAq
1ab)A 	qR0A 	r%b1A99Q "))Aqww"7qA
&&+,2&
6CC16A:..	"&&!a%12&..C#%	a?    c                    | j                   d   }|rF|j                  t        j                  d      t        j                  |      z  | j                        S |j                  d|dz   | j                        }|j                  | d      }|j                  |d      }d|dz  z  |j                  d|z  d|z  z
  dz   |dz  z  d      z  }d|dz  z  |j                  |||z
  |z  z   dz  d      z  }	d|d	z  z  |j                  d|z  d|z  z
  dz   |z  d      z  }
d|d	z  z  |j                  |||z
  z  d      z  }|d|	z  z
  |
dz  z   |dz  z  }|j                  |      t        j                  |      z  S )
Nr   g?)r   r   r      r      r   )	r   asarraymathsqrtr   arangesortcumulative_sumr   )r'   r(   r#   r$   r%   iuvanbncndntau2s                r,   _xi_stdr@   )   s    	
A zz$))E*TYYq\9zII
 			!QU!''	*A
A
!"%A	
QTBFFAaC!A#IMQT1F;	;B	
QTBFFAQ	MA-BF7	7B	
QTBFFAaC!A#IMQ.RF8	8B	
QTBFFAQKrF2	2B2IAQ&D774=499Q<''r.   c                     | dvrt        d      t        |t        j                        s"|j	                         }d}|dk7  rt        |      | |fS )N>   FTz`y_continuous` must be boolean.z@`method` must be 'asymptotic' or a `PermutationMethod` instance.
asymptotic)
ValueError
isinstancer   PermutationMethodlower)r#   r   messages      r,   _chatterjeexi_ivrH   C   sV     =(:;;fe556T\!W%%r.   c                 2    | j                   | j                  fS )N)r*   pvalue)res_s     r,   _unpackrM   S   s    ==#**$$r.   )
dask.arrayzno take_along_axis)cupyz#no rankdata (xp.repeats limitation))skip_backendsTr   r   )paired	n_samplesresult_to_tuple	n_outputs	too_smallFrB   )r   r#   r   c                &    t         |      t        |      \  }t         |d      \   }d}|dk(  r?t         |      \  }}}t	        ||      }	t               }
t        ||	z  |
|      }ndt        |t        j                        rJt        j                  d|f fd|dd	|j                         d
di}|j                  |j                  }}j                  dk(  r|d   n|}j                  dk(  r|d   n|}t        ||      S )a  Compute the xi correlation and perform a test of independence

    The xi correlation coefficient is a measure of association between two
    variables; the value tends to be close to zero when the variables are
    independent and close to 1 when there is a strong association. Unlike
    other correlation coefficients, the xi correlation is effective even
    when the association is not monotonic.

    Parameters
    ----------
    x, y : array-like
        The samples: corresponding observations of the independent and
        dependent variable. The (N-d) arrays must be broadcastable.
    axis : int, default: 0
        Axis along which to perform the test.
    method : 'asymptotic' or `PermutationMethod` instance, optional
        Selects the method used to calculate the *p*-value.
        Default is 'asymptotic'. The following options are available.

        * ``'asymptotic'``: compares the standardized test statistic
          against the normal distribution.
        * `PermutationMethod` instance. In this case, the p-value
          is computed using `permutation_test` with the provided
          configuration options and other appropriate settings.

    y_continuous : bool, default: False
        Whether `y` is assumed to be drawn from a continuous distribution.
        If `y` is drawn from a continuous distribution, results are valid
        whether this is assumed or not, but enabling this assumption will
        result in faster computation and typically produce similar results.

    Returns
    -------
    res : SignificanceResult
        An object containing attributes:

        statistic : float
            The xi correlation statistic.
        pvalue : float
            The associated *p*-value: the probability of a statistic at least as
            high as the observed value under the null hypothesis of independence.

    See Also
    --------
    scipy.stats.pearsonr, scipy.stats.spearmanr, scipy.stats.kendalltau

    Notes
    -----
    There is currently no special handling of ties in `x`; they are broken arbitrarily
    by the implementation. [1]_ recommends: "if there are ties among the Xi's, then
    choose an increasing rearrangement as above by breaking ties uniformly at random."
    This is easily accomplished by adding a small amount of random noise to `x`; see
    examples.

    [1]_ notes that the statistic is not symmetric in `x` and `y` *by design*:
    "...we may want to understand if :math:`Y` is a function :math:`X`, and not just
    if one of the variables is a function of the other." See [1]_ Remark 1.

    References
    ----------
    .. [1] Chatterjee, Sourav. "A new coefficient of correlation." Journal of
           the American Statistical Association 116.536 (2021): 2009-2022.
           :doi:`10.1080/01621459.2020.1758115`.

    Examples
    --------
    Generate perfectly correlated data, and observe that the xi correlation is
    nearly 1.0.

    >>> import numpy as np
    >>> from scipy import stats
    >>> rng = np.random.default_rng(348932549825235)
    >>> x = rng.uniform(0, 10, size=100)
    >>> y = np.sin(x)
    >>> res = stats.chatterjeexi(x, y)
    >>> res.statistic
    np.float64(0.9012901290129013)

    The probability of observing such a high value of the statistic under the
    null hypothesis of independence is very low.

    >>> res.pvalue
    np.float64(2.2206974648177804e-46)

    As noise is introduced, the correlation coefficient decreases.

    >>> noise = rng.normal(scale=[[0.1], [0.5], [1]], size=(3, 100))
    >>> res = stats.chatterjeexi(x, y + noise, axis=-1)
    >>> res.statistic
    array([0.79507951, 0.41824182, 0.16651665])

    Because the distribution of `y` is continuous, it is valid to pass
    ``y_continuous=True``. The statistic is identical, and the p-value
    (not shown) is only slightly different.

    >>> stats.chatterjeexi(x, y + noise, y_continuous=True, axis=-1).statistic
    array([0.79507951, 0.41824182, 0.16651665])

    Consider a case in which there are ties in `x`.

    >>> x = rng.integers(10, size=1000)
    >>> y = rng.integers(10, size=1000)

    [1]_ recommends breaking the ties uniformly at random.

    >>> d = rng.uniform(1e-5, size=x.size)
    >>> res = stats.chatterjeexi(x + d, y)
    >>> res.statistic
    -0.029919991638798438

    Since this gives a randomized estimate of the statistic, [1]_ also suggests
    considering the average over all possibilities of breaking ties. This is
    computationally infeasible when there are many ties, but a randomized estimate of
    *this* quantity can be obtained by considering many random possibilities of breaking
    ties.

    >>> d = rng.uniform(1e-5, size=(9999, x.size))
    >>> res = stats.chatterjeexi(x + d, y, axis=1)
    >>> np.mean(res.statistic)
    0.001186895213756626

    Tforce_floatingr$   greaterrB   r$   )alternativer$   c                 (    t        |       d   S )NrZ   r   )r-   )r"   r   r!   r$   r#   s     r,   <lambda>zchatterjeexi.<locals>.<lambda>   s    mAq,2&Nq&Q r.   pairings)datar*   r[   permutation_typer   r   r    )r   rH   r   r-   r@   r   r
   rD   r   rE   permutation_test_asdictr*   rJ   ndimr	   )r!   r"   r   r#   r   r[   xir'   r(   stdnormrJ   rK   r$   s   `  `         @r,   r   r   W   s#   ~ 
A	B ,L&AL&a4B7DAq
 K A|;AqaLR0R#XtL	FE33	4$$ Q#j	 EKNNDT	
  ]]CJJF77a<BRB!;;!+VBZFb&))r.   z	jax.numpy)rN   z+not supported by rankdata (take_along_axis))cpu_only
exceptionsrP   z	two-sided)r[   r   r   c               X   t        | |      }t        | |d|      }t        j                  | |      }t        j                  ||      }|j	                  ||d      }|j	                  ||d      }t        j
                  |||||      }	t        |	j                  |	j                        S )a  Calculate a Spearman rho correlation coefficient with associated p-value.

    The Spearman rank-order correlation coefficient is a nonparametric measure
    of the monotonicity of the relationship between two datasets.
    Like other correlation coefficients, it varies between -1 and +1 with 0
    implying no correlation. Coefficients of -1 or +1 are associated with an exact
    monotonic relationship.  Positive correlations indicate that as `x` increases,
    so does `y`; negative correlations indicate that as `x` increases, `y` decreases.
    The p-value is the probability of an uncorrelated system producing datasets
    with a Spearman correlation at least as extreme as the one computed from the
    observed dataset.

    Parameters
    ----------
    x, y : array-like
        The samples: corresponding observations of the independent and
        dependent variable. The (N-d) arrays must be broadcastable.
    alternative : {'two-sided', 'less', 'greater'}, optional
        Defines the alternative hypothesis. Default is 'two-sided'.
        The following options are available:

        * 'two-sided': the correlation is nonzero
        * 'less': the correlation is negative (less than zero)
        * 'greater':  the correlation is positive (greater than zero)

    method : ResamplingMethod, optional
        Defines the method used to compute the p-value. If `method` is an
        instance of `PermutationMethod`/`MonteCarloMethod`, the p-value is
        computed using
        `scipy.stats.permutation_test`/`scipy.stats.monte_carlo_test` with the
        provided configuration options and other appropriate settings.
        Otherwise, the p-value is computed using an asymptotic approximation of
        the null distribution.
    axis : int or None, optional
        If axis=0 (default), then each column represents a variable, with
        observations in the rows. If axis=1, the relationship is transposed:
        each row represents a variable, while the columns contain observations.
        If axis=None, then both arrays will be raveled.
        Like other `scipy.stats` functions, `axis` is interpreted after the
        arrays are broadcasted.

    Returns
    -------
    res : SignificanceResult
        An object containing attributes:

        statistic : floating point array or NumPy scalar
            Spearman correlation coefficient
        pvalue : floating point array NumPy scalar
            The p-value - the probabilitiy of realizing such an extreme statistic
            value under the null hypothesis that two samples have no ordinal
            correlation. See `alternative` above for alternative hypotheses.

    Warns
    -----
    `~scipy.stats.ConstantInputWarning`
        Raised if an input is a constant array.  The correlation coefficient
        is not defined in this case, so ``np.nan`` is returned.

    Notes
    -----
    `spearmanrho` was created to make improvements to SciPy's implementation of
    the Spearman correlation test without making backward-incompatible changes
    to `spearmanr`. Advantages of `spearmanrho` over `spearmanr` include:

    - `spearmanrho` follows standard array broadcasting rules.
    - `spearmanrho` is compatible with some non-NumPy arrays.
    - `spearmanrho` can compute exact p-values, even in the presence of ties,
      when an appropriate instance of `PermutationMethod` is provided via the
      `method` argument.

    References
    ----------
    .. [1] Zwillinger, D. and Kokoska, S. (2000). CRC Standard
       Probability and Statistics Tables and Formulae. Chapman & Hall: New
       York. 2000.
       Section  14.7
    .. [2] Kendall, M. G. and Stuart, A. (1973).
       The Advanced Theory of Statistics, Volume 2: Inference and Relationship.
       Griffin. 1973.
       Section 31.18

    Examples
    --------
    Univariate samples, approximate p-value.

    >>> import numpy as np
    >>> from scipy import stats
    >>> x = [1, 2, 3, 4, 5]
    >>> y = [5, 6, 7, 8, 7]
    >>> res = stats.spearmanrho(x, y)
    >>> res.statistic
    np.float64(0.8207826816681233)
    >>> res.pvalue
    np.float64(0.08858700531354405)

    Univariate samples, exact p-value.

    >>> res = stats.spearmanrho(x, y, method=stats.PermutationMethod())
    >>> res.statistic
    np.float64(0.8207826816681233)
    >>> res.pvalue
    np.float64(0.13333333333333333)

    Batch of univariate samples, one vectorized call.

    >>> rng = np.random.default_rng(98145152315484)
    >>> x2 = rng.standard_normal((2, 100))
    >>> y2 = rng.standard_normal((2, 100))
    >>> res = stats.spearmanrho(x2, y2, axis=-1)
    >>> res.statistic
    array([ 0.16585659, -0.12151215])
    >>> res.pvalue
    array([0.0991155 , 0.22846869])

    Bivariate samples using standard broadcasting rules.

    >>> res = stats.spearmanrho(x2[np.newaxis, :], x2[:, np.newaxis], axis=-1)
    >>> res.statistic
    array([[ 1.        , -0.14670267],
           [-0.14670267,  1.        ]])
    >>> res.pvalue
    array([[0.        , 0.14526128],
           [0.14526128, 0.        ]])

    TrW   r   F)copy)r   r[   r   )	r   r   r   r   r   pearsonrr	   r*   rJ   )
r!   r"   r[   r   r   r$   r   rxryrK   s
             r,   r   r      s    H 
A	B1a<E		%B		%B	2u5	)B	2u5	)B
..RKd
SCcmmSZZ88r.   )r3   scipyr   scipy._lib._array_apir   r   r   r   scipy.stats._stats_pyr   r	   r
   scipy.stats._axis_nan_policyr   __all__r-   r@   rH   rM   r   r   ra   r.   r,   <module>rt      s     3 3 P P A =
):(4  %  D O Q R,TQ*1Q!M u\ X*MRX*v $K=PQ ,TQ*1Q!M(3Dq F9M
F9r.   