Source code for normtest.ryan_joiner

"""This module contains functions related to the Ryan-Joiner test

##### List of functions (alphabetical order) #####

## Functions WITH good TESTS ###
- _critical_value(sample_size, alpha=0.05)
- _normal_order_statistic(x_data, weighted=False, cte_alpha="3/8")
- _order_statistic(sample_size, cte_alpha="3/8")
- _p_value(statistic, sample_size)
- citation(export=False)
- rj_test(x_data, alpha=0.05, cte_alpha="3/8", weighted=False)

## Functions WITH some TESTS ###
- _statistic(x_data, zi)
- correlation_plot(axes, x_data, cte_alpha="3/8", weighted=False)
- dist_plot(axes, critical_range=(4, 50), test=None)
- _make_line_up_data(x_data, weighted, cte_alpha)
- line_up(x_data, cte_alpha="3/8", weighted=False, seed=42, correct=False)


## Functions WITHOUT tests ###



##### List of CLASS (alphabetical order) #####



Author: Anderson Marcos Dias Canteli <andersonmdcanteli@gmail.com>

Created : November 02, 2023

Last update: November 13, 2023
"""

##### IMPORTS #####

### Standard ###
from collections import namedtuple


### Third party ###
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats
from scipy import interpolate

# import seaborn as sns


### self made ###
from paramcheckup import parameters, types, numbers, numpy_arrays
from . import bibmaker
from .utils import constants
from .utils.helpers import AlphaManagement, SafeManagement

##### DOCUMENTATION #####
from .utils import documentation as docs

#### CONSTANTS ####
RyanJoiner1976 = "RYAN, T. A., JOINER, B. L. Normal Probability Plots and Tests for Normality, Technical Report, Statistics Department, The Pennsylvania State University, 1976. Available at `www.additive-net.de <https://www.additive-net.de/de/component/jdownloads/send/70-support/236-normal-probability-plots-and-tests-for-normality-thomas-a-ryan-jr-bryan-l-joiner>`_. Access on: 22 Jul. 2023."
Blom1958 = "BLOM, G. Statistical Estimates and Transformed Beta-Variables. New York: John Wiley and Sons, Inc, p. 71-72, 1958."


##### CLASS #####


##### FUNCTIONS #####



[docs]
def citation(export=False):
    """Return the bibliographic reference of the Ryan-Joiner test.

    The entry is assembled by ``bibmaker.make_techreport``; the `export` flag is forwarded to it unchanged.

    Parameters
    ----------
    export : bool
        Whether to export the reference as `ryan-joiner.bib` file (`True`) or not (`False`, default);


    Returns
    -------
    reference : str
        The Ryan Joiner Test reference

    """
    # fields of the technical report entry
    entry = {
        "citekey": "RyanJoiner1976",
        "author": "Thomas A. Ryan and Brian L. Joiner",
        "title": "Normal Probability Plots and Tests for Normality",
        "institution": "The Pennsylvania State University, Statistics Department",
        "year": "1976",
        "url": "https://api.semanticscholar.org/CorpusID:9233652",
    }
    return bibmaker.make_techreport(export=export, **entry)




[docs]
@docs.docstring_parameter(
    sample_size=docs.SAMPLE_SIZE["type"],
    samp_size_desc=docs.SAMPLE_SIZE["description"],
    alpha=docs.ALPHA["type"],
    alpha_desc=docs.ALPHA["description"],
    critical=docs.CRITICAL["type"],
    critical_desc=docs.CRITICAL["description"],
    rj_ref=RyanJoiner1976,
)
def _critical_value(sample_size, alpha=0.05):
    """This function calculates the critical value of the Ryan-Joiner test [1]_.

    Parameters
    ----------
    {sample_size}
        {samp_size_desc}
    {alpha}
        {alpha_desc}


    Returns
    -------
    {critical}
        {critical_desc}


    Raises
    ------
    ValueError
        If `alpha` is not one of the tabulated significance levels (``0.10``, ``0.05`` or ``0.01``).


    See Also
    --------
    rj_test


    Notes
    -----
    The critical values are calculated using [1]_ the following equations:

    .. math::

            R_{{p;\\alpha=0.10}}^{{'}} = 1.0071 - \\frac{{0.1371}}{{\\sqrt{{n}}}} - \\frac{{0.3682}}{{n}} + \\frac{{0.7780}}{{n^{{2}}}}

            R_{{p;\\alpha=0.05}}^{{'}} = 1.0063 - \\frac{{0.1288}}{{\\sqrt{{n}}}} - \\frac{{0.6118}}{{n}} + \\frac{{1.3505}}{{n^{{2}}}}

            R_{{p;\\alpha=0.01}}^{{'}} = 0.9963 - \\frac{{0.0211}}{{\\sqrt{{n}}}} - \\frac{{1.4106}}{{n}} + \\frac{{3.1791}}{{n^{{2}}}}

    where :math:`n` is the sample size.

    The `alpha` value is matched against the tabulated levels with a small absolute tolerance, so that values such as ``1 - 0.9`` are recognized as ``0.10``.


    References
    ----------
    .. [1] {rj_ref}


    Examples
    --------
    >>> from normtest import ryan_joiner
    >>> critical = ryan_joiner._critical_value(10, alpha=0.05)
    >>> print(critical)
    0.9178948637370312

    """
    # (alpha, (a, b, c, d)) for the model  a - b / sqrt(n) - c / n + d / n**2
    coefficients = (
        (0.10, (1.0071, 0.1371, 0.3682, 0.7780)),
        (0.05, (1.0063, 0.1288, 0.6118, 1.3505)),
        (0.01, (0.9963, 0.0211, 1.4106, 3.1791)),
    )

    for level, (a, b, c, d) in coefficients:
        # tolerant comparison: exact float equality would send e.g. ``1 - 0.9`` to the wrong equation
        if np.isclose(alpha, level, rtol=0.0, atol=1e-12):
            return (
                a
                - (b / np.sqrt(sample_size))
                - (c / sample_size)
                + (d / sample_size**2)
            )

    # an unsupported level must not silently fall back to the alpha=0.01 equation
    raise ValueError(
        f"`alpha` must be one of 0.10, 0.05 or 0.01, but got '{alpha}'."
    )




[docs]
@docs.docstring_parameter(
    x_data=docs.X_DATA["type"],
    x_data_desc=docs.X_DATA["description"],
    weighted=docs.WEIGHTED["type"],
    weighted_desc=docs.WEIGHTED["description"],
    cte_alpha=docs.CTE_ALPHA["type"],
    cte_alpha_desc=docs.CTE_ALPHA["description"],
    zi=docs.ZI["type"],
    zi_desc=docs.ZI["description"],
)
def _normal_order_statistic(x_data, weighted=False, cte_alpha="3/8"):
    """This function maps the order statistics of the sample onto the standard Normal scale (:math:`z_{{i}}`).

    Parameters
    ----------
    {x_data}
        {x_data_desc}
    {weighted}
        {weighted_desc}
    {cte_alpha}
        {cte_alpha_desc}


    Returns
    -------
    {zi}
        {zi_desc}


    Notes
    -----
    The values on the standard Normal scale are obtained with:

    .. math::

            z_{{i}} = \\phi^{{-1}} \\left(p_{{i}} \\right)

    where :math:`p_{{i}}` is the normal statistical order and :math:`\\phi^{{-1}}` is the inverse of the standard Normal distribution, evaluated with :doc:`stats.norm.ppf() <scipy:reference/generated/scipy.stats.norm>`.

    The statistical order (:math:`p_{{i}}`) comes from :func:`_order_statistic`, which also describes the `cte_alpha` parameter.

    When `weighted=True`, observations with the same value share the mean of their :math:`p_{{i}}` values before the transformation is applied.


    See Also
    --------
    rj_test


    Examples
    --------
    The first example uses `weighted=False`:

    >>> import numpy as np
    >>> from normtest import ryan_joiner
    >>> data = np.array([148, 148, 154, 158, 158, 160, 161, 162, 166, 170, 182, 195, 210])
    >>> result = ryan_joiner._normal_order_statistic(data, weighted=False)
    >>> print(result)
    [-1.67293739 -1.16188294 -0.84837993 -0.6020065  -0.38786869 -0.19032227
    0.          0.19032227  0.38786869  0.6020065   0.84837993  1.16188294
    1.67293739]

    The second example uses `weighted=True`, with the same data set:

    >>> result = ryan_joiner._normal_order_statistic(data, weighted=True)
    >>> print(result)
    [-1.37281032 -1.37281032 -0.84837993 -0.4921101  -0.4921101  -0.19032227
    0.          0.19032227  0.38786869  0.6020065   0.84837993  1.16188294
    1.67293739]


    Note that the two results differ only at the positions of repeated values. If the data set has no repeated values, both options give the same result.

    """
    # the sample is ordered here, so callers may pass unsorted data
    sorted_data = np.sort(x_data)
    positions = _order_statistic(
        sample_size=sorted_data.size,
        cte_alpha=cte_alpha,
    )

    if not weighted:
        return stats.norm.ppf(positions)

    # tied observations receive the mean of their plotting positions
    table = pd.DataFrame({"x_data": sorted_data, "Ui": positions})
    averaged = table.groupby(["x_data"])["Ui"].transform("mean")
    return stats.norm.ppf(averaged)




[docs]
@docs.docstring_parameter(
    samp_size=docs.SAMPLE_SIZE["type"],
    samp_size_desc=docs.SAMPLE_SIZE["description"],
    cte_alpha=docs.CTE_ALPHA["type"],
    cte_alpha_desc=docs.CTE_ALPHA["description"],
    blom_ref=Blom1958,
    rj_ref=RyanJoiner1976,
)
def _order_statistic(sample_size, cte_alpha="3/8"):
    """This function estimates the normal statistical order (:math:`p_{{i}}`) using approximations [1]_.

    Parameters
    ----------
    {samp_size}
        {samp_size_desc}
    {cte_alpha}
        A `str` selecting the `cte_alpha` constant (see the Notes section). The options are:

        * `"0"`;
        * `"3/8"` (default);
        * `"1/2"`;

        Any other value is treated as `"3/8"`.


    Returns
    -------
    pi : :doc:`numpy array <numpy:reference/generated/numpy.array>`
        The estimated statistical order (:math:`p_{{i}}`)


    See Also
    --------
    rj_test


    Notes
    -----

    The `cte_alpha` (:math:`\\alpha_{{cte}}`) parameter corresponds to the values studied by [1]_, which adopts the following equation to estimate the statistical order:

    .. math::

            p_{{i}} = \\frac{{i - \\alpha_{{cte}}}}{{n - 2 \\times \\alpha_{{cte}} + 1}}

    where :math:`n` is the sample size and :math:`i` is the ith observation.


    .. admonition:: Info

        `cte_alpha="3/8"` is adopted in the implementations of the Ryan-Joiner test in Minitab and Statext software. This option is also cited as an alternative by [2]_.


    References
    ----------
    .. [1] {blom_ref}

    .. [2] {rj_ref}


    Examples
    --------
    >>> from normtest import ryan_joiner
    >>> size = 10
    >>> pi = ryan_joiner._order_statistic(size)
    >>> print(pi)
    [0.06097561 0.15853659 0.25609756 0.35365854 0.45121951 0.54878049
    0.64634146 0.74390244 0.84146341 0.93902439]

    """
    # start from the default constant and override it only for the two other documented options
    offset = 3 / 8
    if cte_alpha == "0":
        offset = 0
    elif cte_alpha == "1/2":
        offset = 0.5

    ranks = np.arange(1, sample_size + 1)
    denominator = sample_size - 2 * offset + 1
    return (ranks - offset) / denominator




[docs]
@docs.docstring_parameter(
    statistic=docs.STATISTIC["type"],
    statistic_desc=docs.STATISTIC["description"],
    samp_size=docs.SAMPLE_SIZE["type"],
    samp_size_desc=docs.SAMPLE_SIZE["description"],
    p_value=docs.P_VALUE["type"],
    p_value_desc=docs.P_VALUE["description"],
    rj_ref=RyanJoiner1976,
)
def _p_value(statistic, sample_size):
    """This function estimates the probability associated with the Ryan-Joiner Normality test [1]_.


    Parameters
    ----------
    {statistic}
        {statistic_desc}
    {samp_size}
        {samp_size_desc}


    Returns
    -------
    {p_value}
        {p_value_desc}


    See Also
    --------
    rj_test


    Notes
    -----
    The probability is obtained by linear interpolation of the test statistic between the critical values of the Ryan-Joiner test [1]_ at :math:`\\alpha=0.10`, :math:`\\alpha=0.05` and :math:`\\alpha=0.01`. The interpolation uses the :doc:`scipy.interpolate.interp1d() <scipy:reference/generated/scipy.interpolate.interp1d>` function.

    Outside the range covered by these critical values a `str` is returned instead of a number:

    * If the test statistic is greater than the largest critical value, the result is always *"p > 0.100"*.
    * If the test statistic is lower than the smallest critical value, the result is always *"p < 0.010"*.


    .. warning:: The estimated :math:`p_{{value}}` may not be accurate as it is calculated using linear interpolation


    References
    ----------
    .. [1] {rj_ref}


    Examples
    --------
    >>> from normtest import ryan_joiner
    >>> p_value = ryan_joiner._p_value(0.90, 10)
    >>> print(p_value)
    0.030930589077996555

    """
    levels = np.array([0.10, 0.05, 0.01])
    thresholds = np.array(
        [
            _critical_value(sample_size=sample_size, alpha=level)
            for level in levels
        ]
    )

    # statistics outside the tabulated range cannot be interpolated
    if statistic > max(thresholds):
        return "p > 0.100"
    if statistic < min(thresholds):
        return "p < 0.010"

    interpolator = interpolate.interp1d(thresholds, levels)
    return float(interpolator(statistic))




[docs]
@docs.docstring_parameter(
    x_data=docs.X_DATA["type"],
    x_data_desc=docs.X_DATA["description"],
    zi=docs.ZI["type"],
    zi_desc=docs.ZI["description"],
    statistic=docs.STATISTIC["type"],
    statistic_desc=docs.STATISTIC["description"],
    rj_ref=RyanJoiner1976,
)
def _statistic(x_data, zi):
    """This function estimates the Ryan-Joiner test statistic [1]_.

    Parameters
    ----------
    {x_data}
        {x_data_desc}
    {zi}
        {zi_desc}


    Returns
    -------
    {statistic}
        {statistic_desc}


    Notes
    -----
    The test statistic (:math:`R_{{p}}`) is the correlation between the ordered data and the Normal statistical order:

    .. math::

            R_{{p}}=\\dfrac{{\\sum_{{i=1}}^{{n}}x_{{(i)}}z_{{(i)}}}}{{\\sqrt{{s^{{2}}(n-1)\\sum_{{i=1}}^{{n}}z_{{(i)}}^2}}}}

    where :math:`z_{{(i)}}` values are the z-score values of the corresponding experimental data (:math:`x_{{({{i)}}}}`) value, :math:`n` is the sample size and :math:`s^{{2}}` is the sample variance.

    The correlation is computed with :doc:`scipy.stats.pearsonr() <scipy:reference/generated/scipy.stats.pearsonr>`; only the coefficient is returned.

    This function does not sort `x_data`; it must already be ordered consistently with `zi`.


    References
    ----------
    .. [1] {rj_ref}


    Examples
    --------
    >>> from normtest import ryan_joiner
    >>> import numpy as np
    >>> x_data = np.array([148, 148, 154, 158, 158, 160, 161, 162, 166, 170, 182, 195, 210])
    >>> x_data = np.sort(x_data)
    >>> normal_order = ryan_joiner._normal_order_statistic(x_data)
    >>> result = ryan_joiner._statistic(x_data, normal_order)
    >>> print(result)
    0.9225156050800545

    """
    correlation = stats.pearsonr(zi, x_data)
    # the first element is the coefficient; the associated p-value is not used
    return correlation[0]




[docs]
@docs.docstring_parameter(
    x_data=docs.X_DATA["type"],
    x_data_desc=docs.X_DATA["description"],
    alpha=docs.ALPHA["type"],
    alpha_desc=docs.ALPHA["description"],
    cte_alpha=docs.CTE_ALPHA["type"],
    cte_alpha_desc=docs.CTE_ALPHA["description"],
    weighted=docs.WEIGHTED["type"],
    weighted_desc=docs.WEIGHTED["description"],
    statistic=docs.STATISTIC["type"],
    statistic_desc=docs.STATISTIC["description"],
    critical=docs.CRITICAL["type"],
    critical_desc=docs.CRITICAL["description"],
    p_value=docs.P_VALUE["type"],
    p_value_desc=docs.P_VALUE["description"],
    rj_ref=RyanJoiner1976,
)
def rj_test(x_data, alpha=0.05, cte_alpha="3/8", weighted=False):
    """This function applies the Ryan-Joiner Normality test [1]_.

    Parameters
    ----------
    {x_data}
        {x_data_desc}
    {alpha}
        {alpha_desc}
    {cte_alpha}
        {cte_alpha_desc}
    {weighted}
        {weighted_desc}


    Returns
    -------
    result : tuple with
        {statistic}
            {statistic_desc}
        {critical}
            {critical_desc}
        {p_value}
            {p_value_desc}
        conclusion : str
            The test conclusion (e.g, Normal/Not Normal).


    See Also
    --------
    correlation_plot
    dist_plot


    Notes
    -----
    The test statistic (:math:`R_{{p}}`) is the correlation between the ordered data and the Normal statistical order:

    .. math::

            R_{{p}}=\\dfrac{{\\sum_{{i=1}}^{{n}}x_{{(i)}}z_{{(i)}}}}{{\\sqrt{{s^{{2}}(n-1)\\sum_{{i=1}}^{{n}}z_{{(i)}}^2}}}}

    where :math:`z_{{(i)}}` values are the z-score values of the corresponding experimental data (:math:`x_{{({{i)}}}}`) value and :math:`s^{{2}}` is the sample variance.

    The correlation is estimated using :func:`_statistic`.

    The Normality test has the following assumptions:

    .. admonition:: \u2615

       :math:`H_0:` Data was sampled from a Normal distribution.

       :math:`H_1:` The data was sampled from a distribution other than the Normal distribution.


    The conclusion of the test is based on the comparison between the `critical` value (at :math:`\\alpha` significance level) and `statistic` of the test:

    .. admonition:: \u2615

       if critical :math:`\\leq` statistic:
           Fail to reject :math:`H_0:` (e.g., data is Normal)
       else:
           Reject :math:`H_0:` (e.g., data is not Normal)

    The critical values are obtained using :func:`_critical_value` and the probability using :func:`_p_value`.


    .. warning:: The estimated :math:`p_{{value}}` may not be accurate as it is calculated using linear interpolation.

    References
    ----------
    .. [1] {rj_ref}


    Examples
    --------
    >>> from normtest import ryan_joiner
    >>> from scipy import stats
    >>> data = stats.norm.rvs(loc=0, scale=1, size=30, random_state=42)
    >>> result = ryan_joiner.rj_test(data)
    >>> print(result)
    RyanJoiner(statistic=0.990439558451558, critical=0.963891667086667, p_value='p > 0.100', conclusion='Fail to reject H₀')

    """
    ordered = np.sort(x_data)
    n_obs = ordered.size

    # Normal scores paired with the ordered sample
    scores = _normal_order_statistic(
        x_data=ordered,
        weighted=weighted,
        cte_alpha=cte_alpha,
    )

    statistic = _statistic(x_data=ordered, zi=scores)
    critical_value = _critical_value(sample_size=n_obs, alpha=alpha)
    p_value = _p_value(statistic=statistic, sample_size=n_obs)

    # H0 is rejected only when the statistic falls below the critical value
    conclusion = (
        constants.REJECTION
        if statistic < critical_value
        else constants.ACCEPTATION
    )

    outcome = namedtuple(
        "RyanJoiner", ["statistic", "critical", "p_value", "conclusion"]
    )
    return outcome(
        statistic=statistic,
        critical=critical_value,
        p_value=p_value,
        conclusion=conclusion,
    )




[docs]
@docs.docstring_parameter(
    axes=docs.AXES["type"],
    axes_desc=docs.AXES["description"],
    x_data=docs.X_DATA["type"],
    x_data_desc=docs.X_DATA["description"],
    cte_alpha=docs.CTE_ALPHA["type"],
    cte_alpha_desc=docs.CTE_ALPHA["description"],
    weighted=docs.WEIGHTED["type"],
    weighted_desc=docs.WEIGHTED["description"],
    rj_ref=RyanJoiner1976,
)
def correlation_plot(axes, x_data, cte_alpha="3/8", weighted=False):
    """This function draws the Ryan-Joiner test [1]_ correlation graph on `axes`.

    Parameters
    ----------
    {axes}
        {axes_desc}
    {x_data}
        {x_data_desc}
    {cte_alpha}
        {cte_alpha_desc}
    {weighted}
        {weighted_desc}


    Returns
    -------
    {axes}
        {axes_desc}


    Notes
    -----
    The graph shows the ordered data against the Normal statistical order, the straight line fitted with :doc:`scipy.stats.linregress() <scipy:reference/generated/scipy.stats.linregress>` and the correlation coefficient of that fit rounded to four decimal places.


    See Also
    --------
    rj_test
    dist_plot


    References
    ----------
    .. [1] {rj_ref}


    Examples
    --------
    >>> from normtest import ryan_joiner
    >>> import matplotlib.pyplot as plt
    >>> from scipy import stats
    >>> data = stats.norm.rvs(loc=0, scale=1, size=30, random_state=42)
    >>> fig, ax = plt.subplots(figsize=(6, 4))
    >>> ryan_joiner.correlation_plot(axes=ax, x_data=data)
    >>> #plt.savefig("correlation_plot.png")
    >>> plt.show()

    .. image:: img/correlation_plot.png
        :alt: Correlation chart for Ryan-Joiner test Normality test
        :align: center

    """

    constants.warning_plot()

    # ordered sample and its Normal scores
    ordered = np.sort(x_data)
    scores = _normal_order_statistic(
        x_data=ordered,
        weighted=weighted,
        cte_alpha=cte_alpha,
    )

    # straight line fitted to the (scores, ordered) pairs
    fit = stats.linregress(scores, ordered)
    fitted = scores * fit.slope + fit.intercept

    # experimental points and trend line
    axes.scatter(scores, ordered, fc="none", ec="k")
    axes.plot(scores, fitted, c="r")

    # correlation coefficient shown in the upper left corner (axes coordinates)
    label = f"$R_{{p}}={round(fit.rvalue, 4)!s}$"
    axes.text(0.1, 0.9, label, ha="left", va="center", transform=axes.transAxes)

    # axis titles
    axes.set_xlabel("Normal statistical order")
    axes.set_ylabel("Ordered data")

    return axes




[docs]
@docs.docstring_parameter(
    axes=docs.AXES["type"],
    axes_desc=docs.AXES["description"],
    statistic=docs.STATISTIC["type"],
    statistic_desc=docs.STATISTIC["description"],
    sample_size=docs.SAMPLE_SIZE["type"],
    # the sample size entry must use its own description (it used the `axes` one)
    sample_size_desc=docs.SAMPLE_SIZE["description"],
    rj_ref=RyanJoiner1976,
)
def dist_plot(
    axes,
    critical_range=(4, 50),
    test=None,
):
    """This function draws the critical values of the Ryan-Joiner Normality test [1]_ on `axes`.

    Parameters
    ----------
    {axes}
        {axes_desc}
    critical_range : tuple (optional), with two elements:
        x_min : int, optional
            The lower range of the number of observations for the critical values (default is ``4``).
        x_max : int, optional
            The upper range of the number of observations for the critical values (default is ``50``).
    test : tuple (optional), with two elements:
        {statistic}
            {statistic_desc}
        {sample_size}
            {sample_size_desc}


    Returns
    -------
    {axes}
        {axes_desc}


    Notes
    -----
    The critical values are drawn for :math:`\\alpha=0.10`, :math:`\\alpha=0.05` and :math:`\\alpha=0.01`, for every sample size from `x_min` to `x_max` (both included).

    When `test` is given, the statistic is added as a red triangle. A warning is issued through ``constants.user_warning`` if its sample size lies outside `critical_range`; the point is drawn anyway.


    See Also
    --------
    rj_test
    correlation_plot


    References
    ----------
    .. [1] {rj_ref}


    Examples
    --------
    >>> from normtest import ryan_joiner
    >>> import matplotlib.pyplot as plt
    >>> from scipy import stats
    >>> data = stats.norm.rvs(loc=0, scale=1, size=30, random_state=42)


    Apply the Ryan Joiner test


    >>> result = ryan_joiner.rj_test(data)


    Create the distribution graph using the test result


    >>> fig, ax = plt.subplots(figsize=(6, 4))
    >>> ryan_joiner.dist_plot(axes=ax, test=(result.statistic, data.size))
    >>> # plt.savefig("rj_dist_plot.png")
    >>> plt.show()


    .. image:: img/dist_plot.png
        :alt: Critical chart for Ryan-Joiner test Normality test
        :align: center

    """
    constants.warning_plot()

    x_min, x_max = critical_range[0], critical_range[1]
    n_samples = np.arange(x_min, x_max + 1)

    # (alpha, legend label, RGB color) of each critical curve
    curves = (
        (
            0.10,
            "$10\\%$",
            (0.2980392156862745, 0.4470588235294118, 0.6901960784313725),
        ),
        (
            0.05,
            "$5\\%$",
            (0.8666666666666667, 0.5176470588235295, 0.3215686274509804),
        ),
        (
            0.01,
            "$1\\%$",
            (0.3333333333333333, 0.6588235294117647, 0.40784313725490196),
        ),
    )

    # main test: drawn first so that it is the first entry of the legend
    if test is not None:
        statistic, sample_size = test[0], test[1]
        if sample_size > x_max:
            constants.user_warning(
                f"The graphical visualization is best suited if the sample size ({sample_size}) is smaller than the max value ({x_max})."
            )
        if sample_size < x_min:
            constants.user_warning(
                f"The graphical visualization is best suited if the sample size ({sample_size}) is greater than the min value ({x_min})."
            )

        axes.scatter(sample_size, statistic, color="r", label="$R_{p}$", marker="^")

    # adding critical values
    for alp, alp_label, color in curves:
        criticals = [
            _critical_value(sample_size=sample, alpha=alp) for sample in n_samples
        ]
        axes.scatter(n_samples, criticals, label=alp_label, color=color, s=10)

    axes.set_title("Ryan-Joiner")

    # adding details
    axes.legend(loc=4)
    axes.set_xlabel("Sample size")
    axes.set_ylabel("Critical value")

    return axes



# this function does not have documentation on purpose (private)
@docs.docstring_parameter(
    x_data=docs.X_DATA["type"],
    x_data_desc=docs.X_DATA["description"],
    cte_alpha=docs.CTE_ALPHA["type"],
    cte_alpha_desc=docs.CTE_ALPHA["description"],
    weighted=docs.WEIGHTED["type"],
    weighted_desc=docs.WEIGHTED["description"],
    zi=docs.ZI["type"],
    zi_desc=docs.ZI["description"],
)
def _make_line_up_data(x_data, weighted, cte_alpha):
    """This function prepares the data of one graph of the Ryan Joiner test `line_up` function.

    Parameters
    ----------
    {x_data}
        {x_data_desc}
    {weighted}
        {weighted_desc}
    {cte_alpha}
        {cte_alpha_desc}


    Returns
    -------
    {x_data}
        The input data *ordered*
    {zi}
        {zi_desc}
    y_pred : :doc:`numpy array <numpy:reference/generated/numpy.array>`
        The values predicted by the linear regression of the ordered `x_data` on `zi`;

    """
    ordered = np.sort(x_data)
    scores = _normal_order_statistic(
        x_data=ordered,
        weighted=weighted,
        cte_alpha=cte_alpha,
    )

    # straight line fitted to the (scores, ordered) pairs, evaluated at the scores
    fit = stats.linregress(scores, ordered)
    fitted = scores * fit.slope + fit.intercept

    return ordered, scores, fitted



[docs]
@docs.docstring_parameter(
    x_data=docs.X_DATA["type"],
    x_data_desc=docs.X_DATA["description"],
    cte_alpha=docs.CTE_ALPHA["type"],
    cte_alpha_desc=docs.CTE_ALPHA["description"],
    weighted=docs.WEIGHTED["type"],
    weighted_desc=docs.WEIGHTED["description"],
    zi=docs.ZI["type"],
    zi_desc=docs.ZI["description"],
)
def line_up(
    x_data,
    cte_alpha="3/8",
    weighted=False,
    seed=42,
    correct=False,
):
    """This function creates the figure with the correlation graphs for the line up method [1]_.

    Parameters
    ----------
    {x_data}
        {x_data_desc}
    {cte_alpha}
        {cte_alpha_desc}
    {weighted}
        {weighted_desc}
    seed : int, optional
        A numerical value that generates a new set or repeats pseudo-random numbers. Use a positive integer value to be able to repeat results. Default is ``42``. If `None`, a seed is drawn at random; the seed used is always written in the figure;
    correct : bool, optional
        Whether the graph with the true data (`x_data`) is highlighted in red (`True`) or drawn in black like the other graphs (`False`, default);


    Returns
    -------
    fig : matplotlib.figure.Figure
        A figure with the generated graphics;


    Notes
    -----
    This function is based on the line up method, where ``20`` correlation graphs are generated. One of these graphs contains the graph obtained with the true data (`x_data`). The other `19` graphs are drawn from pseudo-random data obtained from the Normal distribution with the same mean, standard deviation (``ddof=1``) and size as `x_data`.

    The graphs are arranged in a grid with 4 rows and 5 columns, which is filled column by column.

    The objective is to observe the 20 graphs at the same time and discover *which graph is the *least similar* to the behavior expected for the Normal distribution*.

    * If the identified graph corresponds to the true data, it can be concluded that the data set is not similar to a Normal distribution (with 95% confidence);
    * If the identified graph does not correspond to that obtained with real data, it can be concluded that the data set is similar to a Normal distribution (with 95% confidence);


    See Also
    --------
    rj_test
    dist_plot


    References
    ----------
    .. [1] BUJA, A. et al. Statistical inference for exploratory data analysis and model diagnostics. Philosophical Transactions of the Royal Society A: Mathematical, Physical and Engineering Sciences, v. 367, n. 1906, p. 4361–4383, 13 nov. 2009


    Examples
    --------
    The line-up method must be conducted in two steps. The first step involves generating a figure with 20 graphs from the data, without indicating which graph is the true one.


    >>> from normtest import ryan_joiner
    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> x_exp = np.array([5.1, 4.9, 4.7, 4.6, 5, 5.4, 4.6, 5, 4.4, 4.9, 5.4])
    >>> fig = ryan_joiner.line_up(x_exp, seed=42, correct=False)
    >>> fig.tight_layout()
    >>> # plt.savefig("line_up.png", bbox_inches="tight")
    >>> plt.show()


    .. image:: img/line_up.png
        :alt: Line-up method chart for Ryan-Joiner test Normality test
        :align: center

    The researcher must identify which of the 20 graphs deviates most significantly from what is expected for a Normal distribution. For instance, the graph located in the first row and second column.

    The second step involves determining which graph corresponds to the true data set. This can be accomplished by simply changing parameter `correct` from `False` to `True`:


    >>> fig = ryan_joiner.line_up(x_exp, seed=42, correct=True)
    >>> fig.tight_layout()
    >>> # plt.savefig("line_up_true.png", bbox_inches="tight")
    >>> plt.show()


    .. dropdown:: click to reveal output
        :animate: fade-in

        .. image:: img/line_up_true.png
            :alt: Line-up method chart for Ryan-Joiner test Normality test
            :align: center


        Given that the true data corresponds to the graph in the second row and first column, which was not identified as deviating from the others, we can conclude that the data set follows the Normal distribution, at least approximately.


    """
    constants.warning_plot()

    # accept any array-like (list, tuple, ...): `.mean()`, `.std()` and `.size` are used below
    x_data = np.asarray(x_data)

    # getting a proper seed
    if seed is None:
        seed = int(np.random.rand() * (2**32 - 1))

    n_panels = 20
    rng = np.random.default_rng(seed)
    # sampling the position of the graph with the true data
    real_data_position = rng.choice(np.arange(n_panels))

    # one seed per graph, drawn from 0..999, to generate the Normal data
    # NOTE(review): `rng.choice` samples with replacement by default, so two
    # synthetic graphs may share a seed - confirm whether this is intended
    seeds = rng.choice(np.arange(1000), n_panels)

    # synthetic samples mimic the location, spread and size of the true data
    mean = x_data.mean()
    std = x_data.std(ddof=1)
    size = x_data.size
    data = [
        x_data
        if index == real_data_position
        else stats.norm.rvs(loc=mean, scale=std, size=size, random_state=sed)
        for index, sed in enumerate(seeds)
    ]

    # creating the figure with the 20 axes (4 rows x 5 columns)
    n_rows, n_cols = 4, 5
    fig, ax = plt.subplots(n_rows, n_cols, figsize=(10, 7))

    # the true data is only highlighted when the answer is requested
    highlight = "r" if correct else "k"

    # the grid is filled column by column
    for col in range(n_cols):
        for row in range(n_rows):
            index = col * n_rows + row
            x, zi, y_pred = _make_line_up_data(
                x_data=data[index],
                weighted=weighted,
                cte_alpha=cte_alpha,
            )
            color = highlight if index == real_data_position else "k"
            panel = ax[row, col]
            panel.scatter(zi, x, c=color)
            panel.plot(zi, y_pred, ls="--", c=color)
            panel.tick_params(axis="both", which="major", labelsize=5)

    fig.text(0.5, 0.0, f"Normal statistical order (seed={seed})", ha="center")
    fig.text(0.0, 0.5, "Ordered data", va="center", rotation="vertical")
    fig.patch.set_facecolor("white")

    return fig



##### CLASS #####



[docs]
@docs.docstring_parameter(
    x_data=docs.X_DATA["type"],
    x_data_desc=docs.X_DATA["description"],
    statistic=docs.STATISTIC["type"],
    statistic_desc=docs.STATISTIC["description"],
    critical=docs.CRITICAL["type"],
    critical_desc=docs.CRITICAL["description"],
    p_value=docs.P_VALUE["type"],
    p_value_desc=docs.P_VALUE["description"],
    conclusion=docs.CONCLUSION["type"],
    conclusion_desc=docs.CONCLUSION["description"],
    alpha=docs.ALPHA["type"],
    alpha_desc=docs.ALPHA["description"],
    safe=docs.SAFE["type"],
    safe_desc=docs.SAFE["description"],
    rj_ref=RyanJoiner1976,
)
class RyanJoiner(AlphaManagement, SafeManagement):
    """This class instantiates an object to perform the Ryan-Joiner Normality test [1]_.


    Attributes
    ----------
    {statistic}
        {statistic_desc}
    {critical}
        {critical_desc}
    {p_value}
        {p_value_desc}
    {conclusion}
        {conclusion_desc}
    {alpha}
        {alpha_desc}
    normality : named tuple
        A tuple with the main test results summarized
    normality_hypothesis : str
        Description of the Normality test
    {safe}
        {safe_desc}

    Methods
    -------
    fit(x_data)
        Applies the Ryan-Joiner Normality test;
    dist_plot(axes, critical_range=(4, 50))
        Generates an `axis` with critical data from the Ryan-Joiner Normality test;
    correlation_plot(axes)
        Generates an `axis` with the Ryan-Joiner test correlation graph;
    line_up(seed=None, correct=False)
        Generates a `Figure` with the correlation graphs for the line up method;
    citation(export=False)
        Returns the Ryan-Joiner's test reference;

    References
    ----------
    .. [1] {rj_ref}


    Examples
    --------
    >>> from normtest import RyanJoiner
    >>> import numpy as np
    >>> x = np.array([6, 1, -4, 8, -2, 5, 0])
    >>> test = RyanJoiner()
    >>> test.fit(x)
    >>> print(test.normality)
    RyanJoiner(statistic=0.9844829186140105, critical=0.8977794003662074, p_value='p > 0.100', conclusion='Fail to reject H₀')

    """

    def __init__(
        self, alpha=0.05, safe=True, cte_alpha="3/8", weighted=False, **kwargs
    ):
        """Sets up a RyanJoiner instance on top of the `AlphaManagement` and `SafeManagement` classes.

        Parameters
        ----------
        alpha : float, optional
            Significance level (default is ``0.05``). When `safe` is truthy it must be one of ``0.01``, ``0.05`` or ``0.10``;
        safe : bool, optional
            Whether the constructor arguments are validated (``True``, default) or not (``False``);
        cte_alpha : str, optional
            Option stored on the instance and later forwarded as `cte_alpha` (default is ``"3/8"``). When `safe` is truthy it must be one of ``"0"``, ``"3/8"`` or ``"1/2"``;
        weighted : bool, optional
            Option stored on the instance and later forwarded as `weighted` (default is ``False``). When `safe` is truthy it must be a `bool`;
        **kwargs
            Extra keyword arguments handed to the parent initializers;


        Attributes
        ----------
        class_name : str
            Always ``"RyanJoiner"``; used as `kind_name` in the validation calls;
        conclusion : None
            Stays ``None`` until `fit` is applied; the other methods use it to detect whether the test was performed;
        cte_alpha : str
        weighted : bool
        alpha : float
        normality_hypothesis
            Taken from `constants.HYPOTESES`;


        """
        super().__init__(alpha=alpha, safe=safe, **kwargs)
        self.class_name = "RyanJoiner"
        # sentinel: remains None until `fit` stores a result
        self.conclusion = None

        if safe:
            # keyword arguments common to every validation call below; the
            # calls stay inline in this method so `stacklevel` keeps its meaning
            check_kwargs = {
                "kind": "class",
                "kind_name": self.class_name,
                "stacklevel": 4,
                "error": True,
            }
            parameters.param_options(
                option=alpha,
                options=[0.01, 0.05, 0.10],
                param_name="alpha",
                **check_kwargs,
            )
            parameters.param_options(
                option=cte_alpha,
                options=["0", "3/8", "1/2"],
                param_name="cte_alpha",
                **check_kwargs,
            )
            types.is_bool(value=weighted, param_name="weighted", **check_kwargs)

        # same assignment order as before: `set_safe` and `alpha` may be
        # managed by the parent classes
        self.cte_alpha = cte_alpha
        self.weighted = weighted
        self.set_safe(safe=safe)
        self.alpha = alpha
        self.normality_hypothesis = constants.HYPOTESES


[docs]
    @docs.docstring_parameter(
        x_data=docs.X_DATA["type"],
        x_data_desc=docs.X_DATA["description"],
        statistic=docs.STATISTIC["type"],
        statistic_desc=docs.STATISTIC["description"],
        critical=docs.CRITICAL["type"],
        critical_desc=docs.CRITICAL["description"],
        p_value=docs.P_VALUE["type"],
        p_value_desc=docs.P_VALUE["description"],
        conclusion=docs.CONCLUSION["type"],
        conclusion_desc=docs.CONCLUSION["description"],
    )
    def fit(
        self,
        x_data,
    ):
        """This method applies the Ryan-Joiner test to `x_data` and stores the outcome on the instance.

        Parameters
        ----------
        {x_data}
            {x_data_desc}


        Returns
        -------
        None
            Nothing is returned; the results are kept as the attributes listed under Notes.


        Notes
        -----
        When `self.safe` is truthy, `x_data` is first checked to be a NumPy array, to have one dimension, and to have its size checked against a lower bound of ``4`` (inclusive).

        After the test runs (through `rj_test`), the instance holds the following attributes:

        {x_data}
            {x_data_desc}
        {statistic}
            {statistic_desc}
        {critical}
            {critical_desc}
        {p_value}
            {p_value_desc}
        {conclusion}
            {conclusion_desc}
        normality
            The complete result object returned by `rj_test`


        See Also
        --------
        rj_test


        """
        func_name = "fit"

        if self.safe:
            # every check targets the same parameter, so the shared keyword
            # arguments are written once
            shared = {
                "param_name": "x_data",
                "kind": "method",
                "kind_name": func_name,
                "stacklevel": 4,
                "error": True,
            }
            types.is_numpy(value=x_data, **shared)
            numpy_arrays.n_dimensions(array=x_data, ndim=1, **shared)
            numpy_arrays.size_is_greater_than_lower(
                array=x_data, lower=4, inclusive=True, **shared
            )

        outcome = rj_test(
            x_data=x_data,
            alpha=self.alpha,
            cte_alpha=self.cte_alpha,
            weighted=self.weighted,
        )

        self.x_data = x_data
        # copy the individual result fields; `conclusion` doubles as the
        # "fit was applied" flag checked by the other methods
        for field in ("statistic", "critical", "p_value", "conclusion"):
            setattr(self, field, getattr(outcome, field))
        self.normality = outcome



[docs]
    @docs.docstring_parameter(
        axes=docs.AXES["type"],
        axes_desc=docs.AXES["description"],
    )
    def dist_plot(self, axes, critical_range=(4, 50)):
        """This method draws the critical data of the Ryan-Joiner Normality test, together with the fitted result, on `axes`.

        Parameters
        ----------
        {axes}
            {axes_desc}
        critical_range : tuple, optional
            A tuple ``(x_min, x_max)`` with two elements:

            x_min : int
                The lower range of the number of observations for the critical values (default is ``4``). In safe mode it is checked against a lower bound of ``4`` (inclusive);
            x_max : int
                The upper range of the number of observations for the critical values (default is ``50``);


        Returns
        -------
        {axes}
            {axes_desc}
        str
            A reminder message, returned instead when the `fit` method has not been applied yet;


        See Also
        --------
        dist_plot


        """
        method_name = "dist_plot"

        # nothing to plot before `fit` has stored a conclusion
        if self.conclusion is None:
            return "The Ryan Joiner Normality test was not performed yet.\nUse the 'fit' method to perform the test."

        if self.safe:
            shared = {
                "kind": "method",
                "kind_name": method_name,
                "stacklevel": 4,
                "error": True,
            }
            types.is_subplots(value=axes, param_name="axes", **shared)
            types.is_tuple(
                value=critical_range, param_name="critical_range", **shared
            )
            # elements are read one at a time so the first bound is validated
            # before the second one is even accessed
            for position, label in enumerate(("x_min", "x_max")):
                types.is_int(
                    value=critical_range[position], param_name=label, **shared
                )
            numbers.is_greater_than(
                number=critical_range[0],
                lower=4,
                param_name="x_min",
                inclusive=True,
                **shared,
            )

        # the fitted statistic and the sample size locate the test on the chart
        fitted_point = (self.statistic, self.x_data.size)
        return dist_plot(
            axes=axes,
            critical_range=critical_range,
            test=fitted_point,
        )



[docs]
    @docs.docstring_parameter(
        axes=docs.AXES["type"],
        axes_desc=docs.AXES["description"],
    )
    def correlation_plot(
        self,
        axes,
    ):
        """This method generates an axis with the correlation plot for the Ryan-Joiner Normality test.

        Parameters
        ----------
        {axes}
            {axes_desc}


        Returns
        -------
        {axes}
            {axes_desc}
        str
            A reminder message, returned instead when the `fit` method has not been applied yet;


        See Also
        --------
        correlation_plot


        """
        method_name = "correlation_plot"

        # the plot needs the data stored by `fit`
        if self.conclusion is None:
            return "The Ryan Joiner Normality test was not performed yet.\nUse the 'fit' method to perform the test."

        if self.safe:
            types.is_subplots(
                value=axes,
                param_name="axes",
                kind="method",
                kind_name=method_name,
                stacklevel=4,
                error=True,
            )

        # reuse the data and options the instance was fitted with
        plot_options = {
            "x_data": self.x_data,
            "cte_alpha": self.cte_alpha,
            "weighted": self.weighted,
        }
        return correlation_plot(axes=axes, **plot_options)



[docs]
    def line_up(
        self,
        seed=None,
        correct=False,
    ):
        """This method generates a `Figure` with the correlation graphs for the line up method.

        Parameters
        ----------
        seed : int or None, optional
            A numerical value used to generate the pseudo-random data sets. Use a positive integer to be able to repeat results. The default ``None`` is forwarded unchanged to the module-level `line_up` function (documented there as generating a random seed). In safe mode a value other than ``None`` must be a positive `int`;
        correct : bool, optional
            Whether the graph of the real `x_data` is highlighted in red (`True`) or drawn in black like the other graphs (`False`, default);


        Returns
        -------
        fig : matplotlib.figure.Figure
            A figure with the generated graphics;
        str
            A reminder message, returned instead when the `fit` method has not been applied yet;


        See Also
        --------
        line_up



        """
        method_name = "line_up"

        # the line up is built around the data stored by `fit`
        if self.conclusion is None:
            return "The Ryan Joiner Normality test was not performed yet.\nUse the 'fit' method to perform the test."

        if self.safe:
            shared = {
                "kind": "method",
                "kind_name": method_name,
                "stacklevel": 4,
                "error": True,
            }
            types.is_bool(value=correct, param_name="correct", **shared)
            # None is a legitimate "no fixed seed" request, so it skips the checks
            if seed is not None:
                types.is_int(value=seed, param_name="seed", **shared)
                numbers.is_positive(number=seed, param_name="seed", **shared)

        figure = line_up(
            x_data=self.x_data,
            cte_alpha=self.cte_alpha,
            weighted=self.weighted,
            seed=seed,
            correct=correct,
        )
        return figure



[docs]
    def citation(self, export=False):
        """This method gives access to the Ryan-Joiner test reference by delegating to the module-level `citation` function.

        Parameters
        ----------
        export : bool
            Passed on unchanged to `citation`; documented there as whether to export the reference as `RyanJoiner1976.bib` file (`True`) or not (`False`, default);


        Returns
        -------
        reference : str
            The Ryan-Joiner Test reference, as returned by `citation`;

        """
        # no instance state is involved; this only forwards the call
        reference = citation(export=export)
        return reference


    def __str__(self):
        """Returns the stored conclusion, or a reminder to use `fit` when no test was performed."""
        if self.conclusion is not None:
            return self.conclusion
        # `conclusion` is still the None sentinel: `fit` was never applied
        return "The Ryan-Joiner Normality test was not performed yet.\nUse the 'fit' method to perform the test."

    def __repr__(self):
        """Returns a fixed label identifying the object; it does not depend on the instance state."""
        label = "Ryan-Joiner Normality test"
        return label