From ce434879f22e90aefcf203a0b3bde8250e3ca40c Mon Sep 17 00:00:00 2001 From: Ziwei Huang Date: Tue, 29 Apr 2025 15:02:52 +0200 Subject: [PATCH] change: raise error in `circ_mean_ci` when data are uniform --- pycircstat2/descriptive.py | 85 +++++++++++++++++++++++++------------- tests/test_descriptive.py | 15 +++++++ 2 files changed, 71 insertions(+), 29 deletions(-) diff --git a/pycircstat2/descriptive.py b/pycircstat2/descriptive.py index 6e70fd1..ec2c216 100644 --- a/pycircstat2/descriptive.py +++ b/pycircstat2/descriptive.py @@ -826,7 +826,7 @@ def circ_mean_ci( \delta = \arccos\left(\sqrt{\frac{2n(2R^{2} - n\chi^{2}_{\alpha, 1})}{4n - \chi^{2}_{\alpha, 1}}} /R \right) $$ - For r $ge$ 0.9, + For r $\ge$ 0.9, $$ \delta = \arccos \left(\sqrt{n^2 - (n^2 - R^2)e^{\chi^2_{\alpha, 1}/n} } /R \right) @@ -880,6 +880,8 @@ def circ_mean_ci( - Section 4.4.4a/b, Fisher (1993) """ + + # n > 8, according to Ch 26.7 (Zar, 2010) if method == "approximate": (lb, ub) = _circ_mean_ci_approximate( @@ -947,10 +949,17 @@ def _circ_mean_ci_dispersion( f"n={n} is too small (< 25) for computing CI with circular dispersion." ) - # TODO: sometime return nan because x in arcsin(x) is larger than 1. - # Should we centered the data here? <- No. + delta = circ_dispersion(alpha=alpha, w=w) + z = norm.ppf(1 - 0.5 * (1 - ci)) + inner = np.sqrt(delta / n) * z + + if inner >= 1: + raise ValueError( + "Data are too dispersed, likely close to uniform. CI undefined." + ) + d = np.arcsin( - np.sqrt(circ_dispersion(alpha=alpha, w=w) / n) * norm.ppf(1 - 0.5 * (1 - ci)) + inner ) lb = mean - d ub = mean + d @@ -1016,37 +1025,40 @@ def _circ_mean_ci_approximate( if n is None: raise ValueError("Sample size `n` is missing.") + if not (8 <= n): + raise ValueError("n must be ≥ 8 for the approximation (Zar 26.6).") + R = n * r + chi2_critical = chi2.isf(1 - ci, 1) + r_min = np.sqrt(chi2_critical / (2 * n)) - # Zar cited (Upton, 1986) that n can be as small as 8 - # but I encountered a few cases where n<11 will result in negative - # value within `inner`. - if n >= 8: # - if r <= 0.9: # eq(26.24) - inner = ( - np.sqrt( - (2 * n * (2 * R**2 - n * chi2.isf(0.05, 1))) - / (4 * n - chi2.isf(1 - ci, 1)) - ) - / R - ) - else: # eq(26.25) - inner = np.sqrt(n**2 - (n**2 - R**2) * np.exp(chi2.isf(1 - ci, 1) / n)) / R + if r < r_min: + raise ValueError( + f"Resultant length r={r:.4g} is too small for the " + f"approximate CI (must be > {r_min:.4g}). " + "Switch to the bootstrap (8 ≤ n < 25) or dispersion (n ≥ 25) method." + ) - d = np.arccos(inner) - - lb = float(mean - d) - ub = float(mean + d) + if r <= 0.9: # eq(26.24) + inner = ( + np.sqrt( + (2 * n * (2 * R**2 - n * chi2_critical)) + / (4 * n - chi2_critical) + ) + / R + ) + else: # eq(26.25) + inner = np.sqrt(n**2 - (n**2 - R**2) * np.exp(chi2_critical / n)) / R - if not is_within_circular_range(mean, lb, ub): - lb, ub = ub, lb + d = np.arccos(inner) + + lb = float(mean - d) + ub = float(mean + d) - return (lb, ub) + if not is_within_circular_range(mean, lb, ub): + lb, ub = ub, lb - else: - raise ValueError( - f"n={n} is too small (<= 8) for computing CI with approximation method." - ) + return (lb, ub) def _circ_mean_ci_bootstrap( @@ -1057,6 +1069,21 @@ def _circ_mean_ci_bootstrap( """ Implementation of Section 8.3 (Fisher, 1993, P207) """ + # sanity-check: is a mean direction identifiable? + n = len(alpha) + r = circ_r(alpha) + + # classic Rayleigh test approximation, avoids import cycles + z_stat = n * r**2 # Rayleigh's Z + p_val = np.exp(-z_stat) # p ≈ e^(−Z) (valid for n ≥ 10) + + if p_val > 0.05: # data look uniform ⇒ no mean dir. + raise ValueError( + "Bootstrap CI not computed: resultant length r={:.4f} " + "(Rayleigh p≈{:.3f}) is too small. " + "Sample may be uniform, so the mean direction is undefined." + .format(r, p_val) + ) # Precompute z0 and v0 from original data # algo 1 diff --git a/tests/test_descriptive.py b/tests/test_descriptive.py index a8e63f8..e34caea 100644 --- a/tests/test_descriptive.py +++ b/tests/test_descriptive.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from pycircstat2 import Circular, load_data from pycircstat2.descriptive import ( @@ -178,6 +179,20 @@ def test_circ_mean_ci(): # method: bootstrap (from P78, Fisher, 1993) # but how to test boostrap? + # test uniform distributed data (all method should raise errors) + from pycircstat2.distributions import circularuniform + rng = np.random.default_rng(seed=25) + d_uni = circularuniform.rvs(size=25) + + with pytest.raises(ValueError): + circ_mean_ci(alpha=d_uni, method="approximate") + + with pytest.raises(ValueError): + circ_mean_ci(alpha=d_uni, method="bootstrap") + + with pytest.raises(ValueError): + circ_mean_ci(alpha=d_uni, method="dispersion") + def test_circ_median_ci(): d_ex3 = load_data("B6", "fisher")