From ce434879f22e90aefcf203a0b3bde8250e3ca40c Mon Sep 17 00:00:00 2001
From: Ziwei Huang <huang-ziwei@outlook.com>
Date: Tue, 29 Apr 2025 15:02:52 +0200
Subject: [PATCH] change: raise error in `circ_mean_ci` when data are uniform

---
 pycircstat2/descriptive.py | 85 +++++++++++++++++++++++++-------------
 tests/test_descriptive.py  | 15 +++++++
 2 files changed, 71 insertions(+), 29 deletions(-)

diff --git a/pycircstat2/descriptive.py b/pycircstat2/descriptive.py
index 6e70fd1..ec2c216 100644
--- a/pycircstat2/descriptive.py
+++ b/pycircstat2/descriptive.py
@@ -826,7 +826,7 @@ def circ_mean_ci(
     \delta = \arccos\left(\sqrt{\frac{2n(2R^{2} - n\chi^{2}_{\alpha, 1})}{4n - \chi^{2}_{\alpha, 1}}} /R \right)
     $$
 
-    For r $ge$ 0.9,
+    For r $\ge$ 0.9,
 
     $$
     \delta = \arccos \left(\sqrt{n^2 - (n^2 - R^2)e^{\chi^2_{\alpha, 1}/n} } /R \right)
@@ -880,6 +880,8 @@ def circ_mean_ci(
     - Section 4.4.4a/b, Fisher (1993)
     """
 
+
+
     #  n > 8, according to Ch 26.7 (Zar, 2010)
     if method == "approximate":
         (lb, ub) = _circ_mean_ci_approximate(
@@ -947,10 +949,17 @@ def _circ_mean_ci_dispersion(
             f"n={n} is too small (< 25) for computing CI with circular dispersion."
         )
 
-    # TODO: sometime return nan because x in arcsin(x) is larger than 1.
-    # Should we centered the data here? <- No.
+    delta = circ_dispersion(alpha=alpha, w=w)
+    z = norm.ppf(1 - 0.5 * (1 - ci))
+    inner = np.sqrt(delta / n) * z
+
+    if inner >= 1:
+        raise ValueError(
+            "Data are too dispersed, likely close to uniform.  CI undefined."
+        )
+
     d = np.arcsin(
-        np.sqrt(circ_dispersion(alpha=alpha, w=w) / n) * norm.ppf(1 - 0.5 * (1 - ci))
+        inner
     )
     lb = mean - d
     ub = mean + d
@@ -1016,37 +1025,40 @@ def _circ_mean_ci_approximate(
     if n is None:
         raise ValueError("Sample size `n` is missing.")
 
+    if not (8 <= n):
+        raise ValueError("n must be ≥ 8 for the approximation (Zar 26.6).")
+
     R = n * r
+    chi2_critical = chi2.isf(1 - ci, 1)
+    r_min = np.sqrt(chi2_critical / (2 * n))
 
-    # Zar cited (Upton, 1986) that n can be as small as 8
-    # but I encountered a few cases where n<11 will result in negative
-    # value within `inner`.
-    if n >= 8:  #
-        if r <= 0.9:  # eq(26.24)
-            inner = (
-                np.sqrt(
-                    (2 * n * (2 * R**2 - n * chi2.isf(0.05, 1)))
-                    / (4 * n - chi2.isf(1 - ci, 1))
-                )
-                / R
-            )
-        else:  # eq(26.25)
-            inner = np.sqrt(n**2 - (n**2 - R**2) * np.exp(chi2.isf(1 - ci, 1) / n)) / R
+    if r < r_min:
+        raise ValueError(
+            f"Resultant length r={r:.4g} is too small for the "
+            f"approximate CI (must be > {r_min:.4g}). "
+            "Switch to the bootstrap (8 ≤ n < 25) or dispersion (n ≥ 25) method."
+        )
 
-        d = np.arccos(inner)
-        
-        lb = float(mean - d)
-        ub = float(mean + d)
+    if r <= 0.9:  # eq(26.24)
+        inner = (
+            np.sqrt(
+                (2 * n * (2 * R**2 - n * chi2_critical))
+                / (4 * n - chi2_critical)
+            )
+            / R
+        )
+    else:  # eq(26.25)
+        inner = np.sqrt(n**2 - (n**2 - R**2) * np.exp(chi2_critical / n)) / R
 
-        if not is_within_circular_range(mean, lb, ub):
-            lb, ub = ub, lb
+    d = np.arccos(inner)
+    
+    lb = float(mean - d)
+    ub = float(mean + d)
 
-        return (lb, ub)
+    if not is_within_circular_range(mean, lb, ub):
+        lb, ub = ub, lb
 
-    else:
-        raise ValueError(
-            f"n={n} is too small (<= 8) for computing CI with approximation method."
-        )
+    return (lb, ub)
 
 
 def _circ_mean_ci_bootstrap(
@@ -1057,6 +1069,21 @@ def _circ_mean_ci_bootstrap(
     """
     Implementation of Section 8.3 (Fisher, 1993, P207)
     """
+    # sanity-check: is a mean direction identifiable?
+    n = len(alpha)
+    r = circ_r(alpha)
+
+    # classic Rayleigh test approximation, avoids import cycles
+    z_stat = n * r**2                  # Rayleigh's Z
+    p_val  = np.exp(-z_stat)           # p ≈ e^(−Z)  (valid for n ≥ 10)
+
+    if p_val > 0.05:                   # data look uniform ⇒ no mean dir.
+        raise ValueError(
+            "Bootstrap CI not computed: resultant length r={:.4f} "
+            "(Rayleigh p≈{:.3f}) is too small. "
+            "Sample may be uniform, so the mean direction is undefined."
+            .format(r, p_val)
+        )
 
     # Precompute z0 and v0 from original data
     # algo 1
diff --git a/tests/test_descriptive.py b/tests/test_descriptive.py
index a8e63f8..e34caea 100644
--- a/tests/test_descriptive.py
+++ b/tests/test_descriptive.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 from pycircstat2 import Circular, load_data
 from pycircstat2.descriptive import (
@@ -178,6 +179,20 @@ def test_circ_mean_ci():
     # method: bootstrap (from P78, Fisher, 1993)
     # but how to test boostrap?
 
+    # test uniform distributed data (all method should raise errors)
+    from pycircstat2.distributions import circularuniform
+    rng = np.random.default_rng(seed=25)
+    d_uni = circularuniform.rvs(size=25)
+
+    with pytest.raises(ValueError):
+        circ_mean_ci(alpha=d_uni, method="approximate")
+
+    with pytest.raises(ValueError):
+        circ_mean_ci(alpha=d_uni, method="bootstrap")
+
+    with pytest.raises(ValueError):
+        circ_mean_ci(alpha=d_uni, method="dispersion")
+    
 
 def test_circ_median_ci():
     d_ex3 = load_data("B6", "fisher")