From 9fa392dfffee6c783108581c7e762802ec1f58f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aman=20=7C=20=E0=A4=85=E0=A4=AE=E0=A4=A8?= <121532863+beingamanforever@users.noreply.github.com>
Date: Tue, 20 Jan 2026 06:37:08 +0530
Subject: [PATCH] tabulate: fix categorical and string edge cases

---
Notes (this area is discarded by git am; it is commentary only):

  * Hunk 1 (-84,7 +84,11): when `total == 0`, `percents` is set to
    zeros with the size of `counts` instead of evaluating
    `100 * counts ./ total` with a zero divisor.
  * Hunk 2 (-96,40 +100,21): string-array branch.  If `x` is empty
    after missing entries are removed, `out` becomes a 0x3 cell;
    otherwise grp2idx/accumarray run once and fill `out`.  The
    duplicated counts/output block and the `vals_cell` alias are
    removed.
  * Hunk 3 (-407,3 +392,28): adds tests for an all-undefined
    categorical, a categorical with categories but no data, and an
    all-missing string array.

  NOTE(review): this copy of the patch had its line breaks and leading
  indentation collapsed.  The line structure below was rebuilt so that
  every hunk matches its declared line counts, but the indentation of
  context lines is a reconstruction -- confirm it against
  inst/tabulate.m at blob 1e378a6c, or apply with
  `git apply --ignore-whitespace`.

 inst/tabulate.m | 68 ++++++++++++++++++++++++++++---------------------
 1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/inst/tabulate.m b/inst/tabulate.m
index 1e378a6c..3ea3f3e4 100644
--- a/inst/tabulate.m
+++ b/inst/tabulate.m
@@ -84,7 +84,11 @@
     endif
 
     total = sum (counts);
-    percents = 100 * counts ./ total;
+    if (total == 0)
+      percents = zeros (size (counts));
+    else
+      percents = 100 * counts ./ total;
+    endif
 
     ## Output format: Cell array
     out = cell (length (vals), 3);
@@ -96,40 +100,21 @@
     ## Handle string arrays
     x(ismissing (x)) = [];
 
-    ## Convert to cellstr and use grp2idx which is robust
-    [idx, vals] = grp2idx (cellstr (x));
-
-    if (isempty (idx))
-      counts = [];
-      percents = [];
+    if (isempty (x))
+      out = cell (0, 3);
     else
-      counts = accumarray (idx, 1);
-      total = sum (counts);
-      percents = 100 * counts ./ total;
-    endif
+      ## Convert to cellstr and use grp2idx which is robust
+      [idx, vals] = grp2idx (cellstr (x));
 
-    ## Output format: Cell array
-    vals_cell = vals;
-    out = cell (length (vals_cell), 3);
-    out(:,1) = vals_cell;
-    out(:,2) = num2cell (counts);
-    out(:,3) = num2cell (percents);
-
-    if (isempty (idx))
-      counts = [];
-      percents = [];
-    else
       counts = accumarray (idx, 1);
       total = sum (counts);
       percents = 100 * counts ./ total;
-    endif
 
-    ## Output format: Cell array
-    vals_cell = vals;
-    out = cell (length (vals_cell), 3);
-    out(:,1) = vals_cell;
-    out(:,2) = num2cell (counts);
-    out(:,3) = num2cell (percents);
+      out = cell (length (vals), 3);
+      out(:,1) = vals;
+      out(:,2) = num2cell (counts);
+      out(:,3) = num2cell (percents);
+    endif
 
   elseif (islogical (x))
     ## Handle logical arrays
@@ -407,3 +392,28 @@
 %!error tabulate ({1, 2, 3, 4})
 %!error ...
 %! tabulate ({"a", "b"; "a", "c"})
+
+## Test categorical with all undefined values (should return zero counts/percents)
+%!test
+%! x = categorical ({'a','b','c'});
+%! x(:) = categorical (missing);
+%! tbl = tabulate (x);
+%! assert (iscell (tbl));
+%! assert ([tbl{:,2}]', [0; 0; 0]);
+%! assert ([tbl{:,3}]', [0; 0; 0]);
+
+## Test categorical with defined categories but no data
+%!test
+%! x = categorical ({}, {'low','med','high'});
+%! tbl = tabulate (x);
+%! assert (iscell (tbl));
+%! assert ([tbl{:,2}]', [0; 0; 0]);
+%! assert ([tbl{:,3}]', [0; 0; 0]);
+
+## Test string array with all missing values (should return empty table)
+%!test
+%! x = string ({'a','b'});
+%! x(:) = missing;
+%! tbl = tabulate (x);
+%! assert (iscell (tbl));
+%! assert (isempty (tbl));