From 343a496b33e800c1f5436d0e4836b33e154f07dd Mon Sep 17 00:00:00 2001 From: "Vincent A. Cicirello" Date: Sun, 5 Oct 2025 14:55:44 -0400 Subject: [PATCH 1/3] moved relabeler to internal package --- .../distance/KendallTauSequenceDistance.java | 5 +- .../{ => internal}/KendallTauRelabeler.java | 6 +- .../{ => internal}/RelabelByHashing.java | 6 +- .../{ => internal}/RelabelBySorting.java | 6 +- .../KendallTauSequenceDistanceTests.java | 33 +--------- ...ndallTauSequenceDistanceInternalTests.java | 60 +++++++++++++++++++ 6 files changed, 74 insertions(+), 42 deletions(-) rename src/main/java/org/cicirello/sequences/distance/{ => internal}/KendallTauRelabeler.java (97%) rename src/main/java/org/cicirello/sequences/distance/{ => internal}/RelabelByHashing.java (98%) rename src/main/java/org/cicirello/sequences/distance/{ => internal}/RelabelBySorting.java (97%) create mode 100644 src/test/java/org/cicirello/sequences/distance/internal/KendallTauSequenceDistanceInternalTests.java diff --git a/src/main/java/org/cicirello/sequences/distance/KendallTauSequenceDistance.java b/src/main/java/org/cicirello/sequences/distance/KendallTauSequenceDistance.java index 7fcc0907..d4169831 100644 --- a/src/main/java/org/cicirello/sequences/distance/KendallTauSequenceDistance.java +++ b/src/main/java/org/cicirello/sequences/distance/KendallTauSequenceDistance.java @@ -1,6 +1,6 @@ /* * JavaPermutationTools: A Java library for computation on permutations and sequences - * Copyright 2005-2023 Vincent A. Cicirello, . + * Copyright 2005-2025 Vincent A. Cicirello, . * * This file is part of JavaPermutationTools (https://jpt.cicirello.org/). * @@ -23,6 +23,9 @@ import java.util.Arrays; import java.util.List; +import org.cicirello.sequences.distance.internal.KendallTauRelabeler; +import org.cicirello.sequences.distance.internal.RelabelByHashing; +import org.cicirello.sequences.distance.internal.RelabelBySorting; /** * Kendall Tau Sequence Distance is the minimum number of adjacent swaps necessary to transform one diff --git a/src/main/java/org/cicirello/sequences/distance/KendallTauRelabeler.java b/src/main/java/org/cicirello/sequences/distance/internal/KendallTauRelabeler.java similarity index 97% rename from src/main/java/org/cicirello/sequences/distance/KendallTauRelabeler.java rename to src/main/java/org/cicirello/sequences/distance/internal/KendallTauRelabeler.java index 3225966d..959c4f8a 100644 --- a/src/main/java/org/cicirello/sequences/distance/KendallTauRelabeler.java +++ b/src/main/java/org/cicirello/sequences/distance/internal/KendallTauRelabeler.java @@ -1,6 +1,6 @@ /* * JavaPermutationTools: A Java library for computation on permutations and sequences - * Copyright 2005-2022 Vincent A. Cicirello, . + * Copyright 2005-2025 Vincent A. Cicirello, . * * This file is part of JavaPermutationTools (https://jpt.cicirello.org/). * @@ -19,7 +19,7 @@ * You should have received a copy of the GNU General Public License * along with JavaPermutationTools. If not, see . */ -package org.cicirello.sequences.distance; +package org.cicirello.sequences.distance.internal; import java.util.List; @@ -30,7 +30,7 @@ * @author Vincent A. Cicirello, https://www.cicirello.org/ */ -interface KendallTauRelabeler { +public interface KendallTauRelabeler { /** * Relabels the elements of the sequence to integers from 0 to k where there are k+1 distinct diff --git a/src/main/java/org/cicirello/sequences/distance/RelabelByHashing.java b/src/main/java/org/cicirello/sequences/distance/internal/RelabelByHashing.java similarity index 98% rename from src/main/java/org/cicirello/sequences/distance/RelabelByHashing.java rename to src/main/java/org/cicirello/sequences/distance/internal/RelabelByHashing.java index 8113db20..dcf5a65f 100644 --- a/src/main/java/org/cicirello/sequences/distance/RelabelByHashing.java +++ b/src/main/java/org/cicirello/sequences/distance/internal/RelabelByHashing.java @@ -1,6 +1,6 @@ /* * JavaPermutationTools: A Java library for computation on permutations and sequences - * Copyright 2005-2022 Vincent A. Cicirello, . + * Copyright 2005-2025 Vincent A. Cicirello, . * * This file is part of JavaPermutationTools (https://jpt.cicirello.org/). * @@ -19,7 +19,7 @@ * You should have received a copy of the GNU General Public License * along with JavaPermutationTools. If not, see . */ -package org.cicirello.sequences.distance; +package org.cicirello.sequences.distance.internal; import java.util.HashMap; import java.util.Iterator; @@ -31,7 +31,7 @@ * @author Vincent A. Cicirello, https://www.cicirello.org/ */ -final class RelabelByHashing implements KendallTauRelabeler { +public final class RelabelByHashing implements KendallTauRelabeler { @Override public int relabel(Object[] s1, Object[] s2, int[][] relabeling) { diff --git a/src/main/java/org/cicirello/sequences/distance/RelabelBySorting.java b/src/main/java/org/cicirello/sequences/distance/internal/RelabelBySorting.java similarity index 97% rename from src/main/java/org/cicirello/sequences/distance/RelabelBySorting.java rename to src/main/java/org/cicirello/sequences/distance/internal/RelabelBySorting.java index d2885b41..489691fb 100644 --- a/src/main/java/org/cicirello/sequences/distance/RelabelBySorting.java +++ b/src/main/java/org/cicirello/sequences/distance/internal/RelabelBySorting.java @@ -1,6 +1,6 @@ /* * JavaPermutationTools: A Java library for computation on permutations and sequences - * Copyright 2005-2023 Vincent A. Cicirello, . + * Copyright 2005-2025 Vincent A. Cicirello, . * * This file is part of JavaPermutationTools (https://jpt.cicirello.org/). * @@ -19,7 +19,7 @@ * You should have received a copy of the GNU General Public License * along with JavaPermutationTools. If not, see . */ -package org.cicirello.sequences.distance; +package org.cicirello.sequences.distance.internal; import java.util.Arrays; import java.util.Iterator; @@ -31,7 +31,7 @@ * @author Vincent A. Cicirello, https://www.cicirello.org/ */ -final class RelabelBySorting implements KendallTauRelabeler { +public final class RelabelBySorting implements KendallTauRelabeler { @Override public int relabel(int[] s1, int[] s2, int[][] relabeling) { diff --git a/src/test/java/org/cicirello/sequences/distance/KendallTauSequenceDistanceTests.java b/src/test/java/org/cicirello/sequences/distance/KendallTauSequenceDistanceTests.java index 68d9fad2..e5bf5b47 100644 --- a/src/test/java/org/cicirello/sequences/distance/KendallTauSequenceDistanceTests.java +++ b/src/test/java/org/cicirello/sequences/distance/KendallTauSequenceDistanceTests.java @@ -1,5 +1,5 @@ /* - * Copyright 2018-2022 Vincent A. Cicirello, . + * Copyright 2018-2025 Vincent A. Cicirello, . * * This file is part of JavaPermutationTools (https://jpt.cicirello.org/). * @@ -154,37 +154,6 @@ public void testKendallTauDistanceExceptions() { thrown = assertThrows(IllegalArgumentException.class, () -> d.distance(s1, s2)); } - @Test - public void testKendallTauSequenceDistance_HashTableBaseClass() { - class TestHT extends RelabelByHashing.BaseHT { - TestHT(int min) { - super(32, min); - } - } - for (int n = 1; n <= 32; n *= 2) { - TestHT ht = new TestHT(n); - assertEquals(n, ht.minSize); - assertEquals(n - 1, ht.mask); - } - TestHT ht = new TestHT(3); - assertEquals(4, ht.minSize); - for (int n = 5; n < 8; n++) { - ht = new TestHT(n); - assertEquals(8, ht.minSize); - assertEquals(7, ht.mask); - } - for (int n = 9; n < 16; n++) { - ht = new TestHT(n); - assertEquals(16, ht.minSize); - assertEquals(15, ht.mask); - } - for (int n = 17; n <= 64; n++) { - ht = new TestHT(n); - assertEquals(32, ht.minSize); - assertEquals(31, ht.mask); - } - } - @Test public void testKendallTauDistanceExceptionsDiffElements() { final KendallTauSequenceDistance d = new KendallTauSequenceDistance(); diff --git a/src/test/java/org/cicirello/sequences/distance/internal/KendallTauSequenceDistanceInternalTests.java b/src/test/java/org/cicirello/sequences/distance/internal/KendallTauSequenceDistanceInternalTests.java new file mode 100644 index 00000000..d5229e77 --- /dev/null +++ b/src/test/java/org/cicirello/sequences/distance/internal/KendallTauSequenceDistanceInternalTests.java @@ -0,0 +1,60 @@ +/* + * Copyright 2018-2025 Vincent A. Cicirello, . + * + * This file is part of JavaPermutationTools (https://jpt.cicirello.org/). + * + * JavaPermutationTools is free software: you can + * redistribute it and/or modify it under the terms of the GNU + * General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * JavaPermutationTools is distributed in the hope + * that it will be useful, but WITHOUT ANY WARRANTY; without even + * the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with JavaPermutationTools. If not, see . + */ +package org.cicirello.sequences.distance.internal; + +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.*; + +/** JUnit tests for KendallTauSequenceDistance. */ +public class KendallTauSequenceDistanceInternalTests { + + @Test + public void testKendallTauSequenceDistance_HashTableBaseClass() { + class TestHT extends RelabelByHashing.BaseHT { + TestHT(int min) { + super(32, min); + } + } + for (int n = 1; n <= 32; n *= 2) { + TestHT ht = new TestHT(n); + assertEquals(n, ht.minSize); + assertEquals(n - 1, ht.mask); + } + TestHT ht = new TestHT(3); + assertEquals(4, ht.minSize); + for (int n = 5; n < 8; n++) { + ht = new TestHT(n); + assertEquals(8, ht.minSize); + assertEquals(7, ht.mask); + } + for (int n = 9; n < 16; n++) { + ht = new TestHT(n); + assertEquals(16, ht.minSize); + assertEquals(15, ht.mask); + } + for (int n = 17; n <= 64; n++) { + ht = new TestHT(n); + assertEquals(32, ht.minSize); + assertEquals(31, ht.mask); + } + } +} From dcc197c7dc8ee515301c1334d7e801b515491547 Mon Sep 17 00:00:00 2001 From: "Vincent A. Cicirello" Date: Sun, 5 Oct 2025 15:57:01 -0400 Subject: [PATCH 2/3] refactored Relabel;ByHashing --- .../sequences/distance/internal/CharHT.java | 116 ++++++ .../sequences/distance/internal/DoubleHT.java | 107 +++++ .../sequences/distance/internal/FloatHT.java | 106 +++++ .../sequences/distance/internal/IntHT.java | 105 +++++ .../sequences/distance/internal/LongHT.java | 106 +++++ .../distance/internal/RelabelByHashing.java | 386 ------------------ .../sequences/distance/internal/ShortHT.java | 105 +++++ ...ndallTauSequenceDistanceInternalTests.java | 167 ++++++-- 8 files changed, 788 insertions(+), 410 deletions(-) create mode 100644 src/main/java/org/cicirello/sequences/distance/internal/CharHT.java create mode 100644 src/main/java/org/cicirello/sequences/distance/internal/DoubleHT.java create mode 100644 src/main/java/org/cicirello/sequences/distance/internal/FloatHT.java create mode 100644 src/main/java/org/cicirello/sequences/distance/internal/IntHT.java create mode 100644 src/main/java/org/cicirello/sequences/distance/internal/LongHT.java create mode 100644 src/main/java/org/cicirello/sequences/distance/internal/ShortHT.java diff --git a/src/main/java/org/cicirello/sequences/distance/internal/CharHT.java b/src/main/java/org/cicirello/sequences/distance/internal/CharHT.java new file mode 100644 index 00000000..169ea82c --- /dev/null +++ b/src/main/java/org/cicirello/sequences/distance/internal/CharHT.java @@ -0,0 +1,116 @@ +/* + * JavaPermutationTools: A Java library for computation on permutations and sequences + * Copyright 2005-2025 Vincent A. Cicirello, . + * + * This file is part of JavaPermutationTools (https://jpt.cicirello.org/). + * + * JavaPermutationTools is free software: you can + * redistribute it and/or modify it under the terms of the GNU + * General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * JavaPermutationTools is distributed in the hope + * that it will be useful, but WITHOUT ANY WARRANTY; without even + * the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with JavaPermutationTools. If not, see . + */ +package org.cicirello.sequences.distance.internal; + +/** Internal class for hashtable of chars. */ +final class CharHT { + + private final Node[] table; + private final int mask; + + CharHT(int minSize) { + minSize = adjustSize(minSize); + mask = minSize - 1; + table = new Node[minSize]; + } + + int adjustSize(int minSize) { + final int MAX_SIZE = 0x40000000; + if (minSize > MAX_SIZE) { + return MAX_SIZE; + } + minSize = minSize - 1; + minSize = minSize | (minSize >> 1); + minSize = minSize | (minSize >> 2); + minSize = minSize | (minSize >> 4); + minSize = minSize | (minSize >> 8); + minSize = minSize | (minSize >> 16); + return minSize + 1; + } + + int size() { + return table.length; + } + + int mask() { + return mask; + } + + int populate(char[] s1) { + int current = -1; + for (int i = 0; i < s1.length; i++) { + if (!containsKey(s1[i])) { + current++; + put(s1[i], current); + } + } + return current; + } + + int populate(String s1) { + int current = -1; + for (int i = 0; i < s1.length(); i++) { + if (!containsKey(s1.charAt(i))) { + current++; + put(s1.charAt(i), current); + } + } + return current; + } + + int index(char key) { + return key & mask; + } + + boolean containsKey(char key) { + for (Node current = table[index(key)]; current != null; current = current.next) { + if (current.key == key) return true; + } + return false; + } + + int get(char key) { + for (Node current = table[index(key)]; current != null; current = current.next) { + if (current.key == key) return current.value; + } + // NOTE: our internal usage never puts a negative as a value + return -1; + } + + void put(char key, int value) { + // warning: assumes key is not already in hash table (only used internally so ok). + int i = index(key); + table[i] = new Node(key, value, table[i]); + } + + static final class Node { + char key; + int value; + Node next; + + Node(char key, int value, Node next) { + this.key = key; + this.value = value; + this.next = next; + } + } +} diff --git a/src/main/java/org/cicirello/sequences/distance/internal/DoubleHT.java b/src/main/java/org/cicirello/sequences/distance/internal/DoubleHT.java new file mode 100644 index 00000000..2dbd44ac --- /dev/null +++ b/src/main/java/org/cicirello/sequences/distance/internal/DoubleHT.java @@ -0,0 +1,107 @@ +/* + * JavaPermutationTools: A Java library for computation on permutations and sequences + * Copyright 2005-2025 Vincent A. Cicirello, . + * + * This file is part of JavaPermutationTools (https://jpt.cicirello.org/). + * + * JavaPermutationTools is free software: you can + * redistribute it and/or modify it under the terms of the GNU + * General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * JavaPermutationTools is distributed in the hope + * that it will be useful, but WITHOUT ANY WARRANTY; without even + * the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with JavaPermutationTools. If not, see . + */ +package org.cicirello.sequences.distance.internal; + +/** Internal class for hashtable of doubles. */ +final class DoubleHT { + + private final Node[] table; + private final int mask; + + DoubleHT(int minSize) { + minSize = adjustSize(minSize); + mask = minSize - 1; + table = new Node[minSize]; + } + + int adjustSize(int minSize) { + final int MAX_SIZE = 0x40000000; + if (minSize > MAX_SIZE) { + return MAX_SIZE; + } + minSize = minSize - 1; + minSize = minSize | (minSize >> 1); + minSize = minSize | (minSize >> 2); + minSize = minSize | (minSize >> 4); + minSize = minSize | (minSize >> 8); + minSize = minSize | (minSize >> 16); + return minSize + 1; + } + + int size() { + return table.length; + } + + int mask() { + return mask; + } + + int populate(double[] s1) { + int current = -1; + for (int i = 0; i < s1.length; i++) { + if (!containsKey(s1[i])) { + current++; + put(s1[i], current); + } + } + return current; + } + + int index(double key) { + long x = Double.doubleToLongBits(key); + int y = (int) (x ^ (x >>> 32)); + return (y ^ (y >>> 16)) & mask; + } + + boolean containsKey(double key) { + for (Node current = table[index(key)]; current != null; current = current.next) { + if (current.key == key) return true; + } + return false; + } + + int get(double key) { + for (Node current = table[index(key)]; current != null; current = current.next) { + if (current.key == key) return current.value; + } + // NOTE: our internal usage never puts a negative as a value + return -1; + } + + void put(double key, int value) { + // warning: assumes key is not already in hash table (only used internally so ok). + int i = index(key); + table[i] = new Node(key, value, table[i]); + } + + static final class Node { + double key; + int value; + Node next; + + Node(double key, int value, Node next) { + this.key = key; + this.value = value; + this.next = next; + } + } +} diff --git a/src/main/java/org/cicirello/sequences/distance/internal/FloatHT.java b/src/main/java/org/cicirello/sequences/distance/internal/FloatHT.java new file mode 100644 index 00000000..e1d87578 --- /dev/null +++ b/src/main/java/org/cicirello/sequences/distance/internal/FloatHT.java @@ -0,0 +1,106 @@ +/* + * JavaPermutationTools: A Java library for computation on permutations and sequences + * Copyright 2005-2025 Vincent A. Cicirello, . + * + * This file is part of JavaPermutationTools (https://jpt.cicirello.org/). + * + * JavaPermutationTools is free software: you can + * redistribute it and/or modify it under the terms of the GNU + * General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * JavaPermutationTools is distributed in the hope + * that it will be useful, but WITHOUT ANY WARRANTY; without even + * the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with JavaPermutationTools. If not, see . + */ +package org.cicirello.sequences.distance.internal; + +/** Internal class for hashtable of floats. */ +final class FloatHT { + + private final Node[] table; + private final int mask; + + FloatHT(int minSize) { + minSize = adjustSize(minSize); + mask = minSize - 1; + table = new Node[minSize]; + } + + int adjustSize(int minSize) { + final int MAX_SIZE = 0x40000000; + if (minSize > MAX_SIZE) { + return MAX_SIZE; + } + minSize = minSize - 1; + minSize = minSize | (minSize >> 1); + minSize = minSize | (minSize >> 2); + minSize = minSize | (minSize >> 4); + minSize = minSize | (minSize >> 8); + minSize = minSize | (minSize >> 16); + return minSize + 1; + } + + int size() { + return table.length; + } + + int mask() { + return mask; + } + + int populate(float[] s1) { + int current = -1; + for (int i = 0; i < s1.length; i++) { + if (!containsKey(s1[i])) { + current++; + put(s1[i], current); + } + } + return current; + } + + int index(float key) { + int x = Float.floatToIntBits(key); + return (x ^ (x >>> 16)) & mask; + } + + boolean containsKey(float key) { + for (Node current = table[index(key)]; current != null; current = current.next) { + if (current.key == key) return true; + } + return false; + } + + int get(float key) { + for (Node current = table[index(key)]; current != null; current = current.next) { + if (current.key == key) return current.value; + } + // NOTE: our internal usage never puts a negative as a value + return -1; + } + + void put(float key, int value) { + // warning: assumes key is not already in hash table (only used internally so ok). + int i = index(key); + table[i] = new Node(key, value, table[i]); + } + + static final class Node { + float key; + int value; + Node next; + + Node(float key, int value, Node next) { + this.key = key; + this.value = value; + this.next = next; + } + } +} diff --git a/src/main/java/org/cicirello/sequences/distance/internal/IntHT.java b/src/main/java/org/cicirello/sequences/distance/internal/IntHT.java new file mode 100644 index 00000000..060e40b0 --- /dev/null +++ b/src/main/java/org/cicirello/sequences/distance/internal/IntHT.java @@ -0,0 +1,105 @@ +/* + * JavaPermutationTools: A Java library for computation on permutations and sequences + * Copyright 2005-2025 Vincent A. Cicirello, . + * + * This file is part of JavaPermutationTools (https://jpt.cicirello.org/). + * + * JavaPermutationTools is free software: you can + * redistribute it and/or modify it under the terms of the GNU + * General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * JavaPermutationTools is distributed in the hope + * that it will be useful, but WITHOUT ANY WARRANTY; without even + * the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with JavaPermutationTools. If not, see . + */ +package org.cicirello.sequences.distance.internal; + +/** Internal class for hashtable of ints. */ +final class IntHT { + + private final Node[] table; + private final int mask; + + IntHT(int minSize) { + minSize = adjustSize(minSize); + mask = minSize - 1; + table = new Node[minSize]; + } + + int adjustSize(int minSize) { + final int MAX_SIZE = 0x40000000; + if (minSize > MAX_SIZE) { + return MAX_SIZE; + } + minSize = minSize - 1; + minSize = minSize | (minSize >> 1); + minSize = minSize | (minSize >> 2); + minSize = minSize | (minSize >> 4); + minSize = minSize | (minSize >> 8); + minSize = minSize | (minSize >> 16); + return minSize + 1; + } + + int size() { + return table.length; + } + + int mask() { + return mask; + } + + int populate(int[] s1) { + int current = -1; + for (int i = 0; i < s1.length; i++) { + if (!containsKey(s1[i])) { + current++; + put(s1[i], current); + } + } + return current; + } + + int index(int key) { + return (key ^ (key >>> 16)) & mask; + } + + boolean containsKey(int key) { + for (Node current = table[index(key)]; current != null; current = current.next) { + if (current.key == key) return true; + } + return false; + } + + int get(int key) { + for (Node current = table[index(key)]; current != null; current = current.next) { + if (current.key == key) return current.value; + } + // NOTE: our internal usage never puts a negative as a value + return -1; + } + + void put(int key, int value) { + // warning: assumes key is not already in hash table (only used internally so ok). + int i = index(key); + table[i] = new Node(key, value, table[i]); + } + + static final class Node { + int key; + int value; + Node next; + + Node(int key, int value, Node next) { + this.key = key; + this.value = value; + this.next = next; + } + } +} diff --git a/src/main/java/org/cicirello/sequences/distance/internal/LongHT.java b/src/main/java/org/cicirello/sequences/distance/internal/LongHT.java new file mode 100644 index 00000000..973319b8 --- /dev/null +++ b/src/main/java/org/cicirello/sequences/distance/internal/LongHT.java @@ -0,0 +1,106 @@ +/* + * JavaPermutationTools: A Java library for computation on permutations and sequences + * Copyright 2005-2025 Vincent A. Cicirello, . + * + * This file is part of JavaPermutationTools (https://jpt.cicirello.org/). + * + * JavaPermutationTools is free software: you can + * redistribute it and/or modify it under the terms of the GNU + * General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * JavaPermutationTools is distributed in the hope + * that it will be useful, but WITHOUT ANY WARRANTY; without even + * the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with JavaPermutationTools. If not, see . + */ +package org.cicirello.sequences.distance.internal; + +/** Internal class for hashtable of longs. */ +final class LongHT { + + private final Node[] table; + private final int mask; + + LongHT(int minSize) { + minSize = adjustSize(minSize); + mask = minSize - 1; + table = new Node[minSize]; + } + + int adjustSize(int minSize) { + final int MAX_SIZE = 0x40000000; + if (minSize > MAX_SIZE) { + return MAX_SIZE; + } + minSize = minSize - 1; + minSize = minSize | (minSize >> 1); + minSize = minSize | (minSize >> 2); + minSize = minSize | (minSize >> 4); + minSize = minSize | (minSize >> 8); + minSize = minSize | (minSize >> 16); + return minSize + 1; + } + + int size() { + return table.length; + } + + int mask() { + return mask; + } + + int populate(long[] s1) { + int current = -1; + for (int i = 0; i < s1.length; i++) { + if (!containsKey(s1[i])) { + current++; + put(s1[i], current); + } + } + return current; + } + + int index(long key) { + int x = (int) (key ^ (key >>> 32)); + return (x ^ (x >>> 16)) & mask; + } + + boolean containsKey(long key) { + for (Node current = table[index(key)]; current != null; current = current.next) { + if (current.key == key) return true; + } + return false; + } + + int get(long key) { + for (Node current = table[index(key)]; current != null; current = current.next) { + if (current.key == key) return current.value; + } + // NOTE: our internal usage never puts a negative as a value + return -1; + } + + void put(long key, int value) { + // warning: assumes key is not already in hash table (only used internally so ok). + int i = index(key); + table[i] = new Node(key, value, table[i]); + } + + static final class Node { + long key; + int value; + Node next; + + Node(long key, int value, Node next) { + this.key = key; + this.value = value; + this.next = next; + } + } +} diff --git a/src/main/java/org/cicirello/sequences/distance/internal/RelabelByHashing.java b/src/main/java/org/cicirello/sequences/distance/internal/RelabelByHashing.java index dcf5a65f..b68f28c1 100644 --- a/src/main/java/org/cicirello/sequences/distance/internal/RelabelByHashing.java +++ b/src/main/java/org/cicirello/sequences/distance/internal/RelabelByHashing.java @@ -218,390 +218,4 @@ public int relabel(byte[] s1, byte[] s2, int[][] relabeling) { } return current; } - - static class BaseHT { - protected final int mask; - protected final int minSize; - - BaseHT(int maxSize, int minSize) { - final int MAX_SIZE = maxSize; - if (minSize > MAX_SIZE) { - minSize = MAX_SIZE; - mask = minSize - 1; - } else { - minSize = minSize - 1; - minSize = minSize | (minSize >> 1); - minSize = minSize | (minSize >> 2); - minSize = minSize | (minSize >> 4); - minSize = minSize | (minSize >> 8); - minSize = minSize | (minSize >> 16); - mask = minSize; - minSize++; - } - this.minSize = minSize; - } - } - - private static final class IntHT extends BaseHT { - - private final Node[] table; - - IntHT(int min) { - super(0x40000000, min); - table = new Node[minSize]; - } - - int populate(int[] s1) { - int current = -1; - for (int i = 0; i < s1.length; i++) { - if (!containsKey(s1[i])) { - current++; - put(s1[i], current); - } - } - return current; - } - - int index(int key) { - return (key ^ (key >>> 16)) & mask; - } - - boolean containsKey(int key) { - for (Node current = table[index(key)]; current != null; current = current.next) { - if (current.key == key) return true; - } - return false; - } - - int get(int key) { - for (Node current = table[index(key)]; current != null; current = current.next) { - if (current.key == key) return current.value; - } - // NOTE: our internal usage never puts a negative as a value - return -1; - } - - void put(int key, int value) { - // warning: assumes key is not already in hash table (only used internally so ok). - int i = index(key); - table[i] = new Node(key, value, table[i]); - } - - static final class Node { - int key; - int value; - Node next; - - Node(int key, int value, Node next) { - this.key = key; - this.value = value; - this.next = next; - } - } - } - - private static final class LongHT extends BaseHT { - - private final Node[] table; - - LongHT(int min) { - super(0x40000000, min); - table = new Node[minSize]; - } - - int populate(long[] s1) { - int current = -1; - for (int i = 0; i < s1.length; i++) { - if (!containsKey(s1[i])) { - current++; - put(s1[i], current); - } - } - return current; - } - - int index(long key) { - int x = (int) (key ^ (key >>> 32)); - return (x ^ (x >>> 16)) & mask; - } - - boolean containsKey(long key) { - for (Node current = table[index(key)]; current != null; current = current.next) { - if (current.key == key) return true; - } - return false; - } - - int get(long key) { - for (Node current = table[index(key)]; current != null; current = current.next) { - if (current.key == key) return current.value; - } - // NOTE: our internal usage never puts a negative as a value - return -1; - } - - void put(long key, int value) { - // warning: assumes key is not already in hash table (only used internally so ok). - int i = index(key); - table[i] = new Node(key, value, table[i]); - } - - static final class Node { - long key; - int value; - Node next; - - Node(long key, int value, Node next) { - this.key = key; - this.value = value; - this.next = next; - } - } - } - - private static final class ShortHT extends BaseHT { - - private final Node[] table; - - ShortHT(int min) { - super(0x10000, min); - table = new Node[minSize]; - } - - int populate(short[] s1) { - int current = -1; - for (int i = 0; i < s1.length; i++) { - if (!containsKey(s1[i])) { - current++; - put(s1[i], current); - } - } - return current; - } - - int index(short key) { - return key & mask; - } - - boolean containsKey(short key) { - for (Node current = table[index(key)]; current != null; current = current.next) { - if (current.key == key) return true; - } - return false; - } - - int get(short key) { - for (Node current = table[index(key)]; current != null; current = current.next) { - if (current.key == key) return current.value; - } - // NOTE: our internal usage never puts a negative as a value - return -1; - } - - void put(short key, int value) { - // warning: assumes key is not already in hash table (only used internally so ok). - int i = index(key); - table[i] = new Node(key, value, table[i]); - } - - static final class Node { - short key; - int value; - Node next; - - Node(short key, int value, Node next) { - this.key = key; - this.value = value; - this.next = next; - } - } - } - - private static final class CharHT extends BaseHT { - - private final Node[] table; - - CharHT(int min) { - super(0x10000, min); - table = new Node[minSize]; - } - - int populate(char[] s1) { - int current = -1; - for (int i = 0; i < s1.length; i++) { - if (!containsKey(s1[i])) { - current++; - put(s1[i], current); - } - } - return current; - } - - int populate(String s1) { - int current = -1; - for (int i = 0; i < s1.length(); i++) { - if (!containsKey(s1.charAt(i))) { - current++; - put(s1.charAt(i), current); - } - } - return current; - } - - int index(char key) { - return key & mask; - } - - boolean containsKey(char key) { - for (Node current = table[index(key)]; current != null; current = current.next) { - if (current.key == key) return true; - } - return false; - } - - int get(char key) { - for (Node current = table[index(key)]; current != null; current = current.next) { - if (current.key == key) return current.value; - } - // NOTE: our internal usage never puts a negative as a value - return -1; - } - - void put(char key, int value) { - // warning: assumes key is not already in hash table (only used internally so ok). - int i = index(key); - table[i] = new Node(key, value, table[i]); - } - - static final class Node { - char key; - int value; - Node next; - - Node(char key, int value, Node next) { - this.key = key; - this.value = value; - this.next = next; - } - } - } - - private static final class DoubleHT extends BaseHT { - - private final Node[] table; - - DoubleHT(int min) { - super(0x40000000, min); - table = new Node[minSize]; - } - - int populate(double[] s1) { - int current = -1; - for (int i = 0; i < s1.length; i++) { - if (!containsKey(s1[i])) { - current++; - put(s1[i], current); - } - } - return current; - } - - int index(double key) { - long x = Double.doubleToLongBits(key); - int y = (int) (x ^ (x >>> 32)); - return (y ^ (y >>> 16)) & mask; - } - - boolean containsKey(double key) { - for (Node current = table[index(key)]; current != null; current = current.next) { - if (current.key == key) return true; - } - return false; - } - - int get(double key) { - for (Node current = table[index(key)]; current != null; current = current.next) { - if (current.key == key) return current.value; - } - // NOTE: our internal usage never puts a negative as a value - return -1; - } - - void put(double key, int value) { - // warning: assumes key is not already in hash table (only used internally so ok). - int i = index(key); - table[i] = new Node(key, value, table[i]); - } - - static final class Node { - double key; - int value; - Node next; - - Node(double key, int value, Node next) { - this.key = key; - this.value = value; - this.next = next; - } - } - } - - private static final class FloatHT extends BaseHT { - - private final Node[] table; - - FloatHT(int min) { - super(0x40000000, min); - table = new Node[minSize]; - } - - int populate(float[] s1) { - int current = -1; - for (int i = 0; i < s1.length; i++) { - if (!containsKey(s1[i])) { - current++; - put(s1[i], current); - } - } - return current; - } - - int index(float key) { - int x = Float.floatToIntBits(key); - return (x ^ (x >>> 16)) & mask; - } - - boolean containsKey(float key) { - for (Node current = table[index(key)]; current != null; current = current.next) { - if (current.key == key) return true; - } - return false; - } - - int get(float key) { - for (Node current = table[index(key)]; current != null; current = current.next) { - if (current.key == key) return current.value; - } - // NOTE: our internal usage never puts a negative as a value - return -1; - } - - void put(float key, int value) { - // warning: assumes key is not already in hash table (only used internally so ok). - int i = index(key); - table[i] = new Node(key, value, table[i]); - } - - static final class Node { - float key; - int value; - Node next; - - Node(float key, int value, Node next) { - this.key = key; - this.value = value; - this.next = next; - } - } - } } diff --git a/src/main/java/org/cicirello/sequences/distance/internal/ShortHT.java b/src/main/java/org/cicirello/sequences/distance/internal/ShortHT.java new file mode 100644 index 00000000..1a4200fa --- /dev/null +++ b/src/main/java/org/cicirello/sequences/distance/internal/ShortHT.java @@ -0,0 +1,105 @@ +/* + * JavaPermutationTools: A Java library for computation on permutations and sequences + * Copyright 2005-2025 Vincent A. Cicirello, . + * + * This file is part of JavaPermutationTools (https://jpt.cicirello.org/). + * + * JavaPermutationTools is free software: you can + * redistribute it and/or modify it under the terms of the GNU + * General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * JavaPermutationTools is distributed in the hope + * that it will be useful, but WITHOUT ANY WARRANTY; without even + * the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with JavaPermutationTools. If not, see . + */ +package org.cicirello.sequences.distance.internal; + +/** Internal class for hashtable of shorts. */ +final class ShortHT { + + private final Node[] table; + private final int mask; + + ShortHT(int minSize) { + minSize = adjustSize(minSize); + mask = minSize - 1; + table = new Node[minSize]; + } + + int adjustSize(int minSize) { + final int MAX_SIZE = 0x40000000; + if (minSize > MAX_SIZE) { + return MAX_SIZE; + } + minSize = minSize - 1; + minSize = minSize | (minSize >> 1); + minSize = minSize | (minSize >> 2); + minSize = minSize | (minSize >> 4); + minSize = minSize | (minSize >> 8); + minSize = minSize | (minSize >> 16); + return minSize + 1; + } + + int size() { + return table.length; + } + + int mask() { + return mask; + } + + int populate(short[] s1) { + int current = -1; + for (int i = 0; i < s1.length; i++) { + if (!containsKey(s1[i])) { + current++; + put(s1[i], current); + } + } + return current; + } + + int index(short key) { + return key & mask; + } + + boolean containsKey(short key) { + for (Node current = table[index(key)]; current != null; current = current.next) { + if (current.key == key) return true; + } + return false; + } + + int get(short key) { + for (Node current = table[index(key)]; current != null; current = current.next) { + if (current.key == key) return current.value; + } + // NOTE: our internal usage never puts a negative as a value + return -1; + } + + void put(short key, int value) { + // warning: assumes key is not already in hash table (only used internally so ok). + int i = index(key); + table[i] = new Node(key, value, table[i]); + } + + static final class Node { + short key; + int value; + Node next; + + Node(short key, int value, Node next) { + this.key = key; + this.value = value; + this.next = next; + } + } +} diff --git a/src/test/java/org/cicirello/sequences/distance/internal/KendallTauSequenceDistanceInternalTests.java b/src/test/java/org/cicirello/sequences/distance/internal/KendallTauSequenceDistanceInternalTests.java index d5229e77..8a60eb8c 100644 --- a/src/test/java/org/cicirello/sequences/distance/internal/KendallTauSequenceDistanceInternalTests.java +++ b/src/test/java/org/cicirello/sequences/distance/internal/KendallTauSequenceDistanceInternalTests.java @@ -28,33 +28,152 @@ public class KendallTauSequenceDistanceInternalTests { @Test - public void testKendallTauSequenceDistance_HashTableBaseClass() { - class TestHT extends RelabelByHashing.BaseHT { - TestHT(int min) { - super(32, min); - } - } - for (int n = 1; n <= 32; n *= 2) { - TestHT ht = new TestHT(n); - assertEquals(n, ht.minSize); - assertEquals(n - 1, ht.mask); - } - TestHT ht = new TestHT(3); - assertEquals(4, ht.minSize); + public void testKendallTauSequenceDistance_HashTableInt() { + final int max = 0x40000000; + int[] powerOfTwoCases = {1, 2, 4, 8, 16, 32}; + for (int n : powerOfTwoCases) { + IntHT ht = new IntHT(n); + assertEquals(n, ht.size()); + assertEquals(n - 1, ht.mask()); + } + IntHT ht = new IntHT(3); + assertEquals(4, ht.size()); + assertEquals(max, ht.adjustSize(max + 1)); + assertEquals(max, ht.adjustSize(max)); + for (int n = 5; n < 8; n++) { + ht = new IntHT(n); + assertEquals(8, ht.size()); + assertEquals(7, ht.mask()); + } + for (int n = 9; n < 16; n++) { + ht = new IntHT(n); + assertEquals(16, ht.size()); + assertEquals(15, ht.mask()); + } + } + + @Test + public void testKendallTauSequenceDistance_HashTableLong() { + final int max = 0x40000000; + int[] powerOfTwoCases = {1, 2, 4, 8, 16, 32}; + for (int n : powerOfTwoCases) { + LongHT ht = new LongHT(n); + assertEquals(n, ht.size()); + assertEquals(n - 1, ht.mask()); + } + LongHT ht = new LongHT(3); + assertEquals(4, ht.size()); + assertEquals(max, ht.adjustSize(max + 1)); + assertEquals(max, ht.adjustSize(max)); + for (int n = 5; n < 8; n++) { + ht = new LongHT(n); + assertEquals(8, ht.size()); + assertEquals(7, ht.mask()); + } + for (int n = 9; n < 16; n++) { + ht = new LongHT(n); + assertEquals(16, ht.size()); + assertEquals(15, ht.mask()); + } + } + + @Test + public void testKendallTauSequenceDistance_HashTableShort() { + final int max = 0x40000000; + int[] powerOfTwoCases = {1, 2, 4, 8, 16, 32}; + for (int n : powerOfTwoCases) { + ShortHT ht = new ShortHT(n); + assertEquals(n, ht.size()); + assertEquals(n - 1, ht.mask()); + } + ShortHT ht = new ShortHT(3); + assertEquals(4, ht.size()); + assertEquals(max, ht.adjustSize(max + 1)); + assertEquals(max, ht.adjustSize(max)); + for (int n = 5; n < 8; n++) { + ht = new ShortHT(n); + assertEquals(8, ht.size()); + assertEquals(7, ht.mask()); + } + for (int n = 9; n < 16; n++) { + ht = new ShortHT(n); + assertEquals(16, ht.size()); + assertEquals(15, ht.mask()); + } + } + + @Test + public void testKendallTauSequenceDistance_HashTableChar() { + final int max = 0x40000000; + int[] powerOfTwoCases = {1, 2, 4, 8, 16, 32}; + for (int n : powerOfTwoCases) { + CharHT ht = new CharHT(n); + assertEquals(n, ht.size()); + assertEquals(n - 1, ht.mask()); + } + CharHT ht = new CharHT(3); + assertEquals(4, ht.size()); + assertEquals(max, ht.adjustSize(max + 1)); + assertEquals(max, ht.adjustSize(max)); + for (int n = 5; n < 8; n++) { + ht = new CharHT(n); + assertEquals(8, ht.size()); + assertEquals(7, ht.mask()); + } + for (int n = 9; n < 16; n++) { + ht = new CharHT(n); + assertEquals(16, ht.size()); + assertEquals(15, ht.mask()); + } + } + + @Test + public void testKendallTauSequenceDistance_HashTableDouble() { + final int max = 0x40000000; + int[] powerOfTwoCases = {1, 2, 4, 8, 16, 32}; + for (int n : powerOfTwoCases) { + DoubleHT ht = new DoubleHT(n); + assertEquals(n, ht.size()); + assertEquals(n - 1, ht.mask()); + } + DoubleHT ht = new DoubleHT(3); + assertEquals(4, ht.size()); + assertEquals(max, ht.adjustSize(max + 1)); + assertEquals(max, ht.adjustSize(max)); + for (int n = 5; n < 8; n++) { + ht = new DoubleHT(n); + assertEquals(8, ht.size()); + assertEquals(7, ht.mask()); + } + for (int n = 9; n < 16; n++) { + ht = new DoubleHT(n); + assertEquals(16, ht.size()); + assertEquals(15, ht.mask()); + } + } + + @Test + public void testKendallTauSequenceDistance_HashTableFloat() { + final int max = 0x40000000; + int[] powerOfTwoCases = {1, 2, 4, 8, 16, 32}; + for (int n : powerOfTwoCases) { + FloatHT ht = new FloatHT(n); + assertEquals(n, ht.size()); + assertEquals(n - 1, ht.mask()); + } + FloatHT ht = new FloatHT(3); + assertEquals(4, ht.size()); + assertEquals(max, ht.adjustSize(max + 1)); + assertEquals(max, ht.adjustSize(max)); for (int n = 5; n < 8; n++) { - ht = new TestHT(n); - assertEquals(8, ht.minSize); - assertEquals(7, ht.mask); + ht = new FloatHT(n); + assertEquals(8, ht.size()); + assertEquals(7, ht.mask()); } for (int n = 9; n < 16; n++) { - ht = new TestHT(n); - assertEquals(16, ht.minSize); - assertEquals(15, ht.mask); - } - for (int n = 17; n <= 64; n++) { - ht = new TestHT(n); - assertEquals(32, ht.minSize); - assertEquals(31, ht.mask); + ht = new FloatHT(n); + assertEquals(16, ht.size()); + assertEquals(15, ht.mask()); } } } From bed7074cde317594ee3efad58c49501beef3ac09 Mon Sep 17 00:00:00 2001 From: "Vincent A. Cicirello" Date: Sun, 5 Oct 2025 15:58:45 -0400 Subject: [PATCH 3/3] Update CHANGELOG.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a75e786..bc3b6877 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] - 2025-05-15 +## [Unreleased] - 2025-10-05 ### Added @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed ### Fixed +* Refactored the internal classes associated with the hash-table implementation of Kendall Tau Sequence Distance, which were identified by RefactorFirst as highly-coupled. ### Dependencies