From 1a9c76e31e176a18a1464f4ff8a271388ee9b614 Mon Sep 17 00:00:00 2001 From: punAhuja Date: Wed, 4 Dec 2024 19:51:50 +0530 Subject: [PATCH 1/8] Implemented API that returns number of GPUs detected --- java/cuvs-java/pom.xml | 268 +++++++++--------- .../java/com/nvidia/cuvs/common/Util.java | 25 ++ .../nvidia/cuvs/CagraBuildAndSearchTest.java | 8 +- .../com/nvidia/cuvs/CagraRandomizedTest.java | 90 ++++++ 4 files changed, 257 insertions(+), 134 deletions(-) create mode 100644 java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedTest.java diff --git a/java/cuvs-java/pom.xml b/java/cuvs-java/pom.xml index aacbad2ca2..ac2d8f4826 100644 --- a/java/cuvs-java/pom.xml +++ b/java/cuvs-java/pom.xml @@ -14,144 +14,152 @@ * See the License for the specific language governing permissions and * limitations under the License. */ ---> +--> - 4.0.0 - com.nvidia.cuvs - cuvs-java - 24.12.1 - cuvs-java - jar + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> + 4.0.0 + com.nvidia.cuvs + cuvs-java + 24.12.1 + cuvs-java + jar - - 22 - 22 - UTF-8 - UTF-8 - + + 22 + 22 + UTF-8 + UTF-8 + - - - commons-io - commons-io - 2.15.1 - + + + commons-io + commons-io + 2.15.1 + - - com.github.fommil - jniloader - 1.1 - + + com.github.fommil + jniloader + 1.1 + - - org.slf4j - slf4j-api - 2.0.13 - + + org.slf4j + slf4j-api + 2.0.13 + - - org.slf4j - slf4j-simple - 2.0.13 - runtime - + + org.slf4j + slf4j-simple + 2.0.13 + runtime + - - org.junit.jupiter - junit-jupiter-api - 5.10.0 - + + junit + junit + 4.13.1 + test + - + + org.apache.lucene + lucene-test-framework + 9.12.0 + test + + - - - - org.apache.maven.plugins - maven-surefire-plugin - 2.7 - - - ${project.build.directory}/classes - - - - - org.apache.maven.plugins - maven-dependency-plugin - 2.10 - - - copy - compile - - copy - - - - - com.nvidia.cuvs - cuvs-java-internal - 24.12 - so - false - - ${project.build.directory}/classes 
- libcuvs_java.so - - - - - - + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.7 + + + ${project.build.directory}/classes + + + + + org.apache.maven.plugins + maven-dependency-plugin + 2.10 + + + copy + compile + + copy + + + + + com.nvidia.cuvs + cuvs-java-internal + 24.12 + so + false + + ${project.build.directory}/classes + libcuvs_java.so + + + + + + - - org.apache.maven.plugins - maven-assembly-plugin - 3.4.2 - - - jar-with-dependencies - - - add - - - - - assemble-all - package - - single - - - - - - org.apache.maven.plugins - maven-jar-plugin - 2.2 - - - - true - - com.nvidia.cuvs.examples.CagraExample - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - 3.6.2 - - com.nvidia.cuvs.examples,com.nvidia.cuvs.panama - ${project.build.directory} - - - - + + org.apache.maven.plugins + maven-assembly-plugin + 3.4.2 + + + jar-with-dependencies + + + add + + + + + assemble-all + package + + single + + + + + + org.apache.maven.plugins + maven-jar-plugin + 2.2 + + + + true + + com.nvidia.cuvs.examples.CagraExample + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.6.2 + + + com.nvidia.cuvs.examples,com.nvidia.cuvs.panama + ${project.build.directory} + + + + diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java index 750e49d642..495c71b78f 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java @@ -20,16 +20,41 @@ import java.io.FileOutputStream; import java.io.IOException; import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; import java.lang.foreign.Linker; import java.lang.foreign.MemoryLayout; import java.lang.foreign.MemoryLayout.PathElement; import java.lang.foreign.MemorySegment; import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; import java.lang.invoke.VarHandle; import org.apache.commons.io.IOUtils; +import 
com.nvidia.cuvs.CuVSResources; + public class Util { + + /** + * Returns the number of GPUs connected to the system using CuVSResources. + * + * @param resources The CuVSResources object managing native resources. + * @return Number of GPUs connected, or -1 if an error occurred. + */ + public static int getNumberOfGPUs(CuVSResources resources) { + try { + MethodHandle getNumberOfGPUsHandle = resources.linker.downcallHandle( + resources.libcuvsNativeLibrary.find("get_number_of_gpus") + .orElseThrow(() -> new IllegalStateException("get_number_of_gpus not found in library")), + FunctionDescriptor.of(ValueLayout.JAVA_INT)); + + return (int) getNumberOfGPUsHandle.invokeExact(); + } catch (Throwable e) { + System.err.println("Failed to invoke get_number_of_gpus: " + e.getMessage()); + return -1; // Return -1 to indicate an error + } + } + /** * A utility method for getting an instance of {@link MemorySegment} for a * {@link String}. diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchTest.java index c5788d3427..f5aaf80ae4 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchTest.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraBuildAndSearchTest.java @@ -16,7 +16,7 @@ package com.nvidia.cuvs; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.Assert.assertEquals; import java.io.File; import java.io.FileInputStream; @@ -28,7 +28,7 @@ import java.util.Map; import java.util.UUID; -import org.junit.jupiter.api.Test; +import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -100,14 +100,14 @@ public void testIndexingAndSearchingFlow() throws Throwable { // Check results log.info(results.getResults().toString()); - assertEquals(expectedResults, results.getResults(), "Results different than expected"); + assertEquals("Results different than expected", expectedResults, 
results.getResults()); // Search from deserialized index results = loadedIndex.search(cuvsQuery); // Check results log.info(results.getResults().toString()); - assertEquals(expectedResults, results.getResults(), "Results different than expected"); + assertEquals("Results different than expected", expectedResults, results.getResults()); // Cleanup if (indexFile.exists()) { diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedTest.java new file mode 100644 index 0000000000..18d94a279b --- /dev/null +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedTest.java @@ -0,0 +1,90 @@ +package com.nvidia.cuvs; + +import java.lang.invoke.MethodHandles; +import java.util.Random; + +import org.apache.lucene.tests.util.LuceneTestCase; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.carrotsearch.randomizedtesting.RandomizedContext; +import com.nvidia.cuvs.common.Util; + +public class CagraRandomizedTest extends LuceneTestCase { + private Random random; + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + @Before + public void setup() { + + this.random = random(); + log.info("Test seed: " + RandomizedContext.current().getRunnerSeedAsString()); + } + + @Test + public void testResultsTopKWithRandomValues() throws Throwable { + // Generate a random dataset + int numRows = random.nextInt(10) + 1; + int numCols = random.nextInt(5) + 1; + float[][] dataset = new float[numRows][numCols]; + for (int i = 0; i < numRows; i++) { + for (int j = 0; j < numCols; j++) { + dataset[i][j] = random.nextFloat() * 100; + } + } + + // Generate random query vectors + int numQueries = random.nextInt(5) + 1; + float[][] queries = new float[numQueries][numCols]; + for (int i = 0; i < numQueries; i++) { + for (int j = 0; j < numCols; j++) { + queries[i][j] = random.nextFloat() * 
100; + } + } + + int topK = random.nextInt(numRows) + 1; + + log.info("Dataset size: {}x{}", numRows, numCols); + log.info("Query size: {}x{}", numQueries, numCols); + log.info("TopK: {}", topK); + + log.info("Dataset:"); + for (float[] row : dataset) { + log.info(java.util.Arrays.toString(row)); + } + + log.info("Queries:"); + for (float[] query : queries) { + log.info(java.util.Arrays.toString(query)); + } + + CuVSResources resources = new CuVSResources(); + + int gpuCount = Util.getNumberOfGPUs(resources); + if (gpuCount == -1) { + log.info("Failed to detect GPUs."); + } else if (gpuCount == 0) { + log.info("No GPUs detected."); + } else { + log.info("Number of GPUs detected: {}", gpuCount); + } + + CagraIndexParams indexParams = new CagraIndexParams.Builder(resources).build(); + + CagraIndex index = new CagraIndex.Builder(resources).withDataset(dataset).withIndexParams(indexParams).build(); + + CagraQuery query = new CagraQuery.Builder().withQueryVectors(queries).withTopK(topK) + .withSearchParams(new CagraSearchParams.Builder(resources).build()).build(); + + CagraSearchResults results = index.search(query); + + results.getResults().forEach(result -> { + log.info("Result size: {}", result.size()); + assertEquals("TopK mismatch for query.", Math.min(topK, numRows), result.size()); + }); + } + +} + From 35901ea63d3c4c916233cbffca27206f3ffc534e Mon Sep 17 00:00:00 2001 From: punAhuja Date: Thu, 5 Dec 2024 10:45:38 +0530 Subject: [PATCH 2/8] C function, was missed in last commit --- java/internal/src/cuvs_java.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/java/internal/src/cuvs_java.c b/java/internal/src/cuvs_java.c index ec9ecb6af8..7a9fb09094 100644 --- a/java/internal/src/cuvs_java.c +++ b/java/internal/src/cuvs_java.c @@ -91,3 +91,16 @@ void search_cagra_index(cuvsCagraIndex_t index, float *queries, int topk, long n cudaMemcpy(neighbors_h, neighbors, sizeof(uint32_t) * n_queries * topk, cudaMemcpyDefault); cudaMemcpy(distances_h, distances, 
sizeof(float) * n_queries * topk, cudaMemcpyDefault); } + +int get_number_of_gpus() { + int deviceCount = 0; + cudaError_t err = cudaGetDeviceCount(&deviceCount); + + if (err != cudaSuccess) { + return -1; + } + else if(deviceCount == 0){ + return 0; + } + return deviceCount; +} From 151ec194b62e1489f2958fb9ffae0b9e68f75b7c Mon Sep 17 00:00:00 2001 From: punAhuja Date: Mon, 9 Dec 2024 22:54:13 +0530 Subject: [PATCH 3/8] Changes for getting GPU details from C function --- .../java/com/nvidia/cuvs/common/Util.java | 34 +++++++++--------- .../com/nvidia/cuvs/CagraRandomizedTest.java | 13 +------ .../java/com/nvidia/cuvs/common/TestUtil.java | 24 +++++++++++++ java/internal/src/cuvs_java.c | 36 +++++++++++++++++++ 4 files changed, 79 insertions(+), 28 deletions(-) create mode 100644 java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java index 495c71b78f..f45b3e07a0 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java @@ -35,23 +35,25 @@ public class Util { - /** - * Returns the number of GPUs connected to the system using CuVSResources. - * - * @param resources The CuVSResources object managing native resources. - * @return Number of GPUs connected, or -1 if an error occurred. 
- */ - public static int getNumberOfGPUs(CuVSResources resources) { - try { - MethodHandle getNumberOfGPUsHandle = resources.linker.downcallHandle( - resources.libcuvsNativeLibrary.find("get_number_of_gpus") - .orElseThrow(() -> new IllegalStateException("get_number_of_gpus not found in library")), - FunctionDescriptor.of(ValueLayout.JAVA_INT)); - - return (int) getNumberOfGPUsHandle.invokeExact(); + public static String getGpuDetails(CuVSResources resources, int maxGpus, int maxDetailLength) { + try (Arena arena = Arena.ofConfined()) { + MemorySegment detailSegment = arena.allocate(maxGpus * maxDetailLength); + MethodHandle getGpuDetailsHandle = resources.linker.downcallHandle( + resources.libcuvsNativeLibrary.find("get_gpu_details") + .orElseThrow(() -> new IllegalStateException("get_gpu_details not found in library")), + FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.JAVA_INT, ValueLayout.JAVA_INT)); + + int gpuCount = (int) getGpuDetailsHandle.invoke(detailSegment, maxGpus, maxDetailLength); + if (gpuCount < 0) { + throw new RuntimeException("Failed to retrieve GPU details"); + } + + // Convert MemorySegment to String + String details = new String(detailSegment.toArray(ValueLayout.JAVA_BYTE), 0, gpuCount * maxDetailLength); + return details.trim(); } catch (Throwable e) { - System.err.println("Failed to invoke get_number_of_gpus: " + e.getMessage()); - return -1; // Return -1 to indicate an error + System.err.println("Error invoking get_gpu_details: " + e.getMessage()); + throw new RuntimeException("Failed to invoke get_gpu_details", e); } } diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedTest.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedTest.java index 18d94a279b..8cb6173c02 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedTest.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/CagraRandomizedTest.java @@ -10,7 +10,6 @@ import org.slf4j.LoggerFactory; 
import com.carrotsearch.randomizedtesting.RandomizedContext; -import com.nvidia.cuvs.common.Util; public class CagraRandomizedTest extends LuceneTestCase { private Random random; @@ -56,21 +55,12 @@ public void testResultsTopKWithRandomValues() throws Throwable { } log.info("Queries:"); - for (float[] query : queries) { + for (float[] query : queries) { log.info(java.util.Arrays.toString(query)); } CuVSResources resources = new CuVSResources(); - int gpuCount = Util.getNumberOfGPUs(resources); - if (gpuCount == -1) { - log.info("Failed to detect GPUs."); - } else if (gpuCount == 0) { - log.info("No GPUs detected."); - } else { - log.info("Number of GPUs detected: {}", gpuCount); - } - CagraIndexParams indexParams = new CagraIndexParams.Builder(resources).build(); CagraIndex index = new CagraIndex.Builder(resources).withDataset(dataset).withIndexParams(indexParams).build(); @@ -87,4 +77,3 @@ public void testResultsTopKWithRandomValues() throws Throwable { } } - diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java new file mode 100644 index 0000000000..8b75ff3619 --- /dev/null +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java @@ -0,0 +1,24 @@ +package com.nvidia.cuvs.common; + +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +import com.nvidia.cuvs.CuVSResources; + +public class TestUtil { + + @Test + public void testGpuDetails() throws Throwable { + try { + CuVSResources resources = new CuVSResources(); + String details = Util.getGpuDetails(resources, 10, 256); + System.out.println("GPU Details: " + details); + assertTrue("GPU details should not be empty", !details.isEmpty()); + } catch (RuntimeException e) { + e.printStackTrace(); + throw new AssertionError("Test failed due to an exception: " + e.getMessage()); + } + } + +} diff --git a/java/internal/src/cuvs_java.c b/java/internal/src/cuvs_java.c index 7a9fb09094..29aabcfee9 
100644 --- a/java/internal/src/cuvs_java.c +++ b/java/internal/src/cuvs_java.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include cuvsResources_t create_resource(int *returnValue) { cuvsResources_t cuvsResources; @@ -104,3 +106,37 @@ int get_number_of_gpus() { } return deviceCount; } + +int get_gpu_details(char *details, int max_gpus, int max_detail_length) { + int deviceCount = 0; + cudaError_t err = cudaGetDeviceCount(&deviceCount); + + if (err != cudaSuccess || deviceCount == 0) { + fprintf(stderr, "cudaGetDeviceCount failed or no GPUs found: %s\n", cudaGetErrorString(err)); + return -1; + } + + for (int i = 0; i < deviceCount && i < max_gpus; i++) { + struct cudaDeviceProp deviceProp; + err = cudaGetDeviceProperties(&deviceProp, i); + if (err != cudaSuccess) { + snprintf(&details[i * max_detail_length], max_detail_length, + "Error fetching properties for device %d", i); + continue; + } + + size_t freeMem = 0, totalMem = 0; + cudaSetDevice(i); + err = cudaMemGetInfo(&freeMem, &totalMem); + if (err != cudaSuccess) { + snprintf(&details[i * max_detail_length], max_detail_length, + "%s | Memory info unavailable", deviceProp.name); + continue; + } + + snprintf(&details[i * max_detail_length], max_detail_length, + "%s | Total: %zuMB | Free: %zuMB", deviceProp.name, totalMem / (1024 * 1024), freeMem / (1024 * 1024)); + } + + return deviceCount; +} From c2dba7bf37b12388bc6c884b17761f2cc4234332 Mon Sep 17 00:00:00 2001 From: punAhuja Date: Tue, 10 Dec 2024 19:36:12 +0530 Subject: [PATCH 4/8] Populating structure with GPU details --- .../java/com/nvidia/cuvs/CuVSResources.java | 14 ++++++ .../com/nvidia/cuvs/common/GpuDetail.java | 34 +++++++++++++ .../java/com/nvidia/cuvs/common/Util.java | 47 +++++++++++------- .../nvidia/cuvs/panama/GpuDetailLayout.java | 28 +++++++++++ .../java/com/nvidia/cuvs/common/TestUtil.java | 46 +++++++++++------ java/internal/src/cuvs_java.c | 49 ++++++------------- 6 files changed, 151 insertions(+), 67 deletions(-) create mode 
100644 java/cuvs-java/src/main/java/com/nvidia/cuvs/common/GpuDetail.java create mode 100644 java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuDetailLayout.java diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CuVSResources.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CuVSResources.java index 5020c0b6d8..373a295a8e 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/CuVSResources.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/CuVSResources.java @@ -41,6 +41,7 @@ public class CuVSResources { private final MethodHandle createResourceMethodHandle; private final MemorySegment memorySegment; + private final MethodHandle getGpuDetailsHandle; /** * Constructor that allocates the resources needed for cuVS @@ -61,6 +62,10 @@ public CuVSResources() throws Throwable { MemorySegment returnValueMemorySegment = arena.allocate(returnValueMemoryLayout); memorySegment = (MemorySegment) createResourceMethodHandle.invokeExact(returnValueMemorySegment); + getGpuDetailsHandle = linker.downcallHandle( + libcuvsNativeLibrary.find("get_gpu_details") + .orElseThrow(() -> new IllegalStateException("get_gpu_details not found in library")), + FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.JAVA_INT, ValueLayout.JAVA_INT)); } /** @@ -78,4 +83,13 @@ protected MemorySegment getMemorySegment() { protected SymbolLookup getLibcuvsNativeLibrary() { return libcuvsNativeLibrary; } + + /** + * Gets the MethodHandle for the `get_gpu_details` function. 
+ * + * @return MethodHandle for `get_gpu_details` + */ + public MethodHandle getGpuDetailsHandle() { + return getGpuDetailsHandle; + } } \ No newline at end of file diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/GpuDetail.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/GpuDetail.java new file mode 100644 index 0000000000..43d1a0f1e7 --- /dev/null +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/GpuDetail.java @@ -0,0 +1,34 @@ +package com.nvidia.cuvs.common; + +public class GpuDetail { + private final String name; + private final long totalMemory; + private final long freeMemory; + + public GpuDetail(String name, long totalMemory, long freeMemory) { + this.name = name; + this.totalMemory = totalMemory; + this.freeMemory = freeMemory; + } + + public String getName() { + return name; + } + + public long getTotalMemory() { + return totalMemory; + } + + public long getFreeMemory() { + return freeMemory; + } + + @Override + public String toString() { + return "GpuDetail{" + + "name='" + name + '\'' + + ", totalMemory=" + totalMemory + + ", freeMemory=" + freeMemory + + '}'; + } +} diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java index f45b3e07a0..8b29891ed4 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java @@ -30,33 +30,42 @@ import java.lang.invoke.VarHandle; import org.apache.commons.io.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.panama.GpuDetailLayout; public class Util { + + private static final Logger log = LoggerFactory.getLogger(Util.class); - public static String getGpuDetails(CuVSResources resources, int maxGpus, int maxDetailLength) { + public static GpuDetail[] getGpuDetails(CuVSResources resources, int maxGpus, int maxDetailLength) { try (Arena arena = 
Arena.ofConfined()) { - MemorySegment detailSegment = arena.allocate(maxGpus * maxDetailLength); - MethodHandle getGpuDetailsHandle = resources.linker.downcallHandle( - resources.libcuvsNativeLibrary.find("get_gpu_details") - .orElseThrow(() -> new IllegalStateException("get_gpu_details not found in library")), - FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.JAVA_INT, ValueLayout.JAVA_INT)); - - int gpuCount = (int) getGpuDetailsHandle.invoke(detailSegment, maxGpus, maxDetailLength); - if (gpuCount < 0) { - throw new RuntimeException("Failed to retrieve GPU details"); - } - - // Convert MemorySegment to String - String details = new String(detailSegment.toArray(ValueLayout.JAVA_BYTE), 0, gpuCount * maxDetailLength); - return details.trim(); + + MemorySegment detailsSegment = arena.allocate(maxGpus * maxDetailLength); + + int gpuCount = (int) resources.getGpuDetailsHandle().invoke(detailsSegment, maxGpus, maxDetailLength); + + if (gpuCount < 0) { + throw new RuntimeException("Failed to retrieve GPU details"); + } + else if (gpuCount == 0) + { + log.info("No GPU found"); + } + + GpuDetail[] gpuDetails = new GpuDetail[gpuCount]; + for (int i = 0; i < gpuCount; i++) { + MemorySegment structSegment = detailsSegment.asSlice(i * GpuDetailLayout.LAYOUT.byteSize(), GpuDetailLayout.LAYOUT.byteSize()); + gpuDetails[i] = GpuDetailLayout.fromMemorySegment(structSegment); + } + + return gpuDetails; } catch (Throwable e) { - System.err.println("Error invoking get_gpu_details: " + e.getMessage()); - throw new RuntimeException("Failed to invoke get_gpu_details", e); + throw new RuntimeException("Failed to invoke get_gpu_details", e); } - } - +} /** * A utility method for getting an instance of {@link MemorySegment} for a * {@link String}. 
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuDetailLayout.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuDetailLayout.java new file mode 100644 index 0000000000..a8445644fb --- /dev/null +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuDetailLayout.java @@ -0,0 +1,28 @@ +package com.nvidia.cuvs.panama; + +import java.lang.foreign.MemoryLayout; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.charset.StandardCharsets; + +import com.nvidia.cuvs.common.GpuDetail; + +public class GpuDetailLayout { + // Define the struct layout + public static final MemoryLayout LAYOUT = MemoryLayout.structLayout( + MemoryLayout.sequenceLayout(64, ValueLayout.JAVA_BYTE).withName("name"), // Array for GPU name + ValueLayout.JAVA_LONG.withName("totalMemory"), // Total memory + ValueLayout.JAVA_LONG.withName("freeMemory") // Free memory + ); + + public static final int MAX_NAME_LENGTH = 64; // Match the max GPU name length in your C code + + public static GpuDetail fromMemorySegment(MemorySegment segment) { + // Extract fields from the memory segment + String name = new String(segment.asSlice(0, MAX_NAME_LENGTH).toArray(ValueLayout.JAVA_BYTE), StandardCharsets.UTF_8).trim(); + long totalMemory = segment.get(ValueLayout.JAVA_LONG, MAX_NAME_LENGTH); + long freeMemory = segment.get(ValueLayout.JAVA_LONG, MAX_NAME_LENGTH + ValueLayout.JAVA_LONG.byteSize()); + + return new GpuDetail(name, totalMemory, freeMemory); + } +} diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java index 8b75ff3619..c5a7ef04c4 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java @@ -1,24 +1,40 @@ package com.nvidia.cuvs.common; -import static org.junit.Assert.assertTrue; - +import com.nvidia.cuvs.CuVSResources; import org.junit.Test; 
+import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import com.nvidia.cuvs.CuVSResources; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; public class TestUtil { - @Test - public void testGpuDetails() throws Throwable { - try { - CuVSResources resources = new CuVSResources(); - String details = Util.getGpuDetails(resources, 10, 256); - System.out.println("GPU Details: " + details); - assertTrue("GPU details should not be empty", !details.isEmpty()); - } catch (RuntimeException e) { - e.printStackTrace(); - throw new AssertionError("Test failed due to an exception: " + e.getMessage()); - } - } + private static final Logger log = LoggerFactory.getLogger(TestUtil.class); + @Test + public void testGpuDetails() { + try { + CuVSResources resources = new CuVSResources(); + + int maxGpus = 10; + int maxDetailLength = 256; + + GpuDetail[] gpuDetails = Util.getGpuDetails(resources, maxGpus, maxDetailLength); + + assertNotNull("GPU details should not be null", gpuDetails); + assertTrue("GPU details array should contain at least one GPU", gpuDetails.length > 0); + + log.info("Number of GPUs: {}", gpuDetails.length); + for (GpuDetail detail : gpuDetails) { + log.info("GPU Name: {}", detail.getName()); + log.info("Total Memory (MB): {}", detail.getTotalMemory()); + log.info("Free Memory (MB): {}", detail.getFreeMemory()); + } + + } catch (Throwable e) { + log.error("Test failed due to an exception: {}", e.getMessage(), e); + throw new RuntimeException("Test failed due to an exception: " + e.getMessage(), e); + } + } } diff --git a/java/internal/src/cuvs_java.c b/java/internal/src/cuvs_java.c index 29aabcfee9..f9b8887607 100644 --- a/java/internal/src/cuvs_java.c +++ b/java/internal/src/cuvs_java.c @@ -94,48 +94,31 @@ void search_cagra_index(cuvsCagraIndex_t index, float *queries, int topk, long n cudaMemcpy(distances_h, distances, sizeof(float) * n_queries * topk, cudaMemcpyDefault); } -int get_number_of_gpus() { - int 
deviceCount = 0; - cudaError_t err = cudaGetDeviceCount(&deviceCount); - - if (err != cudaSuccess) { - return -1; - } - else if(deviceCount == 0){ - return 0; - } - return deviceCount; -} +typedef struct { + char name[64]; + size_t totalMemory; + size_t freeMemory; +} GpuDetail; -int get_gpu_details(char *details, int max_gpus, int max_detail_length) { +int get_gpu_details(GpuDetail *details, int maxGpus) { int deviceCount = 0; cudaError_t err = cudaGetDeviceCount(&deviceCount); if (err != cudaSuccess || deviceCount == 0) { - fprintf(stderr, "cudaGetDeviceCount failed or no GPUs found: %s\n", cudaGetErrorString(err)); return -1; } + else if(deviceCount == 0){ + return 0; + } - for (int i = 0; i < deviceCount && i < max_gpus; i++) { + for (int i = 0; i < deviceCount && i < maxGpus; i++) { struct cudaDeviceProp deviceProp; - err = cudaGetDeviceProperties(&deviceProp, i); - if (err != cudaSuccess) { - snprintf(&details[i * max_detail_length], max_detail_length, - "Error fetching properties for device %d", i); - continue; - } - - size_t freeMem = 0, totalMem = 0; - cudaSetDevice(i); - err = cudaMemGetInfo(&freeMem, &totalMem); - if (err != cudaSuccess) { - snprintf(&details[i * max_detail_length], max_detail_length, - "%s | Memory info unavailable", deviceProp.name); - continue; - } - - snprintf(&details[i * max_detail_length], max_detail_length, - "%s | Total: %zuMB | Free: %zuMB", deviceProp.name, totalMem / (1024 * 1024), freeMem / (1024 * 1024)); + cudaGetDeviceProperties(&deviceProp, i); + + strncpy(details[i].name, deviceProp.name, sizeof(details[i].name) - 1); + details[i].name[sizeof(details[i].name) - 1] = '\0'; // Null-terminate + + cudaMemGetInfo(&details[i].freeMemory, &details[i].totalMemory); } return deviceCount; From cee0c353a0c06beb2df013fa968350403a933d51 Mon Sep 17 00:00:00 2001 From: punAhuja Date: Tue, 10 Dec 2024 19:43:11 +0530 Subject: [PATCH 5/8] Using resources.Arena instead of declaring new variable --- 
java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java index 8b29891ed4..eff60f4adc 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java @@ -41,9 +41,9 @@ public class Util { private static final Logger log = LoggerFactory.getLogger(Util.class); public static GpuDetail[] getGpuDetails(CuVSResources resources, int maxGpus, int maxDetailLength) { - try (Arena arena = Arena.ofConfined()) { + try{ - MemorySegment detailsSegment = arena.allocate(maxGpus * maxDetailLength); + MemorySegment detailsSegment = resources.arena.allocate(maxGpus * maxDetailLength); int gpuCount = (int) resources.getGpuDetailsHandle().invoke(detailsSegment, maxGpus, maxDetailLength); From 632f92bb5cff220cb1b5b61652214b00d4d49866 Mon Sep 17 00:00:00 2001 From: punAhuja Date: Tue, 10 Dec 2024 19:47:51 +0530 Subject: [PATCH 6/8] Corrected indentation and formatting --- .../com/nvidia/cuvs/common/GpuDetail.java | 46 ++++++++---------- .../java/com/nvidia/cuvs/common/Util.java | 48 +++++++++---------- .../nvidia/cuvs/panama/GpuDetailLayout.java | 27 +++++------ .../java/com/nvidia/cuvs/common/TestUtil.java | 40 ++++++++-------- 4 files changed, 78 insertions(+), 83 deletions(-) diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/GpuDetail.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/GpuDetail.java index 43d1a0f1e7..fdddb88c09 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/GpuDetail.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/GpuDetail.java @@ -1,34 +1,30 @@ package com.nvidia.cuvs.common; public class GpuDetail { - private final String name; - private final long totalMemory; - private final long freeMemory; + private final String name; + private 
final long totalMemory; + private final long freeMemory; - public GpuDetail(String name, long totalMemory, long freeMemory) { - this.name = name; - this.totalMemory = totalMemory; - this.freeMemory = freeMemory; - } + public GpuDetail(String name, long totalMemory, long freeMemory) { + this.name = name; + this.totalMemory = totalMemory; + this.freeMemory = freeMemory; + } - public String getName() { - return name; - } + public String getName() { + return name; + } - public long getTotalMemory() { - return totalMemory; - } + public long getTotalMemory() { + return totalMemory; + } - public long getFreeMemory() { - return freeMemory; - } + public long getFreeMemory() { + return freeMemory; + } - @Override - public String toString() { - return "GpuDetail{" + - "name='" + name + '\'' + - ", totalMemory=" + totalMemory + - ", freeMemory=" + freeMemory + - '}'; - } + @Override + public String toString() { + return "GpuDetail{" + "name='" + name + '\'' + ", totalMemory=" + totalMemory + ", freeMemory=" + freeMemory + '}'; + } } diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java index eff60f4adc..f246b9d2dd 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java @@ -37,35 +37,35 @@ import com.nvidia.cuvs.panama.GpuDetailLayout; public class Util { - + private static final Logger log = LoggerFactory.getLogger(Util.class); public static GpuDetail[] getGpuDetails(CuVSResources resources, int maxGpus, int maxDetailLength) { - try{ - - MemorySegment detailsSegment = resources.arena.allocate(maxGpus * maxDetailLength); - - int gpuCount = (int) resources.getGpuDetailsHandle().invoke(detailsSegment, maxGpus, maxDetailLength); - - if (gpuCount < 0) { - throw new RuntimeException("Failed to retrieve GPU details"); - } - else if (gpuCount == 0) - { - log.info("No GPU found"); - } - - GpuDetail[] gpuDetails = new 
GpuDetail[gpuCount]; - for (int i = 0; i < gpuCount; i++) { - MemorySegment structSegment = detailsSegment.asSlice(i * GpuDetailLayout.LAYOUT.byteSize(), GpuDetailLayout.LAYOUT.byteSize()); - gpuDetails[i] = GpuDetailLayout.fromMemorySegment(structSegment); - } - - return gpuDetails; + try { + + MemorySegment detailsSegment = resources.arena.allocate(maxGpus * maxDetailLength); + + int gpuCount = (int) resources.getGpuDetailsHandle().invoke(detailsSegment, maxGpus, maxDetailLength); + + if (gpuCount < 0) { + throw new RuntimeException("Failed to retrieve GPU details"); + } else if (gpuCount == 0) { + log.info("No GPU found"); + } + + GpuDetail[] gpuDetails = new GpuDetail[gpuCount]; + for (int i = 0; i < gpuCount; i++) { + MemorySegment structSegment = detailsSegment.asSlice(i * GpuDetailLayout.LAYOUT.byteSize(), + GpuDetailLayout.LAYOUT.byteSize()); + gpuDetails[i] = GpuDetailLayout.fromMemorySegment(structSegment); + } + + return gpuDetails; } catch (Throwable e) { - throw new RuntimeException("Failed to invoke get_gpu_details", e); + throw new RuntimeException("Failed to invoke get_gpu_details", e); } -} + } + /** * A utility method for getting an instance of {@link MemorySegment} for a * {@link String}. 
diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuDetailLayout.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuDetailLayout.java index a8445644fb..ad3aaddeda 100644 --- a/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuDetailLayout.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/panama/GpuDetailLayout.java @@ -8,21 +8,20 @@ import com.nvidia.cuvs.common.GpuDetail; public class GpuDetailLayout { - // Define the struct layout - public static final MemoryLayout LAYOUT = MemoryLayout.structLayout( - MemoryLayout.sequenceLayout(64, ValueLayout.JAVA_BYTE).withName("name"), // Array for GPU name - ValueLayout.JAVA_LONG.withName("totalMemory"), // Total memory - ValueLayout.JAVA_LONG.withName("freeMemory") // Free memory - ); + // Define the struct layout + public static final MemoryLayout LAYOUT = MemoryLayout.structLayout( + MemoryLayout.sequenceLayout(64, ValueLayout.JAVA_BYTE).withName("name"), + ValueLayout.JAVA_LONG.withName("totalMemory"), ValueLayout.JAVA_LONG.withName("freeMemory")); - public static final int MAX_NAME_LENGTH = 64; // Match the max GPU name length in your C code + public static final int MAX_NAME_LENGTH = 64; - public static GpuDetail fromMemorySegment(MemorySegment segment) { - // Extract fields from the memory segment - String name = new String(segment.asSlice(0, MAX_NAME_LENGTH).toArray(ValueLayout.JAVA_BYTE), StandardCharsets.UTF_8).trim(); - long totalMemory = segment.get(ValueLayout.JAVA_LONG, MAX_NAME_LENGTH); - long freeMemory = segment.get(ValueLayout.JAVA_LONG, MAX_NAME_LENGTH + ValueLayout.JAVA_LONG.byteSize()); + public static GpuDetail fromMemorySegment(MemorySegment segment) { + // Extract fields from the memory segment + String name = new String(segment.asSlice(0, MAX_NAME_LENGTH).toArray(ValueLayout.JAVA_BYTE), StandardCharsets.UTF_8) + .trim(); + long totalMemory = segment.get(ValueLayout.JAVA_LONG, MAX_NAME_LENGTH); + long freeMemory = segment.get(ValueLayout.JAVA_LONG, 
MAX_NAME_LENGTH + ValueLayout.JAVA_LONG.byteSize()); - return new GpuDetail(name, totalMemory, freeMemory); - } + return new GpuDetail(name, totalMemory, freeMemory); + } } diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java index c5a7ef04c4..e970293cbb 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java @@ -10,31 +10,31 @@ public class TestUtil { - private static final Logger log = LoggerFactory.getLogger(TestUtil.class); + private static final Logger log = LoggerFactory.getLogger(TestUtil.class); - @Test - public void testGpuDetails() { - try { - CuVSResources resources = new CuVSResources(); + @Test + public void testGpuDetails() { + try { + CuVSResources resources = new CuVSResources(); - int maxGpus = 10; - int maxDetailLength = 256; + int maxGpus = 10; + int maxDetailLength = 256; - GpuDetail[] gpuDetails = Util.getGpuDetails(resources, maxGpus, maxDetailLength); + GpuDetail[] gpuDetails = Util.getGpuDetails(resources, maxGpus, maxDetailLength); - assertNotNull("GPU details should not be null", gpuDetails); - assertTrue("GPU details array should contain at least one GPU", gpuDetails.length > 0); + assertNotNull("GPU details should not be null", gpuDetails); + assertTrue("GPU details array should contain at least one GPU", gpuDetails.length > 0); - log.info("Number of GPUs: {}", gpuDetails.length); - for (GpuDetail detail : gpuDetails) { - log.info("GPU Name: {}", detail.getName()); - log.info("Total Memory (MB): {}", detail.getTotalMemory()); - log.info("Free Memory (MB): {}", detail.getFreeMemory()); - } + log.info("Number of GPUs: {}", gpuDetails.length); + for (GpuDetail detail : gpuDetails) { + log.info("GPU Name: {}", detail.getName()); + log.info("Total Memory (MB): {}", detail.getTotalMemory()); + log.info("Free Memory (MB): {}", detail.getFreeMemory()); + } - } 
catch (Throwable e) { - log.error("Test failed due to an exception: {}", e.getMessage(), e); - throw new RuntimeException("Test failed due to an exception: " + e.getMessage(), e); - } + } catch (Throwable e) { + log.error("Test failed due to an exception: {}", e.getMessage(), e); + throw new RuntimeException("Test failed due to an exception: " + e.getMessage(), e); } + } } From 934b3b0037a659aa0afac32728135750ebdcf8b7 Mon Sep 17 00:00:00 2001 From: punAhuja Date: Tue, 10 Dec 2024 19:52:47 +0530 Subject: [PATCH 7/8] Converted Bytes to MB --- .../src/test/java/com/nvidia/cuvs/common/TestUtil.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java b/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java index e970293cbb..03156de712 100644 --- a/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java +++ b/java/cuvs-java/src/test/java/com/nvidia/cuvs/common/TestUtil.java @@ -28,8 +28,8 @@ public void testGpuDetails() { log.info("Number of GPUs: {}", gpuDetails.length); for (GpuDetail detail : gpuDetails) { log.info("GPU Name: {}", detail.getName()); - log.info("Total Memory (MB): {}", detail.getTotalMemory()); - log.info("Free Memory (MB): {}", detail.getFreeMemory()); + log.info("Total Memory (MB): {}", detail.getTotalMemory()/(1024*1024)); + log.info("Free Memory (MB): {}", detail.getFreeMemory()/(1024*1024)); } } catch (Throwable e) { From 85219c4e3d37b95ce608c5971e6546983fdfc3f5 Mon Sep 17 00:00:00 2001 From: punAhuja Date: Wed, 11 Dec 2024 10:16:20 +0530 Subject: [PATCH 8/8] Addressed some review comments --- .../cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java | 2 -- java/internal/src/cuvs_java.c | 5 +---- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java index f246b9d2dd..fba6fb427a 100644 --- 
a/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java +++ b/java/cuvs-java/src/main/java/com/nvidia/cuvs/common/Util.java @@ -49,8 +49,6 @@ public static GpuDetail[] getGpuDetails(CuVSResources resources, int maxGpus, in if (gpuCount < 0) { throw new RuntimeException("Failed to retrieve GPU details"); - } else if (gpuCount == 0) { - log.info("No GPU found"); } GpuDetail[] gpuDetails = new GpuDetail[gpuCount]; diff --git a/java/internal/src/cuvs_java.c b/java/internal/src/cuvs_java.c index f9b8887607..b9de569418 100644 --- a/java/internal/src/cuvs_java.c +++ b/java/internal/src/cuvs_java.c @@ -104,12 +104,9 @@ int get_gpu_details(GpuDetail *details, int maxGpus) { int deviceCount = 0; cudaError_t err = cudaGetDeviceCount(&deviceCount); - if (err != cudaSuccess || deviceCount == 0) { + if (err != cudaSuccess) { return -1; } - else if(deviceCount == 0){ - return 0; - } for (int i = 0; i < deviceCount && i < maxGpus; i++) { struct cudaDeviceProp deviceProp;