makepath · brendancol · Jan 28, 2026 · Jan 28, 2026
diff --git a/rtxpy/rtx.py b/rtxpy/rtx.py
@@ -5,19 +5,19 @@
 NVIDIA's OptiX ray tracing engine via the otk-pyoptix Python bindings.
 """

 import os
 import atexit
 import struct
 from dataclasses import dataclass, field
 from typing import Dict, List, Optional

 # CRITICAL: cupy must be imported before optix for proper CUDA context sharing
 import cupy
 has_cupy = True

 import optix

 import numpy as np


 # -----------------------------------------------------------------------------
@@ -362,9 +362,9 @@
     )
     build_input.numVertices = num_vertices
 
-    # Acceleration structure options
+    # Acceleration structure options - enable compaction for memory savings
     accel_options = optix.AccelBuildOptions(
-        buildFlags=optix.BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS,
+        buildFlags=optix.BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS | optix.BUILD_FLAG_ALLOW_COMPACTION,
         operation=optix.BUILD_OPERATION_BUILD,
     )
 
@@ -378,7 +378,10 @@
     d_temp = cupy.zeros(buffer_sizes.tempSizeInBytes, dtype=cupy.uint8)
     gas_buffer = cupy.zeros(buffer_sizes.outputSizeInBytes, dtype=cupy.uint8)
 
-    # Build acceleration structure
+    # Allocate buffer to receive compacted size
+    compacted_size_buffer = cupy.zeros(1, dtype=cupy.uint64)
+
+    # Build acceleration structure with compacted size emission
     gas_handle = _state.context.accelBuild(
         0,  # stream
         [accel_options],
@@ -387,9 +390,24 @@
         buffer_sizes.tempSizeInBytes,
         gas_buffer.data.ptr,
         buffer_sizes.outputSizeInBytes,
-        [],  # emitted properties
+        [optix.AccelEmitDesc(compacted_size_buffer.data.ptr, optix.PROPERTY_TYPE_COMPACTED_SIZE)],
     )
 
+    # Synchronize to ensure compacted size is available
+    cupy.cuda.Stream.null.synchronize()
+
+    # Compact if it saves memory
+    compacted_size = int(compacted_size_buffer[0])
+    if compacted_size < gas_buffer.nbytes:
+        compacted_buffer = cupy.zeros(compacted_size, dtype=cupy.uint8)
+        gas_handle = _state.context.accelCompact(
+            0,  # stream
+            gas_handle,
+            compacted_buffer.data.ptr,
+            compacted_size,
+        )
+        gas_buffer = compacted_buffer
+
     return gas_handle, gas_buffer
 
 
@@ -559,9 +577,9 @@
     )
     build_input.numVertices = num_vertices
 
-    # Acceleration structure options
+    # Acceleration structure options - enable compaction for memory savings
     accel_options = optix.AccelBuildOptions(
-        buildFlags=optix.BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS,
+        buildFlags=optix.BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS | optix.BUILD_FLAG_ALLOW_COMPACTION,
         operation=optix.BUILD_OPERATION_BUILD,
     )
 
@@ -573,20 +591,40 @@
 
     # Allocate buffers
     d_temp = cupy.zeros(buffer_sizes.tempSizeInBytes, dtype=cupy.uint8)
-    _state.gas_buffer = cupy.zeros(buffer_sizes.outputSizeInBytes, dtype=cupy.uint8)
+    gas_buffer = cupy.zeros(buffer_sizes.outputSizeInBytes, dtype=cupy.uint8)
 
-    # Build acceleration structure
+    # Allocate buffer to receive compacted size
+    compacted_size_buffer = cupy.zeros(1, dtype=cupy.uint64)
+
+    # Build acceleration structure with compacted size emission
     _state.gas_handle = _state.context.accelBuild(
         0,  # stream
         [accel_options],
         [build_input],
         d_temp.data.ptr,
         buffer_sizes.tempSizeInBytes,
-        _state.gas_buffer.data.ptr,
+        gas_buffer.data.ptr,
         buffer_sizes.outputSizeInBytes,
-        [],  # emitted properties
+        [optix.AccelEmitDesc(compacted_size_buffer.data.ptr, optix.PROPERTY_TYPE_COMPACTED_SIZE)],
     )
 
+    # Synchronize to ensure compacted size is available
+    cupy.cuda.Stream.null.synchronize()
+
+    # Compact if it saves memory
+    compacted_size = int(compacted_size_buffer[0])
+    if compacted_size < gas_buffer.nbytes:
+        compacted_buffer = cupy.zeros(compacted_size, dtype=cupy.uint8)
+        _state.gas_handle = _state.context.accelCompact(
+            0,  # stream
+            _state.gas_handle,
+            compacted_buffer.data.ptr,
+            compacted_size,
+        )
+        _state.gas_buffer = compacted_buffer
+    else:
+        _state.gas_buffer = gas_buffer
+
     _state.current_hash = hash_value
     return 0
 
@@ -644,7 +682,7 @@
            _state.d_rays_size = rays_size
        _state.d_rays[:] = cupy.asarray(rays, dtype=cupy.float32)
        d_rays = _state.d_rays
        rays_on_host = True

    # Ensure hits buffer is on GPU
    if isinstance(hits, cupy.ndarray):