diff --git a/cuda/common.h b/cuda/common.h index cef3b4f..ea02225 100644 --- a/cuda/common.h +++ b/cuda/common.h @@ -53,4 +53,6 @@ struct Params OptixTraversableHandle handle; Ray* rays; Hit* hits; + int* primitive_ids; // Optional: triangle index per ray (-1 for miss) + int* instance_ids; // Optional: geometry/instance index per ray (-1 for miss) }; diff --git a/cuda/kernel.cu b/cuda/kernel.cu index 317596e..8e30adb 100644 --- a/cuda/kernel.cu +++ b/cuda/kernel.cu @@ -23,7 +23,7 @@ extern "C" __global__ void __raygen__main() const uint3 dim = optixGetLaunchDimensions(); const uint64_t linear_idx = idx.z * dim.y * dim.x + idx.y * dim.x + idx.x; - unsigned int t, nx, ny, nz; + unsigned int t, nx, ny, nz, prim_id, inst_id; Ray ray = params.rays[linear_idx]; optixTrace( params.handle, @@ -40,7 +40,9 @@ extern "C" __global__ void __raygen__main() t, nx, ny, - nz + nz, + prim_id, + inst_id ); Hit hit; @@ -49,6 +51,14 @@ extern "C" __global__ void __raygen__main() hit.geom_normal.y = int_as_float( ny ); hit.geom_normal.z = int_as_float( nz ); params.hits[linear_idx] = hit; + + // Write optional primitive and instance IDs + if (params.primitive_ids != nullptr) { + params.primitive_ids[linear_idx] = static_cast(prim_id); + } + if (params.instance_ids != nullptr) { + params.instance_ids[linear_idx] = static_cast(inst_id); + } } @@ -58,6 +68,8 @@ extern "C" __global__ void __miss__miss() optixSetPayload_1( float_as_int( 1.0f ) ); optixSetPayload_2( float_as_int( 0.0f ) ); optixSetPayload_3( float_as_int( 0.0f ) ); + optixSetPayload_4( static_cast(-1) ); // primitive_id = -1 for miss + optixSetPayload_5( static_cast(-1) ); // instance_id = -1 for miss } @@ -99,4 +111,6 @@ extern "C" __global__ void __closesthit__chit() optixSetPayload_1(float_as_int(n.x)); optixSetPayload_2(float_as_int(n.y)); optixSetPayload_3(float_as_int(n.z)); + optixSetPayload_4(primIdx); // primitive/triangle index + optixSetPayload_5(optixGetInstanceId()); // instance/geometry index } diff --git a/examples/cuda_utils.py b/examples/cuda_utils.py deleted file mode 100644 index 5b60057..0000000 --- a/examples/cuda_utils.py +++ /dev/null @@ -1,47 +0,0 @@ -from numba import cuda -import numba as nb -import numpy as np - -def calc_dims(shape): - threadsperblock = (32,32) - blockspergrid = ( - (shape[0] + (threadsperblock[0] - 1)) // threadsperblock[0], - (shape[1] + (threadsperblock[1] - 1)) // threadsperblock[1] - ) - return blockspergrid, threadsperblock - -@cuda.jit(device=True) -def add(a, b): - return float3(a[0]+b[0], a[1]+b[1], a[2]+b[2]) - -@cuda.jit(device=True) -def diff(a, b): - return float3(a[0]-b[0], a[1]-b[1], a[2]-b[2]) - -@cuda.jit(device=True) -def mul(a, b): - return float3(a[0]*b, a[1]*b, a[2]*b) - -@cuda.jit(device=True) -def multColor(a, b): - return float3(a[0]*b[0], a[1]*b[1], a[2]*b[2]) - -@cuda.jit(device=True) -def dot(a, b): - return a[0]*b[0] + a[1]*b[1] + a[2]*b[2] - -@cuda.jit(device=True) -def mix(a, b, k): - return add(mul(a, k), mul(b, 1-k)) - -@cuda.jit(device=True) -def make_float3(a, offset): - return float3(a[offset], a[offset+1], a[offset+2]) - -@cuda.jit(device=True) -def invert(a): - return float3(-a[0], -a[1], -a[2]) - -@cuda.jit(device=True) -def float3(a, b, c): - return (np.float32(a), np.float32(b), np.float32(c)) diff --git a/examples/hillshade.py b/examples/hillshade.py deleted file mode 100644 index fa433a7..0000000 --- a/examples/hillshade.py +++ /dev/null @@ -1,228 +0,0 @@ -import numpy as np -from numba import cuda - -import cupy -import xarray as xr -from rtxpy import RTX - -from typing import Optional - -from scipy.spatial.transform import Rotation as R - -from cuda_utils import * -import mesh_utils - -@cuda.jit -def _generatePrimaryRays(data, x_coords, y_coords, H, W): - """ - A GPU kernel that given a set of x and y discrete coordinates on a raster terrain - generates in @data a list of parallel rays that represent camera rays generated from an ortographic camera - that is looking straight down at the surface from an origin height 10000 - """ - i, j = cuda.grid(2) - if i>=0 and i < H and j>=0 and j < W: - #data[i,j,0] = j + 1e-6 # x_coords[j] + 1e-6 - #data[i,j,1] = i + 1e-6 # y_coords[i] + 1e-6 - - if (j == W-1): - data[i,j,0] = j - 1e-3 - else: - data[i,j,0] = j + 1e-3 - - if (i == H-1): - data[i,j,1] = i - 1e-3 - else: - data[i,j,1] = i + 1e-3 - - data[i,j,2] = 10000 # Location of the camera (height) - data[i,j,3] = 1e-3 - data[i,j,4] = 0 - data[i,j,5] = 0 - data[i,j,6] = -1 - data[i,j,7] = np.inf - - -def generatePrimaryRays(rays, x_coords, y_coords, H, W): - griddim, blockdim = calc_dims((H, W)) - _generatePrimaryRays[griddim, blockdim](rays, x_coords, y_coords, H, W) - return 0 - - -@cuda.jit -def _generateShadowRays(rays, hits, normals, H, W, sunDir): - """ - A GPU kernel that given a set rays and their respective intersection points, - generates in rays (overwriting the original content) a new set of rays (shadow rays) - That have their origins at the point of intersection of their parent ray and direction - the direction towards the sun - The normals vectors at the point of intersection of the original rays are cached in @normals - Thus we can later use them to do lambertian shading, after the shadow rays have been traced - """ - i, j = cuda.grid(2) - if i>=0 and i < H and j>=0 and j < W: - dist = hits[i,j,0] - norm = make_float3(hits[i,j], 1) - if (norm[2] < 0): - norm = invert(norm) - ray = rays[i,j] - rayOrigin = make_float3(ray, 0) - rayDir = make_float3(ray, 4) - p = add(rayOrigin, mul(rayDir,dist)) - - newOrigin = add(p, mul(norm, 1e-3)) - ray[0] = newOrigin[0] - ray[1] = newOrigin[1] - ray[2] = newOrigin[2] - ray[3] = 1e-3 - ray[4] = sunDir[0] - ray[5] = sunDir[1] - ray[6] = sunDir[2] - ray[7] = np.inf if dist > 0 else 0 - - normals[i,j,0] = norm[0] - normals[i,j,1] = norm[1] - normals[i,j,2] = norm[2] - -def generateShadowRays(rays, hits, normals, H, W, sunDir): - griddim, blockdim = calc_dims((H, W)) - _generateShadowRays[griddim, blockdim](rays, hits, normals, H, W, sunDir) - return 0 - - -@cuda.jit -def _shadeLambert(hits, normals, output, H, W, sunDir, castShadows): - """ - This kernel does a simple Lambertian shading - The hits array contains the results of tracing the shadow rays through the scene. - If the value in hits[x,y,0] is > 0, then a valid intersection occurred and that means that the point - at location x,y is in shadow. - The normals array stores the normal at the intersecion point of each camera ray - We then use the information for light visibility and normal to apply Lambert's cosine law - The final result is stored in output which is an RGB array - """ - i, j = cuda.grid(2) - if i>=0 and i < H and j>=0 and j < W: - # Normal at the intersection of camera ray (i,j) with the scene - norm = make_float3(normals[i,j], 0) - - # Below is same as existing algorithm without shadows and is OK with shadows. - # Could be improved with a bit of antialiasing at edges of shadow??? - - light_dir = make_float3(sunDir, 0) # Might have to make it zero if back cull. - cos_theta = dot(light_dir, norm) # light_dir and norm are already normalised. - - temp = (cos_theta + 1) / 2 - - if castShadows and hits[i, j, 0] >= 0: - temp = temp / 2 - - if temp > 1: - temp = 1 - elif temp < 0: - temp = 0 - - output[i, j] = temp - - - -def shadeLambert(hits, normals, output, H, W, sunDir, castShadows): - griddim, blockdim = calc_dims((H, W)) - _shadeLambert[griddim, blockdim](hits, normals, output, H, W, sunDir, castShadows) - return 0 - - -def getSunDir(angle_altitude, azimuth): - """ - Calculate the vector towards the sun based on sun altitude angle and azimuth - """ - north = (0,1,0) - rx = R.from_euler('x', angle_altitude, degrees=True) - rz = R.from_euler('z', azimuth+180, degrees=True) - sunDir = rx.apply(north) - sunDir = rz.apply(sunDir) - return sunDir - - -def hillshade_rt(raster: xr.DataArray, - optix: RTX, - shadows: bool = False, - azimuth: int = 225, - angle_altitude: int = 25, - name: Optional[str] = 'hillshade') -> xr.DataArray: - - H,W = raster.shape - sunDir = cupy.array(getSunDir(angle_altitude, azimuth)) - - #output = np.zeros((H,W,3), np.float32) - - # Device buffers - d_rays = cupy.empty((H,W,8), np.float32) - d_hits = cupy.empty((H,W,4), np.float32) - d_aux = cupy.empty((H,W,3), np.float32) - d_output = cupy.empty((H,W), np.float32) - - y_coords = cupy.array(raster.indexes.get('y').values) - x_coords = cupy.array(raster.indexes.get('x').values) - - generatePrimaryRays(d_rays, x_coords, y_coords, H, W) - device = cupy.cuda.Device(0) - device.synchronize() - res = optix.trace(d_rays, d_hits, W*H) - - generateShadowRays(d_rays, d_hits, d_aux, H, W, sunDir) - if shadows: - device.synchronize() - res = optix.trace(d_rays, d_hits, W*H) - - shadeLambert(d_hits, d_aux, d_output, H, W, sunDir, shadows) - - if isinstance(raster.data, np.ndarray): - output = cupy.asnumpy(d_output[:, :]) - nanValue = np.nan - else: - output = d_output[:, :] - nanValue = cupy.nan - - output[0, :] = nanValue - output[-1, :] = nanValue - output[:, 0] = nanValue - output[:, -1] = nanValue - - hill = xr.DataArray(output, - name=name, - coords=raster.coords, - dims=raster.dims, - attrs=raster.attrs) - return hill - -def hillshade_gpu(raster: xr.DataArray, - shadows: bool = False, - azimuth: int = 225, - angle_altitude: int = 25, - name: Optional[str] = 'hillshade') -> xr.DataArray: - # Move the terrain to GPU for testing the GPU path - if not isinstance(raster.data, cupy.ndarray): - print("WARNING: raster.data is not a cupy array. Additional overhead will be incurred") - H,W = raster.data.squeeze().shape - optix = RTX() - - datahash = np.uint64(hash(str(raster.data.get())) % (1 << 64)) - optixhash = np.uint64(optix.getHash()) - if (optixhash != datahash): - numTris = (H - 1) * (W - 1) * 2 - verts = cupy.empty(H * W * 3, np.float32) - triangles = cupy.empty(numTris * 3, np.int32) - - # Generate a mesh from the terrain (buffers are on the GPU, so generation happens also on GPU) - res = mesh_utils.triangulateTerrain(verts, triangles, raster) - if res: - raise ValueError("Failed to generate mesh from terrain. Error code:{}".format(res)) - res = optix.build(datahash, verts, triangles) - if res: - raise ValueError("OptiX failed to build GAS with error code:{}".format(res)) - #Clear some GPU memory that we no longer need - verts = None - triangles = None - cupy.get_default_memory_pool().free_all_blocks() - - hill = hillshade_rt(raster, optix, azimuth=azimuth, angle_altitude=angle_altitude, shadows=shadows, name=name) - return hill diff --git a/examples/mesh_utils.py b/examples/mesh_utils.py deleted file mode 100644 index 599f9c9..0000000 --- a/examples/mesh_utils.py +++ /dev/null @@ -1,134 +0,0 @@ - -import numba as nb -from numba import cuda -import numpy as np -import cupy - - -@cuda.jit -def _triangulateTerrain(verts, triangles, data, H, W, scale, stride): - globalId = stride + cuda.grid(1) - if globalId < W*H: - h = globalId // W - w = globalId % W - meshMapIndex = h * W + w - - val = data[h, w] - - offset = 3*meshMapIndex - verts[offset] = w # x_coords[w] # w - verts[offset+1] = h # y_coords[h] # h - verts[offset+2] = val * scale - - if w != W - 1 and h != H - 1: - offset = 6*(h * (W-1) + w) - triangles[offset+0] = np.int32(meshMapIndex + W) - triangles[offset+1] = np.int32(meshMapIndex + W + 1) - triangles[offset+2] = np.int32(meshMapIndex) - triangles[offset+3] = np.int32(meshMapIndex + W + 1) - triangles[offset+4] = np.int32(meshMapIndex + 1) - triangles[offset+5] = np.int32(meshMapIndex) - - -@nb.njit(parallel=True) -def triangulateCPU(verts, triangles, data, H, W, scale): - for h in nb.prange(H): - for w in range(W): - meshMapIndex = h * W + w - - val = data[h, w] - - offset = 3*meshMapIndex - verts[offset] = w # x_coords[w] # w - verts[offset+1] = h # y_coords[h] # h - verts[offset+2] = val * scale - - if w != W - 1 and h != H - 1: - offset = 6*(h * (W-1) + w) - triangles[offset+0] = np.int32(meshMapIndex + W) - triangles[offset+1] = np.int32(meshMapIndex + W + 1) - triangles[offset+2] = np.int32(meshMapIndex) - triangles[offset+3] = np.int32(meshMapIndex + W+1) - triangles[offset+4] = np.int32(meshMapIndex + 1) - triangles[offset+5] = np.int32(meshMapIndex) - - -def triangulateTerrain(verts, triangles, terrain, scale=1): - H, W = terrain.shape - if isinstance(terrain.data, np.ndarray): - triangulateCPU(verts, triangles, terrain.data, H, W, scale) - if isinstance(terrain.data, cupy.ndarray): - jobSize = H*W - blockdim = 1024 - griddim = (jobSize + blockdim - 1) // 1024 - d = 100 - offset = 0 - while (jobSize > 0): - batch = min(d, griddim) - _triangulateTerrain[batch, blockdim](verts, triangles, - terrain.data, H, W, - scale, offset) - offset += batch*blockdim - jobSize -= batch*blockdim - return 0 - - -@nb.jit(nopython=True) -def fillContents(content, verts, triangles, numTris): - v = np.empty(12, np.float32) - pad = np.zeros(2, np.int8) - offset = 0 - for i in range(numTris): - t0 = triangles[3*i+0] - t1 = triangles[3*i+1] - t2 = triangles[3*i+2] - v[3*0+0] = 0 - v[3*0+1] = 0 - v[3*0+2] = 0 - v[3*1+0] = verts[3*t0+0] - v[3*1+1] = verts[3*t0+1] - v[3*1+2] = verts[3*t0+2] - v[3*2+0] = verts[3*t1+0] - v[3*2+1] = verts[3*t1+1] - v[3*2+2] = verts[3*t1+2] - v[3*3+0] = verts[3*t2+0] - v[3*3+1] = verts[3*t2+1] - v[3*3+2] = verts[3*t2+2] - - offset = 50*i - content[offset:offset+48] = v.view(np.uint8) - content[offset+48:offset+50] = pad - - -def write(name, verts, triangles): - """ - Save a triangulated raster to a standard STL file. - Windows has a default STL viewer and probably all 3D viewers have native support for it - because of its simplicity. Can be used to verify the correctness of the algorithm or - to visualize the mesh to get a notion of the size/complexity etc. - @param name - The name of the mesh file we're going to save. Should end in .stl - @param verts - A numpy array containing all the vertices of the mesh. Format is 3 float32 per vertex (vertex buffer) - @param triangles - A numpy array containing all the triangles of the mesh. Format is 3 int32 per triangle (index buffer) - """ - ib = triangles - vb = verts - if isinstance(ib, cupy.ndarray): - ib = cupy.asnumpy(ib) - if isinstance(vb, cupy.ndarray): - vb = cupy.asnumpy(vb) - - header = np.zeros(80, np.uint8) - nf = np.empty(1, np.uint32) - numTris = triangles.shape[0] // 3 - nf[0] = numTris - f = open(name, 'wb') - f.write(header) - f.write(nf) - - # size of 1 triangle in STL is 50 bytes - # 12 floats (each 4 bytes) for a total of 48 - # And additional 2 bytes for padding - content = np.empty(numTris*(50), np.uint8) - fillContents(content, vb, ib, numTris) - f.write(content) - f.close() diff --git a/examples/viewshed.py b/examples/viewshed.py deleted file mode 100644 index 986c1b9..0000000 --- a/examples/viewshed.py +++ /dev/null @@ -1,222 +0,0 @@ -from numba import cuda -import numpy as np -import numba as nb - -import cupy -import xarray as xr -from rtxpy import RTX -import math - -from typing import Union - -from scipy.spatial.transform import Rotation as R - -from cuda_utils import * -import mesh_utils - -# view options default values -OBS_ELEV = 0 -TARGET_ELEV = 0 - -# if a cell is invisible, its value is set to -1 -INVISIBLE = -1 - -@cuda.jit -def _generatePrimaryRays(data, x_coords, y_coords, H, W): - i, j = cuda.grid(2) - if i>=0 and i < H and j>=0 and j < W: -# if (j == W-1): -# data[i,j,0] = y_coords[j] - 1e-6 -# else: -# data[i,j,0] = y_coords[j] + 1e-6 -# -# if (i == H-1): -# data[i,j,1] = x_coords[i] - 1e-6 -# else: -# data[i,j,1] = x_coords[i] + 1e-6 - - if (j == W-1): - data[i,j,0] = j - 1e-3 - else: - data[i,j,0] = j + 1e-3 - - if (i == H-1): - data[i,j,1] = i - 1e-3 - else: - data[i,j,1] = i + 1e-3 - - data[i,j,2] = 10000 # Location of the camera (height) - data[i,j,3] = 1e-3 - data[i,j,4] = 0 - data[i,j,5] = 0 - data[i,j,6] = -1 - data[i,j,7] = np.inf - -def generatePrimaryRays(rays, x_coords, y_coords, H, W): - griddim, blockdim = calc_dims((H, W)) - d_y_coords = cupy.array(y_coords) - d_x_coords = cupy.array(x_coords) - _generatePrimaryRays[griddim, blockdim](rays, d_x_coords, d_y_coords, H, W) - return 0 - -@cuda.jit -def _generateViewshedRays(camRays, hits, vsrays, visibility_grid, H, W, vp): - i, j = cuda.grid(2) - if i>=0 and i < H and j>=0 and j < W: - elevationOffset = vp[2] - targetElevation = vp[3] - dist = hits[i,j,0] # distance to surface from camera - norm = make_float3(hits[i,j], 1) # normal vector at intersection with surface - #inorm = norm - if (norm[2] < 0): # if back hit, face forward - norm = invert(norm) - cameraRay = camRays[i,j] - rayOrigin = make_float3(cameraRay, 0) # get the camera ray origin - rayDir = make_float3(cameraRay, 4) # get the camera ray direction - hitP = add(rayOrigin, mul(rayDir,dist)) # calculate intersection point - newOrigin = add(hitP, mul(norm, 1e-3)) # generate new ray origin, and a little offset to avoid self-intersection - newOrigin = add(newOrigin, float3(0, 0, targetElevation)) # move the new origin up by the selected by user targetElevation factor - - w = int(vp[0]) - h = int(vp[1]) - viewshedRay = camRays[h,w] # get the camera ray that was cast for the location of the viewshed origin - dist = hits[h,w,0] # get the distance from the camera to the viewshed point - rayOrigin = make_float3(viewshedRay, 0) # get the origin on the camera of the ray towards VP point - rayDir = make_float3(viewshedRay, 4) # get the direction from camera to VP point - hitP = add(rayOrigin, mul(rayDir,dist)) # calculate distance from camera to VP - viewshedPoint = add(hitP, float3(0, 0, elevationOffset)) # calculate the VP location on the surface and add the VP offset - - newDir = diff(viewshedPoint, newOrigin) # calculate vector from SurfaceHit to VP - length = math.sqrt(dot(newDir, newDir)) # calculate distance from surface to VP - newDir = mul(newDir, 1/length) # normalize the direction (vector v) - - cosine = dot(norm, newDir) # cosine of the angle between n and v - theta = math.acos(cosine) # Cosine angle in radians - theta = (180*theta)/math.pi # Cosine angle in degrees - - # prepare a viewshed ray to cast to determine visibility - vsray = vsrays[i,j] - vsray[0] = newOrigin[0] - vsray[1] = newOrigin[1] - vsray[2] = newOrigin[2] - vsray[3] = 0 - vsray[4] = newDir[0] - vsray[5] = newDir[1] - vsray[6] = newDir[2] - vsray[7] = length - - visibility_grid[i,j] = theta - -def generateViewshedRays(rays, hits, vsrays, visibility_grid, H, W, vp): - griddim, blockdim = calc_dims((H, W)) - _generateViewshedRays[griddim, blockdim](rays, hits, vsrays, visibility_grid, H, W, vp) - return 0 - -@cuda.jit -def _calcViewshed(hits, visibility_grid, H, W): - i, j = cuda.grid(2) - if i>=0 and i < H and j>=0 and j < W: - dist = hits[i,j,0] - # We traced the viewshed rays and now hits contains the intersection data - # if dist > 0, then we were able to hit something along the length of the ray - # which means that the pixel we targeted is not directly visible from the view point - if dist>=0: - visibility_grid[i,j] = INVISIBLE - -def calcViewshed(hits, visibility_grid, H, W): - griddim, blockdim = calc_dims((H, W)) - _calcViewshed[griddim, blockdim](hits, visibility_grid, H, W) - return 0 - -def viewshed_rt(raster: xr.DataArray, - optix: RTX, - x: Union[int, float], - y: Union[int, float], - observer_elev: float = OBS_ELEV, - target_elev: float = TARGET_ELEV) -> xr.DataArray: - - H,W = raster.shape - - y_coords = raster.indexes.get('y').values - x_coords = raster.indexes.get('x').values - - # validate x arg - if x < x_coords.min(): - raise ValueError("x argument outside of raster x_range") - elif x > x_coords.max(): - raise ValueError("x argument outside of raster x_range") - - # validate y arg - if y < y_coords.min(): - raise ValueError("y argument outside of raster y_range") - elif y > y_coords.max(): - raise ValueError("y argument outside of raster y_range") - - selection = raster.sel(x=[x], y=[y], method='nearest') - x = selection.x.values[0] - y = selection.y.values[0] - - y_view = np.where(y_coords == y)[0][0] - x_view = np.where(x_coords == x)[0][0] - - # Device buffers - d_rays = cupy.empty((H,W,8), np.float32) - d_hits = cupy.empty((H,W,4), np.float32) - d_visgrid = cupy.empty((H,W), np.float32) - d_vsrays = cupy.empty((H,W,8), np.float32) - - generatePrimaryRays(d_rays, x_coords, y_coords, H, W) - device = cupy.cuda.Device(0) - device.synchronize() - res = optix.trace(d_rays, d_hits, W*H) - - generateViewshedRays(d_rays, d_hits, d_vsrays, d_visgrid, H, W, (x_view, y_view, observer_elev, target_elev)) - device.synchronize() - res = optix.trace(d_vsrays, d_hits, W*H) - - calcViewshed(d_hits, d_visgrid, H, W) - - if isinstance(raster.data, np.ndarray): - visgrid = cupy.asnumpy(d_visgrid) - else: - visgrid = d_visgrid - - view = xr.DataArray(visgrid, - name="viewshed", - coords=raster.coords, - dims=raster.dims, - attrs=raster.attrs) - return view - -def viewshed_gpu(raster: xr.DataArray, - x: Union[int, float], - y: Union[int, float], - observer_elev: float = OBS_ELEV, - target_elev: float = TARGET_ELEV) -> xr.DataArray: - if not isinstance(raster.data, cupy.ndarray): - raise ValueError("raster.data must be a cupy array") - - H,W = raster.shape - optix = RTX() - - datahash = np.uint64(hash(str(raster.data.get())) % (1 << 64)) - optixhash = np.uint64(optix.getHash()) - if (optixhash != datahash): - numTris = (H - 1) * (W - 1) * 2 - verts = cupy.empty(H * W * 3, np.float32) - triangles = cupy.empty(numTris * 3, np.int32) - - # Generate a mesh from the terrain (buffers are on the GPU, so generation happens also on GPU) - res = mesh_utils.triangulateTerrain(verts, triangles, raster) - if res: - raise ValueError("Failed to generate mesh from terrain. Error code:{}".format(res)) - res = optix.build(datahash, verts, triangles) - if res: - raise ValueError("OptiX failed to build GAS with error code:{}".format(res)) - #Clear some GPU memory that we no longer need - verts = None - triangles = None - cupy.get_default_memory_pool().free_all_blocks() - - view = viewshed_rt(raster, optix, x, y, observer_elev, target_elev) - return view diff --git a/rtxpy/rtx.py b/rtxpy/rtx.py index 73978ea..8a866ee 100644 --- a/rtxpy/rtx.py +++ b/rtxpy/rtx.py @@ -322,7 +322,7 @@ def _init_optix(device: Optional[int] = None): pipeline_options = optix.PipelineCompileOptions( usesMotionBlur=False, traversableGraphFlags=optix.TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY, - numPayloadValues=4, + numPayloadValues=6, # t, nx, ny, nz, primitive_id, instance_id numAttributeValues=2, exceptionFlags=optix.EXCEPTION_FLAG_NONE, pipelineLaunchParamsVariableName="params", @@ -403,7 +403,7 @@ def _init_optix(device: Optional[int] = None): _create_sbt() # Allocate params buffer (24 bytes: handle(8) + rays_ptr(8) + hits_ptr(8)) - _state.d_params = cupy.zeros(24, dtype=cupy.uint8) + _state.d_params = cupy.zeros(40, dtype=cupy.uint8) # 5 pointers * 8 bytes _state.initialized = True atexit.register(_cleanup_at_exit) @@ -771,7 +771,7 @@ def _build_accel(hash_value: int, vertices, indices) -> int: # Ray tracing # ----------------------------------------------------------------------------- -def _trace_rays(rays, hits, num_rays: int) -> int: +def _trace_rays(rays, hits, num_rays: int, primitive_ids=None, instance_ids=None) -> int: """ Trace rays against the current acceleration structure. @@ -782,6 +782,11 @@ def _trace_rays(rays, hits, num_rays: int) -> int: rays: Ray buffer (Nx8 float32: ox,oy,oz,tmin,dx,dy,dz,tmax) hits: Hit buffer (Nx4 float32: t,nx,ny,nz) num_rays: Number of rays to trace + primitive_ids: Optional output buffer (Nx1 int32) for triangle indices. + -1 indicates a miss. + instance_ids: Optional output buffer (Nx1 int32) for geometry/instance indices. + -1 indicates a miss. Useful in multi-GAS mode to identify + which geometry was hit. Returns: 0 on success, non-zero on error @@ -808,6 +813,12 @@ def _trace_rays(rays, hits, num_rays: int) -> int: if rays.size != num_rays * 8 or hits.size != num_rays * 4: return -1 + # Validate optional buffers + if primitive_ids is not None and primitive_ids.size != num_rays: + return -1 + if instance_ids is not None and instance_ids.size != num_rays: + return -1 + # Ensure rays are on GPU if isinstance(rays, cupy.ndarray): d_rays = rays @@ -835,12 +846,38 @@ def _trace_rays(rays, hits, num_rays: int) -> int: d_hits = _state.d_hits hits_on_host = True - # Pack params: handle(8 bytes) + rays_ptr(8 bytes) + hits_ptr(8 bytes) + # Handle optional primitive_ids buffer + d_prim_ids_ptr = 0 + prim_ids_on_host = False + if primitive_ids is not None: + if isinstance(primitive_ids, cupy.ndarray): + d_prim_ids = primitive_ids + prim_ids_on_host = False + else: + d_prim_ids = cupy.zeros(num_rays, dtype=cupy.int32) + prim_ids_on_host = True + d_prim_ids_ptr = d_prim_ids.data.ptr + + # Handle optional instance_ids buffer + d_inst_ids_ptr = 0 + inst_ids_on_host = False + if instance_ids is not None: + if isinstance(instance_ids, cupy.ndarray): + d_inst_ids = instance_ids + inst_ids_on_host = False + else: + d_inst_ids = cupy.zeros(num_rays, dtype=cupy.int32) + inst_ids_on_host = True + d_inst_ids_ptr = d_inst_ids.data.ptr + + # Pack params: handle(8) + rays_ptr(8) + hits_ptr(8) + prim_ids_ptr(8) + inst_ids_ptr(8) params_data = struct.pack( - 'QQQ', + 'QQQQQ', trace_handle, d_rays.data.ptr, d_hits.data.ptr, + d_prim_ids_ptr, + d_inst_ids_ptr, ) _state.d_params[:] = cupy.frombuffer(np.frombuffer(params_data, dtype=np.uint8), dtype=cupy.uint8) @@ -849,17 +886,23 @@ def _trace_rays(rays, hits, num_rays: int) -> int: _state.pipeline, 0, # stream _state.d_params.data.ptr, - 24, # sizeof(Params) + 40, # sizeof(Params): 5 pointers * 8 bytes _state.sbt, num_rays, # width 1, # height 1, # depth ) - # Copy results back if hits was on host - if hits_on_host: + # Copy results back if buffers were on host + if hits_on_host or prim_ids_on_host or inst_ids_on_host: cupy.cuda.Stream.null.synchronize() + + if hits_on_host: hits[:] = d_hits.get() + if prim_ids_on_host: + primitive_ids[:] = d_prim_ids.get() + if inst_ids_on_host: + instance_ids[:] = d_inst_ids.get() return 0 @@ -936,7 +979,7 @@ def getHash(self) -> int: """ return _state.current_hash - def trace(self, rays, hits, numRays: int) -> int: + def trace(self, rays, hits, numRays: int, primitive_ids=None, instance_ids=None) -> int: """ Trace rays against the current acceleration structure. @@ -948,11 +991,17 @@ def trace(self, rays, hits, numRays: int) -> int: hits: Hit buffer (4 float32 per hit: t,nx,ny,nz) t=-1 indicates a miss numRays: Number of rays to trace + primitive_ids: Optional output buffer (numRays x int32) for triangle indices. + Will contain the index of the hit triangle within its geometry, + or -1 for rays that missed. + instance_ids: Optional output buffer (numRays x int32) for geometry/instance indices. + Will contain the instance ID of the hit geometry, or -1 for misses. + Useful in multi-GAS mode to identify which geometry was hit. Returns: 0 on success, non-zero on error """ - return _trace_rays(rays, hits, numRays) + return _trace_rays(rays, hits, numRays, primitive_ids, instance_ids) # ------------------------------------------------------------------------- # Multi-GAS API diff --git a/rtxpy/tests/test_simple.py b/rtxpy/tests/test_simple.py index 42b4ff0..6dfb463 100644 --- a/rtxpy/tests/test_simple.py +++ b/rtxpy/tests/test_simple.py @@ -927,3 +927,252 @@ def test_cupy_buffers_multi_gas(test_cupy): hits_np = hits np.testing.assert_almost_equal(hits_np[0], 10.0, decimal=1) + + +# ============================================================================= +# Primitive ID and Instance ID Tests +# ============================================================================= + +@pytest.mark.parametrize("test_cupy", [False, True]) +def test_primitive_ids_single_gas(test_cupy): + """Test primitive_ids returns correct triangle indices in single-GAS mode.""" + if test_cupy: + if not has_cupy: + pytest.skip("cupy not available") + import cupy + backend = cupy + else: + backend = np + + rtx = RTX() + rtx.clear_scene() + + # Create a mesh with 2 triangles (a quad) + # Triangle 0: vertices 0,1,2 (bottom-left triangle) + # Triangle 1: vertices 2,1,3 (top-right triangle) + verts = backend.float32([0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0]) + triangles = backend.int32([0, 1, 2, 2, 1, 3]) + + res = rtx.build(0, verts, triangles) + assert res == 0 + + # Ray hitting triangle 0 (bottom-left area) + rays = backend.float32([0.25, 0.25, 10, 0, 0, 0, -1, 1000]) + hits = backend.float32([0, 0, 0, 0]) + prim_ids = backend.int32([0]) + + res = rtx.trace(rays, hits, 1, primitive_ids=prim_ids) + assert res == 0 + assert float(hits[0]) > 0 # Should hit + assert int(prim_ids[0]) == 0 # Should hit triangle 0 + + # Ray hitting triangle 1 (top-right area) + rays2 = backend.float32([0.75, 0.75, 10, 0, 0, 0, -1, 1000]) + hits2 = backend.float32([0, 0, 0, 0]) + prim_ids2 = backend.int32([0]) + + res = rtx.trace(rays2, hits2, 1, primitive_ids=prim_ids2) + assert res == 0 + assert float(hits2[0]) > 0 # Should hit + assert int(prim_ids2[0]) == 1 # Should hit triangle 1 + + +@pytest.mark.parametrize("test_cupy", [False, True]) +def test_primitive_ids_miss(test_cupy): + """Test primitive_ids returns -1 for misses.""" + if test_cupy: + if not has_cupy: + pytest.skip("cupy not available") + import cupy + backend = cupy + else: + backend = np + + rtx = RTX() + rtx.clear_scene() + + verts = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0]) + triangles = backend.int32([0, 1, 2]) + + res = rtx.build(0, verts, triangles) + assert res == 0 + + # Ray that misses + rays = backend.float32([100, 100, 10, 0, 0, 0, -1, 1000]) + hits = backend.float32([0, 0, 0, 0]) + prim_ids = backend.int32([999]) # Initialize with non-zero to verify it's set + + res = rtx.trace(rays, hits, 1, primitive_ids=prim_ids) + assert res == 0 + assert float(hits[0]) == -1.0 # Miss + assert int(prim_ids[0]) == -1 # primitive_id should be -1 for miss + + +@pytest.mark.parametrize("test_cupy", [False, True]) +def test_instance_ids_multi_gas(test_cupy): + """Test instance_ids returns correct geometry indices in multi-GAS mode.""" + if test_cupy: + if not has_cupy: + pytest.skip("cupy not available") + import cupy + backend = cupy + else: + backend = np + + rtx = RTX() + rtx.clear_scene() + + # Small triangle + verts = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0]) + tris = backend.int32([0, 1, 2]) + + # Add 3 geometries at different X positions + rtx.add_geometry("mesh0", verts, tris, transform=[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0]) + rtx.add_geometry("mesh1", verts, tris, transform=[1, 0, 0, 5, 0, 1, 0, 0, 0, 0, 1, 0]) + rtx.add_geometry("mesh2", verts, tris, transform=[1, 0, 0, 10, 0, 1, 0, 0, 0, 0, 1, 0]) + + # Ray hitting mesh0 (at x=0.5) + rays0 = backend.float32([0.5, 0.33, 10, 0, 0, 0, -1, 1000]) + hits0 = backend.float32([0, 0, 0, 0]) + prim_ids0 = backend.int32([0]) + inst_ids0 = backend.int32([0]) + + res = rtx.trace(rays0, hits0, 1, primitive_ids=prim_ids0, instance_ids=inst_ids0) + assert res == 0 + assert float(hits0[0]) > 0 + assert int(inst_ids0[0]) == 0 # Should hit mesh0 + + # Ray hitting mesh1 (at x=5.5) + rays1 = backend.float32([5.5, 0.33, 10, 0, 0, 0, -1, 1000]) + hits1 = backend.float32([0, 0, 0, 0]) + prim_ids1 = backend.int32([0]) + inst_ids1 = backend.int32([0]) + + res = rtx.trace(rays1, hits1, 1, primitive_ids=prim_ids1, instance_ids=inst_ids1) + assert res == 0 + assert float(hits1[0]) > 0 + assert int(inst_ids1[0]) == 1 # Should hit mesh1 + + # Ray hitting mesh2 (at x=10.5) + rays2 = backend.float32([10.5, 0.33, 10, 0, 0, 0, -1, 1000]) + hits2 = backend.float32([0, 0, 0, 0]) + prim_ids2 = backend.int32([0]) + inst_ids2 = backend.int32([0]) + + res = rtx.trace(rays2, hits2, 1, primitive_ids=prim_ids2, instance_ids=inst_ids2) + assert res == 0 + assert float(hits2[0]) > 0 + assert int(inst_ids2[0]) == 2 # Should hit mesh2 + + +@pytest.mark.parametrize("test_cupy", [False, True]) +def test_instance_ids_miss(test_cupy): + """Test instance_ids returns -1 for misses in multi-GAS mode.""" + if test_cupy: + if not has_cupy: + pytest.skip("cupy not available") + import cupy + backend = cupy + else: + backend = np + + rtx = RTX() + rtx.clear_scene() + + verts = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0]) + tris = backend.int32([0, 1, 2]) + + rtx.add_geometry("mesh", verts, tris) + + # Ray that misses + rays = backend.float32([100, 100, 10, 0, 0, 0, -1, 1000]) + hits = backend.float32([0, 0, 0, 0]) + prim_ids = backend.int32([999]) + inst_ids = backend.int32([999]) + + res = rtx.trace(rays, hits, 1, primitive_ids=prim_ids, instance_ids=inst_ids) + assert res == 0 + assert float(hits[0]) == -1.0 # Miss + assert int(prim_ids[0]) == -1 + assert int(inst_ids[0]) == -1 + + +@pytest.mark.parametrize("test_cupy", [False, True]) +def test_primitive_ids_multiple_rays(test_cupy): + """Test primitive_ids with multiple rays hitting different triangles.""" + if test_cupy: + if not has_cupy: + pytest.skip("cupy not available") + import cupy + backend = cupy + else: + backend = np + + rtx = RTX() + rtx.clear_scene() + + # Create a mesh with 2 triangles + verts = backend.float32([0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0]) + triangles = backend.int32([0, 1, 2, 2, 1, 3]) + + res = rtx.build(0, verts, triangles) + assert res == 0 + + # 3 rays: one hitting triangle 0, one hitting triangle 1, one missing + rays = backend.float32([ + 0.25, 0.25, 10, 0, 0, 0, -1, 1000, # hits triangle 0 + 0.75, 0.75, 10, 0, 0, 0, -1, 1000, # hits triangle 1 + 100, 100, 10, 0, 0, 0, -1, 1000, # misses + ]) + hits = backend.float32([0] * 12) # 3 rays * 4 floats + prim_ids = backend.int32([0, 0, 0]) + + res = rtx.trace(rays, hits, 3, primitive_ids=prim_ids) + assert res == 0 + + # Check results + assert int(prim_ids[0]) == 0 # First ray hits triangle 0 + assert int(prim_ids[1]) == 1 # Second ray hits triangle 1 + assert int(prim_ids[2]) == -1 # Third ray misses + + +@pytest.mark.parametrize("test_cupy", [False, True]) +def test_primitive_and_instance_ids_optional(test_cupy): + """Test that primitive_ids and instance_ids are truly optional.""" + if test_cupy: + if not has_cupy: + pytest.skip("cupy not available") + import cupy + backend = cupy + else: + backend = np + + rtx = RTX() + rtx.clear_scene() + + verts = backend.float32([0, 0, 0, 1, 0, 0, 0.5, 1, 0]) + triangles = backend.int32([0, 1, 2]) + + res = rtx.build(0, verts, triangles) + assert res == 0 + + rays = backend.float32([0.5, 0.33, 10, 0, 0, 0, -1, 1000]) + hits = backend.float32([0, 0, 0, 0]) + + # Should work without any optional params + res = rtx.trace(rays, hits, 1) + assert res == 0 + np.testing.assert_almost_equal(float(hits[0]), 10.0, decimal=1) + + # Should work with only primitive_ids + hits = backend.float32([0, 0, 0, 0]) + prim_ids = backend.int32([0]) + res = rtx.trace(rays, hits, 1, primitive_ids=prim_ids) + assert res == 0 + assert int(prim_ids[0]) == 0 + + # Should work with only instance_ids + hits = backend.float32([0, 0, 0, 0]) + inst_ids = backend.int32([0]) + res = rtx.trace(rays, hits, 1, instance_ids=inst_ids) + assert res == 0