diff --git a/pyproject.toml b/pyproject.toml index b1e1b8d..449a7c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "rtxpy" -version = "0.0.4" +version = "0.0.5" description = "Ray tracing using CUDA accessible from Python" readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.10" @@ -36,6 +36,7 @@ dependencies = [ # Note: otk-pyoptix must be installed separately from NVIDIA # See: https://github.com/NVIDIA/otk-pyoptix tests = ["pytest"] +terrain = ["numba", "xarray", "scipy"] [project.urls] Homepage = "https://github.com/makepath/rtxpy" diff --git a/rtxpy/__init__.py b/rtxpy/__init__.py index b7083aa..df4a355 100644 --- a/rtxpy/__init__.py +++ b/rtxpy/__init__.py @@ -1,11 +1,6 @@ -from .rtx import ( - RTX, - has_cupy, - get_device_count, - get_device_properties, - list_devices, - get_current_device, -) +from .rtx import RTX, has_cupy +from . import mesh +from . import viewshed +from . import hillshade - -__version__ = "0.0.4" +__version__ = "0.0.5" diff --git a/rtxpy/_cuda_utils.py b/rtxpy/_cuda_utils.py new file mode 100644 index 0000000..2ee3cc6 --- /dev/null +++ b/rtxpy/_cuda_utils.py @@ -0,0 +1,64 @@ +""" +Internal CUDA utility functions for rtxpy terrain analysis. + +This module provides low-level CUDA device functions for vector math operations +used by viewshed and hillshade computations. +""" + +from numba import cuda +import numpy as np + + +def calc_dims(shape): + """Calculate CUDA grid and block dimensions for a 2D array.""" + threadsperblock = (32, 32) + blockspergrid = ( + (shape[0] + (threadsperblock[0] - 1)) // threadsperblock[0], + (shape[1] + (threadsperblock[1] - 1)) // threadsperblock[1] + ) + return blockspergrid, threadsperblock + + +@cuda.jit(device=True) +def add(a, b): + return float3(a[0]+b[0], a[1]+b[1], a[2]+b[2]) + + +@cuda.jit(device=True) +def diff(a, b): + return float3(a[0]-b[0], a[1]-b[1], a[2]-b[2]) + + +@cuda.jit(device=True) +def mul(a, b): + return float3(a[0]*b, a[1]*b, a[2]*b) + + +@cuda.jit(device=True) +def multColor(a, b): + return float3(a[0]*b[0], a[1]*b[1], a[2]*b[2]) + + +@cuda.jit(device=True) +def dot(a, b): + return a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + + +@cuda.jit(device=True) +def mix(a, b, k): + return add(mul(a, k), mul(b, 1-k)) + + +@cuda.jit(device=True) +def make_float3(a, offset): + return float3(a[offset], a[offset+1], a[offset+2]) + + +@cuda.jit(device=True) +def invert(a): + return float3(-a[0], -a[1], -a[2]) + + +@cuda.jit(device=True) +def float3(a, b, c): + return (np.float32(a), np.float32(b), np.float32(c)) diff --git a/rtxpy/hillshade.py b/rtxpy/hillshade.py new file mode 100644 index 0000000..9eb7e85 --- /dev/null +++ b/rtxpy/hillshade.py @@ -0,0 +1,290 @@ +""" +GPU-accelerated hillshade rendering using ray tracing. + +This module provides functions for computing hillshade (terrain shading) +on raster data using NVIDIA OptiX ray tracing for optional shadow casting. + +Functions +--------- +hillshade + Compute hillshade with optional ray-traced shadows. +hillshade_with_context + Compute hillshade using an existing RTX context. +""" + +import numpy as np +from numba import cuda +from typing import Optional + +import cupy +import xarray as xr +from scipy.spatial.transform import Rotation as R + +from .rtx import RTX +from ._cuda_utils import calc_dims, make_float3, add, mul, dot, invert, float3 +from . import mesh + + +@cuda.jit +def _generatePrimaryRays(data, x_coords, y_coords, H, W): + """ + Generate orthographic camera rays looking straight down at the terrain. + """ + i, j = cuda.grid(2) + if i >= 0 and i < H and j >= 0 and j < W: + if (j == W-1): + data[i, j, 0] = j - 1e-3 + else: + data[i, j, 0] = j + 1e-3 + + if (i == H-1): + data[i, j, 1] = i - 1e-3 + else: + data[i, j, 1] = i + 1e-3 + + data[i, j, 2] = 10000 # Camera height + data[i, j, 3] = 1e-3 + data[i, j, 4] = 0 + data[i, j, 5] = 0 + data[i, j, 6] = -1 + data[i, j, 7] = np.inf + + +def _generatePrimaryRaysWrapper(rays, x_coords, y_coords, H, W): + griddim, blockdim = calc_dims((H, W)) + _generatePrimaryRays[griddim, blockdim](rays, x_coords, y_coords, H, W) + return 0 + + +@cuda.jit +def _generateShadowRays(rays, hits, normals, H, W, sunDir): + """ + Generate shadow rays from surface intersection points toward the sun. + """ + i, j = cuda.grid(2) + if i >= 0 and i < H and j >= 0 and j < W: + dist = hits[i, j, 0] + norm = make_float3(hits[i, j], 1) + if (norm[2] < 0): + norm = invert(norm) + ray = rays[i, j] + rayOrigin = make_float3(ray, 0) + rayDir = make_float3(ray, 4) + p = add(rayOrigin, mul(rayDir, dist)) + + newOrigin = add(p, mul(norm, 1e-3)) + ray[0] = newOrigin[0] + ray[1] = newOrigin[1] + ray[2] = newOrigin[2] + ray[3] = 1e-3 + ray[4] = sunDir[0] + ray[5] = sunDir[1] + ray[6] = sunDir[2] + ray[7] = np.inf if dist > 0 else 0 + + normals[i, j, 0] = norm[0] + normals[i, j, 1] = norm[1] + normals[i, j, 2] = norm[2] + + +def _generateShadowRaysWrapper(rays, hits, normals, H, W, sunDir): + griddim, blockdim = calc_dims((H, W)) + _generateShadowRays[griddim, blockdim](rays, hits, normals, H, W, sunDir) + return 0 + + +@cuda.jit +def _shadeLambert(hits, normals, output, H, W, sunDir, castShadows): + """ + Apply Lambertian shading with optional shadow darkening. + """ + i, j = cuda.grid(2) + if i >= 0 and i < H and j >= 0 and j < W: + norm = make_float3(normals[i, j], 0) + light_dir = make_float3(sunDir, 0) + cos_theta = dot(light_dir, norm) + + temp = (cos_theta + 1) / 2 + + if castShadows and hits[i, j, 0] >= 0: + temp = temp / 2 + + if temp > 1: + temp = 1 + elif temp < 0: + temp = 0 + + output[i, j] = temp + + +def _shadeLambertWrapper(hits, normals, output, H, W, sunDir, castShadows): + griddim, blockdim = calc_dims((H, W)) + _shadeLambert[griddim, blockdim](hits, normals, output, H, W, sunDir, castShadows) + return 0 + + +def _getSunDir(angle_altitude, azimuth): + """ + Calculate the vector towards the sun based on altitude angle and azimuth. + """ + north = (0, 1, 0) + rx = R.from_euler('x', angle_altitude, degrees=True) + rz = R.from_euler('z', azimuth+180, degrees=True) + sunDir = rx.apply(north) + sunDir = rz.apply(sunDir) + return sunDir + + +def hillshade_with_context(raster: xr.DataArray, + rtx: RTX, + shadows: bool = False, + azimuth: int = 225, + angle_altitude: int = 25, + name: Optional[str] = 'hillshade') -> xr.DataArray: + """ + Compute hillshade using an existing RTX context. + + This function performs hillshade rendering with optional ray-traced shadows + using an existing RTX context that already has terrain geometry built. + + Parameters + ---------- + raster : xr.DataArray + Input terrain raster with x and y coordinates. + rtx : RTX + Pre-configured RTX instance with terrain geometry already built. + shadows : bool, optional + Whether to compute ray-traced shadows. Default is False. + azimuth : int, optional + Sun azimuth angle in degrees (0=North, 90=East). Default is 225 (SW). + angle_altitude : int, optional + Sun altitude angle in degrees above horizon. Default is 25. + name : str, optional + Name for the output DataArray. Default is 'hillshade'. + + Returns + ------- + xr.DataArray + Hillshade result with values from 0 (dark) to 1 (bright). + """ + H, W = raster.shape + sunDir = cupy.array(_getSunDir(angle_altitude, azimuth)) + + # Device buffers + d_rays = cupy.empty((H, W, 8), np.float32) + d_hits = cupy.empty((H, W, 4), np.float32) + d_aux = cupy.empty((H, W, 3), np.float32) + d_output = cupy.empty((H, W), np.float32) + + y_coords = cupy.array(raster.indexes.get('y').values) + x_coords = cupy.array(raster.indexes.get('x').values) + + _generatePrimaryRaysWrapper(d_rays, x_coords, y_coords, H, W) + device = cupy.cuda.Device(0) + device.synchronize() + rtx.trace(d_rays, d_hits, W*H) + + _generateShadowRaysWrapper(d_rays, d_hits, d_aux, H, W, sunDir) + if shadows: + device.synchronize() + rtx.trace(d_rays, d_hits, W*H) + + _shadeLambertWrapper(d_hits, d_aux, d_output, H, W, sunDir, shadows) + + if isinstance(raster.data, np.ndarray): + output = cupy.asnumpy(d_output[:, :]) + nanValue = np.nan + else: + output = d_output[:, :] + nanValue = cupy.nan + + output[0, :] = nanValue + output[-1, :] = nanValue + output[:, 0] = nanValue + output[:, -1] = nanValue + + hill = xr.DataArray(output, + name=name, + coords=raster.coords, + dims=raster.dims, + attrs=raster.attrs) + return hill + + +def hillshade(raster: xr.DataArray, + shadows: bool = False, + azimuth: int = 225, + angle_altitude: int = 25, + name: Optional[str] = 'hillshade') -> xr.DataArray: + """ + Compute hillshade with optional ray-traced shadows. + + This function performs hillshade (terrain shading) rendering on a raster + DEM. It automatically builds the necessary ray tracing acceleration + structure if needed. + + Parameters + ---------- + raster : xr.DataArray + Input terrain raster as an xarray DataArray with x and y coordinates. + Should be a cupy array for best performance. + shadows : bool, optional + Whether to compute ray-traced shadows. When True, areas occluded from + the sun are darkened. Default is False. + azimuth : int, optional + Sun azimuth angle in degrees (0=North, 90=East, 180=South, 270=West). + Default is 225 (Southwest). + angle_altitude : int, optional + Sun altitude angle in degrees above the horizon. Default is 25. + name : str, optional + Name for the output DataArray. Default is 'hillshade'. + + Returns + ------- + xr.DataArray + Hillshade result with values from 0 (dark) to 1 (bright). + Edge pixels are set to NaN. + + Raises + ------ + ValueError + If mesh generation or OptiX build fails. + + Examples + -------- + >>> import rtxpy + >>> result = rtxpy.hillshade.hillshade(terrain_raster, shadows=True, azimuth=315) + """ + if not isinstance(raster.data, cupy.ndarray): + print("WARNING: raster.data is not a cupy array. Additional overhead will be incurred") + H, W = raster.data.squeeze().shape + rtx = RTX() + + datahash = np.uint64(hash(str(raster.data.get())) % (1 << 64)) + optixhash = np.uint64(rtx.getHash()) + if (optixhash != datahash): + numTris = (H - 1) * (W - 1) * 2 + verts = cupy.empty(H * W * 3, np.float32) + triangles = cupy.empty(numTris * 3, np.int32) + + # Generate mesh from terrain + res = mesh.triangulate_terrain(verts, triangles, raster) + if res: + raise ValueError("Failed to generate mesh from terrain. Error code:{}".format(res)) + res = rtx.build(datahash, verts, triangles) + if res: + raise ValueError("OptiX failed to build GAS with error code:{}".format(res)) + # Clear GPU memory no longer needed + verts = None + triangles = None + cupy.get_default_memory_pool().free_all_blocks() + + hill = hillshade_with_context(raster, rtx, azimuth=azimuth, + angle_altitude=angle_altitude, + shadows=shadows, name=name) + return hill + + +# Backwards compatibility aliases +hillshade_gpu = hillshade +hillshade_rt = hillshade_with_context diff --git a/rtxpy/mesh.py b/rtxpy/mesh.py new file mode 100644 index 0000000..af9bb82 --- /dev/null +++ b/rtxpy/mesh.py @@ -0,0 +1,197 @@ +""" +Mesh utilities for rtxpy terrain processing. + +This module provides functions for triangulating terrain data and exporting +meshes to STL format. + +Functions +--------- +triangulate_terrain + Convert a 2D terrain array into a triangulated mesh. +write_stl + Export a triangulated mesh to STL file format. +""" + +import numba as nb +from numba import cuda +import numpy as np +import cupy + + +@cuda.jit +def _triangulateTerrain(verts, triangles, data, H, W, scale, stride): + globalId = stride + cuda.grid(1) + if globalId < W*H: + h = globalId // W + w = globalId % W + meshMapIndex = h * W + w + + val = data[h, w] + + offset = 3*meshMapIndex + verts[offset] = w # x_coords[w] # w + verts[offset+1] = h # y_coords[h] # h + verts[offset+2] = val * scale + + if w != W - 1 and h != H - 1: + offset = 6*(h * (W-1) + w) + triangles[offset+0] = np.int32(meshMapIndex + W) + triangles[offset+1] = np.int32(meshMapIndex + W + 1) + triangles[offset+2] = np.int32(meshMapIndex) + triangles[offset+3] = np.int32(meshMapIndex + W + 1) + triangles[offset+4] = np.int32(meshMapIndex + 1) + triangles[offset+5] = np.int32(meshMapIndex) + + +@nb.njit(parallel=True) +def _triangulateCPU(verts, triangles, data, H, W, scale): + for h in nb.prange(H): + for w in range(W): + meshMapIndex = h * W + w + + val = data[h, w] + + offset = 3*meshMapIndex + verts[offset] = w # x_coords[w] # w + verts[offset+1] = h # y_coords[h] # h + verts[offset+2] = val * scale + + if w != W - 1 and h != H - 1: + offset = 6*(h * (W-1) + w) + triangles[offset+0] = np.int32(meshMapIndex + W) + triangles[offset+1] = np.int32(meshMapIndex + W + 1) + triangles[offset+2] = np.int32(meshMapIndex) + triangles[offset+3] = np.int32(meshMapIndex + W+1) + triangles[offset+4] = np.int32(meshMapIndex + 1) + triangles[offset+5] = np.int32(meshMapIndex) + + +def triangulate_terrain(verts, triangles, terrain, scale=1): + """ + Convert a 2D terrain array into a triangulated mesh. + + This function populates pre-allocated vertex and triangle buffers with + mesh data generated from the terrain heightmap. The mesh is created by + generating two triangles for each cell in the terrain grid. + + Parameters + ---------- + verts : array-like + Pre-allocated buffer for vertex data. Should have shape (H * W * 3,) + where H and W are the terrain dimensions. Each vertex consists of + 3 float32 values (x, y, z). + triangles : array-like + Pre-allocated buffer for triangle indices. Should have shape + ((H-1) * (W-1) * 2 * 3,) for the index buffer. + terrain : array-like + 2D terrain heightmap with shape (H, W). Can be a numpy array, + cupy array, or xarray DataArray. + scale : float, optional + Scale factor for elevation values. Default is 1. + + Returns + ------- + int + 0 on success. + + Notes + ----- + - If terrain.data is a numpy array, uses parallel CPU processing via numba. + - If terrain.data is a cupy array, uses GPU processing via CUDA kernels. + """ + H, W = terrain.shape + if isinstance(terrain.data, np.ndarray): + _triangulateCPU(verts, triangles, terrain.data, H, W, scale) + if isinstance(terrain.data, cupy.ndarray): + jobSize = H*W + blockdim = 1024 + griddim = (jobSize + blockdim - 1) // 1024 + d = 100 + offset = 0 + while (jobSize > 0): + batch = min(d, griddim) + _triangulateTerrain[batch, blockdim](verts, triangles, + terrain.data, H, W, + scale, offset) + offset += batch*blockdim + jobSize -= batch*blockdim + return 0 + + +@nb.jit(nopython=True) +def _fillContents(content, verts, triangles, numTris): + v = np.empty(12, np.float32) + pad = np.zeros(2, np.int8) + offset = 0 + for i in range(numTris): + t0 = triangles[3*i+0] + t1 = triangles[3*i+1] + t2 = triangles[3*i+2] + v[3*0+0] = 0 + v[3*0+1] = 0 + v[3*0+2] = 0 + v[3*1+0] = verts[3*t0+0] + v[3*1+1] = verts[3*t0+1] + v[3*1+2] = verts[3*t0+2] + v[3*2+0] = verts[3*t1+0] + v[3*2+1] = verts[3*t1+1] + v[3*2+2] = verts[3*t1+2] + v[3*3+0] = verts[3*t2+0] + v[3*3+1] = verts[3*t2+1] + v[3*3+2] = verts[3*t2+2] + + offset = 50*i + content[offset:offset+48] = v.view(np.uint8) + content[offset+48:offset+50] = pad + + +def write_stl(name, verts, triangles): + """ + Save a triangulated mesh to a standard STL file. + + STL is a widely supported 3D mesh format. Windows has a default STL viewer + and most 3D applications support it natively due to its simplicity. + + Parameters + ---------- + name : str + The name of the mesh file to save. Should end in .stl + verts : array-like + Vertex buffer containing all mesh vertices. Format is 3 float32 values + per vertex (x, y, z coordinates). Can be numpy or cupy array. + triangles : array-like + Index buffer containing all mesh triangles. Format is 3 int32 values + per triangle (vertex indices). Can be numpy or cupy array. + + Notes + ----- + If input arrays are cupy arrays, they will be automatically converted + to numpy arrays for file writing. + """ + ib = triangles + vb = verts + if isinstance(ib, cupy.ndarray): + ib = cupy.asnumpy(ib) + if isinstance(vb, cupy.ndarray): + vb = cupy.asnumpy(vb) + + header = np.zeros(80, np.uint8) + nf = np.empty(1, np.uint32) + numTris = triangles.shape[0] // 3 + nf[0] = numTris + f = open(name, 'wb') + f.write(header) + f.write(nf) + + # size of 1 triangle in STL is 50 bytes + # 12 floats (each 4 bytes) for a total of 48 + # And additional 2 bytes for padding + content = np.empty(numTris*(50), np.uint8) + _fillContents(content, vb, ib, numTris) + f.write(content) + f.close() + + +# Backwards compatibility aliases +triangulateTerrain = triangulate_terrain +write = write_stl diff --git a/rtxpy/rtx.py b/rtxpy/rtx.py index 73978ea..9615f6b 100644 --- a/rtxpy/rtx.py +++ b/rtxpy/rtx.py @@ -49,7 +49,6 @@ class _OptixState: """ def __init__(self): - self.device_id = None # CUDA device ID used for this context self.context = None self.module = None self.pipeline = None @@ -84,9 +83,6 @@ def __init__(self): def cleanup(self): """Release all OptiX and CUDA resources.""" - # Reset device tracking - self.device_id = None - # Free device buffers self.d_params = None self.d_rays = None @@ -118,10 +114,6 @@ def cleanup(self): self.initialized = False - def reset_device(self): - """Reset device tracking (called during cleanup).""" - self.device_id = None - _state = _OptixState() @@ -133,106 +125,6 @@ def _cleanup_at_exit(): _state.cleanup() -# ----------------------------------------------------------------------------- -# Device utilities -# ----------------------------------------------------------------------------- - -def get_device_count() -> int: - """ - Get the number of available CUDA devices. - - Returns: - Number of CUDA-capable GPUs available. - - Example: - >>> import rtxpy - >>> rtxpy.get_device_count() - 2 - """ - return cupy.cuda.runtime.getDeviceCount() - - -def get_device_properties(device: int = 0) -> dict: - """ - Get properties of a CUDA device. - - Args: - device: Device ID (0, 1, 2, ...). Defaults to device 0. - - Returns: - Dictionary containing device properties including: - - name: Device name (e.g., "NVIDIA GeForce RTX 3090") - - compute_capability: Tuple of (major, minor) compute capability - - total_memory: Total device memory in bytes - - multiprocessor_count: Number of streaming multiprocessors - - Raises: - ValueError: If device ID is invalid. - - Example: - >>> import rtxpy - >>> props = rtxpy.get_device_properties(0) - >>> print(props['name']) - NVIDIA GeForce RTX 3090 - """ - device_count = cupy.cuda.runtime.getDeviceCount() - if device < 0 or device >= device_count: - raise ValueError( - f"Invalid device ID {device}. " - f"Available devices: 0-{device_count - 1}" - ) - - with cupy.cuda.Device(device): - props = cupy.cuda.runtime.getDeviceProperties(device) - - return { - 'name': props['name'].decode('utf-8') if isinstance(props['name'], bytes) else props['name'], - 'compute_capability': (props['major'], props['minor']), - 'total_memory': props['totalGlobalMem'], - 'multiprocessor_count': props['multiProcessorCount'], - } - - -def list_devices() -> list: - """ - List all available CUDA devices with their properties. - - Returns: - List of dictionaries, each containing device properties. - Each dict includes 'id' (device index) plus all properties - from get_device_properties(). - - Example: - >>> import rtxpy - >>> for dev in rtxpy.list_devices(): - ... print(f"GPU {dev['id']}: {dev['name']}") - GPU 0: NVIDIA GeForce RTX 3090 - GPU 1: NVIDIA GeForce RTX 2080 - """ - devices = [] - for i in range(get_device_count()): - props = get_device_properties(i) - props['id'] = i - devices.append(props) - return devices - - -def get_current_device() -> Optional[int]: - """ - Get the CUDA device ID that RTX is currently using. - - Returns: - Device ID if RTX has been initialized, None otherwise. - - Example: - >>> import rtxpy - >>> rtx = rtxpy.RTX(device=1) - >>> rtxpy.get_current_device() - 1 - """ - return _state.device_id if _state.initialized else None - - # ----------------------------------------------------------------------------- # PTX loading # ----------------------------------------------------------------------------- @@ -265,43 +157,13 @@ def _log_callback(level, tag, message): print(f"[OPTIX][{level}][{tag}]: {message}") -def _init_optix(device: Optional[int] = None): - """ - Initialize OptiX context, module, pipeline, and SBT. - - Args: - device: CUDA device ID to use. If None, uses the current CuPy device. - If already initialized, this parameter is ignored (a warning - would be appropriate if it differs from the active device). - """ +def _init_optix(): + """Initialize OptiX context, module, pipeline, and SBT.""" global _state if _state.initialized: - # Already initialized - check if user requested a different device - if device is not None and _state.device_id != device: - import warnings - warnings.warn( - f"RTX already initialized on device {_state.device_id}. " - f"Ignoring request for device {device}. " - "Create a new Python process to use a different device.", - RuntimeWarning - ) return - # Select the CUDA device if specified - if device is not None: - device_count = cupy.cuda.runtime.getDeviceCount() - if device < 0 or device >= device_count: - raise ValueError( - f"Invalid device ID {device}. " - f"Available devices: 0-{device_count - 1}" - ) - cupy.cuda.Device(device).use() - _state.device_id = device - else: - # Use current device - _state.device_id = cupy.cuda.Device().id - # Create OptiX device context (uses cupy's CUDA context) _state.context = optix.deviceContextCreate( cupy.cuda.get_current_stream().ptr, @@ -874,34 +736,11 @@ class RTX: This class provides GPU-accelerated ray-triangle intersection using NVIDIA's OptiX ray tracing engine. - - Args: - device: CUDA device ID to use (0, 1, 2, ...). If None (default), - uses the currently active CuPy device. Use get_device_count() - to see available devices. - - Example: - # Use default device (device 0 or current CuPy device) - rtx = RTX() - - # Use specific GPU - rtx = RTX(device=1) - - Note: - The RTX context is a singleton - all RTX instances share the same - underlying OptiX context. The device can only be set on first - initialization. Subsequent RTX() calls with a different device - will emit a warning. """ - def __init__(self, device: Optional[int] = None): - """ - Initialize the RTX context. - - Args: - device: CUDA device ID to use. If None, uses the current device. - """ - _init_optix(device) + def __init__(self): + """Initialize the RTX context.""" + _init_optix() def build(self, hashValue: int, vertexBuffer, indexBuffer) -> int: """ @@ -917,16 +756,6 @@ def build(self, hashValue: int, vertexBuffer, indexBuffer) -> int: """ return _build_accel(hashValue, vertexBuffer, indexBuffer) - @property - def device(self) -> Optional[int]: - """ - The CUDA device ID this RTX instance is using. - - Returns: - Device ID (0, 1, 2, ...) or None if not initialized. - """ - return _state.device_id - def getHash(self) -> int: """ Get the hash of the current acceleration structure. diff --git a/rtxpy/viewshed.py b/rtxpy/viewshed.py new file mode 100644 index 0000000..b69e7ad --- /dev/null +++ b/rtxpy/viewshed.py @@ -0,0 +1,304 @@ +""" +GPU-accelerated viewshed analysis using ray tracing. + +This module provides functions for computing viewshed (visibility) analysis +on terrain data using NVIDIA OptiX ray tracing. + +Functions +--------- +viewshed + Compute viewshed from a point on a raster terrain. +viewshed_with_context + Compute viewshed using an existing RTX context. +""" + +from numba import cuda +import numpy as np +import math +from typing import Union + +import cupy +import xarray as xr + +from .rtx import RTX +from ._cuda_utils import calc_dims, make_float3, add, mul, diff, dot, invert, float3 +from . import mesh + +# view options default values +OBS_ELEV = 0 +TARGET_ELEV = 0 + +# if a cell is invisible, its value is set to -1 +INVISIBLE = -1 + + +@cuda.jit +def _generatePrimaryRays(data, x_coords, y_coords, H, W): + i, j = cuda.grid(2) + if i >= 0 and i < H and j >= 0 and j < W: + if (j == W-1): + data[i, j, 0] = j - 1e-3 + else: + data[i, j, 0] = j + 1e-3 + + if (i == H-1): + data[i, j, 1] = i - 1e-3 + else: + data[i, j, 1] = i + 1e-3 + + data[i, j, 2] = 10000 # Location of the camera (height) + data[i, j, 3] = 1e-3 + data[i, j, 4] = 0 + data[i, j, 5] = 0 + data[i, j, 6] = -1 + data[i, j, 7] = np.inf + + +def _generatePrimaryRaysWrapper(rays, x_coords, y_coords, H, W): + griddim, blockdim = calc_dims((H, W)) + d_y_coords = cupy.array(y_coords) + d_x_coords = cupy.array(x_coords) + _generatePrimaryRays[griddim, blockdim](rays, d_x_coords, d_y_coords, H, W) + return 0 + + +@cuda.jit +def _generateViewshedRays(camRays, hits, vsrays, visibility_grid, H, W, vp): + i, j = cuda.grid(2) + if i >= 0 and i < H and j >= 0 and j < W: + elevationOffset = vp[2] + targetElevation = vp[3] + dist = hits[i, j, 0] # distance to surface from camera + norm = make_float3(hits[i, j], 1) # normal vector at intersection with surface + if (norm[2] < 0): # if back hit, face forward + norm = invert(norm) + cameraRay = camRays[i, j] + rayOrigin = make_float3(cameraRay, 0) # get the camera ray origin + rayDir = make_float3(cameraRay, 4) # get the camera ray direction + hitP = add(rayOrigin, mul(rayDir, dist)) # calculate intersection point + newOrigin = add(hitP, mul(norm, 1e-3)) # generate new ray origin with offset + newOrigin = add(newOrigin, float3(0, 0, targetElevation)) # add target elevation + + w = int(vp[0]) + h = int(vp[1]) + viewshedRay = camRays[h, w] # get camera ray for viewshed origin + dist = hits[h, w, 0] # get distance from camera to viewshed point + rayOrigin = make_float3(viewshedRay, 0) + rayDir = make_float3(viewshedRay, 4) + hitP = add(rayOrigin, mul(rayDir, dist)) + viewshedPoint = add(hitP, float3(0, 0, elevationOffset)) + + newDir = diff(viewshedPoint, newOrigin) # vector from SurfaceHit to VP + length = math.sqrt(dot(newDir, newDir)) # distance from surface to VP + newDir = mul(newDir, 1/length) # normalize direction + + cosine = dot(norm, newDir) # cosine of angle between n and v + theta = math.acos(cosine) # angle in radians + theta = (180*theta)/math.pi # angle in degrees + + # prepare viewshed ray for visibility test + vsray = vsrays[i, j] + vsray[0] = newOrigin[0] + vsray[1] = newOrigin[1] + vsray[2] = newOrigin[2] + vsray[3] = 0 + vsray[4] = newDir[0] + vsray[5] = newDir[1] + vsray[6] = newDir[2] + vsray[7] = length + + visibility_grid[i, j] = theta + + +def _generateViewshedRaysWrapper(rays, hits, vsrays, visibility_grid, H, W, vp): + griddim, blockdim = calc_dims((H, W)) + _generateViewshedRays[griddim, blockdim](rays, hits, vsrays, visibility_grid, H, W, vp) + return 0 + + +@cuda.jit +def _calcViewshed(hits, visibility_grid, H, W): + i, j = cuda.grid(2) + if i >= 0 and i < H and j >= 0 and j < W: + dist = hits[i, j, 0] + # If dist > 0, intersection occurred along ray length + # meaning pixel is not directly visible from view point + if dist >= 0: + visibility_grid[i, j] = INVISIBLE + + +def _calcViewshedWrapper(hits, visibility_grid, H, W): + griddim, blockdim = calc_dims((H, W)) + _calcViewshed[griddim, blockdim](hits, visibility_grid, H, W) + return 0 + + +def viewshed_with_context(raster: xr.DataArray, + rtx: RTX, + x: Union[int, float], + y: Union[int, float], + observer_elev: float = OBS_ELEV, + target_elev: float = TARGET_ELEV) -> xr.DataArray: + """ + Compute viewshed analysis using an existing RTX context. + + This function performs viewshed (visibility) analysis from a specified + observer point on the terrain. It uses an existing RTX context that + already has the terrain geometry built, avoiding rebuild overhead. + + Parameters + ---------- + raster : xr.DataArray + Input terrain raster with x and y coordinates. + rtx : RTX + Pre-configured RTX instance with terrain geometry already built. + x : int or float + X coordinate of the observer position. + y : int or float + Y coordinate of the observer position. + observer_elev : float, optional + Height of the observer above the terrain surface. Default is 0. + target_elev : float, optional + Height of target points above the terrain surface. Default is 0. + + Returns + ------- + xr.DataArray + Viewshed result where visible cells contain the viewing angle in + degrees, and invisible cells contain -1. + + Raises + ------ + ValueError + If x or y coordinates are outside the raster extent. + """ + H, W = raster.shape + + y_coords = raster.indexes.get('y').values + x_coords = raster.indexes.get('x').values + + # validate x arg + if x < x_coords.min(): + raise ValueError("x argument outside of raster x_range") + elif x > x_coords.max(): + raise ValueError("x argument outside of raster x_range") + + # validate y arg + if y < y_coords.min(): + raise ValueError("y argument outside of raster y_range") + elif y > y_coords.max(): + raise ValueError("y argument outside of raster y_range") + + selection = raster.sel(x=[x], y=[y], method='nearest') + x = selection.x.values[0] + y = selection.y.values[0] + + y_view = np.where(y_coords == y)[0][0] + x_view = np.where(x_coords == x)[0][0] + + # Device buffers + d_rays = cupy.empty((H, W, 8), np.float32) + d_hits = cupy.empty((H, W, 4), np.float32) + d_visgrid = cupy.empty((H, W), np.float32) + d_vsrays = cupy.empty((H, W, 8), np.float32) + + _generatePrimaryRaysWrapper(d_rays, x_coords, y_coords, H, W) + device = cupy.cuda.Device(0) + device.synchronize() + rtx.trace(d_rays, d_hits, W*H) + + _generateViewshedRaysWrapper(d_rays, d_hits, d_vsrays, d_visgrid, H, W, + (x_view, y_view, observer_elev, target_elev)) + device.synchronize() + rtx.trace(d_vsrays, d_hits, W*H) + + _calcViewshedWrapper(d_hits, d_visgrid, H, W) + + if isinstance(raster.data, np.ndarray): + visgrid = cupy.asnumpy(d_visgrid) + else: + visgrid = d_visgrid + + view = xr.DataArray(visgrid, + name="viewshed", + coords=raster.coords, + dims=raster.dims, + attrs=raster.attrs) + return view + + +def viewshed(raster: xr.DataArray, + x: Union[int, float], + y: Union[int, float], + observer_elev: float = OBS_ELEV, + target_elev: float = TARGET_ELEV) -> xr.DataArray: + """ + Compute viewshed analysis from a point on a raster terrain. + + This function performs viewshed (visibility) analysis from a specified + observer point on the terrain. It automatically builds the necessary + ray tracing acceleration structure if needed. + + Parameters + ---------- + raster : xr.DataArray + Input terrain raster as an xarray DataArray with x and y coordinates. + The data must be a cupy array on GPU. + x : int or float + X coordinate of the observer position. + y : int or float + Y coordinate of the observer position. + observer_elev : float, optional + Height of the observer above the terrain surface. Default is 0. + target_elev : float, optional + Height of target points above the terrain surface. Default is 0. + + Returns + ------- + xr.DataArray + Viewshed result where visible cells contain the viewing angle in + degrees, and invisible cells contain -1. + + Raises + ------ + ValueError + If raster.data is not a cupy array, or if the mesh generation + or OptiX build fails. + + Examples + -------- + >>> import rtxpy + >>> result = rtxpy.viewshed.viewshed(terrain_raster, x=1000, y=2000, observer_elev=2) + """ + if not isinstance(raster.data, cupy.ndarray): + raise ValueError("raster.data must be a cupy array") + + H, W = raster.shape + rtx = RTX() + + datahash = np.uint64(hash(str(raster.data.get())) % (1 << 64)) + optixhash = np.uint64(rtx.getHash()) + if (optixhash != datahash): + numTris = (H - 1) * (W - 1) * 2 + verts = cupy.empty(H * W * 3, np.float32) + triangles = cupy.empty(numTris * 3, np.int32) + + # Generate mesh from terrain + res = mesh.triangulate_terrain(verts, triangles, raster) + if res: + raise ValueError("Failed to generate mesh from terrain. Error code:{}".format(res)) + res = rtx.build(datahash, verts, triangles) + if res: + raise ValueError("OptiX failed to build GAS with error code:{}".format(res)) + # Clear GPU memory no longer needed + verts = None + triangles = None + cupy.get_default_memory_pool().free_all_blocks() + + view = viewshed_with_context(raster, rtx, x, y, observer_elev, target_elev) + return view + + +# Backwards compatibility aliases +viewshed_gpu = viewshed +viewshed_rt = viewshed_with_context