diff --git a/.bazelrc b/.bazelrc index 4cca2bd..23b8c56 100644 --- a/.bazelrc +++ b/.bazelrc @@ -5,6 +5,12 @@ build:asan --copt -g build:asan --copt -fno-omit-frame-pointer build:asan --linkopt -fsanitize=address +# Enable the built-in platform-specific configuration feature +common --enable_platform_specific_config + +# Apply this flag only when building on/for macOS +build:macos --macos_minimum_os=10.15 + # For all builds, use C++17 build --cxxopt="-std=c++17" build --cxxopt='-Wno-sign-compare' @@ -15,6 +21,9 @@ build:apple_silicon --cpu=darwin_arm64 # Common flags for Clang build:clang --action_env=BAZEL_COMPILER=clang build:clang --action_env=CC=clang --action_env=CXX=clang++ +build:clang --copt="-Wno-reserved-macro-identifier" +build:clang --copt="-Wunused-parameter" + #build:clang --linkopt=-fuse-ld=lld # Clang with libc++ @@ -25,3 +34,19 @@ build:libc++ --action_env=BAZEL_CXXOPTS=-stdlib=libc++ build:libc++ --action_env=BAZEL_LINKLIBS=-l%:libc++.a:-l%:libc++abi.a build:libc++ --action_env=BAZEL_LINKOPTS=-lm:-pthread build:libc++ --define force_libcpp=enabled + +# QNX 8 Toolchain Configurations +# Set QNX_SDP_PATH environment variable or use default ~/qnx800 (NOTE: the --define=QNX_SDP_PATH values below are hard-coded to /home/dave.allison/qnx800; adjust them for your machine) +build:qnx_aarch64 --platforms=//bazel/toolchains/qnx/platforms:qnx_aarch64 +build:qnx_aarch64 --action_env=QNX_SDP_PATH +build:qnx_aarch64 --define=QNX_SDP_PATH=/home/dave.allison/qnx800 +build:qnx_aarch64 --cxxopt=-D__QNX__ + +build:qnx_x86_64 --platforms=//bazel/toolchains/qnx/platforms:qnx_x86_64 +build:qnx_x86_64 --action_env=QNX_SDP_PATH +build:qnx_x86_64 --define=QNX_SDP_PATH=/home/dave.allison/qnx800 +build:qnx_x86_64 --cxxopt=-D__QNX__ + +# QNX common flags +build:qnx --cxxopt=-std=c++17 +build:qnx --cxxopt=-Wno-sign-compare \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..4e30e09 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,171 @@ +# CHANGELOG.md + +## Subspace Version 2.2.0 + +### Client API Improvements + +#### Publisher Options +- 
**Retirement notification support**: Enhanced publisher options with retirement notification capabilities + - `notify_retirement` option for publishers to receive slot retirement notifications + - `GetRetirementFd()` method to obtain file descriptor for retirement notifications + - Improved retirement tracking for GPU memory management and external resource cleanup + +#### Thread Safety Features +- **Configurable thread safety**: New thread safety options for concurrent client usage + - Optional mutex protection for all client operations + - Automatic message buffer locking during publish operations + - Configurable locking behavior for performance optimization + - Thread-safe message retirement handling + +#### Checksums +- **Configurable checksum generation and checking**: Optionally allows a CRC32 checksum to be added into the `MessagePrefix` for a published message and checked in subscribers. A message with a failed checksum can either be dropped by the client or received with a failure notification. + +#### Portability +- **POSIX shared memory**: for non-Linux systems, uses POSIX shared memory with a shadow file in /tmp. 
+ +#### Bridge Support +- **Enhanced bridge publisher/subscriber options**: Improved bridge functionality in client options + - Better bridge detection and configuration + - Enhanced virtual channel support for bridged communications + - Improved multiplexer channel handling across bridges + +### New Features + +#### Server Plugin Architecture +- **Dynamic plugin loading**: Added comprehensive plugin system for extending server functionality + - Plugin interface with lifecycle hooks: `onStartup`, `onReady`, `onShutdown` + - Event-driven callbacks for channel and user management: `onNewChannel`, `onRemoveChannel`, `onNewPublisher`, `onRemovePublisher`, `onNewSubscriber`, `onRemoveSubscriber` + - Plugin context system with integrated logging support + - Single-threaded coroutine-based execution model for plugins + - Dynamic library loading support with C-style function interfaces + +#### Client-Side Thread Safety +- **Optional thread safety support**: Added thread-safe client operations for multi-threaded applications + - `SetThreadSafe(true)` method to enable mutex-protected client operations + - Thread-safe publisher message buffer management with automatic locking + - Configurable locking behavior with option to disable automatic locking for zero-copy operations + - Mutual exclusion protection for concurrent client operations + - Thread-safe retirement notification handling across multiple threads + +#### Message Checksum Support +- **Hardware-accelerated checksums**: Integrated CRC32 checksum calculation for message integrity + - Hardware CRC32 instruction support with fallback implementations + - `CalculateChecksum()` and `VerifyChecksum()` functions for multi-span data validation + - ARM assembly optimizations for CRC32 calculations + - Configurable hardware acceleration via `SUBSPACE_HARDWARE_CRC` compilation flag + - Template-based checksum calculation for arbitrary data spans + +### Testing Infrastructure + +#### Bridge Testing Framework +- **Comprehensive 
bridge retirement testing**: Added extensive test coverage for message retirement across bridges + - `BasicRetirement` test: Validates basic retirement notification functionality + - `MultipleRetirement` test: Tests retirement notifications for multiple messages with slot tracking + - `MultipleRetirement2` test: Alternative retirement scenario testing with different slot allocation patterns +- **Multi-server bridge testing**: Enhanced test infrastructure supporting multiple Subspace servers + - Two-server test setup with proper lifecycle management + - Bridge notification pipes for inter-server communication testing + - Retirement notification validation across server boundaries + +#### Multi-threaded Testing +- **Concurrent operation validation**: Extensive multi-threaded testing infrastructure + - Stress tests with multiple concurrent clients and channels + - Latency testing under multi-threaded conditions + - Thread safety validation for concurrent publisher/subscriber operations + - Performance benchmarking for threaded vs non-threaded scenarios + +#### Test Utilities +- **Improved test helpers**: Enhanced utility functions for bridge testing + - `WaitForSubscribedMessage`: Robust waiting mechanism for bridge subscription notifications + - Better signal handling for test debugging (`SigQuitHandler`) + - Enhanced coroutine debugging capabilities for multi-server scenarios + +### Server Infrastructure + +#### Bridge Communication +- **Enhanced bridge transmitter functionality**: Improved the bridge transmitter coroutine system + - Better retirement notification handling across bridges + - Improved error handling for bridge connection failures + - Enhanced retirement socket management + +#### Retirement Notification System +- **Robust retirement tracking**: Improved retirement notification system for bridged channels + - Better tracking of active messages across bridge connections + - Enhanced retirement receiver coroutine functionality + - Improved slot 
retirement correlation across server boundaries + +#### Plugin Integration +- **Server extension points**: Comprehensive plugin integration throughout server lifecycle + - Plugin initialization during server startup + - Event notifications for all channel and user lifecycle events + - Graceful plugin shutdown handling + - Error handling and logging for plugin operations + + +### Documentation + +#### Version 2 Features +- **Comprehensive feature documentation**: Enhanced documentation of Subspace Version 2 features + - Lock-free shared memory implementation details + - Message retirement notification system documentation + - Multiplexed virtual channels usage patterns + - C client API documentation + - Thread safety features and guidelines + - Server plugin development guide + + +### Bug Fixes + +#### Bridge Test Improvements +- **Fixed bridge test debug output**: Removed excessive debug logging in `bridge_test.cc` that was cluttering test output + - Removed verbose bridge notification length logging in `WaitForSubscribedMessage` function + - Improved test output clarity for bridge functionality testing + +#### Bridge Notification System +- **Enhanced bridge notification handling**: Improved the robustness of bridge notification processing + - Better error handling for bridge notification pipe operations + - Cleaner separation of bridge notification logic from debug output + +### Infrastructure Improvements + +#### Build System +- **Enhanced CMake support**: Improved CMake build configuration + - Better dependency management for server components + - Enhanced library linking for cross-platform builds + - Improved target configuration for test executables + - Plugin compilation support + +#### Development Tools +- **Better debugging support**: Enhanced debugging capabilities for multi-server scenarios + - Improved coroutine introspection tools + - Better signal handling for development debugging + - Enhanced test output formatting + - Plugin debugging and logging 
infrastructure + +### Performance Enhancements + +#### Hardware Acceleration +- **Optimized checksum calculations**: Hardware-accelerated CRC32 for improved performance + - ARM assembly optimizations for mobile and embedded platforms + - Intel hardware CRC32 instruction utilization + - Fallback implementations for compatibility + +#### Thread Safety Optimization +- **Selective locking**: Configurable thread safety to minimize performance impact + - Optional locking for zero-copy operations + - Minimal mutex contention in multi-threaded scenarios + - Atomic operations for retirement trigger management + +### Breaking Changes +- None in this release - all changes are backward compatible bug fixes and improvements + +### Migration Notes +- No migration required for existing applications +- New retirement notification features are opt-in via publisher options +- Thread safety must be explicitly enabled via `SetThreadSafe(true)` +- Server plugins require separate compilation and loading +- Enhanced bridge testing infrastructure is available for developers working with multi-server deployments + +--- + +**Note**: This release focuses primarily on improving the stability and testability of the bridge communication system, while adding significant new capabilities for server extensibility, client thread safety, and message integrity validation. All new features maintain backward compatibility with existing Subspace applications. \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 7d3b489..ba55a9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ # build systems. 
cmake_minimum_required(VERSION 3.15) -project(Subspace LANGUAGES CXX) +project(Subspace LANGUAGES CXX ASM) # Set C++ standard and compiler flags set(CMAKE_CXX_STANDARD 17) @@ -41,38 +41,68 @@ FetchContent_Declare( GIT_TAG 20240722.0 # Matches the version in your WORKSPACE # Pass architecture settings and CXX standard propagation to Abseil's CMake build CMAKE_ARGS - CMAKE_OSX_ARCHITECTURES="${CMAKE_OSX_ARCHITECTURES}" + -DCMAKE_OSX_ARCHITECTURES="${CMAKE_OSX_ARCHITECTURES}" -DABSL_PROPAGATE_CXX_STD=ON # Propagate C++ standard settings ) FetchContent_MakeAvailable(abseil) # Abseil provides targets like absl::base, absl::strings, etc. +# --- External Dependency: co (using FetchContent for native CMake) --- +# Using the same version as Bazel build (commit cf1252b2f5952d7cba83b67dd69288971c0a2b57) +# MUST be declared before cpp_toolbelt since cpp_toolbelt depends on it +FetchContent_Declare( + co + GIT_REPOSITORY https://github.com/dallison/co.git + GIT_TAG cf1252b2f5952d7cba83b67dd69288971c0a2b57 + # Pass architecture settings to co's CMake build + CMAKE_ARGS + -DCMAKE_OSX_ARCHITECTURES="${CMAKE_OSX_ARCHITECTURES}" +) +FetchContent_MakeAvailable(co) +# co provides the 'co' target. + # --- External Dependency: cpp_toolbelt (using FetchContent for native CMake) --- -# Assumes cpp_toolbelt repository now contains a CMakeLists.txt at its root. 
+# Using the same version as Bazel build (commit 7ec0e2cd2a17a006f976df23d07ad530499249e0) +# Note: cpp_toolbelt's CMakeLists.txt will try to fetch co, but since we've already +# declared it above, FetchContent will use our version instead FetchContent_Declare( cpp_toolbelt GIT_REPOSITORY https://github.com/dallison/cpp_toolbelt.git - GIT_TAG main + GIT_TAG 7ec0e2cd2a17a006f976df23d07ad530499249e0 # Pass architecture settings to cpp_toolbelt's CMake build CMAKE_ARGS - CMAKE_OSX_ARCHITECTURES="${CMAKE_OSX_ARCHITECTURES}" + -DCMAKE_OSX_ARCHITECTURES="${CMAKE_OSX_ARCHITECTURES}" ) FetchContent_MakeAvailable(cpp_toolbelt) # cpp_toolbelt provides the 'toolbelt' target. +# --- External Dependency: Googletest (using FetchContent for native CMake) --- +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG v1.15.2 # Matches the version in MODULE.bazel + # Pass architecture settings to Googletest's CMake build + CMAKE_ARGS + -DCMAKE_OSX_ARCHITECTURES="${CMAKE_OSX_ARCHITECTURES}" +) +FetchContent_MakeAvailable(googletest) +# Googletest provides targets like gtest, gtest_main, gmock, gmock_main -# --- External Dependency: co (using FetchContent for native CMake) --- -# Assumes co repository now contains a CMakeLists.txt at its root. 
+# --- External Dependency: Protobuf (using FetchContent for native CMake) --- FetchContent_Declare( - co - GIT_REPOSITORY https://github.com/dallison/co.git - GIT_TAG main - # Pass architecture settings to co's CMake build + protobuf + GIT_REPOSITORY https://github.com/protocolbuffers/protobuf.git + GIT_TAG v29.5 + # Protobuf's CMake build can be configured to build only necessary components CMAKE_ARGS - CMAKE_OSX_ARCHITECTURES="${CMAKE_OSX_ARCHITECTURES}" + -Dprotobuf_BUILD_TESTS=OFF # Explicitly disable building tests to avoid gmock conflicts + -Dprotobuf_BUILD_EXAMPLES=OFF + -Dprotobuf_BUILD_SHARED_LIBS=OFF # Build static libs + -DCMAKE_OSX_ARCHITECTURES="${CMAKE_OSX_ARCHITECTURES}" ) -FetchContent_MakeAvailable(co) -# co provides the 'co' target. +FetchContent_MakeAvailable(protobuf) +# Protobuf provides targets like protobuf::libprotobuf and protobuf::protoc +# These targets can be used directly without find_package #add to get */file.h includes working include_directories(${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/COPYRIGHT b/COPYRIGHT index fa17064..a45cc7f 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,4 +1,7 @@ -Copyright 2025 David Allison +Copyright 2023-2026 David Allison All Rights Reserved This is licensed under the Apache 2 license (see the LICENSE file). + +Parts of this software were contributed by Cruise Automation LLC. 
+ diff --git a/MODULE.bazel b/MODULE.bazel index 294f7c9..b66ccf8 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -1,10 +1,8 @@ module( name = "subspace", + version = "2.2.0", ) -http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") -git_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") - bazel_dep(name = "bazel_skylib", version = "1.9.0") bazel_dep(name = "platforms", version = "1.0.0") bazel_dep(name = "abseil-cpp", version = "20250512.1") @@ -14,34 +12,8 @@ bazel_dep(name = "protobuf", version = "33.2") bazel_dep(name = "rules_cc", version = "0.2.16") bazel_dep(name = "rules_pkg", version = "1.0.1") bazel_dep(name = "zlib", version = "1.3.1.bcr.5") - -# Toolbelt -git_repository( - name = "toolbelt", - commit = "0286a5f6da0f256d6ffbc9f6b43cd6b003fe9dbf", - remote = "https://github.com/dallison/cpp_toolbelt.git", -) -# For local debugging of toolbelt coroutine library. -# bazel_dep(name = "toolbelt") -# local_path_override( -# module_name = "toolbelt", -# path = "../cpp_toolbelt", -# ) - -# Coroutines -http_archive( - name = "coroutines", - integrity = "sha256-fdSwDRrEyFf6PRX8fkkfSPTWPB3Z9tc5GiUbkwsk0Mc=", - strip_prefix = "co-3.0.0", - urls = ["https://github.com/dallison/co/archive/refs/tags/3.0.0.tar.gz"], -) -# For local debugging of co coroutine library. 
-# bazel_dep(name = "coroutines") -# local_path_override( -# module_name = "coroutines", -# path = "../co", -# ) - +bazel_dep(name = "coroutines", version = "3.0.1") +bazel_dep(name = "cpp_toolbelt", version = "2.0.0") bazel_dep(name = "rules_python", version = "1.7.0") python = use_extension("@rules_python//python/extensions:python.bzl", "python") diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock index a7eaa51..332956e 100644 --- a/MODULE.bazel.lock +++ b/MODULE.bazel.lock @@ -1,5 +1,5 @@ { - "lockFileVersion": 24, + "lockFileVersion": 26, "registryFileHashes": { "https://bcr.bazel.build/bazel_registry.json": "8a28e4aff06ee60aed2a8c281907fb8bcbf3b753c91fb5a5c57da3215d5b3497", "https://bcr.bazel.build/modules/abseil-cpp/20210324.2/MODULE.bazel": "7cd0312e064fde87c8d1cd79ba06c876bd23630c83466e9500321be55c96ace2", @@ -12,18 +12,24 @@ "https://bcr.bazel.build/modules/abseil-cpp/20240116.2/MODULE.bazel": "73939767a4686cd9a520d16af5ab440071ed75cec1a876bf2fcfaf1f71987a16", "https://bcr.bazel.build/modules/abseil-cpp/20250127.1/MODULE.bazel": "c4a89e7ceb9bf1e25cf84a9f830ff6b817b72874088bf5141b314726e46a57c1", "https://bcr.bazel.build/modules/abseil-cpp/20250512.1/MODULE.bazel": "d209fdb6f36ffaf61c509fcc81b19e81b411a999a934a032e10cd009a0226215", - "https://bcr.bazel.build/modules/abseil-cpp/20250512.1/source.json": "d725d73707d01bb46ab3ca59ba408b8e9bd336642ca77a2269d4bfb8bbfd413d", + "https://bcr.bazel.build/modules/abseil-cpp/20250814.1/MODULE.bazel": "51f2312901470cdab0dbdf3b88c40cd21c62a7ed58a3de45b365ddc5b11bcab2", + "https://bcr.bazel.build/modules/abseil-cpp/20250814.1/source.json": "cea3901d7e299da7320700abbaafe57a65d039f10d0d7ea601c4a66938ea4b0c", "https://bcr.bazel.build/modules/apple_support/1.11.1/MODULE.bazel": "1843d7cd8a58369a444fc6000e7304425fba600ff641592161d9f15b179fb896", "https://bcr.bazel.build/modules/apple_support/1.15.1/MODULE.bazel": "a0556fefca0b1bb2de8567b8827518f94db6a6e7e7d632b4c48dc5f865bc7c85", - 
"https://bcr.bazel.build/modules/apple_support/1.23.1/MODULE.bazel": "53763fed456a968cf919b3240427cf3a9d5481ec5466abc9d5dc51bc70087442", - "https://bcr.bazel.build/modules/apple_support/1.23.1/source.json": "d888b44312eb0ad2c21a91d026753f330caa48a25c9b2102fae75eb2b0dcfdd2", + "https://bcr.bazel.build/modules/apple_support/1.21.0/MODULE.bazel": "ac1824ed5edf17dee2fdd4927ada30c9f8c3b520be1b5fd02a5da15bc10bff3e", + "https://bcr.bazel.build/modules/apple_support/1.21.1/MODULE.bazel": "5809fa3efab15d1f3c3c635af6974044bac8a4919c62238cce06acee8a8c11f1", + "https://bcr.bazel.build/modules/apple_support/1.22.1/MODULE.bazel": "90bd1a660590f3ceffbdf524e37483094b29352d85317060b2327fff8f3f4458", + "https://bcr.bazel.build/modules/apple_support/1.24.2/MODULE.bazel": "0e62471818affb9f0b26f128831d5c40b074d32e6dda5a0d3852847215a41ca4", + "https://bcr.bazel.build/modules/apple_support/1.24.2/source.json": "2c22c9827093250406c5568da6c54e6fdf0ef06238def3d99c71b12feb057a8d", "https://bcr.bazel.build/modules/bazel_features/1.1.1/MODULE.bazel": "27b8c79ef57efe08efccbd9dd6ef70d61b4798320b8d3c134fd571f78963dbcd", + "https://bcr.bazel.build/modules/bazel_features/1.10.0/MODULE.bazel": "f75e8807570484a99be90abcd52b5e1f390362c258bcb73106f4544957a48101", "https://bcr.bazel.build/modules/bazel_features/1.11.0/MODULE.bazel": "f9382337dd5a474c3b7d334c2f83e50b6eaedc284253334cf823044a26de03e8", "https://bcr.bazel.build/modules/bazel_features/1.15.0/MODULE.bazel": "d38ff6e517149dc509406aca0db3ad1efdd890a85e049585b7234d04238e2a4d", "https://bcr.bazel.build/modules/bazel_features/1.17.0/MODULE.bazel": "039de32d21b816b47bd42c778e0454217e9c9caac4a3cf8e15c7231ee3ddee4d", "https://bcr.bazel.build/modules/bazel_features/1.18.0/MODULE.bazel": "1be0ae2557ab3a72a57aeb31b29be347bcdc5d2b1eb1e70f39e3851a7e97041a", "https://bcr.bazel.build/modules/bazel_features/1.19.0/MODULE.bazel": "59adcdf28230d220f0067b1f435b8537dd033bfff8db21335ef9217919c7fb58", 
"https://bcr.bazel.build/modules/bazel_features/1.21.0/MODULE.bazel": "675642261665d8eea09989aa3b8afb5c37627f1be178382c320d1b46afba5e3b", + "https://bcr.bazel.build/modules/bazel_features/1.23.0/MODULE.bazel": "fd1ac84bc4e97a5a0816b7fd7d4d4f6d837b0047cf4cbd81652d616af3a6591a", "https://bcr.bazel.build/modules/bazel_features/1.27.0/MODULE.bazel": "621eeee06c4458a9121d1f104efb80f39d34deff4984e778359c60eaf1a8cb65", "https://bcr.bazel.build/modules/bazel_features/1.28.0/MODULE.bazel": "4b4200e6cbf8fa335b2c3f43e1d6ef3e240319c33d43d60cc0fbd4b87ece299d", "https://bcr.bazel.build/modules/bazel_features/1.3.0/MODULE.bazel": "cdcafe83ec318cda34e02948e81d790aab8df7a929cec6f6969f13a489ccecd9", @@ -48,15 +54,20 @@ "https://bcr.bazel.build/modules/bazel_skylib/1.8.2/MODULE.bazel": "69ad6927098316848b34a9142bcc975e018ba27f08c4ff403f50c1b6e646ca67", "https://bcr.bazel.build/modules/bazel_skylib/1.9.0/MODULE.bazel": "72997b29dfd95c3fa0d0c48322d05590418edef451f8db8db5509c57875fb4b7", "https://bcr.bazel.build/modules/bazel_skylib/1.9.0/source.json": "7ad77c1e8c1b84222d9b3f3cae016a76639435744c19330b0b37c0a3c9da7dc0", - "https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84", - "https://bcr.bazel.build/modules/buildozer/7.1.2/source.json": "c9028a501d2db85793a6996205c8de120944f50a0d570438fcae0457a5f9d1f8", + "https://bcr.bazel.build/modules/buildozer/8.2.1/MODULE.bazel": "61e9433c574c2bd9519cad7fa66b9c1d2b8e8d5f3ae5d6528a2c2d26e68d874d", + "https://bcr.bazel.build/modules/buildozer/8.2.1/source.json": "7c33f6a26ee0216f85544b4bca5e9044579e0219b6898dd653f5fb449cf2e484", + "https://bcr.bazel.build/modules/coroutines/3.0.1/MODULE.bazel": "47c65a0b258dd5b8cecd9143f1fe5e810acf5b996379eb75a8282447b53bf612", + "https://bcr.bazel.build/modules/coroutines/3.0.1/source.json": "ad0d12c4430ffa309eed980821ce08d1f4722b106a7190fb02d62ae09f195c50", + "https://bcr.bazel.build/modules/cpp_toolbelt/2.0.0/MODULE.bazel": 
"5d470b2806d87e55b1e2abef8501dc2a7e9e1c06933aa607b84a88f1a941310f", + "https://bcr.bazel.build/modules/cpp_toolbelt/2.0.0/source.json": "47ad67fa229ba437a78d8791d255af5f2e5aaa5605a3e0730bd58c0c4b95aa63", "https://bcr.bazel.build/modules/google_benchmark/1.8.2/MODULE.bazel": "a70cf1bba851000ba93b58ae2f6d76490a9feb74192e57ab8e8ff13c34ec50cb", "https://bcr.bazel.build/modules/googletest/1.11.0/MODULE.bazel": "3a83f095183f66345ca86aa13c58b59f9f94a2f81999c093d4eeaa2d262d12f4", "https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/MODULE.bazel": "22c31a561553727960057361aa33bf20fb2e98584bc4fec007906e27053f80c6", "https://bcr.bazel.build/modules/googletest/1.14.0/MODULE.bazel": "cfbcbf3e6eac06ef9d85900f64424708cc08687d1b527f0ef65aa7517af8118f", "https://bcr.bazel.build/modules/googletest/1.15.2/MODULE.bazel": "6de1edc1d26cafb0ea1a6ab3f4d4192d91a312fd2d360b63adaa213cd00b2108", + "https://bcr.bazel.build/modules/googletest/1.17.0.bcr.2/MODULE.bazel": "827f54f492a3ce549c940106d73de332c2b30cebd0c20c0bc5d786aba7f116cb", + "https://bcr.bazel.build/modules/googletest/1.17.0.bcr.2/source.json": "3664514073a819992320ffbce5825e4238459df344d8b01748af2208f8d2e1eb", "https://bcr.bazel.build/modules/googletest/1.17.0/MODULE.bazel": "dbec758171594a705933a29fcf69293d2468c49ec1f2ebca65c36f504d72df46", - "https://bcr.bazel.build/modules/googletest/1.17.0/source.json": "38e4454b25fc30f15439c0378e57909ab1fd0a443158aa35aec685da727cd713", "https://bcr.bazel.build/modules/jsoncpp/1.9.5/MODULE.bazel": "31271aedc59e815656f5736f282bb7509a97c7ecb43e927ac1a37966e0578075", "https://bcr.bazel.build/modules/jsoncpp/1.9.6/MODULE.bazel": "2f8d20d3b7d54143213c4dfc3d98225c42de7d666011528dc8fe91591e2e17b0", "https://bcr.bazel.build/modules/jsoncpp/1.9.6/source.json": "a04756d367a2126c3541682864ecec52f92cdee80a35735a3cb249ce015ca000", @@ -74,31 +85,35 @@ "https://bcr.bazel.build/modules/platforms/1.0.0/MODULE.bazel": "f05feb42b48f1b3c225e4ccf351f367be0371411a803198ec34a389fb22aa580", 
"https://bcr.bazel.build/modules/platforms/1.0.0/source.json": "f4ff1fd412e0246fd38c82328eb209130ead81d62dcd5a9e40910f867f733d96", "https://bcr.bazel.build/modules/protobuf/21.7/MODULE.bazel": "a5a29bb89544f9b97edce05642fac225a808b5b7be74038ea3640fae2f8e66a7", + "https://bcr.bazel.build/modules/protobuf/23.1/MODULE.bazel": "88b393b3eb4101d18129e5db51847cd40a5517a53e81216144a8c32dfeeca52a", + "https://bcr.bazel.build/modules/protobuf/24.4/MODULE.bazel": "7bc7ce5f2abf36b3b7b7c8218d3acdebb9426aeb35c2257c96445756f970eb12", "https://bcr.bazel.build/modules/protobuf/27.0/MODULE.bazel": "7873b60be88844a0a1d8f80b9d5d20cfbd8495a689b8763e76c6372998d3f64c", - "https://bcr.bazel.build/modules/protobuf/27.1/MODULE.bazel": "703a7b614728bb06647f965264967a8ef1c39e09e8f167b3ca0bb1fd80449c0d", "https://bcr.bazel.build/modules/protobuf/29.0-rc2/MODULE.bazel": "6241d35983510143049943fc0d57937937122baf1b287862f9dc8590fc4c37df", "https://bcr.bazel.build/modules/protobuf/29.0-rc3/MODULE.bazel": "33c2dfa286578573afc55a7acaea3cada4122b9631007c594bf0729f41c8de92", - "https://bcr.bazel.build/modules/protobuf/29.0/MODULE.bazel": "319dc8bf4c679ff87e71b1ccfb5a6e90a6dbc4693501d471f48662ac46d04e4e", "https://bcr.bazel.build/modules/protobuf/29.1/MODULE.bazel": "557c3457560ff49e122ed76c0bc3397a64af9574691cb8201b4e46d4ab2ecb95", "https://bcr.bazel.build/modules/protobuf/3.19.0/MODULE.bazel": "6b5fbb433f760a99a22b18b6850ed5784ef0e9928a72668b66e4d7ccd47db9b0", + "https://bcr.bazel.build/modules/protobuf/32.1/MODULE.bazel": "89cd2866a9cb07fee9ff74c41ceace11554f32e0d849de4e23ac55515cfada4d", "https://bcr.bazel.build/modules/protobuf/33.2/MODULE.bazel": "e8d408653b855e467ac7f99f2dfae8f073ae421fd4eb9a05b5ade5118d66cc3d", - "https://bcr.bazel.build/modules/protobuf/33.2/source.json": "be3a465c5bb4c2097fd81fc69f01fe5ef93bd29c58e461e1efc97100c8a010d7", + "https://bcr.bazel.build/modules/protobuf/33.4/MODULE.bazel": "114775b816b38b6d0ca620450d6b02550c60ceedfdc8d9a229833b34a223dc42", + 
"https://bcr.bazel.build/modules/protobuf/33.4/source.json": "555f8686b4c7d6b5ba731fbea13bf656b4bfd9a7ff629c1d9d3f6e1d6155de79", "https://bcr.bazel.build/modules/pybind11_bazel/2.11.1/MODULE.bazel": "88af1c246226d87e65be78ed49ecd1e6f5e98648558c14ce99176da041dc378e", "https://bcr.bazel.build/modules/pybind11_bazel/2.12.0/MODULE.bazel": "e6f4c20442eaa7c90d7190d8dc539d0ab422f95c65a57cc59562170c58ae3d34", + "https://bcr.bazel.build/modules/pybind11_bazel/2.13.6/MODULE.bazel": "2d746fda559464b253b2b2e6073cb51643a2ac79009ca02100ebbc44b4548656", "https://bcr.bazel.build/modules/pybind11_bazel/3.0.0/MODULE.bazel": "a2bfa6020ed603a00d944161c63173c7f109774e99bee0c2cd8dbf24159f8134", "https://bcr.bazel.build/modules/pybind11_bazel/3.0.0/source.json": "d8f5104d4c21d272bf327ebe44366fb0b4c036cdaa1f5cceb21a408ca4ef2ef8", "https://bcr.bazel.build/modules/re2/2023-09-01/MODULE.bazel": "cb3d511531b16cfc78a225a9e2136007a48cf8a677e4264baeab57fe78a80206", "https://bcr.bazel.build/modules/re2/2024-07-02.bcr.1/MODULE.bazel": "b4963dda9b31080be1905ef085ecd7dd6cd47c05c79b9cdf83ade83ab2ab271a", - "https://bcr.bazel.build/modules/re2/2024-07-02.bcr.1/source.json": "2ff292be6ef3340325ce8a045ecc326e92cbfab47c7cbab4bd85d28971b97ac4", "https://bcr.bazel.build/modules/re2/2024-07-02/MODULE.bazel": "0eadc4395959969297cbcf31a249ff457f2f1d456228c67719480205aa306daa", + "https://bcr.bazel.build/modules/re2/2025-08-12.bcr.1/MODULE.bazel": "e09b434b122bfb786a69179f9b325e35cb1856c3f56a7a81dd61609260ed46e1", + "https://bcr.bazel.build/modules/re2/2025-08-12.bcr.1/source.json": "a8ae7c09533bf67f9f6e5122d884d5741600b09d78dca6fc0f2f8d2ee0c2d957", "https://bcr.bazel.build/modules/rules_android/0.1.1/MODULE.bazel": "48809ab0091b07ad0182defb787c4c5328bd3a278938415c00a7b69b50c4d3a8", "https://bcr.bazel.build/modules/rules_android/0.1.1/source.json": "e6986b41626ee10bdc864937ffb6d6bf275bb5b9c65120e6137d56e6331f089e", "https://bcr.bazel.build/modules/rules_apple/3.16.0/MODULE.bazel": 
"0d1caf0b8375942ce98ea944be754a18874041e4e0459401d925577624d3a54a", - "https://bcr.bazel.build/modules/rules_apple/3.16.0/source.json": "d8b5fe461272018cc07cfafce11fe369c7525330804c37eec5a82f84cd475366", + "https://bcr.bazel.build/modules/rules_apple/4.1.0/MODULE.bazel": "76e10fd4a48038d3fc7c5dc6e63b7063bbf5304a2e3bd42edda6ec660eebea68", + "https://bcr.bazel.build/modules/rules_apple/4.1.0/source.json": "8ee81e1708756f81b343a5eb2b2f0b953f1d25c4ab3d4a68dc02754872e80715", "https://bcr.bazel.build/modules/rules_cc/0.0.1/MODULE.bazel": "cb2aa0747f84c6c3a78dad4e2049c154f08ab9d166b1273835a8174940365647", "https://bcr.bazel.build/modules/rules_cc/0.0.10/MODULE.bazel": "ec1705118f7eaedd6e118508d3d26deba2a4e76476ada7e0e3965211be012002", "https://bcr.bazel.build/modules/rules_cc/0.0.13/MODULE.bazel": "0e8529ed7b323dad0775ff924d2ae5af7640b23553dfcd4d34344c7e7a867191", - "https://bcr.bazel.build/modules/rules_cc/0.0.14/MODULE.bazel": "5e343a3aac88b8d7af3b1b6d2093b55c347b8eefc2e7d1442f7a02dc8fea48ac", "https://bcr.bazel.build/modules/rules_cc/0.0.15/MODULE.bazel": "6704c35f7b4a72502ee81f61bf88706b54f06b3cbe5558ac17e2e14666cd5dcc", "https://bcr.bazel.build/modules/rules_cc/0.0.16/MODULE.bazel": "7661303b8fc1b4d7f532e54e9d6565771fea666fbdf839e0a86affcd02defe87", "https://bcr.bazel.build/modules/rules_cc/0.0.17/MODULE.bazel": "2ae1d8f4238ec67d7185d8861cb0a2cdf4bc608697c331b95bf990e69b62e64a", @@ -107,36 +122,37 @@ "https://bcr.bazel.build/modules/rules_cc/0.0.8/MODULE.bazel": "964c85c82cfeb6f3855e6a07054fdb159aced38e99a5eecf7bce9d53990afa3e", "https://bcr.bazel.build/modules/rules_cc/0.0.9/MODULE.bazel": "836e76439f354b89afe6a911a7adf59a6b2518fafb174483ad78a2a2fde7b1c5", "https://bcr.bazel.build/modules/rules_cc/0.1.1/MODULE.bazel": "2f0222a6f229f0bf44cd711dc13c858dad98c62d52bd51d8fc3a764a83125513", + "https://bcr.bazel.build/modules/rules_cc/0.1.2/MODULE.bazel": "557ddc3a96858ec0d465a87c0a931054d7dcfd6583af2c7ed3baf494407fd8d0", + 
"https://bcr.bazel.build/modules/rules_cc/0.1.4/MODULE.bazel": "bb03a452a7527ac25a7518fb86a946ef63df860b9657d8323a0c50f8504fb0b9", "https://bcr.bazel.build/modules/rules_cc/0.1.5/MODULE.bazel": "88dfc9361e8b5ae1008ac38f7cdfd45ad738e4fa676a3ad67d19204f045a1fd8", + "https://bcr.bazel.build/modules/rules_cc/0.2.0/MODULE.bazel": "b5c17f90458caae90d2ccd114c81970062946f49f355610ed89bebf954f5783c", + "https://bcr.bazel.build/modules/rules_cc/0.2.13/MODULE.bazel": "eecdd666eda6be16a8d9dc15e44b5c75133405e820f620a234acc4b1fdc5aa37", + "https://bcr.bazel.build/modules/rules_cc/0.2.14/MODULE.bazel": "353c99ed148887ee89c54a17d4100ae7e7e436593d104b668476019023b58df8", "https://bcr.bazel.build/modules/rules_cc/0.2.16/MODULE.bazel": "9242fa89f950c6ef7702801ab53922e99c69b02310c39fb6e62b2bd30df2a1d4", "https://bcr.bazel.build/modules/rules_cc/0.2.16/source.json": "d03d5cde49376d87e14ec14b666c56075e5e3926930327fd5d0484a1ff2ac1cc", + "https://bcr.bazel.build/modules/rules_cc/0.2.8/MODULE.bazel": "f1df20f0bf22c28192a794f29b501ee2018fa37a3862a1a2132ae2940a23a642", "https://bcr.bazel.build/modules/rules_cc/0.2.9/MODULE.bazel": "34263f1dca62ea664265438cef714d7db124c03e1ed55ebb4f1dc860164308d1", "https://bcr.bazel.build/modules/rules_foreign_cc/0.9.0/MODULE.bazel": "c9e8c682bf75b0e7c704166d79b599f93b72cfca5ad7477df596947891feeef6", "https://bcr.bazel.build/modules/rules_fuzzing/0.5.2/MODULE.bazel": "40c97d1144356f52905566c55811f13b299453a14ac7769dfba2ac38192337a8", "https://bcr.bazel.build/modules/rules_java/4.0.0/MODULE.bazel": "5a78a7ae82cd1a33cef56dc578c7d2a46ed0dca12643ee45edbb8417899e6f74", "https://bcr.bazel.build/modules/rules_java/5.3.5/MODULE.bazel": "a4ec4f2db570171e3e5eb753276ee4b389bae16b96207e9d3230895c99644b86", - "https://bcr.bazel.build/modules/rules_java/6.0.0/MODULE.bazel": "8a43b7df601a7ec1af61d79345c17b31ea1fedc6711fd4abfd013ea612978e39", - "https://bcr.bazel.build/modules/rules_java/6.4.0/MODULE.bazel": 
"e986a9fe25aeaa84ac17ca093ef13a4637f6107375f64667a15999f77db6c8f6", "https://bcr.bazel.build/modules/rules_java/6.5.2/MODULE.bazel": "1d440d262d0e08453fa0c4d8f699ba81609ed0e9a9a0f02cd10b3e7942e61e31", + "https://bcr.bazel.build/modules/rules_java/7.1.0/MODULE.bazel": "30d9135a2b6561c761bd67bd4990da591e6bdc128790ce3e7afd6a3558b2fb64", "https://bcr.bazel.build/modules/rules_java/7.10.0/MODULE.bazel": "530c3beb3067e870561739f1144329a21c851ff771cd752a49e06e3dc9c2e71a", "https://bcr.bazel.build/modules/rules_java/7.12.2/MODULE.bazel": "579c505165ee757a4280ef83cda0150eea193eed3bef50b1004ba88b99da6de6", "https://bcr.bazel.build/modules/rules_java/7.2.0/MODULE.bazel": "06c0334c9be61e6cef2c8c84a7800cef502063269a5af25ceb100b192453d4ab", - "https://bcr.bazel.build/modules/rules_java/7.3.2/MODULE.bazel": "50dece891cfdf1741ea230d001aa9c14398062f2b7c066470accace78e412bc2", "https://bcr.bazel.build/modules/rules_java/7.6.1/MODULE.bazel": "2f14b7e8a1aa2f67ae92bc69d1ec0fa8d9f827c4e17ff5e5f02e91caa3b2d0fe", - "https://bcr.bazel.build/modules/rules_java/8.14.0/MODULE.bazel": "717717ed40cc69994596a45aec6ea78135ea434b8402fb91b009b9151dd65615", - "https://bcr.bazel.build/modules/rules_java/8.14.0/source.json": "8a88c4ca9e8759da53cddc88123880565c520503321e2566b4e33d0287a3d4bc", "https://bcr.bazel.build/modules/rules_java/8.3.2/MODULE.bazel": "7336d5511ad5af0b8615fdc7477535a2e4e723a357b6713af439fe8cf0195017", "https://bcr.bazel.build/modules/rules_java/8.5.1/MODULE.bazel": "d8a9e38cc5228881f7055a6079f6f7821a073df3744d441978e7a43e20226939", "https://bcr.bazel.build/modules/rules_java/8.6.1/MODULE.bazel": "f4808e2ab5b0197f094cabce9f4b006a27766beb6a9975931da07099560ca9c2", + "https://bcr.bazel.build/modules/rules_java/9.0.3/MODULE.bazel": "1f98ed015f7e744a745e0df6e898a7c5e83562d6b759dfd475c76456dda5ccea", + "https://bcr.bazel.build/modules/rules_java/9.0.3/source.json": "b038c0c07e12e658135bbc32cc1a2ded6e33785105c9d41958014c592de4593e", 
"https://bcr.bazel.build/modules/rules_jvm_external/4.4.2/MODULE.bazel": "a56b85e418c83eb1839819f0b515c431010160383306d13ec21959ac412d2fe7", "https://bcr.bazel.build/modules/rules_jvm_external/5.1/MODULE.bazel": "33f6f999e03183f7d088c9be518a63467dfd0be94a11d0055fe2d210f89aa909", "https://bcr.bazel.build/modules/rules_jvm_external/5.2/MODULE.bazel": "d9351ba35217ad0de03816ef3ed63f89d411349353077348a45348b096615036", - "https://bcr.bazel.build/modules/rules_jvm_external/5.3/MODULE.bazel": "bf93870767689637164657731849fb887ad086739bd5d360d90007a581d5527d", - "https://bcr.bazel.build/modules/rules_jvm_external/6.1/MODULE.bazel": "75b5fec090dbd46cf9b7d8ea08cf84a0472d92ba3585b476f44c326eda8059c4", "https://bcr.bazel.build/modules/rules_jvm_external/6.3/MODULE.bazel": "c998e060b85f71e00de5ec552019347c8bca255062c990ac02d051bb80a38df0", "https://bcr.bazel.build/modules/rules_jvm_external/6.7/MODULE.bazel": "e717beabc4d091ecb2c803c2d341b88590e9116b8bf7947915eeb33aab4f96dd", "https://bcr.bazel.build/modules/rules_jvm_external/6.7/source.json": "5426f412d0a7fc6b611643376c7e4a82dec991491b9ce5cb1cfdd25fe2e92be4", - "https://bcr.bazel.build/modules/rules_kotlin/1.9.0/MODULE.bazel": "ef85697305025e5a61f395d4eaede272a5393cee479ace6686dba707de804d59", "https://bcr.bazel.build/modules/rules_kotlin/1.9.6/MODULE.bazel": "d269a01a18ee74d0335450b10f62c9ed81f2321d7958a2934e44272fe82dcef3", "https://bcr.bazel.build/modules/rules_kotlin/1.9.6/source.json": "2faa4794364282db7c06600b7e5e34867a564ae91bda7cae7c29c64e9466b7d5", "https://bcr.bazel.build/modules/rules_license/0.0.3/MODULE.bazel": "627e9ab0247f7d1e05736b59dbb1b6871373de5ad31c3011880b4133cafd4bd0", @@ -150,7 +166,6 @@ "https://bcr.bazel.build/modules/rules_proto/5.3.0-21.7/MODULE.bazel": "e8dff86b0971688790ae75528fe1813f71809b5afd57facb44dad9e8eca631b7", "https://bcr.bazel.build/modules/rules_proto/6.0.0-rc1/MODULE.bazel": "1e5b502e2e1a9e825eef74476a5a1ee524a92297085015a052510b09a1a09483", 
"https://bcr.bazel.build/modules/rules_proto/6.0.2/MODULE.bazel": "ce916b775a62b90b61888052a416ccdda405212b6aaeb39522f7dc53431a5e73", - "https://bcr.bazel.build/modules/rules_proto/7.0.2/MODULE.bazel": "bf81793bd6d2ad89a37a40693e56c61b0ee30f7a7fdbaf3eabbf5f39de47dea2", "https://bcr.bazel.build/modules/rules_proto/7.1.0/MODULE.bazel": "002d62d9108f75bb807cd56245d45648f38275cb3a99dcd45dfb864c5d74cb96", "https://bcr.bazel.build/modules/rules_proto/7.1.0/source.json": "39f89066c12c24097854e8f57ab8558929f9c8d474d34b2c00ac04630ad8940e", "https://bcr.bazel.build/modules/rules_python/0.10.2/MODULE.bazel": "cc82bc96f2997baa545ab3ce73f196d040ffb8756fd2d66125a530031cd90e5f", @@ -159,28 +174,34 @@ "https://bcr.bazel.build/modules/rules_python/0.28.0/MODULE.bazel": "cba2573d870babc976664a912539b320cbaa7114cd3e8f053c720171cde331ed", "https://bcr.bazel.build/modules/rules_python/0.31.0/MODULE.bazel": "93a43dc47ee570e6ec9f5779b2e64c1476a6ce921c48cc9a1678a91dd5f8fd58", "https://bcr.bazel.build/modules/rules_python/0.33.2/MODULE.bazel": "3e036c4ad8d804a4dad897d333d8dce200d943df4827cb849840055be8d2e937", + "https://bcr.bazel.build/modules/rules_python/0.34.0/MODULE.bazel": "1d623d026e075b78c9fde483a889cda7996f5da4f36dffb24c246ab30f06513a", "https://bcr.bazel.build/modules/rules_python/0.4.0/MODULE.bazel": "9208ee05fd48bf09ac60ed269791cf17fb343db56c8226a720fbb1cdf467166c", - "https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7", + "https://bcr.bazel.build/modules/rules_python/1.3.0/MODULE.bazel": "8361d57eafb67c09b75bf4bbe6be360e1b8f4f18118ab48037f2bd50aa2ccb13", + "https://bcr.bazel.build/modules/rules_python/1.4.1/MODULE.bazel": "8991ad45bdc25018301d6b7e1d3626afc3c8af8aaf4bc04f23d0b99c938b73a6", + "https://bcr.bazel.build/modules/rules_python/1.5.1/MODULE.bazel": "acfe65880942d44a69129d4c5c3122d57baaf3edf58ae5a6bd4edea114906bf5", "https://bcr.bazel.build/modules/rules_python/1.6.0/MODULE.bazel": 
"7e04ad8f8d5bea40451cf80b1bd8262552aa73f841415d20db96b7241bd027d8", "https://bcr.bazel.build/modules/rules_python/1.6.3/MODULE.bazel": "a7b80c42cb3de5ee2a5fa1abc119684593704fcd2fec83165ebe615dec76574f", "https://bcr.bazel.build/modules/rules_python/1.7.0/MODULE.bazel": "d01f995ecd137abf30238ad9ce97f8fc3ac57289c8b24bd0bf53324d937a14f8", "https://bcr.bazel.build/modules/rules_python/1.7.0/source.json": "028a084b65dcf8f4dc4f82f8778dbe65df133f234b316828a82e060d81bdce32", "https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c", "https://bcr.bazel.build/modules/rules_shell/0.3.0/MODULE.bazel": "de4402cd12f4cc8fda2354fce179fdb068c0b9ca1ec2d2b17b3e21b24c1a937b", - "https://bcr.bazel.build/modules/rules_shell/0.3.0/source.json": "c55ed591aa5009401ddf80ded9762ac32c358d2517ee7820be981e2de9756cf3", + "https://bcr.bazel.build/modules/rules_shell/0.6.1/MODULE.bazel": "72e76b0eea4e81611ef5452aa82b3da34caca0c8b7b5c0c9584338aa93bae26b", + "https://bcr.bazel.build/modules/rules_shell/0.6.1/source.json": "20ec05cd5e592055e214b2da8ccb283c7f2a421ea0dc2acbf1aa792e11c03d0c", "https://bcr.bazel.build/modules/rules_swift/1.16.0/MODULE.bazel": "4a09f199545a60d09895e8281362b1ff3bb08bbde69c6fc87aff5b92fcc916ca", "https://bcr.bazel.build/modules/rules_swift/2.1.1/MODULE.bazel": "494900a80f944fc7aa61500c2073d9729dff0b764f0e89b824eb746959bc1046", - "https://bcr.bazel.build/modules/rules_swift/2.1.1/source.json": "40fc69dfaac64deddbb75bd99cdac55f4427d9ca0afbe408576a65428427a186", + "https://bcr.bazel.build/modules/rules_swift/2.4.0/MODULE.bazel": "1639617eb1ede28d774d967a738b4a68b0accb40650beadb57c21846beab5efd", + "https://bcr.bazel.build/modules/rules_swift/3.1.2/MODULE.bazel": "72c8f5cf9d26427cee6c76c8e3853eb46ce6b0412a081b2b6db6e8ad56267400", + "https://bcr.bazel.build/modules/rules_swift/3.1.2/source.json": "e85761f3098a6faf40b8187695e3de6d97944e98abd0d8ce579cb2daf6319a66", 
"https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8", "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c", - "https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef", "https://bcr.bazel.build/modules/stardoc/0.7.0/MODULE.bazel": "05e3d6d30c099b6770e97da986c53bd31844d7f13d41412480ea265ac9e8079c", - "https://bcr.bazel.build/modules/stardoc/0.7.1/MODULE.bazel": "3548faea4ee5dda5580f9af150e79d0f6aea934fc60c1cc50f4efdd9420759e7", "https://bcr.bazel.build/modules/stardoc/0.7.2/MODULE.bazel": "fc152419aa2ea0f51c29583fab1e8c99ddefd5b3778421845606ee628629e0e5", "https://bcr.bazel.build/modules/stardoc/0.7.2/source.json": "58b029e5e901d6802967754adf0a9056747e8176f017cfe3607c0851f4d42216", "https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.1/MODULE.bazel": "5e463fbfba7b1701d957555ed45097d7f984211330106ccd1352c6e0af0dcf91", - "https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.1/source.json": "32bd87e5f4d7acc57c5b2ff7c325ae3061d5e242c0c4c214ae87e0f1c13e54cb", + "https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.2/MODULE.bazel": "75aab2373a4bbe2a1260b9bf2a1ebbdbf872d3bd36f80bff058dccd82e89422f", + "https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.2/source.json": "5fba48bbe0ba48761f9e9f75f92876cafb5d07c0ce059cc7a8027416de94a05b", "https://bcr.bazel.build/modules/upb/0.0.0-20220923-a547704/MODULE.bazel": "7298990c00040a0e2f121f6c32544bab27d4452f80d9ce51349b1a28f3005c43", + "https://bcr.bazel.build/modules/upb/0.0.0-20230516-61a97ef/MODULE.bazel": "c0df5e35ad55e264160417fd0875932ee3c9dda63d9fccace35ac62f45e1b6f9", "https://bcr.bazel.build/modules/zlib/1.2.11/MODULE.bazel": "07b389abc85fdbca459b69e2ec656ae5622873af3f845e1c9d80fe179f3effa0", "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.5/MODULE.bazel": 
"eec517b5bbe5492629466e11dae908d043364302283de25581e3eb944326c4ca", "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.5/source.json": "22bc55c47af97246cfc093d0acf683a7869377de362b5d1c552c2c2e16b7a806", @@ -190,11 +211,11 @@ "moduleExtensions": { "@@rules_kotlin+//src/main/starlark/core/repositories:bzlmod_setup.bzl%rules_kotlin_extensions": { "general": { - "bzlTransitiveDigest": "rL/34P1aFDq2GqVC2zCFgQ8nTuOC6ziogocpvG50Qz8=", + "bzlTransitiveDigest": "ABI1D/sbS1ovwaW/kHDoj8nnXjQ0oKU9fzmzEG4iT8o=", "usagesDigest": "QI2z8ZUR+mqtbwsf2fLqYdJAkPOHdOV+tF2yVAUgRzw=", - "recordedFileInputs": {}, - "recordedDirentsInputs": {}, - "envVariables": {}, + "recordedInputs": [ + "REPO_MAPPING:rules_kotlin+,bazel_tools bazel_tools" + ], "generatedRepoSpecs": { "com_github_jetbrains_kotlin_git": { "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:compiler.bzl%kotlin_compiler_git_repository", @@ -242,23 +263,31 @@ ] } } - }, - "recordedRepoMappingEntries": [ - [ - "rules_kotlin+", - "bazel_tools", - "bazel_tools" - ] - ] + } } }, "@@rules_python+//python/extensions:config.bzl%config": { "general": { - "bzlTransitiveDigest": "W97kKxM+lW7l/kO0rQa7Jm31CA1j+W1bNHGKjwX5xMg=", + "bzlTransitiveDigest": "2hLgIvNVTLgxus0ZuXtleBe70intCfo0cHs8qvt6cdM=", "usagesDigest": "ZVSXMAGpD+xzVNPuvF1IoLBkty7TROO0+akMapt1pAg=", - "recordedFileInputs": {}, - "recordedDirentsInputs": {}, - "envVariables": {}, + "recordedInputs": [ + "REPO_MAPPING:rules_python+,bazel_tools bazel_tools", + "REPO_MAPPING:rules_python+,pypi__build rules_python++config+pypi__build", + "REPO_MAPPING:rules_python+,pypi__click rules_python++config+pypi__click", + "REPO_MAPPING:rules_python+,pypi__colorama rules_python++config+pypi__colorama", + "REPO_MAPPING:rules_python+,pypi__importlib_metadata rules_python++config+pypi__importlib_metadata", + "REPO_MAPPING:rules_python+,pypi__installer rules_python++config+pypi__installer", + "REPO_MAPPING:rules_python+,pypi__more_itertools 
rules_python++config+pypi__more_itertools", + "REPO_MAPPING:rules_python+,pypi__packaging rules_python++config+pypi__packaging", + "REPO_MAPPING:rules_python+,pypi__pep517 rules_python++config+pypi__pep517", + "REPO_MAPPING:rules_python+,pypi__pip rules_python++config+pypi__pip", + "REPO_MAPPING:rules_python+,pypi__pip_tools rules_python++config+pypi__pip_tools", + "REPO_MAPPING:rules_python+,pypi__pyproject_hooks rules_python++config+pypi__pyproject_hooks", + "REPO_MAPPING:rules_python+,pypi__setuptools rules_python++config+pypi__setuptools", + "REPO_MAPPING:rules_python+,pypi__tomli rules_python++config+pypi__tomli", + "REPO_MAPPING:rules_python+,pypi__wheel rules_python++config+pypi__wheel", + "REPO_MAPPING:rules_python+,pypi__zipp rules_python++config+pypi__zipp" + ], "generatedRepoSpecs": { "rules_python_internal": { "repoRuleId": "@@rules_python+//python/private:internal_config_repo.bzl%internal_config_repo", @@ -402,98 +431,17 @@ "build_file_content": "package(default_visibility = [\"//visibility:public\"])\n\nload(\"@rules_python//python:py_library.bzl\", \"py_library\")\n\npy_library(\n name = \"lib\",\n srcs = glob([\"**/*.py\"]),\n data = glob([\"**/*\"], exclude=[\n # These entries include those put into user-installed dependencies by\n # data_exclude to avoid non-determinism.\n \"**/*.py\",\n \"**/*.pyc\",\n \"**/*.pyc.*\", # During pyc creation, temp files named *.pyc.NNN are created\n \"**/*.dist-info/RECORD\",\n \"BUILD\",\n \"WORKSPACE\",\n ]),\n # This makes this directory a top-level in the python import\n # search path for anything that depends on this.\n imports = [\".\"],\n)\n" } } - }, - "recordedRepoMappingEntries": [ - [ - "rules_python+", - "bazel_tools", - "bazel_tools" - ], - [ - "rules_python+", - "pypi__build", - "rules_python++config+pypi__build" - ], - [ - "rules_python+", - "pypi__click", - "rules_python++config+pypi__click" - ], - [ - "rules_python+", - "pypi__colorama", - "rules_python++config+pypi__colorama" - ], - [ - 
"rules_python+", - "pypi__importlib_metadata", - "rules_python++config+pypi__importlib_metadata" - ], - [ - "rules_python+", - "pypi__installer", - "rules_python++config+pypi__installer" - ], - [ - "rules_python+", - "pypi__more_itertools", - "rules_python++config+pypi__more_itertools" - ], - [ - "rules_python+", - "pypi__packaging", - "rules_python++config+pypi__packaging" - ], - [ - "rules_python+", - "pypi__pep517", - "rules_python++config+pypi__pep517" - ], - [ - "rules_python+", - "pypi__pip", - "rules_python++config+pypi__pip" - ], - [ - "rules_python+", - "pypi__pip_tools", - "rules_python++config+pypi__pip_tools" - ], - [ - "rules_python+", - "pypi__pyproject_hooks", - "rules_python++config+pypi__pyproject_hooks" - ], - [ - "rules_python+", - "pypi__setuptools", - "rules_python++config+pypi__setuptools" - ], - [ - "rules_python+", - "pypi__tomli", - "rules_python++config+pypi__tomli" - ], - [ - "rules_python+", - "pypi__wheel", - "rules_python++config+pypi__wheel" - ], - [ - "rules_python+", - "pypi__zipp", - "rules_python++config+pypi__zipp" - ] - ] + } } }, "@@rules_python+//python/uv:uv.bzl%uv": { "general": { - "bzlTransitiveDigest": "zyNsrbgVKwpA0B3zI84imAfuC424VSzYNPgjr/HJy5M=", + "bzlTransitiveDigest": "ijW9KS7qsIY+yBVvJ+Nr1mzwQox09j13DnE3iIwaeTM=", "usagesDigest": "H8dQoNZcoqP+Mu0tHZTi4KHATzvNkM5ePuEqoQdklIU=", - "recordedFileInputs": {}, - "recordedDirentsInputs": {}, - "envVariables": {}, + "recordedInputs": [ + "REPO_MAPPING:rules_python+,bazel_tools bazel_tools", + "REPO_MAPPING:rules_python+,platforms platforms" + ], "generatedRepoSpecs": { "uv": { "repoRuleId": "@@rules_python+//python/uv/private:uv_toolchains_repo.bzl%uv_toolchains_repo", @@ -513,19 +461,7 @@ "toolchain_target_settings": {} } } - }, - "recordedRepoMappingEntries": [ - [ - "rules_python+", - "bazel_tools", - "bazel_tools" - ], - [ - "rules_python+", - "platforms", - "platforms" - ] - ] + } } } }, diff --git a/README.md b/README.md index cca1447..a75b12f 100644 --- a/README.md 
+++ b/README.md @@ -8,6 +8,12 @@ and high bandwidth shared memory buffers, kind of like they are going faster than light (not really, of course). If they go between computers, they are transported over the network at sub-light speed. +## Acknowledgments + +Some of the code in this project was contributed by Cruise LLC. + +## Features + It has the following features: 1. Single threaded coroutine based server process written in C++17 @@ -24,30 +30,33 @@ It has the following features: 1. Shared and weak pointers for message references. 1. Ports to MacOS and Linux, ARM64 and x86_64. 1. Builds using Bazel and uses Abseil and Protocol Buffers from Google. -1. Uses my C++ coroutine library (https://github.com/dallison/cocpp) +1. Uses my C++ coroutine library (https://github.com/dallison/co) See the file docs/subspace.pdf for full documentation. # Building + +Subspace can be built using either Bazel or CMake. Both build systems will automatically download and build all required dependencies. + +## Building with Bazel + This uses Google's Bazel to build. You will need to download Bazel to build it. The build also needs some external libraries, but Bazel takes care of downloading them. The *.bazelrc* file contains some configuration options. -## To build on Mac Apple Silicon +### To build on Mac Apple Silicon ``` bazel build --config=apple_silicon ... ``` -## To build on Linux -Subspace really wants to be built using *clang*. Depending on how your OS is configured, you +### To build on Linux +Subspace really wants to be built using *clang* but modern *GCC* versions work well too. Depending on how your OS is configured, you might need to tell bazel what compiler to use. ``` CC=clang bazel build ... ``` -It does build with *g++* but you will get some compiler warnings about different signed comparisons -that clang doesn't care about. 
### Example: Ubuntu 20.04 Build a minimal set of binaries: @@ -62,6 +71,166 @@ Then run each in a separate terminal: * `./bazel-bin/manual_tests/sub` * `./bazel-bin/manual_tests/pub` +### Running Tests with Bazel + +You can run tests directly using `bazel run` or `bazel test`. The `bazel run` command will build and execute the test in one step, while `bazel test` runs tests in test mode (useful for CI/CD). + +**Note:** All tests automatically start a subspace server in a separate thread, so you don't need to run the server separately. The tests handle server lifecycle management internally. + +#### client_test + +The `client_test` is a comprehensive test suite that validates the core client functionality including publishers, subscribers, reliable/unreliable channels, message reading modes, and more. + +```bash +# Run the test +bazel run //client:client_test + +# Or run as a test (better for CI) +bazel test //client:client_test +``` + +#### latency_test + +The `latency_test` measures message transmission latency between publishers and subscribers. This is useful for benchmarking performance. + +```bash +# Run the latency test +bazel run //client:latency_test + +# Run with custom options (if supported) +bazel run //client:latency_test -- --help +``` + +#### stress_test + +The `stress_test` performs stress testing with high message rates and multiple publishers/subscribers to verify system stability under load. + +```bash +# Run the stress test (may take a while) +bazel run //client:stress_test + +# Or run as a test +bazel test //client:stress_test +``` + +#### Running All Tests + +To run all tests at once: + +```bash +# Run all tests +bazel test //... + +# Run all tests in a specific directory +bazel test //client/... +bazel test //common/... +``` + +## Building with CMake + +Subspace also supports building with CMake (version 3.15 or later). 
CMake uses FetchContent to automatically download and build all dependencies including Abseil, Protobuf, Googletest, cpp_toolbelt, and co. + +### Prerequisites + +- CMake 3.15 or later +- C++17 compatible compiler (clang or g++) +- Git (for fetching dependencies) + +### Basic Build + +```bash +mkdir build +cd build +cmake .. +make +``` + +### Build Options + +You can customize the build with CMake options: + +```bash +cmake -DCMAKE_BUILD_TYPE=Release .. +make -j$(nproc) +``` + +### Running Tests + +After building, you can run the tests: + +```bash +cd build +ctest +``` + +Or run individual tests: + +```bash +./client/client_test +./common/common_test +``` + +### Example: Building and Running + +```bash +# Configure and build +mkdir build && cd build +cmake -DCMAKE_BUILD_TYPE=Release .. +make -j$(nproc) + +# Run the server in one terminal +./server/subspace_server + +# Run publisher/subscriber examples in other terminals +./client/latency_test +./client/stress_test +``` + +### CMake Integration in Your Project + +To use Subspace in your CMake project, you can add it as a subdirectory: + +```cmake +# In your CMakeLists.txt +add_subdirectory(subspace) +target_link_libraries(your_target + subspace_client + subspace_common + subspace_proto +) +``` + +Or use FetchContent: + +```cmake +include(FetchContent) +FetchContent_Declare( + subspace + GIT_REPOSITORY https://github.com/dallison/subspace.git + GIT_TAG main # or specific tag/commit +) +FetchContent_MakeAvailable(subspace) + +target_link_libraries(your_target + subspace_client + subspace_common + subspace_proto +) +``` + +### CMake Build Targets + +The CMake build provides the following targets: + +- `subspace_client` - Client library +- `subspace_common` - Common utilities library +- `subspace_proto` - Protocol buffer definitions library +- `libserver` - Server library +- `subspace_server` - Server executable +- `client_test`, `latency_test`, `stress_test` - Test executables +- `common_test` - Common library tests +- 
`c_client_test` - C client tests + # Bazel WORKSPACE Add this to your Bazel WORKSPACE file to get access to this library without downloading it manually. @@ -77,3 +246,1093 @@ http_archive( You can also add a sha256 field to ensure a canonical build if you like. Bazel will tell you what to put in for the hash when you first build it. +# Using Subspace + +## Overview + +Subspace provides a high-performance, shared-memory based publish/subscribe IPC system. Messages are transmitted through POSIX shared memory with sub-microsecond latency. The system supports both reliable and unreliable message delivery, allowing you to choose the appropriate semantics for your use case. + +## Client API + +### Creating a Client + +The `Client` class is the main entry point for using Subspace. You can create a client in two ways: + +**Method 1: Using `Create()` (recommended)** +```cpp +#include "client/client.h" + +auto client_or = subspace::Client::Create("/tmp/subspace", "my_client"); +if (!client_or.ok()) { + // Handle error + return; +} +auto client = client_or.value(); +``` + +**Method 2: Constructor + Init()** +```cpp +subspace::Client client; +auto status = client.Init("/tmp/subspace", "my_client"); +if (!status.ok()) { + // Handle error + return; +} +``` + +**Parameters:** +- `server_socket` (default: `"/tmp/subspace"`): Path to the Unix domain socket where the Subspace server is listening +- `client_name` (default: `""`): Optional name for this client instance +- `c` (optional): Pointer to a coroutine if using coroutine-aware mode + +### Client Methods + +```cpp +class Client { +public: + // Initialize the client by connecting to the server + absl::Status Init(const std::string &server_socket = "/tmp/subspace", + const std::string &client_name = ""); + + // Create a publisher for a channel + absl::StatusOr + CreatePublisher(const std::string &channel_name, + int slot_size, + int num_slots, + const PublisherOptions &opts = PublisherOptions()); + + // Create a publisher with 
options specifying slot size and count + absl::StatusOr + CreatePublisher(const std::string &channel_name, + const PublisherOptions &opts = PublisherOptions()); + + // Create a subscriber for a channel + absl::StatusOr + CreateSubscriber(const std::string &channel_name, + const SubscriberOptions &opts = SubscriberOptions()); + + // Get information about channels + absl::StatusOr GetChannelInfo(const std::string &channelName); + absl::StatusOr> GetChannelInfo(); + + absl::StatusOr GetChannelStats(const std::string &channelName); + absl::StatusOr ChannelExists(const std::string &channelName); + + // Enable/disable debug output + void SetDebug(bool v); + + // Enable/disable thread-safe mode + void SetThreadSafe(bool v); +}; +``` + +## Publisher API + +### Creating a Publisher + +Publishers send messages to channels. You can create a publisher in two ways: + +**Method 1: Explicit slot size and count** +```cpp +auto pub_or = client->CreatePublisher("my_channel", 1024, 10); +if (!pub_or.ok()) { + // Handle error + return; +} +auto pub = pub_or.value(); +``` + +**Method 2: Using PublisherOptions** +```cpp +auto pub_or = client->CreatePublisher("my_channel", + subspace::PublisherOptions() + .SetSlotSize(1024) + .SetNumSlots(10) + .SetReliable(true)); +``` + +### Publishing Messages + +```cpp +// Get a message buffer +auto buffer_or = pub.GetMessageBuffer(1024); +if (!buffer_or.ok()) { + // Handle error (e.g., no free slots for reliable publisher) + return; +} +void* buffer = buffer_or.value(); + +// Fill in your message data +MyMessageType* msg = reinterpret_cast<MyMessageType*>(buffer); +msg->field1 = 42; +msg->field2 = "hello"; + +// Publish the message +auto msg_info_or = pub.PublishMessage(sizeof(MyMessageType)); +if (!msg_info_or.ok()) { + // Handle error + return; +} +auto msg_info = msg_info_or.value(); +// msg_info.ordinal contains the message sequence number +// msg_info.timestamp contains the publish timestamp +``` + +**Using GetMessageBufferSpan (C++17 style):** +```cpp +auto
span_or = pub.GetMessageBufferSpan(1024); +if (!span_or.ok() || span_or.value().empty()) { + // Handle error + return; +} +auto span = span_or.value(); +// span is an absl::Span +MyMessageType* msg = reinterpret_cast<MyMessageType*>(span.data()); +// ... fill message ... +pub.PublishMessage(sizeof(MyMessageType)); +``` + +### Publisher Methods + +```cpp +class Publisher { +public: + // Get a message buffer for writing + absl::StatusOr<void*> GetMessageBuffer(int32_t max_size = -1, bool lock = true); + absl::StatusOr> GetMessageBufferSpan(int32_t max_size = -1, bool lock = true); + + // Publish a message + absl::StatusOr PublishMessage(int64_t message_size); + + // Cancel a publish (releases lock in thread-safe mode) + void CancelPublish(); + + // Wait for a reliable publisher to have a free slot + absl::Status Wait(const co::Coroutine *c = nullptr); + absl::Status Wait(std::chrono::nanoseconds timeout, const co::Coroutine *c = nullptr); + absl::StatusOr Wait(const toolbelt::FileDescriptor &fd, const co::Coroutine *c = nullptr); + + // Get file descriptor for polling + struct pollfd GetPollFd() const; + toolbelt::FileDescriptor GetFileDescriptor() const; + const toolbelt::FileDescriptor& GetRetirementFd() const; + + // Channel information + std::string Name() const; + std::string Type() const; + bool IsReliable() const; + bool IsLocal() const; + bool IsFixedSize() const; + int32_t SlotSize() const; + int32_t NumSlots() const; + + // Statistics + void GetStatsCounters(uint64_t &total_bytes, uint64_t &total_messages, + uint32_t &max_message_size, uint32_t &total_drops); + + // Resize callback registration + absl::Status RegisterResizeCallback( + std::function callback); +}; +``` + +### Reliable Publisher Example + +```cpp +// Create a reliable publisher +auto pub_or = client->CreatePublisher("reliable_channel", 256, 5, + subspace::PublisherOptions().SetReliable(true)); + +auto pub = pub_or.value(); + +while (true) { + // Wait for a free slot (blocks until available) + auto status =
pub.Wait(); + if (!status.ok()) { + // Handle error + break; + } + + // Get message buffer + auto buffer_or = pub.GetMessageBuffer(256); + if (!buffer_or.ok()) { + continue; // Should not happen after Wait() + } + + // Fill and publish + MyMessage* msg = reinterpret_cast<MyMessage*>(buffer_or.value()); + msg->data = compute_data(); + pub.PublishMessage(sizeof(MyMessage)); +} +``` + +## Subscriber API + +### Creating a Subscriber + +```cpp +auto sub_or = client->CreateSubscriber("my_channel"); +if (!sub_or.ok()) { + // Handle error + return; +} +auto sub = sub_or.value(); +``` + +### Reading Messages + +**Method 1: Read next message** +```cpp +auto msg_or = sub.ReadMessage(subspace::ReadMode::kReadNext); +if (!msg_or.ok()) { + // Handle error + return; +} +auto msg = msg_or.value(); +if (msg.length == 0) { + // No message available + return; +} +// msg.buffer points to the message data +// msg.length is the message size in bytes +// msg.ordinal is the sequence number +// msg.timestamp is the publish timestamp +const MyMessageType* data = reinterpret_cast<const MyMessageType*>(msg.buffer); +``` + +**Method 2: Read newest message** +```cpp +auto msg_or = sub.ReadMessage(subspace::ReadMode::kReadNewest); +// This skips to the most recent message, discarding older ones +``` + +**Method 3: Typed read (returns shared_ptr)** +```cpp +auto msg_ptr_or = sub.ReadMessage<MyMessageType>(); +if (!msg_ptr_or.ok() || !msg_ptr_or.value()) { + // No message or error + return; +} +auto msg_ptr = msg_ptr_or.value(); +// msg_ptr is a subspace::shared_ptr<MyMessageType> +// Access data: msg_ptr->field1, (*msg_ptr).field2 +// Message is automatically released when msg_ptr goes out of scope +``` + +### Waiting for Messages + +```cpp +// Wait indefinitely +auto status = sub.Wait(); +if (!status.ok()) { + // Handle error + return; +} + +// Wait with timeout +auto status = sub.Wait(std::chrono::milliseconds(100)); +if (status.code() == absl::StatusCode::kDeadlineExceeded) { + // Timeout +} + +// Wait with file descriptor (for integration with event
loops) +toolbelt::FileDescriptor fd = /* your fd */; +auto fd_or = sub.Wait(fd); +if (fd_or.ok()) { + int triggered_fd = fd_or.value(); + // Process message +} +``` + +### Subscriber Methods + +```cpp +class Subscriber { +public: + // Read messages + absl::StatusOr ReadMessage(ReadMode mode = ReadMode::kReadNext); + template + absl::StatusOr> ReadMessage(ReadMode mode = ReadMode::kReadNext); + + // Find message by timestamp + absl::StatusOr FindMessage(uint64_t timestamp); + template + absl::StatusOr> FindMessage(uint64_t timestamp); + + // Wait for messages + absl::Status Wait(const co::Coroutine *c = nullptr); + absl::Status Wait(std::chrono::nanoseconds timeout, const co::Coroutine *c = nullptr); + absl::StatusOr Wait(const toolbelt::FileDescriptor &fd, const co::Coroutine *c = nullptr); + + // Get file descriptor for polling + struct pollfd GetPollFd() const; + toolbelt::FileDescriptor GetFileDescriptor() const; + + // Channel information + std::string Name() const; + std::string Type() const; + bool IsReliable() const; + int32_t SlotSize() const; + int32_t NumSlots() const; + int64_t GetCurrentOrdinal() const; + + // Callbacks + absl::Status RegisterDroppedMessageCallback( + std::function callback); + absl::Status RegisterMessageCallback( + std::function callback); + absl::Status ProcessAllMessages(ReadMode mode = ReadMode::kReadNext); + + // Statistics + const ChannelCounters& GetChannelCounters(); + int NumActiveMessages() const; +}; +``` + +### Subscriber Example with Callbacks + +```cpp +auto sub_or = client->CreateSubscriber("my_channel", + subspace::SubscriberOptions().SetReliable(true)); + +auto sub = sub_or.value(); + +// Register callback for dropped messages +sub.RegisterDroppedMessageCallback([](subspace::Subscriber* sub, int64_t count) { + std::cerr << "Dropped " << count << " messages on " << sub->Name() << std::endl; +}); + +// Register callback for received messages +sub.RegisterMessageCallback([](subspace::Subscriber* sub, subspace::Message 
msg) { + if (msg.length > 0) { + process_message(msg); + } +}); + +// In your event loop +while (true) { + // Process all available messages + sub.ProcessAllMessages(); + + // Or wait and read manually + sub.Wait(); + auto msg = sub.ReadMessage(); + if (msg.ok() && msg->length > 0) { + process_message(*msg); + } +} +``` + +## Reliable vs Unreliable Channels + +### Reliable Channels + +Reliable channels guarantee that **reliable subscribers** will never miss a message from **reliable publishers**. This is achieved through reference counting: a reliable publisher cannot reuse a slot until all reliable subscribers have released it. + +**Characteristics:** +- Messages are never dropped for reliable subscribers +- Publishers may block if all slots are in use +- Higher memory usage (slots held until all subscribers release) +- Use `Wait()` to block until a slot is available + +**When to use:** +- Critical data that must not be lost +- Control messages +- State synchronization +- Any scenario where message loss is unacceptable + +**Example:** +```cpp +// Reliable publisher +auto pub = client->CreatePublisher("control", 128, 10, + subspace::PublisherOptions().SetReliable(true)).value(); + +// Reliable subscriber +auto sub = client->CreateSubscriber("control", + subspace::SubscriberOptions().SetReliable(true)).value(); +``` + +### Unreliable Channels + +Unreliable channels provide best-effort delivery with no guarantees. If a subscriber cannot keep up, messages may be dropped. This provides the lowest latency and highest throughput. 
+ +**Characteristics:** +- Messages may be dropped if subscriber is slow +- Publishers never block (always get a slot immediately) +- Lower memory usage +- Highest performance + +**When to use:** +- High-frequency sensor data where occasional loss is acceptable +- Video/audio streaming +- Telemetry data +- Any scenario where latency is more important than reliability + +**Example:** +```cpp +// Unreliable publisher (default) +auto pub = client->CreatePublisher("sensor_data", 64, 100).value(); + +// Unreliable subscriber (default) +auto sub = client->CreateSubscriber("sensor_data").value(); +``` + +### Mixed Reliability + +You can mix reliable and unreliable publishers/subscribers on the same channel: +- **Reliable subscriber + Reliable publisher**: Guaranteed delivery +- **Reliable subscriber + Unreliable publisher**: Best effort (may drop) +- **Unreliable subscriber + Reliable publisher**: May drop if slow +- **Unreliable subscriber + Unreliable publisher**: Best effort, may drop + +## PublisherOptions + +The `PublisherOptions` struct configures publisher behavior. You can use it in two ways: + +### Method 1: Chained Setters (Fluent API) + +```cpp +auto opts = subspace::PublisherOptions() + .SetSlotSize(1024) + .SetNumSlots(10) + .SetReliable(true) + .SetLocal(false) + .SetType("MyMessageType") + .SetFixedSize(false) + .SetChecksum(true); + +auto pub = client->CreatePublisher("channel", opts).value(); +``` + +### Method 2: Designated Initializer (C++20) + +```cpp +auto pub = client->CreatePublisher("channel", + subspace::PublisherOptions{ + .slot_size = 1024, + .num_slots = 10, + .reliable = true, + .local = false, + .type = "MyMessageType", + .fixed_size = false, + .checksum = true + }).value(); +``` + +### PublisherOptions Fields and Methods + +| Field/Method | Type | Default | Description | +|--------------|------|---------|-------------| +| `slot_size` / `SetSlotSize()` | `int32_t` | `0` | Size of each message slot in bytes. 
Must be set if using options-only CreatePublisher. | +| `num_slots` / `SetNumSlots()` | `int32_t` | `0` | Number of slots in the channel. Must be set if using options-only CreatePublisher. | +| `reliable` / `SetReliable()` | `bool` | `false` | If true, reliable delivery (see Reliable Channels section). | +| `local` / `SetLocal()` | `bool` | `false` | If true, messages are only visible on the local machine (not bridged). | +| `type` / `SetType()` | `std::string` | `""` | User-defined message type identifier. All publishers/subscribers must use the same type. | +| `fixed_size` / `SetFixedSize()` | `bool` | `false` | If true, prevents automatic resizing of slots. | +| `bridge` / `SetBridge()` | `bool` | `false` | Internal: marks this as a bridge publisher. | +| `mux` / `SetMux()` | `std::string` | `""` | Multiplexer name for virtual channels. | +| `vchan_id` / `SetVchanId()` | `int` | `-1` | Virtual channel ID (-1 for server-assigned). | +| `activate` / `SetActivate()` | `bool` | `false` | If true, channel is activated even if unreliable. | +| `notify_retirement` / `SetNotifyRetirement()` | `bool` | `false` | If true, notify when slots are retired. | +| `checksum` / `SetChecksum()` | `bool` | `false` | If true, calculate checksums for all messages. | + +**Getter Methods:** +- `int32_t SlotSize() const` +- `int32_t NumSlots() const` +- `bool IsReliable() const` +- `bool IsLocal() const` +- `bool IsFixedSize() const` +- `const std::string& Type() const` +- `bool IsBridge() const` +- `const std::string& Mux() const` +- `int VchanId() const` +- `bool Activate() const` +- `bool NotifyRetirement() const` +- `bool Checksum() const` + +**Example: Creating a reliable publisher with checksums** +```cpp +auto pub = client->CreatePublisher("secure_channel", 512, 20, + subspace::PublisherOptions() + .SetReliable(true) + .SetChecksum(true) + .SetType("SecureMessage")).value(); +``` + +## SubscriberOptions + +The `SubscriberOptions` struct configures subscriber behavior. 
Like `PublisherOptions`, it supports both chained setters and designated initializers. + +### Method 1: Chained Setters + +```cpp +auto opts = subspace::SubscriberOptions() + .SetReliable(true) + .SetType("MyMessageType") + .SetMaxActiveMessages(10) + .SetChecksum(true) + .SetPassChecksumErrors(false); + +auto sub = client->CreateSubscriber("channel", opts).value(); +``` + +### Method 2: Designated Initializer + +```cpp +auto sub = client->CreateSubscriber("channel", + subspace::SubscriberOptions{ + .reliable = true, + .type = "MyMessageType", + .max_active_messages = 10, + .checksum = true, + .pass_checksum_errors = false + }).value(); +``` + +### SubscriberOptions Fields and Methods + +| Field/Method | Type | Default | Description | +|--------------|------|---------|-------------| +| `reliable` / `SetReliable()` | `bool` | `false` | If true, reliable delivery (see Reliable Channels section). | +| `type` / `SetType()` | `std::string` | `""` | User-defined message type identifier. Must match publisher type. | +| `max_active_messages` / `SetMaxActiveMessages()` | `int` | `1` | Maximum number of active messages (shared_ptrs) that can be held simultaneously. | +| `max_active_messages` / `SetMaxSharedPtrs()` | `int` | `0` | Alias: sets max_active_messages to n+1. | +| `log_dropped_messages` / `SetLogDroppedMessages()` | `bool` | `true` | If true, log when messages are dropped. | +| `bridge` / `SetBridge()` | `bool` | `false` | Internal: marks this as a bridge subscriber. | +| `mux` / `SetMux()` | `std::string` | `""` | Multiplexer name for virtual channels. | +| `vchan_id` / `SetVchanId()` | `int` | `-1` | Virtual channel ID (-1 for server-assigned). | +| `pass_activation` / `SetPassActivation()` | `bool` | `false` | If true, activation messages are passed to the user. | +| `read_write` / `SetReadWrite()` | `bool` | `false` | If true, map buffers as read-write instead of read-only. 
| +| `checksum` / `SetChecksum()` | `bool` | `false` | If true, verify checksums on received messages. | +| `pass_checksum_errors` / `SetPassChecksumErrors()` | `bool` | `false` | If true, pass messages with checksum errors (with flag set). If false, return error. | + +**Getter Methods:** +- `bool IsReliable() const` +- `const std::string& Type() const` +- `int MaxActiveMessages() const` +- `int MaxSharedPtrs() const` +- `bool LogDroppedMessages() const` +- `bool IsBridge() const` +- `const std::string& Mux() const` +- `int VchanId() const` +- `bool PassActivation() const` +- `bool ReadWrite() const` +- `bool Checksum() const` +- `bool PassChecksumErrors() const` + +**Example: Creating a reliable subscriber with checksum verification** +```cpp +auto sub = client->CreateSubscriber("secure_channel", + subspace::SubscriberOptions() + .SetReliable(true) + .SetChecksum(true) + .SetPassChecksumErrors(false) // Return error on checksum failure + .SetType("SecureMessage") + .SetMaxActiveMessages(5)).value(); +``` + +## Complete Example + +Here's a complete example showing publisher and subscriber: + +```cpp +#include "client/client.h" +#include <iostream> + +struct SensorData { + double temperature; + double pressure; + uint64_t timestamp; +}; + +int main() { + // Create client + auto client_or = subspace::Client::Create("/tmp/subspace", "sensor_app"); + if (!client_or.ok()) { + std::cerr << "Failed to create client: " << client_or.status() << std::endl; + return 1; + } + auto client = client_or.value(); + + // Create reliable publisher + auto pub_or = client->CreatePublisher("sensors", sizeof(SensorData), 10, + subspace::PublisherOptions() + .SetReliable(true) + .SetType("SensorData")); + if (!pub_or.ok()) { + std::cerr << "Failed to create publisher: " << pub_or.status() << std::endl; + return 1; + } + auto pub = pub_or.value(); + + // Create reliable subscriber + auto sub_or = client->CreateSubscriber("sensors", + subspace::SubscriberOptions() + .SetReliable(true) + 
.SetType("SensorData")); + if (!sub_or.ok()) { + std::cerr << "Failed to create subscriber: " << sub_or.status() << std::endl; + return 1; + } + auto sub = sub_or.value(); + + // Publisher loop + for (int i = 0; i < 100; ++i) { + // Wait for free slot + pub.Wait(); + + // Get buffer + auto buffer_or = pub.GetMessageBuffer(sizeof(SensorData)); + if (!buffer_or.ok()) continue; + + // Fill message + SensorData* data = reinterpret_cast<SensorData*>(buffer_or.value()); + data->temperature = 20.0 + i * 0.1; + data->pressure = 1013.25; + data->timestamp = std::chrono::steady_clock::now().time_since_epoch().count(); + + // Publish + auto msg_or = pub.PublishMessage(sizeof(SensorData)); + if (msg_or.ok()) { + std::cout << "Published message " << msg_or->ordinal << std::endl; + } + } + + // Subscriber loop + for (int i = 0; i < 100; ++i) { + // Wait for message + sub.Wait(); + + // Read message + auto msg_or = sub.ReadMessage<SensorData>(); + if (!msg_or.ok() || !msg_or.value()) { + continue; + } + + auto msg = msg_or.value(); + std::cout << "Received: temp=" << msg->temperature + << ", pressure=" << msg->pressure + << ", ordinal=" << msg.GetMessage().ordinal << std::endl; + } + + return 0; +} +``` + +## C Client Interface + +Subspace provides a C API (`c_client/subspace.h`) for applications that need to use Subspace from C code or integrate it into other language bindings. The C API is simpler and has fewer dependencies than the C++ API, making it easier to integrate into projects that don't use C++. + +### Error Handling + +The C API uses a thread-local error mechanism similar to `errno`. Most functions return a boolean indicating success (`true`) or failure (`false`). 
When a function fails, you can check for errors and retrieve the error message: + +```c +#include "c_client/subspace.h" + +// Check if there was an error +if (subspace_has_error()) { + // Get the error message + char* error = subspace_get_last_error(); + fprintf(stderr, "Error: %s\n", error); +} +``` + +The error message is a static string owned by the library and is thread-local (one error message per thread). + +### Creating a Client + +```c +// Create client with default socket ("/tmp/subspace") and no name +SubspaceClient client = subspace_create_client(); + +// Create client with custom socket +SubspaceClient client = subspace_create_client_with_socket("/tmp/my_subspace"); + +// Create client with socket and name +SubspaceClient client = subspace_create_client_with_socket_and_name( + "/tmp/subspace", "my_client_name"); + +// Check if client was created successfully +if (client.client == NULL) { + fprintf(stderr, "Failed to create client: %s\n", subspace_get_last_error()); + return 1; +} + +// Clean up when done +subspace_remove_client(&client); +``` + +### Creating Publishers and Subscribers + +**Publisher Options:** + +```c +// Create default publisher options +SubspacePublisherOptions pub_opts = subspace_publisher_options_default(1024, 10); +// pub_opts.slot_size = 1024 +// pub_opts.num_slots = 10 +// pub_opts.reliable = false +// pub_opts.fixed_size = false +// pub_opts.activate = false + +// Customize options +pub_opts.reliable = true; +pub_opts.fixed_size = false; +pub_opts.type.type = "MyMessageType"; +pub_opts.type.type_length = strlen(pub_opts.type.type); + +// Create publisher +SubspacePublisher pub = subspace_create_publisher(client, "my_channel", pub_opts); +if (pub.publisher == NULL) { + fprintf(stderr, "Failed to create publisher: %s\n", subspace_get_last_error()); + return 1; +} +``` + +**Subscriber Options:** + +```c +// Create default subscriber options +SubspaceSubscriberOptions sub_opts = subspace_subscriber_options_default(); +// 
sub_opts.reliable = false +// sub_opts.max_active_messages = 1 +// sub_opts.pass_activation = false +// sub_opts.log_dropped_messages = false + +// Customize options +sub_opts.reliable = true; +sub_opts.max_active_messages = 10; +sub_opts.type.type = "MyMessageType"; +sub_opts.type.type_length = strlen(sub_opts.type.type); + +// Create subscriber +SubspaceSubscriber sub = subspace_create_subscriber(client, "my_channel", sub_opts); +if (sub.subscriber == NULL) { + fprintf(stderr, "Failed to create subscriber: %s\n", subspace_get_last_error()); + return 1; +} +``` + +### Publishing Messages + +```c +// Get a message buffer +SubspaceMessageBuffer buffer = subspace_get_message_buffer(pub, 1024); +if (buffer.buffer == NULL) { + // For reliable publishers, you may need to wait + if (pub_opts.reliable) { + subspace_wait_for_publisher(pub); + buffer = subspace_get_message_buffer(pub, 1024); + } else { + fprintf(stderr, "Failed to get buffer: %s\n", subspace_get_last_error()); + return 1; + } +} + +// Fill in your message data +MyMessageType* msg = (MyMessageType*)buffer.buffer; +msg->field1 = 42; +msg->field2 = 3.14; + +// Publish the message +const SubspaceMessage pub_status = subspace_publish_message(pub, sizeof(MyMessageType)); +if (pub_status.length == 0) { + fprintf(stderr, "Failed to publish: %s\n", subspace_get_last_error()); + return 1; +} +// pub_status.ordinal contains the message sequence number +// pub_status.timestamp contains the publish timestamp +``` + +### Reading Messages + +```c +// Read next message +SubspaceMessage msg = subspace_read_message(sub); +if (msg.length == 0) { + // No message available + // For reliable subscribers, you may want to wait + if (sub_opts.reliable) { + subspace_wait_for_subscriber(sub); + msg = subspace_read_message(sub); + } +} + +if (msg.length > 0) { + // Process the message + const MyMessageType* data = (const MyMessageType*)msg.buffer; + printf("Received message ordinal: %lu\n", msg.ordinal); + printf("Message timestamp: 
%lu\n", msg.timestamp); + + // IMPORTANT: Free the message when done + subspace_free_message(&msg); +} + +// Read newest message (skips to most recent) +SubspaceMessage newest = subspace_read_message_with_mode(sub, kSubspaceReadNewest); +if (newest.length > 0) { + // Process message + subspace_free_message(&newest); +} +``` + +**Important:** You must call `subspace_free_message()` when done with a message. The `max_active_messages` option determines how many messages you can hold simultaneously. If you don't free messages, the subscriber will run out of slots and be unable to read more messages. + +### Waiting for Messages + +```c +// Wait indefinitely for a message +if (!subspace_wait_for_subscriber(sub)) { + fprintf(stderr, "Wait failed: %s\n", subspace_get_last_error()); + return 1; +} + +// Wait with file descriptor (for integration with event loops) +int fd = /* your file descriptor */; +int triggered_fd = subspace_wait_for_subscriber_with_fd(sub, fd); +if (triggered_fd < 0) { + fprintf(stderr, "Wait failed: %s\n", subspace_get_last_error()); + return 1; +} +``` + +### Using Poll/Epoll + +The C API provides file descriptors that can be used with `poll()`, `epoll()`, or other event notification mechanisms: + +```c +// Get pollfd structure for subscriber +struct pollfd pfd = subspace_get_subscriber_poll_fd(sub); +// pfd.fd is the file descriptor +// pfd.events should be set to POLLIN + +// Use in poll() call +int ret = poll(&pfd, 1, timeout_ms); +if (ret > 0 && (pfd.revents & POLLIN)) { + // Message available, read it + SubspaceMessage msg = subspace_read_message(sub); + // ... process message ... + subspace_free_message(&msg); +} + +// Or get the raw file descriptor +int fd = subspace_get_subscriber_fd(sub); +// Use fd with epoll, select, etc. 
+``` + +### Callbacks + +The C API supports callbacks for message reception and dropped messages: + +```c +// Message callback +void message_callback(SubspaceSubscriber sub, SubspaceMessage msg) { + if (msg.length > 0) { + printf("Received message of size %zu\n", msg.length); + // Process message + // IMPORTANT: Free the message when done + subspace_free_message(&msg); + } +} + +// Register callback +if (!subspace_register_subscriber_callback(sub, message_callback)) { + fprintf(stderr, "Failed to register callback: %s\n", subspace_get_last_error()); + return 1; +} + +// Process all available messages (calls the callback for each) +subspace_process_all_messages(sub); + +// Unregister callback +subspace_remove_subscriber_callback(sub); + +// Dropped message callback +void dropped_callback(SubspaceSubscriber sub, int64_t count) { + fprintf(stderr, "Dropped %ld messages\n", count); +} + +subspace_register_dropped_message_callback(sub, dropped_callback); +``` + +### Complete C Example + +```c +#include "c_client/subspace.h" +#include <stdio.h> +#include <string.h> + +struct SensorData { + double temperature; + double pressure; + uint64_t timestamp; +}; + +int main() { + // Create client + SubspaceClient client = subspace_create_client(); + if (client.client == NULL) { + fprintf(stderr, "Failed to create client: %s\n", subspace_get_last_error()); + return 1; + } + + // Create reliable publisher + SubspacePublisherOptions pub_opts = subspace_publisher_options_default( + sizeof(struct SensorData), 10); + pub_opts.reliable = true; + pub_opts.type.type = "SensorData"; + pub_opts.type.type_length = strlen(pub_opts.type.type); + + SubspacePublisher pub = subspace_create_publisher(client, "sensors", pub_opts); + if (pub.publisher == NULL) { + fprintf(stderr, "Failed to create publisher: %s\n", subspace_get_last_error()); + return 1; + } + + // Create reliable subscriber + SubspaceSubscriberOptions sub_opts = subspace_subscriber_options_default(); + sub_opts.reliable = true; + sub_opts.type.type = 
"SensorData"; + sub_opts.type.type_length = strlen(sub_opts.type.type); + + SubspaceSubscriber sub = subspace_create_subscriber(client, "sensors", sub_opts); + if (sub.subscriber == NULL) { + fprintf(stderr, "Failed to create subscriber: %s\n", subspace_get_last_error()); + return 1; + } + + // Publisher loop + for (int i = 0; i < 100; ++i) { + // Wait for free slot (reliable publisher) + subspace_wait_for_publisher(pub); + + // Get buffer + SubspaceMessageBuffer buffer = subspace_get_message_buffer(pub, sizeof(struct SensorData)); + if (buffer.buffer == NULL) { + continue; + } + + // Fill message + struct SensorData* data = (struct SensorData*)buffer.buffer; + data->temperature = 20.0 + i * 0.1; + data->pressure = 1013.25; + data->timestamp = /* get current time */; + + // Publish + const SubspaceMessage pub_status = subspace_publish_message(pub, sizeof(struct SensorData)); + if (pub_status.length > 0) { + printf("Published message %lu\n", pub_status.ordinal); + } + } + + // Subscriber loop + for (int i = 0; i < 100; ++i) { + // Wait for message + subspace_wait_for_subscriber(sub); + + // Read message + SubspaceMessage msg = subspace_read_message(sub); + if (msg.length > 0) { + const struct SensorData* data = (const struct SensorData*)msg.buffer; + printf("Received: temp=%.2f, pressure=%.2f, ordinal=%lu\n", + data->temperature, data->pressure, msg.ordinal); + subspace_free_message(&msg); + } + } + + // Cleanup + subspace_remove_subscriber(&sub); + subspace_remove_publisher(&pub); + subspace_remove_client(&client); + + return 0; +} +``` + +### C API Reference + +**Client Functions:** +- `SubspaceClient subspace_create_client(void)` +- `SubspaceClient subspace_create_client_with_socket(const char *socket_name)` +- `SubspaceClient subspace_create_client_with_socket_and_name(const char *socket_name, const char *client_name)` +- `bool subspace_remove_client(SubspaceClient *client)` + +**Publisher Functions:** +- `SubspacePublisherOptions subspace_publisher_options_default(int32_t 
slot_size, int num_slots)` +- `SubspacePublisher subspace_create_publisher(SubspaceClient client, const char *channel_name, SubspacePublisherOptions options)` +- `SubspaceMessageBuffer subspace_get_message_buffer(SubspacePublisher publisher, size_t max_size)` +- `const SubspaceMessage subspace_publish_message(SubspacePublisher publisher, size_t messageSize)` +- `bool subspace_wait_for_publisher(SubspacePublisher publisher)` +- `int subspace_wait_for_publisher_with_fd(SubspacePublisher publisher, int fd)` +- `struct pollfd subspace_get_publisher_poll_fd(SubspacePublisher publisher)` +- `int subspace_get_publisher_fd(SubspacePublisher publisher)` +- `bool subspace_register_resize_callback(SubspacePublisher publisher, bool (*callback)(SubspacePublisher, int32_t, int32_t))` +- `bool subspace_unregister_resize_callback(SubspacePublisher publisher)` +- `bool subspace_remove_publisher(SubspacePublisher *publisher)` + +**Subscriber Functions:** +- `SubspaceSubscriberOptions subspace_subscriber_options_default(void)` +- `SubspaceSubscriber subspace_create_subscriber(SubspaceClient client, const char *channel_name, SubspaceSubscriberOptions options)` +- `SubspaceMessage subspace_read_message(SubspaceSubscriber subscriber)` +- `SubspaceMessage subspace_read_message_with_mode(SubspaceSubscriber subscriber, SubspaceReadMode mode)` +- `bool subspace_free_message(SubspaceMessage *message)` +- `bool subspace_wait_for_subscriber(SubspaceSubscriber subscriber)` +- `int subspace_wait_for_subscriber_with_fd(SubspaceSubscriber subscriber, int fd)` +- `struct pollfd subspace_get_subscriber_poll_fd(SubspaceSubscriber subscriber)` +- `int subspace_get_subscriber_fd(SubspaceSubscriber subscriber)` +- `int32_t subspace_get_subscriber_slot_size(SubspaceSubscriber subscriber)` +- `int subspace_get_subscriber_num_slots(SubspaceSubscriber subscriber)` +- `SubspaceTypeInfo subspace_get_subscriber_type(SubspaceSubscriber subscriber)` +- `bool 
subspace_register_subscriber_callback(SubspaceSubscriber subscriber, void (*callback)(SubspaceSubscriber, SubspaceMessage))` +- `bool subspace_remove_subscriber_callback(SubspaceSubscriber subscriber)` +- `bool subspace_register_dropped_message_callback(SubspaceSubscriber subscriber, void (*callback)(SubspaceSubscriber, int64_t))` +- `bool subspace_remove_dropped_message_callback(SubspaceSubscriber subscriber)` +- `bool subspace_process_all_messages(SubspaceSubscriber subscriber)` +- `bool subspace_remove_subscriber(SubspaceSubscriber *subscriber)` + +**Error Functions:** +- `char* subspace_get_last_error(void)` +- `bool subspace_has_error(void)` + +## Message Types and Serialization + +Subspace is message-type agnostic. You can send any data structure as long as it fits in the slot size. Common approaches: + +1. **Plain C structs** (as shown above) - fastest, no serialization overhead +2. **Protocol Buffers** - cross-language, versioned +3. **Zero-copy facilities** like [Phaser](https://github.com/dallison/phaser) or [Neutron](https://github.com/dallison/neutron) - zero-copy, schema evolution +4. **JSON** - human-readable, flexible +5. **Custom binary formats** + +The `type` field in `PublisherOptions` and `SubscriberOptions` is purely for application-level type checking - Subspace doesn't validate or enforce it. + +## Thread Safety + +By default, the `Client` class is **not thread-safe**. To enable thread-safe mode: + +```cpp +client->SetThreadSafe(true); +``` + +In thread-safe mode: +- `GetMessageBuffer()` acquires a lock that is held until `PublishMessage()` or `CancelPublish()` is called +- You must call `PublishMessage()` or `CancelPublish()` after `GetMessageBuffer()` +- Multiple threads can safely use the same client instance + + +## Coroutine Support + +Subspace is coroutine-aware. 
If you pass a coroutine pointer when creating the client, blocking operations will yield to other coroutines: + +```cpp +co::CoroutineScheduler scheduler; +co::Coroutine* co = scheduler.CreateCoroutine([]() { + auto client = subspace::Client::Create("/tmp/subspace", "co_client", + co::Coroutine::Current()).value(); + // ... use client ... +}); +scheduler.Run(); +``` + +When using coroutines, `Wait()` operations will yield instead of blocking the thread. diff --git a/c_client/client_test.cc b/c_client/client_test.cc index 87da1cd..078b8be 100644 --- a/c_client/client_test.cc +++ b/c_client/client_test.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. diff --git a/c_client/subspace.cc b/c_client/subspace.cc index 01a6dff..2d02c17 100644 --- a/c_client/subspace.cc +++ b/c_client/subspace.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. diff --git a/c_client/subspace.h b/c_client/subspace.h index c6bcefa..6f81610 100644 --- a/c_client/subspace.h +++ b/c_client/subspace.h @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. 
diff --git a/client/BUILD.bazel b/client/BUILD.bazel index 8d56da4..97155bb 100644 --- a/client/BUILD.bazel +++ b/client/BUILD.bazel @@ -1,5 +1,7 @@ package(default_visibility = ["//visibility:public"]) +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") + cc_library( name = "subspace_client", srcs = [ @@ -8,6 +10,8 @@ cc_library( "message.cc", "publisher.cc", "subscriber.cc", + "checksum.cc", + "arm_crc32.S", ], hdrs = [ "client.h", @@ -16,7 +20,10 @@ cc_library( "options.h", "publisher.h", "subscriber.h", + "checksum.h", ], + strip_include_prefix = ".", + include_prefix = "subspace/client", deps = [ "//common:subspace_common", "@abseil-cpp//absl/container:flat_hash_map", @@ -24,8 +31,9 @@ cc_library( "@abseil-cpp//absl/status", "@abseil-cpp//absl/status:statusor", "@abseil-cpp//absl/strings:str_format", - "@coroutines//:co", - ], + "@abseil-cpp//absl/types:span", + "@coroutines//co", + ], ) cc_test( @@ -34,6 +42,7 @@ cc_test( srcs = ["client_test.cc"], data = [ "//server:subspace_server", + "//plugins:nop_plugin.so", ], deps = [ ":subspace_client", diff --git a/client/CMakeLists.txt b/client/CMakeLists.txt index 3ca1aea..4cc7e0d 100644 --- a/client/CMakeLists.txt +++ b/client/CMakeLists.txt @@ -13,17 +13,32 @@ add_library(subspace_client STATIC publisher.h subscriber.cc subscriber.h + checksum.cc + checksum.h + arm_crc32.S ) target_include_directories(subspace_client PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ) +# Set the assembly file to use ASM language (CMake will handle C preprocessor automatically for .S files) +set_source_files_properties(arm_crc32.S PROPERTIES + LANGUAGE ASM +) + # Link against other internal libraries (e.g., common) or external dependencies if needed: target_link_libraries(subspace_client PUBLIC subspace_common subspace_proto absl::flags + absl::flat_hash_map + absl::flat_hash_set + absl::status + absl::statusor + absl::str_format + absl::span + co protobuf::libprotobuf ) @@ -35,7 +50,9 @@ target_link_libraries(client_test PUBLIC 
subspace_common libserver gmock - absl::flags_parse) + absl::flags_parse + absl::failure_signal_handler + absl::symbolize) add_executable(latency_test latency_test.cc) target_link_libraries(latency_test PUBLIC diff --git a/client/arm_crc32.S b/client/arm_crc32.S new file mode 100644 index 0000000..d80ef16 --- /dev/null +++ b/client/arm_crc32.S @@ -0,0 +1,61 @@ +// Copyright 2023-2026 David Allison +// All Rights Reserved +// See LICENSE file for licensing information. + +#if defined(__ARM_FEATURE_CRC32) && defined(__aarch64__) + +// This is a handcoded ARM64 CRC32 function that uses the built-in CRC +// instructions. It is as fast as I can make it. + +#if defined(__APPLE__) +#define SYM(name) _##name +#define SYM_DATA(name) .p2align 2 +.section __TEXT,__text,regular,pure_instructions + +#elif defined(__linux__) || defined(__QNX__) || defined(__QNXNTO__) +.text +#define SYM(name) #name +#define SYM_DATA(name) .type SYM(name), @function +#else +#error "Unknown OS" +#endif + +.global SYM(SubspaceCRC32) +SYM_DATA(SubspaceCRC32) + +// Entry: +// w0: input crc +// x1: address of start of buffer +// x2: length of buffer +// +// Exit: +// x0: new crc +SYM(SubspaceCRC32): +1: + // 8-byte loop until x2 is less than 8 + cmp x2, #8 + blt 2f + ldr x4, [x1], #8 + crc32x w0, w0, x4 + subs x2, x2, #8 + b 1b + + // 4 byte remainder. Can only be one. +2: + cmp x2, #4 + blt 3f + ldr w4, [x1], #4 + crc32w w0, w0, w4 + subs x2, x2, #4 + + // Single byte remainder. +3: + cbz x2, 4f + ldrb w4, [x1], #1 + crc32b w0, w0, w4 + subs x2, x2, #1 + b 3b +4: + ret + +#endif diff --git a/client/bridge_test.cc b/client/bridge_test.cc index a696b9a..a8693c0 100644 --- a/client/bridge_test.cc +++ b/client/bridge_test.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. 
@@ -162,7 +162,6 @@ void WaitForSubscribedMessage(toolbelt::FileDescriptor &bridge_pipe, ASSERT_OK(n); ASSERT_EQ(sizeof(int32_t), *n); length = ntohl(length); // Length is network byte order. - n = bridge_pipe.Read(buffer, length); ASSERT_OK(n); diff --git a/client/checksum.cc b/client/checksum.cc new file mode 100644 index 0000000..a35c8cc --- /dev/null +++ b/client/checksum.cc @@ -0,0 +1,133 @@ +// Copyright 2023-2026 David Allison +// All Rights Reserved +// See LICENSE file for licensing information. + +#include "client/checksum.h" +#include +#include +#include + +namespace subspace { + +extern "C" { + +#if defined(__x86_64__) && !defined(__SSE4_2__) +// On Intel crc is only available when SSE4.2 is enabled +#undef SUBSPACE_HARDWARE_CRC +#endif + +#if defined(__aarch64__) && !defined(__ARM_FEATURE_CRC32) +// Switch off hardware CRC32 if it is not available on aarch64. +#undef SUBSPACE_HARDWARE_CRC +#endif + + +#if defined(SUBSPACE_HARDWARE_CRC) && defined(__aarch64__) + +// Whether to use the hand-written assembly version of the CRC32 function +// in arm_crc32.S +#define ARM_ASM_CRC32 1 + +#if !defined(ARM_ASM_CRC32) +#include +uint32_t SubspaceCRC32(uint32_t crc, const uint8_t *data, size_t length) { + size_t i = 0; + + // Process 8 bytes at a time. + for (; i + 8 <= length; i += 8) { + crc = __crc32d(crc, *reinterpret_cast(data + i)); + } + + // Process remaining 4 bytes. + if (i + 4 <= length) { + crc = __crc32w(crc, *reinterpret_cast(data + i)); + i += 4; + } + + // Process remaining bytes one at a time. + for (; i < length; i++) { + crc = __crc32b(crc, data[i]); + } + + return crc; +} +#endif // !defined(ARM_ASM_CRC32) + +#elif defined(SUBSPACE_HARDWARE_CRC) && defined(__x86_64__) +#include +uint32_t SubspaceCRC32(uint32_t crc, const uint8_t *data, size_t length) { + size_t i = 0; + // Process 8 bytes at a time. + for (; i + 8 <= length; i += 8) { + crc = _mm_crc32_u64(crc, *reinterpret_cast(data + i)); + } + // Process remaining 4 bytes. 
+ if (i + 4 <= length) { + crc = _mm_crc32_u32(crc, *reinterpret_cast(data + i)); + i += 4; + } + + // Process remaining bytes one at a time. + for (; i < length; i++) { + crc = _mm_crc32_u8(crc, data[i]); + } + + return crc; +} +#else +// Vibe-coded Fast CRC32 lookup table (IEEE 802.3 polynomial: 0xEDB88320) +static const uint32_t crc32_table[256] = { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, + 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, + 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, + 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, + 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, + 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, + 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, + 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, + 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, + 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, + 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, + 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 
0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, + 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, + 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, + 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, + 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, + 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, + 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, + 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, + 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, + 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, + 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D}; + +// Generic CRC32 calculation using lookup table. 
+uint32_t SubspaceCRC32(uint32_t crc, const uint8_t *data, size_t length) { + for (size_t i = 0; i < length; i++) { + crc = (crc >> 8) ^ crc32_table[(crc ^ data[i]) & 0xFF]; + } + return crc; +} +#endif +} // extern "C" +} // namespace subspace diff --git a/client/checksum.h b/client/checksum.h new file mode 100644 index 0000000..847b70e --- /dev/null +++ b/client/checksum.h @@ -0,0 +1,37 @@ +// Copyright 2023-2026 David Allison +// All Rights Reserved +// See LICENSE file for licensing information. + +#pragma once + +#include "absl/types/span.h" +#include +#include +#include + +// Undefine this if you don't want to use hardware CRC32 instructions +#define SUBSPACE_HARDWARE_CRC 1 + +namespace subspace { + +extern "C" { +uint32_t SubspaceCRC32(uint32_t crc, const uint8_t *data, size_t length); +} + +template +uint32_t +CalculateChecksum(const std::array, N> &data) { + uint32_t crc = 0xFFFFFFFF; + for (size_t i = 0; i < N; i++) { + crc = SubspaceCRC32(crc, data[i].data(), data[i].size()); + } + return ~crc; +} + +template +bool VerifyChecksum(const std::array, N> &data, + uint32_t checksum) { + return CalculateChecksum(data) == checksum; +} + +} // namespace subspace diff --git a/client/client.cc b/client/client.cc index 513daf2..c4433ac 100644 --- a/client/client.cc +++ b/client/client.cc @@ -4,6 +4,7 @@ #include "client/client.h" #include "absl/strings/str_format.h" +#include "client.h" #include "proto/subspace.pb.h" #include "toolbelt/clock.h" #include "toolbelt/hexdump.h" @@ -11,13 +12,80 @@ #include "toolbelt/sockets.h" #include #include - namespace subspace { using ClientChannel = details::ClientChannel; using SubscriberImpl = details::SubscriberImpl; using PublisherImpl = details::PublisherImpl; +// Get the current thread as a 64 bit number. 
+static uint64_t GetThreadId() { + return reinterpret_cast(pthread_self()); +} + +ClientImpl::ClientLockGuard::ClientLockGuard(ClientImpl *client, + LockMode lock_mode) + : client_(client), lock_mode_(lock_mode) { + if (!client_->thread_safe_) { + return; + } + switch (lock_mode_) { + case LockMode::kAutoLock: + Lock(); + break; + case LockMode::kMaybeLocked: + case LockMode::kDeferredLock: { + uint64_t old_thread_id = GetThreadId(); + uint64_t new_thread_id = old_thread_id; + // If we the current owner of the lock we allow to continue without + // relocking. If we are not the current owner, we lock the mutex. + if (!client_->owner_thread_id_.compare_exchange_strong( + old_thread_id, new_thread_id, std::memory_order_relaxed)) { + Lock(); + } + client_->owner_thread_id_.store(new_thread_id, std::memory_order_relaxed); + break; + } + } +} + +ClientImpl::ClientLockGuard::~ClientLockGuard() { + if (!client_->thread_safe_) { + return; + } + switch (lock_mode_) { + case LockMode::kAutoLock: + Unlock(); + break; + case LockMode::kDeferredLock: + if (!committed_) { + Unlock(); + } + break; + case LockMode::kMaybeLocked: + Unlock(); + break; + } +} + +void ClientImpl::ClientLockGuard::Lock() { + client_->mutex_.lock(); + locked_ = true; +} + +void ClientImpl::ClientLockGuard::Unlock() { + client_->mutex_.unlock(); + locked_ = false; + client_->owner_thread_id_.store(0, std::memory_order_relaxed); +} + +void ClientImpl::ClientLockGuard::CommitLock() { + if (lock_mode_ != LockMode::kDeferredLock) { + return; + } + committed_ = true; +} + absl::Status ClientImpl::CheckConnected() const { if (!socket_.Connected()) { return absl::InternalError( @@ -28,6 +96,7 @@ absl::Status ClientImpl::CheckConnected() const { absl::Status ClientImpl::Init(const std::string &server_socket, const std::string &client_name) { + ClientLockGuard guard(this); if (socket_.Connected()) { return absl::InternalError("Client is already connected to the server; " "Init() called twice perhaps?"); @@ -50,12 
+119,15 @@ absl::Status ClientImpl::Init(const std::string &server_socket, } scb_fd_ = std::move(fds[resp.init().scb_fd_index()]); session_id_ = resp.init().session_id(); + server_user_id_ = resp.init().user_id(); + server_group_id_ = resp.init().group_id(); return absl::OkStatus(); } absl::Status ClientImpl::RegisterDroppedMessageCallback( SubscriberImpl *subscriber, std::function callback) { + ClientLockGuard guard(this); if (dropped_message_callbacks_.find(subscriber) != dropped_message_callbacks_.end()) { return absl::InternalError( @@ -69,6 +141,7 @@ absl::Status ClientImpl::RegisterDroppedMessageCallback( absl::Status ClientImpl::UnregisterDroppedMessageCallback(SubscriberImpl *subscriber) { + ClientLockGuard guard(this); auto it = dropped_message_callbacks_.find(subscriber); if (it == dropped_message_callbacks_.end()) { return absl::InternalError(absl::StrFormat( @@ -82,6 +155,7 @@ ClientImpl::UnregisterDroppedMessageCallback(SubscriberImpl *subscriber) { absl::Status ClientImpl::RegisterMessageCallback( SubscriberImpl *subscriber, std::function callback) { + ClientLockGuard guard(this); auto it = message_callbacks_.find(subscriber); if (it != message_callbacks_.end()) { return absl::InternalError(absl::StrFormat( @@ -93,6 +167,7 @@ absl::Status ClientImpl::RegisterMessageCallback( } absl::Status ClientImpl::UnregisterMessageCallback(SubscriberImpl *subscriber) { + ClientLockGuard guard(this); auto it = message_callbacks_.find(subscriber); if (it == message_callbacks_.end()) { return absl::InternalError(absl::StrFormat( @@ -106,6 +181,7 @@ absl::Status ClientImpl::UnregisterMessageCallback(SubscriberImpl *subscriber) { absl::Status ClientImpl::RegisterResizeCallback( PublisherImpl *publisher, std::function callback) { + ClientLockGuard guard(this); if (resize_callbacks_.find(publisher) != resize_callbacks_.end()) { return absl::InternalError(absl::StrFormat( "A resize callback has already been registered for channel %s\n", @@ -116,6 +192,7 @@ absl::Status 
ClientImpl::RegisterResizeCallback( } absl::Status ClientImpl::UnregisterResizeCallback(PublisherImpl *publisher) { + ClientLockGuard guard(this); auto it = resize_callbacks_.find(publisher); if (it == resize_callbacks_.end()) { return absl::InternalError(absl::StrFormat( @@ -128,12 +205,18 @@ absl::Status ClientImpl::UnregisterResizeCallback(PublisherImpl *publisher) { absl::Status ClientImpl::ProcessAllMessages(details::SubscriberImpl *subscriber, ReadMode mode) { - auto it = message_callbacks_.find(subscriber); - if (it == message_callbacks_.end()) { - return absl::InternalError(absl::StrFormat( - "No message callback has been registered for channel %s\n", - subscriber->Name())); + std::function callback; + { + ClientLockGuard guard(this); + auto it = message_callbacks_.find(subscriber); + if (it == message_callbacks_.end()) { + return absl::InternalError(absl::StrFormat( + "No message callback has been registered for channel %s\n", + subscriber->Name())); + } + callback = it->second; } + for (;;) { absl::StatusOr msg = ReadMessage(subscriber, mode); if (!msg.ok()) { @@ -142,7 +225,7 @@ absl::Status ClientImpl::ProcessAllMessages(details::SubscriberImpl *subscriber, if (msg->length == 0) { break; } - it->second(subscriber, std::move(*msg)); + callback(subscriber, std::move(*msg)); } return absl::OkStatus(); } @@ -160,6 +243,15 @@ ClientImpl::GetAllMessages(details::SubscriberImpl *subscriber, ReadMode mode) { } r.push_back(std::move(*msg)); } + // We we are out of unique_ptrs, untrigger the subscriber so that it will be + // retriggered when a unique_ptr is released. 
+ if (subscriber->NumActiveMessages() == subscriber->MaxActiveMessages()) { + // std::cerr << "read {} messages, untriggering subscriber\n"_format( + // subscriber->numActiveMessages()); + subscriber->Untrigger(); + return r; + } + if (!r.empty()) { subscriber->Trigger(); } @@ -169,6 +261,7 @@ ClientImpl::GetAllMessages(details::SubscriberImpl *subscriber, ReadMode mode) { absl::StatusOr ClientImpl::CreatePublisher(const std::string &channel_name, const PublisherOptions &opts) { + ClientLockGuard guard(this); if (absl::Status status = CheckConnected(); !status.ok()) { return status; } @@ -204,9 +297,11 @@ ClientImpl::CreatePublisher(const std::string &channel_name, std::shared_ptr channel = std::make_shared( channel_name, opts.num_slots, pub_resp.channel_id(), pub_resp.publisher_id(), pub_resp.vchan_id(), session_id_, - pub_resp.type(), opts, [this](Channel* c) { - return CheckReload(static_cast(c)); - }); + pub_resp.type(), opts, + [this](Channel *c) { + return CheckReload(static_cast(c)); + }, + server_user_id_, server_group_id_); SharedMemoryFds channel_fds(std::move(fds[pub_resp.ccb_fd_index()]), std::move(fds[pub_resp.bcb_fd_index()])); @@ -231,20 +326,23 @@ ClientImpl::CreatePublisher(const std::string &channel_name, channel->SetNumUpdates(pub_resp.num_sub_updates()); + // An unreliable publisher always needs a slot but if we are a bridge we + // don't active the channels as the original publisher's activation message + // will be used. if (!opts.IsReliable()) { // A publisher needs a slot. Allocate one. 
- MessageSlot *slot = channel->FindFreeSlotUnreliable( - channel->GetPublisherId()); + MessageSlot *slot = + channel->FindFreeSlotUnreliable(channel->GetPublisherId()); if (slot == nullptr) { return absl::InternalError("No slot available for publisher"); } channel->SetSlot(slot); - if (opts.Activate()) { + if (!opts.IsBridge() && opts.Activate()) { if (absl::Status status = ActivateChannel(channel.get()); !status.ok()) { return status; } } - } else { + } else if (!opts.IsBridge()) { // Send a single activation message to the channel. absl::Status status = ActivateReliableChannel(channel.get()); if (!status.ok()) { @@ -279,9 +377,11 @@ ClientImpl::CreatePublisher(const std::string &channel_name, int slot_size, options.num_slots = num_slots; return CreatePublisher(channel_name, options); } + absl::StatusOr ClientImpl::CreateSubscriber(const std::string &channel_name, const SubscriberOptions &opts) { + ClientLockGuard guard(this); if (absl::Status status = CheckConnected(); !status.ok()) { return status; } @@ -319,9 +419,11 @@ ClientImpl::CreateSubscriber(const std::string &channel_name, std::shared_ptr channel = std::make_shared( channel_name, sub_resp.num_slots(), sub_resp.channel_id(), sub_resp.subscriber_id(), sub_resp.vchan_id(), session_id_, - sub_resp.type(), opts, [this](Channel* c) { - return CheckReload(static_cast(c)); - }); + sub_resp.type(), opts, + [this](Channel *c) { + return CheckReload(static_cast(c)); + }, + server_user_id_, server_group_id_); channel->SetNumSlots(sub_resp.num_slots()); @@ -358,21 +460,64 @@ ClientImpl::CreateSubscriber(const std::string &channel_name, // channel->Dump(); channels_.insert(channel); - return Subscriber(shared_from_this(), channel); + auto sub = Subscriber(shared_from_this(), channel); + return sub; +} + +static uint64_t ExpandSlotSize(uint64_t slotSize) { + // smaller than 4K, double the size + // 4K..16K, times 1.5 + // 16K..64K, times 1.25 + // 64K..256K, times 1.125 + // 256K..1M, times 1.0625 + // 1M..., times 
1.03125 + static constexpr double multipliers[] = {2.0, 1.5, 1.25, + 1.125, 1.0625, 1.03125}; + static constexpr size_t sizeRanges[] = {4096, 16384, 65536, 262144, 1048576}; + size_t i = 0; + for (; i < std::size(sizeRanges); i++) { + if (slotSize <= sizeRanges[i]) { + break; + } + } + // i will be the index of the multiplier to use. It might be one past the + // end of the sizeRanges array. + return Aligned(uint64_t(double(slotSize) * multipliers[i])); } absl::StatusOr ClientImpl::GetMessageBuffer(PublisherImpl *publisher, - int32_t max_size) { + int32_t max_size, + bool lock) { + auto span_or_status = GetMessageBufferSpan(publisher, max_size, lock); + if (!span_or_status.ok()) { + return span_or_status.status(); + } + if (absl::Span span = span_or_status.value(); !span.empty()) { + return span.data(); + } + return nullptr; +} + +absl::StatusOr> +ClientImpl::GetMessageBufferSpan(PublisherImpl *publisher, int32_t max_size, + bool lock) { + // If the current thread is calling this while it already owns the mutex we + // allow it to continue without locking. If another t thread is trying to + // call this lock the mutex until the current thread releases it. + ClientLockGuard guard(this, lock ? LockMode::kDeferredLock + : LockMode::kMaybeLocked); if (publisher->IsReliable()) { publisher->ClearPollFd(); } int32_t slot_size = publisher->SlotSize(); + size_t span_size = size_t(slot_size); if (max_size != -1 && max_size > slot_size) { int32_t new_slot_size = slot_size; assert(new_slot_size > 0); while (new_slot_size <= slot_size || new_slot_size < max_size) { - new_slot_size *= 2; + new_slot_size = ExpandSlotSize(new_slot_size); + span_size = size_t(new_slot_size); } if (absl::Status status = ResizeChannel(publisher, new_slot_size); @@ -398,12 +543,12 @@ absl::StatusOr ClientImpl::GetMessageBuffer(PublisherImpl *publisher, // to stop the publisher taking all the slots. An incoming subscriber // would miss all those messages and that's not reliable. 
if (publisher->NumSubscribers(publisher->VirtualChannelId()) == 0) { - return nullptr; + return absl::Span(); } - MessageSlot *slot = publisher->FindFreeSlotReliable( - publisher->GetPublisherId()); + MessageSlot *slot = + publisher->FindFreeSlotReliable(publisher->GetPublisherId()); if (slot == nullptr) { - return nullptr; + return absl::Span(); } publisher->SetSlot(slot); } @@ -411,19 +556,31 @@ absl::StatusOr ClientImpl::GetMessageBuffer(PublisherImpl *publisher, void *buffer = publisher->GetCurrentBufferAddress(); if (buffer == nullptr) { return absl::InternalError( - absl::StrFormat("Channel %s has no buffer", publisher->Name())); + absl::StrFormat("1 Channel %s has no buffer", publisher->Name())); + } + // If we are returning a valid message buffer we commit the lock so that we + // can hold onto it until the message is published or cancelled. + if (span_size > 0) { + guard.CommitLock(); } - return buffer; + return absl::Span(reinterpret_cast(buffer), + span_size); } absl::StatusOr ClientImpl::PublishMessage(PublisherImpl *publisher, int64_t message_size) { - return PublishMessageInternal(publisher, message_size, /*omit_prefix=*/false); + return PublishMessageInternal(publisher, message_size, /*omit_prefix=*/false, + /*use_prefix_slot_id=*/false); } absl::StatusOr ClientImpl::PublishMessageInternal(PublisherImpl *publisher, - int64_t message_size, bool omit_prefix) { + int64_t message_size, bool omit_prefix, + bool use_prefix_slot_id) { + // Lock is already held by the call to GetMessageBufferSpan. This RAII + // instance wil relesas the lock when we return from this function. + ClientLockGuard guard(this, LockMode::kMaybeLocked); + // Check if there are any new subscribers and if so, load their trigger fds. 
if (absl::Status status = ReloadSubscribersIfNecessary(publisher); !status.ok()) { @@ -434,6 +591,16 @@ ClientImpl::PublishMessageInternal(PublisherImpl *publisher, return absl::InternalError("Message size must be greater than 0"); } + int32_t old_slot_id = publisher->CurrentSlotId(); + + if (publisher->on_send_callback_ != nullptr) { + absl::StatusOr status_or_size = publisher->on_send_callback_( + publisher->GetCurrentBufferAddress(), message_size); + if (!status_or_size.ok()) { + return status_or_size.status(); + } + message_size = status_or_size.value(); + } publisher->SetMessageSize(message_size); MessageSlot *old_slot = publisher->CurrentSlot(); if (debug_) { @@ -443,32 +610,25 @@ ClientImpl::PublishMessageInternal(PublisherImpl *publisher, } } - bool notify = false; Channel::PublishedMessage msg = publisher->ActivateSlotAndGetAnother( - publisher->IsReliable(), /*is_activation=*/false, omit_prefix, ¬ify); + publisher->IsReliable(), /*is_activation=*/false, omit_prefix, + use_prefix_slot_id); // Prevent use of old_slot. old_slot = nullptr; publisher->SetSlot(msg.new_slot); - // Only trigger subscribers if we need to. - // We could trigger for every message, but that is unnecessary and - // slower. It would basically mean a write to pipe for every - // message sent. That's fast, but if we can avoid it, things - // would be faster. - if (notify) { - publisher->TriggerSubscribers(); - if (absl::Status status = publisher->UnmapUnusedBuffers(); !status.ok()) { - return status; - } + publisher->TriggerSubscribers(); + if (absl::Status status = publisher->UnmapUnusedBuffers(); !status.ok()) { + return status; } if (msg.new_slot == nullptr) { if (publisher->IsReliable()) { // Reliable publishers don't get a slot until it's asked for. 
return Message(message_size, nullptr, msg.ordinal, msg.timestamp, - publisher->VirtualChannelId(), false, -1); + publisher->VirtualChannelId(), false, -1, false); } return absl::InternalError( absl::StrFormat("Out of slots for channel %s", publisher->Name())); @@ -480,13 +640,18 @@ ClientImpl::PublishMessageInternal(PublisherImpl *publisher, } return Message(message_size, nullptr, msg.ordinal, msg.timestamp, - publisher->VirtualChannelId(), false, msg.new_slot->id); + publisher->VirtualChannelId(), false, old_slot_id, false); +} + +void ClientImpl::CancelPublish(PublisherImpl *publisher) { + // Creating this object will unlock the mutex when it goes out of scope. + ClientLockGuard guard(this, LockMode::kMaybeLocked); } absl::Status ClientImpl::WaitForReliablePublisher(PublisherImpl *publisher, std::chrono::nanoseconds timeout, - co::Coroutine *c) { + const co::Coroutine *c) { if (absl::Status status = CheckConnected(); !status.ok()) { return status; } @@ -532,7 +697,7 @@ ClientImpl::WaitForReliablePublisher(PublisherImpl *publisher, const toolbelt::FileDescriptor &fd, std::chrono::nanoseconds timeout, - co::Coroutine *c) { + const co::Coroutine *c) { if (absl::Status status = CheckConnected(); !status.ok()) { return status; } @@ -558,17 +723,17 @@ ClientImpl::WaitForReliablePublisher(PublisherImpl *publisher, {.fd = publisher->GetPollFd().Fd(), .events = POLLIN}, {.fd = fd.Fd(), .events = POLLIN}}; int e = ::poll(fds, 2, timeout_ns == 0 ? -1 : timeout_ns / 1000000); - // Since we are waiting forever will can only get the value 1 from the poll. - // We will never get 0 since there is no timeout. Anything else (can only - // be -1) will be an error. 
- if (e <= 0) { + if (timeout_ns == 0 && e == 0) { + return absl::InternalError("Timeout waiting for reliable publisher"); + } + if (e < 0) { return absl::InternalError( absl::StrFormat("Error from poll waiting for reliable publisher: %s", strerror(errno))); } - if (fds[0].revents & POLLIN) { + if (fds[0].revents & (POLLIN | POLLHUP)) { result = fds[0].fd; // The publisher's poll fd triggered. - } else if (fds[1].revents & POLLIN) { + } else if (fds[1].revents & (POLLIN | POLLHUP)) { result = fds[1].fd; // The passed in fd triggered. } else { return absl::InternalError( @@ -581,7 +746,7 @@ ClientImpl::WaitForReliablePublisher(PublisherImpl *publisher, absl::Status ClientImpl::WaitForSubscriber(SubscriberImpl *subscriber, std::chrono::nanoseconds timeout, - co::Coroutine *c) { + const co::Coroutine *c) { if (absl::Status status = CheckConnected(); !status.ok()) { return status; } @@ -617,7 +782,7 @@ absl::Status ClientImpl::WaitForSubscriber(SubscriberImpl *subscriber, absl::StatusOr ClientImpl::WaitForSubscriber( SubscriberImpl *subscriber, const toolbelt::FileDescriptor &fd, - std::chrono::nanoseconds timeout, co::Coroutine *c) { + std::chrono::nanoseconds timeout, const co::Coroutine *c) { if (absl::Status status = CheckConnected(); !status.ok()) { return status; } @@ -636,17 +801,17 @@ absl::StatusOr ClientImpl::WaitForSubscriber( {.fd = subscriber->GetPollFd().Fd(), .events = POLLIN}, {.fd = fd.Fd(), .events = POLLIN}}; int e = ::poll(fds, 2, timeout_ns == 0 ? -1 : timeout_ns / 1000000); - // Since we are waiting forever will can only get the value 1 from the poll. - // We will never get 0 since there is no timeout. Anything else (can only - // be -1) will be an error. 
- if (e <= 0) { + if (timeout_ns == 0 && e == 0) { + return absl::InternalError("Timeout waiting for subscriber"); + } + if (e < 0) { return absl::InternalError(absl::StrFormat( "Error from poll waiting for subscriber: %s", strerror(errno))); } - if (fds[0].revents & POLLIN) { + if (fds[0].revents & (POLLIN | POLLHUP)) { // The subscriber's poll fd triggered. return fds[0].fd; - } else if (fds[1].revents & POLLIN) { + } else if (fds[1].revents & (POLLIN | POLLHUP)) { // The passed in fd triggered. return fds[1].fd; } else { @@ -712,6 +877,7 @@ ClientImpl::ReadMessageInternal(SubscriberImpl *subscriber, ReadMode mode, if (it != dropped_message_callbacks_.end()) { it->second(subscriber, drops); } + subscriber->RecordDroppedMessages(drops); if (subscriber->options_.log_dropped_messages) { logger_.Log(toolbelt::LogLevel::kWarning, "Dropped %d message%s on channel %s", drops, @@ -723,7 +889,14 @@ ClientImpl::ReadMessageInternal(SubscriberImpl *subscriber, ReadMode mode, MessagePrefix *prefix = subscriber->Prefix(new_slot); bool is_activation = false; + bool checksum_error = false; if (prefix != nullptr) { + if (prefix->HasChecksum()) { + auto data = + GetMessageChecksumData(prefix, subscriber->GetCurrentBufferAddress(), + new_slot->message_size); + checksum_error = !subscriber->ValidateChecksum(data, prefix->checksum); + } if ((prefix->flags & kMessageActivate) != 0) { is_activation = true; if (!pass_activation) { @@ -737,15 +910,26 @@ ClientImpl::ReadMessageInternal(SubscriberImpl *subscriber, ReadMode mode, } } } + // Call the on receive callback. + if (subscriber->on_receive_callback_ != nullptr) { + absl::StatusOr status_or_size = subscriber->on_receive_callback_( + subscriber->GetCurrentBufferAddress(), new_slot->message_size); + if (!status_or_size.ok()) { + return status_or_size.status(); + } + new_slot->message_size = status_or_size.value(); + } + if (new_slot->message_size <= 0) { + return Message(); + } // We have a new slot, clear the subscriber's slot. 
subscriber->ClearActiveMessage(); // Allocate a new active message for the slot. auto msg = subscriber->SetActiveMessage( - new_slot->message_size, new_slot, - subscriber->GetCurrentBufferAddress(), - subscriber->CurrentOrdinal(), subscriber->Timestamp(), new_slot->vchan_id, - is_activation); + new_slot->message_size, new_slot, subscriber->GetCurrentBufferAddress(), + subscriber->CurrentOrdinal(), subscriber->Timestamp(new_slot), + new_slot->vchan_id, is_activation, checksum_error); // If we are unable to allocate a new message (due to message limits) // restore the slot so that we pick it up next time. @@ -754,9 +938,8 @@ ClientImpl::ReadMessageInternal(SubscriberImpl *subscriber, ReadMode mode, // Subscriber does not have a slot now but the slot it had is still active. } else { // We have a slot, claim it. - subscriber->ClaimSlot( - new_slot, - subscriber->VirtualChannelId(), mode == ReadMode::kReadNewest); + subscriber->ClaimSlot(new_slot, subscriber->VirtualChannelId(), + mode == ReadMode::kReadNewest); } auto ret_msg = Message(msg); if (subscriber->IsBridge()) { @@ -766,15 +949,23 @@ ClientImpl::ReadMessageInternal(SubscriberImpl *subscriber, ReadMode mode, // subscribers which are used to send data between servers. subscriber->ClearActiveMessage(); } + if (checksum_error && !subscriber->PassChecksumErrors()) { + return absl::InternalError("Checksum verification failed"); + } + // A checksum error that is ignored results in a valid message with the + // checksum_error flag set. return ret_msg; } absl::StatusOr ClientImpl::ReadMessage(SubscriberImpl *subscriber, ReadMode mode) { + + ClientLockGuard guard(this); // If the channel is a placeholder (no publishers present), look // in the SCB to see if a new publisher has been created and if so, // talk to the server to get the information to reload the shared - // memory. If there still isn't a publisher, we will still be a placeholder. + // memory. 
If there still isn't a publisher, we will still be a + // placeholder. if (subscriber->IsPlaceholder()) { absl::Status status = ReloadSubscriber(subscriber); if (!status.ok() || subscriber->IsPlaceholder()) { @@ -798,23 +989,25 @@ absl::StatusOr ClientImpl::ReadMessage(SubscriberImpl *subscriber, absl::StatusOr ClientImpl::FindMessageInternal(SubscriberImpl *subscriber, uint64_t timestamp) { - MessageSlot *new_slot = subscriber->FindMessage(timestamp); if (new_slot == nullptr) { // Not found. return Message(); } - return Message(new_slot->message_size, - subscriber->GetCurrentBufferAddress(), + return Message(new_slot->message_size, subscriber->GetCurrentBufferAddress(), subscriber->CurrentOrdinal(), subscriber->Timestamp(), - subscriber->VirtualChannelId(), false, new_slot->id); + subscriber->VirtualChannelId(), false, new_slot->id, false); } absl::StatusOr ClientImpl::FindMessage(SubscriberImpl *subscriber, uint64_t timestamp) { + + ClientLockGuard guard(this); + // If the channel is a placeholder (no publishers present), contact the // server to see if there is now a publisher. This will reload the shared - // memory. If there still isn't a publisher, we will still be a placeholder. + // memory. If there still isn't a publisher, we will still be a + // placeholder. 
if (subscriber->IsPlaceholder()) { absl::Status status = ReloadSubscriber(subscriber); if (!status.ok() || subscriber->IsPlaceholder()) { @@ -829,6 +1022,7 @@ absl::StatusOr ClientImpl::FindMessage(SubscriberImpl *subscriber, if (!status.ok()) { return status; } + return FindMessageInternal(subscriber, timestamp); } @@ -838,7 +1032,7 @@ struct pollfd ClientImpl::GetPollFd(SubscriberImpl *subscriber) const { } struct pollfd ClientImpl::GetPollFd(PublisherImpl *publisher) const { - static struct pollfd fd{.fd = -1, .events = POLLIN}; + static struct pollfd fd { .fd = -1, .events = POLLIN }; if (!publisher->IsReliable()) { return fd; } @@ -859,7 +1053,8 @@ ClientImpl::GetFileDescriptor(PublisherImpl *publisher) const { return publisher->GetPollFd(); } -int64_t ClientImpl::GetCurrentOrdinal(SubscriberImpl *sub) const { +int64_t ClientImpl::GetCurrentOrdinal(SubscriberImpl *sub) { + ClientLockGuard guard(this); MessageSlot *slot = sub->CurrentSlot(); if (slot == nullptr) { return -1; @@ -870,7 +1065,6 @@ int64_t ClientImpl::GetCurrentOrdinal(SubscriberImpl *sub) const { bool ClientImpl::CheckReload(ClientChannel *channel) { auto reloaded = ReloadBuffersIfNecessary(channel); if (!reloaded.ok()) { - std::cerr << "Error reloading buffers for channel " << channel->Name() << ": " << reloaded.status() << std::endl; return false; } return *reloaded; @@ -1052,8 +1246,8 @@ ClientImpl::ReloadReliablePublishersIfNecessary(SubscriberImpl *subscriber) { // is created. This is to ensure that the reliable subscribers see // on message and thus keep a reference to it. 
absl::Status ClientImpl::ActivateReliableChannel(PublisherImpl *publisher) { - MessageSlot *slot = publisher->FindFreeSlotReliable( - publisher->GetPublisherId()); + MessageSlot *slot = + publisher->FindFreeSlotReliable(publisher->GetPublisherId()); if (slot == nullptr) { return absl::InternalError( absl::StrFormat("Channel %s has no free slots", publisher->Name())); @@ -1070,7 +1264,7 @@ absl::Status ClientImpl::ActivateReliableChannel(PublisherImpl *publisher) { publisher->ActivateSlotAndGetAnother( /*reliable=*/true, /*is_activation=*/true, - /*omit_prefix=*/false, /*notify=*/nullptr); + /*omit_prefix=*/false, /*use_prefix_slot_id=*/false); publisher->SetSlot(nullptr); publisher->TriggerSubscribers(); @@ -1085,7 +1279,7 @@ absl::Status ClientImpl::ActivateChannel(PublisherImpl *publisher) { void *buffer = publisher->GetCurrentBufferAddress(); if (buffer == nullptr) { return absl::InternalError( - absl::StrFormat("Channel %s has no buffer", publisher->Name())); + absl::StrFormat("3 Channel %s has no buffer", publisher->Name())); } MessageSlot *slot = publisher->CurrentSlot(); slot->message_size = 1; @@ -1093,7 +1287,7 @@ absl::Status ClientImpl::ActivateChannel(PublisherImpl *publisher) { Channel::PublishedMessage msg = publisher->ActivateSlotAndGetAnother( /*reliable=*/false, /*is_activation=*/true, - /*omit_prefix=*/false, /*notify=*/nullptr); + /*omit_prefix=*/false, /*use_prefix_slot_id=*/false); publisher->SetSlot(msg.new_slot); publisher->TriggerSubscribers(); @@ -1110,6 +1304,7 @@ absl::Status ClientImpl::RemoveChannel(ClientChannel *channel) { } absl::Status ClientImpl::RemovePublisher(PublisherImpl *publisher) { + ClientLockGuard guard(this); if (absl::Status status = CheckConnected(); !status.ok()) { return status; } @@ -1135,6 +1330,7 @@ absl::Status ClientImpl::RemovePublisher(PublisherImpl *publisher) { } absl::Status ClientImpl::RemoveSubscriber(SubscriberImpl *subscriber) { + ClientLockGuard guard(this); if (absl::Status status = CheckConnected(); 
!status.ok()) { return status; } @@ -1159,10 +1355,172 @@ absl::Status ClientImpl::RemoveSubscriber(SubscriberImpl *subscriber) { return RemoveChannel(subscriber); } +absl::StatusOr +ClientImpl::GetChannelCounters(const std::string &channel_name) const { + // TODO: find a better way to search for a channel. + for (auto it = channels_.begin(); it != channels_.end(); ++it) { + if ((*it)->Name() == channel_name) { + return (*it)->GetCounters(); + } + } + return absl::InternalError( + absl::StrFormat("Channel %s doesn't exist", channel_name)); +} + const ChannelCounters &ClientImpl::GetChannelCounters(ClientChannel *channel) { return channel->GetCounters(); } +absl::StatusOr +ClientImpl::GetChannelInfo(const std::string &channel) { + if (absl::Status status = CheckConnected(); !status.ok()) { + return status; + } + Request req; + auto *cmd = req.mutable_get_channel_info(); + cmd->set_channel_name(channel); + + // Send request to server and wait for response. + Response response; + std::vector fds; + if (absl::Status status = SendRequestReceiveResponse(req, response, fds); + !status.ok()) { + return status; + } + + auto &resp = response.get_channel_info(); + if (!resp.error().empty()) { + return absl::InternalError(resp.error()); + } + // There will be one channel information in the response. 
+ if (resp.channels_size() != 1) { + return absl::InternalError("Invalid response for getChannelInfo"); + } + ChannelInfo result; + const ChannelInfoProto &info = resp.channels()[0]; + result.channel_name = info.name(); + result.num_publishers = info.num_pubs(); + result.num_subscribers = info.num_subs(); + result.num_bridge_pubs = info.num_bridge_pubs(); + result.num_bridge_subs = info.num_bridge_subs(); + result.reliable = info.is_reliable(); + result.type = info.type(); + result.slot_size = info.slot_size(); + result.num_slots = info.num_slots(); + return result; +} + +absl::StatusOr> ClientImpl::GetChannelInfo() { + ClientLockGuard guard(this); + if (absl::Status status = CheckConnected(); !status.ok()) { + return status; + } + Request req; + [[maybe_unused]] auto cmd = req.mutable_get_channel_info(); + + // Send request to server and wait for response. + Response response; + std::vector fds; + if (absl::Status status = SendRequestReceiveResponse(req, response, fds); + !status.ok()) { + return status; + } + auto &resp = response.get_channel_info(); + if (!resp.error().empty()) { + return absl::InternalError(resp.error()); + } + std::vector r; + for (auto &info : resp.channels()) { + ChannelInfo result; + result.channel_name = info.name(); + result.num_publishers = info.num_pubs(); + result.num_subscribers = info.num_subs(); + result.num_bridge_pubs = info.num_bridge_pubs(); + result.num_bridge_subs = info.num_bridge_subs(); + result.reliable = info.is_reliable(); + result.type = info.type(); + result.slot_size = info.slot_size(); + result.num_slots = info.num_slots(); + r.push_back(result); + } + return r; +} + +absl::StatusOr ClientImpl::ChannelExists(const std::string &channelName) { + ClientLockGuard guard(this); + if (absl::Status status = CheckConnected(); !status.ok()) { + return status; + } + absl::StatusOr info = GetChannelInfo(channelName); + return info.ok(); +} + +absl::StatusOr +ClientImpl::GetChannelStats(const std::string &channel) { + 
ClientLockGuard guard(this); + if (absl::Status status = CheckConnected(); !status.ok()) { + return status; + } + Request req; + auto *cmd = req.mutable_get_channel_stats(); + cmd->set_channel_name(channel); + + // Send request to server and wait for response. + Response response; + std::vector fds; + if (absl::Status status = SendRequestReceiveResponse(req, response, fds); + !status.ok()) { + return status; + } + auto &resp = response.get_channel_stats(); + if (!resp.error().empty()) { + return absl::InternalError(resp.error()); + } + // There will be one channel information in the response. + if (resp.channels_size() != 1) { + return absl::InternalError("Invalid response for getChannelStats"); + } + ChannelStats result; + const ChannelStatsProto &stats = resp.channels()[0]; + result.channel_name = stats.channel_name(); + result.total_bytes = stats.total_bytes(); + result.total_messages = stats.total_messages(); + result.max_message_size = stats.max_message_size(); + return result; +} + +absl::StatusOr> ClientImpl::GetChannelStats() { + ClientLockGuard guard(this); + if (absl::Status status = CheckConnected(); !status.ok()) { + return status; + } + + Request req; + [[maybe_unused]] auto cmd = req.mutable_get_channel_stats(); + + // Send request to server and wait for response. 
+ Response response; + std::vector fds; + if (absl::Status status = SendRequestReceiveResponse(req, response, fds); + !status.ok()) { + return status; + } + auto &resp = response.get_channel_stats(); + if (!resp.error().empty()) { + return absl::InternalError(resp.error()); + } + std::vector r; + for (auto &stats : resp.channels()) { + ChannelStats result; + result.channel_name = stats.channel_name(); + result.total_bytes = stats.total_bytes(); + result.total_messages = stats.total_messages(); + result.max_message_size = stats.max_message_size(); + r.push_back(result); + } + return r; +} + absl::Status ClientImpl::ResizeChannel(PublisherImpl *publisher, int32_t new_slot_size) { if (publisher->IsFixedSize()) { @@ -1188,29 +1546,35 @@ absl::Status ClientImpl::ResizeChannel(PublisherImpl *publisher, absl::Status ClientImpl::SendRequestReceiveResponse( const Request &req, Response &response, std::vector &fds) { - // SendMessage needs 4 bytes before the buffer passed to - // use for the length. - char *sendbuf = buffer_ + sizeof(int32_t); - constexpr size_t kSendBufLen = sizeof(buffer_) - sizeof(int32_t); - if (!req.SerializeToArray(sendbuf, kSendBufLen)) { - return absl::InternalError("Failed to serialize request"); - } + // std::cerr << "Sending request " << req.DebugString() << "\n"; + { + // SendMessage needs 4 bytes before the buffer passed to + // use for the length. 
+ size_t msg_len = req.ByteSizeLong(); + std::vector send_msg(sizeof(int32_t) + msg_len); + char *sendbuf = send_msg.data() + sizeof(int32_t); - size_t length = req.ByteSizeLong(); - absl::StatusOr n = socket_.SendMessage(sendbuf, length, co_); - if (!n.ok()) { - socket_.Close(); - return n.status(); + if (!req.SerializeToArray(sendbuf, msg_len)) { + return absl::InternalError("Failed to serialize request"); + } + + absl::StatusOr n = socket_.SendMessage(sendbuf, msg_len, co_); + if (!n.ok()) { + socket_.Close(); + return n.status(); + } } - // Wait for response and put it in the same buffer we used for send. - n = socket_.ReceiveMessage(buffer_, sizeof(buffer_), co_); - if (!n.ok()) { + // Wait for response and any fds. + absl::StatusOr> recv_msg = + socket_.ReceiveVariableLengthMessage(co_); + if (!recv_msg.ok()) { socket_.Close(); - return n.status(); + return recv_msg.status(); } - if (!response.ParseFromArray(buffer_, static_cast(*n))) { + if (!response.ParseFromArray(recv_msg->data(), + static_cast(recv_msg->size()))) { socket_.Close(); return absl::InternalError("Failed to parse response"); } diff --git a/client/client.h b/client/client.h index d6199a6..a0d0679 100644 --- a/client/client.h +++ b/client/client.h @@ -2,18 +2,21 @@ // All Rights Reserved // See LICENSE file for licensing information. 
-#ifndef __CLIENT_CLIENT_H -#define __CLIENT_CLIENT_H +#ifndef _xCLIENT_CLIENT_H +#define _xCLIENT_CLIENT_H #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/status/status.h" #include "absl/status/statusor.h" +#include "absl/types/span.h" + #include "client/message.h" #include "client/options.h" #include "client/publisher.h" #include "client/subscriber.h" -#include "common/channel.h" #include "co/coroutine.h" +#include "common/channel.h" + #include "toolbelt/fd.h" #include "toolbelt/logging.h" #include "toolbelt/sockets.h" @@ -21,9 +24,11 @@ #include #include #include +#include +#include #include #include - +#include namespace subspace { enum class ReadMode { @@ -31,6 +36,25 @@ enum class ReadMode { kReadNewest, }; +struct ChannelInfo { + std::string channel_name; + int num_publishers; + int num_subscribers; + int num_bridge_pubs; + int num_bridge_subs; + std::string type; + uint64_t slot_size; + int num_slots; + bool reliable; +}; + +struct ChannelStats { + std::string channel_name; + uint64_t total_bytes; + uint64_t total_messages; + uint64_t max_message_size; +}; + template class weak_ptr; template class shared_ptr { @@ -176,7 +200,7 @@ class ClientImpl : public std::enable_shared_from_this { // // These are public so that they can be accessed by std::make_shared. // You shouldn't create these yourself - create a Client instead. - ClientImpl(co::Coroutine *c = nullptr) : co_(c) {} + ClientImpl(const co::Coroutine *c = nullptr) : co_(c) {} ~ClientImpl() = default; private: @@ -185,6 +209,39 @@ class ClientImpl : public std::enable_shared_from_this { friend class Publisher; friend class Subscriber; + enum class LockMode { + kAutoLock, // Lock on construction, unlock on destruction + kDeferredLock, // Locks if not already locked by the current thread, unlocks + // if not committed. 
+ kMaybeLocked, // Maybe locked by the current thread, lock if not, unlock on + // destruction + }; + + // RAII class to conditionally lock a mutex. This is like std::lock_guard but + // allows a locking mode to be specified. The default is to behave like + // std::lock_guard. To support holding the lock across GetMessageBuffer and + // PublishMessage calls it also supports deferring the lock and unlock. This + // mode also allows for calls to GetMessageBuffer multiple times without + // publishing. + class ClientLockGuard { + public: + ClientLockGuard(ClientImpl *client, + LockMode lock_mode = LockMode::kAutoLock); + ~ClientLockGuard(); + void Lock(); + void Unlock(); + + // If we are using a deferred lock we need to commit it before the object is + // destructed if we want to hold onto the lock. + void CommitLock(); + + private: + ClientImpl *client_; + LockMode lock_mode_; + bool locked_ = false; + bool committed_ = false; + }; + const std::string &GetName() const { return name_; } // Initialize the client by connecting to the server.
absl::Status RemovePublisher(details::PublisherImpl *publisher); absl::Status RemoveSubscriber(details::SubscriberImpl *subscriber); @@ -233,7 +305,12 @@ class ClientImpl : public std::enable_shared_from_this { // If max_size is greater than the current buffer size, the buffers // will be resized. absl::StatusOr GetMessageBuffer(details::PublisherImpl *publisher, - int32_t max_size); + int32_t max_size, bool lock); + // Get the message buffer as a span. Returns an empty span if there is no + // buffer available. + absl::StatusOr> + GetMessageBufferSpan(details::PublisherImpl *publisher, int32_t max_size, + bool lock); // Publish the message in the publisher's buffer. The message_size // argument specifies the actual size of the message to send. Returns the @@ -242,17 +319,21 @@ class ClientImpl : public std::enable_shared_from_this { absl::StatusOr PublishMessage(details::PublisherImpl *publisher, int64_t message_size); + // In thread-safe mode, if you don't want to publish the message, you must + // cancel the publish. This will release the lock. + void CancelPublish(details::PublisherImpl *publisher); + // Wait until a reliable publisher can try again to send a message. If the // client is coroutine-aware, the coroutine will wait. If it's not, // the function will block on a poll until the publisher is triggered. absl::Status WaitForReliablePublisher(details::PublisherImpl *publisher, - co::Coroutine *c = nullptr) { + const co::Coroutine *c = nullptr) { return WaitForReliablePublisher(publisher, std::chrono::nanoseconds(0), c); } absl::Status WaitForReliablePublisher(details::PublisherImpl *publisher, std::chrono::nanoseconds timeout, - co::Coroutine *c = nullptr); + const co::Coroutine *c = nullptr); // Wait until a reliable publisher can try again to send a message. If the // client is coroutine-aware, the coroutine will wait.
If it's not, @@ -260,27 +341,27 @@ class ClientImpl : public std::enable_shared_from_this { absl::StatusOr WaitForReliablePublisher(details::PublisherImpl *publisher, const toolbelt::FileDescriptor &fd, - co::Coroutine *c = nullptr) { + const co::Coroutine *c = nullptr) { return WaitForReliablePublisher(publisher, fd, std::chrono::nanoseconds(0), c); } absl::StatusOr WaitForReliablePublisher( details::PublisherImpl *publisher, const toolbelt::FileDescriptor &fd, - std::chrono::nanoseconds timeout, co::Coroutine *c = nullptr); + std::chrono::nanoseconds timeout, const co::Coroutine *c = nullptr); // Wait until there's a message available to be read by the // subscriber. If the client is coroutine-aware, the coroutine // will wait. If it's not, the function will block on a poll // until the subscriber is triggered. absl::Status WaitForSubscriber(details::SubscriberImpl *subscriber, - co::Coroutine *c = nullptr) { + const co::Coroutine *c = nullptr) { return WaitForSubscriber(subscriber, std::chrono::nanoseconds(0), c); } absl::Status WaitForSubscriber(details::SubscriberImpl *subscriber, std::chrono::nanoseconds timeout, - co::Coroutine *c = nullptr); + const co::Coroutine *c = nullptr); // Wait until there' s a message available to be read by the // subscriber. If the client is coroutine-aware, the coroutine @@ -288,14 +369,14 @@ class ClientImpl : public std::enable_shared_from_this { // until the subscriber is triggered. absl::StatusOr WaitForSubscriber(details::SubscriberImpl *subscriber, const toolbelt::FileDescriptor &fd, - co::Coroutine *c = nullptr) { + const co::Coroutine *c = nullptr) { return WaitForSubscriber(subscriber, fd, std::chrono::nanoseconds(0), c); } absl::StatusOr WaitForSubscriber(details::SubscriberImpl *subscriber, const toolbelt::FileDescriptor &fd, std::chrono::nanoseconds timeout, - co::Coroutine *c = nullptr); + const co::Coroutine *c = nullptr); // Read a message from a subscriber. 
If there are no available messages // the 'length' field of the returned Message will be zero. The 'buffer' @@ -347,6 +428,13 @@ class ClientImpl : public std::enable_shared_from_this { std::function callback); absl::Status UnregisterMessageCallback(details::SubscriberImpl *subscriber); + void InvokeMessageCallback(details::SubscriberImpl *subscriber, Message msg) { + auto it = message_callbacks_.find(subscriber); + if (it != message_callbacks_.end() && it->second) { + it->second(subscriber, msg); + } + } + // Register a callback that will be called when the publisher wants a // channel to be resized. Note that there is more than one // publisher, only the one that causes the resize will cause the @@ -374,7 +462,7 @@ class ClientImpl : public std::enable_shared_from_this { ReadMode mode = ReadMode::kReadNext); // Get the most recently received ordinal for the subscriber. - int64_t GetCurrentOrdinal(details::SubscriberImpl *sub) const; + int64_t GetCurrentOrdinal(details::SubscriberImpl *sub); absl::Status CheckConnected() const; absl::Status @@ -397,7 +485,8 @@ class ClientImpl : public std::enable_shared_from_this { FindMessageInternal(details::SubscriberImpl *subscriber, uint64_t timestamp); absl::StatusOr PublishMessageInternal(details::PublisherImpl *publisher, - int64_t message_size, bool omit_prefix); + int64_t message_size, bool omit_prefix, + bool use_prefix_slot_id); absl::Status ResizeChannel(details::PublisherImpl *publisher, int32_t new_slot_size); absl::StatusOr @@ -414,14 +503,13 @@ class ClientImpl : public std::enable_shared_from_this { toolbelt::UnixSocket socket_; toolbelt::FileDescriptor scb_fd_; // System control block memory fd. - char buffer_[kMaxMessage]; // Buffer for comms with server over UDS. // The client owns all the publishers and subscribers. absl::flat_hash_set> channels_; // If this is non-nullptr the client is coroutine aware and will cooperate // with all other coroutines to share the CPU. 
- co::Coroutine *co_; // Does not own the coroutine. + const co::Coroutine *co_; // Does not own the coroutine. // Call this function when the given subscriber detects a dropped message. // This will only really happen when you have an unreliable subscriber @@ -445,6 +533,19 @@ class ClientImpl : public std::enable_shared_from_this { resize_callbacks_; bool debug_ = false; toolbelt::Logger logger_; + mutable std::mutex mutex_; + std::atomic owner_thread_id_ = {}; + + // For backward compatibility we default to non-thread-safe because it has + // implications for the GetMessageBufferSpan function. You have to publish + // the message or call CancelPublish in order to release the lock. + bool thread_safe_ = false; + + // User and group id for the server. This is used to change the ownership of + // the shared memory files so that the server can delete them. Important when + // the client is running as root and the server isn't + int server_user_id_ = -1; + int server_group_id_ = -1; }; // This function returns an subspace::shared_ptr that refers to the message @@ -484,6 +585,7 @@ class Publisher { public: ~Publisher() { if (client_ != nullptr && impl_ != nullptr) { + UnregisterResizeCallback().IgnoreError(); (void)client_->RemovePublisher(impl_.get()); } } @@ -491,14 +593,25 @@ class Publisher { Publisher(const Publisher &other) = delete; Publisher &operator=(const Publisher &other) = delete; - Publisher(Publisher &&other) : client_(other.client_), impl_(other.impl_) { + Publisher(Publisher &&other) + : client_(std::move(other.client_)), impl_(std::move(other.impl_)), + resize_callback_(std::move(other.resize_callback_)) { + if (impl_ != nullptr && client_ != nullptr && resize_callback_ != nullptr) { + client_->UnregisterResizeCallback(impl_.get()).IgnoreError(); + RegisterResizeCallback(resize_callback_).IgnoreError(); + } other.client_ = nullptr; other.impl_ = nullptr; } Publisher &operator=(Publisher &&other) { - client_ = other.client_; - impl_ = other.impl_; + 
+ client_ = std::move(other.client_); + impl_ = std::move(other.impl_); + resize_callback_ = std::move(other.resize_callback_); + if (impl_ != nullptr && client_ != nullptr && resize_callback_ != nullptr) { + client_->UnregisterResizeCallback(impl_.get()).IgnoreError(); + RegisterResizeCallback(resize_callback_).IgnoreError(); + } other.client_ = nullptr; other.impl_ = nullptr; return *this; @@ -518,27 +631,62 @@ class Publisher { // detect when another attempt can be made to get a buffer. // If max_size is greater than the current buffer size, the buffers // will be resized. - absl::StatusOr GetMessageBuffer(int32_t max_size = -1) { - return client_->GetMessageBuffer(impl_.get(), max_size); + // + // In thread-safe mode, this will hold a lock on the client until you publish + // the message. If you don't want to publish the message, you must cancel the + // publish using CancelPublish. This will release the lock. + absl::StatusOr GetMessageBuffer(int32_t max_size = -1, + bool lock = true) { + return client_->GetMessageBuffer(impl_.get(), max_size, lock); + } + + // Get the message buffer as a span. Returns an empty span if there is no + // buffer available. See GetMessageBuffer for details of the locking behavior. + absl::StatusOr> + GetMessageBufferSpan(int32_t max_size = -1, bool lock = true) { + return client_->GetMessageBufferSpan(impl_.get(), max_size, lock); } // Publish the message in the publisher's buffer. The message_size // argument specifies the actual size of the message to send. Returns the // information about the message sent with buffer set to nullptr since // the publisher cannot access the message once it's been published. + // + // In thread-safe mode, this will release the lock on the client. If you + // don't want to publish the message, you must cancel the publish using + // CancelPublish. absl::StatusOr PublishMessage(int64_t message_size) { return client_->PublishMessage(impl_.get(), message_size); } + // Publish a message that already includes a prefix.
You have the option to + // use the slot id passed in the prefix for message retirement or use the slot id from the message. + // If the message arrived over a bridge and you want to pass the retirement + // back over the bridge, use the slot id from the prefix. If the retirement + // notification is locally handled, use the slot id from the message. + // + // In thread-safe mode, this will release the lock on the client. If you + // don't want to publish the message, you must cancel the publish using + // CancelPublish. + absl::StatusOr + PublishMessageWithPrefix(int64_t message_size, + bool use_slot_id_from_prefix = true) { + return PublishMessageInternal(message_size, true, use_slot_id_from_prefix); + } + + // If you don't want to publish the message, you must cancel the publish. This + // will release the lock. + void CancelPublish() { client_->CancelPublish(impl_.get()); } + // Wait until a reliable publisher can try again to send a message. If the // client is coroutine-aware, the coroutine will wait. If it's not, // the function will block on a poll until the publisher is triggered. - absl::Status Wait(co::Coroutine *c = nullptr) { + absl::Status Wait(const co::Coroutine *c = nullptr) { return client_->WaitForReliablePublisher(impl_.get(), c); } absl::Status Wait(std::chrono::nanoseconds timeout, - co::Coroutine *c = nullptr) { + const co::Coroutine *c = nullptr) { return client_->WaitForReliablePublisher(impl_.get(), timeout, c); } @@ -549,17 +697,19 @@ class Publisher { // the wait. Returns the integer fd value of the file descriptor that // triggered the wait.
absl::StatusOr Wait(const toolbelt::FileDescriptor &fd, - co::Coroutine *c = nullptr) { + const co::Coroutine *c = nullptr) { return client_->WaitForReliablePublisher(impl_.get(), fd, c); } absl::StatusOr Wait(const toolbelt::FileDescriptor &fd, std::chrono::nanoseconds timeout, - co::Coroutine *c = nullptr) { + const co::Coroutine *c = nullptr) { return client_->WaitForReliablePublisher(impl_.get(), fd, timeout, c); } - struct pollfd GetPollFd() const { return client_->GetPollFd(impl_.get()); } + struct pollfd GetPollFd() const { + return client_->GetPollFd(impl_.get()); + } // This is a file descriptor that you can poll on to wait for // message slots to be retired. It is triggered @@ -592,27 +742,67 @@ class Publisher { return client_->GetBuffers(impl_.get()); } + void GetStatsCounters(uint64_t &total_bytes, uint64_t &total_messages, + uint32_t &max_message_size, uint32_t &total_drops) { + impl_->GetStatsCounters(total_bytes, total_messages, max_message_size, + total_drops); + } + + uint64_t GetVirtualMemoryUsage() const { + return impl_->GetVirtualMemoryUsage(); + } + + const ChannelCounters &GetCounters() const { return impl_->GetCounters(); } + + std::string BufferSharedMemoryName(int buffer_index) const { + return impl_->BufferSharedMemoryName(buffer_index); + } + // Register a function to be called when the publisher resizes // the channel. 
absl::Status RegisterResizeCallback( std::function callback) { - return client_->RegisterResizeCallback( + auto status = client_->RegisterResizeCallback( impl_.get(), - [this, callback = std::move(callback)]( - details::PublisherImpl *p, int32_t old_size, int32_t new_size) - -> absl::Status { return callback(this, old_size, new_size); }); + [this](details::PublisherImpl *, int32_t old_size, + int32_t new_size) -> absl::Status { + return resize_callback_(this, old_size, new_size); + }); + if (!status.ok()) { + return status; + } + resize_callback_ = std::move(callback); + return absl::OkStatus(); } absl::Status UnregisterResizeCallback() { - return client_->UnregisterResizeCallback(impl_.get()); + auto status = client_->UnregisterResizeCallback(impl_.get()); + if (!status.ok()) { + return status; + } + resize_callback_ = nullptr; + return absl::OkStatus(); + } + + void SetOnSendCallback( + std::function(void *buffer, int64_t size)> + callback) { + impl_->SetOnSendCallback(std::move(callback)); } + void ClearOnSendCallback() { impl_->SetOnSendCallback(nullptr); } + int VirtualChannelId() const { return impl_->VirtualChannelId(); } + std::string Mux() const { return impl_->Mux(); } int NumSubscribers(int vchan_id = -1) const { return impl_->NumSubscribers(vchan_id); } + int CurrentSlotId() const { return impl_->CurrentSlotId(); } + + MessageSlot *CurrentSlot() const { return impl_->CurrentSlot(); } + private: friend class Server; friend class ClientImpl; @@ -621,20 +811,24 @@ class Publisher { std::shared_ptr impl) : client_(client), impl_(impl) {} - absl::StatusOr PublishMessageInternal(int64_t message_size, - bool omit_prefix) { + absl::StatusOr + PublishMessageInternal(int64_t message_size, bool omit_prefix, + bool use_prefix_slot_id) { return client_->PublishMessageInternal(impl_.get(), message_size, - omit_prefix); + omit_prefix, use_prefix_slot_id); } std::shared_ptr client_; std::shared_ptr impl_; + std::function resize_callback_ = nullptr; }; class Subscriber 
{ public: ~Subscriber() { if (client_ != nullptr && impl_ != nullptr) { + UnregisterDroppedMessageCallback().IgnoreError(); + UnregisterMessageCallback().IgnoreError(); (void)client_->RemoveSubscriber(impl_.get()); } } @@ -642,14 +836,39 @@ class Subscriber { Subscriber &operator=(const Subscriber &other) = delete; - Subscriber(Subscriber &&other) : client_(other.client_), impl_(other.impl_) { + Subscriber(Subscriber &&other) + : client_(std::move(other.client_)), impl_(std::move(other.impl_)), + dropped_message_callback_(std::move(other.dropped_message_callback_)), + message_callback_(std::move(other.message_callback_)) { + if (impl_ != nullptr && client_ != nullptr && + dropped_message_callback_ != nullptr) { + client_->UnregisterDroppedMessageCallback(impl_.get()).IgnoreError(); + RegisterDroppedMessageCallback(dropped_message_callback_).IgnoreError(); + } + if (impl_ != nullptr && client_ != nullptr && + message_callback_ != nullptr) { + client_->UnregisterMessageCallback(impl_.get()).IgnoreError(); + RegisterMessageCallback(message_callback_).IgnoreError(); + } other.client_ = nullptr; other.impl_ = nullptr; } Subscriber &operator=(Subscriber &&other) { - client_ = other.client_; - impl_ = other.impl_; + client_ = std::move(other.client_); + impl_ = std::move(other.impl_); + dropped_message_callback_ = std::move(other.dropped_message_callback_); + message_callback_ = std::move(other.message_callback_); + if (impl_ != nullptr && client_ != nullptr && + dropped_message_callback_ != nullptr) { + client_->UnregisterDroppedMessageCallback(impl_.get()).IgnoreError(); + RegisterDroppedMessageCallback(dropped_message_callback_).IgnoreError(); + } + if (impl_ != nullptr && client_ != nullptr && + message_callback_ != nullptr) { + client_->UnregisterMessageCallback(impl_.get()).IgnoreError(); + RegisterMessageCallback(message_callback_).IgnoreError(); + } other.client_ = nullptr; other.impl_ = nullptr; return *this; @@ -663,12 +882,12 @@ class Subscriber { // 
subscriber. If the client is coroutine-aware, the coroutine // will wait. If it's not, the function will block on a poll // until the subscriber is triggered. - absl::Status Wait(co::Coroutine *c = nullptr) { + absl::Status Wait(const co::Coroutine *c = nullptr) { return client_->WaitForSubscriber(impl_.get(), c); } absl::Status Wait(std::chrono::nanoseconds timeout, - co::Coroutine *c = nullptr) { + const co::Coroutine *c = nullptr) { return client_->WaitForSubscriber(impl_.get(), timeout, c); } @@ -680,13 +899,13 @@ class Subscriber { // the wait. Returns the integer fd value of the file descriptor that // triggered the wait. absl::StatusOr Wait(const toolbelt::FileDescriptor &fd, - co::Coroutine *c = nullptr) { + const co::Coroutine *c = nullptr) { return client_->WaitForSubscriber(impl_.get(), fd, c); } absl::StatusOr Wait(const toolbelt::FileDescriptor &fd, std::chrono::nanoseconds timeout, - co::Coroutine *c = nullptr) { + const co::Coroutine *c = nullptr) { return client_->WaitForSubscriber(impl_.get(), fd, timeout, c); } @@ -706,6 +925,13 @@ class Subscriber { absl::StatusOr> ReadMessage(ReadMode mode = ReadMode::kReadNext); + bool AddActiveMessage(int32_t slot_id) { + return impl_->AddActiveMessage(impl_->GetSlot(slot_id)); + } + void RemoveActiveMessage(int32_t slot_id) { + impl_->RemoveActiveMessage(impl_->GetSlot(slot_id)); + } + // Find a message given a timestamp. 
absl::StatusOr FindMessage(uint64_t timestamp) { return client_->FindMessage(impl_.get(), timestamp); @@ -716,7 +942,9 @@ class Subscriber { template absl::StatusOr> FindMessage(uint64_t timestamp); - struct pollfd GetPollFd() const { return client_->GetPollFd(impl_.get()); } + struct pollfd GetPollFd() const { + return client_->GetPollFd(impl_.get()); + } toolbelt::FileDescriptor GetFileDescriptor() const { return client_->GetFileDescriptor(impl_.get()); @@ -730,6 +958,10 @@ class Subscriber { return client_->GetChannelCounters(impl_.get()); } + uint64_t GetVirtualMemoryUsage() const { + return impl_->GetVirtualMemoryUsage(); + } + std::string Name() const { return impl_->Name(); } std::string Type() const { return impl_->Type(); } std::string_view TypeView() const { return impl_->TypeView(); } @@ -739,29 +971,60 @@ class Subscriber { // as its second argument. absl::Status RegisterDroppedMessageCallback( std::function callback) { - return client_->RegisterDroppedMessageCallback( - impl_.get(), - [this, callback = std::move(callback)]( - details::SubscriberImpl *s, int64_t c) { callback(this, c); }); + auto status = client_->RegisterDroppedMessageCallback( + impl_.get(), [this](details::SubscriberImpl *, int64_t c) { + dropped_message_callback_(this, c); + }); + if (!status.ok()) { + return status; + } + dropped_message_callback_ = std::move(callback); + return absl::OkStatus(); } absl::Status UnregisterDroppedMessageCallback() { - return client_->UnregisterDroppedMessageCallback(impl_.get()); + auto status = client_->UnregisterDroppedMessageCallback(impl_.get()); + if (!status.ok()) { + return status; + } + dropped_message_callback_ = nullptr; + return absl::OkStatus(); } absl::Status RegisterMessageCallback(std::function callback) { - return client_->RegisterMessageCallback( - impl_.get(), [this, callback = std::move(callback)]( - details::SubscriberImpl *s, Message m) { - callback(this, std::move(m)); + auto status = client_->RegisterMessageCallback( + 
impl_.get(), [this](details::SubscriberImpl *, Message m) { + message_callback_(this, std::move(m)); }); + if (!status.ok()) { + return status; + } + message_callback_ = std::move(callback); + return absl::OkStatus(); } absl::Status UnregisterMessageCallback() { - return client_->UnregisterMessageCallback(impl_.get()); + auto status = client_->UnregisterMessageCallback(impl_.get()); + if (!status.ok()) { + return status; + } + message_callback_ = nullptr; + return absl::OkStatus(); + } + + void InvokeMessageCallback(Message msg) { + client_->InvokeMessageCallback(impl_.get(), std::move(msg)); } + void SetOnReceiveCallback( + std::function(void *buffer, int64_t size)> + callback) { + impl_->SetOnReceiveCallback(std::move(callback)); + } + + void ClearOnReceiveCallback() { impl_->SetOnReceiveCallback(nullptr); } + absl::Status ProcessAllMessages(ReadMode mode = ReadMode::kReadNext) { return client_->ProcessAllMessages(impl_.get(), mode); } @@ -772,6 +1035,9 @@ class Subscriber { } void Trigger() { impl_->Trigger(); } + void Untrigger() { impl_->Untrigger(); } + + bool IsPlaceholder() const { return impl_->IsPlaceholder(); } const ChannelCounters &GetCounters() const { return impl_->GetCounters(); } @@ -792,6 +1058,10 @@ class Subscriber { int VirtualChannelId() const { return impl_->VirtualChannelId(); } + int ConfiguredVchanId() const { return impl_->ConfiguredVchanId(); } + + std::string Mux() const { return impl_->Mux(); } + int NumSubscribers(int vchan_id = -1) const { return impl_->NumSubscribers(vchan_id); } @@ -800,6 +1070,17 @@ class Subscriber { // subscriber, you can call this. 
void ClearActiveMessage() { impl_->ClearActiveMessage(); } + void TriggerReliablePublishers() { impl_->TriggerReliablePublishers(); } + + bool AtomicIncRefCount(int slot_id, int inc) { + MessageSlot *slot = impl_->GetSlot(slot_id); + if (slot != nullptr) { + return impl_->AtomicIncRefCount(slot, IsReliable(), inc, slot->ordinal, + slot->vchan_id, false); + } + return false; + } + private: friend class Server; friend class ClientImpl; @@ -816,6 +1097,9 @@ class Subscriber { std::shared_ptr client_; std::shared_ptr impl_; + std::function dropped_message_callback_ = + nullptr; + std::function message_callback_ = nullptr; }; template @@ -844,7 +1128,8 @@ class Client { public: static absl::StatusOr> Create(const std::string &server_socket = "/tmp/subspace", - const std::string &client_name = "", co::Coroutine *c = nullptr) { + const std::string &client_name = "", + const co::Coroutine *c = nullptr) { auto client = std::make_shared(c); auto status = client->Init(server_socket, client_name); if (!status.ok()) { @@ -853,7 +1138,8 @@ class Client { return client; } - Client(co::Coroutine *c = nullptr) : impl_(std::make_shared(c)) {} + Client(const co::Coroutine *c = nullptr) + : impl_(std::make_shared(c)) {} ~Client() = default; const std::string &GetName() const { return impl_->GetName(); } @@ -893,10 +1179,37 @@ class Client { // information unless you know how this works in detail. 
void SetDebug(bool v) { impl_->SetDebug(v); } + void SetThreadSafe(bool v) { impl_->SetThreadSafe(v); } + + absl::StatusOr + GetChannelCounters(const std::string &channel_name) const { + return impl_->GetChannelCounters(channel_name); + } + + absl::StatusOr + GetChannelInfo(const std::string &channelName) { + return impl_->GetChannelInfo(channelName); + } + absl::StatusOr> GetChannelInfo() { + return impl_->GetChannelInfo(); + } + + absl::StatusOr + GetChannelStats(const std::string &channelName) { + return impl_->GetChannelStats(channelName); + } + absl::StatusOr> GetChannelStats() { + return impl_->GetChannelStats(); + } + + absl::StatusOr ChannelExists(const std::string &channelName) { + return impl_->ChannelExists(channelName); + } + private: std::shared_ptr impl_; }; } // namespace subspace -#endif // __CLIENT_CLIENT_H +#endif // _xCLIENT_CLIENT_H diff --git a/client/client_channel.cc b/client/client_channel.cc index f1425b4..42437ca 100644 --- a/client/client_channel.cc +++ b/client/client_channel.cc @@ -1,10 +1,10 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. #include "client/client_channel.h" #include -#if defined(__APPLE__) +#if SUBSPACE_SHMEM_MODE == SUBSPACE_SHMEM_MODE_POSIX #include #endif #include @@ -15,10 +15,10 @@ namespace subspace { namespace details { -#if defined(__APPLE__) +#if SUBSPACE_SHMEM_MODE == SUBSPACE_SHMEM_MODE_POSIX absl::StatusOr -ClientChannel::CreateMacOSSharedMemoryFile(const std::string &filename, +ClientChannel::CreatePosixSharedMemoryFile(const std::string &filename, off_t size) { // Create a file in /tmp and make it the same size as the shared memory. This // will not actually allocate any disk space. 
@@ -36,7 +36,7 @@ ClientChannel::CreateMacOSSharedMemoryFile(const std::string &filename, } close(fd); - return MacOsSharedMemoryName(filename); + return PosixSharedMemoryName(filename); } #endif @@ -81,7 +81,8 @@ absl::Status ClientChannel::UnmapUnusedBuffers() { if (bcb_->refs[i] == 0) { if (buffers_[i]->full_size > 0) { if (debug_) { - fprintf(stderr, "%p: Unmapping unused buffers at index %zd\n", this, i); + fprintf(stderr, "%p: Unmapping unused buffers at index %zd\n", this, + i); } UnmapMemory(buffers_[i]->buffer, buffers_[i]->full_size, "buffers"); buffers_[i]->buffer = nullptr; @@ -113,14 +114,14 @@ bool ClientChannel::ValidateSlotBuffer(MessageSlot *slot) { absl::Status ClientChannel::AttachBuffers() { // NOTE: the num_buffers variable in the CCB is atomic and could change while // we are in or after we are done with this loop. - bool map_read_only = IsSubscriber() && !IsBridge(); + BufferMapMode mode = MapMode(); int num_buffers = ccb_->num_buffers; while (buffers_.size() < size_t(num_buffers)) { // We need to open the next buffer in the list. The buffer index is size_t buffer_index = buffers_.size(); auto shm_fd = OpenBuffer(buffer_index); if (!shm_fd.ok()) { -#if defined(__APPLE__) +#if SUBSPACE_SHMEM_MODE == SUBSPACE_SHMEM_MODE_POSIX if (buffers_.size() + 1 < size_t(num_buffers)) { // The buffer might have been deleted because there are no // references to it. If we are not the last buffer, this is @@ -136,7 +137,7 @@ absl::Status ClientChannel::AttachBuffers() { return size.status(); } if (*size == 0) { -#if !defined(__APPLE__) +#if SUBSPACE_SHMEM_MODE == SUBSPACE_SHMEM_MODE_LINUX if (buffers_.size() + 1 < size_t(num_buffers)) { // If the size is 0, it means the buffer has been deleted or not yet // created. We just add an empty buffer. @@ -151,7 +152,7 @@ absl::Status ClientChannel::AttachBuffers() { uint64_t slot_size = BufferSizeToSlotSize(*size); if (slot_size > 0) { // Map the shared memory buffer. 
- addr = MapBuffer(*shm_fd, *size, map_read_only); + addr = MapBuffer(*shm_fd, *size, mode); if (!addr.ok()) { return addr.status(); } @@ -216,7 +217,7 @@ absl::StatusOr ClientChannel::CreateBuffer(int buffer_index, size_t size) { std::string filename = BufferSharedMemoryName(buffer_index); -#if !defined(__APPLE__) +#if SUBSPACE_SHMEM_MODE == SUBSPACE_SHMEM_MODE_LINUX // Open the shared memory file. auto shm_fd = OpenSharedMemoryFile(filename, O_RDWR | O_CREAT | O_EXCL); if (!shm_fd.ok()) { @@ -236,13 +237,28 @@ ClientChannel::CreateBuffer(int buffer_index, size_t size) { filename, strerror(errno))); } + std::string shm_filename = "/dev/shm/" + filename; + // Change the permissions for the file to 777. + if (chmod(shm_filename.c_str(), 0777) == -1) { + return absl::InternalError( + absl::StrFormat("Failed to change permissions of shared memory %s: %s", shm_filename, strerror(errno))); + } + + if (getuid() == 0) { + // If we are root, change the owner for the file to server's user and group. + if (chown(shm_filename.c_str(), user_id_, group_id_) == -1) { + return absl::InternalError( + absl::StrFormat("Failed to change owner of shared memory %s: %s", shm_filename, strerror(errno))); + } + } + #else - // On MacOS we need to create a shadow file that has the same size as the + // On Posix we need to create a shadow file that has the same size as the // shared memory file. This is because the fstat of the shm "file" returns a // page aligned size, which is not what we want. The shadow file is used // to determine the size of the shared memory segment. 
absl::StatusOr shm_name = - CreateMacOSSharedMemoryFile(filename, off_t(size)); + CreatePosixSharedMemoryFile(filename, off_t(size)); if (!shm_name.ok()) { return shm_name.status(); } @@ -263,9 +279,23 @@ ClientChannel::CreateBuffer(int buffer_index, size_t size) { (void)shm_unlink(filename.c_str()); return absl::InternalError( absl::StrFormat("Failed to set length of shared memory %s: %s", - filename, strerror(errno))); + filename, strerror(errno))); } + // Change the permissions for the file to 777. + if (chmod(filename.c_str(), 0777) == -1) { + return absl::InternalError( + absl::StrFormat("Failed to change permissions of shared memory %s: %s", filename, strerror(errno))); + + } + + if (getuid() == 0) { + // If we are root, change the owner for the file to server's user and group. + if (chown(filename.c_str(), user_id_, group_id_) == -1) { + return absl::InternalError( + absl::StrFormat("Failed to change owner of shared memory %s: %s", filename, strerror(errno))); + } + } #endif return *shm_fd; } @@ -273,11 +303,11 @@ ClientChannel::CreateBuffer(int buffer_index, size_t size) { absl::StatusOr ClientChannel::OpenBuffer(int buffer_index) { std::string filename = BufferSharedMemoryName(buffer_index); -#if !defined(__APPLE__) +#if SUBSPACE_SHMEM_MODE == SUBSPACE_SHMEM_MODE_LINUX // Open the shared memory file. 
return OpenSharedMemoryFile(filename, O_RDWR); #else - auto shm_name = MacOsSharedMemoryName(filename); + auto shm_name = PosixSharedMemoryName(filename); if (!shm_name.ok()) { return shm_name.status(); } @@ -288,8 +318,8 @@ ClientChannel::OpenBuffer(int buffer_index) { absl::StatusOr ClientChannel::GetBufferSize(toolbelt::FileDescriptor &shm_fd, int buffer_index) const { -#if defined(__APPLE__) - // On MacOS we need to look at the size of the shadow file because it looks +#if SUBSPACE_SHMEM_MODE == SUBSPACE_SHMEM_MODE_POSIX + // On Posix we need to look at the size of the shadow file because it looks // like the fstat of the shm "file" returns a page aligned size. std::string filename = BufferSharedMemoryName(buffer_index); struct stat sb; @@ -313,8 +343,9 @@ ClientChannel::GetBufferSize(toolbelt::FileDescriptor &shm_fd, absl::StatusOr ClientChannel::MapBuffer(toolbelt::FileDescriptor &shm_fd, size_t size, - bool read_only) { - int prot = read_only ? PROT_READ : (PROT_READ | PROT_WRITE); + BufferMapMode mode) { + int prot = + mode == BufferMapMode::kReadOnly ? PROT_READ : (PROT_READ | PROT_WRITE); void *p = MapMemory(shm_fd.Fd(), size, prot, "buffers"); if (p == MAP_FAILED) { return absl::InternalError( @@ -329,6 +360,11 @@ void ClientChannel::TriggerRetirement(int slot_id) { // No retirement triggers, let's avoid locking the mutex. return; } + MessageSlot *slot = GetSlot(slot_id); + if ((slot->flags & kMessageIsActivation) != 0) { + // Don't retire activation messages. + return; + } std::unique_lock lock(retirement_lock_); for (auto &fd : retirement_triggers_) { ssize_t n = ::write(fd.Fd(), &slot_id, sizeof(slot_id)); diff --git a/client/client_channel.h b/client/client_channel.h index 6463bf4..9dbdae2 100644 --- a/client/client_channel.h +++ b/client/client_channel.h @@ -1,25 +1,26 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. 
-#ifndef __CLIENT_CLIENT_CHANNEL_H -#define __CLIENT_CLIENT_CHANNEL_H +#ifndef _xCLIENT_CLIENT_CHANNEL_H +#define _xCLIENT_CLIENT_CHANNEL_H #include "client/options.h" -#include "common/channel.h" #include "co/coroutine.h" +#include "common/channel.h" #include "proto/subspace.pb.h" +#include "toolbelt/clock.h" #include "toolbelt/fd.h" #include "toolbelt/sockets.h" #include "toolbelt/triggerfd.h" -#include "toolbelt/clock.h" #include +#include "client/checksum.h" #include #include #include -#include #include +#include // Notification strategy // --------------------- @@ -41,8 +42,23 @@ namespace subspace { + +#define SUBSPACE_SHMEM_MODE_POSIX 1 +#define SUBSPACE_SHMEM_MODE_LINUX 2 + +#if defined(__linux__) +#define SUBSPACE_SHMEM_MODE SUBSPACE_SHMEM_MODE_LINUX +#else +#define SUBSPACE_SHMEM_MODE SUBSPACE_SHMEM_MODE_POSIX +#endif + class ClientImpl; +enum class BufferMapMode { + kReadOnly, + kReadWrite, +}; + namespace details { struct BufferSet { @@ -59,11 +75,17 @@ struct BufferSet { class ClientChannel : public Channel { public: ClientChannel(const std::string &name, int num_slots, int channel_id, - int vchan_id, uint64_t session_id, std::string type, std::function reload) - : Channel(name, num_slots, channel_id, std::move(type), std::move(reload)), - vchan_id_(vchan_id), session_id_(std::move(session_id)) {} + int vchan_id, uint64_t session_id, std::string type, + std::function reload, int user_id, int group_id) + : Channel(name, num_slots, channel_id, std::move(type), + std::move(reload)), + vchan_id_(vchan_id), session_id_(std::move(session_id)), user_id_(user_id), group_id_(group_id) { + active_slots_.reserve(num_slots); + embargoed_slots_.Resize(num_slots); + } virtual ~ClientChannel() = default; MessageSlot *CurrentSlot() const { return slot_; } + int32_t CurrentSlotId() const { return slot_ != nullptr ? 
slot_->id : -1; } const ChannelCounters &GetCounters() const { return GetScb()->counters[GetChannelId()]; } @@ -96,10 +118,11 @@ class ClientChannel : public Channel { if (slot == nullptr) { return nullptr; } - return Buffer(slot->id) + - (sizeof(MessagePrefix) + Aligned<64>(SlotSize(slot->id))) * - slot->id + - sizeof(MessagePrefix); + void *b = + Buffer(slot->id) + + (sizeof(MessagePrefix) + Aligned<64>(SlotSize(slot->id))) * slot->id + + sizeof(MessagePrefix); + return b; } // Get a pointer to the MessagePrefix for a given slot. @@ -110,6 +133,13 @@ class ClientChannel : public Channel { return p; } + MessageSlot *GetSlot(int32_t id) const { + if (id < 0 || id >= num_slots_) { + return nullptr; + } + return &ccb_->slots[id]; + } + // Get the size associated with the given slot id. int SlotSize(int slot_id) const { if (ccb_->slots[slot_id].buffer_index < 0 || @@ -139,9 +169,14 @@ class ClientChannel : public Channel { return buffers_.empty() ? 0 : buffers_.back()->slot_size; } + void SetNumSlots(int n) override { + Channel::SetNumSlots(n); + active_slots_.resize(n); + embargoed_slots_.Resize(n); + } + // Get the buffer associated with the given slot id. - char *Buffer(int slot_id, - bool abort_on_range = true) { + char *Buffer(int slot_id, bool abort_on_range = true) { // While we are trying to get the buffer a publisher might be adding // more buffers. Since we are going to abort if the index isn't in // range we should try very hard to make it so. @@ -161,14 +196,19 @@ class ClientChannel : public Channel { // This should never happen. int index = ccb_->slots[slot_id].buffer_index; std::cerr << this << " Invalid buffer index for slot " << slot_id << ": " - << index << " there are " << buffers_.size() << " buffers" << std::endl; - std::cerr << this << "Channel: " << name_ << " from " << (IsPublisher() ? 
"publisher" : "subscriber") << std::endl; + << index << " there are " << buffers_.size() << " buffers" + << std::endl; + std::cerr << this << "Channel: " << name_ << " from " + << (IsPublisher() ? "publisher" : "subscriber") << std::endl; DumpSlots(std::cerr); abort(); } + return nullptr; } + virtual BufferMapMode MapMode() const = 0; + bool BuffersChanged() const { return ccb_->num_buffers != static_cast(buffers_.size()); } @@ -191,6 +231,14 @@ class ClientChannel : public Channel { has_retirement_triggers_ = true; } + std::string BufferSharedMemoryName(int buffer_index) const { + return Channel::BufferSharedMemoryName(session_id_, buffer_index); + } + + void RecordDroppedMessages(uint32_t num) { + ccb_->total_drops += num; // Atomic increment. + } + protected: void TriggerRetirement(int slot_id); @@ -216,23 +264,20 @@ class ClientChannel : public Channel { absl::StatusOr GetBufferSize(toolbelt::FileDescriptor &shm_fd, int buffer_index) const; absl::StatusOr MapBuffer(toolbelt::FileDescriptor &shm_fd, - size_t size, bool read_only); - - std::string BufferSharedMemoryName(int buffer_index) const { - return Channel::BufferSharedMemoryName(session_id_, buffer_index); - } + size_t size, BufferMapMode mode); -#if defined(__APPLE__) +#if SUBSPACE_SHMEM_MODE == SUBSPACE_SHMEM_MODE_POSIX absl::StatusOr - CreateMacOSSharedMemoryFile(const std::string &filename, off_t size); + CreatePosixSharedMemoryFile(const std::string &filename, off_t size); #endif -protected: MessageSlot *slot_ = nullptr; // Current slot. int vchan_id_ = -1; // Virtual channel ID. uint64_t session_id_; std::vector> buffers_ = {}; - + int user_id_ = -1; + int group_id_ = -1; + // Retirement triggers. Although these are not in shared memory, // the retirement of a slot can occur in any thread so we need // a mutex. 
But we don't want to lock the mutex if there are none @@ -247,9 +292,12 @@ class ClientChannel : public Channel { std::atomic has_retirement_triggers_{false}; std::vector retirement_triggers_ = {}; std::mutex retirement_lock_; + + std::vector active_slots_; + DynamicBitSet embargoed_slots_; }; } // namespace details } // namespace subspace -#endif // __CLIENT_CLIENT_CHANNEL_H +#endif // _xCLIENT_CLIENT_CHANNEL_H diff --git a/client/client_test.cc b/client/client_test.cc index 0235ea2..91aea41 100644 --- a/client/client_test.cc +++ b/client/client_test.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. @@ -26,20 +26,22 @@ ABSL_FLAG(bool, start_server, true, "Start the subspace server"); ABSL_FLAG(std::string, server, "", "Path to server executable"); void SignalHandler(int sig) { - fprintf(stderr,"Signal %d", sig); - std::cerr.flush(); - FILE *fp = fopen("/proc/self/maps", "r"); - for (;;) { - int ch = fgetc(fp); - if (ch == EOF) { - break; - } - fputc(ch, stderr); - } - signal(sig, SIG_DFL); - raise(sig); + fprintf(stderr, "Signal %d", sig); + std::cerr.flush(); + FILE *fp = fopen("/proc/self/maps", "r"); + for (;;) { + int ch = fgetc(fp); + if (ch == EOF) { + break; + } + fputc(ch, stderr); + } + signal(sig, SIG_DFL); + raise(sig); } +void SigQuitHandler(int signum); + using Publisher = subspace::Publisher; using Subscriber = subspace::Subscriber; using Message = subspace::Message; @@ -78,10 +80,9 @@ class ClientTest : public ::testing::Test { // has started and stopped. This end of the pipe is blocking. (void)pipe(server_pipe_); - server_ = - std::make_unique(scheduler_, socket_, "", 0, 0, - /*local=*/true, server_pipe_[1]); - + server_ = std::make_unique(scheduler_, socket_, "", 0, 0, + /*local=*/true, + server_pipe_[1], 1, true); // Start server running in a thread. 
server_thread_ = std::thread([]() { absl::Status s = server_->Run(); @@ -117,9 +118,12 @@ class ClientTest : public ::testing::Test { void TearDown() override {} void InitClient(subspace::Client &client) { + client.SetThreadSafe(true); ASSERT_OK(client.Init(Socket())); } + static co::CoroutineScheduler &Scheduler() { return scheduler_; } + static const std::string &Socket() { return socket_; } static subspace::Server *Server() { return server_.get(); } @@ -138,14 +142,21 @@ int ClientTest::server_pipe_[2]; std::unique_ptr ClientTest::server_; std::thread ClientTest::server_thread_; +void SigQuitHandler(int signum) { + ClientTest::Scheduler().Show(); + signal(signum, SIG_DFL); + raise(signum); +} + TEST_F(ClientTest, InetAddressSupportsAbslHash) { struct sockaddr_in addr = { #if defined(__APPLE__) - .sin_len = sizeof(int), + .sin_len = sizeof(int), #endif - .sin_family = AF_INET, - .sin_port = htons(1234), - .sin_addr = {.s_addr = htonl(0x12345678)}}; + .sin_family = AF_INET, + .sin_port = htons(1234), + .sin_addr = {.s_addr = htonl(0x12345678)} + }; EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly({ toolbelt::InetAddress(), @@ -186,7 +197,7 @@ TEST_F(ClientTest, Resize1) { ASSERT_EQ(512, pub->SlotSize()); // Won't resize. - absl::StatusOr buffer3 = pub->GetMessageBuffer(512); + absl::StatusOr buffer3 = pub->GetMessageBuffer(512, false); ASSERT_OK(buffer3); ASSERT_EQ(512, pub->SlotSize()); } @@ -231,6 +242,9 @@ TEST_F(ClientTest, ResizeCallback) { // allows one resize. absl::StatusOr buffer4 = pub->GetMessageBuffer(1000); ASSERT_FALSE(buffer4.ok()); + // For thread safety support we need to cancel the publish if we don't want to + // send it. 
+ pub->CancelPublish(); } TEST_F(ClientTest, CreatePublisherWithType) { @@ -270,7 +284,7 @@ TEST_F(ClientTest, TooManyVirtualPublishers) { constexpr int kMuxCapacity = 10; std::vector pubs; - for (int i = 0; i < kMuxCapacity - 2; i++) { + for (int i = 0; i < kMuxCapacity - 1; i++) { absl::StatusOr pub = client.CreatePublisher( "dave0", 256, 10, {.type = "foobar", .mux = "mainmux"}); ASSERT_OK(pub); @@ -405,7 +419,7 @@ TEST_F(ClientTest, TooManyVirtualSubscribers) { // 6 subscribers. std::vector subs; - for (int i = 0; i < kNumSlots - 4; i++) { + for (int i = 0; i < kNumSlots - 3; i++) { absl::StatusOr sub = client.CreateSubscriber("dave0", {.mux = "foobar"}); ASSERT_OK(sub); @@ -472,7 +486,7 @@ TEST_F(ClientTest, BadPublisherParameters) { // Different num slots absl::StatusOr pub3 = client.CreatePublisher("dave0", 256, 9); - ASSERT_FALSE(pub3.ok()); + ASSERT_TRUE(pub3.ok()); // Fixed size. absl::StatusOr pub4 = @@ -703,6 +717,77 @@ TEST_F(ClientTest, PublishSingleMessageAndRead) { ASSERT_EQ(0, msg->length); } +TEST_F(ClientTest, PublishSingleMessageWithPrefixAndRead) { + subspace::Client pub_client; + subspace::Client sub_client; + ASSERT_OK(pub_client.Init(Socket())); + ASSERT_OK(sub_client.Init(Socket())); + absl::StatusOr pub = pub_client.CreatePublisher( + "dave6", {.slot_size = 256, .num_slots = 10, .type = "foobar"}); + ASSERT_OK(pub); + + ASSERT_EQ("foobar", pub->TypeView()); + ASSERT_FALSE(pub->IsReliable()); + ASSERT_FALSE(pub->IsFixedSize()); + ASSERT_EQ(256, pub->SlotSize()); + ASSERT_EQ(10, pub->NumSlots()); + ASSERT_EQ(0, pub->CurrentSlotId()); + ASSERT_EQ("", pub->Mux()); + std::stringstream ss; + pub->DumpSlots(ss); + ASSERT_NE("", ss.str()); + ASSERT_NE("", pub->BufferSharedMemoryName(0)); + ASSERT_EQ(0, pub->CurrentSlotId()); + ASSERT_NE(nullptr, pub->CurrentSlot()); + + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + memcpy(*buffer, "foobar", 6); + + auto prefix = reinterpret_cast(*buffer) - 1; + prefix->SetIsBridged(); + 
prefix->timestamp = 1234; + + absl::StatusOr pub_status = pub->PublishMessageWithPrefix(6); + ASSERT_OK(pub_status); + + absl::StatusOr sub = sub_client.CreateSubscriber("dave6"); + ASSERT_OK(sub); + + absl::StatusOr msg = sub->ReadMessage(); + ASSERT_OK(msg); + ASSERT_EQ(6, msg->length); + + auto prefix2 = + reinterpret_cast(msg->buffer) - 1; + ASSERT_TRUE(prefix2->IsBridged()); + ASSERT_EQ(1234, prefix2->timestamp); + ASSERT_EQ(1, sub->CurrentOrdinal()); + ASSERT_EQ("dave6", sub->Name()); + ASSERT_EQ("foobar", sub->TypeView()); + ASSERT_FALSE(sub->IsReliable()); + ASSERT_EQ(256, sub->SlotSize()); + ASSERT_EQ(10, sub->NumSlots()); + ASSERT_EQ("", sub->Mux()); + ss.clear(); + sub->DumpSlots(ss); + ASSERT_NE("", ss.str()); + ASSERT_EQ(1234, sub->Timestamp()); + ASSERT_EQ(1234, msg->timestamp); + ASSERT_EQ(-1, sub->VirtualChannelId()); + ASSERT_EQ(-1, sub->ConfiguredVchanId()); + // Another read will get 0. + msg = sub->ReadMessage(); + ASSERT_OK(msg); + ASSERT_EQ(0, msg->length); + + // Read again to make sure we get another 0. + // Regression test. 
+ msg = sub->ReadMessage(); + ASSERT_OK(msg); + ASSERT_EQ(0, msg->length); +} + TEST_F(ClientTest, PublishSingleMessageAndReadNewest) { subspace::Client pub_client; subspace::Client sub_client; @@ -812,6 +897,40 @@ TEST_F(ClientTest, PublishSingleMessageAndReadWithCallback) { ASSERT_OK(status); } +TEST_F(ClientTest, PublishSingleMessageAndReadWithPlugin) { + ASSERT_OK(Server()->LoadPlugin("NOP", "plugins/nop_plugin.so")); + subspace::Client pub_client; + subspace::Client sub_client; + ASSERT_OK(pub_client.Init(Socket())); + ASSERT_OK(sub_client.Init(Socket())); + absl::StatusOr pub = pub_client.CreatePublisher("dave6", 256, 10); + ASSERT_OK(pub); + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + memcpy(*buffer, "foobar", 6); + absl::StatusOr pub_status = pub->PublishMessage(6); + ASSERT_OK(pub_status); + + absl::StatusOr sub = sub_client.CreateSubscriber("dave6"); + ASSERT_OK(sub); + + absl::StatusOr msg = sub->ReadMessage(); + ASSERT_OK(msg); + ASSERT_EQ(6, msg->length); + + // Another read will get 0. + msg = sub->ReadMessage(); + ASSERT_OK(msg); + ASSERT_EQ(0, msg->length); + + // Read again to make sure we get another 0. + // Regression test. 
+ msg = sub->ReadMessage(); + ASSERT_OK(msg); + ASSERT_EQ(0, msg->length); + ASSERT_OK(Server()->UnloadPlugin("NOP")); +} + TEST_F(ClientTest, VirtualPublishSingleMessageAndRead) { subspace::Client pub_client; subspace::Client sub_client; @@ -1251,24 +1370,23 @@ TEST_F(ClientTest, PublishConcurrentlyFromOneClientToOneSubscriber) { ASSERT_OK(pub_client.Init(Socket())); for (int i = 0; i < kNumPublishers; ++i) { pubs.emplace_back(*pub_client.CreatePublisher( - channel_name, {.slot_size = 256, .num_slots = 2*kNumPublishers + 16})); + channel_name, + {.slot_size = 256, .num_slots = 2 * kNumPublishers + 16})); } std::vector pub_threads; pub_threads.reserve(kNumPublishers); for (int i = 0; i < kNumPublishers; ++i) { - pub_threads.emplace_back( - std::thread([&pubs, i]() { - std::array msg = {}; - auto size = std::snprintf(msg.data(), msg.size(), "M%d", i); - auto buffer = pubs[i].GetMessageBuffer(size); - std::memcpy(*buffer, msg.data(), size); - ASSERT_OK(pubs[i].PublishMessage(size)); - }) - ); - } - - for (auto& t : pub_threads) { + pub_threads.emplace_back(std::thread([&pubs, i]() { + std::array msg = {}; + auto size = std::snprintf(msg.data(), msg.size(), "M%d", i); + auto buffer = pubs[i].GetMessageBuffer(size); + std::memcpy(*buffer, msg.data(), size); + ASSERT_OK(pubs[i].PublishMessage(size)); + })); + } + + for (auto &t : pub_threads) { t.join(); } @@ -1280,7 +1398,8 @@ TEST_F(ClientTest, PublishConcurrentlyFromOneClientToOneSubscriber) { if (size == 0) { break; } - all_recv_msgs.emplace_back(std::string(reinterpret_cast(message.buffer), message.length)); + all_recv_msgs.emplace_back(std::string( + reinterpret_cast(message.buffer), message.length)); } EXPECT_EQ(all_recv_msgs.size(), kNumPublishers); std::sort(all_recv_msgs.begin(), all_recv_msgs.end()); @@ -1298,33 +1417,32 @@ TEST_F(ClientTest, PublishConcurrentlyToOneSubscriber) { constexpr int kNumPublishers = 100; pub_threads.reserve(kNumPublishers); for (int i = 0; i < kNumPublishers; ++i) { - 
pub_threads.emplace_back( - std::thread([&channel_name, i]() { - // We have a backlog of 10 hardcoded for the subscriber's listen socket. We will get - // a connection refused if we exceed this so use a retry loop with a delay if we get - // errors on connection. Happens on MacOS. - subspace::Client pub_client; - bool connected = false; - for (int i = 0; i < 100; i++) { - if (pub_client.Init(Socket()).ok()) { - connected = true; - break; - } - std::this_thread::sleep_for(std::chrono::milliseconds(10)); + pub_threads.emplace_back(std::thread([&channel_name, i]() { + // We have a backlog of 10 hardcoded for the subscriber's listen socket. + // We will get a connection refused if we exceed this so use a retry loop + // with a delay if we get errors on connection. Happens on MacOS. + subspace::Client pub_client; + bool connected = false; + for (int i = 0; i < 100; i++) { + if (pub_client.Init(Socket()).ok()) { + connected = true; + break; } - ASSERT_TRUE(connected); - auto pub = *pub_client.CreatePublisher( - channel_name, {.slot_size = 256, .num_slots = 2*kNumPublishers + 16}); - std::array msg = {}; - auto size = std::snprintf(msg.data(), msg.size(), "M%d", i); - auto buffer = pub.GetMessageBuffer(size); - std::memcpy(*buffer, msg.data(), size); - ASSERT_OK(pub.PublishMessage(size)); - }) - ); - } - - for (auto& t : pub_threads) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + ASSERT_TRUE(connected); + auto pub = *pub_client.CreatePublisher( + channel_name, + {.slot_size = 256, .num_slots = 2 * kNumPublishers + 16}); + std::array msg = {}; + auto size = std::snprintf(msg.data(), msg.size(), "M%d", i); + auto buffer = pub.GetMessageBuffer(size); + std::memcpy(*buffer, msg.data(), size); + ASSERT_OK(pub.PublishMessage(size)); + })); + } + + for (auto &t : pub_threads) { t.join(); } @@ -1336,7 +1454,8 @@ TEST_F(ClientTest, PublishConcurrentlyToOneSubscriber) { if (size == 0) { break; } - 
all_recv_msgs.emplace_back(std::string(reinterpret_cast(message.buffer), message.length)); + all_recv_msgs.emplace_back(std::string( + reinterpret_cast(message.buffer), message.length)); } EXPECT_EQ(all_recv_msgs.size(), kNumPublishers); std::sort(all_recv_msgs.begin(), all_recv_msgs.end()); @@ -1838,8 +1957,8 @@ TEST_F(ClientTest, Publish2Message2AndReadSharedPtrs) { for (int i = 0; i < 2; i++) { absl::StatusOr buffer = pub->GetMessageBuffer(); ASSERT_OK(buffer); - memcpy(*buffer, "foobar", 6); - absl::StatusOr pub_status = pub->PublishMessage(6); + memcpy(*buffer, "foobar", 7); + absl::StatusOr pub_status = pub->PublishMessage(7); ASSERT_OK(pub_status); } @@ -2455,14 +2574,15 @@ TEST_F(ClientTest, ChannelDirectory) { ASSERT_OK(s2); // Subscribe to channel directory. - absl::StatusOr dir_sub = client->CreateSubscriber( - "/subspace/ChannelDirectory"); + absl::StatusOr dir_sub = + client->CreateSubscriber("/subspace/ChannelDirectory"); ASSERT_OK(dir_sub); - sleep(1); // Give some time for directory to be updated. + sleep(1); // Give some time for directory to be updated. // Read the latest channel directory message. - absl::StatusOr msg = dir_sub->ReadMessage(subspace::ReadMode::kReadNewest); + absl::StatusOr msg = + dir_sub->ReadMessage(subspace::ReadMode::kReadNewest); ASSERT_OK(msg); ASSERT_NE(0, msg->length); @@ -2474,7 +2594,7 @@ TEST_F(ClientTest, ChannelDirectory) { bool found_chan1 = false; bool found_chan2 = false; for (int i = 0; i < dir.channels_size(); i++) { - const subspace::ChannelInfo &info = dir.channels(i); + const subspace::ChannelInfoProto &info = dir.channels(i); if (info.name() == "chan1") { found_chan1 = true; ASSERT_EQ(256, info.slot_size()); @@ -2518,12 +2638,137 @@ TEST_F(ClientTest, MessageGetters) { ASSERT_EQ(10, msg->NumSlots()); } +// This tests checksums. We have two publishers, one that calculates a checksum and one that doesn't. 
+// We have 2 subscribers, one that checks the checksum and expects an error if there's an error +// and one that checks the checksum and passes the message intact but with the checksum_error flag set. +TEST_F(ClientTest, ChecksumVerification) { + auto client = EVAL_AND_ASSERT_OK(subspace::Client::Create(Socket())); + + absl::StatusOr pub = client->CreatePublisher( + "chan1", {.slot_size = 256, .num_slots = 10, .checksum = true}); + ASSERT_OK(pub); + + // Create a second publisher that doesn't calculate a checksum. + absl::StatusOr pub2 = + client->CreatePublisher("chan1", {.slot_size = 256, .num_slots = 10}); + ASSERT_OK(pub2); + + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + memcpy(*buffer, "foobar", 6); + absl::StatusOr pub_status = pub->PublishMessage(6); + ASSERT_OK(pub_status); + + absl::StatusOr sub = + client->CreateSubscriber("chan1", {.checksum = true}); + ASSERT_OK(sub); + + { + absl::StatusOr msg = sub->ReadMessage(); + ASSERT_OK(msg); + ASSERT_EQ(6, msg->length); + ASSERT_STREQ("foobar", reinterpret_cast(msg->buffer)); + } + + // Build a message but overwrite the message buffer with a different string. + absl::StatusOr buffer2 = pub->GetMessageBuffer(); + ASSERT_OK(buffer2); + memcpy(*buffer2, "foobar", 6); + absl::StatusOr pub_status2 = pub->PublishMessage(6); + ASSERT_OK(pub_status2); + + char *buf = reinterpret_cast(*buffer2); + buf[0] = 'x'; + + // Read the message with a bad checksum. + { + absl::StatusOr msg2 = sub->ReadMessage(); + ASSERT_FALSE(msg2.ok()); + ASSERT_EQ(absl::StatusCode::kInternal, msg2.status().code()); + ASSERT_EQ("Checksum verification failed", msg2.status().message()); + } + + // Send another message with a valid checksum. 
+ absl::StatusOr buffer3 = pub->GetMessageBuffer(); + ASSERT_OK(buffer3); + memcpy(*buffer3, "foobar", 6); + absl::StatusOr pub_status3 = pub->PublishMessage(6); + ASSERT_OK(pub_status3); + + // Read the message with the valid checksum + { + absl::StatusOr msg3 = sub->ReadMessage(); + ASSERT_OK(msg3); + ASSERT_EQ(6, msg3->length); + ASSERT_STREQ("foobar", reinterpret_cast(msg3->buffer)); + } + + // Send a message on pub2 with no checksum and corrupt it. + absl::StatusOr buffer4 = pub2->GetMessageBuffer(); + ASSERT_OK(buffer4); + memcpy(*buffer4, "foobar", 6); + absl::StatusOr pub_status4 = pub2->PublishMessage(6); + ASSERT_OK(pub_status4); + char *buf4 = reinterpret_cast(*buffer4); + buf4[0] = 'X'; + + // Read the corrupted message with no checksum. Although the subscriber is + // checking for a checksum, it will not be set since the publisher didn't + // calculate one. + { + absl::StatusOr msg5 = sub->ReadMessage(); + ASSERT_OK(msg5); + ASSERT_EQ(6, msg5->length); + ASSERT_STREQ("Xoobar", reinterpret_cast(msg5->buffer)); + ASSERT_FALSE(msg5->checksum_error); + } + + // Create another subscriber with pass checksum errors. + absl::StatusOr sub2 = client->CreateSubscriber( + "chan1", {.checksum = true, .pass_checksum_errors = true}); + ASSERT_OK(sub2); + + // First message will be fine since the checksum is good. + { + absl::StatusOr msg4 = sub2->ReadMessage(); + ASSERT_OK(msg4); + ASSERT_EQ(6, msg4->length); + ASSERT_STREQ("foobar", reinterpret_cast(msg4->buffer)); + } + + // Second message wil have a checksum error flag set. + { + absl::StatusOr msg5 = sub2->ReadMessage(); + ASSERT_OK(msg5); + ASSERT_EQ(6, msg5->length); + ASSERT_TRUE(msg5->checksum_error); + } + + // Third message will be fine since the checksum is good. + { + absl::StatusOr msg6 = sub2->ReadMessage(); + ASSERT_OK(msg6); + ASSERT_EQ(6, msg6->length); + ASSERT_STREQ("foobar", reinterpret_cast(msg6->buffer)); + } + + // Fourth message doesn't have a checksum. 
+ { + absl::StatusOr msg7 = sub2->ReadMessage(); + ASSERT_OK(msg7); + ASSERT_EQ(6, msg7->length); + ASSERT_STREQ("Xoobar", reinterpret_cast(msg7->buffer)); + ASSERT_FALSE(msg7->checksum_error); + } +} + int main(int argc, char **argv) { testing::InitGoogleTest(&argc, argv); absl::ParseCommandLine(argc, argv); signal(SIGSEGV, SignalHandler); signal(SIGBUS, SignalHandler); + signal(SIGQUIT, SigQuitHandler); absl::InitializeSymbolizer(argv[0]); diff --git a/client/latency_test.cc b/client/latency_test.cc index dbea65a..26aad5d 100644 --- a/client/latency_test.cc +++ b/client/latency_test.cc @@ -1,10 +1,11 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. #include "absl/flags/flag.h" #include "absl/flags/parse.h" #include "absl/hash/hash_testing.h" +#include "absl/status/status_matchers.h" #include "client/client.h" #include "co/coroutine.h" #include "server/server.h" @@ -17,7 +18,6 @@ #include #include #include -#include "absl/status/status_matchers.h" ABSL_FLAG(bool, start_server, true, "Start the subspace server"); ABSL_FLAG(std::string, server, "", "Path to server executable"); @@ -38,7 +38,7 @@ using InetAddress = toolbelt::InetAddress; if (!result.ok()) { \ std::cerr << result.status() << std::endl; \ } \ - ASSERT_OK(result); \ + ASSERT_OK(result); \ std::move(*result); \ }) @@ -94,6 +94,7 @@ class LatencyTest : public ::testing::Test { void TearDown() override {} void InitClient(subspace::Client &client) { + client.SetThreadSafe(true); ASSERT_OK(client.Init(Socket())); } @@ -592,6 +593,251 @@ TEST_F(LatencyTest, PublisherLatency) { } } +TEST_F(LatencyTest, PublisherLatencyChecksum) { + subspace::Client pub_client; + subspace::Client sub_client; + ASSERT_OK(pub_client.Init(Socket())); + ASSERT_OK(sub_client.Init(Socket())); + + constexpr int kNumMessages = 20000; + for (int num_slots = 10; num_slots < 100000; + num_slots = (num_slots)*15 / 10) { + absl::StatusOr pub = 
pub_client.CreatePublisher( + "publat", 256, num_slots, {.reliable = false, .checksum = true}); + ASSERT_OK(pub); + + std::cerr << num_slots << ","; + absl::StatusOr sub = sub_client.CreateSubscriber( + "publat", + {.reliable = false, .log_dropped_messages = false, .checksum = true}); + ASSERT_OK(sub); + + uint64_t total_time = 0; + + // Send messages ensuring there is always a retired message. Measure the + // total time to send (but not to receive). + for (int i = 0; i < kNumMessages; i++) { + // Publish a message. + uint64_t start_time = toolbelt::Now(); + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + absl::StatusOr pub_status = pub->PublishMessage(100); + ASSERT_OK(pub_status); + uint64_t end = toolbelt::Now(); + total_time += end - start_time; + + absl::StatusOr msg = sub->ReadMessage(); + ASSERT_OK(msg); + ASSERT_EQ(100, msg->length); + } + std::cerr << total_time / kNumMessages << ","; + + // Now fill the channel. + for (int i = 0; i < num_slots; i++) { + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + if (*buffer == nullptr) { + // Can't send, wait until we can try again. + ASSERT_OK(pub->Wait()); + continue; + } + absl::StatusOr pub_status = pub->PublishMessage(100); + // std::cerr << "pub status " << pub_status.status() << "\n"; + ASSERT_OK(pub_status); + } + + // Send the same number of messages but with the channel full so that it has + // to take messages that subscribers have not yet seen. + total_time = 0; + for (int i = 0; i < kNumMessages; i++) { + // Publish a message. 
+ uint64_t start_time = toolbelt::Now(); + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + + absl::StatusOr pub_status = pub->PublishMessage(100); + ASSERT_OK(pub_status); + uint64_t end = toolbelt::Now(); + total_time += end - start_time; + } + std::cerr << total_time / kNumMessages << "\n"; + } +} + +TEST_F(LatencyTest, PublisherLatencyPayload) { + subspace::Client pub_client; + subspace::Client sub_client; + ASSERT_OK(pub_client.Init(Socket())); + ASSERT_OK(sub_client.Init(Socket())); + + constexpr int kNumMessages = 2000; + constexpr int kMaxPayloadSize = 32 * 1024; + + auto random_payload = [](void *buffer) -> int { + int size = (rand() % (kMaxPayloadSize - 1)) + 1; + for (int i = 0; i < size; i++) { + reinterpret_cast(buffer)[i] = rand() % 256; + } + return size; + }; + for (int num_slots = 10; num_slots < 10000; + num_slots = (num_slots)*15 / 10) { + absl::StatusOr pub = pub_client.CreatePublisher( + "publat", kMaxPayloadSize, num_slots, {.reliable = false}); + ASSERT_OK(pub); + + std::cerr << num_slots << ","; + absl::StatusOr sub = sub_client.CreateSubscriber( + "publat", {.reliable = false, .log_dropped_messages = false}); + ASSERT_OK(sub); + + uint64_t total_time = 0; + + // Send messages ensuring there is always a retired message. Measure the + // total time to send (but not to receive). + for (int i = 0; i < kNumMessages; i++) { + // Publish a message. + uint64_t start_time = toolbelt::Now(); + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + int payload_size = random_payload(*buffer); + + absl::StatusOr pub_status = + pub->PublishMessage(payload_size); + ASSERT_OK(pub_status); + uint64_t end = toolbelt::Now(); + total_time += end - start_time; + + absl::StatusOr msg = sub->ReadMessage(); + ASSERT_OK(msg); + ASSERT_EQ(payload_size, msg->length); + } + std::cerr << total_time / kNumMessages << ","; + + // Now fill the channel. 
+ for (int i = 0; i < num_slots; i++) { + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + if (*buffer == nullptr) { + // Can't send, wait until we can try again. + ASSERT_OK(pub->Wait()); + continue; + } + int payload_size = random_payload(*buffer); + absl::StatusOr pub_status = + pub->PublishMessage(payload_size); + // std::cerr << "pub status " << pub_status.status() << "\n"; + ASSERT_OK(pub_status); + } + + // Send the same number of messages but with the channel full so that it has + // to take messages that subscribers have not yet seen. + total_time = 0; + for (int i = 0; i < kNumMessages; i++) { + // Publish a message. + uint64_t start_time = toolbelt::Now(); + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + + int payload_size = random_payload(*buffer); + absl::StatusOr pub_status = + pub->PublishMessage(payload_size); + ASSERT_OK(pub_status); + uint64_t end = toolbelt::Now(); + total_time += end - start_time; + } + std::cerr << total_time / kNumMessages << "\n"; + } +} + + +TEST_F(LatencyTest, PublisherLatencyPayloadChecksum) { + subspace::Client pub_client; + subspace::Client sub_client; + ASSERT_OK(pub_client.Init(Socket())); + ASSERT_OK(sub_client.Init(Socket())); + + constexpr int kNumMessages = 2000; + constexpr int kMaxPayloadSize = 32 * 1024; + + auto random_payload = [](void *buffer) -> int { + int size = (rand() % (kMaxPayloadSize - 1)) + 1; + for (int i = 0; i < size; i++) { + reinterpret_cast(buffer)[i] = rand() % 256; + } + return size; + }; + for (int num_slots = 10; num_slots < 10000; + num_slots = (num_slots)*15 / 10) { + absl::StatusOr pub = pub_client.CreatePublisher( + "publat", kMaxPayloadSize, num_slots, {.reliable = false, .checksum = true}); + ASSERT_OK(pub); + + std::cerr << num_slots << ","; + absl::StatusOr sub = sub_client.CreateSubscriber( + "publat", {.reliable = false, .log_dropped_messages = false, .checksum = true}); + ASSERT_OK(sub); + + uint64_t total_time = 0; + + // Send 
messages ensuring there is always a retired message. Measure the + // total time to send (but not to receive). + for (int i = 0; i < kNumMessages; i++) { + // Publish a message. + uint64_t start_time = toolbelt::Now(); + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + int payload_size = random_payload(*buffer); + + absl::StatusOr pub_status = + pub->PublishMessage(payload_size); + ASSERT_OK(pub_status); + uint64_t end = toolbelt::Now(); + total_time += end - start_time; + + absl::StatusOr msg = sub->ReadMessage(); + ASSERT_OK(msg); + ASSERT_EQ(payload_size, msg->length); + } + std::cerr << total_time / kNumMessages << ","; + + // Now fill the channel. + for (int i = 0; i < num_slots; i++) { + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + if (*buffer == nullptr) { + // Can't send, wait until we can try again. + ASSERT_OK(pub->Wait()); + continue; + } + int payload_size = random_payload(*buffer); + absl::StatusOr pub_status = + pub->PublishMessage(payload_size); + // std::cerr << "pub status " << pub_status.status() << "\n"; + ASSERT_OK(pub_status); + } + + // Send the same number of messages but with the channel full so that it has + // to take messages that subscribers have not yet seen. + total_time = 0; + for (int i = 0; i < kNumMessages; i++) { + // Publish a message. 
+ uint64_t start_time = toolbelt::Now(); + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + + int payload_size = random_payload(*buffer); + absl::StatusOr pub_status = + pub->PublishMessage(payload_size); + ASSERT_OK(pub_status); + uint64_t end = toolbelt::Now(); + total_time += end - start_time; + } + std::cerr << total_time / kNumMessages << "\n"; + } +} + + TEST_F(LatencyTest, PublisherLatencyHistogram) { subspace::Client pub_client; subspace::Client sub_client; @@ -683,6 +929,99 @@ TEST_F(LatencyTest, PublisherLatencyHistogram) { } } +TEST_F(LatencyTest, PublisherLatencyHistogramThreadSafe) { + subspace::Client pub_client; + subspace::Client sub_client; + pub_client.SetThreadSafe(true); + sub_client.SetThreadSafe(true); + ASSERT_OK(pub_client.Init(Socket())); + ASSERT_OK(sub_client.Init(Socket())); + + auto show_latencies = [](std::vector &latencies) { + // Sort latencies. + std::sort(latencies.begin(), latencies.end()); + // Min latency. + std::cerr << latencies.front() << ","; + // Median. + std::cerr << latencies[latencies.size() / 2] << ","; + // P99 latency. + std::cerr << latencies[latencies.size() * 99 / 100] << ","; + // Max latency. + std::cerr << latencies.back() << ","; + // Average latency. + uint64_t sum = 0; + for (auto &l : latencies) { + sum += l; + } + std::cerr << sum / latencies.size() << "\n"; + }; + constexpr int kNumMessages = 20000; + for (int num_slots = 10; num_slots < 100000; + num_slots = (num_slots)*15 / 10) { + absl::StatusOr pub = pub_client.CreatePublisher( + "publat", 256, num_slots, {.reliable = false}); + ASSERT_OK(pub); + + std::cerr << num_slots << ","; + absl::StatusOr sub = sub_client.CreateSubscriber( + "publat", {.reliable = false, .log_dropped_messages = false}); + ASSERT_OK(sub); + + std::vector latencies; + latencies.reserve(kNumMessages); + + // Send messages ensuring there is always a retired message. Measure the + // total time to send (but not to receive). 
+ for (int i = 0; i < kNumMessages; i++) { + // Publish a message. + uint64_t start_time = toolbelt::Now(); + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + absl::StatusOr pub_status = pub->PublishMessage(100); + ASSERT_OK(pub_status); + uint64_t end = toolbelt::Now(); + latencies.push_back(end - start_time); + + absl::StatusOr msg = sub->ReadMessage(); + ASSERT_OK(msg); + ASSERT_EQ(100, msg->length); + } + + show_latencies(latencies); + latencies.clear(); + std::cerr << num_slots << ","; + + // Now fill the channel. + for (int i = 0; i < num_slots; i++) { + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + if (*buffer == nullptr) { + // Can't send, wait until we can try again. + ASSERT_OK(pub->Wait()); + continue; + } + absl::StatusOr pub_status = pub->PublishMessage(100); + // std::cerr << "pub status " << pub_status.status() << "\n"; + ASSERT_OK(pub_status); + } + + // Send the same number of messages but with the channel full so that it has + // to take messages that subscribers have not yet seen. + for (int i = 0; i < kNumMessages; i++) { + // Publish a message. + uint64_t start_time = toolbelt::Now(); + absl::StatusOr buffer = pub->GetMessageBuffer(); + ASSERT_OK(buffer); + + absl::StatusOr pub_status = pub->PublishMessage(100); + ASSERT_OK(pub_status); + uint64_t end = toolbelt::Now(); + latencies.push_back(end - start_time); + } + show_latencies(latencies); + } +} + TEST_F(LatencyTest, PublisherLatencyMultiSub) { subspace::Client pub_client; subspace::Client sub_client; diff --git a/client/message.cc b/client/message.cc index 9861fcf..1987667 100644 --- a/client/message.cc +++ b/client/message.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. 
@@ -8,9 +8,11 @@ namespace subspace { ActiveMessage::ActiveMessage(std::shared_ptr subr, size_t len, MessageSlot *slot_ptr, const void *buf, - uint64_t ord, int64_t ts, int vid, bool activation) + uint64_t ord, int64_t ts, int vid, bool activation, + bool checksum_error) : sub(std::move(subr)), length(len), slot(slot_ptr), buffer(buf), - ordinal(ord), timestamp(ts), vchan_id(vid), is_activation(activation) { + ordinal(ord), timestamp(ts), vchan_id(vid), is_activation(activation), + checksum_error(checksum_error) { if (slot == nullptr) { return; } @@ -54,7 +56,7 @@ Message::Message(std::shared_ptr msg) buffer(active_message->buffer), ordinal(active_message->ordinal), timestamp(active_message->timestamp), vchan_id(active_message->vchan_id), is_activation(active_message->is_activation), - slot_id(active_message->slot != nullptr ? active_message->slot->id : -1) { -} + slot_id(active_message->slot != nullptr ? active_message->slot->id : -1), + checksum_error(active_message->checksum_error) {} } // namespace subspace diff --git a/client/message.h b/client/message.h index f2772e6..3f2a0c1 100644 --- a/client/message.h +++ b/client/message.h @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. @@ -32,10 +32,10 @@ struct ActiveMessage { ActiveMessage() = default; ActiveMessage(std::shared_ptr subr, size_t len, MessageSlot *slot_ptr, const void *buf, uint64_t ord, - int64_t ts, int vid, bool activation); - ActiveMessage(size_t len, uint64_t ord, uint64_t ts, int vid, bool activation) + int64_t ts, int vid, bool activation, bool checksum_error); + ActiveMessage(size_t len, uint64_t ord, uint64_t ts, int vid, bool activation, bool checksum_error) : length(len), ordinal(ord), timestamp(ts), vchan_id(vid), - is_activation(activation) {} + is_activation(activation), checksum_error(checksum_error) {} ~ActiveMessage(); // Can't be copied but can be moved. 
@@ -66,19 +66,37 @@ struct ActiveMessage { uint64_t timestamp = 0; // Nanosecond time message was published. int vchan_id = -1; // Virtual channel ID (or -1 if not used). bool is_activation = false; // Is this an activation message? + bool checksum_error = false; // Was there a checksum error? }; struct Message { Message() = default; Message(size_t len, const void *buf, uint64_t ord, int64_t ts, int vid, - bool activation, int32_t sid) + bool activation, int32_t sid, bool checksum_error) : length(len), buffer(buf), ordinal(ord), timestamp(ts), vchan_id(vid), - is_activation(activation), slot_id(sid) {} + is_activation(activation), slot_id(sid), checksum_error(checksum_error) {} Message(std::shared_ptr msg); void Release() { + if (active_message == nullptr) { + return; + } active_message->Release(); active_message.reset(); } + + // Releases the reference to the active message and clears all fields, including checksum_error. + void Reset() { + active_message.reset(); + length = 0; + buffer = nullptr; + ordinal = 0; + timestamp = 0; + vchan_id = -1; + is_activation = false; + slot_id = -1; + checksum_error = false; + } + std::string ChannelType() const; int NumSlots() const; uint64_t SlotSize() const; @@ -91,6 +109,7 @@ struct Message { int vchan_id = -1; // Virtual channel ID (or -1 if not used). bool is_activation = false; // Is this an activation message? int32_t slot_id = -1; + bool checksum_error = false; }; } // namespace subspace \ No newline at end of file diff --git a/client/options.h b/client/options.h index ecd947c..1e4f5c5 100644 --- a/client/options.h +++ b/client/options.h @@ -1,10 +1,11 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. -#ifndef __CLIENT_OPTIONS_H -#define __CLIENT_OPTIONS_H +#ifndef _xCLIENT_OPTIONS_H +#define _xCLIENT_OPTIONS_H +#include #include namespace subspace { @@ -24,6 +25,8 @@ namespace subspace { // // Your choice. +class Subscriber; + // Options when creating a publisher. 
struct PublisherOptions { int32_t SlotSize() const { return slot_size; } @@ -104,6 +107,21 @@ struct PublisherOptions { return *this; } + // If this is set to true all messages published will have a checksum + // calculated and placed in the MessagePrefix metadata. If subscribers want + // to verify the checksum they must set the SubscriberOptions.Checksum to true. + // Included in the checksum is the message prefix and the message + // data. + // + // Using checksums on all messages will increase the latency of message transmission. Use it + // sparingly. Subscribers generally map the buffers in read-only so the only way to corrupt it is + // for the publishing process to overwrite the buffer after it has been published. + PublisherOptions &SetChecksum(bool v) { + checksum = v; + return *this; + } + bool Checksum() const { return checksum; } + // If you use the new CreatePublisher API, set the slot size and num slots in // here. int32_t slot_size = 0; @@ -120,6 +138,7 @@ struct PublisherOptions { std::string mux; int vchan_id = -1; // If -1, server will assign. bool notify_retirement = false; + bool checksum = false; }; struct SubscriberOptions { @@ -182,6 +201,30 @@ struct SubscriberOptions { return *this; } + bool ReadWrite() const { return read_write; } + SubscriberOptions &SetReadWrite(bool v) { + read_write = v; + return *this; + } + + // If this option is set to true the checksum calculated by the publisher + // will be verified. The checksum is placed in the MessagePrefix metadata. See + // PassChecksumErrors below for options for handling checksum errors. + SubscriberOptions &SetChecksum(bool v) { + checksum = v; + return *this; + } + bool Checksum() const { return checksum; } + + // If we get a checksum error and this is true the message will be received + // but will have the checksum_error flag set. If false, an error will be + // returned from ReadMessage. 
+ SubscriberOptions &SetPassChecksumErrors(bool v) { + pass_checksum_errors = v; + return *this; + } + bool PassChecksumErrors() const { return pass_checksum_errors; } + bool reliable = false; bool bridge = false; std::string type; @@ -189,11 +232,14 @@ struct SubscriberOptions { bool log_dropped_messages = true; bool pass_activation = false; // If true, the subscriber will pass activation // messages to the user. + bool read_write = false; std::string mux; int vchan_id = -1; // If -1, server will assign. + bool checksum = false; + bool pass_checksum_errors = false; }; } // namespace subspace -#endif // __CLIENT_OPTIONS_H +#endif // _xCLIENT_OPTIONS_H diff --git a/client/publisher.cc b/client/publisher.cc index 2844896..8df9e39 100644 --- a/client/publisher.cc +++ b/client/publisher.cc @@ -1,14 +1,20 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. #include "client/publisher.h" +#include "client_channel.h" #include "toolbelt/clock.h" - +#include "client/checksum.h" namespace subspace { namespace details { absl::Status PublisherImpl::CreateOrAttachBuffers(uint64_t final_slot_size) { + if (final_slot_size == 0) { + // If we are being asked for a slot size of 0, we will just use 64 bytes. + // This is the minimum slot size we can use. 
+ final_slot_size = 64; + } size_t final_buffer_size = size_t(SlotSizeToBufferSize(final_slot_size)); uint64_t current_slot_size = 0; int num_buffers = ccb_->num_buffers.load(std::memory_order_relaxed); @@ -35,7 +41,7 @@ absl::Status PublisherImpl::CreateOrAttachBuffers(uint64_t final_slot_size) { absl::StatusOr addr; current_slot_size = BufferSizeToSlotSize(*size); if (current_slot_size > 0) { - addr = MapBuffer(*shm_fd, *size, /*read_only=*/false); + addr = MapBuffer(*shm_fd, *size, BufferMapMode::kReadWrite); if (!addr.ok()) { return addr.status(); } @@ -50,7 +56,8 @@ absl::Status PublisherImpl::CreateOrAttachBuffers(uint64_t final_slot_size) { // We successfully created the /dev/shm file. bcb_->sizes[buffers_.size()].store(final_buffer_size, std::memory_order_relaxed); - auto addr = MapBuffer(*shm_fd, final_buffer_size, /*read_only=*/false); + auto addr = + MapBuffer(*shm_fd, final_buffer_size, BufferMapMode::kReadWrite); if (!addr.ok()) { return addr.status(); } @@ -90,18 +97,26 @@ void PublisherImpl::SetSlotToBiggestBuffer(MessageSlot *slot) { MessageSlot *PublisherImpl::FindFreeSlotUnreliable(int owner) { int retries = num_slots_ * 1000; MessageSlot *slot = nullptr; - DynamicBitSet embargoed_slots(NumSlots()); + embargoed_slots_.ClearAll(); constexpr int max_cas_retries = 1000; int cas_retries = 0; int retired_slot = -1; + int free_slot = -1; for (;;) { CheckReload(); - // Look at the first retired slot. If there are no retired - // slots, look at all slots for the earliest unreferenced one. - retired_slot = RetiredSlots().FindFirstSet(); - if (retired_slot != -1) { + // First look at free slots then at retired slots. If there are no free or + // retired slots, look at all slots for the earliest unreferenced one. + if (!ccb_->free_slots_exhausted.load(std::memory_order_relaxed) && + (free_slot = FreeSlots().FindFirstSet()) != -1) { + // Take a free slot if there is one. 
+ slot = &ccb_->slots[free_slot]; + FreeSlots().Clear(free_slot); + if (FreeSlots().IsEmpty()) { + ccb_->free_slots_exhausted.store(true, std::memory_order_relaxed); + } + } else if ((retired_slot = RetiredSlots().FindFirstSet()) != -1) { // We have a retired slot. - if (embargoed_slots.IsSet(retired_slot)) { + if (embargoed_slots_.IsSet(retired_slot)) { continue; } RetiredSlots().Clear(retired_slot); @@ -110,7 +125,7 @@ MessageSlot *PublisherImpl::FindFreeSlotUnreliable(int owner) { // Find the slot with refs == 0 and the oldest message. uint64_t earliest_timestamp = -1ULL; for (int i = 0; i < num_slots_; i++) { - if (embargoed_slots.IsSet(i)) { + if (embargoed_slots_.IsSet(i)) { continue; } MessageSlot *s = &ccb_->slots[i]; @@ -124,6 +139,7 @@ MessageSlot *PublisherImpl::FindFreeSlotUnreliable(int owner) { } } } + if (slot == nullptr) { // We are guaranteed to find a slot, but let's not go into an infinite // loop if something goes wrong. @@ -145,7 +161,7 @@ MessageSlot *PublisherImpl::FindFreeSlotUnreliable(int owner) { if (!ValidateSlotBuffer(slot)) { // No buffer for the slot. Embargo the slot so we don't see it again // this loop and try again. - embargoed_slots.Set(slot->id); + embargoed_slots_.Set(slot->id); slot->refs.store(old_refs, std::memory_order_relaxed); continue; } @@ -166,13 +182,19 @@ MessageSlot *PublisherImpl::FindFreeSlotUnreliable(int owner) { p->vchan_id = vchan_id_; // We have a slot. Clear it in all the subscriber bitsets. - ccb_->subscribers.Traverse( - [this, slot](int sub_id) { GetAvailableSlots(sub_id).Clear(slot->id); }); + ccb_->subscribers.Traverse([this, slot](int sub_id) { + int vid = GetSubVchanId(sub_id); + if (vid != -1 && slot->vchan_id != -1 && vid != slot->vchan_id) { + return; + } + + GetAvailableSlots(sub_id).Clear(slot->id); + }); // If we took a slot that wasn't retired we must trigger the retirement fd. // This happens when we recycle a slot that has not yet been seen by all // subscribers. 
- if (retired_slot == -1) { + if (free_slot == -1 && retired_slot == -1) { TriggerRetirement(slot->id); } return slot; @@ -180,49 +202,58 @@ MessageSlot *PublisherImpl::FindFreeSlotUnreliable(int owner) { MessageSlot *PublisherImpl::FindFreeSlotReliable(int owner) { MessageSlot *slot = nullptr; - std::vector active_slots; - active_slots.reserve(NumSlots()); - DynamicBitSet embargoed_slots(NumSlots()); int retired_slot = -1; + int free_slot = -1; + embargoed_slots_.ClearAll(); for (;;) { CheckReload(); // Put all free slots into the active_slots vector. - active_slots.clear(); - retired_slot = RetiredSlots().FindFirstSet(); - if (retired_slot != -1) { // We have a retired slot. - if (embargoed_slots.IsSet(retired_slot)) { + active_slots_.clear(); + if (!ccb_->free_slots_exhausted.load(std::memory_order_relaxed) && + (free_slot = FreeSlots().FindFirstSet()) != -1) { + FreeSlots().Clear(free_slot); + if (FreeSlots().IsEmpty()) { + ccb_->free_slots_exhausted.store(true, std::memory_order_relaxed); + } + MessageSlot *s = &ccb_->slots[free_slot]; + + ActiveSlot active_slot = {s, s->ordinal, s->timestamp}; + active_slots_.push_back(active_slot); + } else if ((retired_slot = RetiredSlots().FindFirstSet()) != -1) { + if (embargoed_slots_.IsSet(retired_slot)) { continue; } RetiredSlots().Clear(retired_slot); MessageSlot *s = &ccb_->slots[retired_slot]; ActiveSlot active_slot = {s, s->ordinal, s->timestamp}; - active_slots.push_back(active_slot); + active_slots_.push_back(active_slot); } else { for (int i = 0; i < NumSlots(); i++) { - if (embargoed_slots.IsSet(i)) { + if (embargoed_slots_.IsSet(i)) { continue; } MessageSlot *s = &ccb_->slots[i]; uint64_t refs = s->refs.load(std::memory_order_relaxed); if ((refs & kPubOwned) == 0) { ActiveSlot active_slot = {s, s->ordinal, s->timestamp}; - active_slots.push_back(active_slot); + active_slots_.push_back(active_slot); } } } + // Sort the active slots by timestamp. 
// std::stable_sort gives consistently better performance than std::sort and // also is more deterministic in slot ordering. - std::stable_sort(active_slots.begin(), active_slots.end(), + std::stable_sort(active_slots_.begin(), active_slots_.end(), [](const ActiveSlot &a, const ActiveSlot &b) { return a.timestamp < b.timestamp; }); // Look for a slot with zero refs but don't go past one with non-zero // reliable ref count. - for (auto &s : active_slots) { + for (auto &s : active_slots_) { uint64_t refs = s.slot->refs.load(std::memory_order_relaxed); if (((refs >> kReliableRefCountShift) & kRefCountMask) != 0) { break; @@ -251,7 +282,7 @@ MessageSlot *PublisherImpl::FindFreeSlotReliable(int owner) { if (!ValidateSlotBuffer(slot)) { // No buffer for the slot. Embargo the slot so we don't see it again // this loop and try again. - embargoed_slots.Set(slot->id); + embargoed_slots_.Set(slot->id); slot->refs.store(old_refs, std::memory_order_relaxed); continue; } @@ -269,13 +300,18 @@ MessageSlot *PublisherImpl::FindFreeSlotReliable(int owner) { p->vchan_id = vchan_id_; // We have a slot. Clear it in all the subscriber bitsets. - ccb_->subscribers.Traverse( - [this, slot](int sub_id) { GetAvailableSlots(sub_id).Clear(slot->id); }); + ccb_->subscribers.Traverse([this, slot](int sub_id) { + int vid = GetSubVchanId(sub_id); + if (vid != -1 && slot->vchan_id != -1 && vid != slot->vchan_id) { + return; + } + GetAvailableSlots(sub_id).Clear(slot->id); + }); // If we took a slot that wasn't retired we must trigger the retirement fd. // This happens when we recycle a slot that has not yet been seen by all // subscribers. 
- if (retired_slot == -1) { + if (free_slot == -1 && retired_slot == -1) { TriggerRetirement(slot->id); } return slot; @@ -283,11 +319,7 @@ MessageSlot *PublisherImpl::FindFreeSlotReliable(int owner) { Channel::PublishedMessage PublisherImpl::ActivateSlotAndGetAnother( MessageSlot *slot, bool reliable, bool is_activation, int owner, - bool omit_prefix, bool *notify) { - if (notify != nullptr) { - *notify = true; // TODO: remove notify. - } - + bool omit_prefix, bool use_prefix_slot_id) { void *buffer = GetBufferAddress(slot); MessagePrefix *prefix = reinterpret_cast(buffer) - 1; @@ -299,7 +331,8 @@ Channel::PublishedMessage PublisherImpl::ActivateSlotAndGetAnother( if (omit_prefix) { slot->timestamp = prefix->timestamp; slot->vchan_id = prefix->vchan_id; - slot->bridged_slot_id = prefix->slot_id; + // The bridged_slot_id is the slot is used for the retirement notification. + slot->bridged_slot_id = use_prefix_slot_id ? prefix->slot_id : slot->id; } else { prefix->message_size = slot->message_size; prefix->ordinal = slot->ordinal; @@ -307,15 +340,31 @@ Channel::PublishedMessage PublisherImpl::ActivateSlotAndGetAnother( prefix->vchan_id = slot->vchan_id; prefix->flags = 0; prefix->slot_id = slot->id; + slot->bridged_slot_id = slot->id; if (is_activation) { - prefix->flags |= kMessageActivate; + prefix->SetIsActivation(); + slot->flags |= kMessageIsActivation; ccb_->activation_tracker.Activate(slot->vchan_id); } + if (options_.Checksum()) { + prefix->SetHasChecksum(); + // Checksum includes the prefix and the message data. Obviously the checksum itself isn't + // included since we are calculating it here. The first 4 bytes (padding) are also not + // incluced. + auto data = GetMessageChecksumData(prefix, buffer, slot->message_size); + prefix->checksum = CalculateChecksum(data); + } } // Update counters. 
- ccb_->total_messages++; - ccb_->total_bytes += slot->message_size; + if (!is_activation) { + ccb_->total_messages++; + ccb_->total_bytes += slot->message_size; + + if (slot->message_size > ccb_->max_message_size) { + ccb_->max_message_size = slot->message_size; + } + } // Set the refs to the ordinal with no refs. slot->refs.store(BuildRefsBitField(slot->ordinal, vchan_id_, 0), @@ -323,8 +372,8 @@ Channel::PublishedMessage PublisherImpl::ActivateSlotAndGetAnother( // Tell all subscribers that the slot is available. ccb_->subscribers.Traverse([this, slot](int sub_id) { - if (vchan_id_ != -1 && ccb_->sub_vchan_ids[sub_id] != -1 && - vchan_id_ != ccb_->sub_vchan_ids[sub_id]) { + if (vchan_id_ != -1 && GetSubVchanId(sub_id) != -1 && + vchan_id_ != GetSubVchanId(sub_id)) { return; } GetAvailableSlots(sub_id).Set(slot->id); diff --git a/client/publisher.h b/client/publisher.h index d70c323..9cf00f8 100644 --- a/client/publisher.h +++ b/client/publisher.h @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. 
@@ -16,9 +16,11 @@ class PublisherImpl : public ClientChannel { public: PublisherImpl(const std::string &name, int num_slots, int channel_id, int publisher_id, int vchan_id, uint64_t session_id, - std::string type, const PublisherOptions &options, std::function reload) + std::string type, const PublisherOptions &options, + std::function reload, int user_id, int group_id) : ClientChannel(name, num_slots, channel_id, vchan_id, - std::move(session_id), std::move(type), std::move(reload)), + std::move(session_id), std::move(type), + std::move(reload), user_id, group_id), publisher_id_(publisher_id), options_(options) {} bool IsReliable() const { return options_.IsReliable(); } @@ -32,21 +34,30 @@ class PublisherImpl : public ClientChannel { absl::Status CreateOrAttachBuffers(uint64_t slot_size); - void SetRetirementFd(toolbelt::FileDescriptor fd) { - retirement_fd_ = std::move(fd); - } + void SetRetirementFd(toolbelt::FileDescriptor fd) { + retirement_fd_ = std::move(fd); + } - // This is the read end of a pipe into which will be written the slot ids - // for retired slots. - const toolbelt::FileDescriptor& GetRetirementFd() const { - return retirement_fd_; - } + // This is the read end of a pipe into which will be written the slot ids + // for retired slots. + const toolbelt::FileDescriptor &GetRetirementFd() const { + return retirement_fd_; + } + + void SetOnSendCallback( + std::function(void *buffer, int64_t size)> + callback) { + on_send_callback_ = std::move(callback); + } + + std::string Mux() const { return options_.Mux(); } private: friend class ::subspace::ClientImpl; bool IsPublisher() const override { return true; } bool IsBridge() const override { return options_.IsBridge(); } + BufferMapMode MapMode() const override { return BufferMapMode::kReadWrite; } std::string ResolvedName() const override { return IsVirtual() ? 
options_.mux : Name(); @@ -71,13 +82,15 @@ class PublisherImpl : public ClientChannel { Channel::PublishedMessage ActivateSlotAndGetAnother(MessageSlot *slot, bool reliable, bool is_activation, int owner, bool omit_prefix, - bool *notify); + bool use_prefix_slot_id); - Channel::PublishedMessage - ActivateSlotAndGetAnother(bool reliable, bool is_activation, bool omit_prefix, - bool *notify) { + Channel::PublishedMessage ActivateSlotAndGetAnother(bool reliable, + bool is_activation, + bool omit_prefix, + bool use_prefix_slot_id) { return ActivateSlotAndGetAnother(slot_, reliable, is_activation, - publisher_id_, omit_prefix, notify); + publisher_id_, omit_prefix, + use_prefix_slot_id); } void ClearSubscribers() { subscribers_.clear(); } @@ -108,6 +121,9 @@ class PublisherImpl : public ClientChannel { std::vector subscribers_; PublisherOptions options_; toolbelt::FileDescriptor retirement_fd_ = {}; + std::function(void *buffer, int64_t size)> + on_send_callback_ = nullptr; + }; } // namespace details diff --git a/client/stress_test.cc b/client/stress_test.cc index 80c0b68..e32b866 100644 --- a/client/stress_test.cc +++ b/client/stress_test.cc @@ -1,10 +1,11 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. 
#include "absl/flags/flag.h" #include "absl/flags/parse.h" #include "absl/hash/hash_testing.h" +#include "absl/status/status_matchers.h" #include "client/client.h" #include "co/coroutine.h" #include "server/server.h" @@ -17,7 +18,6 @@ #include #include #include -#include "absl/status/status_matchers.h" ABSL_FLAG(bool, start_server, true, "Start the subspace server"); ABSL_FLAG(std::string, server, "", "Path to server executable"); @@ -38,7 +38,7 @@ using InetAddress = toolbelt::InetAddress; if (!result.ok()) { \ std::cerr << result.status() << std::endl; \ } \ - ASSERT_OK(result); \ + ASSERT_OK(result); \ std::move(*result); \ }) @@ -96,6 +96,7 @@ class StressTest : public ::testing::Test { void TearDown() override {} void InitClient(subspace::Client &client) { + client.SetThreadSafe(true); ASSERT_OK(client.Init(Socket())); } @@ -374,6 +375,96 @@ TEST_F(StressTest, Threads) { signal(SIGQUIT, oldSig); } +TEST_F(StressTest, ThreadSafety) { + auto oldSig = signal(SIGQUIT, SigQuitHandler); + + // Uses one client with a thread per pub and sub. Stress test the thread safety of the client. + constexpr int kNumChannels = 100; // Channels per client. + + constexpr int kMaxActiveMessages = 5; + // Number of channels accounts for all publishers, subscribers and unique_ptrs + constexpr int kNumSlots = kNumChannels * (2 + kMaxActiveMessages) + 1; + + std::shared_ptr client; + + std::vector channels; + std::vector pubs; + std::vector subs; + + client = EVAL_AND_ASSERT_OK( + subspace::Client::Create(Socket(), absl::StrFormat("client"))); + client->SetThreadSafe(true); + + // Create the channel names. + for (int j = 0; j < kNumChannels; j++) { + channels.push_back(absl::StrFormat("/foobar/%d", j)); + } + // Create the publishers. + for (int j = 0; j < kNumChannels; j++) { + pubs.emplace_back(EVAL_AND_ASSERT_OK(client->CreatePublisher( + channels[j], {.slot_size = 32, .num_slots = kNumSlots}))); + } + + // Create the subscribers. 
+ for (int j = 0; j < kNumChannels; j++) { + subs.emplace_back(EVAL_AND_ASSERT_OK(client->CreateSubscriber( + channels[j], {.max_active_messages = kMaxActiveMessages}))); + } + + std::vector threads; + + // Publish messages + auto publish = [&](int pubId) { + for (int i = 0; i < kNumSlots - 1; i++) { + absl::StatusOr buffer = pubs[pubId].GetMessageBuffer(); + ASSERT_OK(buffer); + memcpy(*buffer, "foobar", 6); + absl::StatusOr pubStatus = pubs[pubId].PublishMessage(6); + ASSERT_OK(pubStatus); + } + }; + + // Publish all messages from all publishers in all clients. + for (int i = 0; i < kNumChannels; i++) { + threads.push_back(std::thread([&, i]() { publish(i); })); + } + + // Read all messages from a single subscriber. + auto read = [&](int subId) { + int num_messages = 0; + while (num_messages < kNumSlots - 1) { + // Wait for notification of a message. + ASSERT_OK(subs[subId].Wait()); + + // Read all available messages up to our active message limit. + absl::StatusOr> msgs = + subs[subId].GetAllMessages(); + ASSERT_OK(msgs); + auto all = *msgs; + for (auto &msg : all) { + if (msg.length == 0) { + break; + } + num_messages++; + ASSERT_EQ(6, msg.length); + } + } + }; + + // Read all messages in all clients. + for (int i = 0; i < kNumChannels; i++) { + threads.push_back(std::thread([&, i]() { read(i); })); + } + + std::cerr << threads.size() << " threads created" << std::endl; + + // Wait for all threads to complete. + for (auto &t : threads) { + t.join(); + } + signal(SIGQUIT, oldSig); +} + TEST_F(StressTest, ActiveMessages) { auto oldSig = signal(SIGQUIT, SigQuitHandler); @@ -683,22 +774,11 @@ TEST_F(StressTest, VirtualChannels) { // Create a subscriber to the mux. absl::StatusOr s = subClient->CreateSubscriber("/foobar"); ASSERT_OK(s); - int pipes[2]; - ASSERT_EQ(0, pipe(pipes)); - toolbelt::FileDescriptor rfd(pipes[0]); // Thread to read messages from the mux. 
- std::thread sub_thread([&s, pipes, &rfd]() { + std::thread sub_thread([&s]() { int num_messages = 0; while (num_messages < kNumMessages) { - // Wait for notification of a message. - absl::StatusOr waitStatus = s->Wait(rfd); - ASSERT_OK(waitStatus); - if (*waitStatus == pipes[0]) { - // We got a notification from the pipe, so we can read messages. - continue; - } - // Read all available messages. for (;;) { absl::StatusOr m = s->ReadMessage(); @@ -735,14 +815,9 @@ TEST_F(StressTest, VirtualChannels) { for (auto &t : threads) { t.join(); } - sleep(2); - // Interrupt the subscriber to cause it to stop. - close(pipes[1]); // Wait for the subscriber to finish. sub_thread.join(); - - close(pipes[0]); } TEST_F(StressTest, ManyChannelsNonMultiplexed) { diff --git a/client/subscriber.cc b/client/subscriber.cc index 553bb27..2e2f8a6 100644 --- a/client/subscriber.cc +++ b/client/subscriber.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. 
@@ -20,8 +20,11 @@ static inline bool VirtualChannelIdMatch(MessageSlot *slot, int vchan_id) { } bool SubscriberImpl::AddActiveMessage(MessageSlot *slot) { + // std::cerr << "adding active message " << slot->id << " " << slot->ordinal + // << "\n"; int old = num_active_messages_.fetch_add(1); if (old >= options_.MaxActiveMessages() && !IsBridge()) { + // std::cerr << "exceeded max active messages\n"; num_active_messages_.fetch_sub(1); return false; } @@ -30,7 +33,8 @@ bool SubscriberImpl::AddActiveMessage(MessageSlot *slot) { void SubscriberImpl::RemoveActiveMessage(MessageSlot *slot) { // std::cerr << this << " remove active message " << slot->id << " " - // << slot->ordinal << "\n"; + // << slot->ordinal << " refs " << std::hex << slot->refs.load() << + // std::dec << "\n"; slot->sub_owners.Clear(subscriber_id_); AtomicIncRefCount(slot, IsReliable(), -1, slot->ordinal, slot->vchan_id, true, [this, slot]() { @@ -39,12 +43,16 @@ void SubscriberImpl::RemoveActiveMessage(MessageSlot *slot) { // in from a bridge we want to notify the original sender // of the message, not the bridge publisher. // - // The original slot id is in the message prefix and is copied - // into the slot when the bridge publisher publishes the - // message. + // The original slot id is in the message prefix and is + // copied into the slot when the bridge publisher + // publishes the message. + + // Enable this for debugging slot retirement. 
+ // std::string details = absl::StrFormat( + // "%d: RemoveActiveMessage: %s retiring slot %d ordinal %d vchan_id %d\n", getpid(), Name(), slot->bridged_slot_id, slot->ordinal, slot->vchan_id); + // std::cerr << details; TriggerRetirement(slot->bridged_slot_id); }); - if (num_active_messages_-- == options_.MaxActiveMessages()) { Trigger(); if (IsReliable()) { @@ -119,12 +127,11 @@ void SubscriberImpl::RememberOrdinal(uint64_t ordinal, int vchan_id) { tracker.ordinals.Insert(OrdinalAndVchanId{ordinal, vchan_id}); } -const ActiveSlot * -SubscriberImpl::FindUnseenOrdinal(const std::vector &active_slots) { +const ActiveSlot *SubscriberImpl::FindUnseenOrdinal() { // Traverse the active slots looking for the first ordinal that is not zero // and has not been seen by a subscriber. auto &tracker = GetOrdinalTracker(vchan_id_); - for (auto &s : active_slots) { + for (auto &s : active_slots_) { if (s.ordinal != 0 && !tracker.ordinals.Contains(OrdinalAndVchanId{s.ordinal, s.vchan_id})) { return &s; @@ -148,17 +155,15 @@ void SubscriberImpl::ClaimSlot(MessageSlot *slot, int vchan_id, slot->flags |= kMessageSeen; } -void SubscriberImpl::CollectVisibleSlots(InPlaceAtomicBitset &bits, - std::vector &active_slots, - const DynamicBitSet &embargoed_slots) { +void SubscriberImpl::CollectVisibleSlots(InPlaceAtomicBitset &bits) { uint64_t num_messages = 0; do { num_messages = ccb_->total_messages; - active_slots.clear(); + active_slots_.clear(); // Traverse the bits and add an active slot for each bit set. 
- bits.Traverse([this, &active_slots, &embargoed_slots](int i) { - if (embargoed_slots.IsSet(i)) { + bits.Traverse([this](int i) { + if (embargoed_slots_.IsSet(i)) { return; } MessageSlot *s = &ccb_->slots[i]; @@ -169,45 +174,51 @@ void SubscriberImpl::CollectVisibleSlots(InPlaceAtomicBitset &bits, return; } ActiveSlot active_slot = {s, s->ordinal, s->timestamp, s->vchan_id}; - active_slots.push_back(active_slot); + active_slots_.push_back(active_slot); }); } while (num_messages != ccb_->total_messages); } MessageSlot *SubscriberImpl::NextSlot(MessageSlot *slot, bool reliable, int owner) { - std::vector active_slots; - active_slots.reserve(NumSlots()); + InPlaceAtomicBitset &bits = GetAvailableSlots(owner); - DynamicBitSet embargoed_slots(NumSlots()); + embargoed_slots_.ClearAll(); constexpr int kMaxRetries = 1000; int retries = 0; while (retries++ < kMaxRetries) { - const bool print_errors = retries >= kMaxRetries - 10; +#ifndef NDEBUG + const bool print_errors = retries >= kMaxRetries - 1; +#else + const bool print_errors = false; +#endif CheckReload(); if (slot == nullptr) { // Prepopulate the active slots. PopulateActiveSlots(bits); } - CollectVisibleSlots(bits, active_slots, embargoed_slots); + CollectVisibleSlots(bits); // Sort the active slots by timestamp. 
- std::stable_sort(active_slots.begin(), active_slots.end(), + std::stable_sort(active_slots_.begin(), active_slots_.end(), [](const ActiveSlot &a, const ActiveSlot &b) { return a.timestamp < b.timestamp; }); - const ActiveSlot *new_slot = FindUnseenOrdinal(active_slots); + const ActiveSlot *new_slot = FindUnseenOrdinal(); if (new_slot == nullptr) { return nullptr; } if (print_errors) { - std::cerr << "sub looking at slot " << new_slot->slot->id << " ordinal " - << new_slot->ordinal << "\n"; + std::cerr << "Warning: subscriber for " << Name() + << " has reached the max retries for reference counter " + "increment on slot " + << new_slot->slot->id << " ordinal " << new_slot->ordinal + << "; this may indicate heavy use of the channel\n"; } // We have a new slot, see if we can increment the ref count. If we can't // we just go back and try again. @@ -216,12 +227,15 @@ MessageSlot *SubscriberImpl::NextSlot(MessageSlot *slot, bool reliable, if (!ValidateSlotBuffer(new_slot->slot) || new_slot->slot->buffer_index == -1) { if (print_errors) { - std::cerr << "sub failed on slot: "; + std::cerr << "Subscriber for " << Name() + << " detected buffer failure on slot: " + << new_slot->slot->id + << " buffer index: " << new_slot->slot->buffer_index; new_slot->slot->Dump(std::cerr); } // Failed to get a buffer for the slot. Embargo the slot so we don't // see it again this loop and try again. 
- embargoed_slots.Set(new_slot->slot->id); + embargoed_slots_.Set(new_slot->slot->id); AtomicIncRefCount(new_slot->slot, reliable, -1, new_slot->ordinal, new_slot->vchan_id, false); continue; @@ -234,29 +248,27 @@ MessageSlot *SubscriberImpl::NextSlot(MessageSlot *slot, bool reliable, MessageSlot *SubscriberImpl::LastSlot(MessageSlot *slot, bool reliable, int owner) { - std::vector active_slots; - active_slots.reserve(NumSlots()); - InPlaceAtomicBitset &bits = GetAvailableSlots(owner); - DynamicBitSet embargoed_slots(NumSlots()); + InPlaceAtomicBitset &bits = GetAvailableSlots(owner); + embargoed_slots_.ClearAll(); for (;;) { CheckReload(); if (slot == nullptr) { // Prepopulate the active slots. PopulateActiveSlots(bits); } - CollectVisibleSlots(bits, active_slots, embargoed_slots); + CollectVisibleSlots(bits); // Sort the active slots by timestamp. - std::stable_sort(active_slots.begin(), active_slots.end(), + std::stable_sort(active_slots_.begin(), active_slots_.end(), [](const ActiveSlot &a, const ActiveSlot &b) { return a.timestamp < b.timestamp; }); ActiveSlot *new_slot = nullptr; - if (!active_slots.empty()) { - new_slot = &active_slots.back(); + if (!active_slots_.empty()) { + new_slot = &active_slots_.back(); if (slot != nullptr && slot == new_slot->slot) { // Same slot, nothing changes. @@ -274,7 +286,7 @@ MessageSlot *SubscriberImpl::LastSlot(MessageSlot *slot, bool reliable, new_slot->slot->buffer_index == -1) { // Failed to get a buffer for the slot. Embargo the slot so we don't // see it again this loop and try again. 
- embargoed_slots.Set(new_slot->slot->id); + embargoed_slots_.Set(new_slot->slot->id); AtomicIncRefCount(new_slot->slot, reliable, -1, new_slot->ordinal, new_slot->vchan_id, false); continue; @@ -287,8 +299,7 @@ MessageSlot *SubscriberImpl::LastSlot(MessageSlot *slot, bool reliable, MessageSlot *SubscriberImpl::FindActiveSlotByTimestamp( MessageSlot *old_slot, uint64_t timestamp, bool reliable, int owner, std::vector &buffer) { - DynamicBitSet embargoed_slots(NumSlots()); - + embargoed_slots_.ClearAll(); for (;;) { CheckReload(); buffer.clear(); @@ -296,7 +307,7 @@ MessageSlot *SubscriberImpl::FindActiveSlotByTimestamp( // Prepopulate the search buffer. for (int i = 0; i < NumSlots(); i++) { - if (embargoed_slots.IsSet(i)) { + if (embargoed_slots_.IsSet(i)) { continue; } MessageSlot *s = &ccb_->slots[i]; @@ -332,7 +343,7 @@ MessageSlot *SubscriberImpl::FindActiveSlotByTimestamp( if (!ValidateSlotBuffer(it->slot) || it->slot->buffer_index == -1) { // Failed to get a buffer for the slot. Embargo the slot so we don't // see it again this loop and try again. - embargoed_slots.Set(it->slot->id); + embargoed_slots_.Set(it->slot->id); AtomicIncRefCount(it->slot, reliable, -1, it->ordinal, it->vchan_id, false); continue; diff --git a/client/subscriber.h b/client/subscriber.h index 6a4c298..d312220 100644 --- a/client/subscriber.h +++ b/client/subscriber.h @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. 
@@ -41,9 +41,11 @@ class SubscriberImpl : public ClientChannel { public: SubscriberImpl(const std::string &name, int num_slots, int channel_id, int subscriber_id, int vchan_id, uint64_t session_id, - std::string type, const SubscriberOptions &options, std::function reload) + std::string type, const SubscriberOptions &options, + std::function reload, int user_id, int group_id) : ClientChannel(name, num_slots, channel_id, vchan_id, - std::move(session_id), std::move(type), std::move(reload)), + std::move(session_id), std::move(type), + std::move(reload), user_id, group_id), subscriber_id_(subscriber_id), options_(options) {} std::shared_ptr shared_from_this() { @@ -62,6 +64,8 @@ class SubscriberImpl : public ClientChannel { int32_t SlotSize() const { return ClientChannel::SlotSize(CurrentSlot()); } + bool IsPlaceholder() const { return ClientChannel::NumSlots() == 0; } + bool AddActiveMessage(MessageSlot *slot); void RemoveActiveMessage(MessageSlot *slot); @@ -71,16 +75,20 @@ class SubscriberImpl : public ClientChannel { return num_active_messages_ < options_.MaxActiveMessages(); } + // This is the configured virtual channel ID, not the value assigned when the + // subscriber is created by the server. The difference is that the configured + // value is normally -1 and this allows the server to pick a vchan ID. The + // use of a configured ID is to allow a multiplexer subscriber to determine + // which channel a vchanId corresponds to when it receives messages. 
+ int ConfiguredVchanId() const { return options_.vchan_id; } + const ActiveSlot * - FindUnseenOrdinal(const std::vector &active_slots); + FindUnseenOrdinal(); void PopulateActiveSlots(InPlaceAtomicBitset &bits); - void ClaimSlot(MessageSlot *slot, int vchan_id, - bool was_newest); + void ClaimSlot(MessageSlot *slot, int vchan_id, bool was_newest); void RememberOrdinal(uint64_t ordinal, int vchan_id); - void CollectVisibleSlots(InPlaceAtomicBitset &bits, - std::vector &active_slots, - const DynamicBitSet &embargoed_slots); + void CollectVisibleSlots(InPlaceAtomicBitset &bits); void IgnoreActivation(MessageSlot *slot) { RememberOrdinal(slot->ordinal, slot->vchan_id); @@ -104,10 +112,10 @@ class SubscriberImpl : public ClientChannel { std::shared_ptr SetActiveMessage(size_t len, MessageSlot *slot, const void *buf, uint64_t ord, int64_t ts, int vchan_id, - bool is_activation) { + bool is_activation, bool checksum_error) { active_message_.reset(); active_message_ = std::make_shared(ActiveMessage{ - shared_from_this(), len, slot, buf, ord, ts, vchan_id, is_activation}); + shared_from_this(), len, slot, buf, ord, ts, vchan_id, is_activation, checksum_error}); return active_message_; } @@ -134,7 +142,7 @@ class SubscriberImpl : public ClientChannel { } auto msg = std::make_shared(ActiveMessage{ shared_from_this(), slot->message_size, slot, GetBufferAddress(slot), - slot->ordinal, Timestamp(slot), slot->vchan_id, false}); + slot->ordinal, Timestamp(slot), slot->vchan_id, false, false}); if (msg->length == 0) { // Failed to get an active message, return an empty shared_ptr. 
return nullptr; @@ -157,6 +165,29 @@ class SubscriberImpl : public ClientChannel { std::vector &buffer); void Trigger() { trigger_.Trigger(); } + void Untrigger() { trigger_.Clear(); } + + void TriggerReliablePublishers() { + std::unique_lock lock(reliable_publishers_mutex_); + for (auto &fd : reliable_publishers_) { + fd.Trigger(); + } + } + + void SetOnReceiveCallback(std::function(void* buffer, int64_t size)> callback) { + on_receive_callback_ = std::move(callback); + } + + std::string Mux() const { return options_.Mux(); } + + bool ValidateChecksum(const std::array, 2>& data, uint32_t checksum) { + if (!options_.Checksum()) { + return true; + } + return VerifyChecksum(data, checksum); + } + + bool PassChecksumErrors() const { return options_.PassChecksumErrors(); } private: friend class ::subspace::ClientImpl; @@ -166,6 +197,12 @@ class SubscriberImpl : public ClientChannel { uint64_t last_ordinal_seen = 0; }; + BufferMapMode MapMode() const override { + return options_.IsBridge() || options_.ReadWrite() + ? BufferMapMode::kReadWrite + : BufferMapMode::kReadOnly; + } + bool IsSubscriber() const override { return true; } bool IsBridge() const override { return options_.IsBridge(); } @@ -190,12 +227,6 @@ class SubscriberImpl : public ClientChannel { trigger_.SetPollFd(std::move(fd)); } int GetSubscriberId() const { return subscriber_id_; } - void TriggerReliablePublishers() { - std::unique_lock lock(reliable_publishers_mutex_); - for (auto &fd : reliable_publishers_) { - fd.Trigger(); - } - } MessageSlot *NextSlot() { return NextSlot(CurrentSlot(), IsReliable(), subscriber_id_); @@ -247,6 +278,7 @@ class SubscriberImpl : public ClientChannel { // The callback to call when a message is received. 
std::function message_callback_; + std::function(void* buffer, int64_t size)> on_receive_callback_ = nullptr; }; } // namespace details } // namespace subspace diff --git a/common/BUILD.bazel b/common/BUILD.bazel index e1cf3f0..6f1ba8d 100644 --- a/common/BUILD.bazel +++ b/common/BUILD.bazel @@ -1,5 +1,7 @@ package(default_visibility = ["//visibility:public"]) +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") + cc_library( name = "subspace_common", srcs = [ @@ -25,7 +27,7 @@ cc_library( "@abseil-cpp//absl/strings:str_format", "@protobuf//:protobuf", "@coroutines//:co", - "@toolbelt//toolbelt", + "@cpp_toolbelt//toolbelt", ], ) diff --git a/common/atomic_bitset.h b/common/atomic_bitset.h index 013ffcd..9ba9e35 100644 --- a/common/atomic_bitset.h +++ b/common/atomic_bitset.h @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. @@ -140,9 +140,15 @@ using InPlaceAtomicBitset = AtomicBitSet<0>; class DynamicBitSet { public: + DynamicBitSet() : num_bits_(0), bits_(0) {} DynamicBitSet(size_t num_bits) : num_bits_(num_bits), bits_(BitsToWords(num_bits), 0) {} + void Resize(size_t num_bits) { + num_bits_ = num_bits; + bits_.resize(BitsToWords(num_bits), 0); + } + void Set(size_t bit) { size_t word = bit / 64; size_t offset = bit % 64; diff --git a/common/channel.cc b/common/channel.cc index c26d13d..b7feb62 100644 --- a/common/channel.cc +++ b/common/channel.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. @@ -157,13 +157,17 @@ bool Channel::AtomicIncRefCount(MessageSlot *slot, bool reliable, int inc, // This is a special case where the vchan_id is invalid. 
ref_vchan_id = -1; } - if (ref_ord != 0 && ordinal != 0 && ref_vchan_id != vchan_id) { return false; } uint64_t new_refs = ref & kRefCountMask; uint64_t new_reliable_refs = (ref >> kReliableRefCountShift) & kRefCountMask; + if (inc < 0 && new_refs == 0) { + // Don't try to decrement the refs below zero. How can this happen? + return true; + } + new_refs += inc; if (reliable) { new_reliable_refs += inc; @@ -175,14 +179,21 @@ bool Channel::AtomicIncRefCount(MessageSlot *slot, bool reliable, int inc, } uint64_t new_ref = BuildRefsBitField(ref_ord, ref_vchan_id, retired_refs) | (new_reliable_refs << kReliableRefCountShift) | new_refs; + if (new_ref == -1ULL) { + abort(); + } if (slot->refs.compare_exchange_weak(ref, new_ref, std::memory_order_relaxed)) { - // std::cerr << slot->id << " retired_refs: " << retired_refs - // << " num subs: " << NumSubscribers(vchan_id) << std::endl; - if (retired_refs >= NumSubscribers(ref_vchan_id)) { + // std::string details = absl::StrFormat( + // "%d: AtomicIncRefCount: %s slot %d ordinal %d retired_refs: %d NumSubscribers: %d retire: %d\n", getpid(), Name(), slot->id, ordinal, retired_refs, NumSubscribers(ref_vchan_id), retire); + // std::cerr << details; + if (retire && new_refs == 0 && new_reliable_refs == 0 && + retired_refs >= NumSubscribers(ref_vchan_id)) { // All subscribers have seen the slot, retire it. 
RetiredSlots().Set(slot->id); if (retire_callback) { + // std::cerr << "Calling retire callback for slot " << slot->id + // << std::endl; retire_callback(); } } @@ -200,7 +211,7 @@ void MessageSlot::Dump(std::ostream &os) const { uint64_t just_refs = l_refs & kRefCountMask; uint64_t ref_ord = (l_refs >> kOrdinalShift) & kOrdinalMask; - os << "Slot: " << id; + os << this << " Slot: " << id; if (is_pub) { os << " publisher " << just_refs; } else { @@ -210,7 +221,7 @@ void MessageSlot::Dump(std::ostream &os) const { os << " ordinal: " << ordinal << " buffer_index: " << buffer_index << " vchan_id: " << vchan_id << " timestamp: " << timestamp << " message size: " << message_size << " raw refs: " << std::hex << refs - << std::dec << "\n"; + << " flags: " << flags << std::dec << "\n"; } void Channel::DumpSlots(std::ostream &os) const { @@ -220,6 +231,8 @@ void Channel::DumpSlots(std::ostream &os) const { } os << "Retired slots: "; RetiredSlots().Print(os); + os << "Free slots: "; + FreeSlots().Print(os); } void Channel::Dump(std::ostream &os) const { @@ -259,6 +272,17 @@ void Channel::GetStatsCounters(uint64_t &total_bytes, uint64_t &total_messages, total_drops = ccb_->total_drops; } +uint64_t Channel::GetVirtualMemoryUsage() const { + uint64_t size = sizeof(SystemControlBlock) + CcbSize(num_slots_) + + sizeof(BufferControlBlock); + for (int i = 0; i < ccb_->num_buffers; i++) { + if (bcb_->refs[i] > 0) { + size += bcb_->sizes[i]; + } + } + return size; +} + void Channel::CleanupSlots(int owner, bool reliable, bool is_pub, int vchan_id) { if (is_pub) { @@ -270,7 +294,8 @@ void Channel::CleanupSlots(int owner, bool reliable, bool is_pub, if (refs == (kPubOwned | uint64_t(owner))) { // Owned by this publisher, clear slot. slot->ordinal = 0; - slot->refs = 0; // Sequentially consistent because we've changed the ordinal too. + slot->refs = + 0; // Sequentially consistent because we've changed the ordinal too. // Clear the slot in all the subscriber bitsets. 
ccb_->subscribers.Traverse([this, slot](int sub_id) { @@ -296,14 +321,15 @@ void Channel::CleanupSlots(int owner, bool reliable, bool is_pub, } } -#if defined(__APPLE__) +#if SUBSPACE_SHMEM_MODE == SUBSPACE_SHMEM_MODE_POSIX absl::StatusOr -Channel::MacOsSharedMemoryName(const std::string &shadow_file) { +Channel::PosixSharedMemoryName(const std::string &shadow_file) { struct stat st; int e = ::stat(shadow_file.c_str(), &st); if (e == -1) { - return absl::InternalError(absl::StrFormat( - "Failed to determine MacOS shm name for %s: %s", shadow_file, strerror(errno))); + return absl::InternalError( + absl::StrFormat("Failed to determine Posix shm name for %s: %s", + shadow_file, strerror(errno))); } // Use the inode number (unique per file) to make the shm file name. return absl::StrFormat("subspace_%d", st.st_ino); diff --git a/common/channel.h b/common/channel.h index 421d9a2..fea964a 100644 --- a/common/channel.h +++ b/common/channel.h @@ -1,10 +1,10 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. -#ifndef __COMMON_CHANNEL_H -#define __COMMON_CHANNEL_H +#ifndef _xCOMMON_CHANNEL_H +#define _xCOMMON_CHANNEL_H #include @@ -22,8 +22,22 @@ namespace subspace { -// Max message size for comms with server. -static constexpr size_t kMaxMessage = 4096; +#define SUBSPACE_SHMEM_MODE_POSIX 1 +#define SUBSPACE_SHMEM_MODE_LINUX 2 + +// Change this if you want to use a different shared memory mode. +#if defined(__linux__) +// On Linux we can use /dev/shm directly for shared memory. +#define SUBSPACE_SHMEM_MODE SUBSPACE_SHMEM_MODE_LINUX +#else +// On other systems we need to use a file in /tmp and then create a shared memory segment with the same name. +#define SUBSPACE_SHMEM_MODE SUBSPACE_SHMEM_MODE_POSIX +#endif + +// Flag for flags field in MessagePrefix. +constexpr int kMessageActivate = 1; // This is a reliable activation message. 
+constexpr int kMessageBridged = 2; // This message came from the bridge. +constexpr int kMessageHasChecksum = 4; // This message has a checksum. // This is stored immediately before the channel buffer in shared // memory. It is transferred intact across the TCP bridges. @@ -49,18 +63,23 @@ struct MessagePrefix { uint64_t timestamp; int64_t flags; int32_t vchan_id; - char padding2[64 - 44]; // Align to 64 bytes. + uint32_t checksum; + char padding2[64 - 48]; // Align to 64 bytes. + + bool IsActivation() const { return (flags & kMessageActivate) != 0; } + void SetIsActivation() { flags |= kMessageActivate; } + bool IsBridged() const { return (flags & kMessageBridged) != 0; } + void SetIsBridged() { flags |= kMessageBridged; } + bool HasChecksum() const { return (flags & kMessageHasChecksum) != 0; } + void SetHasChecksum() { flags |= kMessageHasChecksum; } }; static_assert(sizeof(MessagePrefix) == 64, "MessagePrefix size is not 64 bytes"); -// Flag for flags field in MessagePrefix. -constexpr int kMessageActivate = 1; // This is a reliable activation message. -constexpr int kMessageBridged = 2; // This message came from the bridge. - // Flags for MessageSlot flags. -constexpr int kMessageSeen = 1; // Message has been seen. +constexpr int kMessageSeen = 1; // Message has been seen. +constexpr int kMessageIsActivation = 2; // This is an activation message. // We need a max channels number because the size of things in // shared memory needs to be fixed. @@ -144,6 +163,7 @@ struct ChannelCounters { uint16_t num_reliable_pubs; // Current number of reliable publishers. uint16_t num_subs; // Current number of subscribers. uint16_t num_reliable_subs; // Current number of reliable subscribers. + uint16_t num_resizes; // Number of times channel has been resized. }; struct SystemControlBlock { @@ -161,7 +181,7 @@ struct MessageSlot { AtomicBitSet sub_owners; // One bit per subscriber. uint64_t timestamp; // Timestamp of message. 
uint32_t flags; - int32_t bridged_slot_id; // Slot ID of other side of bridge. + int32_t bridged_slot_id; // Slot ID of other side of bridge. void Dump(std::ostream &os) const; }; @@ -180,6 +200,21 @@ struct BufferControlBlock { sizes[kMaxBuffers]; // Number of references to this buffer. }; +// Given a message prefix and a buffer containing the message data return a vector of spans +// that can be used to calculate the checksum. +inline std::array, 2> +GetMessageChecksumData(MessagePrefix *prefix, void *buffer, + size_t message_size) { + std::array, 2> data = { + absl::Span(reinterpret_cast( + prefix) + offsetof(MessagePrefix, slot_id), + offsetof(MessagePrefix, checksum) - + offsetof(MessagePrefix, slot_id)), + absl::Span(reinterpret_cast(buffer), + message_size)}; + return data; +} + // This counts the number of subscribers given a virtual channel id. class SubscriberCounter { public: @@ -204,9 +239,9 @@ class SubscriberCounter { class OrdinalAccumulator { public: - void Init() { + void Init(int v) { for (int i = 0; i < kMaxVchanId + 1; i++) { - ordinals_[i] = 1; + ordinals_[i] = v; } } uint64_t Next(int vchan_id) { return ordinals_[vchan_id + 1]++; } @@ -254,12 +289,19 @@ struct ChannelControlBlock { // a.k.a CCB std::atomic max_message_size; std::atomic total_drops; + // If true there are no more free slots and there's no need to check + // the bitset for them (there will never be another free slot) + std::atomic free_slots_exhausted; + // Variable number of MessageSlot structs (num_slots long). 
MessageSlot slots[0]; // Followed by: // AtomicBitSet<0> retiredSlots[num_slots]; // Followed by: + // AtomicBitSet<0> freeSlots[num_slots]; + // Followed by: // AtomicBitSet<0> availableSlots[kMaxSlotOwners]; + // }; inline size_t AvailableSlotsSize(int num_slots) { @@ -269,7 +311,8 @@ inline size_t AvailableSlotsSize(int num_slots) { inline size_t CcbSize(int num_slots) { return Aligned(sizeof(ChannelControlBlock) + num_slots * sizeof(MessageSlot)) + - Aligned(SizeofAtomicBitSet(num_slots)) + AvailableSlotsSize(num_slots); + Aligned(SizeofAtomicBitSet(num_slots)) * 2 + + AvailableSlotsSize(num_slots); } struct SlotBuffer { @@ -338,9 +381,9 @@ class Channel : public std::enable_shared_from_this { virtual std::string ResolvedName() const = 0; -#if defined(__APPLE__) +#if SUBSPACE_SHMEM_MODE == SUBSPACE_SHMEM_MODE_POSIX static absl::StatusOr - MacOsSharedMemoryName(const std::string &shadow_file); + PosixSharedMemoryName(const std::string &shadow_file); #endif // For debug, prints the contents of the three linked lists in // shared memory, @@ -361,6 +404,8 @@ class Channel : public std::enable_shared_from_this { } } + int GetSubVchanId(int32_t i) const { return ccb_->sub_vchan_ids[i]; } + void DumpSlots(std::ostream &os) const; virtual void Dump(std::ostream &os) const; @@ -377,7 +422,7 @@ class Channel : public std::enable_shared_from_this { // Get the number of slots in the channel (can't be changed) int NumSlots() const { return num_slots_; } - void SetNumSlots(int n) { num_slots_ = n; } + virtual void SetNumSlots(int n) { num_slots_ = n; } std::string SlotType() const { return type_; } void CleanupSlots(int owner, bool reliable, bool is_pub, int vchan_id); @@ -421,6 +466,9 @@ class Channel : public std::enable_shared_from_this { char *EndOfRetiredSlots() const { return EndOfSlots() + Aligned(SizeofAtomicBitSet(num_slots_)); } + char *EndOfFreeSlots() const { + return EndOfRetiredSlots() + Aligned(SizeofAtomicBitSet(num_slots_)); + } InPlaceAtomicBitset 
*RetiredSlotsAddr() { return reinterpret_cast(EndOfSlots()); @@ -434,19 +482,33 @@ class Channel : public std::enable_shared_from_this { return *reinterpret_cast(EndOfSlots()); } + InPlaceAtomicBitset *FreeSlotsAddr() { + return reinterpret_cast(EndOfRetiredSlots()); + } + + InPlaceAtomicBitset &FreeSlots() { + return *reinterpret_cast(EndOfRetiredSlots()); + } + + const InPlaceAtomicBitset &FreeSlots() const { + return *reinterpret_cast(EndOfRetiredSlots()); + } + InPlaceAtomicBitset &GetAvailableSlots(int sub_id) { return *GetAvailableSlotsAddress(sub_id); } InPlaceAtomicBitset *GetAvailableSlotsAddress(int sub_id) { return reinterpret_cast( - EndOfRetiredSlots() + SizeofAtomicBitSet(num_slots_) * sub_id); + EndOfFreeSlots() + SizeofAtomicBitSet(num_slots_) * sub_id); } bool IsActivated(int vchan_id) const { return ccb_->activation_tracker.IsActivated(vchan_id); } + virtual uint64_t GetVirtualMemoryUsage() const; + protected: int32_t ToCCBOffset(void *addr) const { return (int32_t)(reinterpret_cast(addr) - @@ -487,4 +549,4 @@ class Channel : public std::enable_shared_from_this { }; } // namespace subspace -#endif /* __COMMON_CHANNEL_H */ +#endif /* _xCOMMON_CHANNEL_H */ diff --git a/common/fast_ring_buffer.h b/common/fast_ring_buffer.h index f4da472..7c23b13 100644 --- a/common/fast_ring_buffer.h +++ b/common/fast_ring_buffer.h @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. diff --git a/coroutines/BUILD.bazel b/coroutines/BUILD.bazel new file mode 100644 index 0000000..c0545a4 --- /dev/null +++ b/coroutines/BUILD.bazel @@ -0,0 +1,14 @@ +package(default_visibility = ["//visibility:public"]) + +# This is a wrapper library for the co coroutine library. If you get multiple definition of the +# coroutine symbols when linking with both the co coroutine library and subspace, you can depend +# on this library instead of the co library directly. 
+# +# This might be a snafu in Bazel. I suspect that it can't correlate the two different references +# to the coroutine library when linking with both the co library and subspace. +cc_library( + name = "coroutines", + deps = [ + "@coroutines//co:co", + ], +) diff --git a/docs/Version2Features.md b/docs/Version2Features.md index d6e8a60..f0c7615 100644 --- a/docs/Version2Features.md +++ b/docs/Version2Features.md @@ -48,7 +48,7 @@ The previous version of Subpace used a custom implementation of a `shared_ptr` a This version, due to the complexity of implementing a lock-free algorithm, changes the implementation to wrap the standard library implementation and is thus much simpler. It should work in the same way so you shouldn't see any difference if you used the old versions. ## Message callbacks. -You can now register a callback function for a subsciber that can be invoked for every message received by the subscriber. The function is called from an invocation of `ProcessAllMessages` on the same subscriber. This will call the registered callback function for every available message, returning when there are no more messages available. The callback function is called with a pointer to `Subscriber` and the instance of the `Message` received. +You can now register a callback function for a subscriber that can be invoked for every message received by the subscriber. The function is called from an invocation of `ProcessAllMessages` on the same subscriber. This will call the registered callback function for every available message, returning when there are no more messages available. The callback function is called with a pointer to `Subscriber` and the instance of the `Message` received. An example of the use of this is: ```c++ @@ -63,12 +63,21 @@ for (;;) { ``` +## Message retirement notifications +If your publishers are sending messages that contain references to external things (like shared memory indexes, files, GPU pointers, etc.)
you will need to know when you are able to free up the external thing. There is now a publisher option available to enable the system to notify the publisher when all the subscribers have seen the message. + +If you set the `notify_retirement` option in `PublisherOptions`, the system will create a pipe that will carry notifications of when a message slot has been retired. The `slot ID` for the retired slots will be written (as a 32 bit integer) to the pipe and the publisher end can read this pipe to receive the notification. + +Say you have a message that contains a pointer to something in the GPU (a number). The publisher will allocate the GPU memory, put a reference to it in the message and publish it. The publisher needs to know when it can free up the GPU memory and reuse it, otherwise you would run out of memory. So the publisher process will set the `notify_retirement` option and call `GetRetirementFd` to get the file descriptor for the read-end of the pipe. + +Then, when all the subscribers have processed the message (or the message is dropped), the `slot ID` of the slot containing the message will be written to the pipe. The publisher process can then read the slot ID from the pipe and correlate that number with the GPU pointer (it will need to keep that mapping). It can then safely free up the GPU pointer. + ## Multiplexed virtual channels There is a style of IPC usage that insists on only one publisher per channel. If you are a proponent of this, you will probably be creating a bunch of channels, perhaps a set of them per process/node that all have one publisher and a few subscribers. In an IPC system that uses TCP for transport this isn't a big deal since they will use the kernel's pre-allocated TCP buffers for their message storage, but in a shared memory IPC system, each channel will have a ring buffer of shared memory allocated for them. As the system grows, the number of channels also grows and the amount of shared memory increases accordingly.
This is a bit of stress on the memory. -The new feature in this release is the ability to create a set of channels that all share the same `multiplexer` channel. These are called `virtual channels` and all publishers and subscribers work with them in exactly the same manner as with normal channels. +The new feature in this release is the ability to create a set of channels that all share the same `multiplexer` channel. These are called `virtual channels` and all publishers and subscribers work with them in exactly the same manner as with normal channels. To create a virtual channel set the `mux` member of the publisher or subscriber options to the name of another channel that will be used as the multiplexer channel. All virtual channels that use the same multiplexer channel will shared the slots in that multiplexer channel instead of allocating their own shared memory. Like any other channel, a multiplexer channel will be created by the first publisher or subscriber to use it. You cannot create a publisher to a multiplexer channel that is not a virtual publisher, but you can create a subscriber to one. @@ -121,7 +130,7 @@ auto sub = client.CreateSubscriber("/logs_mux"); ``` Notice that the subscriber is created on the multiplexer channel name and does not use the `mux` option. This subscriber will be able to read all messages sent on the multiplexer and each message contains a `vchan_id` member that specifies that virtual channel the message is carried on. You can use this number to determine the name of the virtual channel either by recording the values assigned by the server or using the mapping you chose to determine `vchan_ids`. -If you are using a multi-computer system where messages are sent over a network between computers using a multiplexer subscriber (the most efficient way), it is probably best to determine the virtual channel ID vs channel name mapping ahead of time and apply the same mapping to all computers. 
+If you are using a multi-computer system where messages are sent over a network between computers using a multiplexer subscriber (the most efficient way), it is probably best to determine the virtual channel ID vs channel name mapping ahead of time and apply the same mapping to all computers. ## Client side buffer allocations In version 1 the buffers for channels were allocated by the server and communicated to the client via @@ -141,3 +150,36 @@ in the C++ client with many fewer dependencies. Being in C, it's more portable maps onto the C++ client but provides C-linkage functions. This should be easier to map into other languages like Rust or Go in the future. + +## Remote Procedure Calls (RPC) +RPC is a way for one process to invoke a procedure on another. Subspace 2 provides an API similar to `gRPC` for performing this task over shared memory. RPC has different semantics than pub/sub in that it's always point-to-point and it always uses reliable messaging (it would be bad for a command to be dropped before the server has seen it). + +The RPC system comprises a set of clients and a set of servers. Each server provides a set of services (functions) that can be invoked from the client. Internally, subspace creates request and response channels and uses reliable messaging. + +TODO: fully document this... + +## Thread safety +A new feature of Subspace version 2 is the addition of optional thread-safety features. This adds a +mutual exclusion lock (mutex) to the client that allows multiple threads to use it at once. The lock +is used if you call: + +```c++ +SetThreadSafe(true) +``` +on the Client object after you create it. + +Most of the thread-safety features are invisible to the user with one exception. + +When you call `GetMessageBuffer` or `GetMessageBufferSpan` the client will be locked to the +current thread until you call `PublishMessage` or `CancelPublish`. 
The usual method of sending +messages is to call `GetMessageBuffer` to obtain a pointer to the shared memory for the buffer, then +serialize the message into the memory, and then publish the message. When thread-safety is enabled +the client will be locked while this is occurring. This is necessary to prevent another thread +making this same sequence of calls at the same time and corrupting the message. + +If you want to disable this locking functionality you can pass a `false` second argument to `GetMessageBuffer` to +tell it to not hold the lock. This will be useful if you are doing zero-copy operations where the +time between getting the buffer and publishing the message is longer and holding the lock would prevent +any other usage of the client. You will need to be careful with thread safety yourself in this case, but that +is always a given. + diff --git a/manual_tests/BUILD.bazel b/manual_tests/BUILD.bazel index 34cfa0b..394118c 100644 --- a/manual_tests/BUILD.bazel +++ b/manual_tests/BUILD.bazel @@ -1,5 +1,7 @@ package(default_visibility = ["//visibility:public"]) +load("@rules_cc//cc:defs.bzl", "cc_binary") + cc_binary( name = "pub", srcs = [ @@ -144,7 +146,7 @@ cc_binary( "@abseil-cpp//absl/strings:str_format", "@abseil-cpp//absl/flags:flag", "@abseil-cpp//absl/flags:parse", - "@coroutines//:co", + "@coroutines//co:co", ], linkopts = select({ "//:macos_x86_64": [], diff --git a/plugins/BUILD.bazel b/plugins/BUILD.bazel new file mode 100644 index 0000000..5cd9aec --- /dev/null +++ b/plugins/BUILD.bazel @@ -0,0 +1,15 @@ +package(default_visibility = ["//visibility:public"]) + +load("@rules_cc//cc:defs.bzl", "cc_binary") + +cc_binary( + name = "nop_plugin.so", + srcs = ["nop_plugin.cc"], + deps = [ + "//server:server", + "@abseil-cpp//absl/status", + "@abseil-cpp//absl/strings:str_format", + ], + linkstatic = False, + linkshared = True, +) diff --git a/plugins/nop_plugin.cc b/plugins/nop_plugin.cc new file mode 100644 index 0000000..1883dd1 --- /dev/null +++ 
b/plugins/nop_plugin.cc @@ -0,0 +1,72 @@ +#include "server/server.h" + +namespace nop_plugin { + +absl::Status OnStartup(subspace::Server &s, const std::string &name, + subspace::PluginContext *ctx) { + ctx->logger.Log(toolbelt::LogLevel::kInfo, "NOP plugin %s started\n", + name.c_str()); + return absl::OkStatus(); +} +void OnReady(subspace::Server &s, subspace::PluginContext *ctx) { + ctx->logger.Log(toolbelt::LogLevel::kInfo, "NOP plugin ready\n"); +} + +void OnShutdown(subspace::PluginContext *ctx) { + ctx->logger.Log(toolbelt::LogLevel::kInfo, "NOP plugin shutting down\n"); +} +void OnNewChannel(subspace::Server &s, const std::string &channel_name, + subspace::PluginContext *ctx) { + ctx->logger.Log(toolbelt::LogLevel::kInfo, "NOP plugin: new channel %s\n", + channel_name.c_str()); +} +void OnRemoveChannel(subspace::Server &s, const std::string &channel_name, + subspace::PluginContext *ctx) { + ctx->logger.Log(toolbelt::LogLevel::kInfo, "NOP plugin: remove channel %s\n", + channel_name.c_str()); +} +void OnNewPublisher(subspace::Server &s, const std::string &channel_name, + int publisher_id, subspace::PluginContext *ctx) { + ctx->logger.Log(toolbelt::LogLevel::kInfo, + "NOP plugin: new publisher %d on channel %s\n", publisher_id, + channel_name.c_str()); +} +void OnRemovePublisher(subspace::Server &s, const std::string &channel_name, + int publisher_id, subspace::PluginContext *ctx) { + ctx->logger.Log(toolbelt::LogLevel::kInfo, + "NOP plugin: remove publisher %d on channel %s\n", + publisher_id, channel_name.c_str()); +} +void OnNewSubscriber(subspace::Server &s, const std::string &channel_name, + int subscriber_id, subspace::PluginContext *ctx) { + ctx->logger.Log(toolbelt::LogLevel::kInfo, + "NOP plugin: new subscriber %d on channel %s\n", + subscriber_id, channel_name.c_str()); +} +void OnRemoveSubscriber(subspace::Server &s, const std::string &channel_name, + int subscriber_id, subspace::PluginContext *ctx) { + ctx->logger.Log(toolbelt::LogLevel::kInfo, + 
"NOP plugin: remove subscriber %d on channel %s\n", + subscriber_id, channel_name.c_str()); +} + +} // namespace nop_plugin + +extern "C" { +subspace::PluginInterface *NOP_Create() { + subspace::PluginInterfaceFunctions functions = { + .onStartup = nop_plugin::OnStartup, + .onReady = nop_plugin::OnReady, + .onShutdown = nop_plugin::OnShutdown, + .onNewChannel = nop_plugin::OnNewChannel, + .onRemoveChannel = nop_plugin::OnRemoveChannel, + .onNewPublisher = nop_plugin::OnNewPublisher, + .onRemovePublisher = nop_plugin::OnRemovePublisher, + .onNewSubscriber = nop_plugin::OnNewSubscriber, + .onRemoveSubscriber = nop_plugin::OnRemoveSubscriber, + }; + auto iface = new subspace::PluginInterface( + functions, std::make_unique("nop_plugin")); + return iface; +} +} diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index 4900d1d..dc7f1db 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -4,37 +4,18 @@ cmake_minimum_required(VERSION 3.15) -# Include FetchContent module for handling external dependencies +# Protobuf is already fetched by the parent CMakeLists.txt via FetchContent +# The targets like protobuf::libprotobuf and protobuf::protoc are available directly + +# Include FetchContent to get protobuf source directory for well-known types include(FetchContent) -# --- Apple Silicon (ARM64) specific settings --- -# CMAKE_OSX_ARCHITECTURES is expected to be set by the parent project or environment. -# We will propagate this value to sub-dependencies. -if(APPLE) - message(STATUS "Building for Apple platform in proto subdirectory. Propagating CMAKE_OSX_ARCHITECTURES: ${CMAKE_OSX_ARCHITECTURES}") +# Get the protobuf source directory to find well-known types +FetchContent_GetProperties(protobuf) +if(NOT protobuf_POPULATED) + message(FATAL_ERROR "protobuf must be populated before proto/CMakeLists.txt is processed") endif() -# Protobuf provides targets like protobuf::libprotobuf and the protoc executable. 
-# --- External Dependency: Protobuf (using FetchContent for native CMake) --- -FetchContent_Declare( - protobuf - GIT_REPOSITORY https://github.com/protocolbuffers/protobuf.git - GIT_TAG v29.5 - FIND_PACKAGE_ARGS NAMES protobuf - # Protobuf's CMake build can be configured to build only necessary components - CMAKE_ARGS - -Dprotobuf_BUILD_TESTS=OFF # Explicitly disable building tests to avoid gmock conflicts - -Dprotobuf_BUILD_EXAMPLES=OFF - -Dprotobuf_BUILD_SHARED_LIBS=OFF # Build static libs - # Pass architecture settings to Protobuf's CMake build - CMAKE_OSX_ARCHITECTURES="${CMAKE_OSX_ARCHITECTURES}" -) -FetchContent_MakeAvailable(protobuf) -# Protobuf provides targets like protobuf::libprotobuf and the protoc executable. -include(FindProtobuf) -find_package(protobuf CONFIG REQUIRED) - - # Define the .proto file(s) in this subdirectory set(PROTO_FILES subspace.proto @@ -43,16 +24,29 @@ set(PROTO_FILES # Variables to store the generated source and header files set(SUBSPACE_PROTO_GENERATED_DIR "${CMAKE_CURRENT_BINARY_DIR}") -# Define the subspace_proto library target -add_library(subspace_proto STATIC OBJECT "${CMAKE_CURRENT_LIST_DIR}/subspace.proto") -# Use the protobuf_generate function to generate C++ files -# This function automatically adds custom commands to perform the generation -# and sets up dependencies on the protoc executable. 
-protobuf_generate( - TARGET subspace_proto - LANGUAGE cpp - IMPORT_DIRS ${CMAKE_CURRENT_SOURCE_DIR} - PROTOC_OUT_DIR ${SUBSPACE_PROTO_GENERATED_DIR}) +# Generated file paths +set(PROTO_SRC "${CMAKE_CURRENT_BINARY_DIR}/subspace.pb.cc") +set(PROTO_HDR "${CMAKE_CURRENT_BINARY_DIR}/subspace.pb.h") + +# Generate C++ files from the proto file using protoc +# Include both the current source dir and protobuf src dir for well-known types +add_custom_command( + OUTPUT ${PROTO_SRC} ${PROTO_HDR} + COMMAND $ + ARGS --cpp_out=${CMAKE_CURRENT_BINARY_DIR} + --proto_path=${CMAKE_CURRENT_SOURCE_DIR} + --proto_path=${protobuf_SOURCE_DIR}/src + ${CMAKE_CURRENT_SOURCE_DIR}/subspace.proto + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/subspace.proto + protobuf::protoc + COMMENT "Generating C++ code from subspace.proto" +) + +# Define the subspace_proto library target with the generated sources +add_library(subspace_proto STATIC + ${PROTO_SRC} + ${PROTO_HDR} +) # Add the directory containing the generated headers to the include paths diff --git a/proto/subspace.proto b/proto/subspace.proto index 616211b..91686d4 100644 --- a/proto/subspace.proto +++ b/proto/subspace.proto @@ -18,6 +18,8 @@ message InitRequest { string client_name = 1; } message InitResponse { int32 scb_fd_index = 1; // Index into fds of SystemControlBlock fd. int64 session_id = 2; + int32 user_id = 3; + int32 group_id = 4; } message CreatePublisherRequest { @@ -104,6 +106,25 @@ message RemoveSubscriberRequest { message RemoveSubscriberResponse { string error = 1; } + +message GetChannelInfoRequest { + string channel_name = 1; // Empty for all channels. +} + +message GetChannelInfoResponse { + string error = 1; + repeated ChannelInfoProto channels = 2; +} + +message GetChannelStatsRequest { + string channel_name = 1; // Empty for all channels. 
+} + +message GetChannelStatsResponse { + string error = 1; + repeated ChannelStatsProto channels = 2; +} + message Request { oneof request { InitRequest init = 1; @@ -112,6 +133,8 @@ message Request { GetTriggersRequest get_triggers = 4; RemovePublisherRequest remove_publisher = 5; RemoveSubscriberRequest remove_subscriber = 6; + GetChannelInfoRequest get_channel_info = 9; + GetChannelStatsRequest get_channel_stats = 10; } } @@ -123,12 +146,14 @@ message Response { GetTriggersResponse get_triggers = 4; RemovePublisherResponse remove_publisher = 5; RemoveSubscriberResponse remove_subscriber = 6; + GetChannelInfoResponse get_channel_info = 9; + GetChannelStatsResponse get_channel_stats = 10; } } // These messages are carried on Subspace channels published by // the server. -message ChannelInfo { +message ChannelInfoProto { string name = 1; int32 slot_size = 2; int32 num_slots = 3; @@ -148,10 +173,10 @@ message ChannelInfo { // This is published to the /subspace/ChannelDirectory channel. message ChannelDirectory { string server_id = 1; - repeated ChannelInfo channels = 2; + repeated ChannelInfoProto channels = 2; } -message ChannelStats { +message ChannelStatsProto { string channel_name = 1; int64 total_bytes = 2; int64 total_messages = 3; @@ -161,13 +186,15 @@ message ChannelStats { int32 num_subs = 7; uint32 max_message_size = 8; uint32 total_drops = 9; + int32 num_bridge_pubs = 10; // Number of publishers that are bridges. + int32 num_bridge_subs = 11; // Number of subscribers that are bridges. } // This is published to the /subspace/Statistics channel. message Statistics { string server_id = 1; int64 timestamp = 2; - repeated ChannelStats channels = 3; + repeated ChannelStatsProto channels = 3; } message ChannelAddress { @@ -293,10 +320,10 @@ message RpcResponse { string error = 1; // Error message if any. google.protobuf.Any result = 2; // Data returned by the server. int32 session_id = 3; // Session ID for this response. 
- int32 request_id = 4; // Unique ID for this response, matches request. - uint64 client_id = 5; // Client ID making the request. - bool is_last = 6; // Whether this is the last response in a stream. - bool is_cancelled = 7; // Whether this response is for a cancelled request. + int32 request_id = 4; // Unique ID for this response, matches request. + uint64 client_id = 5; // Client ID making the request. + bool is_last = 6; // Whether this is the last response in a stream. + bool is_cancelled = 7; // Whether this response is for a cancelled request. } // This is sent to cancel a streaming method. diff --git a/rpc/README.md b/rpc/README.md index 87bd7b1..3e44d70 100644 --- a/rpc/README.md +++ b/rpc/README.md @@ -58,8 +58,8 @@ service TestService { You can write a `BUILD.bazel` (or just BUILD) that contains: ``` -load("@com_google_protobuf//bazel:cc_proto_library.bzl", "cc_proto_library") -load("@com_google_protobuf//bazel:proto_library.bzl", "proto_library") +load("@protobuf//bazel:cc_proto_library.bzl", "cc_proto_library") +load("@protobuf//bazel:proto_library.bzl", "proto_library") load("//:rpc/subspace_rpc_library.bzl", "subspace_rpc_library") package(default_visibility = ["//visibility:public"]) diff --git a/rpc/client/BUILD.bazel b/rpc/client/BUILD.bazel index 6ed899b..c201f64 100644 --- a/rpc/client/BUILD.bazel +++ b/rpc/client/BUILD.bazel @@ -20,7 +20,7 @@ cc_library( "@abseil-cpp//absl/status", "@abseil-cpp//absl/status:statusor", "@abseil-cpp//absl/strings:str_format", - "@coroutines//:co", + "@coroutines//co:co", ], ) @@ -43,6 +43,6 @@ cc_test( "@abseil-cpp//absl/status:status_matchers", "@abseil-cpp//absl/status:statusor", "@googletest//:gtest", - "@coroutines//:co", + "@coroutines//co:co", ], ) diff --git a/rpc/client/client_test.cc b/rpc/client/client_test.cc index 85d9e52..9697961 100644 --- a/rpc/client/client_test.cc +++ b/rpc/client/client_test.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights 
Reserved // See LICENSE file for licensing information. diff --git a/rpc/client/rpc_client.cc b/rpc/client/rpc_client.cc index bdf297e..00ab27d 100644 --- a/rpc/client/rpc_client.cc +++ b/rpc/client/rpc_client.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. diff --git a/rpc/client/rpc_client.h b/rpc/client/rpc_client.h index a074459..4a0c1f4 100644 --- a/rpc/client/rpc_client.h +++ b/rpc/client/rpc_client.h @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. diff --git a/rpc/example/client.cc b/rpc/example/client.cc index 12f9042..d7cbbe7 100644 --- a/rpc/example/client.cc +++ b/rpc/example/client.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. diff --git a/rpc/example/server.cc b/rpc/example/server.cc index 11e545f..c4e040c 100644 --- a/rpc/example/server.cc +++ b/rpc/example/server.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. diff --git a/rpc/idl_compiler/gen.cc b/rpc/idl_compiler/gen.cc index 9b54eeb..c90d559 100644 --- a/rpc/idl_compiler/gen.cc +++ b/rpc/idl_compiler/gen.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. 
@@ -33,7 +33,7 @@ static std::string GeneratedFilename(const std::filesystem::path &package_name, size_t virtual_imports = filename.find("_virtual_imports/"); if (virtual_imports != std::string_view::npos) { // This is something like: - // bazel-out/darwin_arm64-dbg/bin/external/com_google_protobuf/_virtual_imports/any_proto/google/protobuf/any.proto + // bazel-out/darwin_arm64-dbg/bin/external/protobuf/_virtual_imports/any_proto/google/protobuf/any.proto filename = filename.substr(virtual_imports + sizeof("_virtual_imports/")); // Remove the first directory. filename = filename.substr(filename.find('/') + 1); diff --git a/rpc/idl_compiler/gen.h b/rpc/idl_compiler/gen.h index 6e032e0..c976cc2 100644 --- a/rpc/idl_compiler/gen.h +++ b/rpc/idl_compiler/gen.h @@ -1,4 +1,4 @@ -// Copyright 2024 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. diff --git a/rpc/idl_compiler/main.cc b/rpc/idl_compiler/main.cc index b837117..8275199 100644 --- a/rpc/idl_compiler/main.cc +++ b/rpc/idl_compiler/main.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. diff --git a/rpc/idl_compiler/service_gen.cc b/rpc/idl_compiler/service_gen.cc index 4e99a65..0d28dcd 100644 --- a/rpc/idl_compiler/service_gen.cc +++ b/rpc/idl_compiler/service_gen.cc @@ -1,4 +1,4 @@ -// Copyright 2024 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. diff --git a/rpc/idl_compiler/service_gen.h b/rpc/idl_compiler/service_gen.h index f74612a..e513738 100644 --- a/rpc/idl_compiler/service_gen.h +++ b/rpc/idl_compiler/service_gen.h @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. 
diff --git a/rpc/server/BUILD.bazel b/rpc/server/BUILD.bazel index a63b3ff..f39854d 100644 --- a/rpc/server/BUILD.bazel +++ b/rpc/server/BUILD.bazel @@ -20,7 +20,7 @@ cc_library( "@abseil-cpp//absl/strings:str_format", "@abseil-cpp//absl/flags:flag", "@abseil-cpp//absl/flags:parse", - "@coroutines//:co", + "@coroutines//co:co", ], ) @@ -41,7 +41,7 @@ cc_test( "@abseil-cpp//absl/status:status_matchers", "@abseil-cpp//absl/status:statusor", "@googletest//:gtest", - "@coroutines//:co", + "@coroutines//co:co", "//rpc/proto:rpc_test_cc_proto", ], ) diff --git a/rpc/server/rpc_server.cc b/rpc/server/rpc_server.cc index 1c4a2f4..8287a33 100644 --- a/rpc/server/rpc_server.cc +++ b/rpc/server/rpc_server.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. diff --git a/rpc/server/rpc_server.h b/rpc/server/rpc_server.h index 5e04905..e7b7557 100644 --- a/rpc/server/rpc_server.h +++ b/rpc/server/rpc_server.h @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. diff --git a/rpc/server/server_test.cc b/rpc/server/server_test.cc index a2645e7..d8da081 100644 --- a/rpc/server/server_test.cc +++ b/rpc/server/server_test.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. diff --git a/rpc/subspace_rpc_library.bzl b/rpc/subspace_rpc_library.bzl index 67db86a..cb1b165 100644 --- a/rpc/subspace_rpc_library.bzl +++ b/rpc/subspace_rpc_library.bzl @@ -79,7 +79,7 @@ def _subspace_rpc_aspect_impl(target, _ctx): # For a file that is not in this package, we need to generate the # output in our package. 
# The path looks like: - # ../com_google_protobuf/_virtual_imports/any_proto/google/protobuf/any.proto + # ../protobuf/_virtual_imports/any_proto/google/protobuf/any.proto # We want to declare the file as:Æ’ # google/protobuf/any.subspace_rpc.cc v = file_path.split("_virtual_imports/") diff --git a/rpc/test/rpc_test.cc b/rpc/test/rpc_test.cc index 5729e39..74a6108 100644 --- a/rpc/test/rpc_test.cc +++ b/rpc/test/rpc_test.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. diff --git a/server/BUILD.bazel b/server/BUILD.bazel index 7ac635e..46195eb 100644 --- a/server/BUILD.bazel +++ b/server/BUILD.bazel @@ -1,5 +1,7 @@ package(default_visibility = ["//visibility:public"]) +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_binary") + cc_library( name = "server", srcs = [ @@ -11,6 +13,7 @@ cc_library( "server.h", "client_handler.h", "server_channel.h", + "plugin.h", ], deps = [ "//common:subspace_common", @@ -22,7 +25,7 @@ cc_library( "@abseil-cpp//absl/strings:str_format", "@abseil-cpp//absl/flags:flag", "@abseil-cpp//absl/flags:parse", - "@coroutines//:co", + "@coroutines//co:co", ], ) @@ -33,7 +36,7 @@ cc_binary( ], deps = [ ":server", - "@coroutines//:co", + "@coroutines//co:co", ], linkopts = select({ "//:macos_x86_64": [], diff --git a/server/client_handler.cc b/server/client_handler.cc index 1c145a0..03dd495 100644 --- a/server/client_handler.cc +++ b/server/client_handler.cc @@ -1,55 +1,66 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. 
#include "server/client_handler.h" #include "absl/strings/str_format.h" +#include "client_handler.h" #include "server/server.h" namespace subspace { ClientHandler::~ClientHandler() { server_->RemoveAllUsersFor(this); } -void ClientHandler::Run(co::Coroutine *c) { +void ClientHandler::Run() { // The data is placed 4 bytes into the buffer. The first 4 // bytes of the buffer are used by SendMessage and ReceiveMessage // for the length of the data. - char *sendbuf = buffer_ + sizeof(int32_t); - constexpr size_t kSendBufLen = sizeof(buffer_) - sizeof(int32_t); for (;;) { - absl::StatusOr n_recv = - socket_.ReceiveMessage(buffer_, sizeof(buffer_), c); - if (!n_recv.ok()) { - return; - } subspace::Request request; - if (request.ParseFromArray(buffer_, *n_recv)) { - std::vector fds; - subspace::Response response; - if (absl::Status s = HandleMessage(request, response, fds); !s.ok()) { - server_->logger_.Log(toolbelt::LogLevel::kError, "%s\n", - s.ToString().c_str()); - return; - } - if (!response.SerializeToArray(sendbuf, kSendBufLen)) { - server_->logger_.Log(toolbelt::LogLevel::kError, - "Failed to serialize response\n"); + { + absl::StatusOr> receive_buffer = + socket_.ReceiveVariableLengthMessage(co::self); + if (!receive_buffer.ok()) { return; } - size_t msglen = response.ByteSizeLong(); - absl::StatusOr n_sent = socket_.SendMessage(sendbuf, msglen, c); - if (!n_sent.ok()) { + if (receive_buffer->empty()) { + // Connection closed. 
return; } - if (absl::Status status = socket_.SendFds(fds, c); !status.ok()) { - server_->logger_.Log(toolbelt::LogLevel::kError, "%s\n", - status.ToString().c_str()); + if (!request.ParseFromArray(receive_buffer->data(), + int(receive_buffer->size()))) { + server_->logger_.Log(toolbelt::LogLevel::kError, + "Failed to parse request\n"); return; } - } else { + } + + std::vector fds; + subspace::Response response; + if (absl::Status s = HandleMessage(request, response, fds); !s.ok()) { + server_->logger_.Log(toolbelt::LogLevel::kError, "%s\n", + s.ToString().c_str()); + return; + } + + size_t msglen = response.ByteSizeLong(); + std::vector send_buffer(sizeof(int32_t) + msglen); + if (!response.SerializeToArray(send_buffer.data() + sizeof(int32_t), + msglen)) { server_->logger_.Log(toolbelt::LogLevel::kError, - "Failed to parse message\n"); + "Failed to serialize response\n"); + return; + } + + absl::StatusOr n_sent = socket_.SendMessage( + send_buffer.data() + sizeof(int32_t), msglen, co::self); + if (!n_sent.ok()) { + return; + } + if (absl::Status status = socket_.SendFds(fds, co::self); !status.ok()) { + server_->logger_.Log(toolbelt::LogLevel::kError, "%s\n", + status.ToString().c_str()); return; } } @@ -87,6 +98,15 @@ ClientHandler::HandleMessage(const subspace::Request &req, resp.mutable_remove_subscriber(), fds); break; + case subspace::Request::kGetChannelInfo: + HandleGetChannelInfo(req.get_channel_info(), + resp.mutable_get_channel_info(), fds); + break; + case subspace::Request::kGetChannelStats: + HandleGetChannelStats(req.get_channel_stats(), + resp.mutable_get_channel_stats(), fds); + break; + case subspace::Request::REQUEST_NOT_SET: return absl::InternalError("Protocol error: unknown request"); } @@ -100,6 +120,8 @@ void ClientHandler::HandleInit(const subspace::InitRequest &req, fds.push_back(server_->scb_fd_); client_name_ = req.client_name(); response->set_session_id(server_->GetSessionId()); + response->set_user_id(getuid()); + 
response->set_group_id(getgid()); } void ClientHandler::HandleCreatePublisher( @@ -108,6 +130,12 @@ void ClientHandler::HandleCreatePublisher( std::vector &fds) { ServerChannel *channel = server_->FindChannel(req.channel_name()); if (channel == nullptr) { + server_->logger_.Log(toolbelt::LogLevel::kDebug, + "Publisher %s is creating new channel %s with size " + "%d/%d and type length %d (total of %d channels)", + client_name_.c_str(), req.channel_name().c_str(), + req.slot_size(), req.num_slots(), req.type().size(), + server_->GetNumChannels()); absl::StatusOr ch = server_->CreateChannel( req.channel_name(), req.slot_size(), req.num_slots(), req.mux(), req.vchan_id(), req.type()); @@ -117,6 +145,12 @@ void ClientHandler::HandleCreatePublisher( } channel = *ch; } else if (channel->IsPlaceholder()) { + server_->logger_.Log( + toolbelt::LogLevel::kDebug, + "Publisher %s is remapping placeholder channel %s with size %d/%d and " + "type length %d (total of %d channels)", + client_name_.c_str(), req.channel_name().c_str(), req.slot_size(), + req.num_slots(), req.type().size(), server_->GetNumChannels()); // Channel exists, but it's just a placeholder. Remap the memory now // that we know the slots. absl::Status status = @@ -200,8 +234,8 @@ void ClientHandler::HandleCreatePublisher( } } - int num_pubs, num_subs; - channel->CountUsers(num_pubs, num_subs); + int num_pubs, num_subs, num_bridge_pubs, num_bridge_subs; + channel->CountUsers(num_pubs, num_subs, num_bridge_pubs, num_bridge_subs); // Check consistency of publisher parameters. if (num_pubs > 0) { if (req.is_fixed_size() != channel->IsFixedSize()) { @@ -212,18 +246,37 @@ void ClientHandler::HandleCreatePublisher( return; } - // We check only the number of slots since the slot size can change - // over time. 
- if ((req.is_fixed_size() && channel->SlotSize() != req.slot_size()) || - channel->NumSlots() != req.num_slots()) { + int current_num_slots = channel->NumSlots(); + + bool slot_size_changed = + channel->SlotSize() != 0 && req.slot_size() > channel->SlotSize(); + bool num_slots_changed = req.num_slots() > current_num_slots; + if (num_slots_changed) { response->set_error(absl::StrFormat( - "Inconsistent publisher parameters for channel %s: " - "existing: %d/%d, new: %d/%d", - req.channel_name(), channel->SlotSize(), channel->NumSlots(), - req.slot_size(), req.num_slots())); + "Failed to add publisher to %s with more slots (%d) than the current " + "number (%d)", + req.channel_name(), req.num_slots(), current_num_slots)); return; } + if (slot_size_changed) { + if (slot_size_changed) { + if (channel->IsFixedSize()) { + // Fixed size channels cannot change size. + response->set_error(absl::StrFormat( + "Failed to add publisher to fixed size channel %s with different " + "slot size (%d) than the current size (%d)", + req.channel_name(), req.slot_size(), channel->SlotSize())); + return; + } + } + server_->logger_.Log( + toolbelt::LogLevel::kDebug, + "Publisher %s is resizing channel %s buffers from %d bytes to %d", + client_name_.c_str(), channel->Name().c_str(), channel->SlotSize(), + req.slot_size()); + } + if (channel->IsLocal() != req.is_local()) { response->set_error( absl::StrFormat("Inconsistent publisher parameters for channel %s: " @@ -233,6 +286,10 @@ void ClientHandler::HandleCreatePublisher( } } + server_->logger_.Log(toolbelt::LogLevel::kDebug, + "Client %s creating publisher on channel %s: VM: %s", + client_name_.c_str(), req.channel_name().c_str(), + GetTotalVM().c_str()); // Create the publisher. 
absl::StatusOr publisher = channel->AddPublisher(this, req.is_reliable(), req.is_local(), @@ -241,6 +298,7 @@ void ClientHandler::HandleCreatePublisher( response->set_error(publisher.status().ToString()); return; } + server_->OnNewPublisher(channel->Name(), (*publisher)->GetId()); server_->SendChannelDirectory(); response->set_channel_id(channel->GetChannelId()); @@ -325,6 +383,11 @@ void ClientHandler::HandleCreateSubscriber( ServerChannel *channel = server_->FindChannel(req.channel_name()); if (channel == nullptr) { // No channel exists, map an empty channel. + server_->logger_.Log(toolbelt::LogLevel::kDebug, + "Subscriber %s is creating new placeholder channel %s " + "with type length %d (total of %d channels)", + client_name_.c_str(), req.channel_name().c_str(), + req.type().size(), server_->GetNumChannels()); absl::StatusOr ch = server_->CreateChannel( req.channel_name(), 0, 0, req.mux(), req.vchan_id(), req.type()); if (!ch.ok()) { @@ -396,14 +459,23 @@ void ClientHandler::HandleCreateSubscriber( } } // Create the subscriber. + server_->logger_.Log(toolbelt::LogLevel::kDebug, + "Client %s creating subscriber on channel %s: VM: %s", + client_name_.c_str(), req.channel_name().c_str(), + GetTotalVM().c_str()); absl::StatusOr subscriber = channel->AddSubscriber( this, req.is_reliable(), req.is_bridge(), req.max_active_messages()); if (!subscriber.ok()) { response->set_error(subscriber.status().ToString()); return; } + ChannelCounters &counters = channel->RecordUpdate( + /*is_pub=*/false, /*add=*/true, req.is_reliable()); + response->set_num_pub_updates(counters.num_pub_updates); sub = *subscriber; } + server_->OnNewSubscriber(channel->Name(), sub->GetId()); + server_->SendChannelDirectory(); channel->RegisterSubscriber(sub->GetId(), channel->GetVirtualChannelId(), req.subscriber_id() == -1); @@ -461,9 +533,6 @@ void ClientHandler::HandleCreateSubscriber( // Send Query to subscribe to public channels on other servers. 
server_->SendQuery(req.channel_name()); } - ChannelCounters &counters = - channel->RecordUpdate(/*is_pub=*/false, /*add=*/true, req.is_reliable()); - response->set_num_pub_updates(counters.num_pub_updates); } void ClientHandler::HandleGetTriggers( @@ -541,4 +610,54 @@ void ClientHandler::HandleRemoveSubscriber( channel->RemoveUser(server_, req.subscriber_id()); } +void ClientHandler::HandleGetChannelInfo( + const subspace::GetChannelInfoRequest &req, + subspace::GetChannelInfoResponse *response, + std::vector &fds) { + if (req.channel_name().empty()) { + // All channels. + auto result = response->mutable_channels(); + + server_->ForeachChannel([result](ServerChannel *channel) { + channel->GetChannelInfo(result->Add()); + }); + return; + } + ServerChannel *channel = server_->FindChannel(req.channel_name()); + if (channel == nullptr) { + response->set_error( + absl::StrFormat("No such channel %s", req.channel_name())); + return; + } + auto info = response->mutable_channels(); + channel->GetChannelInfo(info->Add()); +} + +void ClientHandler::HandleGetChannelStats( + const subspace::GetChannelStatsRequest &req, + subspace::GetChannelStatsResponse *response, + std::vector &fds) { + if (req.channel_name().empty()) { + // All channels. 
+ auto result = response->mutable_channels(); + + server_->ForeachChannel([result](ServerChannel *channel) { + channel->GetChannelStats(result->Add()); + }); + return; + } + ServerChannel *channel = server_->FindChannel(req.channel_name()); + if (channel == nullptr) { + response->set_error( + absl::StrFormat("No such channel %s", req.channel_name())); + return; + } + auto info = response->mutable_channels(); + channel->GetChannelStats(info->Add()); +} + +std::string ClientHandler::GetTotalVM() { + uint64_t total_vm = server_->GetVirtualMemoryUsage(); + return absl::StrFormat("%g MiB", double(total_vm) / (1024.0 * 1024.0)); +} } // namespace subspace diff --git a/server/client_handler.h b/server/client_handler.h index 17b0097..9efa4e0 100644 --- a/server/client_handler.h +++ b/server/client_handler.h @@ -1,9 +1,9 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. -#ifndef __SERVER_CLIENT_HANDLER_H -#define __SERVER_CLIENT_HANDLER_H +#ifndef _xSERVERCLIENT_HANDLER_H +#define _xSERVERCLIENT_HANDLER_H #include "absl/status/status.h" #include "common/channel.h" @@ -25,9 +25,11 @@ class ClientHandler { // Run the client handler receiver in a coroutine. Terminates // when the connection to the client is closed. 
- void Run(co::Coroutine *c); + void Run(); private: + std::string GetTotalVM(); + absl::Status HandleMessage(const subspace::Request &req, subspace::Response &resp, std::vector &fds); @@ -55,12 +57,17 @@ class ClientHandler { void HandleRemoveSubscriber(const subspace::RemoveSubscriberRequest &req, subspace::RemoveSubscriberResponse *response, std::vector &fds); + void HandleGetChannelInfo(const subspace::GetChannelInfoRequest &req, + subspace::GetChannelInfoResponse *response, + std::vector &fds); + void HandleGetChannelStats(const subspace::GetChannelStatsRequest &req, + subspace::GetChannelStatsResponse *response, + std::vector &fds); Server *server_; toolbelt::UnixSocket socket_; - char buffer_[kMaxMessage]; std::string client_name_; }; } // namespace subspace -#endif // __SERVER_CLIENT_HANDLER_H \ No newline at end of file +#endif // _xSERVERCLIENT_HANDLER_H \ No newline at end of file diff --git a/server/main.cc b/server/main.cc index 6c62337..400887b 100644 --- a/server/main.cc +++ b/server/main.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. @@ -25,6 +25,8 @@ ABSL_FLAG(std::string, log_level, "info", "Log level"); ABSL_FLAG(std::string, interface, "", "Discovery network interface"); ABSL_FLAG(bool, local, false, "Use local computer only"); ABSL_FLAG(int, notify_fd, -1, "File descriptor to notify of startup"); +ABSL_FLAG(std::string, machine, "", "Machine name"); + #if defined(___APPLE__) // This is default true on Mac since is uses /tmp. ABSL_FLAG(bool, cleanup_filesystem, true, "Cleanup the filesystem on server startup"); @@ -32,6 +34,8 @@ ABSL_FLAG(bool, cleanup_filesystem, true, "Cleanup the filesystem on server star // Default false on other OSes as it interferes with tests that run multiple servers. 
ABSL_FLAG(bool, cleanup_filesystem, false, "Cleanup the filesystem on server startup"); #endif +ABSL_FLAG(std::vector, plugins, {}, + "List of plugins to load"); int main(int argc, char **argv) { absl::ParseCommandLine(argc, argv); @@ -42,6 +46,9 @@ int main(int argc, char **argv) { signal(SIGPIPE, SIG_IGN); signal(SIGQUIT, Signal); + // Close the plugins when the server is shutting down. + subspace::ClosePluginsOnShutdown(); + std::unique_ptr server; if (absl::GetFlag(FLAGS_peer_address).empty()) { @@ -63,6 +70,25 @@ int main(int argc, char **argv) { server->CleanupFilesystem(); } + // Load the plugins. Each plugin is a name:path pair. + for (const auto &p : absl::GetFlag(FLAGS_plugins)) { + auto pos = p.find(':'); + if (pos == std::string::npos) { + fprintf(stderr, "Plugin '%s' is not in name:path format\n", p.c_str()); + exit(1); + } + std::string name = p.substr(0, pos); + std::string path = p.substr(pos + 1); + absl::Status status = server->LoadPlugin(name, path); + if (!status.ok()) { + fprintf(stderr, "Failed to load plugin %s from %s: %s\n", + name.c_str(), path.c_str(), status.ToString().c_str()); + exit(1); + } + } + + server->SetMachineName(absl::GetFlag(FLAGS_machine)); + absl::Status s = server->Run(); if (!s.ok()) { fprintf(stderr, "Error running Subspace server: %s\n", diff --git a/server/plugin.h b/server/plugin.h new file mode 100644 index 0000000..80cb610 --- /dev/null +++ b/server/plugin.h @@ -0,0 +1,84 @@ + +#ifndef _xSERVERPLUGIN_H +#define _xSERVERPLUGIN_H + +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "toolbelt/logging.h" +#include +#include + +namespace subspace { +class Server; + +struct PluginContext { + PluginContext(const std::string &name) : logger(name) {} + virtual ~PluginContext() = default; + toolbelt::Logger logger; +}; + +// Plugins allow an externally loaded module to handle occurrences in the +// server. It is envisioned that they can be used for adding additional +// channels, etc.
The server runs in a single thread so plugins must +// behave themselves and not block for long periods of time. Access to +// the current coroutine is in `co::self`. +struct PluginInterfaceFunctions { + absl::Status (*onStartup)(Server &s, const std::string& name, PluginContext *ctx); + void (*onReady)(Server &s, PluginContext *ctx); + void (*onShutdown)(PluginContext *ctx); + + void (*onNewChannel)(Server &s, const std::string &channel_name, PluginContext *ctx); + void (*onRemoveChannel)(Server &s, const std::string &channel_name, PluginContext *ctx); + void (*onNewPublisher)(Server &s, const std::string &channel_name, int publisher_id, PluginContext *ctx); + void (*onRemovePublisher)(Server &s, const std::string &channel_name, int publisher_id, PluginContext *ctx); + void (*onNewSubscriber)(Server &s, const std::string &channel_name, int subscriber_id, PluginContext *ctx); + void (*onRemoveSubscriber)(Server &s, const std::string &channel_name, int subscriber_id, PluginContext *ctx); +}; + +class PluginInterface { +public: + PluginInterface(const PluginInterfaceFunctions &functions, std::unique_ptr ctx = nullptr) + : functions_(functions), ctx_(std::move(ctx)) {} + + absl::Status OnStartup(Server &s, const std::string& name) { + return functions_.onStartup(s, name, ctx_.get()); + } + + void OnReady(Server &s) { functions_.onReady(s, ctx_.get()); } + + void OnShutdown() { functions_.onShutdown(ctx_.get()); } + + void OnNewChannel(Server &s, const std::string &channel_name) { + functions_.onNewChannel(s, channel_name, ctx_.get()); + } + + void OnRemoveChannel(Server &s, const std::string &channel_name) { + functions_.onRemoveChannel(s, channel_name, ctx_.get()); + } + + void OnNewPublisher(Server &s, const std::string &channel_name, + int publisher_id) { + functions_.onNewPublisher(s, channel_name, publisher_id, ctx_.get()); + } + + void OnRemovePublisher(Server &s, const std::string &channel_name, + int publisher_id) { + functions_.onRemovePublisher(s, 
channel_name, publisher_id, ctx_.get()); + } + + void OnNewSubscriber(Server &s, const std::string &channel_name, + int subscriber_id) { + functions_.onNewSubscriber(s, channel_name, subscriber_id, ctx_.get()); + } + + void OnRemoveSubscriber(Server &s, const std::string &channel_name, + int subscriber_id) { + functions_.onRemoveSubscriber(s, channel_name, subscriber_id, ctx_.get()); + } + +private: + PluginInterfaceFunctions functions_; + std::unique_ptr ctx_; +}; +} // namespace subspace +#endif // _xSERVERPLUGIN_H \ No newline at end of file diff --git a/server/server.cc b/server/server.cc index b8946e1..b337a3b 100644 --- a/server/server.cc +++ b/server/server.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. @@ -6,6 +6,7 @@ #include "absl/strings/numbers.h" #include "absl/strings/str_format.h" #include "client/client.h" +#include "client_handler.h" #include "proto/subspace.pb.h" #include "toolbelt/clock.h" #include "toolbelt/hexdump.h" @@ -19,11 +20,19 @@ #include #include #include +#include #include #include namespace subspace { +// In multithreaded tests we can't dlclose the plugins because the dynamic +// linker doesn't play well with threads. +static std::atomic close_plugins_on_shutdown = false; +void ClosePluginsOnShutdown() { close_plugins_on_shutdown = true; } + +bool ShouldClosePluginsOnShutdown() { return close_plugins_on_shutdown; } + // Look for the IP address and calculate the broadcast address // for the given interface. 
If the interface name is empty // choose the first interface that supports broadcast and @@ -93,25 +102,63 @@ static absl::Status FindIPAddresses(const std::string &interface, Server::Server(co::CoroutineScheduler &scheduler, const std::string &socket_name, const std::string &interface, - int disc_port, int peer_port, bool local, int notify_fd) + int disc_port, int peer_port, bool local, int notify_fd, + int initial_ordinal, bool wait_for_clients, + bool publish_server_channels) : socket_name_(socket_name), interface_(interface), discovery_port_(disc_port), discovery_peer_port_(peer_port), - local_(local), notify_fd_(notify_fd), co_scheduler_(scheduler) {} + local_(local), notify_fd_(notify_fd), scheduler_(scheduler), + logger_("Subspace server"), initial_ordinal_(initial_ordinal), + wait_for_clients_(wait_for_clients), + publish_server_channels_(publish_server_channels) { + CreateShutdownTrigger(); +} Server::Server(co::CoroutineScheduler &scheduler, const std::string &socket_name, const std::string &interface, const toolbelt::InetAddress &peer, int disc_port, int peer_port, - bool local, int notify_fd) + bool local, int notify_fd, int initial_ordinal, + bool wait_for_clients, bool publish_server_channels) : socket_name_(socket_name), interface_(interface), peer_address_(peer), discovery_port_(disc_port), discovery_peer_port_(peer_port), - local_(local), notify_fd_(notify_fd), co_scheduler_(scheduler) {} + local_(local), notify_fd_(notify_fd), scheduler_(scheduler), + logger_("Subspace server"), initial_ordinal_(initial_ordinal), + wait_for_clients_(wait_for_clients), + publish_server_channels_(publish_server_channels) { + CreateShutdownTrigger(); +} Server::~Server() { // Clear this before other data members get destroyed. 
client_handlers_.clear(); } -void Server::Stop() { co_scheduler_.Stop(); } +void Server::Stop(bool force) { + if (shutting_down_) { + return; + } + if (force || !wait_for_clients_) { + scheduler_.Stop(); + return; + } + shutting_down_ = true; + for (auto &plugin : plugins_) { + plugin->interface->OnShutdown(); + } + shutdown_trigger_fd_.Trigger(); + NotifyViaFd(kServerWaiting); +} + +void Server::CreateShutdownTrigger() { + auto fd = toolbelt::TriggerFd::Create(); + if (!fd.ok()) { + logger_.Log(toolbelt::LogLevel::kError, + "Failed to create shutdown trigger fd: %s", + fd.status().ToString().c_str()); + abort(); + } + shutdown_trigger_fd_ = std::move(*fd); +} absl::StatusOr Server::CreateBridgeNotificationPipe() { @@ -137,10 +184,9 @@ void Server::CloseHandler(ClientHandler *handler) { // This coroutine listens for incoming client connections on the given // UDS and spawns a handler coroutine to handle the communication with // the client. -void Server::ListenerCoroutine(toolbelt::UnixSocket &listen_socket, - co::Coroutine *c) { - for (;;) { - absl::Status status = HandleIncomingConnection(listen_socket, c); +void Server::ListenerCoroutine(toolbelt::UnixSocket &listen_socket) { + while (!shutting_down_) { + absl::Status status = HandleIncomingConnection(listen_socket); if (!status.ok()) { logger_.Log(toolbelt::LogLevel::kError, "Unable to make incoming connection: %s", @@ -149,11 +195,33 @@ void Server::ListenerCoroutine(toolbelt::UnixSocket &listen_socket, } } +void Server::NotifyViaFd(int64_t val) { + if (!notify_fd_.Valid()) { + return; + } + const char *p = reinterpret_cast(&val); + size_t remaining = sizeof(val); + while (remaining > 0) { + ssize_t n = ::write(notify_fd_.Fd(), p, remaining); + if (n <= 0) { + return; + } + remaining -= n; + p += n; + } +} + +void Server::ForeachChannel(std::function func) { + for (auto &channel : channels_) { + func(channel.second.get()); + } +} + void Server::CleanupAfterSession() { std::string session_shm_file_prefix = 
"subspace_." + std::to_string(session_id_); -#if defined(__APPLE__) +#if SUBSPACE_SHMEM_MODE == SUBSPACE_SHMEM_MODE_POSIX // Remove all files starting with "subspace_SESSION" in /tmp. These refer to // shared memory segments names "subspace_INODE". for (const auto &entry : std::filesystem::directory_iterator("/tmp")) { @@ -161,7 +229,7 @@ void Server::CleanupAfterSession() { 0) { // Extrace the node and remove the shared memory segment. // The name of the shared memory segment is "subspace_". - auto status = Channel::MacOsSharedMemoryName(entry.path().string()); + auto status = Channel::PosixSharedMemoryName(entry.path().string()); if (status.ok()) { shm_unlink(status->c_str()); } @@ -194,14 +262,14 @@ void Server::CleanupAfterSession() { void Server::CleanupFilesystem() { logger_.Log(toolbelt::LogLevel::kInfo, "Cleaning up filesystem..."); -#if defined(__APPLE__) +#if SUBSPACE_SHMEM_MODE == SUBSPACE_SHMEM_MODE_POSIX // Remove all files starting with "subspace_" in /tmp. These refer to // shared memory segments names "subspace_INODE". for (const auto &entry : std::filesystem::directory_iterator("/tmp")) { if (entry.path().filename().string().rfind("subspace_", 0) == 0) { // Extrace the node and remove the shared memory segment. // The name of the shared memory segment is "subspace_". - auto status = Channel::MacOsSharedMemoryName(entry.path().string()); + auto status = Channel::PosixSharedMemoryName(entry.path().string()); if (status.ok()) { shm_unlink(status->c_str()); } @@ -247,11 +315,12 @@ absl::Status Server::Run() { return status; } - // Notify listener that the server is ready. + // Notify listener that the server is ready. Do this in a coroutine so that + // it executes when we start running. 
if (notify_fd_.Valid()) { - int64_t val = kServerReady; - (void)::write(notify_fd_.Fd(), &val, 8); + scheduler_.Spawn([this]() { NotifyViaFd(kServerReady); }); } + OnReady(); absl::StatusOr scb = CreateSystemControlBlock(scb_fd_); if (!scb.ok()) { @@ -322,59 +391,53 @@ absl::Status Server::Run() { } } - // Register a callback to be called when a coroutine completes. The - // server keeps track of all coroutines created. - // This deletes them when they are done. - co_scheduler_.SetCompletionCallback( - [this](co::Coroutine *c) { coroutines_.erase(c); }); + // TODO: why does this not work? The BridgeSenderCoroutine causes a terminate because the co::AbortException + // is not caught in the coroutine caller. This appears to be a bug somewhere as I can't find why the catch + // isn't working. We don't need to abort handling anyway. + scheduler_.EnableAborts(false); // Start the listener coroutine. - coroutines_.insert(std::make_unique( - co_scheduler_, - [this, &listen_socket](co::Coroutine *c) { - ListenerCoroutine(listen_socket, c); - }, - "Listener UDS")); + scheduler_.Spawn( + [this, &listen_socket]() { ListenerCoroutine(listen_socket); }, + {.name = "Listener UDS", + .interrupt_fd = shutdown_trigger_fd_.GetPollFd().Fd()}); - // Start the channel directory coroutine. - coroutines_.insert(std::make_unique( - co_scheduler_, [this](co::Coroutine *c) { ChannelDirectoryCoroutine(c); }, - "Channel directory")); + if (publish_server_channels_) { + // Start the channel directory coroutine. + scheduler_.Spawn([this]() { ChannelDirectoryCoroutine(); }, + {.name = "Channel directory", + .interrupt_fd = shutdown_trigger_fd_.GetPollFd().Fd()}); - // Start the channel stats coroutine. - coroutines_.insert(std::make_unique( - co_scheduler_, [this](co::Coroutine *c) { StatisticsCoroutine(c); }, - "Channel stats")); + // Start the channel stats coroutine. 
+ scheduler_.Spawn([this]() { StatisticsCoroutine(); }, + {.name = "Channel stats", + .interrupt_fd = shutdown_trigger_fd_.GetPollFd().Fd()}); + } if (!local_) { // Start the discovery receiver coroutine. - coroutines_.insert(std::make_unique( - co_scheduler_, - [this](co::Coroutine *c) { DiscoveryReceiverCoroutine(c); })); + scheduler_.Spawn([this]() { DiscoveryReceiverCoroutine(); }, + {.name = "Discovery receiver", + .interrupt_fd = shutdown_trigger_fd_.GetPollFd().Fd()}); // Start the gratuitous Advertiser coroutine. This sends Advertise messages // every 5 seconds. - coroutines_.insert(std::make_unique( - co_scheduler_, - [this](co::Coroutine *c) { GratuitousAdvertiseCoroutine(c); }, - "Gratuitous advertiser")); + scheduler_.Spawn([this]() { GratuitousAdvertiseCoroutine(); }, + {.name = "Gratuitous advertiser", + .interrupt_fd = shutdown_trigger_fd_.GetPollFd().Fd()}); } // Run the coroutine main loop. - co_scheduler_.Run(); + scheduler_.Run(); // Notify listener that we're stopped. - if (notify_fd_.Valid()) { - int64_t val = kServerStopped; - (void)::write(notify_fd_.Fd(), &val, 8); - } + NotifyViaFd(kServerStopped); return absl::OkStatus(); } absl::Status -Server::HandleIncomingConnection(toolbelt::UnixSocket &listen_socket, - co::Coroutine *c) { - absl::StatusOr s = listen_socket.Accept(c); +Server::HandleIncomingConnection(toolbelt::UnixSocket &listen_socket) { + absl::StatusOr s = listen_socket.Accept(co::self); if (!s.ok()) { return s.status(); } @@ -382,13 +445,14 @@ Server::HandleIncomingConnection(toolbelt::UnixSocket &listen_socket, std::make_unique(this, std::move(*s))); ClientHandler *handler_ptr = client_handlers_.back().get(); - coroutines_.insert(std::make_unique( - co_scheduler_, - [this, handler_ptr](co::Coroutine *c) { - handler_ptr->Run(c); + scheduler_.Spawn( + [this, handler_ptr]() { + handler_ptr->Run(); CloseHandler(handler_ptr); }, - "Client handler")); + {.name = "Client handler", + .interrupt_fd = + wait_for_clients_ ? 
shutdown_trigger_fd_.GetPollFd().Fd() : -1}); return absl::OkStatus(); } @@ -408,7 +472,7 @@ Server::CreateMultiplexer(const std::string &channel_name, int slot_size, channel->SetDebug(logger_.GetLogLevel() <= toolbelt::LogLevel::kVerboseDebug); absl::StatusOr fds = - channel->Allocate(scb_fd_, slot_size, num_slots); + channel->Allocate(scb_fd_, slot_size, num_slots, initial_ordinal_); if (!fds.ok()) { return fds.status(); } @@ -457,6 +521,7 @@ Server::CreateChannel(const std::string &channel_name, int slot_size, channel_name.c_str(), num_slots, mux.c_str()); // The channels_ map owns all the server channels. channels_.emplace(std::make_pair(channel_name, std::move(*vchan))); + OnNewChannel(channel_name); return channel; } @@ -471,12 +536,13 @@ Server::CreateChannel(const std::string &channel_name, int slot_size, channel->SetLastKnownSlotSize(slot_size); absl::StatusOr fds = - channel->Allocate(scb_fd_, slot_size, num_slots); + channel->Allocate(scb_fd_, slot_size, num_slots, initial_ordinal_); if (!fds.ok()) { return fds.status(); } channel->SetSharedMemoryFds(std::move(*fds)); channels_.emplace(std::make_pair(channel_name, channel)); + OnNewChannel(channel_name); return channel; } @@ -499,7 +565,7 @@ absl::Status Server::RemapChannel(ServerChannel *channel, int slot_size, return RemapChannel(mux, slot_size, num_slots); } absl::StatusOr fds = - channel->Allocate(scb_fd_, slot_size, num_slots); + channel->Allocate(scb_fd_, slot_size, num_slots, initial_ordinal_); if (!fds.ok()) { return fds.status(); } @@ -516,9 +582,21 @@ ServerChannel *Server::FindChannel(const std::string &channel_name) { } void Server::RemoveChannel(ServerChannel *channel) { + OnRemoveChannel(channel->Name()); channel->RemoveBuffer(session_id_); channel_ids_.Clear(channel->GetChannelId()); auto it = channels_.find(channel->Name()); + if (it == channels_.end()) { + return; + } + if (it->second->IsVirtual()) { + auto vchan = static_cast(it->second.get()); + ChannelMultiplexer *mux = 
vchan->GetMux(); + mux->RemoveVirtualChannel(vchan); + if (mux->IsEmpty()) { + RemoveChannel(mux); + } + } channels_.erase(it); SendChannelDirectory(); } @@ -537,29 +615,31 @@ void Server::RemoveAllUsersFor(ClientHandler *handler) { } } -void Server::ChannelDirectoryCoroutine(co::Coroutine *c) { +void Server::ChannelDirectoryCoroutine() { // Coroutine aware client. - Client client(c); + Client client(co::self); absl::Status status = client.Init(socket_name_); if (!status.ok()) { logger_.Log( - toolbelt::LogLevel::kFatal, + toolbelt::LogLevel::kError, "Failed to initialize Subspace client for channel directory: %s", status.ToString().c_str()); + return; } - constexpr int kDirectorySlotSize = 128 * 1024 - sizeof(MessagePrefix); + constexpr int kDirectorySlotSize = 1024; constexpr int kDirectoryNumSlots = 32; absl::StatusOr channel_directory = client.CreatePublisher( "/subspace/ChannelDirectory", kDirectorySlotSize, kDirectoryNumSlots, PublisherOptions().SetType("subspace.ChannelDirectory")); if (!channel_directory.ok()) { - logger_.Log(toolbelt::LogLevel::kFatal, + logger_.Log(toolbelt::LogLevel::kError, "Failed to create channel directory channel: %s", channel_directory.status().ToString().c_str()); + return; } - for (;;) { - c->Wait(channel_directory_trigger_fd_.GetPollFd().Fd(), POLLIN); + while (!shutting_down_) { + co::Wait(channel_directory_trigger_fd_.GetPollFd().Fd(), POLLIN); channel_directory_trigger_fd_.Clear(); ChannelDirectory directory; @@ -568,19 +648,21 @@ void Server::ChannelDirectoryCoroutine(co::Coroutine *c) { auto info = directory.add_channels(); channel.second->GetChannelInfo(info); } - absl::StatusOr buffer = channel_directory->GetMessageBuffer(); + int64_t length = directory.ByteSizeLong(); + absl::StatusOr buffer = + channel_directory->GetMessageBuffer(int32_t(length)); if (!buffer.ok()) { - logger_.Log(toolbelt::LogLevel::kFatal, + logger_.Log(toolbelt::LogLevel::kError, "Failed to get channel directory buffer: %s", 
buffer.status().ToString().c_str()); + return; } - bool ok = directory.SerializeToArray(*buffer, kDirectorySlotSize); + bool ok = directory.SerializeToArray(*buffer, length); if (!ok) { logger_.Log(toolbelt::LogLevel::kError, "Failed to serialize channel directory"); continue; } - int64_t length = directory.ByteSizeLong(); absl::StatusOr s = channel_directory->PublishMessage(length); if (!s.ok()) { logger_.Log(toolbelt::LogLevel::kError, @@ -592,29 +674,31 @@ void Server::ChannelDirectoryCoroutine(co::Coroutine *c) { void Server::SendChannelDirectory() { channel_directory_trigger_fd_.Trigger(); } -void Server::StatisticsCoroutine(co::Coroutine *c) { - Client client(c); +void Server::StatisticsCoroutine() { + Client client(co::self); absl::Status status = client.Init(socket_name_); if (!status.ok()) { - logger_.Log(toolbelt::LogLevel::kFatal, + logger_.Log(toolbelt::LogLevel::kError, "Failed to initialize Subspace client for statistics: %s", status.ToString().c_str()); + return; } - constexpr int kStatsSlotSize = 8192 - sizeof(MessagePrefix); + constexpr int kStatsSlotSize = 1024; constexpr int kStatsNumSlots = 32; absl::StatusOr pub = client.CreatePublisher( "/subspace/Statistics", kStatsSlotSize, kStatsNumSlots, PublisherOptions().SetType("subspace.Statistics")); if (!pub.ok()) { - logger_.Log(toolbelt::LogLevel::kFatal, + logger_.Log(toolbelt::LogLevel::kError, "Failed to create statistics channel: %s", pub.status().ToString().c_str()); + return; } constexpr int kPeriodSecs = 2; - for (;;) { - c->Sleep(kPeriodSecs); + while (!shutting_down_) { + co::Sleep(kPeriodSecs); Statistics stats; stats.set_timestamp(toolbelt::Now()); stats.set_server_id(server_id_); @@ -622,19 +706,21 @@ void Server::StatisticsCoroutine(co::Coroutine *c) { auto s = stats.add_channels(); channel.second->GetChannelStats(s); } - absl::StatusOr buffer = pub->GetMessageBuffer(); + int64_t length = stats.ByteSizeLong(); + + absl::StatusOr buffer = pub->GetMessageBuffer(int32_t(length)); if 
(!buffer.ok()) { - logger_.Log(toolbelt::LogLevel::kFatal, + logger_.Log(toolbelt::LogLevel::kError, "Failed to get channel stats buffer: %s", buffer.status().ToString().c_str()); + return; } - bool ok = stats.SerializeToArray(*buffer, kStatsSlotSize); + bool ok = stats.SerializeToArray(*buffer, length); if (!ok) { logger_.Log(toolbelt::LogLevel::kError, "Failed to serialize channel stats"); continue; } - int64_t length = stats.ByteSizeLong(); absl::StatusOr s = pub->PublishMessage(length); if (!s.ok()) { logger_.Log(toolbelt::LogLevel::kError, @@ -651,9 +737,8 @@ void Server::SendQuery(const std::string &channel_name) { return; } // Spawn a coroutine to send the Query message. - coroutines_.insert(std::make_unique( - co_scheduler_, - [this, channel_name](co::Coroutine *c) { + scheduler_.Spawn( + [this, channel_name]() { logger_.Log(toolbelt::LogLevel::kDebug, "Sending Query %s with discovery port %d", channel_name.c_str(), discovery_port_); @@ -672,14 +757,15 @@ void Server::SendQuery(const std::string &channel_name) { } int64_t length = disc.ByteSizeLong(); absl::Status s = - discovery_transmitter_.SendTo(discovery_addr_, buffer, length, c); + discovery_transmitter_.SendTo(discovery_addr_, buffer, length); if (!s.ok()) { logger_.Log(toolbelt::LogLevel::kError, "Failed to send Query: %s", s.ToString().c_str()); return; } }, - "discovery query")); + {.name = "Send discovery query", + .interrupt_fd = shutdown_trigger_fd_.GetPollFd().Fd()}); } // Send an advertise discovery message over UDP. @@ -688,9 +774,8 @@ void Server::SendAdvertise(const std::string &channel_name, bool reliable) { return; } // Spawn a coroutine to send the Publish message. 
- coroutines_.insert(std::make_unique( - co_scheduler_, - [this, channel_name, reliable](co::Coroutine *c) { + scheduler_.Spawn( + [this, channel_name, reliable]() { logger_.Log(toolbelt::LogLevel::kDebug, "Sending Advertise %s with discovery port %d", channel_name.c_str(), discovery_port_); @@ -709,24 +794,25 @@ void Server::SendAdvertise(const std::string &channel_name, bool reliable) { } int64_t length = disc.ByteSizeLong(); absl::Status s = - discovery_transmitter_.SendTo(discovery_addr_, buffer, length, c); + discovery_transmitter_.SendTo(discovery_addr_, buffer, length); if (!s.ok()) { logger_.Log(toolbelt::LogLevel::kError, "Failed to send Advertise: %s", s.ToString().c_str()); return; } }, - "discovery advertiser")); + {.name = "Send discovery advertise", + .interrupt_fd = shutdown_trigger_fd_.GetPollFd().Fd()}); } // This coroutine receives discovery messages over UDP. -void Server::DiscoveryReceiverCoroutine(co::Coroutine *c) { +void Server::DiscoveryReceiverCoroutine() { char buffer[kDiscoveryBufferSize]; for (;;) { toolbelt::InetAddress sender; - absl::StatusOr n = - discovery_receiver_.ReceiveFrom(sender, buffer, sizeof(buffer), c); + absl::StatusOr n = discovery_receiver_.ReceiveFrom( + sender, buffer, sizeof(buffer), co::self); if (!n.ok()) { logger_.Log(toolbelt::LogLevel::kError, "Failed to read discovery message: %s", @@ -769,8 +855,7 @@ void Server::DiscoveryReceiverCoroutine(co::Coroutine *c) { void Server::BridgeTransmitterCoroutine(ServerChannel *channel, bool pub_reliable, bool sub_reliable, toolbelt::SocketAddress subscriber, - bool notify_retirement, - co::Coroutine *c) { + bool notify_retirement) { logger_.Log(toolbelt::LogLevel::kDebug, "BridgeTransmitterCoroutine running"); toolbelt::StreamSocket bridge; if (absl::Status status = bridge.Connect(subscriber); !status.ok()) { @@ -852,7 +937,8 @@ void Server::BridgeTransmitterCoroutine(ServerChannel *channel, return; } int64_t length = subscribed.ByteSizeLong(); - absl::StatusOr n_sent_1 = 
bridge.SendMessage(databuf, length, c); + absl::StatusOr n_sent_1 = + bridge.SendMessage(databuf, length, co::self); if (!n_sent_1.ok()) { logger_.Log(toolbelt::LogLevel::kError, "Failed to send subscribed for %s: %s", channel_name.c_str(), @@ -860,7 +946,7 @@ void Server::BridgeTransmitterCoroutine(ServerChannel *channel, return; } - Client client(c); + Client client(co::self); if (absl::Status s = client.Init(socket_name_); !s.ok()) { logger_.Log(toolbelt::LogLevel::kError, "Failed to connect to Subspace server: %s", @@ -881,20 +967,21 @@ void Server::BridgeTransmitterCoroutine(ServerChannel *channel, // send over the bridge. This extends the lifetime of the AciveMessage until // the other side of the bridge sends us a retirement notification for the // message's slot. - std::vector> active_retirement_msgs; + std::shared_ptr>> + active_retirement_msgs = + std::make_shared>>(); bool notifying_of_retirement = !channel->GetRetirementFds().empty(); // Spawn a coroutine to read from the retirement connection. if (notifying_of_retirement) { - active_retirement_msgs.resize(channel->NumSlots()); - coroutines_.insert(std::make_unique( - co_scheduler_, - [this, &retirement_listener, - &active_retirement_msgs](co::Coroutine *c2) { + active_retirement_msgs->resize(channel->NumSlots()); + scheduler_.Spawn( + [this, &retirement_listener, active_retirement_msgs]() mutable { return RetirementReceiverCoroutine(retirement_listener, - active_retirement_msgs, c2); + active_retirement_msgs); }, - absl::StrFormat("Retirement receiver for %s", channel_name))); + {.name = absl::StrFormat("Retirement listener for %s", channel_name), + .interrupt_fd = shutdown_trigger_fd_.GetPollFd().Fd()}); } if (bridge_notification_pipe_.WriteFd().Valid()) { @@ -971,7 +1058,7 @@ void Server::BridgeTransmitterCoroutine(ServerChannel *channel, // we will stop reading the messages from the channel and thus // backpressure any publishers writing to that channel. 
if (absl::StatusOr n_sent_2 = - bridge.SendMessage(data_addr, msglen, c); + bridge.SendMessage(data_addr, msglen, co::self); !n_sent_2.ok()) { done = true; logger_.Log(toolbelt::LogLevel::kError, @@ -983,7 +1070,8 @@ void Server::BridgeTransmitterCoroutine(ServerChannel *channel, if (notifying_of_retirement) { // We need to keep track of the message so that we can retire it // when the other side retires the slot. - active_retirement_msgs[msg->slot_id] = std::move(msg->active_message); + (*active_retirement_msgs)[msg->slot_id] = + std::move(msg->active_message); } } @@ -1008,11 +1096,11 @@ void Server::BridgeTransmitterCoroutine(ServerChannel *channel, // on this side and the publisher's retirement FD is sent the slot. void Server::RetirementReceiverCoroutine( toolbelt::StreamSocket &retirement_listener, - std::vector> &active_retirement_msgs, - co::Coroutine *c) { + std::shared_ptr>> + active_retirement_msgs) { // Accept connection on the retirement listener socket. absl::StatusOr retirement_socket = - retirement_listener.Accept(c); + retirement_listener.Accept(co::self); if (!retirement_socket.ok()) { logger_.Log(toolbelt::LogLevel::kError, "Failed to accept retirement connection: %s", @@ -1024,7 +1112,7 @@ void Server::RetirementReceiverCoroutine( char buffer[kDiscoveryBufferSize]; for (;;) { absl::StatusOr n_recv = - retirement_socket->ReceiveMessage(buffer, sizeof(buffer), c); + retirement_socket->ReceiveMessage(buffer, sizeof(buffer), co::self); if (!n_recv.ok()) { return; } @@ -1043,18 +1131,19 @@ void Server::RetirementReceiverCoroutine( // being sent (which is serialized as 0 bytes. Remove the adustment. slot_id -= 1; - if (slot_id < 0 || slot_id >= active_retirement_msgs.size()) { + if (slot_id < 0 || slot_id >= active_retirement_msgs->size()) { continue; } - active_retirement_msgs[slot_id].reset(); + (*active_retirement_msgs)[slot_id].reset(); } } // Send a Subscribe message over UDP. 
-absl::Status Server::SendSubscribeMessage( - const std::string &channel_name, bool reliable, - toolbelt::InetAddress publisher, toolbelt::StreamSocket &receiver_listener, - char *buffer, size_t buffer_size, co::Coroutine *c) { +absl::Status +Server::SendSubscribeMessage(const std::string &channel_name, bool reliable, + toolbelt::InetAddress publisher, + toolbelt::StreamSocket &receiver_listener, + char *buffer, size_t buffer_size) { const toolbelt::SocketAddress &receiver_addr = receiver_listener.BoundAddress(); logger_.Log(toolbelt::LogLevel::kDebug, @@ -1094,7 +1183,8 @@ absl::Status Server::SendSubscribeMessage( int64_t length = disc.ByteSizeLong(); logger_.Log(toolbelt::LogLevel::kDebug, "Sending subscribe to %s: %s", publisher.ToString().c_str(), disc.DebugString().c_str()); - absl::Status s = discovery_transmitter_.SendTo(publisher, buffer, length, c); + absl::Status s = + discovery_transmitter_.SendTo(publisher, buffer, length, co::self); if (!s.ok()) { return absl::InternalError( absl::StrFormat("Failed to send subscribe: %s", s.ToString())); @@ -1109,15 +1199,14 @@ absl::Status Server::SendSubscribeMessage( // bridge and publishing them to the local channel. void Server::BridgeReceiverCoroutine(std::string channel_name, bool sub_reliable, - toolbelt::InetAddress publisher, - co::Coroutine *c) { + toolbelt::InetAddress publisher) { // Open a listening TCP socket on a free port. 
logger_.Log(toolbelt::LogLevel::kDebug, "BridgeReceiverCoroutine running"); char buffer[kDiscoveryBufferSize]; toolbelt::StreamSocket receiver_listener; - absl::Status s = receiver_listener.Bind( - toolbelt::SocketAddress::AnyPort(my_address_), true); + auto addr = toolbelt::SocketAddress::AnyPort(my_address_); + absl::Status s = receiver_listener.Bind(addr, true); if (!s.ok()) { logger_.Log(toolbelt::LogLevel::kError, "Unable to bind socket for bridge receiver for %s: %s", @@ -1130,7 +1219,7 @@ void Server::BridgeReceiverCoroutine(std::string channel_name, receiver_addr.ToString().c_str()); s = SendSubscribeMessage(channel_name, sub_reliable, publisher, - receiver_listener, buffer, sizeof(buffer), c); + receiver_listener, buffer, sizeof(buffer)); if (!s.ok()) { logger_.Log(toolbelt::LogLevel::kError, "Failed to send Subscribe message for channel %s: %s", @@ -1139,7 +1228,8 @@ void Server::BridgeReceiverCoroutine(std::string channel_name, } // Accept connection on listen socket. - absl::StatusOr bridge = receiver_listener.Accept(c); + absl::StatusOr bridge = + receiver_listener.Accept(co::self); if (!bridge.ok()) { logger_.Log(toolbelt::LogLevel::kError, "Failed to accept incoming bridge connection: %s", @@ -1152,7 +1242,7 @@ void Server::BridgeReceiverCoroutine(std::string channel_name, // Recieve it into offset 3 in buffer so that we can write the length for // sending it out again to the bridge notification pipe. absl::StatusOr n_recv = bridge->ReceiveMessage( - buffer + sizeof(int32_t), sizeof(buffer) - sizeof(int32_t), c); + buffer + sizeof(int32_t), sizeof(buffer) - sizeof(int32_t), co::self); if (!n_recv.ok()) { logger_.Log(toolbelt::LogLevel::kError, "Failed to receive Subscribed: %s", n_recv.status().ToString().c_str()); @@ -1166,7 +1256,7 @@ void Server::BridgeReceiverCoroutine(std::string channel_name, } // Build a publisher to publish incoming bridge messages to the channel. 
- Client client(c); + Client client(co::self); s = client.Init(socket_name_); if (!s.ok()) { logger_.Log(toolbelt::LogLevel::kError, @@ -1246,14 +1336,14 @@ void Server::BridgeReceiverCoroutine(std::string channel_name, // Add a coroutine to listen for retirement notifications and send them to // the socket connected to the other server. - coroutines_.insert(std::make_unique( - co_scheduler_, + scheduler_.Spawn( [this, retirement_fd = std::move(retirement_fd), - &retirement_transmitter, channel_name](co::Coroutine *c) mutable { + &retirement_transmitter, channel_name]() mutable { RetirementCoroutine(channel_name, std::move(retirement_fd), - std::move(retirement_transmitter), c); + std::move(retirement_transmitter)); }, - absl::StrFormat("Retirement notifier for %s", channel_name).c_str())); + {.name = absl::StrFormat("Retirement notifier for %s", channel_name), + .interrupt_fd = shutdown_trigger_fd_.GetPollFd().Fd()}); } if (bridge_notification_pipe_.WriteFd().Valid()) { @@ -1328,7 +1418,8 @@ void Server::BridgeReceiverCoroutine(std::string channel_name, char *after_padding = prefix_addr + sizeof(int32_t); absl::StatusOr n = bridge->ReceiveMessage( - after_padding, subscribed.slot_size() + kAdjustedPrefixLength, c); + after_padding, subscribed.slot_size() + kAdjustedPrefixLength, + co::self); if (!n.ok()) { // This will happen when the bridge transmitter on the other // side of the bridge terminates. @@ -1349,7 +1440,8 @@ void Server::BridgeReceiverCoroutine(std::string channel_name, prefix->flags |= kMessageBridged; absl::StatusOr pub_msg = pub->PublishMessageInternal( - *n - kAdjustedPrefixLength, /*omit_prefix=*/true); + *n - kAdjustedPrefixLength, /*omit_prefix=*/true, + /*omit_prefix_slot_id=*/true); if (!pub_msg.ok()) { logger_.Log(toolbelt::LogLevel::kError, "Failed to publish bridge message for %s: %s", @@ -1369,14 +1461,13 @@ void Server::BridgeReceiverCoroutine(std::string channel_name, // which is kept intact. 
void Server::RetirementCoroutine( const std::string &channel_name, toolbelt::FileDescriptor &&retirement_fd, - std::unique_ptr retirement_transmitter, - co::Coroutine *c) { + std::unique_ptr retirement_transmitter) { logger_.Log(toolbelt::LogLevel::kDebug, "Retirement coroutine for %s running", channel_name.c_str()); for (;;) { int32_t slot_id; absl::StatusOr n = - retirement_fd.Read(&slot_id, sizeof(slot_id), c); + retirement_fd.Read(&slot_id, sizeof(slot_id), co::self); if (!n.ok()) { // Failed to read the slot ID, we're done. return; @@ -1407,7 +1498,7 @@ void Server::RetirementCoroutine( return; } absl::StatusOr nsent = retirement_transmitter->SendMessage( - buffer + sizeof(int32_t), msg.ByteSizeLong(), c); + buffer + sizeof(int32_t), msg.ByteSizeLong(), co::self); if (!nsent.ok()) { logger_.Log(toolbelt::LogLevel::kError, "Failed to send retirement fd for %s: %s", @@ -1419,12 +1510,12 @@ void Server::RetirementCoroutine( void Server::SubscribeOverBridge(ServerChannel *channel, bool reliable, toolbelt::InetAddress publisher) { - coroutines_.insert(std::make_unique( - co_scheduler_, - [this, publisher, channel, reliable](co::Coroutine *c) { - BridgeReceiverCoroutine(channel->Name(), reliable, publisher, c); + scheduler_.Spawn( + [this, channel, reliable, publisher]() { + BridgeReceiverCoroutine(channel->Name(), reliable, publisher); }, - absl::StrFormat("Bridge receiver for %s", channel->Name()).c_str())); + {.name = absl::StrFormat("Bridge receiver for %s", channel->Name()), + .interrupt_fd = shutdown_trigger_fd_.GetPollFd().Fd()}); } void Server::IncomingQuery(const Discovery::Query &query, @@ -1458,8 +1549,9 @@ void Server::IncomingAdvertise(const Discovery::Advertise &advertise, } channel->second->AddBridgedAddress(sender, advertise.reliable()); - int num_pubs, num_subs; - channel->second->CountUsers(num_pubs, num_subs); + int num_pubs, num_subs, num_bridge_pubs, num_bridge_subs; + channel->second->CountUsers(num_pubs, num_subs, num_bridge_pubs, + 
num_bridge_subs); if (num_subs > 0) { SubscribeOverBridge(channel->second.get(), advertise.reliable(), sender); } @@ -1496,6 +1588,8 @@ void Server::IncomingSubscribe(const Discovery::Subscribe &subscribe, in_addr subscriber_ip; memcpy(&subscriber_ip, subscribe.receiver().address().data(), sizeof(subscriber_ip)); + // Need this in host byte order. + subscriber_ip.s_addr = ntohl(subscriber_ip.s_addr); subscriber_addr = toolbelt::SocketAddress(subscriber_ip, subscribe.receiver().port()); break; @@ -1514,26 +1608,27 @@ void Server::IncomingSubscribe(const Discovery::Subscribe &subscribe, } bool notify_retirement = !channel->second->GetRetirementFds().empty(); - coroutines_.insert(std::make_unique( - co_scheduler_, - [this, pub_reliable, sub_reliable, - subscriber_addr = std::move(subscriber_addr), notify_retirement, - ch](co::Coroutine *c) mutable { + scheduler_.Spawn( + [this, ch, pub_reliable, sub_reliable, + subscriber_addr = std::move(subscriber_addr), notify_retirement]() { BridgeTransmitterCoroutine(ch, pub_reliable, sub_reliable, std::move(subscriber_addr), - notify_retirement, c); + notify_retirement); }, - absl::StrFormat("Bridge transmitter for %s", channel->first).c_str())); + {.name = absl::StrFormat("Bridge transmitter for %s", channel->first) + .c_str(), + .interrupt_fd = shutdown_trigger_fd_.GetPollFd().Fd()}); + } else { logger_.Log(toolbelt::LogLevel::kDebug, "I don't publish channel %s", subscribe.channel_name().c_str()); } } -void Server::GratuitousAdvertiseCoroutine(co::Coroutine *c) { +void Server::GratuitousAdvertiseCoroutine() { constexpr int kPeriodSecs = 5; for (;;) { - c->Sleep(kPeriodSecs); + co::Sleep(kPeriodSecs); for (auto &channel : channels_) { if (!channel.second->IsLocal() && !channel.second->IsBridgePublisher()) { SendAdvertise(channel.first, channel.second->IsReliable()); @@ -1542,4 +1637,104 @@ void Server::GratuitousAdvertiseCoroutine(co::Coroutine *c) { } } +absl::Status Server::LoadPlugin(const std::string &name, + const 
std::string &path) { + std::lock_guard lock(plugin_lock_); + + void *handle = nullptr; + if (path != "BUILTIN") { + handle = dlopen(path.c_str(), RTLD_LAZY); + if (handle == nullptr) { + return absl::InternalError( + absl::StrFormat("Can't open plugin file %s: %s", path, dlerror())); + } + } + // Form the name of the init function and find it in the shared object. + std::string interfaceFunc = absl::StrFormat("%s_Create", name); + void *func = dlsym(handle, interfaceFunc.c_str()); + if (func == nullptr) { + return absl::InternalError( + absl::StrFormat("Can't find plugin initialization symbol %s: %s", + interfaceFunc, dlerror())); + } + // Call the init function to get the interface. + using InitFunc = PluginInterface *(*)(); + InitFunc init = reinterpret_cast(func); + auto interface = init(); + + // Call the OnStartup function in the loaded plugin. + absl::Status status = interface->OnStartup(*this, name); + if (!status.ok()) { + return status; + } + plugins_.push_back(std::make_unique( + name, handle, std::unique_ptr(interface))); + return absl::OkStatus(); +} + +absl::Status Server::UnloadPlugin(const std::string &name) { + std::lock_guard lock(plugin_lock_); + auto it = std::find_if( + plugins_.begin(), plugins_.end(), + [&name](const std::unique_ptr &p) { return p->name == name; }); + if (it == plugins_.end()) { + return absl::NotFoundError( + absl::StrFormat("No such plugin %s loaded", name.c_str())); + } + plugins_.erase(it); + return absl::OkStatus(); +} + +void Server::OnReady() { + std::lock_guard lock(plugin_lock_); + for (const auto &plugin : plugins_) { + plugin->interface->OnReady(*this); + } +} + +void Server::OnNewChannel(const std::string &channel_name) { + std::lock_guard lock(plugin_lock_); + for (const auto &plugin : plugins_) { + plugin->interface->OnNewChannel(*this, channel_name); + } +} + +void Server::OnRemoveChannel(const std::string &channel_name) { + std::lock_guard lock(plugin_lock_); + for (const auto &plugin : plugins_) { + 
plugin->interface->OnRemoveChannel(*this, channel_name); + } +} + +void Server::OnNewPublisher(const std::string &channel_name, int publisher_id) { + std::lock_guard lock(plugin_lock_); + for (const auto &plugin : plugins_) { + plugin->interface->OnNewPublisher(*this, channel_name, publisher_id); + } +} + +void Server::OnRemovePublisher(const std::string &channel_name, + int publisher_id) { + std::lock_guard lock(plugin_lock_); + for (const auto &plugin : plugins_) { + plugin->interface->OnRemovePublisher(*this, channel_name, publisher_id); + } +} + +void Server::OnNewSubscriber(const std::string &channel_name, + int subscriber_id) { + std::lock_guard lock(plugin_lock_); + for (const auto &plugin : plugins_) { + plugin->interface->OnNewSubscriber(*this, channel_name, subscriber_id); + } +} + +void Server::OnRemoveSubscriber(const std::string &channel_name, + int subscriber_id) { + std::lock_guard lock(plugin_lock_); + for (const auto &plugin : plugins_) { + plugin->interface->OnRemoveSubscriber(*this, channel_name, subscriber_id); + } +} + } // namespace subspace diff --git a/server/server.h b/server/server.h index a6e4a9d..cff0987 100644 --- a/server/server.h +++ b/server/server.h @@ -1,9 +1,9 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. 
-#ifndef __SERVER_SERVER_H -#define __SERVER_SERVER_H +#ifndef _xSERVERSERVER_H +#define _xSERVERSERVER_H #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" @@ -12,13 +12,17 @@ #include "client/message.h" #include "client_handler.h" #include "co/coroutine.h" +#include "plugin.h" #include "proto/subspace.pb.h" +#include "server/plugin.h" #include "server/server_channel.h" #include "toolbelt/bitset.h" #include "toolbelt/clock.h" #include "toolbelt/fd.h" #include "toolbelt/logging.h" +#include "toolbelt/triggerfd.h" #include +#include #include namespace subspace { @@ -27,6 +31,12 @@ namespace subspace { // is stopped. constexpr int64_t kServerReady = 1; constexpr int64_t kServerStopped = 2; +constexpr int64_t kServerWaiting = 3; + +// In multithreaded tests we can't dlclose the plugins because the dynamic linker doesn't +// play well with threads. +void ClosePluginsOnShutdown(); +bool ShouldClosePluginsOnShutdown(); // The Subspace server. // This is a single-threaded, coroutine-based server that maintains shared @@ -41,17 +51,28 @@ class Server { // The values are written in host byte order. Server(co::CoroutineScheduler &scheduler, const std::string &socket_name, const std::string &interface, int disc_port, int peer_port, bool local, - int notify_fd = -1); + int notify_fd = -1, int initial_ordinal = 1, + bool wait_for_clients = false, bool publish_server_channels = true); // This constructor can be used when you have a single peer server to talk to. 
Server(co::CoroutineScheduler &scheduler, const std::string &socket_name, const std::string &interface, const toolbelt::InetAddress &peer, - int disc_port, int peer_port, bool local, int notify_fd = -1); - ~Server(); + int disc_port, int peer_port, bool local, int notify_fd = -1, + int initial_ordinal = 1, bool wait_for_clients = false, bool publish_server_channels = true); + + virtual ~Server(); void SetLogLevel(const std::string &level) { logger_.SetLogLevel(level); } + toolbelt::LogLevel GetLogLevel() const { return logger_.GetLogLevel(); } + absl::Status Run(); - void Stop(); + void Stop(bool force = false); + + // The machine name can be used to distinguish between multiple servers + // running on the same computer. + void SetMachineName(std::string name) { machine_name_ = std::move(name); } + const std::string& MachineName() const { return machine_name_; } uint64_t GetVirtualMemoryUsage() const; + const std::string& GetSocketName() const { return socket_name_; } uint64_t GetSessionId() const { return session_id_; } @@ -60,14 +81,25 @@ class Server { void CleanupFilesystem(); void CleanupAfterSession(); -private: - friend class ClientHandler; - friend class ServerChannel; - friend class VirtualChannel; - static constexpr size_t kDiscoveryBufferSize = 1024; + absl::Status LoadPlugin(const std::string &name, const std::string &path); + absl::Status UnloadPlugin(const std::string &name); + + virtual co::CoroutineScheduler &GetScheduler() { return scheduler_; } + + absl::flat_hash_map> & + GetChannels() { + return channels_; + } + + int GetShutdownTriggerFd() { + return shutdown_trigger_fd_.GetPollFd().Fd(); + } - absl::Status HandleIncomingConnection(toolbelt::UnixSocket &listen_socket, - co::Coroutine *c); + bool ShuttingDown() const { return shutting_down_; } + + size_t GetNumChannels() const { return channels_.size(); } + + absl::Status HandleIncomingConnection(toolbelt::UnixSocket &listen_socket); // Create a channel in both process and shared memory. 
For a placeholder // subscriber, the channel parameters are not known, so slot_size and @@ -83,32 +115,55 @@ class Server { int num_slots); ServerChannel *FindChannel(const std::string &channel_name); void RemoveChannel(ServerChannel *channel); + +private: + friend class ClientHandler; + friend class ServerChannel; + friend class VirtualChannel; + static constexpr size_t kDiscoveryBufferSize = 1024; + + struct Plugin { + Plugin(const std::string &n, void *h, std::unique_ptr i) + : name(n), handle(h), interface(std::move(i)) {} + ~Plugin() { + if (handle) { + if (ShouldClosePluginsOnShutdown()) { + dlclose(handle); + } + } + } + std::string name; + void *handle = nullptr; + std::unique_ptr interface; + }; + + void ForeachChannel(std::function func); + void RemoveAllUsersFor(ClientHandler *handler); void CloseHandler(ClientHandler *handler); - void ListenerCoroutine(toolbelt::UnixSocket &listen_socket, co::Coroutine *c); - void ChannelDirectoryCoroutine(co::Coroutine *c); + void NotifyViaFd(int64_t val); + void CreateShutdownTrigger(); + void ListenerCoroutine(toolbelt::UnixSocket &listen_socket); + void ChannelDirectoryCoroutine(); void SendChannelDirectory(); - void StatisticsCoroutine(co::Coroutine *c); - void DiscoveryReceiverCoroutine(co::Coroutine *c); - void PublisherCoroutine(co::Coroutine *c); + void StatisticsCoroutine(); + void DiscoveryReceiverCoroutine(); + void PublisherCoroutine(); void SendQuery(const std::string &channel_name); void SendAdvertise(const std::string &channel_name, bool reliable); void BridgeTransmitterCoroutine(ServerChannel *channel, bool pub_reliable, bool sub_reliable, toolbelt::SocketAddress subscriber, - bool notify_retirement, co::Coroutine *c); + bool notify_retirement); void BridgeReceiverCoroutine(std::string channel_name, bool sub_reliable, - toolbelt::InetAddress publisher, - co::Coroutine *c); + toolbelt::InetAddress publisher); void RetirementCoroutine( const std::string &channel_name, toolbelt::FileDescriptor 
&&retirement_fd, - std::unique_ptr retirement_transmitter, - co::Coroutine *c); + std::unique_ptr retirement_transmitter); void RetirementReceiverCoroutine( toolbelt::StreamSocket &retirement_listener, - std::vector> &active_retirement_msgs, - co::Coroutine *c); + std::shared_ptr>> active_retirement_msgs); void SubscribeOverBridge(ServerChannel *channel, bool reliable, toolbelt::InetAddress publisher); @@ -118,15 +173,24 @@ class Server { const toolbelt::InetAddress &sender); void IncomingSubscribe(const Discovery::Subscribe &subscribe, const toolbelt::InetAddress &sender); - void GratuitousAdvertiseCoroutine(co::Coroutine *c); + void GratuitousAdvertiseCoroutine(); absl::Status SendSubscribeMessage(const std::string &channel_name, bool reliable, toolbelt::InetAddress publisher, toolbelt::StreamSocket &receiver_listener, - char *buffer, size_t buffer_size, - co::Coroutine *c); + char *buffer, size_t buffer_size); static uint64_t AllocateSessionId() { return toolbelt::Now(); } + + // Plugin callers. + void OnReady(); + void OnNewChannel(const std::string &channel_name); + void OnRemoveChannel(const std::string &channel_name); + void OnNewPublisher(const std::string &channel_name, int publisher_id); + void OnRemovePublisher(const std::string &channel_name, int publisher_id); + void OnNewSubscriber(const std::string &channel_name, int subscriber_id); + void OnRemoveSubscriber(const std::string &channel_name, int subscriber_id); + std::string socket_name_; uint64_t session_id_; std::vector> client_handlers_; @@ -141,15 +205,15 @@ class Server { bool local_; toolbelt::FileDescriptor notify_fd_; + // Atomic only because of testing. + std::atomic shutting_down_ = false; + absl::flat_hash_map> channels_; SystemControlBlock *scb_; toolbelt::FileDescriptor scb_fd_; toolbelt::BitSet channel_ids_; - co::CoroutineScheduler &co_scheduler_; - - // All coroutines are owned by this set. 
- absl::flat_hash_set> coroutines_; + co::CoroutineScheduler &scheduler_; toolbelt::TriggerFd channel_directory_trigger_fd_; toolbelt::InetAddress discovery_addr_; @@ -161,8 +225,21 @@ class Server { // new connection. The server will send an encoded protobuf Subscribed // message through this pipe if it is set up. toolbelt::Pipe bridge_notification_pipe_; + + int initial_ordinal_ = 1; + + bool wait_for_clients_ = false; + + // In tests we will load a plugin while the server is running. This needs a + // lock. + std::mutex plugin_lock_; + + std::vector> plugins_; + toolbelt::TriggerFd shutdown_trigger_fd_; + std::string machine_name_; + bool publish_server_channels_ = true; }; } // namespace subspace -#endif // __SERVER_SERVER_H +#endif // _xSERVERSERVER_H diff --git a/server/server_channel.cc b/server/server_channel.cc index f0320b0..0410ad7 100644 --- a/server/server_channel.cc +++ b/server/server_channel.cc @@ -1,4 +1,4 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. @@ -6,7 +6,7 @@ #include "absl/strings/str_format.h" #include "server/server.h" #include -#if defined(__APPLE__) +#if SUBSPACE_SHMEM_MODE == SUBSPACE_SHMEM_MODE_POSIX #include #endif @@ -22,7 +22,8 @@ ServerChannel::~ServerChannel() { static absl::StatusOr CreateSharedMemory(int id, const char *suffix, int64_t size, bool map, - toolbelt::FileDescriptor &fd, int session_id = 0) { + toolbelt::FileDescriptor &fd, + int session_id = 0) { char shm_file[NAME_MAX]; // Unique file in file system. char *shm_name; // Name passed to shm_* (starts with /) int tmpfd; @@ -34,7 +35,8 @@ static absl::StatusOr CreateSharedMemory(int id, const char *suffix, #else // On other systems (BSD, MacOS, etc), we need to use a file in /tmp. // This is just used to ensure uniqueness. 
- snprintf(shm_file, sizeof(shm_file), "/tmp/%d.%d.%s.XXXXXX", session_id, id, suffix); + snprintf(shm_file, sizeof(shm_file), "/tmp/%d.%d.%s.XXXXXX", session_id, id, + suffix); tmpfd = mkstemp(shm_file); shm_name = shm_file + 4; // After /tmp #endif @@ -90,7 +92,7 @@ CreateSystemControlBlock(toolbelt::FileDescriptor &fd) { absl::StatusOr ServerChannel::Allocate(const toolbelt::FileDescriptor &scb_fd, int slot_size, - int num_slots) { + int num_slots, int initial_ordinal) { SubscriberCounter num_subs; if (scb_ != nullptr) { @@ -121,8 +123,9 @@ ServerChannel::Allocate(const toolbelt::FileDescriptor &scb_fd, int slot_size, SharedMemoryFds fds; // Create CCB in shared memory and map into process memory. - absl::StatusOr p = CreateSharedMemory( - channel_id_, "ccb", CcbSize(num_slots_), /*map=*/true, fds.ccb, session_id_); + absl::StatusOr p = + CreateSharedMemory(channel_id_, "ccb", CcbSize(num_slots_), /*map=*/true, + fds.ccb, session_id_); if (!p.ok()) { UnmapMemory(scb_, sizeof(SystemControlBlock), "SCB"); return p.status(); @@ -147,8 +150,8 @@ ServerChannel::Allocate(const toolbelt::FileDescriptor &scb_fd, int slot_size, strncpy(ccb_->channel_name, name_.c_str(), kMaxChannelName - 1); ccb_->num_slots = num_slots_; - // Initialize all ordinals to 1. - ccb_->ordinals.Init(); + // Initialize all ordinals. + ccb_->ordinals.Init(initial_ordinal); new (&ccb_->subscribers) AtomicBitSet(); @@ -164,9 +167,12 @@ ServerChannel::Allocate(const toolbelt::FileDescriptor &scb_fd, int slot_size, // Initialize the available slots for each subscriber. if (num_slots_ > 0) { - // All slots are initially retired. + // No retired slots initially. new (RetiredSlotsAddr()) InPlaceAtomicBitset(num_slots_); - RetiredSlots().SetAll(); + + // All slots are initially free. 
+ new (FreeSlotsAddr()) InPlaceAtomicBitset(num_slots_); + FreeSlots().SetAll(); for (int i = 0; i < kMaxSlotOwners; i++) { new (GetAvailableSlotsAddress(i)) InPlaceAtomicBitset(num_slots_); @@ -183,7 +189,7 @@ ServerChannel::Allocate(const toolbelt::FileDescriptor &scb_fd, int slot_size, std::vector ServerChannel::GetSubscriberTriggerFds() const { std::vector r; - for (auto & [ id, user ] : users_) { + for (auto &[id, user] : users_) { if (user == nullptr) { continue; } @@ -197,7 +203,7 @@ ServerChannel::GetSubscriberTriggerFds() const { std::vector ServerChannel::GetReliablePublisherTriggerFds() const { std::vector r; - for (auto & [ id, user ] : users_) { + for (auto &[id, user] : users_) { if (user == nullptr) { continue; } @@ -210,7 +216,7 @@ ServerChannel::GetReliablePublisherTriggerFds() const { std::vector ServerChannel::GetRetirementFds() const { std::vector r; - for (auto & [ id, user ] : users_) { + for (auto &[id, user] : users_) { if (user == nullptr) { continue; } @@ -226,17 +232,6 @@ std::vector ServerChannel::GetRetirementFds() const { } return r; } - -uint64_t ServerChannel::GetVirtualMemoryUsage() const { - uint64_t size = CcbSize(num_slots_); - for (int i = 0; i < ccb_->num_buffers; i++) { - if (bcb_->refs[i] > 0) { - size += bcb_->sizes[i]; - } - } - return size; -} - // User ids are allocated from the multiplexer as all virtual channels // on the mux share the same CCB. 
absl::StatusOr ServerChannel::AllocateUserId(const char *type) { @@ -258,6 +253,7 @@ ServerChannel::AddPublisher(ClientHandler *handler, bool is_reliable, } PublisherUser *result = pub.get(); AddUser(*user_id, std::move(pub)); + return result; } @@ -280,7 +276,7 @@ ServerChannel::AddSubscriber(ClientHandler *handler, bool is_reliable, } void ServerChannel::TriggerAllSubscribers() { - for (auto & [ id, user ] : users_) { + for (auto &[id, user] : users_) { if (user == nullptr) { continue; } @@ -305,6 +301,11 @@ void ServerChannel::RemoveUser(Server *server, int user_id) { users_.erase(it); return; } + if (user->IsPublisher()) { + server->OnRemovePublisher(Name(), user->GetId()); + } else { + server->OnRemoveSubscriber(Name(), user->GetId()); + } CleanupSlots(user->GetId(), user->IsReliable(), user->IsPublisher(), GetVirtualChannelId()); RemoveUserId(user->GetId()); @@ -320,7 +321,7 @@ void ServerChannel::RemoveUser(Server *server, int user_id) { } void ServerChannel::RemoveAllUsersFor(ClientHandler *handler) { - for (auto & [ id, user ] : users_) { + for (auto &[id, user] : users_) { if (user == nullptr) { continue; } @@ -337,31 +338,23 @@ void ServerChannel::RemoveAllUsersFor(ClientHandler *handler) { } } -void ServerChannel::CountUsers(int &num_pubs, int &num_subs) const { - num_pubs = num_subs = 0; - for (auto & [ id, user ] : users_) { +void ServerChannel::CountUsers(int &num_pubs, int &num_subs, + int &num_bridge_pubs, + int &num_bridge_subs) const { + num_pubs = num_subs = num_bridge_pubs = num_bridge_subs = 0; + for (auto &[id, user] : users_) { if (user == nullptr) { continue; } if (user->IsPublisher()) { num_pubs++; + if (user->IsBridge()) { + num_bridge_pubs++; + } } else { num_subs++; - } - } -} - -void ServerChannel::CountBridgeUsers(int &num_pubs, int &num_subs) const { - num_pubs = num_subs = 0; - for (auto & [ id, user ] : users_) { - if (user == nullptr) { - continue; - } - if (user->IsBridge()) { - if (user->IsPublisher()) { - num_pubs++; - } else { 
- num_subs++; + if (user->IsBridge()) { + num_bridge_subs++; } } } @@ -369,7 +362,7 @@ void ServerChannel::CountBridgeUsers(int &num_pubs, int &num_subs) const { // Channel is public if there are any public publishers. bool ServerChannel::IsLocal() const { - for (auto & [ id, user ] : users_) { + for (auto &[id, user] : users_) { if (user == nullptr) { continue; } @@ -385,7 +378,7 @@ bool ServerChannel::IsLocal() const { // Channel is reliable if there are any reliable publishers. bool ServerChannel::IsReliable() const { - for (auto & [ id, user ] : users_) { + for (auto &[id, user] : users_) { if (user == nullptr) { continue; } @@ -402,7 +395,7 @@ bool ServerChannel::IsReliable() const { // Channel is fixed_size if there are any fixed size publishers. If one is // fixed size, they all must be. bool ServerChannel::IsFixedSize() const { - for (auto & [ id, user ] : users_) { + for (auto &[id, user] : users_) { if (user == nullptr) { continue; } @@ -419,7 +412,7 @@ bool ServerChannel::IsFixedSize() const { bool ServerChannel::IsBridgePublisher() const { int num_pubs = 0; int num_bridge_pubs = 0; - for (auto & [ id, user ] : users_) { + for (auto &[id, user] : users_) { if (user == nullptr) { continue; } @@ -437,7 +430,7 @@ bool ServerChannel::IsBridgePublisher() const { bool ServerChannel::IsBridgeSubscriber() const { int num_subs = 0; int num_bridge_subs = 0; - for (auto & [ id, user ] : users_) { + for (auto &[id, user] : users_) { if (user == nullptr) { continue; } @@ -453,17 +446,17 @@ bool ServerChannel::IsBridgeSubscriber() const { } ServerChannel::CapacityInfo ServerChannel::HasSufficientCapacityInternal( - int initial_value, int new_max_active_messages) const { + int new_max_active_messages) const { if (NumSlots() == 0) { - return CapacityInfo{true, 0, 0, 0}; + return CapacityInfo{true, 0, 0, 0, 0}; } // Count number of publishers and subscribers. 
- int num_pubs, num_subs; - CountUsers(num_pubs, num_subs); + int num_pubs, num_subs, num_bridge_pubs, num_bridge_subs; + CountUsers(num_pubs, num_subs, num_bridge_pubs, num_bridge_subs); // Add in the total active message maximums. int max_active_messages = new_max_active_messages; - for (auto & [ id, user ] : users_) { + for (auto &[id, user] : users_) { if (user == nullptr) { continue; } @@ -472,15 +465,14 @@ ServerChannel::CapacityInfo ServerChannel::HasSufficientCapacityInternal( max_active_messages += sub->MaxActiveMessages() - 1; } } - int slots_needed = - initial_value + num_pubs + num_subs + max_active_messages + 1; + int slots_needed = num_pubs + num_subs + max_active_messages + 1; return CapacityInfo{slots_needed <= NumSlots() - 1, num_pubs, num_subs, max_active_messages, slots_needed}; } absl::Status ServerChannel::HasSufficientCapacity(int new_max_active_messages) const { - auto info = HasSufficientCapacityInternal(0, new_max_active_messages); + auto info = HasSufficientCapacityInternal(new_max_active_messages); if (info.capacity_ok) { return absl::OkStatus(); } @@ -497,16 +489,18 @@ absl::Status ServerChannel::CapacityError(const CapacityInfo &info) const { (info.max_active_messages == 1 ? 
"" : "s"), info.slots_needed + 1)); } -void ServerChannel::GetChannelInfo(subspace::ChannelInfo *info) { +void ServerChannel::GetChannelInfo(subspace::ChannelInfoProto *info) { info->set_name(Name()); info->set_slot_size(SlotSize()); info->set_num_slots(NumSlots()); info->set_type(Type()); - int num_pubs, num_subs; - CountUsers(num_pubs, num_subs); + int num_pubs, num_subs, num_bridge_pubs, num_bridge_subs; + CountUsers(num_pubs, num_subs, num_bridge_pubs, num_bridge_subs); info->set_num_pubs(num_pubs); info->set_num_subs(num_subs); + info->set_num_bridge_pubs(num_bridge_pubs); + info->set_num_bridge_subs(num_bridge_subs); info->set_is_reliable(IsReliable()); if (IsVirtual()) { @@ -515,13 +509,34 @@ void ServerChannel::GetChannelInfo(subspace::ChannelInfo *info) { info->set_vchan_id(GetVirtualChannelId()); info->set_mux(vchan->GetMux()->Name()); } +} + +std::vector ServerChannel::GetResizeInfo() const { + std::vector info; + // Derive the resize information from the bcb contents. Since the server + // isn't aware of resize operations we have to derive the information. + if (bcb_ == nullptr || ccb_ == nullptr) { + // No buffers, no resizes. 
+ return info; + } + uint64_t previous_buffer_size = 0; + for (int i = 0; i < ccb_->num_buffers; i++) { + if (previous_buffer_size == 0) { + previous_buffer_size = bcb_->sizes[i]; + continue; + } + ResizeInfo resize_info; - CountBridgeUsers(num_pubs, num_subs); - info->set_num_bridge_pubs(num_pubs); - info->set_num_bridge_subs(num_subs); + resize_info.new_slot_size = BufferSizeToSlotSize(bcb_->sizes[i]); + resize_info.old_slot_size = BufferSizeToSlotSize(previous_buffer_size); + previous_buffer_size = bcb_->sizes[i]; + info.push_back(resize_info); + } + scb_->counters[GetChannelId()].num_resizes = ccb_->num_buffers - 1; + return info; } -void ServerChannel::GetChannelStats(subspace::ChannelStats *stats) { +void ServerChannel::GetChannelStats(subspace::ChannelStatsProto *stats) { stats->set_channel_name(Name()); uint64_t total_bytes, total_messages; uint32_t max_message_size, total_drops; @@ -533,10 +548,12 @@ void ServerChannel::GetChannelStats(subspace::ChannelStats *stats) { stats->set_max_message_size(max_message_size); stats->set_total_drops(total_drops); - int num_pubs, num_subs; - CountUsers(num_pubs, num_subs); + int num_pubs, num_subs, num_bridge_pubs, num_bridge_subs; + CountUsers(num_pubs, num_subs, num_bridge_pubs, num_bridge_subs); stats->set_num_pubs(num_pubs); stats->set_num_subs(num_subs); + stats->set_num_bridge_pubs(num_bridge_pubs); + stats->set_num_bridge_subs(num_bridge_subs); } ChannelCounters &ServerChannel::RecordUpdate(bool is_pub, bool add, @@ -581,7 +598,7 @@ ChannelMultiplexer::CreateVirtualChannel(Server &server, "Virtual channel id %d is beyond max virtual channels (%d)", vchan_id, kMaxVchanId)); } - auto v = std::make_unique(server, this, vchan_id, name, + auto v = std::make_unique(this, vchan_id, name, SlotSize(), Type(), session_id_); virtual_channels_.insert(v.get()); vchan_ids_.insert(vchan_id); @@ -592,32 +609,29 @@ void ChannelMultiplexer::RemoveVirtualChannel(VirtualChannel *vchan) { virtual_channels_.erase(vchan); } 
-absl::Status -ChannelMultiplexer::HasSufficientCapacity(int new_max_active_messages) const { - // Check the real pubs and subs on the multiplexer. - auto info = HasSufficientCapacityInternal(0, new_max_active_messages); - if (!info.capacity_ok) { - return CapacityError(info); - } +void ChannelMultiplexer::CountUsers(int &num_pubs, int &num_subs, + int &num_bridge_pubs, + int &num_bridge_subs) const { + int total_pubs = 0; + int total_subs = 0; + int total_bridge_pubs = 0; + int total_bridge_subs = 0; - // Check the virtual channels. We keep track of the current number of slots - // needed and this is incremented each time we process a virtual channel. - int slots_needed = info.slots_needed; for (auto vchan : virtual_channels_) { - auto vinfo = vchan->HasSufficientCapacityInternal(slots_needed, - new_max_active_messages); - if (!vinfo.capacity_ok) { - return CapacityError(vinfo); - } - slots_needed = vinfo.slots_needed; - } - return absl::OkStatus(); -} - -VirtualChannel::~VirtualChannel() { - mux_->RemoveVirtualChannel(this); - if (mux_->IsEmpty()) { - server_.RemoveChannel(mux_); - } + int vchan_pubs, vchan_subs, vchan_bridge_pubs, vchan_bridge_subs; + vchan->GetUserCount(vchan_pubs, vchan_subs, vchan_bridge_pubs, + vchan_bridge_subs); + total_pubs += vchan_pubs; + total_subs += vchan_subs; + total_bridge_pubs += vchan_bridge_pubs; + total_bridge_subs += vchan_bridge_subs; + } + // Add the counts from the multiplexer itself. 
+ ServerChannel::CountUsers(num_pubs, num_subs, num_bridge_pubs, + num_bridge_subs); + num_pubs += total_pubs; + num_subs += total_subs; + num_bridge_pubs += total_bridge_pubs; + num_bridge_subs += total_bridge_subs; } } // namespace subspace \ No newline at end of file diff --git a/server/server_channel.h b/server/server_channel.h index 2d42192..66df69f 100644 --- a/server/server_channel.h +++ b/server/server_channel.h @@ -1,9 +1,9 @@ -// Copyright 2025 David Allison +// Copyright 2023-2026 David Allison // All Rights Reserved // See LICENSE file for licensing information. -#ifndef __SERVER_SERVER_CHANNEL_H -#define __SERVER_SERVER_CHANNEL_H +#ifndef _xSERVERSERVER_CHANNEL_H +#define _xSERVERSERVER_CHANNEL_H #include "absl/container/flat_hash_set.h" #include "absl/status/status.h" @@ -28,6 +28,11 @@ class Server; absl::StatusOr CreateSystemControlBlock(toolbelt::FileDescriptor &fd); +struct ResizeInfo { + int old_slot_size; + int new_slot_size; +}; + // A user is a publisher or subscriber on a channel. Each user has a // unique (per channel) user id. 
A user might have a trigger fd // associated with it (subscribers always have one, but only @@ -148,8 +153,8 @@ class ServerChannel : public Channel { public: ServerChannel(int id, const std::string &name, int num_slots, std::string type, bool is_virtual, int session_id) - : Channel(name, num_slots, id, std::move(type)), is_virtual_(is_virtual), session_id_(session_id) { - } + : Channel(name, num_slots, id, std::move(type)), is_virtual_(is_virtual), + session_id_(session_id) {} virtual ~ServerChannel(); @@ -217,12 +222,14 @@ class ServerChannel : public Channel { void RemoveAllUsersFor(ClientHandler *handler); virtual bool IsEmpty() const { return user_ids_.IsEmpty(); } virtual absl::Status HasSufficientCapacity(int new_max_active_messages) const; - void CountUsers(int &num_pubs, int &num_subs) const; - void CountBridgeUsers(int &num_pubs, int &num_subs) const; - virtual void GetChannelInfo(subspace::ChannelInfo *info); - virtual void GetChannelStats(subspace::ChannelStats *stats); + virtual void CountUsers(int &num_pubs, int &num_subs, int &num_bridge_pubs, + int &num_bridge_subs) const; + virtual void GetChannelInfo(subspace::ChannelInfoProto *info); + virtual void GetChannelStats(subspace::ChannelStatsProto *stats); void TriggerAllSubscribers(); + std::vector GetResizeInfo() const; + virtual int SlotSize() const { if (ccb_->num_buffers == 0) { return last_known_slot_size_; @@ -244,8 +251,8 @@ class ServerChannel : public Channel { virtual void RemoveBuffer(uint64_t session_id) { for (int i = 0; i < ccb_->num_buffers; i++) { std::string filename = BufferSharedMemoryName(session_id, i); -#if defined(__APPLE__) - auto shm_name = MacOsSharedMemoryName(filename); +#if SUBSPACE_SHMEM_MODE == SUBSPACE_SHMEM_MODE_POSIX + auto shm_name = PosixSharedMemoryName(filename); if (shm_name.ok()) { (void)shm_unlink(shm_name->c_str()); } @@ -269,7 +276,8 @@ class ServerChannel : public Channel { bridged_publishers_.emplace(addr, reliable); } - void RemoveBridgedAddress(const 
toolbelt::SocketAddress &addr, bool reliable) { + void RemoveBridgedAddress(const toolbelt::SocketAddress &addr, + bool reliable) { bridged_publishers_.erase(ChannelTransmitter(addr, reliable)); } @@ -282,7 +290,7 @@ class ServerChannel : public Channel { } virtual const SharedMemoryFds &GetFds() { return shared_memory_fds_; } - virtual uint64_t GetVirtualMemoryUsage() const; + uint64_t GetVirtualMemoryUsage() const override { return Channel::GetVirtualMemoryUsage(); } // Allocate the shared memory for a channel. The num_slots_ // and slot_size_ member variables will either be 0 (for a subscriber @@ -292,8 +300,8 @@ class ServerChannel : public Channel { // SCB has already been allocated and will be mapped in for // this channel. This is only used in the server. virtual absl::StatusOr - Allocate(const toolbelt::FileDescriptor &scb_fd, int slot_size, - int num_slots); + Allocate(const toolbelt::FileDescriptor &scb_fd, int slot_size, int num_slots, + int initial_ordinal); struct CapacityInfo { bool capacity_ok; @@ -303,10 +311,16 @@ class ServerChannel : public Channel { int slots_needed; }; - CapacityInfo HasSufficientCapacityInternal(int initial_value, - int new_max_active_messages) const; + CapacityInfo HasSufficientCapacityInternal(int new_max_active_messages) const; absl::Status CapacityError(const CapacityInfo &info) const; + virtual void GetStatsCounters(uint64_t &total_bytes, uint64_t &total_messages, + uint32_t &max_message_size, + uint32_t &total_drops) { + Channel::GetStatsCounters(total_bytes, total_messages, max_message_size, + total_drops); + } + protected: absl::flat_hash_map> users_; toolbelt::BitSet user_ids_; @@ -335,12 +349,6 @@ class ChannelMultiplexer : public ServerChannel { return virtual_channels_.empty() && ServerChannel::IsEmpty(); } - // Check the capacity of the mux. There needs to be sufficient slots cover: - // 1. One for each publisher, both real and virtual - // 2. For each subscriber, the maximum number of active messages. 
- absl::Status - HasSufficientCapacity(int new_max_active_messages) const override; - void RemoveBuffer(uint64_t session_id) override { if (!virtual_channels_.empty()) { return; @@ -348,6 +356,9 @@ class ChannelMultiplexer : public ServerChannel { ServerChannel::RemoveBuffer(session_id); } + void CountUsers(int &num_pubs, int &num_subs, int &num_bridge_pubs, + int &num_bridge_subs) const override; + private: int next_vchan_id_ = 0; absl::flat_hash_set virtual_channels_; @@ -362,12 +373,12 @@ class ChannelMultiplexer : public ServerChannel { // updates to the multiplexer SCB will be seen by all virtual channels. class VirtualChannel : public ServerChannel { public: - VirtualChannel(Server &server, ChannelMultiplexer *mux, int vchan_id, - const std::string &name, int num_slots, std::string type, int session_id) - : ServerChannel(mux->GetChannelId(), name, num_slots, type, true, session_id), - server_(server), mux_(mux), vchan_id_(vchan_id) {} - - ~VirtualChannel(); + VirtualChannel(ChannelMultiplexer *mux, int vchan_id, + const std::string &name, int num_slots, std::string type, + int session_id) + : ServerChannel(mux->GetChannelId(), name, num_slots, type, true, + session_id), + mux_(mux), vchan_id_(vchan_id) {} std::string Type() const override { return mux_->Type(); } void SetType(const std::string &type) override { mux_->SetType(type); } @@ -385,6 +396,10 @@ class VirtualChannel : public ServerChannel { user_ids_.Clear(id); } + void CountUsers(int &num_pubs, int &num_subs, int &num_bridge_pubs, + int &num_bridge_subs) const override { + mux_->CountUsers(num_pubs, num_subs, num_bridge_pubs, num_bridge_subs); + } ChannelMultiplexer *GetMux() const { return mux_; } int GetVirtualChannelId() const override { return vchan_id_; } @@ -421,11 +436,22 @@ class VirtualChannel : public ServerChannel { uint64_t GetVirtualMemoryUsage() const override { return 0; } + void GetUserCount(int &num_pubs, int &num_subs, int &num_bridge_pubs, + int &num_bridge_subs) const { + 
ServerChannel::CountUsers(num_pubs, num_subs, num_bridge_pubs, + num_bridge_subs); + } + + void GetStatsCounters(uint64_t &total_bytes, uint64_t &total_messages, + uint32_t &max_message_size, uint32_t &total_drops) override { + mux_->GetStatsCounters(total_bytes, total_messages, max_message_size, + total_drops); + } + private: - Server &server_; ChannelMultiplexer *mux_; int vchan_id_; }; } // namespace subspace -#endif // __SERVER_SERVER_CHANNEL_H +#endif // _xSERVERSERVER_CHANNEL_H