diff --git a/CMakeLists.txt b/CMakeLists.txt index c50f3c9..321339d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,12 +28,12 @@ if (CMAKE_BUILD_TYPE MATCHES Release) endif() option(FSANITIZE "Turn on fsanitize" OFF) if (FSANITIZE) - list(APPEND BUILD_FLAGS + list(APPEND BUILD_FLAGS -fsanitize=address #ASAN -fsanitize=undefined #UBSAN - -fsanitize=float-divide-by-zero - -fsanitize=float-cast-overflow - -fno-sanitize-recover=all + -fsanitize=float-divide-by-zero + -fsanitize=float-cast-overflow + -fno-sanitize-recover=all -fno-sanitize=alignment ) message("fsanitize is turned on") @@ -59,6 +59,11 @@ if(SZD_PERF_PER_ZONE_COUNTERS) endif() endif() +# Sets up io_uring +find_package(PkgConfig REQUIRED) +pkg_search_module(IOURING REQUIRED IMPORTED_TARGET liburing) +list(TRANSFORM IOURING_LINK_LIBRARIES REPLACE "[.]so$" ".a") + # Sets up SPDK include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/FindSPDK.cmake") @@ -68,12 +73,18 @@ set(szd_core_include_files "${szd_core_include_dir}/szd_namespace.h" "${szd_core_include_dir}/szd_status_code.h" "${szd_core_include_dir}/szd.h" + "${szd_core_include_dir}/szd_spdk.h" + "${szd_core_include_dir}/szd_ioctl.h" + "${szd_core_include_dir}/szd_iouring.h" ) list(APPEND szd_all_files "${szd_core_include_files}") set(szd_core_src_dir "${CMAKE_CURRENT_SOURCE_DIR}/szd/core/src") set(szd_core_src_files "${szd_core_src_dir}/szd_status_code.c" "${szd_core_src_dir}/szd.c" + "${szd_core_src_dir}/szd_spdk.c" + "${szd_core_src_dir}/szd_ioctl.c" + "${szd_core_src_dir}/szd_iouring.c" ) list(APPEND szd_all_files "${szd_core_src_files}") @@ -110,6 +121,7 @@ list(APPEND szd_all_files "${szd_cpp_src_files}") function(SETUP_SZD_PROJECT_STRUCTURE project_name) set_target_properties(${project_name} PROPERTIES LINKER_LANGUAGE CXX) set_property(TARGET ${project_name} PROPERTY POSITION_INDEPENDENT_CODE ON) + target_link_libraries(${project_name} PRIVATE ${IOURING_LINK_LIBRARIES} nvme) target_compile_options( ${project_name} PRIVATE "${BUILD_FLAGS}" @@ -133,7 
+145,7 @@ add_library(szd STATIC ) target_include_directories(szd PRIVATE "${SPDK_INCLUDE_DIRS}") target_link_libraries(szd PUBLIC ${SPDK_LIBRARY_DEPENDENCIES}) -target_include_directories(szd PUBLIC +target_include_directories(szd PUBLIC "$" "$/szd/core/${CMAKE_INSTALL_INCLUDEDIR}>" ) @@ -147,7 +159,7 @@ add_library(szd_extended STATIC ) target_include_directories(szd_extended PRIVATE "${SPDK_INCLUDE_DIRS}") target_link_libraries(szd_extended PUBLIC ${SPDK_LIBRARY_DEPENDENCIES}) -target_include_directories(szd_extended PUBLIC +target_include_directories(szd_extended PUBLIC "$" "$" "$/szd/core/${CMAKE_INSTALL_INCLUDEDIR}>" @@ -161,6 +173,34 @@ setup_szd_project_structure(szd_extended) # ) #setup_spdk_project_structure(szd_lib_shared) + +# Change install dir +if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + # Change default installation prefix on Linux to /usr + set(CMAKE_INSTALL_PREFIX /usr CACHE PATH "Install path prefix, prepended onto install directories." FORCE) + endif() +endif() +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +# Install +install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/szd/core/include/szd" COMPONENT runtime DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") +install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/szd/cpp/include/szd" COMPONENT runtime DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") +install( + TARGETS szd_extended + COMPONENT runtime + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" +) +install( + TARGETS szd + COMPONENT runtime + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" +) + + # Tooling.... 
set(tool_files) if (SZD_TOOLS) @@ -192,6 +232,14 @@ if (TESTING) list(APPEND szd_all_files "${szd_core_test_dir}/szd_full_path_test.c") target_link_libraries(szd_full_path_test PUBLIC szd) setup_szd_project_structure(szd_full_path_test) + + # add_executable(szd_full_path_test_writes + # "${szd_core_test_dir}/szd_full_path_test_writes.c" + # ) + # list(APPEND szd_all_files "${szd_core_test_dir}/szd_full_path_test_writes.c") + # target_link_libraries(szd_full_path_test_writes PUBLIC szd) + # setup_szd_project_structure(szd_full_path_test_writes) + # Setup GTests include(GoogleTest) include(FetchContent) @@ -255,7 +303,7 @@ if(DOXYGEN) set(DOCS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/docs) set(DOXYFILE_IN ${DOCS_DIR}/Doxyfile.in) set(DOXYFILE_OUT ${DOCS_DIR}/Doxyfile) - + # request to configure the file configure_file(${DOXYFILE_IN} ${DOXYFILE_OUT} @ONLY) add_custom_target(docs diff --git a/szd/core/include/szd/szd.h b/szd/core/include/szd/szd.h index 00874c5..0a09f5b 100644 --- a/szd/core/include/szd/szd.h +++ b/szd/core/include/szd/szd.h @@ -1,36 +1,3 @@ -/*- - * BSD LICENSE - * - * Copyright (c) Intel Corporation. All rights reserved. - * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - /** \file * Main SZD interface. */ @@ -40,13 +7,10 @@ #include "szd/szd_namespace.h" #include "szd/szd_status_code.h" - #include - #ifdef SZD_USDT #include #endif - #ifdef __cplusplus extern "C" { #include @@ -56,32 +20,18 @@ extern "C" { #include #include #endif - -// "Forward declare" SPDK structs to prevent pollution. -typedef struct spdk_nvme_transport_id t_spdk_nvme_transport_id; -typedef struct spdk_nvme_ctrlr t_spdk_nvme_ctrlr; -typedef struct spdk_nvme_ctrlr_opts t_spdk_nvme_ctrlr_opts; -typedef struct spdk_nvme_ns t_spdk_nvme_ns; -typedef struct spdk_nvme_qpair t_spdk_nvme_qpair; -typedef struct spdk_nvme_cpl t_spdk_nvme_cpl; -typedef struct spdk_nvme_zns_ns_data t_spdk_nvme_zns_ns_data; -typedef struct spdk_nvme_ns_data t_spdk_nvme_ns_data; -typedef struct spdk_nvme_zns_ctrlr_data t_spdk_nvme_zns_ctrlr_data; -typedef struct spdk_nvme_ctrlr_data t_spdk_nvme_ctrlr_data; - #ifdef __cplusplus namespace SIMPLE_ZNS_DEVICE_NAMESPACE { #endif -#define MAX_TRADDR_LENGTH 0x100 -#define MAX_DEVICE_COUNT 0x100 - /** * @brief Options to pass to the ZNS device on initialisation. */ typedef struct { const char *name; /**< Name used by SPDK to identify application. 
*/ const bool setup_spdk; /**< Set to false during reset. */ + const bool iouring_sqthread; /**< Set to false during reset. */ + const bool iouring_fixed; /**< Set to false during reset. */ } DeviceOptions; extern const DeviceOptions DeviceOptions_default; @@ -101,14 +51,16 @@ extern const DeviceOpenOptions DeviceOpenOptions_default; typedef struct { uint64_t lba_size; /**< Size of one block, also known as logical block address.*/ - uint64_t zone_size; /**< Size of one zone in lbas.*/ - uint64_t zone_cap; /**< Size of user availabe space in one zone. */ - uint64_t mdts; /**< Maximum data transfer size in bytes.*/ - uint64_t zasl; /**< Maximum size of one append command in bytes.*/ - uint64_t lba_cap; /**< Amount of lbas available on the device.*/ - uint64_t min_lba; /**< Minimum lba that is allowed to be written to.*/ - uint64_t max_lba; /**< Maximum lba that is allowed to be written to.*/ - const char *name; /**< Name used by SPDK to identify device.*/ + uint64_t zone_size; /**< Size of one zone in lbas.*/ + uint64_t zone_cap; /**< Size of user availabe space in one zone. */ + uint64_t min_page_size; /** minimum cap page size */ + uint64_t mdts; /**< Maximum data transfer size in bytes.*/ + uint64_t zasl; /**< Maximum size of one append command in bytes.*/ + uint64_t lba_cap; /**< Amount of lbas available on the device.*/ + uint64_t min_lba; /**< Minimum lba that is allowed to be written to.*/ + uint64_t max_lba; /**< Maximum lba that is allowed to be written to.*/ + const char *name; /**< Name used by SZD to identify device.*/ + uint32_t nsid; /**< nsid*/ } DeviceInfo; extern const DeviceInfo DeviceInfo_default; @@ -126,12 +78,8 @@ extern const DeviceManagerInternal DeviceManagerInternal_default; * The core structure in SimpleZnsDevice. 
*/ typedef struct { - t_spdk_nvme_transport_id - *g_trid; /**< transport id used to communicate with SSD*/ - t_spdk_nvme_ctrlr *ctrlr; /**< Controller of the selected SSD*/ - t_spdk_nvme_ns *ns; /**< Selected namespace of the selected SSD*/ - DeviceInfo info; /**< Information of selected SSD*/ - void *private_; /**< To be used by SZD only */ + DeviceInfo info; /**< Information of selected SSD*/ + void *private_; /**< To be used by backend only */ } DeviceManager; /** @@ -139,8 +87,8 @@ typedef struct { * Can be used for writing and reading of data. */ typedef struct { - t_spdk_nvme_qpair *qpair; /**< internal I/O channel */ - DeviceManager *man; /**< Manager of the channel*/ + void *qpair; /**< internal I/O channel */ + DeviceManager *man; /**< Manager of the channel*/ } QPair; /** @@ -150,102 +98,136 @@ typedef struct { typedef struct { bool done; /**< Synchronous call is done.*/ uint16_t err; /**< return code after call is done.*/ + uint32_t id; /** @@ -46,33 +48,15 @@ #ifdef __cplusplus extern "C" { #endif - -typedef struct spdk_nvme_transport_id t_spdk_nvme_transport_id; -typedef struct spdk_nvme_ctrlr t_spdk_nvme_ctrlr; -typedef struct spdk_nvme_ctrlr_opts t_spdk_nvme_ctrlr_opts; -typedef struct spdk_nvme_ns t_spdk_nvme_ns; -typedef struct spdk_nvme_qpair t_spdk_nvme_qpair; -typedef struct spdk_nvme_cpl t_spdk_nvme_cpl; -typedef struct spdk_nvme_zns_ns_data t_spdk_nvme_zns_ns_data; -typedef struct spdk_nvme_ns_data t_spdk_nvme_ns_data; -typedef struct spdk_nvme_zns_ctrlr_data t_spdk_nvme_zns_ctrlr_data; -typedef struct spdk_nvme_ctrlr_data t_spdk_nvme_ctrlr_data; - #ifdef __cplusplus namespace SIMPLE_ZNS_DEVICE_NAMESPACE { #endif -const DeviceOptions DeviceOptions_default = {"znsdevice", true}; +const DeviceOptions DeviceOptions_default = {"znsdevice", true, true, false}; const DeviceOpenOptions DeviceOpenOptions_default = {0, 0}; -const Completion Completion_default = {false, SZD_SC_SUCCESS}; +const Completion Completion_default = {false, SZD_SC_SUCCESS, 0}; 
const DeviceManagerInternal DeviceManagerInternal_default = {0, 0}; -const DeviceInfo DeviceInfo_default = {0, 0, 0, 0, 0, 0, 0, 0, "SZD"}; - -// Needed because of DPDK and reattaching, we need to remember what we have -// seen... -static char *found_devices[MAX_DEVICE_COUNT]; -static size_t found_devices_len[MAX_DEVICE_COUNT]; -static size_t found_devices_number = 0; +const DeviceInfo DeviceInfo_default = {0, 0, 0, 0, 0, 0, 0, 0, 0, "SZD", 0}; #ifdef NDEBUG // When no debugging, we require that no functions will use invalid nulled @@ -90,124 +74,59 @@ static size_t found_devices_number = 0; } while (0) #endif -int szd_init(DeviceManager **manager, DeviceOptions *options) { +int __szd_register_backend(EngineManager *em, enum szd_io_backend backend) { + switch (backend) { + case SZD_IO_BACKEND_SPDK: + return szd_spdk_register_backend(em); + break; + case SZD_IO_BACKEND_IO_URING: + return szd_io_uring_register_backend(em); + break; + default: + break; + } + return -1; +} + +int szd_init(EngineManager **em, DeviceOptions *options, + enum szd_io_backend backend) { + int ret = 0; RETURN_ERR_ON_NULL(options); - RETURN_ERR_ON_NULL(manager); - *manager = (DeviceManager *)calloc(1, sizeof(DeviceManager)); - RETURN_ERR_ON_NULL(*manager); - // Setup options - struct spdk_env_opts opts; - if (options->setup_spdk) { - opts.name = options->name; - spdk_env_opts_init(&opts); - } - // Setup SPDK - (*manager)->g_trid = - (t_spdk_nvme_transport_id *)calloc(1, sizeof(t_spdk_nvme_transport_id)); - RETURN_ERR_ON_NULL((*manager)->g_trid); - spdk_nvme_trid_populate_transport((*manager)->g_trid, - SPDK_NVME_TRANSPORT_PCIE); - if (spdk_unlikely(spdk_env_init(!options->setup_spdk ? NULL : &opts) < 0)) { - free((*manager)->g_trid); - free(*manager); - return SZD_SC_SPDK_ERROR_INIT; - } - // setup stub info, we do not want to create extra UB. 
- (*manager)->info = DeviceInfo_default; - (*manager)->info.name = options->name; - (*manager)->ctrlr = NULL; - (*manager)->ns = NULL; - (*manager)->private_ = NULL; + RETURN_ERR_ON_NULL(em); + *em = (EngineManager *)calloc(1, sizeof(EngineManager)); + RETURN_ERR_ON_NULL(*em); + DeviceManager *dm = (DeviceManager *)calloc(1, sizeof(DeviceManager)); + (*em)->manager_ = dm; + RETURN_ERR_ON_NULL(dm); + // Setup engine + if ((ret = __szd_register_backend(*em, backend)) < 0) { + free(dm); + return ret; + } + if ((ret = (*em)->backend.init(&dm, options)) < 0) { + free(dm); + return ret; + } SZD_DTRACE_PROBE(szd_init); return SZD_SC_SUCCESS; } -int szd_get_device_info(DeviceInfo *info, DeviceManager *manager) { +int szd_get_device_info(EngineManager *em, DeviceInfo *info) { RETURN_ERR_ON_NULL(info); - RETURN_ERR_ON_NULL(manager); - RETURN_ERR_ON_NULL(manager->ctrlr); - RETURN_ERR_ON_NULL(manager->ns); - info->lba_size = (uint64_t)spdk_nvme_ns_get_sector_size(manager->ns); - info->zone_size = - (uint64_t)spdk_nvme_zns_ns_get_zone_size_sectors(manager->ns); - info->mdts = (uint64_t)spdk_nvme_ctrlr_get_max_xfer_size(manager->ctrlr); - info->zasl = - (uint64_t)spdk_nvme_zns_ctrlr_get_max_zone_append_size(manager->ctrlr); - info->lba_cap = (uint64_t)spdk_nvme_ns_get_num_sectors(manager->ns); - info->min_lba = manager->info.min_lba; - info->max_lba = manager->info.max_lba; - // printf("INFO: %lu %lu %lu %lu %lu %lu %lu \n", info->lba_size, - // info->zone_size, info->mdts, info->zasl, - // info->lba_cap, info->min_lba, info->max_lba); - // TODO: zone cap can differ between zones... 
- QPair **temp = (QPair **)calloc(1, sizeof(QPair *)); - szd_create_qpair(manager, temp); - szd_get_zone_cap(*temp, info->min_lba, &info->zone_cap); - szd_destroy_qpair(*temp); - free(temp); - return SZD_SC_SUCCESS; -} - -bool __szd_open_probe_cb(void *cb_ctx, - const struct spdk_nvme_transport_id *trid, - struct spdk_nvme_ctrlr_opts *opts) { - DeviceTarget *prober = (DeviceTarget *)cb_ctx; - if (!prober->traddr) { - return false; - } - // You trying to overflow? - if (strlen(prober->traddr) < prober->traddr_len) { - return false; + RETURN_ERR_ON_NULL(em); + RETURN_ERR_ON_NULL(em->manager_); + if (em->backend.get_device_info(info, em->manager_) < 0) { + return SZD_SC_UNKNOWN; } - if (strlen((const char *)trid->traddr) < prober->traddr_len) { - return false; - } - if (strncmp((const char *)trid->traddr, prober->traddr, prober->traddr_len) != - 0) { - return false; - } - (void)opts; - return true; -} - -void __szd_open_attach_cb(void *cb_ctx, - const struct spdk_nvme_transport_id *trid, - struct spdk_nvme_ctrlr *ctrlr, - const struct spdk_nvme_ctrlr_opts *opts) { - DeviceTarget *prober = (DeviceTarget *)cb_ctx; - if (prober == NULL) { - return; - } - prober->manager->ctrlr = ctrlr; - // take any ZNS namespace, we do not care which. 
- for (int nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0; - nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { - struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); - if (ns == NULL) { - continue; - } - if (spdk_nvme_ns_get_csi(ns) != SPDK_NVME_CSI_ZNS) { - continue; - } - prober->manager->ns = ns; - prober->found = true; - break; - } - (void)trid; - (void)opts; - return; -} - -void __szd_open_remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) { - (void)cb_ctx; - (void)ctrlr; + return SZD_SC_SUCCESS; } -int __szd_open_create_private(DeviceManager *manager, - DeviceOpenOptions *options) { +static int __szd_open_create_internal(EngineManager *em, + DeviceOpenOptions *options) { + DeviceManager *dm = em->manager_; uint64_t zone_min = options->min_zone; uint64_t zone_max = options->max_zone; - uint64_t zone_max_allowed = manager->info.lba_cap / manager->info.zone_size; + uint64_t zone_max_allowed = dm->info.lba_cap / dm->info.zone_size; if (zone_min != 0 && zone_min > zone_max_allowed) { return SZD_SC_SPDK_ERROR_OPEN; } @@ -219,297 +138,121 @@ int __szd_open_create_private(DeviceManager *manager, if (zone_min > zone_max) { return SZD_SC_SPDK_ERROR_OPEN; } - DeviceManagerInternal *private_ = + DeviceManagerInternal *internal_ = (DeviceManagerInternal *)calloc(1, sizeof(DeviceManagerInternal)); - *private_ = DeviceManagerInternal_default; - RETURN_ERR_ON_NULL(private_); - private_->zone_min_ = zone_min; - private_->zone_max_ = zone_max; - manager->private_ = (void *)private_; + *internal_ = DeviceManagerInternal_default; + RETURN_ERR_ON_NULL(internal_); + internal_->zone_min_ = zone_min; + internal_->zone_max_ = zone_max; + em->internal_ = (void *)internal_; return SZD_SC_SUCCESS; } -int szd_open(DeviceManager *manager, const char *traddr, +int szd_open(EngineManager *em, const char *traddr, DeviceOpenOptions *options) { - DeviceTarget prober = {.manager = manager, - .traddr = traddr, - .traddr_len = strlen(traddr), - .found = false}; 
- // This is needed because of DPDK not properly recognising reattached devices. - // So force traddr. - bool already_found_once = false; - for (size_t i = 0; i < found_devices_number; i++) { - if (found_devices_len[i] == strlen(traddr) && - memcmp(found_devices[i], traddr, found_devices_len[i])) { - already_found_once = true; - } - } - if (already_found_once) { - memset(manager->g_trid, 0, sizeof(*(manager->g_trid))); - spdk_nvme_trid_populate_transport(manager->g_trid, - SPDK_NVME_TRANSPORT_PCIE); - memcpy(manager->g_trid->traddr, traddr, - spdk_min(strlen(traddr), sizeof(manager->g_trid->traddr))); - } - // Find controller. - int probe_ctx; - probe_ctx = spdk_nvme_probe(manager->g_trid, &prober, - (spdk_nvme_probe_cb)__szd_open_probe_cb, - (spdk_nvme_attach_cb)__szd_open_attach_cb, - (spdk_nvme_remove_cb)__szd_open_remove_cb); - // Dettach if broken. - if (probe_ctx != 0) { - if (manager->ctrlr != NULL) { - return spdk_nvme_detach(manager->ctrlr) || SZD_SC_SPDK_ERROR_OPEN; - } else { - return SZD_SC_SPDK_ERROR_OPEN; - } - } - if (!prober.found) { - if (manager->ctrlr != NULL) { - return spdk_nvme_detach(manager->ctrlr) || SZD_SC_SPDK_ERROR_OPEN; - } else { - return SZD_SC_SPDK_ERROR_OPEN; - } + int rc; + // look for device + if ((rc = em->backend.open(em->manager_, traddr, options)) != 0) { + return rc; } // Setup information immediately. - int rc = szd_get_device_info(&manager->info, manager); - if (rc != 0) { + if ((rc = szd_get_device_info(em, &em->manager_->info)) != 0) { return rc; } - rc = __szd_open_create_private(manager, options); - if (rc != 0) { + // Shadow container + if ((rc = __szd_open_create_internal(em, options)) != 0) { return rc; } // Create a container. 
- DeviceManagerInternal *private_ = (DeviceManagerInternal *)manager->private_; - manager->info.min_lba = private_->zone_min_ * manager->info.zone_size; - manager->info.max_lba = private_->zone_max_ * manager->info.zone_size; + DeviceManagerInternal *internal_ = (DeviceManagerInternal *)em->internal_; + em->manager_->info.min_lba = + internal_->zone_min_ * em->manager_->info.zone_size; + em->manager_->info.max_lba = + internal_->zone_max_ * em->manager_->info.zone_size; SZD_DTRACE_PROBE(szd_open); return rc; } -int szd_close(DeviceManager *manager) { - RETURN_ERR_ON_NULL(manager); - if (spdk_unlikely(manager->ctrlr == NULL)) { - return SZD_SC_NOT_ALLOCATED; - } - int rc = spdk_nvme_detach(manager->ctrlr); - manager->ctrlr = NULL; - manager->ns = NULL; - // Prevents wrongly assuming a device is attached. - manager->info = DeviceInfo_default; - manager->info.name = "\xef\xbe\xad\xde"; - if (manager->private_ != NULL) { - free(manager->private_); - manager->private_ = NULL; - } - if (manager->g_trid != NULL) { - memset(manager->g_trid, 0, sizeof(*(manager->g_trid))); +int szd_close(EngineManager *em) { + RETURN_ERR_ON_NULL(em); + int rc; + // Close backend + rc = em->backend.close(em->manager_); + // Close internal business + em->manager_->info = DeviceInfo_default; + em->manager_->info.name = "\xef\xbe\xad\xde"; + if (em->internal_ != NULL) { + free(em->internal_); + em->internal_ = NULL; } SZD_DTRACE_PROBE(szd_closed); return rc != 0 ? 
SZD_SC_SPDK_ERROR_CLOSE : SZD_SC_SUCCESS; } -int szd_destroy(DeviceManager *manager) { - RETURN_ERR_ON_NULL(manager); +int szd_destroy(EngineManager *em) { + RETURN_ERR_ON_NULL(em); int rc = SZD_SC_SUCCESS; - if (manager->ctrlr != NULL) { - rc = szd_close(manager); + if (em->internal_ != NULL) { + rc = szd_close(em); } - if (manager->g_trid != NULL) { - free(manager->g_trid); - manager->g_trid = NULL; + rc = em->backend.destroy(em->manager_) || rc; + if (em->manager_ != NULL) { + free(em->manager_); } - free(manager); - spdk_env_fini(); + free(em); SZD_DTRACE_PROBE(szd_destroy); return rc; } -int szd_reinit(DeviceManager **manager) { - RETURN_ERR_ON_NULL(manager); - RETURN_ERR_ON_NULL(*manager); - const char *name = (*manager)->info.name; - int rc = szd_destroy(*manager); +int szd_reinit(EngineManager **em) { + RETURN_ERR_ON_NULL(em); + RETURN_ERR_ON_NULL(*em); + int rc = (*em)->backend.reinit(&(*em)->manager_); if (rc != 0) { return SZD_SC_SPDK_ERROR_CLOSE; } - DeviceOptions options = {.name = name, .setup_spdk = false}; - return szd_init(manager, &options); -} - -bool __szd_probe_probe_cb(void *cb_ctx, - const struct spdk_nvme_transport_id *trid, - struct spdk_nvme_ctrlr_opts *opts) { - (void)cb_ctx; - (void)trid; - (void)opts; - return true; -} - -void __szd_probe_attach_cb(void *cb_ctx, - const struct spdk_nvme_transport_id *trid, - struct spdk_nvme_ctrlr *ctrlr, - const struct spdk_nvme_ctrlr_opts *opts) { - ProbeInformation *prober = (ProbeInformation *)cb_ctx; - // Very important lock! We probe concurrently and alter one struct. 
- pthread_mutex_lock(prober->mut); - if (prober->devices >= MAX_DEVICE_COUNT - 1) { - SPDK_ERRLOG("SZD: At the moment no more than %x devices are supported \n", - MAX_DEVICE_COUNT); - } else { - prober->traddr[prober->devices] = - (char *)calloc(strlen(trid->traddr) + 1, sizeof(char)); - memcpy(prober->traddr[prober->devices], trid->traddr, strlen(trid->traddr)); - prober->ctrlr[prober->devices] = ctrlr; - for (int nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0; - nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { - struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); - prober->zns[prober->devices] = - spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS; - } - prober->devices++; - // hidden global state... - bool found = false; - for (size_t i = 0; i < found_devices_number; i++) { - if (found_devices_len[i] == strlen(trid->traddr) && - memcmp(found_devices[i], trid->traddr, found_devices_len[i])) { - found = true; - } - } - if (!found) { - found_devices_len[found_devices_number] = strlen(trid->traddr); - found_devices[found_devices_number] = - (char *)calloc(found_devices_len[found_devices_number], sizeof(char)); - memcpy(found_devices[found_devices_number], trid->traddr, - found_devices_len[found_devices_number]); - found_devices_number++; - } - } - pthread_mutex_unlock(prober->mut); - (void)opts; + return SZD_SC_SUCCESS; } -int szd_probe(DeviceManager *manager, ProbeInformation **probe) { - RETURN_ERR_ON_NULL(manager); +int szd_probe(EngineManager *em, void **probe) { + RETURN_ERR_ON_NULL(em); RETURN_ERR_ON_NULL(probe); - *probe = (ProbeInformation *)calloc(1, sizeof(ProbeInformation)); - RETURN_ERR_ON_NULL(*probe); - (*probe)->traddr = (char **)calloc(MAX_DEVICE_COUNT, sizeof(char *)); - (*probe)->ctrlr = (struct spdk_nvme_ctrlr **)calloc( - MAX_DEVICE_COUNT, sizeof(t_spdk_nvme_ctrlr *)); - (*probe)->zns = (bool *)calloc(MAX_DEVICE_COUNT, sizeof(bool)); - (*probe)->mut = (pthread_mutex_t *)calloc(1, sizeof(pthread_mutex_t)); - if 
(pthread_mutex_init((*probe)->mut, NULL) != 0) { - return SZD_SC_SPDK_ERROR_PROBE; - } - int rc; - rc = spdk_nvme_probe(manager->g_trid, *probe, - (spdk_nvme_probe_cb)__szd_probe_probe_cb, - (spdk_nvme_attach_cb)__szd_probe_attach_cb, NULL); - if (rc != 0) { - return SZD_SC_SPDK_ERROR_PROBE; - } - // Thread safe removing of devices, they have already been probed. - pthread_mutex_lock((*probe)->mut); - for (size_t i = 0; i < (*probe)->devices; i++) { - // keep error message. - rc = spdk_nvme_detach((*probe)->ctrlr[i]) | rc; - } - pthread_mutex_unlock((*probe)->mut); - return rc != 0 ? SZD_SC_SPDK_ERROR_PROBE : SZD_SC_SUCCESS; + return em->backend.probe(em->manager_, probe); } -void szd_free_probe_information(ProbeInformation *probe_info) { - free(probe_info->zns); - for (uint8_t i = 0; i < probe_info->devices; i++) { - free(probe_info->traddr[i]); - } - free(probe_info->traddr); - free(probe_info->ctrlr); - free(probe_info->mut); - free(probe_info); +void szd_free_probe_information(EngineManager *em, void *probe_info) { + em->backend.free_probe(em->manager_, probe_info); } -int szd_create_qpair(DeviceManager *man, QPair **qpair) { - RETURN_ERR_ON_NULL(man); - RETURN_ERR_ON_NULL(man->ctrlr); +int szd_create_qpair(EngineManager *em, QPair **qpair) { + RETURN_ERR_ON_NULL(em); RETURN_ERR_ON_NULL(qpair); - *qpair = (QPair *)calloc(1, sizeof(QPair)); - RETURN_ERR_ON_NULL(*qpair); - (*qpair)->qpair = spdk_nvme_ctrlr_alloc_io_qpair(man->ctrlr, NULL, 0); - (*qpair)->man = man; - RETURN_ERR_ON_NULL((*qpair)->qpair); + int rc = em->backend.create_qpair(em->manager_, qpair); SZD_DTRACE_PROBE(szd_create_qpair); - return SZD_SC_SUCCESS; + return rc; } -int szd_destroy_qpair(QPair *qpair) { +int szd_destroy_qpair(EngineManager *em, QPair *qpair) { RETURN_ERR_ON_NULL(qpair); - RETURN_ERR_ON_NULL(qpair->qpair); - spdk_nvme_ctrlr_free_io_qpair(qpair->qpair); - qpair->man = NULL; - free(qpair); + int rc = em->backend.destroy_qpair(em->manager_, qpair); 
SZD_DTRACE_PROBE(szd_destroy_qpair); - return SZD_SC_SUCCESS; -} - -void *__reserve_dma(uint64_t size) { - return spdk_zmalloc(size, 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); -} - -void *szd_calloc(uint64_t __allign, size_t __nmemb, size_t __size) { - size_t expanded_size = __nmemb * __size; - if (spdk_unlikely(expanded_size % __allign != 0 || __allign == 0)) { - return NULL; - } - return spdk_zmalloc(expanded_size, __allign, NULL, SPDK_ENV_SOCKET_ID_ANY, - SPDK_MALLOC_DMA); -} - -void szd_free(void *buffer) { spdk_free(buffer); } - -void __operation_complete(void *arg, const struct spdk_nvme_cpl *completion) { - Completion *completed = (Completion *)arg; - completed->done = true; - // force non error to always be 0. - completed->err = - spdk_nvme_cpl_is_error(completion) ? completion->status.sc : 0x00; -} - -void __append_complete(void *arg, const struct spdk_nvme_cpl *completion) { - __operation_complete(arg, completion); -} - -void __read_complete(void *arg, const struct spdk_nvme_cpl *completion) { - __operation_complete(arg, completion); -} - -void __reset_zone_complete(void *arg, const struct spdk_nvme_cpl *completion) { - __operation_complete(arg, completion); -} - -void __finish_zone_complete(void *arg, const struct spdk_nvme_cpl *completion) { - __operation_complete(arg, completion); + return rc; } -void __get_zone_head_complete(void *arg, - const struct spdk_nvme_cpl *completion) { - __operation_complete(arg, completion); +void *szd_calloc(EngineManager *em, uint64_t __allign, size_t __nmemb, + size_t __size) { + return em->backend.buf_calloc(__allign, __nmemb, __size); } -#define POLL_QPAIR(qpair, target) \ - do { \ - spdk_nvme_qpair_process_completions((qpair), 0); \ - } while (!(target)) +void szd_free(EngineManager *em, void *buffer) { em->backend.free(buffer); } -int szd_read_with_diag(QPair *qpair, uint64_t lba, void *buffer, uint64_t size, - uint64_t *nr_reads) { +int szd_read_with_diag(EngineManager *em, QPair *qpair, uint64_t lba, + void 
*buffer, uint64_t size, uint64_t *nr_reads) { RETURN_ERR_ON_NULL(qpair); RETURN_ERR_ON_NULL(buffer); int rc = SZD_SC_SUCCESS; - DeviceInfo info = qpair->man->info; + DeviceManager *dm = qpair->man; + DeviceInfo info = dm->info; // zone pointers uint64_t slba = (lba / info.zone_size) * info.zone_size; @@ -528,14 +271,13 @@ int szd_read_with_diag(QPair *qpair, uint64_t lba, void *buffer, uint64_t size, (info.mdts / info.lba_size); // If lba_size > mdts, we have a big problem, // but not because of the read. uint64_t current_step_size = step_size; - Completion completion = Completion_default; // Otherwise we have an out of range. uint64_t number_of_zones_traversed = (lbas_to_process + (lba - slba)) / info.zone_cap; - if (spdk_unlikely(lba < info.min_lba || - slba + number_of_zones_traversed * info.zone_size > - info.max_lba)) { + if (szd_unlikely(lba < info.min_lba || + slba + number_of_zones_traversed * info.zone_size > + info.max_lba)) { return SZD_SC_SPDK_ERROR_READ; } @@ -547,18 +289,14 @@ int szd_read_with_diag(QPair *qpair, uint64_t lba, void *buffer, uint64_t size, } else { current_step_size = step_size; } + // printf("STEPSIXE %lu %lu \n", lbas_processed, current_step_size); // Do not read too much (more than mdts or requested) current_step_size = lbas_to_process - lbas_processed > current_step_size ? 
current_step_size : lbas_to_process - lbas_processed; - - completion.done = false; - completion.err = 0x00; - rc = spdk_nvme_ns_cmd_read(qpair->man->ns, qpair->qpair, - (char *)buffer + lbas_processed * info.lba_size, - lba, /* LBA start */ - current_step_size, /* number of LBAs */ - __read_complete, &completion, 0); + rc = em->backend.read(qpair, lba, + (char *)buffer + lbas_processed * info.lba_size, + current_step_size * info.lba_size, current_step_size); #ifdef SZD_PERF_COUNTERS if (nr_reads != NULL) { *nr_reads += 1; @@ -569,11 +307,6 @@ int szd_read_with_diag(QPair *qpair, uint64_t lba, void *buffer, uint64_t size, if (spdk_unlikely(rc != 0)) { return SZD_SC_SPDK_ERROR_READ; } - // Synchronous reads, busy wait. - POLL_QPAIR(qpair->qpair, completion.done); - if (spdk_unlikely(completion.err != 0)) { - return SZD_SC_SPDK_ERROR_READ; - } lbas_processed += current_step_size; lba += current_step_size; // To the next zone we go @@ -586,16 +319,18 @@ int szd_read_with_diag(QPair *qpair, uint64_t lba, void *buffer, uint64_t size, return SZD_SC_SUCCESS; } -int szd_read(QPair *qpair, uint64_t lba, void *buffer, uint64_t size) { - return szd_read_with_diag(qpair, lba, buffer, size, NULL); +int szd_read(EngineManager *em, QPair *qpair, uint64_t lba, void *buffer, + uint64_t size) { + return szd_read_with_diag(em, qpair, lba, buffer, size, NULL); } -int szd_append_with_diag(QPair *qpair, uint64_t *lba, void *buffer, - uint64_t size, uint64_t *nr_appends) { +int szd_append_with_diag(EngineManager *em, QPair *qpair, uint64_t *lba, + void *buffer, uint64_t size, uint64_t *nr_appends) { RETURN_ERR_ON_NULL(qpair); RETURN_ERR_ON_NULL(buffer); int rc = SZD_SC_SUCCESS; - DeviceInfo info = qpair->man->info; + DeviceManager *dm = qpair->man; + DeviceInfo info = dm->info; // Zone pointers uint64_t slba = (*lba / info.zone_size) * info.zone_size; @@ -614,14 +349,13 @@ int szd_append_with_diag(QPair *qpair, uint64_t *lba, void *buffer, (info.zasl / info.lba_size); // < If lba_size > 
zasl, we have a big // problem, but not because of the append. uint64_t current_step_size = step_size; - Completion completion = Completion_default; // Error if we have an out of range. uint64_t number_of_zones_traversed = (lbas_to_process + (*lba - slba)) / info.zone_cap; - if (spdk_unlikely(*lba < info.min_lba || - slba + number_of_zones_traversed * info.zone_size > - info.max_lba)) { + if (szd_unlikely(*lba < info.min_lba || + slba + number_of_zones_traversed * info.zone_size > + info.max_lba)) { SPDK_ERRLOG("SZD: Append is out of allowed range\n"); return SZD_SC_SPDK_ERROR_APPEND; } @@ -639,14 +373,9 @@ int szd_append_with_diag(QPair *qpair, uint64_t *lba, void *buffer, ? current_step_size : lbas_to_process - lbas_processed; - completion.done = false; - completion.err = 0x00; - - rc = spdk_nvme_zns_zone_append( - qpair->man->ns, qpair->qpair, - (char *)buffer + lbas_processed * info.lba_size, slba, /* LBA start */ - current_step_size, /* number of LBAs */ - __append_complete, &completion, 0); + rc = em->backend.append( + qpair, slba, (char *)buffer + lbas_processed * info.lba_size, + current_step_size * info.lba_size, current_step_size); #ifdef SZD_PERF_COUNTERS if (nr_appends != NULL) { *nr_appends += 1; @@ -658,21 +387,6 @@ int szd_append_with_diag(QPair *qpair, uint64_t *lba, void *buffer, SPDK_ERRLOG("SZD: Error creating append request\n"); return SZD_SC_SPDK_ERROR_APPEND; } - // Synchronous write, busy wait. 
- POLL_QPAIR(qpair->qpair, completion.done); - if (spdk_unlikely(completion.err != 0)) { - SPDK_ERRLOG("SZD: Error during append %x\n", completion.err); - for (uint64_t slba = info.min_lba; slba != info.max_lba; - slba += info.zone_size) { - uint64_t zone_head; - szd_get_zone_head(qpair, slba, &zone_head); - if (zone_head != slba && zone_head != slba + info.zone_size) - SPDK_ERRLOG( - "SZD: Error during append - zone head= [%lu - %lu - %lu]\n", - slba / info.zone_size, zone_head, slba + info.zone_size); - } - return SZD_SC_SPDK_ERROR_APPEND; - } *lba = *lba + current_step_size; lbas_processed += current_step_size; // To the next zone we go @@ -685,17 +399,19 @@ int szd_append_with_diag(QPair *qpair, uint64_t *lba, void *buffer, return SZD_SC_SUCCESS; } -int szd_append(QPair *qpair, uint64_t *lba, void *buffer, uint64_t size) { - return szd_append_with_diag(qpair, lba, buffer, size, NULL); +int szd_append(EngineManager *em, QPair *qpair, uint64_t *lba, void *buffer, + uint64_t size) { + return szd_append_with_diag(em, qpair, lba, buffer, size, NULL); } -int szd_append_async_with_diag(QPair *qpair, uint64_t *lba, void *buffer, - uint64_t size, uint64_t *nr_appends, - Completion *completion) { +int szd_append_async_with_diag(EngineManager *em, QPair *qpair, uint64_t *lba, + void *buffer, uint64_t size, + uint64_t *nr_appends, Completion *completion) { RETURN_ERR_ON_NULL(qpair); RETURN_ERR_ON_NULL(buffer); int rc = SZD_SC_SUCCESS; - DeviceInfo info = qpair->man->info; + DeviceManager *dm = qpair->man; + DeviceInfo info = dm->info; // Zone pointers uint64_t slba = (*lba / info.zone_size) * info.zone_size; @@ -708,24 +424,25 @@ int szd_append_async_with_diag(QPair *qpair, uint64_t *lba, void *buffer, } // Progress variables uint64_t lbas_to_process = (size + info.lba_size - 1) / info.lba_size; + uint32_t id = completion->id; *completion = Completion_default; + completion->id = id; // Error if we have an out of range or we cross a zone border. 
uint64_t number_of_zones_traversed = (lbas_to_process + (*lba - slba)) / info.zone_cap; - if (spdk_unlikely(*lba < info.min_lba || *lba > info.max_lba || - number_of_zones_traversed > 1 || - lbas_to_process > info.zasl / info.lba_size)) { + if (szd_unlikely(*lba < info.min_lba || *lba > info.max_lba || + number_of_zones_traversed > 1 || + lbas_to_process > info.zasl / info.lba_size)) { SPDK_ERRLOG("SZD: Async append out of range\n"); return SZD_SC_SPDK_ERROR_APPEND; } completion->done = false; completion->err = 0x00; - rc = spdk_nvme_zns_zone_append(qpair->man->ns, qpair->qpair, (char *)buffer, - slba, /* LBA start */ - lbas_to_process, /* number of LBAs */ - __append_complete, completion, 0); + rc = em->backend.append_async(qpair, slba, (char *)buffer, + lbas_to_process * info.lba_size, + lbas_to_process, completion); #ifdef SZD_PERF_COUNTERS if (nr_appends != NULL) { *nr_appends += 1; @@ -741,61 +458,122 @@ int szd_append_async_with_diag(QPair *qpair, uint64_t *lba, void *buffer, return SZD_SC_SUCCESS; } -int szd_append_async(QPair *qpair, uint64_t *lba, void *buffer, uint64_t size, - Completion *completion) { - return szd_append_async_with_diag(qpair, lba, buffer, size, NULL, completion); +int szd_append_async(EngineManager *em, QPair *qpair, uint64_t *lba, + void *buffer, uint64_t size, Completion *completion) { + return szd_append_async_with_diag(em, qpair, lba, buffer, size, NULL, + completion); } -int szd_poll_async(QPair *qpair, Completion *completion) { - POLL_QPAIR(qpair->qpair, completion->done); - if (spdk_unlikely(completion->err != 0)) { - SPDK_ERRLOG("SZD: Error during polling - code:%x\n", completion->err); - return SZD_SC_SPDK_ERROR_POLLING; +int szd_write_with_diag(EngineManager *em, QPair *qpair, uint64_t *lba, + void *buffer, uint64_t size, uint64_t *nr_writes) { + RETURN_ERR_ON_NULL(qpair); + RETURN_ERR_ON_NULL(buffer); + int rc = SZD_SC_SUCCESS; + DeviceManager *dm = qpair->man; + DeviceInfo info = dm->info; + + // Zone pointers + uint64_t 
slba = (*lba / info.zone_size) * info.zone_size; + uint64_t current_zone_end = slba + info.zone_cap; + // Oops, let me fix this for you + if (spdk_unlikely(*lba >= current_zone_end)) { + slba += info.zone_size; + *lba = slba + *lba - current_zone_end; + current_zone_end = slba + info.zone_cap; } - return SZD_SC_SUCCESS; -} + uint64_t lba_tmp = *lba; + // Progress variables + uint64_t lbas_to_process = (size + info.lba_size - 1) / info.lba_size; + uint64_t lbas_processed = 0; + // Used to determine next IO call + uint64_t step_size = + (info.mdts / info.lba_size); // < If lba_size > mdts, we have a big + // problem, but not because of the append. + uint64_t current_step_size = step_size; -int szd_poll_once(QPair *qpair, Completion *completion) { - if (!completion->done) { - spdk_nvme_qpair_process_completions(qpair->qpair, 0); + // Error if we have an out of range. + uint64_t number_of_zones_traversed = + (lbas_to_process + (*lba - slba)) / info.zone_cap; + if (szd_unlikely(*lba < info.min_lba || + slba + number_of_zones_traversed * info.zone_size > + info.max_lba)) { + SPDK_ERRLOG("SZD: Write is out of allowed range\n"); + return SZD_SC_SPDK_ERROR_APPEND; } - if (spdk_unlikely(completion->err != 0)) { - SPDK_ERRLOG("SZD: Error during polling once - code:%x\n", completion->err); - return SZD_SC_SPDK_ERROR_POLLING; + + // Append in steps of max MDTS bytes and respect boundaries + while (lbas_processed < lbas_to_process) { + // Append across a zone border. + if ((*lba + step_size) >= current_zone_end) { + current_step_size = current_zone_end - *lba; + } else { + current_step_size = step_size; + } + // Do not append too much (more than mdts or what is requested) + current_step_size = lbas_to_process - lbas_processed > current_step_size + ? 
current_step_size + : lbas_to_process - lbas_processed; + // Plain write at the running LBA goes through the backend's write entry (append is issued with the zone start LBA). + rc = em->backend.write( + qpair, lba_tmp, (char *)buffer + lbas_processed * info.lba_size, + current_step_size * info.lba_size, current_step_size); +#ifdef SZD_PERF_COUNTERS + if (nr_writes != NULL) { + *nr_writes += 1; + } +#else + (void)nr_writes; +#endif + if (szd_unlikely(rc != 0)) { + SPDK_ERRLOG("SZD: Error creating write request\n"); + return SZD_SC_SPDK_ERROR_APPEND; + } + lba_tmp += current_step_size; + *lba = lba_tmp; + lbas_processed += current_step_size; + // To the next zone we go + if (*lba >= current_zone_end) { + slba += info.zone_size; + lba_tmp = slba; + *lba = slba; + current_zone_end = slba + info.zone_cap; + } } return SZD_SC_SUCCESS; } -void szd_poll_once_raw(QPair *qpair) { - spdk_nvme_qpair_process_completions(qpair->qpair, 0); +int szd_write(EngineManager *em, QPair *qpair, uint64_t *lba, void *buffer, + uint64_t size) { + return szd_write_with_diag(em, qpair, lba, buffer, size, NULL); +} + +int szd_poll_async(EngineManager *em, QPair *qpair, Completion *completion) { + return em->backend.poll_async(qpair, completion); +} + +int szd_poll_once(EngineManager *em, QPair *qpair, Completion *completion) { + return em->backend.poll_once(qpair, completion); +} + +void szd_poll_once_raw(EngineManager *em, QPair *qpair) { + em->backend.poll_once_raw(qpair); // void function: no value to return } -int szd_reset(QPair *qpair, uint64_t slba) { +int szd_reset(EngineManager *em, QPair *qpair, uint64_t slba) { RETURN_ERR_ON_NULL(qpair); // Otherwise we have an out of range.
DeviceInfo info = qpair->man->info; if (spdk_unlikely(slba < info.min_lba || slba >= info.lba_cap)) { return SZD_SC_SPDK_ERROR_READ; } - Completion completion = Completion_default; - int rc = - spdk_nvme_zns_reset_zone(qpair->man->ns, qpair->qpair, - slba, /* starting LBA of the zone to reset */ - false, /* don't reset all zones */ - __reset_zone_complete, &completion); + int rc = em->backend.reset_zone(qpair, slba); if (spdk_unlikely(rc != 0)) { return SZD_SC_SPDK_ERROR_RESET; } - // Busy wait - POLL_QPAIR(qpair->qpair, completion.done); - if (spdk_unlikely(completion.err != 0)) { - SPDK_ERRLOG("SZD: Reset error - code:%x \n", completion.err); - return SZD_SC_SPDK_ERROR_RESET; - } return rc; } -int szd_reset_all(QPair *qpair) { +int szd_reset_all(EngineManager *em, QPair *qpair) { RETURN_ERR_ON_NULL(qpair); // Otherwise we have an out of range. DeviceInfo info = qpair->man->info; @@ -803,183 +581,74 @@ int szd_reset_all(QPair *qpair) { // We can not do full reset, if we only "own" a part. if (info.min_lba > 0 || info.max_lba < info.lba_cap) { // What are you doing? 
- if (spdk_unlikely(info.min_lba > info.max_lba)) { + if (szd_unlikely(info.min_lba > info.max_lba)) { return SZD_SC_SPDK_ERROR_RESET; } for (uint64_t slba = info.min_lba; slba < info.max_lba; slba += info.zone_size) { - if ((rc = szd_reset(qpair, slba)) != 0) { + if ((rc = szd_reset(em, qpair, slba)) != 0) { return rc; } } } else { - Completion completion = Completion_default; - rc = spdk_nvme_zns_reset_zone(qpair->man->ns, qpair->qpair, - 0, /* starting LBA of the zone to reset */ - true, /* reset all zones */ - __reset_zone_complete, &completion); - if (spdk_unlikely(rc != 0)) { - return SZD_SC_SPDK_ERROR_RESET; - } - // Busy wait - POLL_QPAIR(qpair->qpair, completion.done); - if (spdk_unlikely(completion.err != 0)) { + rc = em->backend.reset_all_zones(qpair); + if (szd_unlikely(rc != 0)) { return SZD_SC_SPDK_ERROR_RESET; } } return rc; } -int szd_finish_zone(QPair *qpair, uint64_t slba) { +int szd_finish_zone(EngineManager *em, QPair *qpair, uint64_t slba) { RETURN_ERR_ON_NULL(qpair); // Otherwise we have an out of range. 
DeviceInfo info = qpair->man->info; if (spdk_unlikely(slba < info.min_lba || slba > info.lba_cap)) { return SZD_SC_SPDK_ERROR_FINISH; } - Completion completion = Completion_default; - int rc = - spdk_nvme_zns_finish_zone(qpair->man->ns, qpair->qpair, - slba, /* starting LBA of the zone to finish */ - false, /* don't finish all zones */ - __finish_zone_complete, &completion); - if (spdk_unlikely(rc != 0)) { - return SZD_SC_SPDK_ERROR_FINISH; - } - // Busy wait - POLL_QPAIR(qpair->qpair, completion.done); - if (spdk_unlikely(completion.err != 0)) { + int rc = em->backend.finish_zone(qpair, slba); + if (szd_unlikely(rc != 0)) { return SZD_SC_SPDK_ERROR_FINISH; } return rc; } -int szd_get_zone_heads(QPair *qpair, uint64_t slba, uint64_t eslba, - uint64_t *write_head) { +int szd_get_zone_heads(EngineManager *em, QPair *qpair, uint64_t slba, + uint64_t eslba, uint64_t *write_head) { // Inspired by SPDK/nvme/identify.c RETURN_ERR_ON_NULL(qpair); RETURN_ERR_ON_NULL(qpair->man); // Otherwise we have an out of range. 
- DeviceInfo info = qpair->man->info; - if (spdk_unlikely(slba < info.min_lba || slba >= info.max_lba || - eslba < info.min_lba || eslba >= info.max_lba || - slba > eslba || slba % info.zone_size != 0 || - eslba % info.zone_size != 0)) { + DeviceManager *dm = qpair->man; + DeviceInfo info = dm->info; + if (szd_unlikely(slba < info.min_lba || slba >= info.max_lba || + eslba < info.min_lba || eslba >= info.max_lba || + slba > eslba || slba % info.zone_size != 0 || + eslba % info.zone_size != 0)) { return SZD_SC_SPDK_ERROR_REPORT_ZONES; } - int rc = SZD_SC_SUCCESS; - - // Setup state variables - size_t report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(qpair->man->ns); - uint8_t *report_buf = (uint8_t *)calloc(1, report_bufsize); - uint64_t reported_zones = 0; - uint64_t zones_to_report = (eslba - slba) / info.zone_size; - struct spdk_nvme_zns_zone_report *zns_report; - - // Setup logical variables - const struct spdk_nvme_ns_data *nsdata = - spdk_nvme_ns_get_data(qpair->man->ns); - const struct spdk_nvme_zns_ns_data *nsdata_zns = - spdk_nvme_zns_ns_get_data(qpair->man->ns); - uint64_t zone_report_size = sizeof(struct spdk_nvme_zns_zone_report); - uint64_t zone_descriptor_size = sizeof(struct spdk_nvme_zns_zone_desc); - uint64_t zns_descriptor_size = - nsdata_zns->lbafe[nsdata->flbas.format].zdes * 64; - uint64_t max_zones_per_buf = - zns_descriptor_size - ? (report_bufsize - zone_report_size) / - (zone_descriptor_size + zns_descriptor_size) - : (report_bufsize - zone_report_size) / zone_descriptor_size; - - // Get zone heads iteratively - do { - memset(report_buf, 0, report_bufsize); - // Get as much as we can from SPDK - Completion completion = Completion_default; - rc = spdk_nvme_zns_report_zones( - qpair->man->ns, qpair->qpair, report_buf, report_bufsize, slba, - SPDK_NVME_ZRA_LIST_ALL, true, __get_zone_head_complete, &completion); - if (spdk_unlikely(rc != 0)) { - free(report_buf); - return SZD_SC_SPDK_ERROR_REPORT_ZONES; - } - // Busy wait for the head. 
- POLL_QPAIR(qpair->qpair, completion.done); - if (spdk_unlikely(completion.err != 0)) { - free(report_buf); - return SZD_SC_SPDK_ERROR_REPORT_ZONES; - } - - // retrieve nr_zones - zns_report = (struct spdk_nvme_zns_zone_report *)report_buf; - uint64_t nr_zones = zns_report->nr_zones; - if (nr_zones > max_zones_per_buf || nr_zones == 0) { - free(report_buf); - return SZD_SC_SPDK_ERROR_REPORT_ZONES; - } - - // Retrieve write heads from zone information. - for (uint64_t i = 0; i < nr_zones && reported_zones <= zones_to_report; - i++) { - struct spdk_nvme_zns_zone_desc *desc = &zns_report->descs[i]; - write_head[reported_zones] = desc->wp; - if (spdk_unlikely(write_head[reported_zones] < slba)) { - free(report_buf); - return SZD_SC_SPDK_ERROR_REPORT_ZONES; - } - if (write_head[reported_zones] > slba + desc->zcap) { - write_head[reported_zones] = slba + info.zone_size; - } - // progress - slba += info.zone_size; - reported_zones++; - } - } while (reported_zones < zones_to_report); - free(report_buf); - return SZD_SC_SUCCESS; + int rc = em->backend.get_zone_heads(qpair, slba, eslba, write_head); // declared here: the earlier rc declaration is removed by this hunk + return rc; } -int szd_get_zone_head(QPair *qpair, uint64_t slba, uint64_t *write_head) { - return szd_get_zone_heads(qpair, slba, slba, write_head); +int szd_get_zone_head(EngineManager *em, QPair *qpair, uint64_t slba, + uint64_t *write_head) { + return szd_get_zone_heads(em, qpair, slba, slba, write_head); } -int szd_get_zone_cap(QPair *qpair, uint64_t slba, uint64_t *zone_cap) { +int szd_get_zone_cap(EngineManager *em, QPair *qpair, uint64_t slba, + uint64_t *zone_cap) { RETURN_ERR_ON_NULL(qpair); RETURN_ERR_ON_NULL(qpair->man); // Otherwise we have an out of range.
- DeviceInfo info = qpair->man->info; - if (spdk_unlikely(slba < info.min_lba || slba > info.max_lba)) { + DeviceManager *dm = qpair->man; + DeviceInfo info = dm->info; + if (szd_unlikely(slba < info.min_lba || slba > info.max_lba)) { return SZD_SC_SPDK_ERROR_READ; } - - int rc = SZD_SC_SUCCESS; - // Get information from a zone. - size_t report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(qpair->man->ns); - uint8_t *report_buf = (uint8_t *)calloc(1, report_bufsize); - { - Completion completion = Completion_default; - rc = spdk_nvme_zns_report_zones( - qpair->man->ns, qpair->qpair, report_buf, report_bufsize, slba, - SPDK_NVME_ZRA_LIST_ALL, true, __get_zone_head_complete, &completion); - if (spdk_unlikely(rc != 0)) { - free(report_buf); - return SZD_SC_SPDK_ERROR_REPORT_ZONES; - } - // Busy wait for the head. - POLL_QPAIR(qpair->qpair, completion.done); - if (spdk_unlikely(completion.err != 0)) { - free(report_buf); - return SZD_SC_SPDK_ERROR_REPORT_ZONES; - } - } - // Retrieve write head from zone information. 
- uint32_t zd_index = sizeof(struct spdk_nvme_zns_zone_report); - struct spdk_nvme_zns_zone_desc *desc = - (struct spdk_nvme_zns_zone_desc *)(report_buf + zd_index); - *zone_cap = desc->zcap; - free(report_buf); - return SZD_SC_SUCCESS; + int rc = em->backend.get_zone_cap(qpair, slba, zone_cap); // capacity query uses the backend's get_zone_cap entry, not get_zone_head + return rc; } void szd_print_zns_status(int status) { diff --git a/szd/core/src/szd_ioctl.c b/szd/core/src/szd_ioctl.c new file mode 100644 index 0000000..f049701 --- /dev/null +++ b/szd/core/src/szd_ioctl.c @@ -0,0 +1,263 @@ +#include "szd/szd_ioctl.h" + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +namespace SIMPLE_ZNS_DEVICE_NAMESPACE { +#endif + +// #include +#include +// #include +// #include +#include +// #include +// #include +// #include +// #include +// #include + +#define NVME_ZNS_SEND_SELECT_ALL (1 << 8) +#define MAX_TRANSFER_SIZE (1 << 16) + +int __ioctl_mgmt_command(int fd, int nsid, unsigned char opcode, + unsigned int cmd, uint64_t zslba, void *data, + unsigned int data_len) { + struct nvme_passthru_cmd nvme_cmd = { + .opcode = opcode, + .nsid = nsid, + .addr = (__u64)(uintptr_t)data, + .data_len = data_len, + .cdw10 = (unsigned int)(((__u64)zslba) & 0xffffffff), + .cdw11 = (unsigned int)(((__u64)zslba) >> 32), + .cdw12 = data_len ? (data_len >> 2) - 1 : 0, // zero-based dword count; stays 0 (no wrap to 0xffffffff) when no buffer is passed + .cdw13 = cmd, + .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT, + }; + return ioctl(fd, NVME_IOCTL_IO_CMD, &nvme_cmd); +} + +int __ioctl_mgmt_send_command(int fd, int nsid, uint64_t zslba, + unsigned int nvme_zns_send_action) { + return __ioctl_mgmt_command(fd, nsid, nvme_zns_cmd_mgmt_send, + nvme_zns_send_action, zslba, NULL, 0); +} + +int ioctl_open_zone(int fd, int nsid, uint64_t zslba) { + return __ioctl_mgmt_send_command(fd, nsid, zslba, NVME_ZNS_ZSA_OPEN); +} + +int ioctl_close_zone(int fd, int nsid, uint64_t zslba) { + return __ioctl_mgmt_send_command(fd, nsid, zslba, NVME_ZNS_ZSA_CLOSE); +} + +int ioctl_reset_zone(int fd, int nsid, uint64_t zslba) { + return __ioctl_mgmt_send_command(fd, nsid,
zslba, NVME_ZNS_ZSA_RESET); +} + +int ioctl_reset_all_zones(int fd, int nsid) { + return __ioctl_mgmt_send_command( + fd, nsid, 0, NVME_ZNS_ZSA_RESET | NVME_ZNS_SEND_SELECT_ALL); +} + +int ioctl_finish_zone(int fd, int nsid, uint64_t zslba) { + return __ioctl_mgmt_send_command(fd, nsid, zslba, NVME_ZNS_ZSA_FINISH); +} + +int __ioctl_mgmt_recv_command(int fd, int nsid, uint64_t zslba, + unsigned int nvme_zns_recv_action, void *data, + uint32_t data_len) { + return __ioctl_mgmt_command(fd, nsid, nvme_zns_cmd_mgmt_recv, + nvme_zns_recv_action, zslba, data, data_len); +} + +int ioctl_get_zone_heads(int fd, int nsid, uint64_t zone_cnt, + uint64_t zone_size, uint64_t zslba, uint64_t zeslba, + uint64_t *zone_heads) { + int ret = 0; + int nr_zones = zone_cnt; + uint64_t reported_zones = 0; + uint32_t data_len = sizeof(struct nvme_zone_report) + + (nr_zones * sizeof(struct nvme_zns_desc)); + struct nvme_zone_report *data = (struct nvme_zone_report *)malloc(data_len); + ret = __ioctl_mgmt_recv_command(fd, nsid, zslba, NVME_ZNS_ZRA_REPORT_ZONES, + (void *)data, data_len); + if (ret < 0) { + free(data); + return ret; + } + uint64_t zones_to_read = (zeslba - zslba) / zone_size; + zones_to_read = (zslba / zone_size) + zones_to_read > zone_cnt + ? 
zone_cnt - (zslba / zone_size) + : zones_to_read; + for (uint64_t j = 0; j <= zones_to_read; j++) { + struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(data->entries[j]); + if (desc->wp > desc->zslba + desc->zcap) { + zone_heads[reported_zones++] = desc->zslba + zone_size; + } else { + zone_heads[reported_zones++] = desc->wp; + } + } + free(data); + return ret; +} + +int ioctl_get_zone_head(int fd, int nsid, uint64_t zone_cnt, uint64_t zone_size, + uint64_t zslba, uint64_t *zone_head) { + return ioctl_get_zone_heads(fd, nsid, zone_cnt, zone_size, zslba, zslba, + zone_head); +} + +int ioctl_get_zone_cap(int fd, int nsid, uint64_t zone_cnt, uint64_t zslba, + uint64_t *zone_cap) { + int ret = 0; + int nr_zones = zone_cnt; + uint32_t data_len = sizeof(struct nvme_zone_report) + + (nr_zones * sizeof(struct nvme_zns_desc)); + struct nvme_zone_report *data = (struct nvme_zone_report *)malloc(data_len); + ret = __ioctl_mgmt_recv_command(fd, nsid, zslba, NVME_ZNS_ZRA_REPORT_ZONES, + (void *)data, data_len); + if (ret < 0) { + free(data); + return ret; + } + struct nvme_zns_desc *desc = (struct nvme_zns_desc *)&(data->entries[0]); + *zone_cap = desc->zcap; + free(data); + return ret; +} + +int ioctl_get_nsid(int fd, uint32_t *nsid) { + int32_t ns = ioctl(fd, NVME_IOCTL_ID); + if (ns > 0) { + *nsid = (uint32_t)ns; + return 0; + } + return -1; +} + +int __ioctl_admin_identify(int fd, int nsid, void *data, unsigned int cns, + unsigned int csi) { + struct nvme_passthru_cmd cmd = { + .opcode = nvme_admin_identify, + .nsid = nsid, + .addr = (__u64)(uintptr_t)data, + .data_len = NVME_IDENTIFY_DATA_SIZE, + .cdw10 = cns, + .cdw11 = csi << 24, + .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT, + }; + return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd); +} + +int ioctl_admin_identify_ns_nvme(int fd, int nsid, void *data) { + return __ioctl_admin_identify(fd, nsid, data, NVME_IDENTIFY_CNS_NS, + NVME_CSI_NVM); +} + +int ioctl_admin_identify_ns_zns(int fd, int nsid, void *data) { + return 
__ioctl_admin_identify(fd, nsid, data, NVME_IDENTIFY_CNS_CSI_NS, + NVME_CSI_ZNS); +} + +int ioctl_admin_identify_ctrl_nvme(int fd, int nsid, void *data) { + return __ioctl_admin_identify(fd, nsid, data, NVME_IDENTIFY_CNS_CTRL, + NVME_CSI_NVM); +} + +int ioctl_admin_identify_ctrl_zns(int fd, int nsid, void *data) { + return __ioctl_admin_identify(fd, nsid, data, NVME_IDENTIFY_CNS_CSI_CTRL, + NVME_CSI_ZNS); +} + +int nvme_registers_get_cap(int fd, uint64_t *cap) { + // TODO: Implement + *cap = 0; + (void)fd; + return 0; +} + +static uint64_t __nvme_get_lba_cap(struct nvme_id_ns *nin) { return nin->nsze; } + +static uint64_t __nvme_get_lba_size(struct nvme_id_ns *nin) { + return 1ULL << nin->lbaf[(nin->flbas & 0xf)].ds; // shift in 64 bits to match the return type +} + +static uint64_t __nvme_get_minpage_size(uint64_t cap) { + uint64_t min_page_size = 1ULL << (12 + NVME_CAP_MPSMIN(cap)); + return min_page_size; +} + +static uint64_t __nvme_get_mdts(uint64_t min_page_size, + struct nvme_id_ctrl *nic) { + uint64_t mdts = nic->mdts; + if (mdts > 0) { + mdts = min_page_size * (1ULL << mdts); // 64-bit shift; result is clamped below + } + if (mdts == 0 || mdts > MAX_TRANSFER_SIZE) { + mdts = MAX_TRANSFER_SIZE; + } + return mdts; +} + +static uint64_t __nvme_get_zasl(uint64_t min_page_size, + struct nvme_zns_id_ctrl *zic, uint64_t mdts) { + + uint64_t zasl = zic->zasl; + if (zasl == 0) { + zasl = mdts; + } else { + zasl = min_page_size * (1ULL << zasl); // 64-bit shift; result is clamped below + } + if (zasl > MAX_TRANSFER_SIZE) { + zasl = MAX_TRANSFER_SIZE; + } + return zasl; +} + +static uint64_t __nvme_get_sze(struct nvme_id_ns *nin, + struct nvme_zns_id_ns *zin) { + return zin->lbafe[(nin->flbas & 0xf)].zsze; // same format-index mask as __nvme_get_lba_size; raw flbas carries extra flag bits +} + +int ioctl_get_nvme_info(int fd, DeviceInfo *info) { + int ret; + struct nvme_id_ns nin; + struct nvme_zns_id_ns zin; + struct nvme_id_ctrl nic; + struct nvme_zns_id_ctrl zic; + uint64_t cap; + + // Determine nsid + if ((ret = ioctl_get_nsid(fd, &info->nsid)) < 0) { + return ret; + } + + // Get NVMe data + ret = ioctl_admin_identify_ns_nvme(fd, info->nsid, &nin) || + ioctl_admin_identify_ns_zns(fd,
info->nsid, &zin) || + ioctl_admin_identify_ctrl_nvme(fd, info->nsid, &nic) || + ioctl_admin_identify_ctrl_zns(fd, info->nsid, &zic) || + nvme_registers_get_cap(fd, &cap); + if (ret != 0) { + return ret; + } + + info->lba_cap = __nvme_get_lba_cap(&nin); + info->lba_size = __nvme_get_lba_size(&nin); + info->min_page_size = __nvme_get_minpage_size(cap); + info->mdts = __nvme_get_mdts(info->min_page_size, &nic); + info->zasl = __nvme_get_zasl(info->min_page_size, &zic, info->mdts); + info->zone_size = __nvme_get_sze(&nin, &zin); + + // Hack zone cap for now + ret = ioctl_get_zone_cap(fd, info->nsid, info->lba_cap / info->zone_size, + 0 /*slba*/, &info->zone_cap); + return ret; +} + +#ifdef __cplusplus +} +} // namespace SimpleZNSDeviceNamespace +#endif diff --git a/szd/core/src/szd_iouring.c b/szd/core/src/szd_iouring.c new file mode 100644 index 0000000..75078f5 --- /dev/null +++ b/szd/core/src/szd_iouring.c @@ -0,0 +1,425 @@ +#include "szd/szd_iouring.h" +#include "szd/szd_ioctl.h" +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +namespace SIMPLE_ZNS_DEVICE_NAMESPACE { +#endif + +typedef struct { + int fd; + bool sqthread; + bool fixed; +} UringDeviceManager; + +int szd_io_uring_register_backend(EngineManager *em) { + ioengine_backend backend = { + .init = szd_io_uring_init, + .destroy = szd_io_uring_destroy, + .reinit = szd_io_uring_reinit, + .probe = szd_io_uring_probe, + .free_probe = szd_io_uring_free_probe, + .open = szd_io_uring_open, + .close = szd_io_uring_close, + .get_device_info = szd_io_uring_get_device_info, + .create_qpair = szd_io_uring_create_qpair, + .destroy_qpair = szd_io_uring_destroy_qpair, + .buf_calloc = szd_io_uring_calloc, + .free = szd_io_uring_free, + .read = szd_io_uring_read, + .write = szd_io_uring_write, + .append = szd_io_uring_append, + .append_async = szd_io_uring_append_async, + .poll_async = szd_io_uring_poll_async, + .poll_once = szd_io_uring_poll_once, + .poll_once_raw = 
szd_io_uring_poll_once_raw, + .reset_zone = szd_io_uring_reset, + .reset_all_zones = szd_io_uring_reset_all, + .finish_zone = szd_io_uring_finish_zone, + .get_zone_head = szd_io_uring_get_zone_head, + .get_zone_heads = szd_io_uring_get_zone_heads, + .get_zone_cap = szd_io_uring_get_zone_cap, + }; + em->backend = backend; + return 0; +} + +int szd_io_uring_init(DeviceManager **dm, DeviceOptions *options) { + // Dynamic stuff + (*dm)->private_ = calloc(1, sizeof(UringDeviceManager)); + UringDeviceManager *priv_ = (UringDeviceManager *)(*dm)->private_; + // setup stub info, we do not want to create extra UB. + (*dm)->info = DeviceInfo_default; + (*dm)->info.name = options->name; + priv_->fd = -1; + // Carry the requested SQ-thread mode; create_qpair and reinit read it from priv_. + priv_->sqthread = options->iouring_sqthread; + return SZD_SC_SUCCESS; +} + +static int check_nvme_device(const char *filename) { + int ret = 0; + struct stat stat_buffer; + ret = stat(filename, &stat_buffer); + if (ret != 0) { + return -1; + } + if (!S_ISCHR(stat_buffer.st_mode)) { + ret = -1; + fprintf(stderr, "Currently only support char devices\n"); + return ret; + } + return ret; +} + +int szd_io_uring_get_device_info(DeviceInfo *info, DeviceManager *dm) { + UringDeviceManager *priv_ = (UringDeviceManager *)dm->private_; + return ioctl_get_nvme_info(priv_->fd, info); +} + +int szd_io_uring_open(DeviceManager *dm, const char *filename, + DeviceOpenOptions *options) { + (void)options; + int ret; + int open_flags; + UringDeviceManager *priv_ = (UringDeviceManager *)dm->private_; + + // Check device + ret = check_nvme_device(filename); + if (ret < 0) { + return ret; + } + + // Open device + open_flags = O_RDWR; + priv_->fd = open(filename, open_flags); + if (priv_->fd < 0) { + ret = -1; + return ret; + } + return ret; +} + +int szd_io_uring_destroy(DeviceManager *dm) { + int ret = 0; + UringDeviceManager *priv_ = (UringDeviceManager *)dm->private_; + // -1 marks "not open"; descriptor 0 is a valid open file. + if (priv_->fd >= 0) { + close(priv_->fd); + priv_->fd = -1; + } + free(priv_); + dm->private_ = NULL; + return ret; +} + +int szd_io_uring_reinit(DeviceManager **dm) { +
const char *name = (*dm)->info.name;
+  UringDeviceManager *priv_ = (UringDeviceManager *)(*dm)->private_;
+  bool sqthread = priv_->sqthread;
+  int ret = szd_io_uring_destroy(*dm);
+  if (ret < 0) {
+    return ret;
+  }
+  DeviceOptions options = {
+      .name = name, .setup_spdk = false, .iouring_sqthread = sqthread};
+  return szd_io_uring_init(dm, &options);
+}
+
+// Probing is a no-op for this backend; always returns 0.
+int szd_io_uring_probe(DeviceManager *dm, void **probe_info) {
+  (void)dm;
+  (void)probe_info;
+  return 0;
+}
+
+// Nothing is allocated by szd_io_uring_probe, so nothing is released here.
+void szd_io_uring_free_probe(DeviceManager *dm, void *probe_info) {
+  (void)dm;
+  (void)probe_info;
+}
+
+// Closes the device descriptor. Returns 0 when a descriptor was open and -1
+// when there is no private state or no open descriptor.
+int szd_io_uring_close(DeviceManager *dm) {
+  int ret = 0;
+  UringDeviceManager *priv_ = (UringDeviceManager *)dm->private_;
+  // -1 marks "not open"; descriptor 0 is a valid descriptor.
+  if (priv_ != NULL && priv_->fd >= 0) {
+    close(priv_->fd);
+    priv_->fd = -1;
+    ret = 0;
+  } else {
+    ret = -1;
+  }
+  return ret;
+}
+
+// Allocates a QPair backed by a new io_uring instance (64 SQ / 64 CQ entries,
+// SQE128 + CQE32, plus SQPOLL when requested at init).
+// On any failure every partial allocation is released, *qpair is set to NULL
+// and a non-zero code is returned (SZD_SC_NOT_ALLOCATED for allocation
+// failures, otherwise the liburing return value).
+int szd_io_uring_create_qpair(DeviceManager *dm, QPair **qpair) {
+  UringDeviceManager *priv_ = (UringDeviceManager *)dm->private_;
+  int ret;
+
+  *qpair = (QPair *)calloc(1, sizeof(QPair));
+  if (*qpair == NULL) {
+    return SZD_SC_NOT_ALLOCATED;
+  }
+  struct io_uring *ring = (struct io_uring *)calloc(1, sizeof(struct io_uring));
+  if (ring == NULL) {
+    free(*qpair);
+    *qpair = NULL;
+    return SZD_SC_NOT_ALLOCATED;
+  }
+  (*qpair)->man = dm;
+  (*qpair)->qpair = ring;
+
+  struct io_uring_params p = {};
+  // Setup io_uring
+  // p.flags = IORING_SETUP_IOPOLL;
+  p.flags |= IORING_SETUP_SQE128;
+  p.flags |= IORING_SETUP_CQE32;
+  // p.flags |= IORING_SETUP_CQSIZE;
+  // p.flags |= IORING_SETUP_COOP_TASKRUN;
+  // p.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
+  if (priv_->sqthread)
+    p.flags |= IORING_SETUP_SQPOLL;
+  p.cq_entries = 64;
+  ret = io_uring_queue_init_params(64, ring, &p);
+  if (ret != 0) {
+    // The ring was never initialised, so only the memory is released.
+    free(ring);
+    free(*qpair);
+    *qpair = NULL;
+    return ret;
+  }
+
+  if (priv_->sqthread) {
+    // Submissions use IOSQE_FIXED_FILE with index 0 in this mode, so a failed
+    // registration must not be reported as success.
+    ret = io_uring_register_files(ring, &(priv_->fd), 1);
+    if (ret != 0) {
+      io_uring_queue_exit(ring);
+      free(ring);
+      free(*qpair);
+      *qpair = NULL;
+      return ret;
+    }
+  }
+
+  return SZD_SC_SUCCESS;
+}
+
+int szd_io_uring_destroy_qpair(DeviceManager *man, QPair *qpair) {
+  int rc = 0;
+  UringDeviceManager *priv_ = (UringDeviceManager *)man->private_;
+  struct io_uring *ring = (struct io_uring *)qpair->qpair;
+  qpair->man =
NULL;
+  if (priv_->sqthread) {
+    rc = io_uring_unregister_files(ring);
+  }
+  if (priv_->fixed) {
+    // We do not use fixed bufs yet
+    // ret = io_uring_unregister_buffers(&priv_->ring) || ret;
+  }
+  io_uring_queue_exit(ring);
+  free(qpair->qpair);
+  free(qpair);
+  return rc;
+}
+
+// Returns a zero-filled buffer of __nmemb * __size bytes aligned to __allign,
+// or NULL when the size computation overflows or the allocation fails.
+// The buffer is released with szd_io_uring_free.
+void *szd_io_uring_calloc(uint64_t __allign, size_t __nmemb, size_t __size) {
+  if (__size != 0 && __nmemb > SIZE_MAX / __size) {
+    return NULL;
+  }
+  size_t total_bytes = __nmemb * __size;
+  void *buffer = aligned_alloc(__allign, total_bytes);
+  if (buffer != NULL) {
+    // aligned_alloc does not initialise the memory; calloc semantics require
+    // zeroes.
+    memset(buffer, 0, total_bytes);
+  }
+  return buffer;
+}
+
+// Releases a buffer obtained from szd_io_uring_calloc.
+void szd_io_uring_free(void *buffer) { free(buffer); }
+
+// Queues one NVMe passthrough command (IORING_OP_URING_CMD) on ring and
+// submits it. completion is stored as the SQE user_data so the poll functions
+// can mark it done. Returns the io_uring_submit result, or -1 when the
+// submission queue has no free entry.
+static int szd_io_uring_req_submit(DeviceManager *dm, struct io_uring *ring,
+                                   Completion *completion, uint32_t nsid,
+                                   uint8_t opcode, uint64_t slba,
+                                   uint64_t blocks, void *buf, uint64_t bytes,
+                                   uint64_t offset) {
+  int ret = 0;
+
+  UringDeviceManager *priv_ = (UringDeviceManager *)dm->private_;
+  struct io_uring_sqe *sqe;
+  struct nvme_uring_cmd *cmd;
+
+  // prepare write
+  sqe = io_uring_get_sqe(ring);
+  if (sqe == NULL) {
+    // Submission queue is full; nothing was queued.
+    return -1;
+  }
+  io_uring_prep_write(sqe, priv_->sqthread ? 0 : priv_->fd, buf, bytes, offset);
+
+  // Alter Submission queue for passthrough
+  sqe->user_data = (__u64)(uintptr_t)completion;
+  sqe->opcode = IORING_OP_URING_CMD;
+  sqe->cmd_op = NVME_URING_CMD_IO;
+  if (priv_->sqthread)
+    sqe->flags |= IOSQE_FIXED_FILE;
+
+  // Alter Submission cmd for passthrough
+  cmd = (struct nvme_uring_cmd *)sqe->cmd;
+  memset(cmd, 0, sizeof(struct nvme_uring_cmd));
+  cmd->opcode = opcode;
+  // The 64-bit starting LBA is split over cdw10 (low) and cdw11 (high).
+  cmd->cdw10 = ((__u64)slba) & 0xffffffff;
+  cmd->cdw11 = ((__u64)slba) >> 32;
+  cmd->cdw12 = (__u32)blocks - 1;
+  cmd->addr = (__u64)(uintptr_t)buf;
+  cmd->data_len = bytes;
+  cmd->nsid = nsid;
+  cmd->timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT;
+
+  // Submit
+  ret = io_uring_submit(ring);
+  return ret;
+}
+
+static int szd_io_uring_req_complete(DeviceManager *dm, struct io_uring *ring) {
+  (void)dm;
+  int ret = 0;
+  struct io_uring_cqe *cqe;
+  ret = io_uring_wait_cqe(ring, &cqe);
+  if (ret != 0) {
+    return ret;
+  }
+  ((Completion *)(cqe->user_data))->done = true;
+  ((Completion *)(cqe->user_data))->err = ret = cqe->res;
+
io_uring_cqe_seen(ring, cqe);
+  return ret;
+}
+
+// Submits one passthrough command and blocks until a completion is reaped
+// from ring. Returns a negative value when submission fails, otherwise the
+// result of szd_io_uring_req_complete.
+static int szd_io_uring_req_sync(DeviceManager *dm, struct io_uring *ring,
+                                 uint32_t nsid, uint8_t opcode, uint64_t slba,
+                                 uint64_t blocks, void *buf, uint64_t bytes,
+                                 uint64_t offset) {
+  int ret = 0;
+  Completion completion = Completion_default;
+  ret = szd_io_uring_req_submit(dm, ring, &completion, nsid, opcode, slba,
+                                blocks, buf, bytes, offset);
+  if (ret < 0) {
+    return ret;
+  }
+  ret = szd_io_uring_req_complete(dm, ring);
+  return ret;
+}
+
+// Synchronous read of `blocks` LBAs (`size` bytes) starting at lba.
+int szd_io_uring_read(QPair *qpair, uint64_t lba, void *buffer, uint64_t size,
+                      uint64_t blocks) {
+  DeviceManager *dm = qpair->man;
+  struct io_uring *ring = (struct io_uring *)(qpair->qpair);
+  uint32_t nsid = dm->info.nsid;
+  int ret = 0;
+  ret = szd_io_uring_req_sync(dm, ring, nsid, nvme_cmd_read, lba, blocks,
+                              buffer, size, 0);
+  return ret;
+}
+
+// Synchronous write of `blocks` LBAs (`size` bytes) starting at lba.
+int szd_io_uring_write(QPair *qpair, uint64_t lba, void *buffer, uint64_t size,
+                       uint64_t blocks) {
+  DeviceManager *dm = qpair->man;
+  struct io_uring *ring = (struct io_uring *)(qpair->qpair);
+  uint32_t nsid = dm->info.nsid;
+  int ret = 0;
+  ret = szd_io_uring_req_sync(dm, ring, nsid, nvme_cmd_write, lba, blocks,
+                              buffer, size, 0);
+  return ret;
+}
+
+// Synchronous zone append of `blocks` LBAs (`size` bytes); lba is passed as
+// the command's starting LBA.
+int szd_io_uring_append(QPair *qpair, uint64_t lba, void *buffer, uint64_t size,
+                        uint64_t blocks) {
+  DeviceManager *dm = qpair->man;
+  struct io_uring *ring = (struct io_uring *)(qpair->qpair);
+  uint32_t nsid = dm->info.nsid;
+  int ret = 0;
+  ret = szd_io_uring_req_sync(dm, ring, nsid, nvme_zns_cmd_append, lba, blocks,
+                              buffer, size, 0);
+  return ret;
+}
+
+// Queues a zone append and returns without waiting. completion is reset here
+// and is marked done by one of the poll functions. Returns 0 when the request
+// was submitted and the (non-positive) submit result otherwise.
+int szd_io_uring_append_async(QPair *qpair, uint64_t lba, void *buffer,
+                              uint64_t size, uint64_t blocks,
+                              Completion *completion) {
+  completion->done = false;
+  completion->err = 0;
+  DeviceManager *dm = qpair->man;
+  struct io_uring *ring = (struct io_uring *)(qpair->qpair);
+  uint32_t nsid = dm->info.nsid;
+  int ret =
+      szd_io_uring_req_submit(dm, ring, completion, nsid, nvme_zns_cmd_append,
+                              lba, blocks, buffer, size, 0);
+  // A positive value is the number of submitted entries, i.e. success.
+  if (ret > 0) {
+    ret = 0;
+  }
+  return ret;
+}
+
+// Blocks, reaping completions from the queue pair, until completion is done.
+// Returns the result of the last reaped completion (0 when completion was
+// already done on entry).
+// NOTE(review): a persistent io_uring_wait_cqe failure keeps this loop
+// spinning, because the helper's return value does not distinguish a wait
+// failure from a command status.
+int szd_io_uring_poll_async(QPair *qpair, Completion *completion) {
+  int ret = 0;
+  DeviceManager *dm = qpair->man;
+  struct io_uring *ring = (struct io_uring *)(qpair->qpair);
+  while (!completion->done) {
+    ret = szd_io_uring_req_complete(dm, ring);
+  }
+  return ret;
+}
+
+// Non-blocking poll: when completion is still pending, reaps at most one
+// finished request from the ring and marks the Completion stored in its
+// user_data (which may be a different one than `completion`).
+// Returns the io_uring_peek_cqe result when nothing could be reaped.
+int szd_io_uring_poll_once(QPair *qpair, Completion *completion) {
+  struct io_uring *ring = (struct io_uring *)(qpair->qpair);
+  int ret = 0;
+  struct io_uring_cqe *cqe = NULL;
+  if (!completion->done) {
+    ret = io_uring_peek_cqe(ring, &cqe);
+    if (ret != 0 || cqe == NULL) {
+      return ret;
+    }
+    Completion *finished = (Completion *)(uintptr_t)(cqe->user_data);
+    finished->done = true;
+    finished->err = cqe->res;
+    io_uring_cqe_seen(ring, cqe);
+  }
+  return ret;
+}
+
+// Non-blocking poll without a target: reaps at most one finished request and
+// marks the Completion stored in its user_data.
+void szd_io_uring_poll_once_raw(QPair *qpair) {
+  struct io_uring *ring = (struct io_uring *)(qpair->qpair);
+  struct io_uring_cqe *cqe = NULL;
+  if (io_uring_peek_cqe(ring, &cqe) == 0 && cqe != NULL) {
+    Completion *finished = (Completion *)(uintptr_t)(cqe->user_data);
+    finished->done = true;
+    finished->err = cqe->res;
+    io_uring_cqe_seen(ring, cqe);
+  }
+}
+
+// Resets the zone starting at slba through the ioctl helper.
+int szd_io_uring_reset(QPair *qpair, uint64_t slba) {
+  DeviceManager *dm = qpair->man;
+  uint32_t nsid = dm->info.nsid;
+  UringDeviceManager *priv_ = (UringDeviceManager *)dm->private_;
+  int fd = priv_->fd;
+  return ioctl_reset_zone(fd, nsid, slba);
+}
+
+// Resets every zone of the namespace through the ioctl helper.
+int szd_io_uring_reset_all(QPair *qpair) {
+  DeviceManager *dm = qpair->man;
+  uint32_t nsid = dm->info.nsid;
+  UringDeviceManager *priv_ = (UringDeviceManager *)dm->private_;
+  int fd = priv_->fd;
+  return ioctl_reset_all_zones(fd, nsid);
+}
+
+int szd_io_uring_finish_zone(QPair *qpair, uint64_t slba) {
+  DeviceManager *dm =
qpair->man; + uint32_t nsid = dm->info.nsid; + UringDeviceManager *priv_ = (UringDeviceManager *)dm->private_; + int fd = priv_->fd; + return ioctl_finish_zone(fd, nsid, slba); +} + +int szd_io_uring_get_zone_head(QPair *qpair, uint64_t slba, + uint64_t *write_head) { + DeviceManager *dm = qpair->man; + uint32_t nsid = dm->info.nsid; + UringDeviceManager *priv_ = (UringDeviceManager *)dm->private_; + int fd = priv_->fd; + return ioctl_get_zone_head(fd, nsid, dm->info.lba_cap / dm->info.zone_size, + dm->info.zone_size, slba, write_head); +} + +int szd_io_uring_get_zone_heads(QPair *qpair, uint64_t slba, uint64_t eslba, + uint64_t *zone_heads) { + DeviceManager *dm = qpair->man; + uint32_t nsid = dm->info.nsid; + UringDeviceManager *priv_ = (UringDeviceManager *)dm->private_; + int fd = priv_->fd; + return ioctl_get_zone_heads(fd, nsid, dm->info.lba_cap / dm->info.zone_size, + dm->info.zone_size, slba, eslba, zone_heads); +} + +int szd_io_uring_get_zone_cap(QPair *qpair, uint64_t slba, uint64_t *zone_cap) { + DeviceManager *dm = qpair->man; + uint32_t nsid = dm->info.nsid; + UringDeviceManager *priv_ = (UringDeviceManager *)dm->private_; + int fd = priv_->fd; + return ioctl_get_zone_cap(fd, nsid, dm->info.lba_cap / dm->info.zone_size, + slba, zone_cap); +} + +#ifdef __cplusplus +} +} // namespace SimpleZNSDeviceNamespace +#endif diff --git a/szd/core/src/szd_spdk.c b/szd/core/src/szd_spdk.c new file mode 100644 index 0000000..2043e7c --- /dev/null +++ b/szd/core/src/szd_spdk.c @@ -0,0 +1,804 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "szd/szd_spdk.h" +#include "szd/szd_status_code.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct spdk_nvme_transport_id t_spdk_nvme_transport_id; +typedef struct spdk_nvme_ctrlr t_spdk_nvme_ctrlr; +typedef struct spdk_nvme_ctrlr_opts t_spdk_nvme_ctrlr_opts; +typedef struct spdk_nvme_ns t_spdk_nvme_ns; +typedef struct spdk_nvme_qpair t_spdk_nvme_qpair; +typedef struct spdk_nvme_cpl t_spdk_nvme_cpl; +typedef struct spdk_nvme_zns_ns_data t_spdk_nvme_zns_ns_data; +typedef struct spdk_nvme_ns_data t_spdk_nvme_ns_data; +typedef struct spdk_nvme_zns_ctrlr_data t_spdk_nvme_zns_ctrlr_data; +typedef struct spdk_nvme_ctrlr_data t_spdk_nvme_ctrlr_data; + +#ifdef __cplusplus +extern "C" { +namespace SIMPLE_ZNS_DEVICE_NAMESPACE { +#endif + +#ifdef NDEBUG +// When no debugging, we require that no functions will use invalid nulled +// params. +#define RETURN_ERR_ON_NULL(x) \ + do { \ + } while (0) +#else +// TODO: unlikely and do while worth it? +#define RETURN_ERR_ON_NULL(x) \ + do { \ + if (spdk_unlikely((x) == NULL)) { \ + return (SZD_SC_NOT_ALLOCATED); \ + } \ + } while (0) +#endif + +// Needed because of DPDK and reattaching, we need to remember what we have +// seen... 
+static char *found_devices[MAX_DEVICE_COUNT];
+static size_t found_devices_len[MAX_DEVICE_COUNT];
+static size_t found_devices_number = 0;
+
+// Installs the SPDK implementation of every backend operation into em.
+int szd_spdk_register_backend(EngineManager *em) {
+  ioengine_backend backend = {
+      .init = szd_spdk_init,
+      .destroy = szd_spdk_destroy,
+      .reinit = szd_spdk_reinit,
+      .probe = szd_spdk_probe,
+      .free_probe = szd_spdk_free_probe_information,
+      .open = szd_spdk_open,
+      .close = szd_spdk_close,
+      .get_device_info = szd_spdk_get_device_info,
+      .create_qpair = szd_spdk_create_qpair,
+      .destroy_qpair = szd_spdk_destroy_qpair,
+      .buf_calloc = szd_spdk_calloc,
+      .free = szd_spdk_free,
+      .read = szd_spdk_read,
+      .write = szd_spdk_write,
+      .append = szd_spdk_append,
+      .append_async = szd_spdk_append_async,
+      .poll_async = szd_spdk_poll_async,
+      .poll_once = szd_spdk_poll_once,
+      .poll_once_raw = szd_spdk_poll_once_raw,
+      .reset_zone = szd_spdk_reset,
+      .reset_all_zones = szd_spdk_reset_all,
+      .finish_zone = szd_spdk_finish_zone,
+      .get_zone_head = szd_spdk_get_zone_head,
+      .get_zone_heads = szd_spdk_get_zone_heads,
+      .get_zone_cap = szd_spdk_get_zone_cap,
+  };
+  em->backend = backend;
+  return 0;
+}
+
+// Allocates the SPDK private state for *dm, initialises the SPDK environment
+// (with fresh options when options->setup_spdk is set, otherwise with NULL)
+// and resets the DeviceInfo to its defaults.
+// Returns SZD_SC_NOT_ALLOCATED on allocation failure, SZD_SC_SPDK_ERROR_INIT
+// when spdk_env_init fails and SZD_SC_SUCCESS otherwise.
+int szd_spdk_init(DeviceManager **dm, DeviceOptions *options) {
+  // Setup options
+  struct spdk_env_opts opts;
+  if (options->setup_spdk) {
+    // Defaults first: spdk_env_opts_init fills the whole structure, so the
+    // name has to be assigned afterwards or it is lost.
+    spdk_env_opts_init(&opts);
+    opts.name = options->name;
+  }
+  (*dm)->private_ = calloc(1, sizeof(SPDKManager));
+  SPDKManager *man = (SPDKManager *)((*dm)->private_);
+  if (man == NULL) {
+    return SZD_SC_NOT_ALLOCATED;
+  }
+  // Setup SPDK
+  man->g_trid =
+      (t_spdk_nvme_transport_id *)calloc(1, sizeof(t_spdk_nvme_transport_id));
+  if (man->g_trid == NULL) {
+    free(man);
+    (*dm)->private_ = NULL;
+    return SZD_SC_NOT_ALLOCATED;
+  }
+  spdk_nvme_trid_populate_transport(man->g_trid, SPDK_NVME_TRANSPORT_PCIE);
+  if (spdk_unlikely(spdk_env_init(!options->setup_spdk ? NULL : &opts) < 0)) {
+    free(man->g_trid);
+    // Do not leave a dangling pointer behind for a later destroy call.
+    man->g_trid = NULL;
+    return SZD_SC_SPDK_ERROR_INIT;
+  }
+  // setup stub info, we do not want to create extra UB.
+  (*dm)->info = DeviceInfo_default;
+  (*dm)->info.name = options->name;
+  man->ctrlr = NULL;
+  man->ns = NULL;
+  return SZD_SC_SUCCESS;
+}
+
+// Fills info from the attached controller and namespace. min_lba and max_lba
+// are copied from the manager's cached info; zone_cap is read from the zone
+// at min_lba through a temporary queue pair.
+// Returns a non-zero status when the temporary queue pair cannot be created
+// or the zone capacity cannot be read.
+int szd_spdk_get_device_info(DeviceInfo *info, DeviceManager *man) {
+  SPDKManager *dm = (SPDKManager *)man->private_;
+  RETURN_ERR_ON_NULL(dm->ctrlr);
+  RETURN_ERR_ON_NULL(dm->ns);
+  info->lba_size = (uint64_t)spdk_nvme_ns_get_sector_size(dm->ns);
+  info->zone_size = (uint64_t)spdk_nvme_zns_ns_get_zone_size_sectors(dm->ns);
+  info->mdts = (uint64_t)spdk_nvme_ctrlr_get_max_xfer_size(dm->ctrlr);
+  info->zasl =
+      (uint64_t)spdk_nvme_zns_ctrlr_get_max_zone_append_size(dm->ctrlr);
+  info->lba_cap = (uint64_t)spdk_nvme_ns_get_num_sectors(dm->ns);
+  info->min_lba = man->info.min_lba;
+  info->max_lba = man->info.max_lba;
+  // printf("INFO: %lu %lu %lu %lu %lu %lu %lu \n", info->lba_size,
+  // info->zone_size, info->mdts, info->zasl,
+  //   info->lba_cap, info->min_lba, info->max_lba);
+  // TODO: zone cap can differ between zones...
+  QPair *temp = NULL;
+  int rc = szd_spdk_create_qpair(man, &temp);
+  if (rc != SZD_SC_SUCCESS || temp == NULL || temp->qpair == NULL) {
+    // Without a usable queue pair the zone capacity cannot be queried.
+    free(temp);
+    return rc != SZD_SC_SUCCESS ? rc : SZD_SC_NOT_ALLOCATED;
+  }
+  rc = szd_spdk_get_zone_cap(temp, info->min_lba, &info->zone_cap);
+  szd_spdk_destroy_qpair(man, temp);
+  return rc;
+}
+
+bool __szd_spdk_open_probe_cb(void *cb_ctx,
+                              const struct spdk_nvme_transport_id *trid,
+                              struct spdk_nvme_ctrlr_opts *opts) {
+  DeviceTarget *prober = (DeviceTarget *)cb_ctx;
+  if (!prober->traddr) {
+    return false;
+  }
+  // You trying to overflow?
+  if (strlen(prober->traddr) < prober->traddr_len) {
+    return false;
+  }
+  if (strlen((const char *)trid->traddr) < prober->traddr_len) {
+    return false;
+  }
+  if (strncmp((const char *)trid->traddr, prober->traddr, prober->traddr_len) !=
+      0) {
+    return false;
+  }
+  (void)opts;
+  return true;
+}
+
+// Attach callback used by szd_spdk_open: stores the controller in the
+// manager and selects the first active namespace whose command set is ZNS.
+// prober->found stays false when the controller has no such namespace.
+void __szd_spdk_open_attach_cb(void *cb_ctx,
+                               const struct spdk_nvme_transport_id *trid,
+                               struct spdk_nvme_ctrlr *ctrlr,
+                               const struct spdk_nvme_ctrlr_opts *opts) {
+  DeviceTarget *prober = (DeviceTarget *)cb_ctx;
+  if (prober == NULL) {
+    return;
+  }
+  SPDKManager *man = (SPDKManager *)(prober->manager->private_);
+  man->ctrlr = ctrlr;
+  // take any ZNS namespace, we do not care which.
+  for (int nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0;
+       nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
+    struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
+    if (ns == NULL) {
+      continue;
+    }
+    if (spdk_nvme_ns_get_csi(ns) != SPDK_NVME_CSI_ZNS) {
+      continue;
+    }
+    man->ns = ns;
+    prober->found = true;
+    break;
+  }
+  (void)trid;
+  (void)opts;
+  return;
+}
+
+// Removal callback; intentionally does nothing.
+void __szd_spdk_open_remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) {
+  (void)cb_ctx;
+  (void)ctrlr;
+}
+
+// Attaches to the controller whose transport address starts with traddr and
+// selects a ZNS namespace on it.
+// Returns SZD_SC_SUCCESS when a ZNS namespace was found. Otherwise any
+// attached controller is detached again, the manager is left without a
+// controller and SZD_SC_SPDK_ERROR_OPEN is returned. options is unused.
+int szd_spdk_open(DeviceManager *manager, const char *traddr,
+                  DeviceOpenOptions *options) {
+  (void)options;
+  SPDKManager *man = (SPDKManager *)(manager->private_);
+  const size_t traddr_len = strlen(traddr);
+  DeviceTarget prober = {.manager = manager,
+                         .traddr = traddr,
+                         .traddr_len = traddr_len,
+                         .found = false};
+  // This is needed because of DPDK not properly recognising reattached devices.
+  // So force traddr.
+  bool already_found_once = false;
+  for (size_t i = 0; i < found_devices_number; i++) {
+    // memcmp returns 0 on equality.
+    if (found_devices_len[i] == traddr_len &&
+        memcmp(found_devices[i], traddr, found_devices_len[i]) == 0) {
+      already_found_once = true;
+      break;
+    }
+  }
+  if (already_found_once) {
+    memset(man->g_trid, 0, sizeof(*(man->g_trid)));
+    spdk_nvme_trid_populate_transport(man->g_trid, SPDK_NVME_TRANSPORT_PCIE);
+    // Leave at least one zeroed byte so the address stays NUL-terminated.
+    memcpy(man->g_trid->traddr, traddr,
+           spdk_min(traddr_len, sizeof(man->g_trid->traddr) - 1));
+  }
+  // Find controller.
+  int probe_ctx;
+  probe_ctx = spdk_nvme_probe(man->g_trid, &prober,
+                              (spdk_nvme_probe_cb)__szd_spdk_open_probe_cb,
+                              (spdk_nvme_attach_cb)__szd_spdk_open_attach_cb,
+                              (spdk_nvme_remove_cb)__szd_spdk_open_remove_cb);
+  // Dettach if broken.
+  if (probe_ctx != 0 || !prober.found) {
+    if (man->ctrlr != NULL) {
+      // The open failed either way, so the detach result is not reported;
+      // the stale handles are cleared so they cannot be detached twice.
+      (void)spdk_nvme_detach(man->ctrlr);
+      man->ctrlr = NULL;
+      man->ns = NULL;
+    }
+    return SZD_SC_SPDK_ERROR_OPEN;
+  }
+  return SZD_SC_SUCCESS;
+}
+
+int szd_spdk_close(DeviceManager *manager) {
+  SPDKManager *man = (SPDKManager *)(manager->private_);
+  int rc = 0;
+  if (spdk_unlikely(man->ctrlr == NULL)) {
+    return SZD_SC_NOT_ALLOCATED;
+  } else {
+    rc = spdk_nvme_detach(man->ctrlr);
+    man->ctrlr = NULL;
+    man->ns = NULL;
+    // Prevents wrongly assuming a device is attached.
+    if (man->g_trid != NULL) {
+      memset(man->g_trid, 0, sizeof(*(man->g_trid)));
+    }
+  }
+  return rc != 0 ?
SZD_SC_SPDK_ERROR_CLOSE : SZD_SC_SUCCESS;
+}
+
+// Closes the device when one is attached, releases the SPDK private state
+// and shuts the SPDK environment down. Returns the close status, or
+// SZD_SC_SUCCESS when nothing was attached.
+int szd_spdk_destroy(DeviceManager *manager) {
+  SPDKManager *man = (SPDKManager *)(manager->private_);
+  int rc = SZD_SC_SUCCESS;
+  if (man != NULL) {
+    if (man->ctrlr != NULL) {
+      rc = szd_spdk_close(manager);
+    }
+    // free(NULL) is a no-op, so the manager is released even when the
+    // transport id was never allocated.
+    free(man->g_trid);
+    free(man);
+    manager->private_ = NULL;
+  }
+  spdk_env_fini();
+  return rc;
+}
+
+// Destroys and re-creates the private state, keeping the device name and
+// re-initialising the SPDK environment without fresh options.
+int szd_spdk_reinit(DeviceManager **manager) {
+  const char *name = (*manager)->info.name;
+  int rc = szd_spdk_destroy(*manager);
+  if (rc != 0) {
+    return SZD_SC_SPDK_ERROR_CLOSE;
+  }
+  DeviceOptions options = {.name = name, .setup_spdk = false};
+  return szd_spdk_init(manager, &options);
+}
+
+// Probe callback used by szd_spdk_probe: accepts every controller.
+bool __szd_spdk_probe_probe_cb(void *cb_ctx,
+                               const struct spdk_nvme_transport_id *trid,
+                               struct spdk_nvme_ctrlr_opts *opts) {
+  (void)cb_ctx;
+  (void)trid;
+  (void)opts;
+  return true;
+}
+
+// Attach callback used by szd_spdk_probe: records the controller, a copy of
+// its transport address and whether any of its active namespaces is ZNS, and
+// remembers the address in the file-level found_devices table.
+void __szd_spdk_probe_attach_cb(void *cb_ctx,
+                                const struct spdk_nvme_transport_id *trid,
+                                struct spdk_nvme_ctrlr *ctrlr,
+                                const struct spdk_nvme_ctrlr_opts *opts) {
+  ProbeInformation *prober = (ProbeInformation *)cb_ctx;
+  // Very important lock! We probe concurrently and alter one struct.
+  pthread_mutex_lock(prober->mut);
+  if (prober->devices >= MAX_DEVICE_COUNT - 1) {
+    SPDK_ERRLOG("SZD: At the moment no more than %x devices are supported \n",
+                MAX_DEVICE_COUNT);
+  } else {
+    const size_t traddr_len = strlen(trid->traddr);
+    prober->traddr[prober->devices] =
+        (char *)calloc(traddr_len + 1, sizeof(char));
+    memcpy(prober->traddr[prober->devices], trid->traddr, traddr_len);
+    prober->ctrlr[prober->devices] = ctrlr;
+    for (int nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0;
+         nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
+      struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
+      // One ZNS namespace is enough (szd_spdk_open picks any ZNS namespace);
+      // a later non-ZNS namespace must not clear the flag again.
+      if (ns != NULL && spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS) {
+        prober->zns[prober->devices] = true;
+      }
+    }
+    prober->devices++;
+    // hidden global state...
+    bool found = false;
+    for (size_t i = 0; i < found_devices_number; i++) {
+      // memcmp returns 0 on equality.
+      if (found_devices_len[i] == traddr_len &&
+          memcmp(found_devices[i], trid->traddr, found_devices_len[i]) == 0) {
+        found = true;
+        break;
+      }
+    }
+    // The table has MAX_DEVICE_COUNT slots; never write past its end.
+    if (!found && found_devices_number < MAX_DEVICE_COUNT) {
+      char *remembered = (char *)calloc(traddr_len, sizeof(char));
+      if (remembered != NULL) {
+        memcpy(remembered, trid->traddr, traddr_len);
+        found_devices[found_devices_number] = remembered;
+        found_devices_len[found_devices_number] = traddr_len;
+        found_devices_number++;
+      }
+    }
+  }
+  pthread_mutex_unlock(prober->mut);
+  (void)opts;
+}
+
+int szd_spdk_probe(DeviceManager *manager, void **probe) {
+  RETURN_ERR_ON_NULL(manager);
+  RETURN_ERR_ON_NULL(manager->private_);
+
+  SPDKManager *man = (SPDKManager *)(manager->private_);
+  RETURN_ERR_ON_NULL(probe);
+  ProbeInformation *p = (ProbeInformation *)calloc(1, sizeof(ProbeInformation));
+  *probe = p;
+  RETURN_ERR_ON_NULL(*probe);
+  p->traddr = (char **)calloc(MAX_DEVICE_COUNT, sizeof(char *));
+  p->ctrlr = (struct spdk_nvme_ctrlr **)calloc(MAX_DEVICE_COUNT,
+                                               sizeof(t_spdk_nvme_ctrlr *));
+  p->zns = (bool *)calloc(MAX_DEVICE_COUNT, sizeof(bool));
+  p->mut = (pthread_mutex_t *)calloc(1, sizeof(pthread_mutex_t));
+  if (pthread_mutex_init(p->mut, NULL) != 0) {
+    return SZD_SC_SPDK_ERROR_PROBE;
+  }
+  int rc;
+  rc = spdk_nvme_probe(man->g_trid, *probe,
+                       (spdk_nvme_probe_cb)__szd_spdk_probe_probe_cb,
+                       (spdk_nvme_attach_cb)__szd_spdk_probe_attach_cb, NULL);
+  if (rc != 0) {
+    return SZD_SC_SPDK_ERROR_PROBE;
+  }
+  // Thread safe removing of devices, they have already been probed.
+  pthread_mutex_lock(p->mut);
+  for (size_t i = 0; i < p->devices; i++) {
+    // keep error message.
+    rc = spdk_nvme_detach(p->ctrlr[i]) | rc;
+  }
+  pthread_mutex_unlock(p->mut);
+  return rc != 0 ?
SZD_SC_SPDK_ERROR_PROBE : SZD_SC_SUCCESS; +} + +void szd_spdk_free_probe_information(DeviceManager *manager, void *probe_info) { + (void)manager; + ProbeInformation *pi = (ProbeInformation *)probe_info; + free(pi->zns); + for (uint8_t i = 0; i < pi->devices; i++) { + free(pi->traddr[i]); + } + free(pi->traddr); + free(pi->ctrlr); + free(pi->mut); + free(pi); +} + +int szd_spdk_create_qpair(DeviceManager *man, QPair **qpair) { + SPDKManager *spdk_man = (SPDKManager *)(man->private_); + RETURN_ERR_ON_NULL(spdk_man->ctrlr); + RETURN_ERR_ON_NULL(qpair); + *qpair = (QPair *)calloc(1, sizeof(QPair)); + RETURN_ERR_ON_NULL(*qpair); + (*qpair)->qpair = spdk_nvme_ctrlr_alloc_io_qpair(spdk_man->ctrlr, NULL, 0); + (*qpair)->man = man; + RETURN_ERR_ON_NULL((*qpair)->qpair); + SZD_DTRACE_PROBE(szd_create_qpair); + return SZD_SC_SUCCESS; +} + +int szd_spdk_destroy_qpair(DeviceManager *man, QPair *qpair) { + (void)man; + RETURN_ERR_ON_NULL(qpair); + RETURN_ERR_ON_NULL(qpair->qpair); + spdk_nvme_ctrlr_free_io_qpair(qpair->qpair); + qpair->man = NULL; + free(qpair); + SZD_DTRACE_PROBE(szd_destroy_qpair); + return SZD_SC_SUCCESS; +} + +void *__reserve_dma(uint64_t size) { + return spdk_zmalloc(size, 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); +} + +void *szd_spdk_calloc(uint64_t __allign, size_t __nmemb, size_t __size) { + size_t expanded_size = __nmemb * __size; + if (spdk_unlikely(expanded_size % __allign != 0 || __allign == 0)) { + return NULL; + } + return spdk_zmalloc(expanded_size, __allign, NULL, SPDK_ENV_SOCKET_ID_ANY, + SPDK_MALLOC_DMA); +} + +void szd_spdk_free(void *buffer) { spdk_free(buffer); } + +void __spdk_operation_complete(void *arg, + const struct spdk_nvme_cpl *completion) { + Completion *completed = (Completion *)arg; + completed->done = true; + // force non error to always be 0. + completed->err = + spdk_nvme_cpl_is_error(completion) ? 
completion->status.sc : 0x00; +} + +void __spdk_append_complete(void *arg, const struct spdk_nvme_cpl *completion) { + __spdk_operation_complete(arg, completion); +} + +void __spdk_read_complete(void *arg, const struct spdk_nvme_cpl *completion) { + __spdk_operation_complete(arg, completion); +} + +void __spdk_reset_zone_complete(void *arg, + const struct spdk_nvme_cpl *completion) { + __spdk_operation_complete(arg, completion); +} + +void __spdk_finish_zone_complete(void *arg, + const struct spdk_nvme_cpl *completion) { + __spdk_operation_complete(arg, completion); +} + +void __spdk_get_zone_head_complete(void *arg, + const struct spdk_nvme_cpl *completion) { + __spdk_operation_complete(arg, completion); +} + +#define POLL_QPAIR(qpair, target) \ + do { \ + spdk_nvme_qpair_process_completions((qpair), 0); \ + } while (!(target)) + +int szd_spdk_read(QPair *qpair, uint64_t lba, void *buffer, uint64_t size, + uint64_t blocks) { + (void)size; + RETURN_ERR_ON_NULL(qpair); + SPDKManager *spdk_man = (SPDKManager *)(qpair->man->private_); + int rc = SZD_SC_SUCCESS; + + Completion completion; + completion.done = false; + completion.err = 0x00; + rc = spdk_nvme_ns_cmd_read(spdk_man->ns, qpair->qpair, buffer, + lba, /* LBA start */ + blocks, /* number of LBAs */ + __spdk_read_complete, &completion, 0); + if (spdk_unlikely(rc != 0)) { + return SZD_SC_SPDK_ERROR_READ; + } + // Synchronous reads, busy wait. 
+  POLL_QPAIR(qpair->qpair, completion.done);
+  if (spdk_unlikely(completion.err != 0)) {
+    return SZD_SC_SPDK_ERROR_READ;
+  }
+  return SZD_SC_SUCCESS;
+}
+
+// Synchronous zone append of `blocks` LBAs; lba is passed to SPDK as the
+// start LBA of the append. size is unused. Returns
+// SZD_SC_SPDK_ERROR_APPEND when submission or the command itself fails.
+int szd_spdk_append(QPair *qpair, uint64_t lba, void *buffer, uint64_t size,
+                    uint64_t blocks) {
+  (void)size;
+  RETURN_ERR_ON_NULL(qpair);
+  SPDKManager *spdk_man = (SPDKManager *)(qpair->man->private_);
+  int rc = SZD_SC_SUCCESS;
+
+  Completion completion;
+  completion.done = false;
+  completion.err = 0x00;
+
+  rc = spdk_nvme_zns_zone_append(spdk_man->ns, qpair->qpair, buffer,
+                                 lba,    /* LBA start */
+                                 blocks, /* number of LBAs */
+                                 __spdk_append_complete, &completion, 0);
+  if (spdk_unlikely(rc != 0)) {
+    SPDK_ERRLOG("SZD: Error creating append request\n");
+    return SZD_SC_SPDK_ERROR_APPEND;
+  }
+  // Synchronous write, busy wait.
+  POLL_QPAIR(qpair->qpair, completion.done);
+  if (spdk_unlikely(completion.err != 0)) {
+    SPDK_ERRLOG("SZD: Error during append %x\n", completion.err);
+    return SZD_SC_SPDK_ERROR_APPEND;
+  }
+  // To the next zone we go
+  return SZD_SC_SUCCESS;
+}
+
+// Queues a zone append and returns without waiting. *completion is reset to
+// the default state here and is marked done by the completion callback once
+// the queue pair is polled.
+int szd_spdk_append_async(QPair *qpair, uint64_t lba, void *buffer,
+                          uint64_t size, uint64_t blocks,
+                          Completion *completion) {
+  (void)size;
+  RETURN_ERR_ON_NULL(qpair);
+  SPDKManager *spdk_man = (SPDKManager *)(qpair->man->private_);
+  int rc = SZD_SC_SUCCESS;
+
+  *completion = Completion_default;
+  completion->done = false;
+  completion->err = 0x00;
+  rc = spdk_nvme_zns_zone_append(spdk_man->ns, qpair->qpair, buffer,
+                                 lba,    /* LBA start */
+                                 blocks, /* number of LBAs */
+                                 __spdk_append_complete, completion, 0);
+  if (spdk_unlikely(rc != 0)) {
+    SPDK_ERRLOG("SZD: Error creating append request\n");
+    return SZD_SC_SPDK_ERROR_APPEND;
+  }
+  return SZD_SC_SUCCESS;
+}
+
+// Synchronous write of `blocks` LBAs starting at lba. size is unused.
+// Returns SZD_SC_SPDK_ERROR_WRITE when submission or the command itself
+// fails.
+int szd_spdk_write(QPair *qpair, uint64_t lba, void *buffer, uint64_t size,
+                   uint64_t blocks) {
+  (void)size;
+  RETURN_ERR_ON_NULL(qpair);
+  SPDKManager *spdk_man = (SPDKManager *)(qpair->man->private_);
+  int rc = SZD_SC_SUCCESS;
+
+  Completion completion;
+  completion.done = false;
+  completion.err = 0x00;
+  rc = spdk_nvme_ns_cmd_write(spdk_man->ns, qpair->qpair, buffer,
+                              lba,    /* LBA start */
+                              blocks, /* number of LBAs */
+                              __spdk_append_complete, &completion, 0);
+  if (spdk_unlikely(rc != 0)) {
+    SPDK_ERRLOG("SZD: Error creating write request\n");
+    // Report the write-specific code, matching the completion path below.
+    return SZD_SC_SPDK_ERROR_WRITE;
+  }
+  // Synchronous write, busy wait.
+  POLL_QPAIR(qpair->qpair, completion.done);
+  if (spdk_unlikely(completion.err != 0)) {
+    return SZD_SC_SPDK_ERROR_WRITE;
+  }
+  return SZD_SC_SUCCESS;
+}
+
+// Busy-polls the queue pair until completion is done, then reports
+// SZD_SC_SPDK_ERROR_POLLING when the command finished with an error.
+int szd_spdk_poll_async(QPair *qpair, Completion *completion) {
+  POLL_QPAIR(qpair->qpair, completion->done);
+  if (spdk_unlikely(completion->err != 0)) {
+    SPDK_ERRLOG("SZD: Error during polling - code:%x\n", completion->err);
+    return SZD_SC_SPDK_ERROR_POLLING;
+  }
+  return SZD_SC_SUCCESS;
+}
+
+// Processes pending completions once when completion is not done yet.
+// Returns SZD_SC_SPDK_ERROR_POLLING when completion carries an error and
+// SZD_SC_SUCCESS otherwise, including when it is still pending.
+int szd_spdk_poll_once(QPair *qpair, Completion *completion) {
+  if (!completion->done) {
+    spdk_nvme_qpair_process_completions(qpair->qpair, 0);
+  }
+  if (spdk_unlikely(completion->err != 0)) {
+    SPDK_ERRLOG("SZD: Error during polling once - code:%x\n", completion->err);
+    return SZD_SC_SPDK_ERROR_POLLING;
+  }
+  return SZD_SC_SUCCESS;
+}
+
+// Processes pending completions once without inspecting any Completion.
+void szd_spdk_poll_once_raw(QPair *qpair) {
+  spdk_nvme_qpair_process_completions(qpair->qpair, 0);
+}
+
+int szd_spdk_reset(QPair *qpair, uint64_t slba) {
+  RETURN_ERR_ON_NULL(qpair);
+  SPDKManager *spdk_man = (SPDKManager *)(qpair->man->private_);
+  // Otherwise we have an out of range.
+  DeviceInfo info = qpair->man->info;
+  if (spdk_unlikely(slba < info.min_lba || slba >= info.lba_cap)) {
+    return SZD_SC_SPDK_ERROR_READ;
+  }
+  Completion completion = Completion_default;
+  int rc = spdk_nvme_zns_reset_zone(
+      spdk_man->ns, qpair->qpair, slba, /* starting LBA of the zone to reset */
+      false,                            /* don't reset all zones */
+      __spdk_reset_zone_complete, &completion);
+  if (spdk_unlikely(rc != 0)) {
+    return SZD_SC_SPDK_ERROR_RESET;
+  }
+  // Busy wait
+  POLL_QPAIR(qpair->qpair, completion.done);
+  if (spdk_unlikely(completion.err != 0)) {
+    SPDK_ERRLOG("SZD: Reset error - code:%x \n", completion.err);
+    return SZD_SC_SPDK_ERROR_RESET;
+  }
+  return rc;
+}
+
+// Resets every zone of the namespace and waits for the command to finish.
+int szd_spdk_reset_all(QPair *qpair) {
+  RETURN_ERR_ON_NULL(qpair);
+  SPDKManager *spdk_man = (SPDKManager *)(qpair->man->private_);
+
+  Completion completion = Completion_default;
+  int rc = spdk_nvme_zns_reset_zone(spdk_man->ns, qpair->qpair,
+                                    0,    /* starting LBA of the zone to reset */
+                                    true, /* reset all zones */
+                                    __spdk_reset_zone_complete, &completion);
+  if (spdk_unlikely(rc != 0)) {
+    return SZD_SC_SPDK_ERROR_RESET;
+  }
+  // Busy wait
+  POLL_QPAIR(qpair->qpair, completion.done);
+  if (spdk_unlikely(completion.err != 0)) {
+    return SZD_SC_SPDK_ERROR_RESET;
+  }
+  return rc;
+}
+
+// Finishes the zone starting at slba and waits for the command to finish.
+int szd_spdk_finish_zone(QPair *qpair, uint64_t slba) {
+  RETURN_ERR_ON_NULL(qpair);
+  SPDKManager *spdk_man = (SPDKManager *)(qpair->man->private_);
+
+  Completion completion = Completion_default;
+  int rc = spdk_nvme_zns_finish_zone(
+      spdk_man->ns, qpair->qpair, slba, /* starting LBA of the zone to finish */
+      false,                            /* don't finish all zones */
+      __spdk_finish_zone_complete, &completion);
+  if (spdk_unlikely(rc != 0)) {
+    return SZD_SC_SPDK_ERROR_FINISH;
+  }
+  // Busy wait
+  POLL_QPAIR(qpair->qpair, completion.done);
+  if (spdk_unlikely(completion.err != 0)) {
+    return SZD_SC_SPDK_ERROR_FINISH;
+  }
+  return rc;
+}
+
+// Reads the write pointers of the zones from slba up to and including the
+// zone at eslba into write_head[0..], using zone reports fetched in chunks.
+// A write pointer beyond slba + zone capacity is stored as the start of the
+// next zone (slba + zone_size).
+// Returns SZD_SC_NOT_ALLOCATED when the report buffer cannot be allocated and
+// SZD_SC_SPDK_ERROR_REPORT_ZONES for an unknown zone size or any failure
+// while fetching or validating a report.
+int szd_spdk_get_zone_heads(QPair *qpair, uint64_t slba, uint64_t eslba,
+                            uint64_t *write_head) {
+  // Inspired by SPDK/nvme/identify.c
+  RETURN_ERR_ON_NULL(qpair);
+  RETURN_ERR_ON_NULL(qpair->man);
+  // Otherwise we have an out of range.
+  DeviceInfo info = qpair->man->info;
+  SPDKManager *spdk_man = (SPDKManager *)(qpair->man->private_);
+  int rc = SZD_SC_SUCCESS;
+
+  // The zone size is used as a divisor below.
+  if (spdk_unlikely(info.zone_size == 0)) {
+    return SZD_SC_SPDK_ERROR_REPORT_ZONES;
+  }
+
+  // Setup state variables
+  size_t report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(spdk_man->ns);
+  uint8_t *report_buf = (uint8_t *)calloc(1, report_bufsize);
+  if (spdk_unlikely(report_buf == NULL)) {
+    return SZD_SC_NOT_ALLOCATED;
+  }
+  uint64_t reported_zones = 0;
+  uint64_t zones_to_report = (eslba - slba) / info.zone_size;
+  struct spdk_nvme_zns_zone_report *zns_report;
+
+  // Setup logical variables
+  const struct spdk_nvme_ns_data *nsdata = spdk_nvme_ns_get_data(spdk_man->ns);
+  const struct spdk_nvme_zns_ns_data *nsdata_zns =
+      spdk_nvme_zns_ns_get_data(spdk_man->ns);
+  uint64_t zone_report_size = sizeof(struct spdk_nvme_zns_zone_report);
+  uint64_t zone_descriptor_size = sizeof(struct spdk_nvme_zns_zone_desc);
+  uint64_t zns_descriptor_size =
+      nsdata_zns->lbafe[nsdata->flbas.format].zdes * 64;
+  uint64_t max_zones_per_buf =
+      zns_descriptor_size
+          ? (report_bufsize - zone_report_size) /
+                (zone_descriptor_size + zns_descriptor_size)
+          : (report_bufsize - zone_report_size) / zone_descriptor_size;
+
+  // Get zone heads iteratively
+  do {
+    memset(report_buf, 0, report_bufsize);
+    // Get as much as we can from SPDK
+    Completion completion = Completion_default;
+    rc = spdk_nvme_zns_report_zones(spdk_man->ns, qpair->qpair, report_buf,
+                                    report_bufsize, slba,
+                                    SPDK_NVME_ZRA_LIST_ALL, true,
+                                    __spdk_get_zone_head_complete, &completion);
+    if (spdk_unlikely(rc != 0)) {
+      free(report_buf);
+      return SZD_SC_SPDK_ERROR_REPORT_ZONES;
+    }
+    // Busy wait for the head.
+    POLL_QPAIR(qpair->qpair, completion.done);
+    if (spdk_unlikely(completion.err != 0)) {
+      free(report_buf);
+      return SZD_SC_SPDK_ERROR_REPORT_ZONES;
+    }
+
+    // retrieve nr_zones
+    zns_report = (struct spdk_nvme_zns_zone_report *)report_buf;
+    uint64_t nr_zones = zns_report->nr_zones;
+    if (nr_zones > max_zones_per_buf || nr_zones == 0) {
+      free(report_buf);
+      return SZD_SC_SPDK_ERROR_REPORT_ZONES;
+    }
+
+    // Retrieve write heads from zone information.
+    for (uint64_t i = 0; i < nr_zones && reported_zones <= zones_to_report;
+         i++) {
+      struct spdk_nvme_zns_zone_desc *desc = &zns_report->descs[i];
+      write_head[reported_zones] = desc->wp;
+      if (spdk_unlikely(write_head[reported_zones] < slba)) {
+        free(report_buf);
+        return SZD_SC_SPDK_ERROR_REPORT_ZONES;
+      }
+      if (write_head[reported_zones] > slba + desc->zcap) {
+        write_head[reported_zones] = slba + info.zone_size;
+      }
+      // progress
+      slba += info.zone_size;
+      reported_zones++;
+    }
+  } while (reported_zones < zones_to_report);
+  free(report_buf);
+  return SZD_SC_SUCCESS;
+}
+
+// Single-zone variant: reads the write pointer of the zone at slba.
+int szd_spdk_get_zone_head(QPair *qpair, uint64_t slba, uint64_t *write_head) {
+  return szd_spdk_get_zone_heads(qpair, slba, slba, write_head);
+}
+
+int szd_spdk_get_zone_cap(QPair *qpair, uint64_t slba, uint64_t *zone_cap) {
+  RETURN_ERR_ON_NULL(qpair);
+  RETURN_ERR_ON_NULL(qpair->man);
+  SPDKManager *spdk_man = (SPDKManager *)(qpair->man->private_);
+  // Otherwise we have an out of range.
+  DeviceInfo info = qpair->man->info;
+  if (spdk_unlikely(slba < info.min_lba || slba > info.max_lba)) {
+    return SZD_SC_SPDK_ERROR_READ;
+  }
+
+  int rc = SZD_SC_SUCCESS;
+  // Get information from a zone.
+ size_t report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(spdk_man->ns); + uint8_t *report_buf = (uint8_t *)calloc(1, report_bufsize); + { + Completion completion = Completion_default; + rc = spdk_nvme_zns_report_zones(spdk_man->ns, qpair->qpair, report_buf, + report_bufsize, slba, + SPDK_NVME_ZRA_LIST_ALL, true, + __spdk_get_zone_head_complete, &completion); + if (spdk_unlikely(rc != 0)) { + free(report_buf); + return SZD_SC_SPDK_ERROR_REPORT_ZONES; + } + // Busy wait for the head. + POLL_QPAIR(qpair->qpair, completion.done); + if (spdk_unlikely(completion.err != 0)) { + free(report_buf); + return SZD_SC_SPDK_ERROR_REPORT_ZONES; + } + } + // Retrieve write head from zone information. + uint32_t zd_index = sizeof(struct spdk_nvme_zns_zone_report); + struct spdk_nvme_zns_zone_desc *desc = + (struct spdk_nvme_zns_zone_desc *)(report_buf + zd_index); + *zone_cap = desc->zcap; + free(report_buf); + return SZD_SC_SUCCESS; +} + +#ifdef __cplusplus +} +} // namespace SimpleZNSDeviceNamespace +#endif diff --git a/szd/core/tests/szd_full_path_test.c b/szd/core/tests/szd_full_path_test.c index f2874ce..84929f6 100644 --- a/szd/core/tests/szd_full_path_test.c +++ b/szd/core/tests/szd_full_path_test.c @@ -41,11 +41,13 @@ extern "C" { #define VALID(rc) assert((rc) == 0) #define INVALID(rc) assert((rc) != 0) -int write_pattern(char **pattern, QPair *qpair, int32_t size, int32_t jump) { +int write_pattern(EngineManager *manager, char **pattern, QPair *qpair, + int32_t size, int32_t jump) { // if (*pattern != NULL) { // szd_free(*pattern); // } - *pattern = (char *)szd_calloc(qpair->man->info.lba_size, size, sizeof(char)); + *pattern = (char *)szd_calloc(manager, qpair->man->info.lba_size, size, + sizeof(char)); if (*pattern == NULL) { return 1; } @@ -56,7 +58,7 @@ int write_pattern(char **pattern, QPair *qpair, int32_t size, int32_t jump) { } typedef struct { - DeviceManager **manager; + EngineManager **manager; uint64_t write_slba_start; uint64_t alt_slba_start; int32_t 
data_offset; @@ -77,7 +79,7 @@ if they interfere. Hence the need for a barrier and a mutex. */ void *worker_thread(void *arg) { thread_data *dat = (thread_data *)arg; - DeviceManager **manager = dat->manager; + EngineManager **manager = dat->manager; int rc; QPair **qpair = (QPair **)calloc(1, sizeof(QPair *)); rc = szd_create_qpair(*manager, qpair); @@ -87,16 +89,17 @@ void *worker_thread(void *arg) { pthread_exit(NULL); } uint64_t zone_size_bytes = - (*manager)->info.lba_size * (*manager)->info.zone_cap; + (*manager)->manager_->info.lba_size * (*manager)->manager_->info.zone_cap; char **pattern_1 = (char **)calloc(1, sizeof(char **)); - rc = write_pattern(pattern_1, *qpair, zone_size_bytes, dat->data_offset); + rc = write_pattern(*manager, pattern_1, *qpair, zone_size_bytes, + dat->data_offset); if (rc != 0) { PLUS_THREAD_BARRIER(mut, thread_barrier); dat->rc = rc; pthread_exit(NULL); } - char *pattern_read_1 = (char *)szd_calloc((*qpair)->man->info.lba_size, - zone_size_bytes, sizeof(char *)); + char *pattern_read_1 = (char *)szd_calloc( + *manager, (*qpair)->man->info.lba_size, zone_size_bytes, sizeof(char *)); if (pattern_read_1 == NULL) { PLUS_THREAD_BARRIER(mut, thread_barrier); dat->rc = rc; @@ -105,13 +108,13 @@ void *worker_thread(void *arg) { // hammering for (uint16_t i = 0; i < 200; i++) { uint64_t wstart = dat->write_slba_start; - rc = szd_append(*qpair, &wstart, *pattern_1, zone_size_bytes); + rc = szd_append(*manager, *qpair, &wstart, *pattern_1, zone_size_bytes); if (rc != 0) { PLUS_THREAD_BARRIER(mut, thread_barrier); dat->rc = rc; pthread_exit(NULL); } - rc = szd_read(*qpair, dat->write_slba_start, pattern_read_1, + rc = szd_read(*manager, *qpair, dat->write_slba_start, pattern_read_1, zone_size_bytes); if (rc != 0) { PLUS_THREAD_BARRIER(mut, thread_barrier); @@ -124,7 +127,7 @@ void *worker_thread(void *arg) { pthread_exit(NULL); } if (i != 199) { - rc = szd_reset(*qpair, dat->write_slba_start); + rc = szd_reset(*manager, *qpair, 
dat->write_slba_start); if (rc != 0) { PLUS_THREAD_BARRIER(mut, thread_barrier); dat->rc = rc; @@ -141,12 +144,14 @@ void *worker_thread(void *arg) { } pthread_mutex_unlock(&mut); - szd_free(pattern_read_1); - pattern_read_1 = (char *)szd_calloc((*qpair)->man->info.lba_size, + szd_free(*manager, pattern_read_1); + pattern_read_1 = (char *)szd_calloc(*manager, (*qpair)->man->info.lba_size, zone_size_bytes, sizeof(char *)); - szd_free(*pattern_1); - rc = write_pattern(pattern_1, *qpair, zone_size_bytes, dat->alt_offset); - rc = szd_read(*qpair, dat->alt_slba_start, pattern_read_1, zone_size_bytes); + szd_free(*manager, *pattern_1); + rc = write_pattern(*manager, pattern_1, *qpair, zone_size_bytes, + dat->alt_offset); + rc = szd_read(*manager, *qpair, dat->alt_slba_start, pattern_read_1, + zone_size_bytes); if (rc != 0) { dat->rc = rc; pthread_exit(NULL); @@ -157,60 +162,73 @@ void *worker_thread(void *arg) { pthread_exit(NULL); } dat->rc = rc; - szd_destroy_qpair(*qpair); + szd_free(*manager, pattern_read_1); + szd_destroy_qpair(*manager, *qpair); free(qpair); free(pattern_1); pthread_exit(NULL); } +typedef struct { + char **traddr; /**< transport ids of all probed devices.*/ + bool *zns; /**< Foreach probed device, is it a ZNS device?*/ + struct spdk_nvme_ctrlr **ctrlr; /**< The controller(s) of the devices.*/ + uint8_t devices; /**< Used to identify global device count.*/ + pthread_mutex_t *mut; /**< Ensures that probe information is thread safe.*/ +} ProbeInformation; + int main(void) { int rc; printf("----------------------INIT----------------------\n"); uint64_t min_zone = 2, max_zone = 10; DeviceOpenOptions open_opts = {min_zone, max_zone}; - DeviceManager **manager = (DeviceManager **)calloc(1, sizeof(DeviceManager)); + EngineManager *man; + EngineManager **manager = &man; DeviceOptions opts = DeviceOptions_default; - rc = szd_init(manager, &opts); + rc = szd_init(manager, &opts, SZD_IO_BACKEND_IO_URING); DEBUG_TEST_PRINT("SPDK init ", rc); VALID(rc); // find 
devices printf("----------------------PROBE----------------------\n"); - char *device_to_use = NULL; - ProbeInformation **prober = - (ProbeInformation **)calloc(1, sizeof(ProbeInformation *)); - rc = szd_probe(*manager, prober); - DEBUG_TEST_PRINT("probe return code ", rc); - VALID(rc); - for (int i = 0; i < (*prober)->devices; i++) { - const char *is_zns = (*prober)->zns[i] ? "true" : "false"; - printf("Device found\n\tname:%s\n\tZNS device:%s\n", (*prober)->traddr[i], - is_zns); - if ((*prober)->zns[i]) { - if (device_to_use) { - free(device_to_use); - } - device_to_use = - (char *)calloc(strlen((*prober)->traddr[i]) + 1, sizeof(char)); - memcpy(device_to_use, (*prober)->traddr[i], strlen((*prober)->traddr[i])); - } - } - // dangerous! we must be absolutely sure that no other process is using this - // anymore. - szd_free_probe_information(*prober); - free(prober); - if (!device_to_use) { - printf("No ZNS Device found.\n Are you sure you have a ZNS device " - "connected?\n"); - assert(false); - } - printf("ZNS device %s found. This device will be used for the rest of the " - "test.\n", - device_to_use); + // char *device_to_use = NULL; + // ProbeInformation **prober = + // (ProbeInformation **)calloc(1, sizeof(ProbeInformation *)); + // rc = szd_probe(*manager, (void **)prober); + // DEBUG_TEST_PRINT("probe return code ", rc); + // VALID(rc); + // for (int i = 0; i < (*prober)->devices; i++) { + // const char *is_zns = (*prober)->zns[i] ? "true" : "false"; + // printf("Device found\n\tname:%s\n\tZNS device:%s\n", + // (*prober)->traddr[i], + // is_zns); + // if ((*prober)->zns[i]) { + // if (device_to_use) { + // free(device_to_use); + // } + // device_to_use = + // (char *)calloc(strlen((*prober)->traddr[i]) + 1, sizeof(char)); + // memcpy(device_to_use, (*prober)->traddr[i], + // strlen((*prober)->traddr[i])); + // } + // } + // // dangerous! we must be absolutely sure that no other process is using + // this + // // anymore. 
+ // szd_free_probe_information(*manager, *prober); + // free(prober); + // if (!device_to_use) { + // printf("No ZNS Device found.\n Are you sure you have a ZNS device " + // "connected?\n"); + // assert(false); + // } + // printf("ZNS device %s found. This device will be used for the rest of the " + // "test.\n", + // device_to_use); - rc = szd_reinit(manager); - DEBUG_TEST_PRINT("reinit return code ", rc); - VALID(rc); + // rc = szd_reinit(manager); + // DEBUG_TEST_PRINT("reinit return code ", rc); + // VALID(rc); // init spdk printf("----------------------OPENING DEVICE----------------------\n"); @@ -220,20 +238,19 @@ int main(void) { INVALID(rc); // try existing device - rc = szd_open(*manager, device_to_use, &open_opts); + rc = szd_open(*manager, "/dev/ng5n1", &open_opts); DEBUG_TEST_PRINT("existing return code ", rc); VALID(rc); - free(device_to_use); // ensure that everything from this device is OK - assert((*manager)->ctrlr != NULL); - assert((*manager)->ns != NULL); - assert((*manager)->info.lba_size > 0); - assert((*manager)->info.mdts > 0); - assert((*manager)->info.zasl > 0); - assert((*manager)->info.zone_size > 0); - assert((*manager)->info.zone_cap > 0); - assert((*manager)->info.lba_cap > 0); + // assert((*manager)->manager_->private_->ctrlr != NULL); + // assert((*manager)->ns != NULL); + assert((*manager)->manager_->info.lba_size > 0); + assert((*manager)->manager_->info.mdts > 0); + assert((*manager)->manager_->info.zasl > 0); + assert((*manager)->manager_->info.zone_size > 0); + assert((*manager)->manager_->info.zone_cap > 0); + assert((*manager)->manager_->info.lba_cap > 0); // create qpair QPair **qpair = (QPair **)calloc(1, sizeof(QPair *)); @@ -244,7 +261,7 @@ int main(void) { // get and verify data (based on ZNS QEMU image) DeviceInfo info = {}; - rc = szd_get_device_info(&info, *manager); + rc = szd_get_device_info(*manager, &info); DEBUG_TEST_PRINT("get info code ", rc); VALID(rc); printf("lba size is %ld\n", info.lba_size); @@ -253,204 
+270,231 @@ int main(void) { printf("mdts is %ld\n", info.mdts); printf("zasl is %ld\n", info.zasl); printf("lba_cap is %ld\n", info.lba_cap); + info.min_lba = min_zone * info.zone_size; printf("min lba is %ld\n", info.min_lba); + info.max_lba = max_zone * info.zone_size; printf("max lba is %ld\n", info.max_lba); uint64_t write_head; uint64_t append_head; printf("----------------------WORKLOAD SMALL----------------------\n"); // make space by resetting the device zones - rc = szd_reset_all(*qpair); + rc = szd_reset_all(*manager, *qpair); DEBUG_TEST_PRINT("reset all code ", rc); VALID(rc); - rc = szd_get_zone_head(*qpair, min_zone * info.zone_size, &write_head); + rc = szd_get_zone_head(*manager, *qpair, min_zone * info.zone_size, + &write_head); DEBUG_TEST_PRINT("min zone head ", rc); VALID(rc); assert(write_head == min_zone * info.zone_size); char **pattern_1 = (char **)calloc(1, sizeof(char **)); - rc = write_pattern(pattern_1, *qpair, info.lba_size, 10); + rc = write_pattern(*manager, pattern_1, *qpair, info.lba_size, 10); VALID(rc); append_head = min_zone * info.zone_size; - rc = szd_append(*qpair, &append_head, *pattern_1, info.lba_size); + printf("HEKKIE %lu \n", append_head); + rc = szd_append(*manager, *qpair, &append_head, *pattern_1, info.lba_size); DEBUG_TEST_PRINT("append alligned ", rc); VALID(rc); - rc = szd_get_zone_head(*qpair, min_zone * info.zone_size, &write_head); + rc = szd_get_zone_head(*manager, *qpair, min_zone * info.zone_size, + &write_head); + printf("HEKKIE %lu %lu\n", append_head, write_head); VALID(rc); assert(write_head == min_zone * info.zone_size + 1); char **pattern_2 = (char **)calloc(1, sizeof(char **)); - rc = write_pattern(pattern_2, *qpair, info.zasl, 13); + rc = write_pattern(*manager, pattern_2, *qpair, info.zasl, 13); VALID(rc); - rc = szd_append(*qpair, &append_head, *pattern_2, info.zasl); + rc = szd_append(*manager, *qpair, &append_head, *pattern_2, info.zasl); DEBUG_TEST_PRINT("append zasl ", rc); VALID(rc); - rc = 
szd_get_zone_head(*qpair, min_zone * info.zone_size, &write_head); + rc = szd_get_zone_head(*manager, *qpair, min_zone * info.zone_size, + &write_head); VALID(rc); + printf("HEKKIE %lu %lu\n", + min_zone * info.zone_size + 1 + info.zasl / info.lba_size, write_head); assert(write_head == min_zone * info.zone_size + 1 + info.zasl / info.lba_size); - char *pattern_read_1 = (char *)szd_calloc((*qpair)->man->info.lba_size, - info.lba_size, sizeof(char *)); - rc = szd_read(*qpair, min_zone * info.zone_size, pattern_read_1, + char *pattern_read_1 = (char *)szd_calloc( + *manager, (*qpair)->man->info.lba_size, info.lba_size, sizeof(char *)); + rc = szd_read(*manager, *qpair, min_zone * info.zone_size, pattern_read_1, info.lba_size); DEBUG_TEST_PRINT("read alligned ", rc); VALID(rc); for (uint64_t i = 0; i < info.lba_size; i++) { assert((char)(pattern_read_1)[i] == (char)(*pattern_1)[i]); } - szd_free(*pattern_1); - szd_free(pattern_read_1); - char *pattern_read_2 = (char *)szd_calloc((*qpair)->man->info.lba_size, - info.zasl, sizeof(char *)); - rc = szd_read(*qpair, min_zone * info.zone_size + 1, pattern_read_2, + szd_free(*manager, *pattern_1); + szd_free(*manager, pattern_read_1); + char *pattern_read_2 = (char *)szd_calloc( + *manager, (*qpair)->man->info.lba_size, info.zasl, sizeof(char *)); + rc = szd_read(*manager, *qpair, min_zone * info.zone_size + 1, pattern_read_2, info.zasl); DEBUG_TEST_PRINT("read zasl ", rc); VALID(rc); for (uint64_t i = 0; i < info.zasl; i++) { assert((char)(pattern_read_2)[i] == (char)(*pattern_2)[i]); } - szd_free(*pattern_2); - rc = szd_reset_all(*qpair); + szd_free(*manager, *pattern_2); + rc = szd_reset_all(*manager, *qpair); DEBUG_TEST_PRINT("reset all ", rc); VALID(rc); - rc = szd_read(*qpair, min_zone * info.zone_size + 1, pattern_read_2, + rc = szd_read(*manager, *qpair, min_zone * info.zone_size + 1, pattern_read_2, info.zasl); DEBUG_TEST_PRINT("verify empty first zone ", rc); VALID(rc); for (uint64_t i = 0; i < info.zasl; i++) { 
assert((char)(pattern_read_2)[i] == 0); } - szd_free(pattern_read_2); + szd_free(*manager, pattern_read_2); append_head = min_zone * info.zone_size; printf("----------------------WORKLOAD FILL----------------------\n"); uint64_t number_of_zones = (info.max_lba - info.min_lba) / info.zone_size; char **pattern_3 = (char **)calloc(1, sizeof(char **)); - rc = write_pattern(pattern_3, *qpair, + rc = write_pattern(*manager, pattern_3, *qpair, info.lba_size * number_of_zones * info.zone_cap, 19); VALID(rc); - rc = szd_append(*qpair, &append_head, *pattern_3, + rc = szd_append(*manager, *qpair, &append_head, *pattern_3, info.lba_size * number_of_zones * info.zone_cap); DEBUG_TEST_PRINT("fill entire device ", rc); VALID(rc); for (uint64_t i = info.min_lba; i < info.max_lba; i += info.zone_size) { - rc = szd_get_zone_head(*qpair, i, &write_head); + rc = szd_get_zone_head(*manager, *qpair, i, &write_head); VALID(rc); assert(write_head == i + info.zone_size); + printf("HEKKIE %lu %lu\n", i + info.zone_size, write_head); } - szd_free(*pattern_3); char *pattern_read_3 = (char *)szd_calloc( - (*qpair)->man->info.lba_size, + *manager, (*qpair)->man->info.lba_size, info.lba_size * number_of_zones * info.zone_cap, sizeof(char *)); assert(pattern_read_3 != NULL); - rc = szd_read(*qpair, min_zone * info.zone_size, pattern_read_3, + rc = szd_read(*manager, *qpair, min_zone * info.zone_size, pattern_read_3, info.lba_size * number_of_zones * info.zone_cap); DEBUG_TEST_PRINT("read entire device ", rc); VALID(rc); for (uint64_t i = 0; i < info.lba_size * number_of_zones * info.zone_cap; i++) { + if ((char)(pattern_read_3)[i] != (char)(*pattern_3)[i]) + printf("LU %lu / %lu\n", i, + info.lba_size * number_of_zones * info.zone_cap); assert((char)(pattern_read_3)[i] == (char)(*pattern_3)[i]); } - szd_free(pattern_read_3); - rc = szd_reset(*qpair, min_zone * info.zone_size + info.zone_size); - rc = szd_reset(*qpair, min_zone * info.zone_size + info.zone_size * 2) | rc; + szd_free(*manager, 
pattern_read_3); + rc = szd_reset(*manager, *qpair, min_zone * info.zone_size + info.zone_size); + rc = szd_reset(*manager, *qpair, + min_zone * info.zone_size + info.zone_size * 2) | + rc; DEBUG_TEST_PRINT("reset zone 2,3 ", rc); VALID(rc); - rc = szd_get_zone_head(*qpair, min_zone * info.zone_size, &write_head); + rc = szd_get_zone_head(*manager, *qpair, min_zone * info.zone_size, + &write_head); VALID(rc); + printf("HEKKIE %lu %lu\n", min_zone * info.zone_size + info.zone_size, + write_head); assert(write_head == min_zone * info.zone_size + info.zone_size); - rc = szd_get_zone_head(*qpair, min_zone * info.zone_size + info.zone_size, + rc = szd_get_zone_head(*manager, *qpair, + min_zone * info.zone_size + info.zone_size, &write_head); VALID(rc); assert(write_head == min_zone * info.zone_size + info.zone_size); - rc = szd_get_zone_head(*qpair, min_zone * info.zone_size + info.zone_size * 2, + rc = szd_get_zone_head(*manager, *qpair, + min_zone * info.zone_size + info.zone_size * 2, &write_head); VALID(rc); assert(write_head == min_zone * info.zone_size + info.zone_size * 2); char *pattern_read_4 = - (char *)szd_calloc((*qpair)->man->info.lba_size, + (char *)szd_calloc(*manager, (*qpair)->man->info.lba_size, info.lba_size * info.zone_cap, sizeof(char *)); - rc = szd_read(*qpair, info.zone_size * min_zone, pattern_read_4, + rc = szd_read(*manager, *qpair, info.zone_size * min_zone, pattern_read_4, info.lba_size * info.zone_cap); DEBUG_TEST_PRINT("read zone 1 ", rc); VALID(rc); for (uint64_t i = 0; i < info.lba_size * info.zone_cap; i++) { assert((char)(pattern_read_4)[i] == (char)(*pattern_3)[i]); } - rc = szd_read(*qpair, min_zone * info.zone_size + info.zone_size, + szd_free(*manager, *pattern_3); + rc = szd_read(*manager, *qpair, min_zone * info.zone_size + info.zone_size, pattern_read_4, info.lba_size * info.zone_cap); DEBUG_TEST_PRINT("read zone 2 ", rc); VALID(rc); for (uint64_t i = 0; i < info.lba_size * info.zone_cap; i++) { assert((char)(pattern_read_4)[i] 
== 0); } - rc = szd_read(*qpair, min_zone * info.zone_size + info.zone_size * 2, - pattern_read_4, info.lba_size * info.zone_cap); + rc = + szd_read(*manager, *qpair, min_zone * info.zone_size + info.zone_size * 2, + pattern_read_4, info.lba_size * info.zone_cap); DEBUG_TEST_PRINT("read zone 3 ", rc); VALID(rc); for (uint64_t i = 0; i < info.lba_size * info.zone_cap; i++) { assert((char)(pattern_read_4)[i] == 0); } - rc = szd_read(*qpair, min_zone * info.zone_size + info.zone_size * 3, - pattern_read_4, info.lba_size * info.zone_cap); + rc = + szd_read(*manager, *qpair, min_zone * info.zone_size + info.zone_size * 3, + pattern_read_4, info.lba_size * info.zone_cap); DEBUG_TEST_PRINT("read zone 4 ", rc); VALID(rc); // This ugly loop is necessary to prevent over-allocating DMA. We only want // one lba at a time. - rc = write_pattern(pattern_3, *qpair, info.lba_size, + rc = write_pattern(*manager, pattern_3, *qpair, info.lba_size, 19 + info.zone_cap * 3 * info.lba_size); for (uint64_t i = 0; i < info.lba_size * info.zone_cap; i++) { if (i % info.lba_size == 0 && i > 0) { - szd_free(*pattern_3); - rc = write_pattern(pattern_3, *qpair, info.lba_size, + szd_free(*manager, *pattern_3); + rc = write_pattern(*manager, pattern_3, *qpair, info.lba_size, 19 + i + info.zone_cap * 3 * info.lba_size); VALID(rc); } assert((char)(pattern_read_4)[i] == (char)(*pattern_3)[i % info.lba_size]); } - rc = szd_reset_all(*qpair); + rc = szd_reset_all(*manager, *qpair); DEBUG_TEST_PRINT("reset all ", rc); VALID(rc); append_head = min_zone * info.zone_size; printf("----------------------WORKLOAD ZONE EDGE----------------------\n"); - rc = write_pattern(pattern_3, *qpair, info.lba_size * info.zone_cap * 2, 19); - rc = szd_append(*qpair, &append_head, *pattern_3, + rc = write_pattern(*manager, pattern_3, *qpair, + info.lba_size * info.zone_cap * 2, 19); + rc = szd_append(*manager, *qpair, &append_head, *pattern_3, info.lba_size * (info.zone_cap - 3)); DEBUG_TEST_PRINT("zone friction part 1: 
append 1 zoneborder - 3 ", rc); VALID(rc); - rc = szd_get_zone_head(*qpair, min_zone * info.zone_size, &write_head); + rc = szd_get_zone_head(*manager, *qpair, min_zone * info.zone_size, + &write_head); VALID(rc); assert(write_head == min_zone * info.zone_size + info.zone_cap - 3); - rc = szd_append(*qpair, &append_head, + rc = szd_append(*manager, *qpair, &append_head, *pattern_3 + info.lba_size * (info.zone_cap - 3), info.lba_size * 6); DEBUG_TEST_PRINT("zone friction part 2: append 1 zoneborder + 6 ", rc); VALID(rc); - rc = szd_get_zone_head(*qpair, min_zone * info.zone_size, &write_head); + rc = szd_get_zone_head(*manager, *qpair, min_zone * info.zone_size, + &write_head); VALID(rc); assert(write_head == min_zone * info.zone_size + info.zone_size); - rc = szd_get_zone_head(*qpair, min_zone * info.zone_size + info.zone_size, + rc = szd_get_zone_head(*manager, *qpair, + min_zone * info.zone_size + info.zone_size, &write_head); VALID(rc); assert(write_head == min_zone * info.zone_size + info.zone_size + 3); - rc = szd_append(*qpair, &append_head, + rc = szd_append(*manager, *qpair, &append_head, *pattern_3 + info.lba_size * (info.zone_cap + 3), info.lba_size * 13); DEBUG_TEST_PRINT("zone friction part 3: append 1 zoneborder + 16 ", rc); VALID(rc); - rc = szd_get_zone_head(*qpair, min_zone * info.zone_size + info.zone_size, + rc = szd_get_zone_head(*manager, *qpair, + min_zone * info.zone_size + info.zone_size, &write_head); VALID(rc); assert(write_head == min_zone * info.zone_size + info.zone_size + 16); - rc = szd_read(*qpair, min_zone * info.zone_size, pattern_read_4, + rc = szd_read(*manager, *qpair, min_zone * info.zone_size, pattern_read_4, info.lba_size * (info.zone_cap - 3)); DEBUG_TEST_PRINT("zone friction part 4: read 1 zoneborder - 3 ", rc); VALID(rc); - rc = szd_read(*qpair, min_zone * info.zone_size + info.zone_cap - 3, + rc = szd_read(*manager, *qpair, min_zone * info.zone_size + info.zone_cap - 3, pattern_read_4 + info.lba_size * (info.zone_cap - 3), 
info.lba_size * 6); DEBUG_TEST_PRINT("zone friction part 5: read 1 zoneborder + 3 ", rc); VALID(rc); - rc = szd_read(*qpair, min_zone * info.zone_size + info.zone_cap + 3, + rc = szd_read(*manager, *qpair, min_zone * info.zone_size + info.zone_cap + 3, pattern_read_4 + info.lba_size * (info.zone_cap + 3), info.lba_size * 13); DEBUG_TEST_PRINT("zone friction part 6: read 1 zoneborder + 16 ", rc); @@ -458,9 +502,9 @@ int main(void) { for (uint64_t i = 0; i < info.lba_size * (info.zone_cap + 15); i++) { assert((char)(pattern_read_4)[i] == (char)(*pattern_3)[i]); } - szd_free(*pattern_3); - szd_free(pattern_read_4); - rc = szd_reset_all(*qpair); + szd_free(*manager, *pattern_3); + szd_free(*manager, pattern_read_4); + rc = szd_reset_all(*manager, *qpair); DEBUG_TEST_PRINT("reset all ", rc); VALID(rc); @@ -505,7 +549,7 @@ int main(void) { printf("----------------------CLOSE----------------------\n"); // destroy qpair - rc = szd_destroy_qpair(*qpair); + rc = szd_destroy_qpair(*manager, *qpair); DEBUG_TEST_PRINT("valid destroy code ", rc); VALID(rc); @@ -529,7 +573,6 @@ int main(void) { free(pattern_3); free(qpair); - free(manager); } #ifdef __cplusplus diff --git a/szd/core/tests/szd_full_path_test_writes.c b/szd/core/tests/szd_full_path_test_writes.c new file mode 100644 index 0000000..ba1e701 --- /dev/null +++ b/szd/core/tests/szd_full_path_test_writes.c @@ -0,0 +1,537 @@ + +/** + * \file general tests for the core SZD source. + * As it needs to deeply test state as well, the test is quite large and has + * unfortunately become messy. + * TODO: cleanup... 
+ */ +#ifdef __cplusplus +extern "C" { +#endif + +// Force assert() to stay active even in NDEBUG (release) builds. TODO: remove +#ifdef NDEBUG +#undef NDEBUG +#endif + +// TODO: use a testing framework or something else than raw assert +#include +#include +#include +#include +#include + +// TODO: disable by default +#define DEBUG +#ifdef DEBUG +#define DEBUG_TEST_PRINT(str, code) \ + do { \ + if ((code) == 0) { \ + printf("%s\x1B[32m%u\x1B[0m\n", (str), (code)); \ + } else { \ + printf("%s\x1B[31m%u\x1B[0m\n", (str), (code)); \ + } \ + } while (0) +#else +#define DEBUG_TEST_PRINT(str, code) \ + do { \ + } while (0) +#endif + +#define VALID(rc) assert((rc) == 0) +#define INVALID(rc) assert((rc) != 0) + +int write_pattern(char **pattern, QPair *qpair, int32_t size, int32_t jump) { + // if (*pattern != NULL) { + // szd_free(*pattern); + // } + *pattern = (char *)szd_calloc(qpair->man->info.lba_size, size, sizeof(char)); + if (*pattern == NULL) { + return 1; + } + for (int j = 0; j < size; j++) { + (*pattern)[j] = (j + jump) % 200; + } + return 0; +} + +typedef struct { + DeviceManager **manager; + uint64_t write_slba_start; + uint64_t alt_slba_start; + int32_t data_offset; + int32_t alt_offset; + int rc; +} thread_data; + +static pthread_mutex_t mut; +static uint8_t thread_barrier; +#define PLUS_THREAD_BARRIER(mut, bar) \ + pthread_mutex_lock(&mut); \ + bar += 1; \ + pthread_mutex_unlock(&mut); + +/* There will be 2 threads. One writes, reads and resets the first zone 200 +times. The second one, the second zone. Then the two will switch around to see +if they interfere. Hence the need for a barrier and a mutex. 
+*/ +void *worker_thread(void *arg) { + thread_data *dat = (thread_data *)arg; + DeviceManager **manager = dat->manager; + int rc; + QPair **qpair = (QPair **)calloc(1, sizeof(QPair *)); + rc = szd_create_qpair(*manager, qpair); + if (rc != 0) { + PLUS_THREAD_BARRIER(mut, thread_barrier); + dat->rc = rc; + pthread_exit(NULL); + } + uint64_t zone_size_bytes = + (*manager)->info.lba_size * (*manager)->info.zone_cap; + char **pattern_1 = (char **)calloc(1, sizeof(char **)); + rc = write_pattern(pattern_1, *qpair, zone_size_bytes, dat->data_offset); + if (rc != 0) { + PLUS_THREAD_BARRIER(mut, thread_barrier); + dat->rc = rc; + pthread_exit(NULL); + } + char *pattern_read_1 = (char *)szd_calloc((*qpair)->man->info.lba_size, + zone_size_bytes, sizeof(char *)); + if (pattern_read_1 == NULL) { + PLUS_THREAD_BARRIER(mut, thread_barrier); + dat->rc = rc; + pthread_exit(NULL); + } + // hammering + for (uint16_t i = 0; i < 200; i++) { + uint64_t wstart = dat->write_slba_start; + rc = szd_write(*qpair, &wstart, *pattern_1, zone_size_bytes); + if (rc != 0) { + PLUS_THREAD_BARRIER(mut, thread_barrier); + dat->rc = rc; + pthread_exit(NULL); + } + rc = szd_read(*qpair, dat->write_slba_start, pattern_read_1, + zone_size_bytes); + if (rc != 0) { + PLUS_THREAD_BARRIER(mut, thread_barrier); + dat->rc = rc; + pthread_exit(NULL); + } + if (memcmp(pattern_read_1, *pattern_1, zone_size_bytes) != 0) { + PLUS_THREAD_BARRIER(mut, thread_barrier); + dat->rc = 1; + pthread_exit(NULL); + } + if (i != 199) { + rc = szd_reset(*qpair, dat->write_slba_start); + if (rc != 0) { + PLUS_THREAD_BARRIER(mut, thread_barrier); + dat->rc = rc; + pthread_exit(NULL); + } + } + } + + PLUS_THREAD_BARRIER(mut, thread_barrier); + pthread_mutex_lock(&mut); + while (thread_barrier < 2) { + pthread_mutex_unlock(&mut); + pthread_mutex_lock(&mut); + } + pthread_mutex_unlock(&mut); + + szd_free(pattern_read_1); + pattern_read_1 = (char *)szd_calloc((*qpair)->man->info.lba_size, + zone_size_bytes, sizeof(char *)); + 
szd_free(*pattern_1); + rc = write_pattern(pattern_1, *qpair, zone_size_bytes, dat->alt_offset); + rc = szd_read(*qpair, dat->alt_slba_start, pattern_read_1, zone_size_bytes); + if (rc != 0) { + dat->rc = rc; + pthread_exit(NULL); + } + if (memcmp(pattern_read_1, *pattern_1, zone_size_bytes) != 0) { + PLUS_THREAD_BARRIER(mut, thread_barrier); + dat->rc = 1; + pthread_exit(NULL); + } + dat->rc = rc; + szd_destroy_qpair(*qpair); + free(qpair); + free(pattern_1); + pthread_exit(NULL); +} + +int main(void) { + int rc; + printf("----------------------INIT----------------------\n"); + uint64_t min_zone = 2, max_zone = 10; + DeviceOpenOptions open_opts = {min_zone, max_zone}; + DeviceManager **manager = (DeviceManager **)calloc(1, sizeof(DeviceManager)); + DeviceOptions opts = DeviceOptions_default; + rc = szd_init(manager, &opts); + DEBUG_TEST_PRINT("SPDK init ", rc); + VALID(rc); + + // find devices + printf("----------------------PROBE----------------------\n"); + char *device_to_use = NULL; + ProbeInformation **prober = + (ProbeInformation **)calloc(1, sizeof(ProbeInformation *)); + rc = szd_probe(*manager, prober); + DEBUG_TEST_PRINT("probe return code ", rc); + VALID(rc); + for (int i = 0; i < (*prober)->devices; i++) { + const char *is_zns = (*prober)->zns[i] ? "true" : "false"; + printf("Device found\n\tname:%s\n\tZNS device:%s\n", (*prober)->traddr[i], + is_zns); + if ((*prober)->zns[i]) { + if (device_to_use) { + free(device_to_use); + } + device_to_use = + (char *)calloc(strlen((*prober)->traddr[i]) + 1, sizeof(char)); + memcpy(device_to_use, (*prober)->traddr[i], strlen((*prober)->traddr[i])); + } + } + // dangerous! we must be absolutely sure that no other process is using this + // anymore. + szd_free_probe_information(*prober); + free(prober); + if (!device_to_use) { + printf("No ZNS Device found.\n Are you sure you have a ZNS device " + "connected?\n"); + assert(false); + } + printf("ZNS device %s found. 
This device will be used for the rest of the " + "test.\n", + device_to_use); + + rc = szd_reinit(manager); + DEBUG_TEST_PRINT("reinit return code ", rc); + VALID(rc); + + // init spdk + printf("----------------------OPENING DEVICE----------------------\n"); + // try non-existent device + rc = szd_open(*manager, "non-existent traddr", &open_opts); + DEBUG_TEST_PRINT("non-existent return code ", rc); + INVALID(rc); + + // try existing device + rc = szd_open(*manager, device_to_use, &open_opts); + DEBUG_TEST_PRINT("existing return code ", rc); + VALID(rc); + free(device_to_use); + + // ensure that everything from this device is OK + assert((*manager)->ctrlr != NULL); + assert((*manager)->ns != NULL); + assert((*manager)->info.lba_size > 0); + assert((*manager)->info.mdts > 0); + assert((*manager)->info.zasl > 0); + assert((*manager)->info.zone_size > 0); + assert((*manager)->info.zone_cap > 0); + assert((*manager)->info.lba_cap > 0); + + // create qpair + QPair **qpair = (QPair **)calloc(1, sizeof(QPair *)); + rc = szd_create_qpair(*manager, qpair); + DEBUG_TEST_PRINT("Qpair creation code ", rc); + VALID(rc); + assert(qpair != NULL); + + // get and verify data (based on ZNS QEMU image) + DeviceInfo info = {}; + rc = szd_get_device_info(&info, *manager); + DEBUG_TEST_PRINT("get info code ", rc); + VALID(rc); + printf("lba size is %ld\n", info.lba_size); + printf("zone size is %ld\n", info.zone_size); + printf("zone cap is %ld\n", info.zone_cap); + printf("mdts is %ld\n", info.mdts); + printf("zasl is %ld\n", info.zasl); + printf("lba_cap is %ld\n", info.lba_cap); + printf("min lba is %ld\n", info.min_lba); + printf("max lba is %ld\n", info.max_lba); + + uint64_t write_head; + uint64_t append_head; + printf("----------------------WORKLOAD SMALL----------------------\n"); + // make space by resetting the device zones + rc = szd_reset_all(*qpair); + DEBUG_TEST_PRINT("reset all code ", rc); + VALID(rc); + rc = szd_get_zone_head(*qpair, min_zone * info.zone_size, 
&write_head); + DEBUG_TEST_PRINT("min zone head ", rc); + VALID(rc); + assert(write_head == min_zone * info.zone_size); + char **pattern_1 = (char **)calloc(1, sizeof(char **)); + rc = write_pattern(pattern_1, *qpair, info.lba_size, 10); + VALID(rc); + append_head = min_zone * info.zone_size; + rc = szd_write(*qpair, &append_head, *pattern_1, info.lba_size); + DEBUG_TEST_PRINT("append alligned ", rc); + VALID(rc); + rc = szd_get_zone_head(*qpair, min_zone * info.zone_size, &write_head); + VALID(rc); + assert(write_head == min_zone * info.zone_size + 1); + char **pattern_2 = (char **)calloc(1, sizeof(char **)); + rc = write_pattern(pattern_2, *qpair, info.zasl, 13); + VALID(rc); + rc = szd_write(*qpair, &append_head, *pattern_2, info.zasl); + DEBUG_TEST_PRINT("append zasl ", rc); + VALID(rc); + rc = szd_get_zone_head(*qpair, min_zone * info.zone_size, &write_head); + VALID(rc); + assert(write_head == + min_zone * info.zone_size + 1 + info.zasl / info.lba_size); + char *pattern_read_1 = (char *)szd_calloc((*qpair)->man->info.lba_size, + info.lba_size, sizeof(char *)); + rc = szd_read(*qpair, min_zone * info.zone_size, pattern_read_1, + info.lba_size); + DEBUG_TEST_PRINT("read alligned ", rc); + VALID(rc); + for (uint64_t i = 0; i < info.lba_size; i++) { + assert((char)(pattern_read_1)[i] == (char)(*pattern_1)[i]); + } + szd_free(*pattern_1); + szd_free(pattern_read_1); + char *pattern_read_2 = (char *)szd_calloc((*qpair)->man->info.lba_size, + info.zasl, sizeof(char *)); + rc = szd_read(*qpair, min_zone * info.zone_size + 1, pattern_read_2, + info.zasl); + DEBUG_TEST_PRINT("read zasl ", rc); + VALID(rc); + for (uint64_t i = 0; i < info.zasl; i++) { + assert((char)(pattern_read_2)[i] == (char)(*pattern_2)[i]); + } + szd_free(*pattern_2); + rc = szd_reset_all(*qpair); + DEBUG_TEST_PRINT("reset all ", rc); + VALID(rc); + rc = szd_read(*qpair, min_zone * info.zone_size + 1, pattern_read_2, + info.zasl); + DEBUG_TEST_PRINT("verify empty first zone ", rc); + VALID(rc); + 
for (uint64_t i = 0; i < info.zasl; i++) { + assert((char)(pattern_read_2)[i] == 0); + } + szd_free(pattern_read_2); + + append_head = min_zone * info.zone_size; + printf("----------------------WORKLOAD FILL----------------------\n"); + uint64_t number_of_zones = (info.max_lba - info.min_lba) / info.zone_size; + char **pattern_3 = (char **)calloc(1, sizeof(char **)); + rc = write_pattern(pattern_3, *qpair, + info.lba_size * number_of_zones * info.zone_cap, 19); + VALID(rc); + rc = szd_write(*qpair, &append_head, *pattern_3, + info.lba_size * number_of_zones * info.zone_cap); + DEBUG_TEST_PRINT("fill entire device ", rc); + VALID(rc); + for (uint64_t i = info.min_lba; i < info.max_lba; i += info.zone_size) { + rc = szd_get_zone_head(*qpair, i, &write_head); + VALID(rc); + assert(write_head == i + info.zone_size); + } + szd_free(*pattern_3); + char *pattern_read_3 = (char *)szd_calloc( + (*qpair)->man->info.lba_size, + info.lba_size * number_of_zones * info.zone_cap, sizeof(char *)); + assert(pattern_read_3 != NULL); + rc = szd_read(*qpair, min_zone * info.zone_size, pattern_read_3, + info.lba_size * number_of_zones * info.zone_cap); + DEBUG_TEST_PRINT("read entire device ", rc); + VALID(rc); + for (uint64_t i = 0; i < info.lba_size * number_of_zones * info.zone_cap; + i++) { + assert((char)(pattern_read_3)[i] == (char)(*pattern_3)[i]); + } + szd_free(pattern_read_3); + rc = szd_reset(*qpair, min_zone * info.zone_size + info.zone_size); + rc = szd_reset(*qpair, min_zone * info.zone_size + info.zone_size * 2) | rc; + DEBUG_TEST_PRINT("reset zone 2,3 ", rc); + VALID(rc); + rc = szd_get_zone_head(*qpair, min_zone * info.zone_size, &write_head); + VALID(rc); + assert(write_head == min_zone * info.zone_size + info.zone_size); + rc = szd_get_zone_head(*qpair, min_zone * info.zone_size + info.zone_size, + &write_head); + VALID(rc); + assert(write_head == min_zone * info.zone_size + info.zone_size); + rc = szd_get_zone_head(*qpair, min_zone * info.zone_size + info.zone_size 
* 2, + &write_head); + VALID(rc); + assert(write_head == min_zone * info.zone_size + info.zone_size * 2); + char *pattern_read_4 = + (char *)szd_calloc((*qpair)->man->info.lba_size, + info.lba_size * info.zone_cap, sizeof(char *)); + rc = szd_read(*qpair, info.zone_size * min_zone, pattern_read_4, + info.lba_size * info.zone_cap); + DEBUG_TEST_PRINT("read zone 1 ", rc); + VALID(rc); + for (uint64_t i = 0; i < info.lba_size * info.zone_cap; i++) { + assert((char)(pattern_read_4)[i] == (char)(*pattern_3)[i]); + } + rc = szd_read(*qpair, min_zone * info.zone_size + info.zone_size, + pattern_read_4, info.lba_size * info.zone_cap); + DEBUG_TEST_PRINT("read zone 2 ", rc); + VALID(rc); + for (uint64_t i = 0; i < info.lba_size * info.zone_cap; i++) { + assert((char)(pattern_read_4)[i] == 0); + } + rc = szd_read(*qpair, min_zone * info.zone_size + info.zone_size * 2, + pattern_read_4, info.lba_size * info.zone_cap); + DEBUG_TEST_PRINT("read zone 3 ", rc); + VALID(rc); + for (uint64_t i = 0; i < info.lba_size * info.zone_cap; i++) { + assert((char)(pattern_read_4)[i] == 0); + } + rc = szd_read(*qpair, min_zone * info.zone_size + info.zone_size * 3, + pattern_read_4, info.lba_size * info.zone_cap); + DEBUG_TEST_PRINT("read zone 4 ", rc); + VALID(rc); + // This ugly loop is necessary to prevent over-allocating DMA. We only want + // one lba at a time. 
+ rc = write_pattern(pattern_3, *qpair, info.lba_size, + 19 + info.zone_cap * 3 * info.lba_size); + for (uint64_t i = 0; i < info.lba_size * info.zone_cap; i++) { + if (i % info.lba_size == 0 && i > 0) { + szd_free(*pattern_3); + rc = write_pattern(pattern_3, *qpair, info.lba_size, + 19 + i + info.zone_cap * 3 * info.lba_size); + VALID(rc); + } + assert((char)(pattern_read_4)[i] == (char)(*pattern_3)[i % info.lba_size]); + } + rc = szd_reset_all(*qpair); + DEBUG_TEST_PRINT("reset all ", rc); + VALID(rc); + + append_head = min_zone * info.zone_size; + printf("----------------------WORKLOAD ZONE EDGE----------------------\n"); + rc = write_pattern(pattern_3, *qpair, info.lba_size * info.zone_cap * 2, 19); + rc = szd_write(*qpair, &append_head, *pattern_3, + info.lba_size * (info.zone_cap - 3)); + DEBUG_TEST_PRINT("zone friction part 1: append 1 zoneborder - 3 ", rc); + VALID(rc); + rc = szd_get_zone_head(*qpair, min_zone * info.zone_size, &write_head); + VALID(rc); + assert(write_head == min_zone * info.zone_size + info.zone_cap - 3); + rc = szd_write(*qpair, &append_head, + *pattern_3 + info.lba_size * (info.zone_cap - 3), + info.lba_size * 6); + DEBUG_TEST_PRINT("zone friction part 2: append 1 zoneborder + 6 ", rc); + VALID(rc); + rc = szd_get_zone_head(*qpair, min_zone * info.zone_size, &write_head); + VALID(rc); + assert(write_head == min_zone * info.zone_size + info.zone_size); + rc = szd_get_zone_head(*qpair, min_zone * info.zone_size + info.zone_size, + &write_head); + VALID(rc); + assert(write_head == min_zone * info.zone_size + info.zone_size + 3); + rc = szd_write(*qpair, &append_head, + *pattern_3 + info.lba_size * (info.zone_cap + 3), + info.lba_size * 13); + DEBUG_TEST_PRINT("zone friction part 3: append 1 zoneborder + 16 ", rc); + VALID(rc); + rc = szd_get_zone_head(*qpair, min_zone * info.zone_size + info.zone_size, + &write_head); + VALID(rc); + assert(write_head == min_zone * info.zone_size + info.zone_size + 16); + rc = szd_read(*qpair, min_zone * 
info.zone_size, pattern_read_4, + info.lba_size * (info.zone_cap - 3)); + DEBUG_TEST_PRINT("zone friction part 4: read 1 zoneborder - 3 ", rc); + VALID(rc); + rc = szd_read(*qpair, min_zone * info.zone_size + info.zone_cap - 3, + pattern_read_4 + info.lba_size * (info.zone_cap - 3), + info.lba_size * 6); + DEBUG_TEST_PRINT("zone friction part 5: read 1 zoneborder + 3 ", rc); + VALID(rc); + rc = szd_read(*qpair, min_zone * info.zone_size + info.zone_cap + 3, + pattern_read_4 + info.lba_size * (info.zone_cap + 3), + info.lba_size * 13); + DEBUG_TEST_PRINT("zone friction part 6: read 1 zoneborder + 16 ", rc); + VALID(rc); + for (uint64_t i = 0; i < info.lba_size * (info.zone_cap + 15); i++) { + assert((char)(pattern_read_4)[i] == (char)(*pattern_3)[i]); + } + szd_free(*pattern_3); + szd_free(pattern_read_4); + rc = szd_reset_all(*qpair); + DEBUG_TEST_PRINT("reset all ", rc); + VALID(rc); + + printf( + "----------------------WORKLOAD MULTITHREADING----------------------\n"); + printf("This might take a time...\n"); + pthread_mutex_init(&mut, NULL); + pthread_t thread1; + thread_data first_thread_dat = {.manager = manager, + .write_slba_start = min_zone * info.zone_size, + .alt_slba_start = min_zone * info.zone_size + + 2 * info.zone_size, + .data_offset = 3, + .alt_offset = 9, + .rc = 0}; + rc = pthread_create(&thread1, NULL, worker_thread, (void *)&first_thread_dat); + VALID(rc); + pthread_t thread2; + thread_data second_thread_dat = { + .manager = manager, + .write_slba_start = min_zone * info.zone_size + 2 * info.zone_size, + .alt_slba_start = min_zone * info.zone_size, + .data_offset = 9, + .alt_offset = 3, + .rc = 0}; + rc = + pthread_create(&thread2, NULL, worker_thread, (void *)&second_thread_dat); + VALID(rc); + + if (pthread_join(thread1, NULL) != 0) { + DEBUG_TEST_PRINT("Error in thread1 ", 1); + } + rc = first_thread_dat.rc; + DEBUG_TEST_PRINT("thread 2 writes and reads ", rc); + VALID(rc); + if (pthread_join(thread2, NULL) != 0) { + DEBUG_TEST_PRINT("Error 
in thread2 ", 1); + } + rc = second_thread_dat.rc; + DEBUG_TEST_PRINT("thread 3 writes and reads ", rc); + VALID(rc); + + printf("----------------------CLOSE----------------------\n"); + // destroy qpair + rc = szd_destroy_qpair(*qpair); + DEBUG_TEST_PRINT("valid destroy code ", rc); + VALID(rc); + + // close device + rc = szd_close(*manager); + DEBUG_TEST_PRINT("valid close code ", rc); + VALID(rc); + + // can not close twice + rc = szd_close(*manager); + DEBUG_TEST_PRINT("invalid close code ", rc); + INVALID(rc); + + rc = szd_destroy(*manager); + DEBUG_TEST_PRINT("valid shutdown code ", rc); + VALID(rc); + + // cleanup local + free(pattern_1); + free(pattern_2); + free(pattern_3); + + free(qpair); + free(manager); +} + +#ifdef __cplusplus +} +#endif diff --git a/szd/cpp/include/szd/datastructures/szd_buffer.hpp b/szd/cpp/include/szd/datastructures/szd_buffer.hpp index 20d0585..61b0035 100644 --- a/szd/cpp/include/szd/datastructures/szd_buffer.hpp +++ b/szd/cpp/include/szd/datastructures/szd_buffer.hpp @@ -13,7 +13,7 @@ namespace SIMPLE_ZNS_DEVICE_NAMESPACE { class SZDBuffer { public: - SZDBuffer(size_t size, uint64_t lba_size); + SZDBuffer(EngineManager* em, size_t size, uint64_t lba_size); // No copying or implicits SZDBuffer(const SZDBuffer &) = delete; SZDBuffer &operator=(const SZDBuffer &) = delete; @@ -49,6 +49,7 @@ class SZDBuffer { uint64_t lba_size_; void *backed_memory_; size_t backed_memory_size_; + EngineManager* em_; }; } // namespace SIMPLE_ZNS_DEVICE_NAMESPACE diff --git a/szd/cpp/include/szd/szd_channel.hpp b/szd/cpp/include/szd/szd_channel.hpp index bc0f491..fea0405 100644 --- a/szd/cpp/include/szd/szd_channel.hpp +++ b/szd/cpp/include/szd/szd_channel.hpp @@ -21,10 +21,10 @@ namespace SIMPLE_ZNS_DEVICE_NAMESPACE { */ class SZDChannel { public: - SZDChannel(std::unique_ptr qpair, const DeviceInfo &info, + SZDChannel(SZD::EngineManager *em, std::unique_ptr qpair, const DeviceInfo &info, uint64_t min_lba, uint64_t max_lba, bool keep_async_buffer = 
false, uint32_t queue_depth = 1); - SZDChannel(std::unique_ptr qpair, const DeviceInfo &info, + SZDChannel(SZD::EngineManager *em, std::unique_ptr qpair, const DeviceInfo &info, bool keep_async_buffer = false, uint32_t queue_depth = 1); // No copying or implicits SZDChannel(const SZDChannel &) = delete; @@ -105,6 +105,7 @@ class SZDChannel { private: QPair *qpair_; + SZD::EngineManager *em_; uint64_t lba_size_; uint64_t zasl_; uint64_t mdts_; diff --git a/szd/cpp/include/szd/szd_channel_factory.hpp b/szd/cpp/include/szd/szd_channel_factory.hpp index e866d14..572e94c 100644 --- a/szd/cpp/include/szd/szd_channel_factory.hpp +++ b/szd/cpp/include/szd/szd_channel_factory.hpp @@ -17,7 +17,7 @@ namespace SIMPLE_ZNS_DEVICE_NAMESPACE { */ class SZDChannelFactory { public: - SZDChannelFactory(DeviceManager *device_manager, size_t max_channel_count); + SZDChannelFactory(SZD::EngineManager *em, size_t max_channel_count); ~SZDChannelFactory(); // No copying or implicits SZDChannelFactory(const SZDChannelFactory &) = delete; @@ -45,7 +45,7 @@ class SZDChannelFactory { private: size_t max_channel_count_; size_t channel_count_; - DeviceManager *device_manager_; + EngineManager *em_; size_t refs_; }; } // namespace SIMPLE_ZNS_DEVICE_NAMESPACE diff --git a/szd/cpp/include/szd/szd_device.hpp b/szd/cpp/include/szd/szd_device.hpp index 708829b..ce8bedf 100644 --- a/szd/cpp/include/szd/szd_device.hpp +++ b/szd/cpp/include/szd/szd_device.hpp @@ -36,7 +36,7 @@ class SZDDevice { SZDStatus GetInfo(DeviceInfo *info) const; SZDStatus Destroy(); - inline DeviceManager *GetDeviceManager() { + inline EngineManager *GetEngineManager() { return initialised_device_ ? 
*manager_ : nullptr; } @@ -45,7 +45,7 @@ class SZDDevice { // state bool initialised_device_; bool device_opened_; - SZD::DeviceManager **manager_; + SZD::EngineManager **manager_; std::string opened_device_; }; diff --git a/szd/cpp/src/datastructures/szd_buffer.cpp b/szd/cpp/src/datastructures/szd_buffer.cpp index fb0c0a9..e5a6e36 100644 --- a/szd/cpp/src/datastructures/szd_buffer.cpp +++ b/szd/cpp/src/datastructures/szd_buffer.cpp @@ -8,12 +8,12 @@ namespace SIMPLE_ZNS_DEVICE_NAMESPACE { -SZDBuffer::SZDBuffer(size_t size, uint64_t lba_size) - : lba_size_(lba_size), backed_memory_(nullptr), backed_memory_size_(size) { +SZDBuffer::SZDBuffer(SZD::EngineManager *em, size_t size, uint64_t lba_size) + : lba_size_(lba_size), backed_memory_(nullptr), backed_memory_size_(size), em_(em) { backed_memory_size_ = ((backed_memory_size_ + lba_size_ - 1) / lba_size_) * lba_size_; if (backed_memory_size_ != 0) { - backed_memory_ = szd_calloc(lba_size_, 1, backed_memory_size_); + backed_memory_ = szd_calloc(em_, lba_size_, 1, backed_memory_size_); } // idle state (can also be because of bad malloc!) 
if (backed_memory_ == nullptr) { @@ -22,7 +22,7 @@ SZDBuffer::SZDBuffer(size_t size, uint64_t lba_size) } SZDBuffer::~SZDBuffer() { if (backed_memory_ != nullptr && backed_memory_size_ > 0) { - szd_free(backed_memory_); + szd_free(em_, backed_memory_); } } @@ -81,7 +81,7 @@ SZDStatus SZDBuffer::ReallocBuffer(uint64_t size) { return s; } } - backed_memory_ = szd_calloc(lba_size_, alligned_size, sizeof(char)); + backed_memory_ = szd_calloc(em_, lba_size_, alligned_size, sizeof(char)); if (szd_unlikely(backed_memory_ == nullptr)) { backed_memory_size_ = 0; SZD_LOG_ERROR("SZD: Buffer: ReallocBuffer: Failed allocating memory\n"); @@ -98,7 +98,7 @@ SZDStatus SZDBuffer::FreeBuffer() { if (backed_memory_size_ == 0) { return SZDStatus::Success; } - szd_free(backed_memory_); + szd_free(em_, backed_memory_); backed_memory_ = nullptr; backed_memory_size_ = 0; return SZDStatus::Success; diff --git a/szd/cpp/src/datastructures/szd_circular_log.cpp b/szd/cpp/src/datastructures/szd_circular_log.cpp index 19fc0c7..1d010d8 100644 --- a/szd/cpp/src/datastructures/szd_circular_log.cpp +++ b/szd/cpp/src/datastructures/szd_circular_log.cpp @@ -400,7 +400,8 @@ SZDStatus SZDCircularLog::RecoverPointers() { // Retrieve zone heads from the device std::vector zone_heads; - s = reset_channel_->ZoneHeads(min_zone_head_, max_zone_head_ - zone_cap_, &zone_heads); + s = reset_channel_->ZoneHeads(min_zone_head_, max_zone_head_ - zone_cap_, + &zone_heads); if (szd_unlikely(s != SZDStatus::Success)) { SZD_LOG_ERROR("SZD: Once log: Recover pointers\n"); return s; diff --git a/szd/cpp/src/szd_channel.cpp b/szd/cpp/src/szd_channel.cpp index 55bf3d7..b24c908 100644 --- a/szd/cpp/src/szd_channel.cpp +++ b/szd/cpp/src/szd_channel.cpp @@ -8,10 +8,10 @@ namespace SIMPLE_ZNS_DEVICE_NAMESPACE { -SZDChannel::SZDChannel(std::unique_ptr qpair, const DeviceInfo &info, +SZDChannel::SZDChannel(SZD::EngineManager *em, std::unique_ptr qpair, const DeviceInfo &info, uint64_t min_lba, uint64_t max_lba, bool 
keep_async_buffer, uint32_t queue_depth) - : qpair_(qpair.release()), lba_size_(info.lba_size), zasl_(info.zasl), + : qpair_(qpair.release()), em_(em), lba_size_(info.lba_size), zasl_(info.zasl), mdts_(info.mdts), zone_size_(info.zone_size), zone_cap_(info.zone_cap), min_lba_(min_lba), max_lba_(max_lba), can_access_all_(false), backed_memory_spill_(nullptr), lba_msb_(msb(info.lba_size)), @@ -30,7 +30,7 @@ SZDChannel::SZDChannel(std::unique_ptr qpair, const DeviceInfo &info, can_access_all_ = true; } // Setup all buffers - backed_memory_spill_ = szd_calloc(lba_size_, 1, lba_size_); + backed_memory_spill_ = szd_calloc(em_, lba_size_, 1, lba_size_); completion_ = new Completion *[queue_depth_]; async_buffer_ = (void **)(new char **[queue_depth_]); async_buffer_size_ = new size_t[queue_depth_]; @@ -56,9 +56,9 @@ SZDChannel::SZDChannel(std::unique_ptr qpair, const DeviceInfo &info, #endif } -SZDChannel::SZDChannel(std::unique_ptr qpair, const DeviceInfo &info, +SZDChannel::SZDChannel(SZD::EngineManager *em, std::unique_ptr qpair, const DeviceInfo &info, bool keep_async_buffer, uint32_t queue_depth) - : SZDChannel(std::move(qpair), info, 0, info.lba_cap, keep_async_buffer, + : SZDChannel(em, std::move(qpair), info, 0, info.lba_cap, keep_async_buffer, queue_depth) {} SZDChannel::~SZDChannel() { @@ -70,7 +70,7 @@ SZDChannel::~SZDChannel() { if (keep_async_buffer_ && async_buffer_ != nullptr) { for (uint32_t i = 0; i < queue_depth_; i++) { if (async_buffer_[i] != nullptr) { - szd_free(async_buffer_[i]); + szd_free(em_, async_buffer_[i]); } if (completion_[i] != nullptr) { SZD_LOG_ERROR( @@ -82,11 +82,11 @@ SZDChannel::~SZDChannel() { delete[] async_buffer_; delete[] async_buffer_size_; if (backed_memory_spill_ != nullptr) { - szd_free(backed_memory_spill_); + szd_free(em_, backed_memory_spill_); backed_memory_spill_ = nullptr; } if (qpair_ != nullptr) { - szd_destroy_qpair(qpair_); + szd_destroy_qpair(em_, qpair_); } } @@ -144,11 +144,11 @@ SZDStatus 
SZDChannel::FlushBufferSection(uint64_t *lba, const SZDBuffer &buffer, int rc = 0; if (prefix_size > 0) { #ifdef SZD_PERF_COUNTERS - rc = szd_append_with_diag(qpair_, &new_lba, (char *)cbuffer + addr, + rc = szd_append_with_diag(em_, qpair_, &new_lba, (char *)cbuffer + addr, prefix_size, &append_ops); bytes_written_.fetch_add(prefix_size, std::memory_order_relaxed); #else - rc = szd_append(qpair_, &new_lba, (char *)cbuffer + addr, prefix_size); + rc = szd_append(em_, qpair_, &new_lba, (char *)cbuffer + addr, prefix_size); #endif } memset((char *)backed_memory_spill_ + postfix_size, 0, @@ -156,21 +156,21 @@ SZDStatus SZDChannel::FlushBufferSection(uint64_t *lba, const SZDBuffer &buffer, memcpy(backed_memory_spill_, (char *)cbuffer + addr + prefix_size, postfix_size); #ifdef SZD_PERF_COUNTERS - rc = rc | szd_append_with_diag(qpair_, &new_lba, backed_memory_spill_, + rc = rc | szd_append_with_diag(em_, qpair_, &new_lba, backed_memory_spill_, lba_size_, &append_ops); bytes_written_.fetch_add(lba_size_, std::memory_order_relaxed); #else - rc = rc | szd_append(qpair_, &new_lba, backed_memory_spill_, lba_size_); + rc = rc | szd_append(em_, qpair_, &new_lba, backed_memory_spill_, lba_size_); #endif s = FromStatus(rc); } else { #ifdef SZD_PERF_COUNTERS - s = FromStatus(szd_append_with_diag( + s = FromStatus(szd_append_with_diag(em_, qpair_, &new_lba, (char *)cbuffer + addr, alligned_size, &append_ops)); bytes_written_.fetch_add(alligned_size, std::memory_order_relaxed); #else s = FromStatus( - szd_append(qpair_, &new_lba, (char *)cbuffer + addr, alligned_size)); + szd_append(em_, qpair_, &new_lba, (char *)cbuffer + addr, alligned_size)); #endif } @@ -231,23 +231,23 @@ SZDStatus SZDChannel::ReadIntoBuffer(uint64_t lba, SZDBuffer *buffer, if (alligned_size > 0) { #ifdef SZD_PERF_COUNTERS uint64_t read_ops = 0; - rc = szd_read_with_diag(qpair_, lba, (char *)cbuffer + addr, + rc = szd_read_with_diag(em_, qpair_, lba, (char *)cbuffer + addr, alligned_size, &read_ops); 
bytes_read_.fetch_add(alligned_size, std::memory_order_relaxed); read_operations_.fetch_add(read_ops, std::memory_order_relaxed); #else - rc = szd_read(qpair_, lba, (char *)cbuffer + addr, alligned_size); + rc = szd_read(em_, qpair_, lba, (char *)cbuffer + addr, alligned_size); #endif } #ifdef SZD_PERF_COUNTERS uint64_t read_ops = 0; - rc = rc | szd_read_with_diag(qpair_, lba + alligned_size / lba_size_, + rc = rc | szd_read_with_diag(em_, qpair_, lba + alligned_size / lba_size_, (char *)backed_memory_spill_, lba_size_, &read_ops); bytes_read_.fetch_add(lba_size_, std::memory_order_relaxed); read_operations_.fetch_add(read_ops, std::memory_order_relaxed); #else - rc = rc | szd_read(qpair_, lba + alligned_size / lba_size_, + rc = rc | szd_read(em_, qpair_, lba + alligned_size / lba_size_, (char *)backed_memory_spill_, lba_size_); #endif s = FromStatus(rc); @@ -258,13 +258,13 @@ SZDStatus SZDChannel::ReadIntoBuffer(uint64_t lba, SZDBuffer *buffer, } else { #ifdef SZD_PERF_COUNTERS uint64_t read_ops = 0; - s = FromStatus(szd_read_with_diag(qpair_, lba, (char *)cbuffer + addr, + s = FromStatus(szd_read_with_diag(em_, qpair_, lba, (char *)cbuffer + addr, alligned_size, &read_ops)); bytes_read_.fetch_add(alligned_size, std::memory_order_relaxed); read_operations_.fetch_add(read_ops, std::memory_order_relaxed); #else s = FromStatus( - szd_read(qpair_, lba, (char *)cbuffer + addr, alligned_size)); + szd_read(em_, qpair_, lba, (char *)cbuffer + addr, alligned_size)); #endif } return s; @@ -288,7 +288,7 @@ SZDStatus SZDChannel::DirectAppend(uint64_t *lba, void *buffer, } // Create temporary DMA buffer of maximum ZASL size size_t dma_buffer_size = zasl_ > alligned_size ? 
alligned_size : zasl_; - void *dma_buffer = szd_calloc(lba_size_, dma_buffer_size, 1); + void *dma_buffer = szd_calloc(em_, lba_size_, dma_buffer_size, 1); if (szd_unlikely(dma_buffer == nullptr)) { SZD_LOG_ERROR("SZD: Channel: DirectAppend: No DMA buffer\n"); return SZDStatus::MemoryError; @@ -311,7 +311,7 @@ SZDStatus SZDChannel::DirectAppend(uint64_t *lba, void *buffer, #ifdef SZD_PERF_PER_ZONE_COUNTERS uint64_t prev_lba = new_lba; #endif - s = FromStatus(szd_append_with_diag(qpair_, &new_lba, dma_buffer, stepsize, + s = FromStatus(szd_append_with_diag(em_, qpair_, &new_lba, dma_buffer, stepsize, &append_ops)); if (s == SZDStatus::Success) { bytes_written_.fetch_add(stepsize, std::memory_order_relaxed); @@ -328,7 +328,7 @@ SZDStatus SZDChannel::DirectAppend(uint64_t *lba, void *buffer, #endif } #else - s = FromStatus(szd_append(qpair_, &new_lba, dma_buffer, stepsize)); + s = FromStatus(szd_append(em_, qpair_, &new_lba, dma_buffer, stepsize)); #endif if (szd_unlikely(s != SZDStatus::Success)) { @@ -338,7 +338,7 @@ SZDStatus SZDChannel::DirectAppend(uint64_t *lba, void *buffer, begin += stepsize; } // Remove temporary buffer. - szd_free(dma_buffer); + szd_free(em_, dma_buffer); *lba = TranslatePbaToLba(new_lba); return s; } @@ -360,7 +360,7 @@ SZDStatus SZDChannel::DirectRead(uint64_t lba, void *buffer, uint64_t size, } // Create temporary DMA buffer to copy other DMA buffer data into. size_t dma_buffer_size = mdts_ > alligned_size ? 
alligned_size : mdts_; - void *buffer_dma = szd_calloc(lba_size_, 1, dma_buffer_size); + void *buffer_dma = szd_calloc(em_, lba_size_, 1, dma_buffer_size); if (szd_unlikely(buffer_dma == nullptr)) { SZD_LOG_ERROR("SZD: Channel: DirectRead: OOM\n"); return SZDStatus::MemoryError; @@ -384,12 +384,12 @@ SZDStatus SZDChannel::DirectRead(uint64_t lba, void *buffer, uint64_t size, } #ifdef SZD_PERF_COUNTERS uint64_t read_ops = 0; - s = FromStatus(szd_read_with_diag(qpair_, lba_to_read, buffer_dma, stepsize, + s = FromStatus(szd_read_with_diag(em_, qpair_, lba_to_read, buffer_dma, stepsize, &read_ops)); read_operations_.fetch_add(read_ops, std::memory_order_relaxed); bytes_read_.fetch_add(stepsize, std::memory_order_relaxed); #else - s = FromStatus(szd_read(qpair_, lba_to_read, buffer_dma, stepsize)); + s = FromStatus(szd_read(em_, qpair_, lba_to_read, buffer_dma, stepsize)); #endif if (szd_likely(s == SZDStatus::Success)) { memcpy((char *)buffer + begin, buffer_dma, alligned_step); @@ -406,7 +406,7 @@ SZDStatus SZDChannel::DirectRead(uint64_t lba, void *buffer, uint64_t size, } } // Remove temporary buffer. - szd_free(buffer_dma); + szd_free(em_, buffer_dma); return s; } @@ -438,12 +438,12 @@ SZDStatus SZDChannel::AsyncAppend(uint64_t *lba, void *buffer, // Create temporary DMA buffer and copy normal buffer to DMA. 
if (keep_async_buffer_ && async_buffer_size_[writer] < alligned_size) { if (async_buffer_[writer] != nullptr) { - szd_free(async_buffer_[writer]); + szd_free(em_, async_buffer_[writer]); } - async_buffer_[writer] = szd_calloc(lba_size_, 1, alligned_size); + async_buffer_[writer] = szd_calloc(em_, lba_size_, 1, alligned_size); async_buffer_size_[writer] = alligned_size; } else if (!keep_async_buffer_) { - async_buffer_[writer] = szd_calloc(lba_size_, 1, alligned_size); + async_buffer_[writer] = szd_calloc(em_, lba_size_, 1, alligned_size); } else { memset(async_buffer_[writer], 0, async_buffer_size_[writer]); } @@ -453,13 +453,16 @@ SZDStatus SZDChannel::AsyncAppend(uint64_t *lba, void *buffer, } memcpy(async_buffer_[writer], buffer, size); if (completion_[writer] != nullptr) { + printf("delete completion (new) %u\n", writer); delete completion_[writer]; } completion_[writer] = new Completion; + completion_[writer]->id = writer; + printf("create completion %u\n", writer); SZDStatus s = SZDStatus::Success; #ifdef SZD_PERF_COUNTERS uint64_t append_ops = 0; - s = FromStatus(szd_append_async_with_diag( + s = FromStatus(szd_append_async_with_diag(em_, qpair_, &new_lba, async_buffer_[writer], alligned_size, &append_ops, completion_[writer])); if (s == SZDStatus::Success) { @@ -478,7 +481,7 @@ SZDStatus SZDChannel::AsyncAppend(uint64_t *lba, void *buffer, #endif } #else - s = FromStatus(szd_append_async(qpair_, &new_lba, async_buffer_[writer], + s = FromStatus(szd_append_async(em_, qpair_, &new_lba, async_buffer_[writer], alligned_size, completion_[writer])); #endif outstanding_requests_++; @@ -494,13 +497,14 @@ bool SZDChannel::PollOnce(uint32_t writer) { if (completion_[writer] == nullptr) { return true; } - szd_poll_once(qpair_, completion_[writer]); + szd_poll_once(em_, qpair_, completion_[writer]); if (completion_[writer]->done || completion_[writer]->err != 0) { // Remove temporary buffer. 
if (!keep_async_buffer_) { - szd_free(async_buffer_[writer]); + szd_free(em_, async_buffer_[writer]); async_buffer_[writer] = nullptr; } + printf("delete completion (done poll once) %u\n", writer); delete completion_[writer]; completion_[writer] = nullptr; outstanding_requests_--; @@ -510,7 +514,7 @@ bool SZDChannel::PollOnce(uint32_t writer) { } bool SZDChannel::FindFreeWriter(uint32_t *any_writer) { - szd_poll_once_raw(qpair_); + szd_poll_once_raw(em_, qpair_); for (uint32_t i = 0; i < queue_depth_; i++) { if (completion_[i] == nullptr) { *any_writer = i; @@ -519,9 +523,10 @@ bool SZDChannel::FindFreeWriter(uint32_t *any_writer) { if (completion_[i]->err != 0x0 || completion_[i]->done) { // Remove temporary buffer. if (!keep_async_buffer_) { - szd_free(async_buffer_[i]); + szd_free(em_, async_buffer_[i]); async_buffer_[i] = nullptr; } + printf("delete completion (find) %u\n", i); delete completion_[i]; completion_[i] = nullptr; *any_writer = i; @@ -542,16 +547,17 @@ SZDStatus SZDChannel::Sync() { if (completion_[i] == nullptr) { continue; } - s = FromStatus(szd_poll_async(qpair_, completion_[i])); + s = FromStatus(szd_poll_async(em_, qpair_, completion_[i])); if (szd_unlikely(s != SZDStatus::Success)) { SZD_LOG_ERROR("SZD: Channel: Sync: Failed a poll\n"); break; } // Remove temporary buffer. 
if (!keep_async_buffer_) { - szd_free(async_buffer_[i]); + szd_free(em_, async_buffer_[i]); async_buffer_[i] = nullptr; } + printf("delete completion (sync) %u\n", i); delete completion_[i]; completion_[i] = nullptr; outstanding_requests_--; @@ -565,7 +571,7 @@ SZDStatus SZDChannel::ResetZone(uint64_t slba) { SZD_LOG_ERROR("SZD: Channel: ResetZone: OOB\n"); return SZDStatus::InvalidArguments; } - SZDStatus s = FromStatus(szd_reset(qpair_, slba)); + SZDStatus s = FromStatus(szd_reset(em_, qpair_, slba)); #ifdef SZD_PERF_COUNTERS zones_reset_counter_.fetch_add(1, std::memory_order_relaxed); #ifdef SZD_PERF_PER_ZONE_COUNTERS @@ -581,7 +587,7 @@ SZDStatus SZDChannel::ResetAllZones() { // zones one by one. if (!can_access_all_) { for (uint64_t slba = min_lba_; slba != max_lba_; slba += zone_size_) { - if ((s = FromStatus(szd_reset(qpair_, slba))) != SZDStatus::Success) { + if ((s = FromStatus(szd_reset(em_, qpair_, slba))) != SZDStatus::Success) { SZD_LOG_ERROR("SZD: Channel: ResetAllZones: OOB\n"); return s; } @@ -593,7 +599,7 @@ SZDStatus SZDChannel::ResetAllZones() { #endif } } else { - s = FromStatus(szd_reset_all(qpair_)); + s = FromStatus(szd_reset_all(em_, qpair_)); #ifdef SZD_PERF_COUNTERS zones_reset_counter_.fetch_add((max_lba_ - min_lba_) / zone_size_, std::memory_order_relaxed); @@ -613,7 +619,7 @@ SZDStatus SZDChannel::ZoneHead(uint64_t slba, uint64_t *zone_head) { SZD_LOG_ERROR("SZD: Channel: ZoneHead: OOB\n"); return SZDStatus::InvalidArguments; } - SZDStatus s = FromStatus(szd_get_zone_head(qpair_, slba, zone_head)); + SZDStatus s = FromStatus(szd_get_zone_head(em_, qpair_, slba, zone_head)); *zone_head = TranslatePbaToLba(*zone_head); return s; } @@ -630,7 +636,7 @@ SZDStatus SZDChannel::ZoneHeads(uint64_t slba, uint64_t eslba, uint64_t head_size = (eslba - slba) / zone_size_ + 1; uint64_t *zone_heads_c = new uint64_t[head_size]; SZDStatus s = - FromStatus(szd_get_zone_heads(qpair_, slba, eslba, zone_heads_c)); + FromStatus(szd_get_zone_heads(em_, 
qpair_, slba, eslba, zone_heads_c)); if (s == SZDStatus::Success) { for (uint64_t i = 0; i < head_size; i++) { zone_heads->push_back(TranslatePbaToLba(zone_heads_c[i])); @@ -648,7 +654,7 @@ SZDStatus SZDChannel::FinishZone(uint64_t slba) { SZD_LOG_ERROR("SZD: Channel: FinishZone: OOB\n"); return SZDStatus::InvalidArguments; } - SZDStatus s = FromStatus(szd_finish_zone(qpair_, slba)); + SZDStatus s = FromStatus(szd_finish_zone(em_, qpair_, slba)); return s; } diff --git a/szd/cpp/src/szd_channel_factory.cpp b/szd/cpp/src/szd_channel_factory.cpp index 3073c21..f9c1368 100644 --- a/szd/cpp/src/szd_channel_factory.cpp +++ b/szd/cpp/src/szd_channel_factory.cpp @@ -7,10 +7,10 @@ #include namespace SIMPLE_ZNS_DEVICE_NAMESPACE { -SZDChannelFactory::SZDChannelFactory(DeviceManager *device_manager, +SZDChannelFactory::SZDChannelFactory(EngineManager *em, size_t max_channel_count) : max_channel_count_(max_channel_count), channel_count_(0), - device_manager_(device_manager), refs_(0) {} + em_(em), refs_(0) {} SZDChannelFactory::~SZDChannelFactory() {} SZDStatus SZDChannelFactory::register_raw_qpair(QPair **qpair) { @@ -18,7 +18,7 @@ SZDStatus SZDChannelFactory::register_raw_qpair(QPair **qpair) { SZD_LOG_ERROR("SZD: Channel factory: Too many QPairs\n"); return SZDStatus::InvalidArguments; } - SZDStatus s = FromStatus(szd_create_qpair(device_manager_, qpair)); + SZDStatus s = FromStatus(szd_create_qpair(em_, qpair)); if (s == SZDStatus::Success) { channel_count_++; } @@ -26,7 +26,7 @@ SZDStatus SZDChannelFactory::register_raw_qpair(QPair **qpair) { } SZDStatus SZDChannelFactory::unregister_raw_qpair(QPair *qpair) { - SZDStatus s = FromStatus(szd_destroy_qpair(qpair)); + SZDStatus s = FromStatus(szd_destroy_qpair(em_, qpair)); if (s == SZDStatus::Success) { channel_count_--; } @@ -44,15 +44,15 @@ SZDStatus SZDChannelFactory::register_channel(SZDChannel **channel, } SZDStatus s; QPair **qpair = new QPair *; - if ((s = FromStatus(szd_create_qpair(device_manager_, qpair))) != + if 
((s = FromStatus(szd_create_qpair(em_, qpair))) != SZDStatus::Success) { SZD_LOG_ERROR("SZD: Channel factory: Could not create QPair\n"); return s; } *channel = - new SZDChannel(std::unique_ptr(*qpair), device_manager_->info, - min_zone_nr * device_manager_->info.zone_size, - max_zone_nr * device_manager_->info.zone_size, + new SZDChannel(em_, std::unique_ptr(*qpair), em_->manager_->info, + min_zone_nr * em_->manager_->info.zone_size, + max_zone_nr * em_->manager_->info.zone_size, preserve_async_buffer, channel_depth); channel_count_++; @@ -64,8 +64,8 @@ SZDStatus SZDChannelFactory::register_channel(SZDChannel **channel, bool preserve_async_buffer, uint32_t channel_depth) { return register_channel( - channel, device_manager_->info.min_lba / device_manager_->info.zone_size, - device_manager_->info.max_lba / device_manager_->info.zone_size, + channel, em_->manager_->info.min_lba / em_->manager_->info.zone_size, + em_->manager_->info.max_lba / em_->manager_->info.zone_size, preserve_async_buffer, channel_depth); } diff --git a/szd/cpp/src/szd_device.cpp b/szd/cpp/src/szd_device.cpp index a83f545..6c0936c 100644 --- a/szd/cpp/src/szd_device.cpp +++ b/szd/cpp/src/szd_device.cpp @@ -12,7 +12,7 @@ static bool dpdk_initialised = false; SZDDevice::SZDDevice(const std::string &application_name) : application_name_(application_name), initialised_device_(false), - device_opened_(false), manager_(new DeviceManager *), opened_device_() {} + device_opened_(false), manager_(new EngineManager *), opened_device_() {} SZDDevice::~SZDDevice() { if (initialised_device_ || device_opened_) { @@ -23,8 +23,11 @@ SZDDevice::~SZDDevice() { SZDStatus SZDDevice::Init() { DeviceOptions opts = {.name = application_name_.data(), - .setup_spdk = !dpdk_initialised}; - SZDStatus s = FromStatus(szd_init(manager_, &opts)); + .setup_spdk = !dpdk_initialised, + .iouring_sqthread = false, + .iouring_fixed = false, + }; + SZDStatus s = FromStatus(szd_init(manager_, &opts, SZD::SZD_IO_BACKEND_IO_URING)); 
if (s == SZDStatus::Success) { initialised_device_ = true; dpdk_initialised = true; @@ -44,13 +47,21 @@ SZDStatus SZDDevice::Reinit() { return s; } +typedef struct { + char **traddr; /**< transport ids of all probed devices.*/ + bool *zns; /**< Foreach probed device, is it a ZNS device?*/ + struct spdk_nvme_ctrlr **ctrlr; /**< The controller(s) of the devices.*/ + uint8_t devices; /**< Used to identify global device count.*/ + pthread_mutex_t *mut; /**< Ensures that probe information is thread safe.*/ +} ProbeInformation; + SZDStatus SZDDevice::Probe(std::vector &info) { if (!initialised_device_) { SZD_LOG_ERROR("SZD: Device: Probe: Invalid args\n"); return SZDStatus::InvalidArguments; } ProbeInformation **prober = new ProbeInformation *; - SZDStatus s = FromStatus(szd_probe(*manager_, prober)); + SZDStatus s = FromStatus(szd_probe(*manager_, (void**)prober)); if (s != SZDStatus::Success) { SZD_LOG_ERROR("SZD: Device: Probe: Failed probing\n"); return s; @@ -61,7 +72,7 @@ SZDStatus SZDDevice::Probe(std::vector &info) { info.push_back( DeviceOpenInfo{.traddr = trid, .is_zns = (*prober)->zns[dev]}); } - szd_free_probe_information(*prober); + szd_free_probe_information(*manager_, *prober); delete prober; // Probe can leave SZD in a weird attached state (zombie devices). 
s = Reinit(); @@ -101,7 +112,7 @@ SZDStatus SZDDevice::GetInfo(DeviceInfo *info) const { SZD_LOG_ERROR("SZD: Device: GetInfo: Not initialised\n"); return SZDStatus::InvalidArguments; } - *info = (*manager_)->info; + *info = (*manager_)->manager_->info; return SZDStatus::Success; } diff --git a/szd/cpp/tests/szd_channel_test.cpp b/szd/cpp/tests/szd_channel_test.cpp index 6a848c0..937bf66 100644 --- a/szd/cpp/tests/szd_channel_test.cpp +++ b/szd/cpp/tests/szd_channel_test.cpp @@ -76,7 +76,7 @@ TEST_F(SZDChannelTest, AllignmentTest) { SZD::SZDDevice dev("AllignmentTest"); SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); - SZD::SZDChannelFactory factory(dev.GetDeviceManager(), 1); + SZD::SZDChannelFactory factory(dev.GetEngineManager(), 1); SZD::SZDChannel *channel; factory.register_channel(&channel); // 0 bytes @@ -100,7 +100,7 @@ TEST_F(SZDChannelTest, TranslateAddress) { SZD::SZDDevice dev("TranslateAddress"); SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); - SZD::SZDChannelFactory factory(dev.GetDeviceManager(), 1); + SZD::SZDChannelFactory factory(dev.GetEngineManager(), 1); SZD::SZDChannel *channel; // Mock translations @@ -108,7 +108,7 @@ TEST_F(SZDChannelTest, TranslateAddress) { // 1. EQ (zone_sze = zone_cap) // 2. PowerOfTwo (zone_sze = zone_cap * 2) // 3. 
NotAPowerOfTwo (zone_sze = zone_cap + 10) - dev.GetDeviceManager()->info.zone_size = 4096; + dev.GetEngineManager()->manager_->info.zone_size = 4096; const auto testallignment = [](SZD::SZDChannel *channel, uint64_t l, uint64_t r) -> void { @@ -119,7 +119,7 @@ TEST_F(SZDChannelTest, TranslateAddress) { ASSERT_EQ(channel->TranslatePbaToLba(channel->TranslateLbaToPba(l)), l); }; - dev.GetDeviceManager()->info.zone_cap = 4096; + dev.GetEngineManager()->manager_->info.zone_cap = 4096; factory.register_channel(&channel); testallignment(channel, 0, 0); testallignment(channel, 3, 3); @@ -128,7 +128,7 @@ TEST_F(SZDChannelTest, TranslateAddress) { testallignment(channel, 1UL << 63, 1UL << 63); factory.unregister_channel(channel); - dev.GetDeviceManager()->info.zone_cap = 4096 >> 1; + dev.GetEngineManager()->manager_->info.zone_cap = 4096 >> 1; factory.register_channel(&channel); testallignment(channel, 0, 0); testallignment(channel, 3, 3); @@ -140,7 +140,7 @@ TEST_F(SZDChannelTest, TranslateAddress) { testallignment(channel, 1UL << 62, 1UL << 63); factory.unregister_channel(channel); - dev.GetDeviceManager()->info.zone_cap = 4096 - 10; + dev.GetEngineManager()->manager_->info.zone_cap = 4096 - 10; factory.register_channel(&channel); testallignment(channel, 0, 0); testallignment(channel, 3, 3); @@ -156,7 +156,7 @@ TEST_F(SZDChannelTest, DirectIO) { SZD::SZDDevice dev("DirectIO"); SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); - SZD::SZDChannelFactory factory(dev.GetDeviceManager(), 1); + SZD::SZDChannelFactory factory(dev.GetEngineManager(), 1); SZD::SZDChannel *channel; factory.register_channel(&channel); @@ -283,7 +283,7 @@ TEST_F(SZDChannelTest, DirectIONonAlligned) { SZD::SZDDevice dev("DirectIONonAlligned"); SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); - SZD::SZDChannelFactory factory(dev.GetDeviceManager(), 1); + SZD::SZDChannelFactory factory(dev.GetEngineManager(), 1); SZD::SZDChannel 
*channel; factory.register_channel(&channel); @@ -377,7 +377,7 @@ TEST_F(SZDChannelTest, BufferIO) { SZD::SZDDevice dev("BufferIO"); SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); - SZD::SZDChannelFactory factory(dev.GetDeviceManager(), 1); + SZD::SZDChannelFactory factory(dev.GetEngineManager(), 1); SZD::SZDChannel *channel; factory.register_channel(&channel); @@ -397,7 +397,7 @@ TEST_F(SZDChannelTest, BufferIO) { // Setup. We will create 3 equal sized parts. We flush the middle part. // Read it into the last, then flush a non-alligned area around the last 2 // parts and read it into the first. - SZD::SZDBuffer buffer(info.lba_size * 3, info.lba_size); + SZD::SZDBuffer buffer(dev.GetEngineManager(), info.lba_size * 3, info.lba_size); char *raw_buffer = nullptr; ASSERT_EQ(buffer.GetBuffer((void **)&raw_buffer), SZD::SZDStatus::Success); ASSERT_NE(raw_buffer, nullptr); @@ -466,7 +466,7 @@ TEST_F(SZDChannelTest, BufferIO) { start_head + buffer.GetBufferSize() / info.lba_size, info.zone_cap, info.zasl / info.lba_size); - SZD::SZDBuffer shadow_buffer(info.lba_size * 3, info.lba_size); + SZD::SZDBuffer shadow_buffer(dev.GetEngineManager(), info.lba_size * 3, info.lba_size); char *raw_shadow_buffer; ASSERT_EQ(shadow_buffer.GetBuffer((void **)&raw_shadow_buffer), SZD::SZDStatus::Success); @@ -502,7 +502,7 @@ TEST_F(SZDChannelTest, ResetZone) { SZD::SZDDevice dev("ResetZone"); SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); - SZD::SZDChannelFactory factory(dev.GetDeviceManager(), 4); + SZD::SZDChannelFactory factory(dev.GetEngineManager(), 4); SZD::SZDChannel *channel; factory.register_channel(&channel); @@ -578,7 +578,7 @@ TEST_F(SZDChannelTest, ResetAndWriteRespectsRange) { SZD::SZDDevice dev("ResetAndWriteRespectsRange"); SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); - SZD::SZDChannelFactory factory(dev.GetDeviceManager(), 4); + SZD::SZDChannelFactory 
factory(dev.GetEngineManager(), 4); SZD::SZDChannel *channel, *channel1, *channel2, *channel3; factory.register_channel(&channel); factory.register_channel(&channel1, begin_zone, begin_zone + 1, false, 1); @@ -640,7 +640,7 @@ TEST_F(SZDChannelTest, FinishZone) { SZD::SZDDevice dev("FinishZone"); SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); - SZD::SZDChannelFactory factory(dev.GetDeviceManager(), 4); + SZD::SZDChannelFactory factory(dev.GetEngineManager(), 4); SZD::SZDChannel *channel; factory.register_channel(&channel); @@ -690,7 +690,7 @@ TEST_F(SZDChannelTest, AsyncTest) { SZD::SZDDevice dev("AsyncTest"); SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); - SZD::SZDChannelFactory factory(dev.GetDeviceManager(), 1); + SZD::SZDChannelFactory factory(dev.GetEngineManager(), 1); SZD::SZDChannel *channel; factory.register_channel(&channel, true, 8); @@ -822,7 +822,7 @@ TEST_F(SZDChannelTest, MaxZoneHeads) { SZD::SZDDevice dev("MaxZoneHeads"); SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(0, 0, &dev, &info); - SZD::SZDChannelFactory factory(dev.GetDeviceManager(), 1); + SZD::SZDChannelFactory factory(dev.GetEngineManager(), 1); SZD::SZDChannel *channel; factory.register_channel(&channel); diff --git a/szd/cpp/tests/szd_circular_log_test.cpp b/szd/cpp/tests/szd_circular_log_test.cpp index a487b45..f12c36e 100644 --- a/szd/cpp/tests/szd_circular_log_test.cpp +++ b/szd/cpp/tests/szd_circular_log_test.cpp @@ -23,7 +23,7 @@ TEST_F(SZDTest, FillingACircularLogEphemerallyTest) { SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); SZD::SZDChannelFactory *factory = new SZD::SZDChannelFactory( - dev.GetDeviceManager(), needed_channels_for_circular_log); + dev.GetEngineManager(), needed_channels_for_circular_log); SZD::SZDCircularLog log(factory, info, begin_zone_log, end_zone_log, 1); // We need to reset all data if it is there, as always. 
@@ -110,7 +110,7 @@ TEST_F(SZDTest, TestFillingACircularPersistently) { SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); SZD::SZDChannelFactory *factory = new SZD::SZDChannelFactory( - dev.GetDeviceManager(), needed_channels_for_circular_log); + dev.GetEngineManager(), needed_channels_for_circular_log); factory->Ref(); // Cleanup first round @@ -142,7 +142,7 @@ TEST_F(SZDTest, TestFillingACircularPersistently) { } // last round, get and verify the data - SZD::SZDBuffer buffer((end_zone - begin_zone) * info.zone_cap * info.lba_size, + SZD::SZDBuffer buffer(dev.GetEngineManager(), (end_zone - begin_zone) * info.zone_cap * info.lba_size, info.lba_size); { SZD::SZDCircularLog log(factory, info, begin_zone, end_zone, 1); @@ -170,7 +170,7 @@ TEST_F(SZDTest, CircularLogCircularPatternTest) { SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); SZD::SZDChannelFactory *factory = new SZD::SZDChannelFactory( - dev.GetDeviceManager(), needed_channels_for_circular_log * 2); + dev.GetEngineManager(), needed_channels_for_circular_log * 2); SZD::SZDCircularLog log(factory, info, begin_zone, end_zone, 1); // We need to reset all data if it is there, as always. 
@@ -268,7 +268,7 @@ TEST_F(SZDTest, CircularLogMultipleReaderTest) { SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); SZD::SZDChannelFactory *factory = new SZD::SZDChannelFactory( - dev.GetDeviceManager(), + dev.GetEngineManager(), needed_channels_for_circular_log + /* 1 reader extra */ 1); SZD::SZDCircularLog log(factory, info, begin_zone, end_zone, 2); ASSERT_EQ(log.GetNumberOfReaders(), 2); diff --git a/szd/cpp/tests/szd_fragmented_log_test.cpp b/szd/cpp/tests/szd_fragmented_log_test.cpp index 4c67b0f..225056a 100644 --- a/szd/cpp/tests/szd_fragmented_log_test.cpp +++ b/szd/cpp/tests/szd_fragmented_log_test.cpp @@ -22,7 +22,7 @@ TEST_F(SZDTest, FillingFragmentedLogSimpleTest) { SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); SZD::SZDChannelFactory *factory = new SZD::SZDChannelFactory( - dev.GetDeviceManager(), needed_channels_for_fragmented_log); + dev.GetEngineManager(), needed_channels_for_fragmented_log); SZD::SZDFragmentedLog log(factory, info, begin_zone, end_zone, 1, 1); // We need to reset all data if it is there, as always. @@ -89,7 +89,7 @@ TEST_F(SZDTest, FillingFragmentedLogFragmentingTest) { static constexpr uint64_t further_end_zone = 19; SZDTestUtil::SZDSetupDevice(begin_zone, further_end_zone, &dev, &info); SZD::SZDChannelFactory *factory = new SZD::SZDChannelFactory( - dev.GetDeviceManager(), needed_channels_for_fragmented_log); + dev.GetEngineManager(), needed_channels_for_fragmented_log); SZD::SZDFragmentedLog log(factory, info, begin_zone, further_end_zone, 1, 1); // We need to reset all data if it is there, as always. 
@@ -211,7 +211,7 @@ TEST_F(SZDTest, MultipleWritersFragmentedLogFragmentingTest) { static constexpr uint64_t further_end_zone = 19; SZDTestUtil::SZDSetupDevice(begin_zone, further_end_zone, &dev, &info); SZD::SZDChannelFactory *factory = new SZD::SZDChannelFactory( - dev.GetDeviceManager(), needed_channels_for_fragmented_log + 1); + dev.GetEngineManager(), needed_channels_for_fragmented_log + 1); SZD::SZDFragmentedLog log(factory, info, begin_zone, further_end_zone, 1, 2); // Reset first diff --git a/szd/cpp/tests/szd_once_log_test.cpp b/szd/cpp/tests/szd_once_log_test.cpp index a42bc66..68bd36f 100644 --- a/szd/cpp/tests/szd_once_log_test.cpp +++ b/szd/cpp/tests/szd_once_log_test.cpp @@ -23,7 +23,7 @@ TEST_F(SZDTest, OnceLogEphemeralTest) { SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); SZD::SZDChannelFactory *factory = new SZD::SZDChannelFactory( - dev.GetDeviceManager(), needed_channels_for_once_log); + dev.GetEngineManager(), needed_channels_for_once_log); SZD::SZDOnceLog log(factory, info, begin_zone_log, end_zone_log, 1U); // We need to reset all data if it is there, as always. 
@@ -108,7 +108,7 @@ TEST_F(SZDTest, OnceLogPersistenceTest) { SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); SZD::SZDChannelFactory *factory = new SZD::SZDChannelFactory( - dev.GetDeviceManager(), needed_channels_for_once_log); + dev.GetEngineManager(), needed_channels_for_once_log); factory->Ref(); // Cleanup first round @@ -139,7 +139,7 @@ TEST_F(SZDTest, OnceLogPersistenceTest) { } // last round, get and verify the data - SZD::SZDBuffer buffer((end_zone - begin_zone) * info.zone_cap * info.lba_size, + SZD::SZDBuffer buffer(dev.GetEngineManager(), (end_zone - begin_zone) * info.zone_cap * info.lba_size, info.lba_size); { SZD::SZDOnceLog log(factory, info, begin_zone, end_zone, 1U); @@ -167,7 +167,7 @@ TEST_F(SZDTest, OnceLogMarkInactiveTest) { SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); SZD::SZDChannelFactory *factory = new SZD::SZDChannelFactory( - dev.GetDeviceManager(), needed_channels_for_once_log); + dev.GetEngineManager(), needed_channels_for_once_log); factory->Ref(); SZD::SZDOnceLog log(factory, info, begin_zone, end_zone, 1U); @@ -223,7 +223,7 @@ TEST_F(SZDTest, OnceLogReadAllTest) { SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); SZD::SZDChannelFactory *factory = new SZD::SZDChannelFactory( - dev.GetDeviceManager(), needed_channels_for_once_log); + dev.GetEngineManager(), needed_channels_for_once_log); factory->Ref(); SZD::SZDOnceLog log(factory, info, begin_zone, end_zone, 1U); @@ -255,7 +255,7 @@ TEST_F(SZDTest, OnceLogAsyncTest) { SZD::DeviceInfo info; SZDTestUtil::SZDSetupDevice(begin_zone, end_zone, &dev, &info); SZD::SZDChannelFactory *factory = new SZD::SZDChannelFactory( - dev.GetDeviceManager(), needed_channels_for_once_log); + dev.GetEngineManager(), needed_channels_for_once_log); factory->Ref(); SZD::SZDChannel **channel = new SZD::SZDChannel *[1]; factory->register_channel(&channel[0], true, 4); diff --git 
a/szd/cpp/tests/szd_test_util.hpp b/szd/cpp/tests/szd_test_util.hpp index 9a23485..da3495a 100644 --- a/szd/cpp/tests/szd_test_util.hpp +++ b/szd/cpp/tests/szd_test_util.hpp @@ -6,16 +6,17 @@ namespace SZDTestUtil { static void SZDSetupDevice(uint64_t min_zone, uint64_t max_zone, SZD::SZDDevice *device, SZD::DeviceInfo *dinfo) { ASSERT_EQ(device->Init(), SZD::SZDStatus::Success); - std::vector info; - ASSERT_EQ(device->Probe(info), SZD::SZDStatus::Success); - std::string device_to_use = "None"; - for (auto it = info.begin(); it != info.end(); it++) { - if (it->is_zns) { - device_to_use.assign(it->traddr); - printf("using device at traddr %s \n", it->traddr.data()); - break; - } - } + // std::vector info; + // ASSERT_EQ(device->Probe(info), SZD::SZDStatus::Success); + // std::string device_to_use = "None"; + // for (auto it = info.begin(); it != info.end(); it++) { + // if (it->is_zns) { + // device_to_use.assign(it->traddr); + // printf("using device at traddr %s \n", it->traddr.data()); + // break; + // } + // } + std::string device_to_use = "/dev/ng5n1"; ASSERT_EQ(device->Open(device_to_use, min_zone, max_zone), SZD::SZDStatus::Success); ASSERT_EQ(device->GetInfo(dinfo), SZD::SZDStatus::Success);