From 71928373fe8a4b222b764ba77cf71915b5bd65e8 Mon Sep 17 00:00:00 2001 From: Shakil Thakur Date: Thu, 17 Apr 2025 20:26:29 -0500 Subject: [PATCH] updating to 3.12.3 --- README.md | 2 +- ....rockspec => lua-simdjson-0.0.7-1.rockspec | 4 +- src/luasimdjson.cpp | 2 +- src/simdjson.cpp | 119 +- src/simdjson.h | 1851 +++++++++++------ 5 files changed, 1340 insertions(+), 638 deletions(-) rename lua-simdjson-0.0.6-1.rockspec => lua-simdjson-0.0.7-1.rockspec (96%) diff --git a/README.md b/README.md index 845e788..2f2aa45 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ A basic Lua binding to [simdjson](https://simdjson.org). The simdjson library is an incredibly fast JSON parser that uses SIMD instructions and fancy algorithms to parse JSON very quickly. It's been tested with LuaJIT 2.0/2.1 and Lua 5.1, 5.2, 5.3, and 5.4 on linux/osx/windows. It has a general parsing mode and a lazy mode that uses a JSON pointer. -Current simdjson version: 3.11.3 +Current simdjson version: 3.12.3 ## Installation If all the requirements are met, lua-simdjson can be install via luarocks with: diff --git a/lua-simdjson-0.0.6-1.rockspec b/lua-simdjson-0.0.7-1.rockspec similarity index 96% rename from lua-simdjson-0.0.6-1.rockspec rename to lua-simdjson-0.0.7-1.rockspec index b93d502..1b2ba29 100644 --- a/lua-simdjson-0.0.6-1.rockspec +++ b/lua-simdjson-0.0.7-1.rockspec @@ -1,8 +1,8 @@ package="lua-simdjson" -version="0.0.6-1" +version="0.0.7-1" source = { url = "git://github.com/FourierTransformer/lua-simdjson", - tag = "0.0.6" + tag = "0.0.7" } description = { summary = "This is a simple Lua binding for simdjson", diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp index 8d77785..1f64a53 100644 --- a/src/luasimdjson.cpp +++ b/src/luasimdjson.cpp @@ -15,7 +15,7 @@ #include "luasimdjson.h" #define LUA_SIMDJSON_NAME "simdjson" -#define LUA_SIMDJSON_VERSION "0.0.6" +#define LUA_SIMDJSON_VERSION "0.0.7" using namespace simdjson; diff --git a/src/simdjson.cpp b/src/simdjson.cpp index 83b5b8a..d0f441b 100644 --- a/src/simdjson.cpp +++ b/src/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2024-12-12 10:37:26 -0500. Do not edit! */ +/* auto-generated on 2025-03-27 15:01:10 -0400. Do not edit! */ /* including simdjson.cpp: */ /* begin file simdjson.cpp */ #define SIMDJSON_SRC_SIMDJSON_CPP @@ -83,9 +83,20 @@ #endif #endif +#if defined(__apple_build_version__) +#if __apple_build_version__ < 14000000 +#define SIMDJSON_CONCEPT_DISABLED 1 // apple-clang/13 doesn't support std::convertible_to +#endif +#endif + + #if defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) +#if __cpp_concepts >= 201907L #include #define SIMDJSON_SUPPORTS_DESERIALIZATION 1 +#else +#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#endif #else // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) #define SIMDJSON_SUPPORTS_DESERIALIZATION 0 #endif // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) @@ -102,11 +113,15 @@ #include #include #include +#include #ifndef _WIN32 // strcasecmp, strncasecmp #include #endif +static_assert(CHAR_BIT == 8, "simdjson requires 8-bit bytes"); + + // We are using size_t without namespace std:: throughout the project using std::size_t; @@ -140,6 +155,7 @@ using std::size_t; #elif defined(__loongarch_lp64) #define SIMDJSON_IS_LOONGARCH64 1 #elif defined(__PPC64__) || defined(_M_PPC64) +#define SIMDJSON_IS_PPC64 1 #if defined(__ALTIVEC__) #define SIMDJSON_IS_PPC64_VMX 1 #endif // defined(__ALTIVEC__) @@ -760,22 +776,22 @@ inline namespace literals { inline namespace string_view_literals { -constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1) +constexpr std::string_view operator ""_sv( const char* str, size_t len ) noexcept // (1) { return std::string_view{ str, len }; } -constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) +constexpr std::u16string_view operator ""_sv( const char16_t* str, size_t len ) noexcept // (2) { return std::u16string_view{ str, len }; } -constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) +constexpr std::u32string_view operator ""_sv( const char32_t* str, size_t len ) noexcept // (3) { return std::u32string_view{ str, len }; } -constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) +constexpr std::wstring_view operator ""_sv( const wchar_t* str, size_t len ) noexcept // (4) { return std::wstring_view{ str, len }; } @@ -2106,22 +2122,22 @@ nssv_inline_ns namespace string_view_literals { #if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS -nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) +nssv_constexpr nonstd::sv_lite::string_view operator ""sv( const char* str, size_t len ) nssv_noexcept // (1) { return nonstd::sv_lite::string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +nssv_constexpr nonstd::sv_lite::u16string_view operator ""sv( const char16_t* str, size_t len ) nssv_noexcept // (2) { return nonstd::sv_lite::u16string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +nssv_constexpr nonstd::sv_lite::u32string_view operator ""sv( const char32_t* str, size_t len ) nssv_noexcept // (3) { return nonstd::sv_lite::u32string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +nssv_constexpr nonstd::sv_lite::wstring_view operator ""sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) { return nonstd::sv_lite::wstring_view{ str, len }; } @@ -2130,22 +2146,22 @@ nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, #if nssv_CONFIG_USR_SV_OPERATOR -nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) +nssv_constexpr nonstd::sv_lite::string_view operator ""_sv( const char* str, size_t len ) nssv_noexcept // (1) { return nonstd::sv_lite::string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +nssv_constexpr nonstd::sv_lite::u16string_view operator ""_sv( const char16_t* str, size_t len ) nssv_noexcept // (2) { return nonstd::sv_lite::u16string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +nssv_constexpr nonstd::sv_lite::u32string_view operator ""_sv( const char32_t* str, size_t len ) nssv_noexcept // (3) { return nonstd::sv_lite::u32string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +nssv_constexpr nonstd::sv_lite::wstring_view operator ""_sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) { return nonstd::sv_lite::wstring_view{ str, len }; } @@ -2415,7 +2431,7 @@ enum error_code { SUCCESS = 0, ///< No error CAPACITY, ///< This parser can't support a document that big MEMALLOC, ///< Error allocating memory, most likely out of memory - TAPE_ERROR, ///< Something went wrong, this is a generic error + TAPE_ERROR, ///< Something went wrong, this is a generic error. Fatal/unrecoverable error. DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation STRING_ERROR, ///< Problem while parsing a string T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' @@ -2440,13 +2456,21 @@ enum error_code { PARSER_IN_USE, ///< parser is already in use. OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1) INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. - INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. Fatal/unrecoverable error. SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. OUT_OF_BOUNDS, ///< Attempted to access location outside of document. TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input NUM_ERROR_CODES }; +/** + * Some errors are fatal and invalidate the document. This function returns true if the + * error is fatal. It returns true for TAPE_ERROR and INCOMPLETE_ARRAY_OR_OBJECT. + * Once a fatal error is encountered, the on-demand document is no longer valid and + * processing should stop. + */ + inline bool is_fatal(error_code error) noexcept; + /** * It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether * we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occurs when the code @@ -2749,14 +2773,30 @@ SIMDJSON_IMPL_CONCEPT(op_append, operator+=) #undef SIMDJSON_IMPL_CONCEPT } // namespace details + +template +concept string_view_like = std::is_convertible_v && + !std::is_convertible_v; + +template +concept constructible_from_string_view = std::is_constructible_v + && !std::is_same_v + && std::is_default_constructible_v; + +template +concept string_view_keyed_map = string_view_like + && requires(std::remove_cvref_t& m, typename M::key_type sv, typename M::mapped_type v) { + { m.emplace(sv, v) } -> std::same_as>; +}; + /// Check if T is a container that we can append to, including: /// std::vector, std::deque, std::list, std::string, ... template concept appendable_containers = - details::supports_emplace_back || details::supports_emplace || + (details::supports_emplace_back || details::supports_emplace || details::supports_push_back || details::supports_push || details::supports_add || details::supports_append || - details::supports_insert; + details::supports_insert) && !string_view_keyed_map; /// Insert into the container however possible template @@ -2824,6 +2864,8 @@ concept optional_type = requires(std::remove_cvref_t obj) { { static_cast(obj) } -> std::same_as; // convertible to bool }; + + } // namespace concepts } // namespace simdjson #endif // SIMDJSON_SUPPORTS_DESERIALIZATION @@ -4495,6 +4537,11 @@ extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886]; #include namespace simdjson { + +inline bool is_fatal(error_code error) noexcept { + return error == TAPE_ERROR || error == INCOMPLETE_ARRAY_OR_OBJECT; +} + namespace internal { // We store the error code so we can validate the error message is associated with the right code struct error_code_info { @@ -4680,7 +4727,7 @@ namespace internal { { SUCCESS, "SUCCESS: No error" }, { CAPACITY, "CAPACITY: This parser can't support a document that big" }, { MEMALLOC, "MEMALLOC: Error allocating memory, we're most likely out of memory" }, - { TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc." }, + { TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc. This is a fatal and unrecoverable error." }, { DEPTH_ERROR, "DEPTH_ERROR: The JSON document was too deep (too many nested objects and arrays)" }, { STRING_ERROR, "STRING_ERROR: Problem while parsing a string" }, { T_ATOM_ERROR, "T_ATOM_ERROR: Problem while parsing an atom starting with the letter 't'" }, @@ -4705,7 +4752,7 @@ namespace internal { { PARSER_IN_USE, "PARSER_IN_USE: Cannot parse a new document while a document is still in use." }, { OUT_OF_ORDER_ITERATION, "OUT_OF_ORDER_ITERATION: Objects and arrays can only be iterated when they are first encountered." }, { INSUFFICIENT_PADDING, "INSUFFICIENT_PADDING: simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." }, - { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array." }, + { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array. This is a fatal and unrecoverable error." }, { SCALAR_DOCUMENT_AS_VALUE, "SCALAR_DOCUMENT_AS_VALUE: A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "}, { OUT_OF_BOUNDS, "OUT_OF_BOUNDS: Attempt to access location outside of document."}, { TRAILING_CONTENT, "TRAILING_CONTENT: Unexpected trailing content in the JSON input."} @@ -6771,7 +6818,7 @@ class document { * The memory allocation is strict: you * can you use this function to increase * or lower the amount of allocated memory. - * Passsing zero clears the memory. + * Passing zero clears the memory. */ error_code allocate(size_t len) noexcept; /** @private Capacity in bytes, in terms @@ -9169,7 +9216,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -15529,7 +15576,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -20797,14 +20844,18 @@ namespace simd { // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. template simdjson_inline void compress(uint64_t mask, L * output) const { - _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + // we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + // _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + __m512i compressed = _mm512_maskz_compress_epi8(~mask, *this); + _mm512_storeu_si512(output, compressed); // could use a mask } template @@ -21749,7 +21800,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -23427,14 +23478,18 @@ namespace simd { // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. template simdjson_inline void compress(uint64_t mask, L * output) const { - _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + // we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + // _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + __m512i compressed = _mm512_maskz_compress_epi8(~mask, *this); + _mm512_storeu_si512(output, compressed); // could use a mask } template @@ -28125,7 +28180,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -34867,7 +34922,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -41433,7 +41488,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -47444,7 +47499,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -53054,7 +53109,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // diff --git a/src/simdjson.h b/src/simdjson.h index c80e8cd..a0d4499 100644 --- a/src/simdjson.h +++ b/src/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on 2024-12-12 10:37:26 -0500. Do not edit! */ +/* auto-generated on 2025-03-27 15:01:10 -0400. Do not edit! */ /* including simdjson.h: */ /* begin file simdjson.h */ #ifndef SIMDJSON_H @@ -103,9 +103,20 @@ #endif #endif +#if defined(__apple_build_version__) +#if __apple_build_version__ < 14000000 +#define SIMDJSON_CONCEPT_DISABLED 1 // apple-clang/13 doesn't support std::convertible_to +#endif +#endif + + #if defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) +#if __cpp_concepts >= 201907L #include #define SIMDJSON_SUPPORTS_DESERIALIZATION 1 +#else +#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#endif #else // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) #define SIMDJSON_SUPPORTS_DESERIALIZATION 0 #endif // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) @@ -122,11 +133,15 @@ #include #include #include +#include #ifndef _WIN32 // strcasecmp, strncasecmp #include #endif +static_assert(CHAR_BIT == 8, "simdjson requires 8-bit bytes"); + + // We are using size_t without namespace std:: throughout the project using std::size_t; @@ -160,6 +175,7 @@ using std::size_t; #elif defined(__loongarch_lp64) #define SIMDJSON_IS_LOONGARCH64 1 #elif defined(__PPC64__) || defined(_M_PPC64) +#define SIMDJSON_IS_PPC64 1 #if defined(__ALTIVEC__) #define SIMDJSON_IS_PPC64_VMX 1 #endif // defined(__ALTIVEC__) @@ -780,22 +796,22 @@ inline namespace literals { inline namespace string_view_literals { -constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1) +constexpr std::string_view operator ""_sv( const char* str, size_t len ) noexcept // (1) { return std::string_view{ str, len }; } -constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) +constexpr std::u16string_view operator ""_sv( const char16_t* str, size_t len ) noexcept // (2) { return std::u16string_view{ str, len }; } -constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) +constexpr std::u32string_view operator ""_sv( const char32_t* str, size_t len ) noexcept // (3) { return std::u32string_view{ str, len }; } -constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) +constexpr std::wstring_view operator ""_sv( const wchar_t* str, size_t len ) noexcept // (4) { return std::wstring_view{ str, len }; } @@ -2126,22 +2142,22 @@ nssv_inline_ns namespace string_view_literals { #if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS -nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) +nssv_constexpr nonstd::sv_lite::string_view operator ""sv( const char* str, size_t len ) nssv_noexcept // (1) { return nonstd::sv_lite::string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +nssv_constexpr nonstd::sv_lite::u16string_view operator ""sv( const char16_t* str, size_t len ) nssv_noexcept // (2) { return nonstd::sv_lite::u16string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +nssv_constexpr nonstd::sv_lite::u32string_view operator ""sv( const char32_t* str, size_t len ) nssv_noexcept // (3) { return nonstd::sv_lite::u32string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +nssv_constexpr nonstd::sv_lite::wstring_view operator ""sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) { return nonstd::sv_lite::wstring_view{ str, len }; } @@ -2150,22 +2166,22 @@ nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, #if nssv_CONFIG_USR_SV_OPERATOR -nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) +nssv_constexpr nonstd::sv_lite::string_view operator ""_sv( const char* str, size_t len ) nssv_noexcept // (1) { return nonstd::sv_lite::string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +nssv_constexpr nonstd::sv_lite::u16string_view operator ""_sv( const char16_t* str, size_t len ) nssv_noexcept // (2) { return nonstd::sv_lite::u16string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +nssv_constexpr nonstd::sv_lite::u32string_view operator ""_sv( const char32_t* str, size_t len ) nssv_noexcept // (3) { return nonstd::sv_lite::u32string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +nssv_constexpr nonstd::sv_lite::wstring_view operator ""_sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) { return nonstd::sv_lite::wstring_view{ str, len }; } @@ -2421,7 +2437,7 @@ namespace std { #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION "3.11.3" +#define SIMDJSON_VERSION "3.12.3" namespace simdjson { enum { @@ -2432,7 +2448,7 @@ enum { /** * The minor version (major.MINOR.revision) of simdjson being used. */ - SIMDJSON_VERSION_MINOR = 11, + SIMDJSON_VERSION_MINOR = 12, /** * The revision (major.minor.REVISION) of simdjson being used. */ @@ -2478,7 +2494,7 @@ enum error_code { SUCCESS = 0, ///< No error CAPACITY, ///< This parser can't support a document that big MEMALLOC, ///< Error allocating memory, most likely out of memory - TAPE_ERROR, ///< Something went wrong, this is a generic error + TAPE_ERROR, ///< Something went wrong, this is a generic error. Fatal/unrecoverable error. DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation STRING_ERROR, ///< Problem while parsing a string T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' @@ -2503,13 +2519,21 @@ enum error_code { PARSER_IN_USE, ///< parser is already in use. OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1) INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. - INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. Fatal/unrecoverable error. SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. OUT_OF_BOUNDS, ///< Attempted to access location outside of document. TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input NUM_ERROR_CODES }; +/** + * Some errors are fatal and invalidate the document. This function returns true if the + * error is fatal. It returns true for TAPE_ERROR and INCOMPLETE_ARRAY_OR_OBJECT. + * Once a fatal error is encountered, the on-demand document is no longer valid and + * processing should stop. + */ + inline bool is_fatal(error_code error) noexcept; + /** * It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether * we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occurs when the code @@ -2812,14 +2836,30 @@ SIMDJSON_IMPL_CONCEPT(op_append, operator+=) #undef SIMDJSON_IMPL_CONCEPT } // namespace details + +template +concept string_view_like = std::is_convertible_v && + !std::is_convertible_v; + +template +concept constructible_from_string_view = std::is_constructible_v + && !std::is_same_v + && std::is_default_constructible_v; + +template +concept string_view_keyed_map = string_view_like + && requires(std::remove_cvref_t& m, typename M::key_type sv, typename M::mapped_type v) { + { m.emplace(sv, v) } -> std::same_as>; +}; + /// Check if T is a container that we can append to, including: /// std::vector, std::deque, std::list, std::string, ... template concept appendable_containers = - details::supports_emplace_back || details::supports_emplace || + (details::supports_emplace_back || details::supports_emplace || details::supports_push_back || details::supports_push || details::supports_add || details::supports_append || - details::supports_insert; + details::supports_insert) && !string_view_keyed_map; /// Insert into the container however possible template @@ -2887,6 +2927,8 @@ concept optional_type = requires(std::remove_cvref_t obj) { { static_cast(obj) } -> std::same_as; // convertible to bool }; + + } // namespace concepts } // namespace simdjson #endif // SIMDJSON_SUPPORTS_DESERIALIZATION @@ -2954,6 +2996,11 @@ enum class tape_type; #include namespace simdjson { + +inline bool is_fatal(error_code error) noexcept { + return error == TAPE_ERROR || error == INCOMPLETE_ARRAY_OR_OBJECT; +} + namespace internal { // We store the error code so we can validate the error message is associated with the right code struct error_code_info { @@ -4640,7 +4687,7 @@ class document { * The memory allocation is strict: you * can you use this function to increase * or lower the amount of allocated memory. - * Passsing zero clears the memory. + * Passing zero clears the memory. */ error_code allocate(size_t len) noexcept; /** @private Capacity in bytes, in terms @@ -4997,7 +5044,7 @@ class parser { * arrays or objects) MUST be separated with whitespace. * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. - * Setting batch_size to excessively large or excesively small values may impact negatively the + * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * * ### Error Handling @@ -5091,7 +5138,7 @@ class parser { * arrays or objects) MUST be separated with whitespace. * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. - * Setting batch_size to excessively large or excesively small values may impact negatively the + * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * * ### Error Handling @@ -11583,7 +11630,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -13691,7 +13738,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -16291,7 +16338,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -17932,14 +17979,18 @@ namespace simd { // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. template simdjson_inline void compress(uint64_t mask, L * output) const { - _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + // we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + // _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + __m512i compressed = _mm512_maskz_compress_epi8(~mask, *this); + _mm512_storeu_si512(output, compressed); // could use a mask } template @@ -18884,7 +18935,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -21598,7 +21649,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -24629,7 +24680,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -27137,7 +27188,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -29658,7 +29709,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -32127,7 +32178,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -32137,7 +32189,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -32259,7 +32312,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -32269,7 +32323,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -32567,22 +32622,28 @@ class value { noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -35217,20 +35278,27 @@ class document { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -35792,20 +35860,27 @@ class document_reference { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -36980,6 +37055,16 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -37025,6 +37110,39 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + arm64::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + /** * This CPO (Customization Point Object) will help deserialize into @@ -40949,36 +41067,39 @@ simdjson_inline const char * raw_json_string::raw() const noexcept { return rein simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } @@ -40990,7 +41111,7 @@ simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and + // Assumptions: does not contain unescaped quote characters("), and // the raw content is quote terminated within a valid JSON string. if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); @@ -42246,19 +42367,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -43100,7 +43225,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -43110,7 +43236,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -43232,7 +43359,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -43242,7 +43370,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -43540,22 +43669,28 @@ class value { noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -46190,20 +46325,27 @@ class document { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -46765,20 +46907,27 @@ class document_reference { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -47953,6 +48102,16 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -47998,6 +48157,39 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + fallback::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + /** * This CPO (Customization Point Object) will help deserialize into @@ -51922,36 +52114,39 @@ simdjson_inline const char * raw_json_string::raw() const noexcept { return rein simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } @@ -51963,7 +52158,7 @@ simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and + // Assumptions: does not contain unescaped quote characters("), and // the raw content is quote terminated within a valid JSON string. if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); @@ -53219,19 +53414,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -54565,7 +54764,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -54575,7 +54775,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -54697,7 +54898,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -54707,7 +54909,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -55005,22 +55208,28 @@ class value { noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -57655,20 +57864,27 @@ class document { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -58230,20 +58446,27 @@ class document_reference { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -59418,6 +59641,16 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -59463,6 +59696,39 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + haswell::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + /** * This CPO (Customization Point Object) will help deserialize into @@ -63387,36 +63653,39 @@ simdjson_inline const char * raw_json_string::raw() const noexcept { return rein simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } @@ -63428,7 +63697,7 @@ simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and + // Assumptions: does not contain unescaped quote characters("), and // the raw content is quote terminated within a valid JSON string. if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); @@ -64684,19 +64953,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -65376,14 +65649,18 @@ namespace simd { // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. template simdjson_inline void compress(uint64_t mask, L * output) const { - _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + // we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + // _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + __m512i compressed = _mm512_maskz_compress_epi8(~mask, *this); + _mm512_storeu_si512(output, compressed); // could use a mask } template @@ -66023,7 +66300,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -66033,7 +66311,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -66155,7 +66434,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -66165,7 +66445,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -66463,22 +66744,28 @@ class value { noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -69113,20 +69400,27 @@ class document { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -69688,20 +69982,27 @@ class document_reference { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -70876,6 +71177,16 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -70921,6 +71232,39 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + icelake::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + /** * This CPO (Customization Point Object) will help deserialize into @@ -74845,36 +75189,39 @@ simdjson_inline const char * raw_json_string::raw() const noexcept { return rein simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } @@ -74886,7 +75233,7 @@ simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and + // Assumptions: does not contain unescaped quote characters("), and // the raw content is quote terminated within a valid JSON string. if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); @@ -76142,19 +76489,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -77602,7 +77953,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -77612,7 +77964,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -77734,7 +78087,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -77744,7 +78098,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -78042,22 +78397,28 @@ class value { noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -80692,20 +81053,27 @@ class document { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -81267,20 +81635,27 @@ class document_reference { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -82455,6 +82830,16 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -82500,6 +82885,39 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + ppc64::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + /** * This CPO (Customization Point Object) will help deserialize into @@ -86424,36 +86842,39 @@ simdjson_inline const char * raw_json_string::raw() const noexcept { return rein simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } @@ -86465,7 +86886,7 @@ simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and + // Assumptions: does not contain unescaped quote characters("), and // the raw content is quote terminated within a valid JSON string. if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); @@ -87721,19 +88142,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -89498,7 +89923,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -89508,7 +89934,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -89630,7 +90057,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -89640,7 +90068,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -89938,22 +90367,28 @@ class value { noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -92588,20 +93023,27 @@ class document { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -93163,20 +93605,27 @@ class document_reference { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -94351,6 +94800,16 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -94396,6 +94855,39 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + westmere::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + /** * This CPO (Customization Point Object) will help deserialize into @@ -98320,36 +98812,39 @@ simdjson_inline const char * raw_json_string::raw() const noexcept { return rein simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } @@ -98361,7 +98856,7 @@ simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and + // Assumptions: does not contain unescaped quote characters("), and // the raw content is quote terminated within a valid JSON string. if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); @@ -99617,19 +100112,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -100871,7 +101370,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -100881,7 +101381,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -101003,7 +101504,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -101013,7 +101515,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -101311,22 +101814,28 @@ class value { noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -103961,20 +104470,27 @@ class document { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -104536,20 +105052,27 @@ class document_reference { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -105724,6 +106247,16 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -105769,6 +106302,39 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + lsx::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + /** * This CPO (Customization Point Object) will help deserialize into @@ -109693,36 +110259,39 @@ simdjson_inline const char * raw_json_string::raw() const noexcept { return rein simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } @@ -109734,7 +110303,7 @@ simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and + // Assumptions: does not contain unescaped quote characters("), and // the raw content is quote terminated within a valid JSON string. if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); @@ -110990,19 +111559,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -112257,7 +112830,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -112267,7 +112841,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -112389,7 +112964,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -112399,7 +112975,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -112697,22 +113274,28 @@ class value { noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -115347,20 +115930,27 @@ class document { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -115922,20 +116512,27 @@ class document_reference { if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -117110,6 +117707,16 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -117155,6 +117762,39 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + lasx::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + /** * This CPO (Customization Point Object) will help deserialize into @@ -121079,36 +121719,39 @@ simdjson_inline const char * raw_json_string::raw() const noexcept { return rein simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } @@ -121120,7 +121763,7 @@ simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and + // Assumptions: does not contain unescaped quote characters("), and // the raw content is quote terminated within a valid JSON string. if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); @@ -122376,19 +123019,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null");