From e343e4591115755e6954302e51b4656bec659b0a Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 20 Jan 2026 19:17:20 +0100 Subject: [PATCH 1/3] Add a JSON parser --- Cargo.lock | 167 +++++--- assets/highlighting-tests/json.json | 26 ++ crates/edit/Cargo.toml | 4 +- crates/edit/benches/lib.rs | 86 +++- crates/edit/src/json.rs | 644 ++++++++++++++++++++++++++++ crates/edit/src/lib.rs | 1 + 6 files changed, 842 insertions(+), 86 deletions(-) create mode 100644 assets/highlighting-tests/json.json create mode 100644 crates/edit/src/json.rs diff --git a/Cargo.lock b/Cargo.lock index da1473dc64e7..8375271b12fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloca" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" +dependencies = [ + "cc", +] + [[package]] name = "android_system_properties" version = "0.1.5" @@ -46,9 +55,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bumpalo" -version = "3.19.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" [[package]] name = "cast" @@ -58,9 +67,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.44" +version = "1.2.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37521ac7aabe3d13122dc382493e20c9416f299d2ccd5b3a5340a2570cdeb0f3" +checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932" dependencies = [ "find-msvc-tools", "jobserver", @@ -76,9 +85,9 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "js-sys", @@ -116,18 +125,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.51" +version = "4.5.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5" +checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.51" +version = "4.5.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a" +checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" dependencies = [ "anstyle", "clap_lex", @@ -135,9 +144,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.6" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" [[package]] name = "core-foundation-sys" @@ -147,10 +156,11 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "criterion" -version = "0.7.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928" +checksum = "4d883447757bb0ee46f233e9dc22eb84d93a9508c9b868687b274fc431d886bf" dependencies = [ + "alloca", "anes", "cast", "ciborium", @@ -159,6 +169,7 @@ dependencies = [ "itertools", "num-traits", "oorandom", + "page_size", "plotters", "rayon", "regex", @@ -170,9 +181,9 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.6.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338" +checksum = "ed943f81ea2faa8dcecbbfa50164acf95d555afec96a27871663b300e387b2e4" dependencies = [ "cast", "itertools", @@ -215,8 +226,6 @@ version = "1.2.1" dependencies = [ "criterion", "libc", - "serde", - "serde_json", "stdext", "toml-span", "windows-sys", @@ -232,9 +241,9 @@ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "find-msvc-tools" -version = "0.1.4" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" [[package]] name = "getrandom" @@ -303,9 +312,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jobserver" @@ -319,9 +328,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.82" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" dependencies = [ "once_cell", "wasm-bindgen", @@ -329,15 +338,15 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.177" +version = "0.2.180" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "memchr" @@ -366,6 +375,16 @@ version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" +[[package]] +name = "page_size" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "pico-args" version = "0.5.0" @@ -408,18 +427,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.41" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" dependencies = [ "proc-macro2", ] @@ -494,12 +513,6 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" -[[package]] -name = "ryu" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" - [[package]] name = "same-file" version = "1.0.6" @@ -541,15 +554,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.145" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -573,9 +586,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.108" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", @@ -637,18 +650,18 @@ dependencies = [ [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.105" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ "cfg-if", "once_cell", @@ -659,9 +672,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.105" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -669,9 +682,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.105" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" dependencies = [ "bumpalo", "proc-macro2", @@ -682,23 +695,39 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.105" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" -version = "0.3.82" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" dependencies = [ "js-sys", "wasm-bindgen", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.11" @@ -708,6 +737,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.62.2" @@ -778,39 +813,45 @@ dependencies = [ [[package]] name = "winresource" -version = "0.1.26" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6d1a5aac12a0cccc4dea310c464cae005a78eadfa72fc3bc45fe696ebfbb9d" +checksum = "17cdfa8da4b111045a5e47c7c839e6c5e11c942de1309bc624393ed5d87f89c6" dependencies = [ "version_check", ] [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] name = "zerocopy" -version = "0.8.27" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.27" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "zmij" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfcd145825aace48cff44a8844de64bf75feec3080e0aa5cdbde72961ae51a65" + [[package]] name = "zstd" version = "0.13.3" diff --git a/assets/highlighting-tests/json.json b/assets/highlighting-tests/json.json new file mode 100644 index 000000000000..b4aae26ea7c5 --- /dev/null +++ b/assets/highlighting-tests/json.json @@ -0,0 +1,26 @@ +{ + // Object with various value types + "string": "Hello, world!", // string literal + "numberInt": 42, // integer number + "numberFloat": -3.14e+2, // floating point with exponent + "booleanTrue": true, // boolean true + "booleanFalse": false, // boolean false + "nullValue": null, // null literal + "array": [ + "item1", // string in array + 2, // number in array + false, // boolean in array + null, // null in array + { + "nested": "object" + } // object in array + ], + "emptyObject": {}, // empty object + "emptyArray": [], // empty array + /* Multi-line comment: + This is a block comment + inside JSONC. + */ + "unicodeString": "Emoji: \uD83D\uDE03", // Unicode escape + "escapedChars": "Line1\nLine2\tTabbed\\Backslash\"Quote" // Escaped characters +} diff --git a/crates/edit/Cargo.toml b/crates/edit/Cargo.toml index f96ceb7d5313..c7bc03f0afb1 100644 --- a/crates/edit/Cargo.toml +++ b/crates/edit/Cargo.toml @@ -47,7 +47,5 @@ features = [ ] [dev-dependencies] -criterion = { version = "0.7", features = ["html_reports"] } -serde = { version = "1.0", features = ["derive"] } -serde_json = { version = "1.0" } +criterion = { version = "0.8", features = ["html_reports"] } zstd = { version = "0.13", default-features = false } diff --git a/crates/edit/benches/lib.rs b/crates/edit/benches/lib.rs index 4c8fcc37df3e..0aa04798b8d1 100644 --- a/crates/edit/benches/lib.rs +++ b/crates/edit/benches/lib.rs @@ -1,40 +1,70 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +#![feature(allocator_api)] + use std::hint::black_box; use std::io::Cursor; +use std::ops::Range; use std::{mem, vec}; use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; use edit::helpers::*; use edit::simd::MemsetSafe; -use edit::{buffer, hash, oklab, simd, unicode}; -use serde::Deserialize; -use stdext::arena; +use edit::{buffer, hash, json, oklab, simd, unicode}; +use stdext::arena::{Arena, scratch_arena}; +use stdext::{arena, varint}; -#[derive(Deserialize)] -pub struct EditingTracePatch(pub usize, pub usize, pub String); +struct EditingTracePatch<'a>(usize, usize, &'a str); -#[derive(Deserialize)] -pub struct EditingTraceTransaction { - pub patches: Vec, +struct EditingTraceTransaction<'a> { + patches: Vec, &'a Arena>, } -#[derive(Deserialize)] -pub struct EditingTraceData { - #[serde(rename = "startContent")] - pub start_content: String, - #[serde(rename = "endContent")] - pub end_content: String, - pub txns: Vec, +struct EditingTraceData<'a> { + start_content: &'a str, + end_content: &'a str, + txns: Vec, &'a Arena>, } fn bench_buffer(c: &mut Criterion) { - let data = include_bytes!("../../../assets/editing-traces/rustcode.json.zst"); - let data = zstd::decode_all(Cursor::new(data)).unwrap(); - let data: EditingTraceData = serde_json::from_slice(&data).unwrap(); - let mut patches_with_coords = Vec::new(); + let scratch = scratch_arena(None); + let data = { + let data = include_bytes!("../../../assets/editing-traces/rustcode.json.zst"); + let data = zstd::decode_all(Cursor::new(data)).unwrap(); + let data = str::from_utf8(&data).unwrap(); + + let data = json::parse(&scratch, data).unwrap(); + let root = data.as_object().unwrap(); + let txns = root.get_array("txns").unwrap(); + + let mut res = EditingTraceData { + start_content: root.get_str("startContent").unwrap(), + end_content: root.get_str("endContent").unwrap(), + txns: Vec::with_capacity_in(txns.len(), &scratch), + }; + + for txn in txns { + let txn = txn.as_object().unwrap(); + let patches = txn.get_array("patches").unwrap(); + let mut txn = + EditingTraceTransaction { patches: Vec::with_capacity_in(patches.len(), &scratch) }; + + for patch in patches { + let patch = patch.as_array().unwrap(); + let offset = patch[0].as_number().unwrap() as usize; + let del_len = patch[1].as_number().unwrap() as usize; + let ins_str = patch[2].as_str().unwrap(); + txn.patches.push(EditingTracePatch(offset, del_len, ins_str)); + } + + res.txns.push(txn); + } + + res + }; + let mut patches_with_coords = Vec::new(); { let mut tb = buffer::TextBuffer::new(false).unwrap(); tb.set_crlf(false); @@ -48,7 +78,7 @@ fn bench_buffer(c: &mut Criterion) { tb.delete(buffer::CursorMovement::Grapheme, p.1 as CoordType); tb.write_raw(p.2.as_bytes()); - patches_with_coords.push((beg, p.1 as CoordType, p.2.clone())); + patches_with_coords.push((beg, p.1 as CoordType, p.2)); } } @@ -126,6 +156,21 @@ fn bench_hash(c: &mut Criterion) { }); } +fn bench_json(c: &mut Criterion) { + let str = include_str!("../../../assets/highlighting-tests/json.json"); + + c.benchmark_group("json").throughput(Throughput::Bytes(str.len() as u64)).bench_function( + "parse", + |b| { + b.iter(|| { + let scratch = scratch_arena(None); + let obj = json::parse(&scratch, black_box(str)).unwrap(); + black_box(obj); + }) + }, + ); +} + fn bench_oklab(c: &mut Criterion) { c.benchmark_group("oklab") .bench_function("StraightRgba::as_oklab", |b| { @@ -232,6 +277,7 @@ fn bench(c: &mut Criterion) { bench_buffer(c); bench_hash(c); + bench_json(c); bench_oklab(c); bench_simd_lines_fwd(c); bench_simd_memchr2(c); diff --git a/crates/edit/src/json.rs b/crates/edit/src/json.rs new file mode 100644 index 000000000000..6238a1f1a5a1 --- /dev/null +++ b/crates/edit/src/json.rs @@ -0,0 +1,644 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! A simple JSONC parser with trailing comma support. +//! +//! It's designed for parsing our small settings files, +//! but its performance is rather competitive in general. + +use std::fmt; +use std::hint::unreachable_unchecked; + +use stdext::arena::{Arena, ArenaString}; + +use crate::unicode::MeasurementConfig; + +/// Maximum nesting depth to prevent stack overflow. +const MAX_DEPTH: usize = 64; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ParseErrorKind { + /// Invalid JSON syntax + Syntax, + /// Maximum nesting depth exceeded + MaxDepth, +} + +#[derive(Debug, Clone)] +pub struct ParseError { + kind: ParseErrorKind, + line: usize, + column: usize, +} + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let message = match self.kind { + ParseErrorKind::Syntax => "Invalid JSON", + ParseErrorKind::MaxDepth => "JSON too deeply nested", + }; + write!(f, "{}:{}: {}", self.line, self.column, message) + } +} + +impl std::error::Error for ParseError {} + +#[derive(Debug, Clone)] +pub enum Value<'a> { + Null, + Bool(bool), + Number(f64), + String(&'a str), + Array(&'a [Value<'a>]), + Object(&'a [(&'a str, Value<'a>)]), +} + +impl<'a> Value<'a> { + pub fn is_null(&self) -> bool { + matches!(self, Value::Null) + } + + pub fn as_bool(&self) -> Option { + match self { + Value::Bool(b) => Some(*b), + _ => None, + } + } + + pub fn as_number(&self) -> Option { + match self { + Value::Number(n) => Some(*n), + _ => None, + } + } + + pub fn as_str(&self) -> Option<&'a str> { + match self { + Value::String(s) => Some(s), + _ => None, + } + } + + pub fn as_array(&self) -> Option<&'a [Value<'a>]> { + match self { + Value::Array(arr) => Some(arr), + _ => None, + } + } + + pub fn as_object(&self) -> Option> { + match self { + Value::Object(entries) => Some(Object { entries }), + _ => None, + } + } +} + +#[derive(Debug, Clone, Copy)] +pub struct Object<'a> { + entries: &'a [(&'a str, Value<'a>)], +} + +impl<'a> Object<'a> { + pub fn get(&self, key: &str) -> Option<&'a Value<'a>> { + self.entries.iter().find(|e| e.0 == key).map(|e| &e.1) + } + + pub fn get_bool(&self, key: &str) -> Option { + self.get(key).and_then(Value::as_bool) + } + + pub fn get_number(&self, key: &str) -> Option { + self.get(key).and_then(Value::as_number) + } + + pub fn get_str(&self, key: &str) -> Option<&'a str> { + self.get(key).and_then(Value::as_str) + } + + pub fn get_array(&self, key: &str) -> Option<&'a [Value<'a>]> { + self.get(key).and_then(Value::as_array) + } + + pub fn get_object(&self, key: &str) -> Option> { + self.get(key).and_then(Value::as_object) + } + + pub fn iter(&self) -> impl Iterator)> { + self.entries.iter() + } + + pub fn len(&self) -> usize { + self.entries.len() + } + + pub fn is_empty(&self) -> bool { + self.entries.is_empty() + } +} + +pub fn parse<'a>(arena: &'a Arena, input: &str) -> Result, ParseError> { + let mut parser = Parser::new(arena, input); + parser.skip_bom(); + let value = parser.parse_value(0)?; + parser.skip_whitespace_and_comments()?; + if parser.pos == parser.input.len() { + Ok(value) + } else { + // Unexpected data after JSON value + Err(parser.fail(parser.pos, ParseErrorKind::Syntax)) + } +} + +struct Parser<'a, 'i> { + arena: &'a Arena, + input: &'i str, + bytes: &'i [u8], + pos: usize, +} + +impl<'a, 'i> Parser<'a, 'i> { + fn new(arena: &'a Arena, input: &'i str) -> Self { + Self { arena, input, bytes: input.as_bytes(), pos: 0 } + } + + fn parse_value(&mut self, depth: usize) -> Result, ParseError> { + // Prevent stack overflow from deeply nested structures + if depth >= MAX_DEPTH { + return Err(self.fail(self.pos, ParseErrorKind::MaxDepth)); + } + + self.skip_whitespace_and_comments()?; + + let ch = match self.peek() { + Some(ch) => ch, + // Unexpected end of input + None => return Err(self.fail(self.pos, ParseErrorKind::Syntax)), + }; + + match ch { + 'n' => self.parse_null(), + 't' => self.parse_true(), + 'f' => self.parse_false(), + '-' | '0'..='9' => self.parse_number(), + '"' => self.parse_string(), + '[' => self.parse_array(depth), + '{' => self.parse_object(depth), + _ => Err(self.fail(self.pos, ParseErrorKind::Syntax)), + } + } + + fn parse_null(&mut self) -> Result, ParseError> { + self.expect_str("null")?; + Ok(Value::Null) + } + + fn parse_true(&mut self) -> Result, ParseError> { + self.expect_str("true")?; + Ok(Value::Bool(true)) + } + + fn parse_false(&mut self) -> Result, ParseError> { + self.expect_str("false")?; + Ok(Value::Bool(false)) + } + + fn parse_number(&mut self) -> Result, ParseError> { + let start = self.pos; + + while self.pos < self.bytes.len() + && matches!(self.bytes[self.pos], b'0'..=b'9' | b'.' | b'-' | b'+' | b'e' | b'E') + { + self.pos += 1; + } + + if let Ok(num) = self.input[start..self.pos].parse::() + && num.is_finite() + { + Ok(Value::Number(num)) + } else { + Err(self.fail(self.pos, ParseErrorKind::Syntax)) + } + } + + fn parse_string(&mut self) -> Result, ParseError> { + self.expect(b'"')?; + + let mut result = ArenaString::new_in(self.arena); + + loop { + if self.pos >= self.bytes.len() { + // Unterminated string + return Err(self.fail(self.pos, ParseErrorKind::Syntax)); + } + + let b = self.bytes[self.pos]; + self.pos += 1; + + match b { + b'"' => break, + b'\\' => self.parse_escape(&mut result)?, + ..=0x1f => { + // Control characters must be escaped + return Err(self.fail(self.pos - 1, ParseErrorKind::Syntax)); + } + _ => { + let beg = self.pos - 1; + + while self.pos < self.bytes.len() + && !matches!(self.bytes[self.pos], b'"' | b'\\' | ..=0x1f) + { + self.pos += 1; + } + + result.push_str(&self.input[beg..self.pos]); + } + } + } + + Ok(Value::String(result.leak())) + } + + #[cold] + fn parse_escape(&mut self, result: &mut ArenaString) -> Result<(), ParseError> { + if self.pos >= self.bytes.len() { + // Unterminated escape sequence + return Err(self.fail(self.pos, ParseErrorKind::Syntax)); + } + + let b = self.bytes[self.pos]; + self.pos += 1; + + let ch = match b { + b'"' => b'"', + b'\\' => b'\\', + b'/' => b'/', + b'b' => b'\x08', + b'f' => b'\x0C', + b'n' => b'\n', + b'r' => b'\r', + b't' => b'\t', + b'u' => return self.parse_unicode_escape(result), + _ => { + // Invalid escape sequence + return Err(self.fail(self.pos - 2, ParseErrorKind::Syntax)); + } + }; + + result.push(ch as char); + Ok(()) + } + + #[cold] + fn parse_unicode_escape(&mut self, result: &mut ArenaString) -> Result<(), ParseError> { + let start = self.pos - 2; // parse_escape() already advanced past "\u" + let mut code = self.parse_hex4()?; + + if (0xd800..=0xdbff).contains(&code) { + if self.is_str("\\u") + && let _ = self.advance(2) + && let Ok(low) = self.parse_hex4() + && (0xdc00..=0xdfff).contains(&low) + { + code = 0x10000 + ((code - 0xd800) << 10) + (low - 0xdc00); + } else { + code = u32::MAX; + }; + } + + match char::from_u32(code) { + Some(c) => { + result.push(c); + Ok(()) + } + None => Err(self.fail(start, ParseErrorKind::Syntax)), + } + } + + fn parse_hex4(&mut self) -> Result { + let start = self.pos - 2; // parse_unicode_escape() already advanced past "\u" + + self.bytes + .get(self.pos..self.pos + 4) + .and_then(|b| { + self.pos += 4; + b.iter().try_fold(0u32, |acc, &b| { + let d = (b as char).to_digit(16)?; + Some((acc << 4) | d) + }) + }) + .ok_or_else(|| self.fail(start, ParseErrorKind::Syntax)) + } + + fn parse_array(&mut self, depth: usize) -> Result, ParseError> { + let mut values = Vec::new_in(self.arena); + let mut expects_comma = false; + + self.expect(b'[')?; + + loop { + self.skip_whitespace_and_comments()?; + + match self.peek() { + // Unexpected end of input + None => return Err(self.fail(self.pos, ParseErrorKind::Syntax)), + Some(']') => break, + Some(',') => { + if !expects_comma { + // Unexpected comma + return Err(self.fail(self.pos, ParseErrorKind::Syntax)); + } + + self.advance(1); + self.skip_whitespace_and_comments()?; + expects_comma = false; + } + Some(_) => { + if expects_comma { + // Missing comma + return Err(self.fail(self.pos, ParseErrorKind::Syntax)); + } + + values.push(self.parse_value(depth + 1)?); + expects_comma = true; + } + } + } + + self.expect(b']')?; + Ok(Value::Array(values.leak())) + } + + fn parse_object(&mut self, depth: usize) -> Result, ParseError> { + let mut entries = Vec::new_in(self.arena); + let mut expects_comma = false; + + self.expect(b'{')?; + + loop { + self.skip_whitespace_and_comments()?; + + match self.peek() { + // Unexpected end of input + None => return Err(self.fail(self.pos, ParseErrorKind::Syntax)), + Some(',') => { + if !expects_comma { + // Unexpected comma + return Err(self.fail(self.pos, ParseErrorKind::Syntax)); + } + + self.advance(1); + self.skip_whitespace_and_comments()?; + expects_comma = false; + } + Some('}') => break, + Some(_) => { + if expects_comma { + // Missing comma + return Err(self.fail(self.pos, ParseErrorKind::Syntax)); + } + + let key = match self.parse_string()? { + Value::String(s) => s, + // The entire point of parse_string is to return a string. + // If that fails, we all should start farming potatoes. + // This is essentially an unwrap_unchecked(). + _ => unsafe { unreachable_unchecked() }, + }; + self.skip_whitespace_and_comments()?; + self.expect(b':')?; + + let value = self.parse_value(depth + 1)?; + entries.push((key, value)); + expects_comma = true; + } + } + } + + self.expect(b'}')?; + Ok(Value::Object(entries.leak())) + } + + fn skip_bom(&mut self) { + if self.is_str("\u{feff}") { + self.advance(3); + } + } + + fn skip_whitespace_and_comments(&mut self) -> Result<(), ParseError> { + loop { + loop { + if self.pos >= self.bytes.len() { + return Ok(()); + } + match self.bytes[self.pos] { + b' ' | b'\t' | b'\n' | b'\r' => self.pos += 1, + _ => break, + } + } + + if self.is_str("//") { + self.pos += 2; + while self.pos < self.bytes.len() && self.bytes[self.pos] != b'\n' { + self.pos += 1; + } + } else if self.is_str("/*") { + let start = self.pos; + self.pos += 2; + loop { + while self.pos < self.bytes.len() && self.bytes[self.pos] != b'*' { + self.pos += 1; + } + if self.pos >= self.bytes.len() { + return Err(self.fail(start, ParseErrorKind::Syntax)); + } + if self.is_str("*/") { + self.pos += 2; + break; + } + self.pos += 1; + } + } else { + return Ok(()); + } + } + } + + fn expect(&mut self, expected: u8) -> Result<(), ParseError> { + if self.bytes.get(self.pos) == Some(&expected) { + self.pos += 1; + Ok(()) + } else { + Err(self.fail(self.pos, ParseErrorKind::Syntax)) + } + } + + fn expect_str(&mut self, expected: &str) -> Result<(), ParseError> { + if self.is_str(expected) { + self.pos += expected.len(); + Ok(()) + } else { + Err(self.fail(self.pos, ParseErrorKind::Syntax)) + } + } + + fn is_str(&self, expected: &str) -> bool { + self.bytes.get(self.pos..self.pos + expected.len()) == Some(expected.as_bytes()) + } + + fn peek(&self) -> Option { + if self.pos < self.bytes.len() { Some(self.bytes[self.pos] as char) } else { None } + } + + fn advance(&mut self, num: usize) { + self.pos += num; + } + + #[cold] + fn fail(&self, pos: usize, kind: ParseErrorKind) -> ParseError { + let mut cfg = MeasurementConfig::new(&self.bytes); + let pos = cfg.goto_offset(pos); + let line = pos.logical_pos.y.max(0) as usize + 1; + let column = pos.logical_pos.x.max(0) as usize + 1; + ParseError { kind, line, column } + } +} + +#[allow(non_snake_case)] +#[allow(clippy::invisible_characters)] +#[cfg(test)] +mod tests { + use stdext::arena::scratch_arena; + + use super::*; + + #[test] + fn test_null() { + let scratch = scratch_arena(None); + assert!(parse(&scratch, "null").unwrap().is_null()); + } + + #[test] + fn test_bool() { + let scratch = scratch_arena(None); + assert_eq!(parse(&scratch, "true").unwrap().as_bool(), Some(true)); + assert_eq!(parse(&scratch, "false").unwrap().as_bool(), Some(false)); + } + + #[test] + fn test_number() { + let scratch = scratch_arena(None); + assert_eq!(parse(&scratch, "0").unwrap().as_number(), Some(0.0)); + assert_eq!(parse(&scratch, "123").unwrap().as_number(), Some(123.0)); + assert_eq!(parse(&scratch, "-456").unwrap().as_number(), Some(-456.0)); + assert_eq!(parse(&scratch, "3.15").unwrap().as_number(), Some(3.15)); + assert_eq!(parse(&scratch, "1e10").unwrap().as_number(), Some(1e10)); + assert_eq!(parse(&scratch, "1.5e-3").unwrap().as_number(), Some(0.0015)); + } + + #[test] + fn test_string() { + let scratch = scratch_arena(None); + assert_eq!(parse(&scratch, r#""hello""#).unwrap().as_str(), Some("hello")); + assert_eq!(parse(&scratch, r#""hello\nworld""#).unwrap().as_str(), Some("hello\nworld")); + assert_eq!(parse(&scratch, r#""\u0041\u0042\u0043""#).unwrap().as_str(), Some("ABC")); + } + + #[test] + fn test_array() { + let scratch = scratch_arena(None); + let value = parse(&scratch, "[1, 2, 3]").unwrap(); + let arr = value.as_array().unwrap(); + assert_eq!(arr.len(), 3); + assert_eq!(arr[0].as_number(), Some(1.0)); + assert_eq!(arr[1].as_number(), Some(2.0)); + assert_eq!(arr[2].as_number(), Some(3.0)); + } + + #[test] + fn test_object() { + let scratch = scratch_arena(None); + let value = parse(&scratch, r#"{"a": 1, "b": true}"#).unwrap(); + let obj = value.as_object().unwrap(); + assert_eq!(obj.get_number("a"), Some(1.0)); + assert_eq!(obj.get_bool("b"), Some(true)); + } + + #[test] + fn test_comments() { + let scratch = scratch_arena(None); + let input = r#"{ + // Line comment + "a": 1, + /* Block comment */ + "b": 2 + }"#; + let value = parse(&scratch, input).unwrap(); + let obj = value.as_object().unwrap(); + assert_eq!(obj.get_number("a"), Some(1.0)); + assert_eq!(obj.get_number("b"), Some(2.0)); + } + + #[test] + fn test_trailing_comma() { + let scratch = scratch_arena(None); + assert!(parse(&scratch, "[1, 2, 3,]").is_ok()); + assert!(parse(&scratch, r#"{"a": 1,}"#).is_ok()); + } + + #[test] + fn test_nested() { + let scratch = scratch_arena(None); + let input = r#"{ + "nested": { + "array": [1, 2, {"deep": true}] + } + }"#; + let value = parse(&scratch, input).unwrap(); + let obj = value.as_object().unwrap(); + let nested = obj.get_object("nested").unwrap(); + let array = nested.get_array("array").unwrap(); + assert_eq!(array.len(), 3); + let deep_obj = array[2].as_object().unwrap(); + assert_eq!(deep_obj.get_bool("deep"), Some(true)); + } + + #[test] + fn test_max_depth() { + let scratch = scratch_arena(None); + let mut input = String::new(); + for _ in 0..100 { + input.push('['); + } + for _ in 0..100 { + input.push(']'); + } + assert!(parse(&scratch, &input).is_err()); + } + + #[test] + fn test_invalid_json() { + let scratch = scratch_arena(None); + assert!(parse(&scratch, "").is_err()); + assert!(parse(&scratch, "{").is_err()); + assert!(parse(&scratch, r#"{"a":}"#).is_err()); + assert!(parse(&scratch, "[1, 2,").is_err()); + assert!(parse(&scratch, r#""unterminated"#).is_err()); + } + + #[test] + fn test_control_chars() { + let scratch = scratch_arena(None); + // Control characters must be escaped + assert!(parse(&scratch, "\"\x01\"").is_err()); + } + + #[test] + fn test_unicode() { + let scratch = scratch_arena(None); + // Test emoji (surrogate pair) + assert_eq!(parse(&scratch, r#""\uD83D\uDE00""#).unwrap().as_str(), Some("😀")); + // Test regular unicode + assert_eq!(parse(&scratch, r#""\u2764""#).unwrap().as_str(), Some("❤")); + } +} diff --git a/crates/edit/src/lib.rs b/crates/edit/src/lib.rs index 59e3df0ed022..126cbce525f9 100644 --- a/crates/edit/src/lib.rs +++ b/crates/edit/src/lib.rs @@ -29,6 +29,7 @@ pub mod hash; pub mod helpers; pub mod icu; pub mod input; +pub mod json; pub mod oklab; pub mod path; pub mod simd; From 1f9a7eb08e9a24b9eb99d9f01dc8340f25e36591 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 20 Jan 2026 22:15:55 +0100 Subject: [PATCH 2/3] Fix build --- crates/edit/benches/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/edit/benches/lib.rs b/crates/edit/benches/lib.rs index 0aa04798b8d1..57ccc73ef324 100644 --- a/crates/edit/benches/lib.rs +++ b/crates/edit/benches/lib.rs @@ -5,15 +5,14 @@ use std::hint::black_box; use std::io::Cursor; -use std::ops::Range; use std::{mem, vec}; use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; use edit::helpers::*; use edit::simd::MemsetSafe; use edit::{buffer, hash, json, oklab, simd, unicode}; +use stdext::arena; use stdext::arena::{Arena, scratch_arena}; -use stdext::{arena, varint}; struct EditingTracePatch<'a>(usize, usize, &'a str); From 322618b3fe834a3c24c4f8161af63882d27363d0 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 20 Jan 2026 23:56:56 +0100 Subject: [PATCH 3/3] Address feedback --- crates/edit/src/json.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/edit/src/json.rs b/crates/edit/src/json.rs index 6238a1f1a5a1..368de37c8c7c 100644 --- a/crates/edit/src/json.rs +++ b/crates/edit/src/json.rs @@ -622,6 +622,7 @@ mod tests { assert!(parse(&scratch, "").is_err()); assert!(parse(&scratch, "{").is_err()); assert!(parse(&scratch, r#"{"a":}"#).is_err()); + assert!(parse(&scratch, r#"{5:1}"#).is_err()); assert!(parse(&scratch, "[1, 2,").is_err()); assert!(parse(&scratch, r#""unterminated"#).is_err()); }