From e4dc07ef6c4c3ff7c6373fab30c50212b415bc28 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 14 Apr 2025 00:27:22 +0200 Subject: [PATCH 01/20] init --- content/blog/ugly/index.md | 117 +++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 content/blog/ugly/index.md diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md new file mode 100644 index 00000000..26734ad2 --- /dev/null +++ b/content/blog/ugly/index.md @@ -0,0 +1,117 @@ ++++ +title = "When Rust Gets Ugly" +date = 2025-04-14 +draft = false +template = "article.html" +[extra] +series = "Idiomatic Rust" ++++ + +Its clear that Rust has a readability problem. +At least that's what I hear on a regular basis. +After programming in Rust for 10 years, I think you have to dedicate some time to learn it properly +and that your background will inform how your Rust code looks. + +Let's look at a simple task: parsing a `.env` file. +After all, how hard could it be? + +```sh +DATABASE_URL=postgres://user:password@localhost:5432/mydb +API_KEY=12345-abcde-67890-fghij +``` + +The goal is to parse the above file and return a data structure that contains the key-value pairs. + +I invite you to write your own version first. +As a little hint, consider the edge-cases, which could occur. 
+ +## A First Attempt + +At times I see code like the following to parse a `.env` file: + +```rust +use std::collections::HashMap; + +struct ParsedLine<'a> { + key: &'a str, + value: &'a str, +} + +fn parse_line<'a>(line: &'a str) -> Option> { + // Split on '=' and convert directly to array + let parts: Vec<&str> = line.split('=').collect(); + + // Dangerous direct indexing without bounds checking + if parts.len() < 2 { + return None; + } + + let key = parts[0].trim(); + + // Could have multiple '=' characters, just use the first split + let value = parts[1].trim(); + + // Return sentinel for empty key + if key.len() == 0 { + return Some(ParsedLine { key: "SENTINEL_EMPTY_KEY", value: "SENTINEL_EMPTY_VALUE" }); + } + + Some(ParsedLine { key, value }) +} + +fn parse_config_file<'a>(src: &'a str) -> HashMap { + let lines = src.lines().collect::>(); + let mut idx = 0; + let mut cfg: HashMap = HashMap::new(); + + while idx < lines.len() { + let lref = &lines[idx]; + let mut l = *lref; + l = l.trim(); + + if l.len() == 0 || l.starts_with("#") { + idx += 1; + continue; + } + + if l.contains('=') { + let maybe_parsed = parse_line(l); + if maybe_parsed.is_some() { + let parsed = maybe_parsed.unwrap(); + + let k = parsed.key.to_string(); + let mut v = parsed.value.to_string(); + + cfg.insert(k, v); + } else { + println!("Error parsing line: {}", l); + } + } else { + println!("Line is missing '=' but was allowed through: {}", l); + } + + idx += 1; + } + + cfg +} +``` + +I've seen way worse, but I would agree that this code looks quite ugly. +However, I would argue that it's not because of the syntax, but rather the semantics +and that there are way more ergonomic solutions in Rust. 
+ +Immediately, one can make out a few red flags: +- The code is littered with `unwrap()` calls +- The code uses a sentinel value for empty values +- Manual indexing into arrays +- Lifetime annotations -- a sign of premature optimization +- Cryptic variable names + +It is safe to say that the code is not idiomatic Rust. + + + + + +Blog post idea: "This can never panic" and other lies we tell ourselves \ No newline at end of file From fa703afa2dd0c0f425f1b86612b68117412441c4 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 14 Apr 2025 00:57:47 +0200 Subject: [PATCH 02/20] wip --- content/blog/ugly/index.md | 39 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index 26734ad2..4b6a37a7 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -38,22 +38,14 @@ struct ParsedLine<'a> { } fn parse_line<'a>(line: &'a str) -> Option> { - // Split on '=' and convert directly to array let parts: Vec<&str> = line.split('=').collect(); - // Dangerous direct indexing without bounds checking - if parts.len() < 2 { - return None; - } - let key = parts[0].trim(); - - // Could have multiple '=' characters, just use the first split let value = parts[1].trim(); // Return sentinel for empty key if key.len() == 0 { - return Some(ParsedLine { key: "SENTINEL_EMPTY_KEY", value: "SENTINEL_EMPTY_VALUE" }); + return Some(ParsedLine { key: "", value: "" }); } Some(ParsedLine { key, value }) @@ -68,26 +60,25 @@ fn parse_config_file<'a>(src: &'a str) -> HashMap { let lref = &lines[idx]; let mut l = *lref; l = l.trim(); + + if l.starts_with("#") { + idx += 1; + continue; + } - if l.len() == 0 || l.starts_with("#") { + if l.len() == 0 { idx += 1; continue; } - if l.contains('=') { - let maybe_parsed = parse_line(l); - if maybe_parsed.is_some() { - let parsed = maybe_parsed.unwrap(); - - let k = parsed.key.to_string(); - let mut v = parsed.value.to_string(); - - 
cfg.insert(k, v); - } else { - println!("Error parsing line: {}", l); - } - } else { - println!("Line is missing '=' but was allowed through: {}", l); + let parsed = parse_line(l); + if parsed.is_some() { + // This is safe because we just checked that it's Some + let p = parsed.unwrap(); + + let k = p.key.to_string(); + let v = p.value.to_string(); + cfg.insert(k, v); } idx += 1; From 2485d5680dcc387f2ce26708f57f6fdd388730b4 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 14 Apr 2025 01:10:05 +0200 Subject: [PATCH 03/20] unify --- content/blog/ugly/index.md | 50 ++++++++++++-------------------------- 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index 4b6a37a7..cce33277 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -32,27 +32,9 @@ At times I see code like the following to parse a `.env` file: ```rust use std::collections::HashMap; -struct ParsedLine<'a> { - key: &'a str, - value: &'a str, -} - -fn parse_line<'a>(line: &'a str) -> Option> { - let parts: Vec<&str> = line.split('=').collect(); - - let key = parts[0].trim(); - let value = parts[1].trim(); - - // Return sentinel for empty key - if key.len() == 0 { - return Some(ParsedLine { key: "", value: "" }); - } - - Some(ParsedLine { key, value }) -} - fn parse_config_file<'a>(src: &'a str) -> HashMap { let lines = src.lines().collect::>(); + let mut idx = 0; let mut cfg: HashMap = HashMap::new(); @@ -61,26 +43,26 @@ fn parse_config_file<'a>(src: &'a str) -> HashMap { let mut l = *lref; l = l.trim(); - if l.starts_with("#") { - idx += 1; - continue; - } - - if l.len() == 0 { + if l.starts_with("#") || l.len() == 0 { idx += 1; continue; } - let parsed = parse_line(l); - if parsed.is_some() { - // This is safe because we just checked that it's Some - let p = parsed.unwrap(); - - let k = p.key.to_string(); - let v = p.value.to_string(); - cfg.insert(k, v); + let parts: Vec<&str> = l.split('=').collect(); + + if 
parts.len() >= 2 { + let key = parts[0].trim(); + let value = parts[1].trim(); + + if key.len() > 0 { + cfg.insert(key.to_string(), v.to_string()); + } else { + println!("Empty key found, skipping"); + } + } else { + println!("Line is missing '=': {}", l); } - + idx += 1; } From 531c3a89b2358808d53def904ddd16fe37251f61 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 14 Apr 2025 01:24:11 +0200 Subject: [PATCH 04/20] convolute --- content/blog/ugly/index.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index cce33277..4eba8ec0 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -33,7 +33,7 @@ At times I see code like the following to parse a `.env` file: use std::collections::HashMap; fn parse_config_file<'a>(src: &'a str) -> HashMap { - let lines = src.lines().collect::>(); + let lines = src.split('\n').collect::>(); let mut idx = 0; let mut cfg: HashMap = HashMap::new(); @@ -48,14 +48,14 @@ fn parse_config_file<'a>(src: &'a str) -> HashMap { continue; } - let parts: Vec<&str> = l.split('=').collect(); + let parts = l.split('=').collect::>(); if parts.len() >= 2 { - let key = parts[0].trim(); - let value = parts[1].trim(); + let k: &str = parts[0].trim(); + let v: &str = parts[1].trim(); if key.len() > 0 { - cfg.insert(key.to_string(), v.to_string()); + cfg.insert(k.to_string(), v.to_string()); } else { println!("Empty key found, skipping"); } From 568de22bb4fc6d7a4783cad16424e7a1b881cea4 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 14 Apr 2025 01:34:54 +0200 Subject: [PATCH 05/20] wip --- content/blog/ugly/index.md | 40 +++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index 4eba8ec0..4b04c1a9 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -20,7 +20,7 @@ DATABASE_URL=postgres://user:password@localhost:5432/mydb 
API_KEY=12345-abcde-67890-fghij ``` -The goal is to parse the above file and return a data structure that contains the key-value pairs. +The goal is to parse the above content from a file called `.env` and return a data structure that contains the key-value pairs. I invite you to write your own version first. As a little hint, consider the edge-cases, which could occur. @@ -32,9 +32,14 @@ At times I see code like the following to parse a `.env` file: ```rust use std::collections::HashMap; -fn parse_config_file<'a>(src: &'a str) -> HashMap { - let lines = src.split('\n').collect::>(); - +fn parse_config_file<'a>(path: &'a str) -> HashMap<&'str, &'str> { + let p = Path::new(path); + let mut file = File::open(p).unwrap(); + let mut bytes = Vec::new(); + file.read_to_end(&mut bytes).unwrap(); + let s = String::from_utf8_lossy(&bytes); + let lines = s.split('\n').collect::>(); + let mut idx = 0; let mut cfg: HashMap = HashMap::new(); @@ -43,7 +48,14 @@ fn parse_config_file<'a>(src: &'a str) -> HashMap { let mut l = *lref; l = l.trim(); - if l.starts_with("#") || l.len() == 0 { + // Skip empty lines + if l.len() == 0 { + idx += 1; + continue; + } + + // Skip comments + if l.starts_with("#") { idx += 1; continue; } @@ -54,7 +66,7 @@ fn parse_config_file<'a>(src: &'a str) -> HashMap { let k: &str = parts[0].trim(); let v: &str = parts[1].trim(); - if key.len() > 0 { + if k.len() > 0 { cfg.insert(k.to_string(), v.to_string()); } else { println!("Empty key found, skipping"); @@ -70,19 +82,25 @@ fn parse_config_file<'a>(src: &'a str) -> HashMap { } ``` -I've seen way worse, but I would agree that this code looks quite ugly. -However, I would argue that it's not because of the syntax, but rather the semantics -and that there are way more ergonomic solutions in Rust. +Let's be clear: this is terrifying code with many footguns. +And yet, people use it as an excuse to call Rust an ugly language and give up on it. 
-Immediately, one can make out a few red flags: +However, I would argue that it's not because of Rust's syntax, but rather +because there are way more ergonomic solutions in Rust. +Typically, better semantics lead to easier to read syntax in Rust. +If you feel like you're fighting the language (not just its borrow-checker!), +then there's a chance that the language is trying to tell you that you're working against it. + +Immediately, one can make out a few red flags from the code above: - The code is littered with `unwrap()` calls -- The code uses a sentinel value for empty values - Manual indexing into arrays - Lifetime annotations -- a sign of premature optimization - Cryptic variable names It is safe to say that the code is not idiomatic Rust. +Okay, but how can we do better? + From 932a25d35183deaa34b2ac6a8f86f53fd9baffe0 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 14 Apr 2025 02:02:42 +0200 Subject: [PATCH 06/20] edit --- content/blog/ugly/index.md | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index 4b04c1a9..a4b67206 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -33,15 +33,15 @@ At times I see code like the following to parse a `.env` file: use std::collections::HashMap; fn parse_config_file<'a>(path: &'a str) -> HashMap<&'str, &'str> { - let p = Path::new(path); - let mut file = File::open(p).unwrap(); + let p = Path::new(&path); + let mut file = File::open(&p).unwrap(); let mut bytes = Vec::new(); file.read_to_end(&mut bytes).unwrap(); let s = String::from_utf8_lossy(&bytes); let lines = s.split('\n').collect::>(); let mut idx = 0; - let mut cfg: HashMap = HashMap::new(); + let mut cfg: HashMap<&'a str, &'a str> = HashMap::new(); while idx < lines.len() { let lref = &lines[idx]; @@ -62,23 +62,18 @@ fn parse_config_file<'a>(path: &'a str) -> HashMap<&'str, &'str> { let parts = l.split('=').collect::>(); - if parts.len() 
>= 2 { - let k: &str = parts[0].trim(); + let k: &str = parts[0].trim(); + if k.len() > 0 { let v: &str = parts[1].trim(); - - if k.len() > 0 { - cfg.insert(k.to_string(), v.to_string()); - } else { - println!("Empty key found, skipping"); - } + cfg.insert(k.to_string(), v.to_string()); } else { - println!("Line is missing '=': {}", l); + println!("Error in line {:?}", parts); } idx += 1; } - cfg + return cfg; } ``` @@ -97,6 +92,9 @@ Immediately, one can make out a few red flags from the code above: - Lifetime annotations -- a sign of premature optimization - Cryptic variable names +On top of that, there are plenty of business logic bugs in the code, +because the code makes quite a few unjustified assumptions. + It is safe to say that the code is not idiomatic Rust. Okay, but how can we do better? From 9b9d35f2924c6c07bf16e67e5f774bb545810153 Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 15 Apr 2025 15:53:37 +0200 Subject: [PATCH 07/20] wip --- content/blog/ugly/index.md | 243 +++++++++++++++++++++++++++++++++---- 1 file changed, 219 insertions(+), 24 deletions(-) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index a4b67206..485c9b27 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -12,12 +12,19 @@ At least that's what I hear on a regular basis. After programming in Rust for 10 years, I think you have to dedicate some time to learn it properly and that your background will inform how your Rust code looks. -Let's look at a simple task: parsing a `.env` file. -After all, how hard could it be? +Let's look at a simple example: parsing a `.env` file in Rust. After all, how hard could it be? 
```sh -DATABASE_URL=postgres://user:password@localhost:5432/mydb -API_KEY=12345-abcde-67890-fghij +APP_ENV=production +API_KEY=my_api_key + +LOG_FILE=app.log + +DB_HOST=localhost +DB_PORT=5432 +DB_USERNAME=myuser +DB_PASSWORD=mypassword +DB_NAME=mydb ``` The goal is to parse the above content from a file called `.env` and return a data structure that contains the key-value pairs. @@ -25,26 +32,31 @@ The goal is to parse the above content from a file called `.env` and return a da I invite you to write your own version first. As a little hint, consider the edge-cases, which could occur. -## A First Attempt +## A Painful First Attempt -At times I see code like the following to parse a `.env` file: +At times I see code like the following: ```rust use std::collections::HashMap; +use std::fs::File; +use std::io::Read; +use std::path::Path; -fn parse_config_file<'a>(path: &'a str) -> HashMap<&'str, &'str> { +fn parse_config_file<'a>(path: &'a str) -> HashMap { let p = Path::new(&path); let mut file = File::open(&p).unwrap(); let mut bytes = Vec::new(); file.read_to_end(&mut bytes).unwrap(); - let s = String::from_utf8_lossy(&bytes); - let lines = s.split('\n').collect::>(); + + let s = String::from_utf8_lossy(&bytes).to_string(); + + let lines_with_refs: Vec<&'a str> = s.split('\n').collect(); let mut idx = 0; - let mut cfg: HashMap<&'a str, &'a str> = HashMap::new(); + let mut cfg: HashMap = HashMap::new(); - while idx < lines.len() { - let lref = &lines[idx]; + while idx < lines_with_refs.len() { + let lref = &lines_with_refs[idx]; let mut l = *lref; l = l.trim(); @@ -75,32 +87,215 @@ fn parse_config_file<'a>(path: &'a str) -> HashMap<&'str, &'str> { return cfg; } + +fn main() { + // Test with a string literal instead of a file for demonstration + let config_content = r#" + # This is a config file + host = localhost + port = 8080 + user = admin + + # This line is empty on purpose + + password= secret + + # Edge cases + indented_key = indented_value + tab_key = tab_value 
+ key with spaces = value with spaces + quotes="quoted value" + escaped\=key = escaped value + # = in comments shouldn't be processed + empty_value= + =empty_key + duplicate=first + duplicate=second + trailing_whitespace = value with spaces + spaced==double_equals + key=value#not_a_comment + "quoted key" = should_fail + multi\ + line\ + key=multiline_value + + # Invalid lines + justkey + "#; + + // Write the content to a temporary file + std::fs::write("temp_config.env", config_content).unwrap(); + + // Parse the file + let config = parse_config_file("temp_config.env"); + + // Print the results + println!("\nParsed config entries:"); + let mut keys: Vec = config.keys().cloned().collect(); + keys.sort(); + + for key in keys { + println!("{} = {}", key, config.get(&key).unwrap()); + } + + // Display some test results + println!("\nTest results:"); + + // Test 1: Check if basic keys are parsed correctly + if let Some(host) = config.get("host") { + println!("PASS: Basic key 'host' parsed correctly: {}", host); + } else { + println!("FAIL: Basic key 'host' not found"); + } + + // Test 2: Check if indentation is handled correctly + if let Some(value) = config.get("indented_key") { + println!("PASS: Indented key parsed correctly: {}", value); + } else { + println!("FAIL: Indented key not found"); + } + + // Test 3: Check if spaces in keys are preserved (bug) + if let Some(value) = config.get("key with spaces") { + println!("PASS: Key with spaces parsed correctly: {}", value); + } else { + println!("FAIL: Key with spaces not found (as expected with simple parser)"); + } + + // Test 4: Check for duplicate key behavior + if let Some(value) = config.get("duplicate") { + println!("NOTE: For duplicate keys, last value wins: {}", value); + } + + // Test 5: Check if escaped equals sign is handled (it's not) + if let Some(value) = config.get("escaped\\=key") { + println!("PASS: Escaped equals in key handled correctly"); + } else { + println!("FAIL: Escaped equals not handled 
correctly (expected with simple parser)"); + } + + // Test 6: Check comment character in value (will fail) + if let Some(value) = config.get("key") { + if value == "value#not_a_comment" { + println!("PASS: Comment character in value preserved"); + } else { + println!("FAIL: Comment character in value not preserved: {}", value); + } + } else { + println!("FAIL: Key with comment in value not found"); + } + + // Test 7: Check multiline key handling (will fail) + if let Some(value) = config.get("multi\\") { + println!("PASS: Multiline key handled"); + } else { + println!("FAIL: Multiline key not handled (expected with simple parser)"); + } + + // Clean up the temporary file + std::fs::remove_file("temp_config.env").unwrap_or_default(); +} ``` -Let's be clear: this is terrifying code with many footguns. +Let's be clear: there are many antipatterns in the above code. +Many of them have nothing to do with Rust, but with software engineering in general. And yet, people use it as an excuse to call Rust an ugly language and give up on it. -However, I would argue that it's not because of Rust's syntax, but rather -because there are way more ergonomic solutions in Rust. -Typically, better semantics lead to easier to read syntax in Rust. +I would argue that this code is ugly less because of Rust's syntax, but rather +because the author is unaware or ignorant of the ergonomics Rust provides. + +Typically, **better semantics lead to nicer syntax in Rust**. +Many people get that backwards. + If you feel like you're fighting the language (not just its borrow-checker!), -then there's a chance that the language is trying to tell you that you're working against it. +then there's a chance that the language is trying to **tell you something**. + +It bears repeating: this is terrifying code with many footguns. 
+Without much effort, one can make out a few red flags from the code above: -Immediately, one can make out a few red flags from the code above: - The code is littered with `unwrap()` calls +- Unnecessary mutability - Manual indexing into arrays -- Lifetime annotations -- a sign of premature optimization +- Unnecessary lifetime annotations - Cryptic variable names +- Very imperative coding style + +The above not just makes the code harder to read. +What is worse is that it leads to business logic bugs in the code, because the code makes quite a few unjustified assumptions +and the way the code is written makes it hard for Rust to help you out. + +I think we can all agree that the code is not idiomatic Rust. + +## The Five Stages of Grief About Rust Syntax + +Whenever I see people struggle with Rust syntax, I'm reminded of the five stages of grief: + +### Denial + +> "There's nothing wrong with my code - it works perfectly fine! The syntax is just Rust's problem, not mine." + +In this stage, developers continue writing C-style code with Rust syntax, ignoring compiler warnings and adding unnecessary lifetime annotations everywhere. They often blame the language for being "too complex" while refusing to revisit fundamental concepts. + +### Anger + +> "Why does Rust need all these lifetime annotations and explicit ownership? C++ never made me deal with this nonsense!" + +Frustration builds as developers encounter repeated compiler errors. They begin to resent the borrow checker and might abandon half-finished projects in favor of "more practical" languages. Excessive code comments containing rants about Rust's design decisions become common. + +### Bargaining + +> "Maybe if I just use more `.unwrap()` calls and sprinkle in some `unsafe` blocks, I can write Rust the way I want to." + +Desperate to make progress, developers start making dangerous compromises. 
They liberally use `.clone()` to silence ownership errors, wrap simple operations in `unsafe` blocks, and litter code with `.unwrap()` calls, effectively bypassing Rust's safety guarantees while keeping all of its verbosity. + +### Depression + +> "I'll never get used to this language. My code is a mess of references, clones, and unnecessary mutations that even I can't read anymore." + +Reality sets in as technical debt accumulates. Code becomes increasingly convoluted with superfluous mutable variables and overly complex data structures. Performance suffers from unnecessary allocations, and what started as a promising project now feels like an unreadable jumble of syntax. + +### Acceptance + +> "I see now that these idioms exist for a reason - my code is not only safer but actually more readable when I embrace Rust's patterns instead of fighting them." + +Finally, developers begin embracing idiomatic patterns and the design philosophy behind Rust. They refactor their spaghetti code into clean, expressive modules that leverage the type system rather than fight it. Performance improves, code becomes more maintainable, and they wonder how they ever wrote memory-unsafe code with confidence. + +Okay, you (or your team-member) reached acceptance, how can you do better? + +## Let Go Of Old Bad Habits + + +The first step is to acknowledge that the code goes against Rust's design principles. +Based on this, we can systematically improve the code. + +Ugly code is band-aid around bad habits. +Learn to do it the "Rustic way." + +We have seen plenty of ways to write better Rust code in previous articles: + +- Think in expressions +- Immutability by default +- Leaning into the typesystem +- Iterator patterns instead of manual iteration +- Proper error handling + +Even just following this basic advice, we can get it into a much better shape. + + -On top of that, there are plenty of business logic bugs in the code, -because the code makes quite a few unjustified assumptions. 
-It is safe to say that the code is not idiomatic Rust. +Blog post idea: "This can never panic" and other lies we tell ourselves +The language doesn't get more ugly beyond a certain point of complexity. +I can't say the same about C++. -Okay, but how can we do better? +what makes Rust "ugly" isn't just syntax but exposing complex concepts. +Physics over optics, not everything is about cosmetics. +"I don't want ugly Rust-like typing in my favorite language. It may look good in Rust, but it looks horrible in Python." +It's also pretty easy to go into the other extreme and make everything generic. That's also hard to read. +People don't confront their bad habits and find workarounds. That's the origin of ugly code. -Blog post idea: "This can never panic" and other lies we tell ourselves \ No newline at end of file +Assumptions about the program’s execution order are often wrong \ No newline at end of file From fb52888c8269a95e3ae7fb6d2abd7424ad0b253d Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 17 Apr 2025 16:08:00 +0200 Subject: [PATCH 08/20] wip --- content/blog/ugly/index.md | 313 ++++++++++++++++++++++++++++++++++++- 1 file changed, 311 insertions(+), 2 deletions(-) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index 485c9b27..be95bda6 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -67,7 +67,7 @@ fn parse_config_file<'a>(path: &'a str) -> HashMap { } // Skip comments - if l.starts_with("#") { + if l[0] == '#' { idx += 1; continue; } @@ -273,13 +273,322 @@ Learn to do it the "Rustic way." We have seen plenty of ways to write better Rust code in previous articles: +- Read the standard library documentation - Think in expressions - Immutability by default - Leaning into the typesystem - Iterator patterns instead of manual iteration - Proper error handling +- Split up the problem into smaller parts + +Even just applying these basic techniques, we can get it into a much better shape. 
+ + +Let's start by removing this boilerplate + +```rust +let p = Path::new(&path); +let mut file = File::open(&p).unwrap(); +let mut bytes = Vec::new(); +file.read_to_end(&mut bytes).unwrap(); +let s = String::from_utf8_lossy(&bytes).to_string(); +``` + +and instead calling [`std::fs::read_to_string`](https://doc.rust-lang.org/std/fs/fn.read_to_string.html): + +```rust +let s = read_to_string(path).unwrap(); +``` + +Rust is really good at inferring types. That's why we don't need to specify the type of +our `HashMap` explicitly. + +```rust +let mut config = HashMap::new(); +``` + +Next, manual string splitting is also unnecessary. + +```rust +let lines_with_refs: Vec<&'a str> = s.split('\n').collect(); +``` + +The above can be replaced with: + +```rust +let lines = s.lines(); +``` + +This returns an [iterator over the lines of a string](https://doc.rust-lang.org/std/primitive.str.html#method.lines). + +With that, we can simply iterate over each line: + +```rust +for line in s.lines() { + let line = line.trim(); + + if line.is_empty() || line.starts_with("#") { + continue; + } + + // ... +} +``` + +Note that we shadow `line` with `line.trim()`. +That is a common practice in Rust. +This way we don't have to come up with a new name for the trimmed line +and we also don't have to fall back to cryptic names like `lref` or `l` anymore. + +Instead of `line.len() == 0`, we can use `line.is_empty()`. + +We can also use `line.starts_with("#")` instead of checking for `l[0] == '#'`. + +Next, let's tackle this part: + +```rust +let parts = l.split('=').collect::>(); + +let k: &str = parts[0].trim(); +if k.len() > 0 { + let v: &str = parts[1].trim(); + cfg.insert(k.to_string(), v.to_string()); +} else { + println!("Error in line {:?}", parts); +} +``` + +Note how we access `parts[0]` and `parts[1]` without checking if they exist. 
+Let's lean into the typesystem a little more and use pattern matching to destructure the result of `split`: + +```rust +match l.split_once('=') { + Some((k, v)) => { + let k = k.trim(); + if !k.is_empty() { + let v = v.trim(); + config.insert(k.to_string(), v.to_string()); + } else { + println!("Error in line {:?}", parts); + } + } + None => println!("Error in line {:?}", parts), +} +``` + +With that, we end up with an improved version of the code: + +```rust +use std::collections::HashMap; +use std::fs::File; +use std::io::Read; +use std::path::Path; + +fn parse_config_file<'a>(path: &'a str) -> HashMap { + let s = read_to_string(path).unwrap(); + + let mut config = HashMap::new(); + for line in s.lines() { + let line = line.trim(); + + if line.is_empty() || line.starts_with("#") { + continue; + } + + match l.split_once('=') { + Some((k, v)) => { + let k = k.trim(); + if !k.is_empty() { + let v = v.trim(); + config.insert(k.to_string(), v.to_string()); + } else { + println!("Error in line {:?}", parts); + } + } + None => println!("Error in line {:?}", parts), + } + + } + + return config; +} +``` + +This is already cleaner. We can go one step further with proper error handling. +It depends on the business logic how you want to handle invalid lines. +Here's a version, which returns an error in the case: + +```rust +fn parse_config_file<'a>(path: &'a str) -> Result, ParseError> { + let s = read_to_string(path)?; + + let mut config = HashMap::new(); + for line in s.lines() { + let line = line.trim(); + + if line.is_empty() || line.starts_with("#") { + continue; + } + + match l.split_once('=') { + Some((k, v)) => { + let k = k.trim(); + if !k.is_empty() { + let v = v.trim(); + config.insert(k.to_string(), v.to_string()); + } else { + return Err(ParseError::InvalidLine(line.to_string())); + } + } + None => return Err(ParseError::InvalidLine(line.to_string())), + } + + } + + Ok(config) +} +``` + +Next, let's write a function for parsing individual lines. 
+ +```rust +fn parse_line(line: &str) -> Result, ParseError> { + let line = line.trim(); + + if line.is_empty() || line.starts_with("#") { + return Ok(None); + } + + match line.split_once('=') { + Some((k, v)) => { + let k = k.trim(); + if !k.is_empty() { + let v = v.trim(); + Ok(Some((k.to_string(), v.to_string()))) + } else { + Err(ParseError::InvalidLine(line.to_string())) + } + } + None => Err(ParseError::InvalidLine(line.to_string())), + } +} +``` + +We can even introduce an enum to represent a parsed line: + +```rust +#[derive(Debug)] +enum ParsedLine { + Comment, + Empty, + KeyValue(String, String), +} +``` + +Then we can use it like this: + +```rust +fn parse_line(line: &str) -> Result { + let line = line.trim(); + + if line.is_empty() { + return Ok(ParsedLine::Empty); + } + + if line.starts_with("#") { + return Ok(ParsedLine::Comment); + } + + match line.split_once('=') { + Some((k, v)) => { + let k = k.trim(); + if !k.is_empty() { + let v = v.trim(); + Ok(ParsedLine::KeyValue(k.to_string(), v.to_string())) + } else { + Err(ParseError::InvalidLine(line.to_string())) + } + } + None => Err(ParseError::InvalidLine(line.to_string())), + } +} +``` + +The concept is to rely on the type system to make the code more readable and maintainable. +We could even go one step further and express more complexity in the type system. 
+ +```rust +struct KeyValue { + key: String, + value: String, +} + +impl TryFrom<&str> for KeyValue { + type Error = ParseError; + + fn try_from(line: &str) -> Result { + let line = line.trim(); + + if line.is_empty() || line.starts_with("#") { + return Err(ParseError::InvalidLine(line.to_string())); + } + + match line.split_once('=') { + Some((k, v)) => { + let k = k.trim(); + if !k.is_empty() { + let v = v.trim(); + Ok(KeyValue { + key: k.to_string(), + value: v.to_string(), + }) + } else { + Err(ParseError::InvalidLine(line.to_string())) + } + } + None => Err(ParseError::InvalidLine(line.to_string())), + } + } +} +``` + +It might look like we made the problem more complicated than it is. +However, we can test the `KeyValue` struct in isolation now and +we handle the errors close to the source of the problem. + +Our main function now becomes way easier: + +```rust +fn parse_config_file<'a>(path: &'a str) -> Result, ParseError> { + let s = read_to_string(path)?; + + let mut config = HashMap::new(); + for line in s.lines() { + match KeyValue::try_from(line) { + Ok(kv) => { + config.insert(kv.key, kv.value); + } + Err(e) => return Err(e), + } + } + + Ok(config) +} +``` + +And by extension we can also turn the `parse_config_file` into a struct: + +```rust +struct EnvFileParser { + + + + + + + + -Even just following this basic advice, we can get it into a much better shape. From 590f3b85b636ed0936e8aacd6e4ecb82cd9d9ea2 Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 1 May 2025 12:49:17 +0200 Subject: [PATCH 09/20] wip --- content/blog/ugly/index.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index be95bda6..2d82b25b 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -607,4 +607,6 @@ It's also pretty easy to go into the other extreme and make everything generic. People don't confront their bad habits and find workarounds. That's the origin of ugly code. 
-Assumptions about the program’s execution order are often wrong \ No newline at end of file +Assumptions about the program’s execution order are often wrong + +You have to understand the background: Rust is a systems programming language. It competes with C/C++ and for that it has better readability than C/C++ with modern syntax \ No newline at end of file From cfaad2e99fe57d108a64da8539434ea8a5059b43 Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 1 May 2025 20:55:48 +0200 Subject: [PATCH 10/20] wip first complete draft --- content/blog/ugly/index.md | 359 +++++++++++++++++++++++++------------ 1 file changed, 246 insertions(+), 113 deletions(-) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index 2d82b25b..42e11944 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -7,34 +7,28 @@ template = "article.html" series = "Idiomatic Rust" +++ -Its clear that Rust has a readability problem. -At least that's what I hear on a regular basis. -After programming in Rust for 10 years, I think you have to dedicate some time to learn it properly -and that your background will inform how your Rust code looks. +Its clear that Rust has a readability problem -- or at least that's what people say on a regular basis. +But after programming in Rust for 10 years, I think that your coding style has the biggest impact on how your Rust code will look and feel. -Let's look at a simple example: parsing a `.env` file in Rust. After all, how hard could it be? +Let's take at a simple example: parsing a `.env` file in Rust. How hard can it be? ```sh -APP_ENV=production -API_KEY=my_api_key - -LOG_FILE=app.log - DB_HOST=localhost DB_PORT=5432 -DB_USERNAME=myuser -DB_PASSWORD=mypassword -DB_NAME=mydb + +API_KEY=my_api_key +LOG_FILE=app.log ``` The goal is to parse the above content from a file called `.env` and return a data structure that contains the key-value pairs. +Easy! I invite you to write your own version first. 
-As a little hint, consider the edge-cases, which could occur. +Or at least take a second to consider all the edge-cases, that may occur... ## A Painful First Attempt -At times I see code like the following: +At times I see code like this: ```rust use std::collections::HashMap; @@ -47,14 +41,14 @@ fn parse_config_file<'a>(path: &'a str) -> HashMap { let mut file = File::open(&p).unwrap(); let mut bytes = Vec::new(); file.read_to_end(&mut bytes).unwrap(); - + let s = String::from_utf8_lossy(&bytes).to_string(); - - let lines_with_refs: Vec<&'a str> = s.split('\n').collect(); - + + let lines_with_refs: Vec<&'_ str> = s.split('\n').collect(); + let mut idx = 0; let mut cfg: HashMap = HashMap::new(); - + while idx < lines_with_refs.len() { let lref = &lines_with_refs[idx]; let mut l = *lref; @@ -67,13 +61,13 @@ fn parse_config_file<'a>(path: &'a str) -> HashMap { } // Skip comments - if l[0] == '#' { + if l.chars().next() == Some('#') { idx += 1; continue; } let parts = l.split('=').collect::>(); - + let k: &str = parts[0].trim(); if k.len() > 0 { let v: &str = parts[1].trim(); @@ -81,7 +75,7 @@ fn parse_config_file<'a>(path: &'a str) -> HashMap { } else { println!("Error in line {:?}", parts); } - + idx += 1; } @@ -122,58 +116,58 @@ fn main() { # Invalid lines justkey "#; - + // Write the content to a temporary file std::fs::write("temp_config.env", config_content).unwrap(); - + // Parse the file let config = parse_config_file("temp_config.env"); - + // Print the results println!("\nParsed config entries:"); let mut keys: Vec = config.keys().cloned().collect(); keys.sort(); - + for key in keys { println!("{} = {}", key, config.get(&key).unwrap()); } - + // Display some test results println!("\nTest results:"); - + // Test 1: Check if basic keys are parsed correctly if let Some(host) = config.get("host") { println!("PASS: Basic key 'host' parsed correctly: {}", host); } else { println!("FAIL: Basic key 'host' not found"); } - + // Test 2: Check if indentation is 
handled correctly if let Some(value) = config.get("indented_key") { println!("PASS: Indented key parsed correctly: {}", value); } else { println!("FAIL: Indented key not found"); } - + // Test 3: Check if spaces in keys are preserved (bug) if let Some(value) = config.get("key with spaces") { println!("PASS: Key with spaces parsed correctly: {}", value); } else { println!("FAIL: Key with spaces not found (as expected with simple parser)"); } - + // Test 4: Check for duplicate key behavior if let Some(value) = config.get("duplicate") { println!("NOTE: For duplicate keys, last value wins: {}", value); } - + // Test 5: Check if escaped equals sign is handled (it's not) if let Some(value) = config.get("escaped\\=key") { println!("PASS: Escaped equals in key handled correctly"); } else { println!("FAIL: Escaped equals not handled correctly (expected with simple parser)"); } - + // Test 6: Check comment character in value (will fail) if let Some(value) = config.get("key") { if value == "value#not_a_comment" { @@ -184,34 +178,36 @@ fn main() { } else { println!("FAIL: Key with comment in value not found"); } - + // Test 7: Check multiline key handling (will fail) if let Some(value) = config.get("multi\\") { println!("PASS: Multiline key handled"); } else { println!("FAIL: Multiline key not handled (expected with simple parser)"); } - + // Clean up the temporary file std::fs::remove_file("temp_config.env").unwrap_or_default(); } ``` -Let's be clear: there are many antipatterns in the above code. -Many of them have nothing to do with Rust, but with software engineering in general. -And yet, people use it as an excuse to call Rust an ugly language and give up on it. +Let's be clear: there are many, many antipatterns in the above code. + +Many antipatterns in the code have nothing to do with Rust, but with software engineering in general. +And yet, people take a quick look and use it as an excuse to call Rust an "ugly language" and give up on it. 
I would argue that this code is ugly less because of Rust's syntax, but rather because the author is unaware or ignorant of the ergonomics Rust provides. +The code carries all the hallmarks of a beginner Rust programmer -- possibly with a C/C++ background -- who +has not yet fully embraced what Rust brings to the table. -Typically, **better semantics lead to nicer syntax in Rust**. -Many people get that backwards. +In my experience, **better semantics brings nicer syntax in Rust**; many people get that backwards. If you feel like you're fighting the language (not just its borrow-checker!), -then there's a chance that the language is trying to **tell you something**. +then there's a chance that **the language is trying to push you in a different direction**. It bears repeating: this is terrifying code with many footguns. -Without much effort, one can make out a few red flags from the code above: +Without much effort, one can make out the red flags: - The code is littered with `unwrap()` calls - Unnecessary mutability @@ -220,53 +216,48 @@ Without much effort, one can make out a few red flags from the code above: - Cryptic variable names - Very imperative coding style -The above not just makes the code harder to read. -What is worse is that it leads to business logic bugs in the code, because the code makes quite a few unjustified assumptions -and the way the code is written makes it hard for Rust to help you out. - -I think we can all agree that the code is not idiomatic Rust. - -## The Five Stages of Grief About Rust Syntax +This not just makes the code harder to read. +What is worse is that it leads to business logic bugs in the code, because the code makes quite a few unsound assumptions about its input. +This makes it hard for Rust to help you out. Whenever I see people struggle with Rust syntax, I'm reminded of the five stages of grief: -### Denial +#### Stage 1: Denial -> "There's nothing wrong with my code - it works perfectly fine! 
The syntax is just Rust's problem, not mine." +"There's nothing wrong with my code - it works perfectly fine! The syntax is just Rust's problem, not mine." -In this stage, developers continue writing C-style code with Rust syntax, ignoring compiler warnings and adding unnecessary lifetime annotations everywhere. They often blame the language for being "too complex" while refusing to revisit fundamental concepts. +In this stage, developers continue writing C-style code with Rust syntax and ignoring compiler warnings. They often blame the language for being "overly complex" while refusing to learn the fundamentals. -### Anger +#### Stage 2: Anger -> "Why does Rust need all these lifetime annotations and explicit ownership? C++ never made me deal with this nonsense!" +"Why does Rust need all these `mut` keywords and explicit ownership? C++ never made me deal with this nonsense!" -Frustration builds as developers encounter repeated compiler errors. They begin to resent the borrow checker and might abandon half-finished projects in favor of "more practical" languages. Excessive code comments containing rants about Rust's design decisions become common. +Frustration builds as developers encounter repeated compiler errors. They begin to resent the borrow checker and might abandon half-finished projects in favor of "more practical" languages. At this stage they might post a snarky comment about Rust's design decisions on social media. -### Bargaining +#### Stage 3: Bargaining -> "Maybe if I just use more `.unwrap()` calls and sprinkle in some `unsafe` blocks, I can write Rust the way I want to." +"Maybe if I just use more `.unwrap()` calls and sprinkle in some `unsafe` blocks, I can write Rust the way I want to." Desperate to make progress, developers start making dangerous compromises. 
They liberally use `.clone()` to silence ownership errors, wrap simple operations in `unsafe` blocks, and litter code with `.unwrap()` calls, effectively bypassing Rust's safety guarantees while keeping all of its verbosity. -### Depression +#### Stage 4: Depression -> "I'll never get used to this language. My code is a mess of references, clones, and unnecessary mutations that even I can't read anymore." +"I'll never get used to this language. My code is a mess of references, clones, and unnecessary mutations that even I can't read anymore." -Reality sets in as technical debt accumulates. Code becomes increasingly convoluted with superfluous mutable variables and overly complex data structures. Performance suffers from unnecessary allocations, and what started as a promising project now feels like an unreadable jumble of syntax. +Reality sets in. Code becomes increasingly convoluted with superfluous mutable variables and overly complex data structures. What started as a promising project now feels like an unreadable jumble of syntax. -### Acceptance +#### Stage 5: Acceptance -> "I see now that these idioms exist for a reason - my code is not only safer but actually more readable when I embrace Rust's patterns instead of fighting them." +"I see now that these idioms exist for a reason - my code is not only safer but actually more readable when I embrace Rust's patterns instead of fighting them." -Finally, developers begin embracing idiomatic patterns and the design philosophy behind Rust. They refactor their spaghetti code into clean, expressive modules that leverage the type system rather than fight it. Performance improves, code becomes more maintainable, and they wonder how they ever wrote memory-unsafe code with confidence. +Finally, developers begin embracing idiomatic patterns and the design philosophy behind Rust. They refactor their spaghetti code and leverage the type system rather than fight it. 
Code becomes more maintainable, and they wonder how they ever wrote memory-unsafe code with confidence. Okay, you (or your team-member) reached acceptance, how can you do better? ## Let Go Of Old Bad Habits - The first step is to acknowledge that the code goes against Rust's design principles. -Based on this, we can systematically improve the code. +Based on this realization, we can systematically improve the code. Ugly code is band-aid around bad habits. Learn to do it the "Rustic way." @@ -274,17 +265,26 @@ Learn to do it the "Rustic way." We have seen plenty of ways to write better Rust code in previous articles: - Read the standard library documentation -- Think in expressions -- Immutability by default -- Leaning into the typesystem -- Iterator patterns instead of manual iteration +- [Think in expressions](/blog/expressions) +- [Immutability by default](/blog/immutability) +- [Leaning into the typesystem](/blog/illegal-state) +- [Iterator patterns instead of manual iteration](/blog/iterators) - Proper error handling - Split up the problem into smaller parts -Even just applying these basic techniques, we can get it into a much better shape. +Even just applying these basic techniques, we can get our code into a much better shape. +{% info(title="Try It Out Yourself!") %} -Let's start by removing this boilerplate +Feel free to use the above code as a refactoring exercise to practice these techniques. +Here's the [link to the Rust playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=2510df77c1c8c6a227680ea49407fe18). +I'll wait here while you experiment with the code. 
+ +{% end %} + +### Read the Standard Library Documentation + +After reading the standard library documentation, we can remove this boilerplate ```rust let p = Path::new(&path); @@ -294,19 +294,23 @@ file.read_to_end(&mut bytes).unwrap(); let s = String::from_utf8_lossy(&bytes).to_string(); ``` -and instead calling [`std::fs::read_to_string`](https://doc.rust-lang.org/std/fs/fn.read_to_string.html): +and instead call [`std::fs::read_to_string`](https://doc.rust-lang.org/std/fs/fn.read_to_string.html): ```rust let s = read_to_string(path).unwrap(); ``` -Rust is really good at inferring types. That's why we don't need to specify the type of +### Use Type Inference + +[Rust is really good at inferring types.](https://rustc-dev-guide.rust-lang.org/type-inference.html) That's why we don't need to specify the type of our `HashMap` explicitly. ```rust let mut config = HashMap::new(); ``` +### Lean Into the Typesystem + Next, manual string splitting is also unnecessary. ```rust @@ -342,7 +346,7 @@ and we also don't have to fall back to cryptic names like `lref` or `l` anymore. Instead of `line.len() == 0`, we can use `line.is_empty()`. -We can also use `line.starts_with("#")` instead of checking for `l[0] == '#'`. +We can also use `line.starts_with("#")` instead of checking for `l.chars().next() == Some('#')`. Next, let's tackle this part: @@ -358,7 +362,7 @@ if k.len() > 0 { } ``` -Note how we access `parts[0]` and `parts[1]` without checking if they exist. +Note how we access `parts[0]` and `parts[1]` without checking if these are valid indices. 
Let's lean into the typesystem a little more and use pattern matching to destructure the result of `split`: ```rust @@ -376,7 +380,7 @@ match l.split_once('=') { } ``` -With that, we end up with an improved version of the code: +With that, we end up with a greatly improved version of the code: ```rust use std::collections::HashMap; @@ -414,7 +418,9 @@ fn parse_config_file<'a>(path: &'a str) -> HashMap { } ``` -This is already cleaner. We can go one step further with proper error handling. +### Use Proper Error Handling + +We can go one step further with proper error handling. It depends on the business logic how you want to handle invalid lines. Here's a version, which returns an error in the case: @@ -474,18 +480,26 @@ fn parse_line(line: &str) -> Result, ParseError> { } ``` -We can even introduce an enum to represent a parsed line: +The code is still quite "stringy-typed." +That's usually a sign of a missing abstraction. +To tackle that, we could introduce an enum to represent a parsed line for example: ```rust +#[derive(Debug)] +struct KeyValue { + key: String, + value: String, +} + #[derive(Debug)] enum ParsedLine { - Comment, + KeyValue(KeyValue), + Comment(String), Empty, - KeyValue(String, String), } ``` -Then we can use it like this: +And we'd use it like so: ```rust fn parse_line(line: &str) -> Result { @@ -496,7 +510,7 @@ fn parse_line(line: &str) -> Result { } if line.starts_with("#") { - return Ok(ParsedLine::Comment); + return Ok(ParsedLine::Comment(line.to_string())); } match line.split_once('=') { @@ -504,7 +518,10 @@ fn parse_line(line: &str) -> Result { let k = k.trim(); if !k.is_empty() { let v = v.trim(); - Ok(ParsedLine::KeyValue(k.to_string(), v.to_string())) + Ok(ParsedLine::KeyValue(KeyValue { + key: k.to_string(), + value: v.to_string(), + })) } else { Err(ParseError::InvalidLine(line.to_string())) } @@ -514,8 +531,11 @@ fn parse_line(line: &str) -> Result { } ``` -The concept is to rely on the type system to make the code more readable and 
maintainable. -We could even go one step further and express more complexity in the type system. +([Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=7a5e34cdac522dd8eb60759cc89de5a4)) + +We could even go one step further and express more of our invariants in the type system. +For example, we can make use of the fact that parsing a key-value pair only depends on a single line. +Since parsing is a fallible operation, we can implement `TryFrom` for our `KeyValue` struct. ```rust struct KeyValue { @@ -552,61 +572,174 @@ impl TryFrom<&str> for KeyValue { } ``` +([Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=722eb6136fe3da50e3a17326a6702ede)) + It might look like we made the problem more complicated than it is. -However, we can test the `KeyValue` struct in isolation now and -we handle the errors close to the source of the problem. +However, we can now simplify our parser even more and also test key-value parsing in isolation. +On top of that, errors get handled much closer to the source of the problem. -Our main function now becomes way easier: +Our `parse_config_file` function now gets much simpler: ```rust -fn parse_config_file<'a>(path: &'a str) -> Result, ParseError> { - let s = read_to_string(path)?; - +fn parse_config_file(path: &str) -> Result, ParseError> { + let content = std::fs::read_to_string(path)?; + let mut config = HashMap::new(); - for line in s.lines() { - match KeyValue::try_from(line) { - Ok(kv) => { - config.insert(kv.key, kv.value); - } - Err(e) => return Err(e), + + for result in content.lines().map(parse_line) { + if let ParsedLine::KeyValue(kv) = result? { + config.insert(kv.key, kv.value); } } - + Ok(config) } ``` -And by extension we can also turn the `parse_config_file` into a struct: +At this stage -- assuming we can still change the public API of our parser -- +we can convert `parse_config_file` into an `EnvParser` struct. 
+That's because all we do is creating a map of key-value pairs from some input. +While we're at it, we can lift the requirement of passing a file path to the parser +and instead accept any type that implements `Read`. -```rust -struct EnvFileParser { - +```rust +#[derive(Debug)] +enum ParseError { + InvalidLine(String), + IoError(std::io::Error), +} +impl Error for ParseError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + match self { + ParseError::IoError(err) => Some(err), + _ => None, + } + } +} +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ParseError::InvalidLine(line) => write!(f, "Invalid line format: {}", line), + ParseError::IoError(err) => write!(f, "I/O error: {}", err), + } + } +} +impl From for ParseError { + fn from(err: std::io::Error) -> Self { + ParseError::IoError(err) + } +} +#[derive(Debug, Clone)] +struct KeyValue { + key: String, + value: String, +} +impl TryFrom for KeyValue { + type Error = ParseError; + fn try_from(line: String) -> Result { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') { + return Err(ParseError::InvalidLine(line.to_string())); + } + match line.split_once('=') { + Some((k, v)) => { + let k = k.trim(); + if !k.is_empty() { + let v = v.trim(); + Ok(KeyValue { + key: k.to_string(), + value: v.to_string(), + }) + } else { + Err(ParseError::InvalidLine(line.to_string())) + } + } + None => Err(ParseError::InvalidLine(line.to_string())), + } + } +} + +#[derive(Debug)] +struct EnvConfig { + inner: HashMap, +} + +impl EnvConfig { + fn new() -> Self { + EnvConfig { + inner: HashMap::new(), + } + } + + fn insert(&mut self, keyvalue: KeyValue) { + self.inner.insert(keyvalue.key, keyvalue.value); + } + + fn get(&self, key: &str) -> Option<&str> { + self.inner.get(key).map(|v| v.as_str()) + } + + fn len(&self) -> usize { + self.inner.len() + } +} +struct EnvParser; + +impl EnvParser { + fn parse(reader: R) -> Result { + let reader = 
BufReader::new(reader); + let mut config = EnvConfig::new(); + + for line in reader.lines() { + match line { + Ok(line_str) => { + match KeyValue::try_from(line_str) { + Ok(kv) => config.insert(kv), + Err(ParseError::InvalidLine(_)) => continue, // Skip invalid lines + Err(e) => return Err(e), + } + } + Err(e) => return Err(ParseError::IoError(e)), + } + } + Ok(config) + } + fn parse_str(input: &str) -> Result { + Self::parse(input.as_bytes()) + } -Blog post idea: "This can never panic" and other lies we tell ourselves -The language doesn't get more ugly beyond a certain point of complexity. -I can't say the same about C++. + fn parse_file(path: &str) -> Result { + let file = File::open(path)?; + Self::parse(file) + } +} +``` -what makes Rust "ugly" isn't just syntax but exposing complex concepts. -Physics over optics, not everything is about cosmetics. +([Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=4fb0dbbab6d8242feb7e4f28b51a1d08)) -"I don't want ugly Rust-like typing in my favorite language. It may look good in Rust, but it looks horrible in Python." +I skipped a few intermediate steps, but the idea is always the same: continuously +look for wrinkles in the code and move more and more logic into the type system. -It's also pretty easy to go into the other extreme and make everything generic. That's also hard to read. +# Summary -People don't confront their bad habits and find workarounds. That's the origin of ugly code. +If there is anything that makes Rust "ugly", it isn't its syntax but the fact that it doesn't hide the complexity of the underlying system. +Rust values explicitness and you have to deal with the harsh reality that computing is messy. -Assumptions about the program’s execution order are often wrong +Turns out our assumptions about a program’s execution are often wrong and our mental models are flawed. 
+Fortunately, we can encapsulate a lot of the complexity behind ergonomic abstractions; it just takes some practice. +Don't worry: once you start to confront your bad habits and look around for better abstractions, greener pastures are right around the corner. -You have to understand the background: Rust is a systems programming language. It competes with C/C++ and for that it has better readability than C/C++ with modern syntax \ No newline at end of file +Rust, after all is said and done, is still a systems programming language in the end. +It competes with the likes of C/C++ and for that it has pretty good ergonomics. \ No newline at end of file From d99b9c9b6b849a14424af8f4a56cd1143f7bb2c4 Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 1 May 2025 21:33:18 +0200 Subject: [PATCH 11/20] draft 2 --- content/blog/ugly/index.md | 293 +++++++++++++++++++++++++++---------- 1 file changed, 214 insertions(+), 79 deletions(-) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index 42e11944..d5d8c1b8 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -7,10 +7,10 @@ template = "article.html" series = "Idiomatic Rust" +++ -Its clear that Rust has a readability problem -- or at least that's what people say on a regular basis. -But after programming in Rust for 10 years, I think that your coding style has the biggest impact on how your Rust code will look and feel. +It's clear that Rust has a readability problem -- or at least that's what people claim on a regular basis. +After programming in Rust for 10 years, I think that your coding style has the biggest impact on how your Rust code will look and feel. -Let's take at a simple example: parsing a `.env` file in Rust. How hard can it be? +Let's take a simple example: parsing a `.env` file in Rust. How hard can it be? ```sh DB_HOST=localhost @@ -24,7 +24,7 @@ The goal is to parse the above content from a file called `.env` and return a da Easy! 
I invite you to write your own version first. -Or at least take a second to consider all the edge-cases, that may occur... +Or at least take a second to consider all the edge-cases that may occur... ## A Painful First Attempt @@ -192,21 +192,16 @@ fn main() { ``` Let's be clear: there are many, many antipatterns in the above code. +Most of them have nothing to do with Rust, but with software engineering in general. -Many antipatterns in the code have nothing to do with Rust, but with software engineering in general. -And yet, people take a quick look and use it as an excuse to call Rust an "ugly language" and give up on it. +The code carries all the hallmarks of a beginner Rust programmer -- possibly with a C/C++ background -- who has not yet fully embraced the ergonomics Rust provides. -I would argue that this code is ugly less because of Rust's syntax, but rather -because the author is unaware or ignorant of the ergonomics Rust provides. -The code carries all the hallmarks of a beginner Rust programmer -- possibly with a C/C++ background -- who -has not yet fully embraced what Rust brings to the table. - -In my experience, **better semantics brings nicer syntax in Rust**; many people get that backwards. +## Better semantics enable nicer syntax in Rust If you feel like you're fighting the language (not just its borrow-checker!), -then there's a chance that **the language is trying to push you in a different direction**. +then there's a chance that **the language is trying to push you into a different direction**. -It bears repeating: this is terrifying code with many footguns. +It bears repeating: the above code is terrifying and contains many footguns. Without much effort, one can make out the red flags: - The code is littered with `unwrap()` calls @@ -220,7 +215,9 @@ This not just makes the code harder to read. What is worse is that it leads to business logic bugs in the code, because the code makes quite a few unsound assumptions about its input. 
This makes it hard for Rust to help you out. -Whenever I see people struggle with Rust syntax, I'm reminded of the five stages of grief: +Whenever I see people struggle with Rust syntax, I'm reminded of... + +## The five stages of grief #### Stage 1: Denial @@ -252,14 +249,14 @@ Reality sets in. Code becomes increasingly convoluted with superfluous mutable v Finally, developers begin embracing idiomatic patterns and the design philosophy behind Rust. They refactor their spaghetti code and leverage the type system rather than fight it. Code becomes more maintainable, and they wonder how they ever wrote memory-unsafe code with confidence. -Okay, you (or your team-member) reached acceptance, how can you do better? - ## Let Go Of Old Bad Habits +Okay, you (or your team-member) reached acceptance, how can you do better? + The first step is to acknowledge that the code goes against Rust's design principles. Based on this realization, we can systematically improve the code. -Ugly code is band-aid around bad habits. +Ugly code is a band-aid around bad habits. Learn to do it the "Rustic way." We have seen plenty of ways to write better Rust code in previous articles: @@ -276,15 +273,18 @@ Even just applying these basic techniques, we can get our code into a much bette {% info(title="Try It Out Yourself!") %} -Feel free to use the above code as a refactoring exercise to practice these techniques. +This is a hands-on exercise. +Feel free to paste the above code into your editor and practice refactoring it. Here's the [link to the Rust playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=2510df77c1c8c6a227680ea49407fe18). -I'll wait here while you experiment with the code. +I'll wait here. {% end %} ### Read the Standard Library Documentation -After reading the standard library documentation, we can remove this boilerplate +Many common patterns are beautifully handled by the standard library. +It is worth your time to read the documentation. 
+For instance, you will find that you can get rid of all of this boilerplate: ```rust let p = Path::new(&path); @@ -297,6 +297,8 @@ let s = String::from_utf8_lossy(&bytes).to_string(); and instead call [`std::fs::read_to_string`](https://doc.rust-lang.org/std/fs/fn.read_to_string.html): ```rust +use std::fs::read_to_string; + let s = read_to_string(path).unwrap(); ``` @@ -311,13 +313,19 @@ let mut config = HashMap::new(); ### Lean Into the Typesystem -Next, manual string splitting is also unnecessary. +Manual string splitting is not necessary and very much discouraged. +The reason is that strings are, in fact, very complicated. +There is a perception that a string is just an array of "characters", but that notion is ill-defined +and a dangerous assumption. ```rust let lines_with_refs: Vec<&'a str> = s.split('\n').collect(); ``` -The above can be replaced with: +This line expects that lines are separated by `\n`. +That's not true on Windows, where lines are separated by `\r\n`. + +The following line does the correct thing on all platforms: ```rust let lines = s.lines(); @@ -325,7 +333,7 @@ let lines = s.lines(); This returns an [iterator over the lines of a string](https://doc.rust-lang.org/std/primitive.str.html#method.lines). -With that, we can simply iterate over each line: +Knowing that, we can instead iterate over each line: ```rust for line in s.lines() { @@ -344,9 +352,8 @@ That is a common practice in Rust. This way we don't have to come up with a new name for the trimmed line and we also don't have to fall back to cryptic names like `lref` or `l` anymore. -Instead of `line.len() == 0`, we can use `line.is_empty()`. - -We can also use `line.starts_with("#")` instead of checking for `l.chars().next() == Some('#')`. +Instead of `line.len() == 0`, we use `line.is_empty()` now. +And `line.starts_with("#")` is easier to read than checking with `l.chars().next() == Some('#')`. 
Next, let's tackle this part: @@ -363,20 +370,21 @@ if k.len() > 0 { ``` Note how we access `parts[0]` and `parts[1]` without checking if these are valid indices. -Let's lean into the typesystem a little more and use pattern matching to destructure the result of `split`: +The code only coincidentally works for well-formed inputs. +Fortunately, we don't have to do all this if we lean into the typesystem a little more and use pattern matching to destructure the result of `split_once`: ```rust -match l.split_once('=') { +match line.split_once('=') { Some((k, v)) => { let k = k.trim(); if !k.is_empty() { let v = v.trim(); config.insert(k.to_string(), v.to_string()); } else { - println!("Error in line {:?}", parts); + println!("Error in line with empty key"); } } - None => println!("Error in line {:?}", parts), + None => println!("Error in line: no '=' found"), } ``` @@ -384,11 +392,9 @@ With that, we end up with a greatly improved version of the code: ```rust use std::collections::HashMap; -use std::fs::File; -use std::io::Read; -use std::path::Path; +use std::fs::read_to_string; -fn parse_config_file<'a>(path: &'a str) -> HashMap<String, String> { +fn parse_config_file(path: &str) -> HashMap<String, String> { let s = read_to_string(path).unwrap(); let mut config = HashMap::new(); @@ -399,33 +405,70 @@ fn parse_config_file<'a>(path: &'a str) -> HashMap<String, String> { continue; } - match l.split_once('=') { + match line.split_once('=') { Some((k, v)) => { let k = k.trim(); if !k.is_empty() { let v = v.trim(); config.insert(k.to_string(), v.to_string()); } else { - println!("Error in line {:?}", parts); + println!("Error in line with empty key"); } } - None => println!("Error in line {:?}", parts), + None => println!("Error in line: no '=' found"), } - } - return config; + config } ``` +You'd be forgiven if you called it a day at this point. +However, to truly embrace Rust, it helps to take a step back and think about our problem for a little longer. 
+ ### Use Proper Error Handling -We can go one step further with proper error handling. -It depends on the business logic how you want to handle invalid lines. -Here's a version, which returns an error in the case: +One obvious next step is to introduce proper error handling. +It depends on the business logic how you want to handle invalid lines, but I prefer to implement proper parsing +and have complete freedom over the output on the callsite. ```rust -fn parse_config_file<'a>(path: &'a str) -> Result, ParseError> { +use std::collections::HashMap; +use std::fs::read_to_string; +use std::fmt; +use std::error::Error; + +#[derive(Debug)] +enum ParseError { + InvalidLine(String), + IoError(std::io::Error), +} + +impl Error for ParseError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + match self { + ParseError::IoError(err) => Some(err), + _ => None, + } + } +} + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ParseError::InvalidLine(line) => write!(f, "Invalid line format: {}", line), + ParseError::IoError(err) => write!(f, "I/O error: {}", err), + } + } +} + +impl From for ParseError { + fn from(err: std::io::Error) -> Self { + ParseError::IoError(err) + } +} + +fn parse_config_file(path: &str) -> Result, ParseError> { let s = read_to_string(path)?; let mut config = HashMap::new(); @@ -436,7 +479,7 @@ fn parse_config_file<'a>(path: &'a str) -> Result, Parse continue; } - match l.split_once('=') { + match line.split_once('=') { Some((k, v)) => { let k = k.trim(); if !k.is_empty() { @@ -448,7 +491,6 @@ fn parse_config_file<'a>(path: &'a str) -> Result, Parse } None => return Err(ParseError::InvalidLine(line.to_string())), } - } Ok(config) @@ -480,9 +522,8 @@ fn parse_line(line: &str) -> Result, ParseError> { } ``` -The code is still quite "stringy-typed." -That's usually a sign of a missing abstraction. 
-To tackle that, we could introduce an enum to represent a parsed line for example: +The code is still quite "stringy-typed," which usually is a sign of a missing abstraction. +How about we introduce an enum to represent a parsed line? ```rust #[derive(Debug)] @@ -499,7 +540,7 @@ enum ParsedLine { } ``` -And we'd use it like so: +We'd use it like so: ```rust fn parse_line(line: &str) -> Result { @@ -531,17 +572,12 @@ fn parse_line(line: &str) -> Result { } ``` -([Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=7a5e34cdac522dd8eb60759cc89de5a4)) - We could even go one step further and express more of our invariants in the type system. For example, we can make use of the fact that parsing a key-value pair only depends on a single line. -Since parsing is a fallible operation, we can implement `TryFrom` for our `KeyValue` struct. +Since parsing is a fallible operation, we can implement `TryFrom` for our `KeyValue` struct: ```rust -struct KeyValue { - key: String, - value: String, -} +use std::convert::TryFrom; impl TryFrom<&str> for KeyValue { type Error = ParseError; @@ -572,23 +608,25 @@ impl TryFrom<&str> for KeyValue { } ``` -([Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=722eb6136fe3da50e3a17326a6702ede)) - It might look like we made the problem more complicated than it is. However, we can now simplify our parser even more and also test key-value parsing in isolation. On top of that, errors get handled much closer to the source of the problem. -Our `parse_config_file` function now gets much simpler: +Our `parse_config_file` function now becomes much simpler: ```rust fn parse_config_file(path: &str) -> Result, ParseError> { - let content = std::fs::read_to_string(path)?; + let content = read_to_string(path)?; let mut config = HashMap::new(); - for result in content.lines().map(parse_line) { - if let ParsedLine::KeyValue(kv) = result? 
{ - config.insert(kv.key, kv.value); + for line in content.lines() { + match KeyValue::try_from(line) { + Ok(kv) => { + config.insert(kv.key, kv.value); + }, + Err(ParseError::InvalidLine(_)) => continue, // Skip invalid lines + Err(e) => return Err(e), } } @@ -596,13 +634,20 @@ fn parse_config_file(path: &str) -> Result, ParseError> } ``` -At this stage -- assuming we can still change the public API of our parser -- -we can convert `parse_config_file` into an `EnvParser` struct. -That's because all we do is creating a map of key-value pairs from some input. -While we're at it, we can lift the requirement of passing a file path to the parser + +All we do is creating a map of key-value pairs from some input. +At this stage we might as well convert `parse_config_file` into an `EnvParser` struct. +And while we're at it, let's lift the requirement of passing a file path to the parser and instead accept any type that implements `Read`. -```rust +```rust +use std::collections::HashMap; +use std::io::{BufRead, BufReader, Read}; +use std::fs::File; +use std::fmt; +use std::error::Error; +use std::convert::TryFrom; + #[derive(Debug)] enum ParseError { InvalidLine(String), @@ -682,11 +727,11 @@ impl EnvConfig { fn insert(&mut self, keyvalue: KeyValue) { self.inner.insert(keyvalue.key, keyvalue.value); } - + fn get(&self, key: &str) -> Option<&str> { self.inner.get(key).map(|v| v.as_str()) } - + fn len(&self) -> usize { self.inner.len() } @@ -697,7 +742,7 @@ struct EnvParser; impl EnvParser { fn parse(reader: R) -> Result { let reader = BufReader::new(reader); - let mut config = EnvConfig::new(); + let mut config = EnvConfig::new(); for line in reader.lines() { match line { @@ -714,32 +759,122 @@ impl EnvParser { Ok(config) } - + fn parse_str(input: &str) -> Result { Self::parse(input.as_bytes()) } - + fn parse_file(path: &str) -> Result { let file = File::open(path)?; Self::parse(file) } } + +// Example usage +fn main() -> Result<(), Box> { + let env_content = " + 
DB_HOST=localhost + DB_PORT=5432 + + API_KEY=my_api_key + LOG_FILE=app.log + "; + + let config = EnvParser::parse_str(env_content)?; + + println!("Parsed config entries:"); + for (key, value) in &config.inner { + println!("{} = {}", key, value); + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::NamedTempFile; + use std::io::Write; + + // Tests for KeyValue struct + #[test] + fn test_keyvalue_valid() { + let kv = KeyValue::try_from("key=value".to_string()).unwrap(); + assert_eq!(kv.key, "key"); + assert_eq!(kv.value, "value"); + } + + #[test] + fn test_keyvalue_with_spaces() { + let kv = KeyValue::try_from(" key = value ".to_string()).unwrap(); + assert_eq!(kv.key, "key"); + assert_eq!(kv.value, "value"); + } + + #[test] + fn test_keyvalue_empty_value() { + let kv = KeyValue::try_from("key=".to_string()).unwrap(); + assert_eq!(kv.key, "key"); + assert_eq!(kv.value, ""); + } + + #[test] + fn test_parser_duplicate_keys() { + let input = " + key=value1 + key=value2 + "; + + let config = EnvParser::parse_str(input).unwrap(); + assert_eq!(config.len(), 1); + // Last value should win for duplicate keys + assert_eq!(config.get("key"), Some("value2")); + } + + #[test] + fn test_parser_all_edge_cases() { + let input = " + # Comments should be ignored + simple=value + indented_key = indented_value + empty_value= + key_with_equals=value=with=equals + duplicate=first + duplicate=second + trailing_whitespace = value with spaces + "; + + let config = EnvParser::parse_str(input).unwrap(); + + assert_eq!(config.len(), 6); + assert_eq!(config.get("simple"), Some("value")); + assert_eq!(config.get("indented_key"), Some("indented_value")); + assert_eq!(config.get("empty_value"), Some("")); + assert_eq!(config.get("key_with_equals"), Some("value=with=equals")); + assert_eq!(config.get("duplicate"), Some("second")); + assert_eq!(config.get("trailing_whitespace"), Some("value with spaces")); + } +} ``` -([Rust 
Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=4fb0dbbab6d8242feb7e4f28b51a1d08)) +Without much effort, we decoupled the code. +Now, every part has one clearly defined responsibility: + +- `KeyValue` is responsible for parsing a single line +- `EnvParser` is responsible for parsing the entire input +- `EnvConfig` stores the parsed key-value pairs I skipped a few intermediate steps, but the idea is always the same: continuously look for wrinkles in the code and move more and more logic into the type system. -# Summary +## Summary If there is anything that makes Rust "ugly", it isn't its syntax but the fact that it doesn't hide the complexity of the underlying system. Rust values explicitness and you have to deal with the harsh reality that computing is messy. - -Turns out our assumptions about a program’s execution are often wrong and our mental models are flawed. +Turns out our assumptions about a program's execution are often wrong and our mental models are flawed. Fortunately, we can encapsulate a lot of the complexity behind ergonomic abstractions; it just takes some practice. Don't worry: once you start to confront your bad habits and look around for better abstractions, greener pastures are right around the corner. Rust, after all is said and done, is still a systems programming language in the end. -It competes with the likes of C/C++ and for that it has pretty good ergonomics. \ No newline at end of file +It competes with the likes of C/C++ and for that it has pretty good ergonomics. 
\ No newline at end of file From 7e4847ca63afc01288925d81b06cc9bd776d6d05 Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 1 May 2025 21:41:32 +0200 Subject: [PATCH 12/20] edge case section --- content/blog/ugly/index.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index d5d8c1b8..dc2adb8f 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -867,6 +867,30 @@ Now, every part has one clearly defined responsibility: I skipped a few intermediate steps, but the idea is always the same: continuously look for wrinkles in the code and move more and more logic into the type system. +## Did You Find All The Edge Cases? + +Parsing environment files sounds simple on the surface, but that is not the case. +How many of these cases did you catch in your implementation? + +- **Empty lines** - Should be skipped +- **Comment lines** - Lines starting with `#` should be skipped +- **Whitespace in keys/values** - Leading and trailing whitespace should be trimmed +- **Empty keys** - Lines like `=value` should be rejected +- **Empty values** - Lines like `key=` should be allowed! (With empty string value) +- **Missing equals sign** - Lines without an equals sign should be rejected +- **Multiple equals signs** - How do you handle `key=value=more`? On Unix, this is valid and everything after the first `=` is part of the value +- **Indented lines** - Lines with leading whitespace should be parsed normally +- **Duplicate keys** - Later values should overwrite earlier ones +- **Quoted values** - How do you handle `key="value"`? Our solution preserves the quotes +- **Escaping** - How do you handle `key=value\nwith\nnewlines` or `key=value#notacomment`? +- **Line continuations** - What about multi-line values with backslash? I don't handle them right now. +- **Unicode characters** - How does your parser handle non-ASCII content? 
+- **Invalid UTF-8** - How do you handle files with encoding errors? + +A robust parser would need to handle all these cases, with clear behavior defined for each. +Our improved implementation handles many of these cases, but not all. +This just goes to show that it's easy to gloss over details. + ## Summary If there is anything that makes Rust "ugly", it isn't its syntax but the fact that it doesn't hide the complexity of the underlying system. From 7a5da459f97bca4c1983c3949fc5190d95ab0fcf Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 1 May 2025 21:41:59 +0200 Subject: [PATCH 13/20] wording --- content/blog/ugly/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index dc2adb8f..f839e604 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -21,7 +21,7 @@ LOG_FILE=app.log ``` The goal is to parse the above content from a file called `.env` and return a data structure that contains the key-value pairs. -Easy! +Sounds simple enough! I invite you to write your own version first. Or at least take a second to consider all the edge-cases that may occur... From ef6678ae81b78fc8af487f4693a4fcd0729b6f4b Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 4 May 2025 17:23:40 +0200 Subject: [PATCH 14/20] wording --- content/blog/ugly/index.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index f839e604..761be734 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -893,6 +893,9 @@ This just goes to show that it's easy to gloss over details. ## Summary +What I find interesting in these exercises is that the benefits of looking for better abstractions are not about memory safety. +Instead, Rust makes testing easier, which meant that the developers in the experiment were able to find bugs that would have remained completely hidden otherwise.
+ If there is anything that makes Rust "ugly", it isn't its syntax but the fact that it doesn't hide the complexity of the underlying system. Rust values explicitness and you have to deal with the harsh reality that computing is messy. From fca37216a8bf4b7de05c64f44041e3626970dfe8 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 16 Jun 2025 16:40:18 +0300 Subject: [PATCH 15/20] prelude --- content/blog/ugly/index.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index 761be734..5a67fabb 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -10,6 +10,13 @@ series = "Idiomatic Rust" It's clear that Rust has a readability problem -- or at least that's what people claim on a regular basis. After programming in Rust for 10 years, I think that your coding style has the biggest impact on how your Rust code will look and feel. +In workshops I find people getting frustrated with Rust. +They write Rust like they would write idiomatic code in other languages, but it doesn't feel right. +"You can write bad Java code in any language," is a common saying, and I think it applies here as well. + +**Idiomatic Rust ticks all the boxes: it feels right, is correct, and readable.** + + Let's take a simple example: parsing a `.env` file in Rust. How hard can it be? 
```sh From 3c6a7c329f2eb4d7a94461a76465336df85f22c3 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 13 Jul 2025 19:17:17 +0200 Subject: [PATCH 16/20] wip --- content/blog/ugly/index.md | 241 ++++++++++++++++++++++++++----------- 1 file changed, 171 insertions(+), 70 deletions(-) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index 5a67fabb..abb7f612 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -1,21 +1,32 @@ +++ title = "When Rust Gets Ugly" -date = 2025-04-14 +date = 2025-07-13 draft = false template = "article.html" [extra] series = "Idiomatic Rust" +++ -It's clear that Rust has a readability problem -- or at least that's what people claim on a regular basis. -After programming in Rust for 10 years, I think that your coding style has the biggest impact on how your Rust code will look and feel. +In workshops I often see people getting frustrated with Rust. -In workshops I find people getting frustrated with Rust. -They write Rust like they would write idiomatic code in other languages, but it doesn't feel right. +Here's some of the feedback I hear: + +- The borrow checker rules make it hard to write code that compiles +- The syntax is complex with too many symbols and operators -- it's overwhelming +- It's difficult to transition to Rust from a language they know +- The written code is not satisfying to read, it feels clunky and verbose + +From these frustrations, people often conclude that Rust is not for them and quit. + +But after programming in Rust for 10 years, I think that **your coding style has the biggest impact on how your Rust code will look and feel**. + +If you write Rust like you would write idiomatic code in other languages, it will *never feel right*. "You can write bad Java code in any language," is a common saying, and I think it applies here as well.
-**Idiomatic Rust ticks all the boxes: it feels right, is correct, and readable.** +**Idiomatic Rust ticks all the boxes: it is correct, readable, and maintainable.** +If your Rust code doesn't meet these criteria yet, this article is for you. +## The Problem Let's take a simple example: parsing a `.env` file in Rust. How hard can it be? @@ -35,7 +46,8 @@ Or at least take a second to consider all the edge-cases that may occur... ## A Painful First Attempt -At times I see code like this: +Here is one attempt to parse the above file that I often see in workshops. +Keep in mind that this is a bit of an exaggerated version, but it is not too far off from what I see in practice. ```rust use std::collections::HashMap; @@ -43,7 +55,9 @@ use std::fs::File; use std::io::Read; use std::path::Path; +// Parse .env file into a HashMap fn parse_config_file<'a>(path: &'a str) -> HashMap { + // Open the file and read its contents let p = Path::new(&path); let mut file = File::open(&p).unwrap(); let mut bytes = Vec::new(); @@ -51,12 +65,16 @@ fn parse_config_file<'a>(path: &'a str) -> HashMap { let s = String::from_utf8_lossy(&bytes).to_string(); + // Split the string into lines let lines_with_refs: Vec<&'_ str> = s.split('\n').collect(); + // Setup let mut idx = 0; let mut cfg: HashMap = HashMap::new(); + // Iter lines by idx while idx < lines_with_refs.len() { + // Get the line reference and trim it let lref = &lines_with_refs[idx]; let mut l = *lref; l = l.trim(); @@ -73,16 +91,22 @@ fn parse_config_file<'a>(path: &'a str) -> HashMap { continue; } + // Actual string splitting and trimming let parts = l.split('=').collect::>(); - let k: &str = parts[0].trim(); + + // Check if key is empty + // If it is, print an error message and continue if k.len() > 0 { + // We found a valid key. 
Insert into config let v: &str = parts[1].trim(); cfg.insert(k.to_string(), v.to_string()); } else { + // This only happens if the line is malformed, so skip println!("Error in line {:?}", parts); } + // Process next line idx += 1; } @@ -199,17 +223,9 @@ fn main() { ``` Let's be clear: there are many, many antipatterns in the above code. -Most of them have nothing to do with Rust, but with software engineering in general. - -The code carries all the hallmarks of a beginner Rust programmer -- possibly with a C/C++ background -- who has not yet fully embraced the ergonomics Rust provides. - -## Better semantics enable nicer syntax in Rust - -If you feel like you're fighting the language (not just its borrow-checker!), -then there's a chance that **the language is trying to push you into a different direction**. +The most important observation is that these antipatterns have nothing to do with Rust, but with bad coding practices. -It bears repeating: the above code is terrifying and contains many footguns. -Without much effort, one can make out the red flags: +The code carries all the hallmarks of a beginner Rust programmer (possibly with a C/C++ background) who has not yet fully embraced the ergonomics Rust provides. - The code is littered with `unwrap()` calls - Unnecessary mutability @@ -220,60 +236,78 @@ Without much effort, one can make out the red flags: This not just makes the code harder to read. What is worse is that it leads to business logic bugs in the code, because the code makes quite a few unsound assumptions about its input. -This makes it hard for Rust to help you out. -Whenever I see people struggle with Rust syntax, I'm reminded of... -## The five stages of grief +If you feel like you're fighting the language, then there's a chance that **the language is trying to tell you something**. +It tries to push you into a healthier direction, but you are resisting it, which causes friction. 
+The moment you start to listen to what Rust is trying to teach you, everything snaps into place; writing Rust becomes effortless and feels natural. + +Here is one of my favorite things about Rust, that I never experienced in other languages: **better semantics enable nicer syntax.** +That means, the more you learn about the core mechanics behind Rust (traits, pattern matching, expressions, composition over inheritance, etc.), the more you can leverage these concepts to write code that is readable and extensible. + + +## The Five Stages Of Grief + +Whenever I see people struggle with Rust syntax, I'm reminded of the five stages of grief. +It's a common framework for understanding how people deal with loss, but I think it's a great analogy for how stubborn developers react to their first encounter with Rust. #### Stage 1: Denial -"There's nothing wrong with my code - it works perfectly fine! The syntax is just Rust's problem, not mine." +> "There's nothing wrong with my code - it works perfectly fine! The syntax is just Rust's problem, not mine." In this stage, developers continue writing C-style code with Rust syntax and ignoring compiler warnings. They often blame the language for being "overly complex" while refusing to learn the fundamentals. +Oftentimes, this is the stage where they give up on the language and switch to something "more practical" like Python or JavaScript. Rust gets labeled as "unnecessarily complex, type-heavy, and verbose" and they convince themselves that life is better without a safety net to hold them back. #### Stage 2: Anger -"Why does Rust need all these `mut` keywords and explicit ownership? C++ never made me deal with this nonsense!" +> "Why does Rust need all these `mut` keywords and explicit ownership? C++ never made me deal with this nonsense!" -Frustration builds as developers encounter repeated compiler errors. They begin to resent the borrow checker and might abandon half-finished projects in favor of "more practical" languages. 
At this stage they might post a snarky comment about Rust's design decisions on social media. +Frustration builds as developers encounter unfamiliar compiler errors. +They complain about the verbosity of Rust's syntax, the strict ownership model, and the need for lifetimes. +The more they try to write code that looks like C or Java, the more they run into issues with Rust's strict rules and the tension rises. #### Stage 3: Bargaining -"Maybe if I just use more `.unwrap()` calls and sprinkle in some `unsafe` blocks, I can write Rust the way I want to." +> "Maybe if I just use more `.unwrap()` calls and sprinkle in some `unsafe` blocks, I can write Rust the way I want to." -Desperate to make progress, developers start making dangerous compromises. They liberally use `.clone()` to silence ownership errors, wrap simple operations in `unsafe` blocks, and litter code with `.unwrap()` calls, effectively bypassing Rust's safety guarantees while keeping all of its verbosity. +Desperate to make progress, developers start making dangerous compromises. They liberally use `.clone()` to silence ownership errors, wrap simple operations in `unsafe` blocks, and litter code with `.unwrap()` calls, effectively sidestepping Rust's safety guarantees while keeping all of its verbosity. #### Stage 4: Depression -"I'll never get used to this language. My code is a mess of references, clones, and unnecessary mutations that even I can't read anymore." +> "I'll never get used to this language. My code is a mess of references, clones, and unnecessary mutations that even I can't read anymore." -Reality sets in. Code becomes increasingly convoluted with superfluous mutable variables and overly complex data structures. What started as a promising project now feels like an unreadable jumble of syntax. +Reality sets in. Code becomes increasingly convoluted with superfluous mutable variables and overly complex data structures. 
+What started as a promising project now feels like an unreadable jumble of syntax. +Although the code compiles, the code is ugly and hard to maintain. +It just doesn't feel right but developers can't quite put their finger on why. +They feel trapped and don't know how to improve their code or even which questions to ask. #### Stage 5: Acceptance -"I see now that these idioms exist for a reason - my code is not only safer but actually more readable when I embrace Rust's patterns instead of fighting them." +> "I see now that these idioms exist for a reason - my code is not only safer but actually more readable when I embrace Rust's patterns instead of fighting them." -Finally, developers begin embracing idiomatic patterns and the design philosophy behind Rust. They refactor their spaghetti code and leverage the type system rather than fight it. Code becomes more maintainable, and they wonder how they ever wrote memory-unsafe code with confidence. +Finally, developers begin embracing idiomatic patterns and the philosophy behind Rust. +They refactor their spaghetti code and leverage stronger types rather than resisting them. +Code becomes more maintainable, and they wonder how they ever wrote memory-unsafe code in other languages with such confidence. ## Let Go Of Old Bad Habits -Okay, you (or your team-member) reached acceptance, how can you do better? +Okay, you (or your team-member) reached acceptance, how *can* you do better? -The first step is to acknowledge that the code goes against Rust's design principles. -Based on this realization, we can systematically improve the code. +The first step is to acknowledge that your existing code goes against Rust's design principles. +It's a symptom of outdated ideas from the past still haunting you and holding back your progress. +**Ugly Rust code is a band-aid around old, bad habits.** -Ugly code is a band-aid around bad habits. -Learn to do it the "Rustic way." 
+Based on this realization, we can systematically improve the code. -We have seen plenty of ways to write better Rust code in previous articles: +There are a few techniques that can help you write better Rust code, some of which we've discussed before: -- Read the standard library documentation - [Think in expressions](/blog/expressions) - [Immutability by default](/blog/immutability) -- [Leaning into the typesystem](/blog/illegal-state) -- [Iterator patterns instead of manual iteration](/blog/iterators) -- Proper error handling +- [Lean into the typesystem](/blog/illegal-state) +- [Use iterator patterns](/blog/iterators) +- Read the standard library documentation +- Use proper error handling - Split up the problem into smaller parts Even just applying these basic techniques, we can get our code into a much better shape. @@ -283,6 +317,7 @@ Even just applying these basic techniques, we can get our code into a much bette This is a hands-on exercise. Feel free to paste the above code into your editor and practice refactoring it. Here's the [link to the Rust playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=2510df77c1c8c6a227680ea49407fe18). +At the end, there will be a little quiz to see if you found all the edge-cases. I'll wait here. {% end %} @@ -290,7 +325,7 @@ I'll wait here. ### Read the Standard Library Documentation Many common patterns are beautifully handled by the standard library. -It is worth your time to read the documentation. +It is absolutely worth your time to [read the documentation](https://doc.rust-lang.org/std/). 
For instance, you will find that you can get rid of all of of this boilerplate: ```rust @@ -301,7 +336,7 @@ file.read_to_end(&mut bytes).unwrap(); let s = String::from_utf8_lossy(&bytes).to_string(); ``` -and instead call [`std::fs::read_to_string`](https://doc.rust-lang.org/std/fs/fn.read_to_string.html): +and instead just call [`std::fs::read_to_string`](https://doc.rust-lang.org/std/fs/fn.read_to_string.html): ```rust use std::fs::read_to_string; @@ -315,15 +350,23 @@ let s = read_to_string(path).unwrap(); our `HashMap` explicitly. ```rust -let mut config = HashMap::new(); +let mut cfg: HashMap = HashMap::new(); +``` + +becomes + +```rust +let mut cfg = HashMap::new(); ``` ### Lean Into the Typesystem -Manual string splitting is not necessary and very much discouraged. +Manual string splitting is error-prone and very much discouraged. The reason is that strings are, in fact, very complicated. -There is a perception that it's just an array of "characters", but that is ill-defined -and a dangerous assumption. +There is an outdated assumption that strings are just an array of "characters", but that assumption is ill-defined and dangerous. +For example, the string `"café"` is represented as 5 bytes in UTF-8, but only 4 characters. + +But even in our simple example code from above, string splitting turns out to be a source of bugs: ```rust let lines_with_refs: Vec<&'a str> = s.split('\n').collect(); @@ -332,7 +375,7 @@ let lines_with_refs: Vec<&'a str> = s.split('\n').collect(); This line expects that lines are separated by `\n`. That's not true on Windows, where lines are separated by `\r\n`. -The following line does the correct thing on all platforms: +The following line does the right thing on all platforms: ```rust let lines = s.lines(); @@ -346,22 +389,30 @@ Knowing that, we can instead iterate over each line: for line in s.lines() { let line = line.trim(); - if line.is_empty() || line.starts_with("#") { - continue; - } - // ...
} ``` Note that we shadow `line` with `line.trim()`. -That is a common practice in Rust. -This way we don't have to come up with a new name for the trimmed line -and we also don't have to fall back to cryptic names like `lref` or `l` anymore. +That is a common practice in Rust and very useful to keep the code clean. -Instead of `line.len() == 0`, we use `line.is_empty()` now. +It means we don't have to come up with a fancy new name for the trimmed line +and we also don't have to fall back to cryptic names like `lref` or `l` instead. + +By reading the standard library documentation, we learn about some useful methods on strings. +So instead of `line.len() == 0`, we write `line.is_empty()` now. And `line.starts_with("#")` is easier to read than checking with `l.chars().next() == Some('#')`. +```rust +for line in s.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with("#") { + continue; + } + // ... +} +``` + Next, let's tackle this part: ```rust @@ -378,7 +429,24 @@ if k.len() > 0 { Note how we access `parts[0]` and `parts[1]` without checking if these are valid indices. The code only coincidentally works for well-formed inputs. -Fortunately, we don't have to do all this if we lean into the typesystem a little more and use pattern matching to destructure the result of `split_once`: +We could add a check to ensure that `parts` has at least two elements: + +```rust +if parts.len() >= 2 { + let k: &str = parts[0].trim(); + if k.len() > 0 { + let v: &str = parts[1].trim(); + // insert into config + } else { + // handle empty key + } +} else { + // handle line error +} +``` + +But that's clunky and verbose. 
+Fortunately, we don't have to do all that if we lean into the typesystem a little more and use pattern matching to destructure the result of `split_once`: ```rust match line.split_once('=') { @@ -395,7 +463,7 @@ match line.split_once('=') { } ``` -With that, we end up with a greatly improved version of the code: +With that, we end up with an already greatly improved version of the code: ```rust use std::collections::HashMap; @@ -435,9 +503,8 @@ However, to truly embrace Rust, it helps to a step back and think about our prob ### Use Proper Error Handling -One obvious next step is to introduce proper error handling. -It depends on the business logic how you want to handle invalid lines, but I prefer to implement proper parsing -and have complete freedom over the output on the callsite. +There's a few things we left on the table so far; one obvious one is error handling. +It depends on the business logic how you want to handle invalid lines, but let's assume we want to return an error if the file is malformed. ```rust use std::collections::HashMap; @@ -504,7 +571,21 @@ fn parse_config_file(path: &str) -> Result, ParseError> } ``` -Next, let's write a function for parsing individual lines. +Note how our code has gotten quite a bit more verbose again. +But in comparison to the original code, the verbosity has a purpose: it marks the various bits and pieces of our code that can fail. +We can decide to handle these errors gracefully on the call site and have full control over how we want to deal with them. +Some errors are harder to handle than others. +For example, we can choose to skip invalid lines, but we might want to return an error if the file itself cannot be read. +This and more we can express in code now. + +## Parsing Individual Lines + +The "meat" of the parser is the part that parses individual lines. 
+This is still buried in the single `parse_config_file` function, which has quite a lot of responsibilities +such as reading the file, iterating over lines, and parsing each line. + +Since parsing lines is such a core part of the business logic, let's make sure it gets the attention it deserves. +For starters, let's move the line parsing logic into its own function. ```rust fn parse_line(line: &str) -> Result, ParseError> { @@ -529,24 +610,42 @@ fn parse_line(line: &str) -> Result, ParseError> { } ``` -The code is still quite "stringy-typed," which usually is a sign of a missing abstraction. -How about we introduce an enum to represent a parsed line? +Don't worry about the ugly function signature for now. +In fact, it is a tell-tale sign that the problem is harder than it seems at first glance and that we are still not quite done yet. -```rust -#[derive(Debug)] -struct KeyValue { - key: String, - value: String, -} +**In Rust, code that is "stringly-typed" is usually a sign of a missing abstraction.** + +In our case, the `Result<Option<...>, ParseError>` type indicates that we are trying to parse a line that may or may not contain a key-value pair, and that parsing can fail. +That is a good start for thinking about our missing abstraction. + +We need to represent a few different outcomes of parsing a line: + +- An invalid line, represented by the `Result` +- An empty line +- A comment line +- Finally, a valid key-value pair + +Most likely, you would ignore empty lines and comments in your parser, but they are still valid outcomes of parsing a line. +The crucial insight is that these outcomes are now more visible and that we have a choice of how to handle these outcomes in our code.
+ +With that in mind, we can define a new enum to represent the different outcomes of parsing a line: +```rust #[derive(Debug)] enum ParsedLine { KeyValue(KeyValue), Comment(String), Empty, } + +#[derive(Debug)] +struct KeyValue { + key: String, + value: String, +} ``` + We'd use it like so: ```rust @@ -646,6 +745,8 @@ All we do is creating a map of key-value pairs from some input. At this stage we might as well convert `parse_config_file` into an `EnvParser` struct. And while we're at it, let's lift the requirement of passing a file path to the parser and instead accept any type that implements `Read`. +This allows us to parse strings, files, or any other input that can be read. +It makes testing a lot easier, too. ```rust use std::collections::HashMap; @@ -864,7 +965,7 @@ mod tests { } ``` -Without much effort, we decoupled the code. +By just following a few key principles, we have transformed our initial parser into a more idiomatic Rust implementation. Now, every part has one clearly defined responsibility: - `KeyValue` is responsible for parsing a single line From b93eb84df483e7878dbb3ab8954302f72ad440ba Mon Sep 17 00:00:00 2001 From: Matthias Date: Fri, 18 Jul 2025 14:33:24 +0200 Subject: [PATCH 17/20] formatting --- content/blog/ugly/index.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index abb7f612..36587b49 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -322,7 +322,7 @@ I'll wait here. {% end %} -### Read the Standard Library Documentation +## Read the Standard Library Documentation Many common patterns are beautifully handled by the standard library. It is absolutely worth your time to [read the documentation](https://doc.rust-lang.org/std/). @@ -359,7 +359,7 @@ becomes let mut cfg = HashMap::new(); ``` -### Lean Into the Typesystem +## Lean Into the Typesystem Manual string splitting is error-prone and very much discouraged. 
The reason is that strings are, in fact, very complicated. @@ -501,7 +501,7 @@ fn parse_config_file(path: &str) -> HashMap { You'd be forgiven if you called it a day at this point. However, to truly embrace Rust, it helps to take a step back and think about our problem for a little longer. -### Use Proper Error Handling +## Use Proper Error Handling There are a few things we left on the table so far; one obvious one is error handling. It depends on the business logic how you want to handle invalid lines, but let's assume we want to return an error if the file is malformed. @@ -680,6 +680,15 @@ fn parse_line(line: &str) -> Result { We could even go one step further and express more of our invariants in the type system. For example, we can make use of the fact that parsing a key-value pair only depends on a single line. + +{% info(title="Note", icon="warning") %} + +Multiline environment variables exist, so instead of "parsing a single line," we should say "parsing a single key-value pair." +For now, we will ignore multiline key-value pairs and assume that each line contains at most one key-value pair. +However, the solution we are building here is extensible enough to handle multiline key-value pairs if we wanted to. + +{% end %} + Since parsing is a fallible operation, we can implement `TryFrom` for our `KeyValue` struct: ```rust From 784bc2abae8337b8987d143600d6ffdbba13290c Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Sat, 11 Oct 2025 16:23:49 +0200 Subject: [PATCH 18/20] Apply suggestion from @mre --- content/blog/ugly/index.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index 36587b49..640b30c5 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -20,6 +20,8 @@ From these frustrations, people often conclude that Rust is not for them and quit.
But after programming in Rust for 10 years, I think that **your coding style has the biggest impact on how your Rust code will look and feel**. +**People often say Rust has ugly syntax, but I'd argue the syntax is the least interesting thing about Rust. Rather, the semantics -- the bits and pieces the language provides to express your ideas and how those bits combine to build interesting things -- are much more important.** + If you write Rust like you would write idiomatic code in other languages, it will *never feel right*. "You can write bad Java code in any language," is a common saying, and I think it applies here as well. From b4fd10942d98295c70de2de9ef87d3b2b8477997 Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Sat, 11 Oct 2025 16:26:08 +0200 Subject: [PATCH 19/20] Apply suggestion from @mre --- content/blog/ugly/index.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index 640b30c5..d93b5447 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -236,6 +236,8 @@ The code carries all the hallmarks of a beginner Rust programmer (possibly with - Cryptic variable names - Very imperative coding style +Rust makes all of these problems painfully explicit. And if you refuse to learn how to avoid such problems, you will end up writing bad code in any programming language. + This does not just make the code harder to read. What is worse is that it leads to business logic bugs in the code, because the code makes quite a few unsound assumptions about its input.
From e2b876a9ffb66cdd8ecddc2361326ef046f251ca Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Sat, 11 Oct 2025 16:26:48 +0200 Subject: [PATCH 20/20] Apply suggestion from @mre --- content/blog/ugly/index.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/blog/ugly/index.md b/content/blog/ugly/index.md index d93b5447..0af182ee 100644 --- a/content/blog/ugly/index.md +++ b/content/blog/ugly/index.md @@ -249,6 +249,8 @@ The moment you start to listen to what Rust is trying to teach you, everything s Here is one of my favorite things about Rust, that I never experienced in other languages: **better semantics enable nicer syntax.** That means, the more you learn about the core mechanics behind Rust (traits, pattern matching, expressions, composition over inheritance, etc.), the more you can leverage these concepts to write code that is readable and extensible. +The syntax takes a backseat. It gives way to semantics, which are much more important. + ## The Five Stages Of Grief