diff --git a/_config.yml b/_config.yml index cfca4643..a93078e6 100644 --- a/_config.yml +++ b/_config.yml @@ -18,7 +18,6 @@ email: simon@ochsenreither.de description: > baseurl: "" # the subpath of your site, e.g. /blog url: "" # the base hostname & protocol for your site -twitter_username: oxnrtr github_username: soc plugins: - jekyll-redirect-from @@ -28,18 +27,22 @@ collections: output: true name: "Standards" published: true - hardware: - output: true - name: "Hardware" - published: true languages: output: true name: "Languages" published: true + runtimes: + output: true + name: "Runtimes" + published: true interfaces: output: true name: "Interfaces" published: true + hardware: + output: true + name: "Hardware" + published: true scala: output: true name: "Scala" @@ -56,12 +59,12 @@ defaults: layout: post - scope: - type: hardware + type: languages values: layout: post - scope: - type: languages + type: runtimes values: layout: post - @@ -69,6 +72,11 @@ defaults: type: interfaces values: layout: post + - + scope: + type: hardware + values: + layout: post - scope: type: scala diff --git a/_drafts/the-cost-of-everything.md b/_drafts/the-cost-of-everything.md deleted file mode 100644 index fbf93702..00000000 --- a/_drafts/the-cost-of-everything.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: "The Cost of Everything" -date: 2018-12-30 12:00:00 +0200 ---- - -| Merit Points | Item | Example | -| -----------: | ------------- | ------- | -| -100| Adding a language feature to do something that can already be done | | -| -100| Adding a language feature to do something that can be implemented with a library | | -| -80| Adding a language feature to do something that can be implemented with a macro | | -| -60| Adding a language feature to do something that can be achieved by fixing a compiler bug | | -| -10| Adding a new element to the standard library | | -| -100| Adding a new element to the global namespace | | -| -80| Adding a new element to `util` | | -| +10| Removing an 
existing element from the standard library while retaining abstraction and functionality | | -| +100| Removing superfluous language syntax | | -| +100| Removing a non-working language feature | | -| +80| Removing a language feature that can be implemented with code | | -| +40| Removing a pointless distinction | | diff --git a/_includes/pagination.html b/_includes/pagination.html index afdc65ca..656d544e 100644 --- a/_includes/pagination.html +++ b/_includes/pagination.html @@ -9,7 +9,7 @@ {% assign previous_url = page.previous.url %} {% assign previous_title = page.previous.title %}
- {{ previous_title }} + {{ previous_title | remove_first: "Language Design: " }}
{% endif %}{% endif %} @@ -17,13 +17,13 @@ {% assign next_url = page.page_next_url %} {% assign next_title = page.page_next_title %}
- {{ next_title | remove_first: "Language Design: " }} + {{ next_title | remove_first: "Language Design: " }}
{% else %}{% if page.next %} {% assign next_url = page.next.url %} {% assign next_title = page.next.title %}
- {{ next_title }} + {{ next_title | remove_first: "Language Design: " }}
{% endif %}{% endif %} diff --git a/_interfaces/x86-prefixes-and-escape-opcodes-flowchart.md b/_interfaces/x86-prefixes-and-escape-opcodes-flowchart.md new file mode 100644 index 00000000..e8cf18d5 --- /dev/null +++ b/_interfaces/x86-prefixes-and-escape-opcodes-flowchart.md @@ -0,0 +1,88 @@ +--- +title: "x86 prefixes and escape opcodes flowchart" +date: 2023-07-29 +updated: 2023-09-24 +markdeep: true +--- + +
+ start here + | + v +╔═══════════════════════════════════════════════╤══╗ ╔══════════════════════════════════════════════════╗ +║ 1-byte instructions (legacy map 0) │0F------------->║ 2-byte instructions (legacy map 1) ║ +║ └──╢ ║ ║ +╟──────────────────────────────────────────────────╢ .------>║ operand type specified ┌──┐ ┌──┐ ║ +║ 40-4F ║ | ║ via mandatory prefixes │38│ │3A--------------. +╟───────────────────────────|──────────────────────╢ | .--->║ - none (packed single) └─|┘ └──┘ ║ | +║ ┌──┐ ┌──┬──┐ | ║ | | ║ - 66 (packed double) | ║ | +║ .--62│ │66│67│ | ║ | | ║ - F2 (scalar double) | ║ | +║ | └──┘ └─|┴─|┘ | ║ | | ║ - F3 (scalar single) | ║ | +║ | | | | ┌──┬──┐ ║ | | ╚═══════════════════════════════|══════════════════╝ | +║ | | | | │C4│C5-----. ║ | | v | +║ | | | | └|─┼──┤ | ║ | | ╔══════════════════════════════════════════════════╗ | +╟──┐ | ┌──┬──┐ | | | | │D5│ | ║ | +--->║ 3-byte instructions (legacy map 2) ║ | +║F0│ | │F2│F3│ | | | | └─|┘ | ║ | | ║ ║ | +╚══╧═|═╧═|╧═|╧══════|══|════|══════|═══|═════|═════╝ | | ║ operand type specified ║ | + | | | ^ | | | ^ | | ^ | | | ║ via mandatory prefixes ║ | + | | | | | | | | | | +---|-----------+ | ║ - none (packed single) ║ | + v '--+---+---+--' v | v v | v m bit | | ║ - 66 (packed double) ║ | + ┏━━━━┓ | ┏━━━|┓┏━━━━┓┏━━|━┓┏━━━━┓ | | ║ - F2 (scalar double) ║ | + ┃EVEX┃ | ┃REX1┃┃VEX3┃┃REX2┃┃VEX2┃-------' | ║ - F3 (scalar single) ║ | + ┗━━|━┛ | ┗━━━━┛┗━━|━┛┗━━━━┛┗━━━━┛ | ╚══════════════════════════════════════════════════╝ | + | | | | | + '----------+------------------+------------------------+ ╔══════════════════════════════════════════════════╗ | + m bits '--->║ 3-byte instructions (legacy map 3) ║<-+ + ║ ║ + ║ operand type specified ║ + ║ via mandatory prefixes ║ + ║ - none (packed single) ║ + ║ - 66 (packed double) ║ + ║ - F2 (scalar double) ║ + ║ - F3 (scalar single) ║ + ╚══════════════════════════════════════════════════╝ +
+ +
+┏━┯━┯━┯━┯━┯━┯━┯━┓ ┏━┯━┯━┯━┯━┯━┯━┯━┳━┯━┯━┯━┯━┯━┯━┯━┓ +┃0 1 0 0 W R X B┃ ┃1 1 0 1 0 1 0 1┃M R X B W R X B┃ +┗━┷━┷━┷━┷━┷━┷━┷━┛ ┗━┷━┷━┷━┷━┷━┷━┷━┻━┷━┷━┷━┷━┷━┷━┷━┛ +REX (1-byte prefix) AMD64 (1999/2003) REX (2-byte prefix) APX (2023/????) +- W extends operand size - M selects legacy map 0 or legacy map 1 +- R extends register bits - R extends register bits +- X extends index in SIB byte - X extends index in SIB byte +- B extends base in SIB byte - B extends base in SIB byte + - W extends operand size + + +┏━┯━┯━┯━┯━┯━┯━┯━┳━┯━┯━┯━┯━┯━┯━┯━┓ ┏━┯━┯━┯━┯━┯━┯━┯━┳━┯━┯━┯━┯━┯━┯━┯━┳━┯━┯━┯━┯━┯━┯━┯━┓ +┃1 1 0 0 0 1 0 1┃Ṙ ⩒ ⩒ ⩒ ⩒ L p p┃ ┃1 1 0 0 0 1 0 0┃Ṙ Ẋ Ḃ m m m m m┃W ⩒ ⩒ ⩒ ⩒ L p p┃ +┗━┷━┷━┷━┷━┷━┷━┷━┻━┷━┷━┷━┷━┷━┷━┷━┛ ┗━┷━┷━┷━┷━┷━┷━┷━┻━┷━┷━┷━┷━┷━┷━┷━┻━┷━┷━┷━┷━┷━┷━┷━┛ +VEX (2-byte prefix) AVX (2008/2011) VEX (3-byte prefix) AVX (2008/2011) +- R extends register bits - R extends register bits +- v encodes additional source register - X extends index in SIB byte +- L selects vector length (0: 128bit | 1: 256bit) - B extends base in SIB byte +- p encodes mandatory prefixes - m encodes escape bytes (1: 0F | 2: 0F38 | 3: 0F3A) + (0: none | 1: 66 | 2: F3 | 3: F2) - W extends operand size +- escape byte 0F implied (legacy map 1) - v encodes additional source register + - L selects vector length (0: 128bit, 1: 256bit) + - p encodes mandatory prefixes + (0: none | 1: 66 | 2: F3 | 3: F2) + + +┏━┯━┯━┯━┯━┯━┯━┯━┳━┯━┯━┯━┯━┯━┯━┯━┳━┯━┯━┯━┯━┯━┯━┯━┳━┯━┯━┯━┯━┯━┯━┯━┓ Notes: +┃0 1 1 0 0 0 1 0┃Ṙ Ẋ Ḃ Ṙ B m m m┃W ⩒ ⩒ ⩒ ⩒ Ẋ p p┃z Ŀ L b ⩒ a a a┃ - years after the instruction set extension +┗━┷━┷━┷━┷━┷━┷━┷━┻━┷━┷━┷━┷━┷━┷━┷━┻━┷━┷━┷━┷━┷━┷━┷━┻━┷━┷━┷━┷━┷━┷━┷━┛ denote when it was first announced/shipped +EVEX (4-byte prefix) AVX-512 (2013/2017) - letters with a dot above denote that the +- R extends register bits prefix contains the bit in inverted form +- X extends index in SIB byte - the diagram elides escape bytes D8 til DF +- B extends base in SIB byte - the EVEX prefix has additional variations +- m encodes 
escape bytes (1: 0F | 2: 0F38 | 3: 0F3A) not shown here for encoding +- W extends operand size - VEX instructions +- v encodes additional source register - legacy instructions +- p encodes mandatory prefixes (0: none | 1: 66 | 2: F3 | 3: F2) - conditional CMP/TEST +- z selects merge mode (0: merge | 1: zero) +- Ŀ selects vector length (512bit) or rounding control mode (with L) +- L selects vector length (256bit) +- b encodes source broadcast or rounding control (with Ŀ and L) or exception suppression +
diff --git a/_languages/alasca/classes-values.md b/_languages/alasca/classes-values.md index 2975afe0..1744cd70 100644 --- a/_languages/alasca/classes-values.md +++ b/_languages/alasca/classes-values.md @@ -1,6 +1,7 @@ --- title: "Alasca: Classes and Values" date: 2018-08-31 12:00:00 +0200 +published: false --- - `class`: reference type diff --git a/_languages/alasca/collections.md b/_languages/alasca/collections.md index 0c75289c..b2f60258 100644 --- a/_languages/alasca/collections.md +++ b/_languages/alasca/collections.md @@ -1,6 +1,7 @@ --- title: "Alasca: Collections" date: 2018-08-31 12:00:00 +0200 +published: false --- - collections are immutable @@ -35,4 +36,4 @@ value SizeInfo object Unknown value Known(Size) value Bounded(Size) -``` \ No newline at end of file +``` diff --git a/_languages/alasca/conditions.md b/_languages/alasca/conditions.md index 1b513378..2cc559b6 100644 --- a/_languages/alasca/conditions.md +++ b/_languages/alasca/conditions.md @@ -1,6 +1,7 @@ --- title: "Alasca: Conditions" date: 2018-08-31 12:00:00 +0200 +published: false --- See [Unified Condition Syntax](https://soc.github.io/languages/unified-condition-syntax). diff --git a/_languages/alasca/functions.md b/_languages/alasca/functions.md index 53abea28..05e298a2 100644 --- a/_languages/alasca/functions.md +++ b/_languages/alasca/functions.md @@ -1,6 +1,7 @@ --- title: "Alasca: Functions" date: 2018-08-31 12:00:00 +0200 +published: false --- - zero or one paramemter list diff --git a/_languages/alasca/identity-equality-hashcode.md b/_languages/alasca/identity-equality-hashcode.md index 7b3a29b7..f00bde24 100644 --- a/_languages/alasca/identity-equality-hashcode.md +++ b/_languages/alasca/identity-equality-hashcode.md @@ -1,6 +1,7 @@ --- title: "Alasca: Identity, Equality and Hashcode" date: 2018-08-31 12:00:00 +0200 +published: false --- All values and non-open classes receive a default implementation of equality and identity. 
diff --git a/_languages/alasca/keywords.md b/_languages/alasca/keywords.md index da197516..9bf85611 100644 --- a/_languages/alasca/keywords.md +++ b/_languages/alasca/keywords.md @@ -1,6 +1,7 @@ --- title: "Alasca: Keywords" date: 2018-09-07 12:00:00 +0200 +published: false --- **6 letters** namespaces – declaring namespaces and bringing namespaces into scope: @@ -15,6 +16,7 @@ date: 2018-09-07 12:00:00 +0200 - `value` (value types) - `alias` (type aliases) - `with` ("extends") +- `enum` **4 letters** control flow: diff --git a/_languages/alasca/modifiers.md b/_languages/alasca/modifiers.md index 4d093a7f..f2db8a43 100644 --- a/_languages/alasca/modifiers.md +++ b/_languages/alasca/modifiers.md @@ -1,6 +1,7 @@ --- title: "Alasca: Modifiers" date: 2018-09-07 12:00:00 +0200 +published: false --- All modifiers start with an `@` sign and are placed on the preceding line of the element they annotate. diff --git a/_languages/alasca/modules.md b/_languages/alasca/modules.md index 01cb1c82..7e7867b2 100644 --- a/_languages/alasca/modules.md +++ b/_languages/alasca/modules.md @@ -1,6 +1,7 @@ --- title: "Alasca: Modules" date: 2018-08-31 12:00:00 +0200 +published: false --- Modules contain funs, lets, vars, classes, values, and other modules. diff --git a/_languages/alasca/standard-library.md b/_languages/alasca/standard-library.md index 88602547..df563f7c 100644 --- a/_languages/alasca/standard-library.md +++ b/_languages/alasca/standard-library.md @@ -1,6 +1,7 @@ --- title: "Alasca: Standard Library" date: 2018-08-31 12:00:00 +0200 +published: false --- Ordered from necessary to convenient: @@ -136,4 +137,4 @@ object std.regex // important enough for top-level? ``` // "every element except the last one!" 
loop(i < arr.length - 1) -// arr.length - 1 == -1 if arr is empty \ No newline at end of file +// arr.length - 1 == -1 if arr is empty diff --git a/_languages/alasca/streams.md b/_languages/alasca/streams.md index 71b256ed..fa424cde 100644 --- a/_languages/alasca/streams.md +++ b/_languages/alasca/streams.md @@ -1,6 +1,7 @@ --- title: "Alasca: Streams" date: 2018-09-31 12:00:00 +0200 +published: false --- diff --git a/_languages/alasca/string-text.md b/_languages/alasca/string-text.md index 08663759..f93fa77c 100644 --- a/_languages/alasca/string-text.md +++ b/_languages/alasca/string-text.md @@ -1,6 +1,7 @@ --- title: "Alasca: String and Text" date: 2018-08-31 12:00:00 +0200 +published: false --- #### String diff --git a/_languages/alasca/syntax-overview.md b/_languages/alasca/syntax-overview.md index f074631e..69929d0e 100644 --- a/_languages/alasca/syntax-overview.md +++ b/_languages/alasca/syntax-overview.md @@ -1,6 +1,7 @@ --- title: "Alasca: Syntax Overview" date: 2018-08-31 12:00:00 +0200 +published: false --- ```scala diff --git a/_languages/alasca/traits-sealed-enum.md b/_languages/alasca/traits-sealed-enum.md index 078f9449..4cc2a6b4 100644 --- a/_languages/alasca/traits-sealed-enum.md +++ b/_languages/alasca/traits-sealed-enum.md @@ -1,6 +1,7 @@ --- title: "Alasca: Traits, Sealed Traits and Enum Traits" date: 2018-08-31 12:00:00 +0200 +published: false --- #### Traits diff --git a/_languages/alasca/typeclasses.md b/_languages/alasca/typeclasses.md index 40d822d7..a74feb73 100644 --- a/_languages/alasca/typeclasses.md +++ b/_languages/alasca/typeclasses.md @@ -1,6 +1,7 @@ --- title: "Alasca: Typeclasses" date: 2018-08-31 12:00:00 +0200 +published: false --- ```scala diff --git a/_languages/alasca/visibility-exports.md b/_languages/alasca/visibility-exports.md index 48ae9517..67c64bbf 100644 --- a/_languages/alasca/visibility-exports.md +++ b/_languages/alasca/visibility-exports.md @@ -1,6 +1,7 @@ --- title: "Alasca: Visibility and Exports" date: 
2018-08-31 12:00:00 +0200 +published: false --- - open modules: namespace to place files, at the top of source files diff --git a/_languages/annotations-obsolete-modifiers-attempts.md b/_languages/annotations-obsolete-modifiers-attempts.md index 6be1550e..a3714403 100644 --- a/_languages/annotations-obsolete-modifiers-attempts.md +++ b/_languages/annotations-obsolete-modifiers-attempts.md @@ -3,9 +3,7 @@ title: "Language Design: Annotations Obsolete Modifiers – Failed Attempts" date: 2021-12-15 14:00:00 +0200 --- -#### Failed Attempts - -##### Kotlin +#### Kotlin Kotlin [gave up on it](https://blog.jetbrains.com/kotlin/2015/08/modifiers-vs-annotations/), as they couldn't figure out how to recognize annotation usages as early in the compiler pipeline as modifiers previously. @@ -14,7 +12,8 @@ This lead to the determination that modifiers (without the prefix `@`) had to st able to omit the prefix `@`, leading to inconsistencies. -##### Ceylon +#### Ceylon Ceylon tried the route in which [everything is an annotation, but looks like a modifier (i. e. without prefix `@`)](https://ceylon-lang.org/documentation/1.3/reference/structure/annotation/). + This made it hard to distinguish between important keywords, and less important annotations. diff --git a/_languages/binary-operators-are-overused.md b/_languages/binary-operators-are-overused.md new file mode 100644 index 00000000..49203160 --- /dev/null +++ b/_languages/binary-operators-are-overused.md @@ -0,0 +1,66 @@ +--- +title: "Language Design: Binary Operators are Overused" +date: 2019-09-21 +--- + +_**TL;DR:** Use methods._ + +Many languages provide binary operators, usually for operations on numbers (addition, multiplication), +bits (shifts) and boolean values. In general, this language facility has been overused, forcing users +to learn and recall precedence and associativity of dozens of operators. 
+ +Additionally, some popular operators have additional problems: + +### The problem with `&` + +Many older language that were influenced by C also inherited its [incorrect operator precedence](https://ericlippert.com/2020/02/27/hundred-year-mistakes/). + +Newer languages like Go or Swift avoided copying this mistake. + +### The problem with `<<`, `>>`, `>>>` + +Languages (like Java or JavaScript) that decided against providing both signed and unsigned number types often offer +two operators for shifting bits to the right: one that preserves the sign bit and one that doesn't. + +The operators in question – `>>` and `>>>` – don't indicate their respective semantics, forcing users +to remember the rules. + +### The problem with `%` + +#### In most languages the `%` operator implements a remainder operation, not a modulo operation + +- *remainder*: has the same sign as the dividend +- *modulo*: has the same sign as the divisor + +| | remainder | modulo | +|:---------:|-----------:|----------:| +| +4 % +3 | 1 | 1 | +| -4 % +3 | -1 | 1 | +| +4 % -3 | 1 | -1 | +| -4 % -3 | -1 | -1 | +{: .table-medium .table-width-small } + +#### There are multiple possible implementations of remainder and modulo, with no clear winner + +At least five approaches are known[^leijen][^boute]: + +- Remainder of truncated division +- Remainder of floored division +- Remainder of ceiling division +- Remainder of euclidean division +- Remainder of rounded division + +### A sensible, small set of operators + +| Precedence level | Operator | Description | +|-----------------:|------------------------------------------------|------------------------------------------------| +| 1 | `=` | Assignment | +| 2 | || | Boolean Or | +| 3 | `&&` | Boolean And | +| 4 | `==`, `!=`, `<`, `<=`, `>`, `>=`, `===`, `!==` | Comparisons | +| 5 | `+`, `-`, |, `^` | Addition, Subtraction, Bitwise Or, Bitwise Xor | +| 6 | `*`, `/`, `&` | Multiplication, Division, Bitwise And | +{: .table-medium .table-layout-auto } + 
+[^leijen]: [Division and Modulus for Computer Scientists](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/divmodnote-letter.pdf) +[^boute]: [The Euclidean Definition of the Functions div and mod](https://dl.acm.org/doi/pdf/10.1145/128861.128862) \ No newline at end of file diff --git a/_languages/comparing-and-sorting.md b/_languages/comparing-and-sorting.md index 299e849f..2d1d1169 100644 --- a/_languages/comparing-and-sorting.md +++ b/_languages/comparing-and-sorting.md @@ -1,46 +1,55 @@ --- title: "Language Design: Comparing and Sorting" -date: 2018-10-31 12:00:00 +0200 +date: 2018-10-31 +update: 2022-06-11 --- Similarly to [equality and identity](equality-and-identity-part1), most languages have severely restricted facilities to handle distinct ordering relationships like comparison and sorting. Languages usually provide only a single operation/protocol, often requiring workarounds for some data types in which the comparison operation and the sorting operation return distinct results. -Consider the following `Comparable` trait: +Consider the following `Comparable` trait as it frequently exists across many languages +(like [Haskell](https://hackage.haskell.org/package/base-4.16.1.0/docs/Data-Ord.html), +[Rust](https://doc.rust-lang.org/std/cmp/trait.PartialOrd.html), +[Swift](https://developer.apple.com/documentation/swift/comparable), ...): ```ml trait Comparable[T] - fun < (that: T): Boolean = ... - fun > (that: T): Boolean = ... + fun < (that: T): Bool + fun > (that: T): Bool + ... ``` -... and an IEEE754-conformant implementation of `Comparable` for floating point values, such that `-0.0 < +0.0`, and `Float.NaN < Float.PositiveInfinity` are both false. - -As it becomes obvious, such an implementation of _partial order_ used to correctly _compare_ values, cannot be used to correctly _sort_ values (_total order_).[^1] +... and an IEEE754-conformant comparison implementation for floating point values, +i. e. 
`-0.0 < +0.0`, and `Float.NaN < Float.PositiveInfinity` are both false. -Worded differently, an implementation of _comparison_ operations for floating point values cannot be used as a stand-in for _sorting_ operations on floating point values.[^2] +As it becomes obvious, such an implementation of _partial order_ can be used to _compare_ values, +but cannot be used to correctly _sort_ values (_total order_).[^1] -Conveniently, IEEE754 standardizes a `totalOrder` relation in §5.10, defining how floating point numbers should be sorted. -The only requirement language-wise is to introduce a distinct trait which represents _total ordering_, enabling a clean separation of _comparisons_ and _sorting_ operations: +The reason is that the implementation of _comparison operations_ for floating point values (a partial order, IEEE754 §5.11) +cannot be used as a stand-in for _sorting_ operations on floating point values. +Conveniently, IEEE754 standardizes a `totalOrder` relation in §5.10, defining how floating point numbers are sorted. +The only requirement language-wise is to introduce a distinct trait[^2] which represents _total ordering_, +enabling a clean separation of _comparison operations_ from _sorting operations_: ```ml trait Sortable[T] - fun sortsBefore(that: T): Boolean = ... - fun sortsAfter (that: T): Boolean = ... + fun sortsBefore(that: T): Bool + fun sortsAfter (that: T): Bool + ... ``` This enables the use of each individual trait for its specific purpose, without conflating different concerns: ```ml -// compare values using Comparable +// example of comparing values using Comparable fun compareReversed[T : Comparable](x: T, y: T) = y < x -// sort values using Sortable +// example of sorting values using Sortable fun sort[T : Sortable](values: Array[T]) = ... - x sortsBefore y + if values(i).sortsBefore(values(j)) { ... } ... 
``` diff --git a/_languages/consistent-keyword-length.md b/_languages/consistent-keyword-length.md new file mode 100644 index 00000000..6d56458e --- /dev/null +++ b/_languages/consistent-keyword-length.md @@ -0,0 +1,47 @@ +--- +title: "Language Design: Use Consistent Keyword Length" +date: 2018-09-07 +update: 2023-09-16 +--- + +**6 letters** namespacing – declaring and managing namespaces: + +- `module` (unifies "object" and "package") +- `import` +- `export` + +**5 letters** "big" definitions (types): + +- `class` (reference type) +- `value` (value type, alternative to `struct`) +- `union` (alternative to `enum`) +- `trait` (interface/typeclass) +- `alias` (type alias) +- `mixin` + +**4 letters** control flow: + +- `case`/`then`/`else` or `when`/`then`/`else` +- `loop` (alternative to `while`) +- `skip` (alternative to `continue`) +- `exit` (alternative to `return`) +- `yeet` (alternative to `throw`) + +**3 letters** "small" definitions (members): + +- `fun` (function) +- `let` (immutable binding) +- `var` (mutable binding) + +--- + +Unused alternatives: + +**6 letters** "invasive" control flow: + +- `return` +- `throws` + +**2 letters** control flow: + +- `if`/`do`/`or` diff --git a/_languages/notes-on-rust.md b/_languages/design-mistakes-in-rust.md similarity index 97% rename from _languages/notes-on-rust.md rename to _languages/design-mistakes-in-rust.md index 5f8430d5..6fb66065 100644 --- a/_languages/notes-on-rust.md +++ b/_languages/design-mistakes-in-rust.md @@ -1,6 +1,7 @@ --- -title: "Language Design: Notes on Rust" +title: "Language Design: Mistakes in Rust" date: 2017-07-30 12:00:00 +0200 +redirect_from: "languages/notes-on-rust" --- It's an impressive language, but the user interface needs a lot of work. 
diff --git a/_languages/drop-break-and-continue.md b/_languages/drop-break-and-continue.md new file mode 100644 index 00000000..f2328ec3 --- /dev/null +++ b/_languages/drop-break-and-continue.md @@ -0,0 +1,74 @@ +--- +title: "Language Design: Drop `break` and `continue`" +date: 2022-12-10 +markdeep: true +--- + +_**TL;DR:** Optimize for the common case, not the exotic ones._ + +First of all: The argument is *not* that `break` and `continue` in loops aren't ... + +- useful +- convenient +- sometimes the best option +- ... + +That's not the argument being made. The argument that *is* being made is that `break` and `continue` are ... + +#### ... optimizing for an infrequent special case ... + +Consider a codebase that contains 1000 loops. + +Out of those 1000, 900 loops aren't using `break` or `continue`. + +Of the remaining 100 loops, perhaps 90 loops use `break`, and 10 loops use `continue`. + +Of those 90 loops with `break`s, 80 are easily convertible to equivalent code not using `break`. + +Of the 10 loops with `continue`s maybe 5 are easily convertible. + +
+.------------------------------------------------. +| all loops | +| | +| +---+ +| | | +| | |<---loops with break +| +---+ +| | |<---loops with continue +'--------------------------------------------+---' +
+ +This means that out of 1000 loops, supporting `break` and `continue` focuses on making 1.5% of the loops more convenient, +to the detriment of the other 98.5% of the loops. + +#### ... while worsening the general case! + +What's the detriment? The loss of the ability to read the head of the loop and know what's going on +(e. g. when the loop terminates), as the body of every loop could contain a `break` or `continue`: + +``` +while true { // is it really an endless loop? only way to find out is reading the whole loop body! + ... + if isNewLine() { + break; + } +} +``` + +This inability is so ingrained in people, that they cannot fathom the mental load that gets removed +when they do not have to keep "this loop may contain a `break` or `continue`" in the back of their head: + +``` +while !isNewLine { // loop head shows immediately when the loop terminates + ... + isNewLine = true +} +``` + +### Conclusion + +Dropping `break` and `continue` removes mental load from 98.5% of the loops that don't use them, +with the pain of having a few loops that are now more painful to write. + +That's a good trade-off. 
diff --git a/_languages/equality-and-identity-part1.md b/_languages/equality-and-identity-part1.md index bf0d7687..68343b59 100644 --- a/_languages/equality-and-identity-part1.md +++ b/_languages/equality-and-identity-part1.md @@ -1,6 +1,7 @@ --- title: "Language Design: Equality & Identity – Part 1: Overview" -date: 2017-10-31 12:00:00 +0200 +date: 2017-10-31 +update: 2022-06-08 redirect_from: "/articles/language-design/equality-and-identity" page_next_title: "Equality & Identity – Part 2: Problems" page_next_url: "equality-and-identity-part2" diff --git a/_languages/equality-and-identity-part2.md b/_languages/equality-and-identity-part2.md index f5776607..3ef2c8b4 100644 --- a/_languages/equality-and-identity-part2.md +++ b/_languages/equality-and-identity-part2.md @@ -1,6 +1,7 @@ --- title: "Language Design: Equality & Identity – Part 2: Problems" -date: 2017-10-31 12:00:00 +0200 +date: 2017-10-31 +update: 2022-06-08 page_previous_title: "Equality & Identity – Part 1: Overview" page_previous_url: "equality-and-identity-part1" page_next_title: "Equality & Identity – Part 3: Solution" diff --git a/_languages/equality-and-identity-part3.md b/_languages/equality-and-identity-part3.md index cb09391e..ff7b0654 100644 --- a/_languages/equality-and-identity-part3.md +++ b/_languages/equality-and-identity-part3.md @@ -1,6 +1,7 @@ --- title: "Language Design: Equality & Identity – Part 3: Solution" -date: 2017-10-31 12:00:00 +0200 +date: 2017-10-31 +update: 2022-06-08 page_previous_title: "Equality & Identity – Part 2: Problems" page_previous_url: "equality-and-identity-part2" page_next_title: "Equality & Identity – Part 4: Fixing Haskell" diff --git a/_languages/equality-and-identity-part4.md b/_languages/equality-and-identity-part4.md index 31bb3ee7..23279e5e 100644 --- a/_languages/equality-and-identity-part4.md +++ b/_languages/equality-and-identity-part4.md @@ -1,6 +1,7 @@ --- title: "Language Design: Equality & Identity – Part 4: Fixing Haskell" -date: 2019-03-14 
12:00:00 +0200 +date: 2019-03-14 +update: 2022-06-09 page_previous_title: "Equality & Identity – Part 3: Solution" page_previous_url: "equality-and-identity-part3" page_next_title: "Comparing and Sorting" diff --git a/_languages/equality-and-identity-part5.md b/_languages/equality-and-identity-part5.md index e1e25f37..4fd59a6a 100644 --- a/_languages/equality-and-identity-part5.md +++ b/_languages/equality-and-identity-part5.md @@ -1,9 +1,20 @@ --- title: "Language Design: Equality & Identity – Part 5: Fixing Rust" -date: 2022-06-09 12:00:00 +0200 +date: 2022-06-09 +update: 2022-06-11 +published: false ---
Dear reader, comments on this page are invite-only due to low-quality feedback.
Please refrain from linking this page on community forums of any language mentioned herein.
+ +Rust designers recognized the issues with Haskell's approach, but were not able to address the issues with Rust's `Eq` +and `PartialEq` traits. + +The main cause of this failure is the sub-typing relationship between `PartialEq` and `Eq`: + +It requires that an implementation of partial order needs to be consistent with an implementation of total order. + +This works for many types, but not for diff --git a/_languages/equality-and-identity-part6.md b/_languages/equality-and-identity-part6.md new file mode 100644 index 00000000..a0d1e941 --- /dev/null +++ b/_languages/equality-and-identity-part6.md @@ -0,0 +1,10 @@ +--- +title: "Language Design: Equality & Identity – Part 6: Fixing Swift" +date: 2022-06-11 +published: false +--- + +
+ Dear reader, comments on this page are invite-only due to low-quality feedback.
+ Please refrain from linking this page on community forums of any language mentioned herein. +
diff --git a/_languages/familiarity.md b/_languages/familiarity.md index e99829e9..cb2e9a71 100644 --- a/_languages/familiarity.md +++ b/_languages/familiarity.md @@ -1,6 +1,7 @@ --- title: "Language Design: Familiarity – Familiarity is a tie-breaker, not a self-sufficient argument" -date: 2022-06-10 12:00:00 +0200 +date: 2022-06-10 +update: 2022-07-08 --- In the past, many languages did not pick up easily adoptable language design improvements and opted @@ -10,12 +11,12 @@ Examples include[^1]: - C's broken operator precedence[^2] spread to many other languages, most of whom have little in common with C. - C++'s use of `<>` for generics, which was adopted by languages that – unlike C++ – had better options available.[^3] -- C#'s design of properties, picked up by languages that did not suffer from C#'s legacy of fields and methods. +- C#'s design of properties, picked up by languages that did not suffer from C#'s legacy of fields and methods.[^4]
The benefits of familiarity ("it is easy, because I have seen it before") are limited to those who "have seen it before", while the benefits of simplicity ("it is easy, because it was designed this way") -apply to everyone, regardless of experience, schooling or development history.[^4] +apply to everyone, regardless of experience, schooling or development history.[^5] Therefore, it's best to treat familiarity as a tie-breaker: to be used sparingly, only when the pros and cons of different design options have been fully explored, and it has been determined that @@ -23,10 +24,11 @@ no design has an edge above the other. But if one design has arguments for it, and another design has only familiarity on its side, language designers of the future are implored to pick the former to stop propagating the same -language design mistakes further and further into the future.[^5] +language design mistakes further and further into the future.[^6] [^1]: see also [Popular, but Wrong](popular-but-wrong) [^2]: see [Hundred year mistakes](https://ericlippert.com/2020/02/27/hundred-year-mistakes/) [^3]: see [Stop Using `<>` for Generics](stop-using-angle-brackets-for-generics) -[^4]: confusing them is an easy, but dangerous mistake to make, [example](https://steveklabnik.com/writing/the-language-strangeness-budget) -[^5]: The target audience of this footnote probably hasn't made it this far before losing their mind, but to clarify: Nobody is planning on making you code in GIMP, all I'm saying is that some language decisions made in the 1970ies (with little thought on design) could _perhaps_ benefit from some scrutiny before copying them into new languages verbatim. +[^4]: see [Fields & Methods & Properties? 
– Pick Two!](fields-methods-properties-pick-two) +[^5]: confusing them is an easy, but dangerous mistake to make, [example](https://steveklabnik.com/writing/the-language-strangeness-budget) +[^6]: The target audience of this footnote probably hasn't made it this far before losing their mind, but to clarify: Nobody is planning on making you code in GIMP, all I'm saying is that some language decisions made in the 1970ies (with little thought on design) could _perhaps_ benefit from some scrutiny before copying them into new languages verbatim. diff --git a/_languages/fields-methods-properties-pick-two.md b/_languages/fields-methods-properties-pick-two.md new file mode 100644 index 00000000..08010cc4 --- /dev/null +++ b/_languages/fields-methods-properties-pick-two.md @@ -0,0 +1,257 @@ +--- +title: "Language Design: Fields & Methods & Properties? – Pick Two!" +date: 2022-07-07 +update: 2022-07-08 +redirect_from: "languages/stop-building-languages-with-properties" +--- + +_**TL;DR:** Properties are a hack employed to retrofit "nice" syntax into languages that already shipped with fields and methods. Instead, design language rules for fields and methods that deliver the same (or more) benefits – at a lower price!_ + +#### Why do properties exist? + +The core feature of properties, in rough terms, is that (unlike getters and setters) property invocations look like field access, +but retain the possibility to add logic that is executed on access at a later date (unlike fields). + + +#### Examples of Languages with properties + +##### C# + +C# popularized properties when it shipped them in version 1, and extended their feature set in subsequent versions +(*auto-implemented properties* in C# 3, *initializers* in C# 6, *expression-bodied members* in C# 7). + +```c# +public class Person { + public string firstName { get; set; } +} +``` + +This means that – instead of e. g. `person.getFirstName()` – users can write `person.firstName`. 
+ +In C#, this is not perfect: changing a field to a property is source compatible, but not binary compatible; +and changing getters and setters to a property is neither. + +##### Kotlin + +TODO: fields vs. properties vs. methods + +- poorly copied from C# +- large regression from Scala + +##### Swift + +TODO: stored properties vs. computed properties vs. methods + +- slightly better syntax than C# +- still a large language complexity footprint + + +#### The problem with properties + +Conceptually, it's a bit icky that languages feature three constructs to define members that fundamentally only express +two categories: members that store ("fields") and members that compute ("methods"). + +All three (fields, methods, properties) compete for the same syntactic sweet-spot and pollute the mental model – +as they can be thin wrappers around their storage ("auto-implemented properties" in C#, "stored properties" in Swift), +or contain complex custom logic (non-"auto-implemented properties" in C#, "computed properties" in Swift). + + +#### Which desirable characteristics should fields/methods/properties language provide? + +- "Nice" syntax at use- and declaration-site. +- Evolving access should be source- and binary-compatible. +- Users should be able to see whether they are accessing a value directly, or whether computation will occur during access. + + +#### How to deliver these characteristics without needing fields, methods *and* properties? + +- Use keyword-based syntax to distinguish between fields and methods. + - This means that methods without parameters do not need to require `()` to distinguish them from fields. +- Define that members live in the same namespace. + - This prohibits a type containing a field and a method with the same name. +- Implement late(-enough) binding of member invocations. + - This avoids encoding field/method invocation differences into call sites. 
+- Expose the difference between a field and a method invocation through the use of colors in the IDE. + - This preserves important information (compared to properties or explicit getter/setter calls). + +#### How to replace property getters with fields and methods? + +Consider a class definition that contains one field `let` and two methods `fun`: + +``` +class Person(let name: String) + fun firstName: String = this.name.split(" ").get(0) + fun lastName: String = this.name.split(" ").get(1) +``` + +Usage: + +``` +let person = Person("Jane Doe") +person.name // "Jane Doe" +person.firstName // "Jane" +person.lastName // "Doe" +``` + +As a mental model, a desugared encoding of `Person`'s `name` value could look like this: + +``` +class Person(name: String) + @private + let _name: String = name + fun name: String = this._name + ... /* other methods, as in the last example */ +``` + +If the `Person` class definition is changed to contain two fields and one method ... + +``` +class Person(let firstName: String, let lastName: String) + fun name: String = this.firstName + " " + this.lastName +``` + +... the usage stays the same, despite the implementation changing completely: + +``` +let person = Person("Jane", "Doe") +person.name // "Jane Doe" +person.firstName // "Jane" +person.lastName // "Doe" +``` + +Still, users of the class can see what's happening when they access `Person`'s members, +because the IDE can use different colors to mark fields (`let`) and methods (`fun`). + +#### How to replace property setters with fields and methods? 
+ +While mutability is on its way out, and the benefits of this approach are less pronounced for property setters, +let's review an example that demonstrates how property setters can also be replaced with fields and functions: + +``` +class Wine(let name: String, var rating: Int32) +``` + +As a mental model, a desugared encoding of `Wine`'s `rating` variable could look like this: + +``` +class Wine(let name: String, rating: Int32) + @private + let _rating: Int32 = rating + fun rating: Int32 = this._rating + fun setRating(newRating: Int32) = this._rating = newRating +``` + +We use `setRating`, but instead of special property syntax `set;` or a `@setter("rating")` annotation, +we define some slight syntactic sugar for methods starting with `set`: + +> `x.setY(z)` can be written as `x.y = z` + +This is very similar to the desugaring rules used for [indexing operations](stop-using-angle-brackets-for-generics#3-it-allows--to-be-abused-for-syntax-conveniences) +(`x.get(y)` can be written as `x(y)`, and `x.set(y, z)` can be written as `x(y) = z`). 
+ +It is used like this: + +``` +let wine = Wine("Schatoh-la Fid", 96) +wine.rating // 96 +wine.rating = 97 /* same as `wine.setRating(97)` */ +``` + +To add additional checks when setting a new value (which is a popular use-case for property setters), +we explicitly define a `setRating` method: + +``` +class Wine(let name: String, var rating: Int32) + fun setRating(newRating: Int32) = + require(newRating >= 0 && newRating <= 100, s"rating must be between 0 and 100, but was $newRating") + this.rating = newRating +``` + +It is used like this: + +``` +let wine = Wine("Schatoh-la Fid", 96) +wine.rating // 96 +wine.rating = 97 +wine.rating = -1 /* not ok */ +``` + +But now we realize, that – to protect our new invariant – we also want to run this check on construction, +so we refactor: + +``` +class Wine(let name: String, var rating: Int32) + checkRating(rating) + + @override + fun setRating(newRating: Int32) = + checkRating(newRating) + this.rating = newRating + + fun checkRating(newRating: Int32) = + require(newRating >= 0 && newRating <= 100, s"rating must be between 0 and 100, but was $newRating") +``` + +At this point, the use of (property) setters becomes questionable, as more mutable members mean more +checks that need to be called in all the right places. Instead, consider this: + +``` +struct Rating(let value: Int32) + require(value >= 0 && value <= 100, s"rating must be between 0 and 100, but was $value") + +class Wine(let name: String, var rating: Rating) + +let wine = Wine("Schatoh-la Fid", Rating(96)) +wine.rating // Rating(96) +wine.rating = Rating(97) +wine.rating = Rating(-1) /* not ok */ +``` + +This preserves the simplicity of the `Wine` class definition, and moves the verification of the rating +to its own type that makes it trivial to ensure all invariants are preserved. + +#### What about method references? + +Allowing method definitions/invocation without `()` poses the question of "how to handle method references?". 
+ +There are three options: + +1. Type inference + + The meaning of `person.firstName` depends on the expected type, i. e. + ``` + fun foo(s: String) = ... + foo(person.firstName) + ``` + evaluates `person.firstName`, while + ``` + fun bar(f: () => String) = ... + bar(person.firstName) + ``` + passes a method reference to `bar`. + This approach likely requires picking one choice as a default if there is no expected type, + as well as type annotations if the type is ambiguous. + +2. Explicit lambda syntax + + Instead of dealing with type inference and ambiguity, `person.firstName` could be specified to always evaluate, + using a lambda for `bar`. + ``` + fun bar(f: () => String) = ... + bar(() => person.firstName) + ``` + +3. Reference syntax + + Specific syntax could be introduced to create references from methods: + ``` + fun bar(f: () => String) = ... + bar(person::firstName) + ``` + This approach is especially interesting if the language has other program elements for which a "reference" syntax + could also be beneficial and could replace special constructs like Java's `String.class` or C#'s `typeof(String)`. + +#### Coda + +With this design, we have accomplished more than languages with properties, +while also avoiding the complexity of having fields *and* methods *and* properties. diff --git a/_languages/generics.md b/_languages/generics.md index 7310e4d6..12213a67 100644 --- a/_languages/generics.md +++ b/_languages/generics.md @@ -1,23 +1,22 @@ --- title: "Language Design: Generics" -date: 2017-07-21 12:00:00 +0200 +date: 2017-07-21 +update: 2022-07-30 redirect_from: "/lessons-learned/generics" --- -Achieving a language design sweet-spot for the syntax of generics requires two, interconnected -design decisions: +Two interconnected design decisions achieve a particularly interesting sweet-spot in language design: 1. The `ident: Type` syntax allows consistent and straight-forward placement of generics, compared - to languages which use `Type ident`[^identtype]:
- Generics (`[T]`) always follow the name of a class or a method, both at the definition-site and at the use-site. -2. A differentiated use of brackets results in a more regular, easier - to understand syntax and has superior readability compared to languages which - overload `<>` to stand for generics as well as comparisons and bitshifts, - or use `[]` to stand for operations on arrays[^stop-generics]: - - `[]` encloses types: everything inbetween is either a type parameter or a type argument - - `()` groups: for instance a single expression, a parameter list or a tuple - - `{}` sequences: for instance a block that can contain multiple statements and definitions + to languages which use `Type ident`[^identtype]: + > Generics (`[T]`) always follow the name of a class or a method, both at the definition-site and at the use-site. +2. A clearly defined use of brackets results in a more regular, easier to understand syntax that has + superior readability compared to languages that use `<` and `>` for generics as well as for + comparisons and bitshifts, or use `[]` to stand for operations on arrays[^stop-generics]: + > `[]` _encloses_ type parameters or type arguments
+ > `()` _groups_ expressions, parameter/argument lists or tuples
+ > `{}` _sequences_ statements or definitions This means that generics do not need to be treated as an "advanced" language concept. @@ -30,14 +29,12 @@ Instead, the mental model becomes so simple that every class or method can be th
```scala -class Foo[T](let bar: String) { - fun foo[T] = ??? -} +class Foo[T](let bar: String) + fun foo[U] = ??? -fun main() { +fun main() let instance = Foo[String]("abc") - instance.foo[String] -} + instance.foo[Int64] ``` @@ -45,4 +42,4 @@ And that's all there is to it! [^identtype]: [Why is `ident: Type` better than `Type ident`?](type-annotations) -[^stop-generics]: [Stop Using <> for Generics](stop-using-angle-brackets-for-generics) +[^stop-generics]: [Stop Using `<>` for Generics](stop-using-angle-brackets-for-generics) diff --git a/_languages/lower-bar-of-rust-2.md b/_languages/lower-bar-of-rust-2.md new file mode 100644 index 00000000..bf6ba7f5 --- /dev/null +++ b/_languages/lower-bar-of-rust-2.md @@ -0,0 +1,98 @@ +--- +title: "The lower bar of a hypothetical Rust 2.0" +date: 2022-12-18 +update: 2024-01-22 +--- + +_**TL;DR:** Rust 2.0 is not going to happen, but here are fixes that would make it actually worthwhile._ + +A [recent article](https://www.ncameron.org/blog/rust-in-2023/) touching on "Rust 2.0" and its reactionary reception made me realize that language evolution has two boundaries, not one: + +
+

Boundary 1 (upper bar of change): Things a hypothetical language "v2.0" is not allowed to improve for compatibility reasons.

+

Boundary 2 (lower bar of change): Things that a hypothetical language "v2.0" needs to improve for such an effort to be worthwhile to contributors and users.

+
+ +For Rust, we know the exact coordinates of the first boundary, but very little about the second boundary +as such "critical" engagement is often poorly received in the Rust community and no reasonably complete overview exists.[^1][^2][^3] + +Nevertheless, it only needs a cursory look to conclude that "Rust 2.0" is very unlikely: +The lower bar is above the upper one, i. e. the necessary change is larger than the change that +may be considered acceptable in Rust. + +This doesn't mean that we can't explore the second boundary, and collect these "unacceptable fixes" +as a learning opportunity for future language designers: + +#### Drop struct initialization syntax + +There is little reason why invoking functions, initializing structs and enums and initializing tupled structs and enums have to follow different rules. + +Also, there is no point in having special syntax to initialize structs if everyone immediately defines `::new()` functions to avoid it. People voted with their feet, and language designers need to respect that. + +See [this article](rust-struct-initializer-mistake) for more details. + +#### Named parameters using `=` + +After dropping struct literal initialization there is no point in using `:` for value assignments, but lots of reasons against. + +This allows restoring the intuition that `=` is followed by a value and `:` is followed by a type, and that every value can receive a type ascription. + +#### Vararg parameters + +All language designers hate varargs, but handing out macros as a replacement is a substantially worse idea. + +Don't hand out rocket artillery to people wanting to cut their toenail. + +#### Drop range syntax + +It takes up way too much language footprint for very little actual benefit, and is a source of constant language expansion proposals. + +Also, fix the relationship of ranges with `Iterator`/`IntoIterator` and perhaps [a few other problems](https://ridiculousfish.com/blog/posts/least-favorite-rust-type.html). 
+ +#### Drop array and slice syntax + +This frees up the `[]` bracket pair for more useful purposes. + +#### Make generics use `[]` instead of `<>`/`::<>` + +Turns out "trying to preserve the strangeness budget"[^4] can't fix a [broken design](stop-using-angle-brackets-for-generics). + +Pretending it's not broken doesn't help either, otherwise we would have seen results by now, +because various languages tried that approach really hard for a few decades already. + +#### Fold `Index` and `IndexMut` into `Fn` trait family + +Providing traits to let people decide how round they want their function call parentheses to be is +not a useful feature. + +#### Remove the hierarchy between `Eq`/`Ord` and `PartialEq`/`PartialOrd` traits + +This means that floating point values can correctly implement the total order operation as defined in the IEEE754 spec.[^5] + +#### Drop `::` + +The distinction between path navigation (`::`) and member access (`.`) is not important enough to bother users with it at every single occasion. + +Instead, let the IDE use some syntax coloring and be done with it. + +#### Drop `as` + +... or at least make it make sense: it should *either* do type conversions *or* value conversions, but not both. + +#### Drop `if-let` + +You know a feature is not well thought out if it has spawned four extension proposals already. + +Instead, use the vastly superior [`is` design](unified-condition-expressions-comparison). + +#### Remove significance of semicola + +Varying the meaning of a piece of code based on the presence of a `;` at a specific line is bad user interface design. + +Remove it and implement automatic semicolon inference, such that IDEs can show them, but no user has to ever type them. 
+ +[^1]: ["Does Rust have any design mistakes?"](https://old.reddit.com/r/rust/comments/wvynot/does_rust_have_any_design_mistakes/) +[^2]: [label:rust-2-breakage-wishlist](https://github.com/rust-lang/rust/issues?q=label%3Arust-2-breakage-wishlist) +[^3]: [Broken and un-fixable parts of Rust](https://rust-lang.zulipchat.com/#narrow/stream/213817-t-lang/topic/broken.20and.20un-fixable.20parts.20of.20Rust) +[^4]: [The language strangeness budget](https://steveklabnik.com/writing/the-language-strangeness-budget) +[^5]: "Aaaakchually, float do not have a total order!?" – Please read the IEEE754 spec. diff --git a/_languages/modern-minimal.md b/_languages/modern-minimal.md index 34ff3160..14b758f9 100644 --- a/_languages/modern-minimal.md +++ b/_languages/modern-minimal.md @@ -1,35 +1,33 @@ --- title: "Language Design: Modern and Minimal" -date: 2019-11-05 12:00:00 +0200 +date: 2019-11-05 +update: 2022-12-06 --- 1. A smaller language, not a bigger one - namespaces: types, terms, ~~packages~~, ~~fields~~, ~~methods~~, ~~labels~~ -- modifiers: ~~keywords~~, annotations +- modifiers: ~~keywords~~, [annotations](annotations-obsolete-modifiers) - nesting: ~~packages~~, modules, ~~static~~ -- members: fields, methods, ~~properties~~ -- control flow: if-then-else, return, while, ~~break~~, ~~continue~~, ~~loop~~, ~~exceptions~~, ~~throw~~, ~~catch~~ -- constructor~~s~~: only one +- members: [fields, methods, ~~properties~~](fields-methods-properties-pick-two) +- control flow: [if-then-else](unified-condition-expressions), return, while, ~~break~~, ~~continue~~, ~~loop~~, ~~exceptions~~, ~~throw~~, ~~catch~~ +- constructors: primary, ~~secondary~~ - literals: ~~octal number literals~~, ~~class literals~~, ... 2. Correctness -- separate types for identity, equality, ordering and comparing +- separate types for [identity/equality](equality-and-identity-part1), [ordering/comparing](comparing-and-sorting) 3. 
Fewer special-cases, not more -- operators: ~~unary operators~~ +- operators: [~~unary operators~~](unary-operators-are-unnecessary), [fewer binary operators](binary-operators-are-overused) - ~~magic methods on all types~~ - ~~collection literals~~, ~~array syntax~~ - ~~instance-of-syntax~~, ~~cast-syntax~~ -- types have consistent casing (uppercase) +- types have [consistent casing](against-mixed-cased-type-names) (uppercase) 4. Simplicity, not familiarity -- ~~generics with <>~~ +- [~~generics with <>~~](stop-using-angle-brackets-for-generics) - member definition syntax differs only by `fun`, `let` or `var` 5. Higher Standards - simple to specify - simple to implement - simple to understand - - - diff --git a/_languages/naming-conventions-conversion.md b/_languages/naming-conventions-conversion.md index bae61b1b..265817ca 100644 --- a/_languages/naming-conventions-conversion.md +++ b/_languages/naming-conventions-conversion.md @@ -1,21 +1,27 @@ --- -title: "Language Design: Naming Conventions – Part 1: Creation" -date: 2018-06-19 12:00:00 +0200 -redirect_from: "/languages/naming" +title: "Language Design: Naming Conventions – Part 2: Conversion" +date: 2018-06-20 +update: 2022-06-26 +page_previous_title: "Naming Conventions – Part 1: Creation" +page_previous_url: "naming-conventions-creation" +page_next_title: "Naming Conventions – Part 3: Options" +page_next_url: "naming-conventions-options" --- - - + + - + - + diff --git a/_languages/naming-conventions-creation.md b/_languages/naming-conventions-creation.md index 7e91b651..ec36d2e3 100644 --- a/_languages/naming-conventions-creation.md +++ b/_languages/naming-conventions-creation.md @@ -1,20 +1,26 @@ --- -title: "Language Design: Naming Conventions – Part 2: Conversion" -date: 2018-06-19 12:00:00 +0200 +title: "Language Design: Naming Conventions – Part 1: Creation" +date: 2018-06-19 +update: 2022-06-24 +redirect_from: "/languages/naming" +page_next_title: "Naming Conventions – Part 2: Conversion" 
+page_next_url: "naming-conventions-conversion" ---
NameExampleNameExample Explanation
to

array.toList

int32Value.toFloat64

dictionary.to[Queue]

array.toList +int32Value.toFloat64 +dictionary.to[Queue]
  • implies a (potentially lossy) conversion of a value
  • @@ -25,10 +31,15 @@ redirect_from: "/languages/naming"
as

int64Value.asFloat64

int64Value.as[Float64]

stringBuffer.asByteBuffer

map.asSetOfEntries

setOfEntries.asMap

int64Value.asFloat64 +int64Value.as[Float64] +stringBuffer.asByteBuffer +map.asSetOfEntries +setOfEntries.asMap
  • implies a verbatim reinterpretation/wrapping/viewing of a value
  • +
  • replacement for numeric "casts"
- - + + - + - - + + - - + + - - + + - - + +
NameExampleNameExample Explanation

List(1, 2, 3)

Array(12.3, 45.6)

Set("a", "b", "c")

List(1, 2, 3) +Array(12.3, 45.6) +Set("a", "b", "c")
  • primary way of construction
  • @@ -23,8 +29,8 @@ date: 2018-06-19 12:00:00 +0200
ofPerson.of(name, age)of(val1, ...)Person.of(name, age)
  • secondary way of construction
  • @@ -34,8 +40,9 @@ date: 2018-06-19 12:00:00 +0200
from

Person.from(personEntity)

Person.from(family)

from(val)Person.from(personEntity) +Person.from(family)
  • tertiary way of construction
  • @@ -46,8 +53,9 @@ date: 2018-06-19 12:00:00 +0200
parse

Person.parse(string)

Int64.parse(string)

parse(string)Person.parse(string) +Int64.parse(string)
  • quaternary way of construction
  • @@ -57,8 +65,9 @@ date: 2018-06-19 12:00:00 +0200
with

person.withAge(23)

person.with(age = 23)

with(val)person.withAge(23) +person.with(age = 23)
  • returns a copy of a value with parts replaced by the provided argument
  • diff --git a/_languages/naming-conventions-lookup.md b/_languages/naming-conventions-lookup.md index 8502609d..ed9af54a 100644 --- a/_languages/naming-conventions-lookup.md +++ b/_languages/naming-conventions-lookup.md @@ -1,30 +1,104 @@ --- -title: "Language Design: Naming Conventions – Part 3: Lookup" -date: 2022-06-07 12:00:00 +0200 +title: "Language Design: Naming Conventions – Part 4: Lookup" +date: 2022-06-07 +update: 2022-07-26 +page_previous_title: "Naming Conventions – Part 3: Options" +page_previous_url: "naming-conventions-options" +page_next_title: "Naming Conventions – Part 5: Streaming" +page_next_url: "naming-conventions-streaming" --- - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/_languages/naming-conventions-options.md b/_languages/naming-conventions-options.md new file mode 100644 index 00000000..7a25f932 --- /dev/null +++ b/_languages/naming-conventions-options.md @@ -0,0 +1,73 @@ +--- +title: "Language Design: Naming Conventions – Part 3: Options" +date: 2024-07-05 +page_previous_title: "Naming Conventions – Part 2: Conversion" +page_previous_url: "naming-conventions-conversion" +page_next_title: "Naming Conventions – Part 4: Lookup" +page_next_url: "naming-conventions-lookup" +--- + +
    NameExampleNameExample Explanation
    get

    List(1, 2, 3)

    Array(12.3, 45.6)

    Set("a", "b", "c")

    List(12.3, 45.6)(0) +--> Some(12.3) +Map("key", "val")("key") +--> Some("val")
      +
    • retrieves the value at the given index/key
    atArray(12.3, 45.6).at(1)at(idx)Array(12.3, 45.6).at(1) +--> Some(Ref(arr, 1))
      +
    • returns a reference to the given position in the array
    • +
    +
    contains(val)List(1.0, -0.0, NaN).contains(0.0) +--> true +List(1.0, -0.0, NaN).contains(NaN) +--> true +Map("key", "val").contains("key") +--> true + +
      +
    • checks whether container contains a value, as determined by either equality (==) or identity (===)
    • +
    +
    includes(val)List(1.0, -0.0, NaN).includes(0.0) +--> true +List(1.0, -0.0, NaN).includes(NaN) +--> false +Map("key", "val").includes("key") +--> true +
      +
    • checks whether container includes a value, as determined by equality (==)
    • +
    +
    has(val)List(1.0, -0.0, NaN).has(0.0) +--> false +List(1.0, -0.0, NaN).has(NaN) +--> true +Map("key", "val").has("key") +--> true +
      +
    • checks whether container has a value, as determined by identity (===)
    • +
    +
    findFirst(pred)List(3, 1, 2, 3).findFirst(_ < 3) +--> Some(1) +
      +
    • finds the first value in the container that satisfies the provided predicate
    • +
    +
    findLast(pred)List(3, 1, 2, 3).findLast(_ < 3) +--> Some(2) +
      +
    • finds the last value in the container that satisfies the provided predicate
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    NameExample
    orSome(1).or(Some(2)) +--> Some(1) +None.or(Some(2)) +--> Some(2)
    orElseSome(1).orElse(2) +--> 1 +None.orElse(2) +--> 2
    orGetSome(1).orGet(() -> Some(2)) +--> Some(1) +None.orGet(() -> Some(2)) +--> Some(2) +None.orGet(() -> None) +--> None
    orElseGetSome(1).orElseGet(() -> 2) +--> 1 +None.orElseGet(() -> 2) +--> 2
    orElsePanicSome(1).orElsePanic() +--> 1 +None.orElsePanic() +# program aborts
    orElsePanicWithSome(1).orElsePanicWith("expected some") +--> 1 +None.orElsePanicWith("expected some") +# program aborts with message "expected some"
    + +
    + +--- + +Naming scheme: + +- `...Else...` indicates going from `Option[T]` to `T` +- `...Get` indicates a closure argument +- all panicking methods contain `...Panic` diff --git a/_languages/naming-conventions-stream.md b/_languages/naming-conventions-stream.md deleted file mode 100644 index 7a594ee3..00000000 --- a/_languages/naming-conventions-stream.md +++ /dev/null @@ -1,263 +0,0 @@ ---- -title: "Language Design: Naming Conventions – Part 4: Stream" -date: 2022-06-07 12:00:00 +0200 ---- - -#### Mapping - - - - - - - - - - - - - - - - - - - - - -
    NameExampleExplanation
    map

    -
      -
    -
    flatMap
    mapAndFlatten
    -
      -
    -
    - -#### Filtering - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    NameExampleExplanation
    get(index) -
      -
    -
    first -
      -
    -
    last -
      -
    -
    take(amount) -
      -
    -
    takeWhile(predicate) -
      -
    -
    drop(amount) -
      -
    -
    dropWhile(predicate) -
      -
    -
    accept(predicate)
    select(predicate)
    -
      -
    -
    reject(predicate) -
      -
    -
    distinct(predicate) -
      -
    -
    - -#### Folding - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    NameExampleExplanation
    fold(fun, startValue) -
      -
    -
    reduce(fun) -
      -
    -
    combine[Monoid] -
      -
    -
    sum[Numeric]product[Numeric]average[Numeric] -
      -
    -

    all(predicate)

    forAll(predicate)

    -
      -
    • returns true if predicate returns true for all elements
    • -
    -

    any(predicate)

    forAny(predicate)

    -
      -
    • returns true if predicate returns true for any element
    • -
    -

    none(predicate)

    forNone(predicate)

    -
      -
    • returns true if predicate returns false for all elements
    • -
    -
    - -#### Injecting - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    NameExampleExplanation
    joinInner[Record] -
      -
    -
    joinLeft[Record] -
      -
    -
    joinRight[Record] -
      -
    -
    joinFull[Record] -
      -
    -
    groupBy(function) -
      -
    -
    partitionBy(function) -
      -
    -
    diff --git a/_languages/naming-conventions-streaming.md b/_languages/naming-conventions-streaming.md new file mode 100644 index 00000000..ac729cb6 --- /dev/null +++ b/_languages/naming-conventions-streaming.md @@ -0,0 +1,405 @@ +--- +title: "Language Design: Naming Conventions – Part 5: Streaming" +date: 2022-06-08 +update: 2024-12-27 +page_previous_title: "Naming Conventions – Part 4: Lookup" +page_previous_url: "naming-conventions-lookup" +--- + +#### Projections + + + + + + + + + + + + + + + + + + + + + +
    NameExampleExplanation
    map(fun)List(1, 2, 3).map(_ + 1) +--> List(2, 3, 4) +
      +
    • returns a stream in which fun is applied to each element
    • +
    +

    mapMany(fun)

    mapMulti

    flatMap

    mapFlat

    mapAndFlatten

    List(1, 2).mapMany(x -> List(x, x)) +--> List(1, 1, 2, 2) + +List(1, 2).mapMany(x -> Some(x)) +--> List(1, 2) +List(1, 2).mapMany(x -> None) +--> List() +
      +
    • returns a stream in which fun is applied to each element, producing a sequence of elements that is subsequently flattened
    • +
    +
    + +#### Filters + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    NameExampleExplanation
    firstList(1, 2, 3).first +--> Option(1) +
      +
    • returns the first element of the stream
    • +
    +
    lastList(1, 2, 3).last +--> Option(3) +
      +
    • returns the last element of the stream
    • +
    +

    retainFirst(num)

    take

    keep

    pick

    List(1, 2, 3, 4).retainFirst(2) +--> List(1, 2) +
      +
    • returns a stream that produces the first num elements of the input stream
    • +
    +

    retain(pred)

    accept

    select

    filter

    List(1, 2, 3, 1).retain(_ < 3) +--> List(1, 2, 1) +
      +
    • returns a stream that produces only elements for which pred evaluates to true
    • +
    • filter is a poor name as it's unclear (especially for non-native speakers) whether "filtered elements" are those retained, or those "filtered out"
    • +
    • accept is not ideal, as the visitor pattern also makes use of this name
    • +
    • select is even less ideal, as SQL uses the name for a completely different purpose
    • +
    +

    retainIndex(pred)

    List("a", "b", "c").retainIndex(_ % 2 == 0) +--> List("a", "c") +
      +
    • returns a stream that produces only elements for whose index pred evaluates to true
    • +
    +

    retainWhile(pred)

    takeWhile

    keepWhile

    pickWhile

    List(1, 2, 3, 1).retainWhile(_ < 3) +--> List(1, 2) +
      +
    • returns a stream that produces elements of the input stream until the pred evaluates to false
    • +
    +

    retainUntil(pred)

    takeUntil

    keepUntil

    pickUntil

    List(4, 3, 2, 4).retainUntil(_ < 3) +--> List(4, 3) +
      +
    • returns a stream that produces elements of the input stream until the pred evaluates to true
    • +
    • redundant, equivalent to retainWhile(pred.not)
    • +
    +

    rejectFirst(num)

    skip

    drop

    List(1, 2, 3, 4).rejectFirst(1) +--> List(2, 3, 4) +
      +
    • returns a stream without the first num elements of the input stream
    • +
    +

    reject(pred)

    filterNot

    List(1, 2, 3, 1).reject(_ < 3) +--> List(3) +
      +
    • returns a stream that produces only elements for which pred evaluates to false
    • +
    • filterNot is a poor name as it's unclear (especially for non-native speakers) whether "filtered elements" are those retained, or those "filtered out"
    • +
    +

    rejectIndex(pred)

    List("a", "b", "c").rejectIndex(_ % 2 == 0) +--> List("b") +
      +
    • returns a stream that produces only elements for whose index pred evaluates to false
    • +
    +

    rejectWhile(pred)

    skipWhile

    dropWhile

    List(2, 3, 4, 1).rejectWhile(_ < 3) +--> List(3, 4, 1) +
      +
    • returns a stream that skips elements of the input stream until pred evaluates to false
    • +
    +

    rejectUntil(pred)

    skipUntil

    dropUntil

    List(3, 2, 1, 4).rejectUntil(_ < 2) +--> List(1, 4) +
      +
    • returns a stream that skips elements of the input stream until pred evaluates to true
    • +
    • redundant, equivalent to rejectWhile(pred.not)
    • +
    +
    distinctList(1, 2, 3, 1).distinct +--> List(1, 2, 3) +
      +
    • returns a stream that produces only the first occurrence of elements occurring multiple times
    • +
    +
    + +#### Folds + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    NameExampleExplanation
    fold(fun, start) +
      +
    +
    reduce(fun) +
      +
    +
    combine[Monoid] +
      +
    +
    sum[Numeric]List(1.0, 2.0, 3.0, 4.0).sum +--> 10.0 +
      +
    • computes the sum of the list of numbers
    • +
    +
    product[Numeric]List(1.0, 2.0, 3.0, 4.0).product +--> 24.0 +
      +
    • computes the product of the list of numbers
    • +
    +
    average[Numeric]List(1.0, 2.0, 3.0, 4.0).average +--> 2.5 +
      +
    • computes the average of the list of numbers
    • +
    +

    all(pred)

    forAll

    +
      +
    • returns true if pred returns true for all elements
    • +
    +

    any(pred)

    forAny

    +
      +
    • returns true if pred returns true for any element
    • +
    +

    none(pred)

    forNone

    +
      +
    • returns true if pred returns false for all elements
    • +
    +
    + +#### Injects + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    NameExampleExplanation
    joinInner[Record] +
      +
    +
    joinLeft[Record] +
      +
    +
    joinRight[Record] +
      +
    +
    joinFull[Record] +
      +
    +
    groupBy(fun) +
      +
    +
    partitionBy(fun) +
      +
    +
    + +#### Fan-Ins + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    NameExampleExplanation
    concat +
      +
    • returns a stream that produces the values from the first stream and then the values of the second stream
    • +
    +
    interleave +
      +
    • returns a stream that produces a value, alternating between the first and second stream
    • +
    • likely requires multiple method variants that handle streams of different lengths in different ways
    • +
    +
    zip +
      +
    • returns a stream that produces a tuple value, where the first element is from the first stream and the second element is from the second stream
    • +
    • likely requires multiple method variants that handle streams of different lengths in different ways
    • +
    +
    zipWithIndex +
      +
    • returns a stream that produces a tuple value, where the first element is from the stream and the second element is the index at which the value was produced
    • +
    +
    diff --git a/_languages/nondefinitional-enums.md b/_languages/nondefinitional-enums.md deleted file mode 100644 index 5bedea20..00000000 --- a/_languages/nondefinitional-enums.md +++ /dev/null @@ -1,137 +0,0 @@ ---- -title: "Language Design: Nondefinitional Enums" -date: 2021-08-26 12:00:00 +0200 -redirect_from: "/languages/better-enums" ---- - -### Introduction - -The following piece of code shows a "classic" interpretation of an enum (ADT) definition as it exists in many languages ... - - enum Pet { Cat(name: String, lives: Int), Dog(name: String, age: Int) } - -... in which `Pet` (the enum itself), as well as `Cat` and `Dog` (enum members of `Pet`), are defined. - -Similarly, many languages have unions types, often written as ... - - type Number = Int | Float - -in which the union type `Number` is defined, with its members `Int` and `Float` referring to existing types. - -We observe that ADTs are generally tagged unions (meaning that individual members can be told apart, even if they contained the same values) come with wrappers (`Cat`, `Dog`) around their payloads, while untagged unions do not contain metadata to tell members apart, but also lack wrappers. - - - - - - - - - - - - - - - - - -
    Syntactic WrappingNo Syntactic Wrapping
    Runtime Taggingtagged union/ADT/enum?
    No Runtime Tagging?untagged union/union type
    - - - - -### How to fill the gaps? - -Let's call the tagged union without syntactic wrapping combination in the upper right quadrant a _nondefinitional_ enum. Consider this code ... - - enum Pet of Cat, Dog - class Cat(name: String, lives: Int) - class Dog(name: String, age: Int) - -... that defines the enum `Pet`, but refers to *existing types* `Cat` and `Dog`, instead of defining them too. - -Intuitively, this works similarly to `permits` clauses of [_sealed interfaces_ in Java](https://docs.oracle.com/en/java/javase/17/language/sealed-classes-and-interfaces.html) in the sense that - - sealed interface E permits A, B { ... } - -does not define `A` or `B`, but refers to existing `A` and `B` in scope.[^sealed] - -### Benefits of nondefinitional enums - -1. Enum variants have types, because they have a "real" class/struct/... declaration. (This fixes a mistake that some languages like Rust or Haskell made.) -2. Variants can be reference types or value types (because they have a "real" class/struct/... declaration). -3. No "stutter", where variant names have to be invented to wrap existing types- (Rust has this issue.) -4. enum values can be passed/created more easily, because there are fewer layers of wrapping. -5. Variants can be re-used in different enums. -6. The ability to build ad-hoc enums out of existing types obviates the need for a separate union type or type alias feature in the language. - ---- - -#### Example for 1., 2., 3. - -So while - - enum Option[T] { Some(value: T), None } - -would receive little benefit from being written as ... - - enum Option[T] of Some[T], None - struct Some[T](value: T) - module None - -... even trivial ADTs like a JSON tree would benefit. Instead of ... - - enum JsonValue { - JsonObject(Map[String, JsonValue]) - JsonArray (Array[JsonValue]), - JsonString(String), - JsonNumber(Float64), - JsonBool (Bool), - JsonNull, - ... - } - -... 
one would write (with `Array`, `Float64` and `String` being existing types in the language): - - enum JsonValue of - Map[String, JsonValue] - Array[JsonValue], - String, - Float64 - Bool, - JsonNull, - ... - - module JsonNull - -#### Example for 4. - -It would also do away with having to wrap data the enum's "variant" when passing arguments, as it's done with the "traditional" approach: - - fun someValue(value: JsonValue) = ... - someValue(JsonString("test")) // "traditional" approach - someValue("test") // could also be allowed - -#### Example for 5. - -Consider a class like - - class Name(name: String) - -With this approach we can use this `Name` type multiple times in different enums (and elsewhere): - - enum PersonIdentifier of - Name, - ... // other identifiers like TaxId, Description, PhoneNumber etc. - - enum DogTag of - Name, - ... // other identifiers like RegId, ... - ---- - -This approach reduces indirection at use-sites and increases the utility of enums compared to more "traditional" enums, -while not changing their runtime costs or representation. - -[^sealed]: Unlike sealed interfaces in Java though, `E` and `A`/`B` have no subtyping relationship in nondefinitional enums. diff --git a/_languages/package-objects.md b/_languages/package-objects-in-scala.md similarity index 88% rename from _languages/package-objects.md rename to _languages/package-objects-in-scala.md index 1189a346..7eec32c4 100644 --- a/_languages/package-objects.md +++ b/_languages/package-objects-in-scala.md @@ -1,7 +1,9 @@ --- -title: "Language Design: Package Objects" +title: "Language Design: Package Objects in Scala" date: 2017-12-20 00:00:00 -redirect_from: "/articles/language-design/package-objects" +redirect_from: + - "/articles/language-design/package-objects" + - "/languages/package-objects" --- Scala has the concept of @@ -17,7 +19,7 @@ be called with `foo.bar.qux()`. 
Package objects are useful, but the way they are defined is pretty weird, and one of the obscure, inconsistent and hard to explain decisions of the language: -![Impressum](/assets/img/package-objects-bad.png) +![package-objects-bad](/assets/img/package-objects-bad.png) - The package clause is `foo`, not `foo.bar`. - Given the package clause, the file package.scala is placed _outside_ of the @@ -53,7 +55,7 @@ objects is to ignore the confusing syntax completely. Instead, directly define them this way: -![Impressum](/assets/img/package-objects-good.png) +![package-objects-good](/assets/img/package-objects-good.png) Coincidentally, this is exactly the transformation the compiler already does when compiling package objects. It side-steps all the unnecessary language diff --git a/_languages/popular-but-wrong.md b/_languages/popular-but-wrong.md index 0a31cb2f..cc6c2409 100644 --- a/_languages/popular-but-wrong.md +++ b/_languages/popular-but-wrong.md @@ -1,6 +1,7 @@ --- title: "Language Design: Popular, but Wrong" -date: 2020-10-20 12:00:00 +0200 +date: 2020-10-20 +update: 2024-01-22 redirect_from: - "/languages/popular-wrong-decisions" - "/languages/popular-wrong-opinions" @@ -12,10 +13,11 @@ redirect_from: 4. `[]` for arrays ([see](stop-using-angle-brackets-for-generics)) 5. `Type ident` instead of `ident: Type` ([see](type-annotations)) 6. having if-then-else *and* switch/case *and* a ternary operator ([see](unified-condition-expressions)) - 7. separate namespaces for methods and fields - 8. having both modifiers and annotations ([see](annotations-obsolete-modifiers)) - 9. method overloading -10. namespace declarations doubling as imports -11. special syntax for casting -12. using cast syntax for things that are not casts -13. requiring `()` for methods without parameters + 7. having both modifiers and annotations ([see](annotations-obsolete-modifiers)) + 8. `async`/`await` + 9. separate namespaces for methods and fields +10. method overloading +11. 
namespace declarations doubling as imports +12. special syntax for casting +13. using cast syntax for things that are not casts +14. requiring `()` for methods without parameters diff --git a/_languages/result-naming.md b/_languages/result-naming.md new file mode 100644 index 00000000..a9506766 --- /dev/null +++ b/_languages/result-naming.md @@ -0,0 +1,32 @@ +--- +title: "Language Design: `Result` naming" +date: 2022-10-31 +draft: true +--- + +I was never too happy with the existing naming approaches for the Result type: + +#### Success/Failure + +- pro: both names have the same length, like Option's Some/None +- con: quite long, concern that people may use Option over Result + +#### Ok/Err + +- pro: short +- con: names don't have the same length, leading to inconsistent indentation when pattern matching +- con: Err is not a word + +--- + +The new naming ticks all the boxes: + +#### Pass/Fail + +- pro: both names have the same length +- pro: same length as Some/None +- pro: real words + +It's a minor thing, but it's nice to have found a good design even where it doesn't matter that much! + +The only concern I have is that I might find a type in a testing-related context where Pass/Fail would fit even better! diff --git a/_languages/rust-almost-rules.md b/_languages/rust-almost-rules.md new file mode 100644 index 00000000..bc563daf --- /dev/null +++ b/_languages/rust-almost-rules.md @@ -0,0 +1,46 @@ +--- +title: "Language Design: Rust's Almost-Rules" +date: 2024-08-05 +--- + +(Inspired by [Almost Rules](https://matklad.github.io/2022/07/10/almost-rules.html).) 
+ +### Syntax + +##### `:` is followed by a type + +- except inside struct initializers, where it is followed by a value +- except function result types, which are preceded by `->` + +##### generics use `<>` + +- except in expression contexts, which uses `::<>` + +##### invocations use `()` + +- except where `{}` or `[]` is used, because "they convey important information" + - except for macro invocations, where `()`, `{}`, `[]` are equivalent and interchangeable + +##### `T {}` initializes a struct + +- except inside an `if`, where `{` starts a branch + +##### Rust has no varargs + +- except for `extern` functions +- except for macros + +##### patterns introduce bindings + +- except in macro pattern matching, where identifiers are matched verbatim + + +### Semantics + +##### types with a total order implement `Eq` and `Ord` + +- except `f64` and `f32`, which do not + +##### struct initializers use temporary lifetime extension +- except tuple structs + - except when using curly braces to initialize tuple structs diff --git a/_languages/rust-struct-initializer-mistake.md b/_languages/rust-struct-initializer-mistake.md new file mode 100644 index 00000000..110143f4 --- /dev/null +++ b/_languages/rust-struct-initializer-mistake.md @@ -0,0 +1,171 @@ +--- +title: "Language Design: Rust's Struct Initializer Syntax Was a Mistake" +date: 2023-09-20 +update: 2023-12-23 +--- + +Rust has distinct syntactic facilities for ... +- invoking functions +- initializing structs and enums +- initializing tupled structs and enums + +... 
that provide different affordances and features: + +- method arguments are positional and cannot be named +- initializer arguments are named, and what looks like positional arguments is a special shorthand notation +- initializer arguments for tuple structs and enums look like method arguments, but are initializer arguments without shorthand notation + + +The following example shows all three: + +```rust +struct User { + username: String, + email: String, + active: State, + sign_in_count: u64, +} + +struct State(bool); // tuple struct + +fn user(username: String, email: String) -> User { + User { + username, + email, + active: State(true), + sign_in_count: 0, + } +} + +fn main() { + user("Jane Example".into(), "jane@example.com".into()); +} +``` + +### The Problems + +Some of the issues caused by Rust implementing "most features, half the time": + +#### Diverging Code Styles and Best Practices + +The unwieldiness of struct initialization combined with the +[lack of named/default parameters](https://github.com/samsieber/rubber-duck/blob/master/REVIEW.md) has led to diverging +code styles and best practices – like [constructor pattern](https://rust-unofficial.github.io/patterns/idioms/ctor.html), +[builders](https://www.greyblake.com/blog/builder-pattern-in-rust), +[option structs](https://xaeroxe.github.io/init-struct-pattern/), +[default hacks](https://github.com/rust-unofficial/patterns/discussions/237) +– for dealing with issues like "this thing has grown and is taking way too many parameters now". + +Every future language improvement/addition will change the scale slightly, causing churn due to another technique +becoming the next "best practice". +(And most likely only apply to either struct/enum initializers *or* method calls, but not both.) + + +#### Needless Ambiguity + +Using `{` for struct initialization also means that something as trivial as `if foo {` appears to be syntactically ambiguous. 
+ +(In Rust such code is always treated as a condition body; a struct initialization in this position would need to be enclosed in parentheses.) + + +#### Lack of Consistent Rules for Type Ascriptions + +Using `:` for struct initialization means that it's not possible to use `: Foo` as a type ascription. +Languages from the 1970s managed to get this right; Rust somehow regressed on that, failing to ship +[type ascriptions](https://rust-lang.github.io/rfcs/0803-type-ascription.html) and +[giving up on it after 8 years](https://rust-lang.github.io/rfcs/3307-de-rfc-type-ascription.html). + + +### A Solution + +What Rust should have done instead is to decide upon *one* ruleset that all those invocations follow, such as: + +- Function calls and struct/enum initializers use `()`, not a mix of `{}` and `()`. +- Use `=` for passing actual values as named arguments, such that everything "just works" if default parameters were added in the future. + (See the [Appendix](#appendix-a-detailed-look-at-the-role-of-) for an evaluation of available options.) +- Use `:` only for type ascriptions. +- Shorthand notation or positional arguments? Pick one. + +Adapting the example code from above, the code would look like this: + +```rust +struct User( + username: String, + email: String, + active: State, + sign_in_count: u64 = 0) // default parameter value + +struct State(active: bool) + +fn user(username: String, email: String) -> User { + User(username, email, active = State(true)) // named parameter +} + +fn main() { + user("Jane Example".into(), "jane@example.com".into()); +} +``` + +### Appendix: A Detailed Look at the Role of `=` + +#### How to distinguish between a variable assignment and a named parameter use inside a function invocation? + +```rust + fn someFunction(a: i64) { ... } + let mut a = 12; + someFunction(a = 23) // what does this mean? +``` + +Two points have to be considered here: + +1. 
Reducing chances of mix-ups: + - Frequency: Are variable assignments within function calls or function calls with named parameters projected to be used more often? + - Intuitiveness: How can the syntax be distributed to those two use-cases such that the choice makes intuitive sense from a user point-of-view? +2. Reducing the harm from mix-ups: + - Can code change meaning unexpectedly, e.g. when function parameters are renamed? + +##### Option 1: Try to use the same syntax for both variable assignments and named parameters + +This means that named parameters simply act like another scope in which identifiers are looked up. + +The danger with this approach is that changing the name of a named parameter can silently change the meaning of callsites if a variable with the previously used parameter name happens to be in scope. + +```rust +// named parameter, but if someFunction's parameter name changes, +// without the callsite being updated, it silently becomes an +// assignment instead of a compilation failure: +someFunction(a = 23) +``` + +It also means that variables with the same name as a parameter name cannot be assigned within a function call. + +##### Option 2: Let variable assignments use the "good" syntax and give named parameters some "workaround" syntax + +In this example, the workaround syntax for named parameters is a `.`, prefixed to the parameter name. 
+ +```rust +// variable assignment inside a function argument list, +// only works if assignment returns the assigned value +// (which is generally a bad idea): +someFunction(a = 23) + +// named parameter, and if someFunction's parameter name changes, +// without the callsite being updated, it becomes a compilation failure: +someFunction(.a = 23) +``` + +##### Option 3: Let named parameters use the "good" syntax and give variable assignments some "workaround" syntax + +Inside a function's argument list, the first level of `=` use is always a named parameter and never a +variable assignment, even if some variable `a` would be in scope. + +```rust +// named parameter, and if someFunction's parameter name changes, +// without the callsite being updated, it becomes a compilation failure: +someFunction(a = 23) + +// variable assignment inside a function argument list, +// only works if assignment returns the assigned value +// (which is generally a bad idea): +someFunction(a = { a = 23 }) +``` diff --git a/_languages/rusts-into-in-scala.md b/_languages/rusts-into-in-scala.md index 8ea4c1fe..67d73e8f 100644 --- a/_languages/rusts-into-in-scala.md +++ b/_languages/rusts-into-in-scala.md @@ -87,9 +87,11 @@ The only drawback is that, similar to Rust, the conversion method `into` needs t #### Java's Supplier -Perhaps amusingly, it's also possible to completely forgo the definition of our own `Into` type in favor of using Java 8's [`Supplier`](https://docs.oracle.com/javase/8/docs/api/java/util/function/Supplier.html) interface: +Perhaps amusingly, it's also possible to completely forgo the definition of our own `Into` type in favor of using Java 8's +[`Supplier`](https://docs.oracle.com/javase/8/docs/api/java/util/function/Supplier.html) interface +(adjusting the types in the approaches above from `Into` to `Supplier` accordingly): - def strings(s: Supplier[String]) = s.into + def strings(s: Supplier[String]) = s.get ... and that's it! 
diff --git a/_languages/schema-as-source-code.md b/_languages/schema-as-source-code.md new file mode 100644 index 00000000..0bbd5576 --- /dev/null +++ b/_languages/schema-as-source-code.md @@ -0,0 +1,15 @@ +--- +title: "Language Design: Schemas as Source Code" +date: 2022-08-06 +--- + +TL;DR: Your compiler should treat schema definitions as a valid (alternate) source syntax of your programming language. + +## What's the goal + +## Failed Alternatives + +- poorly integrated source generation +- compiler plugins +- macros/type providers +- annotating program texts diff --git a/_languages/stop-using-angle-brackets-for-generics.md b/_languages/stop-using-angle-brackets-for-generics.md index 28361765..3ffad61e 100644 --- a/_languages/stop-using-angle-brackets-for-generics.md +++ b/_languages/stop-using-angle-brackets-for-generics.md @@ -1,6 +1,7 @@ --- title: "Language Design: Stop Using `<>` for Generics" -date: 2020-04-04 12:00:00 +0200 +date: 2020-04-04 +update: 2022-07-30 redirect_from: "/languages/stop-using-for-generics" --- @@ -43,8 +44,10 @@ instance.foo(); _C#_ and _Kotlin_ tried to retain a more consistent syntax by introducing unlimited look-ahead: Their parser just keeps reading input after the `<` until it can make a decision.[^csharp] -For decades, _C++_ required adding spaces to nested closing generics to allow the compiler to -distinguish between the right-shift operator `>>` and the end of a nested generic type definition.[^cpp] +_C++_ suffers from a plethora of `<>`-related issues.[^cpp1] +The only issue addressed by the C++ committee after decades was the requirement to add spaces to +nested closing generics to allow the compiler to distinguish between the right-shift operator `>>` +and the end of a nested generic type definition.[^cpp2] All other issues appear to be unfixable. 
_Rust_ is forced to use the hideous "turbofish" operator `::<>` to distinguish between the left side of a comparison and the start of a generic type, introducing syntactic inconsistency between generics in @@ -56,29 +59,37 @@ let vec: Vec = Vec::::new(); /*or*/ >::new(); ``` -#### 3. It allows `[]` to be (ab)used for syntax "conveniences" +#### 3. It makes the uses of brackets confusing and inconsistent -Many languages used `[]` to add syntax for collection literals (`[1, 2, 3]`) or array lookup -(`array[0]`), adding pointless complexity to the language for very little benefit – as such built-in -syntax usually becomes dead weight a few years down the road, as the preferred choice -of data structure implementation evolves.[^javalit][^jslit] +Many legacy languages use `<` and `>` for comparisons, bit-shifts and generics, as well as both `()` and `[]` for function calls. -Using `[]` for generics instead of `<>` shuts down this possibility for good, and encourages the use -of standard method call brackets (`()`) for these use-cases instead:[^nim] +Instead, imagine a design where each bracket has a clearly-defined use ... + +> `[]` _encloses_ type parameters or type arguments
    +> `()` _groups_ expressions, parameter/argument lists or tuples
    +> `{}` _sequences_ statements or definitions
    + +... and `<`/`>` is only used as a comparison operator, and not misused as a makeshift bracket. + +This substantially simplifies the mental model beginners need to adopt before writing their first program +(_"`()` is for values, `[]` is for types"_), and encourages the elimination of syntactic special cases like collection literals ... + +``` +Array(1, 2, 3) /* instead of */ [ 1, 2, 3 ] +Set("a", "b", "c") /* instead of */ { "a", "b", "c" } +``` + +... and array indexing in favor of standard function call syntax[^nim]: ``` -Array.get(1, 2, 3) /* instead of */ [1, 2, 3] someList.get(0) /* instead of */ someList[0] array.set(0, 23.42) /* instead of */ array[0] = 23.42 map.set("name", "Joe") /* instead of */ map["name"] = "Joe" ``` -At this stage, some small amount of syntax sugar can be considered that would allow every type with -a `get` method to be written as `instance(arg)` and a `set` method written as `instance(index, arg)`, -leading to the following code:[^pythonscala] +A [small amount of syntax sugar](useful-syntax-sugar) can be considered, leading to the following code:[^pythonscala] ``` -Array(1, 2, 3) /* instead of */ [1, 2, 3] someList(0) /* instead of */ someList[0] array(0) = 23.42 /* instead of */ array[0] = 23.42 map("name") = "Joe" /* instead of */ map["name"] = "Joe" @@ -88,15 +99,19 @@ map("name") = "Joe" /* instead of */ map["name"] = "Joe" #### Coda -Thankfully, the number of languages using `[]` for generics seems to increase lately – with Scala, Python, Nim and Go joining Eiffel, which was pretty much the sole user of `[]` for decades. +Thankfully, the number of languages using `[]` for generics seems to increase lately – +with Scala, Python, and Nim joining Eiffel, which was pretty much the sole user of `[]` for decades. -It remains to be seen whether this turns into tidal change similar to the widespread [adoption of `ident: Type` over `Type ident`](https://soc.me/languages/type-annotations) in modern languages. 
+~~It remains to be seen whether this turns into tidal change similar to the widespread +[adoption of `ident: Type` over `Type ident`](https://soc.me/languages/type-annotations) in modern languages.~~ +_With the recent adoption of `[]` for generics by Go and Carbon this seems to be the likely outcome._ [^related]: [Parsing Ambiguity: Type Argument v. Less Than](https://keleshev.com/parsing-ambiguity-type-argument-v-less-than) is a similar article focusing on some of these issues in more depth. [^java]: Java: The syntax inconsistency is due to the difficulty a compiler would have to tell whether some token stream of `instance` `.` `foo` `<` is the left side of a comparison (with `<` being the "less-than" operator) or the start of a generic type argument within a method call. [^csharp]: C#: See [ECMA-334, 4th Edition, §9.2.3 – Grammar Ambiguities](https://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf) -[^cpp]: C++: See [Wikipedia – C++11 right angle bracket](https://en.wikipedia.org/wiki/C%2B%2B11#Right_angle_bracket) +[^cpp1]: C++: See [What are all the syntax problems introduced by the usage of angle brackets in C++ templates?](https://stackoverflow.com/questions/7304699/what-are-all-the-syntax-problems-introduced-by-the-usage-of-angle-brackets-in-c) +[^cpp2]: C++: See [Wikipedia – C++11 right angle bracket](https://en.wikipedia.org/wiki/C%2B%2B11#Right_angle_bracket) [^javalit]: Java pretty much abandoned arrays – they never integrated them with collections in 1.2, let alone generics in 1.5. [^jslit]: JavaScript stopped giving out new collection literals almost immediately after its first release – no collection type added since received its own literals (`Set`, `Map`, `ByteBuffer`, ...). [^nim]: Nim uses `[]` for generics, but employs [a hack to _also_ use `[]` for lookup](https://nim-lang.org/docs/manual.html#procedures-method-call-syntax). 
diff --git a/_languages/the-cost-of-everything.md b/_languages/the-cost-of-everything.md new file mode 100644 index 00000000..4bd84fd6 --- /dev/null +++ b/_languages/the-cost-of-everything.md @@ -0,0 +1,13 @@ +--- +title: "Language Design: The Cost of Everything" +date: 2022-07-07 13:00:00 +0200 +--- + +| Merit Points | Item | +|-------------:|-----------------------------------------------------------------------------------------| +| -100 | Adding a language feature to do something that can already be done | +| -90 | Adding a language feature to do something that can be implemented in a library | +| -80 | Adding a language feature to do something that can be achieved by fixing a compiler bug | +| -70 | Adding a new element to the global namespace | +| -60 | Adding a new element to a `util` namespace | +| -20 | Adding a new element to the standard library | diff --git a/_languages/threads-futures-async.md b/_languages/threads-futures-async.md index 4933241e..e9b670dc 100644 --- a/_languages/threads-futures-async.md +++ b/_languages/threads-futures-async.md @@ -1,4 +1,5 @@ --- title: "Language Design: Threads, Futures and Async" date: 2022-06-05 12:00:00 +0200 +published: false --- diff --git a/_languages/type-annotations.md b/_languages/type-annotations.md index d626da52..330fb9ca 100644 --- a/_languages/type-annotations.md +++ b/_languages/type-annotations.md @@ -1,6 +1,7 @@ --- title: "Language Design: Use `ident: Type`, not `Type ident`" -date: 2017-07-21 12:00:00 +0200 +date: 2017-07-21 +update: 2022-08-21 redirect_from: "/articles/language-design/type-annotations" --- @@ -10,8 +11,7 @@ those names carry higher importance. The `ident: Type` syntax lets developers focus on the name by placing it ahead of its type annotation. 
-This means that the vertical offset of names stays consistent, regardless of whether a type -annotation is present (and how long it is) or not[^type-inference]: +This means that the vertical offset of names stays consistent, regardless of a type annotation's presence or absence[^type-inference]: ```scala let x: String = "hello" @@ -63,7 +63,7 @@ the three properties mentioned above: **C#** -```csharp +```java T id(T x) { ... } ``` @@ -75,26 +75,26 @@ fun id(x: T): T { ... } **Ceylon** -```ceylon +```java T id(T x) { ... } ``` -**Scala** +**Core** ```scala -def id[T](x: T): T = ... +fun id[T](x: T): T = ... ``` Only the last approach delivers all three desirable properties: -| | Input before output | Definition/usage
    consistency | Definition before
    usage | +| | Input before output | Definition/usage consistency | Definition before usage | |--------------|:-------------------:|:----------------------------:|:-----------------------:| -| ***Java*** | No | No | Yes | -| ***C#*** | No | Yes | No | -| ***Kotlin*** | Yes | No | Yes | -| ***Ceylon*** | No | Yes | No | -| ***Scala*** | Yes | Yes | Yes | -{: style="width:100%"} +| ***Java*** | ❌ | ❌ | ✅ | +| ***C#*** | ❌ | ✅ | ❌ | +| ***Kotlin*** | ✅ | ❌ | ✅ | +| ***Ceylon*** | ❌ | ✅ | ❌ | +| ***Core*** | ✅ | ✅ | ✅ | +{: .table-medium .table-layout-auto} [^type-inference]: type inference means that the compiler can figure out types without having a developer writing them down explicitly [^curly]: focusing on curly-brace languages here, as languages like Haskell, ML and OCaml, Idris have slightly different design optima diff --git a/_languages/typing-terminology.md b/_languages/typing-terminology.md new file mode 100644 index 00000000..0a6fdc42 --- /dev/null +++ b/_languages/typing-terminology.md @@ -0,0 +1,54 @@ +--- +title: "Language Design: Typing Terminology" +date: 2022-08-06 +--- + +Most people think only in terms of the dichotomy between Nominal-Manifest-Static-Strong and Structural-Inferred-Dynamic-Weak in any given discussion of programming language type system design. And it is exhausting. + +Most individual distinctions are a scale, not a strict yes/no checkbox. + +--- + +## Static ⟷ Dynamic (Typing Modality/Presence) + +It refers to what mode of the program it exists in, the analysis stage (where you get e.g. syntax errors too) or the execution stage. More Static: Haskell, CommonLisp, More Dynamic: SmallTalk, Scheme + +### Manifest ⟷ Inferred (Typing Apparency) + +It describes the degree to which types need to be mentioned in the program text. + +More Manifest: Java, C, More Inferred: Python, Haskell. + +### Nominal ⟷ Structural (Typing Morphology) + +It pertains to how types are described and referred to and when they are judged equal. 
+ +More Nominal: Rust, D, More Structural: Ruby, OCaml. + +→ Mention Java SAM types. + +### Reified ⟷ Erased (Typing Preservation) + +reification: runtime- vs. user-exposed? + +### Compile-time vs. Run-time Reflection? + +Not directly typing related, but typing preservation choices have a direct impact on what's possible. +(Maybe as sub-point of reified vs. erased?) + +## Untyped Languages + +### Tagged ⟷ Untagged + +--- + +→ cite Benjamin Pierce +→ cite Bob Harper + +--- + +Not really typing: + +## Strong ⟷ Weak (Typing Discipline/Value Convertibility) + +which I think should be called Typing discipline because it pertains to the number of type errors you get (compile time or runtime, doesn't matter.) Stronger: SML, Python, Weaker: JavaScript, C. diff --git a/_languages/unary-operators.md b/_languages/unary-operators-are-unnecessary.md similarity index 52% rename from _languages/unary-operators.md rename to _languages/unary-operators-are-unnecessary.md index 2d4d329b..ac71ce18 100644 --- a/_languages/unary-operators.md +++ b/_languages/unary-operators-are-unnecessary.md @@ -1,8 +1,12 @@ --- -title: "Language Design: Unary Operators" -date: 2019-09-21 12:00:00 +0200 +title: "Language Design: Unary Operators are Unnecessary" +date: 2019-09-21 +update: 2022-07-17 +redirect_from: "/languages/unary-operators" --- +_**TL;DR:** Use methods._ + Many languages provide unary operators, usually written as a prefix to the value they apply to. The most common unary operators are: @@ -13,23 +17,30 @@ The most common unary operators are: - `+`: useless (on numbers)
    Except for reasons of tradition and familiarity, their privileged position in many languages is unnecessary. -Considering they provide rather limited benefits – while adding complexity to the core language – -it is questionable whether unary operators are a good place to spend a language's complexity budget on. +They provide rather limited benefits – while adding complexity to the core language. + +Unary operators are a waste of a language's complexity budget. An alternative is to define methods on the respective types, dropping unary operators altogether: -- `not` replaces `!` on booleans -- `not` replaces `~` on numbers -- `negate` replaces `-` on numbers +- `not` replaces `!` on booleans: `someBool.not` instead of `!someBool` +- `not` replaces `~` on integers: `1.not` instead of `~1` +- `negate` replaces `-` on numbers: `1.negate` instead of `-1`
    This also elegantly solves the question whether -```scala +```ml let x = 1 -x.abs ``` -evaluates to `1` or `-1`, by requiring users to write `x.negate.abs` – thereby leaving no ambiguity to precedence. +should evaluate to `1` or `-1`, as + +```ml +x.negate.abs +``` + +is completely unambiguous. There are two additional benefits to the use methods instead of operators: @@ -43,5 +54,27 @@ There are two additional benefits to the use methods instead of operators: the same operation on the more common fixed-size types (`Int`, `Long`, ...) could benefit from returning an optional result to indicate that a negative value may lack a positive counterpart. +--- + +#### Appendix + +Incomplete list of languages and their interpretation of `-1.abs`: + +| | -1.abs | let x = 1; -x.abs | +|------------:|-------:|------------------:| +| C# | -1 | -1 | +| D | -1 | -1 | +| Dart | -1 | -1 | +| Fantom | -1 | -1 | +| Groovy | -1 | -1 | +| Kitten | 1 | n.a. | +| JavaScript | -1 | -1 | +| Nim | -1 | -1 | +| Raku | -1 | -1 | +| Ruby | 1 | -1 | +| Rust | -1 | -1 | +| Scala | 1 | -1 | +| Smalltalk | 1 | n.a. | +{: .table-medium .table-width-small} -[^1]: The Rust community had a similar [discussion](https://internals.rust-lang.org/t/the-is-not-empty-method-as-more-clearly-alternative-for-is-empty/) about this topic. \ No newline at end of file +[^1]: The Rust community had a similar [discussion](https://internals.rust-lang.org/t/the-is-not-empty-method-as-more-clearly-alternative-for-is-empty/) about this topic. 
diff --git a/_languages/unified-condition-expressions-comparison.md b/_languages/unified-condition-expressions-comparison.md new file mode 100644 index 00000000..3e5298a5 --- /dev/null +++ b/_languages/unified-condition-expressions-comparison.md @@ -0,0 +1,145 @@ +--- +title: "Language Design: Unified Condition Expressions – Comparison with Rust" +date: 2022-11-07 +page_previous_title: "Unified Condition Expressions – Exceptions" +page_previous_url: "unified-condition-expressions-exceptions" +--- + +##### simple if expression + +```ml +if x == 1.0 { "a" } +else { "z" } +``` + +This translates straightforwardly to Rust: + +```rust +if x == 1.0 { "a" } +else { "z" } +``` + +##### multiple cases, equality relation + +```ml +if x +... == 1.0 { "a" } +... == 2.0 { "b" } +else { "z" } +``` + +In Rust, using `match` is idiomatic: + +```rust +match x { + 1.0 => "a", + 2.0 => "b", + _ => "z" +} +``` + +##### multiple cases, any other relation + +```ml +if x +... == 1.0 { "a" } +... != 2.0 { "b" } +else { "z" } +``` + +Rust requires the use of `match` with guards (`match` on its own only supports equality relations), or an `if` expression: + +```rust +match x { + 1.0 => "a" if x == 1.0 { "a" } + x if x != 2.0 => "b" else if x != 2.0 { "b" } + _ => "z" else { "z" } +``` + +##### multiple cases, method calls + +```ml +if x +... .isInfinite { "a" } +... 
.isNaN { "b" } +else { "z" } +``` + +In Rust one would use `match` with guards, or an `if` expression: + +```rust +match x { + x if x.is_infinite() => "a" if x.is_infinite() { "a" } + x if x.is_nan() => "b" else if x.is_nan() { "b" } + _ => "z" else { "z" } +} +``` + +##### "if-let", statement[^rust-if-let][^swift-if-let] + +```ml +if opt_number is Some(i) { /* use `i` */ } +``` + +Rust requires a special construct to pattern match or introduce bindings: + +```rust +if let Some(i) = opt_number { /* use `i` */ } +``` + +##### "if-let", expression[^rust-if-let][^swift-if-let] + +```ml +let result = if opt_number + is Some(i) { i } + else { 0 } +``` + +Rust uses the `let-equals-if-let-equals` pattern: + +```rust +let result = if let Some(i) = opt_number { + i +} else { + 0 +} +``` + +##### "if-let" chains[^rust-if-let-chains] + +```ml +let result = if opt_number.contains(1.0) { 1.0 } else { 0 } +``` + +Rust proposes the `if-let` chains syntax: + +```rust +let result = if let Some(i) = opt_number && i == 1.0 { + i +} else { + 0 +} +``` + +##### "let-else"[^rust-let-else][^swift-guard-let] + +```ml +let i = if opt_number + is Some(i) { i } + else { return 0 } +``` + +Rust's `let-else` allows binding a fallible pattern without introducing nesting: + +```rust +let Some(i) = opt_number else { + return 0; +}; +``` + + +[^rust-if-let]: Rust `if-let` – https://doc.rust-lang.org/book/second-edition/ch06-03-if-let.html +[^swift-if-let]: Swift `if-let` – https://developer.apple.com/library/content/documentation/Swift/Conceptual/Swift_Programming_Language/OptionalChaining.html +[^rust-if-let-chains]: Rust `if-let` chains – https://github.com/rust-lang/rust/issues/53667 +[^rust-let-else]: Rust `let-else` – https://blog.rust-lang.org/2022/11/03/Rust-1.65.0.html#let-else-statements +[^swift-guard-let]: Swift `guard-let` – https://docs.swift.org/swift-book/LanguageGuide/ErrorHandling.html diff --git a/_languages/unified-condition-expressions-exceptions.md 
b/_languages/unified-condition-expressions-exceptions.md index 11eaabb8..3a1b2df3 100644 --- a/_languages/unified-condition-expressions-exceptions.md +++ b/_languages/unified-condition-expressions-exceptions.md @@ -1,7 +1,12 @@ --- title: "Language Design: Unified Condition Expressions – Exceptions" -date: 2018-04-28 12:00:00 +0200 +date: 2018-04-28 +update: 2022-06-24 redirect_from: "/languages/unified-condition-syntax-advanced" +page_previous_title: "Unified Condition Expressions – Implementation" +page_previous_url: "unified-condition-expressions-implementation" +page_next_title: "Unified Condition Expressions – Comparison with Rust" +page_next_url: "unified-condition-expressions-comparison" --- A reasonable question that might be asked is whether this design can be extended to also handle thrown exceptions, @@ -25,4 +30,3 @@ depending on the expressiveness of the core language. Considering the costs and the complexity involved, it may be a better approach to simply drop exceptions from the design of the language and do without this additional layer of control flow. - \ No newline at end of file diff --git a/_languages/unified-condition-expressions-implementation.md b/_languages/unified-condition-expressions-implementation.md index f1cf8a19..5feb57e2 100644 --- a/_languages/unified-condition-expressions-implementation.md +++ b/_languages/unified-condition-expressions-implementation.md @@ -1,7 +1,12 @@ --- title: "Language Design: Unified Condition Expressions – Implementation" -date: 2019-09-21 12:00:00 +0200 +date: 2019-09-21 +update: 2024-04-01 redirect_from: "/languages/unified-condition-expressions-parsing" +page_previous_title: "Unified Condition Expressions – Introduction" +page_previous_url: "unified-condition-expressions" +page_next_title: "Unified Condition Expressions – Exceptions" +page_next_url: "unified-condition-expressions-exceptions" --- #### How to Parse? 
@@ -11,17 +16,17 @@ it makes sense to build a feature-reduced version of unified condition expressio using a different keyword, in parallel to existing syntax. After unified condition expressions have gained sufficient maturity and functionality, -they can then be switched over to the "real" keyword, and any old implementations of -ternary operators, switch-cases or if-expressions can be removed. +they can then be switched over to the "real" keyword; old implementations of ternary operators, +switch-cases or if-expressions can be removed and their uses migrated to unified condition expressions. ##### Level 1: Basics ```ml -case // separate keyword - person .. // `..` to indicate end of common condition fragment - == john { true } - == jane { true } +if person + // `...` to indicate start of individual condition fragment + ... == john { true } + ... == jane { true } else false ``` @@ -33,65 +38,90 @@ fragment with each individual branch has to be taken into account, but the common fragment has to be retained until code-generation. -##### Level 2: Partial Conditions +##### Level 2: Pattern Matching + +The core insight is that pattern matching occurs either always (`switch`&`case`, `match`&`case`) or never +(`if`&`then`&`else`, `?`&`:`) with "legacy" approaches. + +With unified condition expressions, this choice can be made for each branch individually, using the `is` keyword: ```ml -case person == .. // partial common condition fragment - john { true } - jane { true } +if person + ... is Person("john", _, 42) { true } // pattern match + ... .age > 23 { false } // no pattern match else false ``` -At level 2, the notion of the condition's common fragment is made more flexible: -Now the common fragment can be partial; i. e. the common fragment may not be a valid -expression on its own. +##### Level 3: Bindings -The challenge here is how such code can be expressed best in the AST. 
+The main design task is picking a convention/rule that decides whether an identifier inside a pattern match introduces a +new binding with that name, or refers to an existing binding of that name in scope. +Possible design options include ... -##### Level 3: Partial Branches +1. ... using a keyword or symbol (for instance `let` or `@`) to introduce bindings in patterns: -```ml -case person .. - .firstName == "john" { true } - .age + 23 > jane.age { true } -else false -``` + ```ml + let age = 43 + if person + // refers to the `age` binding defined earlier + ... is Person("john", "miller", age) { age.toString } + // `let` introduces a new binding for jane's last name + ... is Person("jane", let lastName, 23) { lastName } + else false + ``` -At this stage the focus is on checking and ensuring that the syntax introduced -at level 1 is supporting the whole language, and is not special-cased, e. g. -to binary comparison operators. +2. ... using a keyword or symbol (for instance `$`) to reference existing bindings in scope: -Level 3 requires introducing indentation-based syntax. -Depending on how complex the rest of the language is, this can be a rather big leap. + ```ml + let age = 43 + if person + // `$` refers to the `age` binding defined earlier + ... is Person("john", "miller", $age) { age.toString } + // introduces a new binding for jane's last name + ... is Person("jane", lastName, 23) { lastName } + else false + ``` +3. ... using casing rules to distinguish bindings from references: -##### Level 4: Pattern Matching + ```ml + let Age = 43 + if person + // uppercase refers to the `Age` binding defined earlier + ... is Person("john", "miller", Age) { Age.toString } + // lowercase introduces a new binding for jane's last name + ... is Person("jane", lastName, 23) { lastName } + else false + ``` -```ml -case person is .. 
- Person("john", _, 42) { true } - Person("jane", "smith", _) { true } -else false -``` +##### Optional: Partial Conditions +The notion of the condition's common fragment can be made more flexible: -##### Level 5: Bindings +The common fragment can be partial; i. e. the common fragment may not be a valid expression on its own: ```ml -case person is .. - Person("john", "miller", $age) { age.toString } // introduce binding for john's age - Person("jane", $lastName, 23) { lastName } // ... and jane's last name +if person == // partial common condition fragment + ... john { true } + ... jane { true } else false ``` +The challenge here is how such code can be expressed best in the AST. + + +##### Optional: Indentation-based syntax + +Introducing an indentation-based syntax allows dropping `...` from the unified condition syntax +without introducing problems in other places. -##### (Optional) Unified condition expressions: Indentation +Similarly, `{}` could be replaced with `then`. ```ml -case person == // no `..` needed to indicate end of common condition fragment - john then true // {} has been replaced with then +if person == // no `...` needed to indicate end of common condition fragment + john then true // optional: replace `{}` with `then` jane then true else false ``` diff --git a/_languages/unified-condition-expressions.md b/_languages/unified-condition-expressions.md index 01f4fdfc..5f726073 100644 --- a/_languages/unified-condition-expressions.md +++ b/_languages/unified-condition-expressions.md @@ -1,7 +1,10 @@ --- title: "Language Design: Unified Condition Expressions – Introduction" -date: 2018-01-21 12:00:00 +0200 +date: 2018-01-21 +update: 2024-04-01 redirect_from: "/languages/unified-condition-syntax" +page_next_title: "Unified Condition Expressions – Implementation" +page_next_url: "unified-condition-expressions-implementation" --- #### Idea @@ -16,17 +19,16 @@ with a single, unified condition expression that scales from simple one-liners t #### 
Motivation -The intention is to cut the different syntax options down to a single one that is still easily recognizable by users, -not to minimize keywords (i. e. `a == b ? c : d`) or turn conditions into methods (like Smalltalk). +- Cut the different syntax options down to a single one that is still easily recognizable by users. +- Make this design scale seamlessly from simple cases to complicated ones. -#### Principles +
    Minimizing the number of keywords or turning condition syntax into method calls (like Smalltalk) are non-goals. -- The condition can be split between a common _discriminator_ and individual cases. - - This requires doing away with mandatory parentheses around the conditions. - - This strongly suggests using a keyword (`then`) to introduce branches, instead of using curly braces, - based on readability considerations. -- The keyword `if` is chosen over other options like `match`, `when`, `switch` or `case` - because it is keyword the largest number of developers are familiar with. +#### Considerations + +- The condition can be split between a common _discriminator_ and individual cases.
    + This requires doing away with mandatory parentheses around conditions. +- `if` has been chosen in code examples as the primary keyword, other reasonable keyword choices are `match`, `when`, `switch` or `case`. #### Examples @@ -44,10 +46,10 @@ else "z" ##### one comparison operator on multiple targets ```ml -if x == if x /* same as */ - 1.0 then "a" == 1.0 then "a" if x == 1.0 then "a" - 2.0 then "b" == 2.0 then "b" else if x == 2.0 then "b" - else "z" else "z" else "z" +if x == /* same as */ if x /* same as */ + 1.0 then "a" == 1.0 then "a" if x == 1.0 then "a" + 2.0 then "b" == 2.0 then "b" else if x == 2.0 then "b" + else "z" else "z" else "z" ``` ##### different comparison operators, equality and identity @@ -66,26 +68,26 @@ if xs /* same as */ else "z" else "z" ``` -##### pattern matching (`is`), introducing bindings (`$`) +##### pattern matching (`is`), introducing bindings, flow typing ```ml if alice - .age < 18 then "18" - is Person("Alice", $age) then "$age" - is Person("Bob", _)$person then "{$person.age}" - else "0" + .age < 18 then "18" + is Person("Alice", _) then "{$person.age}" + is Person("Bob", let age) then "$age" + else "0" ``` ##### pattern matching using "if-let"[^rust][^swift] ```ml -if person is Person("Alice", $age) then "$age" else "o" +if person is Person("Alice", let age) then "$age" else "o" ``` ##### wildcards (`_`) and pattern guards ```ml -if person /* same as */ if person is - is Person("Alice", _) then "alice" Person("Alice", _) then "alice" - is Person(_, $age) && age >= 18 then "adult" Person(_, $age) && age >= 18 then "adult" - else "minor" else "minor" +if person /* same as */ if person is + is Person("Alice", _) then "alice" Person("Alice", _) then "alice" + is Person(_, let age) && age >= 18 then "adult" Person(_, let age) && age >= 18 then "adult" + else "minor" else "minor" ``` #### Related Work diff --git a/_languages/unions.md b/_languages/unions.md new file mode 100644 index 00000000..5c834e3c --- /dev/null +++ 
b/_languages/unions.md @@ -0,0 +1,164 @@ +--- +title: "Language Design: Unions" +date: 2021-08-26 +update: 2022-11-26 +redirect_from: "/languages/better-enums" +redirect_from: "/languages/nondefinitional-enums" +--- + +_**TL;DR:** Tagged unions whose variants do not require syntactic wrappers._ + +### Introduction + +A "traditional" enum (ADT) definition as it exists in various languages defines both the enum itself +(`Pet`), as well as its variants (`Cat` and `Dog`): + + enum Pet { + Cat(name: String, lives: Int), + Dog(name: String, age: Int) + } + let pet: Pet = Cat("Molly", 9) + +Some languages like Rust, C or C++ provide untagged unions, where the chosen variant has to be specified on creation and access: + + union Pet { + cat: Cat, + dog: Dog + } + let pet = Pet { cat: Cat("Molly", 9) } + +Other languages provide untagged union types where the union type itself (`Pet`) is defined, +and its variants (`Cat` and `Dog`) refer to existing types in scope that may or may not allow detecting the chosen variant[^untagged-unions]: + + type Pet = Cat | Dog + let pet: Pet = Cat("Molly", 9) + +#### Observation + +- ADTs are generally tagged unions (their variants can be told apart, even if they contain the same values) +and come with wrappers (`Cat`, `Dog`) around their payloads. +- Untagged unions do not contain metadata (runtime tags) to distinguish variants, but require that every access is qualified with variant information. +- Union types do not contain metadata (runtime tags) to distinguish variants and do not use syntactic wrappers. + + + + + + + + + + + + + + + + + +
    Syntactic WrappingNo Syntactic Wrapping
    Runtime Taggingtagged union/ADT/enum?
    No Runtime Tagginguntagged union (Rust, C, C++)union type
    + + + + +### Filling in the upper right quadrant + +Let's think about the combination of tagged union without syntactic wrapping in the upper right quadrant: + + class Cat(name: String, lives: Int) + class Dog(name: String, age: Int) + union Pet of Cat, Dog + + let pet: Pet = Cat("Molly", 9) + +This defines the union `Pet`, refers to *existing types* `Cat` and `Dog`, +and assigns an instance of `Cat` to a binding `pet` of type `Pet` without syntactic wrapping. + +Intuitively, this works similarly to `permits` clauses of [_sealed interfaces_ in Java](https://docs.oracle.com/en/java/javase/17/language/sealed-classes-and-interfaces.html) in the sense that + + sealed interface Pet permits Cat, Dog { ... } + +does not define `Cat` or `Dog`, but refers to existing `Cat` and `Dog` types in scope.[^sealed] + +### Benefits of such unions + +1. Union variants have types, because they have a "real" class/struct/... declaration.
    + (This fixes a mistake that some languages like Rust or Haskell made with their enums/ADTs.[^enum-variants-1][^enum-variants-2]) +2. Variants can be reference types or value types (as they refer to "real" `class` or `value` definitions). +3. No "stutter", where variant names have to be invented to wrap existing types. (Rust has this issue.) +4. Union values can be passed/created more easily, as no syntactic wrapping is required. +5. Variants can be re-used in different unions. +6. The ability to build ad-hoc unions out of existing types obviates the need for a separate type alias feature. + +--- + +#### Example for 1., 2., 3. + + enum Option[T] { Some(value: T), None } + +... would receive little benefit from being written as ... + + union Option[T] of Some[T], None + value Some[T](value: T) + module None + +..., but even trivial ADTs like a JSON representation would benefit. + +Instead of ... + + enum JsonValue { + JsonObject(Map[String, JsonValue]) + JsonArray (Array[JsonValue]), + JsonString(String), + JsonNumber(Float64), + JsonBool (Bool), + JsonNull, + ... + } + +... one would write (with `Array`, `Float64` and `String` being existing types in the language): + + union JsonValue of + Map[String, JsonValue] + Array[JsonValue], + String, + Float64 + Bool, + JsonNull, + ... + + module JsonNull + +#### Example for 4. + +No wrapping required when passing arguments (unlike "traditional" enum approaches): + + fun someValue(value: JsonValue) = ... + someValue(JsonString("test")) // "traditional" approach + someValue("test") // with non-definitional unions + +#### Example for 5. + +Consider this class definition: + + class Name(name: String) + +With non-definitional unions, `Name` can be used multiple times – in different unions (and elsewhere): + + union PersonIdentifier of + Name, + ... // other identifiers like TaxId, Description, PhoneNumber etc. + + union DogTag of + Name, + ... // other identifiers like RegId, ... 
+ +--- + +Non-definitional unions reduce indirection at use-sites and can be used in more scenarios (compared to more "traditional" enums), +while not changing their runtime costs or representation. + +[^untagged-unions]: `type Num = Int | Int` does not allow detecting whether an `Int` instance is the first or the second variant; the definition is equivalent to `type Num = Int` +[^sealed]: Unlike sealed interfaces in Java though, `Cat` and `Dog` are not subtypes of `Pet` in non-definitional unions. +[^enum-variants-1]: [Types for enum variants](https://github.com/rust-lang/rfcs/pull/1450) +[^enum-variants-2]: [Enum variant types](https://github.com/rust-lang/rfcs/pull/2593) diff --git a/_languages/useful-syntax-sugar.md b/_languages/useful-syntax-sugar.md new file mode 100644 index 00000000..260f1db4 --- /dev/null +++ b/_languages/useful-syntax-sugar.md @@ -0,0 +1,153 @@ +--- +title: "Language Design: Useful Syntax Sugar" +date: 2022-07-10 +--- + +#### `get` sugar + +##### Rule + +> `x.get(y)` can be written as `x(y)` + +##### Explanation + +Instead of special-purpose syntax that is used for indexing operations (reading) in many languages, like + +```java +int firstValue = someArray[0]; +``` + +one can write + +``` +let firstValue = someArray(0) +/* same as */ +let firstValue = someArray.get(0) +``` + +assuming a definition like + +``` +class Array[T] + fun get(idx: Int64): T = ... +``` + +--- + +In combination with varargs, it can also replace special-purpose syntax used to construct various data structures. + +Instead of e. g. + +```java +int[] someArray = int[] { 1, 2, 3 }; +``` + +one can write + +``` +let someArray = Array(1, 2, 3) +/* same as */ +let someArray = Array.get(1, 2, 3) +``` + +assuming a definition like + +``` +module Array + fun get[T](vals: T*): Array[T] = ... 
+``` + +---- + +Of course `Array` is just one example; this rule applies to other data structures and use-cases equally: + +``` +let countriesAndCapitals = + Map("France" -> "Paris", "Germany" -> "Berlin", ...) +countriesAndCapitals("France") // "Paris" + +let baroqueComposers = Set("Bach", "Händel", "Vivaldi", ...) +baroqueComposers("Rammstein") // false +``` + +#### `set` sugar + +##### Rule + +> `x.set(y, z)` can be written as `x(y) = z` + +##### Explanation + +Instead of special-purpose syntax that is used for indexing operations (writing) in many languages, like + +```java +someArray[0] = 23; +``` + +one can write + +``` +someArray(0) = 23 +/* same as */ +someArray.set(0, 23) +``` + +assuming a definition like + +``` +class Array[T] + fun set(idx: Int64, val: T): Unit = ... +``` + +---- + +Of course `Array` is just one example; this rule applies to other data structures and use-cases equally: + +``` +let countriesAndCapitals = + Map("France" -> "Paris", "Germany" -> "Berlin", ...) +countriesAndCapitals("England") = "London" // new entry added + +let baroqueComposers = Set("Bach", "Händel", "Vivaldi", ...) +baroqueComposers("Monteverdi") = true // new entry added +``` + +#### `set...` sugar + +##### Rule + +> `x.setY(z)` can be written as `x.y = z` + +##### Explanation + +Instead of special-purpose syntax for properties and their setters, like + +```c# +struct Rating { + int value { + get { return value; } + set { + if (value < 0 || value > 100) + throw new ArgumentOutOfRangeException(); + this.value = value; + } + } +} + +someRating.value = 97; +``` + +one can keep writing + +``` +someRating.value = 97 +``` + +assuming a definition like + +``` +struct Rating(var value: Int32) + fun setValue(val: Int32) = ... +``` + +but does not have to pay the complexity cost of adding properties to the language. 
diff --git a/_languages/winding-down-rust-feature-development.md b/_languages/winding-down-rust-feature-development.md new file mode 100644 index 00000000..4acd9c65 --- /dev/null +++ b/_languages/winding-down-rust-feature-development.md @@ -0,0 +1,18 @@ +--- +title: "Time to wind down Rust feature development" +date: 2022-12-16 +--- + +_**TL;DR:** Regardless of where you stand on "Rust 2.0", Rust's current approach to language evolution is not sustainable._ + +Whenever a language considers adding a feature, the cost of having to remove the feature (for any reason) +should be factored in from the start. + +In Rust's case – where fixing pretty much anything after release is close to impossible – +that cost function goes toward infinity. + +Looking at the last few years of feature additions I have issues coming up with a feature whose +benefits are larger than its costs. + +- async/await: too early to tell whether it was actually worth it +- if let: growing extension proposals at an impressive rate diff --git a/_layouts/post.html b/_layouts/post.html index 0c2a990e..fe44c59b 100644 --- a/_layouts/post.html +++ b/_layouts/post.html @@ -11,7 +11,7 @@

    {{ title_parts[0] }}