From 53af27d99e8b9d615981e5a8f61ee63557fe77f6 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 30 Nov 2025 19:30:35 +0100 Subject: [PATCH 01/12] Support quote delimited strings --- src/ast/value.rs | 10 +++ src/parser/mod.rs | 10 +++ src/tokenizer.rs | 97 +++++++++++++++++++++- tests/sqlparser_oracle.rs | 165 +++++++++++++++++++++++++++++++++++++- 4 files changed, 279 insertions(+), 3 deletions(-) diff --git a/src/ast/value.rs b/src/ast/value.rs index fdfa6a674..f4d05c31f 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -167,6 +167,12 @@ pub enum Value { TripleDoubleQuotedRawStringLiteral(String), /// N'string value' NationalStringLiteral(String), + /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) + QuoteDelimitedStringLiteral(char, String, char), + /// "National" quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) + NationalQuoteDelimitedStringLiteral(char, String, char), /// X'hex value' HexStringLiteral(String), @@ -205,6 +211,8 @@ impl Value { | Value::EscapedStringLiteral(s) | Value::UnicodeStringLiteral(s) | Value::NationalStringLiteral(s) + | Value::QuoteDelimitedStringLiteral(_, s, _) + | Value::NationalQuoteDelimitedStringLiteral(_, s, _) | Value::HexStringLiteral(s) => Some(s), Value::DollarQuotedString(s) => Some(s.value), _ => None, @@ -242,6 +250,8 @@ impl fmt::Display for Value { Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{v}'"), + Value::QuoteDelimitedStringLiteral(q1, s, q2) => write!(f, "Q'{q1}{s}{q2}'"), + Value::NationalQuoteDelimitedStringLiteral(q1, s, q2) => write!(f, "NQ'{q1}{s}{q2}'"), Value::HexStringLiteral(v) => write!(f, "X'{v}'"), Value::Boolean(v) => write!(f, "{v}"), Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3ba4ba571..a89589f0d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1754,6 +1754,8 @@ impl<'a> Parser<'a> { | Token::TripleSingleQuotedRawStringLiteral(_) | Token::TripleDoubleQuotedRawStringLiteral(_) | Token::NationalStringLiteral(_) + | Token::QuoteDelimitedStringLiteral(_, _, _) + | Token::NationalQuoteDelimitedStringLiteral(_, _, _) | Token::HexStringLiteral(_) => { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) @@ -2770,6 +2772,8 @@ impl<'a> Parser<'a> { | Token::EscapedStringLiteral(_) | Token::UnicodeStringLiteral(_) | Token::NationalStringLiteral(_) + | Token::QuoteDelimitedStringLiteral(_, _, _) + | Token::NationalQuoteDelimitedStringLiteral(_, _, _) | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), _ => self.expected( "either filler, WITH, or WITHOUT in LISTAGG", @@ -10697,6 +10701,12 @@ impl<'a> Parser<'a> { Token::NationalStringLiteral(ref s) => { ok_value(Value::NationalStringLiteral(s.to_string())) } + Token::QuoteDelimitedStringLiteral(q1, s, q2) => { + ok_value(Value::QuoteDelimitedStringLiteral(q1, s, q2)) + } + Token::NationalQuoteDelimitedStringLiteral(q1, s, q2) => { + ok_value(Value::NationalQuoteDelimitedStringLiteral(q1, s, q2)) + } Token::EscapedStringLiteral(ref s) => { ok_value(Value::EscapedStringLiteral(s.to_string())) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 54a158c1f..fe5002b7a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -29,10 +29,10 @@ use alloc::{ vec, vec::Vec, }; -use core::iter::Peekable; use core::num::NonZeroU8; use core::str::Chars; use core::{cmp, fmt}; +use core::{iter::Peekable, str}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -40,11 +40,11 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::dialect::Dialect; use crate::dialect::{ BigQueryDialect, DuckDbDialect, GenericDialect, MySqlDialect, PostgreSqlDialect, SnowflakeDialect, }; +use crate::dialect::{Dialect, OracleDialect}; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; use crate::{ast::DollarQuotedString, dialect::HiveDialect}; @@ -98,6 +98,12 @@ pub enum Token { TripleDoubleQuotedRawStringLiteral(String), /// "National" string literal: i.e: N'string' NationalStringLiteral(String), + /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) + QuoteDelimitedStringLiteral(char, String, char), + /// "Nationa" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) + NationalQuoteDelimitedStringLiteral(char, String, char), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' EscapedStringLiteral(String), /// Unicode string literal: i.e: U&'first \000A second' @@ -292,6 +298,10 @@ impl fmt::Display for Token { Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""), Token::DollarQuotedString(ref s) => write!(f, "{s}"), Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"), + Token::QuoteDelimitedStringLiteral(q1, ref s, q2) => write!(f, "Q'{q1}{s}{q2}'"), + Token::NationalQuoteDelimitedStringLiteral(q1, ref s, q2) => { + write!(f, "NQ'{q1}{s}{q2}'") + } Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"), Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"), Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"), @@ -1032,6 +1042,16 @@ impl<'a> Tokenizer<'a> { self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?; Ok(Some(Token::NationalStringLiteral(s))) } + Some(&q @ 'q') | Some(&q @ 'Q') if dialect_of!(self is OracleDialect | GenericDialect) => + { + chars.next(); // consume and check the next char + self.tokenize_word_or_quote_delimited_string( + chars, + &[n, q], + Token::NationalQuoteDelimitedStringLiteral, + ) + .map(Some) + } _ => { // regular identifier starting with an "N" let s = self.tokenize_word(n, chars); @@ -1039,6 +1059,15 @@ impl<'a> Tokenizer<'a> { } } } + q @ 'Q' | q @ 'q' if dialect_of!(self is OracleDialect | GenericDialect) => { + chars.next(); // consume and check the next char + self.tokenize_word_or_quote_delimited_string( + chars, + &[q], + Token::QuoteDelimitedStringLiteral, + ) + .map(Some) + } // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard. x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => { let starting_loc = chars.location(); @@ -1994,6 +2023,70 @@ impl<'a> Tokenizer<'a> { ) } + /// Reads a quote delimited string without "backslash escaping" or a word + /// depending on whether `chars.next()` delivers a `'`. + /// + /// See + fn tokenize_word_or_quote_delimited_string( + &self, + chars: &mut State, + // the prefix that introduced the possible literal or word, + // e.g. "Q" or "nq" + word_prefix: &[char], + // turns an identified quote string literal, + // ie. `(start-quote-char, string-literal, end-quote-char)` + // into a token + as_literal: fn(char, String, char) -> Token, + ) -> Result { + match chars.peek() { + Some('\'') => { + chars.next(); + // ~ determine the "quote character(s)" + let error_loc = chars.location(); + let (start_quote_char, end_quote_char) = match chars.next() { + // ~ "newline" is not allowed by Oracle's SQL Reference, + // but works with sql*plus nevertheless + None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => { + return self.tokenizer_error( + error_loc, + format!( + "Invalid space, tab, newline, or EOF after '{}''.", + String::from_iter(word_prefix) + ), + ); + } + Some(c) => ( + c, + match c { + '[' => ']', + '{' => '}', + '<' => '>', + '(' => ')', + c => c, + }, + ), + }; + // read the string literal until the "quote character" following a by literal quote + let mut s = String::new(); + while let Some(ch) = chars.next() { + if ch == end_quote_char { + if let Some('\'') = chars.peek() { + chars.next(); // ~ consume the quote + return Ok(as_literal(start_quote_char, s, end_quote_char)); + } + } + s.push(ch); + } + self.tokenizer_error(error_loc, "Unterminated string literal") + } + // ~ not a literal introduced with _token_prefix_, assm + _ => { + let s = self.tokenize_word(String::from_iter(word_prefix), chars); + Ok(Token::make_word(&s, None)) + } + } + } + /// Read a quoted string. fn tokenize_quoted_string( &self, diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index 09fd41912..6308e1b98 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -21,7 +21,7 @@ use pretty_assertions::assert_eq; use sqlparser::{ - ast::{BinaryOperator, Expr, Value, ValueWithSpan}, + ast::{BinaryOperator, Expr, Ident, Value, ValueWithSpan}, dialect::OracleDialect, tokenizer::Span, }; @@ -103,3 +103,166 @@ fn plusminus_have_same_precedence_as_strconcat() { } ); } + +#[test] +fn parse_quote_delimited_string() { + let sql = "SELECT Q'.abc.', \ + Q'Xab'cX', \ + Q'|abc'''|', \ + Q'{abc}d}', \ + Q'[]abc[]', \ + Q'', \ + Q'<<', \ + Q'('abc'('abc)', \ + Q'(abc'def))', \ + Q'(abc'def)))' \ + FROM dual"; + let select = oracle().verified_only_select(sql); + assert_eq!(10, select.projection.len()); + assert_eq!( + &Expr::Value(Value::QuoteDelimitedStringLiteral('.', "abc".into(), '.').with_empty_span()), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('X', "ab'c".into(), 'X')).with_empty_span() + ), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('|', "abc'''".into(), '|')).with_empty_span() + ), + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('{', "abc}d".into(), '}')).with_empty_span() + ), + expr_from_projection(&select.projection[3]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('[', "]abc[".into(), ']')).with_empty_span() + ), + expr_from_projection(&select.projection[4]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('<', "a'bc".into(), '>')).with_empty_span() + ), + expr_from_projection(&select.projection[5]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('<', "<')).with_empty_span() + ), + expr_from_projection(&select.projection[6]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('(', "'abc'('abc".into(), ')')).with_empty_span() + ), + expr_from_projection(&select.projection[7]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('(', "abc'def)".into(), ')')).with_empty_span() + ), + expr_from_projection(&select.projection[8]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('(', "abc'def))".into(), ')')).with_empty_span() + ), + expr_from_projection(&select.projection[9]) + ); +} + +#[test] +fn parse_quote_delimited_string_lowercase() { + let sql = "select q'!a'b'c!d!' from dual"; + let select = oracle().verified_only_select_with_canonical(sql, "SELECT Q'!a'b'c!d!' FROM dual"); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::QuoteDelimitedStringLiteral('!', "a'b'c!d".into(), '!').with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); +} + +#[test] +fn parse_quote_delimited_string_but_is_a_word() { + let sql = "SELECT q, quux, q.abc FROM dual q"; + let select = oracle().verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "q")), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "quux")), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::CompoundIdentifier(vec![ + Ident::with_span(Span::empty(), "q"), + Ident::with_span(Span::empty(), "abc") + ]), + expr_from_projection(&select.projection[2]) + ); +} + +#[test] +fn parse_national_quote_delimited_string() { + let sql = "SELECT NQ'.abc.' FROM dual"; + let select = oracle().verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::NationalQuoteDelimitedStringLiteral('.', "abc".into(), '.').with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); +} + +#[test] +fn parse_national_quote_delimited_string_lowercase() { + for prefix in ["nq", "Nq", "nQ", "NQ"] { + let select = oracle().verified_only_select_with_canonical( + &format!("select {prefix}'!a'b'c!d!' from dual"), + "SELECT NQ'!a'b'c!d!' FROM dual", + ); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::NationalQuoteDelimitedStringLiteral('!', "a'b'c!d".into(), '!') + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); + } +} + +#[test] +fn parse_national_quote_delimited_string_but_is_a_word() { + let sql = "SELECT nq, nqoo, nq.abc FROM dual q"; + let select = oracle().verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "nq")), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "nqoo")), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::CompoundIdentifier(vec![ + Ident::with_span(Span::empty(), "nq"), + Ident::with_span(Span::empty(), "abc") + ]), + expr_from_projection(&select.projection[2]) + ); +} From d36183e7700376fd08587171678a55d5c0200344 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 14 Dec 2025 08:33:20 +0100 Subject: [PATCH 02/12] Correct link --- src/tokenizer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index fe5002b7a..ad7df61be 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -99,7 +99,7 @@ pub enum Token { /// "National" string literal: i.e: N'string' NationalStringLiteral(String), /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` - /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) QuoteDelimitedStringLiteral(char, String, char), /// "Nationa" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'` /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) From 9daded0aeeb5ed0879130335062ac6e0ab35182b Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 14 Dec 2025 09:37:48 +0100 Subject: [PATCH 03/12] Explicit QuoteDelimitedString type --- src/ast/mod.rs | 2 +- src/ast/value.rs | 43 +++++++++++++++++++++++++++++++-------- src/parser/mod.rs | 16 +++++++-------- src/tokenizer.rs | 39 +++++++++++++++++++---------------- tests/sqlparser_oracle.rs | 41 +++++++++++++++++++++---------------- 5 files changed, 89 insertions(+), 52 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 23cde478b..f1e79b0d2 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -110,7 +110,7 @@ pub use self::trigger::{ pub use self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, - NormalizationForm, TrimWhereField, Value, ValueWithSpan, + NormalizationForm, QuoteDelimitedString, TrimWhereField, Value, ValueWithSpan, }; use crate::ast::helpers::key_value_options::KeyValueOptions; diff --git a/src/ast/value.rs b/src/ast/value.rs index f4d05c31f..430ba8f11 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -19,6 +19,7 @@ use alloc::string::String; use core::fmt; +use std::fmt::Write; #[cfg(feature = "bigdecimal")] use bigdecimal::BigDecimal; @@ -168,11 +169,11 @@ pub enum Value { /// N'string value' NationalStringLiteral(String), /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` - /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) - QuoteDelimitedStringLiteral(char, String, char), + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + QuoteDelimitedStringLiteral(QuoteDelimitedString), /// "National" quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` - /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) - NationalQuoteDelimitedStringLiteral(char, String, char), + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + NationalQuoteDelimitedStringLiteral(QuoteDelimitedString), /// X'hex value' HexStringLiteral(String), @@ -211,10 +212,10 @@ impl Value { | Value::EscapedStringLiteral(s) | Value::UnicodeStringLiteral(s) | Value::NationalStringLiteral(s) - | Value::QuoteDelimitedStringLiteral(_, s, _) - | Value::NationalQuoteDelimitedStringLiteral(_, s, _) | Value::HexStringLiteral(s) => Some(s), Value::DollarQuotedString(s) => Some(s.value), + Value::QuoteDelimitedStringLiteral(s) => Some(s.value), + Value::NationalQuoteDelimitedStringLiteral(s) => Some(s.value), _ => None, } } @@ -250,8 +251,8 @@ impl fmt::Display for Value { Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{v}'"), - Value::QuoteDelimitedStringLiteral(q1, s, q2) => write!(f, "Q'{q1}{s}{q2}'"), - Value::NationalQuoteDelimitedStringLiteral(q1, s, q2) => write!(f, "NQ'{q1}{s}{q2}'"), + Value::QuoteDelimitedStringLiteral(v) => v.fmt(f), + Value::NationalQuoteDelimitedStringLiteral(v) => write!(f, "N{v}"), Value::HexStringLiteral(v) => write!(f, "X'{v}'"), Value::Boolean(v) => write!(f, "{v}"), Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"), @@ -289,6 +290,32 @@ impl fmt::Display for DollarQuotedString { } } +/// A quote delimited string literal, e.g. `Q'_abc_'`. +/// +/// See [Token::QuoteDelimitedStringLiteral] and/or +/// [Token::NationalQuoteDelimitedStringLiteral]. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct QuoteDelimitedString { + /// the quote start character; i.e. the character _after_ the opening `Q'` + pub start_quote: char, + /// the string literal value itself + pub value: String, + /// the quote end character; i.e. the character _before_ the closing `'` + pub end_quote: char, +} + +impl fmt::Display for QuoteDelimitedString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("Q'")?; + f.write_char(self.start_quote)?; + f.write_str(&self.value)?; + f.write_char(self.end_quote)?; + f.write_char('\'') + } +} + #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a89589f0d..ade3c250f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1754,8 +1754,8 @@ impl<'a> Parser<'a> { | Token::TripleSingleQuotedRawStringLiteral(_) | Token::TripleDoubleQuotedRawStringLiteral(_) | Token::NationalStringLiteral(_) - | Token::QuoteDelimitedStringLiteral(_, _, _) - | Token::NationalQuoteDelimitedStringLiteral(_, _, _) + | Token::QuoteDelimitedStringLiteral(_) + | Token::NationalQuoteDelimitedStringLiteral(_) | Token::HexStringLiteral(_) => { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) @@ -2772,8 +2772,8 @@ impl<'a> Parser<'a> { | Token::EscapedStringLiteral(_) | Token::UnicodeStringLiteral(_) | Token::NationalStringLiteral(_) - | Token::QuoteDelimitedStringLiteral(_, _, _) - | Token::NationalQuoteDelimitedStringLiteral(_, _, _) + | Token::QuoteDelimitedStringLiteral(_) + | Token::NationalQuoteDelimitedStringLiteral(_) | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), _ => self.expected( "either filler, WITH, or WITHOUT in LISTAGG", @@ -10701,11 +10701,11 @@ impl<'a> Parser<'a> { Token::NationalStringLiteral(ref s) => { ok_value(Value::NationalStringLiteral(s.to_string())) } - Token::QuoteDelimitedStringLiteral(q1, s, q2) => { - ok_value(Value::QuoteDelimitedStringLiteral(q1, s, q2)) + Token::QuoteDelimitedStringLiteral(v) => { + ok_value(Value::QuoteDelimitedStringLiteral(v)) } - Token::NationalQuoteDelimitedStringLiteral(q1, s, q2) => { - ok_value(Value::NationalQuoteDelimitedStringLiteral(q1, s, q2)) + Token::NationalQuoteDelimitedStringLiteral(v) => { + ok_value(Value::NationalQuoteDelimitedStringLiteral(v)) } Token::EscapedStringLiteral(ref s) => { ok_value(Value::EscapedStringLiteral(s.to_string())) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index ad7df61be..e7a7696ff 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -46,7 +46,10 @@ use crate::dialect::{ }; use crate::dialect::{Dialect, OracleDialect}; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; -use crate::{ast::DollarQuotedString, dialect::HiveDialect}; +use crate::{ + ast::{DollarQuotedString, QuoteDelimitedString}, + dialect::HiveDialect, +}; /// SQL Token enumeration #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -99,11 +102,11 @@ pub enum Token { /// "National" string literal: i.e: N'string' NationalStringLiteral(String), /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` - /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) - QuoteDelimitedStringLiteral(char, String, char), + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + QuoteDelimitedStringLiteral(QuoteDelimitedString), /// "Nationa" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'` - /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) - NationalQuoteDelimitedStringLiteral(char, String, char), + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + NationalQuoteDelimitedStringLiteral(QuoteDelimitedString), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' EscapedStringLiteral(String), /// Unicode string literal: i.e: U&'first \000A second' @@ -298,10 +301,8 @@ impl fmt::Display for Token { Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""), Token::DollarQuotedString(ref s) => write!(f, "{s}"), Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"), - Token::QuoteDelimitedStringLiteral(q1, ref s, q2) => write!(f, "Q'{q1}{s}{q2}'"), - Token::NationalQuoteDelimitedStringLiteral(q1, ref s, q2) => { - write!(f, "NQ'{q1}{s}{q2}'") - } + Token::QuoteDelimitedStringLiteral(ref s) => s.fmt(f), + Token::NationalQuoteDelimitedStringLiteral(ref s) => write!(f, "N{s}"), Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"), Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"), Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"), @@ -2024,9 +2025,9 @@ impl<'a> Tokenizer<'a> { } /// Reads a quote delimited string without "backslash escaping" or a word - /// depending on whether `chars.next()` delivers a `'`. + /// depending on `chars.next()` delivering a `'`. /// - /// See + /// See fn tokenize_word_or_quote_delimited_string( &self, chars: &mut State, @@ -2036,14 +2037,14 @@ impl<'a> Tokenizer<'a> { // turns an identified quote string literal, // ie. `(start-quote-char, string-literal, end-quote-char)` // into a token - as_literal: fn(char, String, char) -> Token, + as_literal: fn(QuoteDelimitedString) -> Token, ) -> Result { match chars.peek() { Some('\'') => { chars.next(); // ~ determine the "quote character(s)" let error_loc = chars.location(); - let (start_quote_char, end_quote_char) = match chars.next() { + let (start_quote, end_quote) = match chars.next() { // ~ "newline" is not allowed by Oracle's SQL Reference, // but works with sql*plus nevertheless None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => { @@ -2067,15 +2068,19 @@ impl<'a> Tokenizer<'a> { ), }; // read the string literal until the "quote character" following a by literal quote - let mut s = String::new(); + let mut value = String::new(); while let Some(ch) = chars.next() { - if ch == end_quote_char { + if ch == end_quote { if let Some('\'') = chars.peek() { chars.next(); // ~ consume the quote - return Ok(as_literal(start_quote_char, s, end_quote_char)); + return Ok(as_literal(QuoteDelimitedString { + start_quote, + value, + end_quote, + })); } } - s.push(ch); + value.push(ch); } self.tokenizer_error(error_loc, "Unterminated string literal") } diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index 6308e1b98..a194b875e 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -21,7 +21,7 @@ use pretty_assertions::assert_eq; use sqlparser::{ - ast::{BinaryOperator, Expr, Ident, Value, ValueWithSpan}, + ast::{BinaryOperator, Expr, Ident, QuoteDelimitedString, Value, ValueWithSpan}, dialect::OracleDialect, tokenizer::Span, }; @@ -33,6 +33,15 @@ fn oracle() -> TestedDialects { TestedDialects::new(vec![Box::new(OracleDialect)]) } +/// Convenience constructor for [QuoteDelimitedstring]. +fn qds(start_quote: char, value: &'static str, end_quote: char) -> QuoteDelimitedString { + QuoteDelimitedString { + start_quote, + value: value.into(), + end_quote, + } +} + /// Oracle: `||` has a lower precedence than `*` and `/` #[test] fn muldiv_have_higher_precedence_than_strconcat() { @@ -120,60 +129,56 @@ fn parse_quote_delimited_string() { let select = oracle().verified_only_select(sql); assert_eq!(10, select.projection.len()); assert_eq!( - &Expr::Value(Value::QuoteDelimitedStringLiteral('.', "abc".into(), '.').with_empty_span()), + &Expr::Value(Value::QuoteDelimitedStringLiteral(qds('.', "abc", '.')).with_empty_span()), expr_from_projection(&select.projection[0]) ); assert_eq!( - &Expr::Value( - (Value::QuoteDelimitedStringLiteral('X', "ab'c".into(), 'X')).with_empty_span() - ), + &Expr::Value((Value::QuoteDelimitedStringLiteral(qds('X', "ab'c", 'X'))).with_empty_span()), expr_from_projection(&select.projection[1]) ); assert_eq!( &Expr::Value( - (Value::QuoteDelimitedStringLiteral('|', "abc'''".into(), '|')).with_empty_span() + (Value::QuoteDelimitedStringLiteral(qds('|', "abc'''", '|'))).with_empty_span() ), expr_from_projection(&select.projection[2]) ); assert_eq!( &Expr::Value( - (Value::QuoteDelimitedStringLiteral('{', "abc}d".into(), '}')).with_empty_span() + (Value::QuoteDelimitedStringLiteral(qds('{', "abc}d", '}'))).with_empty_span() ), expr_from_projection(&select.projection[3]) ); assert_eq!( &Expr::Value( - (Value::QuoteDelimitedStringLiteral('[', "]abc[".into(), ']')).with_empty_span() + (Value::QuoteDelimitedStringLiteral(qds('[', "]abc[", ']'))).with_empty_span() ), expr_from_projection(&select.projection[4]) ); assert_eq!( - &Expr::Value( - (Value::QuoteDelimitedStringLiteral('<', "a'bc".into(), '>')).with_empty_span() - ), + &Expr::Value((Value::QuoteDelimitedStringLiteral(qds('<', "a'bc", '>'))).with_empty_span()), expr_from_projection(&select.projection[5]) ); assert_eq!( &Expr::Value( - (Value::QuoteDelimitedStringLiteral('<', "<')).with_empty_span() + (Value::QuoteDelimitedStringLiteral(qds('<', "<'))).with_empty_span() ), expr_from_projection(&select.projection[6]) ); assert_eq!( &Expr::Value( - (Value::QuoteDelimitedStringLiteral('(', "'abc'('abc".into(), ')')).with_empty_span() + (Value::QuoteDelimitedStringLiteral(qds('(', "'abc'('abc", ')'))).with_empty_span() ), expr_from_projection(&select.projection[7]) ); assert_eq!( &Expr::Value( - (Value::QuoteDelimitedStringLiteral('(', "abc'def)".into(), ')')).with_empty_span() + (Value::QuoteDelimitedStringLiteral(qds('(', "abc'def)", ')'))).with_empty_span() ), expr_from_projection(&select.projection[8]) ); assert_eq!( &Expr::Value( - (Value::QuoteDelimitedStringLiteral('(', "abc'def))".into(), ')')).with_empty_span() + (Value::QuoteDelimitedStringLiteral(qds('(', "abc'def))", ')'))).with_empty_span() ), expr_from_projection(&select.projection[9]) ); @@ -186,7 +191,7 @@ fn parse_quote_delimited_string_lowercase() { assert_eq!(1, select.projection.len()); assert_eq!( &Expr::Value( - Value::QuoteDelimitedStringLiteral('!', "a'b'c!d".into(), '!').with_empty_span() + Value::QuoteDelimitedStringLiteral(qds('!', "a'b'c!d", '!')).with_empty_span() ), expr_from_projection(&select.projection[0]) ); @@ -221,7 +226,7 @@ fn parse_national_quote_delimited_string() { assert_eq!(1, select.projection.len()); assert_eq!( &Expr::Value( - Value::NationalQuoteDelimitedStringLiteral('.', "abc".into(), '.').with_empty_span() + Value::NationalQuoteDelimitedStringLiteral(qds('.', "abc", '.')).with_empty_span() ), expr_from_projection(&select.projection[0]) ); @@ -237,7 +242,7 @@ fn parse_national_quote_delimited_string_lowercase() { assert_eq!(1, select.projection.len()); assert_eq!( &Expr::Value( - Value::NationalQuoteDelimitedStringLiteral('!', "a'b'c!d".into(), '!') + Value::NationalQuoteDelimitedStringLiteral(qds('!', "a'b'c!d", '!')) .with_empty_span() ), expr_from_projection(&select.projection[0]) From 7ea0d86b1fb0f4de7f2075b596c32cefedba5b0e Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 14 Dec 2025 10:32:39 +0100 Subject: [PATCH 04/12] Simplify parsing method --- src/tokenizer.rs | 127 ++++++++++++++++++++++------------------------- 1 file changed, 59 insertions(+), 68 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index e7a7696ff..5b3f4c65a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1046,12 +1046,13 @@ impl<'a> Tokenizer<'a> { Some(&q @ 'q') | Some(&q @ 'Q') if dialect_of!(self is OracleDialect | GenericDialect) => { chars.next(); // consume and check the next char - self.tokenize_word_or_quote_delimited_string( - chars, - &[n, q], - Token::NationalQuoteDelimitedStringLiteral, - ) - .map(Some) + if let Some('\'') = chars.peek() { + self.tokenize_quote_delimited_string(chars, &[n, q]) + .map(|s| Some(Token::NationalQuoteDelimitedStringLiteral(s))) + } else { + let s = self.tokenize_word(String::from_iter([n, q]), chars); + Ok(Some(Token::make_word(&s, None))) + } } _ => { // regular identifier starting with an "N" @@ -1062,12 +1063,13 @@ impl<'a> Tokenizer<'a> { } q @ 'Q' | q @ 'q' if dialect_of!(self is OracleDialect | GenericDialect) => { chars.next(); // consume and check the next char - self.tokenize_word_or_quote_delimited_string( - chars, - &[q], - Token::QuoteDelimitedStringLiteral, - ) - .map(Some) + if let Some('\'') = chars.peek() { + self.tokenize_quote_delimited_string(chars, &[q]) + .map(|s| Some(Token::QuoteDelimitedStringLiteral(s))) + } else { + let s = self.tokenize_word(q, chars); + Ok(Some(Token::make_word(&s, None))) + } } // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard. x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => { @@ -2024,72 +2026,61 @@ impl<'a> Tokenizer<'a> { ) } - /// Reads a quote delimited string without "backslash escaping" or a word - /// depending on `chars.next()` delivering a `'`. + /// Reads a quote delimited string expecting `chars.next()` to deliver a quote. /// /// See - fn tokenize_word_or_quote_delimited_string( + fn tokenize_quote_delimited_string( &self, chars: &mut State, // the prefix that introduced the possible literal or word, // e.g. "Q" or "nq" - word_prefix: &[char], - // turns an identified quote string literal, - // ie. `(start-quote-char, string-literal, end-quote-char)` - // into a token - as_literal: fn(QuoteDelimitedString) -> Token, - ) -> Result { - match chars.peek() { - Some('\'') => { - chars.next(); - // ~ determine the "quote character(s)" - let error_loc = chars.location(); - let (start_quote, end_quote) = match chars.next() { - // ~ "newline" is not allowed by Oracle's SQL Reference, - // but works with sql*plus nevertheless - None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => { - return self.tokenizer_error( - error_loc, - format!( - "Invalid space, tab, newline, or EOF after '{}''.", - String::from_iter(word_prefix) - ), - ); - } - Some(c) => ( - c, - match c { - '[' => ']', - '{' => '}', - '<' => '>', - '(' => ')', - c => c, - }, + literal_prefix: &[char], + ) -> Result { + let literal_start_loc = chars.location(); + chars.next(); + + let start_quote_loc = chars.location(); + let (start_quote, end_quote) = match chars.next() { + // ~ "newline" is not allowed by Oracle's SQL Reference, + // but works with sql*plus nevertheless + None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => { + return self.tokenizer_error( + start_quote_loc, + format!( + "Invalid space, tab, newline, or EOF after '{}''.", + String::from_iter(literal_prefix) ), - }; - // read the string literal until the "quote character" following a by literal quote - let mut value = String::new(); - while let Some(ch) = chars.next() { - if ch == end_quote { - if let Some('\'') = chars.peek() { - chars.next(); // ~ consume the quote - return Ok(as_literal(QuoteDelimitedString { - start_quote, - value, - end_quote, - })); - } - } - value.push(ch); - } - self.tokenizer_error(error_loc, "Unterminated string literal") + ); } - // ~ not a literal introduced with _token_prefix_, assm - _ => { - let s = self.tokenize_word(String::from_iter(word_prefix), chars); - Ok(Token::make_word(&s, None)) + Some(c) => ( + c, + match c { + '[' => ']', + '{' => '}', + '<' => '>', + '(' => ')', + c => c, + }, + ), + }; + + // read the string literal until the "quote character" following a by literal quote + let mut value = String::new(); + while let Some(ch) = chars.next() { + if ch == end_quote { + if let Some('\'') = chars.peek() { + chars.next(); // ~ consume the quote + return Ok(QuoteDelimitedString { + start_quote, + value, + end_quote, + }); + } } + value.push(ch); } + + self.tokenizer_error(literal_start_loc, "Unterminated string literal") } /// Read a quoted string. From 0cd577e558bac12b30cffc7a49c8966bb261aff7 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 14 Dec 2025 10:42:27 +0100 Subject: [PATCH 05/12] Test coverage --- src/tokenizer.rs | 2 +- tests/sqlparser_oracle.rs | 25 ++++++++++++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 5b3f4c65a..d11c2fcd1 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -2047,7 +2047,7 @@ impl<'a> Tokenizer<'a> { return self.tokenizer_error( start_quote_loc, format!( - "Invalid space, tab, newline, or EOF after '{}''.", + "Invalid space, tab, newline, or EOF after '{}''", String::from_iter(literal_prefix) ), ); diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index a194b875e..8a0d8721d 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -21,9 +21,7 @@ use pretty_assertions::assert_eq; use sqlparser::{ - ast::{BinaryOperator, Expr, Ident, QuoteDelimitedString, Value, ValueWithSpan}, - dialect::OracleDialect, - tokenizer::Span, + ast::{BinaryOperator, Expr, Ident, QuoteDelimitedString, Value, ValueWithSpan}, dialect::OracleDialect, parser::ParserError, tokenizer::Span }; use test_utils::{expr_from_projection, number, TestedDialects}; @@ -184,6 +182,27 @@ fn parse_quote_delimited_string() { ); } +#[test] +fn parse_invalid_quote_delimited_strings() { + // ~ invalid quote delimiter + for q in [' ', '\t', '\r', '\n'] { + assert_eq!( + oracle().parse_sql_statements(&format!("SELECT Q'{q}abc{q}' FROM dual")), + Err(ParserError::TokenizerError("Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into())), + "with quote char {q:?}"); + } + // ~ invalid eof after quote + assert_eq!( + oracle().parse_sql_statements("SELECT Q'"), + Err(ParserError::TokenizerError("Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into())), + "with EOF quote char"); + // ~ unterminated string + assert_eq!( + oracle().parse_sql_statements("SELECT Q'|asdfa...."), + Err(ParserError::TokenizerError("Unterminated string literal at Line: 1, Column: 9".into())), + "with EOF quote char"); +} + #[test] fn parse_quote_delimited_string_lowercase() { let sql = "select q'!a'b'c!d!' from dual"; From d9466f0391ba19e0691580225767adb6f90f5478 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 14 Dec 2025 10:52:18 +0100 Subject: [PATCH 06/12] Guard quote delimited string by explicit dialect setting --- src/dialect/generic.rs | 4 ++++ src/dialect/mod.rs | 7 +++++++ src/dialect/oracle.rs | 4 ++++ src/tokenizer.rs | 7 ++++--- tests/sqlparser_oracle.rs | 26 +++++++++++++++++++------- 5 files changed, 38 insertions(+), 10 deletions(-) diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index dffc5b527..bbedbc059 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -195,4 +195,8 @@ impl Dialect for GenericDialect { fn supports_interval_options(&self) -> bool { true } + + fn supports_quote_delimited_string(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 1d99d8631..1a416e4df 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1209,6 +1209,13 @@ pub trait Dialect: Debug + Any { fn supports_semantic_view_table_factor(&self) -> bool { false } + + /// Support quote delimited string literals, e.g. `Q'{...}'` + /// + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + fn supports_quote_delimited_string(&self) -> bool { + false + } } /// This represents the operators for which precedence must be defined diff --git a/src/dialect/oracle.rs b/src/dialect/oracle.rs index f8bb0e155..54c2ace5f 100644 --- a/src/dialect/oracle.rs +++ b/src/dialect/oracle.rs @@ -95,4 +95,8 @@ impl Dialect for OracleDialect { fn supports_group_by_expr(&self) -> bool { true } + + fn supports_quote_delimited_string(&self) -> bool { + true + } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index d11c2fcd1..238bf2334 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -40,11 +40,11 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; +use crate::dialect::Dialect; use crate::dialect::{ BigQueryDialect, DuckDbDialect, GenericDialect, MySqlDialect, PostgreSqlDialect, SnowflakeDialect, }; -use crate::dialect::{Dialect, OracleDialect}; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; use crate::{ ast::{DollarQuotedString, QuoteDelimitedString}, @@ -1043,7 +1043,8 @@ impl<'a> Tokenizer<'a> { self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?; Ok(Some(Token::NationalStringLiteral(s))) } - Some(&q @ 'q') | Some(&q @ 'Q') if dialect_of!(self is OracleDialect | GenericDialect) => + Some(&q @ 'q') | Some(&q @ 'Q') + if self.dialect.supports_quote_delimited_string() => { chars.next(); // consume and check the next char if let Some('\'') = chars.peek() { @@ -1061,7 +1062,7 @@ impl<'a> Tokenizer<'a> { } } } - q @ 'Q' | q @ 'q' if dialect_of!(self is OracleDialect | GenericDialect) => { + q @ 'Q' | q @ 'q' if self.dialect.supports_quote_delimited_string() => { chars.next(); // consume and check the next char if let Some('\'') = chars.peek() { self.tokenize_quote_delimited_string(chars, &[q]) diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index 8a0d8721d..ee5f209e8 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -21,7 +21,10 @@ use pretty_assertions::assert_eq; use sqlparser::{ - ast::{BinaryOperator, Expr, Ident, QuoteDelimitedString, Value, ValueWithSpan}, dialect::OracleDialect, parser::ParserError, tokenizer::Span + ast::{BinaryOperator, Expr, Ident, QuoteDelimitedString, Value, ValueWithSpan}, + dialect::OracleDialect, + parser::ParserError, + tokenizer::Span, }; use test_utils::{expr_from_projection, number, TestedDialects}; @@ -188,19 +191,28 @@ fn parse_invalid_quote_delimited_strings() { for q in [' ', '\t', '\r', '\n'] { assert_eq!( oracle().parse_sql_statements(&format!("SELECT Q'{q}abc{q}' FROM dual")), - Err(ParserError::TokenizerError("Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into())), - "with quote char {q:?}"); + Err(ParserError::TokenizerError( + "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into() + )), + "with quote char {q:?}" + ); } // ~ invalid eof after quote assert_eq!( oracle().parse_sql_statements("SELECT Q'"), - Err(ParserError::TokenizerError("Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into())), - "with EOF quote char"); + Err(ParserError::TokenizerError( + "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into() + )), + "with EOF quote char" + ); // ~ unterminated string assert_eq!( oracle().parse_sql_statements("SELECT Q'|asdfa...."), - Err(ParserError::TokenizerError("Unterminated string literal at Line: 1, Column: 9".into())), - "with EOF quote char"); + Err(ParserError::TokenizerError( + "Unterminated string literal at Line: 1, Column: 9".into() + )), + "with EOF quote char" + ); } #[test] From 2f6d970cab6aa3cf1f16e50034c4c36aed97e8ba Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 14 Dec 2025 10:55:08 +0100 Subject: [PATCH 07/12] Remove comment --- src/tokenizer.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 238bf2334..2ae17cf4a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -2042,8 +2042,6 @@ impl<'a> Tokenizer<'a> { let start_quote_loc = chars.location(); let (start_quote, end_quote) = match chars.next() { - // ~ "newline" is not allowed by Oracle's SQL Reference, - // but works with sql*plus nevertheless None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => { return self.tokenizer_error( start_quote_loc, From c2908351ed1d4968d4d89c2e5a1d93b3a5dd37f8 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 14 Dec 2025 10:58:56 +0100 Subject: [PATCH 08/12] Correct doc reference --- src/ast/value.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ast/value.rs b/src/ast/value.rs index 430ba8f11..20282cc86 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -292,8 +292,8 @@ impl fmt::Display for DollarQuotedString { /// A quote delimited string literal, e.g. `Q'_abc_'`. /// -/// See [Token::QuoteDelimitedStringLiteral] and/or -/// [Token::NationalQuoteDelimitedStringLiteral]. +/// See [Value::QuoteDelimitedStringLiteral] and/or +/// [Value::NationalQuoteDelimitedStringLiteral]. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] From 9da533312b500fcc6037415160239c0a1ee2b2b9 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 14 Dec 2025 11:02:55 +0100 Subject: [PATCH 09/12] Make no_std compliant --- src/ast/value.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/ast/value.rs b/src/ast/value.rs index 20282cc86..ccbb12a33 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -19,7 +19,6 @@ use alloc::string::String; use core::fmt; -use std::fmt::Write; #[cfg(feature = "bigdecimal")] use bigdecimal::BigDecimal; @@ -307,12 +306,8 @@ pub struct QuoteDelimitedString { } impl fmt::Display for QuoteDelimitedString { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str("Q'")?; - f.write_char(self.start_quote)?; - f.write_str(&self.value)?; - f.write_char(self.end_quote)?; - f.write_char('\'') + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Q'{}{}{}'", self.start_quote, self.value, self.end_quote) } } From f8d8af523dadf07ad4f15464ba0bfe7569605d2e Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 14 Dec 2025 11:03:01 +0100 Subject: [PATCH 10/12] Remove unused import --- src/parser/merge.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/merge.rs b/src/parser/merge.rs index b2283b671..2bc1544f0 100644 --- a/src/parser/merge.rs +++ b/src/parser/merge.rs @@ -13,7 +13,7 @@ //! SQL Parser for a `MERGE` statement #[cfg(not(feature = "std"))] -use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec}; +use alloc::{boxed::Box, format, vec, vec::Vec}; use crate::{ ast::{ From a34d416a1fea572defcac4a42eac0199fe2623b6 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Tue, 16 Dec 2025 13:44:02 +0100 Subject: [PATCH 11/12] Method name --- tests/sqlparser_oracle.rs | 54 ++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index ee5f209e8..7c7d1d51a 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -35,7 +35,11 @@ fn oracle() -> TestedDialects { } /// Convenience constructor for [QuoteDelimitedstring]. -fn qds(start_quote: char, value: &'static str, end_quote: char) -> QuoteDelimitedString { +fn quote_delimited_string( + start_quote: char, + value: &'static str, + end_quote: char, +) -> QuoteDelimitedString { QuoteDelimitedString { start_quote, value: value.into(), @@ -130,56 +134,72 @@ fn parse_quote_delimited_string() { let select = oracle().verified_only_select(sql); assert_eq!(10, select.projection.len()); assert_eq!( - &Expr::Value(Value::QuoteDelimitedStringLiteral(qds('.', "abc", '.')).with_empty_span()), + &Expr::Value( + Value::QuoteDelimitedStringLiteral(quote_delimited_string('.', "abc", '.')) + .with_empty_span() + ), expr_from_projection(&select.projection[0]) ); assert_eq!( - &Expr::Value((Value::QuoteDelimitedStringLiteral(qds('X', "ab'c", 'X'))).with_empty_span()), + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('X', "ab'c", 'X'))) + .with_empty_span() + ), expr_from_projection(&select.projection[1]) ); assert_eq!( &Expr::Value( - (Value::QuoteDelimitedStringLiteral(qds('|', "abc'''", '|'))).with_empty_span() + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('|', "abc'''", '|'))) + .with_empty_span() ), expr_from_projection(&select.projection[2]) ); assert_eq!( &Expr::Value( - (Value::QuoteDelimitedStringLiteral(qds('{', "abc}d", '}'))).with_empty_span() + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('{', "abc}d", '}'))) + .with_empty_span() ), expr_from_projection(&select.projection[3]) ); assert_eq!( &Expr::Value( - (Value::QuoteDelimitedStringLiteral(qds('[', "]abc[", ']'))).with_empty_span() + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('[', "]abc[", ']'))) + .with_empty_span() ), expr_from_projection(&select.projection[4]) ); assert_eq!( - &Expr::Value((Value::QuoteDelimitedStringLiteral(qds('<', "a'bc", '>'))).with_empty_span()), + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('<', "a'bc", '>'))) + .with_empty_span() + ), expr_from_projection(&select.projection[5]) ); assert_eq!( &Expr::Value( - (Value::QuoteDelimitedStringLiteral(qds('<', "<'))).with_empty_span() + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('<', "<'))) + .with_empty_span() ), expr_from_projection(&select.projection[6]) ); assert_eq!( &Expr::Value( - (Value::QuoteDelimitedStringLiteral(qds('(', "'abc'('abc", ')'))).with_empty_span() + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "'abc'('abc", ')'))) + .with_empty_span() ), expr_from_projection(&select.projection[7]) ); assert_eq!( &Expr::Value( - (Value::QuoteDelimitedStringLiteral(qds('(', "abc'def)", ')'))).with_empty_span() + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "abc'def)", ')'))) + .with_empty_span() ), expr_from_projection(&select.projection[8]) ); assert_eq!( &Expr::Value( - (Value::QuoteDelimitedStringLiteral(qds('(', "abc'def))", ')'))).with_empty_span() + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "abc'def))", ')'))) + .with_empty_span() ), expr_from_projection(&select.projection[9]) ); @@ -222,7 +242,8 @@ fn parse_quote_delimited_string_lowercase() { assert_eq!(1, select.projection.len()); assert_eq!( &Expr::Value( - Value::QuoteDelimitedStringLiteral(qds('!', "a'b'c!d", '!')).with_empty_span() + Value::QuoteDelimitedStringLiteral(quote_delimited_string('!', "a'b'c!d", '!')) + .with_empty_span() ), expr_from_projection(&select.projection[0]) ); @@ -257,7 +278,8 @@ fn parse_national_quote_delimited_string() { assert_eq!(1, select.projection.len()); assert_eq!( &Expr::Value( - Value::NationalQuoteDelimitedStringLiteral(qds('.', "abc", '.')).with_empty_span() + Value::NationalQuoteDelimitedStringLiteral(quote_delimited_string('.', "abc", '.')) + .with_empty_span() ), expr_from_projection(&select.projection[0]) ); @@ -273,8 +295,10 @@ fn parse_national_quote_delimited_string_lowercase() { assert_eq!(1, select.projection.len()); assert_eq!( &Expr::Value( - Value::NationalQuoteDelimitedStringLiteral(qds('!', "a'b'c!d", '!')) - .with_empty_span() + Value::NationalQuoteDelimitedStringLiteral(quote_delimited_string( + '!', "a'b'c!d", '!' + )) + .with_empty_span() ), expr_from_projection(&select.projection[0]) ); From a7cf34a3171ee7ec69194f6e9161628bfadd9a53 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Tue, 16 Dec 2025 13:48:32 +0100 Subject: [PATCH 12/12] Test quote delimited strings against all supported dialects --- tests/sqlparser_oracle.rs | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index 7c7d1d51a..683660369 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -26,7 +26,7 @@ use sqlparser::{ parser::ParserError, tokenizer::Span, }; -use test_utils::{expr_from_projection, number, TestedDialects}; +use test_utils::{all_dialects_where, expr_from_projection, number, TestedDialects}; mod test_utils; @@ -120,6 +120,7 @@ fn plusminus_have_same_precedence_as_strconcat() { #[test] fn parse_quote_delimited_string() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); let sql = "SELECT Q'.abc.', \ Q'Xab'cX', \ Q'|abc'''|', \ @@ -131,7 +132,7 @@ fn parse_quote_delimited_string() { Q'(abc'def))', \ Q'(abc'def)))' \ FROM dual"; - let select = oracle().verified_only_select(sql); + let select = dialect.verified_only_select(sql); assert_eq!(10, select.projection.len()); assert_eq!( &Expr::Value( @@ -207,10 +208,11 @@ fn parse_quote_delimited_string() { #[test] fn parse_invalid_quote_delimited_strings() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); // ~ invalid quote delimiter for q in [' ', '\t', '\r', '\n'] { assert_eq!( - oracle().parse_sql_statements(&format!("SELECT Q'{q}abc{q}' FROM dual")), + dialect.parse_sql_statements(&format!("SELECT Q'{q}abc{q}' FROM dual")), Err(ParserError::TokenizerError( "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into() )), @@ -219,7 +221,7 @@ fn parse_invalid_quote_delimited_strings() { } // ~ invalid eof after quote assert_eq!( - oracle().parse_sql_statements("SELECT Q'"), + dialect.parse_sql_statements("SELECT Q'"), Err(ParserError::TokenizerError( "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into() )), @@ -227,7 +229,7 @@ fn parse_invalid_quote_delimited_strings() { ); // ~ unterminated string assert_eq!( - oracle().parse_sql_statements("SELECT Q'|asdfa...."), + dialect.parse_sql_statements("SELECT Q'|asdfa...."), Err(ParserError::TokenizerError( "Unterminated string literal at Line: 1, Column: 9".into() )), @@ -237,8 +239,9 @@ fn parse_invalid_quote_delimited_strings() { #[test] fn parse_quote_delimited_string_lowercase() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); let sql = "select q'!a'b'c!d!' from dual"; - let select = oracle().verified_only_select_with_canonical(sql, "SELECT Q'!a'b'c!d!' FROM dual"); + let select = dialect.verified_only_select_with_canonical(sql, "SELECT Q'!a'b'c!d!' FROM dual"); assert_eq!(1, select.projection.len()); assert_eq!( &Expr::Value( @@ -251,8 +254,9 @@ fn parse_quote_delimited_string_lowercase() { #[test] fn parse_quote_delimited_string_but_is_a_word() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); let sql = "SELECT q, quux, q.abc FROM dual q"; - let select = oracle().verified_only_select(sql); + let select = dialect.verified_only_select(sql); assert_eq!(3, select.projection.len()); assert_eq!( &Expr::Identifier(Ident::with_span(Span::empty(), "q")), @@ -273,8 +277,9 @@ fn parse_quote_delimited_string_but_is_a_word() { #[test] fn parse_national_quote_delimited_string() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); let sql = "SELECT NQ'.abc.' FROM dual"; - let select = oracle().verified_only_select(sql); + let select = dialect.verified_only_select(sql); assert_eq!(1, select.projection.len()); assert_eq!( &Expr::Value( @@ -287,8 +292,9 @@ fn parse_national_quote_delimited_string() { #[test] fn parse_national_quote_delimited_string_lowercase() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); for prefix in ["nq", "Nq", "nQ", "NQ"] { - let select = oracle().verified_only_select_with_canonical( + let select = dialect.verified_only_select_with_canonical( &format!("select {prefix}'!a'b'c!d!' from dual"), "SELECT NQ'!a'b'c!d!' FROM dual", ); @@ -307,8 +313,9 @@ fn parse_national_quote_delimited_string_lowercase() { #[test] fn parse_national_quote_delimited_string_but_is_a_word() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); let sql = "SELECT nq, nqoo, nq.abc FROM dual q"; - let select = oracle().verified_only_select(sql); + let select = dialect.verified_only_select(sql); assert_eq!(3, select.projection.len()); assert_eq!( &Expr::Identifier(Ident::with_span(Span::empty(), "nq")),