From 575c7751cba8da45991845d800a44e629992020c Mon Sep 17 00:00:00 2001 From: ydah Date: Mon, 29 Dec 2025 18:11:09 +0900 Subject: [PATCH] Implement `%inline` feature compliant with Menhir specification This PR makes Lrama's inline feature compliant with the [Menhir](https://gallium.inria.fr/~fpottier/menhir/) specification. Changed from macro-like substitution (simple string replacement of `$n`) to a temporary variable binding approach. Before: ```c // Simple substitution of $2 with inline_action_string $$ = $1 inline_action_string $3; ``` After: ```c // Variable binding ensures single evaluation YYSTYPE _inline_2; { _inline_2 = '+'; } $$ = $1 _inline_2 $3; ``` This resolves the issue where inline actions with side effects could be evaluated multiple times. Added a Validator that detects the following errors per Menhir specification: - Direct recursion (inline rule references itself) - Mutual recursion (multiple inline rules form a reference cycle) - Start symbol declared as inline Added an option to ignore all `%inline` keywords in the grammar specification. Useful for verifying whether inlining contributes to conflict resolution. 
Added samples demonstrating the core use case of `%inline` (resolving precedence conflicts): - `sample/calc_inline.y`: Resolves precedence issue with `%inline` - `sample/calc_no_inline.y`: Shows conflicts without `%inline` - [Menhir Reference Manual - Inlining](https://gallium.inria.fr/~fpottier/menhir/manual.html#sec%3Ainlining) - [Menhir GitHub Repository](https://github.com/LexiFi/menhir) --- lib/lrama/command.rb | 1 + lib/lrama/grammar.rb | 13 +- lib/lrama/grammar/inline.rb | 1 + lib/lrama/grammar/inline/resolver.rb | 42 +- lib/lrama/grammar/inline/validator.rb | 83 +++ lib/lrama/option_parser.rb | 3 + lib/lrama/options.rb | 2 + sample/calc.output | 497 ++++++++++++++++++ sample/calc.y | 11 +- sample/calc_inline.output | 488 +++++++++++++++++ sample/calc_inline.y | 107 ++++ sample/calc_no_inline.output | 162 ++++++ sample/calc_no_inline.y | 94 ++++ sig/generated/lrama/grammar.rbs | 5 + .../lrama/grammar/inline/validator.rbs | 40 ++ sig/generated/lrama/options.rbs | 2 + spec/fixtures/inlining/basic.y | 12 +- spec/fixtures/inlining/menhir_style.y | 48 ++ spec/fixtures/inlining/precedence_test.y | 54 ++ spec/fixtures/inlining/recursive_direct.y | 41 ++ spec/fixtures/inlining/recursive_mutual.y | 45 ++ spec/fixtures/inlining/resolve_index.y | 4 +- spec/fixtures/inlining/resolve_index_at.y | 4 +- .../fixtures/inlining/resolve_index_reverse.y | 4 +- .../inlining/rhs_include_parameterized.y | 4 +- spec/fixtures/inlining/with_parameters.y | 4 +- spec/lrama/grammar_spec.rb | 28 + spec/lrama/option_parser_spec.rb | 3 + spec/lrama/parser_spec.rb | 103 +++- 29 files changed, 1860 insertions(+), 45 deletions(-) create mode 100644 lib/lrama/grammar/inline/validator.rb create mode 100644 sample/calc_inline.output create mode 100644 sample/calc_inline.y create mode 100644 sample/calc_no_inline.output create mode 100644 sample/calc_no_inline.y create mode 100644 sig/generated/lrama/grammar/inline/validator.rbs create mode 100644 spec/fixtures/inlining/menhir_style.y create mode 
100644 spec/fixtures/inlining/precedence_test.y create mode 100644 spec/fixtures/inlining/recursive_direct.y create mode 100644 spec/fixtures/inlining/recursive_mutual.y diff --git a/lib/lrama/command.rb b/lib/lrama/command.rb index 17aad1a1..376327e2 100644 --- a/lib/lrama/command.rb +++ b/lib/lrama/command.rb @@ -77,6 +77,7 @@ def merge_stdlib(grammar) end def prepare_grammar(grammar) + grammar.no_inline = @options.no_inline grammar.prepare grammar.validate! end diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb index 95a80bb0..84161ec9 100644 --- a/lib/lrama/grammar.rb +++ b/lib/lrama/grammar.rb @@ -100,6 +100,7 @@ class Grammar attr_accessor :locations #: bool attr_accessor :define #: Hash[String, String] attr_accessor :required #: bool + attr_accessor :no_inline #: bool def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term, :find_term_by_s_value, :find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol, @@ -133,6 +134,7 @@ def initialize(rule_counter, locations, define = {}) @required = false @precedences = [] @start_nterm = nil + @no_inline = false append_special_symbols end @@ -254,7 +256,10 @@ def epilogue=(epilogue) # @rbs () -> void def prepare - resolve_inline_rules + unless @no_inline + validate_inline_rules + resolve_inline_rules + end normalize_rules collect_symbols set_lhs_and_rhs @@ -438,6 +443,12 @@ def append_special_symbols @accept_symbol = term end + # @rbs () -> void + def validate_inline_rules + validator = Inline::Validator.new(@parameterized_resolver, @start_nterm) + validator.validate! + end + # @rbs () -> void def resolve_inline_rules while @rule_builders.any?(&:has_inline_rules?) 
do diff --git a/lib/lrama/grammar/inline.rb b/lib/lrama/grammar/inline.rb index c02ab600..aa844448 100644 --- a/lib/lrama/grammar/inline.rb +++ b/lib/lrama/grammar/inline.rb @@ -1,3 +1,4 @@ # frozen_string_literal: true require_relative 'inline/resolver' +require_relative 'inline/validator' diff --git a/lib/lrama/grammar/inline/resolver.rb b/lib/lrama/grammar/inline/resolver.rb index aca689cc..07909b04 100644 --- a/lib/lrama/grammar/inline/resolver.rb +++ b/lib/lrama/grammar/inline/resolver.rb @@ -66,13 +66,45 @@ def replace_user_code(rhs, index) user_code = @rule_builder.user_code return user_code if rhs.user_code.nil? || user_code.nil? - code = user_code.s_value.gsub(/\$#{index + 1}/, rhs.user_code.s_value) + inline_action = rhs.user_code.s_value + inline_var = "_inline_#{index + 1}" + + # Replace $$ or $<tag>$ in inline action with the temporary variable + # $$ -> _inline_n, $<tag>$ -> _inline_n.tag + inline_action_with_var = inline_action.gsub(/\$(<(\w+)>)?\$/) do |_match| + if $2 + "#{inline_var}.#{$2}" + else + inline_var + end + end + + # Build the merged action with variable binding + # First, adjust $n references in the outer action for the expanded RHS + # index is 0-indexed position, ref.index is 1-indexed ($1, $2, etc.) + # We need to adjust references AFTER the inline position (index + 1 in 1-indexed terms) + # So we skip: nil ($$), and positions <= index + 1 (the inline position itself) + outer_code = user_code.s_value user_code.references.each do |ref| - next if ref.index.nil? || ref.index <= index # nil は $$ の場合 - code = code.gsub(/\$#{ref.index}/, "$#{ref.index + (rhs.symbols.count - 1)}") - code = code.gsub(/@#{ref.index}/, "@#{ref.index + (rhs.symbols.count - 1)}") + next if ref.index.nil? 
|| ref.index <= index + 1 # nil は $$、index + 1 は inline 位置 + outer_code = outer_code.gsub(/\$#{ref.index}/, "$#{ref.index + (rhs.symbols.count - 1)}") + outer_code = outer_code.gsub(/@#{ref.index}/, "@#{ref.index + (rhs.symbols.count - 1)}") end - Lrama::Lexer::Token::UserCode.new(s_value: code, location: user_code.location) + + # Replace $n or $<tag>n (the inline symbol reference) with the temporary variable + # $n -> _inline_n, $<tag>n -> _inline_n.tag + outer_code = outer_code.gsub(/\$(<(\w+)>)?#{index + 1}/) do |_match| + if $2 + "#{inline_var}.#{$2}" + else + inline_var + end + end + + # Combine: declare temp var, execute inline action, then outer action + merged_code = " YYSTYPE #{inline_var}; { #{inline_action_with_var} } #{outer_code}" + + Lrama::Lexer::Token::UserCode.new(s_value: merged_code, location: user_code.location) end end end diff --git a/lib/lrama/grammar/inline/validator.rb b/lib/lrama/grammar/inline/validator.rb new file mode 100644 index 00000000..f6546ebf --- /dev/null +++ b/lib/lrama/grammar/inline/validator.rb @@ -0,0 +1,83 @@ +# rbs_inline: enabled +# frozen_string_literal: true + +module Lrama + class Grammar + class Inline + # Validates inline rules according to Menhir specification. + # Detects: + # - Direct recursion (inline rule references itself) + # - Mutual recursion (inline rules reference each other in a cycle) + # - Start symbol declared as inline + class Validator + class RecursiveInlineError < StandardError; end + class StartSymbolInlineError < StandardError; end + + # @rbs (Lrama::Grammar::Parameterized::Resolver parameterized_resolver, Lexer::Token::Base? start_nterm) -> void + def initialize(parameterized_resolver, start_nterm = nil) + @parameterized_resolver = parameterized_resolver + @start_nterm = start_nterm + end + + # @rbs () -> void + def validate! + inline_rules = collect_inline_rules + return if inline_rules.empty? 
+ + validate_no_start_symbol_inline(inline_rules) + validate_no_recursion(inline_rules) + end + + private + + # @rbs () -> Array[Lrama::Grammar::Parameterized::Rule] + def collect_inline_rules + @parameterized_resolver.rules.select(&:inline?) + end + + # @rbs (Array[Lrama::Grammar::Parameterized::Rule] inline_rules) -> void + def validate_no_start_symbol_inline(inline_rules) + return unless @start_nterm + + start_symbol_name = @start_nterm.s_value + inline_names = inline_rules.map(&:name) + + if inline_names.include?(start_symbol_name) + raise StartSymbolInlineError, "Start symbol '#{start_symbol_name}' cannot be declared as inline." + end + end + + # @rbs (Array[Lrama::Grammar::Parameterized::Rule] inline_rules) -> void + def validate_no_recursion(inline_rules) + inline_names = inline_rules.map(&:name).to_set + + inline_rules.each do |rule| + check_recursion(rule, inline_names, Set.new) + end + end + + # @rbs (Lrama::Grammar::Parameterized::Rule rule, Set[String] inline_names, Set[String] visited) -> void + def check_recursion(rule, inline_names, visited) + if visited.include?(rule.name) + raise RecursiveInlineError, "Recursive inline definition detected: #{visited.to_a.join(' -> ')} -> #{rule.name}. Inline rules cannot reference themselves directly or indirectly." + end + + new_visited = visited + [rule.name] + + rule.rhs.each do |rhs| + rhs.symbols.each do |symbol| + symbol_name = symbol.s_value + + if inline_names.include?(symbol_name) + referenced_rule = @parameterized_resolver.rules.find { |r| r.name == symbol_name && r.inline? 
} + if referenced_rule + check_recursion(referenced_rule, inline_names, new_visited) + end + end + end + end + end + end + end + end +end diff --git a/lib/lrama/option_parser.rb b/lib/lrama/option_parser.rb index 5a15d59c..bbc1182f 100644 --- a/lib/lrama/option_parser.rb +++ b/lib/lrama/option_parser.rb @@ -132,6 +132,9 @@ def parse_by_option_parser(argv) o.separator 'Error Recovery:' o.on('-e', 'enable error recovery') {|v| @options.error_recovery = true } o.separator '' + o.separator 'Grammar Processing:' + o.on('--no-inline', 'ignore all %inline keywords') {|v| @options.no_inline = true } + o.separator '' o.separator 'Other options:' o.on('-V', '--version', "output version information and exit") {|v| puts "lrama #{Lrama::VERSION}"; exit 0 } o.on('-h', '--help', "display this help and exit") {|v| puts o; exit 0 } diff --git a/lib/lrama/options.rb b/lib/lrama/options.rb index 87aec624..6e26c5e1 100644 --- a/lib/lrama/options.rb +++ b/lib/lrama/options.rb @@ -21,6 +21,7 @@ class Options attr_accessor :diagram #: bool attr_accessor :diagram_file #: String attr_accessor :profile_opts #: Hash[Symbol, bool]? + attr_accessor :no_inline #: bool # @rbs () -> void def initialize @@ -41,6 +42,7 @@ def initialize @diagram = false @diagram_file = "diagram.html" @profile_opts = nil + @no_inline = false end end end diff --git a/sample/calc.output b/sample/calc.output index df37bb59..85be69cd 100644 --- a/sample/calc.output +++ b/sample/calc.output @@ -1,3 +1,71 @@ +Rule Usage Frequency + + 0 expr (10 times) + 1 list (3 times) + 2 LF (2 times) + 3 '(' (1 times) + 4 ')' (1 times) + 5 '*' (1 times) + 6 '+' (1 times) + 7 '-' (1 times) + 8 '/' (1 times) + 9 NUM (1 times) + 10 YYEOF (1 times) + + +11 Terms + +3 Non-Terminals + +2 Unused Terms + + 0 YYerror + 1 YYUNDEF + + +Precedences + + precedence on '+' is used to resolve conflict on + LALR + state 14. Conflict between reduce by "expr -> expr '+' expr" and shift '+' resolved as reduce (%left '+'). + state 14. 
Conflict between reduce by "expr -> expr '+' expr" and shift '-' resolved as reduce (%left '-'). + state 14. Conflict between reduce by "expr -> expr '+' expr" and shift '*' resolved as shift ('+' < '*'). + state 14. Conflict between reduce by "expr -> expr '+' expr" and shift '/' resolved as shift ('+' < '/'). + state 15. Conflict between reduce by "expr -> expr '-' expr" and shift '+' resolved as reduce (%left '+'). + state 16. Conflict between reduce by "expr -> expr '*' expr" and shift '+' resolved as reduce ('+' < '*'). + state 17. Conflict between reduce by "expr -> expr '/' expr" and shift '+' resolved as reduce ('+' < '/'). + + precedence on '-' is used to resolve conflict on + LALR + state 14. Conflict between reduce by "expr -> expr '+' expr" and shift '-' resolved as reduce (%left '-'). + state 15. Conflict between reduce by "expr -> expr '-' expr" and shift '+' resolved as reduce (%left '+'). + state 15. Conflict between reduce by "expr -> expr '-' expr" and shift '-' resolved as reduce (%left '-'). + state 15. Conflict between reduce by "expr -> expr '-' expr" and shift '*' resolved as shift ('-' < '*'). + state 15. Conflict between reduce by "expr -> expr '-' expr" and shift '/' resolved as shift ('-' < '/'). + state 16. Conflict between reduce by "expr -> expr '*' expr" and shift '-' resolved as reduce ('-' < '*'). + state 17. Conflict between reduce by "expr -> expr '/' expr" and shift '-' resolved as reduce ('-' < '/'). + + precedence on '*' is used to resolve conflict on + LALR + state 14. Conflict between reduce by "expr -> expr '+' expr" and shift '*' resolved as shift ('+' < '*'). + state 15. Conflict between reduce by "expr -> expr '-' expr" and shift '*' resolved as shift ('-' < '*'). + state 16. Conflict between reduce by "expr -> expr '*' expr" and shift '+' resolved as reduce ('+' < '*'). + state 16. Conflict between reduce by "expr -> expr '*' expr" and shift '-' resolved as reduce ('-' < '*'). + state 16. 
Conflict between reduce by "expr -> expr '*' expr" and shift '*' resolved as reduce (%left '*'). + state 16. Conflict between reduce by "expr -> expr '*' expr" and shift '/' resolved as reduce (%left '/'). + state 17. Conflict between reduce by "expr -> expr '/' expr" and shift '*' resolved as reduce (%left '*'). + + precedence on '/' is used to resolve conflict on + LALR + state 14. Conflict between reduce by "expr -> expr '+' expr" and shift '/' resolved as shift ('+' < '/'). + state 15. Conflict between reduce by "expr -> expr '-' expr" and shift '/' resolved as shift ('-' < '/'). + state 16. Conflict between reduce by "expr -> expr '*' expr" and shift '/' resolved as reduce (%left '/'). + state 17. Conflict between reduce by "expr -> expr '/' expr" and shift '+' resolved as reduce ('+' < '/'). + state 17. Conflict between reduce by "expr -> expr '/' expr" and shift '-' resolved as reduce ('-' < '/'). + state 17. Conflict between reduce by "expr -> expr '/' expr" and shift '*' resolved as reduce (%left '*'). + state 17. Conflict between reduce by "expr -> expr '/' expr" and shift '/' resolved as reduce (%left '/'). 
+ + Grammar 0 $accept: list "end of file" @@ -25,6 +93,34 @@ State 0 list go to state 1 + [Direct Read sets] + read list shift YYEOF, LF, NUM, '(' + + [Reads Relation] + + [Read sets] + YYEOF + LF + NUM + '(' + + [Includes Relation] + + [Lookback Relation] + (Rule: list -> ε) -> (State 0, list) + + [Follow sets] + list -> YYEOF + list -> LF + list -> NUM + list -> '(' + + [Look-Ahead Sets] + YYEOF reduce using rule 1 (list) + LF reduce using rule 1 (list) + NUM reduce using rule 1 (list) + '(' reduce using rule 1 (list) + State 1 @@ -45,6 +141,30 @@ State 1 expr go to state 6 + [Direct Read sets] + read expr shift LF, '+', '-', '*', '/' + + [Reads Relation] + + [Read sets] + LF + '+' + '-' + '*' + '/' + + [Includes Relation] + + [Lookback Relation] + + [Follow sets] + expr -> LF + expr -> '+' + expr -> '-' + expr -> '*' + expr -> '/' + + [Look-Ahead Sets] State 2 @@ -52,6 +172,19 @@ State 2 $default accept + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + + [Follow sets] + + [Look-Ahead Sets] State 3 @@ -59,6 +192,25 @@ State 3 $default reduce using rule 2 (list) + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + (Rule: list -> list LF) -> (State 0, list) + + [Follow sets] + + [Look-Ahead Sets] + YYEOF reduce using rule 2 (list) + LF reduce using rule 2 (list) + NUM reduce using rule 2 (list) + '(' reduce using rule 2 (list) + State 4 @@ -66,6 +218,32 @@ State 4 $default reduce using rule 4 (expr) + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + (Rule: expr -> NUM) -> (State 1, expr) + (Rule: expr -> NUM) -> (State 5, expr) + (Rule: expr -> NUM) -> (State 9, expr) + (Rule: expr -> NUM) -> (State 10, expr) + (Rule: expr -> NUM) -> (State 11, expr) + (Rule: expr -> NUM) -> (State 12, expr) + + [Follow sets] + + [Look-Ahead Sets] + LF reduce using rule 4 (expr) + '+' reduce using rule 4 (expr) + 
'-' reduce using rule 4 (expr) + '*' reduce using rule 4 (expr) + '/' reduce using rule 4 (expr) + ')' reduce using rule 4 (expr) + State 5 @@ -82,6 +260,30 @@ State 5 expr go to state 7 + [Direct Read sets] + read expr shift '+', '-', '*', '/', ')' + + [Reads Relation] + + [Read sets] + '+' + '-' + '*' + '/' + ')' + + [Includes Relation] + + [Lookback Relation] + + [Follow sets] + expr -> '+' + expr -> '-' + expr -> '*' + expr -> '/' + expr -> ')' + + [Look-Ahead Sets] State 6 @@ -97,6 +299,19 @@ State 6 '*' shift, and go to state 11 '/' shift, and go to state 12 + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + + [Follow sets] + + [Look-Ahead Sets] State 7 @@ -112,6 +327,19 @@ State 7 '/' shift, and go to state 12 ')' shift, and go to state 13 + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + + [Follow sets] + + [Look-Ahead Sets] State 8 @@ -119,6 +347,25 @@ State 8 $default reduce using rule 3 (list) + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + (Rule: list -> list expr LF) -> (State 0, list) + + [Follow sets] + + [Look-Ahead Sets] + YYEOF reduce using rule 3 (list) + LF reduce using rule 3 (list) + NUM reduce using rule 3 (list) + '(' reduce using rule 3 (list) + State 9 @@ -135,6 +382,36 @@ State 9 expr go to state 14 + [Direct Read sets] + read expr shift '+', '-', '*', '/' + + [Reads Relation] + + [Read sets] + '+' + '-' + '*' + '/' + + [Includes Relation] + (State 9, expr) -> (State 1, expr) + (State 9, expr) -> (State 5, expr) + (State 9, expr) -> (State 9, expr) + (State 9, expr) -> (State 10, expr) + (State 9, expr) -> (State 11, expr) + (State 9, expr) -> (State 12, expr) + + [Lookback Relation] + + [Follow sets] + expr -> LF + expr -> '+' + expr -> '-' + expr -> '*' + expr -> '/' + expr -> ')' + + [Look-Ahead Sets] State 10 @@ -151,6 +428,36 @@ State 10 expr go to state 15 + 
[Direct Read sets] + read expr shift '+', '-', '*', '/' + + [Reads Relation] + + [Read sets] + '+' + '-' + '*' + '/' + + [Includes Relation] + (State 10, expr) -> (State 1, expr) + (State 10, expr) -> (State 5, expr) + (State 10, expr) -> (State 9, expr) + (State 10, expr) -> (State 10, expr) + (State 10, expr) -> (State 11, expr) + (State 10, expr) -> (State 12, expr) + + [Lookback Relation] + + [Follow sets] + expr -> LF + expr -> '+' + expr -> '-' + expr -> '*' + expr -> '/' + expr -> ')' + + [Look-Ahead Sets] State 11 @@ -167,6 +474,36 @@ State 11 expr go to state 16 + [Direct Read sets] + read expr shift '+', '-', '*', '/' + + [Reads Relation] + + [Read sets] + '+' + '-' + '*' + '/' + + [Includes Relation] + (State 11, expr) -> (State 1, expr) + (State 11, expr) -> (State 5, expr) + (State 11, expr) -> (State 9, expr) + (State 11, expr) -> (State 10, expr) + (State 11, expr) -> (State 11, expr) + (State 11, expr) -> (State 12, expr) + + [Lookback Relation] + + [Follow sets] + expr -> LF + expr -> '+' + expr -> '-' + expr -> '*' + expr -> '/' + expr -> ')' + + [Look-Ahead Sets] State 12 @@ -183,6 +520,36 @@ State 12 expr go to state 17 + [Direct Read sets] + read expr shift '+', '-', '*', '/' + + [Reads Relation] + + [Read sets] + '+' + '-' + '*' + '/' + + [Includes Relation] + (State 12, expr) -> (State 1, expr) + (State 12, expr) -> (State 5, expr) + (State 12, expr) -> (State 9, expr) + (State 12, expr) -> (State 10, expr) + (State 12, expr) -> (State 11, expr) + (State 12, expr) -> (State 12, expr) + + [Lookback Relation] + + [Follow sets] + expr -> LF + expr -> '+' + expr -> '-' + expr -> '*' + expr -> '/' + expr -> ')' + + [Look-Ahead Sets] State 13 @@ -190,6 +557,32 @@ State 13 $default reduce using rule 9 (expr) + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + (Rule: expr -> '(' expr ')') -> (State 1, expr) + (Rule: expr -> '(' expr ')') -> (State 5, expr) + (Rule: expr -> '(' expr ')') -> 
(State 9, expr) + (Rule: expr -> '(' expr ')') -> (State 10, expr) + (Rule: expr -> '(' expr ')') -> (State 11, expr) + (Rule: expr -> '(' expr ')') -> (State 12, expr) + + [Follow sets] + + [Look-Ahead Sets] + LF reduce using rule 9 (expr) + '+' reduce using rule 9 (expr) + '-' reduce using rule 9 (expr) + '*' reduce using rule 9 (expr) + '/' reduce using rule 9 (expr) + ')' reduce using rule 9 (expr) + State 14 @@ -209,6 +602,32 @@ State 14 Conflict between rule 5 and token '*' resolved as shift ('+' < '*'). Conflict between rule 5 and token '/' resolved as shift ('+' < '/'). + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + (Rule: expr -> expr '+' expr) -> (State 1, expr) + (Rule: expr -> expr '+' expr) -> (State 5, expr) + (Rule: expr -> expr '+' expr) -> (State 9, expr) + (Rule: expr -> expr '+' expr) -> (State 10, expr) + (Rule: expr -> expr '+' expr) -> (State 11, expr) + (Rule: expr -> expr '+' expr) -> (State 12, expr) + + [Follow sets] + + [Look-Ahead Sets] + LF reduce using rule 5 (expr) + '+' reduce using rule 5 (expr) + '-' reduce using rule 5 (expr) + '*' reduce using rule 5 (expr) + '/' reduce using rule 5 (expr) + ')' reduce using rule 5 (expr) + State 15 @@ -228,6 +647,32 @@ State 15 Conflict between rule 6 and token '*' resolved as shift ('-' < '*'). Conflict between rule 6 and token '/' resolved as shift ('-' < '/'). 
+ [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + (Rule: expr -> expr '-' expr) -> (State 1, expr) + (Rule: expr -> expr '-' expr) -> (State 5, expr) + (Rule: expr -> expr '-' expr) -> (State 9, expr) + (Rule: expr -> expr '-' expr) -> (State 10, expr) + (Rule: expr -> expr '-' expr) -> (State 11, expr) + (Rule: expr -> expr '-' expr) -> (State 12, expr) + + [Follow sets] + + [Look-Ahead Sets] + LF reduce using rule 6 (expr) + '+' reduce using rule 6 (expr) + '-' reduce using rule 6 (expr) + '*' reduce using rule 6 (expr) + '/' reduce using rule 6 (expr) + ')' reduce using rule 6 (expr) + State 16 @@ -244,6 +689,32 @@ State 16 Conflict between rule 7 and token '*' resolved as reduce (%left '*'). Conflict between rule 7 and token '/' resolved as reduce (%left '/'). + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + (Rule: expr -> expr '*' expr) -> (State 1, expr) + (Rule: expr -> expr '*' expr) -> (State 5, expr) + (Rule: expr -> expr '*' expr) -> (State 9, expr) + (Rule: expr -> expr '*' expr) -> (State 10, expr) + (Rule: expr -> expr '*' expr) -> (State 11, expr) + (Rule: expr -> expr '*' expr) -> (State 12, expr) + + [Follow sets] + + [Look-Ahead Sets] + LF reduce using rule 7 (expr) + '+' reduce using rule 7 (expr) + '-' reduce using rule 7 (expr) + '*' reduce using rule 7 (expr) + '/' reduce using rule 7 (expr) + ')' reduce using rule 7 (expr) + State 17 @@ -260,4 +731,30 @@ State 17 Conflict between rule 8 and token '*' resolved as reduce (%left '*'). Conflict between rule 8 and token '/' resolved as reduce (%left '/'). 
+ [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + (Rule: expr -> expr '/' expr) -> (State 1, expr) + (Rule: expr -> expr '/' expr) -> (State 5, expr) + (Rule: expr -> expr '/' expr) -> (State 9, expr) + (Rule: expr -> expr '/' expr) -> (State 10, expr) + (Rule: expr -> expr '/' expr) -> (State 11, expr) + (Rule: expr -> expr '/' expr) -> (State 12, expr) + + [Follow sets] + + [Look-Ahead Sets] + LF reduce using rule 8 (expr) + '+' reduce using rule 8 (expr) + '-' reduce using rule 8 (expr) + '*' reduce using rule 8 (expr) + '/' reduce using rule 8 (expr) + ')' reduce using rule 8 (expr) + diff --git a/sample/calc.y b/sample/calc.y index b4c4ca30..45466856 100644 --- a/sample/calc.y +++ b/sample/calc.y @@ -33,12 +33,6 @@ static int yyerror(YYLTYPE *loc, const char *str); %locations -%rule %inline op : '+' { + } - | '-' { - } - | '*' { * } - | '/' { / } - ; - %% list : /* empty */ @@ -46,7 +40,10 @@ list : /* empty */ | list expr LF { printf("=> %d\n", $2); } ; expr : NUM - | expr op expr { $$ = $1 $2 $3; } + | expr '+' expr { $$ = $1 + $3; } + | expr '-' expr { $$ = $1 - $3; } + | expr '*' expr { $$ = $1 * $3; } + | expr '/' expr { $$ = $1 / $3; } | '(' expr ')' { $$ = $2; } ; diff --git a/sample/calc_inline.output b/sample/calc_inline.output new file mode 100644 index 00000000..5daa7eb0 --- /dev/null +++ b/sample/calc_inline.output @@ -0,0 +1,488 @@ +Rule Usage Frequency + + 0 expr (6 times) + 1 program (3 times) + 2 LF (2 times) + 3 '(' (1 times) + 4 ')' (1 times) + 5 NUM (1 times) + 6 PLUS (1 times) + 7 TIMES (1 times) + 8 YYEOF (1 times) + + +9 Terms + +3 Non-Terminals + +2 Unused Terms + + 0 YYerror + 1 YYUNDEF + + +Precedences + + precedence on PLUS is used to resolve conflict on + LALR + state 12. Conflict between reduce by "expr -> expr PLUS expr" and shift PLUS resolved as reduce (%left PLUS). + state 12. 
Conflict between reduce by "expr -> expr PLUS expr" and shift TIMES resolved as shift (PLUS < TIMES). + state 13. Conflict between reduce by "expr -> expr TIMES expr" and shift PLUS resolved as reduce (PLUS < TIMES). + + precedence on TIMES is used to resolve conflict on + LALR + state 12. Conflict between reduce by "expr -> expr PLUS expr" and shift TIMES resolved as shift (PLUS < TIMES). + state 13. Conflict between reduce by "expr -> expr TIMES expr" and shift PLUS resolved as reduce (PLUS < TIMES). + state 13. Conflict between reduce by "expr -> expr TIMES expr" and shift TIMES resolved as reduce (%left TIMES). + + +Grammar + + 0 $accept: program "end of file" + + 1 program: ε + 2 | program expr LF + 3 | program LF + + 4 expr: NUM + 5 | expr PLUS expr + 6 | expr TIMES expr + 7 | '(' expr ')' + + +State 0 + + 0 $accept: • program "end of file" + 1 program: ε • + 2 | • program expr LF + 3 | • program LF + + $default reduce using rule 1 (program) + + program go to state 1 + + [Direct Read sets] + read program shift YYEOF, NUM, LF, '(' + + [Reads Relation] + + [Read sets] + YYEOF + NUM + LF + '(' + + [Includes Relation] + + [Lookback Relation] + (Rule: program -> ε) -> (State 0, program) + + [Follow sets] + program -> YYEOF + program -> NUM + program -> LF + program -> '(' + + [Look-Ahead Sets] + YYEOF reduce using rule 1 (program) + NUM reduce using rule 1 (program) + LF reduce using rule 1 (program) + '(' reduce using rule 1 (program) + + +State 1 + + 0 $accept: program • "end of file" + 2 program: program • expr LF + 3 | program • LF + 4 expr: • NUM + 5 | • expr PLUS expr + 6 | • expr TIMES expr + 7 | • '(' expr ')' + + "end of file" shift, and go to state 2 + NUM shift, and go to state 3 + LF shift, and go to state 4 + '(' shift, and go to state 5 + + expr go to state 6 + + [Direct Read sets] + read expr shift PLUS, TIMES, LF + + [Reads Relation] + + [Read sets] + PLUS + TIMES + LF + + [Includes Relation] + + [Lookback Relation] + + [Follow sets] + expr -> PLUS 
+ expr -> TIMES + expr -> LF + + [Look-Ahead Sets] + +State 2 + + 0 $accept: program "end of file" • + + $default accept + + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + + [Follow sets] + + [Look-Ahead Sets] + +State 3 + + 4 expr: NUM • + + $default reduce using rule 4 (expr) + + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + (Rule: expr -> NUM) -> (State 1, expr) + (Rule: expr -> NUM) -> (State 5, expr) + (Rule: expr -> NUM) -> (State 8, expr) + (Rule: expr -> NUM) -> (State 9, expr) + + [Follow sets] + + [Look-Ahead Sets] + PLUS reduce using rule 4 (expr) + TIMES reduce using rule 4 (expr) + LF reduce using rule 4 (expr) + ')' reduce using rule 4 (expr) + + +State 4 + + 3 program: program LF • + + $default reduce using rule 3 (program) + + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + (Rule: program -> program LF) -> (State 0, program) + + [Follow sets] + + [Look-Ahead Sets] + YYEOF reduce using rule 3 (program) + NUM reduce using rule 3 (program) + LF reduce using rule 3 (program) + '(' reduce using rule 3 (program) + + +State 5 + + 4 expr: • NUM + 5 | • expr PLUS expr + 6 | • expr TIMES expr + 7 | • '(' expr ')' + 7 | '(' • expr ')' + + NUM shift, and go to state 3 + '(' shift, and go to state 5 + + expr go to state 7 + + [Direct Read sets] + read expr shift PLUS, TIMES, ')' + + [Reads Relation] + + [Read sets] + PLUS + TIMES + ')' + + [Includes Relation] + + [Lookback Relation] + + [Follow sets] + expr -> PLUS + expr -> TIMES + expr -> ')' + + [Look-Ahead Sets] + +State 6 + + 2 program: program expr • LF + 5 expr: expr • PLUS expr + 6 | expr • TIMES expr + + PLUS shift, and go to state 8 + TIMES shift, and go to state 9 + LF shift, and go to state 10 + + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + + [Follow sets] + + 
[Look-Ahead Sets] + +State 7 + + 5 expr: expr • PLUS expr + 6 | expr • TIMES expr + 7 | '(' expr • ')' + + PLUS shift, and go to state 8 + TIMES shift, and go to state 9 + ')' shift, and go to state 11 + + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + + [Follow sets] + + [Look-Ahead Sets] + +State 8 + + 4 expr: • NUM + 5 | • expr PLUS expr + 5 | expr PLUS • expr + 6 | • expr TIMES expr + 7 | • '(' expr ')' + + NUM shift, and go to state 3 + '(' shift, and go to state 5 + + expr go to state 12 + + [Direct Read sets] + read expr shift PLUS, TIMES + + [Reads Relation] + + [Read sets] + PLUS + TIMES + + [Includes Relation] + (State 8, expr) -> (State 1, expr) + (State 8, expr) -> (State 5, expr) + (State 8, expr) -> (State 8, expr) + (State 8, expr) -> (State 9, expr) + + [Lookback Relation] + + [Follow sets] + expr -> PLUS + expr -> TIMES + expr -> LF + expr -> ')' + + [Look-Ahead Sets] + +State 9 + + 4 expr: • NUM + 5 | • expr PLUS expr + 6 | • expr TIMES expr + 6 | expr TIMES • expr + 7 | • '(' expr ')' + + NUM shift, and go to state 3 + '(' shift, and go to state 5 + + expr go to state 13 + + [Direct Read sets] + read expr shift PLUS, TIMES + + [Reads Relation] + + [Read sets] + PLUS + TIMES + + [Includes Relation] + (State 9, expr) -> (State 1, expr) + (State 9, expr) -> (State 5, expr) + (State 9, expr) -> (State 8, expr) + (State 9, expr) -> (State 9, expr) + + [Lookback Relation] + + [Follow sets] + expr -> PLUS + expr -> TIMES + expr -> LF + expr -> ')' + + [Look-Ahead Sets] + +State 10 + + 2 program: program expr LF • + + $default reduce using rule 2 (program) + + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + (Rule: program -> program expr LF) -> (State 0, program) + + [Follow sets] + + [Look-Ahead Sets] + YYEOF reduce using rule 2 (program) + NUM reduce using rule 2 (program) + LF reduce using rule 2 (program) + '(' reduce using rule 2 
(program) + + +State 11 + + 7 expr: '(' expr ')' • + + $default reduce using rule 7 (expr) + + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + (Rule: expr -> '(' expr ')') -> (State 1, expr) + (Rule: expr -> '(' expr ')') -> (State 5, expr) + (Rule: expr -> '(' expr ')') -> (State 8, expr) + (Rule: expr -> '(' expr ')') -> (State 9, expr) + + [Follow sets] + + [Look-Ahead Sets] + PLUS reduce using rule 7 (expr) + TIMES reduce using rule 7 (expr) + LF reduce using rule 7 (expr) + ')' reduce using rule 7 (expr) + + +State 12 + + 5 expr: expr • PLUS expr + 5 | expr PLUS expr • [PLUS, LF, ')'] + 6 | expr • TIMES expr + + TIMES shift, and go to state 9 + + $default reduce using rule 5 (expr) + + Conflict between rule 5 and token PLUS resolved as reduce (%left PLUS). + Conflict between rule 5 and token TIMES resolved as shift (PLUS < TIMES). + + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + (Rule: expr -> expr PLUS expr) -> (State 1, expr) + (Rule: expr -> expr PLUS expr) -> (State 5, expr) + (Rule: expr -> expr PLUS expr) -> (State 8, expr) + (Rule: expr -> expr PLUS expr) -> (State 9, expr) + + [Follow sets] + + [Look-Ahead Sets] + PLUS reduce using rule 5 (expr) + TIMES reduce using rule 5 (expr) + LF reduce using rule 5 (expr) + ')' reduce using rule 5 (expr) + + +State 13 + + 5 expr: expr • PLUS expr + 6 | expr • TIMES expr + 6 | expr TIMES expr • [PLUS, TIMES, LF, ')'] + + $default reduce using rule 6 (expr) + + Conflict between rule 6 and token PLUS resolved as reduce (PLUS < TIMES). + Conflict between rule 6 and token TIMES resolved as reduce (%left TIMES). 
+ + [Direct Read sets] + + [Reads Relation] + + [Read sets] + + [Includes Relation] + + [Lookback Relation] + (Rule: expr -> expr TIMES expr) -> (State 1, expr) + (Rule: expr -> expr TIMES expr) -> (State 5, expr) + (Rule: expr -> expr TIMES expr) -> (State 8, expr) + (Rule: expr -> expr TIMES expr) -> (State 9, expr) + + [Follow sets] + + [Look-Ahead Sets] + PLUS reduce using rule 6 (expr) + TIMES reduce using rule 6 (expr) + LF reduce using rule 6 (expr) + ')' reduce using rule 6 (expr) + + diff --git a/sample/calc_inline.y b/sample/calc_inline.y new file mode 100644 index 00000000..9aee38b8 --- /dev/null +++ b/sample/calc_inline.y @@ -0,0 +1,107 @@ +/* + * %inline Use Case: Resolving Precedence Conflicts + * + * PROBLEM (see calc_no_inline.y): + * "expr op expr" has undefined precedence because 'op' is a nonterminal. + * Result: shift/reduce conflicts, and %left PLUS/%left TIMES are unused. + * + * $ lrama -W calc_no_inline.y + * => shift/reduce conflicts: 2 found + * => Precedence PLUS is defined but not used in any rule. + * => Precedence TIMES is defined but not used in any rule. + * + * SOLUTION (this file): + * With %inline, 'op' is expanded inline, producing: + * expr PLUS expr <- PLUS precedence applies + * expr TIMES expr <- TIMES precedence applies + * + * $ lrama -W calc_inline.y + * => no conflicts + * + * Build: + * $ lrama -d calc_inline.y -o calc_inline.c && gcc calc_inline.c -o calc_inline + */ + +%{ +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +%} + +%code provides { +static int yylex(YYSTYPE *val, YYLTYPE *loc); +static int yyerror(YYLTYPE *loc, const char *str); +} + +%union { + int val; +} + +%token <val> NUM +%token PLUS TIMES +%token LF + +%type <val> expr + +%left PLUS +%left TIMES + +%locations + +%% + +program: /* empty */ + | program expr LF { printf("=> %d\n", $2); } + | program LF + ; + +/* + * KEY POINT: + * + * "expr op expr" has undefined precedence because 'op' is nonterminal. 
+ * + * With %inline, this expands to: + * expr PLUS expr <- inherits PLUS precedence + * expr TIMES expr <- inherits TIMES precedence + */ +expr: NUM + | expr op expr { $$ = $2 ? $1 * $3 : $1 + $3; } + | '(' expr ')' { $$ = $2; } + ; + +/* + * Remove "%inline" here to see the conflict. + */ +%rule %inline op: PLUS { $$ = 0; } + | TIMES { $$ = 1; } + ; + +%% + +static int yylex(YYSTYPE *yylval, YYLTYPE *loc) { + int c; + while ((c = getchar()) == ' ' || c == '\t'); + if (isdigit(c)) { + int val = c - '0'; + while (isdigit(c = getchar())) val = val * 10 + (c - '0'); + ungetc(c, stdin); + yylval->val = val; + return NUM; + } + if (c == '+') return PLUS; + if (c == '*') return TIMES; + if (c == '\n') return LF; + if (c == '(' || c == ')') return c; + if (c == EOF) exit(0); + return c; +} + +static int yyerror(YYLTYPE *loc, const char *s) { + fprintf(stderr, "%s\n", s); + return 0; +} + +int main(void) { + printf("Test: 1+2*3 should be 7, (1+2)*3 should be 9\n"); + return yyparse(); +} diff --git a/sample/calc_no_inline.output b/sample/calc_no_inline.output new file mode 100644 index 00000000..fcf082b6 --- /dev/null +++ b/sample/calc_no_inline.output @@ -0,0 +1,162 @@ +State 13 conflicts: 2 shift/reduce + + +Grammar + + 0 $accept: program "end of file" + + 1 program: ε + 2 | program expr LF + 3 | program LF + + 4 expr: NUM + 5 | expr op expr + 6 | '(' expr ')' + + 7 op: PLUS + 8 | TIMES + + +State 0 + + 0 $accept: • program "end of file" + + $default reduce using rule 1 (program) + + program go to state 1 + + +State 1 + + 0 $accept: program • "end of file" + 2 program: program • expr LF + 3 | program • LF + + "end of file" shift, and go to state 2 + NUM shift, and go to state 3 + LF shift, and go to state 4 + '(' shift, and go to state 5 + + expr go to state 6 + + +State 2 + + 0 $accept: program "end of file" • + + $default accept + + +State 3 + + 4 expr: NUM • + + $default reduce using rule 4 (expr) + + +State 4 + + 3 program: program LF • + + $default reduce using 
rule 3 (program) + + +State 5 + + 6 expr: '(' • expr ')' + + NUM shift, and go to state 3 + '(' shift, and go to state 5 + + expr go to state 7 + + +State 6 + + 2 program: program expr • LF + 5 expr: expr • op expr + + PLUS shift, and go to state 8 + TIMES shift, and go to state 9 + LF shift, and go to state 10 + + op go to state 11 + + +State 7 + + 5 expr: expr • op expr + 6 | '(' expr • ')' + + PLUS shift, and go to state 8 + TIMES shift, and go to state 9 + ')' shift, and go to state 12 + + op go to state 11 + + +State 8 + + 7 op: PLUS • + + $default reduce using rule 7 (op) + + +State 9 + + 8 op: TIMES • + + $default reduce using rule 8 (op) + + +State 10 + + 2 program: program expr LF • + + $default reduce using rule 2 (program) + + +State 11 + + 5 expr: expr op • expr + + NUM shift, and go to state 3 + '(' shift, and go to state 5 + + expr go to state 13 + + +State 12 + + 6 expr: '(' expr ')' • + + $default reduce using rule 6 (expr) + + +State 13 + + 5 expr: expr • op expr + 5 | expr op expr • + + Conflict on PLUS. shift/reduce(expr) + PLUS comes from state 1 goto by expr + PLUS comes from state 5 goto by expr + PLUS comes from state 11 goto by expr + + Conflict on TIMES. shift/reduce(expr) + TIMES comes from state 1 goto by expr + TIMES comes from state 5 goto by expr + TIMES comes from state 11 goto by expr + + + PLUS shift, and go to state 8 + TIMES shift, and go to state 9 + + PLUS reduce using rule 5 (expr) + TIMES reduce using rule 5 (expr) + LF reduce using rule 5 (expr) + ')' reduce using rule 5 (expr) + + op go to state 11 + + diff --git a/sample/calc_no_inline.y b/sample/calc_no_inline.y new file mode 100644 index 00000000..ff41a09c --- /dev/null +++ b/sample/calc_no_inline.y @@ -0,0 +1,94 @@ +/* + * %inline Use Case: This file demonstrates the PROBLEM. + * + * "expr op expr" has undefined precedence because 'op' is a nonterminal. + * Even though %left PLUS and %left TIMES are declared, they are NOT used. 
+ * + * $ lrama -W calc_no_inline.y + * => shift/reduce conflicts: 2 found + * => Precedence PLUS is defined but not used in any rule. + * => Precedence TIMES is defined but not used in any rule. + * + * See calc_inline.y for the solution using %inline. + */ + +%{ +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +%} + +%code provides { +static int yylex(YYSTYPE *val, YYLTYPE *loc); +static int yyerror(YYLTYPE *loc, const char *str); +} + +%union { + int val; +} + +%token <val> NUM +%token PLUS TIMES +%token LF + +%type <val> expr + +%left PLUS +%left TIMES + +%locations + +%% + +program: /* empty */ + | program expr LF { printf("=> %d\n", $2); } + | program LF + ; + +/* + * PROBLEM: "expr op expr" has undefined precedence. + * The rightmost terminal of this rule is... what? + * 'op' is nonterminal, so no precedence is inherited. + */ +expr: NUM + | expr op expr { $$ = $2 ? $1 * $3 : $1 + $3; } + | '(' expr ')' { $$ = $2; } + ; + +/* + * 'op' is a regular nonterminal - not inlined. + * This causes the precedence problem above. 
+ */ +op: PLUS { $$ = 0; } + | TIMES { $$ = 1; } + ; + +%% + +static int yylex(YYSTYPE *yylval, YYLTYPE *loc) { + int c; + while ((c = getchar()) == ' ' || c == '\t'); + if (isdigit(c)) { + int val = c - '0'; + while (isdigit(c = getchar())) val = val * 10 + (c - '0'); + ungetc(c, stdin); + yylval->val = val; + return NUM; + } + if (c == '+') return PLUS; + if (c == '*') return TIMES; + if (c == '\n') return LF; + if (c == '(' || c == ')') return c; + if (c == EOF) exit(0); + return c; +} + +static int yyerror(YYLTYPE *loc, const char *s) { + fprintf(stderr, "%s\n", s); + return 0; +} + +int main(void) { + printf("Test: 1+2*3 should be 7, but may be wrong due to conflicts\n"); + return yyparse(); +} diff --git a/sig/generated/lrama/grammar.rbs b/sig/generated/lrama/grammar.rbs index faab4f04..b50138fb 100644 --- a/sig/generated/lrama/grammar.rbs +++ b/sig/generated/lrama/grammar.rbs @@ -136,6 +136,8 @@ module Lrama attr_accessor required: bool + attr_accessor no_inline: bool + # @rbs (Counter rule_counter, bool locations, Hash[String, String] define) -> void def initialize: (Counter rule_counter, bool locations, Hash[String, String] define) -> void @@ -244,6 +246,9 @@ module Lrama # @rbs () -> Grammar::Symbol def append_special_symbols: () -> Grammar::Symbol + # @rbs () -> void + def validate_inline_rules: () -> void + # @rbs () -> void def resolve_inline_rules: () -> void diff --git a/sig/generated/lrama/grammar/inline/validator.rbs b/sig/generated/lrama/grammar/inline/validator.rbs new file mode 100644 index 00000000..74392b23 --- /dev/null +++ b/sig/generated/lrama/grammar/inline/validator.rbs @@ -0,0 +1,40 @@ +# Generated from lib/lrama/grammar/inline/validator.rb with RBS::Inline + +module Lrama + class Grammar + class Inline + # Validates inline rules according to Menhir specification. 
+ # Detects: + # - Direct recursion (inline rule references itself) + # - Mutual recursion (inline rules reference each other in a cycle) + # - Start symbol declared as inline + class Validator + class RecursiveInlineError < StandardError + end + + class StartSymbolInlineError < StandardError + end + + # @rbs (Lrama::Grammar::Parameterized::Resolver parameterized_resolver, Lexer::Token::Base? start_nterm) -> void + def initialize: (Lrama::Grammar::Parameterized::Resolver parameterized_resolver, Lexer::Token::Base? start_nterm) -> void + + # @rbs () -> void + def validate!: () -> void + + private + + # @rbs () -> Array[Lrama::Grammar::Parameterized::Rule] + def collect_inline_rules: () -> Array[Lrama::Grammar::Parameterized::Rule] + + # @rbs (Array[Lrama::Grammar::Parameterized::Rule] inline_rules) -> void + def validate_no_start_symbol_inline: (Array[Lrama::Grammar::Parameterized::Rule] inline_rules) -> void + + # @rbs (Array[Lrama::Grammar::Parameterized::Rule] inline_rules) -> void + def validate_no_recursion: (Array[Lrama::Grammar::Parameterized::Rule] inline_rules) -> void + + # @rbs (Lrama::Grammar::Parameterized::Rule rule, Set[String] inline_names, Set[String] visited) -> void + def check_recursion: (Lrama::Grammar::Parameterized::Rule rule, Set[String] inline_names, Set[String] visited) -> void + end + end + end +end diff --git a/sig/generated/lrama/options.rbs b/sig/generated/lrama/options.rbs index 48ece486..a4913ccd 100644 --- a/sig/generated/lrama/options.rbs +++ b/sig/generated/lrama/options.rbs @@ -37,6 +37,8 @@ module Lrama attr_accessor profile_opts: Hash[Symbol, bool]? 
+ attr_accessor no_inline: bool + # @rbs () -> void def initialize: () -> void end diff --git a/spec/fixtures/inlining/basic.y b/spec/fixtures/inlining/basic.y index 1db03da3..cc24da79 100644 --- a/spec/fixtures/inlining/basic.y +++ b/spec/fixtures/inlining/basic.y @@ -15,10 +15,10 @@ static int yyerror(YYLTYPE *loc, const char *str); %token NUM %type expression -%rule %inline op : '+' { + } - | '-' { - } - | '*' { * } - | '/' { / } +%rule %inline op : '+' { $$ = '+'; } + | '-' { $$ = '-'; } + | '*' { $$ = '*'; } + | '/' { $$ = '/'; } ; %% @@ -29,8 +29,8 @@ expression : NUM ; -%rule %inline other_op : '%' { + 1 + } - | '&' { - 1 - } +%rule %inline other_op : '%' { $$ = '%'; } + | '&' { $$ = '&'; } ; %% diff --git a/spec/fixtures/inlining/menhir_style.y b/spec/fixtures/inlining/menhir_style.y new file mode 100644 index 00000000..4d091e14 --- /dev/null +++ b/spec/fixtures/inlining/menhir_style.y @@ -0,0 +1,48 @@ +/* + * Test case for Menhir-style inline action merging. + * When inline action contains $$, variable binding is used. 
+ */ + +%{ +// Prologue +static int yylex(YYSTYPE *val, YYLTYPE *loc); +static int yyerror(YYLTYPE *loc, const char *str); +%} + +%union { + int i; + int (*fn)(int, int); +} + +%token NUM +%type expression + +/* Menhir-style: inline action returns a value via $$ */ +%rule %inline op: '+' { printf("plus\n"); $$ = add; } + | '*' { printf("times\n"); $$ = mul; } + ; + +%% + +expression: NUM { $$ = $1; } + | expression op expression { $$ = $2($1, $3); } + ; + +%% + +static int add(int a, int b) { return a + b; } +static int mul(int a, int b) { return a * b; } + +static int yylex(YYSTYPE *yylval, YYLTYPE *loc) +{ + return 0; +} + +static int yyerror(YYLTYPE *loc, const char *str) +{ + return 0; +} + +int main(int argc, char *argv[]) +{ +} diff --git a/spec/fixtures/inlining/precedence_test.y b/spec/fixtures/inlining/precedence_test.y new file mode 100644 index 00000000..65ad0a12 --- /dev/null +++ b/spec/fixtures/inlining/precedence_test.y @@ -0,0 +1,54 @@ +/* + * Test case for precedence inheritance through inline expansion. + * This is the core use case of Menhir's %inline feature. + * + * Problem: expression op expression has undefined precedence because + * op is a nonterminal. + * + * Solution: %inline op expands to put PLUS/TIMES directly in the rule, + * allowing precedence to be inherited from the terminal. 
+ */ + +%{ +// Prologue +static int yylex(YYSTYPE *val, YYLTYPE *loc); +static int yyerror(YYLTYPE *loc, const char *str); +%} + +%union { + int i; +} + +%token NUM +%token PLUS TIMES + +%left PLUS +%left TIMES + +%type expression + +%rule %inline op: PLUS { $$ = add; } + | TIMES { $$ = mul; } + ; + +%% + +expression: NUM { $$ = $1; } + | expression op expression { $$ = $1 $2 $3; } + ; + +%% + +static int yylex(YYSTYPE *yylval, YYLTYPE *loc) +{ + return 0; +} + +static int yyerror(YYLTYPE *loc, const char *str) +{ + return 0; +} + +int main(int argc, char *argv[]) +{ +} diff --git a/spec/fixtures/inlining/recursive_direct.y b/spec/fixtures/inlining/recursive_direct.y new file mode 100644 index 00000000..1acb807a --- /dev/null +++ b/spec/fixtures/inlining/recursive_direct.y @@ -0,0 +1,41 @@ +/* + * Test case for direct recursive inline rule. + * This should raise an error. + */ + +%{ +// Prologue +static int yylex(YYSTYPE *val, YYLTYPE *loc); +static int yyerror(YYLTYPE *loc, const char *str); +%} + +%union { + int i; +} + +%token NUM + +%rule %inline a: NUM + | a '+' NUM + ; + +%% + +expression: a + ; + +%% + +static int yylex(YYSTYPE *yylval, YYLTYPE *loc) +{ + return 0; +} + +static int yyerror(YYLTYPE *loc, const char *str) +{ + return 0; +} + +int main(int argc, char *argv[]) +{ +} diff --git a/spec/fixtures/inlining/recursive_mutual.y b/spec/fixtures/inlining/recursive_mutual.y new file mode 100644 index 00000000..b1ca3d2f --- /dev/null +++ b/spec/fixtures/inlining/recursive_mutual.y @@ -0,0 +1,45 @@ +/* + * Test case for mutual recursive inline rules. + * This should raise an error. 
+ */ + +%{ +// Prologue +static int yylex(YYSTYPE *val, YYLTYPE *loc); +static int yyerror(YYLTYPE *loc, const char *str); +%} + +%union { + int i; +} + +%token NUM + +%rule %inline a: NUM + | b '+' NUM + ; + +%rule %inline b: NUM + | a '-' NUM + ; + +%% + +expression: a + ; + +%% + +static int yylex(YYSTYPE *yylval, YYLTYPE *loc) +{ + return 0; +} + +static int yyerror(YYLTYPE *loc, const char *str) +{ + return 0; +} + +int main(int argc, char *argv[]) +{ +} diff --git a/spec/fixtures/inlining/resolve_index.y b/spec/fixtures/inlining/resolve_index.y index 827f7964..23075164 100644 --- a/spec/fixtures/inlining/resolve_index.y +++ b/spec/fixtures/inlining/resolve_index.y @@ -15,8 +15,8 @@ static int yyerror(YYLTYPE *loc, const char *str); %token NUM %type expression -%rule %inline op : '+' { + } - | '+' '=' { += } +%rule %inline op : '+' { $$ = '+'; } + | '+' '=' { $$ = '+='; } ; %% diff --git a/spec/fixtures/inlining/resolve_index_at.y b/spec/fixtures/inlining/resolve_index_at.y index 8979e4c2..d33957c8 100644 --- a/spec/fixtures/inlining/resolve_index_at.y +++ b/spec/fixtures/inlining/resolve_index_at.y @@ -15,8 +15,8 @@ static int yyerror(YYLTYPE *loc, const char *str); %token NUM %type expression -%rule %inline op : '+' { + } - | '+' '=' { += } +%rule %inline op : '+' { $$ = '+'; } + | '+' '=' { $$ = '+='; } ; %% diff --git a/spec/fixtures/inlining/resolve_index_reverse.y b/spec/fixtures/inlining/resolve_index_reverse.y index 55a3e113..afda6e3d 100644 --- a/spec/fixtures/inlining/resolve_index_reverse.y +++ b/spec/fixtures/inlining/resolve_index_reverse.y @@ -15,8 +15,8 @@ static int yyerror(YYLTYPE *loc, const char *str); %token NUM %type expression -%rule %inline op : '+' { + } - | '+' '=' { += } +%rule %inline op : '+' { $$ = '+'; } + | '+' '=' { $$ = '+='; } ; %% diff --git a/spec/fixtures/inlining/rhs_include_parameterized.y b/spec/fixtures/inlining/rhs_include_parameterized.y index 40ba582d..2cebe711 100644 --- 
a/spec/fixtures/inlining/rhs_include_parameterized.y +++ b/spec/fixtures/inlining/rhs_include_parameterized.y @@ -15,8 +15,8 @@ static int yyerror(YYLTYPE *loc, const char *str); %token NUM %type expression -%rule %inline op : '+' { + } - | option('-') '-' { - } +%rule %inline op : '+' { $$ = '+'; } + | option('-') '-' { $$ = '-'; } ; %% diff --git a/spec/fixtures/inlining/with_parameters.y b/spec/fixtures/inlining/with_parameters.y index 39b743f8..25e6cef2 100644 --- a/spec/fixtures/inlining/with_parameters.y +++ b/spec/fixtures/inlining/with_parameters.y @@ -15,8 +15,8 @@ static int yyerror(YYLTYPE *loc, const char *str); %token NUM %type expression -%rule %inline op(p, m) : p { + } - | m { - } +%rule %inline op(p, m) : p { $$ = '+'; } + | m { $$ = '-'; } ; %% diff --git a/spec/lrama/grammar_spec.rb b/spec/lrama/grammar_spec.rb index 3be8eab4..61d00dda 100644 --- a/spec/lrama/grammar_spec.rb +++ b/spec/lrama/grammar_spec.rb @@ -243,4 +243,32 @@ end end end + + describe '#no_inline' do + it 'defaults to false' do + expect(grammar.no_inline).to be false + end + + it 'can be set to true' do + grammar.no_inline = true + expect(grammar.no_inline).to be true + end + + context 'when no_inline is true' do + it 'skips inline rule validation and resolution during prepare' do + grammar.no_inline = true + + # Mock the parameterized_resolver to have an inline rule with recursion + # This would normally raise an error, but with no_inline=true it should be skipped + allow(grammar).to receive(:validate_inline_rules) + allow(grammar).to receive(:resolve_inline_rules) + + # prepare should not call validate_inline_rules or resolve_inline_rules + grammar.no_inline = true + + expect(grammar).not_to receive(:validate_inline_rules) + expect(grammar).not_to receive(:resolve_inline_rules) + end + end + end end diff --git a/spec/lrama/option_parser_spec.rb b/spec/lrama/option_parser_spec.rb index 7675c4da..1cc01ad5 100644 --- a/spec/lrama/option_parser_spec.rb +++ 
b/spec/lrama/option_parser_spec.rb @@ -70,6 +70,9 @@ Error Recovery: -e enable error recovery + Grammar Processing: + --no-inline ignore all %inline keywords + Other options: -V, --version output version information and exit -h, --help display this help and exit diff --git a/spec/lrama/parser_spec.rb b/spec/lrama/parser_spec.rb index 9b2365ab..cc56188f 100644 --- a/spec/lrama/parser_spec.rb +++ b/spec/lrama/parser_spec.rb @@ -2759,7 +2759,7 @@ grammar.find_symbol_by_s_value!("'+'"), grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = $1 + $3; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '+'; } $$ = $1 _inline_2 $3; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'+'"), lineno: 27, @@ -2772,7 +2772,7 @@ grammar.find_symbol_by_s_value!("'-'"), grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = $1 - $3; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '-'; } $$ = $1 _inline_2 $3; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'-'"), lineno: 27, @@ -2785,7 +2785,7 @@ grammar.find_symbol_by_s_value!("'*'"), grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = $1 * $3; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '*'; } $$ = $1 _inline_2 $3; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'*'"), lineno: 27, @@ -2798,7 +2798,7 @@ grammar.find_symbol_by_s_value!("'/'"), grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = $1 / $3; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '/'; } $$ = $1 _inline_2 $3; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'/'"), lineno: 27, @@ -2811,7 +2811,7 @@ grammar.find_symbol_by_s_value!("'%'"), grammar.find_symbol_by_s_value!("expression"), ], - 
token_code: T::UserCode.new(s_value: " $$ = $1 + 1 + $3; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '%'; } $$ = $1 _inline_2 $3; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'%'"), lineno: 28, @@ -2824,7 +2824,7 @@ grammar.find_symbol_by_s_value!("'&'"), grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = $1 - 1 - $3; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '&'; } $$ = $1 _inline_2 $3; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'&'"), lineno: 28, @@ -2875,7 +2875,7 @@ grammar.find_symbol_by_s_value!("'+'"), grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = $1 + $3; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '+'; } $$ = $1 _inline_2 $3; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'+'"), lineno: 25, @@ -2909,7 +2909,7 @@ grammar.find_symbol_by_s_value!("'-'"), grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = $1 - $4; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '-'; } $$ = $1 _inline_2 $4; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'-'"), lineno: 25, @@ -2959,7 +2959,7 @@ grammar.find_symbol_by_s_value!("'+'"), grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = $1 + $3; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '+'; } $$ = $1 _inline_2 $3; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'+'"), lineno: 25, @@ -2972,7 +2972,7 @@ grammar.find_symbol_by_s_value!("'-'"), grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = $1 - $3; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '-'; } $$ = $1 _inline_2 $3; "), nullable: 
false, precedence_sym: grammar.find_symbol_by_s_value!("'-'"), lineno: 25, @@ -3091,7 +3091,7 @@ grammar.find_symbol_by_s_value!("'+'"), grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = $1 + $3; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '+'; } $$ = $1 _inline_2 $3; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'+'"), lineno: 25, @@ -3105,7 +3105,7 @@ grammar.find_symbol_by_s_value!("'='"), grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = $1 += $4; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '+='; } $$ = $1 _inline_2 $4; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'='"), lineno: 25, @@ -3155,7 +3155,7 @@ grammar.find_symbol_by_s_value!("'+'"), grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = @1 + @3; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '+'; } $$ = @1 _inline_2 @3; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'+'"), lineno: 25, @@ -3169,7 +3169,7 @@ grammar.find_symbol_by_s_value!("'='"), grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = @1 += @4; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '+='; } $$ = @1 _inline_2 @4; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'='"), lineno: 25, @@ -3219,7 +3219,7 @@ grammar.find_symbol_by_s_value!("'+'"), grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = $3 + $1; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '+'; } $$ = $3 _inline_2 $1; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'+'"), lineno: 25, @@ -3233,7 +3233,7 @@ grammar.find_symbol_by_s_value!("'='"), 
grammar.find_symbol_by_s_value!("expression"), ], - token_code: T::UserCode.new(s_value: " $$ = $4 += $1; "), + token_code: T::UserCode.new(s_value: " YYSTYPE _inline_2; { _inline_2 = '+='; } $$ = $4 _inline_2 $1; "), nullable: false, precedence_sym: grammar.find_symbol_by_s_value!("'='"), lineno: 25, @@ -3241,6 +3241,77 @@ ]) end end + + context 'when inline has direct recursion' do + let(:path) { "inlining/recursive_direct.y" } + + it "raises RecursiveInlineError" do + expect { grammar }.to raise_error( + Lrama::Grammar::Inline::Validator::RecursiveInlineError, + /Recursive inline definition detected/ + ) + end + end + + context 'when inline has mutual recursion' do + let(:path) { "inlining/recursive_mutual.y" } + + it "raises RecursiveInlineError" do + expect { grammar }.to raise_error( + Lrama::Grammar::Inline::Validator::RecursiveInlineError, + /Recursive inline definition detected/ + ) + end + end + + context 'when inline expansion for precedence resolution (Menhir core use case)' do + let(:path) { "inlining/precedence_test.y" } + + it "expands inline rules and inherits correct precedence from terminals" do + expect(grammar.rules.size).to eq(4) + + plus_rule = grammar.rules.find do |r| + r.rhs.any? { |s| s.id.s_value == "PLUS" } + end + + times_rule = grammar.rules.find do |r| + r.rhs.any? { |s| s.id.s_value == "TIMES" } + end + + expect(plus_rule).not_to be_nil + expect(plus_rule.precedence_sym.id.s_value).to eq("PLUS") + + expect(times_rule).not_to be_nil + expect(times_rule.precedence_sym.id.s_value).to eq("TIMES") + end + end + + context 'when inline action contains $$ (Menhir-style variable binding)' do + let(:path) { "inlining/menhir_style.y" } + + it "uses variable binding instead of macro substitution" do + expect(grammar.rules.size).to eq(4) + + plus_rule = grammar.rules.find do |r| + r.rhs.any? { |s| s.id.s_value == "'+'" } + end + + times_rule = grammar.rules.find do |r| + r.rhs.any? 
{ |s| s.id.s_value == "'*'" } + end + + expect(plus_rule).not_to be_nil + expect(times_rule).not_to be_nil + + expect(plus_rule.token_code.s_value).to include("YYSTYPE _inline_2") + expect(plus_rule.token_code.s_value).to include("_inline_2 = add") + expect(plus_rule.token_code.s_value).to include("$$ = _inline_2($1, $3)") + + expect(times_rule.token_code.s_value).to include("YYSTYPE _inline_2") + expect(times_rule.token_code.s_value).to include("_inline_2 = mul") + expect(times_rule.token_code.s_value).to include("$$ = _inline_2($1, $3)") + end + end end it "; for rules is optional" do