From dfd6280cfdfedbc0158f97685350ce0b99566e68 Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 28 May 2021 10:11:05 +0200 Subject: [PATCH 01/15] Fix for detecting left recursion in rules like: indexing_list = indexing_element | indexing_list ',' indexing_element . --- src/Tab.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Tab.java b/src/Tab.java index b423437..1981614 100644 --- a/src/Tab.java +++ b/src/Tab.java @@ -1026,17 +1026,17 @@ public CNode (Symbol l, Symbol r) { } } - void GetSingles(Node p, ArrayList singles) { + void GetSingles(Node p, ArrayList singles, Node rule) { if (p == null) return; // end of graph if (p.typ == Node.nt) { - if (p.up || DelGraph(p.next)) singles.add(p.sym); + if (p.up || DelGraph(p.next) || p.sym.graph == rule) singles.add(p.sym); } else if (p.typ == Node.alt || p.typ == Node.iter || p.typ == Node.opt) { if (p.up || DelGraph(p.next)) { - GetSingles(p.sub, singles); - if (p.typ == Node.alt) GetSingles(p.down, singles); + GetSingles(p.sub, singles, rule); + if (p.typ == Node.alt) GetSingles(p.down, singles, rule); } } - if (!p.up && DelNode(p)) GetSingles(p.next, singles); + if (!p.up && DelNode(p)) GetSingles(p.next, singles, rule); } public boolean NoCircularProductions() { @@ -1045,7 +1045,7 @@ public boolean NoCircularProductions() { for (int i = 0; i < nonterminals.size(); i++) { Symbol sym = (Symbol)nonterminals.get(i); ArrayList singles = new ArrayList(); - GetSingles(sym.graph, singles); // get nonterminals s such that sym-->s + GetSingles(sym.graph, singles, sym.graph); // get nonterminals s such that sym-->s for (int j = 0; j < singles.size(); j++) { Symbol s = (Symbol)singles.get(j); list.add(new CNode(sym, s)); From 493068c005a72caf449bbae8239b5ee50eaa32c9 Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 28 May 2021 10:25:38 +0200 Subject: [PATCH 02/15] Fix possible mistake mixing boolean comparison with bitwise operation --- src/DFA.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DFA.java b/src/DFA.java index e145698..767c1e6 100644 --- a/src/DFA.java +++ b/src/DFA.java @@ -487,7 +487,7 @@ void DeleteRedundantStates() { for (State s1 = firstState.next; s1 != null; s1 = s1.next) // firstState cannot be final if (used.get(s1.nr) && s1.endOf != null && s1.firstAction == null && !s1.ctx) for (State s2 = s1.next; s2 != null; s2 = s2.next) - if (used.get(s2.nr) && s1.endOf == s2.endOf && s2.firstAction == null & !s2.ctx) { + if (used.get(s2.nr) && s1.endOf == s2.endOf && s2.firstAction == null && !s2.ctx) { used.set(s2.nr, false); newState[s2.nr] = s1; } for (State state = firstState; state != null; state = state.next) From e1bd24ac0aeeae574f16e2f0ff849f128edc2d0a Mon Sep 17 00:00:00 2001 From: mingodad Date: Wed, 9 Jun 2021 16:33:37 +0200 Subject: [PATCH 03/15] Start implement my CocoR extensions here --- src/Coco.atg | 85 ++++++----- src/Coco.java | 10 ++ src/DFA.java | 5 +- src/Parser.java | 341 +++++++++++++++++++++++---------------------- src/ParserGen.java | 132 +++++++++++++++++- src/Scanner.java | 20 +-- src/Tab.java | 40 ++++-- 7 files changed, 400 insertions(+), 233 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index 2b6be89..02afa8e 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -37,7 +37,7 @@ COMPILER Coco static final int id = 0; static final int str = 1; - + public Trace trace; // other Coco objects referenced by this ATG public Tab tab; public DFA dfa; @@ -83,19 +83,19 @@ IGNORE cr + lf + tab PRODUCTIONS -Coco (. Symbol sym; Graph g, g1, g2; String gramName; CharSet s; int beg; .) +Coco (. Symbol sym; Graph g, g1, g2; String gramName; CharSet s; int beg, line; .) = [ // import statements - ANY (. beg = t.pos; .) - { ANY } (. pgen.usingPos = new Position(beg, la.pos, 0); .) + ANY (. beg = t.pos; line = t.line; .) + { ANY } (. pgen.usingPos = new Position(beg, la.pos, 0, line); .) ] - "COMPILER" (. genScanner = true; + "COMPILER" (. genScanner = true; tab.ignored = new CharSet(); .) ident (. gramName = t.val; - beg = la.pos; + beg = la.pos; line = la.line; .) - { ANY } (. tab.semDeclPos = new Position(beg, la.pos, 0); .) + { ANY } (. tab.semDeclPos = new Position(beg, la.pos, 0, line); .) [ "IGNORECASE" (. dfa.ignoreCase = true; .) ] /* pdt */ [ "CHARACTERS" { SetDecl }] [ "TOKENS" { TokenDecl }] @@ -157,7 +157,16 @@ Coco (. Symbol sym; Graph g, g1, g2; String gramName; System.out.println("checking"); tab.CompSymbolSets(); if (tab.ddt[7]) tab.XRef(); - if (tab.GrammarOk()) { + boolean doGenCode = false; + if(tab.ignoreErrors) { + doGenCode = true; + tab.GrammarCheckAll(); + } + else doGenCode = tab.GrammarOk(); + if(tab.genRREBNF && doGenCode) { + pgen.WriteRREBNF(); + } + if (doGenCode) { System.out.print("parser"); pgen.WriteParser(); if (genScanner) { @@ -263,20 +272,20 @@ TokenDecl (. SymInfo s; Symbol sym; Graph g; .) /*------------------------------------------------------------------------------------*/ -AttrDecl (. int beg, col; .) -= +AttrDecl (. int beg, col, line; .) += '<' // attributes denoted by < ... > ( ('^' | "out") (. beg = la.pos; .) TypeName (. sym.retType = scanner.buffer.GetString(beg, la.pos); .) ident (. sym.retVar = t.val; .) ( '>' - | ',' (. beg = la.pos; col = la.col; .) + | ',' (. beg = la.pos; col = la.col; line = la.line; .) { ANY } '>' (. if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); .) + sym.attrPos = new Position(beg, t.pos, col, line); .) ) - | (. beg = la.pos; col = la.col; .) + | (. beg = la.pos; col = la.col; line = la.line; .) [ ANY { ANY } ] '>' (. if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); .) + sym.attrPos = new Position(beg, t.pos, col, line); .) ) | "<." // attributes denoted by <. ... .> @@ -284,13 +293,13 @@ AttrDecl (. int beg, col; .) TypeName (. sym.retType = scanner.buffer.GetString(beg, la.pos); .) ident (. sym.retVar = t.val; .) ( ".>" - | ',' (. beg = la.pos; col = la.col; .) + | ',' (. beg = la.pos; col = la.col; line = la.line; .) { ANY } ".>" (. if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); .) + sym.attrPos = new Position(beg, t.pos, col, line); .) ) - | (. beg = la.pos; col = la.col; .) + | (. beg = la.pos; col = la.col; line = la.line; .) [ ANY { ANY } ] ".>" (. if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); .) + sym.attrPos = new Position(beg, t.pos, col, line); .) ). /*------------------------------------------------------------------------------------*/ @@ -322,9 +331,9 @@ Term (. Graph g2; Node rslv = null; g = null; .) .) { Factor (. tab.MakeSequence(g, g2); .) } -| (. g = new Graph(tab.NewNode(Node.eps, null, 0)); .) +| (. g = new Graph(tab.NewNode(Node.eps, null, t.line)); .) ) (. if (g == null) // invalid start of Term - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line)); .) . @@ -370,18 +379,18 @@ Factor (. SymInfo s; Position pos; boolean weak = false | '(' Expression ')' | '[' Expression ']' (. tab.MakeOption(g); .) | '{' Expression '}' (. tab.MakeIteration(g); .) -| SemText (. Node p = tab.NewNode(Node.sem, null, 0); +| SemText (. Node p = tab.NewNode(Node.sem, null, t.line); p.pos = pos; g = new Graph(p); .) | "ANY" (. Node p = tab.NewNode(Node.any, null, t.line); // p.set is set in tab.SetupAnys g = new Graph(p); .) -| "SYNC" (. Node p = tab.NewNode(Node.sync, null, 0); +| "SYNC" (. Node p = tab.NewNode(Node.sync, null, t.line); g = new Graph(p); .) ) (. if (g == null) // invalid start of Factor - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line)); .) . @@ -389,8 +398,8 @@ Factor (. SymInfo s; Position pos; boolean weak = false Resolver = - "IF" "(" (. int beg = la.pos; int col = la.col; .) - Condition (. pos = new Position(beg, t.pos, col); .) + "IF" "(" (. int beg = la.pos; int col = la.col; int line = la.line; .) + Condition (. pos = new Position(beg, t.pos, col, line); .) . /*------------------------------------------------------------------------------------*/ @@ -431,7 +440,7 @@ TokenFactor (. SymInfo s; .) ( Sym (. if (s.kind == id) { CharClass c = tab.FindCharClass(s.name); if (c == null) { - SemErr("undefined name"); + SemErr("undefined name: " + s.name); c = tab.NewCharClass(s.name, new CharSet()); } Node p = tab.NewNode(Node.clas, null, 0); p.val = c.n; @@ -447,7 +456,7 @@ TokenFactor (. SymInfo s; .) | '[' TokenExpr ']' (. tab.MakeOption(g); tokenString = noString; .) | '{' TokenExpr '}' (. tab.MakeIteration(g); tokenString = noString; .) ) (. if (g == null) // invalid start of TokenFactor - g = new Graph(tab.NewNode(Node.eps, null, 0)); .) + g = new Graph(tab.NewNode(Node.eps, null, t.line)); .) . /*------------------------------------------------------------------------------------*/ @@ -466,7 +475,7 @@ Sym /*------------------------------------------------------------------------------------*/ -Attribs (. int beg, col; .) +Attribs (. int beg, col, line; .) = '<' // attributes denoted by < ... > ( ('^' | "out") (. beg = la.pos; .) @@ -475,19 +484,19 @@ Attribs (. int beg, col; .) | badString (. SemErr("bad string in attributes"); .) } (. n.retVar = scanner.buffer.GetString(beg, la.pos); .) ( '>' - | ',' (. beg = la.pos; col = la.col; .) + | ',' (. beg = la.pos; col = la.col; line = la.line; .) { ANY | badString (. SemErr("bad string in attributes"); .) - } '>' (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col); .) + } '>' (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); .) ) - | (. beg = la.pos; col = la.col; .) + | (. beg = la.pos; col = la.col; line = la.line; .) [ ( ANY | badString (. SemErr("bad string in attributes"); .) ) { ANY | badString (. SemErr("bad string in attributes"); .) } - ] '>' (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col); .) + ] '>' (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); .) ) | "<." // attributes denoted by <. ... .> @@ -497,19 +506,19 @@ Attribs (. int beg, col; .) | badString (. SemErr("bad string in attributes"); .) } (. n.retVar = scanner.buffer.GetString(beg, la.pos); .) ( ".>" - | ',' (. beg = la.pos; col = la.col; .) + | ',' (. beg = la.pos; col = la.col; line = la.line; .) { ANY | badString (. SemErr("bad string in attributes"); .) - } ".>" (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col); .) + } ".>" (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); .) ) - | (. beg = la.pos; col = la.col; .) + | (. beg = la.pos; col = la.col; line = la.line; .) [ ( ANY | badString (. SemErr("bad string in attributes"); .) ) { ANY | badString (. SemErr("bad string in attributes"); .) } - ] ".>" (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col); .) + ] ".>" (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); .) ) . @@ -522,12 +531,12 @@ Bracketed SemText = - "(." (. int beg = la.pos; int col = la.col; .) + "(." (. int beg = la.pos; int col = la.col; int line = la.line; .) { ANY | badString (. SemErr("bad string in semantic action"); .) | "(." (. SemErr("missing end of previous semantic action"); .) } - ".)" (. pos = new Position(beg, t.pos, col); .) + ".)" (. pos = new Position(beg, t.pos, col, line); .) . END Coco. diff --git a/src/Coco.java b/src/Coco.java index b8d1271..46373d1 100644 --- a/src/Coco.java +++ b/src/Coco.java @@ -50,12 +50,16 @@ public class Coco { public static void main (String[] arg) { System.out.println("Coco/R (Apr 15, 2013)"); String srcName = null, nsName = null, frameDir = null, ddtString = null, outDir = null; + boolean ignoreErrors = false, genAST = false, genRREBNF = false; int retVal = 1; for (int i = 0; i < arg.length; i++) { if (arg[i].equals("-package") && i < arg.length - 1) nsName = arg[++i].trim(); else if (arg[i].equals("-frames") && i < arg.length - 1) frameDir = arg[++i].trim(); else if (arg[i].equals("-trace") && i < arg.length - 1) ddtString = arg[++i].trim(); else if (arg[i].equals("-o") && i < arg.length - 1) outDir = arg[++i].trim(); + else if (arg[i].equals("-genAST")) genAST = true; + else if (arg[i].equals("-genRREBNF")) genRREBNF = true; + else if (arg[i].equals("-ignoreErrors")) ignoreErrors = true; else srcName = arg[i]; } if (arg.length > 0 && srcName != null) { @@ -75,6 +79,9 @@ public static void main (String[] arg) { parser.tab.nsName = nsName; parser.tab.frameDir = frameDir; parser.tab.outDir = (outDir != null) ? outDir : srcDir; + parser.tab.genAST = genAST; + parser.tab.genRREBNF = genRREBNF; + parser.tab.ignoreErrors = ignoreErrors; if (ddtString != null) parser.tab.SetDDT(ddtString); parser.Parse(); @@ -93,6 +100,9 @@ public static void main (String[] arg) { " -frames \n" + " -trace \n" + " -o \n" + + " -genRREBNF\n" + + " -genAST\n" + + " -ignoreErrors ignore grammar errors for developing purposes\n" + "Valid characters in the trace string:\n" + " A trace automaton\n" + " F list first/follow sets\n" + diff --git a/src/DFA.java b/src/DFA.java index 767c1e6..322c9c4 100644 --- a/src/DFA.java +++ b/src/DFA.java @@ -750,10 +750,11 @@ public void PrintStates() { boolean first = true; if (state.endOf == null) trace.Write(" "); else trace.Write("E(" + tab.Name(state.endOf.name) + ")", 12); - trace.Write(state.nr + ":", 3); + trace.Write(state.nr + "", 3); + trace.Write(":"); if (state.firstAction == null) trace.WriteLine(); for (Action action = state.firstAction; action != null; action = action.next) { - if (first) {trace.Write(" "); first = false;} else trace.Write(" "); + if (first) {trace.Write(" "); first = false;} else trace.Write(" "); if (action.typ == Node.clas) trace.Write(((CharClass)tab.classes.get(action.sym)).name); else trace.Write(Ch((char)action.sym), 3); diff --git a/src/Parser.java b/src/Parser.java index bd48758..5910748 100644 --- a/src/Parser.java +++ b/src/Parser.java @@ -5,26 +5,26 @@ ported from C# to Java by Wolfgang Ahorner with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. --------------------------------------------------------------------------*/ +------------------------------------------------------------------------*/ package Coco; public class Parser { @@ -51,7 +51,7 @@ public class Parser { static final int id = 0; static final int str = 1; - + public Trace trace; // other Coco objects referenced by this ATG public Tab tab; public DFA dfa; @@ -89,10 +89,10 @@ void Get () { break; } - if (la.kind == 45) { + if (la.kind == _ddtSym) { tab.SetDDT(la.val); } - if (la.kind == 46) { + if (la.kind == _optionSym) { tab.SetOption(la.val); } la = t; @@ -130,72 +130,72 @@ boolean WeakSeparator (int n, int syFol, int repFol) { } void Coco() { - Symbol sym; Graph g, g1, g2; String gramName; CharSet s; int beg; + Symbol sym; Graph g, g1, g2; String gramName; CharSet s; int beg, line; if (StartOf(1)) { Get(); - beg = t.pos; + beg = t.pos; line = t.line; while (StartOf(1)) { Get(); } - pgen.usingPos = new Position(beg, la.pos, 0); + pgen.usingPos = new Position(beg, la.pos, 0, line); } - Expect(6); - genScanner = true; + Expect(6 /* "COMPILER" */); + genScanner = true; tab.ignored = new CharSet(); - Expect(1); + Expect(_ident); gramName = t.val; - beg = la.pos; + beg = la.pos; line = la.line; while (StartOf(2)) { Get(); } - tab.semDeclPos = new Position(beg, la.pos, 0); - if (la.kind == 7) { + tab.semDeclPos = new Position(beg, la.pos, 0, line); + if (la.kind == 7 /* "IGNORECASE" */) { Get(); dfa.ignoreCase = true; } - if (la.kind == 8) { + if (la.kind == 8 /* "CHARACTERS" */) { Get(); - while (la.kind == 1) { + while (la.kind == _ident) { SetDecl(); } } - if (la.kind == 9) { + if (la.kind == 9 /* "TOKENS" */) { Get(); - while (la.kind == 1 || la.kind == 3 || la.kind == 5) { + while (la.kind == _ident || la.kind == _string || la.kind == _char) { TokenDecl(Node.t); } } - if (la.kind == 10) { + if (la.kind == 10 /* "PRAGMAS" */) { Get(); - while (la.kind == 1 || la.kind == 3 || la.kind == 5) { + while (la.kind == _ident || la.kind == _string || la.kind == _char) { TokenDecl(Node.pr); } } - while (la.kind == 11) { + while (la.kind == 11 /* "COMMENTS" */) { Get(); boolean nested = false; - Expect(12); + Expect(12 /* "FROM" */); g1 = TokenExpr(); - Expect(13); + Expect(13 /* "TO" */); g2 = TokenExpr(); - if (la.kind == 14) { + if (la.kind == 14 /* "NESTED" */) { Get(); nested = true; } dfa.NewComment(g1.l, g2.l, nested); } - while (la.kind == 15) { + while (la.kind == 15 /* "IGNORE" */) { Get(); s = Set(); tab.ignored.Or(s); } - while (!(la.kind == 0 || la.kind == 16)) {SynErr(45); Get();} - Expect(16); + while (!(la.kind == _EOF || la.kind == 16 /* "PRODUCTIONS" */)) {SynErr(45); Get();} + Expect(16 /* "PRODUCTIONS" */); if (genScanner) dfa.MakeDeterministic(); tab.DeleteNodes(); - while (la.kind == 1) { + while (la.kind == _ident) { Get(); sym = tab.FindSym(t.val); boolean undef = sym == null; @@ -211,7 +211,7 @@ void Coco() { boolean noRet = sym.retVar==null; sym.retVar = null; - if (la.kind == 24 || la.kind == 29) { + if (la.kind == 24 /* "<" */ || la.kind == 29 /* "<." */) { AttrDecl(sym); } if (!undef) @@ -219,18 +219,18 @@ void Coco() { || noRet != (sym.retVar == null)) SemErr("attribute mismatch between declaration and use of this symbol"); - if (la.kind == 42) { + if (la.kind == 42 /* "(." */) { sym.semPos = SemText(); } - ExpectWeak(17, 3); + ExpectWeak(17 /* "=" */, 3); g = Expression(); sym.graph = g.l; tab.Finish(g); - ExpectWeak(18, 4); + ExpectWeak(18 /* "." */, 4); } - Expect(19); - Expect(1); + Expect(19 /* "END" */); + Expect(_ident); if (gramName.compareTo(t.val) != 0) SemErr("name does not match grammar name"); tab.gramSy = tab.FindSym(gramName); @@ -249,7 +249,16 @@ void Coco() { System.out.println("checking"); tab.CompSymbolSets(); if (tab.ddt[7]) tab.XRef(); - if (tab.GrammarOk()) { + boolean doGenCode = false; + if(tab.ignoreErrors) { + doGenCode = true; + tab.GrammarCheckAll(); + } + else doGenCode = tab.GrammarOk(); + if(tab.genRREBNF && doGenCode) { + pgen.WriteRREBNF(); + } + if (doGenCode) { System.out.print("parser"); pgen.WriteParser(); if (genScanner) { @@ -263,22 +272,22 @@ void Coco() { } if (tab.ddt[6]) tab.PrintSymbolTable(); - Expect(18); + Expect(18 /* "." */); } void SetDecl() { CharSet s; - Expect(1); + Expect(_ident); String name = t.val; CharClass c = tab.FindCharClass(name); if (c != null) SemErr("name declared twice"); - Expect(17); + Expect(17 /* "=" */); s = Set(); if (s.Elements() == 0) SemErr("character set must not be empty"); c = tab.NewCharClass(name, s); - Expect(18); + Expect(18 /* "." */); } void TokenDecl(int typ) { @@ -293,10 +302,10 @@ void TokenDecl(int typ) { tokenString = null; while (!(StartOf(5))) {SynErr(46); Get();} - if (la.kind == 17) { + if (la.kind == 17 /* "=" */) { Get(); g = TokenExpr(); - Expect(18); + Expect(18 /* "." */); if (s.kind == str) SemErr("a literal must not be declared with a structure"); tab.Finish(g); if (tokenString == null || tokenString.equals(noString)) @@ -313,7 +322,7 @@ void TokenDecl(int typ) { else dfa.MatchLiteral(sym.name, sym); } else SynErr(47); - if (la.kind == 42) { + if (la.kind == 42 /* "(." */) { sym.semPos = SemText(); if (typ != Node.pr) SemErr("semantic action not allowed here"); } @@ -337,8 +346,8 @@ CharSet Set() { CharSet s; CharSet s2; s = SimSet(); - while (la.kind == 20 || la.kind == 21) { - if (la.kind == 20) { + while (la.kind == 20 /* "+" */ || la.kind == 21 /* "-" */) { + if (la.kind == 20 /* "+" */) { Get(); s2 = SimSet(); s.Or(s2); @@ -352,11 +361,11 @@ CharSet Set() { } void AttrDecl(Symbol sym) { - int beg, col; - if (la.kind == 24) { + int beg, col, line; + if (la.kind == 24 /* "<" */) { Get(); - if (la.kind == 25 || la.kind == 26) { - if (la.kind == 25) { + if (la.kind == 25 /* "^" */ || la.kind == 26 /* "out" */) { + if (la.kind == 25 /* "^" */) { Get(); } else { Get(); @@ -364,36 +373,36 @@ void AttrDecl(Symbol sym) { beg = la.pos; TypeName(); sym.retType = scanner.buffer.GetString(beg, la.pos); - Expect(1); + Expect(_ident); sym.retVar = t.val; - if (la.kind == 27) { + if (la.kind == 27 /* ">" */) { Get(); - } else if (la.kind == 28) { + } else if (la.kind == 28 /* "," */) { Get(); - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; while (StartOf(9)) { Get(); } - Expect(27); + Expect(27 /* ">" */); if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); + sym.attrPos = new Position(beg, t.pos, col, line); } else SynErr(48); } else if (StartOf(10)) { - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; if (StartOf(11)) { Get(); while (StartOf(9)) { Get(); } } - Expect(27); + Expect(27 /* ">" */); if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); + sym.attrPos = new Position(beg, t.pos, col, line); } else SynErr(49); - } else if (la.kind == 29) { + } else if (la.kind == 29 /* "<." */) { Get(); - if (la.kind == 25 || la.kind == 26) { - if (la.kind == 25) { + if (la.kind == 25 /* "^" */ || la.kind == 26 /* "out" */) { + if (la.kind == 25 /* "^" */) { Get(); } else { Get(); @@ -401,43 +410,43 @@ void AttrDecl(Symbol sym) { beg = la.pos; TypeName(); sym.retType = scanner.buffer.GetString(beg, la.pos); - Expect(1); + Expect(_ident); sym.retVar = t.val; - if (la.kind == 30) { + if (la.kind == 30 /* ".>" */) { Get(); - } else if (la.kind == 28) { + } else if (la.kind == 28 /* "," */) { Get(); - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; while (StartOf(12)) { Get(); } - Expect(30); + Expect(30 /* ".>" */); if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); + sym.attrPos = new Position(beg, t.pos, col, line); } else SynErr(50); } else if (StartOf(10)) { - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; if (StartOf(13)) { Get(); while (StartOf(12)) { Get(); } } - Expect(30); + Expect(30 /* ".>" */); if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); + sym.attrPos = new Position(beg, t.pos, col, line); } else SynErr(51); } else SynErr(52); } Position SemText() { Position pos; - Expect(42); - int beg = la.pos; int col = la.col; + Expect(42 /* "(." */); + int beg = la.pos; int col = la.col; int line = la.line; while (StartOf(14)) { if (StartOf(15)) { Get(); - } else if (la.kind == 4) { + } else if (la.kind == _badString) { Get(); SemErr("bad string in semantic action"); } else { @@ -445,8 +454,8 @@ Position SemText() { SemErr("missing end of previous semantic action"); } } - Expect(43); - pos = new Position(beg, t.pos, col); + Expect(43 /* ".)" */); + pos = new Position(beg, t.pos, col, line); return pos; } @@ -468,27 +477,27 @@ CharSet SimSet() { CharSet s; int n1, n2; s = new CharSet(); - if (la.kind == 1) { + if (la.kind == _ident) { Get(); CharClass c = tab.FindCharClass(t.val); if (c == null) SemErr("undefined name"); else s.Or(c.set); - } else if (la.kind == 3) { + } else if (la.kind == _string) { Get(); String name = t.val; name = tab.Unescape(name.substring(1, name.length()-1)); for (int i = 0; i < name.length(); i++) if (dfa.ignoreCase) s.Set(Character.toLowerCase(name.charAt(i))); else s.Set(name.charAt(i)); - } else if (la.kind == 5) { + } else if (la.kind == _char) { n1 = Char(); s.Set(n1); - if (la.kind == 22) { + if (la.kind == 22 /* ".." */) { Get(); n2 = Char(); for (int i = n1; i <= n2; i++) s.Set(i); } - } else if (la.kind == 23) { + } else if (la.kind == 23 /* "ANY" */) { Get(); s = new CharSet(); s.Fill(); } else SynErr(53); @@ -497,7 +506,7 @@ CharSet SimSet() { int Char() { int n; - Expect(5); + Expect(_char); String name = t.val; n = 0; name = tab.Unescape(name.substring(1, name.length()-1)); if (name.length() == 1) n = name.charAt(0); @@ -510,11 +519,11 @@ int Char() { SymInfo Sym() { SymInfo s; s = new SymInfo(); s.name = "???"; s.kind = id; - if (la.kind == 1) { + if (la.kind == _ident) { Get(); s.kind = id; s.name = t.val; - } else if (la.kind == 3 || la.kind == 5) { - if (la.kind == 3) { + } else if (la.kind == _string || la.kind == _char) { + if (la.kind == _string) { Get(); s.name = t.val; } else { @@ -530,22 +539,22 @@ SymInfo Sym() { } void TypeName() { - Expect(1); - while (la.kind == 18 || la.kind == 24 || la.kind == 31) { - if (la.kind == 18) { + Expect(_ident); + while (la.kind == 18 /* "." */ || la.kind == 24 /* "<" */ || la.kind == 31 /* "[" */) { + if (la.kind == 18 /* "." */) { Get(); - Expect(1); - } else if (la.kind == 31) { + Expect(_ident); + } else if (la.kind == 31 /* "[" */) { Get(); - Expect(32); + Expect(32 /* "]" */); } else { Get(); TypeName(); - while (la.kind == 28) { + while (la.kind == 28 /* "," */) { Get(); TypeName(); } - Expect(27); + Expect(27 /* ">" */); } } } @@ -554,7 +563,7 @@ Graph Term() { Graph g; Graph g2; Node rslv = null; g = null; if (StartOf(18)) { - if (la.kind == 40) { + if (la.kind == 40 /* "IF" */) { rslv = tab.NewNode(Node.rslv, null, la.line); rslv.pos = Resolver(); g = new Graph(rslv); @@ -568,21 +577,21 @@ Graph Term() { tab.MakeSequence(g, g2); } } else if (StartOf(20)) { - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line)); } else SynErr(55); if (g == null) // invalid start of Term - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line)); return g; } Position Resolver() { Position pos; - Expect(40); - Expect(35); - int beg = la.pos; int col = la.col; + Expect(40 /* "IF" */); + Expect(35 /* "(" */); + int beg = la.pos; int col = la.col; int line = la.line; Condition(); - pos = new Position(beg, t.pos, col); + pos = new Position(beg, t.pos, col, line); return pos; } @@ -591,8 +600,8 @@ Graph Factor() { SymInfo s; Position pos; boolean weak = false; g = null; switch (la.kind) { - case 1: case 3: case 5: case 34: { - if (la.kind == 34) { + case _ident: case _string: case _char: case 34 /* "WEAK" */: { + if (la.kind == 34 /* "WEAK" */) { Get(); weak = true; } @@ -621,7 +630,7 @@ else if (genScanner) { Node p = tab.NewNode(typ, sym, t.line); g = new Graph(p); - if (la.kind == 24 || la.kind == 29) { + if (la.kind == 24 /* "<" */ || la.kind == 29 /* "<." */) { Attribs(p); if (s.kind != id) SemErr("a literal must not have attributes"); } @@ -634,44 +643,44 @@ else if (genScanner) { break; } - case 35: { + case 35 /* "(" */: { Get(); g = Expression(); - Expect(36); + Expect(36 /* ")" */); break; } - case 31: { + case 31 /* "[" */: { Get(); g = Expression(); - Expect(32); + Expect(32 /* "]" */); tab.MakeOption(g); break; } - case 37: { + case 37 /* "{" */: { Get(); g = Expression(); - Expect(38); + Expect(38 /* "}" */); tab.MakeIteration(g); break; } - case 42: { + case 42 /* "(." */: { pos = SemText(); - Node p = tab.NewNode(Node.sem, null, 0); + Node p = tab.NewNode(Node.sem, null, t.line); p.pos = pos; g = new Graph(p); break; } - case 23: { + case 23 /* "ANY" */: { Get(); Node p = tab.NewNode(Node.any, null, t.line); // p.set is set in tab.SetupAnys g = new Graph(p); break; } - case 39: { + case 39 /* "SYNC" */: { Get(); - Node p = tab.NewNode(Node.sync, null, 0); + Node p = tab.NewNode(Node.sync, null, t.line); g = new Graph(p); break; @@ -679,17 +688,17 @@ else if (genScanner) { default: SynErr(56); break; } if (g == null) // invalid start of Factor - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line)); return g; } void Attribs(Node n) { - int beg, col; - if (la.kind == 24) { + int beg, col, line; + if (la.kind == 24 /* "<" */) { Get(); - if (la.kind == 25 || la.kind == 26) { - if (la.kind == 25) { + if (la.kind == 25 /* "^" */ || la.kind == 26 /* "out" */) { + if (la.kind == 25 /* "^" */) { Get(); } else { Get(); @@ -698,7 +707,7 @@ void Attribs(Node n) { while (StartOf(21)) { if (StartOf(22)) { Get(); - } else if (la.kind == 31 || la.kind == 35) { + } else if (la.kind == 31 /* "[" */ || la.kind == 35 /* "(" */) { Bracketed(); } else { Get(); @@ -706,11 +715,11 @@ void Attribs(Node n) { } } n.retVar = scanner.buffer.GetString(beg, la.pos); - if (la.kind == 27) { + if (la.kind == 27 /* ">" */) { Get(); - } else if (la.kind == 28) { + } else if (la.kind == 28 /* "," */) { Get(); - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; while (StartOf(9)) { if (StartOf(23)) { Get(); @@ -719,11 +728,11 @@ void Attribs(Node n) { SemErr("bad string in attributes"); } } - Expect(27); - if (t.pos > beg) n.pos = new Position(beg, t.pos, col); + Expect(27 /* ">" */); + if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); } else SynErr(57); } else if (StartOf(10)) { - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; if (StartOf(11)) { if (StartOf(24)) { Get(); @@ -740,13 +749,13 @@ void Attribs(Node n) { } } } - Expect(27); - if (t.pos > beg) n.pos = new Position(beg, t.pos, col); + Expect(27 /* ">" */); + if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); } else SynErr(58); - } else if (la.kind == 29) { + } else if (la.kind == 29 /* "<." */) { Get(); - if (la.kind == 25 || la.kind == 26) { - if (la.kind == 25) { + if (la.kind == 25 /* "^" */ || la.kind == 26 /* "out" */) { + if (la.kind == 25 /* "^" */) { Get(); } else { Get(); @@ -755,7 +764,7 @@ void Attribs(Node n) { while (StartOf(25)) { if (StartOf(26)) { Get(); - } else if (la.kind == 31 || la.kind == 35) { + } else if (la.kind == 31 /* "[" */ || la.kind == 35 /* "(" */) { Bracketed(); } else { Get(); @@ -763,11 +772,11 @@ void Attribs(Node n) { } } n.retVar = scanner.buffer.GetString(beg, la.pos); - if (la.kind == 30) { + if (la.kind == 30 /* ".>" */) { Get(); - } else if (la.kind == 28) { + } else if (la.kind == 28 /* "," */) { Get(); - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; while (StartOf(12)) { if (StartOf(27)) { Get(); @@ -776,11 +785,11 @@ void Attribs(Node n) { SemErr("bad string in attributes"); } } - Expect(30); - if (t.pos > beg) n.pos = new Position(beg, t.pos, col); + Expect(30 /* ".>" */); + if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); } else SynErr(59); } else if (StartOf(10)) { - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; if (StartOf(13)) { if (StartOf(28)) { Get(); @@ -797,22 +806,22 @@ void Attribs(Node n) { } } } - Expect(30); - if (t.pos > beg) n.pos = new Position(beg, t.pos, col); + Expect(30 /* ".>" */); + if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); } else SynErr(60); } else SynErr(61); } void Condition() { while (StartOf(29)) { - if (la.kind == 35) { + if (la.kind == 35 /* "(" */) { Get(); Condition(); } else { Get(); } } - Expect(36); + Expect(36 /* ")" */); } Graph TokenTerm() { @@ -823,13 +832,13 @@ Graph TokenTerm() { g2 = TokenFactor(); tab.MakeSequence(g, g2); } - if (la.kind == 41) { + if (la.kind == 41 /* "CONTEXT" */) { Get(); - Expect(35); + Expect(35 /* "(" */); g2 = TokenExpr(); tab.SetContextTrans(g2.l); dfa.hasCtxMoves = true; tab.MakeSequence(g, g2); - Expect(36); + Expect(36 /* ")" */); } return g; } @@ -838,12 +847,12 @@ Graph TokenFactor() { Graph g; SymInfo s; g = null; - if (la.kind == 1 || la.kind == 3 || la.kind == 5) { + if (la.kind == _ident || la.kind == _string || la.kind == _char) { s = Sym(); if (s.kind == id) { CharClass c = tab.FindCharClass(s.name); if (c == null) { - SemErr("undefined name"); + SemErr("undefined name: " + s.name); c = tab.NewCharClass(s.name, new CharSet()); } Node p = tab.NewNode(Node.clas, null, 0); p.val = c.n; @@ -855,47 +864,47 @@ Graph TokenFactor() { else tokenString = noString; } - } else if (la.kind == 35) { + } else if (la.kind == 35 /* "(" */) { Get(); g = TokenExpr(); - Expect(36); - } else if (la.kind == 31) { + Expect(36 /* ")" */); + } else if (la.kind == 31 /* "[" */) { Get(); g = TokenExpr(); - Expect(32); + Expect(32 /* "]" */); tab.MakeOption(g); tokenString = noString; - } else if (la.kind == 37) { + } else if (la.kind == 37 /* "{" */) { Get(); g = TokenExpr(); - Expect(38); + Expect(38 /* "}" */); tab.MakeIteration(g); tokenString = noString; } else SynErr(62); if (g == null) // invalid start of TokenFactor - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line)); return g; } void Bracketed() { - if (la.kind == 35) { + if (la.kind == 35 /* "(" */) { Get(); while (StartOf(29)) { - if (la.kind == 31 || la.kind == 35) { + if (la.kind == 31 /* "[" */ || la.kind == 35 /* "(" */) { Bracketed(); } else { Get(); } } - Expect(36); - } else if (la.kind == 31) { + Expect(36 /* ")" */); + } else if (la.kind == 31 /* "[" */) { Get(); while (StartOf(30)) { - if (la.kind == 31 || la.kind == 35) { + if (la.kind == 31 /* "[" */ || la.kind == 35 /* "(" */) { Bracketed(); } else { Get(); } } - Expect(32); + Expect(32 /* "]" */); } else SynErr(63); } diff --git a/src/ParserGen.java b/src/ParserGen.java index 00e1e54..ffb0151 100644 --- a/src/ParserGen.java +++ b/src/ParserGen.java @@ -40,6 +40,11 @@ import java.io.FileWriter; /* pdt */ import java.util.ArrayList; import java.util.BitSet; +import java.util.Comparator; +import java.util.Hashtable; +import java.util.Map; +import java.util.TreeMap; +import java.util.Iterator; public class ParserGen { @@ -80,10 +85,18 @@ boolean Overlaps (BitSet s1, BitSet s2) { for (int i = 0; i < len; ++i) { if (s1.get(i) && s2.get(i)) { return true; + } } + return false; } - return false; - } + + void WriteSymbolOrCode(Symbol sym) { + if (!Character.isLetter(sym.name.charAt(0))) { + gen.print(sym.n + " /* " + sym.name + " */"); + } else { + gen.print("_" + sym.name); + } + } // AW: use a switch if more than 5 alternatives and none starts with a resolver, no LL1 warning boolean UseSwitch (Node p) { @@ -158,7 +171,8 @@ else if (n <= maxTerm) { for (int i = 0; i < tab.terminals.size(); i++) { Symbol sym = (Symbol)tab.terminals.get(i); if (s.get(sym.n)) { - gen.print("la.kind == " + sym.n); + gen.print("la.kind == "); + WriteSymbolOrCode(sym); --n; if (n > 0) gen.print(" || "); } @@ -171,7 +185,11 @@ else if (n <= maxTerm) { void PutCaseLabels (BitSet s) { for (int i = 0; i < tab.terminals.size(); i++) { Symbol sym = (Symbol)tab.terminals.get(i); - if (s.get(sym.n)) gen.print("case " + sym.n + ": "); + if (s.get(sym.n)) { + gen.print("case "); + WriteSymbolOrCode(sym); + gen.print(": "); + } } } @@ -192,14 +210,20 @@ void GenCode (Node p, int indent, BitSet isChecked) { Indent(indent); // assert: if isChecked[p.sym.n] is true, then isChecked contains only p.sym.n if (isChecked.get(p.sym.n)) gen.println("Get();"); - else gen.println("Expect(" + p.sym.n + ");"); + else { + gen.print("Expect("); + WriteSymbolOrCode(p.sym); + gen.println(");"); + } break; } case Node.wt: { Indent(indent); s1 = tab.Expected(p.next, curSy); s1.or(tab.allSyncSets); - gen.println("ExpectWeak(" + p.sym.n + ", " + NewCondSet(s1) + ");"); + gen.print("ExpectWeak("); + WriteSymbolOrCode(p.sym); + gen.println(", " + NewCondSet(s1) + ");"); break; } case Node.any: { @@ -324,7 +348,9 @@ void GenCodePragmas() { for (int i = 0; i < tab.pragmas.size(); i++) { Symbol sym = (Symbol)tab.pragmas.get(i); gen.println(); - gen.println("\t\t\tif (la.kind == " + sym.n + ") {"); + gen.print("\t\t\tif (la.kind == "); + WriteSymbolOrCode(sym); + gen.println(") {"); CopySourcePart(sym.semPos, 4); gen.print ("\t\t\t}"); } @@ -413,6 +439,98 @@ protected void OnWriteParserInitializationDone() { // nothing to do } + public int GenCodeRREBNF (Node p) { + int rc = 0; + Node p2; + while (p != null) { + switch (p.typ) { + case Node.nt: + case Node.t: { + gen.print(p.sym.name); + gen.print(" "); + ++rc; + break; + } + case Node.wt: { + break; + } + case Node.any: { + gen.print("ANY "); + break; + } + case Node.eps: break; // nothing + case Node.rslv: break; // nothing + case Node.sem: { + break; + } + case Node.sync: { + break; + } + case Node.alt: { + gen.print("( "); + p2 = p; + while (p2 != null) { + rc += GenCodeRREBNF(p2.sub); + p2 = p2.down; + if(p2 != null) gen.print("| "); + } + gen.print(") "); + break; + } + case Node.iter: { + gen.print("( "); + rc += GenCodeRREBNF(p.sub); + gen.print(")* "); + break; + } + case Node.opt: + gen.print("( "); + rc += GenCodeRREBNF(p.sub); + gen.print(")? "); + break; + } + if (p.up) break; + p = p.next; + } + return rc; + } + + public void WriteRREBNF () { + Generator g = new Generator(tab); + gen = g.OpenGen("Parser.ebnf"); + + gen.print("//\n// EBNF generated by CocoR parser generator to be viewed with https://www.bottlecaps.de/rr/ui\n//\n"); + gen.print("\n//\n// productions\n//\n\n"); + for (int i = 0; i < tab.nonterminals.size(); i++) { + Symbol sym = (Symbol)tab.nonterminals.get(i); + gen.print(sym.name + " ::= "); + if(GenCodeRREBNF(sym.graph) == 0) { + gen.print("\"??()??\""); + } + gen.print("\n"); + } + gen.print("\n//\n// tokens\n//\n\n"); + for (int i = 0; i < tab.terminals.size(); i++) { + Symbol sym = (Symbol)tab.terminals.get(i); + if (Character.isLetter(sym.name.charAt(0))) { // real name value is stored in Tab.literals + java.util.Iterator iter = tab.literals.entrySet().iterator(); + Map.Entry me = null; + //foreach (DictionaryEntry e in literals) { + while (iter.hasNext()) { + me = (Map.Entry)iter.next(); + Symbol hsym = (Symbol)me.getValue(); + if (hsym == sym) { + gen.print(sym.name + " ::= " + me.getKey() + "\n"); + break; + } + } + } else { + //gen.print(sym.n + " /* " + sym.name + " */"); + } + } + gen.close(); + } + public void WriteStatistics () { trace.WriteLine(); trace.WriteLine(tab.terminals.size() + " terminals"); diff --git a/src/Scanner.java b/src/Scanner.java index 21a14d9..fd73a13 100644 --- a/src/Scanner.java +++ b/src/Scanner.java @@ -5,26 +5,26 @@ ported from C# to Java by Wolfgang Ahorner with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. --------------------------------------------------------------------------*/ +------------------------------------------------------------------------*/ package Coco; import java.io.InputStream; diff --git a/src/Tab.java b/src/Tab.java index 1981614..8400989 100644 --- a/src/Tab.java +++ b/src/Tab.java @@ -42,9 +42,10 @@ class Position { // position of source code stretch (e.g. semantic action, resol public final int beg; // start relative to the beginning of the file public final int end; // end of stretch public final int col; // column number of start position + public final int line; // line number of start position - public Position(int beg, int end, int col) { - this.beg = beg; this.end = end; this.col = col; + public Position(int beg, int end, int col, int line) { + this.beg = beg; this.end = end; this.col = col; this.line = line; } } @@ -204,6 +205,9 @@ public class Tab { public Position semDeclPos; // position of global semantic declarations public CharSet ignored; // characters ignored by the scanner public boolean[] ddt = new boolean[10]; // debug and test switches + public boolean genAST = false; // generate parser tree generation code + public boolean genRREBNF = false; //generate EBNF for railroad diagram + public boolean ignoreErrors = false; // ignore grammar errors for developing purposes public Symbol gramSy; // root nonterminal; filled by ATG public Symbol eofSy; // end of file symbol public Symbol noSym; // used in case of an error @@ -278,7 +282,7 @@ int Num(Node p) { void PrintSym(Symbol sym) { trace.Write(Integer.toString(sym.n), 3); - trace.Write(" "); + trace.Write(" "); trace.Write(Name(sym.name), -14); trace.Write(" "); trace.Write(nTyp[sym.typ], 2); @@ -295,7 +299,7 @@ void PrintSym(Symbol sym) { public void PrintSymbolTable() { trace.WriteLine("Symbol Table:"); trace.WriteLine("------------"); trace.WriteLine(); - trace.WriteLine(" nr name typ hasAt graph del line tokenKind"); + trace.WriteLine(" nr name typ hasAt graph del line tokenKind"); //foreach (Symbol sym in Symbol.terminals) for (int i = 0; i < terminals.size(); i++) { PrintSym((Symbol)terminals.get(i)); @@ -401,7 +405,9 @@ public void MakeSequence(Graph g1, Graph g2) { } public void MakeIteration(Graph g) { + int line = g.l.line; g.l = NewNode(Node.iter, g.l); + g.l.line = line; g.r.up = true; Node p = g.r; g.r = g.l; @@ -412,7 +418,9 @@ public void MakeIteration(Graph g) { } public void MakeOption(Graph g) { + int line = g.l.line; g.l = NewNode(Node.opt, g.l); + g.l.line = line; g.r.up = true; g.l.next = g.r; g.r = g.l; @@ -479,7 +487,7 @@ public boolean DelNode(Node p) { String Ptr(Node p, boolean up) { String ptr = (p == null) ? "0" : Integer.toString(p.n); - return (up) ? ("-" + ptr) : ptr; + return (up && (ptr != "0")) ? ("-" + ptr) : ptr; } String Pos(Position pos) { @@ -540,7 +548,7 @@ public void PrintNodes() { trace.Write(" "); trace.Write(Pos(p.pos), 5); break; - case Node.eps: case Node.any: case Node.sync: + case Node.eps: case Node.any: case Node.sync: case Node.rslv: trace.Write(" "); break; } trace.WriteLine(Integer.toString(p.line), 5); @@ -603,7 +611,8 @@ public void WriteCharClasses () { //foreach (CharClass c in classes) { for (int i = 0; i < classes.size(); i++) { CharClass c = (CharClass)classes.get(i); - trace.Write(c.name + ": ", -10); + trace.Write(c.name, -10); + trace.Write(": "); WriteCharSet(c.set); trace.WriteLine(); } @@ -913,8 +922,8 @@ public void CompSymbolSets() { Node p = (Node)nodes.get(i); if (p.typ == Node.any || p.typ == Node.sync) { trace.Write("Line: "); - trace.WriteLine(Integer.toString(p.line), 4); - trace.Write("Node: "); + trace.Write(Integer.toString(p.line), 4); + trace.Write(" Node: "); trace.Write(Integer.toString(p.n), 4); trace.Write(" "); trace.Write(nTyp[p.typ], 4); @@ -1010,12 +1019,23 @@ public String Escape (String s) { public boolean GrammarOk() { boolean ok = NtsComplete() + && AllNtReached() && NoCircularProductions() && AllNtToTerm(); - if (ok) { AllNtReached(); CheckResolvers(); CheckLL1(); } + if (ok) { CheckResolvers(); CheckLL1(); } return ok; } + public boolean GrammarCheckAll() { + int errors = 0; + if(!NtsComplete()) ++errors; + if(!AllNtReached()) ++errors; + if(!NoCircularProductions()) ++errors; + if(!AllNtToTerm()) ++errors; + CheckResolvers(); CheckLL1(); + return errors == 0; + } + //--------------- check for circular productions ---------------------- class CNode { // node of list for finding circular productions From 1a8b2bbe18c65a7379d86c586604f2dee944c389 Mon Sep 17 00:00:00 2001 From: mingodad Date: Wed, 9 Jun 2021 17:37:12 +0200 Subject: [PATCH 04/15] More custom extensions implemented --- src/Coco.atg | 32 ++++++------ src/DFA.java | 16 ++++-- src/Parser.java | 95 +++++++++++++++++++++++++++------ src/ParserGen.java | 22 +++++++- src/Scanner.frame | 8 ++- src/Scanner.java | 88 ++++++++++++++++--------------- src/Tab.java | 127 +++++++++++++++++++++++++++++++++++++-------- 7 files changed, 285 insertions(+), 103 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index 02afa8e..832666a 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -115,7 +115,7 @@ Coco (. Symbol sym; Graph g, g1, g2; String gramName; .) { ident (. sym = tab.FindSym(t.val); boolean undef = sym == null; - if (undef) sym = tab.NewSym(Node.nt, t.val, t.line); + if (undef) sym = tab.NewSym(Node.nt, t.val, t.line, t.col); else { if (sym.typ == Node.nt) { if (sym.graph != null) SemErr("name declared twice"); @@ -149,7 +149,7 @@ Coco (. Symbol sym; Graph g, g1, g2; String gramName; if (sym.attrPos != null) SemErr("grammar symbol must not have attributes"); } - tab.noSym = tab.NewSym(Node.t, "???", 0); // noSym gets highest number + tab.noSym = tab.NewSym(Node.t, "???", 0, 0); // noSym gets highest number tab.SetupAnys(); tab.RenumberPragmas(); if (tab.ddt[2]) tab.PrintNodes(); @@ -245,7 +245,7 @@ TokenDecl (. SymInfo s; Symbol sym; Graph g; .) Sym (. sym = tab.FindSym(s.name); if (sym != null) SemErr("name declared twice"); else { - sym = tab.NewSym(typ, s.name, t.line); + sym = tab.NewSym(typ, s.name, t.line, t.col); sym.tokenKind = Symbol.fixedToken; } tokenString = null; @@ -266,7 +266,7 @@ TokenDecl (. SymInfo s; Symbol sym; Graph g; .) else dfa.MatchLiteral(sym.name, sym); .) ) - [ SemText (. if (typ != Node.pr) SemErr("semantic action not allowed here"); .) + [ SemText (. if (typ == Node.t) errors.Warning("Warning semantic action on token declarations require a custom Scanner"); .) //SemErr("semantic action not allowed here"); .) ] . @@ -323,7 +323,7 @@ Expression (. Graph g2; .) Term (. Graph g2; Node rslv = null; g = null; .) = -( [ (. rslv = tab.NewNode(Node.rslv, null, la.line); .) +( [ (. rslv = tab.NewNode(Node.rslv, null, la.line, la.col); .) Resolver (. g = new Graph(rslv); .) ] Factor (. if (rslv != null) tab.MakeSequence(g, g2); @@ -331,9 +331,9 @@ Term (. Graph g2; Node rslv = null; g = null; .) .) { Factor (. tab.MakeSequence(g, g2); .) } -| (. g = new Graph(tab.NewNode(Node.eps, null, t.line)); .) +| (. g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) ) (. if (g == null) // invalid start of Term - g = new Graph(tab.NewNode(Node.eps, null, t.line)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) . @@ -350,9 +350,9 @@ Factor (. SymInfo s; Position pos; boolean weak = false boolean undef = sym == null; if (undef) { if (s.kind == id) - sym = tab.NewSym(Node.nt, s.name, 0); // forward nt + sym = tab.NewSym(Node.nt, s.name, 0, 0); // forward nt else if (genScanner) { - sym = tab.NewSym(Node.t, s.name, t.line); + sym = tab.NewSym(Node.t, s.name, t.line, t.col); dfa.MatchLiteral(sym.name, sym); } else { // undefined string in production SemErr("undefined string in production"); @@ -365,7 +365,7 @@ Factor (. SymInfo s; Position pos; boolean weak = false if (weak) if (typ == Node.t) typ = Node.wt; else SemErr("only terminals may be weak"); - Node p = tab.NewNode(typ, sym, t.line); + Node p = tab.NewNode(typ, sym, t.line, t.col); g = new Graph(p); .) [ Attribs

(. if (s.kind != id) SemErr("a literal must not have attributes"); .) @@ -379,18 +379,18 @@ Factor (. SymInfo s; Position pos; boolean weak = false | '(' Expression ')' | '[' Expression ']' (. tab.MakeOption(g); .) | '{' Expression '}' (. tab.MakeIteration(g); .) -| SemText (. Node p = tab.NewNode(Node.sem, null, t.line); +| SemText (. Node p = tab.NewNode(Node.sem, null, t.line, t.col); p.pos = pos; g = new Graph(p); .) -| "ANY" (. Node p = tab.NewNode(Node.any, null, t.line); // p.set is set in tab.SetupAnys +| "ANY" (. Node p = tab.NewNode(Node.any, null, t.line, t.col); // p.set is set in tab.SetupAnys g = new Graph(p); .) -| "SYNC" (. Node p = tab.NewNode(Node.sync, null, t.line); +| "SYNC" (. Node p = tab.NewNode(Node.sync, null, t.line, t.col); g = new Graph(p); .) ) (. if (g == null) // invalid start of Factor - g = new Graph(tab.NewNode(Node.eps, null, t.line)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) . @@ -443,7 +443,7 @@ TokenFactor (. SymInfo s; .) SemErr("undefined name: " + s.name); c = tab.NewCharClass(s.name, new CharSet()); } - Node p = tab.NewNode(Node.clas, null, 0); p.val = c.n; + Node p = tab.NewNode(Node.clas, null, 0, 0); p.val = c.n; g = new Graph(p); tokenString = noString; } else { // str @@ -456,7 +456,7 @@ TokenFactor (. SymInfo s; .) | '[' TokenExpr ']' (. tab.MakeOption(g); tokenString = noString; .) | '{' TokenExpr '}' (. tab.MakeIteration(g); tokenString = noString; .) ) (. if (g == null) // invalid start of TokenFactor - g = new Graph(tab.NewNode(Node.eps, null, t.line)); .) + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) . /*------------------------------------------------------------------------------------*/ diff --git a/src/DFA.java b/src/DFA.java index 322c9c4..c3df2f6 100644 --- a/src/DFA.java +++ b/src/DFA.java @@ -952,7 +952,7 @@ void WriteState(State state) { Symbol endOf = state.endOf; gen.println("\t\t\t\tcase " + state.nr + ":"); if (endOf != null && state.firstAction != null) { - gen.println("\t\t\t\t\trecEnd = pos; recKind = " + endOf.n + ";"); + gen.println("\t\t\t\t\trecEnd = pos; recKind = " + endOf.n + " /* " + endOf.name + " */;"); } boolean ctxEnd = state.ctx; for (Action action = state.firstAction; action != null; action = action.next) { @@ -981,10 +981,15 @@ void WriteState(State state) { if (endOf == null) { gen.println("state = 0; break;}"); } else { - gen.print("t.kind = " + endOf.n + "; "); + gen.print("t.kind = " + endOf.n + " /* " + endOf.name + " */; "); if (endOf.tokenKind == Symbol.classLitToken) { gen.println("t.val = new String(tval, 0, tlen); CheckLiteral(); return t;}"); } else { + if(endOf.semPos != null && endOf.typ == Node.t) { + gen.print(" {"); + parser.pgen.CopySourcePart(parser, gen, endOf.semPos, 0); + gen.print("};"); + } gen.println("break loop;}"); } } @@ -1050,11 +1055,11 @@ public void WriteScanner() { gen.println("\t\tval = val.toLowerCase();"); } g.CopyFramePart("-->scan1"); - gen.print("\t\t\t"); + gen.print("\t\t\t\t"); if (tab.ignored.Elements() > 0) { PutRange(tab.ignored); } else { gen.print("false"); } g.CopyFramePart("-->scan2"); if (firstComment != null) { - gen.print("\t\tif ("); + gen.print("\t\t\tif ("); com = firstComment; comIdx = 0; while (com != null) { gen.print(ChCond(com.start.charAt(0))); @@ -1062,8 +1067,9 @@ public void WriteScanner() { if (com.next != null) gen.print(" ||"); com = com.next; comIdx++; } - gen.print(") return NextToken();"); + gen.print(") continue;"); } + g.CopyFramePart("-->scan22"); if (hasCtxMoves) { gen.println(); gen.print("\t\tint apx = 0;"); } /* pdt */ g.CopyFramePart("-->scan3"); for (State state = firstState.next; state != null; state = state.next) diff --git a/src/Parser.java b/src/Parser.java index 5910748..88dca60 100644 --- a/src/Parser.java +++ b/src/Parser.java @@ -28,12 +28,75 @@ package Coco; public class Parser { + //non terminals + public static final int _NT_Coco = 0; + public static final int _NT_SetDecl = 1; + public static final int _NT_TokenDecl = 2; + public static final int _NT_TokenExpr = 3; + public static final int _NT_Set = 4; + public static final int _NT_AttrDecl = 5; + public static final int _NT_SemText = 6; + public static final int _NT_Expression = 7; + public static final int _NT_SimSet = 8; + public static final int _NT_Char = 9; + public static final int _NT_Sym = 10; + public static final int _NT_TypeName = 11; + public static final int _NT_Term = 12; + public static final int _NT_Resolver = 13; + public static final int _NT_Factor = 14; + public static final int _NT_Attribs = 15; + public static final int _NT_Condition = 16; + public static final int _NT_TokenTerm = 17; + public static final int _NT_TokenFactor = 18; + public static final int _NT_Bracketed = 19; + public static final int maxNT = 19; + //terminals public static final int _EOF = 0; public static final int _ident = 1; public static final int _number = 2; public static final int _string = 3; public static final int _badString = 4; public static final int _char = 5; +// public static final int _("COMPILER") = 6; +// public static final int _("IGNORECASE") = 7; +// public static final int _("CHARACTERS") = 8; +// public static final int _("TOKENS") = 9; +// public static final int _("PRAGMAS") = 10; +// public static final int _("COMMENTS") = 11; +// public static final int _("FROM") = 12; +// public static final int _("TO") = 13; +// public static final int _("NESTED") = 14; +// public static final int _("IGNORE") = 15; +// public static final int _("PRODUCTIONS") = 16; +// public static final int _("=") = 17; +// public static final int _(".") = 18; +// public static final int _("END") = 19; +// public static final int _("+") = 20; +// public static final int _("-") = 21; +// public static final int _("..") = 22; +// public static final int _("ANY") = 23; +// public static final int _("<") = 24; +// public static final int _("^") = 25; +// public static final int _("out") = 26; +// public static final int _(">") = 27; +// public static final int _(",") = 28; +// public static final int _("<.") = 29; +// public static final int _(".>") = 30; +// public static final int _("[") = 31; +// public static final int _("]") = 32; +// public static final int _("|") = 33; +// public static final int _("WEAK") = 34; +// public static final int _("(") = 35; +// public static final int _(")") = 36; +// public static final int _("{") = 37; +// public static final int _("}") = 38; +// public static final int _("SYNC") = 39; +// public static final int _("IF") = 40; +// public static final int _("CONTEXT") = 41; +// public static final int _("(.") = 42; +// public static final int _(".)") = 43; +// public static final int _(???) = 44; + //non terminals public static final int maxT = 44; public static final int _ddtSym = 45; public static final int _optionSym = 46; @@ -199,7 +262,7 @@ void Coco() { Get(); sym = tab.FindSym(t.val); boolean undef = sym == null; - if (undef) sym = tab.NewSym(Node.nt, t.val, t.line); + if (undef) sym = tab.NewSym(Node.nt, t.val, t.line, t.col); else { if (sym.typ == Node.nt) { if (sym.graph != null) SemErr("name declared twice"); @@ -241,7 +304,7 @@ void Coco() { if (sym.attrPos != null) SemErr("grammar symbol must not have attributes"); } - tab.noSym = tab.NewSym(Node.t, "???", 0); // noSym gets highest number + tab.noSym = tab.NewSym(Node.t, "???", 0, 0); // noSym gets highest number tab.SetupAnys(); tab.RenumberPragmas(); if (tab.ddt[2]) tab.PrintNodes(); @@ -296,7 +359,7 @@ void TokenDecl(int typ) { sym = tab.FindSym(s.name); if (sym != null) SemErr("name declared twice"); else { - sym = tab.NewSym(typ, s.name, t.line); + sym = tab.NewSym(typ, s.name, t.line, t.col); sym.tokenKind = Symbol.fixedToken; } tokenString = null; @@ -324,7 +387,7 @@ void TokenDecl(int typ) { } else SynErr(47); if (la.kind == 42 /* "(." */) { sym.semPos = SemText(); - if (typ != Node.pr) SemErr("semantic action not allowed here"); + if (typ == Node.t) errors.Warning("Warning semantic action on token declarations require a custom Scanner"); } } @@ -564,7 +627,7 @@ Graph Term() { Graph g2; Node rslv = null; g = null; if (StartOf(18)) { if (la.kind == 40 /* "IF" */) { - rslv = tab.NewNode(Node.rslv, null, la.line); + rslv = tab.NewNode(Node.rslv, null, la.line, la.col); rslv.pos = Resolver(); g = new Graph(rslv); } @@ -577,10 +640,10 @@ Graph Term() { tab.MakeSequence(g, g2); } } else if (StartOf(20)) { - g = new Graph(tab.NewNode(Node.eps, null, t.line)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); } else SynErr(55); if (g == null) // invalid start of Term - g = new Graph(tab.NewNode(Node.eps, null, t.line)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); return g; } @@ -612,9 +675,9 @@ Graph Factor() { boolean undef = sym == null; if (undef) { if (s.kind == id) - sym = tab.NewSym(Node.nt, s.name, 0); // forward nt + sym = tab.NewSym(Node.nt, s.name, 0, 0); // forward nt else if (genScanner) { - sym = tab.NewSym(Node.t, s.name, t.line); + sym = tab.NewSym(Node.t, s.name, t.line, t.col); dfa.MatchLiteral(sym.name, sym); } else { // undefined string in production SemErr("undefined string in production"); @@ -627,7 +690,7 @@ else if (genScanner) { if (weak) if (typ == Node.t) typ = Node.wt; else SemErr("only terminals may be weak"); - Node p = tab.NewNode(typ, sym, t.line); + Node p = tab.NewNode(typ, sym, t.line, t.col); g = new Graph(p); if (la.kind == 24 /* "<" */ || la.kind == 29 /* "<." */) { @@ -665,7 +728,7 @@ else if (genScanner) { } case 42 /* "(." */: { pos = SemText(); - Node p = tab.NewNode(Node.sem, null, t.line); + Node p = tab.NewNode(Node.sem, null, t.line, t.col); p.pos = pos; g = new Graph(p); @@ -673,14 +736,14 @@ else if (genScanner) { } case 23 /* "ANY" */: { Get(); - Node p = tab.NewNode(Node.any, null, t.line); // p.set is set in tab.SetupAnys + Node p = tab.NewNode(Node.any, null, t.line, t.col); // p.set is set in tab.SetupAnys g = new Graph(p); break; } case 39 /* "SYNC" */: { Get(); - Node p = tab.NewNode(Node.sync, null, t.line); + Node p = tab.NewNode(Node.sync, null, t.line, t.col); g = new Graph(p); break; @@ -688,7 +751,7 @@ else if (genScanner) { default: SynErr(56); break; } if (g == null) // invalid start of Factor - g = new Graph(tab.NewNode(Node.eps, null, t.line)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); return g; } @@ -855,7 +918,7 @@ Graph TokenFactor() { SemErr("undefined name: " + s.name); c = tab.NewCharClass(s.name, new CharSet()); } - Node p = tab.NewNode(Node.clas, null, 0); p.val = c.n; + Node p = tab.NewNode(Node.clas, null, 0, 0); p.val = c.n; g = new Graph(p); tokenString = noString; } else { // str @@ -880,7 +943,7 @@ Graph TokenFactor() { tab.MakeIteration(g); tokenString = noString; } else SynErr(62); if (g == null) // invalid start of TokenFactor - g = new Graph(tab.NewNode(Node.eps, null, t.line)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); return g; } diff --git a/src/ParserGen.java b/src/ParserGen.java index ffb0151..7670bdd 100644 --- a/src/ParserGen.java +++ b/src/ParserGen.java @@ -141,6 +141,17 @@ void CopySourcePart (Position pos, int indent) { } } + /* TODO better interface for CopySourcePart */ + public void CopySourcePart (Parser parser, PrintWriter gen, Position pos, int indent) { + // Copy text described by pos from atg to gen + int oldPos = parser.pgen.buffer.getPos(); // Pos is modified by CopySourcePart + PrintWriter prevGen = parser.pgen.gen; + parser.pgen.gen = gen; + parser.pgen.CopySourcePart(pos, 0); + parser.pgen.gen = prevGen; + parser.pgen.buffer.setPos(oldPos); + } + void GenErrorMsg (int errTyp, Symbol sym) { errorNr++; err.write(ls + "\t\t\tcase " + errorNr + ": s = \""); @@ -328,12 +339,21 @@ void GenCode (Node p, int indent, BitSet isChecked) { } void GenTokens() { - //foreach (Symbol sym in Symbol.terminals) { + gen.println("\t//non terminals"); + for (int i = 0; i < tab.nonterminals.size(); i++) { + Symbol sym = (Symbol)tab.nonterminals.get(i); + gen.println("\tpublic static final int _NT_" + sym.name + " = " + sym.n + ";"); + } + gen.println("\tpublic static final int maxNT = " + (tab.nonterminals.size()-1) + ";"); + gen.println("\t//terminals"); for (int i = 0; i < tab.terminals.size(); i++) { Symbol sym = (Symbol)tab.terminals.get(i); if (Character.isLetter(sym.name.charAt(0))) gen.println("\tpublic static final int _" + sym.name + " = " + sym.n + ";"); + else + gen.println("//\tpublic static final int _(" + sym.name + ") = " + sym.n + ";"); } + gen.println("\t//non terminals"); } void GenPragmas() { diff --git a/src/Scanner.frame b/src/Scanner.frame index 5edcbef..6b0c47e 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -369,10 +369,14 @@ public class Scanner { } Token NextToken() { - while (ch == ' ' || + for(;;) { + while (ch == ' ' || -->scan1 - ) NextCh(); + ) NextCh(); -->scan2 + break; + } +-->scan22 int recKind = noSym; int recEnd = pos; t = new Token(); diff --git a/src/Scanner.java b/src/Scanner.java index fd73a13..0195e32 100644 --- a/src/Scanner.java +++ b/src/Scanner.java @@ -460,10 +460,14 @@ void CheckLiteral() { } Token NextToken() { - while (ch == ' ' || - ch >= 9 && ch <= 10 || ch == 13 - ) NextCh(); - if (ch == '/' && Comment0() ||ch == '/' && Comment1()) return NextToken(); + for(;;) { + while (ch == ' ' || + ch >= 9 && ch <= 10 || ch == 13 + ) NextCh(); + if (ch == '/' && Comment0() ||ch == '/' && Comment1()) continue; + break; + } + int recKind = noSym; int recEnd = pos; t = new Token(); @@ -482,17 +486,17 @@ Token NextToken() { t.kind = recKind; break loop; } // NextCh already done case 1: - recEnd = pos; recKind = 1; + recEnd = pos; recKind = 1 /* ident */; if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); state = 1; break;} - else {t.kind = 1; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;} + else {t.kind = 1 /* ident */; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;} case 2: - recEnd = pos; recKind = 2; + recEnd = pos; recKind = 2 /* number */; if (ch >= '0' && ch <= '9') {AddCh(); state = 2; break;} - else {t.kind = 2; break loop;} + else {t.kind = 2 /* number */; break loop;} case 3: - {t.kind = 3; break loop;} + {t.kind = 3 /* string */; break loop;} case 4: - {t.kind = 4; break loop;} + {t.kind = 4 /* badString */; break loop;} case 5: if (ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '&' || ch >= '(' && ch <= '[' || ch >= ']' && ch <= 65535) {AddCh(); state = 6; break;} else if (ch == 92) {AddCh(); state = 7; break;} @@ -508,15 +512,15 @@ Token NextToken() { else if (ch == 39) {AddCh(); state = 9; break;} else {state = 0; break;} case 9: - {t.kind = 5; break loop;} + {t.kind = 5 /* char */; break loop;} case 10: - recEnd = pos; recKind = 45; + recEnd = pos; recKind = 45 /* ddtSym */; if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); state = 10; break;} - else {t.kind = 45; break loop;} + else {t.kind = 45 /* ddtSym */; break loop;} case 11: - recEnd = pos; recKind = 46; + recEnd = pos; recKind = 46 /* optionSym */; if (ch >= '-' && ch <= '.' || ch >= '0' && ch <= ':' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); state = 11; break;} - else {t.kind = 46; break loop;} + else {t.kind = 46 /* optionSym */; break loop;} case 12: if (ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '!' || ch >= '#' && ch <= '[' || ch >= ']' && ch <= 65535) {AddCh(); state = 12; break;} else if (ch == 10 || ch == 13) {AddCh(); state = 4; break;} @@ -524,67 +528,67 @@ Token NextToken() { else if (ch == 92) {AddCh(); state = 14; break;} else {state = 0; break;} case 13: - recEnd = pos; recKind = 45; + recEnd = pos; recKind = 45 /* ddtSym */; if (ch >= '0' && ch <= '9') {AddCh(); state = 10; break;} else if (ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); state = 15; break;} - else {t.kind = 45; break loop;} + else {t.kind = 45 /* ddtSym */; break loop;} case 14: if (ch >= ' ' && ch <= '~') {AddCh(); state = 12; break;} else {state = 0; break;} case 15: - recEnd = pos; recKind = 45; + recEnd = pos; recKind = 45 /* ddtSym */; if (ch >= '0' && ch <= '9') {AddCh(); state = 10; break;} else if (ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); state = 15; break;} else if (ch == '=') {AddCh(); state = 11; break;} - else {t.kind = 45; break loop;} + else {t.kind = 45 /* ddtSym */; break loop;} case 16: - {t.kind = 17; break loop;} + {t.kind = 17 /* "=" */; break loop;} case 17: - {t.kind = 20; break loop;} + {t.kind = 20 /* "+" */; break loop;} case 18: - {t.kind = 21; break loop;} + {t.kind = 21 /* "-" */; break loop;} case 19: - {t.kind = 22; break loop;} + {t.kind = 22 /* ".." */; break loop;} case 20: - {t.kind = 25; break loop;} + {t.kind = 25 /* "^" */; break loop;} case 21: - {t.kind = 27; break loop;} + {t.kind = 27 /* ">" */; break loop;} case 22: - {t.kind = 28; break loop;} + {t.kind = 28 /* "," */; break loop;} case 23: - {t.kind = 29; break loop;} + {t.kind = 29 /* "<." */; break loop;} case 24: - {t.kind = 30; break loop;} + {t.kind = 30 /* ".>" */; break loop;} case 25: - {t.kind = 31; break loop;} + {t.kind = 31 /* "[" */; break loop;} case 26: - {t.kind = 32; break loop;} + {t.kind = 32 /* "]" */; break loop;} case 27: - {t.kind = 33; break loop;} + {t.kind = 33 /* "|" */; break loop;} case 28: - {t.kind = 36; break loop;} + {t.kind = 36 /* ")" */; break loop;} case 29: - {t.kind = 37; break loop;} + {t.kind = 37 /* "{" */; break loop;} case 30: - {t.kind = 38; break loop;} + {t.kind = 38 /* "}" */; break loop;} case 31: - {t.kind = 42; break loop;} + {t.kind = 42 /* "(." */; break loop;} case 32: - {t.kind = 43; break loop;} + {t.kind = 43 /* ".)" */; break loop;} case 33: - recEnd = pos; recKind = 18; + recEnd = pos; recKind = 18 /* "." */; if (ch == '.') {AddCh(); state = 19; break;} else if (ch == '>') {AddCh(); state = 24; break;} else if (ch == ')') {AddCh(); state = 32; break;} - else {t.kind = 18; break loop;} + else {t.kind = 18 /* "." */; break loop;} case 34: - recEnd = pos; recKind = 24; + recEnd = pos; recKind = 24 /* "<" */; if (ch == '.') {AddCh(); state = 23; break;} - else {t.kind = 24; break loop;} + else {t.kind = 24 /* "<" */; break loop;} case 35: - recEnd = pos; recKind = 35; + recEnd = pos; recKind = 35 /* "(" */; if (ch == '.') {AddCh(); state = 31; break;} - else {t.kind = 35; break loop;} + else {t.kind = 35 /* "(" */; break loop;} } } diff --git a/src/Tab.java b/src/Tab.java index 8400989..c0ed8a6 100644 --- a/src/Tab.java +++ b/src/Tab.java @@ -77,14 +77,15 @@ class Symbol { public BitSet follow; // nt: terminal followers public BitSet nts; // nt: nonterminals whose followers have to be added to this sym public int line; // source text line number of item in this node + public int col; // source text line column number of item in this node public Position attrPos; // nt: position of attributes in source text (or null) public Position semPos; // pr: pos of semantic action in source text (or null) // nt: pos of local declarations in source text (or null) public String retType; // AH - nt: Type of output attribute (or null) public String retVar; // AH - nt: Name of output attribute (or null) - public Symbol(int typ, String name, int line) { - this.typ = typ; this.name = name; this.line = line; + public Symbol(int typ, String name, int line, int col) { + this.typ = typ; this.name = name; this.line = line; this.col = col; } } @@ -127,12 +128,13 @@ class Node { public Position pos; // nt, t, wt: pos of actual attributes // sem: pos of semantic action in source text public int line; // source text line number of item in this node + public int col; // source text line column number of item in this node public State state; // DFA state corresponding to this node // (only used in DFA.ConvertToStates) public String retVar; // AH 20040206 - nt: name of output attribute (or null) - public Node(int typ, Symbol sym, int line) { - this.typ = typ; this.sym = sym; this.line = line; + public Node(int typ, Symbol sym, int line, int col) { + this.typ = typ; this.sym = sym; this.line = line; this.col = col; } } @@ -233,8 +235,8 @@ public Tab(Parser parser) { this.parser = parser; trace = parser.trace; errors = parser.errors; - eofSy = NewSym(Node.t, "EOF", 0); - dummyNode = NewNode(Node.eps, null, 0); + eofSy = NewSym(Node.t, "EOF", 0, 0); + dummyNode = NewNode(Node.eps, null, 0, 0); literals = new Hashtable(); } @@ -248,11 +250,11 @@ public Tab(Parser parser) { String[] tKind = {"fixedToken", "classToken", "litToken", "classLitToken"}; - public Symbol NewSym(int typ, String name, int line) { + public Symbol NewSym(int typ, String name, int line, int col) { if (name.length() == 2 && name.charAt(0) == '"') { parser.SemErr("empty token not allowed"); name = "???"; } - Symbol sym = new Symbol(typ, name, line); + Symbol sym = new Symbol(typ, name, line, col); switch (typ) { case Node.t: sym.n = terminals.size(); terminals.add(sym); break; case Node.pr: pragmas.add(sym); break; @@ -354,21 +356,21 @@ public void PrintSet(BitSet s, int indent) { "sync", "sem ", "alt ", "iter", "opt ", "rslv"}; Node dummyNode; - public Node NewNode(int typ, Symbol sym, int line) { - Node node = new Node(typ, sym, line); + public Node NewNode(int typ, Symbol sym, int line, int col) { + Node node = new Node(typ, sym, line, col); node.n = nodes.size(); nodes.add(node); return node; } public Node NewNode(int typ, Node sub) { - Node node = NewNode(typ, null, 0); + Node node = NewNode(typ, null, 0, 0); node.sub = sub; return node; } - public Node NewNode(int typ, int val, int line) { - Node node = NewNode(typ, null, line); + public Node NewNode(int typ, int val, int line, int col) { + Node node = NewNode(typ, null, line, col); node.val = val; return node; } @@ -436,7 +438,7 @@ public void Finish(Graph g) { public void DeleteNodes() { nodes = new ArrayList(); - dummyNode = NewNode(Node.eps, null, 0); + dummyNode = NewNode(Node.eps, null, 0, 0); } public Graph StrToGraph(String str) { @@ -445,7 +447,7 @@ public Graph StrToGraph(String str) { Graph g = new Graph(); g.r = dummyNode; for (int i = 0; i < s.length(); i++) { - Node p = NewNode(Node.chr, (int)s.charAt(i), 0); + Node p = NewNode(Node.chr, (int)s.charAt(i), 0, 0); g.r.next = p; g.r = p; } g.l = dummyNode.next; dummyNode.next = null; @@ -1098,7 +1100,7 @@ public boolean NoCircularProductions() { //--------------- check for LL(1) errors ---------------------- void LL1Error(int cond, Symbol sym) { - String s = " LL1 warning in " + curSy.name + ": "; + String s = " LL1 warning in " + curSy.name + ":" + curSy.line + ":" + curSy.col + ": "; if (sym != null) s += sym.name + " is "; switch (cond) { case 1: s += "start of several alternatives"; break; @@ -1109,22 +1111,92 @@ void LL1Error(int cond, Symbol sym) { errors.Warning(s); } - void CheckOverlap(BitSet s1, BitSet s2, int cond) { + int CheckOverlap(BitSet s1, BitSet s2, int cond) { + int overlaped = 0; for (int i = 0; i < terminals.size(); i++) { Symbol sym = (Symbol) terminals.get(i); - if (s1.get(sym.n) && s2.get(sym.n)) LL1Error(cond, sym); + if (s1.get(sym.n) && s2.get(sym.n)) { + LL1Error(cond, sym); + ++overlaped; + } + } + return overlaped; + } + + /* print the path for first set that contains token tok for the graph rooted at p */ + void PrintFirstPath(Node p, int tok, String indent, int depth) + { + while (p != null) + { + switch (p.typ) + { + case Node.nt: + { + if (p.sym.firstReady) + { + if (p.sym.first.get(tok)) + { + if (indent.length() == 1) System.out.println(indent + "=> " + p.sym.name + ":" + p.line + ":" + p.col + ":"); + System.out.println(indent + "-> " + p.sym.name + ":" + p.sym.line + ":" + p.sym.col + ":"); + if (p.sym.graph != null) PrintFirstPath(p.sym.graph, tok, indent + " ", depth + 1); + return; + } + } + break; + } + case Node.t: + case Node.wt: + { + if (p.sym.n == tok) System.out.println(indent + "= " + p.sym.name + ":" + p.line + ":" + p.col + ":"); + break; + } + case Node.any: + { + break; + } + case Node.alt: + { + PrintFirstPath(p.sub, tok, indent, depth + 1); + PrintFirstPath(p.down, tok, indent, depth + 1); + break; + } + case Node.iter: + case Node.opt: + { + PrintFirstPath(p.sub, tok, indent, depth + 1); + break; + } + } + if (!DelNode(p)) break; + p = p.next; } } + void PrintFirstPath(Node p, int tok) + { + PrintFirstPath(p, tok, "\t", 0); + } - void CheckAlts(Node p) { + int CheckAlts(Node p) { BitSet s1, s2; + int rc = 0; while (p != null) { if (p.typ == Node.alt) { Node q = p; s1 = new BitSet(terminals.size()); while (q != null) { // for all alternatives s2 = Expected0(q.sub, curSy); - CheckOverlap(s1, s2, 1); + int overlaped = CheckOverlap(s1, s2, 1); + if (overlaped > 0) + { + int overlapToken = 0; + /* Find the first overlap token */ + for (int i = 0; i < terminals.size(); i++) { + Symbol sym = (Symbol) terminals.get(i); + if (s1.get(sym.n) && s2.get(sym.n)) { overlapToken = sym.n; break; } + } + PrintFirstPath(p, overlapToken); + rc += overlaped; + } s1.or(s2); CheckAlts(q.sub); q = q.down; @@ -1134,7 +1206,19 @@ void CheckAlts(Node p) { else { s1 = Expected0(p.sub, curSy); s2 = Expected(p.next, curSy); - CheckOverlap(s1, s2, 2); + int overlaped = CheckOverlap(s1, s2, 2); + if (overlaped > 0) + { + int overlapToken = 0; + /* Find the first overlap token */ + for (int i = 0; i < terminals.size(); i++) { + Symbol sym = (Symbol) terminals.get(i); + if (s1.get(sym.n) && s2.get(sym.n)) { overlapToken = sym.n; break; } + } + //Console.WriteLine(format("\t=>:{0}: {1}", p.line, overlaped)); + PrintFirstPath(p, overlapToken); + rc += overlaped; + } } CheckAlts(p.sub); } else if (p.typ == Node.any) { @@ -1144,6 +1228,7 @@ void CheckAlts(Node p) { if (p.up) break; p = p.next; } + return rc; } public void CheckLL1() { From 8593714b9cedbda4c31840b53738d94775001d35 Mon Sep 17 00:00:00 2001 From: mingodad Date: Wed, 9 Jun 2021 20:36:13 +0200 Subject: [PATCH 05/15] Implemented the parser syntax tree generation --- src/Parser.frame | 85 ++++++++++++++++++++++++++++++++++++++++++++++ src/Parser.java | 85 ++++++++++++++++++++++++++++++++++++++++++++++ src/ParserGen.java | 11 ++++++ 3 files changed, 181 insertions(+) diff --git a/src/Parser.frame b/src/Parser.frame index 99d30b8..c943b1d 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -27,7 +27,69 @@ Coco/R itself) does not fall under the GNU General Public License. ------------------------------------------------------------------------*/ -->begin +import java.util.ArrayList; +import java.util.Stack; + public class Parser { + + public class SynTree { + public SynTree(Token t ) { + tok = t; + children = new ArrayList(); + } + + public Token tok; + public ArrayList children; + + private void printIndent(int n) { + for(int i=0; i < n; ++i) System.out.print(" "); + } + + public void dump(int indent, boolean isLast) { + int last_idx = children.size(); + if(tok.col > 0) { + printIndent(indent); + System.out.println(((isLast || (last_idx == 0)) ? "= " : " ") + "\t" + tok.line + "\t" + tok.col + "\t" + tok.kind + "\t" + tok.val); + } + else { + printIndent(indent); + System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump(indent+4, idx == last_idx); + } + } + public void dump() { + dump(0, false); + } + + public void dump2(int maxT, int indent, boolean isLast) { + int last_idx = children.size(); + if(tok.col > 0) { + printIndent(indent); + System.out.println(((isLast || (last_idx == 0)) ? "= " : " ") + "\t" + tok.line + "\t" + tok.col + "\t" + tok.kind + "\t" + tok.val); + } + else { + if(last_idx == 1) { + if(children.get(0).tok.kind < maxT) { + printIndent(indent); + System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); + } + } + else { + printIndent(indent); + System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); + } + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump2(maxT, indent+4, idx == last_idx); + } + } + public void dump2(int maxT) { + dump2(maxT, 0, false); + } + } + -->constants static final boolean _T = true; static final boolean _x = false; @@ -42,6 +104,29 @@ public class Parser { -->declarations + public SynTree ast_root; + Stack ast_stack; + + void AstAddTerminal() { + SynTree st = new SynTree( t ); + ast_stack.peek().children.add(st); + } + + boolean AstAddNonTerminal(int kind, String nt_name, int line) { + Token ntTok = new Token(); + ntTok.kind = kind; + ntTok.line = line; + ntTok.val = nt_name; + SynTree st = new SynTree( ntTok ); + ast_stack.peek().children.add(st); + ast_stack.push(st); + return true; + } + + void AstPopNonTerminal() { + ast_stack.pop(); + } + public Parser(Scanner scanner) { this.scanner = scanner; errors = new Errors(); diff --git a/src/Parser.java b/src/Parser.java index 88dca60..91124d9 100644 --- a/src/Parser.java +++ b/src/Parser.java @@ -27,7 +27,69 @@ ------------------------------------------------------------------------*/ package Coco; +import java.util.ArrayList; +import java.util.Stack; + public class Parser { + + public class SynTree { + public SynTree(Token t ) { + tok = t; + children = new ArrayList(); + } + + public Token tok; + public ArrayList children; + + private void printIndent(int n) { + for(int i=0; i < n; ++i) System.out.print(" "); + } + + public void dump(int indent, boolean isLast) { + int last_idx = children.size(); + if(tok.col > 0) { + printIndent(indent); + System.out.println(((isLast || (last_idx == 0)) ? "= " : " ") + "\t" + tok.line + "\t" + tok.col + "\t" + tok.kind + "\t" + tok.val); + } + else { + printIndent(indent); + System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump(indent+4, idx == last_idx); + } + } + public void dump() { + dump(0, false); + } + + public void dump2(int maxT, int indent, boolean isLast) { + int last_idx = children.size(); + if(tok.col > 0) { + printIndent(indent); + System.out.println(((isLast || (last_idx == 0)) ? "= " : " ") + "\t" + tok.line + "\t" + tok.col + "\t" + tok.kind + "\t" + tok.val); + } + else { + if(last_idx == 1) { + if(children.get(0).tok.kind < maxT) { + printIndent(indent); + System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); + } + } + else { + printIndent(indent); + System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); + } + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump2(maxT, indent+4, idx == last_idx); + } + } + public void dump2(int maxT) { + dump2(maxT, 0, false); + } + } + //non terminals public static final int _NT_Coco = 0; public static final int _NT_SetDecl = 1; @@ -128,6 +190,29 @@ public class Parser { + public SynTree ast_root; + Stack ast_stack; + + void AstAddTerminal() { + SynTree st = new SynTree( t ); + ast_stack.peek().children.add(st); + } + + boolean AstAddNonTerminal(int kind, String nt_name, int line) { + Token ntTok = new Token(); + ntTok.kind = kind; + ntTok.line = line; + ntTok.val = nt_name; + SynTree st = new SynTree( ntTok ); + ast_stack.peek().children.add(st); + ast_stack.push(st); + return true; + } + + void AstPopNonTerminal() { + ast_stack.pop(); + } + public Parser(Scanner scanner) { this.scanner = scanner; errors = new Errors(); diff --git a/src/ParserGen.java b/src/ParserGen.java index 7670bdd..756dfc7 100644 --- a/src/ParserGen.java +++ b/src/ParserGen.java @@ -226,6 +226,9 @@ void GenCode (Node p, int indent, BitSet isChecked) { WriteSymbolOrCode(p.sym); gen.println(");"); } + if(tab.genAST) { + gen.println("\tAstAddTerminal();"); + } break; } case Node.wt: { @@ -387,7 +390,15 @@ void GenProductions() { gen.println(") {"); if (sym.retVar != null) gen.println("\t\t" + sym.retType + " " + sym.retVar + ";"); CopySourcePart(sym.semPos, 2); + if(tab.genAST) { + if(i == 0) gen.println("\tToken rt = new Token(); rt.kind = _NT_" + sym.name + "; rt.val = \"" + sym.name + "\";ast_root = new SynTree( rt ); ast_stack = new Stack(); ast_stack.push(ast_root);"); + else gen.println("\tboolean ntAdded = AstAddNonTerminal(_NT_" + sym.name + ", \"" + sym.name + "\", la.line);"); + } GenCode(sym.graph, 2, new BitSet(tab.terminals.size())); + if(tab.genAST) { + if(i == 0) gen.println("\tAstPopNonTerminal();"); + else gen.println("\tif(ntAdded) AstPopNonTerminal();"); + } if (sym.retVar != null) gen.println("\t\treturn " + sym.retVar + ";"); gen.println("\t}"); gen.println(); } From ea7cb48c626775f5880b81cf6b2c5d6b31abcad6 Mon Sep 17 00:00:00 2001 From: mingodad Date: Wed, 9 Jun 2021 20:57:07 +0200 Subject: [PATCH 06/15] Allow till 8 characters for comment delimiters --- src/DFA.java | 45 +++++++++++++++++++++++++++++---------------- src/Scanner.java | 8 +++----- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/src/DFA.java b/src/DFA.java index c3df2f6..ef5526b 100644 --- a/src/DFA.java +++ b/src/DFA.java @@ -855,8 +855,8 @@ String CommentStr(Node p) { } else parser.SemErr("comment delimiters must not be structured"); p = p.next; } - if (s.length() == 0 || s.length() > 2) { - parser.SemErr("comment delimiters must be 1 or 2 characters long"); + if (s.length() == 0 || s.length() > 8) { + parser.SemErr("comment delimiters must be 1 or 8 characters long"); s = new StringBuffer("?"); } return s.toString(); @@ -870,6 +870,7 @@ public void NewComment(Node from, Node to, boolean nested) { //--------------------- scanner generation ------------------------ void GenComBody(Comment com) { + int imax = com.start.length()-1; gen.println("\t\t\tfor(;;) {"); gen.print ("\t\t\t\tif (" + ChCond(com.stop.charAt(0)) + ") "); gen.println("{"); if (com.stop.length() == 1) { @@ -877,22 +878,31 @@ void GenComBody(Comment com) { gen.println("\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }"); gen.println("\t\t\t\t\tNextCh();"); } else { - gen.println("\t\t\t\t\tNextCh();"); - gen.println("\t\t\t\t\tif (" + ChCond(com.stop.charAt(1)) + ") {"); + for(int sidx = 1; sidx <= imax; ++sidx) { + gen.println("\t\t\t\t\tNextCh();"); + gen.println("\t\t\t\t\tif (" + ChCond(com.stop.charAt(sidx)) + ") {"); + } gen.println("\t\t\t\t\t\tlevel--;"); - gen.println("\t\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }"); + gen.println("\t\t\t\t\t\tif (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; }"); gen.println("\t\t\t\t\t\tNextCh();"); - gen.println("\t\t\t\t\t}"); + for(int sidx = imax; sidx > 0; --sidx) { + gen.println("\t\t\t\t\t}"); + } } if (com.nested) { gen.print ("\t\t\t\t}"); gen.println(" else if (" + ChCond(com.start.charAt(0)) + ") {"); if (com.start.length() == 1) gen.println("\t\t\t\t\tlevel++; NextCh();"); else { - gen.println("\t\t\t\t\tNextCh();"); - gen.print ("\t\t\t\t\tif (" + ChCond(com.start.charAt(1)) + ") "); gen.println("{"); + int imaxN = com.start.length()-1; + for(int sidx = 1; sidx <= imaxN; ++sidx) { + gen.println("\t\t\t\t\tNextCh();"); + gen.print ("\t\t\t\t\tif (" + ChCond(com.start.charAt(sidx)) + ") "); gen.println("{"); + } gen.println("\t\t\t\t\t\tlevel++; NextCh();"); - gen.println("\t\t\t\t\t}"); + for(int sidx = imaxN; sidx > 0; --sidx) { + gen.println("\t\t\t\t\t}"); + } } } gen.println( "\t\t\t\t} else if (ch == Buffer.EOF) return false;"); @@ -904,17 +914,20 @@ void GenComment(Comment com, int i) { gen.println(); gen.print ("\tboolean Comment" + i + "() "); gen.println("{"); gen.println("\t\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;"); + gen.println("\t\tNextCh();"); if (com.start.length() == 1) { - gen.println("\t\tNextCh();"); GenComBody(com); } else { - gen.println("\t\tNextCh();"); - gen.print ("\t\tif (" + ChCond(com.start.charAt(1)) + ") "); gen.println("{"); - gen.println("\t\t\tNextCh();"); + int imax = com.start.length()-1; + for(int sidx = 1; sidx <= imax; ++sidx) { + gen.print ("\t\tif (" + ChCond(com.start.charAt(sidx)) + ") "); gen.println("{"); + gen.println("\t\t\tNextCh();"); + } GenComBody(com); - gen.println("\t\t} else {"); - gen.println("\t\t\tbuffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;"); - gen.println("\t\t}"); + for(int sidx = imax; sidx > 0; --sidx) { + gen.println("\t\t}"); + } + gen.println("\t\tbuffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;"); gen.println("\t\treturn false;"); } gen.println("\t}"); diff --git a/src/Scanner.java b/src/Scanner.java index 0195e32..6fa271e 100644 --- a/src/Scanner.java +++ b/src/Scanner.java @@ -416,9 +416,8 @@ boolean Comment0() { } else if (ch == Buffer.EOF) return false; else NextCh(); } - } else { - buffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; } + buffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; return false; } @@ -432,7 +431,7 @@ boolean Comment1() { NextCh(); if (ch == '/') { level--; - if (level == 0) { oldEols = line - line0; NextCh(); return true; } + if (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; } NextCh(); } } else if (ch == '/') { @@ -443,9 +442,8 @@ boolean Comment1() { } else if (ch == Buffer.EOF) return false; else NextCh(); } - } else { - buffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; } + buffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; return false; } From d8bc0f1e4d61d57a7a86a42189e507547e41a94e Mon Sep 17 00:00:00 2001 From: Domingo Alvarez Duarte Date: Thu, 10 Jun 2021 16:47:10 +0200 Subject: [PATCH 07/15] Add an overview of my main changes --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index d341fdf..e74211f 100644 --- a/README.md +++ b/README.md @@ -3,3 +3,23 @@ Coco/R is a compiler generator, which takes an attributed grammar of a source language and generates a scanner and a parser for this language. The scanner works as a deterministic finite automaton. The parser uses recursive descent. LL(1) conflicts can be resolved by a multi-symbol lookahead or by semantic checks. Thus the class of accepted grammars is LL(k) for an arbitrary k. http://ssw.jku.at/coco/ + +And this are my main modifications to the original: + +- Enhance left recursion detection + +- Allow semantic actions on `token declaration` similar to `pragmas` but the code executes on the Scanner + +- Allow till 8 characters as comment delimiters + +- Add option `-genRREBNF` to generate an EBNF grammar to crate railroad diagrams at https://www.bottlecaps.de/rr/ui + +- Add option `-geAST` to generate code to generate `parser syntax tree` based on https://github.com/rochus-keller/EbnfStudio + +- Add option `-ignoreGammarErrors` to make easier to develop grammars, like commenting one non terminal and still generating the parser and scanner even with sevral non reachable non terminals + +- Add a `TERMINAS` section to generate user define tokens not managed by the Scanner (from cocoxml) + +- Refactor the code to allow compile with and without wchar_t depending on the definition of `PARSER_WITH_AST` compiler macro + +- Generate between comments the correspondent representation of several magic numbers (mainly Tokens) From 18816277805851d005f33ac29ca2c22efa2cf2ef Mon Sep 17 00:00:00 2001 From: Domingo Alvarez Duarte Date: Thu, 10 Jun 2021 19:18:37 +0200 Subject: [PATCH 08/15] Remove line only relevant to C++ --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e74211f..6dc69da 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,6 @@ And this are my main modifications to the original: - Add a `TERMINAS` section to generate user define tokens not managed by the Scanner (from cocoxml) -- Refactor the code to allow compile with and without wchar_t depending on the definition of `PARSER_WITH_AST` compiler macro +- Generate between comments the correspondent representation of several magic numbers (mainly Tokens) -- Generate between comments the correspondent representation of several magic numbers (mainly Tokens) +See also https://github.com/mingodad/CocoR-CPP and https://github.com/mingodad/CocoR-CSharp From c3b00e4aa901d43b904789235f1d93c39a2b1142 Mon Sep 17 00:00:00 2001 From: Domingo Alvarez Duarte Date: Thu, 10 Jun 2021 19:21:59 +0200 Subject: [PATCH 09/15] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6dc69da..c313536 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ And this are my main modifications to the original: - Add option `-ignoreGammarErrors` to make easier to develop grammars, like commenting one non terminal and still generating the parser and scanner even with sevral non reachable non terminals -- Add a `TERMINAS` section to generate user define tokens not managed by the Scanner (from cocoxml) +- Add a `TERMINALS` section to generate user define tokens not managed by the Scanner (from cocoxml) - Generate between comments the correspondent representation of several magic numbers (mainly Tokens) From 0070de169fa303592160c13aac3fc2c7464603df Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 11 Jun 2021 15:06:15 +0200 Subject: [PATCH 10/15] My last fix for left recursion detection didn't worked for any depth, this now seems to work in all cases --- src/Tab.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Tab.java b/src/Tab.java index c0ed8a6..ad2a670 100644 --- a/src/Tab.java +++ b/src/Tab.java @@ -1032,7 +1032,7 @@ public boolean GrammarCheckAll() { int errors = 0; if(!NtsComplete()) ++errors; if(!AllNtReached()) ++errors; - if(!NoCircularProductions()) ++errors; + if(!NoCircularProductions()) System.exit(1); if(!AllNtToTerm()) ++errors; CheckResolvers(); CheckLL1(); return errors == 0; @@ -1048,17 +1048,17 @@ public CNode (Symbol l, Symbol r) { } } - void GetSingles(Node p, ArrayList singles, Node rule) { + void GetSingles(Node p, ArrayList singles) { if (p == null) return; // end of graph if (p.typ == Node.nt) { - if (p.up || DelGraph(p.next) || p.sym.graph == rule) singles.add(p.sym); + singles.add(p.sym); } else if (p.typ == Node.alt || p.typ == Node.iter || p.typ == Node.opt) { if (p.up || DelGraph(p.next)) { - GetSingles(p.sub, singles, rule); - if (p.typ == Node.alt) GetSingles(p.down, singles, rule); + GetSingles(p.sub, singles); + if (p.typ == Node.alt) GetSingles(p.down, singles); } } - if (!p.up && DelNode(p)) GetSingles(p.next, singles, rule); + if (!p.up && DelNode(p)) GetSingles(p.next, singles); } public boolean NoCircularProductions() { @@ -1067,7 +1067,7 @@ public boolean NoCircularProductions() { for (int i = 0; i < nonterminals.size(); i++) { Symbol sym = (Symbol)nonterminals.get(i); ArrayList singles = new ArrayList(); - GetSingles(sym.graph, singles, sym.graph); // get nonterminals s such that sym-->s + GetSingles(sym.graph, singles); // get nonterminals s such that sym-->s for (int j = 0; j < singles.size(); j++) { Symbol s = (Symbol)singles.get(j); list.add(new CNode(sym, s)); @@ -1092,7 +1092,7 @@ public boolean NoCircularProductions() { for (int i = 0; i < list.size(); i++) { CNode n = (CNode)list.get(i); ok = false; - errors.SemErr(" " + n.left.name + " --> " + n.right.name); + errors.SemErr(" " + n.left.name + ":" + n.left.line + " --> " + n.right.name + ":" + n.right.line); } return ok; } From ab55d5e80c39206e4fc7b20a612113b394832b08 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 12 Jun 2021 13:00:47 +0200 Subject: [PATCH 11/15] Fix SynTree.dump2 that is supposed to show a pruned tree --- src/Parser.frame | 11 +++++------ src/Parser.java | 11 +++++------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/Parser.frame b/src/Parser.frame index c943b1d..3fbd5b8 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -63,18 +63,20 @@ public class Parser { dump(0, false); } - public void dump2(int maxT, int indent, boolean isLast) { + public void dump2(int indent, boolean isLast) { int last_idx = children.size(); + int indentPlus = 4; if(tok.col > 0) { printIndent(indent); System.out.println(((isLast || (last_idx == 0)) ? "= " : " ") + "\t" + tok.line + "\t" + tok.col + "\t" + tok.kind + "\t" + tok.val); } else { if(last_idx == 1) { - if(children.get(0).tok.kind < maxT) { + if(children.get(0).children.size() == 0) { printIndent(indent); System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); } + else indentPlus = 0; } else { printIndent(indent); @@ -82,12 +84,9 @@ public class Parser { } } if(last_idx > 0) { - for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump2(indent+indentPlus, idx == last_idx); } } - public void dump2(int maxT) { - dump2(maxT, 0, false); - } } -->constants diff --git a/src/Parser.java b/src/Parser.java index 91124d9..12d8d15 100644 --- a/src/Parser.java +++ b/src/Parser.java @@ -63,18 +63,20 @@ public void dump() { dump(0, false); } - public void dump2(int maxT, int indent, boolean isLast) { + public void dump2(int indent, boolean isLast) { int last_idx = children.size(); + int indentPlus = 4; if(tok.col > 0) { printIndent(indent); System.out.println(((isLast || (last_idx == 0)) ? "= " : " ") + "\t" + tok.line + "\t" + tok.col + "\t" + tok.kind + "\t" + tok.val); } else { if(last_idx == 1) { - if(children.get(0).tok.kind < maxT) { + if(children.get(0).children.size() == 0) { printIndent(indent); System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); } + else indentPlus = 0; } else { printIndent(indent); @@ -82,12 +84,9 @@ public void dump2(int maxT, int indent, boolean isLast) { } } if(last_idx > 0) { - for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump2(indent+indentPlus, idx == last_idx); } } - public void dump2(int maxT) { - dump2(maxT, 0, false); - } } //non terminals From 91ef474a4286f19d531ec61dc189aa3cd94ef192 Mon Sep 17 00:00:00 2001 From: mingodad Date: Mon, 14 Jun 2021 14:13:05 +0200 Subject: [PATCH 12/15] Rename SynTree::dump to SynTree::dump_all and SynTree::dump to SynTree::dump_pruned --- src/Parser.frame | 15 +++++++++------ src/Parser.java | 15 +++++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/Parser.frame b/src/Parser.frame index 3fbd5b8..fbb42ff 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -45,7 +45,7 @@ public class Parser { for(int i=0; i < n; ++i) System.out.print(" "); } - public void dump(int indent, boolean isLast) { + public void dump_all(int indent, boolean isLast) { int last_idx = children.size(); if(tok.col > 0) { printIndent(indent); @@ -56,14 +56,14 @@ public class Parser { System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); } if(last_idx > 0) { - for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump_all(indent+4, idx == last_idx); } } - public void dump() { - dump(0, false); + public void dump_all() { + dump_all(0, false); } - public void dump2(int indent, boolean isLast) { + public void dump_pruned(int indent, boolean isLast) { int last_idx = children.size(); int indentPlus = 4; if(tok.col > 0) { @@ -84,9 +84,12 @@ public class Parser { } } if(last_idx > 0) { - for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump_pruned(indent+indentPlus, idx == last_idx); } } + public void dump_pruned() { + dump_pruned(0, false); + } } -->constants diff --git a/src/Parser.java b/src/Parser.java index 12d8d15..4d56629 100644 --- a/src/Parser.java +++ b/src/Parser.java @@ -45,7 +45,7 @@ private void printIndent(int n) { for(int i=0; i < n; ++i) System.out.print(" "); } - public void dump(int indent, boolean isLast) { + public void dump_all(int indent, boolean isLast) { int last_idx = children.size(); if(tok.col > 0) { printIndent(indent); @@ -56,14 +56,14 @@ public void dump(int indent, boolean isLast) { System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); } if(last_idx > 0) { - for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump_all(indent+4, idx == last_idx); } } - public void dump() { - dump(0, false); + public void dump_all() { + dump_all(0, false); } - public void dump2(int indent, boolean isLast) { + public void dump_pruned(int indent, boolean isLast) { int last_idx = children.size(); int indentPlus = 4; if(tok.col > 0) { @@ -84,9 +84,12 @@ public void dump2(int indent, boolean isLast) { } } if(last_idx > 0) { - for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump_pruned(indent+indentPlus, idx == last_idx); } } + public void dump_pruned() { + dump_pruned(0, false); + } } //non terminals From cf30ae11775817cb21d635630fb33e369e026bb2 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 1 Jul 2021 12:09:34 +0200 Subject: [PATCH 13/15] Fix for endless loop with some ill grammars --- src/Tab.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Tab.java b/src/Tab.java index ad2a670..55dbc77 100644 --- a/src/Tab.java +++ b/src/Tab.java @@ -1163,7 +1163,8 @@ void PrintFirstPath(Node p, int tok, String indent, int depth) case Node.iter: case Node.opt: { - PrintFirstPath(p.sub, tok, indent, depth + 1); + if (!DelNode(p.sub)) //prevent endless loop with some ill grammars + PrintFirstPath(p.sub, tok, indent, depth + 1); break; } } From 8128b344a1f15e3ca068ac3d4beac7c7f800a9af Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 14 Aug 2021 15:22:38 +0200 Subject: [PATCH 14/15] Add the suffix "_NT" to non terminal generated functions --- src/ParserGen.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ParserGen.java b/src/ParserGen.java index 756dfc7..c7f405f 100644 --- a/src/ParserGen.java +++ b/src/ParserGen.java @@ -96,7 +96,7 @@ void WriteSymbolOrCode(Symbol sym) { } else { gen.print("_" + sym.name); } - } + } // AW: use a switch if more than 5 alternatives and none starts with a resolver, no LL1 warning boolean UseSwitch (Node p) { @@ -199,7 +199,7 @@ void PutCaseLabels (BitSet s) { if (s.get(sym.n)) { gen.print("case "); WriteSymbolOrCode(sym); - gen.print(": "); + gen.print(": "); } } } @@ -212,7 +212,7 @@ void GenCode (Node p, int indent, BitSet isChecked) { case Node.nt: { Indent(indent); if (p.retVar != null) gen.print(p.retVar + " = "); - gen.print(p.sym.name + "("); + gen.print(p.sym.name + "_NT("); CopySourcePart(p.pos, 0); gen.println(");"); break; @@ -385,7 +385,7 @@ void GenProductions() { curSy = sym; gen.print("\t"); if (sym.retType == null) gen.print("void "); else gen.print(sym.retType + " "); - gen.print(sym.name + "("); + gen.print(sym.name + "_NT("); CopySourcePart(sym.attrPos, 0); gen.println(") {"); if (sym.retVar != null) gen.println("\t\t" + sym.retType + " " + sym.retVar + ";"); @@ -455,7 +455,7 @@ public void WriteParser () { g.CopyFramePart("-->declarations"); CopySourcePart(tab.semDeclPos, 0); g.CopyFramePart("-->pragmas"); GenCodePragmas(); g.CopyFramePart("-->productions"); GenProductions(); - g.CopyFramePart("-->parseRoot"); gen.println("\t\t" + tab.gramSy.name + "();"); if (tab.checkEOF) gen.println("\t\tExpect(0);"); + g.CopyFramePart("-->parseRoot"); gen.println("\t\t" + tab.gramSy.name + "_NT();"); if (tab.checkEOF) gen.println("\t\tExpect(0);"); g.CopyFramePart("-->initialization"); InitSets(); g.CopyFramePart("-->errors"); gen.print(err.toString()); g.CopyFramePart(null); @@ -529,7 +529,7 @@ public int GenCodeRREBNF (Node p) { public void WriteRREBNF () { Generator g = new Generator(tab); gen = g.OpenGen("Parser.ebnf"); - + gen.print("//\n// EBNF generated by CocoR parser generator to be viewed with https://www.bottlecaps.de/rr/ui\n//\n"); gen.print("\n//\n// productions\n//\n\n"); for (int i = 0; i < tab.nonterminals.size(); i++) { From 78343858a0da732b1d5f2e96b29f686e588c8249 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 14 Aug 2021 16:06:36 +0200 Subject: [PATCH 15/15] Add token inheritance from https://github.com/Lercher/CocoR --- src/Coco.atg | 10 +- src/Parser.frame | 70 +++--- src/Parser.java | 615 ++++++++++++++++++++++++--------------------- src/ParserGen.java | 22 +- src/Scanner.java | 101 ++++---- src/Tab.java | 1 + 6 files changed, 445 insertions(+), 374 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index 832666a..477070d 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -240,7 +240,7 @@ Char /*------------------------------------------------------------------------------------*/ -TokenDecl (. SymInfo s; Symbol sym; Graph g; .) +TokenDecl (. SymInfo s, si; Symbol sym, inheritsSym; Graph g; .) = Sym (. sym = tab.FindSym(s.name); if (sym != null) SemErr("name declared twice"); @@ -250,6 +250,14 @@ TokenDecl (. SymInfo s; Symbol sym; Graph g; .) } tokenString = null; .) + [ ':' Sym + (. inheritsSym = tab.FindSym(si.name); + if (inheritsSym == null) SemErr("token can't inherit from undeclared name"); + else if (inheritsSym == sym) SemErr("token must not inherit from itself"); + else if (inheritsSym.typ != typ) SemErr("token can't inherit from this token type"); + else sym.inherits = inheritsSym; + .) + ] SYNC ( '=' TokenExpr '.' (. if (s.kind == str) SemErr("a literal must not be declared with a structure"); tab.Finish(g); diff --git a/src/Parser.frame b/src/Parser.frame index fbb42ff..be20e8d 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported from C# to Java by Wolfgang Ahorner with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. ------------------------------------------------------------------------*/ -->begin @@ -100,7 +100,7 @@ public class Parser { public Token t; // last recognized token public Token la; // lookahead token int errDist = minErrDist; - + public Scanner scanner; public Errors errors; @@ -108,7 +108,7 @@ public class Parser { public SynTree ast_root; Stack ast_stack; - + void AstAddTerminal() { SynTree st = new SynTree( t ); ast_stack.peek().children.add(st); @@ -143,7 +143,7 @@ public class Parser { if (errDist >= minErrDist) errors.SemErr(t.line, t.col, msg); errDist = 0; } - + void Get () { for (;;) { t = la; @@ -156,26 +156,35 @@ public class Parser { la = t; } } - + + boolean isKind(Token t, int n) { + int k = t.kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; + } + void Expect (int n) { - if (la.kind==n) Get(); else { SynErr(n); } + if (isKind(la, n)) Get(); else { SynErr(n); } } - + boolean StartOf (int s) { return set[s][la.kind]; } - + void ExpectWeak (int n, int follow) { - if (la.kind == n) Get(); + if (isKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); } } - + boolean WeakSeparator (int n, int syFol, int repFol) { int kind = la.kind; - if (kind == n) { Get(); return true; } + if (isKind(la, n)) { Get(); return true; } else if (StartOf(repFol)) return false; else { SynErr(n); @@ -186,16 +195,21 @@ public class Parser { return StartOf(syFol); } } - + -->productions public void Parse() { la = new Token(); - la.val = ""; + la.val = ""; Get(); -->parseRoot } + // a token's base type + public static final int[] tBase = { +-->tbase + }; + private static final boolean[][] set = { -->initialization }; @@ -206,7 +220,7 @@ class Errors { public int count = 0; // number of errors detected public java.io.PrintStream errorStream = System.out; // error messages go to this stream public String errMsgFormat = "-- line {0} col {1}: {2}"; // 0=line, 1=column, 2=text - + protected void printMsg(int line, int column, String msg) { StringBuffer b = new StringBuffer(errMsgFormat); int pos = b.indexOf("{0}"); @@ -217,7 +231,7 @@ class Errors { if (pos >= 0) b.replace(pos, pos+3, msg); errorStream.println(b.toString()); } - + public void SynErr (int line, int col, int n) { String s; switch (n) {-->errors @@ -227,20 +241,20 @@ class Errors { count++; } - public void SemErr (int line, int col, String s) { + public void SemErr (int line, int col, String s) { printMsg(line, col, s); count++; } - + public void SemErr (String s) { errorStream.println(s); count++; } - - public void Warning (int line, int col, String s) { + + public void Warning (int line, int col, String s) { printMsg(line, col, s); } - + public void Warning (String s) { errorStream.println(s); } diff --git a/src/Parser.java b/src/Parser.java index 4d56629..e221f63 100644 --- a/src/Parser.java +++ b/src/Parser.java @@ -139,31 +139,32 @@ public void dump_pruned() { // public static final int _("-") = 21; // public static final int _("..") = 22; // public static final int _("ANY") = 23; -// public static final int _("<") = 24; -// public static final int _("^") = 25; -// public static final int _("out") = 26; -// public static final int _(">") = 27; -// public static final int _(",") = 28; -// public static final int _("<.") = 29; -// public static final int _(".>") = 30; -// public static final int _("[") = 31; -// public static final int _("]") = 32; -// public static final int _("|") = 33; -// public static final int _("WEAK") = 34; -// public static final int _("(") = 35; -// public static final int _(")") = 36; -// public static final int _("{") = 37; -// public static final int _("}") = 38; -// public static final int _("SYNC") = 39; -// public static final int _("IF") = 40; -// public static final int _("CONTEXT") = 41; -// public static final int _("(.") = 42; -// public static final int _(".)") = 43; -// public static final int _(???) = 44; +// public static final int _(":") = 24; +// public static final int _("<") = 25; +// public static final int _("^") = 26; +// public static final int _("out") = 27; +// public static final int _(">") = 28; +// public static final int _(",") = 29; +// public static final int _("<.") = 30; +// public static final int _(".>") = 31; +// public static final int _("[") = 32; +// public static final int _("]") = 33; +// public static final int _("|") = 34; +// public static final int _("WEAK") = 35; +// public static final int _("(") = 36; +// public static final int _(")") = 37; +// public static final int _("{") = 38; +// public static final int _("}") = 39; +// public static final int _("SYNC") = 40; +// public static final int _("IF") = 41; +// public static final int _("CONTEXT") = 42; +// public static final int _("(.") = 43; +// public static final int _(".)") = 44; +// public static final int _(???) = 45; //non terminals - public static final int maxT = 44; - public static final int _ddtSym = 45; - public static final int _optionSym = 46; + public static final int maxT = 45; + public static final int _ddtSym = 46; + public static final int _optionSym = 47; static final boolean _T = true; static final boolean _x = false; @@ -172,7 +173,7 @@ public void dump_pruned() { public Token t; // last recognized token public Token la; // lookahead token int errDist = minErrDist; - + public Scanner scanner; public Errors errors; @@ -194,7 +195,7 @@ public void dump_pruned() { public SynTree ast_root; Stack ast_stack; - + void AstAddTerminal() { SynTree st = new SynTree( t ); ast_stack.peek().children.add(st); @@ -229,7 +230,7 @@ public void SemErr (String msg) { if (errDist >= minErrDist) errors.SemErr(t.line, t.col, msg); errDist = 0; } - + void Get () { for (;;) { t = la; @@ -248,26 +249,35 @@ void Get () { la = t; } } - + + boolean isKind(Token t, int n) { + int k = t.kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; + } + void Expect (int n) { - if (la.kind==n) Get(); else { SynErr(n); } + if (isKind(la, n)) Get(); else { SynErr(n); } } - + boolean StartOf (int s) { return set[s][la.kind]; } - + void ExpectWeak (int n, int follow) { - if (la.kind == n) Get(); + if (isKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); } } - + boolean WeakSeparator (int n, int syFol, int repFol) { int kind = la.kind; - if (kind == n) { Get(); return true; } + if (isKind(la, n)) { Get(); return true; } else if (StartOf(repFol)) return false; else { SynErr(n); @@ -278,8 +288,8 @@ boolean WeakSeparator (int n, int syFol, int repFol) { return StartOf(syFol); } } - - void Coco() { + + void Coco_NT() { Symbol sym; Graph g, g1, g2; String gramName; CharSet s; int beg, line; if (StartOf(1)) { Get(); @@ -300,52 +310,52 @@ void Coco() { Get(); } tab.semDeclPos = new Position(beg, la.pos, 0, line); - if (la.kind == 7 /* "IGNORECASE" */) { + if (isKind(la, 7 /* "IGNORECASE" */)) { Get(); dfa.ignoreCase = true; } - if (la.kind == 8 /* "CHARACTERS" */) { + if (isKind(la, 8 /* "CHARACTERS" */)) { Get(); - while (la.kind == _ident) { - SetDecl(); + while (isKind(la, _ident)) { + SetDecl_NT(); } } - if (la.kind == 9 /* "TOKENS" */) { + if (isKind(la, 9 /* "TOKENS" */)) { Get(); - while (la.kind == _ident || la.kind == _string || la.kind == _char) { - TokenDecl(Node.t); + while (isKind(la, _ident) || isKind(la, _string) || isKind(la, _char)) { + TokenDecl_NT(Node.t); } } - if (la.kind == 10 /* "PRAGMAS" */) { + if (isKind(la, 10 /* "PRAGMAS" */)) { Get(); - while (la.kind == _ident || la.kind == _string || la.kind == _char) { - TokenDecl(Node.pr); + while (isKind(la, _ident) || isKind(la, _string) || isKind(la, _char)) { + TokenDecl_NT(Node.pr); } } - while (la.kind == 11 /* "COMMENTS" */) { + while (isKind(la, 11 /* "COMMENTS" */)) { Get(); boolean nested = false; Expect(12 /* "FROM" */); - g1 = TokenExpr(); + g1 = TokenExpr_NT(); Expect(13 /* "TO" */); - g2 = TokenExpr(); - if (la.kind == 14 /* "NESTED" */) { + g2 = TokenExpr_NT(); + if (isKind(la, 14 /* "NESTED" */)) { Get(); nested = true; } dfa.NewComment(g1.l, g2.l, nested); } - while (la.kind == 15 /* "IGNORE" */) { + while (isKind(la, 15 /* "IGNORE" */)) { Get(); - s = Set(); + s = Set_NT(); tab.ignored.Or(s); } - while (!(la.kind == _EOF || la.kind == 16 /* "PRODUCTIONS" */)) {SynErr(45); Get();} + while (!(isKind(la, _EOF) || isKind(la, 16 /* "PRODUCTIONS" */))) {SynErr(46); Get();} Expect(16 /* "PRODUCTIONS" */); if (genScanner) dfa.MakeDeterministic(); tab.DeleteNodes(); - while (la.kind == _ident) { + while (isKind(la, _ident)) { Get(); sym = tab.FindSym(t.val); boolean undef = sym == null; @@ -361,19 +371,19 @@ void Coco() { boolean noRet = sym.retVar==null; sym.retVar = null; - if (la.kind == 24 /* "<" */ || la.kind == 29 /* "<." */) { - AttrDecl(sym); + if (isKind(la, 25 /* "<" */) || isKind(la, 30 /* "<." */)) { + AttrDecl_NT(sym); } if (!undef) if (noAttrs != (sym.attrPos == null) || noRet != (sym.retVar == null)) SemErr("attribute mismatch between declaration and use of this symbol"); - if (la.kind == 42 /* "(." */) { - sym.semPos = SemText(); + if (isKind(la, 43 /* "(." */)) { + sym.semPos = SemText_NT(); } ExpectWeak(17 /* "=" */, 3); - g = Expression(); + g = Expression_NT(); sym.graph = g.l; tab.Finish(g); @@ -425,7 +435,7 @@ void Coco() { Expect(18 /* "." */); } - void SetDecl() { + void SetDecl_NT() { CharSet s; Expect(_ident); String name = t.val; @@ -433,16 +443,16 @@ void SetDecl() { if (c != null) SemErr("name declared twice"); Expect(17 /* "=" */); - s = Set(); + s = Set_NT(); if (s.Elements() == 0) SemErr("character set must not be empty"); c = tab.NewCharClass(name, s); Expect(18 /* "." */); } - void TokenDecl(int typ) { - SymInfo s; Symbol sym; Graph g; - s = Sym(); + void TokenDecl_NT(int typ) { + SymInfo s, si; Symbol sym, inheritsSym; Graph g; + s = Sym_NT(); sym = tab.FindSym(s.name); if (sym != null) SemErr("name declared twice"); else { @@ -451,10 +461,20 @@ void TokenDecl(int typ) { } tokenString = null; - while (!(StartOf(5))) {SynErr(46); Get();} - if (la.kind == 17 /* "=" */) { + if (isKind(la, 24 /* ":" */)) { + Get(); + si = Sym_NT(); + inheritsSym = tab.FindSym(si.name); + if (inheritsSym == null) SemErr("token can't inherit from undeclared name"); + else if (inheritsSym == sym) SemErr("token must not inherit from itself"); + else if (inheritsSym.typ != typ) SemErr("token can't inherit from this token type"); + else sym.inherits = inheritsSym; + + } + while (!(StartOf(5))) {SynErr(47); Get();} + if (isKind(la, 17 /* "=" */)) { Get(); - g = TokenExpr(); + g = TokenExpr_NT(); Expect(18 /* "." */); if (s.kind == str) SemErr("a literal must not be declared with a structure"); tab.Finish(g); @@ -471,20 +491,20 @@ void TokenDecl(int typ) { if (s.kind == id) genScanner = false; else dfa.MatchLiteral(sym.name, sym); - } else SynErr(47); - if (la.kind == 42 /* "(." */) { - sym.semPos = SemText(); + } else SynErr(48); + if (isKind(la, 43 /* "(." */)) { + sym.semPos = SemText_NT(); if (typ == Node.t) errors.Warning("Warning semantic action on token declarations require a custom Scanner"); } } - Graph TokenExpr() { + Graph TokenExpr_NT() { Graph g; Graph g2; - g = TokenTerm(); + g = TokenTerm_NT(); boolean first = true; - while (WeakSeparator(33,7,8) ) { - g2 = TokenTerm(); + while (WeakSeparator(34,7,8) ) { + g2 = TokenTerm_NT(); if (first) { tab.MakeFirstAlt(g); first = false; } tab.MakeAlternative(g, g2); @@ -492,51 +512,51 @@ Graph TokenExpr() { return g; } - CharSet Set() { + CharSet Set_NT() { CharSet s; CharSet s2; - s = SimSet(); - while (la.kind == 20 /* "+" */ || la.kind == 21 /* "-" */) { - if (la.kind == 20 /* "+" */) { + s = SimSet_NT(); + while (isKind(la, 20 /* "+" */) || isKind(la, 21 /* "-" */)) { + if (isKind(la, 20 /* "+" */)) { Get(); - s2 = SimSet(); + s2 = SimSet_NT(); s.Or(s2); } else { Get(); - s2 = SimSet(); + s2 = SimSet_NT(); s.Subtract(s2); } } return s; } - void AttrDecl(Symbol sym) { + void AttrDecl_NT(Symbol sym) { int beg, col, line; - if (la.kind == 24 /* "<" */) { + if (isKind(la, 25 /* "<" */)) { Get(); - if (la.kind == 25 /* "^" */ || la.kind == 26 /* "out" */) { - if (la.kind == 25 /* "^" */) { + if (isKind(la, 26 /* "^" */) || isKind(la, 27 /* "out" */)) { + if (isKind(la, 26 /* "^" */)) { Get(); } else { Get(); } beg = la.pos; - TypeName(); + TypeName_NT(); sym.retType = scanner.buffer.GetString(beg, la.pos); Expect(_ident); sym.retVar = t.val; - if (la.kind == 27 /* ">" */) { + if (isKind(la, 28 /* ">" */)) { Get(); - } else if (la.kind == 28 /* "," */) { + } else if (isKind(la, 29 /* "," */)) { Get(); beg = la.pos; col = la.col; line = la.line; while (StartOf(9)) { Get(); } - Expect(27 /* ">" */); + Expect(28 /* ">" */); if (t.pos > beg) sym.attrPos = new Position(beg, t.pos, col, line); - } else SynErr(48); + } else SynErr(49); } else if (StartOf(10)) { beg = la.pos; col = la.col; line = la.line; if (StartOf(11)) { @@ -545,35 +565,35 @@ void AttrDecl(Symbol sym) { Get(); } } - Expect(27 /* ">" */); + Expect(28 /* ">" */); if (t.pos > beg) sym.attrPos = new Position(beg, t.pos, col, line); - } else SynErr(49); - } else if (la.kind == 29 /* "<." */) { + } else SynErr(50); + } else if (isKind(la, 30 /* "<." */)) { Get(); - if (la.kind == 25 /* "^" */ || la.kind == 26 /* "out" */) { - if (la.kind == 25 /* "^" */) { + if (isKind(la, 26 /* "^" */) || isKind(la, 27 /* "out" */)) { + if (isKind(la, 26 /* "^" */)) { Get(); } else { Get(); } beg = la.pos; - TypeName(); + TypeName_NT(); sym.retType = scanner.buffer.GetString(beg, la.pos); Expect(_ident); sym.retVar = t.val; - if (la.kind == 30 /* ".>" */) { + if (isKind(la, 31 /* ".>" */)) { Get(); - } else if (la.kind == 28 /* "," */) { + } else if (isKind(la, 29 /* "," */)) { Get(); beg = la.pos; col = la.col; line = la.line; while (StartOf(12)) { Get(); } - Expect(30 /* ".>" */); + Expect(31 /* ".>" */); if (t.pos > beg) sym.attrPos = new Position(beg, t.pos, col, line); - } else SynErr(50); + } else SynErr(51); } else if (StartOf(10)) { beg = la.pos; col = la.col; line = la.line; if (StartOf(13)) { @@ -582,21 +602,21 @@ void AttrDecl(Symbol sym) { Get(); } } - Expect(30 /* ".>" */); + Expect(31 /* ".>" */); if (t.pos > beg) sym.attrPos = new Position(beg, t.pos, col, line); - } else SynErr(51); - } else SynErr(52); + } else SynErr(52); + } else SynErr(53); } - Position SemText() { + Position SemText_NT() { Position pos; - Expect(42 /* "(." */); + Expect(43 /* "(." */); int beg = la.pos; int col = la.col; int line = la.line; while (StartOf(14)) { if (StartOf(15)) { Get(); - } else if (la.kind == _badString) { + } else if (isKind(la, _badString)) { Get(); SemErr("bad string in semantic action"); } else { @@ -604,18 +624,18 @@ Position SemText() { SemErr("missing end of previous semantic action"); } } - Expect(43 /* ".)" */); + Expect(44 /* ".)" */); pos = new Position(beg, t.pos, col, line); return pos; } - Graph Expression() { + Graph Expression_NT() { Graph g; Graph g2; - g = Term(); + g = Term_NT(); boolean first = true; - while (WeakSeparator(33,16,17) ) { - g2 = Term(); + while (WeakSeparator(34,16,17) ) { + g2 = Term_NT(); if (first) { tab.MakeFirstAlt(g); first = false; } tab.MakeAlternative(g, g2); @@ -623,38 +643,38 @@ Graph Expression() { return g; } - CharSet SimSet() { + CharSet SimSet_NT() { CharSet s; int n1, n2; s = new CharSet(); - if (la.kind == _ident) { + if (isKind(la, _ident)) { Get(); CharClass c = tab.FindCharClass(t.val); if (c == null) SemErr("undefined name"); else s.Or(c.set); - } else if (la.kind == _string) { + } else if (isKind(la, _string)) { Get(); String name = t.val; name = tab.Unescape(name.substring(1, name.length()-1)); for (int i = 0; i < name.length(); i++) if (dfa.ignoreCase) s.Set(Character.toLowerCase(name.charAt(i))); else s.Set(name.charAt(i)); - } else if (la.kind == _char) { - n1 = Char(); + } else if (isKind(la, _char)) { + n1 = Char_NT(); s.Set(n1); - if (la.kind == 22 /* ".." */) { + if (isKind(la, 22 /* ".." */)) { Get(); - n2 = Char(); + n2 = Char_NT(); for (int i = n1; i <= n2; i++) s.Set(i); } - } else if (la.kind == 23 /* "ANY" */) { + } else if (isKind(la, 23 /* "ANY" */)) { Get(); s = new CharSet(); s.Fill(); - } else SynErr(53); + } else SynErr(54); return s; } - int Char() { + int Char_NT() { int n; Expect(_char); String name = t.val; n = 0; @@ -666,14 +686,14 @@ int Char() { return n; } - SymInfo Sym() { + SymInfo Sym_NT() { SymInfo s; s = new SymInfo(); s.name = "???"; s.kind = id; - if (la.kind == _ident) { + if (isKind(la, _ident)) { Get(); s.kind = id; s.name = t.val; - } else if (la.kind == _string || la.kind == _char) { - if (la.kind == _string) { + } else if (isKind(la, _string) || isKind(la, _char)) { + if (isKind(la, _string)) { Get(); s.name = t.val; } else { @@ -684,78 +704,78 @@ SymInfo Sym() { if (dfa.ignoreCase) s.name = s.name.toLowerCase(); if (s.name.indexOf(' ') >= 0) SemErr("literal tokens must not contain blanks"); - } else SynErr(54); + } else SynErr(55); return s; } - void TypeName() { + void TypeName_NT() { Expect(_ident); - while (la.kind == 18 /* "." */ || la.kind == 24 /* "<" */ || la.kind == 31 /* "[" */) { - if (la.kind == 18 /* "." */) { + while (isKind(la, 18 /* "." */) || isKind(la, 25 /* "<" */) || isKind(la, 32 /* "[" */)) { + if (isKind(la, 18 /* "." */)) { Get(); Expect(_ident); - } else if (la.kind == 31 /* "[" */) { + } else if (isKind(la, 32 /* "[" */)) { Get(); - Expect(32 /* "]" */); + Expect(33 /* "]" */); } else { Get(); - TypeName(); - while (la.kind == 28 /* "," */) { + TypeName_NT(); + while (isKind(la, 29 /* "," */)) { Get(); - TypeName(); + TypeName_NT(); } - Expect(27 /* ">" */); + Expect(28 /* ">" */); } } } - Graph Term() { + Graph Term_NT() { Graph g; Graph g2; Node rslv = null; g = null; if (StartOf(18)) { - if (la.kind == 40 /* "IF" */) { + if (isKind(la, 41 /* "IF" */)) { rslv = tab.NewNode(Node.rslv, null, la.line, la.col); - rslv.pos = Resolver(); + rslv.pos = Resolver_NT(); g = new Graph(rslv); } - g2 = Factor(); + g2 = Factor_NT(); if (rslv != null) tab.MakeSequence(g, g2); else g = g2; while (StartOf(19)) { - g2 = Factor(); + g2 = Factor_NT(); tab.MakeSequence(g, g2); } } else if (StartOf(20)) { g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); - } else SynErr(55); + } else SynErr(56); if (g == null) // invalid start of Term g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); return g; } - Position Resolver() { + Position Resolver_NT() { Position pos; - Expect(40 /* "IF" */); - Expect(35 /* "(" */); + Expect(41 /* "IF" */); + Expect(36 /* "(" */); int beg = la.pos; int col = la.col; int line = la.line; - Condition(); + Condition_NT(); pos = new Position(beg, t.pos, col, line); return pos; } - Graph Factor() { + Graph Factor_NT() { Graph g; SymInfo s; Position pos; boolean weak = false; g = null; switch (la.kind) { - case _ident: case _string: case _char: case 34 /* "WEAK" */: { - if (la.kind == 34 /* "WEAK" */) { + case _ident: case _string: case _char: case 35 /* "WEAK" */: { + if (isKind(la, 35 /* "WEAK" */)) { Get(); weak = true; } - s = Sym(); + s = Sym_NT(); Symbol sym = tab.FindSym(s.name); if (sym == null && s.kind == str) sym = (Symbol)tab.literals.get(s.name); @@ -780,8 +800,8 @@ else if (genScanner) { Node p = tab.NewNode(typ, sym, t.line, t.col); g = new Graph(p); - if (la.kind == 24 /* "<" */ || la.kind == 29 /* "<." */) { - Attribs(p); + if (isKind(la, 25 /* "<" */) || isKind(la, 30 /* "<." */)) { + Attribs_NT(p); if (s.kind != id) SemErr("a literal must not have attributes"); } if (undef) { @@ -793,28 +813,28 @@ else if (genScanner) { break; } - case 35 /* "(" */: { + case 36 /* "(" */: { Get(); - g = Expression(); - Expect(36 /* ")" */); + g = Expression_NT(); + Expect(37 /* ")" */); break; } - case 31 /* "[" */: { + case 32 /* "[" */: { Get(); - g = Expression(); - Expect(32 /* "]" */); + g = Expression_NT(); + Expect(33 /* "]" */); tab.MakeOption(g); break; } - case 37 /* "{" */: { + case 38 /* "{" */: { Get(); - g = Expression(); - Expect(38 /* "}" */); + g = Expression_NT(); + Expect(39 /* "}" */); tab.MakeIteration(g); break; } - case 42 /* "(." */: { - pos = SemText(); + case 43 /* "(." */: { + pos = SemText_NT(); Node p = tab.NewNode(Node.sem, null, t.line, t.col); p.pos = pos; g = new Graph(p); @@ -828,14 +848,14 @@ else if (genScanner) { break; } - case 39 /* "SYNC" */: { + case 40 /* "SYNC" */: { Get(); Node p = tab.NewNode(Node.sync, null, t.line, t.col); g = new Graph(p); break; } - default: SynErr(56); break; + default: SynErr(57); break; } if (g == null) // invalid start of Factor g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); @@ -843,12 +863,12 @@ else if (genScanner) { return g; } - void Attribs(Node n) { + void Attribs_NT(Node n) { int beg, col, line; - if (la.kind == 24 /* "<" */) { + if (isKind(la, 25 /* "<" */)) { Get(); - if (la.kind == 25 /* "^" */ || la.kind == 26 /* "out" */) { - if (la.kind == 25 /* "^" */) { + if (isKind(la, 26 /* "^" */) || isKind(la, 27 /* "out" */)) { + if (isKind(la, 26 /* "^" */)) { Get(); } else { Get(); @@ -857,17 +877,17 @@ void Attribs(Node n) { while (StartOf(21)) { if (StartOf(22)) { Get(); - } else if (la.kind == 31 /* "[" */ || la.kind == 35 /* "(" */) { - Bracketed(); + } else if (isKind(la, 32 /* "[" */) || isKind(la, 36 /* "(" */)) { + Bracketed_NT(); } else { Get(); SemErr("bad string in attributes"); } } n.retVar = scanner.buffer.GetString(beg, la.pos); - if (la.kind == 27 /* ">" */) { + if (isKind(la, 28 /* ">" */)) { Get(); - } else if (la.kind == 28 /* "," */) { + } else if (isKind(la, 29 /* "," */)) { Get(); beg = la.pos; col = la.col; line = la.line; while (StartOf(9)) { @@ -878,9 +898,9 @@ void Attribs(Node n) { SemErr("bad string in attributes"); } } - Expect(27 /* ">" */); + Expect(28 /* ">" */); if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); - } else SynErr(57); + } else SynErr(58); } else if (StartOf(10)) { beg = la.pos; col = la.col; line = la.line; if (StartOf(11)) { @@ -899,13 +919,13 @@ void Attribs(Node n) { } } } - Expect(27 /* ">" */); + Expect(28 /* ">" */); if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); - } else SynErr(58); - } else if (la.kind == 29 /* "<." */) { + } else SynErr(59); + } else if (isKind(la, 30 /* "<." */)) { Get(); - if (la.kind == 25 /* "^" */ || la.kind == 26 /* "out" */) { - if (la.kind == 25 /* "^" */) { + if (isKind(la, 26 /* "^" */) || isKind(la, 27 /* "out" */)) { + if (isKind(la, 26 /* "^" */)) { Get(); } else { Get(); @@ -914,17 +934,17 @@ void Attribs(Node n) { while (StartOf(25)) { if (StartOf(26)) { Get(); - } else if (la.kind == 31 /* "[" */ || la.kind == 35 /* "(" */) { - Bracketed(); + } else if (isKind(la, 32 /* "[" */) || isKind(la, 36 /* "(" */)) { + Bracketed_NT(); } else { Get(); SemErr("bad string in attributes"); } } n.retVar = scanner.buffer.GetString(beg, la.pos); - if (la.kind == 30 /* ".>" */) { + if (isKind(la, 31 /* ".>" */)) { Get(); - } else if (la.kind == 28 /* "," */) { + } else if (isKind(la, 29 /* "," */)) { Get(); beg = la.pos; col = la.col; line = la.line; while (StartOf(12)) { @@ -935,9 +955,9 @@ void Attribs(Node n) { SemErr("bad string in attributes"); } } - Expect(30 /* ".>" */); + Expect(31 /* ".>" */); if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); - } else SynErr(59); + } else SynErr(60); } else if (StartOf(10)) { beg = la.pos; col = la.col; line = la.line; if (StartOf(13)) { @@ -956,49 +976,49 @@ void Attribs(Node n) { } } } - Expect(30 /* ".>" */); + Expect(31 /* ".>" */); if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); - } else SynErr(60); - } else SynErr(61); + } else SynErr(61); + } else SynErr(62); } - void Condition() { + void Condition_NT() { while (StartOf(29)) { - if (la.kind == 35 /* "(" */) { + if (isKind(la, 36 /* "(" */)) { Get(); - Condition(); + Condition_NT(); } else { Get(); } } - Expect(36 /* ")" */); + Expect(37 /* ")" */); } - Graph TokenTerm() { + Graph TokenTerm_NT() { Graph g; Graph g2; - g = TokenFactor(); + g = TokenFactor_NT(); while (StartOf(7)) { - g2 = TokenFactor(); + g2 = TokenFactor_NT(); tab.MakeSequence(g, g2); } - if (la.kind == 41 /* "CONTEXT" */) { + if (isKind(la, 42 /* "CONTEXT" */)) { Get(); - Expect(35 /* "(" */); - g2 = TokenExpr(); + Expect(36 /* "(" */); + g2 = TokenExpr_NT(); tab.SetContextTrans(g2.l); dfa.hasCtxMoves = true; tab.MakeSequence(g, g2); - Expect(36 /* ")" */); + Expect(37 /* ")" */); } return g; } - Graph TokenFactor() { + Graph TokenFactor_NT() { Graph g; SymInfo s; g = null; - if (la.kind == _ident || la.kind == _string || la.kind == _char) { - s = Sym(); + if (isKind(la, _ident) || isKind(la, _string) || isKind(la, _char)) { + s = Sym_NT(); if (s.kind == id) { CharClass c = tab.FindCharClass(s.name); if (c == null) { @@ -1014,93 +1034,101 @@ Graph TokenFactor() { else tokenString = noString; } - } else if (la.kind == 35 /* "(" */) { + } else if (isKind(la, 36 /* "(" */)) { Get(); - g = TokenExpr(); - Expect(36 /* ")" */); - } else if (la.kind == 31 /* "[" */) { + g = TokenExpr_NT(); + Expect(37 /* ")" */); + } else if (isKind(la, 32 /* "[" */)) { Get(); - g = TokenExpr(); - Expect(32 /* "]" */); + g = TokenExpr_NT(); + Expect(33 /* "]" */); tab.MakeOption(g); tokenString = noString; - } else if (la.kind == 37 /* "{" */) { + } else if (isKind(la, 38 /* "{" */)) { Get(); - g = TokenExpr(); - Expect(38 /* "}" */); + g = TokenExpr_NT(); + Expect(39 /* "}" */); tab.MakeIteration(g); tokenString = noString; - } else SynErr(62); + } else SynErr(63); if (g == null) // invalid start of TokenFactor g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); return g; } - void Bracketed() { - if (la.kind == 35 /* "(" */) { + void Bracketed_NT() { + if (isKind(la, 36 /* "(" */)) { Get(); while (StartOf(29)) { - if (la.kind == 31 /* "[" */ || la.kind == 35 /* "(" */) { - Bracketed(); + if (isKind(la, 32 /* "[" */) || isKind(la, 36 /* "(" */)) { + Bracketed_NT(); } else { Get(); } } - Expect(36 /* ")" */); - } else if (la.kind == 31 /* "[" */) { + Expect(37 /* ")" */); + } else if (isKind(la, 32 /* "[" */)) { Get(); while (StartOf(30)) { - if (la.kind == 31 /* "[" */ || la.kind == 35 /* "(" */) { - Bracketed(); + if (isKind(la, 32 /* "[" */) || isKind(la, 36 /* "(" */)) { + Bracketed_NT(); } else { Get(); } } - Expect(32 /* "]" */); - } else SynErr(63); + Expect(33 /* "]" */); + } else SynErr(64); } public void Parse() { la = new Token(); - la.val = ""; + la.val = ""; Get(); - Coco(); + Coco_NT(); Expect(0); } + // a token's base type + public static final int[] tBase = { + + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1, + }; + private static final boolean[][] set = { - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x}, - {_x,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_x, _x,_x,_x,_x, _T,_T,_T,_x, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_T,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_T, _x,_T,_T,_T, _x,_T,_x,_T, _T,_x,_T,_x, _x,_x}, - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x}, - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_T,_T,_T, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_x,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_x,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_x, _T,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_x, _x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_T,_T, _x,_T,_x,_T, _T,_x,_T,_x, _x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_T,_T, _x,_T,_x,_T, _x,_x,_T,_x, _x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_T,_x,_x, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _x,_T,_T,_x, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_x,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_x,_x, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_x,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x} + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, + {_x,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_x, _x,_x,_x,_x, _T,_T,_T,_x, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_T,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_T,_T, _T,_x,_T,_x, _T,_T,_x,_T, _x,_x,_x}, + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_T,_T,_T, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_x, _x,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_x, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_x, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _x,_T,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_T, _x,_x,_x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_x, _x,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_T, _T,_x,_T,_x, _T,_T,_x,_T, _x,_x,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_T, _T,_x,_T,_x, _T,_x,_x,_T, _x,_x,_x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_T,_x, _x,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_x,_T,_T, _x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_x, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_x, _x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_x, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x} }; } // end Parser @@ -1110,7 +1138,7 @@ class Errors { public int count = 0; // number of errors detected public java.io.PrintStream errorStream = System.out; // error messages go to this stream public String errMsgFormat = "-- line {0} col {1}: {2}"; // 0=line, 1=column, 2=text - + protected void printMsg(int line, int column, String msg) { StringBuffer b = new StringBuffer(errMsgFormat); int pos = b.indexOf("{0}"); @@ -1121,7 +1149,7 @@ protected void printMsg(int line, int column, String msg) { if (pos >= 0) b.replace(pos, pos+3, msg); errorStream.println(b.toString()); } - + public void SynErr (int line, int col, int n) { String s; switch (n) { @@ -1149,66 +1177,67 @@ public void SynErr (int line, int col, int n) { case 21: s = "\"-\" expected"; break; case 22: s = "\"..\" expected"; break; case 23: s = "\"ANY\" expected"; break; - case 24: s = "\"<\" expected"; break; - case 25: s = "\"^\" expected"; break; - case 26: s = "\"out\" expected"; break; - case 27: s = "\">\" expected"; break; - case 28: s = "\",\" expected"; break; - case 29: s = "\"<.\" expected"; break; - case 30: s = "\".>\" expected"; break; - case 31: s = "\"[\" expected"; break; - case 32: s = "\"]\" expected"; break; - case 33: s = "\"|\" expected"; break; - case 34: s = "\"WEAK\" expected"; break; - case 35: s = "\"(\" expected"; break; - case 36: s = "\")\" expected"; break; - case 37: s = "\"{\" expected"; break; - case 38: s = "\"}\" expected"; break; - case 39: s = "\"SYNC\" expected"; break; - case 40: s = "\"IF\" expected"; break; - case 41: s = "\"CONTEXT\" expected"; break; - case 42: s = "\"(.\" expected"; break; - case 43: s = "\".)\" expected"; break; - case 44: s = "??? expected"; break; - case 45: s = "this symbol not expected in Coco"; break; - case 46: s = "this symbol not expected in TokenDecl"; break; - case 47: s = "invalid TokenDecl"; break; - case 48: s = "invalid AttrDecl"; break; + case 24: s = "\":\" expected"; break; + case 25: s = "\"<\" expected"; break; + case 26: s = "\"^\" expected"; break; + case 27: s = "\"out\" expected"; break; + case 28: s = "\">\" expected"; break; + case 29: s = "\",\" expected"; break; + case 30: s = "\"<.\" expected"; break; + case 31: s = "\".>\" expected"; break; + case 32: s = "\"[\" expected"; break; + case 33: s = "\"]\" expected"; break; + case 34: s = "\"|\" expected"; break; + case 35: s = "\"WEAK\" expected"; break; + case 36: s = "\"(\" expected"; break; + case 37: s = "\")\" expected"; break; + case 38: s = "\"{\" expected"; break; + case 39: s = "\"}\" expected"; break; + case 40: s = "\"SYNC\" expected"; break; + case 41: s = "\"IF\" expected"; break; + case 42: s = "\"CONTEXT\" expected"; break; + case 43: s = "\"(.\" expected"; break; + case 44: s = "\".)\" expected"; break; + case 45: s = "??? expected"; break; + case 46: s = "this symbol not expected in Coco"; break; + case 47: s = "this symbol not expected in TokenDecl"; break; + case 48: s = "invalid TokenDecl"; break; case 49: s = "invalid AttrDecl"; break; case 50: s = "invalid AttrDecl"; break; case 51: s = "invalid AttrDecl"; break; case 52: s = "invalid AttrDecl"; break; - case 53: s = "invalid SimSet"; break; - case 54: s = "invalid Sym"; break; - case 55: s = "invalid Term"; break; - case 56: s = "invalid Factor"; break; - case 57: s = "invalid Attribs"; break; + case 53: s = "invalid AttrDecl"; break; + case 54: s = "invalid SimSet"; break; + case 55: s = "invalid Sym"; break; + case 56: s = "invalid Term"; break; + case 57: s = "invalid Factor"; break; case 58: s = "invalid Attribs"; break; case 59: s = "invalid Attribs"; break; case 60: s = "invalid Attribs"; break; case 61: s = "invalid Attribs"; break; - case 62: s = "invalid TokenFactor"; break; - case 63: s = "invalid Bracketed"; break; + case 62: s = "invalid Attribs"; break; + case 63: s = "invalid TokenFactor"; break; + case 64: s = "invalid Bracketed"; break; default: s = "error " + n; break; } printMsg(line, col, s); count++; } - public void SemErr (int line, int col, String s) { + public void SemErr (int line, int col, String s) { printMsg(line, col, s); count++; } - + public void SemErr (String s) { errorStream.println(s); count++; } - - public void Warning (int line, int col, String s) { + + public void Warning (int line, int col, String s) { printMsg(line, col, s); } - + public void Warning (String s) { errorStream.println(s); } diff --git a/src/ParserGen.java b/src/ParserGen.java index c7f405f..8757ab9 100644 --- a/src/ParserGen.java +++ b/src/ParserGen.java @@ -182,8 +182,9 @@ else if (n <= maxTerm) { for (int i = 0; i < tab.terminals.size(); i++) { Symbol sym = (Symbol)tab.terminals.get(i); if (s.get(sym.n)) { - gen.print("la.kind == "); + gen.print("isKind(la, "); WriteSymbolOrCode(sym); + gen.print(")"); --n; if (n > 0) gen.print(" || "); } @@ -341,6 +342,17 @@ void GenCode (Node p, int indent, BitSet isChecked) { } } + void GenTokenBase() { + for (int i = 0; i < tab.terminals.size(); i++) { + Symbol sym = (Symbol)tab.terminals.get(i); + if((i % 20) == 0) gen.print("\n\t\t"); + if (sym.inherits == null) + gen.print("-1,"); // not inherited + else + gen.print(sym.inherits.n + ","); + } + } + void GenTokens() { gen.println("\t//non terminals"); for (int i = 0; i < tab.nonterminals.size(); i++) { @@ -352,9 +364,12 @@ void GenTokens() { for (int i = 0; i < tab.terminals.size(); i++) { Symbol sym = (Symbol)tab.terminals.get(i); if (Character.isLetter(sym.name.charAt(0))) - gen.println("\tpublic static final int _" + sym.name + " = " + sym.n + ";"); + gen.print("\tpublic static final int _" + sym.name + " = " + sym.n + ";"); else - gen.println("//\tpublic static final int _(" + sym.name + ") = " + sym.n + ";"); + gen.print("//\tpublic static final int _(" + sym.name + ") = " + sym.n + ";"); + if(sym.inherits != null) + gen.print(" // INHERITS -> " + sym.inherits.name); + gen.println(); } gen.println("\t//non terminals"); } @@ -456,6 +471,7 @@ public void WriteParser () { g.CopyFramePart("-->pragmas"); GenCodePragmas(); g.CopyFramePart("-->productions"); GenProductions(); g.CopyFramePart("-->parseRoot"); gen.println("\t\t" + tab.gramSy.name + "_NT();"); if (tab.checkEOF) gen.println("\t\tExpect(0);"); + g.CopyFramePart("-->tbase"); GenTokenBase(); // write all tokens base types g.CopyFramePart("-->initialization"); InitSets(); g.CopyFramePart("-->errors"); gen.print(err.toString()); g.CopyFramePart(null); diff --git a/src/Scanner.java b/src/Scanner.java index 6fa271e..0ea50bc 100644 --- a/src/Scanner.java +++ b/src/Scanner.java @@ -278,8 +278,8 @@ public int state(int key) { public class Scanner { static final char EOL = '\n'; static final int eofSym = 0; - static final int maxT = 44; - static final int noSym = 44; + static final int maxT = 45; + static final int noSym = 45; public Buffer buffer; // scanner buffer @@ -312,20 +312,21 @@ public class Scanner { start.set(39, 5); start.set(36, 13); start.set(61, 16); - start.set(46, 33); + start.set(46, 34); start.set(43, 17); start.set(45, 18); - start.set(60, 34); - start.set(94, 20); - start.set(62, 21); - start.set(44, 22); - start.set(91, 25); - start.set(93, 26); - start.set(124, 27); - start.set(40, 35); - start.set(41, 28); - start.set(123, 29); - start.set(125, 30); + start.set(58, 20); + start.set(60, 35); + start.set(94, 21); + start.set(62, 22); + start.set(44, 23); + start.set(91, 26); + start.set(93, 27); + start.set(124, 28); + start.set(40, 36); + start.set(41, 29); + start.set(123, 30); + start.set(125, 31); start.set(Buffer.EOF, -1); literals.put("COMPILER", new Integer(6)); literals.put("IGNORECASE", new Integer(7)); @@ -340,11 +341,11 @@ public class Scanner { literals.put("PRODUCTIONS", new Integer(16)); literals.put("END", new Integer(19)); literals.put("ANY", new Integer(23)); - literals.put("out", new Integer(26)); - literals.put("WEAK", new Integer(34)); - literals.put("SYNC", new Integer(39)); - literals.put("IF", new Integer(40)); - literals.put("CONTEXT", new Integer(41)); + literals.put("out", new Integer(27)); + literals.put("WEAK", new Integer(35)); + literals.put("SYNC", new Integer(40)); + literals.put("IF", new Integer(41)); + literals.put("CONTEXT", new Integer(42)); } @@ -512,13 +513,13 @@ Token NextToken() { case 9: {t.kind = 5 /* char */; break loop;} case 10: - recEnd = pos; recKind = 45 /* ddtSym */; + recEnd = pos; recKind = 46 /* ddtSym */; if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); state = 10; break;} - else {t.kind = 45 /* ddtSym */; break loop;} + else {t.kind = 46 /* ddtSym */; break loop;} case 11: - recEnd = pos; recKind = 46 /* optionSym */; + recEnd = pos; recKind = 47 /* optionSym */; if (ch >= '-' && ch <= '.' || ch >= '0' && ch <= ':' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); state = 11; break;} - else {t.kind = 46 /* optionSym */; break loop;} + else {t.kind = 47 /* optionSym */; break loop;} case 12: if (ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '!' || ch >= '#' && ch <= '[' || ch >= ']' && ch <= 65535) {AddCh(); state = 12; break;} else if (ch == 10 || ch == 13) {AddCh(); state = 4; break;} @@ -526,19 +527,19 @@ Token NextToken() { else if (ch == 92) {AddCh(); state = 14; break;} else {state = 0; break;} case 13: - recEnd = pos; recKind = 45 /* ddtSym */; + recEnd = pos; recKind = 46 /* ddtSym */; if (ch >= '0' && ch <= '9') {AddCh(); state = 10; break;} else if (ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); state = 15; break;} - else {t.kind = 45 /* ddtSym */; break loop;} + else {t.kind = 46 /* ddtSym */; break loop;} case 14: if (ch >= ' ' && ch <= '~') {AddCh(); state = 12; break;} else {state = 0; break;} case 15: - recEnd = pos; recKind = 45 /* ddtSym */; + recEnd = pos; recKind = 46 /* ddtSym */; if (ch >= '0' && ch <= '9') {AddCh(); state = 10; break;} else if (ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); state = 15; break;} else if (ch == '=') {AddCh(); state = 11; break;} - else {t.kind = 45 /* ddtSym */; break loop;} + else {t.kind = 46 /* ddtSym */; break loop;} case 16: {t.kind = 17 /* "=" */; break loop;} case 17: @@ -548,45 +549,47 @@ Token NextToken() { case 19: {t.kind = 22 /* ".." */; break loop;} case 20: - {t.kind = 25 /* "^" */; break loop;} + {t.kind = 24 /* ":" */; break loop;} case 21: - {t.kind = 27 /* ">" */; break loop;} + {t.kind = 26 /* "^" */; break loop;} case 22: - {t.kind = 28 /* "," */; break loop;} + {t.kind = 28 /* ">" */; break loop;} case 23: - {t.kind = 29 /* "<." */; break loop;} + {t.kind = 29 /* "," */; break loop;} case 24: - {t.kind = 30 /* ".>" */; break loop;} + {t.kind = 30 /* "<." */; break loop;} case 25: - {t.kind = 31 /* "[" */; break loop;} + {t.kind = 31 /* ".>" */; break loop;} case 26: - {t.kind = 32 /* "]" */; break loop;} + {t.kind = 32 /* "[" */; break loop;} case 27: - {t.kind = 33 /* "|" */; break loop;} + {t.kind = 33 /* "]" */; break loop;} case 28: - {t.kind = 36 /* ")" */; break loop;} + {t.kind = 34 /* "|" */; break loop;} case 29: - {t.kind = 37 /* "{" */; break loop;} + {t.kind = 37 /* ")" */; break loop;} case 30: - {t.kind = 38 /* "}" */; break loop;} + {t.kind = 38 /* "{" */; break loop;} case 31: - {t.kind = 42 /* "(." */; break loop;} + {t.kind = 39 /* "}" */; break loop;} case 32: - {t.kind = 43 /* ".)" */; break loop;} + {t.kind = 43 /* "(." */; break loop;} case 33: + {t.kind = 44 /* ".)" */; break loop;} + case 34: recEnd = pos; recKind = 18 /* "." */; if (ch == '.') {AddCh(); state = 19; break;} - else if (ch == '>') {AddCh(); state = 24; break;} - else if (ch == ')') {AddCh(); state = 32; break;} + else if (ch == '>') {AddCh(); state = 25; break;} + else if (ch == ')') {AddCh(); state = 33; break;} else {t.kind = 18 /* "." */; break loop;} - case 34: - recEnd = pos; recKind = 24 /* "<" */; - if (ch == '.') {AddCh(); state = 23; break;} - else {t.kind = 24 /* "<" */; break loop;} case 35: - recEnd = pos; recKind = 35 /* "(" */; - if (ch == '.') {AddCh(); state = 31; break;} - else {t.kind = 35 /* "(" */; break loop;} + recEnd = pos; recKind = 25 /* "<" */; + if (ch == '.') {AddCh(); state = 24; break;} + else {t.kind = 25 /* "<" */; break loop;} + case 36: + recEnd = pos; recKind = 36 /* "(" */; + if (ch == '.') {AddCh(); state = 32; break;} + else {t.kind = 36 /* "(" */; break loop;} } } diff --git a/src/Tab.java b/src/Tab.java index 55dbc77..684b4d4 100644 --- a/src/Tab.java +++ b/src/Tab.java @@ -83,6 +83,7 @@ class Symbol { // nt: pos of local declarations in source text (or null) public String retType; // AH - nt: Type of output attribute (or null) public String retVar; // AH - nt: Name of output attribute (or null) + public Symbol inherits; // optional, token from which this token derives public Symbol(int typ, String name, int line, int col) { this.typ = typ; this.name = name; this.line = line; this.col = col;