diff --git a/README.md b/README.md index d341fdf..c313536 100644 --- a/README.md +++ b/README.md @@ -3,3 +3,23 @@ Coco/R is a compiler generator, which takes an attributed grammar of a source language and generates a scanner and a parser for this language. The scanner works as a deterministic finite automaton. The parser uses recursive descent. LL(1) conflicts can be resolved by a multi-symbol lookahead or by semantic checks. Thus the class of accepted grammars is LL(k) for an arbitrary k. http://ssw.jku.at/coco/ + +And these are my main modifications to the original: + +- Enhance left recursion detection + +- Allow semantic actions on `token declaration` similar to `pragmas` but the code executes on the Scanner + +- Allow up to 8 characters as comment delimiters + +- Add option `-genRREBNF` to generate an EBNF grammar to create railroad diagrams at https://www.bottlecaps.de/rr/ui + +- Add option `-genAST` to generate code that builds a `parser syntax tree` based on https://github.com/rochus-keller/EbnfStudio + +- Add option `-ignoreErrors` to make it easier to develop grammars, like commenting out one non terminal and still generating the parser and scanner even with several non reachable non terminals + +- Add a `TERMINALS` section to generate user defined tokens not managed by the Scanner (from cocoxml) + +- Generate between comments the corresponding representation of several magic numbers (mainly Tokens) + +See also https://github.com/mingodad/CocoR-CPP and https://github.com/mingodad/CocoR-CSharp diff --git a/src/Coco.atg b/src/Coco.atg index 2b6be89..477070d 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -37,7 +37,7 @@ COMPILER Coco static final int id = 0; static final int str = 1; - + public Trace trace; // other Coco objects referenced by this ATG public Tab tab; public DFA dfa; @@ -83,19 +83,19 @@ IGNORE cr + lf + tab PRODUCTIONS -Coco (. Symbol sym; Graph g, g1, g2; String gramName; CharSet s; int beg; .) +Coco (. 
Symbol sym; Graph g, g1, g2; String gramName; CharSet s; int beg, line; .) = [ // import statements - ANY (. beg = t.pos; .) - { ANY } (. pgen.usingPos = new Position(beg, la.pos, 0); .) + ANY (. beg = t.pos; line = t.line; .) + { ANY } (. pgen.usingPos = new Position(beg, la.pos, 0, line); .) ] - "COMPILER" (. genScanner = true; + "COMPILER" (. genScanner = true; tab.ignored = new CharSet(); .) ident (. gramName = t.val; - beg = la.pos; + beg = la.pos; line = la.line; .) - { ANY } (. tab.semDeclPos = new Position(beg, la.pos, 0); .) + { ANY } (. tab.semDeclPos = new Position(beg, la.pos, 0, line); .) [ "IGNORECASE" (. dfa.ignoreCase = true; .) ] /* pdt */ [ "CHARACTERS" { SetDecl }] [ "TOKENS" { TokenDecl }] @@ -115,7 +115,7 @@ Coco (. Symbol sym; Graph g, g1, g2; String gramName; .) { ident (. sym = tab.FindSym(t.val); boolean undef = sym == null; - if (undef) sym = tab.NewSym(Node.nt, t.val, t.line); + if (undef) sym = tab.NewSym(Node.nt, t.val, t.line, t.col); else { if (sym.typ == Node.nt) { if (sym.graph != null) SemErr("name declared twice"); @@ -149,7 +149,7 @@ Coco (. Symbol sym; Graph g, g1, g2; String gramName; if (sym.attrPos != null) SemErr("grammar symbol must not have attributes"); } - tab.noSym = tab.NewSym(Node.t, "???", 0); // noSym gets highest number + tab.noSym = tab.NewSym(Node.t, "???", 0, 0); // noSym gets highest number tab.SetupAnys(); tab.RenumberPragmas(); if (tab.ddt[2]) tab.PrintNodes(); @@ -157,7 +157,16 @@ Coco (. 
Symbol sym; Graph g, g1, g2; String gramName; System.out.println("checking"); tab.CompSymbolSets(); if (tab.ddt[7]) tab.XRef(); - if (tab.GrammarOk()) { + boolean doGenCode = false; + if(tab.ignoreErrors) { + doGenCode = true; + tab.GrammarCheckAll(); + } + else doGenCode = tab.GrammarOk(); + if(tab.genRREBNF && doGenCode) { + pgen.WriteRREBNF(); + } + if (doGenCode) { System.out.print("parser"); pgen.WriteParser(); if (genScanner) { @@ -231,16 +240,24 @@ Char /*------------------------------------------------------------------------------------*/ -TokenDecl (. SymInfo s; Symbol sym; Graph g; .) +TokenDecl (. SymInfo s, si; Symbol sym, inheritsSym; Graph g; .) = Sym (. sym = tab.FindSym(s.name); if (sym != null) SemErr("name declared twice"); else { - sym = tab.NewSym(typ, s.name, t.line); + sym = tab.NewSym(typ, s.name, t.line, t.col); sym.tokenKind = Symbol.fixedToken; } tokenString = null; .) + [ ':' Sym + (. inheritsSym = tab.FindSym(si.name); + if (inheritsSym == null) SemErr("token can't inherit from undeclared name"); + else if (inheritsSym == sym) SemErr("token must not inherit from itself"); + else if (inheritsSym.typ != typ) SemErr("token can't inherit from this token type"); + else sym.inherits = inheritsSym; + .) + ] SYNC ( '=' TokenExpr '.' (. if (s.kind == str) SemErr("a literal must not be declared with a structure"); tab.Finish(g); @@ -257,26 +274,26 @@ TokenDecl (. SymInfo s; Symbol sym; Graph g; .) else dfa.MatchLiteral(sym.name, sym); .) ) - [ SemText (. if (typ != Node.pr) SemErr("semantic action not allowed here"); .) + [ SemText (. if (typ == Node.t) errors.Warning("Warning semantic action on token declarations require a custom Scanner"); .) //SemErr("semantic action not allowed here"); .) ] . /*------------------------------------------------------------------------------------*/ -AttrDecl (. int beg, col; .) -= +AttrDecl (. int beg, col, line; .) += '<' // attributes denoted by < ... > ( ('^' | "out") (. beg = la.pos; .) TypeName (. 
sym.retType = scanner.buffer.GetString(beg, la.pos); .) ident (. sym.retVar = t.val; .) ( '>' - | ',' (. beg = la.pos; col = la.col; .) + | ',' (. beg = la.pos; col = la.col; line = la.line; .) { ANY } '>' (. if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); .) + sym.attrPos = new Position(beg, t.pos, col, line); .) ) - | (. beg = la.pos; col = la.col; .) + | (. beg = la.pos; col = la.col; line = la.line; .) [ ANY { ANY } ] '>' (. if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); .) + sym.attrPos = new Position(beg, t.pos, col, line); .) ) | "<." // attributes denoted by <. ... .> @@ -284,13 +301,13 @@ AttrDecl (. int beg, col; .) TypeName (. sym.retType = scanner.buffer.GetString(beg, la.pos); .) ident (. sym.retVar = t.val; .) ( ".>" - | ',' (. beg = la.pos; col = la.col; .) + | ',' (. beg = la.pos; col = la.col; line = la.line; .) { ANY } ".>" (. if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); .) + sym.attrPos = new Position(beg, t.pos, col, line); .) ) - | (. beg = la.pos; col = la.col; .) + | (. beg = la.pos; col = la.col; line = la.line; .) [ ANY { ANY } ] ".>" (. if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); .) + sym.attrPos = new Position(beg, t.pos, col, line); .) ). /*------------------------------------------------------------------------------------*/ @@ -314,7 +331,7 @@ Expression (. Graph g2; .) Term (. Graph g2; Node rslv = null; g = null; .) = -( [ (. rslv = tab.NewNode(Node.rslv, null, la.line); .) +( [ (. rslv = tab.NewNode(Node.rslv, null, la.line, la.col); .) Resolver (. g = new Graph(rslv); .) ] Factor (. if (rslv != null) tab.MakeSequence(g, g2); @@ -322,9 +339,9 @@ Term (. Graph g2; Node rslv = null; g = null; .) .) { Factor (. tab.MakeSequence(g, g2); .) } -| (. g = new Graph(tab.NewNode(Node.eps, null, 0)); .) +| (. g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) ) (. 
if (g == null) // invalid start of Term - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) . @@ -341,9 +358,9 @@ Factor (. SymInfo s; Position pos; boolean weak = false boolean undef = sym == null; if (undef) { if (s.kind == id) - sym = tab.NewSym(Node.nt, s.name, 0); // forward nt + sym = tab.NewSym(Node.nt, s.name, 0, 0); // forward nt else if (genScanner) { - sym = tab.NewSym(Node.t, s.name, t.line); + sym = tab.NewSym(Node.t, s.name, t.line, t.col); dfa.MatchLiteral(sym.name, sym); } else { // undefined string in production SemErr("undefined string in production"); @@ -356,7 +373,7 @@ Factor (. SymInfo s; Position pos; boolean weak = false if (weak) if (typ == Node.t) typ = Node.wt; else SemErr("only terminals may be weak"); - Node p = tab.NewNode(typ, sym, t.line); + Node p = tab.NewNode(typ, sym, t.line, t.col); g = new Graph(p); .) [ Attribs

(. if (s.kind != id) SemErr("a literal must not have attributes"); .) @@ -370,18 +387,18 @@ Factor (. SymInfo s; Position pos; boolean weak = false | '(' Expression ')' | '[' Expression ']' (. tab.MakeOption(g); .) | '{' Expression '}' (. tab.MakeIteration(g); .) -| SemText (. Node p = tab.NewNode(Node.sem, null, 0); +| SemText (. Node p = tab.NewNode(Node.sem, null, t.line, t.col); p.pos = pos; g = new Graph(p); .) -| "ANY" (. Node p = tab.NewNode(Node.any, null, t.line); // p.set is set in tab.SetupAnys +| "ANY" (. Node p = tab.NewNode(Node.any, null, t.line, t.col); // p.set is set in tab.SetupAnys g = new Graph(p); .) -| "SYNC" (. Node p = tab.NewNode(Node.sync, null, 0); +| "SYNC" (. Node p = tab.NewNode(Node.sync, null, t.line, t.col); g = new Graph(p); .) ) (. if (g == null) // invalid start of Factor - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) . @@ -389,8 +406,8 @@ Factor (. SymInfo s; Position pos; boolean weak = false Resolver = - "IF" "(" (. int beg = la.pos; int col = la.col; .) - Condition (. pos = new Position(beg, t.pos, col); .) + "IF" "(" (. int beg = la.pos; int col = la.col; int line = la.line; .) + Condition (. pos = new Position(beg, t.pos, col, line); .) . /*------------------------------------------------------------------------------------*/ @@ -431,10 +448,10 @@ TokenFactor (. SymInfo s; .) ( Sym (. if (s.kind == id) { CharClass c = tab.FindCharClass(s.name); if (c == null) { - SemErr("undefined name"); + SemErr("undefined name: " + s.name); c = tab.NewCharClass(s.name, new CharSet()); } - Node p = tab.NewNode(Node.clas, null, 0); p.val = c.n; + Node p = tab.NewNode(Node.clas, null, 0, 0); p.val = c.n; g = new Graph(p); tokenString = noString; } else { // str @@ -447,7 +464,7 @@ TokenFactor (. SymInfo s; .) | '[' TokenExpr ']' (. tab.MakeOption(g); tokenString = noString; .) | '{' TokenExpr '}' (. tab.MakeIteration(g); tokenString = noString; .) ) (. 
if (g == null) // invalid start of TokenFactor - g = new Graph(tab.NewNode(Node.eps, null, 0)); .) + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) . /*------------------------------------------------------------------------------------*/ @@ -466,7 +483,7 @@ Sym /*------------------------------------------------------------------------------------*/ -Attribs (. int beg, col; .) +Attribs (. int beg, col, line; .) = '<' // attributes denoted by < ... > ( ('^' | "out") (. beg = la.pos; .) @@ -475,19 +492,19 @@ Attribs (. int beg, col; .) | badString (. SemErr("bad string in attributes"); .) } (. n.retVar = scanner.buffer.GetString(beg, la.pos); .) ( '>' - | ',' (. beg = la.pos; col = la.col; .) + | ',' (. beg = la.pos; col = la.col; line = la.line; .) { ANY | badString (. SemErr("bad string in attributes"); .) - } '>' (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col); .) + } '>' (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); .) ) - | (. beg = la.pos; col = la.col; .) + | (. beg = la.pos; col = la.col; line = la.line; .) [ ( ANY | badString (. SemErr("bad string in attributes"); .) ) { ANY | badString (. SemErr("bad string in attributes"); .) } - ] '>' (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col); .) + ] '>' (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); .) ) | "<." // attributes denoted by <. ... .> @@ -497,19 +514,19 @@ Attribs (. int beg, col; .) | badString (. SemErr("bad string in attributes"); .) } (. n.retVar = scanner.buffer.GetString(beg, la.pos); .) ( ".>" - | ',' (. beg = la.pos; col = la.col; .) + | ',' (. beg = la.pos; col = la.col; line = la.line; .) { ANY | badString (. SemErr("bad string in attributes"); .) - } ".>" (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col); .) + } ".>" (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); .) ) - | (. beg = la.pos; col = la.col; .) + | (. beg = la.pos; col = la.col; line = la.line; .) [ ( ANY | badString (. 
SemErr("bad string in attributes"); .) ) { ANY | badString (. SemErr("bad string in attributes"); .) } - ] ".>" (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col); .) + ] ".>" (. if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); .) ) . @@ -522,12 +539,12 @@ Bracketed SemText = - "(." (. int beg = la.pos; int col = la.col; .) + "(." (. int beg = la.pos; int col = la.col; int line = la.line; .) { ANY | badString (. SemErr("bad string in semantic action"); .) | "(." (. SemErr("missing end of previous semantic action"); .) } - ".)" (. pos = new Position(beg, t.pos, col); .) + ".)" (. pos = new Position(beg, t.pos, col, line); .) . END Coco. diff --git a/src/Coco.java b/src/Coco.java index b8d1271..46373d1 100644 --- a/src/Coco.java +++ b/src/Coco.java @@ -50,12 +50,16 @@ public class Coco { public static void main (String[] arg) { System.out.println("Coco/R (Apr 15, 2013)"); String srcName = null, nsName = null, frameDir = null, ddtString = null, outDir = null; + boolean ignoreErrors = false, genAST = false, genRREBNF = false; int retVal = 1; for (int i = 0; i < arg.length; i++) { if (arg[i].equals("-package") && i < arg.length - 1) nsName = arg[++i].trim(); else if (arg[i].equals("-frames") && i < arg.length - 1) frameDir = arg[++i].trim(); else if (arg[i].equals("-trace") && i < arg.length - 1) ddtString = arg[++i].trim(); else if (arg[i].equals("-o") && i < arg.length - 1) outDir = arg[++i].trim(); + else if (arg[i].equals("-genAST")) genAST = true; + else if (arg[i].equals("-genRREBNF")) genRREBNF = true; + else if (arg[i].equals("-ignoreErrors")) ignoreErrors = true; else srcName = arg[i]; } if (arg.length > 0 && srcName != null) { @@ -75,6 +79,9 @@ public static void main (String[] arg) { parser.tab.nsName = nsName; parser.tab.frameDir = frameDir; parser.tab.outDir = (outDir != null) ? 
outDir : srcDir; + parser.tab.genAST = genAST; + parser.tab.genRREBNF = genRREBNF; + parser.tab.ignoreErrors = ignoreErrors; if (ddtString != null) parser.tab.SetDDT(ddtString); parser.Parse(); @@ -93,6 +100,9 @@ public static void main (String[] arg) { " -frames \n" + " -trace \n" + " -o \n" + + " -genRREBNF\n" + + " -genAST\n" + + " -ignoreErrors ignore grammar errors for developing purposes\n" + "Valid characters in the trace string:\n" + " A trace automaton\n" + " F list first/follow sets\n" + diff --git a/src/DFA.java b/src/DFA.java index e145698..ef5526b 100644 --- a/src/DFA.java +++ b/src/DFA.java @@ -487,7 +487,7 @@ void DeleteRedundantStates() { for (State s1 = firstState.next; s1 != null; s1 = s1.next) // firstState cannot be final if (used.get(s1.nr) && s1.endOf != null && s1.firstAction == null && !s1.ctx) for (State s2 = s1.next; s2 != null; s2 = s2.next) - if (used.get(s2.nr) && s1.endOf == s2.endOf && s2.firstAction == null & !s2.ctx) { + if (used.get(s2.nr) && s1.endOf == s2.endOf && s2.firstAction == null && !s2.ctx) { used.set(s2.nr, false); newState[s2.nr] = s1; } for (State state = firstState; state != null; state = state.next) @@ -750,10 +750,11 @@ public void PrintStates() { boolean first = true; if (state.endOf == null) trace.Write(" "); else trace.Write("E(" + tab.Name(state.endOf.name) + ")", 12); - trace.Write(state.nr + ":", 3); + trace.Write(state.nr + "", 3); + trace.Write(":"); if (state.firstAction == null) trace.WriteLine(); for (Action action = state.firstAction; action != null; action = action.next) { - if (first) {trace.Write(" "); first = false;} else trace.Write(" "); + if (first) {trace.Write(" "); first = false;} else trace.Write(" "); if (action.typ == Node.clas) trace.Write(((CharClass)tab.classes.get(action.sym)).name); else trace.Write(Ch((char)action.sym), 3); @@ -854,8 +855,8 @@ String CommentStr(Node p) { } else parser.SemErr("comment delimiters must not be structured"); p = p.next; } - if (s.length() == 0 || s.length() 
> 2) { - parser.SemErr("comment delimiters must be 1 or 2 characters long"); + if (s.length() == 0 || s.length() > 8) { + parser.SemErr("comment delimiters must be 1 to 8 characters long"); s = new StringBuffer("?"); } return s.toString(); @@ -869,6 +870,7 @@ public void NewComment(Node from, Node to, boolean nested) { //--------------------- scanner generation ------------------------ void GenComBody(Comment com) { + int imax = com.stop.length()-1; /* last index of the STOP delimiter: the loops below index com.stop, and start/stop may differ in length */ gen.println("\t\t\tfor(;;) {"); gen.print ("\t\t\t\tif (" + ChCond(com.stop.charAt(0)) + ") "); gen.println("{"); if (com.stop.length() == 1) { @@ -876,22 +878,31 @@ void GenComBody(Comment com) { gen.println("\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }"); gen.println("\t\t\t\t\tNextCh();"); } else { - gen.println("\t\t\t\t\tNextCh();"); - gen.println("\t\t\t\t\tif (" + ChCond(com.stop.charAt(1)) + ") {"); + for(int sidx = 1; sidx <= imax; ++sidx) { + gen.println("\t\t\t\t\tNextCh();"); + gen.println("\t\t\t\t\tif (" + ChCond(com.stop.charAt(sidx)) + ") {"); + } gen.println("\t\t\t\t\t\tlevel--;"); - gen.println("\t\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }"); + gen.println("\t\t\t\t\t\tif (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; }"); gen.println("\t\t\t\t\t\tNextCh();"); - gen.println("\t\t\t\t\t}"); + for(int sidx = imax; sidx > 0; --sidx) { + gen.println("\t\t\t\t\t}"); + } } if (com.nested) { gen.print ("\t\t\t\t}"); gen.println(" else if (" + ChCond(com.start.charAt(0)) + ") {"); if (com.start.length() == 1) gen.println("\t\t\t\t\tlevel++; NextCh();"); else { - gen.println("\t\t\t\t\tNextCh();"); - gen.print ("\t\t\t\t\tif (" + ChCond(com.start.charAt(1)) + ") "); gen.println("{"); + int imaxN = com.start.length()-1; + for(int sidx = 1; sidx <= imaxN; ++sidx) { + gen.println("\t\t\t\t\tNextCh();"); + gen.print ("\t\t\t\t\tif (" + ChCond(com.start.charAt(sidx)) + ") "); gen.println("{"); + } gen.println("\t\t\t\t\t\tlevel++; 
NextCh();"); - gen.println("\t\t\t\t\t}"); + for(int sidx = imaxN; sidx > 0; --sidx) { + gen.println("\t\t\t\t\t}"); + } } } gen.println( "\t\t\t\t} else if (ch == Buffer.EOF) return false;"); @@ -903,17 +914,20 @@ void GenComment(Comment com, int i) { gen.println(); gen.print ("\tboolean Comment" + i + "() "); gen.println("{"); gen.println("\t\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;"); + gen.println("\t\tNextCh();"); if (com.start.length() == 1) { - gen.println("\t\tNextCh();"); GenComBody(com); } else { - gen.println("\t\tNextCh();"); - gen.print ("\t\tif (" + ChCond(com.start.charAt(1)) + ") "); gen.println("{"); - gen.println("\t\t\tNextCh();"); + int imax = com.start.length()-1; + for(int sidx = 1; sidx <= imax; ++sidx) { + gen.print ("\t\tif (" + ChCond(com.start.charAt(sidx)) + ") "); gen.println("{"); + gen.println("\t\t\tNextCh();"); + } GenComBody(com); - gen.println("\t\t} else {"); - gen.println("\t\t\tbuffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;"); - gen.println("\t\t}"); + for(int sidx = imax; sidx > 0; --sidx) { + gen.println("\t\t}"); + } + gen.println("\t\tbuffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;"); gen.println("\t\treturn false;"); } gen.println("\t}"); @@ -951,7 +965,7 @@ void WriteState(State state) { Symbol endOf = state.endOf; gen.println("\t\t\t\tcase " + state.nr + ":"); if (endOf != null && state.firstAction != null) { - gen.println("\t\t\t\t\trecEnd = pos; recKind = " + endOf.n + ";"); + gen.println("\t\t\t\t\trecEnd = pos; recKind = " + endOf.n + " /* " + endOf.name + " */;"); } boolean ctxEnd = state.ctx; for (Action action = state.firstAction; action != null; action = action.next) { @@ -980,10 +994,15 @@ void WriteState(State state) { if (endOf == null) { gen.println("state = 0; break;}"); } else { - gen.print("t.kind = " + endOf.n + "; "); + gen.print("t.kind = " + endOf.n + " /* " + endOf.name + " */; "); if (endOf.tokenKind == 
Symbol.classLitToken) { gen.println("t.val = new String(tval, 0, tlen); CheckLiteral(); return t;}"); } else { + if(endOf.semPos != null && endOf.typ == Node.t) { + gen.print(" {"); + parser.pgen.CopySourcePart(parser, gen, endOf.semPos, 0); + gen.print("};"); + } gen.println("break loop;}"); } } @@ -1049,11 +1068,11 @@ public void WriteScanner() { gen.println("\t\tval = val.toLowerCase();"); } g.CopyFramePart("-->scan1"); - gen.print("\t\t\t"); + gen.print("\t\t\t\t"); if (tab.ignored.Elements() > 0) { PutRange(tab.ignored); } else { gen.print("false"); } g.CopyFramePart("-->scan2"); if (firstComment != null) { - gen.print("\t\tif ("); + gen.print("\t\t\tif ("); com = firstComment; comIdx = 0; while (com != null) { gen.print(ChCond(com.start.charAt(0))); @@ -1061,8 +1080,9 @@ public void WriteScanner() { if (com.next != null) gen.print(" ||"); com = com.next; comIdx++; } - gen.print(") return NextToken();"); + gen.print(") continue;"); } + g.CopyFramePart("-->scan22"); if (hasCtxMoves) { gen.println(); gen.print("\t\tint apx = 0;"); } /* pdt */ g.CopyFramePart("-->scan3"); for (State state = firstState.next; state != null; state = state.next) diff --git a/src/Parser.frame b/src/Parser.frame index 99d30b8..be20e8d 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -5,29 +5,93 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz ported from C# to Java by Wolfgang Ahorner with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. 
-This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. ------------------------------------------------------------------------*/ -->begin +import java.util.ArrayList; +import java.util.Stack; + public class Parser { + + public class SynTree { + public SynTree(Token t ) { + tok = t; + children = new ArrayList(); + } + + public Token tok; + public ArrayList children; + + private void printIndent(int n) { + for(int i=0; i < n; ++i) System.out.print(" "); + } + + public void dump_all(int indent, boolean isLast) { + int last_idx = children.size(); + if(tok.col > 0) { + printIndent(indent); + System.out.println(((isLast || (last_idx == 0)) ? 
"= " : " ") + "\t" + tok.line + "\t" + tok.col + "\t" + tok.kind + "\t" + tok.val); + } + else { + printIndent(indent); + System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump_all(indent+4, idx == last_idx); + } + } + public void dump_all() { + dump_all(0, false); + } + + public void dump_pruned(int indent, boolean isLast) { + int last_idx = children.size(); + int indentPlus = 4; + if(tok.col > 0) { + printIndent(indent); + System.out.println(((isLast || (last_idx == 0)) ? "= " : " ") + "\t" + tok.line + "\t" + tok.col + "\t" + tok.kind + "\t" + tok.val); + } + else { + if(last_idx == 1) { + if(children.get(0).children.size() == 0) { + printIndent(indent); + System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); + } + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump_pruned(indent+indentPlus, idx == last_idx); + } + } + public void dump_pruned() { + dump_pruned(0, false); + } + } + -->constants static final boolean _T = true; static final boolean _x = false; @@ -36,12 +100,35 @@ public class Parser { public Token t; // last recognized token public Token la; // lookahead token int errDist = minErrDist; - + public Scanner scanner; public Errors errors; -->declarations + public SynTree ast_root; + Stack ast_stack; + + void AstAddTerminal() { + SynTree st = new SynTree( t ); + ast_stack.peek().children.add(st); + } + + boolean AstAddNonTerminal(int kind, String nt_name, int line) { + Token ntTok = new Token(); + ntTok.kind = kind; + ntTok.line = line; + ntTok.val = nt_name; + SynTree st = new SynTree( ntTok ); + ast_stack.peek().children.add(st); + ast_stack.push(st); + return true; + } + + void AstPopNonTerminal() { + 
ast_stack.pop(); + } + public Parser(Scanner scanner) { this.scanner = scanner; errors = new Errors(); @@ -56,7 +143,7 @@ public class Parser { if (errDist >= minErrDist) errors.SemErr(t.line, t.col, msg); errDist = 0; } - + void Get () { for (;;) { t = la; @@ -69,26 +156,35 @@ public class Parser { la = t; } } - + + boolean isKind(Token t, int n) { + int k = t.kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; + } + void Expect (int n) { - if (la.kind==n) Get(); else { SynErr(n); } + if (isKind(la, n)) Get(); else { SynErr(n); } } - + boolean StartOf (int s) { return set[s][la.kind]; } - + void ExpectWeak (int n, int follow) { - if (la.kind == n) Get(); + if (isKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); } } - + boolean WeakSeparator (int n, int syFol, int repFol) { int kind = la.kind; - if (kind == n) { Get(); return true; } + if (isKind(la, n)) { Get(); return true; } else if (StartOf(repFol)) return false; else { SynErr(n); @@ -99,16 +195,21 @@ public class Parser { return StartOf(syFol); } } - + -->productions public void Parse() { la = new Token(); - la.val = ""; + la.val = ""; Get(); -->parseRoot } + // a token's base type + public static final int[] tBase = { +-->tbase + }; + private static final boolean[][] set = { -->initialization }; @@ -119,7 +220,7 @@ class Errors { public int count = 0; // number of errors detected public java.io.PrintStream errorStream = System.out; // error messages go to this stream public String errMsgFormat = "-- line {0} col {1}: {2}"; // 0=line, 1=column, 2=text - + protected void printMsg(int line, int column, String msg) { StringBuffer b = new StringBuffer(errMsgFormat); int pos = b.indexOf("{0}"); @@ -130,7 +231,7 @@ class Errors { if (pos >= 0) b.replace(pos, pos+3, msg); errorStream.println(b.toString()); } - + public void SynErr (int line, int col, int n) { String s; switch (n) {-->errors @@ -140,20 +241,20 @@ class Errors { count++; } - public void SemErr (int 
line, int col, String s) { + public void SemErr (int line, int col, String s) { printMsg(line, col, s); count++; } - + public void SemErr (String s) { errorStream.println(s); count++; } - - public void Warning (int line, int col, String s) { + + public void Warning (int line, int col, String s) { printMsg(line, col, s); } - + public void Warning (String s) { errorStream.println(s); } diff --git a/src/Parser.java b/src/Parser.java index bd48758..e221f63 100644 --- a/src/Parser.java +++ b/src/Parser.java @@ -5,38 +5,166 @@ ported from C# to Java by Wolfgang Ahorner with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. 
-If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. --------------------------------------------------------------------------*/ +------------------------------------------------------------------------*/ package Coco; +import java.util.ArrayList; +import java.util.Stack; + public class Parser { + + public class SynTree { + public SynTree(Token t ) { + tok = t; + children = new ArrayList(); + } + + public Token tok; + public ArrayList children; + + private void printIndent(int n) { + for(int i=0; i < n; ++i) System.out.print(" "); + } + + public void dump_all(int indent, boolean isLast) { + int last_idx = children.size(); + if(tok.col > 0) { + printIndent(indent); + System.out.println(((isLast || (last_idx == 0)) ? "= " : " ") + "\t" + tok.line + "\t" + tok.col + "\t" + tok.kind + "\t" + tok.val); + } + else { + printIndent(indent); + System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump_all(indent+4, idx == last_idx); + } + } + public void dump_all() { + dump_all(0, false); + } + + public void dump_pruned(int indent, boolean isLast) { + int last_idx = children.size(); + int indentPlus = 4; + if(tok.col > 0) { + printIndent(indent); + System.out.println(((isLast || (last_idx == 0)) ? 
"= " : " ") + "\t" + tok.line + "\t" + tok.col + "\t" + tok.kind + "\t" + tok.val); + } + else { + if(last_idx == 1) { + if(children.get(0).children.size() == 0) { + printIndent(indent); + System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + System.out.println(children.size() + "\t" + tok.line + "\t" + tok.kind + "\t" + tok.val); + } + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) children.get(idx).dump_pruned(indent+indentPlus, idx == last_idx); + } + } + public void dump_pruned() { + dump_pruned(0, false); + } + } + + //non terminals + public static final int _NT_Coco = 0; + public static final int _NT_SetDecl = 1; + public static final int _NT_TokenDecl = 2; + public static final int _NT_TokenExpr = 3; + public static final int _NT_Set = 4; + public static final int _NT_AttrDecl = 5; + public static final int _NT_SemText = 6; + public static final int _NT_Expression = 7; + public static final int _NT_SimSet = 8; + public static final int _NT_Char = 9; + public static final int _NT_Sym = 10; + public static final int _NT_TypeName = 11; + public static final int _NT_Term = 12; + public static final int _NT_Resolver = 13; + public static final int _NT_Factor = 14; + public static final int _NT_Attribs = 15; + public static final int _NT_Condition = 16; + public static final int _NT_TokenTerm = 17; + public static final int _NT_TokenFactor = 18; + public static final int _NT_Bracketed = 19; + public static final int maxNT = 19; + //terminals public static final int _EOF = 0; public static final int _ident = 1; public static final int _number = 2; public static final int _string = 3; public static final int _badString = 4; public static final int _char = 5; - public static final int maxT = 44; - public static final int _ddtSym = 45; - public static final int _optionSym = 46; +// public static final int _("COMPILER") = 6; +// public static final int 
_("IGNORECASE") = 7; +// public static final int _("CHARACTERS") = 8; +// public static final int _("TOKENS") = 9; +// public static final int _("PRAGMAS") = 10; +// public static final int _("COMMENTS") = 11; +// public static final int _("FROM") = 12; +// public static final int _("TO") = 13; +// public static final int _("NESTED") = 14; +// public static final int _("IGNORE") = 15; +// public static final int _("PRODUCTIONS") = 16; +// public static final int _("=") = 17; +// public static final int _(".") = 18; +// public static final int _("END") = 19; +// public static final int _("+") = 20; +// public static final int _("-") = 21; +// public static final int _("..") = 22; +// public static final int _("ANY") = 23; +// public static final int _(":") = 24; +// public static final int _("<") = 25; +// public static final int _("^") = 26; +// public static final int _("out") = 27; +// public static final int _(">") = 28; +// public static final int _(",") = 29; +// public static final int _("<.") = 30; +// public static final int _(".>") = 31; +// public static final int _("[") = 32; +// public static final int _("]") = 33; +// public static final int _("|") = 34; +// public static final int _("WEAK") = 35; +// public static final int _("(") = 36; +// public static final int _(")") = 37; +// public static final int _("{") = 38; +// public static final int _("}") = 39; +// public static final int _("SYNC") = 40; +// public static final int _("IF") = 41; +// public static final int _("CONTEXT") = 42; +// public static final int _("(.") = 43; +// public static final int _(".)") = 44; +// public static final int _(???) 
= 45; + //non terminals + public static final int maxT = 45; + public static final int _ddtSym = 46; + public static final int _optionSym = 47; static final boolean _T = true; static final boolean _x = false; @@ -45,13 +173,13 @@ public class Parser { public Token t; // last recognized token public Token la; // lookahead token int errDist = minErrDist; - + public Scanner scanner; public Errors errors; static final int id = 0; static final int str = 1; - + public Trace trace; // other Coco objects referenced by this ATG public Tab tab; public DFA dfa; @@ -65,6 +193,29 @@ public class Parser { + public SynTree ast_root; + Stack ast_stack; + + void AstAddTerminal() { + SynTree st = new SynTree( t ); + ast_stack.peek().children.add(st); + } + + boolean AstAddNonTerminal(int kind, String nt_name, int line) { + Token ntTok = new Token(); + ntTok.kind = kind; + ntTok.line = line; + ntTok.val = nt_name; + SynTree st = new SynTree( ntTok ); + ast_stack.peek().children.add(st); + ast_stack.push(st); + return true; + } + + void AstPopNonTerminal() { + ast_stack.pop(); + } + public Parser(Scanner scanner) { this.scanner = scanner; errors = new Errors(); @@ -79,7 +230,7 @@ public void SemErr (String msg) { if (errDist >= minErrDist) errors.SemErr(t.line, t.col, msg); errDist = 0; } - + void Get () { for (;;) { t = la; @@ -89,35 +240,44 @@ void Get () { break; } - if (la.kind == 45) { + if (la.kind == _ddtSym) { tab.SetDDT(la.val); } - if (la.kind == 46) { + if (la.kind == _optionSym) { tab.SetOption(la.val); } la = t; } } - + + boolean isKind(Token t, int n) { + int k = t.kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; + } + void Expect (int n) { - if (la.kind==n) Get(); else { SynErr(n); } + if (isKind(la, n)) Get(); else { SynErr(n); } } - + boolean StartOf (int s) { return set[s][la.kind]; } - + void ExpectWeak (int n, int follow) { - if (la.kind == n) Get(); + if (isKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); 
} } - + boolean WeakSeparator (int n, int syFol, int repFol) { int kind = la.kind; - if (kind == n) { Get(); return true; } + if (isKind(la, n)) { Get(); return true; } else if (StartOf(repFol)) return false; else { SynErr(n); @@ -128,78 +288,78 @@ boolean WeakSeparator (int n, int syFol, int repFol) { return StartOf(syFol); } } - - void Coco() { - Symbol sym; Graph g, g1, g2; String gramName; CharSet s; int beg; + + void Coco_NT() { + Symbol sym; Graph g, g1, g2; String gramName; CharSet s; int beg, line; if (StartOf(1)) { Get(); - beg = t.pos; + beg = t.pos; line = t.line; while (StartOf(1)) { Get(); } - pgen.usingPos = new Position(beg, la.pos, 0); + pgen.usingPos = new Position(beg, la.pos, 0, line); } - Expect(6); - genScanner = true; + Expect(6 /* "COMPILER" */); + genScanner = true; tab.ignored = new CharSet(); - Expect(1); + Expect(_ident); gramName = t.val; - beg = la.pos; + beg = la.pos; line = la.line; while (StartOf(2)) { Get(); } - tab.semDeclPos = new Position(beg, la.pos, 0); - if (la.kind == 7) { + tab.semDeclPos = new Position(beg, la.pos, 0, line); + if (isKind(la, 7 /* "IGNORECASE" */)) { Get(); dfa.ignoreCase = true; } - if (la.kind == 8) { + if (isKind(la, 8 /* "CHARACTERS" */)) { Get(); - while (la.kind == 1) { - SetDecl(); + while (isKind(la, _ident)) { + SetDecl_NT(); } } - if (la.kind == 9) { + if (isKind(la, 9 /* "TOKENS" */)) { Get(); - while (la.kind == 1 || la.kind == 3 || la.kind == 5) { - TokenDecl(Node.t); + while (isKind(la, _ident) || isKind(la, _string) || isKind(la, _char)) { + TokenDecl_NT(Node.t); } } - if (la.kind == 10) { + if (isKind(la, 10 /* "PRAGMAS" */)) { Get(); - while (la.kind == 1 || la.kind == 3 || la.kind == 5) { - TokenDecl(Node.pr); + while (isKind(la, _ident) || isKind(la, _string) || isKind(la, _char)) { + TokenDecl_NT(Node.pr); } } - while (la.kind == 11) { + while (isKind(la, 11 /* "COMMENTS" */)) { Get(); boolean nested = false; - Expect(12); - g1 = TokenExpr(); - Expect(13); - g2 = TokenExpr(); - if 
(la.kind == 14) { + Expect(12 /* "FROM" */); + g1 = TokenExpr_NT(); + Expect(13 /* "TO" */); + g2 = TokenExpr_NT(); + if (isKind(la, 14 /* "NESTED" */)) { Get(); nested = true; } dfa.NewComment(g1.l, g2.l, nested); } - while (la.kind == 15) { + while (isKind(la, 15 /* "IGNORE" */)) { Get(); - s = Set(); + s = Set_NT(); tab.ignored.Or(s); } - while (!(la.kind == 0 || la.kind == 16)) {SynErr(45); Get();} - Expect(16); + while (!(isKind(la, _EOF) || isKind(la, 16 /* "PRODUCTIONS" */))) {SynErr(46); Get();} + Expect(16 /* "PRODUCTIONS" */); if (genScanner) dfa.MakeDeterministic(); tab.DeleteNodes(); - while (la.kind == 1) { + while (isKind(la, _ident)) { Get(); sym = tab.FindSym(t.val); boolean undef = sym == null; - if (undef) sym = tab.NewSym(Node.nt, t.val, t.line); + if (undef) sym = tab.NewSym(Node.nt, t.val, t.line, t.col); else { if (sym.typ == Node.nt) { if (sym.graph != null) SemErr("name declared twice"); @@ -211,26 +371,26 @@ void Coco() { boolean noRet = sym.retVar==null; sym.retVar = null; - if (la.kind == 24 || la.kind == 29) { - AttrDecl(sym); + if (isKind(la, 25 /* "<" */) || isKind(la, 30 /* "<." */)) { + AttrDecl_NT(sym); } if (!undef) if (noAttrs != (sym.attrPos == null) || noRet != (sym.retVar == null)) SemErr("attribute mismatch between declaration and use of this symbol"); - if (la.kind == 42) { - sym.semPos = SemText(); + if (isKind(la, 43 /* "(." */)) { + sym.semPos = SemText_NT(); } - ExpectWeak(17, 3); - g = Expression(); + ExpectWeak(17 /* "=" */, 3); + g = Expression_NT(); sym.graph = g.l; tab.Finish(g); - ExpectWeak(18, 4); + ExpectWeak(18 /* "." 
*/, 4); } - Expect(19); - Expect(1); + Expect(19 /* "END" */); + Expect(_ident); if (gramName.compareTo(t.val) != 0) SemErr("name does not match grammar name"); tab.gramSy = tab.FindSym(gramName); @@ -241,7 +401,7 @@ void Coco() { if (sym.attrPos != null) SemErr("grammar symbol must not have attributes"); } - tab.noSym = tab.NewSym(Node.t, "???", 0); // noSym gets highest number + tab.noSym = tab.NewSym(Node.t, "???", 0, 0); // noSym gets highest number tab.SetupAnys(); tab.RenumberPragmas(); if (tab.ddt[2]) tab.PrintNodes(); @@ -249,7 +409,16 @@ void Coco() { System.out.println("checking"); tab.CompSymbolSets(); if (tab.ddt[7]) tab.XRef(); - if (tab.GrammarOk()) { + boolean doGenCode = false; + if(tab.ignoreErrors) { + doGenCode = true; + tab.GrammarCheckAll(); + } + else doGenCode = tab.GrammarOk(); + if(tab.genRREBNF && doGenCode) { + pgen.WriteRREBNF(); + } + if (doGenCode) { System.out.print("parser"); pgen.WriteParser(); if (genScanner) { @@ -263,40 +432,50 @@ void Coco() { } if (tab.ddt[6]) tab.PrintSymbolTable(); - Expect(18); + Expect(18 /* "." */); } - void SetDecl() { + void SetDecl_NT() { CharSet s; - Expect(1); + Expect(_ident); String name = t.val; CharClass c = tab.FindCharClass(name); if (c != null) SemErr("name declared twice"); - Expect(17); - s = Set(); + Expect(17 /* "=" */); + s = Set_NT(); if (s.Elements() == 0) SemErr("character set must not be empty"); c = tab.NewCharClass(name, s); - Expect(18); + Expect(18 /* "." 
*/); } - void TokenDecl(int typ) { - SymInfo s; Symbol sym; Graph g; - s = Sym(); + void TokenDecl_NT(int typ) { + SymInfo s, si; Symbol sym, inheritsSym; Graph g; + s = Sym_NT(); sym = tab.FindSym(s.name); if (sym != null) SemErr("name declared twice"); else { - sym = tab.NewSym(typ, s.name, t.line); + sym = tab.NewSym(typ, s.name, t.line, t.col); sym.tokenKind = Symbol.fixedToken; } tokenString = null; - while (!(StartOf(5))) {SynErr(46); Get();} - if (la.kind == 17) { + if (isKind(la, 24 /* ":" */)) { + Get(); + si = Sym_NT(); + inheritsSym = tab.FindSym(si.name); + if (inheritsSym == null) SemErr("token can't inherit from undeclared name"); + else if (inheritsSym == sym) SemErr("token must not inherit from itself"); + else if (inheritsSym.typ != typ) SemErr("token can't inherit from this token type"); + else sym.inherits = inheritsSym; + + } + while (!(StartOf(5))) {SynErr(47); Get();} + if (isKind(la, 17 /* "=" */)) { Get(); - g = TokenExpr(); - Expect(18); + g = TokenExpr_NT(); + Expect(18 /* "." */); if (s.kind == str) SemErr("a literal must not be declared with a structure"); tab.Finish(g); if (tokenString == null || tokenString.equals(noString)) @@ -312,20 +491,20 @@ void TokenDecl(int typ) { if (s.kind == id) genScanner = false; else dfa.MatchLiteral(sym.name, sym); - } else SynErr(47); - if (la.kind == 42) { - sym.semPos = SemText(); - if (typ != Node.pr) SemErr("semantic action not allowed here"); + } else SynErr(48); + if (isKind(la, 43 /* "(." 
*/)) { + sym.semPos = SemText_NT(); + if (typ == Node.t) errors.Warning("Warning semantic action on token declarations require a custom Scanner"); } } - Graph TokenExpr() { + Graph TokenExpr_NT() { Graph g; Graph g2; - g = TokenTerm(); + g = TokenTerm_NT(); boolean first = true; - while (WeakSeparator(33,7,8) ) { - g2 = TokenTerm(); + while (WeakSeparator(34,7,8) ) { + g2 = TokenTerm_NT(); if (first) { tab.MakeFirstAlt(g); first = false; } tab.MakeAlternative(g, g2); @@ -333,111 +512,111 @@ Graph TokenExpr() { return g; } - CharSet Set() { + CharSet Set_NT() { CharSet s; CharSet s2; - s = SimSet(); - while (la.kind == 20 || la.kind == 21) { - if (la.kind == 20) { + s = SimSet_NT(); + while (isKind(la, 20 /* "+" */) || isKind(la, 21 /* "-" */)) { + if (isKind(la, 20 /* "+" */)) { Get(); - s2 = SimSet(); + s2 = SimSet_NT(); s.Or(s2); } else { Get(); - s2 = SimSet(); + s2 = SimSet_NT(); s.Subtract(s2); } } return s; } - void AttrDecl(Symbol sym) { - int beg, col; - if (la.kind == 24) { + void AttrDecl_NT(Symbol sym) { + int beg, col, line; + if (isKind(la, 25 /* "<" */)) { Get(); - if (la.kind == 25 || la.kind == 26) { - if (la.kind == 25) { + if (isKind(la, 26 /* "^" */) || isKind(la, 27 /* "out" */)) { + if (isKind(la, 26 /* "^" */)) { Get(); } else { Get(); } beg = la.pos; - TypeName(); + TypeName_NT(); sym.retType = scanner.buffer.GetString(beg, la.pos); - Expect(1); + Expect(_ident); sym.retVar = t.val; - if (la.kind == 27) { + if (isKind(la, 28 /* ">" */)) { Get(); - } else if (la.kind == 28) { + } else if (isKind(la, 29 /* "," */)) { Get(); - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; while (StartOf(9)) { Get(); } - Expect(27); + Expect(28 /* ">" */); if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); - } else SynErr(48); + sym.attrPos = new Position(beg, t.pos, col, line); + } else SynErr(49); } else if (StartOf(10)) { - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; if (StartOf(11)) { 
Get(); while (StartOf(9)) { Get(); } } - Expect(27); + Expect(28 /* ">" */); if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); - } else SynErr(49); - } else if (la.kind == 29) { + sym.attrPos = new Position(beg, t.pos, col, line); + } else SynErr(50); + } else if (isKind(la, 30 /* "<." */)) { Get(); - if (la.kind == 25 || la.kind == 26) { - if (la.kind == 25) { + if (isKind(la, 26 /* "^" */) || isKind(la, 27 /* "out" */)) { + if (isKind(la, 26 /* "^" */)) { Get(); } else { Get(); } beg = la.pos; - TypeName(); + TypeName_NT(); sym.retType = scanner.buffer.GetString(beg, la.pos); - Expect(1); + Expect(_ident); sym.retVar = t.val; - if (la.kind == 30) { + if (isKind(la, 31 /* ".>" */)) { Get(); - } else if (la.kind == 28) { + } else if (isKind(la, 29 /* "," */)) { Get(); - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; while (StartOf(12)) { Get(); } - Expect(30); + Expect(31 /* ".>" */); if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); - } else SynErr(50); + sym.attrPos = new Position(beg, t.pos, col, line); + } else SynErr(51); } else if (StartOf(10)) { - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; if (StartOf(13)) { Get(); while (StartOf(12)) { Get(); } } - Expect(30); + Expect(31 /* ".>" */); if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col); - } else SynErr(51); - } else SynErr(52); + sym.attrPos = new Position(beg, t.pos, col, line); + } else SynErr(52); + } else SynErr(53); } - Position SemText() { + Position SemText_NT() { Position pos; - Expect(42); - int beg = la.pos; int col = la.col; + Expect(43 /* "(." 
*/); + int beg = la.pos; int col = la.col; int line = la.line; while (StartOf(14)) { if (StartOf(15)) { Get(); - } else if (la.kind == 4) { + } else if (isKind(la, _badString)) { Get(); SemErr("bad string in semantic action"); } else { @@ -445,18 +624,18 @@ Position SemText() { SemErr("missing end of previous semantic action"); } } - Expect(43); - pos = new Position(beg, t.pos, col); + Expect(44 /* ".)" */); + pos = new Position(beg, t.pos, col, line); return pos; } - Graph Expression() { + Graph Expression_NT() { Graph g; Graph g2; - g = Term(); + g = Term_NT(); boolean first = true; - while (WeakSeparator(33,16,17) ) { - g2 = Term(); + while (WeakSeparator(34,16,17) ) { + g2 = Term_NT(); if (first) { tab.MakeFirstAlt(g); first = false; } tab.MakeAlternative(g, g2); @@ -464,40 +643,40 @@ Graph Expression() { return g; } - CharSet SimSet() { + CharSet SimSet_NT() { CharSet s; int n1, n2; s = new CharSet(); - if (la.kind == 1) { + if (isKind(la, _ident)) { Get(); CharClass c = tab.FindCharClass(t.val); if (c == null) SemErr("undefined name"); else s.Or(c.set); - } else if (la.kind == 3) { + } else if (isKind(la, _string)) { Get(); String name = t.val; name = tab.Unescape(name.substring(1, name.length()-1)); for (int i = 0; i < name.length(); i++) if (dfa.ignoreCase) s.Set(Character.toLowerCase(name.charAt(i))); else s.Set(name.charAt(i)); - } else if (la.kind == 5) { - n1 = Char(); + } else if (isKind(la, _char)) { + n1 = Char_NT(); s.Set(n1); - if (la.kind == 22) { + if (isKind(la, 22 /* ".." 
*/)) { Get(); - n2 = Char(); + n2 = Char_NT(); for (int i = n1; i <= n2; i++) s.Set(i); } - } else if (la.kind == 23) { + } else if (isKind(la, 23 /* "ANY" */)) { Get(); s = new CharSet(); s.Fill(); - } else SynErr(53); + } else SynErr(54); return s; } - int Char() { + int Char_NT() { int n; - Expect(5); + Expect(_char); String name = t.val; n = 0; name = tab.Unescape(name.substring(1, name.length()-1)); if (name.length() == 1) n = name.charAt(0); @@ -507,14 +686,14 @@ int Char() { return n; } - SymInfo Sym() { + SymInfo Sym_NT() { SymInfo s; s = new SymInfo(); s.name = "???"; s.kind = id; - if (la.kind == 1) { + if (isKind(la, _ident)) { Get(); s.kind = id; s.name = t.val; - } else if (la.kind == 3 || la.kind == 5) { - if (la.kind == 3) { + } else if (isKind(la, _string) || isKind(la, _char)) { + if (isKind(la, _string)) { Get(); s.name = t.val; } else { @@ -525,87 +704,87 @@ SymInfo Sym() { if (dfa.ignoreCase) s.name = s.name.toLowerCase(); if (s.name.indexOf(' ') >= 0) SemErr("literal tokens must not contain blanks"); - } else SynErr(54); + } else SynErr(55); return s; } - void TypeName() { - Expect(1); - while (la.kind == 18 || la.kind == 24 || la.kind == 31) { - if (la.kind == 18) { + void TypeName_NT() { + Expect(_ident); + while (isKind(la, 18 /* "." */) || isKind(la, 25 /* "<" */) || isKind(la, 32 /* "[" */)) { + if (isKind(la, 18 /* "." 
*/)) { Get(); - Expect(1); - } else if (la.kind == 31) { + Expect(_ident); + } else if (isKind(la, 32 /* "[" */)) { Get(); - Expect(32); + Expect(33 /* "]" */); } else { Get(); - TypeName(); - while (la.kind == 28) { + TypeName_NT(); + while (isKind(la, 29 /* "," */)) { Get(); - TypeName(); + TypeName_NT(); } - Expect(27); + Expect(28 /* ">" */); } } } - Graph Term() { + Graph Term_NT() { Graph g; Graph g2; Node rslv = null; g = null; if (StartOf(18)) { - if (la.kind == 40) { - rslv = tab.NewNode(Node.rslv, null, la.line); - rslv.pos = Resolver(); + if (isKind(la, 41 /* "IF" */)) { + rslv = tab.NewNode(Node.rslv, null, la.line, la.col); + rslv.pos = Resolver_NT(); g = new Graph(rslv); } - g2 = Factor(); + g2 = Factor_NT(); if (rslv != null) tab.MakeSequence(g, g2); else g = g2; while (StartOf(19)) { - g2 = Factor(); + g2 = Factor_NT(); tab.MakeSequence(g, g2); } } else if (StartOf(20)) { - g = new Graph(tab.NewNode(Node.eps, null, 0)); - } else SynErr(55); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); + } else SynErr(56); if (g == null) // invalid start of Term - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); return g; } - Position Resolver() { + Position Resolver_NT() { Position pos; - Expect(40); - Expect(35); - int beg = la.pos; int col = la.col; - Condition(); - pos = new Position(beg, t.pos, col); + Expect(41 /* "IF" */); + Expect(36 /* "(" */); + int beg = la.pos; int col = la.col; int line = la.line; + Condition_NT(); + pos = new Position(beg, t.pos, col, line); return pos; } - Graph Factor() { + Graph Factor_NT() { Graph g; SymInfo s; Position pos; boolean weak = false; g = null; switch (la.kind) { - case 1: case 3: case 5: case 34: { - if (la.kind == 34) { + case _ident: case _string: case _char: case 35 /* "WEAK" */: { + if (isKind(la, 35 /* "WEAK" */)) { Get(); weak = true; } - s = Sym(); + s = Sym_NT(); Symbol sym = tab.FindSym(s.name); if (sym == null && s.kind == str) sym = 
(Symbol)tab.literals.get(s.name); boolean undef = sym == null; if (undef) { if (s.kind == id) - sym = tab.NewSym(Node.nt, s.name, 0); // forward nt + sym = tab.NewSym(Node.nt, s.name, 0, 0); // forward nt else if (genScanner) { - sym = tab.NewSym(Node.t, s.name, t.line); + sym = tab.NewSym(Node.t, s.name, t.line, t.col); dfa.MatchLiteral(sym.name, sym); } else { // undefined string in production SemErr("undefined string in production"); @@ -618,11 +797,11 @@ else if (genScanner) { if (weak) if (typ == Node.t) typ = Node.wt; else SemErr("only terminals may be weak"); - Node p = tab.NewNode(typ, sym, t.line); + Node p = tab.NewNode(typ, sym, t.line, t.col); g = new Graph(p); - if (la.kind == 24 || la.kind == 29) { - Attribs(p); + if (isKind(la, 25 /* "<" */) || isKind(la, 30 /* "<." */)) { + Attribs_NT(p); if (s.kind != id) SemErr("a literal must not have attributes"); } if (undef) { @@ -634,62 +813,62 @@ else if (genScanner) { break; } - case 35: { + case 36 /* "(" */: { Get(); - g = Expression(); - Expect(36); + g = Expression_NT(); + Expect(37 /* ")" */); break; } - case 31: { + case 32 /* "[" */: { Get(); - g = Expression(); - Expect(32); + g = Expression_NT(); + Expect(33 /* "]" */); tab.MakeOption(g); break; } - case 37: { + case 38 /* "{" */: { Get(); - g = Expression(); - Expect(38); + g = Expression_NT(); + Expect(39 /* "}" */); tab.MakeIteration(g); break; } - case 42: { - pos = SemText(); - Node p = tab.NewNode(Node.sem, null, 0); + case 43 /* "(." 
*/: { + pos = SemText_NT(); + Node p = tab.NewNode(Node.sem, null, t.line, t.col); p.pos = pos; g = new Graph(p); break; } - case 23: { + case 23 /* "ANY" */: { Get(); - Node p = tab.NewNode(Node.any, null, t.line); // p.set is set in tab.SetupAnys + Node p = tab.NewNode(Node.any, null, t.line, t.col); // p.set is set in tab.SetupAnys g = new Graph(p); break; } - case 39: { + case 40 /* "SYNC" */: { Get(); - Node p = tab.NewNode(Node.sync, null, 0); + Node p = tab.NewNode(Node.sync, null, t.line, t.col); g = new Graph(p); break; } - default: SynErr(56); break; + default: SynErr(57); break; } if (g == null) // invalid start of Factor - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); return g; } - void Attribs(Node n) { - int beg, col; - if (la.kind == 24) { + void Attribs_NT(Node n) { + int beg, col, line; + if (isKind(la, 25 /* "<" */)) { Get(); - if (la.kind == 25 || la.kind == 26) { - if (la.kind == 25) { + if (isKind(la, 26 /* "^" */) || isKind(la, 27 /* "out" */)) { + if (isKind(la, 26 /* "^" */)) { Get(); } else { Get(); @@ -698,19 +877,19 @@ void Attribs(Node n) { while (StartOf(21)) { if (StartOf(22)) { Get(); - } else if (la.kind == 31 || la.kind == 35) { - Bracketed(); + } else if (isKind(la, 32 /* "[" */) || isKind(la, 36 /* "(" */)) { + Bracketed_NT(); } else { Get(); SemErr("bad string in attributes"); } } n.retVar = scanner.buffer.GetString(beg, la.pos); - if (la.kind == 27) { + if (isKind(la, 28 /* ">" */)) { Get(); - } else if (la.kind == 28) { + } else if (isKind(la, 29 /* "," */)) { Get(); - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; while (StartOf(9)) { if (StartOf(23)) { Get(); @@ -719,11 +898,11 @@ void Attribs(Node n) { SemErr("bad string in attributes"); } } - Expect(27); - if (t.pos > beg) n.pos = new Position(beg, t.pos, col); - } else SynErr(57); + Expect(28 /* ">" */); + if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); + } else 
SynErr(58); } else if (StartOf(10)) { - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; if (StartOf(11)) { if (StartOf(24)) { Get(); @@ -740,13 +919,13 @@ void Attribs(Node n) { } } } - Expect(27); - if (t.pos > beg) n.pos = new Position(beg, t.pos, col); - } else SynErr(58); - } else if (la.kind == 29) { + Expect(28 /* ">" */); + if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); + } else SynErr(59); + } else if (isKind(la, 30 /* "<." */)) { Get(); - if (la.kind == 25 || la.kind == 26) { - if (la.kind == 25) { + if (isKind(la, 26 /* "^" */) || isKind(la, 27 /* "out" */)) { + if (isKind(la, 26 /* "^" */)) { Get(); } else { Get(); @@ -755,19 +934,19 @@ void Attribs(Node n) { while (StartOf(25)) { if (StartOf(26)) { Get(); - } else if (la.kind == 31 || la.kind == 35) { - Bracketed(); + } else if (isKind(la, 32 /* "[" */) || isKind(la, 36 /* "(" */)) { + Bracketed_NT(); } else { Get(); SemErr("bad string in attributes"); } } n.retVar = scanner.buffer.GetString(beg, la.pos); - if (la.kind == 30) { + if (isKind(la, 31 /* ".>" */)) { Get(); - } else if (la.kind == 28) { + } else if (isKind(la, 29 /* "," */)) { Get(); - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; while (StartOf(12)) { if (StartOf(27)) { Get(); @@ -776,11 +955,11 @@ void Attribs(Node n) { SemErr("bad string in attributes"); } } - Expect(30); - if (t.pos > beg) n.pos = new Position(beg, t.pos, col); - } else SynErr(59); + Expect(31 /* ".>" */); + if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); + } else SynErr(60); } else if (StartOf(10)) { - beg = la.pos; col = la.col; + beg = la.pos; col = la.col; line = la.line; if (StartOf(13)) { if (StartOf(28)) { Get(); @@ -797,56 +976,56 @@ void Attribs(Node n) { } } } - Expect(30); - if (t.pos > beg) n.pos = new Position(beg, t.pos, col); - } else SynErr(60); - } else SynErr(61); + Expect(31 /* ".>" */); + if (t.pos > beg) n.pos = new Position(beg, t.pos, col, line); + } else SynErr(61); 
+ } else SynErr(62); } - void Condition() { + void Condition_NT() { while (StartOf(29)) { - if (la.kind == 35) { + if (isKind(la, 36 /* "(" */)) { Get(); - Condition(); + Condition_NT(); } else { Get(); } } - Expect(36); + Expect(37 /* ")" */); } - Graph TokenTerm() { + Graph TokenTerm_NT() { Graph g; Graph g2; - g = TokenFactor(); + g = TokenFactor_NT(); while (StartOf(7)) { - g2 = TokenFactor(); + g2 = TokenFactor_NT(); tab.MakeSequence(g, g2); } - if (la.kind == 41) { + if (isKind(la, 42 /* "CONTEXT" */)) { Get(); - Expect(35); - g2 = TokenExpr(); + Expect(36 /* "(" */); + g2 = TokenExpr_NT(); tab.SetContextTrans(g2.l); dfa.hasCtxMoves = true; tab.MakeSequence(g, g2); - Expect(36); + Expect(37 /* ")" */); } return g; } - Graph TokenFactor() { + Graph TokenFactor_NT() { Graph g; SymInfo s; g = null; - if (la.kind == 1 || la.kind == 3 || la.kind == 5) { - s = Sym(); + if (isKind(la, _ident) || isKind(la, _string) || isKind(la, _char)) { + s = Sym_NT(); if (s.kind == id) { CharClass c = tab.FindCharClass(s.name); if (c == null) { - SemErr("undefined name"); + SemErr("undefined name: " + s.name); c = tab.NewCharClass(s.name, new CharSet()); } - Node p = tab.NewNode(Node.clas, null, 0); p.val = c.n; + Node p = tab.NewNode(Node.clas, null, 0, 0); p.val = c.n; g = new Graph(p); tokenString = noString; } else { // str @@ -855,93 +1034,101 @@ Graph TokenFactor() { else tokenString = noString; } - } else if (la.kind == 35) { + } else if (isKind(la, 36 /* "(" */)) { Get(); - g = TokenExpr(); - Expect(36); - } else if (la.kind == 31) { + g = TokenExpr_NT(); + Expect(37 /* ")" */); + } else if (isKind(la, 32 /* "[" */)) { Get(); - g = TokenExpr(); - Expect(32); + g = TokenExpr_NT(); + Expect(33 /* "]" */); tab.MakeOption(g); tokenString = noString; - } else if (la.kind == 37) { + } else if (isKind(la, 38 /* "{" */)) { Get(); - g = TokenExpr(); - Expect(38); + g = TokenExpr_NT(); + Expect(39 /* "}" */); tab.MakeIteration(g); tokenString = noString; - } else SynErr(62); + } 
else SynErr(63); if (g == null) // invalid start of TokenFactor - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); return g; } - void Bracketed() { - if (la.kind == 35) { + void Bracketed_NT() { + if (isKind(la, 36 /* "(" */)) { Get(); while (StartOf(29)) { - if (la.kind == 31 || la.kind == 35) { - Bracketed(); + if (isKind(la, 32 /* "[" */) || isKind(la, 36 /* "(" */)) { + Bracketed_NT(); } else { Get(); } } - Expect(36); - } else if (la.kind == 31) { + Expect(37 /* ")" */); + } else if (isKind(la, 32 /* "[" */)) { Get(); while (StartOf(30)) { - if (la.kind == 31 || la.kind == 35) { - Bracketed(); + if (isKind(la, 32 /* "[" */) || isKind(la, 36 /* "(" */)) { + Bracketed_NT(); } else { Get(); } } - Expect(32); - } else SynErr(63); + Expect(33 /* "]" */); + } else SynErr(64); } public void Parse() { la = new Token(); - la.val = ""; + la.val = ""; Get(); - Coco(); + Coco_NT(); Expect(0); } + // a token's base type + public static final int[] tBase = { + + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1, + }; + private static final boolean[][] set = { - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x}, - {_x,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_x, _x,_x,_x,_x, _T,_T,_T,_x, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_T,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_T, _x,_T,_T,_T, _x,_T,_x,_T, _T,_x,_T,_x, _x,_x}, - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x}, - 
{_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_T,_T,_T, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_x,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_x,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_x, _T,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_x, _x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x}, 
- {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_T,_T, _x,_T,_x,_T, _T,_x,_T,_x, _x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_T,_T, _x,_T,_x,_T, _x,_x,_T,_x, _x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_T,_x,_x, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _x,_T,_T,_x, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_x,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_x,_x, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_x,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, 
_T,_x} + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, + {_x,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_x, _x,_x,_x,_x, _T,_T,_T,_x, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_T,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_T,_T, _T,_x,_T,_x, _T,_T,_x,_T, _x,_x,_x}, + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_T,_T,_T, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_x, _x,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_x, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, 
_T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_x, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _x,_T,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_T, _x,_x,_x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_x, _x,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_T, _T,_x,_T,_x, _T,_T,_x,_T, _x,_x,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_T, _T,_x,_T,_x, _T,_x,_x,_T, _x,_x,_x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_T,_x, _x,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_x,_T,_T, _x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_x, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, 
_T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_x, _x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_x, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_T, _T,_T,_T,_T, _T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x} }; } // end Parser @@ -951,7 +1138,7 @@ class Errors { public int count = 0; // number of errors detected public java.io.PrintStream errorStream = System.out; // error messages go to this stream public String errMsgFormat = "-- line {0} col {1}: {2}"; // 0=line, 1=column, 2=text - + protected void printMsg(int line, int column, String msg) { StringBuffer b = new StringBuffer(errMsgFormat); int pos = b.indexOf("{0}"); @@ -962,7 +1149,7 @@ protected void printMsg(int line, int column, String msg) { if (pos >= 0) b.replace(pos, pos+3, msg); errorStream.println(b.toString()); } - + public void SynErr (int line, int col, int n) { String s; switch (n) { @@ -990,66 +1177,67 @@ public void SynErr (int line, int col, int n) { case 21: s = "\"-\" expected"; break; case 22: s = "\"..\" expected"; break; case 23: s = "\"ANY\" expected"; break; - case 24: s = "\"<\" expected"; break; - case 25: s = "\"^\" expected"; break; - case 26: s = "\"out\" expected"; break; - case 27: s = "\">\" expected"; break; - case 28: s = "\",\" expected"; break; - case 29: s = "\"<.\" expected"; break; - case 30: s = "\".>\" 
expected"; break; - case 31: s = "\"[\" expected"; break; - case 32: s = "\"]\" expected"; break; - case 33: s = "\"|\" expected"; break; - case 34: s = "\"WEAK\" expected"; break; - case 35: s = "\"(\" expected"; break; - case 36: s = "\")\" expected"; break; - case 37: s = "\"{\" expected"; break; - case 38: s = "\"}\" expected"; break; - case 39: s = "\"SYNC\" expected"; break; - case 40: s = "\"IF\" expected"; break; - case 41: s = "\"CONTEXT\" expected"; break; - case 42: s = "\"(.\" expected"; break; - case 43: s = "\".)\" expected"; break; - case 44: s = "??? expected"; break; - case 45: s = "this symbol not expected in Coco"; break; - case 46: s = "this symbol not expected in TokenDecl"; break; - case 47: s = "invalid TokenDecl"; break; - case 48: s = "invalid AttrDecl"; break; + case 24: s = "\":\" expected"; break; + case 25: s = "\"<\" expected"; break; + case 26: s = "\"^\" expected"; break; + case 27: s = "\"out\" expected"; break; + case 28: s = "\">\" expected"; break; + case 29: s = "\",\" expected"; break; + case 30: s = "\"<.\" expected"; break; + case 31: s = "\".>\" expected"; break; + case 32: s = "\"[\" expected"; break; + case 33: s = "\"]\" expected"; break; + case 34: s = "\"|\" expected"; break; + case 35: s = "\"WEAK\" expected"; break; + case 36: s = "\"(\" expected"; break; + case 37: s = "\")\" expected"; break; + case 38: s = "\"{\" expected"; break; + case 39: s = "\"}\" expected"; break; + case 40: s = "\"SYNC\" expected"; break; + case 41: s = "\"IF\" expected"; break; + case 42: s = "\"CONTEXT\" expected"; break; + case 43: s = "\"(.\" expected"; break; + case 44: s = "\".)\" expected"; break; + case 45: s = "??? 
expected"; break; + case 46: s = "this symbol not expected in Coco"; break; + case 47: s = "this symbol not expected in TokenDecl"; break; + case 48: s = "invalid TokenDecl"; break; case 49: s = "invalid AttrDecl"; break; case 50: s = "invalid AttrDecl"; break; case 51: s = "invalid AttrDecl"; break; case 52: s = "invalid AttrDecl"; break; - case 53: s = "invalid SimSet"; break; - case 54: s = "invalid Sym"; break; - case 55: s = "invalid Term"; break; - case 56: s = "invalid Factor"; break; - case 57: s = "invalid Attribs"; break; + case 53: s = "invalid AttrDecl"; break; + case 54: s = "invalid SimSet"; break; + case 55: s = "invalid Sym"; break; + case 56: s = "invalid Term"; break; + case 57: s = "invalid Factor"; break; case 58: s = "invalid Attribs"; break; case 59: s = "invalid Attribs"; break; case 60: s = "invalid Attribs"; break; case 61: s = "invalid Attribs"; break; - case 62: s = "invalid TokenFactor"; break; - case 63: s = "invalid Bracketed"; break; + case 62: s = "invalid Attribs"; break; + case 63: s = "invalid TokenFactor"; break; + case 64: s = "invalid Bracketed"; break; default: s = "error " + n; break; } printMsg(line, col, s); count++; } - public void SemErr (int line, int col, String s) { + public void SemErr (int line, int col, String s) { printMsg(line, col, s); count++; } - + public void SemErr (String s) { errorStream.println(s); count++; } - - public void Warning (int line, int col, String s) { + + public void Warning (int line, int col, String s) { printMsg(line, col, s); } - + public void Warning (String s) { errorStream.println(s); } diff --git a/src/ParserGen.java b/src/ParserGen.java index 00e1e54..8757ab9 100644 --- a/src/ParserGen.java +++ b/src/ParserGen.java @@ -40,6 +40,11 @@ import java.io.FileWriter; /* pdt */ import java.util.ArrayList; import java.util.BitSet; +import java.util.Comparator; +import java.util.Hashtable; +import java.util.Map; +import java.util.TreeMap; +import java.util.Iterator; public class ParserGen { @@ 
-80,9 +85,17 @@ boolean Overlaps (BitSet s1, BitSet s2) { for (int i = 0; i < len; ++i) { if (s1.get(i) && s2.get(i)) { return true; + } } + return false; } - return false; + + void WriteSymbolOrCode(Symbol sym) { + if (!Character.isLetter(sym.name.charAt(0))) { + gen.print(sym.n + " /* " + sym.name + " */"); + } else { + gen.print("_" + sym.name); + } } // AW: use a switch if more than 5 alternatives and none starts with a resolver, no LL1 warning @@ -128,6 +141,17 @@ void CopySourcePart (Position pos, int indent) { } } + /* TODO better interface for CopySourcePart */ + public void CopySourcePart (Parser parser, PrintWriter gen, Position pos, int indent) { + // Copy text described by pos from atg to gen + int oldPos = parser.pgen.buffer.getPos(); // Pos is modified by CopySourcePart + PrintWriter prevGen = parser.pgen.gen; + parser.pgen.gen = gen; + parser.pgen.CopySourcePart(pos, 0); + parser.pgen.gen = prevGen; + parser.pgen.buffer.setPos(oldPos); + } + void GenErrorMsg (int errTyp, Symbol sym) { errorNr++; err.write(ls + "\t\t\tcase " + errorNr + ": s = \""); @@ -158,7 +182,9 @@ else if (n <= maxTerm) { for (int i = 0; i < tab.terminals.size(); i++) { Symbol sym = (Symbol)tab.terminals.get(i); if (s.get(sym.n)) { - gen.print("la.kind == " + sym.n); + gen.print("isKind(la, "); + WriteSymbolOrCode(sym); + gen.print(")"); --n; if (n > 0) gen.print(" || "); } @@ -171,7 +197,11 @@ else if (n <= maxTerm) { void PutCaseLabels (BitSet s) { for (int i = 0; i < tab.terminals.size(); i++) { Symbol sym = (Symbol)tab.terminals.get(i); - if (s.get(sym.n)) gen.print("case " + sym.n + ": "); + if (s.get(sym.n)) { + gen.print("case "); + WriteSymbolOrCode(sym); + gen.print(": "); + } } } @@ -183,7 +213,7 @@ void GenCode (Node p, int indent, BitSet isChecked) { case Node.nt: { Indent(indent); if (p.retVar != null) gen.print(p.retVar + " = "); - gen.print(p.sym.name + "("); + gen.print(p.sym.name + "_NT("); CopySourcePart(p.pos, 0); gen.println(");"); break; @@ -192,14 +222,23 @@ 
void GenCode (Node p, int indent, BitSet isChecked) { Indent(indent); // assert: if isChecked[p.sym.n] is true, then isChecked contains only p.sym.n if (isChecked.get(p.sym.n)) gen.println("Get();"); - else gen.println("Expect(" + p.sym.n + ");"); + else { + gen.print("Expect("); + WriteSymbolOrCode(p.sym); + gen.println(");"); + } + if(tab.genAST) { + gen.println("\tAstAddTerminal();"); + } break; } case Node.wt: { Indent(indent); s1 = tab.Expected(p.next, curSy); s1.or(tab.allSyncSets); - gen.println("ExpectWeak(" + p.sym.n + ", " + NewCondSet(s1) + ");"); + gen.print("ExpectWeak("); + WriteSymbolOrCode(p.sym); + gen.println(", " + NewCondSet(s1) + ");"); break; } case Node.any: { @@ -303,13 +342,36 @@ void GenCode (Node p, int indent, BitSet isChecked) { } } + void GenTokenBase() { + for (int i = 0; i < tab.terminals.size(); i++) { + Symbol sym = (Symbol)tab.terminals.get(i); + if((i % 20) == 0) gen.print("\n\t\t"); + if (sym.inherits == null) + gen.print("-1,"); // not inherited + else + gen.print(sym.inherits.n + ","); + } + } + void GenTokens() { - //foreach (Symbol sym in Symbol.terminals) { + gen.println("\t//non terminals"); + for (int i = 0; i < tab.nonterminals.size(); i++) { + Symbol sym = (Symbol)tab.nonterminals.get(i); + gen.println("\tpublic static final int _NT_" + sym.name + " = " + sym.n + ";"); + } + gen.println("\tpublic static final int maxNT = " + (tab.nonterminals.size()-1) + ";"); + gen.println("\t//terminals"); for (int i = 0; i < tab.terminals.size(); i++) { Symbol sym = (Symbol)tab.terminals.get(i); if (Character.isLetter(sym.name.charAt(0))) - gen.println("\tpublic static final int _" + sym.name + " = " + sym.n + ";"); + gen.print("\tpublic static final int _" + sym.name + " = " + sym.n + ";"); + else + gen.print("//\tpublic static final int _(" + sym.name + ") = " + sym.n + ";"); + if(sym.inherits != null) + gen.print(" // INHERITS -> " + sym.inherits.name); + gen.println(); } + gen.println("\t//non terminals"); } void GenPragmas() { 
@@ -324,7 +386,9 @@ void GenCodePragmas() { for (int i = 0; i < tab.pragmas.size(); i++) { Symbol sym = (Symbol)tab.pragmas.get(i); gen.println(); - gen.println("\t\t\tif (la.kind == " + sym.n + ") {"); + gen.print("\t\t\tif (la.kind == "); + WriteSymbolOrCode(sym); + gen.println(") {"); CopySourcePart(sym.semPos, 4); gen.print ("\t\t\t}"); } @@ -336,12 +400,20 @@ void GenProductions() { curSy = sym; gen.print("\t"); if (sym.retType == null) gen.print("void "); else gen.print(sym.retType + " "); - gen.print(sym.name + "("); + gen.print(sym.name + "_NT("); CopySourcePart(sym.attrPos, 0); gen.println(") {"); if (sym.retVar != null) gen.println("\t\t" + sym.retType + " " + sym.retVar + ";"); CopySourcePart(sym.semPos, 2); + if(tab.genAST) { + if(i == 0) gen.println("\tToken rt = new Token(); rt.kind = _NT_" + sym.name + "; rt.val = \"" + sym.name + "\";ast_root = new SynTree( rt ); ast_stack = new Stack(); ast_stack.push(ast_root);"); + else gen.println("\tboolean ntAdded = AstAddNonTerminal(_NT_" + sym.name + ", \"" + sym.name + "\", la.line);"); + } GenCode(sym.graph, 2, new BitSet(tab.terminals.size())); + if(tab.genAST) { + if(i == 0) gen.println("\tAstPopNonTerminal();"); + else gen.println("\tif(ntAdded) AstPopNonTerminal();"); + } if (sym.retVar != null) gen.println("\t\treturn " + sym.retVar + ";"); gen.println("\t}"); gen.println(); } @@ -398,7 +470,8 @@ public void WriteParser () { g.CopyFramePart("-->declarations"); CopySourcePart(tab.semDeclPos, 0); g.CopyFramePart("-->pragmas"); GenCodePragmas(); g.CopyFramePart("-->productions"); GenProductions(); - g.CopyFramePart("-->parseRoot"); gen.println("\t\t" + tab.gramSy.name + "();"); if (tab.checkEOF) gen.println("\t\tExpect(0);"); + g.CopyFramePart("-->parseRoot"); gen.println("\t\t" + tab.gramSy.name + "_NT();"); if (tab.checkEOF) gen.println("\t\tExpect(0);"); + g.CopyFramePart("-->tbase"); GenTokenBase(); // write all tokens base types g.CopyFramePart("-->initialization"); InitSets(); 
g.CopyFramePart("-->errors"); gen.print(err.toString()); g.CopyFramePart(null); @@ -413,6 +486,98 @@ protected void OnWriteParserInitializationDone() { // nothing to do } + public int GenCodeRREBNF (Node p) { + int rc = 0; + Node p2; + while (p != null) { + switch (p.typ) { + case Node.nt: + case Node.t: { + gen.print(p.sym.name); + gen.print(" "); + ++rc; + break; + } + case Node.wt: { + break; + } + case Node.any: { + gen.print("ANY "); + break; + } + case Node.eps: break; // nothing + case Node.rslv: break; // nothing + case Node.sem: { + break; + } + case Node.sync: { + break; + } + case Node.alt: { + gen.print("( "); + p2 = p; + while (p2 != null) { + rc += GenCodeRREBNF(p2.sub); + p2 = p2.down; + if(p2 != null) gen.print("| "); + } + gen.print(") "); + break; + } + case Node.iter: { + gen.print("( "); + rc += GenCodeRREBNF(p.sub); + gen.print(")* "); + break; + } + case Node.opt: + gen.print("( "); + rc += GenCodeRREBNF(p.sub); + gen.print(")? "); + break; + } + if (p.up) break; + p = p.next; + } + return rc; + } + + public void WriteRREBNF () { + Generator g = new Generator(tab); + gen = g.OpenGen("Parser.ebnf"); + + gen.print("//\n// EBNF generated by CocoR parser generator to be viewed with https://www.bottlecaps.de/rr/ui\n//\n"); + gen.print("\n//\n// productions\n//\n\n"); + for (int i = 0; i < tab.nonterminals.size(); i++) { + Symbol sym = (Symbol)tab.nonterminals.get(i); + gen.print(sym.name + " ::= "); + if(GenCodeRREBNF(sym.graph) == 0) { + gen.print("\"??()??\""); + } + gen.print("\n"); + } + gen.print("\n//\n// tokens\n//\n\n"); + for (int i = 0; i < tab.terminals.size(); i++) { + Symbol sym = (Symbol)tab.terminals.get(i); + if (Character.isLetter(sym.name.charAt(0))) { // real name value is stored in Tab.literals + java.util.Iterator iter = tab.literals.entrySet().iterator(); + Map.Entry me = null; + //foreach (DictionaryEntry e in literals) { + while (iter.hasNext()) { + me = (Map.Entry)iter.next(); + Symbol hsym = (Symbol)me.getValue(); + if 
(hsym == sym) { + gen.print(sym.name + " ::= " + me.getKey() + "\n"); + break; + } + } + } else { + //gen.print(sym.n + " /* " + sym.name + " */"); + } + } + gen.close(); + } + public void WriteStatistics () { trace.WriteLine(); trace.WriteLine(tab.terminals.size() + " terminals"); diff --git a/src/Scanner.frame b/src/Scanner.frame index 5edcbef..6b0c47e 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -369,10 +369,14 @@ public class Scanner { } Token NextToken() { - while (ch == ' ' || + for(;;) { + while (ch == ' ' || -->scan1 - ) NextCh(); + ) NextCh(); -->scan2 + break; + } +-->scan22 int recKind = noSym; int recEnd = pos; t = new Token(); diff --git a/src/Scanner.java b/src/Scanner.java index 21a14d9..0ea50bc 100644 --- a/src/Scanner.java +++ b/src/Scanner.java @@ -5,26 +5,26 @@ ported from C# to Java by Wolfgang Ahorner with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. --------------------------------------------------------------------------*/ +------------------------------------------------------------------------*/ package Coco; import java.io.InputStream; @@ -278,8 +278,8 @@ public int state(int key) { public class Scanner { static final char EOL = '\n'; static final int eofSym = 0; - static final int maxT = 44; - static final int noSym = 44; + static final int maxT = 45; + static final int noSym = 45; public Buffer buffer; // scanner buffer @@ -312,20 +312,21 @@ public class Scanner { start.set(39, 5); start.set(36, 13); start.set(61, 16); - start.set(46, 33); + start.set(46, 34); start.set(43, 17); start.set(45, 18); - start.set(60, 34); - start.set(94, 20); - start.set(62, 21); - start.set(44, 22); - start.set(91, 25); - start.set(93, 26); - start.set(124, 27); - start.set(40, 35); - start.set(41, 28); - start.set(123, 29); - start.set(125, 30); + start.set(58, 20); + start.set(60, 35); + start.set(94, 21); + start.set(62, 22); + start.set(44, 23); + start.set(91, 26); + start.set(93, 27); + start.set(124, 28); + start.set(40, 36); + start.set(41, 29); + start.set(123, 30); + start.set(125, 31); start.set(Buffer.EOF, -1); literals.put("COMPILER", new Integer(6)); literals.put("IGNORECASE", new Integer(7)); @@ -340,11 +341,11 @@ public class Scanner { 
literals.put("PRODUCTIONS", new Integer(16)); literals.put("END", new Integer(19)); literals.put("ANY", new Integer(23)); - literals.put("out", new Integer(26)); - literals.put("WEAK", new Integer(34)); - literals.put("SYNC", new Integer(39)); - literals.put("IF", new Integer(40)); - literals.put("CONTEXT", new Integer(41)); + literals.put("out", new Integer(27)); + literals.put("WEAK", new Integer(35)); + literals.put("SYNC", new Integer(40)); + literals.put("IF", new Integer(41)); + literals.put("CONTEXT", new Integer(42)); } @@ -416,9 +417,8 @@ boolean Comment0() { } else if (ch == Buffer.EOF) return false; else NextCh(); } - } else { - buffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; } + buffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; return false; } @@ -432,7 +432,7 @@ boolean Comment1() { NextCh(); if (ch == '/') { level--; - if (level == 0) { oldEols = line - line0; NextCh(); return true; } + if (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; } NextCh(); } } else if (ch == '/') { @@ -443,9 +443,8 @@ boolean Comment1() { } else if (ch == Buffer.EOF) return false; else NextCh(); } - } else { - buffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; } + buffer.setPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; return false; } @@ -460,10 +459,14 @@ void CheckLiteral() { } Token NextToken() { - while (ch == ' ' || - ch >= 9 && ch <= 10 || ch == 13 - ) NextCh(); - if (ch == '/' && Comment0() ||ch == '/' && Comment1()) return NextToken(); + for(;;) { + while (ch == ' ' || + ch >= 9 && ch <= 10 || ch == 13 + ) NextCh(); + if (ch == '/' && Comment0() ||ch == '/' && Comment1()) continue; + break; + } + int recKind = noSym; int recEnd = pos; t = new Token(); @@ -482,17 +485,17 @@ Token NextToken() { t.kind = recKind; break loop; } // NextCh already done case 1: - recEnd = pos; recKind = 1; + recEnd = pos; recKind = 1 /* ident */; if (ch >= '0' && ch <= '9' 
|| ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); state = 1; break;} - else {t.kind = 1; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;} + else {t.kind = 1 /* ident */; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;} case 2: - recEnd = pos; recKind = 2; + recEnd = pos; recKind = 2 /* number */; if (ch >= '0' && ch <= '9') {AddCh(); state = 2; break;} - else {t.kind = 2; break loop;} + else {t.kind = 2 /* number */; break loop;} case 3: - {t.kind = 3; break loop;} + {t.kind = 3 /* string */; break loop;} case 4: - {t.kind = 4; break loop;} + {t.kind = 4 /* badString */; break loop;} case 5: if (ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '&' || ch >= '(' && ch <= '[' || ch >= ']' && ch <= 65535) {AddCh(); state = 6; break;} else if (ch == 92) {AddCh(); state = 7; break;} @@ -508,15 +511,15 @@ Token NextToken() { else if (ch == 39) {AddCh(); state = 9; break;} else {state = 0; break;} case 9: - {t.kind = 5; break loop;} + {t.kind = 5 /* char */; break loop;} case 10: - recEnd = pos; recKind = 45; + recEnd = pos; recKind = 46 /* ddtSym */; if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); state = 10; break;} - else {t.kind = 45; break loop;} + else {t.kind = 46 /* ddtSym */; break loop;} case 11: - recEnd = pos; recKind = 46; + recEnd = pos; recKind = 47 /* optionSym */; if (ch >= '-' && ch <= '.' || ch >= '0' && ch <= ':' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); state = 11; break;} - else {t.kind = 46; break loop;} + else {t.kind = 47 /* optionSym */; break loop;} case 12: if (ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '!' 
|| ch >= '#' && ch <= '[' || ch >= ']' && ch <= 65535) {AddCh(); state = 12; break;} else if (ch == 10 || ch == 13) {AddCh(); state = 4; break;} @@ -524,67 +527,69 @@ Token NextToken() { else if (ch == 92) {AddCh(); state = 14; break;} else {state = 0; break;} case 13: - recEnd = pos; recKind = 45; + recEnd = pos; recKind = 46 /* ddtSym */; if (ch >= '0' && ch <= '9') {AddCh(); state = 10; break;} else if (ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); state = 15; break;} - else {t.kind = 45; break loop;} + else {t.kind = 46 /* ddtSym */; break loop;} case 14: if (ch >= ' ' && ch <= '~') {AddCh(); state = 12; break;} else {state = 0; break;} case 15: - recEnd = pos; recKind = 45; + recEnd = pos; recKind = 46 /* ddtSym */; if (ch >= '0' && ch <= '9') {AddCh(); state = 10; break;} else if (ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); state = 15; break;} else if (ch == '=') {AddCh(); state = 11; break;} - else {t.kind = 45; break loop;} + else {t.kind = 46 /* ddtSym */; break loop;} case 16: - {t.kind = 17; break loop;} + {t.kind = 17 /* "=" */; break loop;} case 17: - {t.kind = 20; break loop;} + {t.kind = 20 /* "+" */; break loop;} case 18: - {t.kind = 21; break loop;} + {t.kind = 21 /* "-" */; break loop;} case 19: - {t.kind = 22; break loop;} + {t.kind = 22 /* ".." */; break loop;} case 20: - {t.kind = 25; break loop;} + {t.kind = 24 /* ":" */; break loop;} case 21: - {t.kind = 27; break loop;} + {t.kind = 26 /* "^" */; break loop;} case 22: - {t.kind = 28; break loop;} + {t.kind = 28 /* ">" */; break loop;} case 23: - {t.kind = 29; break loop;} + {t.kind = 29 /* "," */; break loop;} case 24: - {t.kind = 30; break loop;} + {t.kind = 30 /* "<." 
*/; break loop;} case 25: - {t.kind = 31; break loop;} + {t.kind = 31 /* ".>" */; break loop;} case 26: - {t.kind = 32; break loop;} + {t.kind = 32 /* "[" */; break loop;} case 27: - {t.kind = 33; break loop;} + {t.kind = 33 /* "]" */; break loop;} case 28: - {t.kind = 36; break loop;} + {t.kind = 34 /* "|" */; break loop;} case 29: - {t.kind = 37; break loop;} + {t.kind = 37 /* ")" */; break loop;} case 30: - {t.kind = 38; break loop;} + {t.kind = 38 /* "{" */; break loop;} case 31: - {t.kind = 42; break loop;} + {t.kind = 39 /* "}" */; break loop;} case 32: - {t.kind = 43; break loop;} + {t.kind = 43 /* "(." */; break loop;} case 33: - recEnd = pos; recKind = 18; - if (ch == '.') {AddCh(); state = 19; break;} - else if (ch == '>') {AddCh(); state = 24; break;} - else if (ch == ')') {AddCh(); state = 32; break;} - else {t.kind = 18; break loop;} + {t.kind = 44 /* ".)" */; break loop;} case 34: - recEnd = pos; recKind = 24; - if (ch == '.') {AddCh(); state = 23; break;} - else {t.kind = 24; break loop;} + recEnd = pos; recKind = 18 /* "." */; + if (ch == '.') {AddCh(); state = 19; break;} + else if (ch == '>') {AddCh(); state = 25; break;} + else if (ch == ')') {AddCh(); state = 33; break;} + else {t.kind = 18 /* "." */; break loop;} case 35: - recEnd = pos; recKind = 35; - if (ch == '.') {AddCh(); state = 31; break;} - else {t.kind = 35; break loop;} + recEnd = pos; recKind = 25 /* "<" */; + if (ch == '.') {AddCh(); state = 24; break;} + else {t.kind = 25 /* "<" */; break loop;} + case 36: + recEnd = pos; recKind = 36 /* "(" */; + if (ch == '.') {AddCh(); state = 32; break;} + else {t.kind = 36 /* "(" */; break loop;} } } diff --git a/src/Tab.java b/src/Tab.java index b423437..684b4d4 100644 --- a/src/Tab.java +++ b/src/Tab.java @@ -42,9 +42,10 @@ class Position { // position of source code stretch (e.g. 
semantic action, resol public final int beg; // start relative to the beginning of the file public final int end; // end of stretch public final int col; // column number of start position + public final int line; // line number of start position - public Position(int beg, int end, int col) { - this.beg = beg; this.end = end; this.col = col; + public Position(int beg, int end, int col, int line) { + this.beg = beg; this.end = end; this.col = col; this.line = line; } } @@ -76,14 +77,16 @@ class Symbol { public BitSet follow; // nt: terminal followers public BitSet nts; // nt: nonterminals whose followers have to be added to this sym public int line; // source text line number of item in this node + public int col; // source text line column number of item in this node public Position attrPos; // nt: position of attributes in source text (or null) public Position semPos; // pr: pos of semantic action in source text (or null) // nt: pos of local declarations in source text (or null) public String retType; // AH - nt: Type of output attribute (or null) public String retVar; // AH - nt: Name of output attribute (or null) + public Symbol inherits; // optional, token from which this token derives - public Symbol(int typ, String name, int line) { - this.typ = typ; this.name = name; this.line = line; + public Symbol(int typ, String name, int line, int col) { + this.typ = typ; this.name = name; this.line = line; this.col = col; } } @@ -126,12 +129,13 @@ class Node { public Position pos; // nt, t, wt: pos of actual attributes // sem: pos of semantic action in source text public int line; // source text line number of item in this node + public int col; // source text line column number of item in this node public State state; // DFA state corresponding to this node // (only used in DFA.ConvertToStates) public String retVar; // AH 20040206 - nt: name of output attribute (or null) - public Node(int typ, Symbol sym, int line) { - this.typ = typ; this.sym = sym; this.line = line; 
+ public Node(int typ, Symbol sym, int line, int col) { + this.typ = typ; this.sym = sym; this.line = line; this.col = col; } } @@ -204,6 +208,9 @@ public class Tab { public Position semDeclPos; // position of global semantic declarations public CharSet ignored; // characters ignored by the scanner public boolean[] ddt = new boolean[10]; // debug and test switches + public boolean genAST = false; // generate parser tree generation code + public boolean genRREBNF = false; //generate EBNF for railroad diagram + public boolean ignoreErrors = false; // ignore grammar errors for developing purposes public Symbol gramSy; // root nonterminal; filled by ATG public Symbol eofSy; // end of file symbol public Symbol noSym; // used in case of an error @@ -229,8 +236,8 @@ public Tab(Parser parser) { this.parser = parser; trace = parser.trace; errors = parser.errors; - eofSy = NewSym(Node.t, "EOF", 0); - dummyNode = NewNode(Node.eps, null, 0); + eofSy = NewSym(Node.t, "EOF", 0, 0); + dummyNode = NewNode(Node.eps, null, 0, 0); literals = new Hashtable(); } @@ -244,11 +251,11 @@ public Tab(Parser parser) { String[] tKind = {"fixedToken", "classToken", "litToken", "classLitToken"}; - public Symbol NewSym(int typ, String name, int line) { + public Symbol NewSym(int typ, String name, int line, int col) { if (name.length() == 2 && name.charAt(0) == '"') { parser.SemErr("empty token not allowed"); name = "???"; } - Symbol sym = new Symbol(typ, name, line); + Symbol sym = new Symbol(typ, name, line, col); switch (typ) { case Node.t: sym.n = terminals.size(); terminals.add(sym); break; case Node.pr: pragmas.add(sym); break; @@ -278,7 +285,7 @@ int Num(Node p) { void PrintSym(Symbol sym) { trace.Write(Integer.toString(sym.n), 3); - trace.Write(" "); + trace.Write(" "); trace.Write(Name(sym.name), -14); trace.Write(" "); trace.Write(nTyp[sym.typ], 2); @@ -295,7 +302,7 @@ void PrintSym(Symbol sym) { public void PrintSymbolTable() { trace.WriteLine("Symbol Table:"); 
trace.WriteLine("------------"); trace.WriteLine(); - trace.WriteLine(" nr name typ hasAt graph del line tokenKind"); + trace.WriteLine(" nr name typ hasAt graph del line tokenKind"); //foreach (Symbol sym in Symbol.terminals) for (int i = 0; i < terminals.size(); i++) { PrintSym((Symbol)terminals.get(i)); @@ -350,21 +357,21 @@ public void PrintSet(BitSet s, int indent) { "sync", "sem ", "alt ", "iter", "opt ", "rslv"}; Node dummyNode; - public Node NewNode(int typ, Symbol sym, int line) { - Node node = new Node(typ, sym, line); + public Node NewNode(int typ, Symbol sym, int line, int col) { + Node node = new Node(typ, sym, line, col); node.n = nodes.size(); nodes.add(node); return node; } public Node NewNode(int typ, Node sub) { - Node node = NewNode(typ, null, 0); + Node node = NewNode(typ, null, 0, 0); node.sub = sub; return node; } - public Node NewNode(int typ, int val, int line) { - Node node = NewNode(typ, null, line); + public Node NewNode(int typ, int val, int line, int col) { + Node node = NewNode(typ, null, line, col); node.val = val; return node; } @@ -401,7 +408,9 @@ public void MakeSequence(Graph g1, Graph g2) { } public void MakeIteration(Graph g) { + int line = g.l.line; g.l = NewNode(Node.iter, g.l); + g.l.line = line; g.r.up = true; Node p = g.r; g.r = g.l; @@ -412,7 +421,9 @@ public void MakeIteration(Graph g) { } public void MakeOption(Graph g) { + int line = g.l.line; g.l = NewNode(Node.opt, g.l); + g.l.line = line; g.r.up = true; g.l.next = g.r; g.r = g.l; @@ -428,7 +439,7 @@ public void Finish(Graph g) { public void DeleteNodes() { nodes = new ArrayList(); - dummyNode = NewNode(Node.eps, null, 0); + dummyNode = NewNode(Node.eps, null, 0, 0); } public Graph StrToGraph(String str) { @@ -437,7 +448,7 @@ public Graph StrToGraph(String str) { Graph g = new Graph(); g.r = dummyNode; for (int i = 0; i < s.length(); i++) { - Node p = NewNode(Node.chr, (int)s.charAt(i), 0); + Node p = NewNode(Node.chr, (int)s.charAt(i), 0, 0); g.r.next = p; g.r = p; } 
g.l = dummyNode.next; dummyNode.next = null; @@ -479,7 +490,7 @@ public boolean DelNode(Node p) { String Ptr(Node p, boolean up) { String ptr = (p == null) ? "0" : Integer.toString(p.n); - return (up) ? ("-" + ptr) : ptr; + return (up && (ptr != "0")) ? ("-" + ptr) : ptr; } String Pos(Position pos) { @@ -540,7 +551,7 @@ public void PrintNodes() { trace.Write(" "); trace.Write(Pos(p.pos), 5); break; - case Node.eps: case Node.any: case Node.sync: + case Node.eps: case Node.any: case Node.sync: case Node.rslv: trace.Write(" "); break; } trace.WriteLine(Integer.toString(p.line), 5); @@ -603,7 +614,8 @@ public void WriteCharClasses () { //foreach (CharClass c in classes) { for (int i = 0; i < classes.size(); i++) { CharClass c = (CharClass)classes.get(i); - trace.Write(c.name + ": ", -10); + trace.Write(c.name, -10); + trace.Write(": "); WriteCharSet(c.set); trace.WriteLine(); } @@ -913,8 +925,8 @@ public void CompSymbolSets() { Node p = (Node)nodes.get(i); if (p.typ == Node.any || p.typ == Node.sync) { trace.Write("Line: "); - trace.WriteLine(Integer.toString(p.line), 4); - trace.Write("Node: "); + trace.Write(Integer.toString(p.line), 4); + trace.Write(" Node: "); trace.Write(Integer.toString(p.n), 4); trace.Write(" "); trace.Write(nTyp[p.typ], 4); @@ -1010,12 +1022,23 @@ public String Escape (String s) { public boolean GrammarOk() { boolean ok = NtsComplete() + && AllNtReached() && NoCircularProductions() && AllNtToTerm(); - if (ok) { AllNtReached(); CheckResolvers(); CheckLL1(); } + if (ok) { CheckResolvers(); CheckLL1(); } return ok; } + public boolean GrammarCheckAll() { + int errors = 0; + if(!NtsComplete()) ++errors; + if(!AllNtReached()) ++errors; + if(!NoCircularProductions()) System.exit(1); + if(!AllNtToTerm()) ++errors; + CheckResolvers(); CheckLL1(); + return errors == 0; + } + //--------------- check for circular productions ---------------------- class CNode { // node of list for finding circular productions @@ -1029,7 +1052,7 @@ public CNode (Symbol l, 
Symbol r) { void GetSingles(Node p, ArrayList singles) { if (p == null) return; // end of graph if (p.typ == Node.nt) { - if (p.up || DelGraph(p.next)) singles.add(p.sym); + singles.add(p.sym); } else if (p.typ == Node.alt || p.typ == Node.iter || p.typ == Node.opt) { if (p.up || DelGraph(p.next)) { GetSingles(p.sub, singles); @@ -1070,7 +1093,7 @@ public boolean NoCircularProductions() { for (int i = 0; i < list.size(); i++) { CNode n = (CNode)list.get(i); ok = false; - errors.SemErr(" " + n.left.name + " --> " + n.right.name); + errors.SemErr(" " + n.left.name + ":" + n.left.line + " --> " + n.right.name + ":" + n.right.line); } return ok; } @@ -1078,7 +1101,7 @@ public boolean NoCircularProductions() { //--------------- check for LL(1) errors ---------------------- void LL1Error(int cond, Symbol sym) { - String s = " LL1 warning in " + curSy.name + ": "; + String s = " LL1 warning in " + curSy.name + ":" + curSy.line + ":" + curSy.col + ": "; if (sym != null) s += sym.name + " is "; switch (cond) { case 1: s += "start of several alternatives"; break; @@ -1089,22 +1112,93 @@ void LL1Error(int cond, Symbol sym) { errors.Warning(s); } - void CheckOverlap(BitSet s1, BitSet s2, int cond) { + int CheckOverlap(BitSet s1, BitSet s2, int cond) { + int overlaped = 0; for (int i = 0; i < terminals.size(); i++) { Symbol sym = (Symbol) terminals.get(i); - if (s1.get(sym.n) && s2.get(sym.n)) LL1Error(cond, sym); + if (s1.get(sym.n) && s2.get(sym.n)) { + LL1Error(cond, sym); + ++overlaped; + } + } + return overlaped; + } + + /* print the path for first set that contains token tok for the graph rooted at p */ + void PrintFirstPath(Node p, int tok, String indent, int depth) + { + while (p != null) + { + switch (p.typ) + { + case Node.nt: + { + if (p.sym.firstReady) + { + if (p.sym.first.get(tok)) + { + if (indent.length() == 1) System.out.println(indent + "=> " + p.sym.name + ":" + p.line + ":" + p.col + ":"); + System.out.println(indent + "-> " + p.sym.name + ":" + p.sym.line + 
":" + p.sym.col + ":"); + if (p.sym.graph != null) PrintFirstPath(p.sym.graph, tok, indent + " ", depth + 1); + return; + } + } + break; + } + case Node.t: + case Node.wt: + { + if (p.sym.n == tok) System.out.println(indent + "= " + p.sym.name + ":" + p.line + ":" + p.col + ":"); + break; + } + case Node.any: + { + break; + } + case Node.alt: + { + PrintFirstPath(p.sub, tok, indent, depth + 1); + PrintFirstPath(p.down, tok, indent, depth + 1); + break; + } + case Node.iter: + case Node.opt: + { + if (!DelNode(p.sub)) //prevent endless loop with some ill grammars + PrintFirstPath(p.sub, tok, indent, depth + 1); + break; + } + } + if (!DelNode(p)) break; + p = p.next; } } + void PrintFirstPath(Node p, int tok) + { + PrintFirstPath(p, tok, "\t", 0); + } - void CheckAlts(Node p) { + int CheckAlts(Node p) { BitSet s1, s2; + int rc = 0; while (p != null) { if (p.typ == Node.alt) { Node q = p; s1 = new BitSet(terminals.size()); while (q != null) { // for all alternatives s2 = Expected0(q.sub, curSy); - CheckOverlap(s1, s2, 1); + int overlaped = CheckOverlap(s1, s2, 1); + if (overlaped > 0) + { + int overlapToken = 0; + /* Find the first overlap token */ + for (int i = 0; i < terminals.size(); i++) { + Symbol sym = (Symbol) terminals.get(i); + if (s1.get(sym.n) && s2.get(sym.n)) { overlapToken = sym.n; break; } + } + PrintFirstPath(p, overlapToken); + rc += overlaped; + } s1.or(s2); CheckAlts(q.sub); q = q.down; @@ -1114,7 +1208,19 @@ void CheckAlts(Node p) { else { s1 = Expected0(p.sub, curSy); s2 = Expected(p.next, curSy); - CheckOverlap(s1, s2, 2); + int overlaped = CheckOverlap(s1, s2, 2); + if (overlaped > 0) + { + int overlapToken = 0; + /* Find the first overlap token */ + for (int i = 0; i < terminals.size(); i++) { + Symbol sym = (Symbol) terminals.get(i); + if (s1.get(sym.n) && s2.get(sym.n)) { overlapToken = sym.n; break; } + } + //Console.WriteLine(format("\t=>:{0}: {1}", p.line, overlaped)); + PrintFirstPath(p, overlapToken); + rc += overlaped; + } } 
CheckAlts(p.sub); } else if (p.typ == Node.any) { @@ -1124,6 +1230,7 @@ void CheckAlts(Node p) { if (p.up) break; p = p.next; } + return rc; } public void CheckLL1() {