1 module text.RecursiveDescentParser;
2 
import std.algorithm;
import std.range;
import std.string;
6 
/**
 * A small backtracking recursive-descent parser over a UTF-8 string.
 *
 * The parser keeps a cursor (a code-unit offset into the text). Primitive
 * matchers (`accept`, `acceptAsciiChar`) advance the cursor on success and
 * leave it untouched on failure; combinators (`matchGroup`, `matchOptional`,
 * `matchZeroOrMore`, `matchTimes`, `captureGroupInto`) compose matchers that
 * are passed in as delegates.
 */
@safe
struct RecursiveDescentParser
{
    // The complete input.
    private string text;

    // Offset (in UTF-8 code units) of the next unconsumed character in `text`.
    private size_t cursor;

    /// Creates a parser positioned at the start of `text`.
    public this(string text) @nogc
    {
        this.text = text;
    }

    invariant
    {
        import std.utf : stride;

        // cursor is unsigned, so only the upper bound needs to be checked
        assert(this.cursor <= this.text.length);

        // validate that this.cursor lies at the start of a utf-8 character
        assert(this.cursor == this.text.length || this.text[this.cursor .. $].stride > 0);
    }

    /**
     * Runs `action` as an atomic unit.
     *
     * Returns: the result of `action`. If it is `false`, the cursor is reset
     * to where it was before `action` ran.
     */
    public bool matchGroup(scope bool delegate() @nogc @safe action) @nogc
    {
        const backup = this.cursor;

        if (action())
        {
            return true;
        }

        // parse failure, roll back state
        this.cursor = backup;
        return false;
    }

    /**
     * Runs `action` and, if it succeeds, stores the text it consumed in `target`.
     *
     * `target` is an `out` parameter, so it is reset to `null` on entry and
     * stays `null` when `action` fails. The cursor is NOT restored on failure;
     * the failure is reported to the caller, so an enclosing `matchGroup`
     * is expected to take care of that.
     *
     * Returns: the result of `action`.
     */
    public bool captureGroupInto(out string target, scope bool delegate() @nogc @safe action) @nogc
    {
        const startCursor = this.cursor;

        if (!action())
        {
            return false;
        }

        target = this.text[startCursor .. this.cursor];
        return true;
    }

    /**
     * Applies `action` repeatedly until it fails.
     *
     * The failing attempt is rolled back: because this function reports success
     * regardless, no enclosing group would otherwise undo a partial match.
     * Repetition also stops as soon as `action` succeeds without consuming any
     * input, since repeating it could never make progress.
     *
     * Returns: always `true`.
     */
    public bool matchZeroOrMore(scope bool delegate() @nogc @safe action) @nogc
    {
        while (true)
        {
            const before = this.cursor;

            if (!action())
            {
                // undo whatever the failed attempt consumed
                this.cursor = before;
                break;
            }

            if (this.cursor == before)
            {
                // zero-width success: looping again would never terminate
                break;
            }
        }

        return true;
    }

    /**
     * Applies `action` once; a failed attempt is rolled back and ignored.
     *
     * Returns: always `true`.
     */
    public bool matchOptional(scope bool delegate() @nogc @safe action) @nogc
    {
        // matchGroup restores the cursor if action fails part-way through
        matchGroup(action);

        return true;
    }

    /**
     * Applies `action` exactly `num` times, as an atomic unit.
     *
     * `action` is called at most `num` times and never after its first failure.
     *
     * Params:
     *     num = number of repetitions; `0` succeeds without calling `action`,
     *           a negative value can never be satisfied and fails immediately.
     *     action = the matcher to repeat.
     *
     * Returns: `true` if all `num` applications succeeded. Otherwise `false`,
     * with the cursor reset to where it was on entry.
     */
    public bool matchTimes(int num, scope bool delegate() @nogc @safe action) @nogc
    {
        if (num < 0)
        {
            return false;
        }

        const backup = this.cursor;

        foreach (_; 0 .. num)
        {
            if (!action())
            {
                // parse failure, roll back state
                this.cursor = backup;
                return false;
            }
        }

        return true;
    }

    /**
     * Consumes one character if it is ASCII and satisfies `predicate`.
     *
     * Returns: `true` and advances by one code unit on a match; `false`
     * (cursor unchanged) at end of input, on a non-ASCII character, or when
     * `predicate` rejects the character.
     */
    public bool acceptAsciiChar(scope bool delegate(char) @nogc @safe predicate) @nogc
    {
        import std.ascii : isASCII;

        if (this.eof)
        {
            return false;
        }

        // it's safe to read a single code unit here because we only advance in ways
        // that cause text[cursor] to be the start of a utf-8 character (see invariant)
        const next = this.text[this.cursor];

        if (!next.isASCII || !predicate(next))
        {
            return false;
        }

        // an ASCII character is exactly one code unit long
        this.cursor += 1;
        return true;
    }

    /// Returns: `true` if no unconsumed input remains.
    public bool eof() const @nogc
    {
        return this.remainingText.length == 0;
    }

    /**
     * Consumes `needle` if the remaining text starts with it.
     *
     * `needle` should consist of complete UTF-8 sequences; otherwise the cursor
     * could end up inside a character, which the invariant rejects.
     *
     * Returns: `true` and advances by `needle.length` code units on a match,
     * `false` (cursor unchanged) otherwise.
     */
    public bool accept(string needle) @nogc
    {
        import std.algorithm.searching : startsWith;

        if (!this.remainingText.startsWith(needle))
        {
            return false;
        }

        this.cursor += needle.length;
        return true;
    }

    /// Returns: the part of the input that has not been consumed yet.
    public @property string remainingText() const @nogc
    {
        return this.text[this.cursor .. $];
    }
}
115 
// matchTimes consumes exactly the requested number of repetitions and fails
// (without consuming anything) once the input is exhausted.
unittest
{
    import dshould : be, equal, should;

    auto parser = RecursiveDescentParser("aaaaaaaa");

    // all eight characters can be matched in one go ...
    parser.matchTimes(8, () => parser.accept("a")).should.be(true);

    // ... after which nothing is left to match
    parser.matchTimes(1, () => parser.accept("a")).should.be(false);
    parser.accept("a").should.be(false);
    parser.remainingText.should.equal("");
}
128 
// matchZeroOrMore consumes every repetition and still succeeds when there is
// nothing left to consume.
unittest
{
    import dshould : be, equal, should;

    auto parser = RecursiveDescentParser("aaaaaaaa");

    // first pass eats the whole input
    parser.matchZeroOrMore(() => parser.accept("a")).should.be(true);
    parser.remainingText.should.equal("");

    // second pass matches zero times, which is still a success
    parser.matchZeroOrMore(() => parser.accept("a")).should.be(true);
}