@@ -26,41 +26,40 @@ export parse
2626# Shared includes
2727using DataStructures
2828
# Basic constants.
# `should_capture_locations` is filled in by the parser generator from the options given
# on the command line.
const EOF = " \$ "
const should_capture_locations = {{{CAPTURE_LOCATIONS}}}
3632
3733# Types
# Exception type thrown for parse failures.
#   msg - the human-readable description of the failure; any AbstractString passed to the
#         constructor is converted to a String
struct SyntaxError <: Exception
    msg::String
end
4137
# Display a SyntaxError by writing only its message text to `io`.
Base.showerror(io::IO, err::SyntaxError) = print(io, err.msg)
4541
# Mutable source-location record built with keyword arguments.
# It stores the start and end offset, line and column of a span; every field is an Int
# and has no default, so all six keywords must be supplied.
Base.@kwdef mutable struct yyLoc
    startoffset::Int
    endoffset::Int
    startline::Int
    endline::Int
    startcolumn::Int
    endcolumn::Int
end
5450
# One entry on the parser stack, built with keyword arguments.
#   symbol        - the grammar symbol: a token type when shifted, or the left-hand side
#                   of a production when reduced
#   semanticvalue - the token's value, or the result produced by a semantic action
#   loc           - the location attached to the entry, or `nothing` when there is none
Base.@kwdef mutable struct StackEntry
    symbol::Int
    semanticvalue::Any
    loc::Union{yyLoc,Nothing}
end
6056
61- # --------------------------------------------------------------
62- # Module includes provided by the grammar.
63- {{{MODULE_INCLUDE}}}
# Per-parse mutable state passed to the tokenizer and to every semantic action handler.
#   yytext   - text of the most recently shifted token, or `nothing` when a reduction runs
#              before any token has been shifted (the parser assigns `nothing` in that
#              case, so the field must accept it)
#   yylength - length of that text, 0 when there is none
#   __res    - result slot a semantic action writes to (`$$ = ...` in the grammar)
#   __loc    - location slot read back after a semantic action when locations are captured
Base.@kwdef mutable struct ParserData
    yytext::Union{String,Nothing} = nothing
    yylength::Int = 0
    __res::Any = nothing
    __loc::Any = nothing
end
6463
6564# --------------------------------------------------------------
6665# Tokenizer.
@@ -102,40 +101,46 @@ end
102101
103102{{{PRODUCTION_HANDLERS}}}
104103
104+ # Constant mappings inserted by the parser generator by processing the grammar definition
105+ const productions = {{{PRODUCTIONS}}} # [[1, 2, "handler1"], [3, 4, "handler2"], ...] i.e. Vector{Vector{Union{Integer, String}}}
106+ const table = {{{TABLE}}} # indexed by state + 1; each entry maps a token type to an action string, e.g. "r2" (reduce by production 2)
107+
105108# blank stand-ins for begin and end
# Default `onparsebegin` hook: takes no arguments and does nothing.
parsebegin() = nothing
107110
# Default `onparseend` hook: accepts the parsed value and ignores it.
parseend(value) = nothing
109112
113+ # --------------------------------------------------------------
114+ # Module includes provided by the grammar.
115+ {{{MODULE_INCLUDE}}}
116+
110117#=
111118 Primary parsing function
112119 ss - the code to parse, in a String
113120 onparsebegin - a function to call when parsing begins
114121 onparseend - a function to call when parsing ends; it should accept a single argument, the parsed value
115122=#
116- # Q for Dmetry: ok to default tokenizer here? other implementations throw but I wanted to keep the parse interface to only the string required
117- function parse (ss:: AbstractString ; tokenizerInitFunction:: Function = inittokenizer, onparsebegin:: Function = parsebegin, onparseend:: Function = parseend)
118- # constants inserted by the parser generator
119- productions = {{{PRODUCTIONS}}} # [[1, 2, "handler1"], [3, 4, "handler2], ...] i.e. Vector{Vector{Union{Integer, String}}}
120- table = {{{TABLE}}} # i.e. Dict{Int, String}
123+ function parse (ss:: AbstractString ; tokenizerinitfunction:: Function = inittokenizer, onparsebegin:: Function = parsebegin, onparseend:: Function = parseend)
124+ # initialize our parser data
125+ parserdata = ParserData (yytext = " " , yylength = 0 , __res = nothing , __loc = nothing )
121126
122127 # initialization and prep for parsing
123128 ! isnothing (onparsebegin) && onparsebegin ()
124- tokenizerData = tokenizerInitFunction (ss)
125- stack = Stack {Union{StackEntry,Integer }} ()
129+ tokenizerdata = tokenizerinitfunction (ss)
130+ stack = Stack {Union{StackEntry,Int }} ()
126131 push! (stack, 0 )
127132
128133 # begin parsing
129- token = getnexttoken! (tokenizerData)
134+ token = getnexttoken! (parserdata, tokenizerdata) :: Token
130135 shiftedtoken = nothing
131- while hasmoretokens (tokenizerData ) || ! isempty (stack)
136+ while hasmoretokens (tokenizerdata ) || ! isempty (stack)
132137 # get a token and look it up in our parsing table
133138 isnothing (token) && unexpectedendofinput ()
134139 state = first (stack)
135140 column = token. type
136141 entry = get (table[state+ 1 ], column, nothing )
137142 if isnothing (entry)
138- unexpectedtoken (tokenizerData , token)
143+ unexpectedtoken (tokenizerdata , token)
139144 break
140145 end
141146
@@ -144,7 +149,7 @@ function parse(ss::AbstractString; tokenizerInitFunction::Function = inittokeniz
144149 push! (stack, StackEntry (symbol = token. type, semanticvalue = token. value, loc = yyloc (token)))
145150 push! (stack, tryparse (Int, SubString (entry, 2 )))
146151 shiftedtoken = token
147- token = getnexttoken! (tokenizerData)
152+ token = getnexttoken! (parserdata, tokenizerdata) :: Token
148153
149154 # found "reduce" instruction, which starts with r then has <production number> to reduce by - i.e. r2 means "reduce by production 2"
150155 elseif entry[1 ] == ' r'
@@ -161,12 +166,12 @@ function parse(ss::AbstractString; tokenizerInitFunction::Function = inittokeniz
161166 pop! (stack)
162167
163168 # pop the stack entry
164- stackEntry = pop! (stack)
169+ stackentry = pop! (stack)
165170
166171 # collection all the semantic values from the stack to the argument list, which will be passed to the action handler
167172 if hassemanticaction
168- pushfirst! (semanticvalueargs, stackEntry . semanticvalue)
169- should_capture_locations && pushfirst! (locationargs, stackEntry . loc)
173+ pushfirst! (semanticvalueargs, stackentry . semanticvalue)
174+ should_capture_locations && pushfirst! (locationargs, stackentry . loc)
170175 end
171176 rhslength -= 1
172177 end
@@ -175,19 +180,19 @@ function parse(ss::AbstractString; tokenizerInitFunction::Function = inittokeniz
175180 symboltoproducewith = production[1 ]
176181 reducestackentry = StackEntry (symbol = symboltoproducewith, semanticvalue = nothing , loc = nothing )
177182 if hassemanticaction
178- global yytext = isnothing (shiftedtoken) ? nothing : shiftedtoken. value
179- global yylength = isnothing (shiftedtoken) ? 0 : length (shiftedtoken. value)
183+ parserdata . yytext = isnothing (shiftedtoken) ? nothing : shiftedtoken. value
184+ parserdata . yylength = isnothing (shiftedtoken) ? 0 : length (shiftedtoken. value)
180185 semanticactionhandler = getfield (SyntaxParser, Symbol (production[3 ]))
181186 semanticactionargs = semanticvalueargs
182187 if should_capture_locations
183188 semanticactionargs = vcat (semanticactionargs, locationargs)
184189 end
185190
186191 # call the handler the result is put in __res, which is accessed/assigned to by for example $$ = <something> in the grammar
187- semanticactionhandler (semanticactionargs... )
188- reducestackentry. semanticvalue = __res
192+ semanticactionhandler (parserdata, semanticactionargs... )
193+ reducestackentry. semanticvalue = parserdata . __res
189194 if should_capture_locations
190- reducestackentry. loc = __loc
195+ reducestackentry. loc = parserdata . __loc
191196 end
192197 end
193198 push! (stack, reducestackentry)
@@ -200,8 +205,8 @@ function parse(ss::AbstractString; tokenizerInitFunction::Function = inittokeniz
200205 parsed = pop! (stack)
201206
202207 # Check for if the stack has other stuff on it, which would be bad
203- if length (stack) != 1 || first (stack) != 0 || hasmoretokens (tokenizerData )
204- unexpectedtoken (tokenizerData , token)
208+ if length (stack) != 1 || first (stack) != 0 || hasmoretokens (tokenizerdata )
209+ unexpectedtoken (tokenizerdata , token)
205210 end
206211
207212 # success!
@@ -215,19 +220,19 @@ function parse(ss::AbstractString; tokenizerInitFunction::Function = inittokeniz
215220 return nothing
216221end
217222
# Report a token that the parser cannot accept.
#   tokenizerdata - the tokenizer state, forwarded to `throwunexpectedtoken`
#   token         - the offending token
# A token whose type equals the end-of-input token's type is reported as an unexpected end
# of input; any other token is reported with its value, start line and start column.
# NOTE(review): `EOF_TOKEN` is assumed to be a module-level binding defined with the
# tokenizer - confirm.
function unexpectedtoken(tokenizerdata::TokenizerData, token::Token)
    if token.type == EOF_TOKEN.type
        unexpectedendofinput()
    else
        throwunexpectedtoken(tokenizerdata, token.value, token.startline, token.startcolumn)
    end
end
225230
# Signal, through `parseerror`, that the input ended before parsing could finish.
unexpectedendofinput() = parseerror(" Unexpected end of input.")
229234
# Abort parsing by throwing a SyntaxError that carries `message`.
#   message - the error description handed to the SyntaxError constructor
function parseerror(message)
    throw(SyntaxError(message))
end
233238
0 commit comments