1 | ;;; |
---|
2 | ;;; peg.scm - Parser Expression Grammar Parser |
---|
3 | ;;; |
---|
4 | ;;; Copyright (c) 2006 Rui Ueyama (rui314@gmail.com) |
---|
5 | ;;; |
---|
6 | ;;; Redistribution and use in source and binary forms, with or without |
---|
7 | ;;; modification, are permitted provided that the following conditions |
---|
8 | ;;; are met: |
---|
9 | ;;; |
---|
10 | ;;; 1. Redistributions of source code must retain the above copyright |
---|
11 | ;;; notice, this list of conditions and the following disclaimer. |
---|
12 | ;;; |
---|
13 | ;;; 2. Redistributions in binary form must reproduce the above copyright |
---|
14 | ;;; notice, this list of conditions and the following disclaimer in the |
---|
15 | ;;; documentation and/or other materials provided with the distribution. |
---|
16 | ;;; |
---|
17 | ;;; 3. Neither the name of the authors nor the names of its contributors |
---|
18 | ;;; may be used to endorse or promote products derived from this |
---|
19 | ;;; software without specific prior written permission. |
---|
20 | ;;; |
---|
21 | ;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
---|
22 | ;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
---|
23 | ;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
---|
24 | ;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
---|
25 | ;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
---|
26 | ;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED |
---|
27 | ;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
---|
28 | ;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
---|
29 | ;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
---|
30 | ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
---|
31 | ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
---|
32 | ;;; |
---|
33 | |
---|
34 | (define-module peg |
---|
35 | (use srfi-1) |
---|
36 | (use srfi-13) |
---|
37 | (use srfi-14) |
---|
38 | (use util.match) |
---|
39 | (export parse-success? parse-failure? |
---|
40 | stream-position |
---|
41 | <parse-error> |
---|
42 | |
---|
43 | result-value |
---|
44 | result-next |
---|
45 | failure-type |
---|
46 | failure-message |
---|
47 | failure-position |
---|
48 | parse-string |
---|
49 | $return $fail $expect |
---|
50 | $do $cut $seq $or $many $skip-many |
---|
51 | $repeat $optional |
---|
52 | |
---|
53 | $alternate |
---|
54 | |
---|
55 | $sep-by $end-by $sep-end-by |
---|
56 | $count $between |
---|
57 | $not $many-till $chain-left $chain-right |
---|
58 | $lazy |
---|
59 | $string $string-ci |
---|
60 | $char $one-of $none-of |
---|
61 | $satisfy |
---|
62 | |
---|
63 | anychar upper lower letter alphanum digit |
---|
64 | hexdigit newline tab space spaces eof |
---|
65 | |
---|
66 | $->rope semantic-value-finalize! |
---|
67 | ) |
---|
68 | ) |
---|
69 | (select-module peg) |
---|
70 | |
---|
71 | (debug-print-width 1024) |
---|
72 | |
---|
73 | ;;;============================================================ |
---|
74 | ;;; How is EBNF represented in the PEG library? |
---|
75 | ;;; |
---|
76 | ;;; A ::= B C |
---|
77 | ;;; => (define a ($seq b c)) |
---|
78 | ;;; If you need values of B and C, $do can be used: |
---|
79 | ;;; => (define a ($do ((x b) (y c)) (cons x y))) |
---|
80 | ;;; |
---|
81 | ;;; A :: B | C |
---|
82 | ;;; => (define a ($or b c)) |
---|
83 | ;;; |
---|
84 | ;;; A :: B* |
---|
85 | ;;; => (define a ($many b)) |
---|
86 | ;;; |
---|
87 | ;;; A :: B+ |
---|
88 | ;;; => (define a ($many b 1)) |
---|
89 | ;;; |
---|
90 | ;;; A ::= B B | B B B |
---|
91 | ;;; => (define a ($many b 2 3)) |
---|
92 | ;;; |
---|
93 | ;;; A ::= B? |
---|
94 | ;;; => (define a ($optional b)) |
---|
95 | ;;; |
---|
96 | |
---|
97 | ;;;============================================================ |
---|
98 | ;;; Parse result types |
---|
99 | ;;; |
---|
100 | |
---|
101 | ;; result ::= ('success <semantic-value> <stream>) |
---|
102 | ;; error ::= ('fail <failure-type> <message-string> <position>) |
---|
103 | |
---|
104 | (define-condition-type <parse-error> <error> #f |
---|
105 | (position)) |
---|
106 | |
---|
107 | (define-method write-object ((o <parse-error>) out) |
---|
108 | (format out "#<<parse-error> ~a ~S>" |
---|
109 | (ref o 'position) |
---|
110 | (ref o 'message))) |
---|
111 | |
---|
112 | (define (parse-success? obj) |
---|
113 | (and (vector? obj) |
---|
114 | (eq? 'success (vector-ref obj 0)))) |
---|
115 | |
---|
116 | (define (parse-failure? obj) |
---|
117 | (and (vector? obj) |
---|
118 | (eq? 'fail (vector-ref obj 0)))) |
---|
119 | |
---|
120 | (define (make-result value stream) |
---|
121 | (vector 'success value stream)) |
---|
122 | |
---|
123 | (define result-value (cut vector-ref <> 1)) |
---|
124 | (define result-next (cut vector-ref <> 2)) |
---|
125 | (define failure-type (cut vector-ref <> 1)) |
---|
126 | (define failure-message (cut vector-ref <> 2)) |
---|
127 | (define failure-position (cut vector-ref <> 3)) |
---|
128 | |
---|
129 | (define (make-message-failure m p) |
---|
130 | (vector 'fail 'message (list m) p)) |
---|
131 | (define (make-expect-failure m p) |
---|
132 | (vector 'fail 'expect (list m) p)) |
---|
133 | (define (make-unexpect-failure m p) |
---|
134 | (vector 'fail 'unexpect (list m) p)) |
---|
135 | |
---|
136 | ;; entry point |
---|
137 | (define (parse-string parse str) |
---|
138 | (define (error->string err) |
---|
139 | (case (failure-type err) |
---|
140 | ((message) (failure-message err)) |
---|
141 | ((expect) (failure-message err)) |
---|
142 | ((unexpect) (format #f "unexpected: ~a" (failure-message err))))) |
---|
143 | (let1 r (parse (make-string-stream str)) |
---|
144 | (if (parse-success? r) |
---|
145 | (semantic-value-finalize! (result-value r)) |
---|
146 | (raise (make-condition <parse-error> |
---|
147 | 'position (failure-position r) |
---|
148 | 'message (error->string r)))))) |
---|
149 | |
---|
150 | ;;;============================================================ |
---|
151 | ;;; Lazily-constructed string |
---|
152 | ;;; |
---|
153 | (define-class <rope> () |
---|
154 | ((tree :init-keyword :tree))) |
---|
155 | |
---|
156 | (define (rope->string obj) |
---|
157 | (define (traverse obj) |
---|
158 | (cond ((is-a? obj <rope>) |
---|
159 | (traverse (slot-ref obj 'tree))) |
---|
160 | ((list? obj) (map traverse obj)) |
---|
161 | ((string? obj) (display obj)) |
---|
162 | ((char? obj) (display obj)) |
---|
163 | (else (error "don't know how to write:" obj)))) |
---|
164 | (with-output-to-string |
---|
165 | (lambda () (traverse obj)))) |
---|
166 | |
---|
167 | (define (make-rope obj) |
---|
168 | (make <rope> :tree obj)) |
---|
169 | |
---|
170 | ;;;============================================================ |
---|
171 | ;;; Input Stream |
---|
172 | ;;; |
---|
173 | |
---|
174 | ;;(define (make-string-stream str) |
---|
175 | ;; (let loop ((str str) (pos 0)) |
---|
176 | ;; (lambda () |
---|
177 | ;; (if (zero? (string-length str)) |
---|
178 | ;; (let loop () (values #f pos loop)) |
---|
179 | ;; (values (string-ref str 0) |
---|
180 | ;; pos |
---|
181 | ;; (loop (string-drop str 1) (+ pos 1))))))) |
---|
182 | |
---|
183 | (define (make-string-stream str) |
---|
184 | (let loop ((ptr (make-string-pointer str))) |
---|
185 | (lambda () |
---|
186 | (let ((c (string-pointer-ref ptr)) |
---|
187 | (pos (string-pointer-index ptr))) |
---|
188 | (if (eof-object? c) |
---|
189 | (let loop () (values #f pos loop)) |
---|
190 | (let1 new-ptr (string-pointer-copy ptr) |
---|
191 | (string-pointer-next! new-ptr) |
---|
192 | (values c pos (loop new-ptr)))))))) |
---|
193 | |
---|
194 | (define (stream-position s) |
---|
195 | (values-ref (s) 1)) |
---|
196 | |
---|
197 | ;;;============================================================ |
---|
198 | ;;; Primitives |
---|
199 | ;;; |
---|
200 | (define ($return val) |
---|
201 | (lambda (s) (make-result val s))) |
---|
202 | |
---|
203 | (define ($fail msg) |
---|
204 | (lambda (s) |
---|
205 | (make-message-failure msg (stream-position s)))) |
---|
206 | |
---|
207 | (define ($expect parse msg) |
---|
208 | (lambda (s) |
---|
209 | (let1 r (parse s) |
---|
210 | (if (parse-success? r) |
---|
211 | r |
---|
212 | (make-expect-failure msg (stream-position s)))))) |
---|
213 | |
---|
214 | (define ($unexpect msg pos) |
---|
215 | (lambda (s) |
---|
216 | (make-unexpect-failure msg pos))) |
---|
217 | |
---|
218 | ;;;============================================================ |
---|
219 | ;;; Error handler |
---|
220 | ;;; |
---|
221 | (define (merge-failure err) |
---|
222 | (let loop ((r '()) (err err) (pos 0)) |
---|
223 | (if (null? err) |
---|
224 | (vector 'fail |
---|
225 | (vector-ref (car r) 1) |
---|
226 | (append-map (cut vector-ref <> 2) (reverse! r)) |
---|
227 | pos) |
---|
228 | (let1 npos (failure-position (car err)) |
---|
229 | (cond ((= pos npos) |
---|
230 | (loop (cons (car err) r) (cdr err) pos)) |
---|
231 | ((< pos npos) |
---|
232 | (loop (list (car err)) (cdr err) npos)) |
---|
233 | (else (loop r (cdr err) pos))))))) |
---|
234 | |
---|
235 | |
---|
236 | ;;;============================================================ |
---|
237 | ;;; Backtrack control |
---|
238 | ;;; |
---|
239 | (define-syntax $cut |
---|
240 | (syntax-rules () |
---|
241 | ((_ mark) (set! mark #t)))) |
---|
242 | |
---|
243 | ;;;================================================================== |
---|
244 | ;;; Combinators |
---|
245 | ;;; |
---|
246 | (define-syntax $do |
---|
247 | (syntax-rules () |
---|
248 | (($do :: var clause ...) |
---|
249 | (begin ($cut var) ($do clause ...))) |
---|
250 | (($do ((parse))) parse) |
---|
251 | (($do parse) parse) |
---|
252 | (($do (var parse) clause ...) |
---|
253 | (lambda (s) |
---|
254 | (let1 tmp (parse s) |
---|
255 | (if (parse-success? tmp) |
---|
256 | (let1 var (result-value tmp) |
---|
257 | (($do clause ...) (result-next tmp))) |
---|
258 | tmp)))) |
---|
259 | (($do ((parse)) clause ...) |
---|
260 | (lambda (s) |
---|
261 | (let1 tmp (parse s) |
---|
262 | (if (parse-success? tmp) |
---|
263 | (($do clause ...) (result-next tmp)) |
---|
264 | tmp)))) |
---|
265 | (($do c0 c1 c2 ...) |
---|
266 | ($do (c0) c1 c2 ...)) |
---|
267 | (($do . rest) |
---|
268 | (syntax-error "malformed $do binding form:" rest)))) |
---|
269 | |
---|
270 | (define-syntax $or |
---|
271 | (syntax-rules (quote) |
---|
272 | (($or 'mark) ($return #t)) |
---|
273 | (($or 'mark p0 p1 ...) |
---|
274 | (lambda (s) |
---|
275 | (let1 mark #f |
---|
276 | (let loop ((errors '()) |
---|
277 | (parsers (list p0 p1 ...))) |
---|
278 | (let1 r ((car parsers) s) |
---|
279 | (cond ((parse-success? r) r) |
---|
280 | ((or mark (null? (cdr parsers))) |
---|
281 | (if (null? errors) |
---|
282 | r |
---|
283 | (merge-failure (reverse! (cons r errors))))) |
---|
284 | (else |
---|
285 | (loop (cons r errors) (cdr parsers))))))))) |
---|
286 | (($or) ($return #t)) |
---|
287 | (($or p0) p0) |
---|
288 | (($or p0 p1 ...) |
---|
289 | (lambda (s) |
---|
290 | (let loop ((errors '()) |
---|
291 | (parsers (list p0 p1 ...))) |
---|
292 | (let1 r ((car parsers) s) |
---|
293 | (cond ((parse-success? r) r) |
---|
294 | ((null? (cdr parsers)) |
---|
295 | (if (null? errors) |
---|
296 | r |
---|
297 | (merge-failure (reverse! (cons r errors))))) |
---|
298 | (else |
---|
299 | (loop (cons r errors) (cdr parsers)))))))))) |
---|
300 | |
---|
301 | (define ($seq . parsers) |
---|
302 | (match parsers |
---|
303 | (() ($return #t)) |
---|
304 | ((parse) parse) |
---|
305 | ((parse . rest) |
---|
306 | ($do ((parse)) (apply $seq rest))) |
---|
307 | (_ (error "can't be here")))) |
---|
308 | |
---|
309 | (define (%check-min-max min max) |
---|
310 | (when (or (negative? min) |
---|
311 | (and max (> min max))) |
---|
312 | (error "invalid argument:" min max))) |
---|
313 | |
---|
314 | (define ($many parse . args) |
---|
315 | (let-optionals* args ((min 0) (max #f)) |
---|
316 | (%check-min-max min max) |
---|
317 | (lambda (s) |
---|
318 | (define (max? count) |
---|
319 | (and max (>= count max))) |
---|
320 | (let loop ((r '()) (s s) (count 0)) |
---|
321 | (if (max? count) |
---|
322 | (make-result (reverse! r) s) |
---|
323 | (let1 v (parse s) |
---|
324 | (cond ((parse-success? v) |
---|
325 | (loop (cons (result-value v) r) |
---|
326 | (result-next v) |
---|
327 | (+ count 1))) |
---|
328 | ((<= min count) |
---|
329 | (make-result (reverse! r) s)) |
---|
330 | (else v)))))))) |
---|
331 | |
---|
332 | (define ($skip-many . args) |
---|
333 | (apply $many args)) |
---|
334 | |
---|
335 | (define ($repeat parse n) |
---|
336 | ($many parse n n)) |
---|
337 | |
---|
338 | (define ($optional parse) |
---|
339 | ($or parse ($return #f))) |
---|
340 | |
---|
341 | (define ($sep-by parse sep . args) |
---|
342 | (let-optionals* args ((min 0) (max #f)) |
---|
343 | (%check-min-max min max) |
---|
344 | (if (and max (zero? max)) |
---|
345 | ($return #t) |
---|
346 | (lambda (s) |
---|
347 | (let1 r (parse s) |
---|
348 | (cond ((parse-success? r) |
---|
349 | (let1 r2 (($many ($do sep parse) |
---|
350 | (clamp (- min 1) 0) |
---|
351 | (and max (- max 1))) |
---|
352 | (result-next r)) |
---|
353 | (if (parse-success? r2) |
---|
354 | (make-result (cons (result-value r) (result-value r2)) |
---|
355 | (result-next r2)) |
---|
356 | r2))) |
---|
357 | ((zero? min) (make-result #t s)) |
---|
358 | (else r))))))) |
---|
359 | |
---|
360 | (define ($alternate parse sep) |
---|
361 | ($do (h parse) |
---|
362 | (t ($many ($do (v1 sep) (v2 parse) ($return (list v1 v2))))) |
---|
363 | ($return (cons h (apply append! t))))) |
---|
364 | |
---|
365 | (define ($end-by parse sep . args) |
---|
366 | (apply $many ($do (v parse) sep ($return v)) args)) |
---|
367 | |
---|
368 | (define ($sep-end-by parse sep . args) |
---|
369 | ($do (v (apply $sep-by parse sep args)) |
---|
370 | (($optional sep)) |
---|
371 | ($return v))) |
---|
372 | |
---|
373 | (define ($count parse n) |
---|
374 | ($many parse n n)) |
---|
375 | |
---|
376 | (define ($between open parse close) |
---|
377 | ($do open (v parse) close ($return v))) |
---|
378 | |
---|
379 | (define ($not parse) |
---|
380 | (lambda (s) |
---|
381 | (($or 'grp |
---|
382 | ($do (v parse) :: grp ($unexpect v (stream-position s))) |
---|
383 | ($return #f)) |
---|
384 | s))) |
---|
385 | |
---|
386 | (define ($many-till parse end . args) |
---|
387 | (apply $many ($do (($not end)) parse) args)) |
---|
388 | |
---|
389 | (define ($chain-left parse op) |
---|
390 | (lambda (st) |
---|
391 | (let1 r (parse st) |
---|
392 | (if (parse-success? r) |
---|
393 | (let loop ((r r)) |
---|
394 | (let1 r2 (($do (proc op) (v parse) |
---|
395 | ($return (proc (result-value r) v))) |
---|
396 | (result-next r)) |
---|
397 | (if (parse-success? r2) |
---|
398 | (loop r2) |
---|
399 | r))) |
---|
400 | r)))) |
---|
401 | |
---|
402 | (define ($chain-right parse op) |
---|
403 | (rec (loop st) |
---|
404 | (($do (h parse) |
---|
405 | ($or ($do (proc op) |
---|
406 | (t loop) |
---|
407 | ($return (proc h t))) |
---|
408 | ($return h))) |
---|
409 | st))) |
---|
410 | |
---|
411 | (define-syntax $lazy |
---|
412 | (syntax-rules () |
---|
413 | ((_ parse) |
---|
414 | (lambda args (apply parse args))))) |
---|
415 | |
---|
416 | ;;;============================================================ |
---|
417 | ;;; Intermediate structure constructor |
---|
418 | ;;; |
---|
419 | (define ($->rope parse) |
---|
420 | ($do (v parse) ($return (make-rope v)))) |
---|
421 | |
---|
422 | (define (semantic-value-finalize! obj) |
---|
423 | (cond ((is-a? obj <rope>) (rope->string obj)) |
---|
424 | ((pair? obj) |
---|
425 | (cons (semantic-value-finalize! (car obj)) |
---|
426 | (semantic-value-finalize! (cdr obj)))) |
---|
427 | (else obj))) |
---|
428 | |
---|
429 | ;;;============================================================ |
---|
430 | ;;; String parsers |
---|
431 | ;;; |
---|
432 | (define ($satisfy pred expect) |
---|
433 | (lambda (s) |
---|
434 | (receive (c pos next) (s) |
---|
435 | (if c |
---|
436 | (let1 r (pred c) |
---|
437 | (if r |
---|
438 | (make-result c next) |
---|
439 | (make-expect-failure expect pos))) |
---|
440 | (make-expect-failure expect pos))))) |
---|
441 | |
---|
442 | (define-values ($string $string-ci) |
---|
443 | (let-syntax |
---|
444 | ((expand |
---|
445 | (syntax-rules () |
---|
446 | ((_ char=) |
---|
447 | (lambda (str) |
---|
448 | (let1 lis (string->list str) |
---|
449 | (lambda (s) |
---|
450 | (let loop ((r '()) (s s) (lis lis)) |
---|
451 | (if (null? lis) |
---|
452 | (make-result (make-rope (reverse! r)) s) |
---|
453 | (receive (c pos next) (s) |
---|
454 | (if (and c (char= c (car lis))) |
---|
455 | (loop (cons c r) next (cdr lis)) |
---|
456 | (make-expect-failure str (stream-position s))))))))))))) |
---|
457 | (values (expand char=?) |
---|
458 | (expand char-ci=?)))) |
---|
459 | |
---|
460 | (define ($char c) |
---|
461 | ($satisfy (cut char=? c <>) c)) |
---|
462 | |
---|
463 | (define ($char-ci c) |
---|
464 | ($satisfy (cut char-ci=? c <>) |
---|
465 | (list->char-set c (char-upcase c) (char-downcase c)))) |
---|
466 | |
---|
467 | (define ($one-of charset) |
---|
468 | ($satisfy (cut char-set-contains? charset <>) |
---|
469 | charset)) |
---|
470 | |
---|
471 | (define ($none-of charset) |
---|
472 | ($one-of (char-set-complement charset))) |
---|
473 | |
---|
474 | (define (anychar st) |
---|
475 | (receive (c pos next) (st) |
---|
476 | (if c |
---|
477 | (make-result c next) |
---|
478 | (make-expect-failure "character" pos)))) |
---|
479 | |
---|
480 | (define-syntax define-char-parser |
---|
481 | (syntax-rules () |
---|
482 | ((_ proc charset expect) |
---|
483 | (define proc |
---|
484 | ($expect ($one-of charset) expect))))) |
---|
485 | |
---|
486 | (define-char-parser upper #[A-Z] "upper case letter") |
---|
487 | (define-char-parser lower #[a-z] "lower case letter") |
---|
488 | (define-char-parser letter #[A-Za-z] "letter") |
---|
489 | (define-char-parser alphanum #[A-Za-z0-9] "letter or digit") |
---|
490 | (define-char-parser digit #[0-9] "digit") |
---|
491 | (define-char-parser hexdigit #[0-9A-Fa-f] "hexadecimal digit") |
---|
492 | (define-char-parser newline #[\n] "newline") |
---|
493 | (define-char-parser tab #[\t] "tab") |
---|
494 | (define-char-parser space #[ \v\f\t\r\n] "space") |
---|
495 | |
---|
496 | (define spaces ($->rope ($many space))) |
---|
497 | |
---|
498 | (define eof |
---|
499 | (lambda (s) |
---|
500 | (receive (c pos next) (s) |
---|
501 | (if c |
---|
502 | (make-expect-failure "end of input" pos) |
---|
503 | (make-result #t next))))) |
---|
504 | |
---|
505 | ;;============================================================ |
---|
506 | ;; Token Parsers |
---|
507 | ;; |
---|
508 | |
---|
509 | (provide "peg") |
---|