1 | #!/usr/bin/env gosh |
---|
2 | ;; -*- coding: utf-8 mode: scheme -*- |
---|
3 | |
---|
4 | ;;; |
---|
5 | ;;; Original PEG Grammars |
---|
6 | ;;; |
---|
7 | ;; Hierarchical syntax |
---|
8 | ; Grammar <- Spacing Definition+ EndOfFile |
---|
9 | ; Definition <- Identifier LEFTARROW Expression |
---|
10 | ; |
---|
11 | ; Expression <- Sequence (SLASH Sequence)* |
---|
12 | ; Sequence <- Prefix* |
---|
13 | ; Prefix <- (AND / NOT)? Suffix |
---|
14 | ; Suffix <- Primary (QUESTION / STAR / PLUS)? |
---|
15 | ; Primary <- Identifier !LEFTARROW |
---|
16 | ; / OPEN Expression CLOSE |
---|
17 | ; / Literal / Class / DOT |
---|
18 | ; |
---|
19 | ;; Lexical syntax |
---|
20 | ; Identifier <- IdentStart IdentCont* Spacing |
---|
21 | ; IdentStart <- [a-zA-Z_] |
---|
22 | ; IdentCont <- IdentStart / [0-9] |
---|
23 | ; |
---|
24 | ; Literal <- ['] (!['] Char)* ['] Spacing |
---|
25 | ; / ["] (!["] Char)* ["] Spacing |
---|
26 | ; Class <- '[' (!']' Range)* ']' Spacing |
---|
27 | ; Range <- Char '-' Char / Char |
---|
28 | ; Char <- '\\' [nrt'"\[\]\\] |
---|
29 | ; / '\\' [0-2][0-7][0-7] |
---|
30 | ; / '\\' [0-7][0-7]? |
---|
31 | ; / !'\\' . |
---|
32 | ; |
---|
33 | ; LEFTARROW <- '<-' Spacing |
---|
34 | ; SLASH <- '/' Spacing |
---|
35 | ; AND <- '&' Spacing |
---|
36 | ; NOT <- '!' Spacing |
---|
37 | ; QUESTION <- '?' Spacing |
---|
38 | ; STAR <- '*' Spacing |
---|
39 | ; PLUS <- '+' Spacing |
---|
40 | ; OPEN <- '(' Spacing |
---|
41 | ; CLOSE <- ')' Spacing |
---|
42 | ; DOT <- '.' Spacing |
---|
43 | ; |
---|
44 | ; Spacing <- (Space / Comment)* |
---|
45 | ; Comment <- '#' (!EndOfLine .)* EndOfLine |
---|
46 | ; Space <- ' ' / '\t' / EndOfLine |
---|
47 | ; EndOfLine <- '\r\n' / '\n' / '\r' |
---|
48 | ; EndOfFile <- !. |
---|
49 | |
---|
50 | ;;; |
---|
51 | ;;; S-exp PEG Grammar |
---|
52 | ;;; |
---|
53 | ; |
---|
54 | ;; Hierarchical syntax |
---|
55 | ; |
---|
56 | ; Definition <- Identifier LEFTARROW Expression |
---|
57 | ; |
---|
58 | ; Expression <- Sequence (SLASH Sequence)* |
---|
59 | ; Sequence <- Prefix* (RETURN .)? |
---|
60 | ; Prefix <- IdentifierClause / (AND / NOT)? Suffix |
---|
61 | ; Suffix <- Primary (QUESTION / STAR / PLUS)? |
---|
62 | ; IdentifierClause <- OPEN CaptureName Primary CLOSE |
---|
63 | ; Primary <- Identifier !LEFTARROW |
---|
64 | ; / OPEN Expression CLOSE |
---|
65 | ; / Literal / Class / ANY |
---|
66 | ; |
---|
67 | ;; Lexical syntax |
---|
68 | ; |
---|
69 | ; Identifier <- !NonIdentifier <scheme-symbol> |
---|
70 | ; NonIdentifier <- OPEN / CLOSE |
---|
71 | ; CaptureName <- <scheme-keyword> |
---|
72 | ; Literal <- <scheme-string> / <scheme-char> |
---|
73 | ; Class <- <scheme-char-set> |
---|
74 | ; |
---|
75 | ; OPEN <- |<| |
---|
76 | ; CLOSE <- |>| |
---|
77 | ; LEFTARROW <- |<-| |
---|
78 | ; SLASH <- |/| |
---|
79 | ; AND <- |&| |
---|
80 | ; NOT <- |!| |
---|
81 | ; QUESTION <- |?| |
---|
82 | ; STAR <- |*| |
---|
83 | ; PLUS <- |+| |
---|
84 | ; RETURN <- :return |
---|
85 | ; ANY <- |%any%| |
---|
86 | |
---|
87 | ; (define-parser math-exp |
---|
88 | ; (additive <- (multitive left-val) #\+ (additive right-val) |
---|
89 | ; &return (+ left-val right-val) |
---|
90 | ; / multitive) |
---|
91 | ; (multitive <- (primary left-val) #\* (multitive right-val) |
---|
92 | ; &return (* left-val right-val) |
---|
93 | ; / primary) |
---|
94 | ; (primary <- #\( (additive val) #\) &return val |
---|
95 | ; / decimal) |
---|
96 | ; (decimal <- (#[0-9] num-char) |
---|
97 | ; &return (- (char->integer num-char) (char->integer #\0)))) |
---|
98 | |
---|
99 | (use peg) |
---|
100 | (use util.match) |
---|
101 | |
---|
102 | (define (main args) |
---|
103 | 0) |
---|
104 | |
---|
105 | (define nil ()) |
---|
106 | |
---|
107 | (define-class <derivs> () |
---|
108 | (; Hierarchical tokens (non-terminals) |
---|
109 | (grammar :init-keyword :grammar) |
---|
110 | (definition :init-keyword :definition) |
---|
111 | (expression :init-keyword :expression) |
---|
112 | (sequence :init-keyword :sequence) |
---|
113 | (prefix :init-keyword :prefix) |
---|
114 | (suffix :init-keyword :suffix) |
---|
115 | (identifier-clause :init-keyword :identifier-clause) |
---|
116 | (primary :init-keyword :primary) |
---|
117 | |
---|
118 | ; Lexical tokens (terminals) |
---|
119 | (identifier :init-keyword :identifier) |
---|
120 | (non-identifier :init-keyword :non-identifier) |
---|
121 | (capture-name :init-keyword :capture-name) |
---|
122 | (literal :init-keyword :literal) |
---|
123 | (class :init-keyword :class) |
---|
124 | |
---|
125 | (open :init-keyword :open) |
---|
126 | (close :init-keyword :close) |
---|
127 | (leftarrow :init-keyword :leftarrow) |
---|
128 | (return :init-keyword :return) |
---|
129 | (any :init-keyword :any) |
---|
130 | (plus :init-keyword :plus) |
---|
131 | (star :init-keyword :star) |
---|
132 | (question :init-keyword :question) |
---|
133 | (not :init-keyword :not) |
---|
134 | (and :init-keyword :and) |
---|
135 | (slash :init-keyword :slash) |
---|
136 | |
---|
137 | ; Raw input |
---|
138 | (token :init-keyword :token :getter deriv-token) |
---|
139 | )) |
---|
140 | |
---|
141 | (define-macro (define-deriv name) |
---|
142 | `(define-method ,(string->symbol |
---|
143 | (string-append "deriv-" (symbol->string name))) |
---|
144 | ((derivs <derivs>)) |
---|
145 | (let1 slt (slot-ref derivs ',name) |
---|
146 | (if (promise? slt) |
---|
147 | (let1 forced (force slt) |
---|
148 | (slot-set! derivs ',name forced) |
---|
149 | forced) |
---|
150 | slt)))) |
---|
151 | |
---|
152 | (define-macro (define-derivs . names) |
---|
153 | `(begin |
---|
154 | ,@(map |
---|
155 | (lambda (deriv-name) |
---|
156 | `(define-deriv ,deriv-name)) |
---|
157 | names))) |
---|
158 | |
---|
159 | (define-derivs ; define generic function *-deriv |
---|
160 | ; for hierarchical syntax |
---|
161 | grammar definition expression sequence prefix suffix primary identifier-clause |
---|
162 | ; for lexical tokens |
---|
163 | identifier non-identifier capture-name literal class |
---|
164 | open close leftarrow slash and not question star plus return any) |
---|
165 | |
---|
166 | (define (make-result val chars) |
---|
167 | `(:result ,val ,chars)) |
---|
168 | |
---|
169 | (define (parse tokens) |
---|
170 | (let loop((ret-deriv nil) (tokens (cons nil (reverse tokens)))) |
---|
171 | (if (null? tokens) |
---|
172 | ret-deriv |
---|
173 | (loop |
---|
174 | (letrec |
---|
175 | ((deriv |
---|
176 | (make <derivs> |
---|
177 | ; code for generating code |
---|
178 | ; (for-each (lambda (ident) (format #t ":~a (lazy (parse-~a deriv))\n" ident ident)) '(leftarrow slash and not question star plus any)) |
---|
179 | |
---|
180 | ; hierarychical syntax |
---|
181 | :grammar (lazy (parse-grammar deriv)) |
---|
182 | :definition (lazy (parse-definition deriv)) |
---|
183 | :expression (lazy (parse-expression deriv)) |
---|
184 | :sequence (lazy (parse-sequence deriv)) |
---|
185 | :prefix (lazy (parse-prefix deriv)) |
---|
186 | :suffix (lazy (parse-suffix deriv)) |
---|
187 | :identifier-clause (lazy (parse-identifier-clause deriv)) |
---|
188 | :primary (lazy (parse-primary deriv)) |
---|
189 | |
---|
190 | ; lexical syntax |
---|
191 | :identifier (lazy (parse-identifier deriv)) |
---|
192 | :non-identifier (lazy (parse-non-identifier deriv)) |
---|
193 | :capture-name (lazy (parse-capture-name deriv)) |
---|
194 | :literal (lazy (parse-literal deriv)) |
---|
195 | :class (lazy (parse-class deriv)) |
---|
196 | |
---|
197 | :open (lazy (parse-open deriv)) |
---|
198 | :close (lazy (parse-close deriv)) |
---|
199 | :leftarrow (lazy (parse-leftarrow deriv)) |
---|
200 | :slash (lazy (parse-slash deriv)) |
---|
201 | :and (lazy (parse-and deriv)) |
---|
202 | :not (lazy (parse-not deriv)) |
---|
203 | :question (lazy (parse-question deriv)) |
---|
204 | :star (lazy (parse-star deriv)) |
---|
205 | :plus (lazy (parse-plus deriv)) |
---|
206 | :return (lazy (parse-return deriv)) |
---|
207 | :any (lazy (parse-any deriv)) |
---|
208 | |
---|
209 | ;raw input |
---|
210 | :token (if (null? tokens) |
---|
211 | nil |
---|
212 | (make-result (car tokens) ret-deriv))))) |
---|
213 | deriv) |
---|
214 | (cdr tokens))))) |
---|
215 | |
---|
216 | (define (parse-grammars pegs) |
---|
217 | (map |
---|
218 | (lambda (gram) |
---|
219 | (let1 gram (flatten-grammar gram) |
---|
220 | (let* ((ret (deriv-definition (parse gram))) |
---|
221 | (derivs (caddr (deriv-token (caddr ret))))) |
---|
222 | (if (not (null? derivs)) |
---|
223 | (error (format #f "Syntax error: ~a" gram)) |
---|
224 | (cadr ret))))) |
---|
225 | pegs)) |
---|
226 | |
---|
227 | |
---|
228 | ;;; |
---|
229 | ;;; Preprocessor of grammar input |
---|
230 | ;;; |
---|
231 | ;;; input: |
---|
232 | ;;; '(foo -> (e1 bar) (e2 baz) :return (do 'sth) / (e3 hoge) :return (do 'anth)) |
---|
233 | ;;; |
---|
234 | ;;; output: |
---|
235 | ;;; '(foo -> < e1 bar > < e2 baz > :return (do 'sth) / < e3 hoge > :return (do 'anth)) |
---|
236 | ;;; |
---|
237 | (define (flatten-grammar gram) |
---|
238 | (let loop((ret nil) (gram (reverse gram)) (expression? #f)) |
---|
239 | (if (null? gram) |
---|
240 | ret |
---|
241 | (loop |
---|
242 | (if (and expression? |
---|
243 | (list? (car gram))) |
---|
244 | (append `(< ,@(car gram) >) ret) |
---|
245 | (cons (car gram) ret)) |
---|
246 | (cdr gram) |
---|
247 | (cond |
---|
248 | ((eq? '/ (car gram)) #f) |
---|
249 | ((eq? :return (car gram)) #t) |
---|
250 | (else expression?)))))) |
---|
251 | |
---|
252 | (define (flatten-grammar gram) |
---|
253 | (concat-with |
---|
254 | '/ |
---|
255 | (map |
---|
256 | (lambda (sequence) |
---|
257 | (concat-with |
---|
258 | :return |
---|
259 | (let1 seq-pair (separate (cut eq? :return <>) sequence) |
---|
260 | `(,(apply append |
---|
261 | (map (lambda (elem) |
---|
262 | (if (pair? elem) |
---|
263 | `(< ,@(flatten-grammar elem) >) |
---|
264 | `(,elem))) |
---|
265 | (car seq-pair))) |
---|
266 | ,@(cdr seq-pair))))) |
---|
267 | (separate (cut eq? '/ <>) gram)))) |
---|
268 | |
---|
269 | (define (separate separator? ls) |
---|
270 | (let loop1((ret ()) (lst ls)) |
---|
271 | (if (null? lst) |
---|
272 | (if (separator? (car (reverse ls))) |
---|
273 | (reverse (cons () ret)) |
---|
274 | (reverse ret)) |
---|
275 | (receive (head tails) |
---|
276 | (let loop2((ret ()) (ls lst)) |
---|
277 | (cond |
---|
278 | ((null? ls) (values (reverse ret) ls)) |
---|
279 | ((separator? (car ls)) |
---|
280 | (values (reverse ret) (cdr ls))) |
---|
281 | (else |
---|
282 | (loop2 (cons (car ls) ret) (cdr ls))))) |
---|
283 | (loop1 (cons head ret) |
---|
284 | tails))))) |
---|
285 | |
---|
286 | (define (concat-with joiner ls) |
---|
287 | (let loop((ret (car (reverse ls))) (ls (cdr (reverse ls)))) |
---|
288 | (if (null? ls) |
---|
289 | ret |
---|
290 | (loop `(,@(car ls) ,joiner ,@ret) (cdr ls))))) |
---|
291 | |
---|
292 | (define-macro (parse-string-with grammars str) |
---|
293 | `(let () |
---|
294 | ,@(map translate-definition |
---|
295 | (parse-grammars grammars)) |
---|
296 | (parse-string ,(caar grammars) ,str))) |
---|
297 | |
---|
298 | |
---|
299 | (define-macro (define-parse-symbol ident sym) |
---|
300 | `(define (,(string->symbol (string-append "parse-" (symbol->string ident))) derivs) |
---|
301 | (match (deriv-token derivs) |
---|
302 | ((:result ',sym derivs-) (make-result ',sym derivs-)) |
---|
303 | (_ nil)))) |
---|
304 | |
---|
305 | |
---|
306 | (define-parse-symbol open <) |
---|
307 | (define-parse-symbol close >) |
---|
308 | (define-parse-symbol leftarrow <-) |
---|
309 | (define-parse-symbol slash /) |
---|
310 | (define-parse-symbol and &) |
---|
311 | (define-parse-symbol not !) |
---|
312 | (define-parse-symbol question ?) |
---|
313 | (define-parse-symbol star *) |
---|
314 | (define-parse-symbol plus +) |
---|
315 | (define-parse-symbol return :return) |
---|
316 | (define-parse-symbol any %any%) |
---|
317 | |
---|
318 | (define (parse-class derivs) |
---|
319 | (match (deriv-token derivs) |
---|
320 | ((:result val derivs-) |
---|
321 | (if (char-set? val) |
---|
322 | (make-result `(:char-set ,val) derivs-) |
---|
323 | nil)) |
---|
324 | (_ nil))) |
---|
325 | |
---|
326 | (define (parse-literal derivs) |
---|
327 | (match (deriv-token derivs) |
---|
328 | ((:result val derivs-) |
---|
329 | (cond |
---|
330 | ((string? val) (make-result `(:string ,val) derivs-)) |
---|
331 | ((char? val) (make-result `(:char ,val) derivs-)) |
---|
332 | (else nil))) |
---|
333 | (_ nil))) |
---|
334 | |
---|
335 | (define (parse-capture-name derivs) |
---|
336 | (match (deriv-token derivs) |
---|
337 | ((:result val derivs-) |
---|
338 | (if (keyword? val) |
---|
339 | (make-result val derivs-) |
---|
340 | nil)) |
---|
341 | (_ nil))) |
---|
342 | |
---|
343 | (define (parse-non-identifier derivs) |
---|
344 | (let* ((alt3 |
---|
345 | (lazy |
---|
346 | (match (deriv-slash derivs) |
---|
347 | ((:result ret derivs-) |
---|
348 | (make-result '/ derivs-)) |
---|
349 | (_ nil)))) |
---|
350 | (alt2 |
---|
351 | (lazy |
---|
352 | (match (deriv-close derivs) |
---|
353 | ((:result ret derivs-) |
---|
354 | (make-result '> derivs-)) |
---|
355 | (_ alt3)))) |
---|
356 | (alt1 |
---|
357 | (lazy |
---|
358 | (match (deriv-open derivs) |
---|
359 | ((:result ret derivs-) |
---|
360 | (make-result '< derivs-)) |
---|
361 | (_ alt2))))) |
---|
362 | (force alt1))) |
---|
363 | |
---|
364 | (define (parse-identifier derivs) |
---|
365 | (match (deriv-non-identifier derivs) |
---|
366 | (() |
---|
367 | (match (deriv-token derivs) |
---|
368 | ((:result val derivs-) |
---|
369 | (if (symbol? val) |
---|
370 | (make-result `(:identifier ,val) derivs-) |
---|
371 | nil)) |
---|
372 | (_ nil))) |
---|
373 | (_ nil))) |
---|
374 | |
---|
375 | (define (parse-primary derivs) |
---|
376 | (let* ((alt6 |
---|
377 | (lazy |
---|
378 | (match (deriv-any derivs) |
---|
379 | ((:result val derivs-) |
---|
380 | (make-result val derivs-)) |
---|
381 | (_ nil)))) |
---|
382 | (alt5 |
---|
383 | (lazy |
---|
384 | (match (deriv-class derivs) |
---|
385 | ((:result val derivs-) |
---|
386 | (make-result val derivs-)) |
---|
387 | (_ alt6)))) |
---|
388 | (alt4 |
---|
389 | (lazy |
---|
390 | (match (deriv-literal derivs) |
---|
391 | ((:result val derivs-) |
---|
392 | (make-result val derivs-)) |
---|
393 | (_ alt5)))) |
---|
394 | (alt3 |
---|
395 | (lazy |
---|
396 | (match (deriv-open derivs) |
---|
397 | ((:result val derivs-) |
---|
398 | (match (deriv-expression derivs-) |
---|
399 | ((:result exp derivs--) |
---|
400 | (match (deriv-close derivs--) |
---|
401 | ((:result val derivs---) |
---|
402 | (make-result exp derivs---)) |
---|
403 | (_ alt4))) |
---|
404 | (_ alt4))) |
---|
405 | (_ alt4)))) |
---|
406 | (alt1 |
---|
407 | (lazy |
---|
408 | (match (deriv-identifier derivs) |
---|
409 | ((:result ident derivs-) |
---|
410 | (match (deriv-leftarrow derivs-) |
---|
411 | ((:result val derivs--) alt3) |
---|
412 | (_ (make-result ident derivs-)))) |
---|
413 | (_ alt3))))) |
---|
414 | (force alt1))) |
---|
415 | |
---|
416 | (define (parse-identifier-clause derivs) |
---|
417 | (let* ((alt1 |
---|
418 | (lazy |
---|
419 | (match (deriv-open derivs) |
---|
420 | ((:result opn derivs-) |
---|
421 | (match (deriv-capture-name derivs-) |
---|
422 | ((:result capt derivs--) |
---|
423 | (match (deriv-primary derivs--) |
---|
424 | ((:result primary derivs---) |
---|
425 | (match (deriv-close derivs---) |
---|
426 | ((:result cls derivs----) |
---|
427 | (make-result `(:identifier-clause |
---|
428 | ,primary |
---|
429 | :capture |
---|
430 | ,(string->symbol (x->string capt))) |
---|
431 | derivs----)) |
---|
432 | (_ nil))) |
---|
433 | (_ nil))) |
---|
434 | (_ nil))) |
---|
435 | (_ nil))))) |
---|
436 | (force alt1))) |
---|
437 | |
---|
438 | (define (parse-suffix derivs) |
---|
439 | (match (deriv-primary derivs) |
---|
440 | ((:result prim derivs-) |
---|
441 | (let* ((alt3 |
---|
442 | (lazy |
---|
443 | (match (deriv-plus derivs-) |
---|
444 | ((:result plus derivs--) |
---|
445 | (make-result `(:one-more ,prim) derivs--)) |
---|
446 | (_ (make-result prim derivs-))))) |
---|
447 | (alt2 |
---|
448 | (lazy |
---|
449 | (match (deriv-star derivs-) |
---|
450 | ((:result star derivs--) |
---|
451 | (make-result `(:zero-more ,prim) derivs--)) |
---|
452 | (_ alt3)))) |
---|
453 | (alt1 |
---|
454 | (lazy |
---|
455 | (match (deriv-question derivs-) |
---|
456 | ((:result qstn derivs--) |
---|
457 | (make-result `(:optional ,prim) derivs--)) |
---|
458 | (_ alt2))))) |
---|
459 | (force alt1))) |
---|
460 | (_ nil))) |
---|
461 | |
---|
462 | (define (parse-prefix derivs) |
---|
463 | (let* ((glob-alt2 |
---|
464 | (lazy |
---|
465 | (let* ((alt2 |
---|
466 | (lazy |
---|
467 | (match (deriv-not derivs) |
---|
468 | ((:result val derivs-) |
---|
469 | (values :not derivs-)) |
---|
470 | (_ (values nil derivs))))) |
---|
471 | (alt1 |
---|
472 | (lazy |
---|
473 | (match (deriv-and derivs) |
---|
474 | ((:result val derivs-) |
---|
475 | (values :and derivs-)) |
---|
476 | (_ alt2))))) |
---|
477 | (receive (val derivs-) (force alt1) |
---|
478 | (match (deriv-suffix derivs-) |
---|
479 | ((:result suffix derivs--) |
---|
480 | (if (null? val) |
---|
481 | (make-result suffix derivs--) |
---|
482 | (make-result `(,val ,suffix) derivs--))) |
---|
483 | (_ nil)))))) |
---|
484 | (glob-alt1 |
---|
485 | (lazy |
---|
486 | (match (deriv-identifier-clause derivs) |
---|
487 | ((:result ident-clause derivs-) |
---|
488 | (make-result ident-clause derivs-)) |
---|
489 | (_ glob-alt2))))) |
---|
490 | (force glob-alt1))) |
---|
491 | |
---|
492 | (define (parse-sequence derivs) |
---|
493 | (receive (seqs derivs) |
---|
494 | (let loop((ret ()) (derivs derivs)) |
---|
495 | (match (deriv-prefix derivs) |
---|
496 | ((:result val derivs-) |
---|
497 | (loop (cons val ret) derivs-)) |
---|
498 | (_ (values (reverse ret) derivs)))) |
---|
499 | (let* ((alt2 |
---|
500 | (lazy |
---|
501 | (make-result `(:sequence ,seqs :callback ()) derivs))) |
---|
502 | (alt1 |
---|
503 | (lazy |
---|
504 | (match (deriv-return derivs) |
---|
505 | ((:result val derivs-) |
---|
506 | (match (deriv-token derivs-) |
---|
507 | ((:result callback derivs--) |
---|
508 | (make-result `(:sequence ,seqs :callback ,callback) derivs--)) |
---|
509 | (_ alt2))) |
---|
510 | (_ alt2))))) |
---|
511 | (force alt1)))) |
---|
512 | |
---|
513 | (define (parse-expression derivs) |
---|
514 | (match (deriv-sequence derivs) |
---|
515 | ((:result seq derivs-) |
---|
516 | (receive (ret derivs) |
---|
517 | (let loop((ret `(,seq)) (derivs derivs-)) |
---|
518 | (match (deriv-slash derivs) |
---|
519 | ((:result sla derivs-) |
---|
520 | (match (deriv-sequence derivs-) |
---|
521 | ((:result seq derivs--) |
---|
522 | (loop (cons seq ret) derivs--)) |
---|
523 | (_ (values (reverse ret) derivs)))) |
---|
524 | (_ (values (reverse ret) derivs)))) |
---|
525 | (make-result (if (null? (cdr ret)) |
---|
526 | (car ret) |
---|
527 | (cons :expression ret)) |
---|
528 | derivs))))) |
---|
529 | |
---|
530 | (define (parse-definition derivs) |
---|
531 | (match (deriv-identifier derivs) |
---|
532 | ((:result ident derivs-) |
---|
533 | (match (deriv-leftarrow derivs-) |
---|
534 | ((:result larr derivs--) |
---|
535 | (match (deriv-expression derivs--) |
---|
536 | ((:result expr derivs---) |
---|
537 | (make-result `(:definition ,ident ,expr) derivs---)) |
---|
538 | (_ nil))) |
---|
539 | (_ nil))) |
---|
540 | (_ nil))) |
---|
541 | |
---|
542 | ;;; |
---|
543 | ;;; AST translation to PEG |
---|
544 | ;;; |
---|
545 | ;;; ref. WiLiKi:Rui:ParsingExpressionGrammar |
---|
546 | ;;; |
---|
547 | |
---|
548 | ;; |
---|
549 | ;; expected input |
---|
550 | ;; * identifier |
---|
551 | ;; (:identifier <symbol>) |
---|
552 | ;; * identifier-clause |
---|
553 | ;; (:identifier-clause (:identifier <symbol>) :capture <symbol>) |
---|
554 | ;; * char(literal) |
---|
555 | ;; (:char <char>) |
---|
556 | ;; * string(literal) |
---|
557 | ;; (:string <string>) |
---|
558 | ;; * char-set(class) |
---|
559 | ;; (:char-set <char-set>) |
---|
560 | ;; * expression |
---|
561 | ;; (:expression (:sequence (...) :callback ...) ...) |
---|
562 | ;; |
---|
563 | ;; return (values <parser> <captured-name>) |
---|
564 | ;; |
---|
565 | (define (translate-primary primary) |
---|
566 | (let ((tag (car primary)) |
---|
567 | (value (cadr primary))) |
---|
568 | (case tag |
---|
569 | ((:identifier) value) |
---|
570 | ((:char) `($char ,value)) |
---|
571 | ((:string) `($string ,value)) |
---|
572 | ((:char-set) `($one-of ,value)) |
---|
573 | ((:sequence) (translate-sequence primary)) |
---|
574 | ((:expression) (translate-expression primary))))) |
---|
575 | |
---|
576 | ;; |
---|
577 | ;; expected input |
---|
578 | ;; * more than zero |
---|
579 | ;; (:zero-more <primary>) |
---|
580 | ;; * more than one |
---|
581 | ;; (:one-more <primary>) |
---|
582 | ;; * primary |
---|
583 | ;; <primary> (see translate-primary) |
---|
584 | ;; |
---|
585 | ;; return (values <parser> <captured-name>) |
---|
586 | ;; |
---|
587 | (define (translate-suffix suffix) |
---|
588 | (let ((tag (car suffix)) |
---|
589 | (value (cadr suffix))) |
---|
590 | (case tag |
---|
591 | ((:zero-more) `($many ,(translate-primary value))) |
---|
592 | ((:one-more) `($many ,(translate-primary value) 1)) |
---|
593 | ((:optional) `($optional ,(translate-primary value))) |
---|
594 | (else (translate-primary suffix))))) |
---|
595 | |
---|
596 | ;; |
---|
597 | ;; expected input |
---|
598 | ;; * and(syntactic predicate) |
---|
599 | ;; (:and <suffix>) |
---|
600 | ;; * not(syntactic predicate) |
---|
601 | ;; (:not <suffix>) |
---|
602 | ;; * suffix |
---|
603 | ;; <suffix> (see translate-suffix) |
---|
604 | ;; |
---|
605 | ;; return (values <parser> <ident-clause?>) |
---|
606 | ;; |
---|
607 | (define (translate-prefix prefix) |
---|
608 | (let ((tag (car prefix)) |
---|
609 | (value (cadr prefix))) |
---|
610 | (case tag |
---|
611 | ((:not) (values `($not ,(translate-suffix value)) #f)) |
---|
612 | ((:identifier-clause) |
---|
613 | (values `(,(get-keyword :capture prefix) |
---|
614 | ,(translate-primary value)) |
---|
615 | #t)) |
---|
616 | (else (values (translate-suffix prefix) #f))))) |
---|
617 | |
---|
618 | (define (translate-sequence sequence) |
---|
619 | (let ((tag (car sequence)) |
---|
620 | (prefixes (cadr sequence)) |
---|
621 | (callback (get-keyword :callback sequence #f))) |
---|
622 | (case tag |
---|
623 | ((:sequence) |
---|
624 | (if (null? prefixes) |
---|
625 | `($do ($return ,callback)) |
---|
626 | (if |
---|
627 | (and (null? (cdr prefixes)) (null? callback)) |
---|
628 | (translate-prefix (car prefixes)) |
---|
629 | (let loop((items nil) (capture-names nil) (prefixes prefixes)) |
---|
630 | (cond |
---|
631 | ((null? prefixes) |
---|
632 | (cons |
---|
633 | '$do |
---|
634 | (reverse |
---|
635 | (cons |
---|
636 | (if (null? callback) |
---|
637 | `($return (list ,@(reverse capture-names))) |
---|
638 | `($return ,callback)) |
---|
639 | items)))) |
---|
640 | (else |
---|
641 | (receive (prefix ident-clause?) |
---|
642 | (translate-prefix (car prefixes)) |
---|
643 | (let1 capt (gensym) |
---|
644 | (loop (cons (if ident-clause? |
---|
645 | prefix |
---|
646 | `(,capt ,prefix)) |
---|
647 | items) |
---|
648 | (cons (if ident-clause? |
---|
649 | (car prefix) |
---|
650 | capt) |
---|
651 | capture-names) |
---|
652 | (cdr prefixes) |
---|
653 | ))))))))) |
---|
654 | (else |
---|
655 | (translate-prefix sequence))))) |
---|
656 | |
---|
657 | (define (translate-expression expression) |
---|
658 | (let ((tag (car expression))) |
---|
659 | (case tag |
---|
660 | ((:expression) |
---|
661 | (let1 sequences (cdr expression) |
---|
662 | (if (null? (cdr sequences)) |
---|
663 | (translate-sequence (car sequences)) |
---|
664 | `($or ,@(map translate-sequence sequences))))) |
---|
665 | (else |
---|
666 | (translate-sequence expression))))) |
---|
667 | |
---|
668 | (define (translate-definition definition) |
---|
669 | (let ((tag (car definition)) |
---|
670 | (ident (cadr (cadr definition))) |
---|
671 | (expression (caddr definition))) |
---|
672 | (case tag |
---|
673 | ((:definition) |
---|
674 | `(define ,ident ,(translate-expression expression))) |
---|
675 | (else (error "Translation error"))))) |
---|
676 | |
---|
677 | (define (hoge) |
---|
678 | (let* ((spaces ($many ($one-of #[ \t]))) |
---|
679 | (comma ($seq spaces ($char #\,) spaces)) |
---|
680 | (dquote ($char #\")) |
---|
681 | (double-dquote ($do (($string "\"\"")) ($return #\"))) |
---|
682 | (quoted-body ($many ($or double-dquote ($one-of #[^\"])))) |
---|
683 | (quoted ($between dquote quoted-body dquote)) |
---|
684 | (unquoted ($many-till anychar ($or comma newline))) |
---|
685 | (field ($or quoted unquoted)) |
---|
686 | (record ($sep-by ($->rope field) comma))) |
---|
687 | #?=(parse-string record "a,b,c") |
---|
688 | #?=(parse-string record "\"a\" , b , c") |
---|
689 | #?=(parse-string record "\"a \"\" \n\" , b , c")) |
---|
690 | |
---|
691 | (parse-string-with |
---|
692 | ((Record <- (:fld Field) (:suffix RecordSuffix) :return (cons fld suffix)) |
---|
693 | (RecordSuffix <- Comma (:fld Field) (:suffix RecordSuffix) |
---|
694 | :return (cons fld suffix) |
---|
695 | / :return () ) |
---|
696 | (Field <- (:charlist Quoted) :return (list->string charlist) |
---|
697 | / (:charlist UnQuoted) :return (list->string charlist)) |
---|
698 | (Spaces <- #[ \t] *) |
---|
699 | (Comma <- Spaces #\, Spaces) |
---|
700 | (DQuote <- #\") |
---|
701 | (DoubleDQuote <- #\" #\" :return #\") |
---|
702 | (Quoted <- DQuote |
---|
703 | (:body ((DoubleDQuote / #[^\"]) *)) |
---|
704 | DQuote |
---|
705 | :return body) |
---|
706 | (UnQuoted <- ((! (Comma / newline) (:ch anychar) :return ch) *)) |
---|
707 | ) |
---|
708 | "\"a\" , b , c") |
---|
709 | ) |
---|
710 | |
---|
711 | ;; (parse-string-with |
---|
712 | ;; ((Record <- (:fld Field) |
---|
713 | ;; (:rest-fld ((Comma (:fld Field) :return fld) *)) |
---|
714 | ;; :return (cons fld rest-fld)) |
---|
715 | ;; (Field <- (:charlist Quoted) :return (list->string charlist) |
---|
716 | ;; / (:charlist UnQuoted) :return (list->string charlist)) |
---|
717 | ;; (Spaces <- #[ \t] *) |
---|
718 | ;; (Comma <- Spaces #\, Spaces) |
---|
719 | ;; (DQuote <- #\") |
---|
720 | ;; (Quoted <- DQuote |
---|
721 | ;; (:body ((#\" #\" :return #\" / #[^\"]) *)) |
---|
722 | ;; DQuote |
---|
723 | ;; :return body) |
---|
724 | ;; (UnQuoted <- ! (Comma / newline) (:ch anychar) (:unq UnQuoted) |
---|
725 | ;; :return (cons ch unq) |
---|
726 | ;; / :return ()) |
---|
727 | ;; ) |
---|
728 | ;; "\"a\"\"bc\" , b , c") |
---|
729 | |
---|
730 | ;; (parse-string-with |
---|
731 | ;; ((A <- "a" A "a" / "a")) |
---|
732 | ;; "aaaaa") |
---|