[15] | 1 | #!/usr/bin/env gosh |
---|
| 2 | ;; -*- coding: utf-8 mode: scheme -*- |
---|
| 3 | |
---|
| 4 | ;;; |
---|
| 5 | ;;; Original PEG Grammars |
---|
| 6 | ;;; |
---|
| 7 | ;; Hierarchical syntax |
---|
| 8 | ; Grammar <- Spacing Definition+ EndOfFile |
---|
| 9 | ; Definition <- Identifier LEFTARROW Expression |
---|
| 10 | ; |
---|
| 11 | ; Expression <- Sequence (SLASH Sequence)* |
---|
| 12 | ; Sequence <- Prefix* |
---|
| 13 | ; Prefix <- (AND / NOT)? Suffix |
---|
| 14 | ; Suffix <- Primary (QUESTION / STAR / PLUS)? |
---|
| 15 | ; Primary <- Identifier !LEFTARROW |
---|
| 16 | ; / OPEN Expression CLOSE |
---|
| 17 | ; / Literal / Class / DOT |
---|
| 18 | ; |
---|
| 19 | ;; Lexical syntax |
---|
| 20 | ; Identifier <- IdentStart IdentCont* Spacing |
---|
| 21 | ; IdentStart <- [a-zA-Z_] |
---|
| 22 | ; IdentCont <- IdentStart / [0-9] |
---|
| 23 | ; |
---|
| 24 | ; Literal <- ['] (!['] Char)* ['] Spacing |
---|
| 25 | ; / ["] (!["] Char)* ["] Spacing |
---|
| 26 | ; Class <- '[' (!']' Range)* ']' Spacing |
---|
| 27 | ; Range <- Char '-' Char / Char |
---|
| 28 | ; Char <- '\\' [nrt'"\[\]\\] |
---|
| 29 | ; / '\\' [0-2][0-7][0-7] |
---|
| 30 | ; / '\\' [0-7][0-7]? |
---|
| 31 | ; / !'\\' . |
---|
| 32 | ; |
---|
| 33 | ; LEFTARROW <- '<-' Spacing |
---|
| 34 | ; SLASH <- '/' Spacing |
---|
| 35 | ; AND <- '&' Spacing |
---|
| 36 | ; NOT <- '!' Spacing |
---|
| 37 | ; QUESTION <- '?' Spacing |
---|
| 38 | ; STAR <- '*' Spacing |
---|
| 39 | ; PLUS <- '+' Spacing |
---|
| 40 | ; OPEN <- '(' Spacing |
---|
| 41 | ; CLOSE <- ')' Spacing |
---|
| 42 | ; DOT <- '.' Spacing |
---|
| 43 | ; |
---|
| 44 | ; Spacing <- (Space / Comment)* |
---|
| 45 | ; Comment <- '#' (!EndOfLine .)* EndOfLine |
---|
| 46 | ; Space <- ' ' / '\t' / EndOfLine |
---|
| 47 | ; EndOfLine <- '\r\n' / '\n' / '\r' |
---|
| 48 | ; EndOfFile <- !. |
---|
| 49 | |
---|
| 50 | ;;; |
---|
| 51 | ;;; S-exp PEG Grammar |
---|
| 52 | ;;; |
---|
| 53 | ; |
---|
| 54 | ;; Hierarchical syntax |
---|
| 55 | ; |
---|
| 56 | ; Definition <- Identifier LEFTARROW Expression |
---|
| 57 | ; |
---|
| 58 | ; Expression <- Sequence (SLASH Sequence)* |
---|
| 59 | ; Sequence <- Prefix* (RETURN .)? |
---|
| 60 | ; Prefix <- IdentifierClause / (AND / NOT)? Suffix |
---|
| 61 | ; Suffix <- Primary (QUESTION / STAR / PLUS)? |
---|
| 62 | ; IdentifierClause <- OPEN CaptureName Primary CLOSE |
---|
| 63 | ; Primary <- Identifier !LEFTARROW |
---|
| 64 | ; / OPEN Expression CLOSE |
---|
| 65 | ; / Literal / Class / ANY |
---|
| 66 | ; |
---|
| 67 | ;; Lexical syntax |
---|
| 68 | ; |
---|
| 69 | ; Identifier <- !NonIdentifier <scheme-symbol> |
---|
| 70 | ; NonIdentifier <- OPEN / CLOSE |
---|
| 71 | ; CaptureName <- <scheme-keyword> |
---|
| 72 | ; Literal <- <scheme-string> / <scheme-char> |
---|
| 73 | ; Class <- <scheme-char-set> |
---|
| 74 | ; |
---|
| 75 | ; OPEN <- |<| |
---|
| 76 | ; CLOSE <- |>| |
---|
| 77 | ; LEFTARROW <- |<-| |
---|
| 78 | ; SLASH <- |/| |
---|
| 79 | ; AND <- |&| |
---|
| 80 | ; NOT <- |!| |
---|
| 81 | ; QUESTION <- |?| |
---|
| 82 | ; STAR <- |*| |
---|
| 83 | ; PLUS <- |+| |
---|
| 84 | ; RETURN <- :return |
---|
| 85 | ; ANY <- |%any%| |
---|
| 86 | |
---|
| 87 | ; (define-parser math-exp |
---|
| 88 | ; (additive <- (multitive left-val) #\+ (additive right-val) |
---|
| 89 | ; &return (+ left-val right-val) |
---|
| 90 | ; / multitive) |
---|
| 91 | ; (multitive <- (primary left-val) #\* (multitive right-val) |
---|
| 92 | ; &return (* left-val right-val) |
---|
| 93 | ; / primary) |
---|
| 94 | ; (primary <- #\( (additive val) #\) &return val |
---|
| 95 | ; / decimal) |
---|
| 96 | ; (decimal <- (#[0-9] num-char) |
---|
| 97 | ; &return (- (char->integer num-char) (char->integer #\0)))) |
---|
| 98 | |
---|
| 99 | (use peg) |
---|
| 100 | (use util.match) |
---|
| 101 | |
---|
| 102 | (define (main args) |
---|
| 103 | 0) |
---|
| 104 | |
---|
| 105 | (define nil ()) |
---|
| 106 | |
---|
| 107 | (define-class <derivs> () |
---|
| 108 | (; Hierarchical tokens (non-terminals) |
---|
| 109 | (grammar :init-keyword :grammar) |
---|
| 110 | (definition :init-keyword :definition) |
---|
| 111 | (expression :init-keyword :expression) |
---|
| 112 | (sequence :init-keyword :sequence) |
---|
| 113 | (prefix :init-keyword :prefix) |
---|
| 114 | (suffix :init-keyword :suffix) |
---|
| 115 | (identifier-clause :init-keyword :identifier-clause) |
---|
| 116 | (primary :init-keyword :primary) |
---|
| 117 | |
---|
| 118 | ; Lexical tokens (terminals) |
---|
| 119 | (identifier :init-keyword :identifier) |
---|
| 120 | (non-identifier :init-keyword :non-identifier) |
---|
| 121 | (capture-name :init-keyword :capture-name) |
---|
| 122 | (literal :init-keyword :literal) |
---|
| 123 | (class :init-keyword :class) |
---|
| 124 | |
---|
| 125 | (open :init-keyword :open) |
---|
| 126 | (close :init-keyword :close) |
---|
| 127 | (leftarrow :init-keyword :leftarrow) |
---|
| 128 | (return :init-keyword :return) |
---|
| 129 | (any :init-keyword :any) |
---|
| 130 | (plus :init-keyword :plus) |
---|
| 131 | (star :init-keyword :star) |
---|
| 132 | (question :init-keyword :question) |
---|
| 133 | (not :init-keyword :not) |
---|
| 134 | (and :init-keyword :and) |
---|
| 135 | (slash :init-keyword :slash) |
---|
| 136 | |
---|
| 137 | ; Raw input |
---|
| 138 | (token :init-keyword :token :getter deriv-token) |
---|
| 139 | )) |
---|
| 140 | |
---|
| 141 | (define-macro (define-deriv name) |
---|
| 142 | `(define-method ,(string->symbol |
---|
| 143 | (string-append "deriv-" (symbol->string name))) |
---|
| 144 | ((derivs <derivs>)) |
---|
| 145 | (let1 slt (slot-ref derivs ',name) |
---|
| 146 | (if (promise? slt) |
---|
| 147 | (let1 forced (force slt) |
---|
| 148 | (slot-set! derivs ',name forced) |
---|
| 149 | forced) |
---|
| 150 | slt)))) |
---|
| 151 | |
---|
| 152 | (define-macro (define-derivs . names) |
---|
| 153 | `(begin |
---|
| 154 | ,@(map |
---|
| 155 | (lambda (deriv-name) |
---|
| 156 | `(define-deriv ,deriv-name)) |
---|
| 157 | names))) |
---|
| 158 | |
---|
| 159 | (define-derivs ; define generic function *-deriv |
---|
| 160 | ; for hierarchical syntax |
---|
| 161 | grammar definition expression sequence prefix suffix primary identifier-clause |
---|
| 162 | ; for lexical tokens |
---|
| 163 | identifier non-identifier capture-name literal class |
---|
| 164 | open close leftarrow slash and not question star plus return any) |
---|
| 165 | |
---|
| 166 | (define (make-result val chars) |
---|
| 167 | `(:result ,val ,chars)) |
---|
| 168 | |
---|
| 169 | (define (parse tokens) |
---|
| 170 | (let loop((ret-deriv nil) (tokens (cons nil (reverse tokens)))) |
---|
| 171 | (if (null? tokens) |
---|
| 172 | ret-deriv |
---|
| 173 | (loop |
---|
| 174 | (letrec |
---|
| 175 | ((deriv |
---|
| 176 | (make <derivs> |
---|
| 177 | ; code for generating code |
---|
| 178 | ; (for-each (lambda (ident) (format #t ":~a (lazy (parse-~a deriv))\n" ident ident)) '(leftarrow slash and not question star plus any)) |
---|
| 179 | |
---|
| 180 | ; hierarychical syntax |
---|
| 181 | :grammar (lazy (parse-grammar deriv)) |
---|
| 182 | :definition (lazy (parse-definition deriv)) |
---|
| 183 | :expression (lazy (parse-expression deriv)) |
---|
| 184 | :sequence (lazy (parse-sequence deriv)) |
---|
| 185 | :prefix (lazy (parse-prefix deriv)) |
---|
| 186 | :suffix (lazy (parse-suffix deriv)) |
---|
| 187 | :identifier-clause (lazy (parse-identifier-clause deriv)) |
---|
| 188 | :primary (lazy (parse-primary deriv)) |
---|
| 189 | |
---|
| 190 | ; lexical syntax |
---|
| 191 | :identifier (lazy (parse-identifier deriv)) |
---|
| 192 | :non-identifier (lazy (parse-non-identifier deriv)) |
---|
| 193 | :capture-name (lazy (parse-capture-name deriv)) |
---|
| 194 | :literal (lazy (parse-literal deriv)) |
---|
| 195 | :class (lazy (parse-class deriv)) |
---|
| 196 | |
---|
| 197 | :open (lazy (parse-open deriv)) |
---|
| 198 | :close (lazy (parse-close deriv)) |
---|
| 199 | :leftarrow (lazy (parse-leftarrow deriv)) |
---|
| 200 | :slash (lazy (parse-slash deriv)) |
---|
| 201 | :and (lazy (parse-and deriv)) |
---|
| 202 | :not (lazy (parse-not deriv)) |
---|
| 203 | :question (lazy (parse-question deriv)) |
---|
| 204 | :star (lazy (parse-star deriv)) |
---|
| 205 | :plus (lazy (parse-plus deriv)) |
---|
| 206 | :return (lazy (parse-return deriv)) |
---|
| 207 | :any (lazy (parse-any deriv)) |
---|
| 208 | |
---|
| 209 | ;raw input |
---|
| 210 | :token (if (null? tokens) |
---|
| 211 | nil |
---|
| 212 | (make-result (car tokens) ret-deriv))))) |
---|
| 213 | deriv) |
---|
| 214 | (cdr tokens))))) |
---|
| 215 | |
---|
| 216 | (define (parse-grammars pegs) |
---|
| 217 | (map |
---|
| 218 | (lambda (gram) |
---|
| 219 | (let1 gram (flatten-grammar gram) |
---|
| 220 | (let* ((ret (deriv-definition (parse gram))) |
---|
| 221 | (derivs (caddr (deriv-token (caddr ret))))) |
---|
| 222 | (if (not (null? derivs)) |
---|
| 223 | (error (format #f "Syntax error: ~a" gram)) |
---|
| 224 | (cadr ret))))) |
---|
| 225 | pegs)) |
---|
| 226 | |
---|
| 227 | |
---|
| 228 | ;;; |
---|
| 229 | ;;; Preprocessor of grammar input |
---|
| 230 | ;;; |
---|
| 231 | ;;; input: |
---|
| 232 | ;;; '(foo -> (e1 bar) (e2 baz) :return (do 'sth) / (e3 hoge) :return (do 'anth)) |
---|
| 233 | ;;; |
---|
| 234 | ;;; output: |
---|
| 235 | ;;; '(foo -> < e1 bar > < e2 baz > :return (do 'sth) / < e3 hoge > :return (do 'anth)) |
---|
| 236 | ;;; |
---|
| 237 | (define (flatten-grammar gram) |
---|
| 238 | (let loop((ret nil) (gram (reverse gram)) (expression? #f)) |
---|
| 239 | (if (null? gram) |
---|
| 240 | ret |
---|
| 241 | (loop |
---|
| 242 | (if (and expression? |
---|
| 243 | (list? (car gram))) |
---|
| 244 | (append `(< ,@(car gram) >) ret) |
---|
| 245 | (cons (car gram) ret)) |
---|
| 246 | (cdr gram) |
---|
| 247 | (cond |
---|
| 248 | ((eq? '/ (car gram)) #f) |
---|
| 249 | ((eq? :return (car gram)) #t) |
---|
| 250 | (else expression?)))))) |
---|
| 251 | |
---|
| 252 | (define (flatten-grammar gram) |
---|
| 253 | (concat-with |
---|
| 254 | '/ |
---|
| 255 | (map |
---|
| 256 | (lambda (sequence) |
---|
| 257 | (concat-with |
---|
| 258 | :return |
---|
| 259 | (let1 seq-pair (separate (cut eq? :return <>) sequence) |
---|
| 260 | `(,(apply append |
---|
| 261 | (map (lambda (elem) |
---|
| 262 | (if (pair? elem) |
---|
| 263 | `(< ,@(flatten-grammar elem) >) |
---|
| 264 | `(,elem))) |
---|
| 265 | (car seq-pair))) |
---|
| 266 | ,@(cdr seq-pair))))) |
---|
| 267 | (separate (cut eq? '/ <>) gram)))) |
---|
| 268 | |
---|
| 269 | (define (separate separator? ls) |
---|
| 270 | (let loop1((ret ()) (lst ls)) |
---|
| 271 | (if (null? lst) |
---|
| 272 | (if (separator? (car (reverse ls))) |
---|
| 273 | (reverse (cons () ret)) |
---|
| 274 | (reverse ret)) |
---|
| 275 | (receive (head tails) |
---|
| 276 | (let loop2((ret ()) (ls lst)) |
---|
| 277 | (cond |
---|
| 278 | ((null? ls) (values (reverse ret) ls)) |
---|
| 279 | ((separator? (car ls)) |
---|
| 280 | (values (reverse ret) (cdr ls))) |
---|
| 281 | (else |
---|
| 282 | (loop2 (cons (car ls) ret) (cdr ls))))) |
---|
| 283 | (loop1 (cons head ret) |
---|
| 284 | tails))))) |
---|
| 285 | |
---|
| 286 | (define (concat-with joiner ls) |
---|
| 287 | (let loop((ret (car (reverse ls))) (ls (cdr (reverse ls)))) |
---|
| 288 | (if (null? ls) |
---|
| 289 | ret |
---|
| 290 | (loop `(,@(car ls) ,joiner ,@ret) (cdr ls))))) |
---|
| 291 | |
---|
| 292 | (define-macro (parse-string-with grammars str) |
---|
| 293 | `(let () |
---|
| 294 | ,@(map translate-definition |
---|
| 295 | (parse-grammars grammars)) |
---|
| 296 | (parse-string ,(caar grammars) ,str))) |
---|
| 297 | |
---|
| 298 | |
---|
| 299 | (define-macro (define-parse-symbol ident sym) |
---|
| 300 | `(define (,(string->symbol (string-append "parse-" (symbol->string ident))) derivs) |
---|
| 301 | (match (deriv-token derivs) |
---|
| 302 | ((:result ',sym derivs-) (make-result ',sym derivs-)) |
---|
| 303 | (_ nil)))) |
---|
| 304 | |
---|
| 305 | |
---|
| 306 | (define-parse-symbol open <) |
---|
| 307 | (define-parse-symbol close >) |
---|
| 308 | (define-parse-symbol leftarrow <-) |
---|
| 309 | (define-parse-symbol slash /) |
---|
| 310 | (define-parse-symbol and &) |
---|
| 311 | (define-parse-symbol not !) |
---|
| 312 | (define-parse-symbol question ?) |
---|
| 313 | (define-parse-symbol star *) |
---|
| 314 | (define-parse-symbol plus +) |
---|
| 315 | (define-parse-symbol return :return) |
---|
| 316 | (define-parse-symbol any %any%) |
---|
| 317 | |
---|
| 318 | (define (parse-class derivs) |
---|
| 319 | (match (deriv-token derivs) |
---|
| 320 | ((:result val derivs-) |
---|
| 321 | (if (char-set? val) |
---|
| 322 | (make-result `(:char-set ,val) derivs-) |
---|
| 323 | nil)) |
---|
| 324 | (_ nil))) |
---|
| 325 | |
---|
| 326 | (define (parse-literal derivs) |
---|
| 327 | (match (deriv-token derivs) |
---|
| 328 | ((:result val derivs-) |
---|
| 329 | (cond |
---|
| 330 | ((string? val) (make-result `(:string ,val) derivs-)) |
---|
| 331 | ((char? val) (make-result `(:char ,val) derivs-)) |
---|
| 332 | (else nil))) |
---|
| 333 | (_ nil))) |
---|
| 334 | |
---|
| 335 | (define (parse-capture-name derivs) |
---|
| 336 | (match (deriv-token derivs) |
---|
| 337 | ((:result val derivs-) |
---|
| 338 | (if (keyword? val) |
---|
| 339 | (make-result val derivs-) |
---|
| 340 | nil)) |
---|
| 341 | (_ nil))) |
---|
| 342 | |
---|
| 343 | (define (parse-non-identifier derivs) |
---|
| 344 | (let* ((alt3 |
---|
| 345 | (lazy |
---|
| 346 | (match (deriv-slash derivs) |
---|
| 347 | ((:result ret derivs-) |
---|
| 348 | (make-result '/ derivs-)) |
---|
| 349 | (_ nil)))) |
---|
| 350 | (alt2 |
---|
| 351 | (lazy |
---|
| 352 | (match (deriv-close derivs) |
---|
| 353 | ((:result ret derivs-) |
---|
| 354 | (make-result '> derivs-)) |
---|
| 355 | (_ alt3)))) |
---|
| 356 | (alt1 |
---|
| 357 | (lazy |
---|
| 358 | (match (deriv-open derivs) |
---|
| 359 | ((:result ret derivs-) |
---|
| 360 | (make-result '< derivs-)) |
---|
| 361 | (_ alt2))))) |
---|
| 362 | (force alt1))) |
---|
| 363 | |
---|
| 364 | (define (parse-identifier derivs) |
---|
| 365 | (match (deriv-non-identifier derivs) |
---|
| 366 | (() |
---|
| 367 | (match (deriv-token derivs) |
---|
| 368 | ((:result val derivs-) |
---|
| 369 | (if (symbol? val) |
---|
| 370 | (make-result `(:identifier ,val) derivs-) |
---|
| 371 | nil)) |
---|
| 372 | (_ nil))) |
---|
| 373 | (_ nil))) |
---|
| 374 | |
---|
| 375 | (define (parse-primary derivs) |
---|
| 376 | (let* ((alt6 |
---|
| 377 | (lazy |
---|
| 378 | (match (deriv-any derivs) |
---|
| 379 | ((:result val derivs-) |
---|
| 380 | (make-result val derivs-)) |
---|
| 381 | (_ nil)))) |
---|
| 382 | (alt5 |
---|
| 383 | (lazy |
---|
| 384 | (match (deriv-class derivs) |
---|
| 385 | ((:result val derivs-) |
---|
| 386 | (make-result val derivs-)) |
---|
| 387 | (_ alt6)))) |
---|
| 388 | (alt4 |
---|
| 389 | (lazy |
---|
| 390 | (match (deriv-literal derivs) |
---|
| 391 | ((:result val derivs-) |
---|
| 392 | (make-result val derivs-)) |
---|
| 393 | (_ alt5)))) |
---|
| 394 | (alt3 |
---|
| 395 | (lazy |
---|
| 396 | (match (deriv-open derivs) |
---|
| 397 | ((:result val derivs-) |
---|
| 398 | (match (deriv-expression derivs-) |
---|
| 399 | ((:result exp derivs--) |
---|
| 400 | (match (deriv-close derivs--) |
---|
| 401 | ((:result val derivs---) |
---|
| 402 | (make-result exp derivs---)) |
---|
| 403 | (_ alt4))) |
---|
| 404 | (_ alt4))) |
---|
| 405 | (_ alt4)))) |
---|
| 406 | (alt1 |
---|
| 407 | (lazy |
---|
| 408 | (match (deriv-identifier derivs) |
---|
| 409 | ((:result ident derivs-) |
---|
| 410 | (match (deriv-leftarrow derivs-) |
---|
| 411 | ((:result val derivs--) alt3) |
---|
| 412 | (_ (make-result ident derivs-)))) |
---|
| 413 | (_ alt3))))) |
---|
| 414 | (force alt1))) |
---|
| 415 | |
---|
| 416 | (define (parse-identifier-clause derivs) |
---|
| 417 | (let* ((alt1 |
---|
| 418 | (lazy |
---|
| 419 | (match (deriv-open derivs) |
---|
| 420 | ((:result opn derivs-) |
---|
| 421 | (match (deriv-capture-name derivs-) |
---|
| 422 | ((:result capt derivs--) |
---|
| 423 | (match (deriv-primary derivs--) |
---|
| 424 | ((:result primary derivs---) |
---|
| 425 | (match (deriv-close derivs---) |
---|
| 426 | ((:result cls derivs----) |
---|
| 427 | (make-result `(:identifier-clause |
---|
| 428 | ,primary |
---|
| 429 | :capture |
---|
| 430 | ,(string->symbol (x->string capt))) |
---|
| 431 | derivs----)) |
---|
| 432 | (_ nil))) |
---|
| 433 | (_ nil))) |
---|
| 434 | (_ nil))) |
---|
| 435 | (_ nil))))) |
---|
| 436 | (force alt1))) |
---|
| 437 | |
---|
| 438 | (define (parse-suffix derivs) |
---|
| 439 | (match (deriv-primary derivs) |
---|
| 440 | ((:result prim derivs-) |
---|
| 441 | (let* ((alt3 |
---|
| 442 | (lazy |
---|
| 443 | (match (deriv-plus derivs-) |
---|
| 444 | ((:result plus derivs--) |
---|
| 445 | (make-result `(:one-more ,prim) derivs--)) |
---|
| 446 | (_ (make-result prim derivs-))))) |
---|
| 447 | (alt2 |
---|
| 448 | (lazy |
---|
| 449 | (match (deriv-star derivs-) |
---|
| 450 | ((:result star derivs--) |
---|
| 451 | (make-result `(:zero-more ,prim) derivs--)) |
---|
| 452 | (_ alt3)))) |
---|
| 453 | (alt1 |
---|
| 454 | (lazy |
---|
| 455 | (match (deriv-question derivs-) |
---|
| 456 | ((:result qstn derivs--) |
---|
| 457 | (make-result `(:optional ,prim) derivs--)) |
---|
| 458 | (_ alt2))))) |
---|
| 459 | (force alt1))) |
---|
| 460 | (_ nil))) |
---|
| 461 | |
---|
| 462 | (define (parse-prefix derivs) |
---|
| 463 | (let* ((glob-alt2 |
---|
| 464 | (lazy |
---|
| 465 | (let* ((alt2 |
---|
| 466 | (lazy |
---|
| 467 | (match (deriv-not derivs) |
---|
| 468 | ((:result val derivs-) |
---|
| 469 | (values :not derivs-)) |
---|
| 470 | (_ (values nil derivs))))) |
---|
| 471 | (alt1 |
---|
| 472 | (lazy |
---|
| 473 | (match (deriv-and derivs) |
---|
| 474 | ((:result val derivs-) |
---|
| 475 | (values :and derivs-)) |
---|
| 476 | (_ alt2))))) |
---|
| 477 | (receive (val derivs-) (force alt1) |
---|
| 478 | (match (deriv-suffix derivs-) |
---|
| 479 | ((:result suffix derivs--) |
---|
| 480 | (if (null? val) |
---|
| 481 | (make-result suffix derivs--) |
---|
| 482 | (make-result `(,val ,suffix) derivs--))) |
---|
| 483 | (_ nil)))))) |
---|
| 484 | (glob-alt1 |
---|
| 485 | (lazy |
---|
| 486 | (match (deriv-identifier-clause derivs) |
---|
| 487 | ((:result ident-clause derivs-) |
---|
| 488 | (make-result ident-clause derivs-)) |
---|
| 489 | (_ glob-alt2))))) |
---|
| 490 | (force glob-alt1))) |
---|
| 491 | |
---|
| 492 | (define (parse-sequence derivs) |
---|
| 493 | (receive (seqs derivs) |
---|
| 494 | (let loop((ret ()) (derivs derivs)) |
---|
| 495 | (match (deriv-prefix derivs) |
---|
| 496 | ((:result val derivs-) |
---|
| 497 | (loop (cons val ret) derivs-)) |
---|
| 498 | (_ (values (reverse ret) derivs)))) |
---|
| 499 | (let* ((alt2 |
---|
| 500 | (lazy |
---|
| 501 | (make-result `(:sequence ,seqs :callback ()) derivs))) |
---|
| 502 | (alt1 |
---|
| 503 | (lazy |
---|
| 504 | (match (deriv-return derivs) |
---|
| 505 | ((:result val derivs-) |
---|
| 506 | (match (deriv-token derivs-) |
---|
| 507 | ((:result callback derivs--) |
---|
| 508 | (make-result `(:sequence ,seqs :callback ,callback) derivs--)) |
---|
| 509 | (_ alt2))) |
---|
| 510 | (_ alt2))))) |
---|
| 511 | (force alt1)))) |
---|
| 512 | |
---|
| 513 | (define (parse-expression derivs) |
---|
| 514 | (match (deriv-sequence derivs) |
---|
| 515 | ((:result seq derivs-) |
---|
| 516 | (receive (ret derivs) |
---|
| 517 | (let loop((ret `(,seq)) (derivs derivs-)) |
---|
| 518 | (match (deriv-slash derivs) |
---|
| 519 | ((:result sla derivs-) |
---|
| 520 | (match (deriv-sequence derivs-) |
---|
| 521 | ((:result seq derivs--) |
---|
| 522 | (loop (cons seq ret) derivs--)) |
---|
| 523 | (_ (values (reverse ret) derivs)))) |
---|
| 524 | (_ (values (reverse ret) derivs)))) |
---|
| 525 | (make-result (if (null? (cdr ret)) |
---|
| 526 | (car ret) |
---|
| 527 | (cons :expression ret)) |
---|
| 528 | derivs))))) |
---|
| 529 | |
---|
| 530 | (define (parse-definition derivs) |
---|
| 531 | (match (deriv-identifier derivs) |
---|
| 532 | ((:result ident derivs-) |
---|
| 533 | (match (deriv-leftarrow derivs-) |
---|
| 534 | ((:result larr derivs--) |
---|
| 535 | (match (deriv-expression derivs--) |
---|
| 536 | ((:result expr derivs---) |
---|
| 537 | (make-result `(:definition ,ident ,expr) derivs---)) |
---|
| 538 | (_ nil))) |
---|
| 539 | (_ nil))) |
---|
| 540 | (_ nil))) |
---|
| 541 | |
---|
| 542 | ;;; |
---|
| 543 | ;;; AST translation to PEG |
---|
| 544 | ;;; |
---|
| 545 | ;;; ref. WiLiKi:Rui:ParsingExpressionGrammar |
---|
| 546 | ;;; |
---|
| 547 | |
---|
| 548 | ;; |
---|
| 549 | ;; expected input |
---|
| 550 | ;; * identifier |
---|
| 551 | ;; (:identifier <symbol>) |
---|
| 552 | ;; * identifier-clause |
---|
| 553 | ;; (:identifier-clause (:identifier <symbol>) :capture <symbol>) |
---|
| 554 | ;; * char(literal) |
---|
| 555 | ;; (:char <char>) |
---|
| 556 | ;; * string(literal) |
---|
| 557 | ;; (:string <string>) |
---|
| 558 | ;; * char-set(class) |
---|
| 559 | ;; (:char-set <char-set>) |
---|
| 560 | ;; * expression |
---|
| 561 | ;; (:expression (:sequence (...) :callback ...) ...) |
---|
| 562 | ;; |
---|
| 563 | ;; return (values <parser> <captured-name>) |
---|
| 564 | ;; |
---|
| 565 | (define (translate-primary primary) |
---|
| 566 | (let ((tag (car primary)) |
---|
| 567 | (value (cadr primary))) |
---|
| 568 | (case tag |
---|
| 569 | ((:identifier) value) |
---|
| 570 | ((:char) `($char ,value)) |
---|
| 571 | ((:string) `($string ,value)) |
---|
| 572 | ((:char-set) `($one-of ,value)) |
---|
| 573 | ((:sequence) (translate-sequence primary)) |
---|
| 574 | ((:expression) (translate-expression primary))))) |
---|
| 575 | |
---|
| 576 | ;; |
---|
| 577 | ;; expected input |
---|
| 578 | ;; * more than zero |
---|
| 579 | ;; (:zero-more <primary>) |
---|
| 580 | ;; * more than one |
---|
| 581 | ;; (:one-more <primary>) |
---|
| 582 | ;; * primary |
---|
| 583 | ;; <primary> (see translate-primary) |
---|
| 584 | ;; |
---|
| 585 | ;; return (values <parser> <captured-name>) |
---|
| 586 | ;; |
---|
| 587 | (define (translate-suffix suffix) |
---|
| 588 | (let ((tag (car suffix)) |
---|
| 589 | (value (cadr suffix))) |
---|
| 590 | (case tag |
---|
| 591 | ((:zero-more) `($many ,(translate-primary value))) |
---|
| 592 | ((:one-more) `($many ,(translate-primary value) 1)) |
---|
| 593 | ((:optional) `($optional ,(translate-primary value))) |
---|
| 594 | (else (translate-primary suffix))))) |
---|
| 595 | |
---|
| 596 | ;; |
---|
| 597 | ;; expected input |
---|
| 598 | ;; * and(syntactic predicate) |
---|
| 599 | ;; (:and <suffix>) |
---|
| 600 | ;; * not(syntactic predicate) |
---|
| 601 | ;; (:not <suffix>) |
---|
| 602 | ;; * suffix |
---|
| 603 | ;; <suffix> (see translate-suffix) |
---|
| 604 | ;; |
---|
| 605 | ;; return (values <parser> <ident-clause?>) |
---|
| 606 | ;; |
---|
| 607 | (define (translate-prefix prefix) |
---|
| 608 | (let ((tag (car prefix)) |
---|
| 609 | (value (cadr prefix))) |
---|
| 610 | (case tag |
---|
| 611 | ((:not) (values `($not ,(translate-suffix value)) #f)) |
---|
| 612 | ((:identifier-clause) |
---|
| 613 | (values `(,(get-keyword :capture prefix) |
---|
| 614 | ,(translate-primary value)) |
---|
| 615 | #t)) |
---|
| 616 | (else (values (translate-suffix prefix) #f))))) |
---|
| 617 | |
---|
| 618 | (define (translate-sequence sequence) |
---|
| 619 | (let ((tag (car sequence)) |
---|
| 620 | (prefixes (cadr sequence)) |
---|
| 621 | (callback (get-keyword :callback sequence #f))) |
---|
| 622 | (case tag |
---|
| 623 | ((:sequence) |
---|
| 624 | (if (null? prefixes) |
---|
| 625 | `($do ($return ,callback)) |
---|
| 626 | (if |
---|
| 627 | (and (null? (cdr prefixes)) (null? callback)) |
---|
| 628 | (translate-prefix (car prefixes)) |
---|
| 629 | (let loop((items nil) (capture-names nil) (prefixes prefixes)) |
---|
| 630 | (cond |
---|
| 631 | ((null? prefixes) |
---|
| 632 | (cons |
---|
| 633 | '$do |
---|
| 634 | (reverse |
---|
| 635 | (cons |
---|
| 636 | (if (null? callback) |
---|
| 637 | `($return (list ,@(reverse capture-names))) |
---|
| 638 | `($return ,callback)) |
---|
| 639 | items)))) |
---|
| 640 | (else |
---|
| 641 | (receive (prefix ident-clause?) |
---|
| 642 | (translate-prefix (car prefixes)) |
---|
| 643 | (let1 capt (gensym) |
---|
| 644 | (loop (cons (if ident-clause? |
---|
| 645 | prefix |
---|
| 646 | `(,capt ,prefix)) |
---|
| 647 | items) |
---|
| 648 | (cons (if ident-clause? |
---|
| 649 | (car prefix) |
---|
| 650 | capt) |
---|
| 651 | capture-names) |
---|
| 652 | (cdr prefixes) |
---|
| 653 | ))))))))) |
---|
| 654 | (else |
---|
| 655 | (translate-prefix sequence))))) |
---|
| 656 | |
---|
| 657 | (define (translate-expression expression) |
---|
| 658 | (let ((tag (car expression))) |
---|
| 659 | (case tag |
---|
| 660 | ((:expression) |
---|
| 661 | (let1 sequences (cdr expression) |
---|
| 662 | (if (null? (cdr sequences)) |
---|
| 663 | (translate-sequence (car sequences)) |
---|
| 664 | `($or ,@(map translate-sequence sequences))))) |
---|
| 665 | (else |
---|
| 666 | (translate-sequence expression))))) |
---|
| 667 | |
---|
| 668 | (define (translate-definition definition) |
---|
| 669 | (let ((tag (car definition)) |
---|
| 670 | (ident (cadr (cadr definition))) |
---|
| 671 | (expression (caddr definition))) |
---|
| 672 | (case tag |
---|
| 673 | ((:definition) |
---|
| 674 | `(define ,ident ,(translate-expression expression))) |
---|
| 675 | (else (error "Translation error"))))) |
---|
| 676 | |
---|
| 677 | (define (hoge) |
---|
| 678 | (let* ((spaces ($many ($one-of #[ \t]))) |
---|
| 679 | (comma ($seq spaces ($char #\,) spaces)) |
---|
| 680 | (dquote ($char #\")) |
---|
| 681 | (double-dquote ($do (($string "\"\"")) ($return #\"))) |
---|
| 682 | (quoted-body ($many ($or double-dquote ($one-of #[^\"])))) |
---|
| 683 | (quoted ($between dquote quoted-body dquote)) |
---|
| 684 | (unquoted ($many-till anychar ($or comma newline))) |
---|
| 685 | (field ($or quoted unquoted)) |
---|
| 686 | (record ($sep-by ($->rope field) comma))) |
---|
| 687 | #?=(parse-string record "a,b,c") |
---|
| 688 | #?=(parse-string record "\"a\" , b , c") |
---|
| 689 | #?=(parse-string record "\"a \"\" \n\" , b , c")) |
---|
| 690 | |
---|
| 691 | (parse-string-with |
---|
| 692 | ((Record <- (:fld Field) (:suffix RecordSuffix) :return (cons fld suffix)) |
---|
| 693 | (RecordSuffix <- Comma (:fld Field) (:suffix RecordSuffix) |
---|
| 694 | :return (cons fld suffix) |
---|
| 695 | / :return () ) |
---|
| 696 | (Field <- (:charlist Quoted) :return (list->string charlist) |
---|
| 697 | / (:charlist UnQuoted) :return (list->string charlist)) |
---|
| 698 | (Spaces <- #[ \t] *) |
---|
| 699 | (Comma <- Spaces #\, Spaces) |
---|
| 700 | (DQuote <- #\") |
---|
| 701 | (DoubleDQuote <- #\" #\" :return #\") |
---|
| 702 | (Quoted <- DQuote |
---|
| 703 | (:body ((DoubleDQuote / #[^\"]) *)) |
---|
| 704 | DQuote |
---|
| 705 | :return body) |
---|
| 706 | (UnQuoted <- ((! (Comma / newline) (:ch anychar) :return ch) *)) |
---|
| 707 | ) |
---|
| 708 | "\"a\" , b , c") |
---|
| 709 | ) |
---|
| 710 | |
---|
| 711 | ;; (parse-string-with |
---|
| 712 | ;; ((Record <- (:fld Field) |
---|
| 713 | ;; (:rest-fld ((Comma (:fld Field) :return fld) *)) |
---|
| 714 | ;; :return (cons fld rest-fld)) |
---|
| 715 | ;; (Field <- (:charlist Quoted) :return (list->string charlist) |
---|
| 716 | ;; / (:charlist UnQuoted) :return (list->string charlist)) |
---|
| 717 | ;; (Spaces <- #[ \t] *) |
---|
| 718 | ;; (Comma <- Spaces #\, Spaces) |
---|
| 719 | ;; (DQuote <- #\") |
---|
| 720 | ;; (Quoted <- DQuote |
---|
| 721 | ;; (:body ((#\" #\" :return #\" / #[^\"]) *)) |
---|
| 722 | ;; DQuote |
---|
| 723 | ;; :return body) |
---|
| 724 | ;; (UnQuoted <- ! (Comma / newline) (:ch anychar) (:unq UnQuoted) |
---|
| 725 | ;; :return (cons ch unq) |
---|
| 726 | ;; / :return ()) |
---|
| 727 | ;; ) |
---|
| 728 | ;; "\"a\"\"bc\" , b , c") |
---|
| 729 | |
---|
| 730 | ;; (parse-string-with |
---|
| 731 | ;; ((A <- "a" A "a" / "a")) |
---|
| 732 | ;; "aaaaa") |
---|