| 1 | #!/usr/bin/env gosh |
|---|
| 2 | ;; -*- coding: utf-8 mode: scheme -*- |
|---|
| 3 | |
|---|
| 4 | ;;; |
|---|
| 5 | ;;; Original PEG Grammars |
|---|
| 6 | ;;; |
|---|
| 7 | ;; Hierarchical syntax |
|---|
| 8 | ; Grammar <- Spacing Definition+ EndOfFile |
|---|
| 9 | ; Definition <- Identifier LEFTARROW Expression |
|---|
| 10 | ; |
|---|
| 11 | ; Expression <- Sequence (SLASH Sequence)* |
|---|
| 12 | ; Sequence <- Prefix* |
|---|
| 13 | ; Prefix <- (AND / NOT)? Suffix |
|---|
| 14 | ; Suffix <- Primary (QUESTION / STAR / PLUS)? |
|---|
| 15 | ; Primary <- Identifier !LEFTARROW |
|---|
| 16 | ; / OPEN Expression CLOSE |
|---|
| 17 | ; / Literal / Class / DOT |
|---|
| 18 | ; |
|---|
| 19 | ;; Lexical syntax |
|---|
| 20 | ; Identifier <- IdentStart IdentCont* Spacing |
|---|
| 21 | ; IdentStart <- [a-zA-Z_] |
|---|
| 22 | ; IdentCont <- IdentStart / [0-9] |
|---|
| 23 | ; |
|---|
| 24 | ; Literal <- ['] (!['] Char)* ['] Spacing |
|---|
| 25 | ; / ["] (!["] Char)* ["] Spacing |
|---|
| 26 | ; Class <- '[' (!']' Range)* ']' Spacing |
|---|
| 27 | ; Range <- Char '-' Char / Char |
|---|
| 28 | ; Char <- '\\' [nrt'"\[\]\\] |
|---|
| 29 | ; / '\\' [0-2][0-7][0-7] |
|---|
| 30 | ; / '\\' [0-7][0-7]? |
|---|
| 31 | ; / !'\\' . |
|---|
| 32 | ; |
|---|
| 33 | ; LEFTARROW <- '<-' Spacing |
|---|
| 34 | ; SLASH <- '/' Spacing |
|---|
| 35 | ; AND <- '&' Spacing |
|---|
| 36 | ; NOT <- '!' Spacing |
|---|
| 37 | ; QUESTION <- '?' Spacing |
|---|
| 38 | ; STAR <- '*' Spacing |
|---|
| 39 | ; PLUS <- '+' Spacing |
|---|
| 40 | ; OPEN <- '(' Spacing |
|---|
| 41 | ; CLOSE <- ')' Spacing |
|---|
| 42 | ; DOT <- '.' Spacing |
|---|
| 43 | ; |
|---|
| 44 | ; Spacing <- (Space / Comment)* |
|---|
| 45 | ; Comment <- '#' (!EndOfLine .)* EndOfLine |
|---|
| 46 | ; Space <- ' ' / '\t' / EndOfLine |
|---|
| 47 | ; EndOfLine <- '\r\n' / '\n' / '\r' |
|---|
| 48 | ; EndOfFile <- !. |
|---|
| 49 | |
|---|
| 50 | ;;; |
|---|
| 51 | ;;; S-exp PEG Grammar |
|---|
| 52 | ;;; |
|---|
| 53 | ; |
|---|
| 54 | ;; Hierarchical syntax |
|---|
| 55 | ; |
|---|
| 56 | ; Definition <- Identifier LEFTARROW Expression |
|---|
| 57 | ; |
|---|
| 58 | ; Expression <- Sequence (SLASH Sequence)* |
|---|
| 59 | ; Sequence <- Prefix* (RETURN .)? |
|---|
| 60 | ; Prefix <- IdentifierClause / (AND / NOT)? Suffix |
|---|
| 61 | ; Suffix <- Primary (QUESTION / STAR / PLUS)? |
|---|
| 62 | ; IdentifierClause <- OPEN CaptureName Primary CLOSE |
|---|
| 63 | ; Primary <- Identifier !LEFTARROW |
|---|
| 64 | ; / OPEN Expression CLOSE |
|---|
| 65 | ; / Literal / Class / ANY |
|---|
| 66 | ; |
|---|
| 67 | ;; Lexical syntax |
|---|
| 68 | ; |
|---|
| 69 | ; Identifier <- !NonIdentifier <scheme-symbol> |
|---|
| 70 | ; NonIdentifier <- OPEN / CLOSE |
|---|
| 71 | ; CaptureName <- <scheme-keyword> |
|---|
| 72 | ; Literal <- <scheme-string> / <scheme-char> |
|---|
| 73 | ; Class <- <scheme-char-set> |
|---|
| 74 | ; |
|---|
| 75 | ; OPEN <- |<| |
|---|
| 76 | ; CLOSE <- |>| |
|---|
| 77 | ; LEFTARROW <- |<-| |
|---|
| 78 | ; SLASH <- |/| |
|---|
| 79 | ; AND <- |&| |
|---|
| 80 | ; NOT <- |!| |
|---|
| 81 | ; QUESTION <- |?| |
|---|
| 82 | ; STAR <- |*| |
|---|
| 83 | ; PLUS <- |+| |
|---|
| 84 | ; RETURN <- :return |
|---|
| 85 | ; ANY <- |%any%| |
|---|
| 86 | |
|---|
| 87 | ; (define-parser math-exp |
|---|
| 88 | ; (additive <- (multitive left-val) #\+ (additive right-val) |
|---|
| 89 | ; &return (+ left-val right-val) |
|---|
| 90 | ; / multitive) |
|---|
| 91 | ; (multitive <- (primary left-val) #\* (multitive right-val) |
|---|
| 92 | ; &return (* left-val right-val) |
|---|
| 93 | ; / primary) |
|---|
| 94 | ; (primary <- #\( (additive val) #\) &return val |
|---|
| 95 | ; / decimal) |
|---|
| 96 | ; (decimal <- (#[0-9] num-char) |
|---|
| 97 | ; &return (- (char->integer num-char) (char->integer #\0)))) |
|---|
| 98 | |
|---|
| 99 | (use peg) |
|---|
| 100 | (use util.match) |
|---|
| 101 | |
|---|
| 102 | (define (main args) |
|---|
| 103 | 0) |
|---|
| 104 | |
|---|
| 105 | (define nil ()) |
|---|
| 106 | |
|---|
| 107 | (define-class <derivs> () |
|---|
| 108 | (; Hierarchical tokens (non-terminals) |
|---|
| 109 | (grammar :init-keyword :grammar) |
|---|
| 110 | (definition :init-keyword :definition) |
|---|
| 111 | (expression :init-keyword :expression) |
|---|
| 112 | (sequence :init-keyword :sequence) |
|---|
| 113 | (prefix :init-keyword :prefix) |
|---|
| 114 | (suffix :init-keyword :suffix) |
|---|
| 115 | (identifier-clause :init-keyword :identifier-clause) |
|---|
| 116 | (primary :init-keyword :primary) |
|---|
| 117 | |
|---|
| 118 | ; Lexical tokens (terminals) |
|---|
| 119 | (identifier :init-keyword :identifier) |
|---|
| 120 | (non-identifier :init-keyword :non-identifier) |
|---|
| 121 | (capture-name :init-keyword :capture-name) |
|---|
| 122 | (literal :init-keyword :literal) |
|---|
| 123 | (class :init-keyword :class) |
|---|
| 124 | |
|---|
| 125 | (open :init-keyword :open) |
|---|
| 126 | (close :init-keyword :close) |
|---|
| 127 | (leftarrow :init-keyword :leftarrow) |
|---|
| 128 | (return :init-keyword :return) |
|---|
| 129 | (any :init-keyword :any) |
|---|
| 130 | (plus :init-keyword :plus) |
|---|
| 131 | (star :init-keyword :star) |
|---|
| 132 | (question :init-keyword :question) |
|---|
| 133 | (not :init-keyword :not) |
|---|
| 134 | (and :init-keyword :and) |
|---|
| 135 | (slash :init-keyword :slash) |
|---|
| 136 | |
|---|
| 137 | ; Raw input |
|---|
| 138 | (token :init-keyword :token :getter deriv-token) |
|---|
| 139 | )) |
|---|
| 140 | |
|---|
| 141 | (define-macro (define-deriv name) |
|---|
| 142 | `(define-method ,(string->symbol |
|---|
| 143 | (string-append "deriv-" (symbol->string name))) |
|---|
| 144 | ((derivs <derivs>)) |
|---|
| 145 | (let1 slt (slot-ref derivs ',name) |
|---|
| 146 | (if (promise? slt) |
|---|
| 147 | (let1 forced (force slt) |
|---|
| 148 | (slot-set! derivs ',name forced) |
|---|
| 149 | forced) |
|---|
| 150 | slt)))) |
|---|
| 151 | |
|---|
| 152 | (define-macro (define-derivs . names) |
|---|
| 153 | `(begin |
|---|
| 154 | ,@(map |
|---|
| 155 | (lambda (deriv-name) |
|---|
| 156 | `(define-deriv ,deriv-name)) |
|---|
| 157 | names))) |
|---|
| 158 | |
|---|
| 159 | (define-derivs ; define generic function *-deriv |
|---|
| 160 | ; for hierarchical syntax |
|---|
| 161 | grammar definition expression sequence prefix suffix primary identifier-clause |
|---|
| 162 | ; for lexical tokens |
|---|
| 163 | identifier non-identifier capture-name literal class |
|---|
| 164 | open close leftarrow slash and not question star plus return any) |
|---|
| 165 | |
|---|
| 166 | (define (make-result val chars) |
|---|
| 167 | `(:result ,val ,chars)) |
|---|
| 168 | |
|---|
| 169 | (define (parse tokens) |
|---|
| 170 | (let loop((ret-deriv nil) (tokens (cons nil (reverse tokens)))) |
|---|
| 171 | (if (null? tokens) |
|---|
| 172 | ret-deriv |
|---|
| 173 | (loop |
|---|
| 174 | (letrec |
|---|
| 175 | ((deriv |
|---|
| 176 | (make <derivs> |
|---|
| 177 | ; code for generating code |
|---|
| 178 | ; (for-each (lambda (ident) (format #t ":~a (lazy (parse-~a deriv))\n" ident ident)) '(leftarrow slash and not question star plus any)) |
|---|
| 179 | |
|---|
| 180 | ; hierarychical syntax |
|---|
| 181 | :grammar (lazy (parse-grammar deriv)) |
|---|
| 182 | :definition (lazy (parse-definition deriv)) |
|---|
| 183 | :expression (lazy (parse-expression deriv)) |
|---|
| 184 | :sequence (lazy (parse-sequence deriv)) |
|---|
| 185 | :prefix (lazy (parse-prefix deriv)) |
|---|
| 186 | :suffix (lazy (parse-suffix deriv)) |
|---|
| 187 | :identifier-clause (lazy (parse-identifier-clause deriv)) |
|---|
| 188 | :primary (lazy (parse-primary deriv)) |
|---|
| 189 | |
|---|
| 190 | ; lexical syntax |
|---|
| 191 | :identifier (lazy (parse-identifier deriv)) |
|---|
| 192 | :non-identifier (lazy (parse-non-identifier deriv)) |
|---|
| 193 | :capture-name (lazy (parse-capture-name deriv)) |
|---|
| 194 | :literal (lazy (parse-literal deriv)) |
|---|
| 195 | :class (lazy (parse-class deriv)) |
|---|
| 196 | |
|---|
| 197 | :open (lazy (parse-open deriv)) |
|---|
| 198 | :close (lazy (parse-close deriv)) |
|---|
| 199 | :leftarrow (lazy (parse-leftarrow deriv)) |
|---|
| 200 | :slash (lazy (parse-slash deriv)) |
|---|
| 201 | :and (lazy (parse-and deriv)) |
|---|
| 202 | :not (lazy (parse-not deriv)) |
|---|
| 203 | :question (lazy (parse-question deriv)) |
|---|
| 204 | :star (lazy (parse-star deriv)) |
|---|
| 205 | :plus (lazy (parse-plus deriv)) |
|---|
| 206 | :return (lazy (parse-return deriv)) |
|---|
| 207 | :any (lazy (parse-any deriv)) |
|---|
| 208 | |
|---|
| 209 | ;raw input |
|---|
| 210 | :token (if (null? tokens) |
|---|
| 211 | nil |
|---|
| 212 | (make-result (car tokens) ret-deriv))))) |
|---|
| 213 | deriv) |
|---|
| 214 | (cdr tokens))))) |
|---|
| 215 | |
|---|
| 216 | (define (parse-grammars pegs) |
|---|
| 217 | (map |
|---|
| 218 | (lambda (gram) |
|---|
| 219 | (let1 gram (flatten-grammar gram) |
|---|
| 220 | (let* ((ret (deriv-definition (parse gram))) |
|---|
| 221 | (derivs (caddr (deriv-token (caddr ret))))) |
|---|
| 222 | (if (not (null? derivs)) |
|---|
| 223 | (error (format #f "Syntax error: ~a" gram)) |
|---|
| 224 | (cadr ret))))) |
|---|
| 225 | pegs)) |
|---|
| 226 | |
|---|
| 227 | |
|---|
| 228 | ;;; |
|---|
| 229 | ;;; Preprocessor of grammar input |
|---|
| 230 | ;;; |
|---|
| 231 | ;;; input: |
|---|
| 232 | ;;; '(foo -> (e1 bar) (e2 baz) :return (do 'sth) / (e3 hoge) :return (do 'anth)) |
|---|
| 233 | ;;; |
|---|
| 234 | ;;; output: |
|---|
| 235 | ;;; '(foo -> < e1 bar > < e2 baz > :return (do 'sth) / < e3 hoge > :return (do 'anth)) |
|---|
| 236 | ;;; |
|---|
| 237 | (define (flatten-grammar gram) |
|---|
| 238 | (let loop((ret nil) (gram (reverse gram)) (expression? #f)) |
|---|
| 239 | (if (null? gram) |
|---|
| 240 | ret |
|---|
| 241 | (loop |
|---|
| 242 | (if (and expression? |
|---|
| 243 | (list? (car gram))) |
|---|
| 244 | (append `(< ,@(car gram) >) ret) |
|---|
| 245 | (cons (car gram) ret)) |
|---|
| 246 | (cdr gram) |
|---|
| 247 | (cond |
|---|
| 248 | ((eq? '/ (car gram)) #f) |
|---|
| 249 | ((eq? :return (car gram)) #t) |
|---|
| 250 | (else expression?)))))) |
|---|
| 251 | |
|---|
| 252 | (define (flatten-grammar gram) |
|---|
| 253 | (concat-with |
|---|
| 254 | '/ |
|---|
| 255 | (map |
|---|
| 256 | (lambda (sequence) |
|---|
| 257 | (concat-with |
|---|
| 258 | :return |
|---|
| 259 | (let1 seq-pair (separate (cut eq? :return <>) sequence) |
|---|
| 260 | `(,(apply append |
|---|
| 261 | (map (lambda (elem) |
|---|
| 262 | (if (pair? elem) |
|---|
| 263 | `(< ,@(flatten-grammar elem) >) |
|---|
| 264 | `(,elem))) |
|---|
| 265 | (car seq-pair))) |
|---|
| 266 | ,@(cdr seq-pair))))) |
|---|
| 267 | (separate (cut eq? '/ <>) gram)))) |
|---|
| 268 | |
|---|
| 269 | (define (separate separator? ls) |
|---|
| 270 | (let loop1((ret ()) (lst ls)) |
|---|
| 271 | (if (null? lst) |
|---|
| 272 | (if (separator? (car (reverse ls))) |
|---|
| 273 | (reverse (cons () ret)) |
|---|
| 274 | (reverse ret)) |
|---|
| 275 | (receive (head tails) |
|---|
| 276 | (let loop2((ret ()) (ls lst)) |
|---|
| 277 | (cond |
|---|
| 278 | ((null? ls) (values (reverse ret) ls)) |
|---|
| 279 | ((separator? (car ls)) |
|---|
| 280 | (values (reverse ret) (cdr ls))) |
|---|
| 281 | (else |
|---|
| 282 | (loop2 (cons (car ls) ret) (cdr ls))))) |
|---|
| 283 | (loop1 (cons head ret) |
|---|
| 284 | tails))))) |
|---|
| 285 | |
|---|
| 286 | (define (concat-with joiner ls) |
|---|
| 287 | (let loop((ret (car (reverse ls))) (ls (cdr (reverse ls)))) |
|---|
| 288 | (if (null? ls) |
|---|
| 289 | ret |
|---|
| 290 | (loop `(,@(car ls) ,joiner ,@ret) (cdr ls))))) |
|---|
| 291 | |
|---|
| 292 | (define-macro (parse-string-with grammars str) |
|---|
| 293 | `(let () |
|---|
| 294 | ,@(map translate-definition |
|---|
| 295 | (parse-grammars grammars)) |
|---|
| 296 | (parse-string ,(caar grammars) ,str))) |
|---|
| 297 | |
|---|
| 298 | |
|---|
| 299 | (define-macro (define-parse-symbol ident sym) |
|---|
| 300 | `(define (,(string->symbol (string-append "parse-" (symbol->string ident))) derivs) |
|---|
| 301 | (match (deriv-token derivs) |
|---|
| 302 | ((:result ',sym derivs-) (make-result ',sym derivs-)) |
|---|
| 303 | (_ nil)))) |
|---|
| 304 | |
|---|
| 305 | |
|---|
| 306 | (define-parse-symbol open <) |
|---|
| 307 | (define-parse-symbol close >) |
|---|
| 308 | (define-parse-symbol leftarrow <-) |
|---|
| 309 | (define-parse-symbol slash /) |
|---|
| 310 | (define-parse-symbol and &) |
|---|
| 311 | (define-parse-symbol not !) |
|---|
| 312 | (define-parse-symbol question ?) |
|---|
| 313 | (define-parse-symbol star *) |
|---|
| 314 | (define-parse-symbol plus +) |
|---|
| 315 | (define-parse-symbol return :return) |
|---|
| 316 | (define-parse-symbol any %any%) |
|---|
| 317 | |
|---|
| 318 | (define (parse-class derivs) |
|---|
| 319 | (match (deriv-token derivs) |
|---|
| 320 | ((:result val derivs-) |
|---|
| 321 | (if (char-set? val) |
|---|
| 322 | (make-result `(:char-set ,val) derivs-) |
|---|
| 323 | nil)) |
|---|
| 324 | (_ nil))) |
|---|
| 325 | |
|---|
| 326 | (define (parse-literal derivs) |
|---|
| 327 | (match (deriv-token derivs) |
|---|
| 328 | ((:result val derivs-) |
|---|
| 329 | (cond |
|---|
| 330 | ((string? val) (make-result `(:string ,val) derivs-)) |
|---|
| 331 | ((char? val) (make-result `(:char ,val) derivs-)) |
|---|
| 332 | (else nil))) |
|---|
| 333 | (_ nil))) |
|---|
| 334 | |
|---|
| 335 | (define (parse-capture-name derivs) |
|---|
| 336 | (match (deriv-token derivs) |
|---|
| 337 | ((:result val derivs-) |
|---|
| 338 | (if (keyword? val) |
|---|
| 339 | (make-result val derivs-) |
|---|
| 340 | nil)) |
|---|
| 341 | (_ nil))) |
|---|
| 342 | |
|---|
| 343 | (define (parse-non-identifier derivs) |
|---|
| 344 | (let* ((alt3 |
|---|
| 345 | (lazy |
|---|
| 346 | (match (deriv-slash derivs) |
|---|
| 347 | ((:result ret derivs-) |
|---|
| 348 | (make-result '/ derivs-)) |
|---|
| 349 | (_ nil)))) |
|---|
| 350 | (alt2 |
|---|
| 351 | (lazy |
|---|
| 352 | (match (deriv-close derivs) |
|---|
| 353 | ((:result ret derivs-) |
|---|
| 354 | (make-result '> derivs-)) |
|---|
| 355 | (_ alt3)))) |
|---|
| 356 | (alt1 |
|---|
| 357 | (lazy |
|---|
| 358 | (match (deriv-open derivs) |
|---|
| 359 | ((:result ret derivs-) |
|---|
| 360 | (make-result '< derivs-)) |
|---|
| 361 | (_ alt2))))) |
|---|
| 362 | (force alt1))) |
|---|
| 363 | |
|---|
| 364 | (define (parse-identifier derivs) |
|---|
| 365 | (match (deriv-non-identifier derivs) |
|---|
| 366 | (() |
|---|
| 367 | (match (deriv-token derivs) |
|---|
| 368 | ((:result val derivs-) |
|---|
| 369 | (if (symbol? val) |
|---|
| 370 | (make-result `(:identifier ,val) derivs-) |
|---|
| 371 | nil)) |
|---|
| 372 | (_ nil))) |
|---|
| 373 | (_ nil))) |
|---|
| 374 | |
|---|
| 375 | (define (parse-primary derivs) |
|---|
| 376 | (let* ((alt6 |
|---|
| 377 | (lazy |
|---|
| 378 | (match (deriv-any derivs) |
|---|
| 379 | ((:result val derivs-) |
|---|
| 380 | (make-result val derivs-)) |
|---|
| 381 | (_ nil)))) |
|---|
| 382 | (alt5 |
|---|
| 383 | (lazy |
|---|
| 384 | (match (deriv-class derivs) |
|---|
| 385 | ((:result val derivs-) |
|---|
| 386 | (make-result val derivs-)) |
|---|
| 387 | (_ alt6)))) |
|---|
| 388 | (alt4 |
|---|
| 389 | (lazy |
|---|
| 390 | (match (deriv-literal derivs) |
|---|
| 391 | ((:result val derivs-) |
|---|
| 392 | (make-result val derivs-)) |
|---|
| 393 | (_ alt5)))) |
|---|
| 394 | (alt3 |
|---|
| 395 | (lazy |
|---|
| 396 | (match (deriv-open derivs) |
|---|
| 397 | ((:result val derivs-) |
|---|
| 398 | (match (deriv-expression derivs-) |
|---|
| 399 | ((:result exp derivs--) |
|---|
| 400 | (match (deriv-close derivs--) |
|---|
| 401 | ((:result val derivs---) |
|---|
| 402 | (make-result exp derivs---)) |
|---|
| 403 | (_ alt4))) |
|---|
| 404 | (_ alt4))) |
|---|
| 405 | (_ alt4)))) |
|---|
| 406 | (alt1 |
|---|
| 407 | (lazy |
|---|
| 408 | (match (deriv-identifier derivs) |
|---|
| 409 | ((:result ident derivs-) |
|---|
| 410 | (match (deriv-leftarrow derivs-) |
|---|
| 411 | ((:result val derivs--) alt3) |
|---|
| 412 | (_ (make-result ident derivs-)))) |
|---|
| 413 | (_ alt3))))) |
|---|
| 414 | (force alt1))) |
|---|
| 415 | |
|---|
| 416 | (define (parse-identifier-clause derivs) |
|---|
| 417 | (let* ((alt1 |
|---|
| 418 | (lazy |
|---|
| 419 | (match (deriv-open derivs) |
|---|
| 420 | ((:result opn derivs-) |
|---|
| 421 | (match (deriv-capture-name derivs-) |
|---|
| 422 | ((:result capt derivs--) |
|---|
| 423 | (match (deriv-primary derivs--) |
|---|
| 424 | ((:result primary derivs---) |
|---|
| 425 | (match (deriv-close derivs---) |
|---|
| 426 | ((:result cls derivs----) |
|---|
| 427 | (make-result `(:identifier-clause |
|---|
| 428 | ,primary |
|---|
| 429 | :capture |
|---|
| 430 | ,(string->symbol (x->string capt))) |
|---|
| 431 | derivs----)) |
|---|
| 432 | (_ nil))) |
|---|
| 433 | (_ nil))) |
|---|
| 434 | (_ nil))) |
|---|
| 435 | (_ nil))))) |
|---|
| 436 | (force alt1))) |
|---|
| 437 | |
|---|
| 438 | (define (parse-suffix derivs) |
|---|
| 439 | (match (deriv-primary derivs) |
|---|
| 440 | ((:result prim derivs-) |
|---|
| 441 | (let* ((alt3 |
|---|
| 442 | (lazy |
|---|
| 443 | (match (deriv-plus derivs-) |
|---|
| 444 | ((:result plus derivs--) |
|---|
| 445 | (make-result `(:one-more ,prim) derivs--)) |
|---|
| 446 | (_ (make-result prim derivs-))))) |
|---|
| 447 | (alt2 |
|---|
| 448 | (lazy |
|---|
| 449 | (match (deriv-star derivs-) |
|---|
| 450 | ((:result star derivs--) |
|---|
| 451 | (make-result `(:zero-more ,prim) derivs--)) |
|---|
| 452 | (_ alt3)))) |
|---|
| 453 | (alt1 |
|---|
| 454 | (lazy |
|---|
| 455 | (match (deriv-question derivs-) |
|---|
| 456 | ((:result qstn derivs--) |
|---|
| 457 | (make-result `(:optional ,prim) derivs--)) |
|---|
| 458 | (_ alt2))))) |
|---|
| 459 | (force alt1))) |
|---|
| 460 | (_ nil))) |
|---|
| 461 | |
|---|
| 462 | (define (parse-prefix derivs) |
|---|
| 463 | (let* ((glob-alt2 |
|---|
| 464 | (lazy |
|---|
| 465 | (let* ((alt2 |
|---|
| 466 | (lazy |
|---|
| 467 | (match (deriv-not derivs) |
|---|
| 468 | ((:result val derivs-) |
|---|
| 469 | (values :not derivs-)) |
|---|
| 470 | (_ (values nil derivs))))) |
|---|
| 471 | (alt1 |
|---|
| 472 | (lazy |
|---|
| 473 | (match (deriv-and derivs) |
|---|
| 474 | ((:result val derivs-) |
|---|
| 475 | (values :and derivs-)) |
|---|
| 476 | (_ alt2))))) |
|---|
| 477 | (receive (val derivs-) (force alt1) |
|---|
| 478 | (match (deriv-suffix derivs-) |
|---|
| 479 | ((:result suffix derivs--) |
|---|
| 480 | (if (null? val) |
|---|
| 481 | (make-result suffix derivs--) |
|---|
| 482 | (make-result `(,val ,suffix) derivs--))) |
|---|
| 483 | (_ nil)))))) |
|---|
| 484 | (glob-alt1 |
|---|
| 485 | (lazy |
|---|
| 486 | (match (deriv-identifier-clause derivs) |
|---|
| 487 | ((:result ident-clause derivs-) |
|---|
| 488 | (make-result ident-clause derivs-)) |
|---|
| 489 | (_ glob-alt2))))) |
|---|
| 490 | (force glob-alt1))) |
|---|
| 491 | |
|---|
| 492 | (define (parse-sequence derivs) |
|---|
| 493 | (receive (seqs derivs) |
|---|
| 494 | (let loop((ret ()) (derivs derivs)) |
|---|
| 495 | (match (deriv-prefix derivs) |
|---|
| 496 | ((:result val derivs-) |
|---|
| 497 | (loop (cons val ret) derivs-)) |
|---|
| 498 | (_ (values (reverse ret) derivs)))) |
|---|
| 499 | (let* ((alt2 |
|---|
| 500 | (lazy |
|---|
| 501 | (make-result `(:sequence ,seqs :callback ()) derivs))) |
|---|
| 502 | (alt1 |
|---|
| 503 | (lazy |
|---|
| 504 | (match (deriv-return derivs) |
|---|
| 505 | ((:result val derivs-) |
|---|
| 506 | (match (deriv-token derivs-) |
|---|
| 507 | ((:result callback derivs--) |
|---|
| 508 | (make-result `(:sequence ,seqs :callback ,callback) derivs--)) |
|---|
| 509 | (_ alt2))) |
|---|
| 510 | (_ alt2))))) |
|---|
| 511 | (force alt1)))) |
|---|
| 512 | |
|---|
| 513 | (define (parse-expression derivs) |
|---|
| 514 | (match (deriv-sequence derivs) |
|---|
| 515 | ((:result seq derivs-) |
|---|
| 516 | (receive (ret derivs) |
|---|
| 517 | (let loop((ret `(,seq)) (derivs derivs-)) |
|---|
| 518 | (match (deriv-slash derivs) |
|---|
| 519 | ((:result sla derivs-) |
|---|
| 520 | (match (deriv-sequence derivs-) |
|---|
| 521 | ((:result seq derivs--) |
|---|
| 522 | (loop (cons seq ret) derivs--)) |
|---|
| 523 | (_ (values (reverse ret) derivs)))) |
|---|
| 524 | (_ (values (reverse ret) derivs)))) |
|---|
| 525 | (make-result (if (null? (cdr ret)) |
|---|
| 526 | (car ret) |
|---|
| 527 | (cons :expression ret)) |
|---|
| 528 | derivs))))) |
|---|
| 529 | |
|---|
| 530 | (define (parse-definition derivs) |
|---|
| 531 | (match (deriv-identifier derivs) |
|---|
| 532 | ((:result ident derivs-) |
|---|
| 533 | (match (deriv-leftarrow derivs-) |
|---|
| 534 | ((:result larr derivs--) |
|---|
| 535 | (match (deriv-expression derivs--) |
|---|
| 536 | ((:result expr derivs---) |
|---|
| 537 | (make-result `(:definition ,ident ,expr) derivs---)) |
|---|
| 538 | (_ nil))) |
|---|
| 539 | (_ nil))) |
|---|
| 540 | (_ nil))) |
|---|
| 541 | |
|---|
| 542 | ;;; |
|---|
| 543 | ;;; AST translation to PEG |
|---|
| 544 | ;;; |
|---|
| 545 | ;;; ref. WiLiKi:Rui:ParsingExpressionGrammar |
|---|
| 546 | ;;; |
|---|
| 547 | |
|---|
| 548 | ;; |
|---|
| 549 | ;; expected input |
|---|
| 550 | ;; * identifier |
|---|
| 551 | ;; (:identifier <symbol>) |
|---|
| 552 | ;; * identifier-clause |
|---|
| 553 | ;; (:identifier-clause (:identifier <symbol>) :capture <symbol>) |
|---|
| 554 | ;; * char(literal) |
|---|
| 555 | ;; (:char <char>) |
|---|
| 556 | ;; * string(literal) |
|---|
| 557 | ;; (:string <string>) |
|---|
| 558 | ;; * char-set(class) |
|---|
| 559 | ;; (:char-set <char-set>) |
|---|
| 560 | ;; * expression |
|---|
| 561 | ;; (:expression (:sequence (...) :callback ...) ...) |
|---|
| 562 | ;; |
|---|
| 563 | ;; return (values <parser> <captured-name>) |
|---|
| 564 | ;; |
|---|
| 565 | (define (translate-primary primary) |
|---|
| 566 | (let ((tag (car primary)) |
|---|
| 567 | (value (cadr primary))) |
|---|
| 568 | (case tag |
|---|
| 569 | ((:identifier) value) |
|---|
| 570 | ((:char) `($char ,value)) |
|---|
| 571 | ((:string) `($string ,value)) |
|---|
| 572 | ((:char-set) `($one-of ,value)) |
|---|
| 573 | ((:sequence) (translate-sequence primary)) |
|---|
| 574 | ((:expression) (translate-expression primary))))) |
|---|
| 575 | |
|---|
| 576 | ;; |
|---|
| 577 | ;; expected input |
|---|
| 578 | ;; * more than zero |
|---|
| 579 | ;; (:zero-more <primary>) |
|---|
| 580 | ;; * more than one |
|---|
| 581 | ;; (:one-more <primary>) |
|---|
| 582 | ;; * primary |
|---|
| 583 | ;; <primary> (see translate-primary) |
|---|
| 584 | ;; |
|---|
| 585 | ;; return (values <parser> <captured-name>) |
|---|
| 586 | ;; |
|---|
| 587 | (define (translate-suffix suffix) |
|---|
| 588 | (let ((tag (car suffix)) |
|---|
| 589 | (value (cadr suffix))) |
|---|
| 590 | (case tag |
|---|
| 591 | ((:zero-more) `($many ,(translate-primary value))) |
|---|
| 592 | ((:one-more) `($many ,(translate-primary value) 1)) |
|---|
| 593 | ((:optional) `($optional ,(translate-primary value))) |
|---|
| 594 | (else (translate-primary suffix))))) |
|---|
| 595 | |
|---|
| 596 | ;; |
|---|
| 597 | ;; expected input |
|---|
| 598 | ;; * and(syntactic predicate) |
|---|
| 599 | ;; (:and <suffix>) |
|---|
| 600 | ;; * not(syntactic predicate) |
|---|
| 601 | ;; (:not <suffix>) |
|---|
| 602 | ;; * suffix |
|---|
| 603 | ;; <suffix> (see translate-suffix) |
|---|
| 604 | ;; |
|---|
| 605 | ;; return (values <parser> <ident-clause?>) |
|---|
| 606 | ;; |
|---|
| 607 | (define (translate-prefix prefix) |
|---|
| 608 | (let ((tag (car prefix)) |
|---|
| 609 | (value (cadr prefix))) |
|---|
| 610 | (case tag |
|---|
| 611 | ((:not) (values `($not ,(translate-suffix value)) #f)) |
|---|
| 612 | ((:identifier-clause) |
|---|
| 613 | (values `(,(get-keyword :capture prefix) |
|---|
| 614 | ,(translate-primary value)) |
|---|
| 615 | #t)) |
|---|
| 616 | (else (values (translate-suffix prefix) #f))))) |
|---|
| 617 | |
|---|
| 618 | (define (translate-sequence sequence) |
|---|
| 619 | (let ((tag (car sequence)) |
|---|
| 620 | (prefixes (cadr sequence)) |
|---|
| 621 | (callback (get-keyword :callback sequence #f))) |
|---|
| 622 | (case tag |
|---|
| 623 | ((:sequence) |
|---|
| 624 | (if (null? prefixes) |
|---|
| 625 | `($do ($return ,callback)) |
|---|
| 626 | (if |
|---|
| 627 | (and (null? (cdr prefixes)) (null? callback)) |
|---|
| 628 | (translate-prefix (car prefixes)) |
|---|
| 629 | (let loop((items nil) (capture-names nil) (prefixes prefixes)) |
|---|
| 630 | (cond |
|---|
| 631 | ((null? prefixes) |
|---|
| 632 | (cons |
|---|
| 633 | '$do |
|---|
| 634 | (reverse |
|---|
| 635 | (cons |
|---|
| 636 | (if (null? callback) |
|---|
| 637 | `($return (list ,@(reverse capture-names))) |
|---|
| 638 | `($return ,callback)) |
|---|
| 639 | items)))) |
|---|
| 640 | (else |
|---|
| 641 | (receive (prefix ident-clause?) |
|---|
| 642 | (translate-prefix (car prefixes)) |
|---|
| 643 | (let1 capt (gensym) |
|---|
| 644 | (loop (cons (if ident-clause? |
|---|
| 645 | prefix |
|---|
| 646 | `(,capt ,prefix)) |
|---|
| 647 | items) |
|---|
| 648 | (cons (if ident-clause? |
|---|
| 649 | (car prefix) |
|---|
| 650 | capt) |
|---|
| 651 | capture-names) |
|---|
| 652 | (cdr prefixes) |
|---|
| 653 | ))))))))) |
|---|
| 654 | (else |
|---|
| 655 | (translate-prefix sequence))))) |
|---|
| 656 | |
|---|
| 657 | (define (translate-expression expression) |
|---|
| 658 | (let ((tag (car expression))) |
|---|
| 659 | (case tag |
|---|
| 660 | ((:expression) |
|---|
| 661 | (let1 sequences (cdr expression) |
|---|
| 662 | (if (null? (cdr sequences)) |
|---|
| 663 | (translate-sequence (car sequences)) |
|---|
| 664 | `($or ,@(map translate-sequence sequences))))) |
|---|
| 665 | (else |
|---|
| 666 | (translate-sequence expression))))) |
|---|
| 667 | |
|---|
| 668 | (define (translate-definition definition) |
|---|
| 669 | (let ((tag (car definition)) |
|---|
| 670 | (ident (cadr (cadr definition))) |
|---|
| 671 | (expression (caddr definition))) |
|---|
| 672 | (case tag |
|---|
| 673 | ((:definition) |
|---|
| 674 | `(define ,ident ,(translate-expression expression))) |
|---|
| 675 | (else (error "Translation error"))))) |
|---|
| 676 | |
|---|
| 677 | (define (hoge) |
|---|
| 678 | (let* ((spaces ($many ($one-of #[ \t]))) |
|---|
| 679 | (comma ($seq spaces ($char #\,) spaces)) |
|---|
| 680 | (dquote ($char #\")) |
|---|
| 681 | (double-dquote ($do (($string "\"\"")) ($return #\"))) |
|---|
| 682 | (quoted-body ($many ($or double-dquote ($one-of #[^\"])))) |
|---|
| 683 | (quoted ($between dquote quoted-body dquote)) |
|---|
| 684 | (unquoted ($many-till anychar ($or comma newline))) |
|---|
| 685 | (field ($or quoted unquoted)) |
|---|
| 686 | (record ($sep-by ($->rope field) comma))) |
|---|
| 687 | #?=(parse-string record "a,b,c") |
|---|
| 688 | #?=(parse-string record "\"a\" , b , c") |
|---|
| 689 | #?=(parse-string record "\"a \"\" \n\" , b , c")) |
|---|
| 690 | |
|---|
| 691 | (parse-string-with |
|---|
| 692 | ((Record <- (:fld Field) (:suffix RecordSuffix) :return (cons fld suffix)) |
|---|
| 693 | (RecordSuffix <- Comma (:fld Field) (:suffix RecordSuffix) |
|---|
| 694 | :return (cons fld suffix) |
|---|
| 695 | / :return () ) |
|---|
| 696 | (Field <- (:charlist Quoted) :return (list->string charlist) |
|---|
| 697 | / (:charlist UnQuoted) :return (list->string charlist)) |
|---|
| 698 | (Spaces <- #[ \t] *) |
|---|
| 699 | (Comma <- Spaces #\, Spaces) |
|---|
| 700 | (DQuote <- #\") |
|---|
| 701 | (DoubleDQuote <- #\" #\" :return #\") |
|---|
| 702 | (Quoted <- DQuote |
|---|
| 703 | (:body ((DoubleDQuote / #[^\"]) *)) |
|---|
| 704 | DQuote |
|---|
| 705 | :return body) |
|---|
| 706 | (UnQuoted <- ((! (Comma / newline) (:ch anychar) :return ch) *)) |
|---|
| 707 | ) |
|---|
| 708 | "\"a\" , b , c") |
|---|
| 709 | ) |
|---|
| 710 | |
|---|
| 711 | ;; (parse-string-with |
|---|
| 712 | ;; ((Record <- (:fld Field) |
|---|
| 713 | ;; (:rest-fld ((Comma (:fld Field) :return fld) *)) |
|---|
| 714 | ;; :return (cons fld rest-fld)) |
|---|
| 715 | ;; (Field <- (:charlist Quoted) :return (list->string charlist) |
|---|
| 716 | ;; / (:charlist UnQuoted) :return (list->string charlist)) |
|---|
| 717 | ;; (Spaces <- #[ \t] *) |
|---|
| 718 | ;; (Comma <- Spaces #\, Spaces) |
|---|
| 719 | ;; (DQuote <- #\") |
|---|
| 720 | ;; (Quoted <- DQuote |
|---|
| 721 | ;; (:body ((#\" #\" :return #\" / #[^\"]) *)) |
|---|
| 722 | ;; DQuote |
|---|
| 723 | ;; :return body) |
|---|
| 724 | ;; (UnQuoted <- ! (Comma / newline) (:ch anychar) (:unq UnQuoted) |
|---|
| 725 | ;; :return (cons ch unq) |
|---|
| 726 | ;; / :return ()) |
|---|
| 727 | ;; ) |
|---|
| 728 | ;; "\"a\"\"bc\" , b , c") |
|---|
| 729 | |
|---|
| 730 | ;; (parse-string-with |
|---|
| 731 | ;; ((A <- "a" A "a" / "a")) |
|---|
| 732 | ;; "aaaaa") |
|---|