#!/usr/bin/env gosh ;; -*- coding: utf-8 mode: scheme -*- ;;; ;;; Original PEG Grammars ;;; ;; Hierarchical syntax ; Grammar <- Spacing Definition+ EndOfFile ; Definition <- Identifier LEFTARROW Expression ; ; Expression <- Sequence (SLASH Sequence)* ; Sequence <- Prefix* ; Prefix <- (AND / NOT)? Suffix ; Suffix <- Primary (QUESTION / STAR / PLUS)? ; Primary <- Identifier !LEFTARROW ; / OPEN Expression CLOSE ; / Literal / Class / DOT ; ;; Lexical syntax ; Identifier <- IdentStart IdentCont* Spacing ; IdentStart <- [a-zA-Z_] ; IdentCont <- IdentStart / [0-9] ; ; Literal <- ['] (!['] Char)* ['] Spacing ; / ["] (!["] Char)* ["] Spacing ; Class <- '[' (!']' Range)* ']' Spacing ; Range <- Char '-' Char / Char ; Char <- '\\' [nrt'"\[\]\\] ; / '\\' [0-2][0-7][0-7] ; / '\\' [0-7][0-7]? ; / !'\\' . ; ; LEFTARROW <- '<-' Spacing ; SLASH <- '/' Spacing ; AND <- '&' Spacing ; NOT <- '!' Spacing ; QUESTION <- '?' Spacing ; STAR <- '*' Spacing ; PLUS <- '+' Spacing ; OPEN <- '(' Spacing ; CLOSE <- ')' Spacing ; DOT <- '.' Spacing ; ; Spacing <- (Space / Comment)* ; Comment <- '#' (!EndOfLine .)* EndOfLine ; Space <- ' ' / '\t' / EndOfLine ; EndOfLine <- '\r\n' / '\n' / '\r' ; EndOfFile <- !. ;;; ;;; S-exp PEG Grammar ;;; ; ;; Hierarchical syntax ; ; Definition <- Identifier LEFTARROW Expression ; ; Expression <- Sequence (SLASH Sequence)* ; Sequence <- Prefix* (RETURN .)? ; Prefix <- IdentifierClause / (AND / NOT)? Suffix ; Suffix <- Primary (QUESTION / STAR / PLUS)? ; IdentifierClause <- OPEN CaptureName Primary CLOSE ; Primary <- Identifier !LEFTARROW ; / OPEN Expression CLOSE ; / Literal / Class / ANY ; ;; Lexical syntax ; ; Identifier <- !NonIdentifier ; NonIdentifier <- OPEN / CLOSE ; CaptureName <- ; Literal <- / ; Class <- ; ; OPEN <- |<| ; CLOSE <- |>| ; LEFTARROW <- |<-| ; SLASH <- |/| ; AND <- |&| ; NOT <- |!| ; QUESTION <- |?| ; STAR <- |*| ; PLUS <- |+| ; RETURN <- :return ; ANY <- |%any%| ; (define-parser math-exp ; (additive <- (multitive left-val) #\+ (additive right-val) ; &return (+ left-val right-val) ; / multitive) ; (multitive <- (primary left-val) #\* (multitive right-val) ; &return (* left-val right-val) ; / primary) ; (primary <- #\( (additive val) #\) &return val ; / decimal) ; (decimal <- (#[0-9] num-char) ; &return (- (char->integer num-char) (char->integer #\0)))) (use peg) (use util.match) (define (main args) 0) (define nil ()) (define-class () (; Hierarchical tokens (non-terminals) (grammar :init-keyword :grammar) (definition :init-keyword :definition) (expression :init-keyword :expression) (sequence :init-keyword :sequence) (prefix :init-keyword :prefix) (suffix :init-keyword :suffix) (identifier-clause :init-keyword :identifier-clause) (primary :init-keyword :primary) ; Lexical tokens (terminals) (identifier :init-keyword :identifier) (non-identifier :init-keyword :non-identifier) (capture-name :init-keyword :capture-name) (literal :init-keyword :literal) (class :init-keyword :class) (open :init-keyword :open) (close :init-keyword :close) (leftarrow :init-keyword :leftarrow) (return :init-keyword :return) (any :init-keyword :any) (plus :init-keyword :plus) (star :init-keyword :star) (question :init-keyword :question) (not :init-keyword :not) (and :init-keyword :and) (slash :init-keyword :slash) ; Raw input (token :init-keyword :token :getter deriv-token) )) (define-macro (define-deriv name) `(define-method ,(string->symbol (string-append "deriv-" (symbol->string name))) ((derivs )) (let1 slt (slot-ref derivs ',name) (if (promise? slt) (let1 forced (force slt) (slot-set! derivs ',name forced) forced) slt)))) (define-macro (define-derivs . names) `(begin ,@(map (lambda (deriv-name) `(define-deriv ,deriv-name)) names))) (define-derivs ; define generic function *-deriv ; for hierarchical syntax grammar definition expression sequence prefix suffix primary identifier-clause ; for lexical tokens identifier non-identifier capture-name literal class open close leftarrow slash and not question star plus return any) (define (make-result val chars) `(:result ,val ,chars)) (define (parse tokens) (let loop((ret-deriv nil) (tokens (cons nil (reverse tokens)))) (if (null? tokens) ret-deriv (loop (letrec ((deriv (make ; code for generating code ; (for-each (lambda (ident) (format #t ":~a (lazy (parse-~a deriv))\n" ident ident)) '(leftarrow slash and not question star plus any)) ; hierarychical syntax :grammar (lazy (parse-grammar deriv)) :definition (lazy (parse-definition deriv)) :expression (lazy (parse-expression deriv)) :sequence (lazy (parse-sequence deriv)) :prefix (lazy (parse-prefix deriv)) :suffix (lazy (parse-suffix deriv)) :identifier-clause (lazy (parse-identifier-clause deriv)) :primary (lazy (parse-primary deriv)) ; lexical syntax :identifier (lazy (parse-identifier deriv)) :non-identifier (lazy (parse-non-identifier deriv)) :capture-name (lazy (parse-capture-name deriv)) :literal (lazy (parse-literal deriv)) :class (lazy (parse-class deriv)) :open (lazy (parse-open deriv)) :close (lazy (parse-close deriv)) :leftarrow (lazy (parse-leftarrow deriv)) :slash (lazy (parse-slash deriv)) :and (lazy (parse-and deriv)) :not (lazy (parse-not deriv)) :question (lazy (parse-question deriv)) :star (lazy (parse-star deriv)) :plus (lazy (parse-plus deriv)) :return (lazy (parse-return deriv)) :any (lazy (parse-any deriv)) ;raw input :token (if (null? tokens) nil (make-result (car tokens) ret-deriv))))) deriv) (cdr tokens))))) (define (parse-grammars pegs) (map (lambda (gram) (let1 gram (flatten-grammar gram) (let* ((ret (deriv-definition (parse gram))) (derivs (caddr (deriv-token (caddr ret))))) (if (not (null? derivs)) (error (format #f "Syntax error: ~a" gram)) (cadr ret))))) pegs)) ;;; ;;; Preprocessor of grammar input ;;; ;;; input: ;;; '(foo -> (e1 bar) (e2 baz) :return (do 'sth) / (e3 hoge) :return (do 'anth)) ;;; ;;; output: ;;; '(foo -> < e1 bar > < e2 baz > :return (do 'sth) / < e3 hoge > :return (do 'anth)) ;;; (define (flatten-grammar gram) (let loop((ret nil) (gram (reverse gram)) (expression? #f)) (if (null? gram) ret (loop (if (and expression? (list? (car gram))) (append `(< ,@(car gram) >) ret) (cons (car gram) ret)) (cdr gram) (cond ((eq? '/ (car gram)) #f) ((eq? :return (car gram)) #t) (else expression?)))))) (define (flatten-grammar gram) (concat-with '/ (map (lambda (sequence) (concat-with :return (let1 seq-pair (separate (cut eq? :return <>) sequence) `(,(apply append (map (lambda (elem) (if (pair? elem) `(< ,@(flatten-grammar elem) >) `(,elem))) (car seq-pair))) ,@(cdr seq-pair))))) (separate (cut eq? '/ <>) gram)))) (define (separate separator? ls) (let loop1((ret ()) (lst ls)) (if (null? lst) (if (separator? (car (reverse ls))) (reverse (cons () ret)) (reverse ret)) (receive (head tails) (let loop2((ret ()) (ls lst)) (cond ((null? ls) (values (reverse ret) ls)) ((separator? (car ls)) (values (reverse ret) (cdr ls))) (else (loop2 (cons (car ls) ret) (cdr ls))))) (loop1 (cons head ret) tails))))) (define (concat-with joiner ls) (let loop((ret (car (reverse ls))) (ls (cdr (reverse ls)))) (if (null? ls) ret (loop `(,@(car ls) ,joiner ,@ret) (cdr ls))))) (define-macro (parse-string-with grammars str) `(let () ,@(map translate-definition (parse-grammars grammars)) (parse-string ,(caar grammars) ,str))) (define-macro (define-parse-symbol ident sym) `(define (,(string->symbol (string-append "parse-" (symbol->string ident))) derivs) (match (deriv-token derivs) ((:result ',sym derivs-) (make-result ',sym derivs-)) (_ nil)))) (define-parse-symbol open <) (define-parse-symbol close >) (define-parse-symbol leftarrow <-) (define-parse-symbol slash /) (define-parse-symbol and &) (define-parse-symbol not !) (define-parse-symbol question ?) (define-parse-symbol star *) (define-parse-symbol plus +) (define-parse-symbol return :return) (define-parse-symbol any %any%) (define (parse-class derivs) (match (deriv-token derivs) ((:result val derivs-) (if (char-set? val) (make-result `(:char-set ,val) derivs-) nil)) (_ nil))) (define (parse-literal derivs) (match (deriv-token derivs) ((:result val derivs-) (cond ((string? val) (make-result `(:string ,val) derivs-)) ((char? val) (make-result `(:char ,val) derivs-)) (else nil))) (_ nil))) (define (parse-capture-name derivs) (match (deriv-token derivs) ((:result val derivs-) (if (keyword? val) (make-result val derivs-) nil)) (_ nil))) (define (parse-non-identifier derivs) (let* ((alt3 (lazy (match (deriv-slash derivs) ((:result ret derivs-) (make-result '/ derivs-)) (_ nil)))) (alt2 (lazy (match (deriv-close derivs) ((:result ret derivs-) (make-result '> derivs-)) (_ alt3)))) (alt1 (lazy (match (deriv-open derivs) ((:result ret derivs-) (make-result '< derivs-)) (_ alt2))))) (force alt1))) (define (parse-identifier derivs) (match (deriv-non-identifier derivs) (() (match (deriv-token derivs) ((:result val derivs-) (if (symbol? val) (make-result `(:identifier ,val) derivs-) nil)) (_ nil))) (_ nil))) (define (parse-primary derivs) (let* ((alt6 (lazy (match (deriv-any derivs) ((:result val derivs-) (make-result val derivs-)) (_ nil)))) (alt5 (lazy (match (deriv-class derivs) ((:result val derivs-) (make-result val derivs-)) (_ alt6)))) (alt4 (lazy (match (deriv-literal derivs) ((:result val derivs-) (make-result val derivs-)) (_ alt5)))) (alt3 (lazy (match (deriv-open derivs) ((:result val derivs-) (match (deriv-expression derivs-) ((:result exp derivs--) (match (deriv-close derivs--) ((:result val derivs---) (make-result exp derivs---)) (_ alt4))) (_ alt4))) (_ alt4)))) (alt1 (lazy (match (deriv-identifier derivs) ((:result ident derivs-) (match (deriv-leftarrow derivs-) ((:result val derivs--) alt3) (_ (make-result ident derivs-)))) (_ alt3))))) (force alt1))) (define (parse-identifier-clause derivs) (let* ((alt1 (lazy (match (deriv-open derivs) ((:result opn derivs-) (match (deriv-capture-name derivs-) ((:result capt derivs--) (match (deriv-primary derivs--) ((:result primary derivs---) (match (deriv-close derivs---) ((:result cls derivs----) (make-result `(:identifier-clause ,primary :capture ,(string->symbol (x->string capt))) derivs----)) (_ nil))) (_ nil))) (_ nil))) (_ nil))))) (force alt1))) (define (parse-suffix derivs) (match (deriv-primary derivs) ((:result prim derivs-) (let* ((alt3 (lazy (match (deriv-plus derivs-) ((:result plus derivs--) (make-result `(:one-more ,prim) derivs--)) (_ (make-result prim derivs-))))) (alt2 (lazy (match (deriv-star derivs-) ((:result star derivs--) (make-result `(:zero-more ,prim) derivs--)) (_ alt3)))) (alt1 (lazy (match (deriv-question derivs-) ((:result qstn derivs--) (make-result `(:optional ,prim) derivs--)) (_ alt2))))) (force alt1))) (_ nil))) (define (parse-prefix derivs) (let* ((glob-alt2 (lazy (let* ((alt2 (lazy (match (deriv-not derivs) ((:result val derivs-) (values :not derivs-)) (_ (values nil derivs))))) (alt1 (lazy (match (deriv-and derivs) ((:result val derivs-) (values :and derivs-)) (_ alt2))))) (receive (val derivs-) (force alt1) (match (deriv-suffix derivs-) ((:result suffix derivs--) (if (null? val) (make-result suffix derivs--) (make-result `(,val ,suffix) derivs--))) (_ nil)))))) (glob-alt1 (lazy (match (deriv-identifier-clause derivs) ((:result ident-clause derivs-) (make-result ident-clause derivs-)) (_ glob-alt2))))) (force glob-alt1))) (define (parse-sequence derivs) (receive (seqs derivs) (let loop((ret ()) (derivs derivs)) (match (deriv-prefix derivs) ((:result val derivs-) (loop (cons val ret) derivs-)) (_ (values (reverse ret) derivs)))) (let* ((alt2 (lazy (make-result `(:sequence ,seqs :callback ()) derivs))) (alt1 (lazy (match (deriv-return derivs) ((:result val derivs-) (match (deriv-token derivs-) ((:result callback derivs--) (make-result `(:sequence ,seqs :callback ,callback) derivs--)) (_ alt2))) (_ alt2))))) (force alt1)))) (define (parse-expression derivs) (match (deriv-sequence derivs) ((:result seq derivs-) (receive (ret derivs) (let loop((ret `(,seq)) (derivs derivs-)) (match (deriv-slash derivs) ((:result sla derivs-) (match (deriv-sequence derivs-) ((:result seq derivs--) (loop (cons seq ret) derivs--)) (_ (values (reverse ret) derivs)))) (_ (values (reverse ret) derivs)))) (make-result (if (null? (cdr ret)) (car ret) (cons :expression ret)) derivs))))) (define (parse-definition derivs) (match (deriv-identifier derivs) ((:result ident derivs-) (match (deriv-leftarrow derivs-) ((:result larr derivs--) (match (deriv-expression derivs--) ((:result expr derivs---) (make-result `(:definition ,ident ,expr) derivs---)) (_ nil))) (_ nil))) (_ nil))) ;;; ;;; AST translation to PEG ;;; ;;; ref. WiLiKi:Rui:ParsingExpressionGrammar ;;; ;; ;; expected input ;; * identifier ;; (:identifier ) ;; * identifier-clause ;; (:identifier-clause (:identifier ) :capture ) ;; * char(literal) ;; (:char ) ;; * string(literal) ;; (:string ) ;; * char-set(class) ;; (:char-set ) ;; * expression ;; (:expression (:sequence (...) :callback ...) ...) ;; ;; return (values ) ;; (define (translate-primary primary) (let ((tag (car primary)) (value (cadr primary))) (case tag ((:identifier) value) ((:char) `($char ,value)) ((:string) `($string ,value)) ((:char-set) `($one-of ,value)) ((:sequence) (translate-sequence primary)) ((:expression) (translate-expression primary))))) ;; ;; expected input ;; * more than zero ;; (:zero-more ) ;; * more than one ;; (:one-more ) ;; * primary ;; (see translate-primary) ;; ;; return (values ) ;; (define (translate-suffix suffix) (let ((tag (car suffix)) (value (cadr suffix))) (case tag ((:zero-more) `($many ,(translate-primary value))) ((:one-more) `($many ,(translate-primary value) 1)) ((:optional) `($optional ,(translate-primary value))) (else (translate-primary suffix))))) ;; ;; expected input ;; * and(syntactic predicate) ;; (:and ) ;; * not(syntactic predicate) ;; (:not ) ;; * suffix ;; (see translate-suffix) ;; ;; return (values ) ;; (define (translate-prefix prefix) (let ((tag (car prefix)) (value (cadr prefix))) (case tag ((:not) (values `($not ,(translate-suffix value)) #f)) ((:identifier-clause) (values `(,(get-keyword :capture prefix) ,(translate-primary value)) #t)) (else (values (translate-suffix prefix) #f))))) (define (translate-sequence sequence) (let ((tag (car sequence)) (prefixes (cadr sequence)) (callback (get-keyword :callback sequence #f))) (case tag ((:sequence) (if (null? prefixes) `($do ($return ,callback)) (if (and (null? (cdr prefixes)) (null? callback)) (translate-prefix (car prefixes)) (let loop((items nil) (capture-names nil) (prefixes prefixes)) (cond ((null? prefixes) (cons '$do (reverse (cons (if (null? callback) `($return (list ,@(reverse capture-names))) `($return ,callback)) items)))) (else (receive (prefix ident-clause?) (translate-prefix (car prefixes)) (let1 capt (gensym) (loop (cons (if ident-clause? prefix `(,capt ,prefix)) items) (cons (if ident-clause? (car prefix) capt) capture-names) (cdr prefixes) ))))))))) (else (translate-prefix sequence))))) (define (translate-expression expression) (let ((tag (car expression))) (case tag ((:expression) (let1 sequences (cdr expression) (if (null? (cdr sequences)) (translate-sequence (car sequences)) `($or ,@(map translate-sequence sequences))))) (else (translate-sequence expression))))) (define (translate-definition definition) (let ((tag (car definition)) (ident (cadr (cadr definition))) (expression (caddr definition))) (case tag ((:definition) `(define ,ident ,(translate-expression expression))) (else (error "Translation error"))))) (define (hoge) (let* ((spaces ($many ($one-of #[ \t]))) (comma ($seq spaces ($char #\,) spaces)) (dquote ($char #\")) (double-dquote ($do (($string "\"\"")) ($return #\"))) (quoted-body ($many ($or double-dquote ($one-of #[^\"])))) (quoted ($between dquote quoted-body dquote)) (unquoted ($many-till anychar ($or comma newline))) (field ($or quoted unquoted)) (record ($sep-by ($->rope field) comma))) #?=(parse-string record "a,b,c") #?=(parse-string record "\"a\" , b , c") #?=(parse-string record "\"a \"\" \n\" , b , c")) (parse-string-with ((Record <- (:fld Field) (:suffix RecordSuffix) :return (cons fld suffix)) (RecordSuffix <- Comma (:fld Field) (:suffix RecordSuffix) :return (cons fld suffix) / :return () ) (Field <- (:charlist Quoted) :return (list->string charlist) / (:charlist UnQuoted) :return (list->string charlist)) (Spaces <- #[ \t] *) (Comma <- Spaces #\, Spaces) (DQuote <- #\") (DoubleDQuote <- #\" #\" :return #\") (Quoted <- DQuote (:body ((DoubleDQuote / #[^\"]) *)) DQuote :return body) (UnQuoted <- ((! (Comma / newline) (:ch anychar) :return ch) *)) ) "\"a\" , b , c") ) ;; (parse-string-with ;; ((Record <- (:fld Field) ;; (:rest-fld ((Comma (:fld Field) :return fld) *)) ;; :return (cons fld rest-fld)) ;; (Field <- (:charlist Quoted) :return (list->string charlist) ;; / (:charlist UnQuoted) :return (list->string charlist)) ;; (Spaces <- #[ \t] *) ;; (Comma <- Spaces #\, Spaces) ;; (DQuote <- #\") ;; (Quoted <- DQuote ;; (:body ((#\" #\" :return #\" / #[^\"]) *)) ;; DQuote ;; :return body) ;; (UnQuoted <- ! (Comma / newline) (:ch anychar) (:unq UnQuoted) ;; :return (cons ch unq) ;; / :return ()) ;; ) ;; "\"a\"\"bc\" , b , c") ;; (parse-string-with ;; ((A <- "a" A "a" / "a")) ;; "aaaaa")