| 1 | ;; -*- coding: utf-8 mode: scheme -*- |
|---|
| 2 | |
|---|
| 3 | (use text.tree) |
|---|
| 4 | (require "hascheme/peg-parser") |
|---|
| 5 | |
|---|
| 6 | (define-class <ast> () ()) |
|---|
| 7 | (define-class <ast-decl> (<ast>) ()) |
|---|
| 8 | (define-class <ast-type-decl> (<ast-decl>) |
|---|
| 9 | ((left :init-keyword :left :getter type-decl-left) |
|---|
| 10 | (right :init-keyword :right :getter type-decl-right))) |
|---|
| 11 | (define-class <ast-type> (<ast>) ()) |
|---|
| 12 | (define-class <ast-type-var> (<ast>) |
|---|
| 13 | ((type-var :init-keyword :var :getter type-var))) |
|---|
| 14 | (define-class <ast-single-type> (<ast-type>) |
|---|
| 15 | ((type-name :init-keyword :name :getter type-name) |
|---|
| 16 | (type-arity :init-keyword :arity :accessor type-arity) |
|---|
| 17 | (type-vars :init-keyword :vars :accessor type-vars :init-value #f) |
|---|
| 18 | (type-module :init-keyword :module :getter type-module :init-value #f))) |
|---|
| 19 | (define-class <ast-multi-type> (<ast-type>) |
|---|
| 20 | ((types :init-keyword :name :accessor types))) |
|---|
| 21 | (define-class <ast-simple-type> (<ast>) ()) |
|---|
| 22 | (define-method make-type-decl ((left <ast-simple-type>) (right <ast-type>)) |
|---|
| 23 | ; (make <type-decl> :left left :right right) |
|---|
| 24 | `(:type-decl ,left ,right)) |
|---|
| 25 | |
|---|
| 26 | (define (hascheme:parse str) |
|---|
| 27 | (parse-string-with |
|---|
| 28 | ( |
|---|
| 29 | ;; incompl |
|---|
| 30 | (Module <- "module" Spaces LexModId Spaces "where" Spaces Body |
|---|
| 31 | / Body) |
|---|
| 32 | (Body <- "{" (:decls TopDecls) "}" :return decls) |
|---|
| 33 | (TopDecls <- Spaces* (:decl TopDecl) (:rest-decls TopDecls%) Spaces* |
|---|
| 34 | :return `(:decls ,decl ,@rest-decls) |
|---|
| 35 | / Spaces* :return ()) |
|---|
| 36 | (TopDecls% <- Spaces* ";" Spaces* TopDecl TopDecls% |
|---|
| 37 | / :return ()) |
|---|
| 38 | |
|---|
| 39 | ;; incoml |
|---|
| 40 | (TopDecl <- |
|---|
| 41 | "type" Spaces (:left SimpleType) Spaces* |
|---|
| 42 | "=" Spaces* (:right Type) |
|---|
| 43 | :return (make-type-decl left right) |
|---|
| 44 | / "data" Spaces ( Context Spaces* "=>" Spaces*) ? SimpleType |
|---|
| 45 | Spaces* "=" |
|---|
| 46 | Spaces* Constructors |
|---|
| 47 | Spaces* Deriving ? |
|---|
| 48 | |
|---|
| 49 | / Decl |
|---|
| 50 | ) |
|---|
| 51 | (Decls <- "{" Spaces* Decl Spaces* ("," Decl Spaces*) * "}") |
|---|
| 52 | |
|---|
| 53 | ;; incompl |
|---|
| 54 | (Decl <- GenDecl |
|---|
| 55 | ;; /(FunctionLHS / Variable) RHS ;; do this later |
|---|
| 56 | / Variable Spaces* RHS) |
|---|
| 57 | ;; incompl |
|---|
| 58 | (GenDecl <- Variables Spaces* "::" Spaces* |
|---|
| 59 | ( Context Spaces* "=>" Spaces* ) ? Type |
|---|
| 60 | ;; / Fixity LexInteger ? Operators ;; later |
|---|
| 61 | ) |
|---|
| 62 | ;; incompl |
|---|
| 63 | (RHS <- "=" Spaces* Expression (Spaces "where" Spaces Decls) ?) |
|---|
| 64 | |
|---|
| 65 | (Expression <- "\\" Spaces* AtomPattern |
|---|
| 66 | (Spaces AtomPattern) * Spaces* "->" Spaces* Expression |
|---|
| 67 | / "let" Spaces* Decls Spaces* "in" Spaces* Expression |
|---|
| 68 | / "if" Spaces* Expression |
|---|
| 69 | Spaces* "then" Spaces* Expression |
|---|
| 70 | Spaces* "else" Spaces* Expression |
|---|
| 71 | / "case" Spaces* Expression |
|---|
| 72 | Spaces* "of" Spaces* "{" Alts "}" |
|---|
| 73 | / "do" Spaces* "{" Spaces* Statements Spaces* "}" |
|---|
| 74 | / FuncApplyExpression |
|---|
| 75 | ) |
|---|
| 76 | |
|---|
| 77 | (FuncApplyExpression <- AtomExpression +) |
|---|
| 78 | (AtomExpression |
|---|
| 79 | <- QVar |
|---|
| 80 | / GCon |
|---|
| 81 | / Literal |
|---|
| 82 | / "(" Spaces* Expression Spaces* ")" |
|---|
| 83 | / "(" Spaces* Expression Spaces* ("," Spaces* Expression Spaces*)+ ")" |
|---|
| 84 | / "[" Spaces* Expression Spaces* ("," Spaces* Expression Spaces*)* "]" |
|---|
| 85 | / "[" Spaces* Expression ( "," Spaces* Expression Spaces* )? |
|---|
| 86 | ".." Spaces* Expression? Spaces* "]" |
|---|
| 87 | / "[" Spaces* Expression Spaces* |
|---|
| 88 | "|" Spaces* Qual Spaces* ("," Spaces* Qual Spaces*)+ "]" |
|---|
| 89 | ;; / "(" Spaces* Expression Spaces* QualifiedOperator(A,i) ")" |
|---|
| 90 | ;; / "(" LeftExpression(i) QualifiedOperator(L,i) ")" |
|---|
| 91 | ;; / "(" QualifiedOperator(A,i) "<->" Expression(i+1) ")" |
|---|
| 92 | ;; / "(" QualifiedOperator(R,i) "<->" RightExpression(i) ")" |
|---|
| 93 | ;; / QualifiedConstructor "{" [ FBind ("," FBind)* ] "}" |
|---|
| 94 | ;; / AtomExpression<QualifiedConstructor> "{" FBind ("," FBind)* "}" |
|---|
| 95 | ) |
|---|
| 96 | |
|---|
| 97 | (AtomPattern |
|---|
| 98 | <- QVar |
|---|
| 99 | / GCon |
|---|
| 100 | QCon Spaces "{" Spaces* |
|---|
| 101 | ( FieldPattern Spaces* |
|---|
| 102 | ("," Spaces* FieldPattern Spaces*) * ) |
|---|
| 103 | ? "}" |
|---|
| 104 | / Literal |
|---|
| 105 | / "_" |
|---|
| 106 | / "(" Spaces* Pattern Spaces* ")" |
|---|
| 107 | / "(" Spaces* Pattern Spaces* ("," Spaces* Pattern Spaces*) + ")" |
|---|
| 108 | / "[" Spaces* Pattern Spaces* ("," Spaces* Pattern Spaces*) * "]" |
|---|
| 109 | / "~" Spaces* AtomPattern) |
|---|
| 110 | |
|---|
| 111 | (QVar <- LexQVarId / "(" Spaces* LexQVarSym Spaces* ")") |
|---|
| 112 | (GCon <- "(" Spaces* ")" |
|---|
| 113 | / "[" Spaces* "]" |
|---|
| 114 | / "(" Spaces* ("," Spaces*)+ ")" |
|---|
| 115 | / QCon) |
|---|
| 116 | (QCon <- LexQConId / "(" Spaces* LexGConSym Spaces* ")") |
|---|
| 117 | |
|---|
| 118 | (Context <- Class / |
|---|
| 119 | "(" Spaces* |
|---|
| 120 | ( Class Spaces* ("," Spaces* Class Spaces*) * ) ? ")") |
|---|
| 121 | (Class <- LexQtyCls Spaces LexTyVar |
|---|
| 122 | / LexQtyCls "(" LexTyVar AType + ")") |
|---|
| 123 | (SimpleType <- (:con LexTyCon) (:vars SimpleTypeVars) |
|---|
| 124 | :return `(:type-name ,con :vars ,vars)) |
|---|
| 125 | (SimpleTypeVars <- Spaces (:var LexTyVar) (:vars SimpleTypeVars) |
|---|
| 126 | :return (cons var vars) |
|---|
| 127 | / :return ()) |
|---|
| 128 | (Type <- (:type BType) Spaces* (:rest-types Type%) |
|---|
| 129 | :return `(:type ,type ,@rest-types)) |
|---|
| 130 | (Type% <- "->" Spaces* (:type Type) (:rest-types Type%) |
|---|
| 131 | :return (cons type rest-types) |
|---|
| 132 | / :return ()) |
|---|
| 133 | (BType <- (:type AType) (:rest-types BType%) |
|---|
| 134 | :return (cons type rest-types)) |
|---|
| 135 | (BType% <- Spaces (:type AType) (:rest-types BType%) |
|---|
| 136 | :return (cons type rest-types) |
|---|
| 137 | / :return ()) |
|---|
| 138 | (AType <- GtyCon |
|---|
| 139 | / (:id LexTyVar) :return (make <ast-type-var> :var id) |
|---|
| 140 | / "(" Spaces* (:t1 Type) Spaces* "," |
|---|
| 141 | Spaces* (:t2 Type) Spaces* (:rest AType%) ")" |
|---|
| 142 | :return (make <ast-simple-type> :name '$Tuple |
|---|
| 143 | :arity (+ 2 (length rest)) |
|---|
| 144 | :vars `(,t1 ,t2 ,@rest)) |
|---|
| 145 | / "[" Spaces* Type Spaces* "]" |
|---|
| 146 | / "(" Spaces* Type Spaces* ")") |
|---|
| 147 | (AType% <- "," Spaces* (:t Type) Spaces* (:rest AType%) |
|---|
| 148 | :return (cons t rest) |
|---|
| 149 | / :return ()) |
|---|
| 150 | (GtyCon <- LexQtyCon |
|---|
| 151 | / "(" Spaces* ")" |
|---|
| 152 | :return (make <ast-single-type> :name '$Unit :arity 0 :vars ()) |
|---|
| 153 | / "[" Spaces* "]" |
|---|
| 154 | :return '(make <ast-single-type> :name '$Unit :arity 0) |
|---|
| 155 | ; / "(" Spaces* "->" Spaces* ")" |
|---|
| 156 | / "(" Spaces* (:commas (("," Spaces*) +)) ")" |
|---|
| 157 | :return (make <ast-single-type> :name '$Tuple |
|---|
| 158 | :arity (+ 1 (length commas)))) |
|---|
| 159 | (Literal <- Float / Integer / Char / String) |
|---|
| 160 | (Integer <- Decimal / "0o" Octal / "00" Octal |
|---|
| 161 | / "0x" Hexadecimal / "0X" Hexadecimal) |
|---|
| 162 | (Float <- Decimal "." Decimal Exponent ? |
|---|
| 163 | / Decimal Exponent) |
|---|
| 164 | (Char <- "'" (LexGraphic / LexSpace / LexEscape) "'") |
|---|
| 165 | (String <- "\"" (LexGraphic / LexSpace / LexEscape / LexGap) * "\"") |
|---|
| 166 | (Decimal <- LexDigit LexDigit *) |
|---|
| 167 | (Exponent <- #[eE] #[-+] Decimal) |
|---|
| 168 | |
|---|
| 169 | (Constructors <- Constructor Spaces* ("|" Spaces* Constructor Spaces*) *) |
|---|
| 170 | (Constructor <- |
|---|
| 171 | ;; I don't know about this case |
|---|
| 172 | ;; / (BType / "!" Spaces* AType) |
|---|
| 173 | ;; Spaces* ConstructorOperator |
|---|
| 174 | ;; Spaces* (BType / "!" Spaces* AType) |
|---|
| 175 | LexCon Spaces* |
|---|
| 176 | "{" (Spaces* |
|---|
| 177 | FieldDecl Spaces* |
|---|
| 178 | ("," Spaces* FieldDecl Spaces*) * )* "}" |
|---|
| 179 | / LexCon Spaces* ("!" ? Spaces* AType Spaces*) * |
|---|
| 180 | ) |
|---|
| 181 | (FieldDecl <- Variables Spaces* "::" |
|---|
| 182 | Spaces* (Type / "!" Spaces* AType)) |
|---|
| 183 | (Variables <- Variable Spaces* ("," Spaces* Variable Spaces*) *) |
|---|
| 184 | (Variable <- LexVarId / "(" Spaces* LexVarSym Spaces* ")") |
|---|
| 185 | (Deriving <- "deriving" |
|---|
| 186 | Spaces |
|---|
| 187 | (DClass |
|---|
| 188 | / "(" (DClass Spaces* ("," Spaces* DClass Spaces) * )? ")")) |
|---|
| 189 | |
|---|
| 190 | ;; Tokens |
|---|
| 191 | |
|---|
| 192 | ;; incompl |
|---|
| 193 | (LexCon <- LexConId |
|---|
| 194 | ; / LexConSym |
|---|
| 195 | ) |
|---|
| 196 | (LexConSym <- I Dont Know) |
|---|
| 197 | (LexModId <- LexConId) |
|---|
| 198 | (LexTyVar <- LexVarId) |
|---|
| 199 | (LexTyCon <- LexConId) |
|---|
| 200 | (LexTyCls <- LexConId) |
|---|
| 201 | (LexQVarId <- (LexModId ".") ? LexVarId) |
|---|
| 202 | (LexQVarSym <- (LexModId ".") ? LexVarSym) |
|---|
| 203 | (LexQConId <- (LexModId ".") ? LexConId) |
|---|
| 204 | (LexQConSym <- (LexModId ".") ? LexConSym) |
|---|
| 205 | (LexQtyCon <- (:mod ((LexModId ".") ?)) (:tycon LexTyCon) |
|---|
| 206 | :return (if mod `(:module ,(car mod) ,@tycon) |
|---|
| 207 | `(:module #f ,@tycon))) |
|---|
| 208 | (LexQtyCls <- (LexModId ".") ? LexTyCls) |
|---|
| 209 | (LexGConSym <- ":" / LexQConSym ) |
|---|
| 210 | |
|---|
| 211 | (LexConId <- (:init LexLarge) (:rest LexIdRest) |
|---|
| 212 | :return (string->symbol (tree->string (cons init rest)))) |
|---|
| 213 | (LexVarId <- (:init LexSmall) (:rest LexIdRest) |
|---|
| 214 | :return (string->symbol (tree->string (cons init rest)))) |
|---|
| 215 | (LexVarSym <- LexSymbol (":" / LexSymbol) *) |
|---|
| 216 | (LexConSym <- LexSymbol (":" / LexSymbol) *) |
|---|
| 217 | (LexLarge <- #[A-Z]) |
|---|
| 218 | (LexSmall <- #[a-z]) |
|---|
| 219 | (LexIdRest <- #[A-Za-z'0-9] *) |
|---|
| 220 | (LexSymbol <- #[-!#$%&*+\./<=>?@\\^\|~]) |
|---|
| 221 | |
|---|
| 222 | (LexDigit <- #[0-9]) |
|---|
| 223 | |
|---|
| 224 | (Spaces* <- Space *) |
|---|
| 225 | (Spaces <- Space +) |
|---|
| 226 | (Space <- #[ \t\n]) |
|---|
| 227 | ) |
|---|
| 228 | str)) |
|---|
| 229 | |
|---|
| 230 | |
|---|
| 231 | (define (hascheme:parser-test) |
|---|
| 232 | (test* "Simplest body" '(()) |
|---|
| 233 | (hascheme:parse "{}"))) |
|---|
| 234 | |
|---|
| 235 | (provide "hascheme/parser") |
|---|