| 1 | (library (binding mecab-ffi) |
|---|
| 2 | (export mecab-new2 |
|---|
| 3 | mecab-version |
|---|
| 4 | mecab-strerror |
|---|
| 5 | mecab-destroy |
|---|
| 6 | mecab-get-partial mecab-set-partial! |
|---|
| 7 | ;;mecab-get-theta mecab-set-theta! |
|---|
| 8 | mecab-get-lattice-level mecab-set-lattice-level! |
|---|
| 9 | mecab-get-all-morphs mecab-set-all-morphs! |
|---|
| 10 | mecab-sparse-tostr mecab-sparse-tostr2 ;mecab-sparse-tostr3 |
|---|
| 11 | mecab-sparse-tonode mecab-sparse-tonode2 |
|---|
| 12 | mecab-nbest-sparse-tostr mecab-nbest-sparse-tostr2 ;mecab-nbest-sparse-tostr3 |
|---|
| 13 | mecab-nbest-init mecab-nbest-init2 |
|---|
| 14 | mecab-nbest-next-tostr mecab-nbest-next-tostr2 |
|---|
| 15 | mecab-nbest-next-tonode |
|---|
| 16 | mecab-format-node |
|---|
| 17 | mecab-dictionary-info |
|---|
| 18 | |
|---|
| 19 | mecab-node-prev mecab-node-next mecab-node-enext mecab-node-bnext |
|---|
| 20 | mecab-node-surface mecab-node-feature mecab-node-id |
|---|
| 21 | mecab-node-length mecab-node-rlength |
|---|
| 22 | mecab-node-rc-attr mecab-node-lc-attr |
|---|
| 23 | mecab-node-posid mecab-node-char-type |
|---|
| 24 | mecab-node-stat mecab-node-normal? mecab-node-unknown? mecab-node-bos? mecab-node-eos? |
|---|
| 25 | mecab-node-best? |
|---|
| 26 | mecab-node-sentence-length |
|---|
| 27 | ;; mecab-node-alpha mecab-node-beta mecab-node-prob |
|---|
| 28 | mecab-node-wcost mecab-node-cost |
|---|
| 29 | mecab-node-token |
|---|
| 30 | |
|---|
| 31 | ; string->utf8z |
|---|
| 32 | ) |
|---|
| 33 | (import (rnrs) (core) |
|---|
| 34 | (rnrs r5rs) |
|---|
| 35 | (ypsilon ffi)) |
|---|
| 36 | |
|---|
| 37 | (define libmecab (load-shared-object "libmecab.1.dylib")) |
|---|
| 38 | |
|---|
| 39 | ;(define-c-typedef mecab-t* void*) |
|---|
| 40 | (define-c-typedef mecab-node-t* void*) |
|---|
| 41 | (define-c-typedef mecab-node-t** void*) |
|---|
| 42 | (define-c-typedef mecab-path-t* void*) |
|---|
| 43 | (define-c-typedef mecab-token-t* void*) |
|---|
| 44 | |
|---|
| 45 | (define-c-struct-type mecab-node-t |
|---|
| 46 | (mecab-node-t* prev) |
|---|
| 47 | (mecab-node-t* next) |
|---|
| 48 | (mecab-node-t* enext) |
|---|
| 49 | (mecab-node-t* bnext) |
|---|
| 50 | (mecab-path-t* rpath) |
|---|
| 51 | (mecab-path-t* lpath) |
|---|
| 52 | (mecab-node-t** begin-node-list) |
|---|
| 53 | (mecab-node-t** end-node-list) |
|---|
| 54 | (char* surface) |
|---|
| 55 | (char* feature) |
|---|
| 56 | (int id) |
|---|
| 57 | (short length) |
|---|
| 58 | (short rlength) |
|---|
| 59 | (short rc-attr) |
|---|
| 60 | (short lc-attr) |
|---|
| 61 | (short posid) |
|---|
| 62 | (char char-type) |
|---|
| 63 | (char stat) |
|---|
| 64 | (char is-best) |
|---|
| 65 | (int sentence-length) |
|---|
| 66 | (int alpha) ; float |
|---|
| 67 | (int beta) ; float |
|---|
| 68 | (int prob) ; float |
|---|
| 69 | ; (float alpha) ;; => internal inconsistency |
|---|
| 70 | ; (float beta) ;; => internal inconsistency |
|---|
| 71 | ; (float prob) ;; => internal inconsistency |
|---|
| 72 | (short wcost) |
|---|
| 73 | (long cost) |
|---|
| 74 | (mecab-token-t* token)) |
|---|
| 75 | |
|---|
| 76 | (define (compose f g) (lambda args (f (apply g args)))) |
|---|
| 77 | |
|---|
| 78 | (define sizeof-mecab-node-t 88) |
|---|
| 79 | (define (void*->mecab-node-t* void*-ptr) |
|---|
| 80 | (make-bytevector-mapping void*-ptr sizeof-mecab-node-t)) |
|---|
| 81 | (define (char*->string char*-ptr . args) |
|---|
| 82 | (if (null? args) |
|---|
| 83 | (car (string-split (char*->string char*-ptr 255) #\x0)) ;; |
|---|
| 84 | (let ((len (car args))) |
|---|
| 85 | (utf8->string (make-bytevector-mapping char*-ptr len)) |
|---|
| 86 | ))) |
|---|
| 87 | |
|---|
| 88 | (define mecab-new2 |
|---|
| 89 | (c-function libmecab "libmecab" void* mecab_new2 (char*))) |
|---|
| 90 | (define mecab-version |
|---|
| 91 | (c-function libmecab "libmecab" char* mecab_version ())) |
|---|
| 92 | (define mecab-strerror |
|---|
| 93 | (c-function libmecab "libmecab" char* mecab_strerror (void*))) |
|---|
| 94 | (define mecab-destroy |
|---|
| 95 | (c-function libmecab "libmecab" void mecab_destroy (void*))) |
|---|
| 96 | |
|---|
| 97 | ;; パラメータ変更系 |
|---|
| 98 | (define mecab-get-partial |
|---|
| 99 | (c-function libmecab "libmecab" int mecab_get_partial (void*))) |
|---|
| 100 | (define mecab-set-partial! |
|---|
| 101 | (c-function libmecab "libmecab" void mecab_set_partial (void* int))) |
|---|
| 102 | ;(define mecab-get-theta |
|---|
| 103 | ; (c-function libmecab "libmecab" float mecab_get_theta (void*))) |
|---|
| 104 | ;(define mecab-set-theta! |
|---|
| 105 | ; (c-function libmecab "libmecab" void mecab_set_theta (void* float))) |
|---|
| 106 | (define mecab-get-lattice-level |
|---|
| 107 | (c-function libmecab "libmecab" int mecab_get_lattice_level (void*))) |
|---|
| 108 | (define mecab-set-lattice-level! |
|---|
| 109 | (c-function libmecab "libmecab" int mecab_set_lattice_level (void* int))) |
|---|
| 110 | (define mecab-get-all-morphs |
|---|
| 111 | (c-function libmecab "libmecab" int mecab_get_all_morphs (void*))) |
|---|
| 112 | (define mecab-set-all-morphs! |
|---|
| 113 | (c-function libmecab "libmecab" void mecab_set_all_morphs (void* int))) |
|---|
| 114 | |
|---|
| 115 | (define mecab-sparse-tostr |
|---|
| 116 | (c-function libmecab "libmecab" char* mecab_sparse_tostr (void* char*))) |
|---|
| 117 | (define mecab-sparse-tostr2 |
|---|
| 118 | (c-function libmecab "libmecab" char* mecab_sparse_tostr (void* char* int))) |
|---|
| 119 | ;(define mecab-sparse-tostr3 |
|---|
| 120 | ; (c-function libmecab "libmecab" char* mecab_sparse_tostr (void* char* int char* int))) |
|---|
| 121 | (define mecab-sparse-tonode |
|---|
| 122 | (compose void*->mecab-node-t* |
|---|
| 123 | (c-function libmecab "libmecab" void* mecab_sparse_tonode (void* char*)))) |
|---|
| 124 | (define mecab-sparse-tonode2 |
|---|
| 125 | (compose void*->mecab-node-t* |
|---|
| 126 | (c-function libmecab "libmecab" void* mecab_sparse_tonode2 (void* char* int)))) |
|---|
| 127 | ;(define (mecab-sparse-tonode m str); mecab_node_t* を返す |
|---|
| 128 | ; (void*->mecab-node-t* (mecab-sparse-tonode__ m str))) |
|---|
| 129 | ;(define (mecab-sparse-tonode2 m str len); mecab_node_t* を返す |
|---|
| 130 | ; (void*->mecab-node-t* (mecab-sparse-tonode2__ m str len))) |
|---|
| 131 | |
|---|
| 132 | (define mecab-nbest-sparse-tostr |
|---|
| 133 | (c-function libmecab "libmecab" char* mecab_nbest_sparse_tostr (void* int char*))) |
|---|
| 134 | (define mecab-nbest-sparse-tostr2 |
|---|
| 135 | (c-function libmecab "libmecab" char* mecab_nbest_sparse_tostr2 (void* int char* int))) |
|---|
| 136 | ;(define mecab-nbest-sparse-tostr3 |
|---|
| 137 | ; (c-function libmecab "libmecab" char* mecab_nbest_sparse_tostr3 (void* int char int char* int))) |
|---|
| 138 | (define mecab-nbest-init |
|---|
| 139 | (c-function libmecab "libmecab" int mecab_nbest_init (void* char*))) |
|---|
| 140 | (define mecab-nbest-init2 |
|---|
| 141 | (c-function libmecab "libmecab" int mecab_nbest_init2 (void* char* int))) |
|---|
| 142 | (define mecab-nbest-next-tostr |
|---|
| 143 | (c-function libmecab "libmecab" char* mecab_nbest_next_tostr (void*))) |
|---|
| 144 | (define mecab-nbest-next-tostr2 |
|---|
| 145 | (c-function libmecab "libmecab" char* mecab_nbest_next_tostr2 (void* char* int))) |
|---|
| 146 | (define mecab-nbest-next-tonode ; mecab_node_t* |
|---|
| 147 | (c-function libmecab "libmecab" void* mecab_nbest_next_tonode (void*))) |
|---|
| 148 | (define mecab-format-node |
|---|
| 149 | (c-function libmecab "libmecab" char* mecab_format_node (void* void*))) ; (mecab node) |
|---|
| 150 | (define mecab-dictionary-info ; mecab_dictionary_info_t* を返す |
|---|
| 151 | (c-function libmecab "libmecab" void* mecab_dictionary_info (void*))) |
|---|
| 152 | |
|---|
| 153 | ;; APIs not supported: |
|---|
| 154 | ;; MECAB_DLL_EXTERN int mecab_do (int argc, char **argv); |
|---|
| 155 | ;; MECAB_DLL_EXTERN mecab_t* mecab_new(int argc, char **argv); |
|---|
| 156 | ;; MECAB_DLL_EXTERN int mecab_dict_index(int argc, char **argv); |
|---|
| 157 | ;; MECAB_DLL_EXTERN int mecab_dict_gen(int argc, char **argv); |
|---|
| 158 | ;; MECAB_DLL_EXTERN int mecab_cost_train(int argc, char **argv); |
|---|
| 159 | ;; MECAB_DLL_EXTERN int mecab_system_eval(int argc, char **argv); |
|---|
| 160 | ;; MECAB_DLL_EXTERN int mecab_test_gen(int argc, char **argv); |
|---|
| 161 | |
|---|
| 162 | |
|---|
| 163 | ;; |
|---|
| 164 | ;; mecab_node_t |
|---|
| 165 | ;; |
|---|
| 166 | (define (mecab-node-prev node) (void*->mecab-node-t* (mecab-node-t-prev node))) |
|---|
| 167 | (define (mecab-node-next node) (void*->mecab-node-t* (mecab-node-t-next node))) |
|---|
| 168 | (define (mecab-node-enext node) (void*->mecab-node-t* (mecab-node-t-enext node))) |
|---|
| 169 | (define (mecab-node-bnext node) (void*->mecab-node-t* (mecab-node-t-bnext node))) |
|---|
| 170 | (define (mecab-node-surface node) |
|---|
| 171 | (char*->string (mecab-node-t-surface node) (mecab-node-t-length node))) |
|---|
| 172 | (define (mecab-node-feature node) |
|---|
| 173 | (let ((feature (char*->string (mecab-node-t-feature node)))) |
|---|
| 174 | (map (lambda (s) (if (string=? "*" s) #f s)) |
|---|
| 175 | (string-split feature #\,)))) |
|---|
| 176 | |
|---|
| 177 | (define (mecab-node-id node) (mecab-node-t-id node)) |
|---|
| 178 | (define (mecab-node-length node) (mecab-node-t-length node)) |
|---|
| 179 | (define (mecab-node-rlength node) (mecab-node-t-rlength node)) |
|---|
| 180 | (define (mecab-node-rc-attr node) (mecab-node-t-rc-attr node)) |
|---|
| 181 | (define (mecab-node-lc-attr node) (mecab-node-t-lc-attr node)) |
|---|
| 182 | (define (mecab-node-posid node) (mecab-node-t-posid node)) |
|---|
| 183 | (define (mecab-node-char-type node) (mecab-node-t-char-type node)) |
|---|
| 184 | (define (mecab-node-stat node) |
|---|
| 185 | (case (mecab-node-t-stat node) |
|---|
| 186 | [(0) 'mecab-nor-node] |
|---|
| 187 | [(1) 'mecab-unk-node] |
|---|
| 188 | [(2) 'mecab-bos-node] |
|---|
| 189 | [(3) 'mecab-eos-node])) |
|---|
| 190 | |
|---|
| 191 | (define (mecab-node-normal? node) (eq? 'mecab-nor-node (mecab-node-stat node))) |
|---|
| 192 | (define (mecab-node-unknown? node) (eq? 'mecab-unk-node (mecab-node-stat node))) |
|---|
| 193 | (define (mecab-node-bos? node) (eq? 'mecab-bos-node (mecab-node-stat node))) |
|---|
| 194 | (define (mecab-node-eos? node) (eq? 'mecab-eos-node (mecab-node-stat node))) |
|---|
| 195 | (define (mecab-node-best? node) (= 1 (mecab-node-t-is-best node))) |
|---|
| 196 | (define (mecab-node-sentence-length node) ; available only when BOS |
|---|
| 197 | (mecab-node-t-sentence-length node)) |
|---|
| 198 | ;(define (mecab-node-alpha node-ptr) |
|---|
| 199 | ; (pointer-ref node-ptr 16)) |
|---|
| 200 | ;(define (mecab-node-beta node-ptr) |
|---|
| 201 | ; (pointer-ref node-ptr 17)) |
|---|
| 202 | ;(define (mecab-node-prob node-ptr) |
|---|
| 203 | ; (pointer-ref node-ptr 18)) |
|---|
| 204 | (define (mecab-node-wcost node) (mecab-node-t-wcost node)) |
|---|
| 205 | (define (mecab-node-cost node) (mecab-node-t-cost node)) |
|---|
| 206 | (define (mecab-node-token node) (mecab-node-t-token node)) |
|---|
| 207 | |
|---|
| 208 | |
|---|
| 209 | ;; from 逆引きScheme |
|---|
| 210 | (define (string-split-by-char str spliter) |
|---|
| 211 | (let loop ((ls (string->list str)) (buf '()) (ret '())) |
|---|
| 212 | (if (pair? ls) |
|---|
| 213 | (if (char=? (car ls) spliter) |
|---|
| 214 | (loop (cdr ls) '() (cons (list->string (reverse buf)) ret)) |
|---|
| 215 | (loop (cdr ls) (cons (car ls) buf) ret)) |
|---|
| 216 | (reverse (cons (list->string (reverse buf)) ret))))) |
|---|
| 217 | |
|---|
| 218 | (define (string-split-by-string str spliter) |
|---|
| 219 | (if (zero? (string-length spliter)) |
|---|
| 220 | (list str) |
|---|
| 221 | (let ((spl (string->list spliter))) |
|---|
| 222 | (let loop ((ls (string->list str)) (sp spl) (tmp '()) (buf '()) (ret '())) |
|---|
| 223 | (if (pair? sp) |
|---|
| 224 | (if (pair? ls) |
|---|
| 225 | (if (char=? (car ls) (car sp)) |
|---|
| 226 | (loop (cdr ls) (cdr sp) (cons (car ls) tmp) buf ret) |
|---|
| 227 | (loop (cdr ls) spl '() (cons (car ls) (append tmp buf)) ret)) |
|---|
| 228 | (reverse (cons (list->string (reverse (append tmp buf))) ret))) |
|---|
| 229 | (loop ls spl '() '() (cons (list->string (reverse buf)) ret))))))) |
|---|
| 230 | |
|---|
| 231 | (define (string-split str spliter) |
|---|
| 232 | (cond [(char? spliter) (string-split-by-char str spliter)] |
|---|
| 233 | [(string? spliter) (string-split-by-string str spliter)] |
|---|
| 234 | [else #f])) |
|---|
| 235 | |
|---|
| 236 | ) |
|---|