| 1 | (library (binding mecab-ffi) |
|---|
| 2 | (export mecab-new2 |
|---|
| 3 | mecab-version |
|---|
| 4 | mecab-strerror |
|---|
| 5 | mecab-destroy |
|---|
| 6 | mecab-get-partial mecab-set-partial! |
|---|
| 7 | ;;mecab-get-theta mecab-set-theta! |
|---|
| 8 | mecab-get-lattice-level mecab-set-lattice-level! |
|---|
| 9 | mecab-get-all-morphs mecab-set-all-morphs! |
|---|
| 10 | mecab-sparse-tostr mecab-sparse-tostr2 ;mecab-sparse-tostr3 |
|---|
| 11 | mecab-sparse-tonode mecab-sparse-tonode2 |
|---|
| 12 | mecab-nbest-sparse-tostr mecab-nbest-sparse-tostr2 ;mecab-nbest-sparse-tostr3 |
|---|
| 13 | mecab-nbest-init mecab-nbest-init2 |
|---|
| 14 | mecab-nbest-next-tostr mecab-nbest-next-tostr2 |
|---|
| 15 | mecab-nbest-next-tonode |
|---|
| 16 | mecab-format-node |
|---|
| 17 | mecab-dictionary-info |
|---|
| 18 | |
|---|
| 19 | mecab-node-prev mecab-node-next mecab-node-enext mecab-node-bnext |
|---|
| 20 | mecab-node-surface mecab-node-feature mecab-node-id |
|---|
| 21 | mecab-node-length mecab-node-rlength |
|---|
| 22 | mecab-node-rc-attr mecab-node-lc-attr |
|---|
| 23 | mecab-node-posid mecab-node-char-type |
|---|
| 24 | mecab-node-stat mecab-node-normal? mecab-node-unknown? mecab-node-bos? mecab-node-eos? |
|---|
| 25 | mecab-node-best? |
|---|
| 26 | mecab-node-sentence-length |
|---|
| 27 | ;; mecab-node-alpha mecab-node-beta mecab-node-prob |
|---|
| 28 | mecab-node-wcost mecab-node-cost |
|---|
| 29 | ;; mecab-node-token |
|---|
| 30 | |
|---|
| 31 | string->utf8z |
|---|
| 32 | ) |
|---|
| 33 | (import (rnrs) |
|---|
| 34 | (rnrs r5rs) |
|---|
| 35 | (mosh ffi) |
|---|
| 36 | ) |
|---|
| 37 | |
|---|
| 38 | (define libmecab (open-shared-library "/usr/local/lib/libmecab.1.dylib")) |
|---|
| 39 | |
|---|
| 40 | (define mecab-new2 |
|---|
| 41 | (c-function libmecab void* mecab_new2 char*)) |
|---|
| 42 | (define mecab-version |
|---|
| 43 | (c-function libmecab char* mecab_version)) |
|---|
| 44 | (define mecab-strerror |
|---|
| 45 | (c-function libmecab char* mecab_strerror void*)) |
|---|
| 46 | (define mecab-destroy |
|---|
| 47 | (c-function libmecab void mecab_destroy void*)) |
|---|
| 48 | |
|---|
| 49 | ;; パラメータ変更系 |
|---|
| 50 | (define mecab-get-partial |
|---|
| 51 | (c-function libmecab int mecab_get_partial void*)) |
|---|
| 52 | (define mecab-set-partial! |
|---|
| 53 | (c-function libmecab void mecab_set_partial void* int)) |
|---|
| 54 | ;(define mecab-get-theta |
|---|
| 55 | ; (c-function libmecab float mecab_get_theta void*)) |
|---|
| 56 | ;(define mecab-set-theta! |
|---|
| 57 | ; (c-function libmecab void mecab_set_theta void* float)) |
|---|
| 58 | (define mecab-get-lattice-level |
|---|
| 59 | (c-function libmecab int mecab_get_lattice_level void*)) |
|---|
| 60 | (define mecab-set-lattice-level! |
|---|
| 61 | (c-function libmecab int mecab_set_lattice_level void* int)) |
|---|
| 62 | (define mecab-get-all-morphs |
|---|
| 63 | (c-function libmecab int mecab_get_all_morphs void*)) |
|---|
| 64 | (define mecab-set-all-morphs! |
|---|
| 65 | (c-function libmecab void mecab_set_all_morphs void* int)) |
|---|
| 66 | |
|---|
| 67 | (define mecab-sparse-tostr |
|---|
| 68 | (c-function libmecab char* mecab_sparse_tostr void* char*)) |
|---|
| 69 | (define mecab-sparse-tostr2 |
|---|
| 70 | (c-function libmecab char* mecab_sparse_tostr void* char* int)) |
|---|
| 71 | ;(define mecab-sparse-tostr3 |
|---|
| 72 | ; (c-function libmecab char* mecab_sparse_tostr void* char* int char* int)) |
|---|
| 73 | (define mecab-sparse-tonode ; mecab_node_t* を返す |
|---|
| 74 | (c-function libmecab void* mecab_sparse_tonode void* char*)) ;; (m, str) |
|---|
| 75 | (define mecab-sparse-tonode2 ; mecab_node_t* を返す |
|---|
| 76 | (c-function libmecab void* mecab_sparse_tonode2 void* char* int)) ;; (m str len) |
|---|
| 77 | |
|---|
| 78 | (define mecab-nbest-sparse-tostr |
|---|
| 79 | (c-function libmecab char* mecab_nbest_sparse_tostr void* int char*)) |
|---|
| 80 | (define mecab-nbest-sparse-tostr2 |
|---|
| 81 | (c-function libmecab char* mecab_nbest_sparse_tostr2 void* int char* int)) |
|---|
| 82 | ;(define mecab-nbest-sparse-tostr3 |
|---|
| 83 | ; (c-function libmecab char* mecab_nbest_sparse_tostr3 void* int char int char* int)) |
|---|
| 84 | (define mecab-nbest-init |
|---|
| 85 | (c-function libmecab int mecab_nbest_init void* char*)) |
|---|
| 86 | (define mecab-nbest-init2 |
|---|
| 87 | (c-function libmecab int mecab_nbest_init2 void* char* int)) |
|---|
| 88 | (define mecab-nbest-next-tostr |
|---|
| 89 | (c-function libmecab char* mecab_nbest_next_tostr void*)) |
|---|
| 90 | (define mecab-nbest-next-tostr2 |
|---|
| 91 | (c-function libmecab char* mecab_nbest_next_tostr2 void* char* int)) |
|---|
| 92 | (define mecab-nbest-next-tonode ; mecab_node_t* |
|---|
| 93 | (c-function libmecab void* mecab_nbest_next_tonode void*)) |
|---|
| 94 | (define mecab-format-node |
|---|
| 95 | (c-function libmecab char* mecab_format_node void* void*)) ; (mecab node) |
|---|
| 96 | (define mecab-dictionary-info ; mecab_dictionary_info_t* を返す |
|---|
| 97 | (c-function libmecab void* mecab_dictionary_info void*)) |
|---|
| 98 | |
|---|
| 99 | ;; APIs not supported: |
|---|
| 100 | ;; MECAB_DLL_EXTERN int mecab_do (int argc, char **argv); |
|---|
| 101 | ;; MECAB_DLL_EXTERN mecab_t* mecab_new(int argc, char **argv); |
|---|
| 102 | ;; MECAB_DLL_EXTERN int mecab_dict_index(int argc, char **argv); |
|---|
| 103 | ;; MECAB_DLL_EXTERN int mecab_dict_gen(int argc, char **argv); |
|---|
| 104 | ;; MECAB_DLL_EXTERN int mecab_cost_train(int argc, char **argv); |
|---|
| 105 | ;; MECAB_DLL_EXTERN int mecab_system_eval(int argc, char **argv); |
|---|
| 106 | ;; MECAB_DLL_EXTERN int mecab_test_gen(int argc, char **argv); |
|---|
| 107 | |
|---|
| 108 | ;; |
|---|
| 109 | ;; mecab_node_t |
|---|
| 110 | ;; |
|---|
| 111 | (define (mecab-node-prev node-ptr) (pointer-ref node-ptr 0)) |
|---|
| 112 | (define (mecab-node-next node-ptr) (pointer-ref node-ptr 1)) |
|---|
| 113 | (define (mecab-node-enext node-ptr) (pointer-ref node-ptr 2)) |
|---|
| 114 | (define (mecab-node-bnext node-ptr) (pointer-ref node-ptr 3)) |
|---|
| 115 | (define (mecab-node-surface node-ptr) |
|---|
| 116 | (pointer->string* (pointer-ref node-ptr 8) |
|---|
| 117 | (mecab-node-length node-ptr)) ) |
|---|
| 118 | (define (mecab-node-feature node-ptr) |
|---|
| 119 | ; (string-tokenize |
|---|
| 120 | (map (lambda (s) (if (string=? "*" s) #f s)) |
|---|
| 121 | (string-split (pointer->string (pointer-ref node-ptr 9)) #\,))) |
|---|
| 122 | |
|---|
| 123 | (define (mecab-node-id node-ptr) |
|---|
| 124 | (pointer-ref node-ptr 10)) |
|---|
| 125 | (define (mecab-node-length node-ptr) |
|---|
| 126 | (bitwise-bit-field (pointer-ref node-ptr 11) 0 16)) |
|---|
| 127 | (define (mecab-node-rlength node-ptr) |
|---|
| 128 | (bitwise-bit-field (pointer-ref node-ptr 11) 16 32)) |
|---|
| 129 | (define (mecab-node-rc-attr node-ptr) |
|---|
| 130 | (bitwise-bit-field (pointer-ref node-ptr 12) 0 16)) |
|---|
| 131 | (define (mecab-node-lc-attr node-ptr) |
|---|
| 132 | (bitwise-bit-field (pointer-ref node-ptr 12) 16 32)) |
|---|
| 133 | (define (mecab-node-posid node-ptr) |
|---|
| 134 | (bitwise-bit-field (pointer-ref node-ptr 13) 0 16)) |
|---|
| 135 | (define (mecab-node-char-type node-ptr) |
|---|
| 136 | (bitwise-bit-field (pointer-ref node-ptr 13) 16 24)) |
|---|
| 137 | (define (mecab-node-stat node-ptr) |
|---|
| 138 | (case (bitwise-bit-field (pointer-ref node-ptr 13) 24 32) |
|---|
| 139 | [(0) 'mecab-nor-node] |
|---|
| 140 | [(1) 'mecab-unk-node] |
|---|
| 141 | [(2) 'mecab-bos-node] |
|---|
| 142 | [(3) 'mecab-eos-node])) |
|---|
| 143 | (define (mecab-node-normal? node-ptr) |
|---|
| 144 | (eq? 'mecab-nor-node (mecab-node-stat node-ptr))) |
|---|
| 145 | (define (mecab-node-unknown? node-ptr) |
|---|
| 146 | (eq? 'mecab-unk-node (mecab-node-stat node-ptr))) |
|---|
| 147 | (define (mecab-node-bos? node-ptr) |
|---|
| 148 | (eq? 'mecab-bos-node (mecab-node-stat node-ptr))) |
|---|
| 149 | (define (mecab-node-eos? node-ptr) |
|---|
| 150 | (eq? 'mecab-eos-node (mecab-node-stat node-ptr))) |
|---|
| 151 | (define (mecab-node-best? node-ptr) |
|---|
| 152 | (bitwise-bit-set? (pointer-ref node-ptr 14) 0)) |
|---|
| 153 | (define (mecab-node-sentence-length node-ptr) ; available only when BOS |
|---|
| 154 | (pointer-ref node-ptr 15)) |
|---|
| 155 | ;(define (mecab-node-alpha node-ptr) |
|---|
| 156 | ; (pointer-ref node-ptr 16)) |
|---|
| 157 | ;(define (mecab-node-beta node-ptr) |
|---|
| 158 | ; (pointer-ref node-ptr 17)) |
|---|
| 159 | ;(define (mecab-node-prob node-ptr) |
|---|
| 160 | ; (pointer-ref node-ptr 18)) |
|---|
| 161 | (define (mecab-node-wcost node-ptr) |
|---|
| 162 | (bitwise-bit-field (pointer-ref node-ptr 19) 0 16)) |
|---|
| 163 | (define (mecab-node-cost node-ptr) |
|---|
| 164 | (pointer-ref node-ptr 20)) |
|---|
| 165 | ;(define (mecab-node-token node-ptr) |
|---|
| 166 | ; (pointer-ref node-ptr 21)) |
|---|
| 167 | |
|---|
| 168 | ;; |
|---|
| 169 | ;; utilities by naoya_t |
|---|
| 170 | ;; |
|---|
| 171 | (define (string->utf8z str) |
|---|
| 172 | ;;文字列をutf-8なbytevectorに変換。文字列側に\x0;があっても無視されるので、変換後に末尾に0を足す |
|---|
| 173 | (let* ([u8 (string->utf8 str)] |
|---|
| 174 | [len (bytevector-length u8)] |
|---|
| 175 | [u8z (make-bytevector (+ len 1))]) |
|---|
| 176 | ;; (bytevector-copy u8z u8) ;memcpy的なのはどうすればいい |
|---|
| 177 | ; (format #t "len: ~d\n" len) |
|---|
| 178 | (let loop ((i 0)) |
|---|
| 179 | (when (< i len) |
|---|
| 180 | (bytevector-u8-set! u8z i (bytevector-u8-ref u8 i)) |
|---|
| 181 | (loop (+ i 1)))) |
|---|
| 182 | (bytevector-u8-set! u8z len 0) |
|---|
| 183 | u8z)) |
|---|
| 184 | |
|---|
| 185 | (define (read-from-ptr ptr bvec words) |
|---|
| 186 | (let loop ((i 0)) |
|---|
| 187 | (when (< i words) |
|---|
| 188 | (let ((word (pointer-ref ptr i))) |
|---|
| 189 | (bytevector-uint-set! bvec (* i 4) word (endianness little) 4) |
|---|
| 190 | (loop (+ i 1)))))) |
|---|
| 191 | |
|---|
| 192 | (define (pointer->string* ptr len) |
|---|
| 193 | (let* ([words (quotient (+ len 4) 4)] |
|---|
| 194 | [bvec (make-bytevector (* words 4))]) |
|---|
| 195 | ; (format #t "(pointer->string* ptr:~a len:~d words:~d bvec:~a)\n" |
|---|
| 196 | ; ptr len words bvec) |
|---|
| 197 | (read-from-ptr ptr bvec words) |
|---|
| 198 | (bytevector-u8-set! bvec len 0) |
|---|
| 199 | (utf8->string bvec))) |
|---|
| 200 | |
|---|
| 201 | |
|---|
| 202 | ;; from 逆引きScheme |
|---|
| 203 | (define (string-split-by-char str spliter) |
|---|
| 204 | (let loop ((ls (string->list str)) (buf '()) (ret '())) |
|---|
| 205 | (if (pair? ls) |
|---|
| 206 | (if (char=? (car ls) spliter) |
|---|
| 207 | (loop (cdr ls) '() (cons (list->string (reverse buf)) ret)) |
|---|
| 208 | (loop (cdr ls) (cons (car ls) buf) ret)) |
|---|
| 209 | (reverse (cons (list->string (reverse buf)) ret))))) |
|---|
| 210 | |
|---|
| 211 | (define (string-split-by-string str spliter) |
|---|
| 212 | (if (zero? (string-length spliter)) |
|---|
| 213 | (list str) |
|---|
| 214 | (let ((spl (string->list spliter))) |
|---|
| 215 | (let loop ((ls (string->list str)) (sp spl) (tmp '()) (buf '()) (ret '())) |
|---|
| 216 | (if (pair? sp) |
|---|
| 217 | (if (pair? ls) |
|---|
| 218 | (if (char=? (car ls) (car sp)) |
|---|
| 219 | (loop (cdr ls) (cdr sp) (cons (car ls) tmp) buf ret) |
|---|
| 220 | (loop (cdr ls) spl '() (cons (car ls) (append tmp buf)) ret)) |
|---|
| 221 | (reverse (cons (list->string (reverse (append tmp buf))) ret))) |
|---|
| 222 | (loop ls spl '() '() (cons (list->string (reverse buf)) ret))))))) |
|---|
| 223 | |
|---|
| 224 | (define (string-split str spliter) |
|---|
| 225 | (cond [(char? spliter) (string-split-by-char str spliter)] |
|---|
| 226 | [(string? spliter) (string-split-by-string str spliter)] |
|---|
| 227 | [else #f])) |
|---|
| 228 | ) |
|---|