1 | (library (binding mecab-ffi) |
---|
2 | (export mecab-new2 |
---|
3 | mecab-version |
---|
4 | mecab-strerror |
---|
5 | mecab-destroy |
---|
6 | mecab-get-partial mecab-set-partial! |
---|
7 | ;;mecab-get-theta mecab-set-theta! |
---|
8 | mecab-get-lattice-level mecab-set-lattice-level! |
---|
9 | mecab-get-all-morphs mecab-set-all-morphs! |
---|
10 | mecab-sparse-tostr mecab-sparse-tostr2 ;mecab-sparse-tostr3 |
---|
11 | mecab-sparse-tonode mecab-sparse-tonode2 |
---|
12 | mecab-nbest-sparse-tostr mecab-nbest-sparse-tostr2 ;mecab-nbest-sparse-tostr3 |
---|
13 | mecab-nbest-init mecab-nbest-init2 |
---|
14 | mecab-nbest-next-tostr mecab-nbest-next-tostr2 |
---|
15 | mecab-nbest-next-tonode |
---|
16 | mecab-format-node |
---|
17 | mecab-dictionary-info |
---|
18 | |
---|
19 | mecab-node-prev mecab-node-next mecab-node-enext mecab-node-bnext |
---|
20 | mecab-node-surface mecab-node-feature mecab-node-id |
---|
21 | mecab-node-length mecab-node-rlength |
---|
22 | mecab-node-rc-attr mecab-node-lc-attr |
---|
23 | mecab-node-posid mecab-node-char-type |
---|
24 | mecab-node-stat mecab-node-normal? mecab-node-unknown? mecab-node-bos? mecab-node-eos? |
---|
25 | mecab-node-best? |
---|
26 | mecab-node-sentence-length |
---|
27 | ;; mecab-node-alpha mecab-node-beta mecab-node-prob |
---|
28 | mecab-node-wcost mecab-node-cost |
---|
29 | ;; mecab-node-token |
---|
30 | |
---|
31 | string->utf8z |
---|
32 | ) |
---|
33 | (import (rnrs) |
---|
34 | (rnrs r5rs) |
---|
35 | (mosh ffi) |
---|
36 | ) |
---|
37 | |
---|
38 | (define libmecab (open-shared-library "/usr/local/lib/libmecab.1.dylib")) |
---|
39 | |
---|
40 | (define mecab-new2 |
---|
41 | (c-function libmecab void* mecab_new2 char*)) |
---|
42 | (define mecab-version |
---|
43 | (c-function libmecab char* mecab_version)) |
---|
44 | (define mecab-strerror |
---|
45 | (c-function libmecab char* mecab_strerror void*)) |
---|
46 | (define mecab-destroy |
---|
47 | (c-function libmecab void mecab_destroy void*)) |
---|
48 | |
---|
49 | ;; パラメータ変更系 |
---|
50 | (define mecab-get-partial |
---|
51 | (c-function libmecab int mecab_get_partial void*)) |
---|
52 | (define mecab-set-partial! |
---|
53 | (c-function libmecab void mecab_set_partial void* int)) |
---|
54 | ;(define mecab-get-theta |
---|
55 | ; (c-function libmecab float mecab_get_theta void*)) |
---|
56 | ;(define mecab-set-theta! |
---|
57 | ; (c-function libmecab void mecab_set_theta void* float)) |
---|
58 | (define mecab-get-lattice-level |
---|
59 | (c-function libmecab int mecab_get_lattice_level void*)) |
---|
60 | (define mecab-set-lattice-level! |
---|
61 | (c-function libmecab int mecab_set_lattice_level void* int)) |
---|
62 | (define mecab-get-all-morphs |
---|
63 | (c-function libmecab int mecab_get_all_morphs void*)) |
---|
64 | (define mecab-set-all-morphs! |
---|
65 | (c-function libmecab void mecab_set_all_morphs void* int)) |
---|
66 | |
---|
67 | (define mecab-sparse-tostr |
---|
68 | (c-function libmecab char* mecab_sparse_tostr void* char*)) |
---|
69 | (define mecab-sparse-tostr2 |
---|
70 | (c-function libmecab char* mecab_sparse_tostr void* char* int)) |
---|
71 | ;(define mecab-sparse-tostr3 |
---|
72 | ; (c-function libmecab char* mecab_sparse_tostr void* char* int char* int)) |
---|
73 | (define mecab-sparse-tonode ; mecab_node_t* を返す |
---|
74 | (c-function libmecab void* mecab_sparse_tonode void* char*)) ;; (m, str) |
---|
75 | (define mecab-sparse-tonode2 ; mecab_node_t* を返す |
---|
76 | (c-function libmecab void* mecab_sparse_tonode2 void* char* int)) ;; (m str len) |
---|
77 | |
---|
78 | (define mecab-nbest-sparse-tostr |
---|
79 | (c-function libmecab char* mecab_nbest_sparse_tostr void* int char*)) |
---|
80 | (define mecab-nbest-sparse-tostr2 |
---|
81 | (c-function libmecab char* mecab_nbest_sparse_tostr2 void* int char* int)) |
---|
82 | ;(define mecab-nbest-sparse-tostr3 |
---|
83 | ; (c-function libmecab char* mecab_nbest_sparse_tostr3 void* int char int char* int)) |
---|
84 | (define mecab-nbest-init |
---|
85 | (c-function libmecab int mecab_nbest_init void* char*)) |
---|
86 | (define mecab-nbest-init2 |
---|
87 | (c-function libmecab int mecab_nbest_init2 void* char* int)) |
---|
88 | (define mecab-nbest-next-tostr |
---|
89 | (c-function libmecab char* mecab_nbest_next_tostr void*)) |
---|
90 | (define mecab-nbest-next-tostr2 |
---|
91 | (c-function libmecab char* mecab_nbest_next_tostr2 void* char* int)) |
---|
92 | (define mecab-nbest-next-tonode ; mecab_node_t* |
---|
93 | (c-function libmecab void* mecab_nbest_next_tonode void*)) |
---|
94 | (define mecab-format-node |
---|
95 | (c-function libmecab char* mecab_format_node void* void*)) ; (mecab node) |
---|
96 | (define mecab-dictionary-info ; mecab_dictionary_info_t* を返す |
---|
97 | (c-function libmecab void* mecab_dictionary_info void*)) |
---|
98 | |
---|
99 | ;; APIs not supported: |
---|
100 | ;; MECAB_DLL_EXTERN int mecab_do (int argc, char **argv); |
---|
101 | ;; MECAB_DLL_EXTERN mecab_t* mecab_new(int argc, char **argv); |
---|
102 | ;; MECAB_DLL_EXTERN int mecab_dict_index(int argc, char **argv); |
---|
103 | ;; MECAB_DLL_EXTERN int mecab_dict_gen(int argc, char **argv); |
---|
104 | ;; MECAB_DLL_EXTERN int mecab_cost_train(int argc, char **argv); |
---|
105 | ;; MECAB_DLL_EXTERN int mecab_system_eval(int argc, char **argv); |
---|
106 | ;; MECAB_DLL_EXTERN int mecab_test_gen(int argc, char **argv); |
---|
107 | |
---|
108 | ;; |
---|
109 | ;; mecab_node_t |
---|
110 | ;; |
---|
111 | (define (mecab-node-prev node-ptr) (pointer-ref node-ptr 0)) |
---|
112 | (define (mecab-node-next node-ptr) (pointer-ref node-ptr 1)) |
---|
113 | (define (mecab-node-enext node-ptr) (pointer-ref node-ptr 2)) |
---|
114 | (define (mecab-node-bnext node-ptr) (pointer-ref node-ptr 3)) |
---|
115 | (define (mecab-node-surface node-ptr) |
---|
116 | (pointer->string* (pointer-ref node-ptr 8) |
---|
117 | (mecab-node-length node-ptr)) ) |
---|
118 | (define (mecab-node-feature node-ptr) |
---|
119 | ; (string-tokenize |
---|
120 | (map (lambda (s) (if (string=? "*" s) #f s)) |
---|
121 | (string-split (pointer->string (pointer-ref node-ptr 9)) #\,))) |
---|
122 | |
---|
123 | (define (mecab-node-id node-ptr) |
---|
124 | (pointer-ref node-ptr 10)) |
---|
125 | (define (mecab-node-length node-ptr) |
---|
126 | (bitwise-bit-field (pointer-ref node-ptr 11) 0 16)) |
---|
127 | (define (mecab-node-rlength node-ptr) |
---|
128 | (bitwise-bit-field (pointer-ref node-ptr 11) 16 32)) |
---|
129 | (define (mecab-node-rc-attr node-ptr) |
---|
130 | (bitwise-bit-field (pointer-ref node-ptr 12) 0 16)) |
---|
131 | (define (mecab-node-lc-attr node-ptr) |
---|
132 | (bitwise-bit-field (pointer-ref node-ptr 12) 16 32)) |
---|
133 | (define (mecab-node-posid node-ptr) |
---|
134 | (bitwise-bit-field (pointer-ref node-ptr 13) 0 16)) |
---|
135 | (define (mecab-node-char-type node-ptr) |
---|
136 | (bitwise-bit-field (pointer-ref node-ptr 13) 16 24)) |
---|
137 | (define (mecab-node-stat node-ptr) |
---|
138 | (case (bitwise-bit-field (pointer-ref node-ptr 13) 24 32) |
---|
139 | [(0) 'mecab-nor-node] |
---|
140 | [(1) 'mecab-unk-node] |
---|
141 | [(2) 'mecab-bos-node] |
---|
142 | [(3) 'mecab-eos-node])) |
---|
143 | (define (mecab-node-normal? node-ptr) |
---|
144 | (eq? 'mecab-nor-node (mecab-node-stat node-ptr))) |
---|
145 | (define (mecab-node-unknown? node-ptr) |
---|
146 | (eq? 'mecab-unk-node (mecab-node-stat node-ptr))) |
---|
147 | (define (mecab-node-bos? node-ptr) |
---|
148 | (eq? 'mecab-bos-node (mecab-node-stat node-ptr))) |
---|
149 | (define (mecab-node-eos? node-ptr) |
---|
150 | (eq? 'mecab-eos-node (mecab-node-stat node-ptr))) |
---|
151 | (define (mecab-node-best? node-ptr) |
---|
152 | (bitwise-bit-set? (pointer-ref node-ptr 14) 0)) |
---|
153 | (define (mecab-node-sentence-length node-ptr) ; available only when BOS |
---|
154 | (pointer-ref node-ptr 15)) |
---|
155 | ;(define (mecab-node-alpha node-ptr) |
---|
156 | ; (pointer-ref node-ptr 16)) |
---|
157 | ;(define (mecab-node-beta node-ptr) |
---|
158 | ; (pointer-ref node-ptr 17)) |
---|
159 | ;(define (mecab-node-prob node-ptr) |
---|
160 | ; (pointer-ref node-ptr 18)) |
---|
161 | (define (mecab-node-wcost node-ptr) |
---|
162 | (bitwise-bit-field (pointer-ref node-ptr 19) 0 16)) |
---|
163 | (define (mecab-node-cost node-ptr) |
---|
164 | (pointer-ref node-ptr 20)) |
---|
165 | ;(define (mecab-node-token node-ptr) |
---|
166 | ; (pointer-ref node-ptr 21)) |
---|
167 | |
---|
168 | ;; |
---|
169 | ;; utilities by naoya_t |
---|
170 | ;; |
---|
171 | (define (string->utf8z str) |
---|
172 | ;;文字列をutf-8なbytevectorに変換。文字列側に\x0;があっても無視されるので、変換後に末尾に0を足す |
---|
173 | (let* ([u8 (string->utf8 str)] |
---|
174 | [len (bytevector-length u8)] |
---|
175 | [u8z (make-bytevector (+ len 1))]) |
---|
176 | ;; (bytevector-copy u8z u8) ;memcpy的なのはどうすればいい |
---|
177 | ; (format #t "len: ~d\n" len) |
---|
178 | (let loop ((i 0)) |
---|
179 | (when (< i len) |
---|
180 | (bytevector-u8-set! u8z i (bytevector-u8-ref u8 i)) |
---|
181 | (loop (+ i 1)))) |
---|
182 | (bytevector-u8-set! u8z len 0) |
---|
183 | u8z)) |
---|
184 | |
---|
185 | (define (read-from-ptr ptr bvec words) |
---|
186 | (let loop ((i 0)) |
---|
187 | (when (< i words) |
---|
188 | (let ((word (pointer-ref ptr i))) |
---|
189 | (bytevector-uint-set! bvec (* i 4) word (endianness little) 4) |
---|
190 | (loop (+ i 1)))))) |
---|
191 | |
---|
192 | (define (pointer->string* ptr len) |
---|
193 | (let* ([words (quotient (+ len 4) 4)] |
---|
194 | [bvec (make-bytevector (* words 4))]) |
---|
195 | ; (format #t "(pointer->string* ptr:~a len:~d words:~d bvec:~a)\n" |
---|
196 | ; ptr len words bvec) |
---|
197 | (read-from-ptr ptr bvec words) |
---|
198 | (bytevector-u8-set! bvec len 0) |
---|
199 | (utf8->string bvec))) |
---|
200 | |
---|
201 | |
---|
202 | ;; from 逆引きScheme |
---|
203 | (define (string-split-by-char str spliter) |
---|
204 | (let loop ((ls (string->list str)) (buf '()) (ret '())) |
---|
205 | (if (pair? ls) |
---|
206 | (if (char=? (car ls) spliter) |
---|
207 | (loop (cdr ls) '() (cons (list->string (reverse buf)) ret)) |
---|
208 | (loop (cdr ls) (cons (car ls) buf) ret)) |
---|
209 | (reverse (cons (list->string (reverse buf)) ret))))) |
---|
210 | |
---|
211 | (define (string-split-by-string str spliter) |
---|
212 | (if (zero? (string-length spliter)) |
---|
213 | (list str) |
---|
214 | (let ((spl (string->list spliter))) |
---|
215 | (let loop ((ls (string->list str)) (sp spl) (tmp '()) (buf '()) (ret '())) |
---|
216 | (if (pair? sp) |
---|
217 | (if (pair? ls) |
---|
218 | (if (char=? (car ls) (car sp)) |
---|
219 | (loop (cdr ls) (cdr sp) (cons (car ls) tmp) buf ret) |
---|
220 | (loop (cdr ls) spl '() (cons (car ls) (append tmp buf)) ret)) |
---|
221 | (reverse (cons (list->string (reverse (append tmp buf))) ret))) |
---|
222 | (loop ls spl '() '() (cons (list->string (reverse buf)) ret))))))) |
---|
223 | |
---|
224 | (define (string-split str spliter) |
---|
225 | (cond [(char? spliter) (string-split-by-char str spliter)] |
---|
226 | [(string? spliter) (string-split-by-string str spliter)] |
---|
227 | [else #f])) |
---|
228 | ) |
---|