1 | (library (binding mecab-ffi) |
---|
2 | (export mecab-new2 |
---|
3 | mecab-version |
---|
4 | mecab-strerror |
---|
5 | mecab-destroy |
---|
6 | mecab-get-partial mecab-set-partial! |
---|
7 | ;;mecab-get-theta mecab-set-theta! |
---|
8 | mecab-get-lattice-level mecab-set-lattice-level! |
---|
9 | mecab-get-all-morphs mecab-set-all-morphs! |
---|
10 | mecab-sparse-tostr mecab-sparse-tostr2 ;mecab-sparse-tostr3 |
---|
11 | mecab-sparse-tonode mecab-sparse-tonode2 |
---|
12 | mecab-nbest-sparse-tostr mecab-nbest-sparse-tostr2 ;mecab-nbest-sparse-tostr3 |
---|
13 | mecab-nbest-init mecab-nbest-init2 |
---|
14 | mecab-nbest-next-tostr mecab-nbest-next-tostr2 |
---|
15 | mecab-nbest-next-tonode |
---|
16 | mecab-format-node |
---|
17 | mecab-dictionary-info |
---|
18 | |
---|
19 | mecab-node-prev mecab-node-next mecab-node-enext mecab-node-bnext |
---|
20 | mecab-node-surface mecab-node-feature mecab-node-id |
---|
21 | mecab-node-length mecab-node-rlength |
---|
22 | mecab-node-rc-attr mecab-node-lc-attr |
---|
23 | mecab-node-posid mecab-node-char-type |
---|
24 | mecab-node-stat mecab-node-normal? mecab-node-unknown? mecab-node-bos? mecab-node-eos? |
---|
25 | mecab-node-best? |
---|
26 | mecab-node-sentence-length |
---|
27 | ;; mecab-node-alpha mecab-node-beta mecab-node-prob |
---|
28 | mecab-node-wcost mecab-node-cost |
---|
29 | mecab-node-token |
---|
30 | |
---|
31 | ; string->utf8z |
---|
32 | ) |
---|
33 | (import (rnrs) (core) |
---|
34 | (rnrs r5rs) |
---|
35 | (ypsilon ffi)) |
---|
36 | |
---|
37 | (define libmecab (load-shared-object "libmecab.1.dylib")) |
---|
38 | |
---|
39 | ;(define-c-typedef mecab-t* void*) |
---|
40 | (define-c-typedef mecab-node-t* void*) |
---|
41 | (define-c-typedef mecab-node-t** void*) |
---|
42 | (define-c-typedef mecab-path-t* void*) |
---|
43 | (define-c-typedef mecab-token-t* void*) |
---|
44 | |
---|
45 | (define-c-struct-type mecab-node-t |
---|
46 | (mecab-node-t* prev) |
---|
47 | (mecab-node-t* next) |
---|
48 | (mecab-node-t* enext) |
---|
49 | (mecab-node-t* bnext) |
---|
50 | (mecab-path-t* rpath) |
---|
51 | (mecab-path-t* lpath) |
---|
52 | (mecab-node-t** begin-node-list) |
---|
53 | (mecab-node-t** end-node-list) |
---|
54 | (char* surface) |
---|
55 | (char* feature) |
---|
56 | (int id) |
---|
57 | (short length) |
---|
58 | (short rlength) |
---|
59 | (short rc-attr) |
---|
60 | (short lc-attr) |
---|
61 | (short posid) |
---|
62 | (char char-type) |
---|
63 | (char stat) |
---|
64 | (char is-best) |
---|
65 | (int sentence-length) |
---|
66 | (int alpha) ; float |
---|
67 | (int beta) ; float |
---|
68 | (int prob) ; float |
---|
69 | ; (float alpha) ;; => internal inconsistency |
---|
70 | ; (float beta) ;; => internal inconsistency |
---|
71 | ; (float prob) ;; => internal inconsistency |
---|
72 | (short wcost) |
---|
73 | (long cost) |
---|
74 | (mecab-token-t* token)) |
---|
75 | |
---|
76 | (define (compose f g) (lambda args (f (apply g args)))) |
---|
77 | |
---|
78 | (define sizeof-mecab-node-t 88) |
---|
79 | (define (void*->mecab-node-t* void*-ptr) |
---|
80 | (make-bytevector-mapping void*-ptr sizeof-mecab-node-t)) |
---|
81 | (define (char*->string char*-ptr . args) |
---|
82 | (if (null? args) |
---|
83 | (car (string-split (char*->string char*-ptr 255) #\x0)) ;; |
---|
84 | (let ((len (car args))) |
---|
85 | (utf8->string (make-bytevector-mapping char*-ptr len)) |
---|
86 | ))) |
---|
87 | |
---|
88 | (define mecab-new2 |
---|
89 | (c-function libmecab "libmecab" void* mecab_new2 (char*))) |
---|
90 | (define mecab-version |
---|
91 | (c-function libmecab "libmecab" char* mecab_version ())) |
---|
92 | (define mecab-strerror |
---|
93 | (c-function libmecab "libmecab" char* mecab_strerror (void*))) |
---|
94 | (define mecab-destroy |
---|
95 | (c-function libmecab "libmecab" void mecab_destroy (void*))) |
---|
96 | |
---|
97 | ;; パラメータ変更系 |
---|
98 | (define mecab-get-partial |
---|
99 | (c-function libmecab "libmecab" int mecab_get_partial (void*))) |
---|
100 | (define mecab-set-partial! |
---|
101 | (c-function libmecab "libmecab" void mecab_set_partial (void* int))) |
---|
102 | ;(define mecab-get-theta |
---|
103 | ; (c-function libmecab "libmecab" float mecab_get_theta (void*))) |
---|
104 | ;(define mecab-set-theta! |
---|
105 | ; (c-function libmecab "libmecab" void mecab_set_theta (void* float))) |
---|
106 | (define mecab-get-lattice-level |
---|
107 | (c-function libmecab "libmecab" int mecab_get_lattice_level (void*))) |
---|
108 | (define mecab-set-lattice-level! |
---|
109 | (c-function libmecab "libmecab" int mecab_set_lattice_level (void* int))) |
---|
110 | (define mecab-get-all-morphs |
---|
111 | (c-function libmecab "libmecab" int mecab_get_all_morphs (void*))) |
---|
112 | (define mecab-set-all-morphs! |
---|
113 | (c-function libmecab "libmecab" void mecab_set_all_morphs (void* int))) |
---|
114 | |
---|
115 | (define mecab-sparse-tostr |
---|
116 | (c-function libmecab "libmecab" char* mecab_sparse_tostr (void* char*))) |
---|
117 | (define mecab-sparse-tostr2 |
---|
118 | (c-function libmecab "libmecab" char* mecab_sparse_tostr (void* char* int))) |
---|
119 | ;(define mecab-sparse-tostr3 |
---|
120 | ; (c-function libmecab "libmecab" char* mecab_sparse_tostr (void* char* int char* int))) |
---|
121 | (define mecab-sparse-tonode |
---|
122 | (compose void*->mecab-node-t* |
---|
123 | (c-function libmecab "libmecab" void* mecab_sparse_tonode (void* char*)))) |
---|
124 | (define mecab-sparse-tonode2 |
---|
125 | (compose void*->mecab-node-t* |
---|
126 | (c-function libmecab "libmecab" void* mecab_sparse_tonode2 (void* char* int)))) |
---|
127 | ;(define (mecab-sparse-tonode m str); mecab_node_t* を返す |
---|
128 | ; (void*->mecab-node-t* (mecab-sparse-tonode__ m str))) |
---|
129 | ;(define (mecab-sparse-tonode2 m str len); mecab_node_t* を返す |
---|
130 | ; (void*->mecab-node-t* (mecab-sparse-tonode2__ m str len))) |
---|
131 | |
---|
132 | (define mecab-nbest-sparse-tostr |
---|
133 | (c-function libmecab "libmecab" char* mecab_nbest_sparse_tostr (void* int char*))) |
---|
134 | (define mecab-nbest-sparse-tostr2 |
---|
135 | (c-function libmecab "libmecab" char* mecab_nbest_sparse_tostr2 (void* int char* int))) |
---|
136 | ;(define mecab-nbest-sparse-tostr3 |
---|
137 | ; (c-function libmecab "libmecab" char* mecab_nbest_sparse_tostr3 (void* int char int char* int))) |
---|
138 | (define mecab-nbest-init |
---|
139 | (c-function libmecab "libmecab" int mecab_nbest_init (void* char*))) |
---|
140 | (define mecab-nbest-init2 |
---|
141 | (c-function libmecab "libmecab" int mecab_nbest_init2 (void* char* int))) |
---|
142 | (define mecab-nbest-next-tostr |
---|
143 | (c-function libmecab "libmecab" char* mecab_nbest_next_tostr (void*))) |
---|
144 | (define mecab-nbest-next-tostr2 |
---|
145 | (c-function libmecab "libmecab" char* mecab_nbest_next_tostr2 (void* char* int))) |
---|
146 | (define mecab-nbest-next-tonode ; mecab_node_t* |
---|
147 | (c-function libmecab "libmecab" void* mecab_nbest_next_tonode (void*))) |
---|
148 | (define mecab-format-node |
---|
149 | (c-function libmecab "libmecab" char* mecab_format_node (void* void*))) ; (mecab node) |
---|
150 | (define mecab-dictionary-info ; mecab_dictionary_info_t* を返す |
---|
151 | (c-function libmecab "libmecab" void* mecab_dictionary_info (void*))) |
---|
152 | |
---|
153 | ;; APIs not supported: |
---|
154 | ;; MECAB_DLL_EXTERN int mecab_do (int argc, char **argv); |
---|
155 | ;; MECAB_DLL_EXTERN mecab_t* mecab_new(int argc, char **argv); |
---|
156 | ;; MECAB_DLL_EXTERN int mecab_dict_index(int argc, char **argv); |
---|
157 | ;; MECAB_DLL_EXTERN int mecab_dict_gen(int argc, char **argv); |
---|
158 | ;; MECAB_DLL_EXTERN int mecab_cost_train(int argc, char **argv); |
---|
159 | ;; MECAB_DLL_EXTERN int mecab_system_eval(int argc, char **argv); |
---|
160 | ;; MECAB_DLL_EXTERN int mecab_test_gen(int argc, char **argv); |
---|
161 | |
---|
162 | |
---|
163 | ;; |
---|
164 | ;; mecab_node_t |
---|
165 | ;; |
---|
166 | (define (mecab-node-prev node) (void*->mecab-node-t* (mecab-node-t-prev node))) |
---|
167 | (define (mecab-node-next node) (void*->mecab-node-t* (mecab-node-t-next node))) |
---|
168 | (define (mecab-node-enext node) (void*->mecab-node-t* (mecab-node-t-enext node))) |
---|
169 | (define (mecab-node-bnext node) (void*->mecab-node-t* (mecab-node-t-bnext node))) |
---|
170 | (define (mecab-node-surface node) |
---|
171 | (char*->string (mecab-node-t-surface node) (mecab-node-t-length node))) |
---|
172 | (define (mecab-node-feature node) |
---|
173 | (let ((feature (char*->string (mecab-node-t-feature node)))) |
---|
174 | (map (lambda (s) (if (string=? "*" s) #f s)) |
---|
175 | (string-split feature #\,)))) |
---|
176 | |
---|
177 | (define (mecab-node-id node) (mecab-node-t-id node)) |
---|
178 | (define (mecab-node-length node) (mecab-node-t-length node)) |
---|
179 | (define (mecab-node-rlength node) (mecab-node-t-rlength node)) |
---|
180 | (define (mecab-node-rc-attr node) (mecab-node-t-rc-attr node)) |
---|
181 | (define (mecab-node-lc-attr node) (mecab-node-t-lc-attr node)) |
---|
182 | (define (mecab-node-posid node) (mecab-node-t-posid node)) |
---|
183 | (define (mecab-node-char-type node) (mecab-node-t-char-type node)) |
---|
184 | (define (mecab-node-stat node) |
---|
185 | (case (mecab-node-t-stat node) |
---|
186 | [(0) 'mecab-nor-node] |
---|
187 | [(1) 'mecab-unk-node] |
---|
188 | [(2) 'mecab-bos-node] |
---|
189 | [(3) 'mecab-eos-node])) |
---|
190 | |
---|
191 | (define (mecab-node-normal? node) (eq? 'mecab-nor-node (mecab-node-stat node))) |
---|
192 | (define (mecab-node-unknown? node) (eq? 'mecab-unk-node (mecab-node-stat node))) |
---|
193 | (define (mecab-node-bos? node) (eq? 'mecab-bos-node (mecab-node-stat node))) |
---|
194 | (define (mecab-node-eos? node) (eq? 'mecab-eos-node (mecab-node-stat node))) |
---|
195 | (define (mecab-node-best? node) (= 1 (mecab-node-t-is-best node))) |
---|
196 | (define (mecab-node-sentence-length node) ; available only when BOS |
---|
197 | (mecab-node-t-sentence-length node)) |
---|
198 | ;(define (mecab-node-alpha node-ptr) |
---|
199 | ; (pointer-ref node-ptr 16)) |
---|
200 | ;(define (mecab-node-beta node-ptr) |
---|
201 | ; (pointer-ref node-ptr 17)) |
---|
202 | ;(define (mecab-node-prob node-ptr) |
---|
203 | ; (pointer-ref node-ptr 18)) |
---|
204 | (define (mecab-node-wcost node) (mecab-node-t-wcost node)) |
---|
205 | (define (mecab-node-cost node) (mecab-node-t-cost node)) |
---|
206 | (define (mecab-node-token node) (mecab-node-t-token node)) |
---|
207 | |
---|
208 | |
---|
209 | ;; from 逆引きScheme |
---|
210 | (define (string-split-by-char str spliter) |
---|
211 | (let loop ((ls (string->list str)) (buf '()) (ret '())) |
---|
212 | (if (pair? ls) |
---|
213 | (if (char=? (car ls) spliter) |
---|
214 | (loop (cdr ls) '() (cons (list->string (reverse buf)) ret)) |
---|
215 | (loop (cdr ls) (cons (car ls) buf) ret)) |
---|
216 | (reverse (cons (list->string (reverse buf)) ret))))) |
---|
217 | |
---|
218 | (define (string-split-by-string str spliter) |
---|
219 | (if (zero? (string-length spliter)) |
---|
220 | (list str) |
---|
221 | (let ((spl (string->list spliter))) |
---|
222 | (let loop ((ls (string->list str)) (sp spl) (tmp '()) (buf '()) (ret '())) |
---|
223 | (if (pair? sp) |
---|
224 | (if (pair? ls) |
---|
225 | (if (char=? (car ls) (car sp)) |
---|
226 | (loop (cdr ls) (cdr sp) (cons (car ls) tmp) buf ret) |
---|
227 | (loop (cdr ls) spl '() (cons (car ls) (append tmp buf)) ret)) |
---|
228 | (reverse (cons (list->string (reverse (append tmp buf))) ret))) |
---|
229 | (loop ls spl '() '() (cons (list->string (reverse buf)) ret))))))) |
---|
230 | |
---|
231 | (define (string-split str spliter) |
---|
232 | (cond [(char? spliter) (string-split-by-char str spliter)] |
---|
233 | [(string? spliter) (string-split-by-string str spliter)] |
---|
234 | [else #f])) |
---|
235 | |
---|
236 | ) |
---|