Changeset 131
- Timestamp:
- 04/15/09 09:41:07 (16 years ago)
- Location:
- lang/scheme/gauche/bindings/mecab/trunk
- Files:
-
- 3 added
- 3 removed
- 4 modified
Legend:
- Unmodified
- Added
- Removed
-
lang/scheme/gauche/bindings/mecab/trunk/ChangeLog
r119 r131  1  2009-03-2 3 naoya_t <naoya.t@aqua.plala.or.jp> 1 2009-03-25 Naoya Tozuka <naoya_t@users.sourceforge.jp> 2 2  3  * Merged naoya_t's MeCab binding. Supports almost all of MeCab APIs and Tagger class.  3 * Merged naoya_t's mecab-gauche-0.98pre1 (http://blog.livedoor.jp/naoya_t/archives/51131440.html)  4 * Supports:  5 * Tagger class  6 * Accessors for mecab_node_t structure members  7 * All MeCab APIs, except those which overwrite their argument:  8 mecab-sparse-tostr3, mecab-nbest-sparse-tostr3, mecab-nbest-next-tostr2  9 * (mecab params ...) makes a new instance with parameter(s). The format is inspired by leque's (make-mecab :rcfile "mymecabrc") style.  10 * Separates dictionary-dependent tests from test.scm. 4 11  5 12 2009-03-01 Shiro Kawai <shiro@acm.org> … …  10 17  11 18  12   -
lang/scheme/gauche/bindings/mecab/trunk/Makefile.in
r117 r131  1 1 # 2  # $Id: Makefile.in,v 1. 3 2009/03/02 03:52:45 shirokExp $ 2 # $Id: Makefile.in,v 1.4 2009/03/25 06:22:38 naoya_t Exp $ 3 3 # 4 4  … …  55 55 $(GOSH) -I. -I$(srcdir) $(srcdir)/test.scm > test.log 56 56   57 # dictionary-dependent tests (optional)  58 check-dep : all  59 @rm -f test-dep-1.log test-dep-ipadic.log  60 $(GOSH) -I. -I$(srcdir) $(srcdir)/test-dep-1.scm > test-dep-1.log  61 $(GOSH) -I. -I$(srcdir) $(srcdir)/test-dep-ipadic.scm > test-dep-ipadic.log  62  57 63 install : all 58 64 $(INSTALL) -m 444 -T $(GAUCHE_PKGINCDIR) $(HEADERS) -
lang/scheme/gauche/bindings/mecab/trunk/mecab-lib.scm
r117 r131  31 31 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 32 ;;; 33  ;;; $Id: mecab-lib.scm,v 1. 1 2009/03/02 03:52:45 shirokExp $ 33 ;;; $Id: mecab-lib.scm,v 1.3 2009/03/25 09:42:14 naoya_t Exp $ 34 34 ;;; 35 35  36 36 (define-module text.mecab 37 37 (use srfi-1)  38 (use srfi-13) 38 39 (use gauche.charconv) 39 40 (export <mecab> <mecab-node> <mecab-dictionary-info> … …  41 42 mecab-do mecab-new mecab-new2 42 43 mecab-version mecab-strerror mecab-destroy mecab-destroyed?  44 mecab mecab-options 43 45  44 46 mecab-tagger ; message passing … …  79 81 ;; This should be configurable, since mecab can be compiled to use utf-8. 80 82 (define-constant MECAB_ENCODING 'euc-jp) 81  ;(define-constant MECAB_ENCODING 'utf-8) 82    83   84 ;; We need cv-send and cv-recv if MeCab is configured with --enable-utf8-only option.  85 ;(define (cv-send str)  86 ; (ces-convert str (gauche-character-encoding) MECAB_ENCODING))  87 ;(define (cv-recv str)  88 ; (and str (ces-convert str MECAB_ENCODING))) 83 89 (define (cv-send str) str) 84  ; (ces-convert str (gauche-character-encoding) MECAB_ENCODING))Â85  Â86 90 (define (cv-recv str) str) 87  ; (and str (ces-convert str MECAB_ENCODING)))Â88 91  89 92 (define (mecab-do args) … …  95 98 (unless (every string? args) 96 99 (error "mecab-new: list of strings required, but got:" args)) 97  (%mecab-new (map cv-send args) )) 100 (%mecab-new (map cv-send args) (mecab-parse-options args))) 98 101  99 102 (define (mecab-new2 str) 100  (%mecab-new2 (cv-send str) )) 103 (%mecab-new2 (cv-send str) (mecab-parse-options str))) 101 104  102 105 (define (mecab-strerror m) 103  (cv-recv (if m (%mecab-strerror m) 104  (%mecab-strerror-with-null)))) 105   106  (define (mecab-get-partial m) 107  (%mecab-get-partial m)) 108   109  (define (mecab-set-partial m partial) 110  (%mecab-set-partial m partial)) 111   112  (define (mecab-get-theta m) 113  (%mecab-get-theta m)) 114   115  (define (mecab-set-theta m theta) 116  (%mecab-set-theta m theta)) 117   118  (define (mecab-get-lattice-level m) 119  (%mecab-get-lattice-level m)) 120   121  (define (mecab-set-lattice-level m llevel) 122  (%mecab-set-lattice-level m llevel)) 123   124  (define (mecab-get-all-morphs m) 125  (%mecab-get-all-morphs m)) 126   127  (define (mecab-set-all-morphs m morphs) 128  (%mecab-set-all-morphs m morphs))  106 ; (cv-recv (%mecab-strerror m))) ;; m can be #f  107 (cv-recv (if m (%mecab-strerror m) (%mecab-strerror-with-null)))) 129 108  130 109 (define (mecab-sparse-tostr m str) … …  155 134 (cv-recv (%mecab-nbest-next-tostr m))) 156 135  157  (define (mecab-nbest-next-tonode m)Â158  (%mecab-nbest-next-tonode m))Â159  Â160 136 (define (mecab-format-node m node) 161  (%mecab-format-node m node)) 162   163  (define (mecab-dictionary-info m) 164  (%mecab-dictionary-info m))  137 (cv-recv (%mecab-format-node m node))) 165 138  166 139 (define (mecab-dict-index args) … …  189 162 (%mecab-test-gen (map cv-send args))) 190 163  191  ;;  164 ;; mecab_node_t 192 165 (define (mecab-node-surface n) 193 166 (cv-recv (%mecab-node-surface n))) … …  200 173 (%mecab-node-stat n))) 201 174   175 ;; mecab_dictionary_info_t 202 176 (define (mecab-dictionary-info-type dinfo) 203 177 (vector-ref #(mecab-sys-dic mecab-usr-dic mecab-unk-dic) 204 178 (%mecab-dictionary-info-type dinfo))) 205 179   180 ;; 206 181 (inline-stub 207 182 "#include <mecab.h>" … …  211 186 SCM_HEADER; 212 187 mecab_t *m; /* NULL if closed */  188 ScmObj options; 213 189 } ScmMeCab; 214 190  215 191 typedef struct ScmMeCabNodeRec { 216 192 SCM_HEADER; 217  mecab_node_t *node; 193 const mecab_node_t *node; 218 194 } ScmMeCabNode; 219 195  220 196 typedef struct ScmMeCabDictionaryInfoRec { 221 197 SCM_HEADER; 222  mecab_dictionary_info_t *dic_info; 198 const mecab_dictionary_info_t *dic_info; 223 199 } ScmMeCabDictionaryInfo;" 224 200  … …  240 216 (mecab-destroy (-> m m)) 241 217 (set! (-> m m) NULL))) 242    218  243 219 (define-cfn mecab-finalize (obj data::void*) ::void :static 244 220 (mecab-cleanup (SCM_MECAB obj))) 245 221  246  (define-cfn make-mecab (m::mecab_t* ) :static 222 (define-cfn make-mecab (m::mecab_t* options::ScmObj) :static 247 223 (when (== m NULL) (mecab-strerror NULL)) 248 224 (let* ([obj::ScmMeCab* (SCM_NEW ScmMeCab)]) 249 225 (SCM_SET_CLASS obj (& Scm_MeCabClass)) 250 226 (set! (-> obj m) m)  227 (set! (-> obj options) options) 251 228 (Scm_RegisterFinalizer (SCM_OBJ obj) mecab-finalize NULL) 252 229 (return (SCM_OBJ obj)))) 253 230  254  (define-cfn make-mecab-node (n:: mecab_node_t*) :staticÂ255  ; (when (== node NULL) (mecab-strerror NULL))  231 (define-cfn make-mecab-node (n::"const mecab_node_t*") :static  232 ;; returns #f when n==NULL ... for convenience of mecab_nbest_next_* 256 233 (if (== n NULL) (return SCM_FALSE) 257 234 (let* ([obj::ScmMeCabNode* (SCM_NEW ScmMeCabNode)]) … …  260 237 (return (SCM_OBJ obj))))) 261 238  262  (define-cfn make-mecab-dictionary-info (dic_info:: mecab_dictionary_info_t*) :staticÂ263  ; (when (== dic_info NULL) (mecab-strerror NULL))  239 (define-cfn make-mecab-dictionary-info (dic_info::"const mecab_dictionary_info_t*") :static  240 ;; returns #f when dic_info==NULL ... for convenience of mecab-dictionary-info-next 264 241 (if (== dic_info NULL) (return SCM_FALSE) 265 242 (let* ([obj::ScmMeCabDictionaryInfo* (SCM_NEW ScmMeCabDictionaryInfo)]) … …  278 255 (result (mecab-do argc argv)))) 279 256  280  (define-cproc %mecab-new (args::<list> ) 257 (define-cproc %mecab-new (args::<list> options) 281 258 (let* ([argc::int (Scm_Length args)] 282 259 [argv::char** (Scm_ListToCStringArray args TRUE NULL)]) 283  (result (make-mecab (mecab-new argc argv) ))))Â284   285  (define-cproc %mecab-new2 (arg::<string> )Â286  (result (make-mecab (mecab-new2 (Scm_GetString arg)) ))) 260 (result (make-mecab (mecab-new argc argv) options))))  261   262 (define-cproc %mecab-new2 (arg::<string> options)  263 (result (make-mecab (mecab-new2 (Scm_GetString arg)) options))) 287 264  288 265 (define-cproc mecab-version () ::<const-cstring> mecab-version) … …  294 271 (result (== (-> m m) NULL))) 295 272   273 (define-cproc mecab-options (m::<mecab>)  274 (result (-> m options)))  275  296 276 (define-cproc %mecab-strerror (m::<mecab>) ::<const-cstring> 297 277 (result (mecab-strerror (-> m m))))  278  298 279 (define-cproc %mecab-strerror-with-null () ::<const-cstring> 299 280 (result (mecab-strerror NULL)))  281   282 (define-cproc mecab-get-partial (m::<mecab>) ::<int>  283 (result (mecab-get-partial (-> m m))))  284   285 (define-cproc mecab-set-partial (m::<mecab> partial::<int>) ::<void>  286 (mecab-set-partial (-> m m) partial))  287   288 (define-cproc mecab-get-theta (m::<mecab>) ::<float>  289 (result (mecab-get-theta (-> m m))))  290   291 (define-cproc mecab-set-theta (m::<mecab> theta::<float>) ::<void>  292 (mecab-set-theta (-> m m) theta))  293   294 (define-cproc mecab-get-lattice-level (m::<mecab>) ::<int>  295 (result (mecab-get-lattice-level (-> m m))))  296   297 (define-cproc mecab-set-lattice-level (m::<mecab> level::<int>) ::<void>  298 (mecab-set-lattice-level (-> m m) level))  299   300 (define-cproc mecab-get-all-morphs (m::<mecab>) ::<int>  301 (result (mecab-get-all-morphs (-> m m))))  302   303 (define-cproc mecab-set-all-morphs (m::<mecab> all_morphs::<int>) ::<void>  304 (mecab-set-all-morphs (-> m m) all_morphs)) 300 305  301 306 (define-cproc %mecab-sparse-tostr (m::<mecab> str::<const-cstring>) … …  315 320 (result (mecab-nbest-sparse-tostr2 (-> m m) n str len))) 316 321  317  (define-cproc %mecab-nbest-init (m::<mecab> str::<const-cstring>) 318  ::<int>  322 (define-cproc %mecab-nbest-init (m::<mecab> str::<const-cstring>) ::<int> 319 323 (result (mecab-nbest-init (-> m m) str))) 320 324  321  (define-cproc %mecab-nbest-init2 (m::<mecab> str::<const-cstring> len::<uint>) 322  ::<int>  325 (define-cproc %mecab-nbest-init2 (m::<mecab> str::<const-cstring> len::<uint>) ::<int> 323 326 (result (mecab-nbest-init2 (-> m m) str len))) 324 327  325  (define-cproc %mecab-nbest-next-tostr (m::<mecab>)  328 (define-cproc %mecab-nbest-next-tostr (m::<mecab>) ;; returns null at the end 326 329 ; (result (mecab-nbest-next-tostr (-> m m)))) 327 330 " const char *s = mecab_nbest_next_tostr(m->m); 328 331 return s ? SCM_MAKE_STR_COPYING(s) : SCM_FALSE;") 329 332  330  (define-cproc %mecab-nbest-next-tonode (m::<mecab>) 333 (define-cproc mecab-nbest-next-tonode (m::<mecab>) ;; returns null at the end 331 334 (result (make-mecab-node (mecab-nbest-next-tonode (-> m m))))) 332  ;" mecab_node_t *node = mecab_nbest_next_tonode(m->m);Â333  ; return node ? make_mecab_node(node) : SCM_FALSE;")Â334 335  335 336 (define-cproc %mecab-sparse-tonode (m::<mecab> str::<const-cstring>) … …  339 340 (result (make-mecab-node (mecab-sparse-tonode2 (-> m m) str siz)))) 340 341  341  (define-cproc %mecab-dictionary-info (m::<mecab>) 342 (define-cproc mecab-dictionary-info (m::<mecab>) 342 343 (result (make-mecab-dictionary-info (mecab-dictionary-info (-> m m))))) 343 344  344  (define-cproc %mecab-get-partial (m::<mecab>) 345  ::<int> 346  (result (mecab-get-partial (-> m m)))) 347   348  (define-cproc %mecab-set-partial (m::<mecab> partial::<int>) 349  ::<void> 350  (mecab-set-partial (-> m m) partial)) 351   352  (define-cproc %mecab-get-theta (m::<mecab>) 353  ::<float> 354  (result (mecab-get-theta (-> m m)))) 355   356  (define-cproc %mecab-set-theta (m::<mecab> theta::<float>) 357  ::<void> 358  (mecab-set-theta (-> m m) theta)) 359   360  (define-cproc %mecab-get-lattice-level (m::<mecab>) 361  ::<int> 362  (result (mecab-get-lattice-level (-> m m)))) 363   364  (define-cproc %mecab-set-lattice-level (m::<mecab> level::<int>) 365  ::<void> 366  (mecab-set-lattice-level (-> m m) level)) 367   368  (define-cproc %mecab-get-all-morphs (m::<mecab>) 369  ::<int> 370  (result (mecab-get-all-morphs (-> m m)))) 371   372  (define-cproc %mecab-set-all-morphs (m::<mecab> all_morphs::<int>) 373  ::<void> 374  (mecab-set-all-morphs (-> m m) all_morphs)) 375   376  (define-cproc %mecab-format-node (m::<mecab> n::<mecab-node>) 377  ::<const-cstring>?  345 (define-cproc %mecab-format-node (m::<mecab> n::<mecab-node>) ::<const-cstring>? 378 346 (result (mecab-format-node (-> m m) (-> n node)))) 379 347  … …  407 375 ;; 408 376 (define-cproc mecab-node-prev (n::<mecab-node>) 409  " const mecab_node_t *prev_node = n->node->prev; 410  return prev_node ? make_mecab_node(prev_node) : SCM_FALSE;")  377 (result (make-mecab-node (-> (-> n node) prev)))) 411 378  412 379 (define-cproc mecab-node-next (n::<mecab-node>) 413  " const mecab_node_t *next_node = n->node->next; 414  return next_node ? make_mecab_node(next_node) : SCM_FALSE;")  380 (result (make-mecab-node (-> (-> n node) next)))) 415 381  416 382 (define-cproc mecab-node-enext (n::<mecab-node>) 417  " const mecab_node_t *enext_node = n->node->enext; 418  return enext_node ? make_mecab_node(enext_node) : SCM_FALSE;")  383 (result (make-mecab-node (-> (-> n node) enext)))) 419 384  420 385 (define-cproc mecab-node-bnext (n::<mecab-node>) 421  " const mecab_node_t *bnext_node = n->node->bnext; 422  return bnext_node ? make_mecab_node(bnext_node) : SCM_FALSE;") 423   424  ;; NULL terminate�������ޤ���������Ƽ��Ф��ˤ�;; strncpy(buf, node->feature, node->length) �Ȥ������������  386 (result (make-mecab-node (-> (-> n node) bnext))))  387  425 388 (define-cproc %mecab-node-surface (n::<mecab-node>) 426  " char buf[n->node->length + 1];Â427  memcpy(buf, n->node->surface, n->node->length);Â428  buf[n->node->length] = 0;Â429  return SCM_MAKE_STR_COPYING(buf);");Â430   431  (define-cproc %mecab-node-feature (n::<mecab-node>) ::<const-cstring>Â432  (result (-> (-> n node) feature)))Â433   434  (define-cproc mecab-node-length (n::<mecab-node>) ::<uint>Â435  (result (-> (-> n node) length)))Â436   437  (define-cproc mecab-node-rlength (n::<mecab-node>) ::<uint>Â438  (result (-> (-> n node) rlength)))Â439   440  (define-cproc mecab-node-id (n::<mecab-node>) ::<uint>Â441  (result (-> (-> n node) id)))Â442   443  (define-cproc mecab-node-rc-attr (n::<mecab-node>) ::<uint>Â444  (result (-> (-> n node) rcAttr)))Â445   446  (define-cproc mecab-node-lc-attr (n::<mecab-node>) ::<uint>Â447  (result (-> (-> n node) lcAttr)))Â448   449  (define-cproc mecab-node-posid (n::<mecab-node>) ::<uint>Â450  (result (-> (-> n node) posid)))Â451   452  (define-cproc mecab-node-char-type (n::<mecab-node>) ::<uint>Â453  (result (-> (-> n node) char-type)))Â454   455  (define-cproc %mecab-node-stat (n::<mecab-node>) ::<int>Â456  (result (-> (-> n node) stat)))Â457   458  (define-cproc mecab-node-best? (n::<mecab-node>) ::<boolean>Â459  (result (-> (-> n node) isbest)))Â460   461  (define-cproc mecab-node-alpha (n::<mecab-node>) ::<float>Â462  (result (-> (-> n node) alpha)))Â463   464  (define-cproc mecab-node-beta (n::<mecab-node>) ::<float>Â465  (result (-> (-> n node) beta)))Â466   467  (define-cproc mecab-node-prob (n::<mecab-node>) ::<float>Â468  (result (-> (-> n node) prob)))Â469   470  (define-cproc mecab-node-wcost (n::<mecab-node>) ::<int>Â471  (result (-> (-> n node) wcost)))Â472   473  (define-cproc mecab-node-cost (n::<mecab-node>) ::<int>Â474  (result (-> (-> n node) cost))) 389 (result (Scm-MakeString (-> (-> n node) surface)  390 (-> (-> n node) length) ;; size  391 -1 ;; Gauche will count the 'length' of this substring  392 SCM-STRING-COPYING)))  393   394 (define-cproc %mecab-node-feature (n::<mecab-node>)  395 ::<const-cstring> (result (-> (-> n node) feature)))  396   397 (define-cproc mecab-node-length (n::<mecab-node>)  398 ::<uint> (result (-> (-> n node) length)))  399   400 (define-cproc mecab-node-rlength (n::<mecab-node>)  401 ::<uint> (result (-> (-> n node) rlength)))  402   403 (define-cproc mecab-node-id (n::<mecab-node>)  404 ::<uint> (result (-> (-> n node) id)))  405   406 (define-cproc mecab-node-rc-attr (n::<mecab-node>)  407 ::<uint> (result (-> (-> n node) rcAttr)))  408   409 (define-cproc mecab-node-lc-attr (n::<mecab-node>)  410 ::<uint> (result (-> (-> n node) lcAttr)))  411   412 (define-cproc mecab-node-posid (n::<mecab-node>)  413 ::<uint> (result (-> (-> n node) posid)))  414   415 (define-cproc mecab-node-char-type (n::<mecab-node>)  416 ::<uint> (result (-> (-> n node) char-type)))  417   418 (define-cproc %mecab-node-stat (n::<mecab-node>)  419 ::<int> (result (-> (-> n node) stat)))  420   421 (define-cproc mecab-node-best? (n::<mecab-node>)  422 ::<boolean> (result (-> (-> n node) isbest)))  423   424 (define-cproc mecab-node-alpha (n::<mecab-node>)  425 ::<float> (result (-> (-> n node) alpha)))  426   427 (define-cproc mecab-node-beta (n::<mecab-node>)  428 ::<float> (result (-> (-> n node) beta)))  429   430 (define-cproc mecab-node-prob (n::<mecab-node>)  431 ::<float> (result (-> (-> n node) prob)))  432   433 (define-cproc mecab-node-wcost (n::<mecab-node>)  434 ::<int> (result (-> (-> n node) wcost)))  435   436 (define-cproc mecab-node-cost (n::<mecab-node>)  437 ::<int> (result (-> (-> n node) cost))) 475 438  476 439 ;; 477 440 ;; mecab_dictionary_info_t 478 441 ;; 479  ;; #define MECAB_USR_DIC 1Â480  ;; #define MECAB_SYS_DIC 0Â481  ;; #define MECAB_UNK_DIC 2Â482  ;;Â483 442 (define-cproc mecab-dictionary-info-filename (dinfo::<mecab-dictionary-info>) 484  ::<const-cstring> 485  (result (-> (-> dinfo dic_info) filename)))  443 ::<const-cstring> (result (-> (-> dinfo dic_info) filename))) 486 444  487 445 (define-cproc mecab-dictionary-info-charset (dinfo::<mecab-dictionary-info>) 488  ::<const-cstring> 489  (result (-> (-> dinfo dic_info) charset)))  446 ::<const-cstring> (result (-> (-> dinfo dic_info) charset))) 490 447  491 448 (define-cproc mecab-dictionary-info-size (dinfo::<mecab-dictionary-info>) … …  514 471  515 472 (define-method write-object ((m <mecab>) out) 516  (format out "#<mecab >")); (mecab-version))) 473 (format out "#<mecab ~s>" (mecab-options m))) 517 474 (define-method write-object ((m <mecab-node>) out) 518 475 (format out "#<mecab-node>")) … …  520 477 (format out "#<mecab-dictionary-info>")) 521 478  522  (define-reader-ctor '<mecab> mecab-new2) 523   524  (define (mecab-tagger paramstr) 525  (let1 mecabobj (mecab-new2 paramstr)  479 (define *mecab-options+*  480 '((r rcfile FILE)  481 (d dicdir DIR)  482 (u userdic FILE)  483 (l lattice-level INT)  484 (D dictionary-info)  485 (a all-morphs)  486 (O output-format-type TYPE)  487 (p partial)  488 (F node-format STR)  489 (U unk-format STR)  490 (B bos-format STR)  491 (E eos-format STR)  492 (x unk-feature STR)  493 (b input-buffer-size INT)  494 (P dump-config)  495 (M open-mutable-dictionary)  496 (C allocate-sentence)  497 (N nbest INT)  498 (t theta FLOAT)  499 (c cost-factor INT)  500 (o output FILE)  501 (v version)  502 (h help)))  503   504 (define *mecab-options* (map cdr *mecab-options+*))  505   506 (define (cast argtype obj)  507 (case argtype  508 [(STR DIR FILE) (x->string obj)]  509 [(INT FLOAT) obj]  510 [(TYPE) (string->symbol (x->string obj))]  511 [else obj]))  512   513 (define (requires-arg? option)  514 (let1 opt (assoc option *mecab-options*)  515 (if opt (= 2 (length opt)) #f)))  516   517 (define (argtype option)  518 (cadr (or (assoc option *mecab-options*) (list #f #f))))  519   520 (define (long-option-name short-option-name)  521 (cadr (or (assoc short-option-name *mecab-options+*) (list #f #f))))  522   523 (define (keyword-length keyword)  524 (string-length (keyword->string keyword)))  525   526 (define (keyword->symbol keyword)  527 (string->symbol (keyword->string keyword)))  528   529 (define (mecab-parse-options args)  530 ;; eg. ("-Ochasen" :l 1 "--theta" "0.75")  531 ;; => (:output-format-type "chasen" :lattice-level 1 :theta "0.75")  532 (let1 args* (append-map  533 (lambda (arg)  534 (cond [(string? arg)  535 (if (string=? "" arg) '()  536 (append-map  537 (lambda (str)  538 (let1 len (string-length str)  539 (if (<= 2 len)  540 (if (eq? #\- (string-ref str 0))  541 ;; --option, -X[arg]  542 (if (eq? #\- (string-ref str 1))  543 ;; --option  544 (list (string->symbol (substring str 2 len)))  545 ;; -X[arg]  546 (let1 option (long-option-name (string->symbol (substring str 1 2)))  547 (if (= len 2)  548 ;; -X  549 (list option)  550 ;; -Xarg  551 (list option (cast (argtype option) (substring str 2 len))))))  552 (list str))  553 (list str))))  554 (string-split arg #[ =])))]  555 [(keyword? arg)  556 (if (= 1 (keyword-length arg))  557 (list (long-option-name (keyword->symbol arg)))  558 (list (keyword->symbol arg)))]  559 [else  560 (list arg)]))  561 (if (list? args) args (list args)))  562 ;; eg. (:output-format-type "chasen" :lattice-level 1 :theta "0.75")  563 ;; => ((output-format-type chasen) (lattice-level 1) (theta 0.75))  564 (let loop ((rest args*) (options '()))  565 (if (null? rest)  566 (reverse! options)  567 (let1 option (car rest)  568 (if (symbol? option)  569 (if (requires-arg? option)  570 (loop (cddr rest) (cons (list option (cast (argtype option) (cadr rest))) options))  571 (loop (cdr rest) (cons (list option (if #f #f)) options)))  572 (loop (cdr rest) options)))))))  573   574 (define (mecab . args) ;; inspired by leque's make-mecab  575 (let* ([options (mecab-parse-options args)]  576 [options-str (append-map (lambda (option+arg)  577 (let* ([option (car option+arg)]  578 [option-str (format "--~a" option)])  579 (if (requires-arg? option)  580 (list option-str (x->string (cadr option+arg)))  581 (list option-str) )))  582 options)])  583 (%mecab-new options-str options)))  584   585 (define-reader-ctor 'mecab mecab)  586   587 ;; Tagger  588 (define (mecab-tagger . args)  589 (let1 m (apply mecab args) 526 590 (define (parse-to-string str . args) 527 591 (let-optionals* args ((len #f)) 528 592 (if len 529  (mecab-sparse-tostr2 mecabobj str len) 530  (mecab-sparse-tostr mecabobj str) ))) 531   532  (define (parse-to-node str) 533  (mecab-sparse-tonode mecabobj str)) 534   535  ;; ���ε�ǽ������� ��ư�����ץ����Ȥ���-l 1 ��ꤹ����������� 536  (define (parse-nbest n str) 537  (mecab-nbest-sparse-tostr mecabobj n str)) 538   539  (define (parse-nbest-init str) 540  (mecab-nbest-init mecabobj str)) 541   542  (define (next) 543  (mecab-nbest-next-tostr mecabobj)) 544   545  (define (next-node) 546  (mecab-nbest-next-tonode mecabobj)) 547   548  (define (format-node node) 549  (mecab-format-node mecabobj node))  593 (mecab-sparse-tostr2 m str len)  594 (mecab-sparse-tostr m str) )))  595   596 (define (parse-to-node str) (mecab-sparse-tonode m str))  597   598 ;; requires "-l 1" in option  599 (define (parse-nbest n str) (mecab-nbest-sparse-tostr m n str))  600   601 (define (parse-nbest-init str) (mecab-nbest-init m str))  602   603 (define (next) (mecab-nbest-next-tostr m))  604   605 (define (next-node) (mecab-nbest-next-tonode m))  606   607 (define (format-node node) (mecab-format-node m node))  608   609 (define (destroy) (mecab-destroy m)) 550 610  551 611 (lambda (m) … …  558 618 [(next-node) next-node] 559 619 [(format-node) format-node]  620 [(destroy) destroy] 560 621 )))) 561 622  562  ;;; class 563  (define-class <mecab-tagger> () (mecab #f))  623 (define-class <mecab-tagger> () (m #f))  624  564 625 (define (mecab-make-tagger paramstr) 565  (make <mecab-tagger> :mecab (mecab-new2 paramstr))) 566  (define (tagger-mecab tagger) (slot-ref tagger 'mecab))  626 (make <mecab-tagger> :mecab (mecab paramstr)))  627   628 (define (tagger-mecab tagger) (slot-ref tagger 'm))  629  567 630 (define-method parse ((tagger <mecab-tagger>) (str <string>)) 568 631 (mecab-sparse-tostr (tagger-mecab tagger) str))  632  569 633 (define-method parse ((tagger <mecab-tagger>) (str <string>) (len <integer>)) 570 634 (mecab-sparse-tostr2 (tagger-mecab tagger) str len))  635  571 636 (define-method parse-to-string ((tagger <mecab-tagger>) (str <string>)) 572 637 (mecab-sparse-tostr (tagger-mecab tagger) str))  638  573 639 (define-method parse-to-string ((tagger <mecab-tagger>) (str <string>) (len <integer>)) 574 640 (mecab-sparse-tostr (tagger-mecab tagger) str len))  641  575 642 (define-method parse-to-node ((tagger <mecab-tagger>) (str <string>)) 576 643 (mecab-sparse-tonode (tagger-mecab tagger) str))  644  577 645 (define-method parse-to-node ((tagger <mecab-tagger>) (str <string>) (len <integer>)) 578 646 (mecab-sparse-tonode2 (tagger-mecab tagger) str len))  647  579 648 (define-method parse-nbest ((tagger <mecab-tagger>) (n <integer>) (str <string>)) 580 649 (mecab-nbest-sparse-tostr (tagger-mecab tagger) str))  650  581 651 (define-method parse-nbest ((tagger <mecab-tagger>) (n <integer>) (str <string>) (len <integer>)) 582 652 (mecab-nbest-sparse-tostr (tagger-mecab tagger) str len))  653  583 654 (define-method parse-nbest-init ((tagger <mecab-tagger>) (str <string>)) 584 655 (mecab-nbest-init (tagger-mecab tagger) str))  656  585 657 (define-method parse-nbest-init ((tagger <mecab-tagger>) (str <string>) (len <integer>)) 586 658 (mecab-nbest-init (tagger-mecab tagger) str len))  659  587 660 (define-method next ((tagger <mecab-tagger>)) 588 661 (mecab-nbest-next-tostr (tagger-mecab tagger)))  662  589 663 (define-method next-node ((tagger <mecab-tagger>)) 590 664 (mecab-nbest-next-tonode (tagger-mecab tagger)))  665  591 666 (define-method format-node ((tagger <mecab-tagger>) (node <mecab-node>)) 592 667 (mecab-format-node (tagger-mecab tagger) node)) -
lang/scheme/gauche/bindings/mecab/trunk/test.scm
r117 r131  1 1 ;; -*- coding:euc-jp -*- 2 2 ;; 3  ;; testfor mecab module 3 ;; dictionary-independent tests for mecab module 4 4 ;; 5 5  6 6 (use gauche.test) 7 7  8  (test-start "mecab ") 8 (test-start "mecab: dictionary-independent tests") 9 9 (use text.mecab) 10 10 (test-module 'text.mecab) 11 11  12  (define m (mecab-new2 "")) 13  (test* "mecab-new2" #t (is-a? m <mecab>)) 14  (test* "mecab-destroy" #f (mecab-destroyed? m)) 15  ( mecab-destroy m)Â16  (test* "mecab-destroy" #t (mecab-destroyed? m)) 12 ;;  13 ;; write-object / ctor  14 ;;  15 (define-macro (displayed-string obj)  16 `(with-output-to-string (lambda () (display ,obj)))) 17 17  18  (test* "mecab-sparse-tostr" #f 19  (mecab-sparse-tostr m "�Ϻ�ϼ�Ϻ���������Ҥ�Ϥ�����")) 20  (test* "mecab-strerror" #t (string? (mecab-strerror m)))  18 (test-section "write-object (display)")  19 (let1 m (mecab-new '())  20 (test* "(mecab-new '())" "#<mecab ()>" (displayed-string m))  21 (mecab-destroy m))  22 (let1 m (mecab-new '("-O" "chasen"))  23 (test* "(mecab-new '(\"-O\" \"chasen\"))" "#<mecab ((output-format-type chasen))>" (displayed-string m))  24 (mecab-destroy m))  25 (let1 m (mecab-new2 "")  26 (test* "(mecab-new2 \"\")" "#<mecab ()>" (displayed-string m))  27 (mecab-destroy m))  28 (let1 m (mecab-new2 "-Ochasen")  29 (test* "(mecab-new2 \"-Ochasen\")" "#<mecab ((output-format-type chasen))>" (displayed-string m))  30 (mecab-destroy m))  31 (let1 m (mecab-new2 "-O chasen")  32 (test* "(mecab-new2 \"-O chasen\")" "#<mecab ((output-format-type chasen))>" (displayed-string m))  33 (mecab-destroy m)) 21 34  22  (define m (mecab-new2 "")) 23  (test* "mecab-sparse-tostr" 24  "�Ϻ ̾����,*,*,�Ϻ,ï¿½ï¿½ï¿½í¤¦,*\n\ 25  �������*,*,����*\n\ 26  ��Ϻ ̾����,*,*,��Ϻ,ï¿½ï¿½ï¿½í¤¦,*\n\ 27  �� ���ʽ�*,*,��,��,*\n\ 28  ��ư��,�Ҳ�ư�쥿���������Ʒ����ä��ɽɽ��:�� 29  �������������,���ư�����������\n\ 30  � ̾���̾��,*,�,�ۤ���:�� �ɽɽ��:�\n\ 31  ����ʽ�*,*,��,*\n\ 32  �ֻ�̾����,*,*,�ֻ��Ϥʤ�,*\n\ 33  �����ʽ�*,*,����*\n\ 34  ����� ư��,�Ҳ�ư�쥵���������,�錄����,�°ư����ʴ���ɽɽ��:���\n\ 35  �� ����,*,*,��,��,*\n\ 36  EOS\n" 37  (mecab-sparse-tostr m "�Ϻ�ϼ�Ϻ���������Ҥ�Ϥ�����")) 38   39  (mecab-destroy m)  35 (test-section "reader-ctor")  36 (let1 m #,(mecab "") ;; with reader-ctor  37 (test* "#,(mecab \"\") : mecab?" #t (mecab? m))  38 (test* "#,(mecab \"\") : options" '() (mecab-options m))  39 (mecab-destroy m))  40 (let1 m #,(mecab "-Ochasen") ;; with reader-ctor  41 (test* "#,(mecab \"-Ochasen\") : mecab?" #t (mecab? m))  42 (test* "#,(mecab \"-Ochasen\") : options" '((output-format-type chasen)) (mecab-options m))  43 (mecab-destroy m))  44 (let1 m #,(mecab :O chasen :l 1) ;; with reader-ctor  45 (test* "#,(mecab :O chasen :l 1) : mecab?" #t (mecab? m))  46 (test* "#,(mecab :O chasen :l 1) : mecab-options" '((output-format-type chasen) (lattice-level 1)) (mecab-options m))  47 (mecab-destroy m))  48   49 ;;  50 ;; mecab?, mecab-node?, mecab-dictionary-info?  51 ;;  52 (let* ([m (mecab-new2 "")]  53 [node (mecab-sparse-tonode m "")]  54 [dinfo (mecab-dictionary-info m)])  55 (test-section "mecab?, mecab-node?, mecab-dictionary-info?")  56   57 (test* "is-a? m <mecab>" #t (is-a? m <mecab>))  58 (test* "is-a? node <mecab>" #t (is-a? node <mecab-node>))  59 (test* "is-a? dinfo <mecab>" #t (is-a? dinfo <mecab-dictionary-info>))  60   61 (test* "mecab? m" #t (mecab? m))  62 (test* "mecab? node" #f (mecab? node))  63 (test* "mecab? dinfo" #f (mecab? dinfo))  64   65 (test* "mecab-node? m" #f (mecab-node? m))  66 (test* "mecab-node? node" #t (mecab-node? node))  67 (test* "mecab-node? dinfo" #f (mecab-node? dinfo))  68   69 (test* "mecab-dictionary-info? m" #f (mecab-dictionary-info? m))  70 (test* "mecab-dictionary-info? node" #f (mecab-dictionary-info? node))  71 (test* "mecab-dictionary-info? dinfo" #t (mecab-dictionary-info? dinfo))  72   73 (mecab-destroy m))  74   75 ;; mecab, mecab-options  76 (test-section "mecab-options")  77 (let1 m (mecab-new '())  78 (test* "(mecab-new '())" '() (mecab-options m))  79 (mecab-destroy m))  80 (let1 m (mecab-new '("-O" "chasen"))  81 (test* "(mecab-new '(\"-O\" \"chasen\"))" '((output-format-type chasen)) (mecab-options m))  82 (mecab-destroy m))  83 (let1 m (mecab-new2 "")  84 (test* "(mecab-new2 \"\"" '() (mecab-options m))  85 (mecab-destroy m))  86 (let1 m (mecab-new2 "-Ochasen")  87 (test* "mecab?" #t (mecab? m))  88 (test* "(mecab-new2 \"-Ochasen\") : options" '((output-format-type chasen)) (mecab-options m))  89 (mecab-destroy m))  90 (let1 m (mecab-new2 "-O chasen")  91 (test* "mecab?" #t (mecab? m))  92 (test* "(mecab-new2 \"-O chasen\") : options" '((output-format-type chasen)) (mecab-options m))  93 (mecab-destroy m))  94 (let1 m (mecab-new2 "--output-format-type chasen")  95 (test* "mecab?" #t (mecab? m))  96 (test* "(mecab-new2 \"--output-format-type chasen\") : options" '((output-format-type chasen)) (mecab-options m))  97 (mecab-destroy m))  98 (let1 m (mecab-new2 "--output-format-type=chasen")  99 (test* "mecab?" #t (mecab? m))  100 (test* "(mecab-new2 \"--output-format-type=chasen\") : options" '((output-format-type chasen)) (mecab-options m))  101 (mecab-destroy m))  102 (let1 m (mecab "")  103 (test* "(mecab \"\") : mecab?" #t (mecab? m))  104 (test* "(mecab \"\") : options" '() (mecab-options m))  105 (mecab-destroy m))  106 (let1 m (mecab "-Ochasen")  107 (test* "(mecab \"-Ochasen\") : mecab?" #t (mecab? m))  108 (test* "(mecab \"-Ochasen\") : options" '((output-format-type chasen)) (mecab-options m))  109 (mecab-destroy m))  110 (let1 m (mecab "-O chasen")  111 (test* "(mecab \"-O chasen\") : mecab?" #t (mecab? m))  112 (test* "(mecab \"-O chasen\") : options" '((output-format-type chasen)) (mecab-options m))  113 (mecab-destroy m))  114 (let1 m (mecab "-O" "chasen")  115 (test* "(mecab \"-O\" \"chasen\") : mecab?" #t (mecab? m))  116 (test* "(mecab \"-O\" \"chasen\") : options" '((output-format-type chasen)) (mecab-options m))  117 (mecab-destroy m))  118 (let1 m (mecab :O 'chasen :l 1)  119 (test* "(mecab :O chasen :l 1) : mecab?" #t (mecab? m))  120 (test* "(mecab :O chasen :l 1) : mecab-options" '((output-format-type chasen) (lattice-level 1)) (mecab-options m))  121 (mecab-destroy m))  122   123 ;;  124 ;; APIs  125 ;;  126 (test-section "mecab-new")  127 (let1 m (mecab-new '())  128 (test* "is-a? <mecab>" #t (is-a? m <mecab>))  129 (test* "mecab?" #t (mecab? m))  130   131 (test-section "mecab-destroy")  132 (test* "not destroyed yet" #f (mecab-destroyed? m))  133 (mecab-destroy m)  134 (test* "destroyed" #t (mecab-destroyed? m))  135 )  136   137 (test-section "mecab-new2")  138 (let1 m (mecab-new2 "")  139 (test* "is-a? <mecab>" #t (is-a? m <mecab>))  140 (test* "mecab?" #t (mecab? m))  141 (test* "options" '() (mecab-options m))  142 (mecab-destroy m))  143   144 (test-section "mecab-version")  145 (test* "mecab-version" 1  146 (rxmatch-num-matches  147 (#/^[0-9]+\.[0-9]+[.0-9A-Za-z]*$/ (mecab-version))))  148   149 (test-section "mecab-strerror")  150 (mecab-new2 "")  151 (test* "at mecab-new2 (ok)" "" (mecab-strerror #f))  152   153 (mecab-new2 "-d //") ;; => "tagger.cpp(149) [load_dictionary_resource(param)] param.cpp(71) [ifs] no such file or directory: //dicrc"  154 (test* "at mecab-new (err)" #f (string=? "" (mecab-strerror #f)))  155 (test* "no such file or directory" #f (not (#/no such file or directory/ (mecab-strerror #f))))  156   157 (let1 m (mecab-new2 "")  158 (mecab-sparse-tostr m "��Ĥ���")  159 (test* "noerr" "" (mecab-strerror m))  160 (mecab-destroy m))  161   162 (let1 m (mecab-new2 "")  163 (test-section "mecab-get-partial / mecab-set-partial")  164 (test* "default partial mode [0|1]" #t  165 (and (memq (mecab-get-partial m) '(0 1)) #t))  166 (mecab-set-partial m 1)  167 (test* "set to 1" 1 (mecab-get-partial m))  168 (mecab-set-partial m 0)  169 (test* "set to 0" 0 (mecab-get-partial m))  170   171 (test-section "mecab-get-theta / mecab-set-theta")  172 (let1 theta (mecab-get-theta m)  173 (test* "default partial mode [0..1]" #t (and (number? theta) (<= 0.0 theta 1.0))))  174 (mecab-set-theta m 1.0)  175 (test* "set to 1.0" 1.0 (mecab-get-theta m))  176 (mecab-set-theta m 0.5)  177 (test* "set to 0.5" 0.5 (mecab-get-theta m))  178   179 (test-section "mecab-get-lattice-level / mecab-set-lattice-level")  180 (test* "default lattice-level [0|1|2]" #t  181 (and (memq (mecab-get-lattice-level m) '(0 1 2)) #t))  182 (mecab-set-lattice-level m 1)  183 (test* "set to 1" 1 (mecab-get-lattice-level m))  184 (mecab-set-lattice-level m 2)  185 (test* "set to 2" 2 (mecab-get-lattice-level m))  186 (mecab-set-lattice-level m 0)  187 (test* "set to 0" 0 (mecab-get-lattice-level m))  188   189 (test-section "mecab-get-all-morphs / mecab-set-all-morphs")  190 (test* "default all-morphs [0|1]" #t  191 (and (memq (mecab-get-all-morphs m) '(0 1)) #t))  192 (mecab-set-all-morphs m 1)  193 (test* "set to 1" 1 (mecab-get-all-morphs m))  194 (mecab-set-all-morphs m 0)  195 (test* "set to 0" 0 (mecab-get-all-morphs m))  196 ) 40 197  41 198 (test-end)Â