[124] | 1 | ;;; |
---|
| 2 | ;;; Test cabocha |
---|
| 3 | ;;; |
---|
| 4 | ;;; 2009.3.15 by naoya_t |
---|
| 5 | ;;; |
---|
| 6 | |
---|
| 7 | (use gauche.test) |
---|
| 8 | (use srfi-1) |
---|
| 9 | |
---|
;; Open the gauche.test session named "cabocha"; it is closed by the
;; (test-end) call at the bottom of this file.
(test-start "cabocha")
;; The module under test is loaded after the session has been started.
(use cabocha)
;; gauche.test's generic module check for the cabocha module.
(test-module 'cabocha)

;; Shared CaboCha handle: cparse parses every sentence with it, and it is
;; released with cabocha-destroy at the end of the file.
(define c (cabocha-new))
(test* "cabocha?" #t (cabocha? c))
;; Disabled check kept from the original script.
;(test* "cabocha-destroyed?" #f (cabocha-destroyed? c))
---|
| 17 | ;; |
---|
| 18 | |
---|
;; Describe chunk CH as a single human-readable string.
;;
;; The accessors are applied to CH in the same order as the directives in
;; the format string: link, head position, function position, token size,
;; token position, score and feature list.  The feature-list size is
;; deliberately not included.
(define (cabocha-chunk-desc ch)
  (let1 fields (map (lambda (accessor) (accessor ch))
                    (list cabocha-chunk-link
                          cabocha-chunk-head-pos
                          cabocha-chunk-func-pos
                          cabocha-chunk-token-size
                          cabocha-chunk-token-pos
                          cabocha-chunk-score
                          cabocha-chunk-feature-list))
    (apply format
           "link:~d head:~d func:~d token-size:~d token-pos:~d score:~d feature:~a"
           fields)))
---|
| 30 | |
---|
;; Describe token TOK as a single human-readable string.
;;
;; The string shows, in order: the surface, the normalized surface (in
;; parentheses), the feature, the feature list, the `ne` field, and
;; whatever cabocha-token-chunk returns for TOK (displayed as-is, not
;; expanded through cabocha-chunk-desc).
(define (cabocha-token-desc tok)
  (let1 fields (map (lambda (accessor) (accessor tok))
                    (list cabocha-token-surface
                          cabocha-token-normalized-surface
                          cabocha-token-feature
                          cabocha-token-feature-list
                          cabocha-token-ne
                          cabocha-token-chunk))
    (apply format
           "surface:~a (~a) feature:~a feature-list:~a ne:~a chunk:~a"
           fields)))
---|
;; Convert token TOK into the plain list
;;   (token <normalized-surface> <feature-list>)
;; Only these two fields are kept; the raw surface, feature string, `ne`
;; and chunk fields are not part of the result.
(define (cabocha-token->lisp tok)
  (list 'token
        (cabocha-token-normalized-surface tok)
        (cabocha-token-feature-list tok)))
---|
| 53 | |
---|
;; Return a freshly allocated vector holding SIZE consecutive elements of
;; VEC, starting at index FROM.  VEC itself is not modified.
(define (vector-range vec from size)
  (let ((out (make-vector size)))
    (do ((k 0 (+ k 1)))
        ((>= k size) out)
      ;; Element k of the result is element FROM+k of the source.
      (vector-set! out k (vector-ref vec (+ from k))))))
---|
| 59 | |
---|
;; Convert chunk CH (the I-th chunk of its tree) into the plain list
;;   (chunk I <link> <token-vector> <head-pos> <func-pos> <score>)
;;
;; TOKENS is the vector of already-converted tokens for the whole tree;
;; <token-vector> is the slice of it that the chunk covers, taken from the
;; chunk's token position and token size.  pp-chunk uses <head-pos> and
;; <func-pos> as indices into that slice.
(define (cabocha-chunk->lisp i ch tokens)
  (let* ((first-token (cabocha-chunk-token-pos ch))
         (token-count (cabocha-chunk-token-size ch))
         (chunk-tokens (vector-range tokens first-token token-count))
         (head-index (cabocha-chunk-head-pos ch))
         (func-index (cabocha-chunk-func-pos ch)))
    (list 'chunk
          i
          (cabocha-chunk-link ch)
          chunk-tokens
          head-index
          func-index
          (cabocha-chunk-score ch))))
---|
;; Print one line describing CHUNK, a list in the shape built by
;; cabocha-chunk->lisp:
;;   (chunk <index> <link> <token-vector> <head-pos> <func-pos> <score>)
;;
;; The line shows the chunk index, its link, the surfaces of all tokens in
;; the chunk, the surfaces of the head and function tokens (looked up by
;; position inside the chunk's token vector), and the score.
(define (pp-chunk chunk)
  (let* ((slice (fourth chunk))
         ;; Surface of the K-th token within this chunk.
         (surface-at (lambda (k) (token-surface (vector-ref slice k)))))
    (format #t "~d) => ~d ~a // head=~a func=~a score:~a\n"
            (second chunk)
            (third chunk)
            (map token-surface (vector->list slice))
            (surface-at (fifth chunk))
            (surface-at (sixth chunk))
            (seventh chunk))))
---|
| 92 | |
---|
;; Return the second element of TOKEN.  For lists produced by
;; cabocha-token->lisp, (token <normalized-surface> <feature-list>), that
;; is the normalized surface string.
(define (token-surface token)
  (car (cdr token)))
---|
| 94 | |
---|
| 95 | #| |
---|
| 96 | (define (cabocha-tree-chunk-list t) |
---|
| 97 | (let loop ([i (- (cabocha-tree-chunk-size t) 1)] [lis '()]) |
---|
| 98 | (if (< i 0) lis |
---|
| 99 | (loop (- i 1) |
---|
| 100 | (cons (cabocha-chunk->lisp i (cabocha-tree-chunk t i)) lis) )))) |
---|
| 101 | |
---|
| 102 | (define (cabocha-tree-token-list t) |
---|
| 103 | (let loop ([i (- (cabocha-tree-token-size t) 1)] [lis '()]) |
---|
| 104 | (if (< i 0) lis |
---|
| 105 | (loop (- i 1) |
---|
| 106 | (cons (cabocha-token->lisp (cabocha-tree-token t i)) lis) )))) |
---|
| 107 | |# |
---|
| 108 | |
---|
;; Return a vector with one entry per chunk of tree T, each converted by
;; cabocha-chunk->lisp.  The tree's tokens are converted once up front
;; (via cabocha-tree-tokens) and shared by every chunk conversion.
(define (cabocha-tree-chunks t)
  (let* ((all-tokens (cabocha-tree-tokens t))
         (count (cabocha-tree-chunk-size t))
         (result (make-vector count)))
    (do ((k 0 (+ k 1)))
        ((>= k count) result)
      (vector-set! result k
                   (cabocha-chunk->lisp k (cabocha-tree-chunk t k) all-tokens)))))
---|
| 116 | |
---|
;; Return a vector with one entry per token of tree T, each converted by
;; cabocha-token->lisp, in token-index order.
(define (cabocha-tree-tokens t)
  (let* ((count (cabocha-tree-token-size t))
         (result (make-vector count)))
    (do ((k 0 (+ k 1)))
        ((>= k count) result)
      (vector-set! result k
                   (cabocha-token->lisp (cabocha-tree-token t k))))))
---|
| 123 | |
---|
;; Parse SENTENCE with the shared CaboCha handle `c` and print a report.
;;
;; "。" is appended to SENTENCE before parsing.  The report consists of the
;; text that was parsed, the token and chunk counts of the resulting tree,
;; and one pp-chunk line per chunk.
(define (cparse sentence)
  (let* ((text (string-append sentence "。"))
         (tree (cabocha-sparse-totree c text)))
    (format #t "\n「~a」\n" text)
    (let* ((n-tokens (cabocha-tree-token-size tree))
           (n-chunks (cabocha-tree-chunk-size tree))
           (chunks (cabocha-tree-chunks tree)))
      (format #t "token size: ~d, " n-tokens)
      (format #t "chunk size: ~d\n" n-chunks)
      ;; Walk the chunk vector by index, printing each chunk in turn.
      (do ((k 0 (+ k 1)))
          ((>= k n-chunks))
        (pp-chunk (vector-ref chunks k))))))
---|
| 140 | ; (format #t " - ~s\n" (cabocha-tree-sentence tree)) |
---|
| 141 | ; (display (cabocha-sparse-tostr c s))) |
---|
| 142 | |
---|
;; Load the sentence corpus.  The next line reads a variable named
;; `sentences`, so this file is presumably where it is defined -- confirm
;; against sentences.scm.
(load "sentences.scm")
;; Parse and pretty-print every sentence in the corpus.
(for-each cparse sentences)

;; Release the shared CaboCha handle, then close the test session.
(cabocha-destroy c)
;; Disabled check kept from the original script.
;(test* "cabocha-destroyed?" #t (cabocha-destroyed? c))
(test-end)
---|