1 | ;;; |
---|
2 | ;;; Test cabocha |
---|
3 | ;;; |
---|
4 | ;;; 2009.3.15 by naoya_t |
---|
5 | ;;; |
---|
6 | |
---|
7 | (use gauche.test) |
---|
8 | (use srfi-1) |
---|
9 | |
---|
10 | (test-start "cabocha") |
---|
11 | (use cabocha) |
---|
12 | (test-module 'cabocha) |
---|
13 | |
---|
14 | (define c (cabocha-new)) |
---|
15 | (test* "cabocha?" #t (cabocha? c)) |
---|
16 | ;(test* "cabocha-destroyed?" #f (cabocha-destroyed? c)) |
---|
17 | ;; |
---|
18 | |
---|
19 | (define (cabocha-chunk-desc ch) |
---|
20 | (format "link:~d head:~d func:~d token-size:~d token-pos:~d score:~d feature:~a" |
---|
21 | (cabocha-chunk-link ch) |
---|
22 | (cabocha-chunk-head-pos ch) |
---|
23 | (cabocha-chunk-func-pos ch) |
---|
24 | (cabocha-chunk-token-size ch) |
---|
25 | (cabocha-chunk-token-pos ch) |
---|
26 | (cabocha-chunk-score ch) |
---|
27 | (cabocha-chunk-feature-list ch) |
---|
28 | ; (cabocha-chunk-feature-list-size ch) |
---|
29 | )) |
---|
30 | |
---|
31 | (define (cabocha-token-desc tok) |
---|
32 | (format "surface:~a (~a) feature:~a feature-list:~a ne:~a chunk:~a" |
---|
33 | (cabocha-token-surface tok) |
---|
34 | (cabocha-token-normalized-surface tok) |
---|
35 | (cabocha-token-feature tok) |
---|
36 | (cabocha-token-feature-list tok) |
---|
37 | ; (cabocha-token-feature-list-size tok) |
---|
38 | (cabocha-token-ne tok) |
---|
39 | ; (cabocha-chunk-desc (cabocha-token-chunk tok)) |
---|
40 | (cabocha-token-chunk tok) |
---|
41 | )) |
---|
42 | (define (cabocha-token->lisp tok) |
---|
43 | `(token ;(format "surface:~a (~a) feature:~a feature-list:~a ne:~a chunk:~a" |
---|
44 | ; ( ,(cabocha-token-surface tok) . ,(cabocha-token-normalized-surface tok) ) |
---|
45 | ,(cabocha-token-normalized-surface tok) |
---|
46 | ;(cabocha-token-feature tok) |
---|
47 | ,(cabocha-token-feature-list tok) |
---|
48 | ; (cabocha-token-feature-list-size tok) |
---|
49 | ;(cabocha-token-ne tok) |
---|
50 | ; (cabocha-chunk-desc (cabocha-token-chunk tok)) |
---|
51 | ;(cabocha-token-chunk tok) |
---|
52 | )) |
---|
53 | |
---|
54 | (define (vector-range vec from size) |
---|
55 | (let1 vec* (make-vector size) |
---|
56 | (dotimes (i size) |
---|
57 | (vector-set! vec* i (vector-ref vec (+ from i)))) |
---|
58 | vec*)) |
---|
59 | |
---|
60 | (define (cabocha-chunk->lisp i ch tokens) |
---|
61 | (let* ([token-pos (cabocha-chunk-token-pos ch)] |
---|
62 | [token-size (cabocha-chunk-token-size ch)] |
---|
63 | [tokens-in-chunk (vector-range tokens token-pos token-size)] |
---|
64 | [token-head-pos (cabocha-chunk-head-pos ch)] |
---|
65 | [token-func-pos (cabocha-chunk-func-pos ch)] |
---|
66 | ) |
---|
67 | `(chunk ;(format "link:~d head:~d func:~d token-size:~d token-pos:~d score:~d feature:~a" |
---|
68 | ,i |
---|
69 | ,(cabocha-chunk-link ch) |
---|
70 | ; ,(map token-surface (vector->list tokens-in-chunk)) |
---|
71 | ; (head ,(token-surface (vector-ref tokens-in-chunk token-head-pos))) |
---|
72 | ; (func ,(token-surface (vector-ref tokens-in-chunk token-func-pos))) |
---|
73 | ,tokens-in-chunk |
---|
74 | ,token-head-pos |
---|
75 | ,token-func-pos |
---|
76 | ; ,token-size |
---|
77 | ; ,token-pos |
---|
78 | ,(cabocha-chunk-score ch) |
---|
79 | ; ,(cabocha-chunk-feature-list ch) |
---|
80 | ; (cabocha-chunk-feature-list-size ch) |
---|
81 | ))) |
---|
82 | (define (pp-chunk chunk) |
---|
83 | ; (print " % " chunk) |
---|
84 | (let1 tokens-in-chunk (fourth chunk) |
---|
85 | (format #t "~d) => ~d ~a // head=~a func=~a score:~a\n" |
---|
86 | (second chunk) |
---|
87 | (third chunk) |
---|
88 | (map token-surface (vector->list tokens-in-chunk)) |
---|
89 | (token-surface (vector-ref tokens-in-chunk (fifth chunk))) |
---|
90 | (token-surface (vector-ref tokens-in-chunk (sixth chunk))) |
---|
91 | (seventh chunk) ))) |
---|
92 | |
---|
93 | (define (token-surface token) (cadr token)) |
---|
94 | |
---|
95 | #| |
---|
96 | (define (cabocha-tree-chunk-list t) |
---|
97 | (let loop ([i (- (cabocha-tree-chunk-size t) 1)] [lis '()]) |
---|
98 | (if (< i 0) lis |
---|
99 | (loop (- i 1) |
---|
100 | (cons (cabocha-chunk->lisp i (cabocha-tree-chunk t i)) lis) )))) |
---|
101 | |
---|
102 | (define (cabocha-tree-token-list t) |
---|
103 | (let loop ([i (- (cabocha-tree-token-size t) 1)] [lis '()]) |
---|
104 | (if (< i 0) lis |
---|
105 | (loop (- i 1) |
---|
106 | (cons (cabocha-token->lisp (cabocha-tree-token t i)) lis) )))) |
---|
107 | |# |
---|
108 | |
---|
109 | (define (cabocha-tree-chunks t) |
---|
110 | (let* ([tokens (cabocha-tree-tokens t)] |
---|
111 | [chunk-size (cabocha-tree-chunk-size t)] |
---|
112 | [vec (make-vector chunk-size)]) |
---|
113 | (dotimes (i chunk-size) |
---|
114 | (vector-set! vec i (cabocha-chunk->lisp i (cabocha-tree-chunk t i) tokens) )) |
---|
115 | vec)) |
---|
116 | |
---|
117 | (define (cabocha-tree-tokens t) |
---|
118 | (let* ([token-size (cabocha-tree-token-size t)] |
---|
119 | [vec (make-vector token-size)]) |
---|
120 | (dotimes (i token-size) |
---|
121 | (vector-set! vec i (cabocha-token->lisp (cabocha-tree-token t i)) )) |
---|
122 | vec)) |
---|
123 | |
---|
124 | (define (cparse sentence) |
---|
125 | (let* ([s (string-append sentence "。")] |
---|
126 | [tree (cabocha-sparse-totree c s)] |
---|
127 | ) |
---|
128 | (format #t "\n「~a」\n" s) |
---|
129 | |
---|
130 | ; (cabocha-tree-dump tree) |
---|
131 | (let* ([token-size (cabocha-tree-token-size tree)] |
---|
132 | [chunk-size (cabocha-tree-chunk-size tree)] |
---|
133 | [chunks (cabocha-tree-chunks tree)] |
---|
134 | ) |
---|
135 | (format #t "token size: ~d, " token-size) |
---|
136 | (format #t "chunk size: ~d\n" chunk-size) |
---|
137 | (dotimes (i chunk-size) |
---|
138 | (pp-chunk (vector-ref chunks i))) |
---|
139 | ))) |
---|
140 | ; (format #t " - ~s\n" (cabocha-tree-sentence tree)) |
---|
141 | ; (display (cabocha-sparse-tostr c s))) |
---|
142 | |
---|
143 | (load "sentences.scm") |
---|
144 | (for-each cparse sentences) |
---|
145 | |
---|
146 | ;; |
---|
147 | (cabocha-destroy c) |
---|
148 | ;(test* "cabocha-destroyed?" #t (cabocha-destroyed? c)) |
---|
149 | (test-end) |
---|