1 | ;;; pdicv-core.el --- core functions for PDIC-formatted dictionaries |
---|
2 | ;; |
---|
3 | ;; Copyright (C) 2005-2009 naoya_t. All Rights Reserved. |
---|
4 | ;; |
---|
5 | ;; Author: naoya_t <naoya.t@aqua.plala.or.jp> |
---|
6 | ;; Maintainer: naoya_t <naoya.t@aqua.plala.or.jp> |
---|
7 | ;; Primary distribution site: |
---|
8 | ;; http://lambdarepos.svnrepository.com/svn/share/lang/elisp/pdicv-mode/trunk |
---|
9 | ;; |
---|
10 | ;; Created: 14 Feb 2005 |
---|
11 | ;; Last modified: 30 Jan 2009 |
---|
12 | ;; Version: 0.9.2 |
---|
13 | ;; Keywords: PDIC dictionary search eijiro |
---|
14 | |
---|
15 | (provide 'pdicv-core) |
---|
16 | ;(put 'pdicv-core 'version "0.9.2") |
---|
17 | |
---|
18 | ;;; Commentary: |
---|
19 | |
---|
20 | ; (pdicv-get-header-info FILENAME) |
---|
21 | ; - ヘッダ情報を読み取る |
---|
22 | ; |
---|
23 | ; (pdicv-get-index-list FILENAME [WORD-ENCODING]) |
---|
24 | ; - PDIC辞書ファイルから、インデックスリストを取得 |
---|
25 | ; |
---|
26 | ; (pdicv-scan-datablock FILENAME PHYS CRITERIA-FUNC) |
---|
27 | ; - データブロックをスキャン |
---|
28 | ; |
---|
29 | ; (pdicv-core-search DICINFO CRITERIA [SIMPLE-MODE-P DONT-CLEAR-P]) |
---|
30 | ; - PDIC検索(コアルーチン) |
---|
31 | ; |
---|
32 | |
---|
33 | ;;; Code: |
---|
34 | (require 'nt-macros) |
---|
35 | (require 'nt-readval) |
---|
36 | (require 'nt-string) |
---|
37 | (require 'nt-bocu) |
---|
38 | (require 'nt-file) |
---|
39 | (require 'nt-english) |
---|
40 | |
---|
41 | ; decoder |
---|
42 | (defvar pdicv-null-decoder (lambda (s) s)) |
---|
43 | (defvar pdicv-sjis-decoder (lambda (s) (decode-coding-string s 'japanese-shift-jis-dos))) |
---|
44 | (defvar pdicv-latin1-decoder (lambda (s) (decode-coding-string s 'iso-latin-1-dos))) |
---|
45 | (defvar pdicv-bocu-decoder (lambda (s) (nt:bocu-decode s))) |
---|
46 | (defmacro pdicv-create-decoder (encoding) |
---|
47 | "create a decoder from user-specified encoding" |
---|
48 | `(lambda (s) (decode-coding-string s ,encoding))) |
---|
49 | |
---|
50 | (defvar pdicv-index-table-list ()) |
---|
51 | |
---|
52 | (defvar pdicv-result-height 8) |
---|
53 | ; |
---|
54 | ; ヘッダ情報を読み取る |
---|
55 | ; |
---|
56 | (defun pdicv-get-header-info (filename) |
---|
57 | "[PDIC] Get Header Info" |
---|
58 | (catch 'pdicv-get-header-info |
---|
59 | (let* ((header-buf (nt:read-from-file filename 0 256)) |
---|
60 | ; |
---|
61 | (headername nil); (substring header-buf 1 100)) |
---|
62 | (dictitle nil); (substring header-buf 101 140)) |
---|
63 | (version (nt:read-short header-buf 140)) |
---|
64 | (lword (nt:read-short header-buf 142)) |
---|
65 | (ljapa (nt:read-short header-buf 144)) |
---|
66 | (block-size (nt:read-short header-buf 146)) |
---|
67 | (index-block (nt:read-short header-buf 148)) |
---|
68 | (header-size (nt:read-short header-buf 150)) |
---|
69 | (index-size (nt:read-ushort header-buf 152)) |
---|
70 | (empty-block (nt:read-short header-buf 154)) |
---|
71 | (nindex (nt:read-short header-buf 156)) |
---|
72 | (nblock (nt:read-short header-buf 158)) |
---|
73 | (nword (nt:read-ulong header-buf 160)) |
---|
74 | (dicorder (nt:read-uchar header-buf 164)) |
---|
75 | (dictype (nt:read-uchar header-buf 165)) (dictype* nil) |
---|
76 | (attrlen (nt:read-uchar header-buf 166)) |
---|
77 | ; NEWDIC2- |
---|
78 | (olenumber 0) (os nil) |
---|
79 | (lid-word 0) (lid-japa 0) (lid-exp 0) (lid-pron 0) (lid-other 0) |
---|
80 | ; NEWDIC3- |
---|
81 | (extheader 0) (index-blkbit 0) (cypt nil) (update-count 0) |
---|
82 | (dicident nil) |
---|
83 | ;; |
---|
84 | (major-version (/ version 256)) |
---|
85 | (datablock-size (* nblock block-size)) |
---|
86 | (bocu nil) |
---|
87 | ) |
---|
88 | |
---|
89 | (setq version |
---|
90 | (nth major-version '(not-supported not-supported newdic1 newdic2 newdic3 newdic4 unicode-bocu-6))) |
---|
91 | |
---|
92 | (setq dicorder |
---|
93 | (nth (nt:read-uchar header-buf 164) '(code-order ignore-case dictionary-order order-descendant))) |
---|
94 | |
---|
95 | (when (> (logand dictype 128) 0) (setq dictype* (cons 'tree-view-mode dictype*))) |
---|
96 | (when (> (logand dictype 64) 0) (setq dictype* (cons 'crypted dictype*))) |
---|
97 | ; (when (> (logand dictype 32) 0) (setq dictype* (cons 'multilingual dictype*))) |
---|
98 | (when (> (logand dictype 16) 0) (setq dictype* (cons 'unicode dictype*))) |
---|
99 | (when (> (logand dictype 8) 0) (setq dictype* (cons 'bocu dictype*))) |
---|
100 | (when (> (logand dictype 1) 0) (setq dictype* (cons 'ar-compressed dictype*))) |
---|
101 | |
---|
102 | ;;(case major-version |
---|
103 | (cond |
---|
104 | ((= major-version 6) |
---|
105 | ;;(6 "Ver 6.xx" |
---|
106 | (setq os (nt:read-char header-buf 167)) |
---|
107 | (setq os (cond ((= os 0) 'sjis-crlf) |
---|
108 | ((= os 1) 'sjis-cr) |
---|
109 | ((= os 2) 'sjis-lf) |
---|
110 | ((= os 3) 'euc-lf) |
---|
111 | ((= os 4) 'jis-lf) |
---|
112 | ((= os 32) 'bocu) |
---|
113 | )) |
---|
114 | (when (eq os 'bocu) (setq bocu t)) |
---|
115 | (setq olenumber (nt:read-long header-buf 168)) |
---|
116 | ;; dummy_lid, 10 bytes |
---|
117 | (setq index-blkbit (if (= (nt:read-uchar header-buf 182) 1) 32 16)) |
---|
118 | ;; dummy0 @185 |
---|
119 | (setq extheader (nt:read-ulong header-buf 184)) |
---|
120 | (setq empty-block (nt:read-long header-buf 188)) ;overwrite |
---|
121 | (setq nindex (nt:read-long header-buf 192)) ;overwrite |
---|
122 | (setq nblock (nt:read-long header-buf 196)) ;overwrite |
---|
123 | (setq datablock-size (* nblock block-size)) |
---|
124 | (setq cypt (substring header-buf 200 208)) ;- reserved[8] |
---|
125 | (setq update-count (nt:read-ulong header-buf 208)) |
---|
126 | ; dummy00 @212[4] |
---|
127 | (setq dicident (substring header-buf 216 224)) |
---|
128 | ;(setq dummy (substring header-buf 224 256)) |
---|
129 | (setq index-size (* index-block block-size)) ;overwrite |
---|
130 | );6 |
---|
131 | ((= major-version 5) |
---|
132 | ;;(5 "HyperDIC, Ver 5.00" |
---|
133 | (setq os (nt:read-char header-buf 167)) |
---|
134 | (setq os (cond ((= os 0) 'sjis-crlf) |
---|
135 | ((= os 1) 'sjis-cr) |
---|
136 | ((= os 2) 'sjis-lf) |
---|
137 | ((= os 3) 'euc-lf) |
---|
138 | ((= os 4) 'jis-lf) |
---|
139 | ((= os 32) 'bocu) |
---|
140 | )) |
---|
141 | (when (eq os 'bocu) (setq bocu t)) |
---|
142 | (setq olenumber (nt:read-long header-buf 168)) |
---|
143 | (setq index-blkbit (if (= (nt:read-uchar header-buf 182) 1) 32 16)) |
---|
144 | ;; dummy0 @185 |
---|
145 | (setq extheader (nt:read-ulong header-buf 184)) |
---|
146 | (setq empty-block (nt:read-long header-buf 188)) ;overwrite |
---|
147 | (setq nindex (nt:read-long header-buf 192)) ;overwrite |
---|
148 | (setq nblock (nt:read-long header-buf 196)) ;overwrite |
---|
149 | (setq datablock-size (* nblock block-size)) |
---|
150 | (setq cypt (substring header-buf 200 208)) ;- reserved[8] |
---|
151 | (setq update-count (nt:read-ulong header-buf 208)) |
---|
152 | ; dummy00 @212[4] |
---|
153 | (setq dicident (substring header-buf 216 224)) |
---|
154 | ;(setq dummy (substring header-buf 224 256)) |
---|
155 | (setq index-size (* index-block block-size)) ;overwrite |
---|
156 | );5 |
---|
157 | (t "< 5.0" |
---|
158 | (when (>= major-version 3) |
---|
159 | "NEWDIC2-" |
---|
160 | (setq olenumber (nt:read-long header-buf 167)) |
---|
161 | ;(setq os (byte (substring header-buf 172 173))) |
---|
162 | (setq os (nth (nt:read-char header-buf 171) '(sjis-crlf))) |
---|
163 | ;(setq lid-word (short header-buf 172)) |
---|
164 | ;(setq lid-japa (short header-buf 174)) |
---|
165 | ;(setq lid-exp (short header-buf 176)) |
---|
166 | ;(setq lid-pron (short header-buf 178)) |
---|
167 | ;(setq lid-other (short header-buf 180)) |
---|
168 | ) |
---|
169 | (when (>= major-version 4) |
---|
170 | "NEWDIC3-" |
---|
171 | (setq extheader (nt:read-ulong header-buf 182)) |
---|
172 | (setq empty-block (nt:read-long header-buf 186)) ;overwrite |
---|
173 | (setq nindex (nt:read-long header-buf 190)) ;overwrite |
---|
174 | (setq nblock (nt:read-long header-buf 194)) ;overwrite |
---|
175 | (setq datablock-size (* nblock block-size)) |
---|
176 | (setq index-blkbit (if (= (nt:read-uchar header-buf 198) 1) 32 16)) |
---|
177 | (setq cypt (substring header-buf 200 208)) |
---|
178 | (setq update-count (nt:read-ulong header-buf 207)) |
---|
179 | ;(setq dummy (substring header-buf 212 256)) |
---|
180 | (setq index-size (* index-block block-size)) ;overwrite |
---|
181 | ) |
---|
182 | )); esac |
---|
183 | (list |
---|
184 | ; (cons 'headername headername) ; |
---|
185 | ; (cons 'dictitle dictitle) ; |
---|
186 | (cons 'version version) ; |
---|
187 | (cons 'lword lword) ; |
---|
188 | (cons 'ljapa ljapa) ; |
---|
189 | (cons 'block-size block-size) ; |
---|
190 | (cons 'index-block index-block) ; |
---|
191 | (cons 'header-size header-size) ; |
---|
192 | (cons 'index-size index-size) ; |
---|
193 | (cons 'empty-block empty-block) ; |
---|
194 | (cons 'nindex nindex) ; |
---|
195 | (cons 'nblock nblock) ; |
---|
196 | (cons 'nword nword) ; |
---|
197 | (cons 'dicorder dicorder) ; |
---|
198 | (cons 'dictype dictype*) ; |
---|
199 | (cons 'attrlen attrlen) ; |
---|
200 | (cons 'os os) ; |
---|
201 | ; (cons 'lid-word lid-word) ; |
---|
202 | ; (cons 'lid-japa lid-japa) ; |
---|
203 | ; (cons 'lid-exp lid-exp) ; |
---|
204 | ; (cons 'lid-pron lid-pron) ; |
---|
205 | ; (cons 'lid-other lid-other) ; |
---|
206 | (cons 'extheader extheader) ; |
---|
207 | (cons 'index-blkbit index-blkbit) ;(0=16,1=32) |
---|
208 | (cons 'cypt cypt) ; |
---|
209 | (cons 'update-count update-count) ; |
---|
210 | |
---|
211 | (cons 'index-begins-at (+ header-size extheader)) |
---|
212 | (cons 'datablock-begins-at (+ header-size extheader index-size)) |
---|
213 | (cons 'datablock-ends-at (+ header-size extheader index-size datablock-size)) |
---|
214 | (cons 'datablock-size datablock-size) |
---|
215 | (cons 'bocu bocu))))) |
---|
216 | |
---|
217 | (defun pdicv-get-index-list (filename &optional word-encoding) |
---|
218 | "[PDICV] Get the index list from PDIC file" |
---|
219 | (let* ((header (pdicv-get-header-info filename)) |
---|
220 | (index-buf (nt:read-from-file filename |
---|
221 | (-> header 'index-begins-at) (-> header 'index-size))) |
---|
222 | |
---|
223 | (32bit-address-mode (if (= (-> header 'index-blkbit) 32) t nil)) |
---|
224 | (tab-sep-p (eq 'unicode-bocu-6 (-> header 'version))) |
---|
225 | |
---|
226 | (ix 0) (ix-max (-> header 'nindex)) |
---|
227 | (ofs 0) |
---|
228 | (index-list ())) |
---|
229 | (while (< ix ix-max) |
---|
230 | (let ((phys -1) (word "") (word* nil)) |
---|
231 | (if 32bit-address-mode |
---|
232 | (progn (setq phys (nt:read-ulong index-buf ofs)) |
---|
233 | (setq ofs (+ ofs 4))) |
---|
234 | (progn (setq phys (nt:read-ushort index-buf ofs)) |
---|
235 | (setq ofs (+ ofs 2)))) |
---|
236 | (setq word* (nt:read-cstring index-buf ofs)) (setq ofs (+ ofs (cdr word*) 1)) |
---|
237 | (setq word (car word*)) |
---|
238 | |
---|
239 | (when tab-sep-p |
---|
240 | (let ((tsv (split-string word "\t"))) |
---|
241 | (when (consp tsv) |
---|
242 | (setq word (car tsv))))) |
---|
243 | ; (cond |
---|
244 | ; ((eq word-encoding 'bocu) |
---|
245 | ; (setq word (nt:bocu-decode word))) |
---|
246 | ; ((eq word-encoding 'sjis) |
---|
247 | ; (setq word (decode-coding-string word 'japanese-shift-jis-dos))) |
---|
248 | ; (word-encoding |
---|
249 | ; (setq word (decode-coding-string word word-encoding))) |
---|
250 | ; (t nil)) |
---|
251 | |
---|
252 | ; (setq index-list (cons (cons phys word) index-list)) |
---|
253 | (push (cons phys word) index-list) |
---|
254 | (setq ix (1+ ix)) |
---|
255 | )) |
---|
256 | (nreverse index-list) )) |
---|
257 | |
---|
258 | (defface pdicv-face-dummy |
---|
259 | '((( (class color) (background light) ) |
---|
260 | (:foreground "green" :background "SlateGray1" :weight bold)) |
---|
261 | (t |
---|
262 | (:foreground "red" :background "black"))) ; :weight bold |
---|
263 | "Face for caption") |
---|
264 | (defface pdicv-face-caption-red |
---|
265 | '((t (:foreground "red" :background "black"))) |
---|
266 | "Face for caption") |
---|
267 | (defface pdicv-face-caption-blue |
---|
268 | '((t (:foreground "blue" :background "black"))) |
---|
269 | "Face for caption") |
---|
270 | (defface pdicv-face-caption-green |
---|
271 | '((t (:foreground "green" :background "black"))) |
---|
272 | "Face for caption") |
---|
273 | (defface pdicv-face-gray |
---|
274 | '((t (:foreground "gray"))) |
---|
275 | "Face for text") |
---|
276 | |
---|
277 | (defvar pdicv-default-inserter |
---|
278 | (lambda (eword pron jword example) |
---|
279 | (progn |
---|
280 | (set-text-properties 0 (length eword) '(face bold) eword) |
---|
281 | ; (set-text-properties 0 (length eword) '(face pdicv-face-caption-green) eword) |
---|
282 | ; (set-text-properties 0 (length jword) '(face pdicv-face-caption-gray) jword) |
---|
283 | |
---|
284 | (setq jword (nt:replace-all jword "〓●" " // ")) |
---|
285 | (setq jword (nt:replace-all jword "\n" "\n ")) |
---|
286 | |
---|
287 | (let ((buf "")) |
---|
288 | (setq buf eword) |
---|
289 | (when (string< "" pron) (setq buf (concat buf " [" pron "]"))) |
---|
290 | ; (setq result (concat result " : " jword)) |
---|
291 | (setq buf (concat buf "\n " jword)) |
---|
292 | (when (string< "" example) (setq buf (concat buf "\n - " example))) |
---|
293 | ; (setq buf (concat buf "\n")) |
---|
294 | ; (setq buf (concat buf "\n\n")) |
---|
295 | (setq buf (concat buf "\n")) |
---|
296 | |
---|
297 | (insert buf))))) |
---|
298 | ;; |
---|
299 | ;; |
---|
300 | ;; |
---|
301 | (defun pdicv-scan-datablock (filename phys criteria-func) |
---|
302 | "[PDICV] scan a datablock" |
---|
303 | (catch 'pdicv-scan-datablock |
---|
304 | (let* ((result ()) ;(match-count 0) |
---|
305 | (header (pdicv-get-header-info filename)) |
---|
306 | (tab-sep-p (eq 'unicode-bocu-6 (-> header 'version))) |
---|
307 | (block-size (-> header 'block-size)) |
---|
308 | (offset (+ (-> header 'datablock-begins-at) (* phys block-size))) |
---|
309 | (aligned (and (member (-> header 'version) '(newdic4 unicode-bocu-6)) t)) |
---|
310 | ;; (bocu (-> header 'bocu)) |
---|
311 | (head-word (nt:read-ushort (nt:read-from-file filename offset 2))) |
---|
312 | (blocks (logand 32767 head-word)) |
---|
313 | (block-length (- (* blocks block-size) 2)) |
---|
314 | (field-size (if (zerop (logand 32768 head-word)) 2 4)) |
---|
315 | (datablock (nt:read-from-file filename (+ offset 2) block-length)) |
---|
316 | ; (list blocks field-size datablock) |
---|
317 | (p 0) |
---|
318 | (field-length 0) |
---|
319 | (compress-length 0) |
---|
320 | (rest nil) |
---|
321 | (eword "") (eword-attrib 0) |
---|
322 | ) |
---|
323 | |
---|
324 | (while (< p block-length) ; (while (< p field-size) |
---|
325 | (setq field-length |
---|
326 | (if (= field-size 2) (nt:read-ushort datablock p) (nt:read-ulong datablock p)) ) |
---|
327 | (when (zerop field-length) (throw 'pdicv-scan-datablock (nreverse result))); sfield-list)) |
---|
328 | (setq p (+ p field-size)) ;2ないし4バイト |
---|
329 | (setq compress-length (nt:read-uchar datablock p)) ; 圧縮長 |
---|
330 | (setq p (1+ p)) |
---|
331 | |
---|
332 | (when aligned |
---|
333 | (setq eword-attrib (nt:read-uchar datablock p)) ; 見出し語属性 |
---|
334 | (setq p (1+ p))) |
---|
335 | ; 見出し語以降をとりあえず rest に入れる |
---|
336 | (setq rest (substring datablock p (+ p field-length))) |
---|
337 | (setq p (+ p field-length)) |
---|
338 | ; 見出し語 (NULL終端) |
---|
339 | (let* ((eword-cstr (nt:read-cstring rest)) |
---|
340 | (eword-compressed (car eword-cstr)) (eword-len (cdr eword-cstr)) |
---|
341 | (q 0) |
---|
342 | (level 0) |
---|
343 | (extended nil) |
---|
344 | (jword-cstr nil) (jword "") (jword-len 0) |
---|
345 | (ext-list nil) |
---|
346 | (example "") (pron "") (link "")) |
---|
347 | |
---|
348 | (setq eword (if (zerop compress-length) |
---|
349 | eword-compressed |
---|
350 | (concat (substring eword 0 compress-length) eword-compressed) )) |
---|
351 | (setq q (1+ eword-len)) |
---|
352 | ;; 見出し語属性 |
---|
353 | (when (not aligned) |
---|
354 | (setq eword-attrib (nt:read-uchar rest q)) |
---|
355 | (setq q (1+ q))) |
---|
356 | |
---|
357 | (setq level (logand eword-attrib 15)) |
---|
358 | ; (insert (format ": %s %d %d\n" eword eword-len eword-attrib)) |
---|
359 | ;; (if (zerop (logand eword-attrib 128)) |
---|
360 | ;; (throw 'pdicv-scan-datablock ()); 'illegal) |
---|
361 | (setq eword-attrib (logand eword-attrib 127)) |
---|
362 | |
---|
363 | (setq extended (if (zerop (logand eword-attrib 16)) nil t)) |
---|
364 | (if extended |
---|
365 | (progn ;拡張 |
---|
366 | (setq jword-cstr (nt:read-cstring rest q)) |
---|
367 | (setq jword (car jword-cstr)) (setq jword-len (cdr jword-cstr)) |
---|
368 | (setq q (+ q jword-len 1)) |
---|
369 | (setq ext-list nil) |
---|
370 | (catch 'while |
---|
371 | (while (< q field-length) |
---|
372 | (let* ((ex-attrib (nt:read-uchar rest q)) |
---|
373 | (ex-attrib-sub (logand ex-attrib 15)) |
---|
374 | (exdata-cstr nil) |
---|
375 | (exdata "") (exdata-len 0) ) |
---|
376 | (when (= (logand ex-attrib 128) 128) (throw 'while t)) |
---|
377 | (setq q (1+ q)) |
---|
378 | (setq exdata-cstr (nt:read-cstring rest q)) |
---|
379 | (setq exdata (car exdata-cstr)) |
---|
380 | (setq exdata-len (cdr exdata-cstr)) |
---|
381 | (cond |
---|
382 | ((= ex-attrib-sub 1) (setq example exdata)) |
---|
383 | ((= ex-attrib-sub 2) (setq pron exdata)) |
---|
384 | ((= ex-attrib-sub 4) (setq link exdata)) |
---|
385 | (t nil)) |
---|
386 | (setq q (+ q exdata-len 1)) |
---|
387 | ) ; let* |
---|
388 | ) ; while |
---|
389 | ) ; catch while2 |
---|
390 | ) ; progn |
---|
391 | (progn ;標準 |
---|
392 | (setq jword (substring rest q)) |
---|
393 | (setq pron "") |
---|
394 | (setq example "")) |
---|
395 | ) ; if extended |
---|
396 | |
---|
397 | (if tab-sep-p |
---|
398 | (let* ((splitted (split-string eword "\t")) |
---|
399 | (eword (car splitted)) |
---|
400 | (entry (cadr splitted))) |
---|
401 | (when (funcall criteria-func entry eword pron jword example) |
---|
402 | (push (list entry pron jword example) result))) |
---|
403 | (when (funcall criteria-func eword eword pron jword example) |
---|
404 | (push (list eword pron jword example) result))) |
---|
405 | ;;(when (funcall criteria-func eword pron jword example) |
---|
406 | ;; (push (list eword pron jword example) result)) |
---|
407 | );let |
---|
408 | ); wend |
---|
409 | (nreverse result)))) |
---|
410 | |
---|
411 | (defun pdicv-core-search (dicinfo criteria &optional simple-mode-p dont-clear-p) |
---|
412 | "search in PDIC" |
---|
413 | (let* ((dicname (car dicinfo)) |
---|
414 | (dicfile (nth 1 dicinfo)) |
---|
415 | (encoding-list (nth 2 dicinfo)) |
---|
416 | (decoder-list ()) |
---|
417 | (index-table (-> pdicv-index-table-list dicname))) |
---|
418 | ; (if (null index-table) (setq index-table (pdicv-get-index-list dicfile))) |
---|
419 | |
---|
420 | (when (atom encoding-list) ;; expand encoding-list |
---|
421 | (setq encoding-list (list encoding-list encoding-list encoding-list encoding-list))) |
---|
422 | |
---|
423 | (while encoding-list ;; build the decoder-list |
---|
424 | (let ((encoding (car encoding-list))) |
---|
425 | (cond |
---|
426 | ((eq encoding 'bocu) (push pdicv-bocu-decoder decoder-list)) |
---|
427 | ((eq encoding 'sjis) (push pdicv-sjis-decoder decoder-list)) |
---|
428 | ((eq encoding 'latin1) (push pdicv-latin1-decoder decoder-list)) |
---|
429 | (encoding (push (pdicv-create-decoder encoding) decoder-list)) |
---|
430 | (t (push pdicv-null-decoder decoder-list)))) |
---|
431 | (setq encoding-list (cdr encoding-list))) |
---|
432 | (setq decoder-list (nreverse decoder-list)) |
---|
433 | |
---|
434 | (catch 'pdicv-core-search |
---|
435 | ; (if (null original-word-to-search) (setq original-word-to-search word-to-search)) |
---|
436 | (let* (;(buffer-name (generate-new-buffer-name *buffer-name)) |
---|
437 | ; (pdicv-buffer-name "*PDIC Viewer*") |
---|
438 | ; (dummy (if (get-buffer pdicv-buffer-name) (kill-buffer pdicv-buffer-name))) |
---|
439 | (pdicv-buffer (get-buffer-create "*PDIC Viewer*")) |
---|
440 | ;criteria |
---|
441 | (word-to-search (car criteria)) |
---|
442 | |
---|
443 | (index-needles (nth 1 criteria)) |
---|
444 | (needle1 (car index-needles)) |
---|
445 | (needle2 (cdr index-needles)) |
---|
446 | |
---|
447 | (datablock-criteria-func (nth 2 criteria)) |
---|
448 | |
---|
449 | (ix index-table) (index-size (length ix)) (curr-size index-size) |
---|
450 | (ix+ (cadr ix)); next one |
---|
451 | (match-count 0)) |
---|
452 | |
---|
453 | ; (switch-to-buffer pdicv-buffer-name) |
---|
454 | (save-current-buffer |
---|
455 | (set-buffer pdicv-buffer) |
---|
456 | (when (null dont-clear-p) (erase-buffer)) |
---|
457 | |
---|
458 | (when (not simple-mode-p) |
---|
459 | ;(pop-to-buffer pdicv-buffer-name) |
---|
460 | ; (set-buffer pdicv-buffer-name) |
---|
461 | (insert (format "検索文字列: %s\n" word-to-search)) |
---|
462 | (insert (format "該当件数: ????\n")) |
---|
463 | (newline)) |
---|
464 | ;(insert "\n")) |
---|
465 | (when index-needles |
---|
466 | (setq ix |
---|
467 | (let ((p ix) (last-p nil)) |
---|
468 | (catch 'pdicv-search-in-index |
---|
469 | (while p |
---|
470 | (let* ((elem (car p)) ;(phys (car elem)) |
---|
471 | (word (cdr elem))) |
---|
472 | (if (string< needle1 word) (throw 'pdicv-search-in-index last-p)) |
---|
473 | ;; (if (string< needle2 word) (throw 'pdicv-search-in-index last-p)) |
---|
474 | (setq last-p p) |
---|
475 | (setq p (cdr p)) )) |
---|
476 | last-p)))) |
---|
477 | (catch 'while |
---|
478 | (while ix |
---|
479 | (let* ((curr (car ix)) |
---|
480 | (phys (car curr)) (word (cdr curr)) |
---|
481 | ;; (x (insert (format "* current ix: (%d %s)\n" phys word))) |
---|
482 | (result (pdicv-scan-datablock dicfile phys datablock-criteria-func)); decoder-list nil)) |
---|
483 | (result-count (length result)) |
---|
484 | (inserter pdicv-default-inserter)) |
---|
485 | (when index-needles |
---|
486 | (when (string>= word needle2) (throw 'while t))) |
---|
487 | ;; (if (not (string< word (cdr index-needles))) (throw 'while t))) |
---|
488 | |
---|
489 | ; (insert (format "(%s with index %s ... %s)\n" |
---|
490 | ; word-to-search |
---|
491 | ; (funcall (nth 0 decoder-list) word) result)) |
---|
492 | |
---|
493 | (if result (progn |
---|
494 | (while result |
---|
495 | (let ((rec (pop result))) |
---|
496 | (funcall inserter |
---|
497 | (funcall (nth 0 decoder-list) (nth 0 rec)); eword |
---|
498 | (funcall (nth 1 decoder-list) (nth 1 rec)); pron |
---|
499 | (funcall (nth 2 decoder-list) (nth 2 rec)); jword |
---|
500 | (funcall (nth 3 decoder-list) (nth 3 rec)); example |
---|
501 | ) |
---|
502 | (setq match-count (1+ match-count)) |
---|
503 | )) |
---|
504 | (message "%5d/%5d:%7d" curr-size index-size match-count) |
---|
505 | (sit-for 0)) |
---|
506 | ;;else |
---|
507 | (when (zerop (% curr-size 128)) ;;128は適当な数 |
---|
508 | (message "%5d/%5d:%7d" curr-size index-size match-count)))) |
---|
509 | (setq ix (cdr ix)) |
---|
510 | (setq curr-size (1- curr-size)) |
---|
511 | );wend |
---|
512 | );caught |
---|
513 | |
---|
514 | ;;(insert (pdicv-scan-datablock dicfile (car (car ix)) decoder-list nil needle1 needle2)) |
---|
515 | (goto-char 1) |
---|
516 | |
---|
517 | (when (not simple-mode-p) |
---|
518 | (when (re-search-forward ": [?][?][?][?]" nil t nil) |
---|
519 | (replace-match (format ": %d" match-count) t t nil 0))) |
---|
520 | ); save-current-buffer |
---|
521 | |
---|
522 | ; (pop-to-buffer (current-buffer)) |
---|
523 | ; (setq split-height-threshold 6) |
---|
524 | (when (one-window-p) |
---|
525 | (set-window-buffer (split-window-vertically (- pdicv-result-height)) pdicv-buffer)) |
---|
526 | )))) |
---|
527 | |
---|
528 | ;;; pdicv-core.el ends here |
---|