| 1 | ;;; pdicv-core.el --- core functions for PDIC-formatted dictionaries |
|---|
| 2 | ;; |
|---|
| 3 | ;; Copyright (C) 2005-2009 naoya_t. All Rights Reserved. |
|---|
| 4 | ;; |
|---|
| 5 | ;; Author: naoya_t <naoya.t@aqua.plala.or.jp> |
|---|
| 6 | ;; Maintainer: naoya_t <naoya.t@aqua.plala.or.jp> |
|---|
| 7 | ;; Primary distribution site: |
|---|
| 8 | ;; http://lambdarepos.svnrepository.com/svn/share/lang/elisp/pdicv-mode/trunk |
|---|
| 9 | ;; |
|---|
| 10 | ;; Created: 14 Feb 2005 |
|---|
| 11 | ;; Last modified: 30 Jan 2009 |
|---|
| 12 | ;; Version: 0.9.2 |
|---|
| 13 | ;; Keywords: PDIC dictionary search eijiro |
|---|
| 14 | |
|---|
| 15 | (provide 'pdicv-core) |
|---|
| 16 | ;(put 'pdicv-core 'version "0.9.2") |
|---|
| 17 | |
|---|
| 18 | ;;; Commentary: |
|---|
| 19 | |
|---|
| 20 | ; (pdicv-get-header-info FILENAME) |
|---|
| 21 | ; - ��ؒ�Ò���������ߒ�蒤� |
|---|
| 22 | ; (pdicv-get-index-list FILENAME [WORD-ENCODING]) |
|---|
| 23 | ; - PDIC��������������뒤���钡���������Ò�������꒥���Ȓ���� |
|---|
| 24 | ; |
|---|
| 25 | ; (pdicv-scan-datablock FILENAME PHYS CRITERIA-FUNC) |
|---|
| 26 | ; - ��ǒ�������֒�풥Ò���������㒥�; (pdicv-core-search DICINFO CRITERIA [SIMPLE-MODE-P DONT-CLEAR-P]) |
|---|
| 27 | ; - PDIC��������������뒡������ |
|---|
| 28 | ; |
|---|
| 29 | |
|---|
| 30 | ;;; Code: |
|---|
| 31 | (require 'nt-macros) |
|---|
| 32 | (require 'nt-readval) |
|---|
| 33 | (require 'nt-string) |
|---|
| 34 | (require 'nt-bocu) |
|---|
| 35 | (require 'nt-file) |
|---|
| 36 | (require 'nt-english) |
|---|
| 37 | |
|---|
| 38 | ; decoder |
|---|
| 39 | (defvar pdicv-null-decoder (lambda (s) s)) |
|---|
| 40 | (defvar pdicv-sjis-decoder (lambda (s) (decode-coding-string s 'japanese-shift-jis-dos))) |
|---|
| 41 | (defvar pdicv-latin1-decoder (lambda (s) (decode-coding-string s 'iso-latin-1-dos))) |
|---|
| 42 | (defvar pdicv-bocu-decoder (lambda (s) (nt:bocu-decode s))) |
|---|
| 43 | (defmacro pdicv-create-decoder (encoding) |
|---|
| 44 | "create a decoder from user-specified encoding" |
|---|
| 45 | `(lambda (s) (decode-coding-string s ,encoding))) |
|---|
| 46 | |
|---|
| 47 | (defvar pdicv-index-table-list ()) |
|---|
| 48 | |
|---|
| 49 | (defvar pdicv-result-height 8) |
|---|
| 50 | ; |
|---|
| 51 | ; ��ؒ�Ò���������ߒ�蒤� |
|---|
| 52 | (defun pdicv-get-header-info (filename) |
|---|
| 53 | "[PDIC] Get Header Info" |
|---|
| 54 | (catch 'pdicv-get-header-info |
|---|
| 55 | (let* ((header-buf (nt:read-from-file filename 0 256)) |
|---|
| 56 | ; |
|---|
| 57 | (headername nil); (substring header-buf 1 100)) |
|---|
| 58 | (dictitle nil); (substring header-buf 101 140)) |
|---|
| 59 | (version (nt:read-short header-buf 140)) |
|---|
| 60 | (lword (nt:read-short header-buf 142)) |
|---|
| 61 | (ljapa (nt:read-short header-buf 144)) |
|---|
| 62 | (block-size (nt:read-short header-buf 146)) |
|---|
| 63 | (index-block (nt:read-short header-buf 148)) |
|---|
| 64 | (header-size (nt:read-short header-buf 150)) |
|---|
| 65 | (index-size (nt:read-ushort header-buf 152)) |
|---|
| 66 | (empty-block (nt:read-short header-buf 154)) |
|---|
| 67 | (nindex (nt:read-short header-buf 156)) |
|---|
| 68 | (nblock (nt:read-short header-buf 158)) |
|---|
| 69 | (nword (nt:read-ulong header-buf 160)) |
|---|
| 70 | (dicorder (nt:read-uchar header-buf 164)) |
|---|
| 71 | (dictype (nt:read-uchar header-buf 165)) (dictype* nil) |
|---|
| 72 | (attrlen (nt:read-uchar header-buf 166)) |
|---|
| 73 | ; NEWDIC2- |
|---|
| 74 | (olenumber 0) (os nil) |
|---|
| 75 | (lid-word 0) (lid-japa 0) (lid-exp 0) (lid-pron 0) (lid-other 0) |
|---|
| 76 | ; NEWDIC3- |
|---|
| 77 | (extheader 0) (index-blkbit 0) (cypt nil) (update-count 0) |
|---|
| 78 | (dicident nil) |
|---|
| 79 | ;; |
|---|
| 80 | (major-version (/ version 256)) |
|---|
| 81 | (datablock-size (* nblock block-size)) |
|---|
| 82 | (bocu nil) |
|---|
| 83 | ) |
|---|
| 84 | |
|---|
| 85 | (setq version |
|---|
| 86 | (nth major-version '(not-supported not-supported newdic1 newdic2 newdic3 newdic4 unicode-bocu-6))) |
|---|
| 87 | |
|---|
| 88 | (setq dicorder |
|---|
| 89 | (nth (nt:read-uchar header-buf 164) '(code-order ignore-case dictionary-order order-descendant))) |
|---|
| 90 | |
|---|
| 91 | (when (> (logand dictype 128) 0) (setq dictype* (cons 'tree-view-mode dictype*))) |
|---|
| 92 | (when (> (logand dictype 64) 0) (setq dictype* (cons 'crypted dictype*))) |
|---|
| 93 | ; (when (> (logand dictype 32) 0) (setq dictype* (cons 'multilingual dictype*))) |
|---|
| 94 | (when (> (logand dictype 16) 0) (setq dictype* (cons 'unicode dictype*))) |
|---|
| 95 | (when (> (logand dictype 8) 0) (setq dictype* (cons 'bocu dictype*))) |
|---|
| 96 | (when (> (logand dictype 1) 0) (setq dictype* (cons 'ar-compressed dictype*))) |
|---|
| 97 | |
|---|
| 98 | (case major-version |
|---|
| 99 | (6 "Ver 6.xx" |
|---|
| 100 | (setq os (nt:read-char header-buf 167)) |
|---|
| 101 | (setq os (cond ((= os 0) 'sjis-crlf) |
|---|
| 102 | ((= os 1) 'sjis-cr) |
|---|
| 103 | ((= os 2) 'sjis-lf) |
|---|
| 104 | ((= os 3) 'euc-lf) |
|---|
| 105 | ((= os 4) 'jis-lf) |
|---|
| 106 | ((= os 32) 'bocu) |
|---|
| 107 | )) |
|---|
| 108 | (when (eq os 'bocu) (setq bocu t)) |
|---|
| 109 | (setq olenumber (nt:read-long header-buf 168)) |
|---|
| 110 | ;; dummy_lid, 10 bytes |
|---|
| 111 | (setq index-blkbit (if (= (nt:read-uchar header-buf 182) 1) 32 16)) |
|---|
| 112 | ;; dummy0 @185 |
|---|
| 113 | (setq extheader (nt:read-ulong header-buf 184)) |
|---|
| 114 | (setq empty-block (nt:read-long header-buf 188)) ;overwrite |
|---|
| 115 | (setq nindex (nt:read-long header-buf 192)) ;overwrite |
|---|
| 116 | (setq nblock (nt:read-long header-buf 196)) ;overwrite |
|---|
| 117 | (setq datablock-size (* nblock block-size)) |
|---|
| 118 | (setq cypt (substring header-buf 200 208)) ;- reserved[8] |
|---|
| 119 | (setq update-count (nt:read-ulong header-buf 208)) |
|---|
| 120 | ; dummy00 @212[4] |
|---|
| 121 | (setq dicident (substring header-buf 216 224)) |
|---|
| 122 | ;(setq dummy (substring header-buf 224 256)) |
|---|
| 123 | (setq index-size (* index-block block-size)) ;overwrite |
|---|
| 124 | );6 |
|---|
| 125 | (5 "HyperDIC, Ver 5.00" |
|---|
| 126 | (setq os (nt:read-char header-buf 167)) |
|---|
| 127 | (setq os (cond ((= os 0) 'sjis-crlf) |
|---|
| 128 | ((= os 1) 'sjis-cr) |
|---|
| 129 | ((= os 2) 'sjis-lf) |
|---|
| 130 | ((= os 3) 'euc-lf) |
|---|
| 131 | ((= os 4) 'jis-lf) |
|---|
| 132 | ((= os 32) 'bocu) |
|---|
| 133 | )) |
|---|
| 134 | (when (eq os 'bocu) (setq bocu t)) |
|---|
| 135 | (setq olenumber (nt:read-long header-buf 168)) |
|---|
| 136 | (setq index-blkbit (if (= (nt:read-uchar header-buf 182) 1) 32 16)) |
|---|
| 137 | ;; dummy0 @185 |
|---|
| 138 | (setq extheader (nt:read-ulong header-buf 184)) |
|---|
| 139 | (setq empty-block (nt:read-long header-buf 188)) ;overwrite |
|---|
| 140 | (setq nindex (nt:read-long header-buf 192)) ;overwrite |
|---|
| 141 | (setq nblock (nt:read-long header-buf 196)) ;overwrite |
|---|
| 142 | (setq datablock-size (* nblock block-size)) |
|---|
| 143 | (setq cypt (substring header-buf 200 208)) ;- reserved[8] |
|---|
| 144 | (setq update-count (nt:read-ulong header-buf 208)) |
|---|
| 145 | ; dummy00 @212[4] |
|---|
| 146 | (setq dicident (substring header-buf 216 224)) |
|---|
| 147 | ;(setq dummy (substring header-buf 224 256)) |
|---|
| 148 | (setq index-size (* index-block block-size)) ;overwrite |
|---|
| 149 | );5 |
|---|
| 150 | (t "< 5.0" |
|---|
| 151 | (when (>= major-version 3) |
|---|
| 152 | "NEWDIC2-" |
|---|
| 153 | (setq olenumber (nt:read-long header-buf 167)) |
|---|
| 154 | ;(setq os (byte (substring header-buf 172 173))) |
|---|
| 155 | (setq os (nth (nt:read-char header-buf 171) '(sjis-crlf))) |
|---|
| 156 | ;(setq lid-word (short header-buf 172)) |
|---|
| 157 | ;(setq lid-japa (short header-buf 174)) |
|---|
| 158 | ;(setq lid-exp (short header-buf 176)) |
|---|
| 159 | ;(setq lid-pron (short header-buf 178)) |
|---|
| 160 | ;(setq lid-other (short header-buf 180)) |
|---|
| 161 | ) |
|---|
| 162 | (when (>= major-version 4) |
|---|
| 163 | "NEWDIC3-" |
|---|
| 164 | (setq extheader (nt:read-ulong header-buf 182)) |
|---|
| 165 | (setq empty-block (nt:read-long header-buf 186)) ;overwrite |
|---|
| 166 | (setq nindex (nt:read-long header-buf 190)) ;overwrite |
|---|
| 167 | (setq nblock (nt:read-long header-buf 194)) ;overwrite |
|---|
| 168 | (setq datablock-size (* nblock block-size)) |
|---|
| 169 | (setq index-blkbit (if (= (nt:read-uchar header-buf 198) 1) 32 16)) |
|---|
| 170 | (setq cypt (substring header-buf 200 208)) |
|---|
| 171 | (setq update-count (nt:read-ulong header-buf 207)) |
|---|
| 172 | ;(setq dummy (substring header-buf 212 256)) |
|---|
| 173 | (setq index-size (* index-block block-size)) ;overwrite |
|---|
| 174 | ) |
|---|
| 175 | )); esac |
|---|
| 176 | (list |
|---|
| 177 | ; (cons 'headername headername) ; |
|---|
| 178 | ; (cons 'dictitle dictitle) ; |
|---|
| 179 | (cons 'version version) ; |
|---|
| 180 | (cons 'lword lword) ; |
|---|
| 181 | (cons 'ljapa ljapa) ; |
|---|
| 182 | (cons 'block-size block-size) ; |
|---|
| 183 | (cons 'index-block index-block) ; |
|---|
| 184 | (cons 'header-size header-size) ; |
|---|
| 185 | (cons 'index-size index-size) ; |
|---|
| 186 | (cons 'empty-block empty-block) ; |
|---|
| 187 | (cons 'nindex nindex) ; |
|---|
| 188 | (cons 'nblock nblock) ; |
|---|
| 189 | (cons 'nword nword) ; |
|---|
| 190 | (cons 'dicorder dicorder) ; |
|---|
| 191 | (cons 'dictype dictype*) ; |
|---|
| 192 | (cons 'attrlen attrlen) ; |
|---|
| 193 | (cons 'os os) ; |
|---|
| 194 | ; (cons 'lid-word lid-word) ; |
|---|
| 195 | ; (cons 'lid-japa lid-japa) ; |
|---|
| 196 | ; (cons 'lid-exp lid-exp) ; |
|---|
| 197 | ; (cons 'lid-pron lid-pron) ; |
|---|
| 198 | ; (cons 'lid-other lid-other) ; |
|---|
| 199 | (cons 'extheader extheader) ; |
|---|
| 200 | (cons 'index-blkbit index-blkbit) ;(0=16,1=32) |
|---|
| 201 | (cons 'cypt cypt) ; |
|---|
| 202 | (cons 'update-count update-count) ; |
|---|
| 203 | |
|---|
| 204 | (cons 'index-begins-at (+ header-size extheader)) |
|---|
| 205 | (cons 'datablock-begins-at (+ header-size extheader index-size)) |
|---|
| 206 | (cons 'datablock-ends-at (+ header-size extheader index-size datablock-size)) |
|---|
| 207 | (cons 'datablock-size datablock-size) |
|---|
| 208 | (cons 'bocu bocu))))) |
|---|
| 209 | |
|---|
| 210 | (defun pdicv-get-index-list (filename &optional word-encoding) |
|---|
| 211 | "[PDICV] Get the index list from PDIC file" |
|---|
| 212 | (let* ((header (pdicv-get-header-info filename)) |
|---|
| 213 | (index-buf (nt:read-from-file filename |
|---|
| 214 | (-> header 'index-begins-at) (-> header 'index-size))) |
|---|
| 215 | |
|---|
| 216 | (32bit-address-mode (if (= (-> header 'index-blkbit) 32) t nil)) |
|---|
| 217 | (tab-sep-p (eq 'unicode-bocu-6 (-> header 'version))) |
|---|
| 218 | |
|---|
| 219 | (ix 0) (ix-max (-> header 'nindex)) |
|---|
| 220 | (ofs 0) |
|---|
| 221 | (index-list ())) |
|---|
| 222 | (while (< ix ix-max) |
|---|
| 223 | (let ((phys -1) (word "") (word* nil)) |
|---|
| 224 | (if 32bit-address-mode |
|---|
| 225 | (progn (setq phys (nt:read-ulong index-buf ofs)) |
|---|
| 226 | (setq ofs (+ ofs 4))) |
|---|
| 227 | (progn (setq phys (nt:read-ushort index-buf ofs)) |
|---|
| 228 | (setq ofs (+ ofs 2)))) |
|---|
| 229 | (setq word* (nt:read-cstring index-buf ofs)) (setq ofs (+ ofs (cdr word*) 1)) |
|---|
| 230 | (setq word (car word*)) |
|---|
| 231 | |
|---|
| 232 | (when tab-sep-p |
|---|
| 233 | (let ((tsv (split-string word "\t"))) |
|---|
| 234 | (when (consp tsv) |
|---|
| 235 | (setq word (car tsv))))) |
|---|
| 236 | ; (cond |
|---|
| 237 | ; ((eq word-encoding 'bocu) |
|---|
| 238 | ; (setq word (nt:bocu-decode word))) |
|---|
| 239 | ; ((eq word-encoding 'sjis) |
|---|
| 240 | ; (setq word (decode-coding-string word 'japanese-shift-jis-dos))) |
|---|
| 241 | ; (word-encoding |
|---|
| 242 | ; (setq word (decode-coding-string word word-encoding))) |
|---|
| 243 | ; (t nil)) |
|---|
| 244 | |
|---|
| 245 | ; (setq index-list (cons (cons phys word) index-list)) |
|---|
| 246 | (push (cons phys word) index-list) |
|---|
| 247 | (setq ix (1+ ix)) |
|---|
| 248 | )) |
|---|
| 249 | (nreverse index-list) )) |
|---|
| 250 | |
|---|
| 251 | (defface pdicv-face-dummy |
|---|
| 252 | '((( (class color) (background light) ) |
|---|
| 253 | (:foreground "green" :background "SlateGray1" :weight bold)) |
|---|
| 254 | (t |
|---|
| 255 | (:foreground "red" :background "black"))) ; :weight bold |
|---|
| 256 | "Face for caption") |
|---|
| 257 | (defface pdicv-face-caption-red |
|---|
| 258 | '((t (:foreground "red" :background "black"))) |
|---|
| 259 | "Face for caption") |
|---|
| 260 | (defface pdicv-face-caption-blue |
|---|
| 261 | '((t (:foreground "blue" :background "black"))) |
|---|
| 262 | "Face for caption") |
|---|
| 263 | (defface pdicv-face-caption-green |
|---|
| 264 | '((t (:foreground "green" :background "black"))) |
|---|
| 265 | "Face for caption") |
|---|
| 266 | (defface pdicv-face-gray |
|---|
| 267 | '((t (:foreground "gray"))) |
|---|
| 268 | "Face for text") |
|---|
| 269 | |
|---|
| 270 | (defvar pdicv-default-inserter |
|---|
| 271 | (lambda (eword pron jword example) |
|---|
| 272 | (progn |
|---|
| 273 | (set-text-properties 0 (length eword) '(face bold) eword) |
|---|
| 274 | ; (set-text-properties 0 (length eword) '(face pdicv-face-caption-green) eword) |
|---|
| 275 | ; (set-text-properties 0 (length jword) '(face pdicv-face-caption-gray) jword) |
|---|
| 276 | |
|---|
| 277 | (setq jword (nt:replace-all jword "������/ ")) |
|---|
| 278 | (setq jword (nt:replace-all jword "\n" "\n ")) |
|---|
| 279 | |
|---|
| 280 | (let ((buf "")) |
|---|
| 281 | (setq buf eword) |
|---|
| 282 | (when (string< "" pron) (setq buf (concat buf " [" pron "]"))) |
|---|
| 283 | ; (setq result (concat result " : " jword)) |
|---|
| 284 | (setq buf (concat buf "\n " jword)) |
|---|
| 285 | (when (string< "" example) (setq buf (concat buf "\n - " example))) |
|---|
| 286 | ; (setq buf (concat buf "\n")) |
|---|
| 287 | ; (setq buf (concat buf "\n\n")) |
|---|
| 288 | (setq buf (concat buf "\n")) |
|---|
| 289 | |
|---|
| 290 | (insert buf))))) |
|---|
| 291 | ;; |
|---|
| 292 | ;; |
|---|
| 293 | ;; |
|---|
| 294 | (defun pdicv-scan-datablock (filename phys criteria-func) |
|---|
| 295 | "[PDICV] scan a datablock" |
|---|
| 296 | (catch 'pdicv-scan-datablock |
|---|
| 297 | (let* ((result ()) ;(match-count 0) |
|---|
| 298 | (header (pdicv-get-header-info filename)) |
|---|
| 299 | (tab-sep-p (eq 'unicode-bocu-6 (-> header 'version))) |
|---|
| 300 | (block-size (-> header 'block-size)) |
|---|
| 301 | (offset (+ (-> header 'datablock-begins-at) (* phys block-size))) |
|---|
| 302 | (aligned (and (member (-> header 'version) '(newdic4 unicode-bocu-6)) t)) |
|---|
| 303 | ;; (bocu (-> header 'bocu)) |
|---|
| 304 | (head-word (nt:read-ushort (nt:read-from-file filename offset 2))) |
|---|
| 305 | (blocks (logand 32767 head-word)) |
|---|
| 306 | (block-length (- (* blocks block-size) 2)) |
|---|
| 307 | (field-size (if (zerop (logand 32768 head-word)) 2 4)) |
|---|
| 308 | (datablock (nt:read-from-file filename (+ offset 2) block-length)) |
|---|
| 309 | ; (list blocks field-size datablock) |
|---|
| 310 | (p 0) |
|---|
| 311 | (field-length 0) |
|---|
| 312 | (compress-length 0) |
|---|
| 313 | (rest nil) |
|---|
| 314 | (eword "") (eword-attrib 0) |
|---|
| 315 | ) |
|---|
| 316 | |
|---|
| 317 | (while (< p block-length) ; (while (< p field-size) |
|---|
| 318 | (setq field-length |
|---|
| 319 | (if (= field-size 2) (nt:read-ushort datablock p) (nt:read-ulong datablock p)) ) |
|---|
| 320 | (when (zerop field-length) (throw 'pdicv-scan-datablock (nreverse result))); sfield-list)) |
|---|
| 321 | (setq p (+ p field-size)) ;2��ʒ�����4��В����� (setq compress-length (nt:read-uchar datablock p)) ; �����̒Ĺ |
|---|
| 322 | (setq p (1+ p)) |
|---|
| 323 | |
|---|
| 324 | (when aligned |
|---|
| 325 | (setq eword-attrib (nt:read-uchar datablock p)) ; ������������� |
|---|
| 326 | (setq p (1+ p))) |
|---|
| 327 | ; �����В����쒰ʒ�ߒ����꒤������� rest ��˒���쒤� (setq rest (substring datablock p (+ p field-length))) |
|---|
| 328 | (setq p (+ p field-length)) |
|---|
| 329 | ; �����В�����NULL����ü) |
|---|
| 330 | (let* ((eword-cstr (nt:read-cstring rest)) |
|---|
| 331 | (eword-compressed (car eword-cstr)) (eword-len (cdr eword-cstr)) |
|---|
| 332 | (q 0) |
|---|
| 333 | (level 0) |
|---|
| 334 | (extended nil) |
|---|
| 335 | (jword-cstr nil) (jword "") (jword-len 0) |
|---|
| 336 | (ext-list nil) |
|---|
| 337 | (example "") (pron "") (link "")) |
|---|
| 338 | |
|---|
| 339 | (setq eword (if (zerop compress-length) |
|---|
| 340 | eword-compressed |
|---|
| 341 | (concat (substring eword 0 compress-length) eword-compressed) )) |
|---|
| 342 | (setq q (1+ eword-len)) |
|---|
| 343 | ;; ������������� |
|---|
| 344 | (when (not aligned) |
|---|
| 345 | (setq eword-attrib (nt:read-uchar rest q)) |
|---|
| 346 | (setq q (1+ q))) |
|---|
| 347 | |
|---|
| 348 | (setq level (logand eword-attrib 15)) |
|---|
| 349 | ; (insert (format ": %s %d %d\n" eword eword-len eword-attrib)) |
|---|
| 350 | ;; (if (zerop (logand eword-attrib 128)) |
|---|
| 351 | ;; (throw 'pdicv-scan-datablock ()); 'illegal) |
|---|
| 352 | (setq eword-attrib (logand eword-attrib 127)) |
|---|
| 353 | |
|---|
| 354 | (setq extended (if (zerop (logand eword-attrib 16)) nil t)) |
|---|
| 355 | (if extended |
|---|
| 356 | (progn ;��Ȓĥ |
|---|
| 357 | (setq jword-cstr (nt:read-cstring rest q)) |
|---|
| 358 | (setq jword (car jword-cstr)) (setq jword-len (cdr jword-cstr)) |
|---|
| 359 | (setq q (+ q jword-len 1)) |
|---|
| 360 | (setq ext-list nil) |
|---|
| 361 | (catch 'while |
|---|
| 362 | (while (< q field-length) |
|---|
| 363 | (let* ((ex-attrib (nt:read-uchar rest q)) |
|---|
| 364 | (ex-attrib-sub (logand ex-attrib 15)) |
|---|
| 365 | (exdata-cstr nil) |
|---|
| 366 | (exdata "") (exdata-len 0) ) |
|---|
| 367 | (when (= (logand ex-attrib 128) 128) (throw 'while t)) |
|---|
| 368 | (setq q (1+ q)) |
|---|
| 369 | (setq exdata-cstr (nt:read-cstring rest q)) |
|---|
| 370 | (setq exdata (car exdata-cstr)) |
|---|
| 371 | (setq exdata-len (cdr exdata-cstr)) |
|---|
| 372 | (cond |
|---|
| 373 | ((= ex-attrib-sub 1) (setq example exdata)) |
|---|
| 374 | ((= ex-attrib-sub 2) (setq pron exdata)) |
|---|
| 375 | ((= ex-attrib-sub 4) (setq link exdata)) |
|---|
| 376 | (t nil)) |
|---|
| 377 | (setq q (+ q exdata-len 1)) |
|---|
| 378 | ) ; let* |
|---|
| 379 | ) ; while |
|---|
| 380 | ) ; catch while2 |
|---|
| 381 | ) ; progn |
|---|
| 382 | (progn ;�ɸ��� (setq jword (substring rest q)) |
|---|
| 383 | (setq pron "") |
|---|
| 384 | (setq example "")) |
|---|
| 385 | ) ; if extended |
|---|
| 386 | |
|---|
| 387 | (if tab-sep-p |
|---|
| 388 | (let* ((splitted (split-string eword "\t")) |
|---|
| 389 | (eword (car splitted)) |
|---|
| 390 | (entry (cadr splitted))) |
|---|
| 391 | (when (funcall criteria-func entry eword pron jword example) |
|---|
| 392 | (push (list entry pron jword example) result))) |
|---|
| 393 | (when (funcall criteria-func eword eword pron jword example) |
|---|
| 394 | (push (list eword pron jword example) result))) |
|---|
| 395 | ;;(when (funcall criteria-func eword pron jword example) |
|---|
| 396 | ;; (push (list eword pron jword example) result)) |
|---|
| 397 | );let |
|---|
| 398 | ); wend |
|---|
| 399 | (nreverse result)))) |
|---|
| 400 | |
|---|
| 401 | (defun pdicv-core-search (dicinfo criteria &optional simple-mode-p dont-clear-p) |
|---|
| 402 | "search in PDIC" |
|---|
| 403 | (let* ((dicname (car dicinfo)) |
|---|
| 404 | (dicfile (nth 1 dicinfo)) |
|---|
| 405 | (encoding-list (nth 2 dicinfo)) |
|---|
| 406 | (decoder-list ()) |
|---|
| 407 | (index-table (-> pdicv-index-table-list dicname))) |
|---|
| 408 | ; (if (null index-table) (setq index-table (pdicv-get-index-list dicfile))) |
|---|
| 409 | |
|---|
| 410 | (when (atom encoding-list) ;; expand encoding-list |
|---|
| 411 | (setq encoding-list (list encoding-list encoding-list encoding-list encoding-list))) |
|---|
| 412 | |
|---|
| 413 | (while encoding-list ;; build the decoder-list |
|---|
| 414 | (let ((encoding (car encoding-list))) |
|---|
| 415 | (cond |
|---|
| 416 | ((eq encoding 'bocu) (push pdicv-bocu-decoder decoder-list)) |
|---|
| 417 | ((eq encoding 'sjis) (push pdicv-sjis-decoder decoder-list)) |
|---|
| 418 | ((eq encoding 'latin1) (push pdicv-latin1-decoder decoder-list)) |
|---|
| 419 | (encoding (push (pdicv-create-decoder encoding) decoder-list)) |
|---|
| 420 | (t (push pdicv-null-decoder decoder-list)))) |
|---|
| 421 | (setq encoding-list (cdr encoding-list))) |
|---|
| 422 | (setq decoder-list (nreverse decoder-list)) |
|---|
| 423 | |
|---|
| 424 | (catch 'pdicv-core-search |
|---|
| 425 | ; (if (null original-word-to-search) (setq original-word-to-search word-to-search)) |
|---|
| 426 | (let* (;(buffer-name (generate-new-buffer-name *buffer-name)) |
|---|
| 427 | ; (pdicv-buffer-name "*PDIC Viewer*") |
|---|
| 428 | ; (dummy (if (get-buffer pdicv-buffer-name) (kill-buffer pdicv-buffer-name))) |
|---|
| 429 | (pdicv-buffer (get-buffer-create "*PDIC Viewer*")) |
|---|
| 430 | ;criteria |
|---|
| 431 | (word-to-search (car criteria)) |
|---|
| 432 | |
|---|
| 433 | (index-needles (nth 1 criteria)) |
|---|
| 434 | (needle1 (car index-needles)) |
|---|
| 435 | (needle2 (cdr index-needles)) |
|---|
| 436 | |
|---|
| 437 | (datablock-criteria-func (nth 2 criteria)) |
|---|
| 438 | |
|---|
| 439 | (ix index-table) (index-size (length ix)) (curr-size index-size) |
|---|
| 440 | (ix+ (cadr ix)); next one |
|---|
| 441 | (match-count 0)) |
|---|
| 442 | |
|---|
| 443 | ; (switch-to-buffer pdicv-buffer-name) |
|---|
| 444 | (save-current-buffer |
|---|
| 445 | (set-buffer pdicv-buffer) |
|---|
| 446 | (when (null dont-clear-p) (erase-buffer)) |
|---|
| 447 | |
|---|
| 448 | (when (not simple-mode-p) |
|---|
| 449 | ;(pop-to-buffer pdicv-buffer-name) |
|---|
| 450 | ; (set-buffer pdicv-buffer-name) |
|---|
| 451 | (insert (format "��������� %s\n" word-to-search)) |
|---|
| 452 | (insert (format "��������???\n")) |
|---|
| 453 | (newline)) |
|---|
| 454 | ;(insert "\n")) |
|---|
| 455 | (when index-needles |
|---|
| 456 | (setq ix |
|---|
| 457 | (let ((p ix) (last-p nil)) |
|---|
| 458 | (catch 'pdicv-search-in-index |
|---|
| 459 | (while p |
|---|
| 460 | (let* ((elem (car p)) ;(phys (car elem)) |
|---|
| 461 | (word (cdr elem))) |
|---|
| 462 | (if (string< needle1 word) (throw 'pdicv-search-in-index last-p)) |
|---|
| 463 | ;; (if (string< needle2 word) (throw 'pdicv-search-in-index last-p)) |
|---|
| 464 | (setq last-p p) |
|---|
| 465 | (setq p (cdr p)) )) |
|---|
| 466 | last-p)))) |
|---|
| 467 | (catch 'while |
|---|
| 468 | (while ix |
|---|
| 469 | (let* ((curr (car ix)) |
|---|
| 470 | (phys (car curr)) (word (cdr curr)) |
|---|
| 471 | ;; (x (insert (format "* current ix: (%d %s)\n" phys word))) |
|---|
| 472 | (result (pdicv-scan-datablock dicfile phys datablock-criteria-func)); decoder-list nil)) |
|---|
| 473 | (result-count (length result)) |
|---|
| 474 | (inserter pdicv-default-inserter)) |
|---|
| 475 | (when index-needles |
|---|
| 476 | (when (string>= word needle2) (throw 'while t))) |
|---|
| 477 | ;; (if (not (string< word (cdr index-needles))) (throw 'while t))) |
|---|
| 478 | |
|---|
| 479 | ; (insert (format "(%s with index %s ... %s)\n" |
|---|
| 480 | ; word-to-search |
|---|
| 481 | ; (funcall (nth 0 decoder-list) word) result)) |
|---|
| 482 | |
|---|
| 483 | (if result (progn |
|---|
| 484 | (while result |
|---|
| 485 | (let ((rec (pop result))) |
|---|
| 486 | (funcall inserter |
|---|
| 487 | (funcall (nth 0 decoder-list) (nth 0 rec)); eword |
|---|
| 488 | (funcall (nth 1 decoder-list) (nth 1 rec)); pron |
|---|
| 489 | (funcall (nth 2 decoder-list) (nth 2 rec)); jword |
|---|
| 490 | (funcall (nth 3 decoder-list) (nth 3 rec)); example |
|---|
| 491 | ) |
|---|
| 492 | (setq match-count (1+ match-count)) |
|---|
| 493 | )) |
|---|
| 494 | (message "%5d/%5d:%7d" curr-size index-size match-count) |
|---|
| 495 | (sit-for 0)) |
|---|
| 496 | ;;else |
|---|
| 497 | (when (zerop (% curr-size 128)) ;;128��ϒŬ����ʒ�� (message "%5d/%5d:%7d" curr-size index-size match-count)))) |
|---|
| 498 | (setq ix (cdr ix)) |
|---|
| 499 | (setq curr-size (1- curr-size)) |
|---|
| 500 | );wend |
|---|
| 501 | );caught |
|---|
| 502 | |
|---|
| 503 | ;;(insert (pdicv-scan-datablock dicfile (car (car ix)) decoder-list nil needle1 needle2)) |
|---|
| 504 | (goto-char 1) |
|---|
| 505 | |
|---|
| 506 | (when (not simple-mode-p) |
|---|
| 507 | (when (re-search-forward ": [?][?][?][?]" nil t nil) |
|---|
| 508 | (replace-match (format ": %d" match-count) t t nil 0))) |
|---|
| 509 | ); save-current-buffer |
|---|
| 510 | |
|---|
| 511 | ; (pop-to-buffer (current-buffer)) |
|---|
| 512 | ; (setq split-height-threshold 6) |
|---|
| 513 | (when (one-window-p) |
|---|
| 514 | (set-window-buffer (split-window-vertically (- pdicv-result-height)) pdicv-buffer)) |
|---|
| 515 | )))) |
|---|
| 516 | |
|---|
| 517 | ;;; pdicv-core.el ends here |
|---|