| 1 | ;;; pdicv-core.el --- core functions for PDIC-formatted dictionaries | 
|---|
| 2 | ;; | 
|---|
| 3 | ;; Copyright (C) 2005-2009 naoya_t. All Rights Reserved. | 
|---|
| 4 | ;; | 
|---|
| 5 | ;; Author: naoya_t <naoya.t@aqua.plala.or.jp> | 
|---|
| 6 | ;; Maintainer: naoya_t <naoya.t@aqua.plala.or.jp> | 
|---|
| 7 | ;; Primary distribution site: | 
|---|
| 8 | ;;   http://lambdarepos.svnrepository.com/svn/share/lang/elisp/pdicv-mode/trunk | 
|---|
| 9 | ;; | 
|---|
| 10 | ;; Created: 14 Feb 2005 | 
|---|
| 11 | ;; Last modified: 30 Jan 2009 | 
|---|
| 12 | ;; Version: 0.9.2 | 
|---|
| 13 | ;; Keywords: PDIC dictionary search eijiro | 
|---|
| 14 |  | 
|---|
| 15 | (provide 'pdicv-core) | 
|---|
| 16 | ;(put 'pdicv-core 'version "0.9.2") | 
|---|
| 17 |  | 
|---|
| 18 | ;;; Commentary: | 
|---|
| 19 |  | 
|---|
| 20 | ; (pdicv-get-header-info FILENAME) | 
|---|
| 21 | ;    - ��ؒ�Ò���������ߒ�蒤� | 
|---|
| 22 | ; (pdicv-get-index-list FILENAME [WORD-ENCODING]) | 
|---|
| 23 | ;    - PDIC��������������뒤���钡���������Ò�������꒥���Ȓ���� | 
|---|
| 24 | ; | 
|---|
| 25 | ; (pdicv-scan-datablock FILENAME PHYS CRITERIA-FUNC) | 
|---|
| 26 | ;    - ��ǒ�������֒�풥Ò���������㒥�; (pdicv-core-search DICINFO CRITERIA [SIMPLE-MODE-P DONT-CLEAR-P]) | 
|---|
| 27 | ;    - PDIC��������������뒡������ | 
|---|
| 28 | ; | 
|---|
| 29 |  | 
|---|
| 30 | ;;; Code: | 
|---|
| 31 | (require 'nt-macros) | 
|---|
| 32 | (require 'nt-readval) | 
|---|
| 33 | (require 'nt-string) | 
|---|
| 34 | (require 'nt-bocu) | 
|---|
| 35 | (require 'nt-file) | 
|---|
| 36 | (require 'nt-english) | 
|---|
| 37 |  | 
|---|
| 38 | ; decoder | 
|---|
| 39 | (defvar pdicv-null-decoder (lambda (s) s)) | 
|---|
| 40 | (defvar pdicv-sjis-decoder (lambda (s) (decode-coding-string s 'japanese-shift-jis-dos))) | 
|---|
| 41 | (defvar pdicv-latin1-decoder (lambda (s) (decode-coding-string s 'iso-latin-1-dos))) | 
|---|
| 42 | (defvar pdicv-bocu-decoder (lambda (s) (nt:bocu-decode s))) | 
|---|
| 43 | (defmacro pdicv-create-decoder (encoding) | 
|---|
| 44 | "create a decoder from user-specified encoding" | 
|---|
| 45 | `(lambda (s) (decode-coding-string s ,encoding))) | 
|---|
| 46 |  | 
|---|
| 47 | (defvar pdicv-index-table-list ()) | 
|---|
| 48 |  | 
|---|
| 49 | (defvar pdicv-result-height 8) | 
|---|
| 50 | ; | 
|---|
| 51 | ; ��ؒ�Ò���������ߒ�蒤� | 
|---|
| 52 | (defun pdicv-get-header-info (filename) | 
|---|
| 53 | "[PDIC] Get Header Info" | 
|---|
| 54 | (catch 'pdicv-get-header-info | 
|---|
| 55 | (let* ((header-buf (nt:read-from-file filename 0 256)) | 
|---|
| 56 | ; | 
|---|
| 57 | (headername nil); (substring header-buf 1 100)) | 
|---|
| 58 | (dictitle nil); (substring header-buf 101 140)) | 
|---|
| 59 | (version (nt:read-short header-buf 140)) | 
|---|
| 60 | (lword (nt:read-short header-buf 142)) | 
|---|
| 61 | (ljapa (nt:read-short header-buf 144)) | 
|---|
| 62 | (block-size (nt:read-short header-buf 146)) | 
|---|
| 63 | (index-block (nt:read-short header-buf 148)) | 
|---|
| 64 | (header-size (nt:read-short header-buf 150)) | 
|---|
| 65 | (index-size (nt:read-ushort header-buf 152)) | 
|---|
| 66 | (empty-block (nt:read-short header-buf 154)) | 
|---|
| 67 | (nindex (nt:read-short header-buf 156)) | 
|---|
| 68 | (nblock (nt:read-short header-buf 158)) | 
|---|
| 69 | (nword (nt:read-ulong header-buf 160)) | 
|---|
| 70 | (dicorder (nt:read-uchar header-buf 164)) | 
|---|
| 71 | (dictype (nt:read-uchar header-buf 165)) (dictype* nil) | 
|---|
| 72 | (attrlen (nt:read-uchar header-buf 166)) | 
|---|
| 73 | ; NEWDIC2- | 
|---|
| 74 | (olenumber 0) (os nil) | 
|---|
| 75 | (lid-word 0) (lid-japa 0) (lid-exp 0) (lid-pron 0) (lid-other 0) | 
|---|
| 76 | ; NEWDIC3- | 
|---|
| 77 | (extheader 0) (index-blkbit 0) (cypt nil) (update-count 0) | 
|---|
| 78 | (dicident nil) | 
|---|
| 79 | ;; | 
|---|
| 80 | (major-version (/ version 256)) | 
|---|
| 81 | (datablock-size (* nblock block-size)) | 
|---|
| 82 | (bocu nil) | 
|---|
| 83 | ) | 
|---|
| 84 |  | 
|---|
| 85 | (setq version | 
|---|
| 86 | (nth major-version '(not-supported not-supported newdic1 newdic2 newdic3 newdic4 unicode-bocu-6))) | 
|---|
| 87 |  | 
|---|
| 88 | (setq dicorder | 
|---|
| 89 | (nth (nt:read-uchar header-buf 164) '(code-order ignore-case dictionary-order order-descendant))) | 
|---|
| 90 |  | 
|---|
| 91 | (when (> (logand dictype 128) 0) (setq dictype* (cons 'tree-view-mode dictype*))) | 
|---|
| 92 | (when (> (logand dictype 64) 0) (setq dictype* (cons 'crypted dictype*))) | 
|---|
| 93 | ;     (when (> (logand dictype 32) 0) (setq dictype* (cons 'multilingual dictype*))) | 
|---|
| 94 | (when (> (logand dictype 16) 0) (setq dictype* (cons 'unicode dictype*))) | 
|---|
| 95 | (when (> (logand dictype 8) 0) (setq dictype* (cons 'bocu dictype*))) | 
|---|
| 96 | (when (> (logand dictype 1) 0) (setq dictype* (cons 'ar-compressed dictype*))) | 
|---|
| 97 |  | 
|---|
| 98 | (case major-version | 
|---|
| 99 | (6 "Ver 6.xx" | 
|---|
| 100 | (setq os (nt:read-char header-buf 167)) | 
|---|
| 101 | (setq os (cond ((= os 0) 'sjis-crlf) | 
|---|
| 102 | ((= os 1) 'sjis-cr) | 
|---|
| 103 | ((= os 2) 'sjis-lf) | 
|---|
| 104 | ((= os 3) 'euc-lf) | 
|---|
| 105 | ((= os 4) 'jis-lf) | 
|---|
| 106 | ((= os 32) 'bocu) | 
|---|
| 107 | )) | 
|---|
| 108 | (when (eq os 'bocu) (setq bocu t)) | 
|---|
| 109 | (setq olenumber (nt:read-long header-buf 168)) | 
|---|
| 110 | ;; dummy_lid, 10 bytes | 
|---|
| 111 | (setq index-blkbit (if (= (nt:read-uchar header-buf 182) 1) 32 16)) | 
|---|
| 112 | ;; dummy0 @185 | 
|---|
| 113 | (setq extheader (nt:read-ulong header-buf 184)) | 
|---|
| 114 | (setq empty-block (nt:read-long header-buf 188)) ;overwrite | 
|---|
| 115 | (setq nindex (nt:read-long header-buf 192)) ;overwrite | 
|---|
| 116 | (setq nblock (nt:read-long header-buf 196)) ;overwrite | 
|---|
| 117 | (setq datablock-size (* nblock block-size)) | 
|---|
| 118 | (setq cypt (substring header-buf 200 208)) ;- reserved[8] | 
|---|
| 119 | (setq update-count (nt:read-ulong header-buf 208)) | 
|---|
| 120 | ; dummy00 @212[4] | 
|---|
| 121 | (setq dicident (substring header-buf 216 224)) | 
|---|
| 122 | ;(setq dummy (substring header-buf 224 256)) | 
|---|
| 123 | (setq index-size (* index-block block-size)) ;overwrite | 
|---|
| 124 | );6 | 
|---|
| 125 | (5 "HyperDIC, Ver 5.00" | 
|---|
| 126 | (setq os (nt:read-char header-buf 167)) | 
|---|
| 127 | (setq os (cond ((= os 0) 'sjis-crlf) | 
|---|
| 128 | ((= os 1) 'sjis-cr) | 
|---|
| 129 | ((= os 2) 'sjis-lf) | 
|---|
| 130 | ((= os 3) 'euc-lf) | 
|---|
| 131 | ((= os 4) 'jis-lf) | 
|---|
| 132 | ((= os 32) 'bocu) | 
|---|
| 133 | )) | 
|---|
| 134 | (when (eq os 'bocu) (setq bocu t)) | 
|---|
| 135 | (setq olenumber (nt:read-long header-buf 168)) | 
|---|
| 136 | (setq index-blkbit (if (= (nt:read-uchar header-buf 182) 1) 32 16)) | 
|---|
| 137 | ;; dummy0 @185 | 
|---|
| 138 | (setq extheader (nt:read-ulong header-buf 184)) | 
|---|
| 139 | (setq empty-block (nt:read-long header-buf 188)) ;overwrite | 
|---|
| 140 | (setq nindex (nt:read-long header-buf 192)) ;overwrite | 
|---|
| 141 | (setq nblock (nt:read-long header-buf 196)) ;overwrite | 
|---|
| 142 | (setq datablock-size (* nblock block-size)) | 
|---|
| 143 | (setq cypt (substring header-buf 200 208)) ;- reserved[8] | 
|---|
| 144 | (setq update-count (nt:read-ulong header-buf 208)) | 
|---|
| 145 | ; dummy00 @212[4] | 
|---|
| 146 | (setq dicident (substring header-buf 216 224)) | 
|---|
| 147 | ;(setq dummy (substring header-buf 224 256)) | 
|---|
| 148 | (setq index-size (* index-block block-size)) ;overwrite | 
|---|
| 149 | );5 | 
|---|
| 150 | (t "< 5.0" | 
|---|
| 151 | (when (>= major-version 3) | 
|---|
| 152 | "NEWDIC2-" | 
|---|
| 153 | (setq olenumber (nt:read-long header-buf 167)) | 
|---|
| 154 | ;(setq os (byte (substring header-buf 172 173))) | 
|---|
| 155 | (setq os (nth (nt:read-char header-buf 171) '(sjis-crlf))) | 
|---|
| 156 | ;(setq lid-word (short header-buf 172)) | 
|---|
| 157 | ;(setq lid-japa (short header-buf 174)) | 
|---|
| 158 | ;(setq lid-exp (short header-buf 176)) | 
|---|
| 159 | ;(setq lid-pron (short header-buf 178)) | 
|---|
| 160 | ;(setq lid-other (short header-buf 180)) | 
|---|
| 161 | ) | 
|---|
| 162 | (when (>= major-version 4) | 
|---|
| 163 | "NEWDIC3-" | 
|---|
| 164 | (setq extheader (nt:read-ulong header-buf 182)) | 
|---|
| 165 | (setq empty-block (nt:read-long header-buf 186)) ;overwrite | 
|---|
| 166 | (setq nindex (nt:read-long header-buf 190)) ;overwrite | 
|---|
| 167 | (setq nblock (nt:read-long header-buf 194)) ;overwrite | 
|---|
| 168 | (setq datablock-size (* nblock block-size)) | 
|---|
| 169 | (setq index-blkbit (if (= (nt:read-uchar header-buf 198) 1) 32 16)) | 
|---|
| 170 | (setq cypt (substring header-buf 200 208)) | 
|---|
| 171 | (setq update-count (nt:read-ulong header-buf 207)) | 
|---|
| 172 | ;(setq dummy (substring header-buf 212 256)) | 
|---|
| 173 | (setq index-size (* index-block block-size)) ;overwrite | 
|---|
| 174 | ) | 
|---|
| 175 | )); esac | 
|---|
| 176 | (list | 
|---|
| 177 | ;       (cons 'headername headername)   ; | 
|---|
| 178 | ;       (cons 'dictitle dictitle)       ; | 
|---|
| 179 | (cons 'version version)         ; | 
|---|
| 180 | (cons 'lword lword)             ; | 
|---|
| 181 | (cons 'ljapa ljapa)             ; | 
|---|
| 182 | (cons 'block-size block-size)   ; | 
|---|
| 183 | (cons 'index-block index-block) ; | 
|---|
| 184 | (cons 'header-size header-size) ; | 
|---|
| 185 | (cons 'index-size index-size)   ; | 
|---|
| 186 | (cons 'empty-block empty-block) ; | 
|---|
| 187 | (cons 'nindex nindex)           ; | 
|---|
| 188 | (cons 'nblock nblock)           ; | 
|---|
| 189 | (cons 'nword nword)             ; | 
|---|
| 190 | (cons 'dicorder dicorder)       ; | 
|---|
| 191 | (cons 'dictype dictype*)        ; | 
|---|
| 192 | (cons 'attrlen attrlen)         ; | 
|---|
| 193 | (cons 'os os)                   ; | 
|---|
| 194 | ;       (cons 'lid-word lid-word) ; | 
|---|
| 195 | ;       (cons 'lid-japa lid-japa) ; | 
|---|
| 196 | ;       (cons 'lid-exp lid-exp) ; | 
|---|
| 197 | ;       (cons 'lid-pron lid-pron) ; | 
|---|
| 198 | ;       (cons 'lid-other lid-other) ; | 
|---|
| 199 | (cons 'extheader extheader) ; | 
|---|
| 200 | (cons 'index-blkbit index-blkbit) ;(0=16,1=32) | 
|---|
| 201 | (cons 'cypt cypt) ; | 
|---|
| 202 | (cons 'update-count update-count) ; | 
|---|
| 203 |  | 
|---|
| 204 | (cons 'index-begins-at (+ header-size extheader)) | 
|---|
| 205 | (cons 'datablock-begins-at (+ header-size extheader index-size)) | 
|---|
| 206 | (cons 'datablock-ends-at (+ header-size extheader index-size datablock-size)) | 
|---|
| 207 | (cons 'datablock-size datablock-size) | 
|---|
| 208 | (cons 'bocu bocu))))) | 
|---|
| 209 |  | 
|---|
| 210 | (defun pdicv-get-index-list (filename &optional word-encoding) | 
|---|
| 211 | "[PDICV] Get the index list from PDIC file" | 
|---|
| 212 | (let* ((header (pdicv-get-header-info filename)) | 
|---|
| 213 | (index-buf (nt:read-from-file filename | 
|---|
| 214 | (-> header 'index-begins-at) (-> header 'index-size))) | 
|---|
| 215 |  | 
|---|
| 216 | (32bit-address-mode (if (= (-> header 'index-blkbit) 32) t nil)) | 
|---|
| 217 | (tab-sep-p (if (eq 'unicode-bocu-6 (-> header 'version)) t nil)) | 
|---|
| 218 |  | 
|---|
| 219 | (ix 0) (ix-max (-> header 'nindex)) | 
|---|
| 220 | (ofs 0) | 
|---|
| 221 | (index-list ())) | 
|---|
| 222 | (while (< ix ix-max) | 
|---|
| 223 | (let ((phys -1) (word "") (word* nil)) | 
|---|
| 224 | (if 32bit-address-mode | 
|---|
| 225 | (progn (setq phys (nt:read-ulong index-buf ofs)) | 
|---|
| 226 | (setq ofs (+ ofs 4))) | 
|---|
| 227 | (progn (setq phys (nt:read-ushort index-buf ofs)) | 
|---|
| 228 | (setq ofs (+ ofs 2)))) | 
|---|
| 229 | (setq word* (nt:read-cstring index-buf ofs)) (setq ofs (+ ofs (cdr word*) 1)) | 
|---|
| 230 | (setq word (car word*)) | 
|---|
| 231 |  | 
|---|
| 232 | (when tab-sep-p | 
|---|
| 233 | (let ((tsv (split-string word "\t"))) | 
|---|
| 234 | (when (consp tsv) | 
|---|
| 235 | (setq word (car tsv))))) | 
|---|
| 236 | ;       (cond | 
|---|
| 237 | ;        ((eq word-encoding 'bocu) | 
|---|
| 238 | ;         (setq word (nt:bocu-decode word))) | 
|---|
| 239 | ;        ((eq word-encoding 'sjis) | 
|---|
| 240 | ;         (setq word (decode-coding-string word 'japanese-shift-jis-dos))) | 
|---|
| 241 | ;        (word-encoding | 
|---|
| 242 | ;         (setq word (decode-coding-string word word-encoding))) | 
|---|
| 243 | ;        (t nil)) | 
|---|
| 244 |  | 
|---|
| 245 | ;       (setq index-list (cons (cons phys word) index-list)) | 
|---|
| 246 | (push (cons phys word) index-list) | 
|---|
| 247 | (setq ix (1+ ix)) | 
|---|
| 248 | )) | 
|---|
| 249 | (nreverse index-list) )) | 
|---|
| 250 |  | 
|---|
| 251 | (defface pdicv-face-dummy | 
|---|
| 252 | '((( (class color) (background light) ) | 
|---|
| 253 | (:foreground "green" :background "SlateGray1" :weight bold)) | 
|---|
| 254 | (t | 
|---|
| 255 | (:foreground "red" :background "black"))) ; :weight bold | 
|---|
| 256 | "Face for caption") | 
|---|
| 257 | (defface pdicv-face-caption-red | 
|---|
| 258 | '((t (:foreground "red" :background "black"))) | 
|---|
| 259 | "Face for caption") | 
|---|
| 260 | (defface pdicv-face-caption-blue | 
|---|
| 261 | '((t (:foreground "blue" :background "black"))) | 
|---|
| 262 | "Face for caption") | 
|---|
| 263 | (defface pdicv-face-caption-green | 
|---|
| 264 | '((t (:foreground "green" :background "black"))) | 
|---|
| 265 | "Face for caption") | 
|---|
| 266 | (defface pdicv-face-gray | 
|---|
| 267 | '((t (:foreground "gray"))) | 
|---|
| 268 | "Face for text") | 
|---|
| 269 |  | 
|---|
| 270 | (defvar pdicv-default-inserter | 
|---|
| 271 | (lambda (eword pron jword example) | 
|---|
| 272 | (progn | 
|---|
| 273 | (set-text-properties 0 (length eword) '(face bold) eword) | 
|---|
| 274 | ;       (set-text-properties 0 (length eword) '(face pdicv-face-caption-green) eword) | 
|---|
| 275 | ;       (set-text-properties 0 (length jword) '(face pdicv-face-caption-gray) jword) | 
|---|
| 276 |  | 
|---|
| 277 | (setq jword (nt:replace-all jword "������/ ")) | 
|---|
| 278 | (setq jword (nt:replace-all jword "\n" "\n  ")) | 
|---|
| 279 |  | 
|---|
| 280 | (let ((buf "")) | 
|---|
| 281 | (setq buf eword) | 
|---|
| 282 | (when (string< "" pron) (setq buf (concat buf " [" pron "]"))) | 
|---|
| 283 | ;               (setq result (concat result " : " jword)) | 
|---|
| 284 | (setq buf (concat buf "\n  " jword)) | 
|---|
| 285 | (when (string< "" example) (setq buf (concat buf "\n  - " example))) | 
|---|
| 286 | ;             (setq buf (concat buf "\n")) | 
|---|
| 287 | ;            (setq buf (concat buf "\n\n")) | 
|---|
| 288 | (setq buf (concat buf "\n")) | 
|---|
| 289 |  | 
|---|
| 290 | (insert buf))))) | 
|---|
| 291 | ;; | 
|---|
| 292 | ;; | 
|---|
| 293 | ;; | 
|---|
| 294 | (defun pdicv-scan-datablock (filename phys criteria-func) | 
|---|
| 295 | "[PDICV] scan a datablock" | 
|---|
| 296 | (catch 'pdicv-scan-datablock | 
|---|
| 297 | (let* ((result ()) ;(match-count 0) | 
|---|
| 298 | (header (pdicv-get-header-info filename)) | 
|---|
| 299 | (block-size (-> header 'block-size)) | 
|---|
| 300 | (offset (+ (-> header 'datablock-begins-at) (* phys block-size))) | 
|---|
| 301 | (aligned (and (member (-> header 'version) '(newdic4 unicode-bocu-6)) t)) | 
|---|
| 302 | ;; (bocu (-> header 'bocu)) | 
|---|
| 303 | (head-word (nt:read-ushort (nt:read-from-file filename offset 2))) | 
|---|
| 304 | (blocks (logand 32767 head-word)) | 
|---|
| 305 | (block-length (- (* blocks block-size) 2)) | 
|---|
| 306 | (field-size (if (zerop (logand 32768 head-word)) 2 4)) | 
|---|
| 307 | (datablock (nt:read-from-file filename (+ offset 2) block-length)) | 
|---|
| 308 | ;    (list blocks field-size datablock) | 
|---|
| 309 | (p 0) | 
|---|
| 310 | (field-length 0) | 
|---|
| 311 | (compress-length 0) | 
|---|
| 312 | (rest nil) | 
|---|
| 313 | (eword "") (eword-attrib 0) | 
|---|
| 314 | ) | 
|---|
| 315 |  | 
|---|
| 316 | (while (< p block-length) ;    (while (< p field-size) | 
|---|
| 317 | (setq field-length | 
|---|
| 318 | (if (= field-size 2) (nt:read-ushort datablock p) (nt:read-ulong datablock p)) ) | 
|---|
| 319 | (when (zerop field-length) (throw 'pdicv-scan-datablock (nreverse result))); sfield-list)) | 
|---|
| 320 | (setq p (+ p field-size)) ;2��ʒ�����4��В�����        (setq compress-length (nt:read-uchar datablock p)) ; �����̒Ĺ | 
|---|
| 321 | (setq p (1+ p)) | 
|---|
| 322 |  | 
|---|
| 323 | (when aligned | 
|---|
| 324 | (setq eword-attrib (nt:read-uchar datablock p)) ; ������������� | 
|---|
| 325 | (setq p (1+ p))) | 
|---|
| 326 | ; �����В����쒰ʒ�ߒ����꒤������� rest ��˒���쒤�       (setq rest (substring datablock p (+ p field-length))) | 
|---|
| 327 | (setq p (+ p field-length)) | 
|---|
| 328 | ; �����В�����NULL����ü) | 
|---|
| 329 | (let* ((eword-cstr (nt:read-cstring rest)) | 
|---|
| 330 | (eword-compressed (car eword-cstr)) (eword-len (cdr eword-cstr)) | 
|---|
| 331 | (q 0) | 
|---|
| 332 | (level 0) | 
|---|
| 333 | (extended nil) | 
|---|
| 334 | (jword-cstr nil) (jword "") (jword-len 0) | 
|---|
| 335 | (ext-list nil) | 
|---|
| 336 | (example "") (pron "") (link "")) | 
|---|
| 337 |  | 
|---|
| 338 | (setq eword (if (zerop compress-length) | 
|---|
| 339 | eword-compressed | 
|---|
| 340 | (concat (substring eword 0 compress-length) eword-compressed) )) | 
|---|
| 341 | (setq q (1+ eword-len)) | 
|---|
| 342 | ; ������������� | 
|---|
| 343 | (when (not aligned) | 
|---|
| 344 | (setq eword-attrib (nt:read-uchar rest q)) | 
|---|
| 345 | (setq q (1+ q))) | 
|---|
| 346 |  | 
|---|
| 347 | (setq level (logand eword-attrib 15)) | 
|---|
| 348 | ;         (insert (format ": %s %d %d\n" eword eword-len eword-attrib)) | 
|---|
| 349 | ;;        (if (zerop (logand eword-attrib 128)) | 
|---|
| 350 | ;;            (throw 'pdicv-scan-datablock ()); 'illegal) | 
|---|
| 351 | (setq eword-attrib (logand eword-attrib 127)) | 
|---|
| 352 |  | 
|---|
| 353 | (setq extended (if (zerop (logand eword-attrib 16)) nil t)) | 
|---|
| 354 | (if extended | 
|---|
| 355 | (progn ;��Ȓĥ | 
|---|
| 356 | (setq jword-cstr (nt:read-cstring rest q)) | 
|---|
| 357 | (setq jword (car jword-cstr)) (setq jword-len (cdr jword-cstr)) | 
|---|
| 358 | (setq q (+ q jword-len 1)) | 
|---|
| 359 | (setq ext-list nil) | 
|---|
| 360 | (catch 'while | 
|---|
| 361 | (while (< q field-length) | 
|---|
| 362 | (let* ((ex-attrib (nt:read-uchar rest q)) | 
|---|
| 363 | (ex-attrib-sub (logand ex-attrib 15)) | 
|---|
| 364 | (exdata-cstr nil) | 
|---|
| 365 | (exdata "") (exdata-len 0) ) | 
|---|
| 366 | (when (= (logand ex-attrib 128) 128) (throw 'while t)) | 
|---|
| 367 | (setq q (1+ q)) | 
|---|
| 368 | (setq exdata-cstr (nt:read-cstring rest q)) | 
|---|
| 369 | (setq exdata (car exdata-cstr)) | 
|---|
| 370 | (setq exdata-len (cdr exdata-cstr)) | 
|---|
| 371 | (cond | 
|---|
| 372 | ((= ex-attrib-sub 1) (setq example exdata)) | 
|---|
| 373 | ((= ex-attrib-sub 2) (setq pron exdata)) | 
|---|
| 374 | ((= ex-attrib-sub 4) (setq link exdata)) | 
|---|
| 375 | (t nil)) | 
|---|
| 376 | (setq q (+ q exdata-len 1)) | 
|---|
| 377 | ) ; let* | 
|---|
| 378 | ) ; while | 
|---|
| 379 | ) ; catch while2 | 
|---|
| 380 | ) ; progn | 
|---|
| 381 | (progn ;�ɸ���             (setq jword (substring rest q)) | 
|---|
| 382 | (setq pron "") | 
|---|
| 383 | (setq example "")) | 
|---|
| 384 | ) ; if extended | 
|---|
| 385 |  | 
|---|
| 386 | ;         (insert (format "- %s\n" eword)) | 
|---|
| 387 | (when (funcall criteria-func eword pron jword example) | 
|---|
| 388 | (push (list eword pron jword example) result)) | 
|---|
| 389 | );let | 
|---|
| 390 | ); wend | 
|---|
| 391 | (nreverse result)))) | 
|---|
| 392 |  | 
|---|
| 393 | (defun pdicv-core-search (dicinfo criteria &optional simple-mode-p dont-clear-p) | 
|---|
| 394 | "search in PDIC" | 
|---|
| 395 | (let* ((dicname (car dicinfo)) | 
|---|
| 396 | (dicfile (nth 1 dicinfo)) | 
|---|
| 397 | (encoding-list (nth 2 dicinfo)) | 
|---|
| 398 | (decoder-list ()) | 
|---|
| 399 | (index-table (-> pdicv-index-table-list dicname))) | 
|---|
| 400 | ;    (if (null index-table) (setq index-table (pdicv-get-index-list dicfile))) | 
|---|
| 401 |  | 
|---|
| 402 | (when (atom encoding-list) ;; expand encoding-list | 
|---|
| 403 | (setq encoding-list (list encoding-list encoding-list encoding-list encoding-list))) | 
|---|
| 404 |  | 
|---|
| 405 | (while encoding-list ;; build the decoder-list | 
|---|
| 406 | (let ((encoding (car encoding-list))) | 
|---|
| 407 | (cond | 
|---|
| 408 | ((eq encoding 'bocu) (push pdicv-bocu-decoder decoder-list)) | 
|---|
| 409 | ((eq encoding 'sjis) (push pdicv-sjis-decoder decoder-list)) | 
|---|
| 410 | ((eq encoding 'latin1) (push pdicv-latin1-decoder decoder-list)) | 
|---|
| 411 | (encoding (push (pdicv-create-decoder encoding) decoder-list)) | 
|---|
| 412 | (t (push pdicv-null-decoder decoder-list)))) | 
|---|
| 413 | (setq encoding-list (cdr encoding-list))) | 
|---|
| 414 | (setq decoder-list (nreverse decoder-list)) | 
|---|
| 415 |  | 
|---|
| 416 | (catch 'pdicv-core-search | 
|---|
| 417 | ;      (if (null original-word-to-search) (setq original-word-to-search word-to-search)) | 
|---|
| 418 | (let* (;(buffer-name (generate-new-buffer-name *buffer-name)) | 
|---|
| 419 | ;               (pdicv-buffer-name "*PDIC Viewer*") | 
|---|
| 420 | ;               (dummy (if (get-buffer pdicv-buffer-name) (kill-buffer pdicv-buffer-name))) | 
|---|
| 421 | (pdicv-buffer (get-buffer-create "*PDIC Viewer*")) | 
|---|
| 422 | ;criteria | 
|---|
| 423 | (word-to-search (car criteria)) | 
|---|
| 424 |  | 
|---|
| 425 | (index-needles (nth 1 criteria)) | 
|---|
| 426 | (needle1 (car index-needles)) | 
|---|
| 427 | (needle2 (cdr index-needles)) | 
|---|
| 428 |  | 
|---|
| 429 | (datablock-criteria-func (nth 2 criteria)) | 
|---|
| 430 |  | 
|---|
| 431 | (ix index-table) (index-size (length ix)) (curr-size index-size) | 
|---|
| 432 | (ix+ (cadr ix)); next one | 
|---|
| 433 | (match-count 0)) | 
|---|
| 434 |  | 
|---|
| 435 | ;         (switch-to-buffer pdicv-buffer-name) | 
|---|
| 436 | (save-current-buffer | 
|---|
| 437 | (set-buffer pdicv-buffer) | 
|---|
| 438 | (when (null dont-clear-p) (erase-buffer)) | 
|---|
| 439 |  | 
|---|
| 440 | (when (not simple-mode-p) | 
|---|
| 441 | ;(pop-to-buffer pdicv-buffer-name) | 
|---|
| 442 | ;              (set-buffer pdicv-buffer-name) | 
|---|
| 443 | (insert (format "��������� %s\n" word-to-search)) | 
|---|
| 444 | (insert (format "��������???\n")) | 
|---|
| 445 | (newline)) | 
|---|
| 446 | ;(insert "\n")) | 
|---|
| 447 | (when index-needles | 
|---|
| 448 | (setq ix | 
|---|
| 449 | (let ((p ix) (last-p nil)) | 
|---|
| 450 | (catch 'pdicv-search-in-index | 
|---|
| 451 | (while p | 
|---|
| 452 | (let* ((elem (car p)) ;(phys (car elem)) | 
|---|
| 453 | (word (cdr elem))) | 
|---|
| 454 | (if (string< needle1 word) (throw 'pdicv-search-in-index last-p)) | 
|---|
| 455 | ;; (if (string< needle2 word) (throw 'pdicv-search-in-index last-p)) | 
|---|
| 456 | (setq last-p p) | 
|---|
| 457 | (setq p (cdr p)) )) | 
|---|
| 458 | last-p)))) | 
|---|
| 459 | (catch 'while | 
|---|
| 460 | (while ix | 
|---|
| 461 | (let* ((curr (car ix)) | 
|---|
| 462 | (phys (car curr)) (word (cdr curr)) | 
|---|
| 463 | ;; (x (insert (format "* current ix: (%d %s)\n" phys word))) | 
|---|
| 464 | (result (pdicv-scan-datablock dicfile phys datablock-criteria-func)); decoder-list nil)) | 
|---|
| 465 | (result-count (length result)) | 
|---|
| 466 | (inserter pdicv-default-inserter)) | 
|---|
| 467 | (when index-needles | 
|---|
| 468 | (when (string>= word needle2) (throw 'while t))) | 
|---|
| 469 | ;;  (if (not (string< word (cdr index-needles))) (throw 'while t))) | 
|---|
| 470 |  | 
|---|
| 471 | ;               (insert (format "(%s with index %s ... %s)\n" | 
|---|
| 472 | ;                               word-to-search | 
|---|
| 473 | ;                               (funcall (nth 0 decoder-list) word) result)) | 
|---|
| 474 |  | 
|---|
| 475 | (if result (progn | 
|---|
| 476 | (while result | 
|---|
| 477 | (let ((rec (pop result))) | 
|---|
| 478 | (funcall inserter | 
|---|
| 479 | (funcall (nth 0 decoder-list) (nth 0 rec)); eword | 
|---|
| 480 | (funcall (nth 1 decoder-list) (nth 1 rec)); pron | 
|---|
| 481 | (funcall (nth 2 decoder-list) (nth 2 rec)); jword | 
|---|
| 482 | (funcall (nth 3 decoder-list) (nth 3 rec)); example | 
|---|
| 483 | ) | 
|---|
| 484 | (setq match-count (1+ match-count)) | 
|---|
| 485 | )) | 
|---|
| 486 | (message "%5d/%5d:%7d" curr-size index-size match-count) | 
|---|
| 487 | (sit-for 0)) | 
|---|
| 488 | ;;else | 
|---|
| 489 | (when (zerop (% curr-size 128)) ;;128��ϒŬ����ʒ��                      (message "%5d/%5d:%7d" curr-size index-size match-count)))) | 
|---|
| 490 | (setq ix (cdr ix)) | 
|---|
| 491 | (setq curr-size (1- curr-size)) | 
|---|
| 492 | );wend | 
|---|
| 493 | );caught | 
|---|
| 494 |  | 
|---|
| 495 | ;;(insert (pdicv-scan-datablock dicfile (car (car ix)) decoder-list nil needle1 needle2)) | 
|---|
| 496 | (goto-char 1) | 
|---|
| 497 |  | 
|---|
| 498 | (when (not simple-mode-p) | 
|---|
| 499 | (when (re-search-forward ": [?][?][?][?]" nil t nil) | 
|---|
| 500 | (replace-match (format ": %d" match-count) t t nil 0))) | 
|---|
| 501 | ); save-current-buffer | 
|---|
| 502 |  | 
|---|
| 503 | ;      (pop-to-buffer (current-buffer)) | 
|---|
| 504 | ;        (setq split-height-threshold 6) | 
|---|
| 505 | (when (one-window-p) | 
|---|
| 506 | (set-window-buffer (split-window-vertically (- pdicv-result-height)) pdicv-buffer)) | 
|---|
| 507 | )))) | 
|---|
| 508 |  | 
|---|
| 509 | ;;; pdicv-core.el ends here | 
|---|