root/lang/elisp/pdicv-mode/trunk/nt-bocu.el @ 67

Revision 67, 9.2 kB (checked in by naoya_t, 16 years ago)
Line 
1;;; nt-bocu.el --- decode/encode BOCU-1 string (via utf-8, so requires Mule-UCS)
2;;
3;; Copyright (C) 2005 Naoya TOZUKA. All Rights Reserved.
4;;
5;; Author: Naoya TOZUKA <pdicviewer@gmail.com>
6;; Maintainer: Naoya TOZUKA <pdicviewer@gmail.com>
7;; Primary distribution site: http://pdicviewer.naochan.com/el/
8;;
9;; Created: 12 Feb 2005
10;; Last modified: 15 Dec 2005 (defun --> defsubst)
11;; Version: 1.0.1
12;; Keywords: BOCU-1 encode decode
13
14(provide 'nt-bocu)
15
16;;; Commentaries:
17
18;; internal
19; (nt:bocu-decode-trail-char TR)
20; (nt:bocu-encode-trail-char NUM)
21
22; (nt:bocustr-to-rawcode-list STR)
23; (nt:diff-to-bocustr DIFF) - used by #'bocu-rawcode-list-to-bocustr
24; (nt:rawcode-list-to-bocustr STR)
25
26; (nt:bocu-decode STR)
27;   - BOCU-1 ʸ���ǥ����ɤ���ʸ��Mule������ˤ�Ѵ�
28; (nt:bocu-encode STR)
29;   - ʸ��Mule������ˤ�CU-1 ���󥳡���
30;;; Code:
31(require 'nt-utf8)
32
33(defsubst nt:bocu-decode-trail-char (tr)
34  "[BOCU] decode trail char"
35  (cond
36   ((>  tr #x20) (- tr 13)) ;21- >> 14-
37   ((>= tr #x1c) (- tr 12)) ;1C ... 1F >> 10 ... 13
38   ((>= tr #x10) (- tr 10)) ;10 ... 19 >> 06 ... 0F
39   (t (1- tr))              ;01 ... 06 >> 00 ... 05
40   ); cond
41  )
42(defsubst nt:bocu-encode-trail-char (c)
43  "[BOCU] encode trail char"
44  (cond
45   ((>  c #x13) (+ c 13)) ;14- >> 21-
46   ((>= c #x10) (+ c 12)) ;10 ... 13 >> 1C ... 1F
47   ((>= c #x06) (+ c 10)) ;06 ... 0F >> 10 ... 19
48   (t (1+ c))             ;00 ... 05 >> 01 ... 06
49   ); cond
50  )
51
52(defun nt:bocustr-to-rawcode-list (s)
53  "[BOCU] BOCU-1 string --> rawcode-list"
54  (let ((l ()) (len (length s)) (i 0)
55        (pc #x40) (lead 0) (tr 0) (code 0) (diff 0))
56    (while (< i len)
57      (setq lead (aref s i) i (1+ i))
58      (cond ((<= lead #x20) (setq code lead))
59            ((= lead #x21) ;21    (L T T T)
60             (setq diff (+ -187660 (* 243 243 243)))
61                                        ; trail 3
62             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
63             (setq diff (+ diff (* tr 243 243)))
64                                        ; trail 2
65             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
66             (setq diff (+ diff (* tr 243)))
67                                        ; trail 1
68             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
69             (setq diff (+ diff tr))
70             )
71            ((< lead #x25) ;22-24 (L T T)
72             (setq diff (+ -10513 (* (- lead #x25) 243 243)))
73                                        ; trail 2
74             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
75             (setq diff (+ diff (* tr 243)))
76                                        ; trail 1
77             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
78             (setq diff (+ diff tr))
79             )
80            ((< lead #x50) ;25-4f (L T)
81             (setq diff (+ -64 (* (- lead #x50) 243)))
82                                        ; trail 1
83             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
84             (setq diff (+ diff tr))
85             )
86            ((< lead #xd0) ;50-cf (L)
87             (setq diff (- lead #x90))
88             )
89            ((< lead #xfb) ;d0-fa (L T)
90             (setq diff (+ 64 (* (- lead #xd0) 243)))
91                                        ; trail 1
92             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
93             (setq diff (+ diff tr))
94             )
95            ((< lead #xfe) ;fb-fd (L T T)
96             (setq diff (+ 10513 (* (- lead #xfb) 243 243)))
97                                        ; trail 2
98             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
99             (setq diff (+ diff (* tr 243)))
100                                        ; trail 1
101             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
102             (setq diff (+ diff tr))
103             )
104            ((= lead #xfe) ;fe    (L T T T)
105             (setq diff 187660)
106                                        ; trail 3
107             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
108             (setq diff (+ diff (* tr 243 243)))
109                                        ; trail 2
110             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
111             (setq diff (+ diff (* tr 243)))
112                                        ; trail 1
113             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
114             (setq diff (+ diff tr))
115             )
116            ((= lead #xff) ; reset
117             )
118            ); end of cond.
119
120      (cond
121       ((<= lead #x20)
122        (push lead l)
123                                        ;       (setq r (concat r (string lead)))
124        (if (< lead #x20) (setq pc #x40)) ;#x20�ʤ餽�Τޤ�        )
125       ((< lead #xff)
126        (progn
127          (setq code (+ pc diff))
128          (if (< code 0) (setq code 0));; error recovery
129
130          (push code l)
131                                        ;         (setq r (concat r (if (> code 0) (code-to-utf8 code) "?")))
132
133          (setq pc (cond
134                    ((< code #x20) #x40)
135                    ((= code #x20) pc) ; keep pc
136                    ((and (<= #x3040 code) (<= code #x309f)) #x3070)
137                    ((and (<= #x4e00 code) (<= code #x9fa5)) #x7711)
138                    ((and (<= #xac00 code) (<= code #xd7a3)) #xc1d1)
139                    (t (+ (logand code (lognot #x7f)) #x40))
140                    )); pc
141          ))
142       (t (setq pc #x40)); #xFF: reset
143       )
144      ); wend
145    (nreverse l)
146    ); let
147  )
148
149(defun nt:diff-to-bocustr (diff)
150  "[BOCU] diff --> BOCU-1 string"
151  (catch 'bocu-encode-diff
152    (let ((s "") (t0 0) (t1 0) (t2 0) (t3 0))
153      (cond
154       ((< diff -14536567) (throw 'bocu-encode-diff 'underflow-exception))
155       ((< diff -187660)  ; [-14536567,-187660) : 21
156        (progn
157          (setq diff (- diff -14536567))
158          (setq t3 (% diff 243)) (setq diff (/ diff 243))
159          (setq t2 (% diff 243)) (setq diff (/ diff 243))
160          (setq t1 (% diff 243)) (setq diff (/ diff 243))
161                                        ;(setq t0 diff)
162          (string #x21 (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2) (nt:bocu-encode-trail-char t3))
163          )
164        )
165       ((< diff -10513)   ; [-187660,-10513) : 22-24
166        (progn
167          (setq diff (- diff -187660))
168          (setq t2 (% diff 243)) (setq diff (/ diff 243))
169          (setq t1 (% diff 243)) (setq diff (/ diff 243))
170          (setq t0 diff)
171          (string (+ #x22 t0) (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2))
172          )
173        )
174       ((< diff -64)      ; [-10513,-64) : 25-4F
175        (progn
176          (setq diff (- diff -10513))
177          (setq t1 (% diff 243)) (setq diff (/ diff 243))
178          (setq t0 diff)
179          (string (+ #x25 t0) (nt:bocu-encode-trail-char t1))
180          )
181        )
182       ((< diff 64)       ; [-64,63) : 50-CF
183        (progn
184          (setq diff (- diff -64))
185          (setq t0 diff)
186          (string (+ #x50 t0))
187          )
188        )
189       ((< diff 10513)    ; [64,10513) : D0-FA
190        (progn
191          (setq diff (- diff 64))
192          (setq t1 (% diff 243)) (setq diff (/ diff 243))
193          (setq t0 diff)
194          (string (+ #xD0 t0) (nt:bocu-encode-trail-char t1))
195          )
196        )
197       ((< diff 187660)   ; [10513,187660) : FB-FD
198        (progn
199          (setq diff (- diff 10513))
200          (setq t2 (% diff 243)) (setq diff (/ diff 243))
201          (setq t1 (% diff 243)) (setq diff (/ diff 243))
202          (setq t0 diff)
203          (string (+ #xFB t0) (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2))
204          )
205        )
206       ((< diff 14536567) ; [187660,14536567) : FE
207        (progn
208          (setq diff (- diff 187660))
209          (setq t3 (% diff 243)) (setq diff (/ diff 243))
210          (setq t2 (% diff 243)) (setq diff (/ diff 243))
211          (setq t1 (% diff 243)) (setq diff (/ diff 243))
212                                        ;(setq t0 diff)
213          (string #xFE (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2) (nt:bocu-encode-trail-char t3))
214          )
215        )
216       (t (throw 'bocu-encode-diff 'overflow-exception))
217       ); cond
218      ); let
219    ); caught
220  )
221
222(defun nt:rawcode-list-to-bocustr (l) ; not tested much
223  "rawcode list --> BOCU-1 string"
224  (let* ((s "") (pc #x40))
225    (while l
226      (let* ((code (car l))
227             (diff (- code pc)) )
228        (setq l (cdr l))
229        (setq s (concat s (nt:diff-to-bocustr diff)))
230        (setq pc (cond
231                  ((< code #x20) #x40)
232                  ((= code #x20) pc) ;keep pc
233                  ((and (<= #x3040 code) (<= code #x309F)) #x3070)
234                  ((and (<= #x4E00 code) (<= code #x9FA5)) #x7711)
235                  ((and (<= #xAC00 code) (<= code #xD7A3)) #xC1D1)
236                  (t (+ (logand code #xffff80) #x40))))
237        ); let*
238      ); wend
239    s
240    ); let*
241  )
242
243(defsubst nt:bocu-decode (s)
244  "decode BOCU-1 string (via utf-8)"
245  (let* ((rawcode-list (nt:bocustr-to-rawcode-list s))
246         (utf8str (nt:rawcode-list-to-utf8str rawcode-list)))
247    (decode-coding-string utf8str 'utf-8)
248    )
249  )
250
251(defsubst nt:bocu-encode (s)
252  "encode a string into BOCU-1 (via utf-8)"
253  (let* ((utf8str (encode-coding-string s 'utf-8))
254         (rawcode-list (nt:utf8str-to-rawcode-list utf8str))
255         (bocustr (nt:rawcode-list-to-bocustr rawcode-list))
256         )
257    bocustr
258    )
259  )
260
261;;; nt-bocu.el ends here
Note: See TracBrowser for help on using the browser.