root/lang/elisp/pdicv-mode/trunk/nt-bocu.el @ 75

Revision 71, 8.8 kB (checked in by naoya_t, 16 years ago)
Line 
1;;; nt-bocu.el --- decode/encode BOCU-1 string (via utf-8, so requires Mule-UCS)
2;;
3;; Copyright (C) 2005-2009 naoya_t. All Rights Reserved.
4;;
5;; Author: naoya_t <naoya.t@aqua.plala.or.jp>
6;; Maintainer: naoya_t <naoya.t@aqua.plala.or.jp>
7;; Primary distribution site:
8;;   http://lambdarepos.svnrepository.com/svn/share/lang/elisp/pdicv-mode/trunk
9;;
10;; Created: 12 Feb 2005
11;; Last modified: 15 Dec 2005 (defun --> defsubst)
12;; Version: 1.0.1
13;; Keywords: BOCU-1 encode decode
14
15(provide 'nt-bocu)
16
17;;; Commentaries:
18
19;; internal
20; (nt:bocu-decode-trail-char TR)
21; (nt:bocu-encode-trail-char NUM)
22
23; (nt:bocustr-to-rawcode-list STR)
24; (nt:diff-to-bocustr DIFF) - used by #'bocu-rawcode-list-to-bocustr
25; (nt:rawcode-list-to-bocustr STR)
26
27; (nt:bocu-decode STR)
28;   - BOCU-1 ʸ���ǥ����ɤ���ʸ��Mule������ˤ�Ѵ�
29; (nt:bocu-encode STR)
30;   - ʸ��Mule������ˤ�CU-1 ���󥳡���
31;;; Code:
32(require 'nt-utf8)
33
34(defsubst nt:bocu-decode-trail-char (tr)
35  "[BOCU] decode trail char"
36  (cond
37   ((>  tr #x20) (- tr 13)) ;21- >> 14-
38   ((>= tr #x1c) (- tr 12)) ;1C ... 1F >> 10 ... 13
39   ((>= tr #x10) (- tr 10)) ;10 ... 19 >> 06 ... 0F
40   (t (1- tr))              ;01 ... 06 >> 00 ... 05
41   ))
42
43(defsubst nt:bocu-encode-trail-char (c)
44  "[BOCU] encode trail char"
45  (cond
46   ((>  c #x13) (+ c 13)) ;14- >> 21-
47   ((>= c #x10) (+ c 12)) ;10 ... 13 >> 1C ... 1F
48   ((>= c #x06) (+ c 10)) ;06 ... 0F >> 10 ... 19
49   (t (1+ c))             ;00 ... 05 >> 01 ... 06
50   ))
51
52(defun nt:bocustr-to-rawcode-list (s)
53  "[BOCU] BOCU-1 string --> rawcode-list"
54  (let ((l ()) (len (length s)) (i 0)
55        (pc #x40) (lead 0) (tr 0) (code 0) (diff 0))
56    (while (< i len)
57      (setq lead (aref s i) i (1+ i))
58      (cond ((<= lead #x20) (setq code lead))
59            ((= lead #x21) ;21    (L T T T)
60             (setq diff (+ -187660 (* 243 243 243)))
61                                        ; trail 3
62             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
63             (setq diff (+ diff (* tr 243 243)))
64                                        ; trail 2
65             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
66             (setq diff (+ diff (* tr 243)))
67                                        ; trail 1
68             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
69             (setq diff (+ diff tr)) )
70            ((< lead #x25) ;22-24 (L T T)
71             (setq diff (+ -10513 (* (- lead #x25) 243 243)))
72                                        ; trail 2
73             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
74             (setq diff (+ diff (* tr 243)))
75                                        ; trail 1
76             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
77             (setq diff (+ diff tr)) )
78            ((< lead #x50) ;25-4f (L T)
79             (setq diff (+ -64 (* (- lead #x50) 243)))
80                                        ; trail 1
81             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
82             (setq diff (+ diff tr)) )
83            ((< lead #xd0) ;50-cf (L)
84             (setq diff (- lead #x90)) )
85            ((< lead #xfb) ;d0-fa (L T)
86             (setq diff (+ 64 (* (- lead #xd0) 243)))
87                                        ; trail 1
88             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
89             (setq diff (+ diff tr)) )
90            ((< lead #xfe) ;fb-fd (L T T)
91             (setq diff (+ 10513 (* (- lead #xfb) 243 243)))
92                                        ; trail 2
93             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
94             (setq diff (+ diff (* tr 243)))
95                                        ; trail 1
96             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
97             (setq diff (+ diff tr)) )
98            ((= lead #xfe) ;fe    (L T T T)
99             (setq diff 187660)
100                                        ; trail 3
101             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
102             (setq diff (+ diff (* tr 243 243)))
103                                        ; trail 2
104             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
105             (setq diff (+ diff (* tr 243)))
106                                        ; trail 1
107             (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i))
108             (setq diff (+ diff tr)) )
109            ((= lead #xff) )) ; reset
110
111      (cond
112       ((<= lead #x20)
113        (push lead l)
114                                        ;       (setq r (concat r (string lead)))
115        (if (< lead #x20) (setq pc #x40)) ) ;#x20�ʤ餽�Τޤ�       ((< lead #xff)
116        (progn
117          (setq code (+ pc diff))
118          (if (< code 0) (setq code 0));; error recovery
119          (push code l)
120                                        ;         (setq r (concat r (if (> code 0) (code-to-utf8 code) "?")))
121
122          (setq pc (cond
123                    ((< code #x20) #x40)
124                    ((= code #x20) pc) ; keep pc
125                    ((and (<= #x3040 code) (<= code #x309f)) #x3070)
126                    ((and (<= #x4e00 code) (<= code #x9fa5)) #x7711)
127                    ((and (<= #xac00 code) (<= code #xd7a3)) #xc1d1)
128                    (t (+ (logand code (lognot #x7f)) #x40))
129                    )); pc
130          ))
131       (t (setq pc #x40)) )); #xFF: reset
132    (nreverse l) ))
133
134(defun nt:diff-to-bocustr (diff)
135  "[BOCU] diff --> BOCU-1 string"
136  (catch 'bocu-encode-diff
137    (let ((s "") (t0 0) (t1 0) (t2 0) (t3 0))
138      (cond
139       ((< diff -14536567) (throw 'bocu-encode-diff 'underflow-exception))
140       ((< diff -187660)  ; [-14536567,-187660) : 21
141        (progn
142          (setq diff (- diff -14536567))
143          (setq t3 (% diff 243)) (setq diff (/ diff 243))
144          (setq t2 (% diff 243)) (setq diff (/ diff 243))
145          (setq t1 (% diff 243)) (setq diff (/ diff 243))
146                                        ;(setq t0 diff)
147          (string #x21 (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2) (nt:bocu-encode-trail-char t3)) ))
148       ((< diff -10513)   ; [-187660,-10513) : 22-24
149        (progn
150          (setq diff (- diff -187660))
151          (setq t2 (% diff 243)) (setq diff (/ diff 243))
152          (setq t1 (% diff 243)) (setq diff (/ diff 243))
153          (setq t0 diff)
154          (string (+ #x22 t0) (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2)) ))
155       ((< diff -64)      ; [-10513,-64) : 25-4F
156        (progn
157          (setq diff (- diff -10513))
158          (setq t1 (% diff 243)) (setq diff (/ diff 243))
159          (setq t0 diff)
160          (string (+ #x25 t0) (nt:bocu-encode-trail-char t1)) ))
161       ((< diff 64)       ; [-64,63) : 50-CF
162        (progn
163          (setq diff (- diff -64))
164          (setq t0 diff)
165          (string (+ #x50 t0)) ))
166       ((< diff 10513)    ; [64,10513) : D0-FA
167        (progn
168          (setq diff (- diff 64))
169          (setq t1 (% diff 243)) (setq diff (/ diff 243))
170          (setq t0 diff)
171          (string (+ #xD0 t0) (nt:bocu-encode-trail-char t1)) ))
172       ((< diff 187660)   ; [10513,187660) : FB-FD
173        (progn
174          (setq diff (- diff 10513))
175          (setq t2 (% diff 243)) (setq diff (/ diff 243))
176          (setq t1 (% diff 243)) (setq diff (/ diff 243))
177          (setq t0 diff)
178          (string (+ #xFB t0) (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2)) ))
179       ((< diff 14536567) ; [187660,14536567) : FE
180        (progn
181          (setq diff (- diff 187660))
182          (setq t3 (% diff 243)) (setq diff (/ diff 243))
183          (setq t2 (% diff 243)) (setq diff (/ diff 243))
184          (setq t1 (% diff 243)) (setq diff (/ diff 243))
185                                        ;(setq t0 diff)
186          (string #xFE (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2) (nt:bocu-encode-trail-char t3)) ))
187       (t (throw 'bocu-encode-diff 'overflow-exception)) ))))
188
189(defun nt:rawcode-list-to-bocustr (l) ; not tested much
190  "rawcode list --> BOCU-1 string"
191  (let* ((s "") (pc #x40))
192    (while l
193      (let* ((code (car l))
194             (diff (- code pc)) )
195        (setq l (cdr l))
196        (setq s (concat s (nt:diff-to-bocustr diff)))
197        (setq pc (cond
198                  ((< code #x20) #x40)
199                  ((= code #x20) pc) ;keep pc
200                  ((and (<= #x3040 code) (<= code #x309F)) #x3070)
201                  ((and (<= #x4E00 code) (<= code #x9FA5)) #x7711)
202                  ((and (<= #xAC00 code) (<= code #xD7A3)) #xC1D1)
203                  (t (+ (logand code #xffff80) #x40)))) ))
204    s))
205
206(defsubst nt:bocu-decode (s)
207  "decode BOCU-1 string (via utf-8)"
208  (let* ((rawcode-list (nt:bocustr-to-rawcode-list s))
209         (utf8str (nt:rawcode-list-to-utf8str rawcode-list)))
210    (decode-coding-string utf8str 'utf-8) ))
211
212(defsubst nt:bocu-encode (s)
213  "encode a string into BOCU-1 (via utf-8)"
214  (let* ((utf8str (encode-coding-string s 'utf-8))
215         (rawcode-list (nt:utf8str-to-rawcode-list utf8str))
216         (bocustr (nt:rawcode-list-to-bocustr rawcode-list)))
217    bocustr))
218
219;;; nt-bocu.el ends here
Note: See TracBrowser for help on using the browser.