1 | ;;; nt-bocu.el --- decode/encode BOCU-1 string (via utf-8, so requires Mule-UCS) |
---|
2 | ;; |
---|
3 | ;; Copyright (C) 2005 Naoya TOZUKA. All Rights Reserved. |
---|
4 | ;; |
---|
5 | ;; Author: Naoya TOZUKA <pdicviewer@gmail.com> |
---|
6 | ;; Maintainer: Naoya TOZUKA <pdicviewer@gmail.com> |
---|
7 | ;; Primary distribution site: http://pdicviewer.naochan.com/el/ |
---|
8 | ;; |
---|
9 | ;; Created: 12 Feb 2005 |
---|
10 | ;; Last modified: 15 Dec 2005 (defun --> defsubst) |
---|
11 | ;; Version: 1.0.1 |
---|
12 | ;; Keywords: BOCU-1 encode decode |
---|
13 | |
---|
14 | (provide 'nt-bocu) |
---|
15 | |
---|
16 | ;;; Commentaries: |
---|
17 | |
---|
18 | ;; internal |
---|
19 | ; (nt:bocu-decode-trail-char TR) |
---|
20 | ; (nt:bocu-encode-trail-char NUM) |
---|
21 | |
---|
22 | ; (nt:bocustr-to-rawcode-list STR) |
---|
23 | ; (nt:diff-to-bocustr DIFF) - used by #'bocu-rawcode-list-to-bocustr |
---|
24 | ; (nt:rawcode-list-to-bocustr STR) |
---|
25 | |
---|
26 | ; (nt:bocu-decode STR) |
---|
27 | ; - BOCU-1 ʸ���ǥ����ɤ���ʸ��Mule������ˤ�Ѵ� |
---|
28 | ; (nt:bocu-encode STR) |
---|
29 | ; - ʸ��Mule������ˤ�CU-1 ������ |
---|
30 | ;;; Code: |
---|
31 | (require 'nt-utf8) |
---|
32 | |
---|
33 | (defsubst nt:bocu-decode-trail-char (tr) |
---|
34 | "[BOCU] decode trail char" |
---|
35 | (cond |
---|
36 | ((> tr #x20) (- tr 13)) ;21- >> 14- |
---|
37 | ((>= tr #x1c) (- tr 12)) ;1C ... 1F >> 10 ... 13 |
---|
38 | ((>= tr #x10) (- tr 10)) ;10 ... 19 >> 06 ... 0F |
---|
39 | (t (1- tr)) ;01 ... 06 >> 00 ... 05 |
---|
40 | ); cond |
---|
41 | ) |
---|
42 | (defsubst nt:bocu-encode-trail-char (c) |
---|
43 | "[BOCU] encode trail char" |
---|
44 | (cond |
---|
45 | ((> c #x13) (+ c 13)) ;14- >> 21- |
---|
46 | ((>= c #x10) (+ c 12)) ;10 ... 13 >> 1C ... 1F |
---|
47 | ((>= c #x06) (+ c 10)) ;06 ... 0F >> 10 ... 19 |
---|
48 | (t (1+ c)) ;00 ... 05 >> 01 ... 06 |
---|
49 | ); cond |
---|
50 | ) |
---|
51 | |
---|
52 | (defun nt:bocustr-to-rawcode-list (s) |
---|
53 | "[BOCU] BOCU-1 string --> rawcode-list" |
---|
54 | (let ((l ()) (len (length s)) (i 0) |
---|
55 | (pc #x40) (lead 0) (tr 0) (code 0) (diff 0)) |
---|
56 | (while (< i len) |
---|
57 | (setq lead (aref s i) i (1+ i)) |
---|
58 | (cond ((<= lead #x20) (setq code lead)) |
---|
59 | ((= lead #x21) ;21 (L T T T) |
---|
60 | (setq diff (+ -187660 (* 243 243 243))) |
---|
61 | ; trail 3 |
---|
62 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
63 | (setq diff (+ diff (* tr 243 243))) |
---|
64 | ; trail 2 |
---|
65 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
66 | (setq diff (+ diff (* tr 243))) |
---|
67 | ; trail 1 |
---|
68 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
69 | (setq diff (+ diff tr)) |
---|
70 | ) |
---|
71 | ((< lead #x25) ;22-24 (L T T) |
---|
72 | (setq diff (+ -10513 (* (- lead #x25) 243 243))) |
---|
73 | ; trail 2 |
---|
74 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
75 | (setq diff (+ diff (* tr 243))) |
---|
76 | ; trail 1 |
---|
77 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
78 | (setq diff (+ diff tr)) |
---|
79 | ) |
---|
80 | ((< lead #x50) ;25-4f (L T) |
---|
81 | (setq diff (+ -64 (* (- lead #x50) 243))) |
---|
82 | ; trail 1 |
---|
83 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
84 | (setq diff (+ diff tr)) |
---|
85 | ) |
---|
86 | ((< lead #xd0) ;50-cf (L) |
---|
87 | (setq diff (- lead #x90)) |
---|
88 | ) |
---|
89 | ((< lead #xfb) ;d0-fa (L T) |
---|
90 | (setq diff (+ 64 (* (- lead #xd0) 243))) |
---|
91 | ; trail 1 |
---|
92 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
93 | (setq diff (+ diff tr)) |
---|
94 | ) |
---|
95 | ((< lead #xfe) ;fb-fd (L T T) |
---|
96 | (setq diff (+ 10513 (* (- lead #xfb) 243 243))) |
---|
97 | ; trail 2 |
---|
98 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
99 | (setq diff (+ diff (* tr 243))) |
---|
100 | ; trail 1 |
---|
101 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
102 | (setq diff (+ diff tr)) |
---|
103 | ) |
---|
104 | ((= lead #xfe) ;fe (L T T T) |
---|
105 | (setq diff 187660) |
---|
106 | ; trail 3 |
---|
107 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
108 | (setq diff (+ diff (* tr 243 243))) |
---|
109 | ; trail 2 |
---|
110 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
111 | (setq diff (+ diff (* tr 243))) |
---|
112 | ; trail 1 |
---|
113 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
114 | (setq diff (+ diff tr)) |
---|
115 | ) |
---|
116 | ((= lead #xff) ; reset |
---|
117 | ) |
---|
118 | ); end of cond. |
---|
119 | |
---|
120 | (cond |
---|
121 | ((<= lead #x20) |
---|
122 | (push lead l) |
---|
123 | ; (setq r (concat r (string lead))) |
---|
124 | (if (< lead #x20) (setq pc #x40)) ;#x20�ʤ餽�Τޤ� ) |
---|
125 | ((< lead #xff) |
---|
126 | (progn |
---|
127 | (setq code (+ pc diff)) |
---|
128 | (if (< code 0) (setq code 0));; error recovery |
---|
129 | |
---|
130 | (push code l) |
---|
131 | ; (setq r (concat r (if (> code 0) (code-to-utf8 code) "?"))) |
---|
132 | |
---|
133 | (setq pc (cond |
---|
134 | ((< code #x20) #x40) |
---|
135 | ((= code #x20) pc) ; keep pc |
---|
136 | ((and (<= #x3040 code) (<= code #x309f)) #x3070) |
---|
137 | ((and (<= #x4e00 code) (<= code #x9fa5)) #x7711) |
---|
138 | ((and (<= #xac00 code) (<= code #xd7a3)) #xc1d1) |
---|
139 | (t (+ (logand code (lognot #x7f)) #x40)) |
---|
140 | )); pc |
---|
141 | )) |
---|
142 | (t (setq pc #x40)); #xFF: reset |
---|
143 | ) |
---|
144 | ); wend |
---|
145 | (nreverse l) |
---|
146 | ); let |
---|
147 | ) |
---|
148 | |
---|
149 | (defun nt:diff-to-bocustr (diff) |
---|
150 | "[BOCU] diff --> BOCU-1 string" |
---|
151 | (catch 'bocu-encode-diff |
---|
152 | (let ((s "") (t0 0) (t1 0) (t2 0) (t3 0)) |
---|
153 | (cond |
---|
154 | ((< diff -14536567) (throw 'bocu-encode-diff 'underflow-exception)) |
---|
155 | ((< diff -187660) ; [-14536567,-187660) : 21 |
---|
156 | (progn |
---|
157 | (setq diff (- diff -14536567)) |
---|
158 | (setq t3 (% diff 243)) (setq diff (/ diff 243)) |
---|
159 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
---|
160 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
161 | ;(setq t0 diff) |
---|
162 | (string #x21 (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2) (nt:bocu-encode-trail-char t3)) |
---|
163 | ) |
---|
164 | ) |
---|
165 | ((< diff -10513) ; [-187660,-10513) : 22-24 |
---|
166 | (progn |
---|
167 | (setq diff (- diff -187660)) |
---|
168 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
---|
169 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
170 | (setq t0 diff) |
---|
171 | (string (+ #x22 t0) (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2)) |
---|
172 | ) |
---|
173 | ) |
---|
174 | ((< diff -64) ; [-10513,-64) : 25-4F |
---|
175 | (progn |
---|
176 | (setq diff (- diff -10513)) |
---|
177 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
178 | (setq t0 diff) |
---|
179 | (string (+ #x25 t0) (nt:bocu-encode-trail-char t1)) |
---|
180 | ) |
---|
181 | ) |
---|
182 | ((< diff 64) ; [-64,63) : 50-CF |
---|
183 | (progn |
---|
184 | (setq diff (- diff -64)) |
---|
185 | (setq t0 diff) |
---|
186 | (string (+ #x50 t0)) |
---|
187 | ) |
---|
188 | ) |
---|
189 | ((< diff 10513) ; [64,10513) : D0-FA |
---|
190 | (progn |
---|
191 | (setq diff (- diff 64)) |
---|
192 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
193 | (setq t0 diff) |
---|
194 | (string (+ #xD0 t0) (nt:bocu-encode-trail-char t1)) |
---|
195 | ) |
---|
196 | ) |
---|
197 | ((< diff 187660) ; [10513,187660) : FB-FD |
---|
198 | (progn |
---|
199 | (setq diff (- diff 10513)) |
---|
200 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
---|
201 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
202 | (setq t0 diff) |
---|
203 | (string (+ #xFB t0) (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2)) |
---|
204 | ) |
---|
205 | ) |
---|
206 | ((< diff 14536567) ; [187660,14536567) : FE |
---|
207 | (progn |
---|
208 | (setq diff (- diff 187660)) |
---|
209 | (setq t3 (% diff 243)) (setq diff (/ diff 243)) |
---|
210 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
---|
211 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
212 | ;(setq t0 diff) |
---|
213 | (string #xFE (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2) (nt:bocu-encode-trail-char t3)) |
---|
214 | ) |
---|
215 | ) |
---|
216 | (t (throw 'bocu-encode-diff 'overflow-exception)) |
---|
217 | ); cond |
---|
218 | ); let |
---|
219 | ); caught |
---|
220 | ) |
---|
221 | |
---|
222 | (defun nt:rawcode-list-to-bocustr (l) ; not tested much |
---|
223 | "rawcode list --> BOCU-1 string" |
---|
224 | (let* ((s "") (pc #x40)) |
---|
225 | (while l |
---|
226 | (let* ((code (car l)) |
---|
227 | (diff (- code pc)) ) |
---|
228 | (setq l (cdr l)) |
---|
229 | (setq s (concat s (nt:diff-to-bocustr diff))) |
---|
230 | (setq pc (cond |
---|
231 | ((< code #x20) #x40) |
---|
232 | ((= code #x20) pc) ;keep pc |
---|
233 | ((and (<= #x3040 code) (<= code #x309F)) #x3070) |
---|
234 | ((and (<= #x4E00 code) (<= code #x9FA5)) #x7711) |
---|
235 | ((and (<= #xAC00 code) (<= code #xD7A3)) #xC1D1) |
---|
236 | (t (+ (logand code #xffff80) #x40)))) |
---|
237 | ); let* |
---|
238 | ); wend |
---|
239 | s |
---|
240 | ); let* |
---|
241 | ) |
---|
242 | |
---|
243 | (defsubst nt:bocu-decode (s) |
---|
244 | "decode BOCU-1 string (via utf-8)" |
---|
245 | (let* ((rawcode-list (nt:bocustr-to-rawcode-list s)) |
---|
246 | (utf8str (nt:rawcode-list-to-utf8str rawcode-list))) |
---|
247 | (decode-coding-string utf8str 'utf-8) |
---|
248 | ) |
---|
249 | ) |
---|
250 | |
---|
251 | (defsubst nt:bocu-encode (s) |
---|
252 | "encode a string into BOCU-1 (via utf-8)" |
---|
253 | (let* ((utf8str (encode-coding-string s 'utf-8)) |
---|
254 | (rawcode-list (nt:utf8str-to-rawcode-list utf8str)) |
---|
255 | (bocustr (nt:rawcode-list-to-bocustr rawcode-list)) |
---|
256 | ) |
---|
257 | bocustr |
---|
258 | ) |
---|
259 | ) |
---|
260 | |
---|
261 | ;;; nt-bocu.el ends here |
---|