1 | ;;; nt-bocu.el --- decode/encode BOCU-1 string (via utf-8, so requires Mule-UCS) |
---|
2 | ;; |
---|
3 | ;; Copyright (C) 2005-2009 naoya_t. All Rights Reserved. |
---|
4 | ;; |
---|
5 | ;; Author: naoya_t <naoya.t@aqua.plala.or.jp> |
---|
6 | ;; Maintainer: naoya_t <naoya.t@aqua.plala.or.jp> |
---|
7 | ;; Primary distribution site: |
---|
8 | ;; http://lambdarepos.svnrepository.com/svn/share/lang/elisp/pdicv-mode/trunk |
---|
9 | ;; |
---|
10 | ;; Created: 12 Feb 2005 |
---|
11 | ;; Last modified: 15 Dec 2005 (defun --> defsubst) |
---|
12 | ;; Version: 1.0.1 |
---|
13 | ;; Keywords: BOCU-1 encode decode |
---|
14 | |
---|
15 | (provide 'nt-bocu) |
---|
16 | |
---|
17 | ;;; Commentaries: |
---|
18 | |
---|
19 | ;; internal |
---|
20 | ; (nt:bocu-decode-trail-char TR) |
---|
21 | ; (nt:bocu-encode-trail-char NUM) |
---|
22 | |
---|
23 | ; (nt:bocustr-to-rawcode-list STR) |
---|
24 | ; (nt:diff-to-bocustr DIFF) - used by #'bocu-rawcode-list-to-bocustr |
---|
25 | ; (nt:rawcode-list-to-bocustr STR) |
---|
26 | |
---|
27 | ; (nt:bocu-decode STR) |
---|
28 | ; - BOCU-1 ʸ���ǥ����ɤ���ʸ��Mule������ˤ�Ѵ� |
---|
29 | ; (nt:bocu-encode STR) |
---|
30 | ; - ʸ��Mule������ˤ�CU-1 ������ |
---|
31 | ;;; Code: |
---|
32 | (require 'nt-utf8) |
---|
33 | |
---|
34 | (defsubst nt:bocu-decode-trail-char (tr) |
---|
35 | "[BOCU] decode trail char" |
---|
36 | (cond |
---|
37 | ((> tr #x20) (- tr 13)) ;21- >> 14- |
---|
38 | ((>= tr #x1c) (- tr 12)) ;1C ... 1F >> 10 ... 13 |
---|
39 | ((>= tr #x10) (- tr 10)) ;10 ... 19 >> 06 ... 0F |
---|
40 | (t (1- tr)) ;01 ... 06 >> 00 ... 05 |
---|
41 | )) |
---|
42 | |
---|
43 | (defsubst nt:bocu-encode-trail-char (c) |
---|
44 | "[BOCU] encode trail char" |
---|
45 | (cond |
---|
46 | ((> c #x13) (+ c 13)) ;14- >> 21- |
---|
47 | ((>= c #x10) (+ c 12)) ;10 ... 13 >> 1C ... 1F |
---|
48 | ((>= c #x06) (+ c 10)) ;06 ... 0F >> 10 ... 19 |
---|
49 | (t (1+ c)) ;00 ... 05 >> 01 ... 06 |
---|
50 | )) |
---|
51 | |
---|
52 | (defun nt:bocustr-to-rawcode-list (s) |
---|
53 | "[BOCU] BOCU-1 string --> rawcode-list" |
---|
54 | (let ((l ()) (len (length s)) (i 0) |
---|
55 | (pc #x40) (lead 0) (tr 0) (code 0) (diff 0)) |
---|
56 | (while (< i len) |
---|
57 | (setq lead (aref s i) i (1+ i)) |
---|
58 | (cond ((<= lead #x20) (setq code lead)) |
---|
59 | ((= lead #x21) ;21 (L T T T) |
---|
60 | (setq diff (+ -187660 (* 243 243 243))) |
---|
61 | ; trail 3 |
---|
62 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
63 | (setq diff (+ diff (* tr 243 243))) |
---|
64 | ; trail 2 |
---|
65 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
66 | (setq diff (+ diff (* tr 243))) |
---|
67 | ; trail 1 |
---|
68 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
69 | (setq diff (+ diff tr)) ) |
---|
70 | ((< lead #x25) ;22-24 (L T T) |
---|
71 | (setq diff (+ -10513 (* (- lead #x25) 243 243))) |
---|
72 | ; trail 2 |
---|
73 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
74 | (setq diff (+ diff (* tr 243))) |
---|
75 | ; trail 1 |
---|
76 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
77 | (setq diff (+ diff tr)) ) |
---|
78 | ((< lead #x50) ;25-4f (L T) |
---|
79 | (setq diff (+ -64 (* (- lead #x50) 243))) |
---|
80 | ; trail 1 |
---|
81 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
82 | (setq diff (+ diff tr)) ) |
---|
83 | ((< lead #xd0) ;50-cf (L) |
---|
84 | (setq diff (- lead #x90)) ) |
---|
85 | ((< lead #xfb) ;d0-fa (L T) |
---|
86 | (setq diff (+ 64 (* (- lead #xd0) 243))) |
---|
87 | ; trail 1 |
---|
88 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
89 | (setq diff (+ diff tr)) ) |
---|
90 | ((< lead #xfe) ;fb-fd (L T T) |
---|
91 | (setq diff (+ 10513 (* (- lead #xfb) 243 243))) |
---|
92 | ; trail 2 |
---|
93 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
94 | (setq diff (+ diff (* tr 243))) |
---|
95 | ; trail 1 |
---|
96 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
97 | (setq diff (+ diff tr)) ) |
---|
98 | ((= lead #xfe) ;fe (L T T T) |
---|
99 | (setq diff 187660) |
---|
100 | ; trail 3 |
---|
101 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
102 | (setq diff (+ diff (* tr 243 243))) |
---|
103 | ; trail 2 |
---|
104 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
105 | (setq diff (+ diff (* tr 243))) |
---|
106 | ; trail 1 |
---|
107 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
108 | (setq diff (+ diff tr)) ) |
---|
109 | ((= lead #xff) )) ; reset |
---|
110 | |
---|
111 | (cond |
---|
112 | ((<= lead #x20) |
---|
113 | (push lead l) |
---|
114 | ; (setq r (concat r (string lead))) |
---|
115 | (if (< lead #x20) (setq pc #x40)) ) ;#x20�ʤ餽�Τޤ� ((< lead #xff) |
---|
116 | (progn |
---|
117 | (setq code (+ pc diff)) |
---|
118 | (if (< code 0) (setq code 0));; error recovery |
---|
119 | (push code l) |
---|
120 | ; (setq r (concat r (if (> code 0) (code-to-utf8 code) "?"))) |
---|
121 | |
---|
122 | (setq pc (cond |
---|
123 | ((< code #x20) #x40) |
---|
124 | ((= code #x20) pc) ; keep pc |
---|
125 | ((and (<= #x3040 code) (<= code #x309f)) #x3070) |
---|
126 | ((and (<= #x4e00 code) (<= code #x9fa5)) #x7711) |
---|
127 | ((and (<= #xac00 code) (<= code #xd7a3)) #xc1d1) |
---|
128 | (t (+ (logand code (lognot #x7f)) #x40)) |
---|
129 | )); pc |
---|
130 | )) |
---|
131 | (t (setq pc #x40)) )); #xFF: reset |
---|
132 | (nreverse l) )) |
---|
133 | |
---|
134 | (defun nt:diff-to-bocustr (diff) |
---|
135 | "[BOCU] diff --> BOCU-1 string" |
---|
136 | (catch 'bocu-encode-diff |
---|
137 | (let ((s "") (t0 0) (t1 0) (t2 0) (t3 0)) |
---|
138 | (cond |
---|
139 | ((< diff -14536567) (throw 'bocu-encode-diff 'underflow-exception)) |
---|
140 | ((< diff -187660) ; [-14536567,-187660) : 21 |
---|
141 | (progn |
---|
142 | (setq diff (- diff -14536567)) |
---|
143 | (setq t3 (% diff 243)) (setq diff (/ diff 243)) |
---|
144 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
---|
145 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
146 | ;(setq t0 diff) |
---|
147 | (string #x21 (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2) (nt:bocu-encode-trail-char t3)) )) |
---|
148 | ((< diff -10513) ; [-187660,-10513) : 22-24 |
---|
149 | (progn |
---|
150 | (setq diff (- diff -187660)) |
---|
151 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
---|
152 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
153 | (setq t0 diff) |
---|
154 | (string (+ #x22 t0) (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2)) )) |
---|
155 | ((< diff -64) ; [-10513,-64) : 25-4F |
---|
156 | (progn |
---|
157 | (setq diff (- diff -10513)) |
---|
158 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
159 | (setq t0 diff) |
---|
160 | (string (+ #x25 t0) (nt:bocu-encode-trail-char t1)) )) |
---|
161 | ((< diff 64) ; [-64,63) : 50-CF |
---|
162 | (progn |
---|
163 | (setq diff (- diff -64)) |
---|
164 | (setq t0 diff) |
---|
165 | (string (+ #x50 t0)) )) |
---|
166 | ((< diff 10513) ; [64,10513) : D0-FA |
---|
167 | (progn |
---|
168 | (setq diff (- diff 64)) |
---|
169 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
170 | (setq t0 diff) |
---|
171 | (string (+ #xD0 t0) (nt:bocu-encode-trail-char t1)) )) |
---|
172 | ((< diff 187660) ; [10513,187660) : FB-FD |
---|
173 | (progn |
---|
174 | (setq diff (- diff 10513)) |
---|
175 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
---|
176 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
177 | (setq t0 diff) |
---|
178 | (string (+ #xFB t0) (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2)) )) |
---|
179 | ((< diff 14536567) ; [187660,14536567) : FE |
---|
180 | (progn |
---|
181 | (setq diff (- diff 187660)) |
---|
182 | (setq t3 (% diff 243)) (setq diff (/ diff 243)) |
---|
183 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
---|
184 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
185 | ;(setq t0 diff) |
---|
186 | (string #xFE (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2) (nt:bocu-encode-trail-char t3)) )) |
---|
187 | (t (throw 'bocu-encode-diff 'overflow-exception)) )))) |
---|
188 | |
---|
189 | (defun nt:rawcode-list-to-bocustr (l) ; not tested much |
---|
190 | "rawcode list --> BOCU-1 string" |
---|
191 | (let* ((s "") (pc #x40)) |
---|
192 | (while l |
---|
193 | (let* ((code (car l)) |
---|
194 | (diff (- code pc)) ) |
---|
195 | (setq l (cdr l)) |
---|
196 | (setq s (concat s (nt:diff-to-bocustr diff))) |
---|
197 | (setq pc (cond |
---|
198 | ((< code #x20) #x40) |
---|
199 | ((= code #x20) pc) ;keep pc |
---|
200 | ((and (<= #x3040 code) (<= code #x309F)) #x3070) |
---|
201 | ((and (<= #x4E00 code) (<= code #x9FA5)) #x7711) |
---|
202 | ((and (<= #xAC00 code) (<= code #xD7A3)) #xC1D1) |
---|
203 | (t (+ (logand code #xffff80) #x40)))) )) |
---|
204 | s)) |
---|
205 | |
---|
206 | (defsubst nt:bocu-decode (s) |
---|
207 | "decode BOCU-1 string (via utf-8)" |
---|
208 | (let* ((rawcode-list (nt:bocustr-to-rawcode-list s)) |
---|
209 | (utf8str (nt:rawcode-list-to-utf8str rawcode-list))) |
---|
210 | (decode-coding-string utf8str 'utf-8) )) |
---|
211 | |
---|
212 | (defsubst nt:bocu-encode (s) |
---|
213 | "encode a string into BOCU-1 (via utf-8)" |
---|
214 | (let* ((utf8str (encode-coding-string s 'utf-8)) |
---|
215 | (rawcode-list (nt:utf8str-to-rawcode-list utf8str)) |
---|
216 | (bocustr (nt:rawcode-list-to-bocustr rawcode-list))) |
---|
217 | bocustr)) |
---|
218 | |
---|
219 | ;;; nt-bocu.el ends here |
---|