| [67] | 1 | ;;; nt-bocu.el --- decode/encode BOCU-1 string (via utf-8, so requires Mule-UCS) |
|---|
| 2 | ;; |
|---|
| 3 | ;; Copyright (C) 2005 Naoya TOZUKA. All Rights Reserved. |
|---|
| 4 | ;; |
|---|
| 5 | ;; Author: Naoya TOZUKA <pdicviewer@gmail.com> |
|---|
| 6 | ;; Maintainer: Naoya TOZUKA <pdicviewer@gmail.com> |
|---|
| 7 | ;; Primary distribution site: http://pdicviewer.naochan.com/el/ |
|---|
| 8 | ;; |
|---|
| 9 | ;; Created: 12 Feb 2005 |
|---|
| 10 | ;; Last modified: 15 Dec 2005 (defun --> defsubst) |
|---|
| 11 | ;; Version: 1.0.1 |
|---|
| 12 | ;; Keywords: BOCU-1 encode decode |
|---|
| 13 | |
|---|
| 14 | (provide 'nt-bocu) |
|---|
| 15 | |
|---|
| 16 | ;;; Commentaries: |
|---|
| 17 | |
|---|
| 18 | ;; internal |
|---|
| 19 | ; (nt:bocu-decode-trail-char TR) |
|---|
| 20 | ; (nt:bocu-encode-trail-char NUM) |
|---|
| 21 | |
|---|
| 22 | ; (nt:bocustr-to-rawcode-list STR) |
|---|
| 23 | ; (nt:diff-to-bocustr DIFF) - used by #'bocu-rawcode-list-to-bocustr |
|---|
| 24 | ; (nt:rawcode-list-to-bocustr STR) |
|---|
| 25 | |
|---|
| 26 | ; (nt:bocu-decode STR) |
|---|
| 27 | ; - BOCU-1 ʸ���ǥ����ɤ���ʸ��Mule������ˤ�Ѵ� |
|---|
| 28 | ; (nt:bocu-encode STR) |
|---|
| 29 | ; - ʸ��Mule������ˤ�CU-1 ������ |
|---|
| 30 | ;;; Code: |
|---|
| 31 | (require 'nt-utf8) |
|---|
| 32 | |
|---|
| 33 | (defsubst nt:bocu-decode-trail-char (tr) |
|---|
| 34 | "[BOCU] decode trail char" |
|---|
| 35 | (cond |
|---|
| 36 | ((> tr #x20) (- tr 13)) ;21- >> 14- |
|---|
| 37 | ((>= tr #x1c) (- tr 12)) ;1C ... 1F >> 10 ... 13 |
|---|
| 38 | ((>= tr #x10) (- tr 10)) ;10 ... 19 >> 06 ... 0F |
|---|
| 39 | (t (1- tr)) ;01 ... 06 >> 00 ... 05 |
|---|
| 40 | ); cond |
|---|
| 41 | ) |
|---|
| 42 | (defsubst nt:bocu-encode-trail-char (c) |
|---|
| 43 | "[BOCU] encode trail char" |
|---|
| 44 | (cond |
|---|
| 45 | ((> c #x13) (+ c 13)) ;14- >> 21- |
|---|
| 46 | ((>= c #x10) (+ c 12)) ;10 ... 13 >> 1C ... 1F |
|---|
| 47 | ((>= c #x06) (+ c 10)) ;06 ... 0F >> 10 ... 19 |
|---|
| 48 | (t (1+ c)) ;00 ... 05 >> 01 ... 06 |
|---|
| 49 | ); cond |
|---|
| 50 | ) |
|---|
| 51 | |
|---|
| 52 | (defun nt:bocustr-to-rawcode-list (s) |
|---|
| 53 | "[BOCU] BOCU-1 string --> rawcode-list" |
|---|
| 54 | (let ((l ()) (len (length s)) (i 0) |
|---|
| 55 | (pc #x40) (lead 0) (tr 0) (code 0) (diff 0)) |
|---|
| 56 | (while (< i len) |
|---|
| 57 | (setq lead (aref s i) i (1+ i)) |
|---|
| 58 | (cond ((<= lead #x20) (setq code lead)) |
|---|
| 59 | ((= lead #x21) ;21 (L T T T) |
|---|
| 60 | (setq diff (+ -187660 (* 243 243 243))) |
|---|
| 61 | ; trail 3 |
|---|
| 62 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
|---|
| 63 | (setq diff (+ diff (* tr 243 243))) |
|---|
| 64 | ; trail 2 |
|---|
| 65 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
|---|
| 66 | (setq diff (+ diff (* tr 243))) |
|---|
| 67 | ; trail 1 |
|---|
| 68 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
|---|
| 69 | (setq diff (+ diff tr)) |
|---|
| 70 | ) |
|---|
| 71 | ((< lead #x25) ;22-24 (L T T) |
|---|
| 72 | (setq diff (+ -10513 (* (- lead #x25) 243 243))) |
|---|
| 73 | ; trail 2 |
|---|
| 74 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
|---|
| 75 | (setq diff (+ diff (* tr 243))) |
|---|
| 76 | ; trail 1 |
|---|
| 77 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
|---|
| 78 | (setq diff (+ diff tr)) |
|---|
| 79 | ) |
|---|
| 80 | ((< lead #x50) ;25-4f (L T) |
|---|
| 81 | (setq diff (+ -64 (* (- lead #x50) 243))) |
|---|
| 82 | ; trail 1 |
|---|
| 83 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
|---|
| 84 | (setq diff (+ diff tr)) |
|---|
| 85 | ) |
|---|
| 86 | ((< lead #xd0) ;50-cf (L) |
|---|
| 87 | (setq diff (- lead #x90)) |
|---|
| 88 | ) |
|---|
| 89 | ((< lead #xfb) ;d0-fa (L T) |
|---|
| 90 | (setq diff (+ 64 (* (- lead #xd0) 243))) |
|---|
| 91 | ; trail 1 |
|---|
| 92 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
|---|
| 93 | (setq diff (+ diff tr)) |
|---|
| 94 | ) |
|---|
| 95 | ((< lead #xfe) ;fb-fd (L T T) |
|---|
| 96 | (setq diff (+ 10513 (* (- lead #xfb) 243 243))) |
|---|
| 97 | ; trail 2 |
|---|
| 98 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
|---|
| 99 | (setq diff (+ diff (* tr 243))) |
|---|
| 100 | ; trail 1 |
|---|
| 101 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
|---|
| 102 | (setq diff (+ diff tr)) |
|---|
| 103 | ) |
|---|
| 104 | ((= lead #xfe) ;fe (L T T T) |
|---|
| 105 | (setq diff 187660) |
|---|
| 106 | ; trail 3 |
|---|
| 107 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
|---|
| 108 | (setq diff (+ diff (* tr 243 243))) |
|---|
| 109 | ; trail 2 |
|---|
| 110 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
|---|
| 111 | (setq diff (+ diff (* tr 243))) |
|---|
| 112 | ; trail 1 |
|---|
| 113 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
|---|
| 114 | (setq diff (+ diff tr)) |
|---|
| 115 | ) |
|---|
| 116 | ((= lead #xff) ; reset |
|---|
| 117 | ) |
|---|
| 118 | ); end of cond. |
|---|
| 119 | |
|---|
| 120 | (cond |
|---|
| 121 | ((<= lead #x20) |
|---|
| 122 | (push lead l) |
|---|
| 123 | ; (setq r (concat r (string lead))) |
|---|
| 124 | (if (< lead #x20) (setq pc #x40)) ;#x20�ʤ餽�Τޤ� ) |
|---|
| 125 | ((< lead #xff) |
|---|
| 126 | (progn |
|---|
| 127 | (setq code (+ pc diff)) |
|---|
| 128 | (if (< code 0) (setq code 0));; error recovery |
|---|
| 129 | |
|---|
| 130 | (push code l) |
|---|
| 131 | ; (setq r (concat r (if (> code 0) (code-to-utf8 code) "?"))) |
|---|
| 132 | |
|---|
| 133 | (setq pc (cond |
|---|
| 134 | ((< code #x20) #x40) |
|---|
| 135 | ((= code #x20) pc) ; keep pc |
|---|
| 136 | ((and (<= #x3040 code) (<= code #x309f)) #x3070) |
|---|
| 137 | ((and (<= #x4e00 code) (<= code #x9fa5)) #x7711) |
|---|
| 138 | ((and (<= #xac00 code) (<= code #xd7a3)) #xc1d1) |
|---|
| 139 | (t (+ (logand code (lognot #x7f)) #x40)) |
|---|
| 140 | )); pc |
|---|
| 141 | )) |
|---|
| 142 | (t (setq pc #x40)); #xFF: reset |
|---|
| 143 | ) |
|---|
| 144 | ); wend |
|---|
| 145 | (nreverse l) |
|---|
| 146 | ); let |
|---|
| 147 | ) |
|---|
| 148 | |
|---|
| 149 | (defun nt:diff-to-bocustr (diff) |
|---|
| 150 | "[BOCU] diff --> BOCU-1 string" |
|---|
| 151 | (catch 'bocu-encode-diff |
|---|
| 152 | (let ((s "") (t0 0) (t1 0) (t2 0) (t3 0)) |
|---|
| 153 | (cond |
|---|
| 154 | ((< diff -14536567) (throw 'bocu-encode-diff 'underflow-exception)) |
|---|
| 155 | ((< diff -187660) ; [-14536567,-187660) : 21 |
|---|
| 156 | (progn |
|---|
| 157 | (setq diff (- diff -14536567)) |
|---|
| 158 | (setq t3 (% diff 243)) (setq diff (/ diff 243)) |
|---|
| 159 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
|---|
| 160 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
|---|
| 161 | ;(setq t0 diff) |
|---|
| 162 | (string #x21 (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2) (nt:bocu-encode-trail-char t3)) |
|---|
| 163 | ) |
|---|
| 164 | ) |
|---|
| 165 | ((< diff -10513) ; [-187660,-10513) : 22-24 |
|---|
| 166 | (progn |
|---|
| 167 | (setq diff (- diff -187660)) |
|---|
| 168 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
|---|
| 169 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
|---|
| 170 | (setq t0 diff) |
|---|
| 171 | (string (+ #x22 t0) (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2)) |
|---|
| 172 | ) |
|---|
| 173 | ) |
|---|
| 174 | ((< diff -64) ; [-10513,-64) : 25-4F |
|---|
| 175 | (progn |
|---|
| 176 | (setq diff (- diff -10513)) |
|---|
| 177 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
|---|
| 178 | (setq t0 diff) |
|---|
| 179 | (string (+ #x25 t0) (nt:bocu-encode-trail-char t1)) |
|---|
| 180 | ) |
|---|
| 181 | ) |
|---|
| 182 | ((< diff 64) ; [-64,63) : 50-CF |
|---|
| 183 | (progn |
|---|
| 184 | (setq diff (- diff -64)) |
|---|
| 185 | (setq t0 diff) |
|---|
| 186 | (string (+ #x50 t0)) |
|---|
| 187 | ) |
|---|
| 188 | ) |
|---|
| 189 | ((< diff 10513) ; [64,10513) : D0-FA |
|---|
| 190 | (progn |
|---|
| 191 | (setq diff (- diff 64)) |
|---|
| 192 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
|---|
| 193 | (setq t0 diff) |
|---|
| 194 | (string (+ #xD0 t0) (nt:bocu-encode-trail-char t1)) |
|---|
| 195 | ) |
|---|
| 196 | ) |
|---|
| 197 | ((< diff 187660) ; [10513,187660) : FB-FD |
|---|
| 198 | (progn |
|---|
| 199 | (setq diff (- diff 10513)) |
|---|
| 200 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
|---|
| 201 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
|---|
| 202 | (setq t0 diff) |
|---|
| 203 | (string (+ #xFB t0) (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2)) |
|---|
| 204 | ) |
|---|
| 205 | ) |
|---|
| 206 | ((< diff 14536567) ; [187660,14536567) : FE |
|---|
| 207 | (progn |
|---|
| 208 | (setq diff (- diff 187660)) |
|---|
| 209 | (setq t3 (% diff 243)) (setq diff (/ diff 243)) |
|---|
| 210 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
|---|
| 211 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
|---|
| 212 | ;(setq t0 diff) |
|---|
| 213 | (string #xFE (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2) (nt:bocu-encode-trail-char t3)) |
|---|
| 214 | ) |
|---|
| 215 | ) |
|---|
| 216 | (t (throw 'bocu-encode-diff 'overflow-exception)) |
|---|
| 217 | ); cond |
|---|
| 218 | ); let |
|---|
| 219 | ); caught |
|---|
| 220 | ) |
|---|
| 221 | |
|---|
| 222 | (defun nt:rawcode-list-to-bocustr (l) ; not tested much |
|---|
| 223 | "rawcode list --> BOCU-1 string" |
|---|
| 224 | (let* ((s "") (pc #x40)) |
|---|
| 225 | (while l |
|---|
| 226 | (let* ((code (car l)) |
|---|
| 227 | (diff (- code pc)) ) |
|---|
| 228 | (setq l (cdr l)) |
|---|
| 229 | (setq s (concat s (nt:diff-to-bocustr diff))) |
|---|
| 230 | (setq pc (cond |
|---|
| 231 | ((< code #x20) #x40) |
|---|
| 232 | ((= code #x20) pc) ;keep pc |
|---|
| 233 | ((and (<= #x3040 code) (<= code #x309F)) #x3070) |
|---|
| 234 | ((and (<= #x4E00 code) (<= code #x9FA5)) #x7711) |
|---|
| 235 | ((and (<= #xAC00 code) (<= code #xD7A3)) #xC1D1) |
|---|
| 236 | (t (+ (logand code #xffff80) #x40)))) |
|---|
| 237 | ); let* |
|---|
| 238 | ); wend |
|---|
| 239 | s |
|---|
| 240 | ); let* |
|---|
| 241 | ) |
|---|
| 242 | |
|---|
| 243 | (defsubst nt:bocu-decode (s) |
|---|
| 244 | "decode BOCU-1 string (via utf-8)" |
|---|
| 245 | (let* ((rawcode-list (nt:bocustr-to-rawcode-list s)) |
|---|
| 246 | (utf8str (nt:rawcode-list-to-utf8str rawcode-list))) |
|---|
| 247 | (decode-coding-string utf8str 'utf-8) |
|---|
| 248 | ) |
|---|
| 249 | ) |
|---|
| 250 | |
|---|
| 251 | (defsubst nt:bocu-encode (s) |
|---|
| 252 | "encode a string into BOCU-1 (via utf-8)" |
|---|
| 253 | (let* ((utf8str (encode-coding-string s 'utf-8)) |
|---|
| 254 | (rawcode-list (nt:utf8str-to-rawcode-list utf8str)) |
|---|
| 255 | (bocustr (nt:rawcode-list-to-bocustr rawcode-list)) |
|---|
| 256 | ) |
|---|
| 257 | bocustr |
|---|
| 258 | ) |
|---|
| 259 | ) |
|---|
| 260 | |
|---|
| 261 | ;;; nt-bocu.el ends here |
|---|