[67] | 1 | ;;; nt-bocu.el --- decode/encode BOCU-1 string (via utf-8, so requires Mule-UCS) |
---|
| 2 | ;; |
---|
| 3 | ;; Copyright (C) 2005 Naoya TOZUKA. All Rights Reserved. |
---|
| 4 | ;; |
---|
| 5 | ;; Author: Naoya TOZUKA <pdicviewer@gmail.com> |
---|
| 6 | ;; Maintainer: Naoya TOZUKA <pdicviewer@gmail.com> |
---|
| 7 | ;; Primary distribution site: http://pdicviewer.naochan.com/el/ |
---|
| 8 | ;; |
---|
| 9 | ;; Created: 12 Feb 2005 |
---|
| 10 | ;; Last modified: 15 Dec 2005 (defun --> defsubst) |
---|
| 11 | ;; Version: 1.0.1 |
---|
| 12 | ;; Keywords: BOCU-1 encode decode |
---|
| 13 | |
---|
| 14 | (provide 'nt-bocu) |
---|
| 15 | |
---|
| 16 | ;;; Commentaries: |
---|
| 17 | |
---|
| 18 | ;; internal |
---|
| 19 | ; (nt:bocu-decode-trail-char TR) |
---|
| 20 | ; (nt:bocu-encode-trail-char NUM) |
---|
| 21 | |
---|
| 22 | ; (nt:bocustr-to-rawcode-list STR) |
---|
| 23 | ; (nt:diff-to-bocustr DIFF) - used by #'bocu-rawcode-list-to-bocustr |
---|
| 24 | ; (nt:rawcode-list-to-bocustr STR) |
---|
| 25 | |
---|
| 26 | ; (nt:bocu-decode STR) |
---|
| 27 | ; - BOCU-1 ʸ���ǥ����ɤ���ʸ��Mule������ˤ�Ѵ� |
---|
| 28 | ; (nt:bocu-encode STR) |
---|
| 29 | ; - ʸ��Mule������ˤ�CU-1 ������ |
---|
| 30 | ;;; Code: |
---|
| 31 | (require 'nt-utf8) |
---|
| 32 | |
---|
| 33 | (defsubst nt:bocu-decode-trail-char (tr) |
---|
| 34 | "[BOCU] decode trail char" |
---|
| 35 | (cond |
---|
| 36 | ((> tr #x20) (- tr 13)) ;21- >> 14- |
---|
| 37 | ((>= tr #x1c) (- tr 12)) ;1C ... 1F >> 10 ... 13 |
---|
| 38 | ((>= tr #x10) (- tr 10)) ;10 ... 19 >> 06 ... 0F |
---|
| 39 | (t (1- tr)) ;01 ... 06 >> 00 ... 05 |
---|
| 40 | ); cond |
---|
| 41 | ) |
---|
| 42 | (defsubst nt:bocu-encode-trail-char (c) |
---|
| 43 | "[BOCU] encode trail char" |
---|
| 44 | (cond |
---|
| 45 | ((> c #x13) (+ c 13)) ;14- >> 21- |
---|
| 46 | ((>= c #x10) (+ c 12)) ;10 ... 13 >> 1C ... 1F |
---|
| 47 | ((>= c #x06) (+ c 10)) ;06 ... 0F >> 10 ... 19 |
---|
| 48 | (t (1+ c)) ;00 ... 05 >> 01 ... 06 |
---|
| 49 | ); cond |
---|
| 50 | ) |
---|
| 51 | |
---|
| 52 | (defun nt:bocustr-to-rawcode-list (s) |
---|
| 53 | "[BOCU] BOCU-1 string --> rawcode-list" |
---|
| 54 | (let ((l ()) (len (length s)) (i 0) |
---|
| 55 | (pc #x40) (lead 0) (tr 0) (code 0) (diff 0)) |
---|
| 56 | (while (< i len) |
---|
| 57 | (setq lead (aref s i) i (1+ i)) |
---|
| 58 | (cond ((<= lead #x20) (setq code lead)) |
---|
| 59 | ((= lead #x21) ;21 (L T T T) |
---|
| 60 | (setq diff (+ -187660 (* 243 243 243))) |
---|
| 61 | ; trail 3 |
---|
| 62 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
| 63 | (setq diff (+ diff (* tr 243 243))) |
---|
| 64 | ; trail 2 |
---|
| 65 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
| 66 | (setq diff (+ diff (* tr 243))) |
---|
| 67 | ; trail 1 |
---|
| 68 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
| 69 | (setq diff (+ diff tr)) |
---|
| 70 | ) |
---|
| 71 | ((< lead #x25) ;22-24 (L T T) |
---|
| 72 | (setq diff (+ -10513 (* (- lead #x25) 243 243))) |
---|
| 73 | ; trail 2 |
---|
| 74 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
| 75 | (setq diff (+ diff (* tr 243))) |
---|
| 76 | ; trail 1 |
---|
| 77 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
| 78 | (setq diff (+ diff tr)) |
---|
| 79 | ) |
---|
| 80 | ((< lead #x50) ;25-4f (L T) |
---|
| 81 | (setq diff (+ -64 (* (- lead #x50) 243))) |
---|
| 82 | ; trail 1 |
---|
| 83 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
| 84 | (setq diff (+ diff tr)) |
---|
| 85 | ) |
---|
| 86 | ((< lead #xd0) ;50-cf (L) |
---|
| 87 | (setq diff (- lead #x90)) |
---|
| 88 | ) |
---|
| 89 | ((< lead #xfb) ;d0-fa (L T) |
---|
| 90 | (setq diff (+ 64 (* (- lead #xd0) 243))) |
---|
| 91 | ; trail 1 |
---|
| 92 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
| 93 | (setq diff (+ diff tr)) |
---|
| 94 | ) |
---|
| 95 | ((< lead #xfe) ;fb-fd (L T T) |
---|
| 96 | (setq diff (+ 10513 (* (- lead #xfb) 243 243))) |
---|
| 97 | ; trail 2 |
---|
| 98 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
| 99 | (setq diff (+ diff (* tr 243))) |
---|
| 100 | ; trail 1 |
---|
| 101 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
| 102 | (setq diff (+ diff tr)) |
---|
| 103 | ) |
---|
| 104 | ((= lead #xfe) ;fe (L T T T) |
---|
| 105 | (setq diff 187660) |
---|
| 106 | ; trail 3 |
---|
| 107 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
| 108 | (setq diff (+ diff (* tr 243 243))) |
---|
| 109 | ; trail 2 |
---|
| 110 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
| 111 | (setq diff (+ diff (* tr 243))) |
---|
| 112 | ; trail 1 |
---|
| 113 | (setq tr (nt:bocu-decode-trail-char (aref s i)) i (1+ i)) |
---|
| 114 | (setq diff (+ diff tr)) |
---|
| 115 | ) |
---|
| 116 | ((= lead #xff) ; reset |
---|
| 117 | ) |
---|
| 118 | ); end of cond. |
---|
| 119 | |
---|
| 120 | (cond |
---|
| 121 | ((<= lead #x20) |
---|
| 122 | (push lead l) |
---|
| 123 | ; (setq r (concat r (string lead))) |
---|
| 124 | (if (< lead #x20) (setq pc #x40)) ;#x20�ʤ餽�Τޤ� ) |
---|
| 125 | ((< lead #xff) |
---|
| 126 | (progn |
---|
| 127 | (setq code (+ pc diff)) |
---|
| 128 | (if (< code 0) (setq code 0));; error recovery |
---|
| 129 | |
---|
| 130 | (push code l) |
---|
| 131 | ; (setq r (concat r (if (> code 0) (code-to-utf8 code) "?"))) |
---|
| 132 | |
---|
| 133 | (setq pc (cond |
---|
| 134 | ((< code #x20) #x40) |
---|
| 135 | ((= code #x20) pc) ; keep pc |
---|
| 136 | ((and (<= #x3040 code) (<= code #x309f)) #x3070) |
---|
| 137 | ((and (<= #x4e00 code) (<= code #x9fa5)) #x7711) |
---|
| 138 | ((and (<= #xac00 code) (<= code #xd7a3)) #xc1d1) |
---|
| 139 | (t (+ (logand code (lognot #x7f)) #x40)) |
---|
| 140 | )); pc |
---|
| 141 | )) |
---|
| 142 | (t (setq pc #x40)); #xFF: reset |
---|
| 143 | ) |
---|
| 144 | ); wend |
---|
| 145 | (nreverse l) |
---|
| 146 | ); let |
---|
| 147 | ) |
---|
| 148 | |
---|
| 149 | (defun nt:diff-to-bocustr (diff) |
---|
| 150 | "[BOCU] diff --> BOCU-1 string" |
---|
| 151 | (catch 'bocu-encode-diff |
---|
| 152 | (let ((s "") (t0 0) (t1 0) (t2 0) (t3 0)) |
---|
| 153 | (cond |
---|
| 154 | ((< diff -14536567) (throw 'bocu-encode-diff 'underflow-exception)) |
---|
| 155 | ((< diff -187660) ; [-14536567,-187660) : 21 |
---|
| 156 | (progn |
---|
| 157 | (setq diff (- diff -14536567)) |
---|
| 158 | (setq t3 (% diff 243)) (setq diff (/ diff 243)) |
---|
| 159 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
---|
| 160 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
| 161 | ;(setq t0 diff) |
---|
| 162 | (string #x21 (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2) (nt:bocu-encode-trail-char t3)) |
---|
| 163 | ) |
---|
| 164 | ) |
---|
| 165 | ((< diff -10513) ; [-187660,-10513) : 22-24 |
---|
| 166 | (progn |
---|
| 167 | (setq diff (- diff -187660)) |
---|
| 168 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
---|
| 169 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
| 170 | (setq t0 diff) |
---|
| 171 | (string (+ #x22 t0) (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2)) |
---|
| 172 | ) |
---|
| 173 | ) |
---|
| 174 | ((< diff -64) ; [-10513,-64) : 25-4F |
---|
| 175 | (progn |
---|
| 176 | (setq diff (- diff -10513)) |
---|
| 177 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
| 178 | (setq t0 diff) |
---|
| 179 | (string (+ #x25 t0) (nt:bocu-encode-trail-char t1)) |
---|
| 180 | ) |
---|
| 181 | ) |
---|
| 182 | ((< diff 64) ; [-64,63) : 50-CF |
---|
| 183 | (progn |
---|
| 184 | (setq diff (- diff -64)) |
---|
| 185 | (setq t0 diff) |
---|
| 186 | (string (+ #x50 t0)) |
---|
| 187 | ) |
---|
| 188 | ) |
---|
| 189 | ((< diff 10513) ; [64,10513) : D0-FA |
---|
| 190 | (progn |
---|
| 191 | (setq diff (- diff 64)) |
---|
| 192 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
| 193 | (setq t0 diff) |
---|
| 194 | (string (+ #xD0 t0) (nt:bocu-encode-trail-char t1)) |
---|
| 195 | ) |
---|
| 196 | ) |
---|
| 197 | ((< diff 187660) ; [10513,187660) : FB-FD |
---|
| 198 | (progn |
---|
| 199 | (setq diff (- diff 10513)) |
---|
| 200 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
---|
| 201 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
| 202 | (setq t0 diff) |
---|
| 203 | (string (+ #xFB t0) (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2)) |
---|
| 204 | ) |
---|
| 205 | ) |
---|
| 206 | ((< diff 14536567) ; [187660,14536567) : FE |
---|
| 207 | (progn |
---|
| 208 | (setq diff (- diff 187660)) |
---|
| 209 | (setq t3 (% diff 243)) (setq diff (/ diff 243)) |
---|
| 210 | (setq t2 (% diff 243)) (setq diff (/ diff 243)) |
---|
| 211 | (setq t1 (% diff 243)) (setq diff (/ diff 243)) |
---|
| 212 | ;(setq t0 diff) |
---|
| 213 | (string #xFE (nt:bocu-encode-trail-char t1) (nt:bocu-encode-trail-char t2) (nt:bocu-encode-trail-char t3)) |
---|
| 214 | ) |
---|
| 215 | ) |
---|
| 216 | (t (throw 'bocu-encode-diff 'overflow-exception)) |
---|
| 217 | ); cond |
---|
| 218 | ); let |
---|
| 219 | ); caught |
---|
| 220 | ) |
---|
| 221 | |
---|
| 222 | (defun nt:rawcode-list-to-bocustr (l) ; not tested much |
---|
| 223 | "rawcode list --> BOCU-1 string" |
---|
| 224 | (let* ((s "") (pc #x40)) |
---|
| 225 | (while l |
---|
| 226 | (let* ((code (car l)) |
---|
| 227 | (diff (- code pc)) ) |
---|
| 228 | (setq l (cdr l)) |
---|
| 229 | (setq s (concat s (nt:diff-to-bocustr diff))) |
---|
| 230 | (setq pc (cond |
---|
| 231 | ((< code #x20) #x40) |
---|
| 232 | ((= code #x20) pc) ;keep pc |
---|
| 233 | ((and (<= #x3040 code) (<= code #x309F)) #x3070) |
---|
| 234 | ((and (<= #x4E00 code) (<= code #x9FA5)) #x7711) |
---|
| 235 | ((and (<= #xAC00 code) (<= code #xD7A3)) #xC1D1) |
---|
| 236 | (t (+ (logand code #xffff80) #x40)))) |
---|
| 237 | ); let* |
---|
| 238 | ); wend |
---|
| 239 | s |
---|
| 240 | ); let* |
---|
| 241 | ) |
---|
| 242 | |
---|
| 243 | (defsubst nt:bocu-decode (s) |
---|
| 244 | "decode BOCU-1 string (via utf-8)" |
---|
| 245 | (let* ((rawcode-list (nt:bocustr-to-rawcode-list s)) |
---|
| 246 | (utf8str (nt:rawcode-list-to-utf8str rawcode-list))) |
---|
| 247 | (decode-coding-string utf8str 'utf-8) |
---|
| 248 | ) |
---|
| 249 | ) |
---|
| 250 | |
---|
| 251 | (defsubst nt:bocu-encode (s) |
---|
| 252 | "encode a string into BOCU-1 (via utf-8)" |
---|
| 253 | (let* ((utf8str (encode-coding-string s 'utf-8)) |
---|
| 254 | (rawcode-list (nt:utf8str-to-rawcode-list utf8str)) |
---|
| 255 | (bocustr (nt:rawcode-list-to-bocustr rawcode-list)) |
---|
| 256 | ) |
---|
| 257 | bocustr |
---|
| 258 | ) |
---|
| 259 | ) |
---|
| 260 | |
---|
| 261 | ;;; nt-bocu.el ends here |
---|