[67] | 1 | ;;; nt-utf8.el --- converter between utf-8 and rawcode |
---|
| 2 | ;;; some functions require Mule-UCS |
---|
| 3 | ;; |
---|
| 4 | ;; Copyright (C) 2005 Naoya TOZUKA. All Rights Reserved. |
---|
| 5 | ;; |
---|
| 6 | ;; Author: Naoya TOZUKA <pdicviewer@gmail.com> |
---|
| 7 | ;; Maintainer: Naoya TOZUKA <pdicviewer@gmail.com> |
---|
| 8 | ;; Primary distribution site: http://pdicviewer.naochan.com/el/ |
---|
| 9 | ;; |
---|
| 10 | ;; Created: 14 Feb 2005 |
---|
| 11 | ;; Last modified: 15 Dec 2005 |
---|
| 12 | ;; Version: 1.0 |
---|
| 13 | ;; Keywords: UTF-8 |
---|
| 14 | |
---|
| 15 | (provide 'nt-utf8) |
---|
| 16 | |
---|
| 17 | ;;; Commentaries: |
---|
| 18 | ; |
---|
| 19 | ;; core functions |
---|
| 20 | ; (nt:rawcode-to-utf8str CODE) |
---|
| 21 | ; (nt:rawcode-list-to-utf8str LIST) |
---|
| 22 | ; (nt:utf8str-to-rawcode-list STR) |
---|
| 23 | |
---|
| 24 | ;; applied functions |
---|
| 25 | ; (nt:mulestr-to-unicode-list STR) - requires Mule-UCS |
---|
| 26 | ; - convert a Mule (Emacs) string to a list of Unicode code points |
---|
| 27 | ; (nt:unicode-list-to-mulestr LIST) - requires Mule-UCS |
---|
| 28 | ; - convert a list of Unicode code points to a Mule (Emacs) string |
---|
| 29 | |
---|
| 30 | ;;; Code: |
---|
(defun nt:rawcode-to-utf8str (code)
  "Convert the raw code point CODE to its UTF-8 byte sequence.

Return a string built with `string' that holds one element per
UTF-8 byte: one element for CODE up to #x7f, two up to #x7ff,
three up to #xffff and four up to #x10ffff.  Return nil when CODE
is greater than #x10ffff."
  ;; `ash' is used for the right shifts; every branch that shifts is
  ;; only reached for CODE > #x7f, where `ash' and `lsh' agree.
  (cond
   ;; 0xxxxxxx
   ((<= code #x007f)
    (string code))
   ;; 110yyyyy 10xxxxxx
   ((<= code #x07ff)
    (string (logior #xc0 (ash code -6))
            (logior #x80 (logand code #x3f))))
   ;; 1110zzzz 10yyyyyy 10xxxxxx
   ((<= code #xffff)
    (string (logior #xe0 (ash code -12))
            (logior #x80 (logand (ash code -6) #x3f))
            (logior #x80 (logand code #x3f))))
   ;; 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
   ;; Each byte is its own argument to `string'; the lead byte carries
   ;; only the top three payload bits.
   ((<= code #x10ffff)
    (string (logior #xf0 (ash code -18))
            (logior #x80 (logand (ash code -12) #x3f))
            (logior #x80 (logand (ash code -6) #x3f))
            (logior #x80 (logand code #x3f))))))
---|
| 50 | |
---|
(defun nt:rawcode-list-to-utf8str (l)
  "Convert L, a list of raw code points, to a single UTF-8 string.

Each element of L is encoded with `nt:rawcode-to-utf8str' and the
pieces are joined in order.  Return the empty string when L is
empty."
  ;; `mapconcat' joins all pieces in one pass, instead of re-copying
  ;; the accumulated string with `concat' on every iteration.
  (mapconcat 'nt:rawcode-to-utf8str l ""))
---|
| 61 | |
---|
(defun nt:utf8str-to-rawcode-list (s)
  "Decode the UTF-8 string S into a list of raw code points.

Every element of S is read with `aref' and treated as one byte.
The lead byte selects the sequence length:

  0xxxxxxx                              -> 0000-007F
  110yyyyy 10xxxxxx                     -> 0080-07FF
  1110zzzz 10yyyyyy 10xxxxxx            -> 0800-FFFF
  11110uuu 10uuzzzz 10yyyyyy 10xxxxxx   -> 10000-10FFFF

An element that cannot start a sequence, or a lead byte whose
continuation bytes would run past the end of S, is reported
as -1 and decoding resumes at the next element.  The low six bits
of each continuation element are used without checking that it
has the form 10xxxxxx.

Return the code points in the order they appear in S; return nil
for an empty S."
  (let ((len (length s))
        (i 0)
        (result '()))
    (while (< i len)
      (let* ((c (aref s i))
             ;; Number of continuation bytes announced by the lead
             ;; byte, or nil when C cannot start a sequence.
             (extra (cond
                     ((zerop (logand c #x80)) 0)    ; 0xxxxxxx
                     ((= (logand c #xe0) #xc0) 1)   ; 110yyyyy
                     ((= (logand c #xf0) #xe0) 2)   ; 1110zzzz
                     ((= (logand c #xf8) #xf0) 3))) ; 11110uuu
             (code -1))
        (setq i (1+ i))
        ;; Decode only when all announced continuation bytes exist;
        ;; otherwise keep CODE at -1 rather than reading past the end.
        (when (and extra (<= (+ i extra) len))
          ;; Payload of the lead byte: the element itself for a
          ;; one-byte sequence, else its low 5, 4 or 3 bits.
          (setq code (if (zerop extra)
                         c
                       (logand c (ash #x3f (- extra)))))
          ;; Each continuation byte contributes six more bits, so a
          ;; four-byte lead ends up shifted left by 18 in total.
          (while (> extra 0)
            (setq code (logior (ash code 6) (logand (aref s i) #x3f))
                  i (1+ i)
                  extra (1- extra))))
        (setq result (cons code result))))
    (nreverse result)))
---|
| 106 | |
---|
| 107 | ; |
---|
| 108 | ; applied codes |
---|
| 109 | ; |
---|
(defun nt:mulestr-to-unicode-list (s)
  "Return the Unicode raw codes of the Emacs string S as a list.

S is first encoded with the `utf-8' coding system; the encoded
string is then passed to `nt:utf8str-to-rawcode-list'."
  (let ((utf8-bytes (encode-coding-string s 'utf-8)))
    (nt:utf8str-to-rawcode-list utf8-bytes)))
---|
| 114 | |
---|
(defun nt:unicode-list-to-mulestr (l)
  "Build an Emacs string from L, a list of Unicode raw codes.

L is first turned into a UTF-8 string by
`nt:rawcode-list-to-utf8str'; that string is then decoded with
the `utf-8' coding system."
  (let ((utf8-bytes (nt:rawcode-list-to-utf8str l)))
    (decode-coding-string utf8-bytes 'utf-8)))
---|
| 119 | |
---|
| 120 | ;;; nt-utf8.el ends here |
---|