[67] | 1 | ;;; nt-utf8.el --- converter between utf-8 and rawcode |
---|
| 2 | ;;; some functions require Mule-UCS |
---|
| 3 | ;; |
---|
[71] | 4 | ;; Copyright (C) 2005-2009 naoya_t. All Rights Reserved. |
---|
[67] | 5 | ;; |
---|
[71] | 6 | ;; Author: naoya_t <naoya.t@aqua.plala.or.jp> |
---|
| 7 | ;; Maintainer: naoya_t <naoya.t@aqua.plala.or.jp> |
---|
| 8 | ;; Primary distribution site: |
---|
| 9 | ;; http://lambdarepos.svnrepository.com/svn/share/lang/elisp/pdicv-mode/trunk |
---|
[67] | 10 | ;; |
---|
| 11 | ;; Created: 14 Feb 2005 |
---|
| 12 | ;; Last modified: 15 Dec 2005 |
---|
| 13 | ;; Version: 1.0 |
---|
| 14 | ;; Keywords: UTF-8 |
---|
| 15 | |
---|
| 16 | (provide 'nt-utf8) |
---|
| 17 | |
---|
| 18 | ;;; Commentary: |
---|
| 19 | ; |
---|
| 20 | ;; core functions |
---|
| 21 | ; (nt:rawcode-to-utf8str CODE) |
---|
| 22 | ; (nt:rawcode-list-to-utf8str LIST) |
---|
| 23 | ; (nt:utf8str-to-rawcode-list STR) |
---|
| 24 | |
---|
| 25 | ;; applied functions |
---|
| 26 | ; (nt:mulestr-to-unicode-list STR) - requires Mule-UCS |
---|
| 27 | ; - converts an Emacs (Mule) string into a list of Unicode code points, one per character |
---|
| 28 | ; (nt:unicode-list-to-mulestr LIST) - requires Mule-UCS |
---|
| 29 | ; - converts a list of Unicode code points, one per character, into an Emacs (Mule) string |
---|
| 30 | |
---|
| 31 | ;;; Code: |
---|
(defun nt:rawcode-to-utf8str (code)
  "Encode the Unicode code point CODE as a string of UTF-8 bytes.

CODE is an integer.  The result is built with `string' and holds one
element per UTF-8 byte:

  U+0000..U+007F     -> 1 byte   0xxxxxxx
  U+0080..U+07FF     -> 2 bytes  110yyyyy 10xxxxxx
  U+0800..U+FFFF     -> 3 bytes  1110zzzz 10yyyyyy 10xxxxxx
  U+10000..U+10FFFF  -> 4 bytes  11110uuu 10uuzzzz 10yyyyyy 10xxxxxx

Returns nil when CODE is greater than #x10ffff."
  ;; NOTE(review): `string' yields a multibyte string for values of
  ;; #x80 and above on Emacs 23+; confirm whether callers expect a
  ;; unibyte string here.
  (cond
   ((<= code #x007f)
    (string code))
   ((<= code #x07ff)
    (string (logior #xc0 (ash code -6))
            (logior #x80 (logand code #x3f))))
   ((<= code #xffff)
    (string (logior #xe0 (ash code -12))
            (logior #x80 (logand (ash code -6) #x3f))
            (logior #x80 (logand code #x3f))))
   ((<= code #x10ffff)
    ;; Four separate arguments to `string': the lead byte carries the
    ;; top 3 bits, each continuation byte carries the next 6 bits.
    (string (logior #xf0 (ash code -18))
            (logior #x80 (logand (ash code -12) #x3f))
            (logior #x80 (logand (ash code -6) #x3f))
            (logior #x80 (logand code #x3f))))))
---|
| 51 | |
---|
(defun nt:rawcode-list-to-utf8str (l)
  "Encode the code points in L as one string of UTF-8 bytes.

L is a sequence of integers.  Each element is encoded with
`nt:rawcode-to-utf8str' and the pieces are joined in order.
Returns the empty string when L is empty."
  ;; A single `mapconcat' joins all pieces in one pass instead of
  ;; re-copying the accumulated string once per element.
  (mapconcat #'nt:rawcode-to-utf8str l ""))
---|
| 62 | |
---|
(defun nt:utf8str-to-rawcode-list (s)
  "Decode the UTF-8 byte string S into a list of Unicode code points.

Each element of S is read with `aref' and treated as one UTF-8 byte.
The length of a sequence is taken from its lead byte:

  0xxxxxxx                             -> U+0000..U+007F
  110yyyyy 10xxxxxx                    -> U+0080..U+07FF
  1110zzzz 10yyyyyy 10xxxxxx           -> U+0800..U+FFFF
  11110uuu 10uuzzzz 10yyyyyy 10xxxxxx  -> U+10000..U+10FFFF

An element that cannot start a sequence (a stray continuation byte,
or #xf8 and above) contributes -1 to the result.  So does a lead byte
whose sequence is cut off by the end of S; scanning then resumes at
the element right after that lead byte.  Continuation bytes are not
checked for the 10xxxxxx pattern; only their low six bits are used.

Returns the code points in input order, or nil when S is empty."
  (let ((len (length s))
        (i 0)
        (result nil))
    (while (< i len)
      (let* ((c (aref s i))
             ;; Number of continuation bytes announced by the lead
             ;; byte, or nil when C cannot start a sequence.
             (extra (cond ((zerop (logand c #x80)) 0)
                          ((= (logand c #xe0) #xc0) 1)
                          ((= (logand c #xf0) #xe0) 2)
                          ((= (logand c #xf8) #xf0) 3)))
             (code -1))
        (setq i (1+ i))
        (when (and extra (<= (+ i extra) len))
          ;; Start from the payload bits of the lead byte (5, 4 or 3 of
          ;; them); a single-byte element is taken as is.
          (setq code (if (zerop extra)
                         c
                       (logand c (aref [0 #x1f #x0f #x07] extra))))
          ;; Shift in six bits per continuation byte.  For a 4-byte
          ;; sequence this puts the lead-byte bits at position 18.
          (while (> extra 0)
            (setq code (logior (ash code 6) (logand #x3f (aref s i)))
                  i (1+ i)
                  extra (1- extra))))
        (setq result (cons code result))))
    (nreverse result)))
---|
| 107 | |
---|
| 108 | ; |
---|
| 109 | ; applied codes |
---|
| 110 | ; |
---|
(defun nt:mulestr-to-unicode-list (s)
  "Return the Unicode code points of the Emacs string S as a list.

S is first encoded with the `utf-8' coding system; the encoded bytes
are then handed to `nt:utf8str-to-rawcode-list'."
  (let ((utf8-bytes (encode-coding-string s 'utf-8)))
    (nt:utf8str-to-rawcode-list utf8-bytes)))
---|
| 115 | |
---|
(defun nt:unicode-list-to-mulestr (l)
  "Return an Emacs string built from the Unicode code points in L.

L is encoded to UTF-8 bytes with `nt:rawcode-list-to-utf8str' and the
bytes are decoded with the `utf-8' coding system."
  (let ((bytes (nt:rawcode-list-to-utf8str l)))
    ;; The encoder may hand back a multibyte string whose characters
    ;; stand for byte values (`string' does this for values of #x80 and
    ;; above on Emacs 23+).  `decode-coding-string' would pass such
    ;; characters through unchanged, so repack them as real bytes.
    ;; Where `unibyte-string' does not exist the string is used as is.
    (when (and (fboundp 'unibyte-string)
               (multibyte-string-p bytes))
      (setq bytes (apply #'unibyte-string (append bytes nil))))
    (decode-coding-string bytes 'utf-8)))
---|
| 120 | |
---|
| 121 | ;;; nt-utf8.el ends here |
---|