1 | ;;; nt-utf8.el --- converter between utf-8 and rawcode |
---|
2 | ;;; some functions require Mule-UCS |
---|
3 | ;; |
---|
4 | ;; Copyright (C) 2005-2009 naoya_t. All Rights Reserved. |
---|
5 | ;; |
---|
6 | ;; Author: naoya_t <naoya.t@aqua.plala.or.jp> |
---|
7 | ;; Maintainer: naoya_t <naoya.t@aqua.plala.or.jp> |
---|
8 | ;; Primary distribution site: |
---|
9 | ;; http://lambdarepos.svnrepository.com/svn/share/lang/elisp/pdicv-mode/trunk |
---|
10 | ;; |
---|
11 | ;; Created: 14 Feb 2005 |
---|
12 | ;; Last modified: 15 Dec 2005 |
---|
13 | ;; Version: 1.0 |
---|
14 | ;; Keywords: UTF-8 |
---|
15 | |
---|
16 | (provide 'nt-utf8) |
---|
17 | |
---|
18 | ;;; Commentary: |
---|
19 | ; |
---|
20 | ;; core functions |
---|
21 | ; (nt:rawcode-to-utf8str CODE) |
---|
22 | ; (nt:rawcode-list-to-utf8str LIST) |
---|
23 | ; (nt:utf8str-to-rawcode-list STR) |
---|
24 | |
---|
25 | ;; applied functions |
---|
26 | ; (nt:mulestr-to-unicode-list STR) - requires Mule-UCS |
---|
27 | ; - convert a Mule (Emacs internal) string to a list of Unicode code points |
---|
28 | ; (nt:unicode-list-to-mulestr LIST) - requires Mule-UCS |
---|
29 | ; - convert a list of Unicode code points to a Mule (Emacs internal) string |
---|
30 | |
---|
31 | ;;; Code: |
---|
(defun nt:rawcode-to-utf8str (code)
  "Convert the raw code point CODE to its UTF-8 encoding, as a string.

The result is built with `string', one element per UTF-8 byte:
  CODE <= #x7f      ->  1 byte   0xxxxxxx
  CODE <= #x7ff     ->  2 bytes  110yyyyy 10xxxxxx
  CODE <= #xffff    ->  3 bytes  1110zzzz 10yyyyyy 10xxxxxx
  CODE <= #x10ffff  ->  4 bytes  11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
Return nil when CODE is greater than #x10ffff."
  ;; NOTE(review): `string' may produce a multibyte string for values
  ;; above #x7f on Emacs versions that treat them as character codes
  ;; rather than bytes.  Confirm that callers which pass the result to
  ;; `decode-coding-string' receive the kind of string they expect.
  (cond
   ((<= code #x007f)
    (string code))
   ((<= code #x07ff)
    (string (logior #xc0 (lsh code -6))
            (logior #x80 (logand code #x3f))))
   ((<= code #xffff)
    (string (logior #xe0 (lsh code -12))
            (logior #x80 (logand (lsh code -6) #x3f))
            (logior #x80 (logand code #x3f))))
   ((<= code #x10ffff)
    ;; Each byte is a separate argument to `string'.  The lead-byte
    ;; `logior' must be closed before the continuation bytes; otherwise
    ;; all four values are OR-ed together into a single character.
    (string (logior #xf0 (lsh code -18))
            (logior #x80 (logand (lsh code -12) #x3f))
            (logior #x80 (logand (lsh code -6) #x3f))
            (logior #x80 (logand code #x3f))))))
---|
51 | |
---|
(defun nt:rawcode-list-to-utf8str (l)
  "Convert the raw-code list L to a single UTF-8 string.

Each element of L is encoded with `nt:rawcode-to-utf8str' and the
pieces are joined in order with no separator.  An empty L gives the
empty string.  An element for which `nt:rawcode-to-utf8str' returns
nil contributes nothing to the result."
  ;; `mapconcat' joins all pieces in one pass, instead of re-copying a
  ;; growing accumulator with `concat' on every element.
  (mapconcat #'nt:rawcode-to-utf8str l ""))
---|
62 | |
---|
(defun nt:utf8str-to-rawcode-list (s)
  "Convert the UTF-8 string S to a list of raw code points.

S is read one element at a time with `aref'; each element is taken as
one UTF-8 byte.  The lead byte selects the sequence length:
  0xxxxxxx                             ->  0000-007F
  110yyyyy 10xxxxxx                    ->  0080-07FF
  1110zzzz 10yyyyyy 10xxxxxx           ->  0800-FFFF
  11110uuu 10uuzzzz 10yyyyyy 10xxxxxx  ->  10000-10FFFF
Only the low six bits of each following byte are used; the bytes are
not checked for the 10xxxxxx continuation pattern.

The list holds -1 for a byte that cannot start a sequence, and also
for a lead byte whose sequence would run past the end of S.  In both
cases only that one byte is consumed.  An empty S gives nil."
  (let ((result ())
        (len (length s))
        (i 0))
    (while (< i len)
      (let* ((c (aref s i))
             ;; Number of bytes that must follow the lead byte C, or
             ;; nil when C is not a valid lead byte.
             (extra (cond ((zerop (logand c #x80)) 0)      ; 0xxxxxxx
                          ((= (logand c #xe0) #xc0) 1)     ; 110yyyyy
                          ((= (logand c #xf0) #xe0) 2)     ; 1110zzzz
                          ((= (logand c #xf8) #xf0) 3)))   ; 11110uuu
             (code -1))
        (setq i (1+ i))
        ;; Decode only when every following byte is present; otherwise
        ;; keep the -1 sentinel rather than reading past the end of S.
        (when (and extra (<= (+ i extra) len))
          ;; Payload bits carried by the lead byte: 7, 5, 4 or 3.
          (setq code (logand c (aref [#x7f #x1f #x0f #x07] extra)))
          ;; Each following byte contributes six more low-order bits,
          ;; so a 4-byte lead ends up shifted left by 18 in total.
          (while (> extra 0)
            (setq code (logior (lsh code 6) (logand #x3f (aref s i)))
                  i (1+ i)
                  extra (1- extra))))
        (setq result (cons code result))))
    (nreverse result)))
---|
107 | |
---|
108 | ; |
---|
109 | ; applied codes |
---|
110 | ; |
---|
(defun nt:mulestr-to-unicode-list (s)
  "Return the Unicode raw-code list for the Emacs string S.
S is first encoded with `encode-coding-string' using the `utf-8'
coding system; the encoded string is then passed to
`nt:utf8str-to-rawcode-list'."
  (let ((utf8str (encode-coding-string s 'utf-8)))
    (nt:utf8str-to-rawcode-list utf8str)))
---|
115 | |
---|
(defun nt:unicode-list-to-mulestr (l)
  "Return the Emacs string for the Unicode raw-code list L.
L is first turned into a UTF-8 string by `nt:rawcode-list-to-utf8str';
that string is then decoded with `decode-coding-string' using the
`utf-8' coding system."
  (let ((utf8str (nt:rawcode-list-to-utf8str l)))
    (decode-coding-string utf8str 'utf-8)))
---|
120 | |
---|
121 | ;;; nt-utf8.el ends here |
---|