rev |
line source |
yuuji@0
|
1 /* ========================================================================
|
yuuji@0
|
2 * Copyright 1988-2007 University of Washington
|
yuuji@0
|
3 *
|
yuuji@0
|
4 * Licensed under the Apache License, Version 2.0 (the "License");
|
yuuji@0
|
5 * you may not use this file except in compliance with the License.
|
yuuji@0
|
6 * You may obtain a copy of the License at
|
yuuji@0
|
7 *
|
yuuji@0
|
8 * http://www.apache.org/licenses/LICENSE-2.0
|
yuuji@0
|
9 *
|
yuuji@0
|
10 *
|
yuuji@0
|
11 * ========================================================================
|
yuuji@0
|
12 */
|
yuuji@0
|
13
|
yuuji@0
|
14 /*
|
yuuji@0
|
15 * Program: UTF-8 auxiliary routines (c-client and MIME2 support)
|
yuuji@0
|
16 *
|
yuuji@0
|
17 * Author: Mark Crispin
|
yuuji@0
|
18 * Networks and Distributed Computing
|
yuuji@0
|
19 * Computing & Communications
|
yuuji@0
|
20 * University of Washington
|
yuuji@0
|
21 * Administration Building, AG-44
|
yuuji@0
|
22 * Seattle, WA 98195
|
yuuji@0
|
23 * Internet: MRC@CAC.Washington.EDU
|
yuuji@0
|
24 *
|
yuuji@0
|
25 * Date: 11 June 1997
|
yuuji@0
|
26 * Last Edited: 12 October 2007
|
yuuji@0
|
27 */
|
yuuji@0
|
28
|
yuuji@0
|
29
|
yuuji@0
|
30 #include <stdio.h>
|
yuuji@0
|
31 #include <ctype.h>
|
yuuji@0
|
32 #include "c-client.h"
|
yuuji@0
|
33
|
yuuji@0
|
34 /* Convert charset labelled stringlist to UTF-8 in place
|
yuuji@0
|
35 * Accepts: string list
|
yuuji@0
|
36 * charset
|
yuuji@0
|
37 */
|
yuuji@0
|
38
|
yuuji@0
|
39 static void utf8_stringlist (STRINGLIST *st,char *charset)
|
yuuji@0
|
40 {
|
yuuji@0
|
41 SIZEDTEXT txt;
|
yuuji@0
|
42 /* convert entire stringstruct */
|
yuuji@0
|
43 if (st) do if (utf8_text (&st->text,charset,&txt,U8T_CANONICAL)) {
|
yuuji@0
|
44 fs_give ((void **) &st->text.data);
|
yuuji@0
|
45 st->text.data = txt.data; /* transfer this text */
|
yuuji@0
|
46 st->text.size = txt.size;
|
yuuji@0
|
47 } while (st = st->next);
|
yuuji@0
|
48 }
|
yuuji@0
|
49
|
yuuji@0
|
50
|
yuuji@0
|
51 /* Convert charset labelled searchpgm to UTF-8 in place
|
yuuji@0
|
52 * Accepts: search program
|
yuuji@0
|
53 * charset
|
yuuji@0
|
54 */
|
yuuji@0
|
55
|
yuuji@0
|
56 void utf8_searchpgm (SEARCHPGM *pgm,char *charset)
|
yuuji@0
|
57 {
|
yuuji@0
|
58 SIZEDTEXT txt;
|
yuuji@0
|
59 SEARCHHEADER *hl;
|
yuuji@0
|
60 SEARCHOR *ol;
|
yuuji@0
|
61 SEARCHPGMLIST *pl;
|
yuuji@0
|
62 if (pgm) { /* must have a search program */
|
yuuji@0
|
63 utf8_stringlist (pgm->bcc,charset);
|
yuuji@0
|
64 utf8_stringlist (pgm->cc,charset);
|
yuuji@0
|
65 utf8_stringlist (pgm->from,charset);
|
yuuji@0
|
66 utf8_stringlist (pgm->to,charset);
|
yuuji@0
|
67 utf8_stringlist (pgm->subject,charset);
|
yuuji@0
|
68 for (hl = pgm->header; hl; hl = hl->next) {
|
yuuji@0
|
69 if (utf8_text (&hl->line,charset,&txt,U8T_CANONICAL)) {
|
yuuji@0
|
70 fs_give ((void **) &hl->line.data);
|
yuuji@0
|
71 hl->line.data = txt.data;
|
yuuji@0
|
72 hl->line.size = txt.size;
|
yuuji@0
|
73 }
|
yuuji@0
|
74 if (utf8_text (&hl->text,charset,&txt,U8T_CANONICAL)) {
|
yuuji@0
|
75 fs_give ((void **) &hl->text.data);
|
yuuji@0
|
76 hl->text.data = txt.data;
|
yuuji@0
|
77 hl->text.size = txt.size;
|
yuuji@0
|
78 }
|
yuuji@0
|
79 }
|
yuuji@0
|
80 utf8_stringlist (pgm->body,charset);
|
yuuji@0
|
81 utf8_stringlist (pgm->text,charset);
|
yuuji@0
|
82 for (ol = pgm->or; ol; ol = ol->next) {
|
yuuji@0
|
83 utf8_searchpgm (ol->first,charset);
|
yuuji@0
|
84 utf8_searchpgm (ol->second,charset);
|
yuuji@0
|
85 }
|
yuuji@0
|
86 for (pl = pgm->not; pl; pl = pl->next) utf8_searchpgm (pl->pgm,charset);
|
yuuji@0
|
87 utf8_stringlist (pgm->return_path,charset);
|
yuuji@0
|
88 utf8_stringlist (pgm->sender,charset);
|
yuuji@0
|
89 utf8_stringlist (pgm->reply_to,charset);
|
yuuji@0
|
90 utf8_stringlist (pgm->in_reply_to,charset);
|
yuuji@0
|
91 utf8_stringlist (pgm->message_id,charset);
|
yuuji@0
|
92 utf8_stringlist (pgm->newsgroups,charset);
|
yuuji@0
|
93 utf8_stringlist (pgm->followup_to,charset);
|
yuuji@0
|
94 utf8_stringlist (pgm->references,charset);
|
yuuji@0
|
95 }
|
yuuji@0
|
96 }
|
yuuji@0
|
97
|
yuuji@0
|
98 /* Convert MIME-2 sized text to UTF-8
|
yuuji@0
|
99 * Accepts: source sized text
|
yuuji@0
|
100 * destination sized text
|
yuuji@0
|
101 * flags (same as utf8_text())
|
yuuji@0
|
102 * Returns: T if successful, NIL if failure
|
yuuji@0
|
103 */
|
yuuji@0
|
104
|
yuuji@0
|
105 #define MINENCWORD 9
|
yuuji@0
|
106 #define MAXENCWORD 75
|
yuuji@0
|
107
|
yuuji@0
|
108 /* This resizing algorithm is stupid, but hopefully it should never be triggered
|
yuuji@0
|
109 * except for a pathological header. The main concern is that we don't get a
|
yuuji@0
|
110 * buffer overflow.
|
yuuji@0
|
111 */
|
yuuji@0
|
112
|
yuuji@0
|
113 #define DSIZE 65536 /* real headers should never be this big */
|
yuuji@0
|
114 #define FUZZ 10 /* paranoia fuzz */
|
yuuji@0
|
115
|
yuuji@0
|
116 long utf8_mime2text (SIZEDTEXT *src,SIZEDTEXT *dst,long flags)
|
yuuji@0
|
117 {
|
yuuji@0
|
118 unsigned char *s,*se,*e,*ee,*t,*te;
|
yuuji@0
|
119 char *cs,*ce,*ls;
|
yuuji@0
|
120 SIZEDTEXT txt,rtxt;
|
yuuji@0
|
121 unsigned long i;
|
yuuji@0
|
122 size_t dsize = min (DSIZE,((src->size / 4) + 1) * 9);
|
yuuji@0
|
123 /* always create buffer if canonicalizing */
|
yuuji@0
|
124 dst->data = (flags & U8T_CANONICAL) ?
|
yuuji@0
|
125 (unsigned char *) fs_get ((size_t) dsize) : NIL;
|
yuuji@0
|
126 dst->size = 0; /* nothing written yet */
|
yuuji@0
|
127 /* look for encoded words */
|
yuuji@0
|
128 for (s = src->data, se = src->data + src->size; s < se; s++) {
|
yuuji@0
|
129 if (((se - s) > MINENCWORD) && (*s == '=') && (s[1] == '?') &&
|
yuuji@0
|
130 (cs = (char *) mime2_token (s+2,se,(unsigned char **) &ce)) &&
|
yuuji@0
|
131 (e = mime2_token ((unsigned char *) ce+1,se,&ee)) &&
|
yuuji@0
|
132 (te = mime2_text (t = e+2,se)) && (ee == e + 1) &&
|
yuuji@0
|
133 ((te - s) < MAXENCWORD)) {
|
yuuji@0
|
134 if (mime2_decode (e,t,te,&txt)) {
|
yuuji@0
|
135 *ce = '\0'; /* temporarily tie off charset */
|
yuuji@0
|
136 if (ls = strchr (cs,'*')) *ls = '\0';
|
yuuji@0
|
137 /* convert to UTF-8 as best we can */
|
yuuji@0
|
138 if (!utf8_text (&txt,cs,&rtxt,flags)) utf8_text (&txt,NIL,&rtxt,flags);
|
yuuji@0
|
139 if (dst->data) { /* make sure existing buffer fits */
|
yuuji@0
|
140 while (dsize <= (dst->size + rtxt.size + FUZZ)) {
|
yuuji@0
|
141 dsize += DSIZE; /* kick it up */
|
yuuji@0
|
142 fs_resize ((void **) &dst->data,dsize);
|
yuuji@0
|
143 }
|
yuuji@0
|
144 }
|
yuuji@0
|
145 else { /* make a new buffer */
|
yuuji@0
|
146 while (dsize <= (dst->size + rtxt.size)) dsize += DSIZE;
|
yuuji@0
|
147 memcpy (dst->data = (unsigned char *) fs_get (dsize),src->data,
|
yuuji@0
|
148 dst->size = s - src->data);
|
yuuji@0
|
149 }
|
yuuji@0
|
150 for (i = 0; i < rtxt.size; i++) dst->data[dst->size++] = rtxt.data[i];
|
yuuji@0
|
151
|
yuuji@0
|
152 /* all done with converted text */
|
yuuji@0
|
153 if (rtxt.data != txt.data) fs_give ((void **) &rtxt.data);
|
yuuji@0
|
154 if (ls) *ls = '*'; /* restore language tag delimiter */
|
yuuji@0
|
155 *ce = '?'; /* restore charset delimiter */
|
yuuji@0
|
156 /* all done with decoded text */
|
yuuji@0
|
157 fs_give ((void **) &txt.data);
|
yuuji@0
|
158 s = te+1; /* continue scan after encoded word */
|
yuuji@0
|
159 /* skip leading whitespace */
|
yuuji@0
|
160 for (t = s + 1; (t < se) && ((*t == ' ') || (*t == '\t')); t++);
|
yuuji@0
|
161 /* see if likely continuation encoded word */
|
yuuji@0
|
162 if (t < (se - MINENCWORD)) switch (*t) {
|
yuuji@0
|
163 case '=': /* possible encoded word? */
|
yuuji@0
|
164 if (t[1] == '?') s = t - 1;
|
yuuji@0
|
165 break;
|
yuuji@0
|
166 case '\015': /* CR, eat a following LF */
|
yuuji@0
|
167 if (t[1] == '\012') t++;
|
yuuji@0
|
168 case '\012': /* possible end of logical line */
|
yuuji@0
|
169 if ((t[1] == ' ') || (t[1] == '\t')) {
|
yuuji@0
|
170 do t++;
|
yuuji@0
|
171 while ((t < (se - MINENCWORD)) && ((t[1] == ' ')||(t[1] == '\t')));
|
yuuji@0
|
172 if ((t < (se - MINENCWORD)) && (t[1] == '=') && (t[2] == '?'))
|
yuuji@0
|
173 s = t; /* definitely looks like continuation */
|
yuuji@0
|
174 }
|
yuuji@0
|
175 }
|
yuuji@0
|
176 }
|
yuuji@0
|
177 else { /* restore original text */
|
yuuji@0
|
178 if (dst->data) fs_give ((void **) &dst->data);
|
yuuji@0
|
179 dst->data = src->data;
|
yuuji@0
|
180 dst->size = src->size;
|
yuuji@0
|
181 return NIL; /* syntax error: MIME-2 decoding failure */
|
yuuji@0
|
182 }
|
yuuji@0
|
183 }
|
yuuji@0
|
184 else do if (dst->data) { /* stash ASCII characters until LWSP */
|
yuuji@0
|
185 if (dsize < (dst->size + FUZZ)) {
|
yuuji@0
|
186 dsize += DSIZE; /* kick it up */
|
yuuji@0
|
187 fs_resize ((void **) &dst->data,dsize);
|
yuuji@0
|
188 }
|
yuuji@0
|
189 /* kludge: assumes ASCII doesn't decompose and titlecases to one byte */
|
yuuji@0
|
190 dst->data[dst->size++] = (flags & U8T_CASECANON) ?
|
yuuji@0
|
191 (unsigned char) ucs4_titlecase (*s) : *s;
|
yuuji@0
|
192 }
|
yuuji@0
|
193 while ((*s != ' ') && (*s != '\t') && (*s != '\015') && (*s != '\012') &&
|
yuuji@0
|
194 (++s < se));
|
yuuji@0
|
195 }
|
yuuji@0
|
196 if (dst->data) dst->data[dst->size] = '\0';
|
yuuji@0
|
197 else { /* nothing converted, return identity */
|
yuuji@0
|
198 dst->data = src->data;
|
yuuji@0
|
199 dst->size = src->size;
|
yuuji@0
|
200 }
|
yuuji@0
|
201 return T; /* success */
|
yuuji@0
|
202 }
|
yuuji@0
|
203
|
yuuji@0
|
204 /* Decode MIME-2 text
|
yuuji@0
|
205 * Accepts: Encoding
|
yuuji@0
|
206 * text
|
yuuji@0
|
207 * text end
|
yuuji@0
|
208 * destination sized text
|
yuuji@0
|
209 * Returns: T if successful, else NIL
|
yuuji@0
|
210 */
|
yuuji@0
|
211
|
yuuji@0
|
212 long mime2_decode (unsigned char *e,unsigned char *t,unsigned char *te,
|
yuuji@0
|
213 SIZEDTEXT *txt)
|
yuuji@0
|
214 {
|
yuuji@0
|
215 unsigned char *q;
|
yuuji@0
|
216 txt->data = NIL; /* initially no returned data */
|
yuuji@0
|
217 switch (*e) { /* dispatch based upon encoding */
|
yuuji@0
|
218 case 'Q': case 'q': /* sort-of QUOTED-PRINTABLE */
|
yuuji@0
|
219 txt->data = (unsigned char *) fs_get ((size_t) (te - t) + 1);
|
yuuji@0
|
220 for (q = t,txt->size = 0; q < te; q++) switch (*q) {
|
yuuji@0
|
221 case '=': /* quoted character */
|
yuuji@0
|
222 /* both must be hex */
|
yuuji@0
|
223 if (!isxdigit (q[1]) || !isxdigit (q[2])) {
|
yuuji@0
|
224 fs_give ((void **) &txt->data);
|
yuuji@0
|
225 return NIL; /* syntax error: bad quoted character */
|
yuuji@0
|
226 }
|
yuuji@0
|
227 /* assemble character */
|
yuuji@0
|
228 txt->data[txt->size++] = hex2byte (q[1],q[2]);
|
yuuji@0
|
229 q += 2; /* advance past quoted character */
|
yuuji@0
|
230 break;
|
yuuji@0
|
231 case '_': /* convert to space */
|
yuuji@0
|
232 txt->data[txt->size++] = ' ';
|
yuuji@0
|
233 break;
|
yuuji@0
|
234 default: /* ordinary character */
|
yuuji@0
|
235 txt->data[txt->size++] = *q;
|
yuuji@0
|
236 break;
|
yuuji@0
|
237 }
|
yuuji@0
|
238 txt->data[txt->size] = '\0';
|
yuuji@0
|
239 break;
|
yuuji@0
|
240 case 'B': case 'b': /* BASE64 */
|
yuuji@0
|
241 if (txt->data = (unsigned char *) rfc822_base64 (t,te - t,&txt->size))
|
yuuji@0
|
242 break;
|
yuuji@0
|
243 default: /* any other encoding is unknown */
|
yuuji@0
|
244 return NIL; /* syntax error: unknown encoding */
|
yuuji@0
|
245 }
|
yuuji@0
|
246 return T;
|
yuuji@0
|
247 }
|
yuuji@0
|
248
|
yuuji@0
|
249 /* Get MIME-2 token from encoded word
|
yuuji@0
|
250 * Accepts: current text pointer
|
yuuji@0
|
251 * text limit pointer
|
yuuji@0
|
252 * pointer to returned end pointer
|
yuuji@0
|
253 * Returns: current text pointer & end pointer if success, else NIL
|
yuuji@0
|
254 */
|
yuuji@0
|
255
|
yuuji@0
|
/* Get MIME-2 token from encoded word
 * Accepts: current text pointer
 *          text limit pointer (first octet that must not be examined)
 *          pointer to returned end pointer
 * Returns: current text pointer & end pointer if success, else a null
 *          pointer
 *
 * A token is a run of graphic characters ended by '?'.  On success the
 * end pointer addresses that '?', which is guaranteed to lie before the
 * text limit.  Running out of text, a space or control character, or one
 * of the excluded special characters makes the token invalid.
 *
 * The limit is tested before each octet is read, so nothing at or beyond
 * the limit is ever dereferenced.
 */

unsigned char *mime2_token (unsigned char *s,unsigned char *se,
                            unsigned char **t)
{
  for (*t = s; *t < se; ++*t) {
    if (**t == '?') return s;   /* found the delimiter, token complete */
    if (!isgraph (**t)) return NULL;    /* CTL or space */
    switch (**t) {
    case '(': case ')': case '<': case '>': case '@': case ',': case ';':
    case ':': case '\\': case '"': case '/': case '[': case ']': case '.':
    case '=':
      return NULL;              /* none of these are valid in tokens */
    default:
      break;
    }
  }
  return NULL;                  /* out of text before the delimiter */
}
|
yuuji@0
|
270
|
yuuji@0
|
271
|
yuuji@0
|
272 /* Get MIME-2 text from encoded word
|
yuuji@0
|
273 * Accepts: current text pointer
|
yuuji@0
|
274 * text limit pointer
|
yuuji@0
|
275 * pointer to returned end pointer
|
yuuji@0
|
276 * Returns: end pointer if success, else NIL
|
yuuji@0
|
277 */
|
yuuji@0
|
278
|
yuuji@0
|
/* Get MIME-2 text from encoded word
 * Accepts: current text pointer
 *          text limit pointer
 * Returns: pointer to the closing '?' if success, else a null pointer
 *
 * The encoded text must consist solely of graphic characters and be
 * terminated by "?=" which in turn must be followed by the end of the
 * text, a space, a tab, CR or LF.
 *
 * The scan stops ON the first octet that is not graphic (rather than
 * stepping past it), so text such as "ab ?=" is rejected instead of being
 * accepted with an embedded space.
 */

unsigned char *mime2_text (unsigned char *s,unsigned char *se)
{
  unsigned char *t = se - 1;    /* last octet that may hold the '?' and
                                 * still leave room for the '=' */
                                /* search for closing ?, make sure valid */
  while ((s < t) && (*s != '?') && isgraph (*s)) ++s;
  return ((s < t) && (*s == '?') && (s[1] == '=') &&
          ((se == (s + 2)) || (s[2] == ' ') || (s[2] == '\t') ||
           (s[2] == '\015') || (s[2] == '\012'))) ? s : NULL;
}
|
yuuji@0
|
288
|
yuuji@0
|
289 /* Convert UTF-16 string to Modified Base64
|
yuuji@0
|
290 * Accepts: destination pointer
|
yuuji@0
|
291 * source string
|
yuuji@0
|
292 * source length in octets
|
yuuji@0
|
293 * Returns: updated destination pointer
|
yuuji@0
|
294 */
|
yuuji@0
|
295
|
yuuji@0
|
/* Convert UTF-16 string to Modified Base64
 * Accepts: destination pointer
 *          source string
 *          source length in octets
 * Returns: updated destination pointer (one past the last octet written)
 *
 * Output is '&', then the source octets encoded six bits at a time with
 * the alphabet below (which uses ',' where plain BASE64 uses '/'), then
 * '-'.  No '=' padding is emitted: a trailing group of one source octet
 * yields two characters and a trailing group of two yields three.  The
 * output is not NUL-terminated here.
 */

static unsigned char *utf16_to_mbase64 (unsigned char *t,unsigned char *s,
                                        size_t i)
{
  static const char alphabet[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
  unsigned char *out = t;
  const unsigned char *in = s;
  size_t left = i;
  *out++ = '&';                 /* write shift-in */
                                /* whole 3-octet groups -> 4 characters */
  for (; left >= 3; in += 3, left -= 3) {
    out[0] = alphabet[in[0] >> 2];
    out[1] = alphabet[((in[0] << 4) | (in[1] >> 4)) & 0x3f];
    out[2] = alphabet[((in[1] << 2) | (in[2] >> 6)) & 0x3f];
    out[3] = alphabet[in[2] & 0x3f];
    out += 4;
  }
  if (left == 2) {              /* two octets left -> 3 characters */
    *out++ = alphabet[in[0] >> 2];
    *out++ = alphabet[((in[0] << 4) | (in[1] >> 4)) & 0x3f];
    *out++ = alphabet[(in[1] << 2) & 0x3f];
  }
  else if (left == 1) {         /* one octet left -> 2 characters */
    *out++ = alphabet[in[0] >> 2];
    *out++ = alphabet[(in[0] << 4) & 0x3f];
  }
  *out++ = '-';                 /* write shift-out */
  return out;
}
|
yuuji@0
|
321
|
yuuji@0
|
322
|
yuuji@0
|
323 /* Poot a UTF-16 value to a buffer
|
yuuji@0
|
324 * Accepts: buffer pointer
|
yuuji@0
|
325 * value
|
yuuji@0
|
326 * Returns: updated pointer
|
yuuji@0
|
327 */
|
yuuji@0
|
328
|
yuuji@0
|
/* Put a 16-bit UTF-16 value into a buffer, high octet first
 * Accepts: buffer pointer
 *          value
 * Returns: updated pointer (two octets past the one passed in)
 *
 * Only bits 8-15 and 0-7 of the value are stored; anything above is
 * discarded by the conversion to unsigned char.
 */

static unsigned char *utf16_poot (unsigned char *s,unsigned long c)
{
  s[0] = (unsigned char) (c >> 8);      /* high octet */
  s[1] = (unsigned char) (c & 0xff);    /* low octet */
  return s + 2;
}
|
yuuji@0
|
335
|
yuuji@0
|
336 /* Convert UTF-8 to Modified UTF-7
|
yuuji@0
|
337 * Accepts: UTF-8 string
|
yuuji@0
|
338 * Returns: Modified UTF-7 string on success, NIL if invalid UTF-8
|
yuuji@0
|
339 */
|
yuuji@0
|
340
|
yuuji@0
|
341 #define MAXUNIUTF8 4 /* maximum length of Unicode UTF-8 sequence */
|
yuuji@0
|
342
|
yuuji@0
|
343 unsigned char *utf8_to_mutf7 (unsigned char *src)
|
yuuji@0
|
344 {
|
yuuji@0
|
345 unsigned char *u16buf,*utf16;
|
yuuji@0
|
346 unsigned char *ret,*t;
|
yuuji@0
|
347 unsigned long j,c;
|
yuuji@0
|
348 unsigned char *s = src;
|
yuuji@0
|
349 unsigned long i = 0;
|
yuuji@0
|
350 int nonascii = 0;
|
yuuji@0
|
351 while (*s) { /* pass one: count destination octets */
|
yuuji@0
|
352 if (*s & 0x80) { /* non-ASCII character? */
|
yuuji@0
|
353 j = MAXUNIUTF8; /* get single UCS-4 codepoint */
|
yuuji@0
|
354 if ((c = utf8_get (&s,&j)) & U8G_ERROR) return NIL;
|
yuuji@0
|
355 /* tally number of UTF-16 octets */
|
yuuji@0
|
356 nonascii += (c & U8GM_NONBMP) ? 4 : 2;
|
yuuji@0
|
357 }
|
yuuji@0
|
358 else { /* ASCII character */
|
yuuji@0
|
359 if (nonascii) { /* add pending Modified BASE64 size + shifts */
|
yuuji@0
|
360 i += ((nonascii / 3) * 4) + ((j = nonascii % 3) ? j + 1 : 0) + 2;
|
yuuji@0
|
361 nonascii = 0; /* back to ASCII */
|
yuuji@0
|
362 }
|
yuuji@0
|
363 if (*s == '&') i += 2; /* two octets if the escape */
|
yuuji@0
|
364 else ++i; /* otherwise just count another octet */
|
yuuji@0
|
365 ++s; /* advance to next source octet */
|
yuuji@0
|
366 }
|
yuuji@0
|
367 }
|
yuuji@0
|
368 if (nonascii) /* add pending Modified BASE64 size + shifts */
|
yuuji@0
|
369 i += ((nonascii / 3) * 4) + ((j = nonascii % 3) ? j + 1 : 0) + 2;
|
yuuji@0
|
370
|
yuuji@0
|
371 /* create return buffer */
|
yuuji@0
|
372 t = ret = (unsigned char *) fs_get (i + 1);
|
yuuji@0
|
373 /* and scratch buffer */
|
yuuji@0
|
374 utf16 = u16buf = (unsigned char *) fs_get (i + 1);
|
yuuji@0
|
375 for (s = src; *s;) { /* pass two: copy destination octets */
|
yuuji@0
|
376 if (*s & 0x80) { /* non-ASCII character? */
|
yuuji@0
|
377 j = MAXUNIUTF8; /* get single UCS-4 codepoint */
|
yuuji@0
|
378 if ((c = utf8_get (&s,&j)) & U8G_ERROR) return NIL;
|
yuuji@0
|
379 if (c & U8GM_NONBMP) { /* non-BMP? */
|
yuuji@0
|
380 c -= UTF16_BASE; /* yes, convert to surrogate */
|
yuuji@0
|
381 utf16 = utf16_poot (utf16_poot (utf16,(c >> UTF16_SHIFT)+UTF16_SURRH),
|
yuuji@0
|
382 (c & UTF16_MASK) + UTF16_SURRL);
|
yuuji@0
|
383 }
|
yuuji@0
|
384 else utf16 = utf16_poot (utf16,c);
|
yuuji@0
|
385 }
|
yuuji@0
|
386 else { /* ASCII character */
|
yuuji@0
|
387 if (utf16 != u16buf) { /* add pending Modified BASE64 size + shifts */
|
yuuji@0
|
388 t = utf16_to_mbase64 (t,u16buf,utf16 - u16buf);
|
yuuji@0
|
389 utf16 = u16buf; /* reset buffer */
|
yuuji@0
|
390 }
|
yuuji@0
|
391 *t++ = *s; /* copy the character */
|
yuuji@0
|
392 if (*s == '&') *t++ = '-';/* special sequence if the escape */
|
yuuji@0
|
393 ++s; /* advance to next source octet */
|
yuuji@0
|
394 }
|
yuuji@0
|
395 }
|
yuuji@0
|
396 /* add pending Modified BASE64 size + shifts */
|
yuuji@0
|
397 if (utf16 != u16buf) t = utf16_to_mbase64 (t,u16buf,utf16 - u16buf);
|
yuuji@0
|
398 *t = '\0'; /* tie off destination */
|
yuuji@0
|
399 if (i != (t - ret)) fatal ("utf8_to_mutf7 botch");
|
yuuji@0
|
400 fs_give ((void **) &u16buf);
|
yuuji@0
|
401 return ret;
|
yuuji@0
|
402 }
|
yuuji@0
|
403
|
yuuji@0
|
404 /* Convert Modified UTF-7 to UTF-8
|
yuuji@0
|
405 * Accepts: Modified UTF-7 string
|
yuuji@0
|
406 * Returns: UTF-8 string on success, NIL if invalid Modified UTF-7
|
yuuji@0
|
407 */
|
yuuji@0
|
408
|
yuuji@0
|
409 unsigned char *utf8_from_mutf7 (unsigned char *src)
|
yuuji@0
|
410 {
|
yuuji@0
|
411 SIZEDTEXT utf8,utf7;
|
yuuji@0
|
412 unsigned char *s;
|
yuuji@0
|
413 int mbase64 = 0;
|
yuuji@0
|
414 /* disallow bogus strings */
|
yuuji@0
|
415 if (mail_utf7_valid (src)) return NIL;
|
yuuji@0
|
416 /* initialize SIZEDTEXTs */
|
yuuji@0
|
417 memset (&utf7,0,sizeof (SIZEDTEXT));
|
yuuji@0
|
418 memset (&utf8,0,sizeof (SIZEDTEXT));
|
yuuji@0
|
419 /* make copy of source */
|
yuuji@0
|
420 for (s = cpytxt (&utf7,src,strlen (src)); *s; ++s) switch (*s) {
|
yuuji@0
|
421 case '&': /* Modified UTF-7 uses & instead of + */
|
yuuji@0
|
422 *s = '+';
|
yuuji@0
|
423 mbase64 = T; /* note that we are in Modified BASE64 */
|
yuuji@0
|
424 break;
|
yuuji@0
|
425 case '+': /* temporarily swap text + to & */
|
yuuji@0
|
426 if (!mbase64) *s = '&';
|
yuuji@0
|
427 break;
|
yuuji@0
|
428 case '-': /* shift back to ASCII */
|
yuuji@0
|
429 mbase64 = NIL;
|
yuuji@0
|
430 break;
|
yuuji@0
|
431 case ',': /* Modified UTF-7 uses , instead of / ... */
|
yuuji@0
|
432 if (mbase64) *s = '/'; /* ...in Modified BASE64 */
|
yuuji@0
|
433 break;
|
yuuji@0
|
434 }
|
yuuji@0
|
435 /* do the conversion */
|
yuuji@0
|
436 utf8_text_utf7 (&utf7,&utf8,NIL,NIL);
|
yuuji@0
|
437 /* no longer need copy of source */
|
yuuji@0
|
438 fs_give ((void **) &utf7.data);
|
yuuji@0
|
439 /* post-process: switch & and + */
|
yuuji@0
|
440 for (s = utf8.data; *s; ++s) switch (*s) {
|
yuuji@0
|
441 case '&':
|
yuuji@0
|
442 *s = '+';
|
yuuji@0
|
443 break;
|
yuuji@0
|
444 case '+':
|
yuuji@0
|
445 *s = '&';
|
yuuji@0
|
446 break;
|
yuuji@0
|
447 }
|
yuuji@0
|
448 return utf8.data;
|
yuuji@0
|
449 }
|