about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Christian Neukirchen <chneukirchen@gmail.com> 2016-10-14 22:20:11 +0200
committer Christian Neukirchen <chneukirchen@gmail.com> 2016-10-14 22:20:11 +0200
commit 27915af60dd2cde02068268bbb32574527dc715c (patch)
tree 4426bfaab3a61c87845721b9a0db41b0a81b94ec
parent 631b6c7c826477ebc3b8b59a2a77df8592e0c627 (diff)
download mblaze-27915af60dd2cde02068268bbb32574527dc715c.tar.gz
rfc2047: detect partial multibyte sequences and decode them correctly
-rw-r--r-- rfc2047.c 58
1 file changed, 49 insertions, 9 deletions
diff --git a/rfc2047.c b/rfc2047.c
index 8462d25..b9c41ab 100644
--- a/rfc2047.c
+++ b/rfc2047.c
@@ -126,6 +126,7 @@ int
blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
{
iconv_t ic = (iconv_t)-1;
+ char *srcenc = 0;
char *b = src;
@@ -134,11 +135,17 @@ blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
if (!s)
goto nocodeok;
+ // keep track of partial multibyte sequences
+ char *partial = 0;
+ size_t partiallen = 0;
+
do {
char *t;
t = b;
while (t < s) // strip space-only inbetween encoded words
if (!isfws(*t++)) {
+ if (partial) // mixed up encodings
+ goto nocode;
while (b < s && dlen) {
*dst++ = *b++;
dlen--;
@@ -156,7 +163,17 @@ blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
goto nocode;
*e = 0;
- ic = iconv_open(tgtenc, s);
+ if (!srcenc || strcmp(srcenc, s) != 0) {
+ if (partial) // mixed up encodings
+ goto nocode;
+ free(srcenc);
+ srcenc = strdup(s);
+ if (!srcenc)
+ goto nocode;
+ if (ic != (iconv_t)-1)
+ iconv_close(ic);
+ ic = iconv_open(tgtenc, srcenc);
+ }
*e = '?';
e++;
@@ -180,23 +197,38 @@ blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
else
goto nocode;
+ if (partial) {
+ dec = realloc(dec, declen + partiallen);
+ if (!dec)
+ goto nocode;
+ memmove(dec + partiallen, dec, declen);
+ memcpy(dec, partial, partiallen);
+ declen += partiallen;
+ free(partial);
+ partial = 0;
+ partiallen = 0;
+ }
+
decchunk = dec;
int r = iconv(ic, &dec, &declen, &dst, &dlen);
if (r < 0) {
if (errno == E2BIG) {
- iconv_close(ic);
break;
- } else if (errno == EILSEQ || errno == EINVAL) {
+ } else if (errno == EILSEQ) {
goto nocode;
+ } else if (errno == EINVAL) {
+ partial = malloc(declen);
+ if (!partial)
+ goto nocode;
+ memcpy(partial, dec, declen);
+ partiallen = declen;
} else {
perror("iconv");
goto nocode;
}
}
- iconv_close(ic);
-
- while (declen && dlen) {
+ while (!partial && declen && dlen) {
*dst++ = *dec++;
declen--;
dlen--;
@@ -214,13 +246,17 @@ blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
*dst = 0;
+ if (ic != (iconv_t)-1)
+ iconv_close(ic);
+ free(srcenc);
+
return 1;
nocode:
+ fprintf(stderr, "error decoding rfc2047\n");
if (ic != (iconv_t)-1)
iconv_close(ic);
-
- fprintf(stderr, "error decoding rfc2047\n");
+ free(srcenc);
nocodeok:
while (*src && dlen) {
*dst++ = *src++;
@@ -257,6 +293,10 @@ main() {
char test4dec[255];
blaze822_decode_rfc2047(test4dec, test4, sizeof test4dec, "UTF-8");
printf("%s\n", test4dec);
-
+
+ char test5[] = "=?UTF-8?Q?z=E2=80?= =?UTF-8?Q?=99z?=";
+ char test5dec[255];
+ blaze822_decode_rfc2047(test5dec, test5, sizeof test5dec, "UTF-8");
+ printf("%s\n", test5dec);
}
#endif