summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2017-11-10 13:34:21 -0500
committer Rich Felker <dalias@aerifal.cx> 2017-11-10 13:34:21 -0500
commit 9eb6dd5165b803715f82b9f5d4b557878f77a580 (patch)
tree f126f61faa8fceb29bb98c946b1eef542e0f7302
parent bff59d13a82cd4c02792fd73da0e7c79bda022ff (diff)
download musl-9eb6dd5165b803715f82b9f5d4b557878f77a580.tar.gz
musl-9eb6dd5165b803715f82b9f5d4b557878f77a580.tar.bz2
musl-9eb6dd5165b803715f82b9f5d4b557878f77a580.tar.xz
musl-9eb6dd5165b803715f82b9f5d4b557878f77a580.zip
Handle ASCII range individually in each iconv case
Short-circuiting low bytes before the switch precluded support for character encodings that don't coincide with ASCII in this range. This limitation affected ISO-2022 encodings, which use the ESC byte to introduce a shift sequence, and non-ASCII-based encodings such as EBCDIC.
-rw-r--r-- src/locale/iconv.c 12
1 file changed, 10 insertions, 2 deletions
diff --git a/src/locale/iconv.c b/src/locale/iconv.c
index c64bcf35..af0d8283 100644
--- a/src/locale/iconv.c
+++ b/src/locale/iconv.c
@@ -193,8 +193,9 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
c = *(unsigned char *)*in;
l = 1;
- if (c >= 128 || type-UTF_32BE < 7U) switch (type) {
+ switch (type) {
case UTF_8:
+ if (c < 128) break; // optimization
l = mbrtowc_utf8(&wc, *in, *inb, &st);
if (!l) l++;
else if (l == (size_t)-1) goto ilseq;
@@ -202,7 +203,8 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
c = wc;
break;
case US_ASCII:
- goto ilseq;
+ if (c >= 128) goto ilseq;
+ break;
case WCHAR_T:
l = sizeof(wchar_t);
if (*inb < l) goto starved;
@@ -234,6 +236,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
}
break;
case SHIFT_JIS:
+ if (c < 128) break;
if (c-0xa1 <= 0xdf-0xa1) {
c += 0xff61-0xa1;
break;
@@ -257,6 +260,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
if (!c) goto ilseq;
break;
case EUC_JP:
+ if (c < 128) break;
l = 2;
if (*inb < 2) goto starved;
d = *((unsigned char *)*in + 1);
@@ -273,9 +277,11 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
if (!c) goto ilseq;
break;
case GB2312:
+ if (c < 128) break;
if (c < 0xa1) goto ilseq;
case GBK:
case GB18030:
+ if (c < 128) break;
c -= 0x81;
if (c >= 126) goto ilseq;
l = 2;
@@ -311,6 +317,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
c = gb18030[c][d];
break;
case BIG5:
+ if (c < 128) break;
l = 2;
if (*inb < 2) goto starved;
d = *((unsigned char *)*in + 1);
@@ -348,6 +355,7 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
if (!c) goto ilseq;
break;
case EUC_KR:
+ if (c < 128) break;
l = 2;
if (*inb < 2) goto starved;
d = *((unsigned char *)*in + 1);