diff options
author | Rich Felker <dalias@aerifal.cx> | 2017-11-10 17:06:32 -0500 |
---|---|---|
committer | Rich Felker <dalias@aerifal.cx> | 2017-11-10 17:22:43 -0500 |
commit | a39f20bf9f8e59573a479bff23df345b2b4d2345 (patch) | |
tree | e8b70165af9c19269e1ae95bcf52362ee5baa12a | |
parent | 5b546faa67544af395d6407553762b37e9711157 (diff) | |
download | musl-a39f20bf9f8e59573a479bff23df345b2b4d2345.tar.gz musl-a39f20bf9f8e59573a479bff23df345b2b4d2345.tar.bz2 musl-a39f20bf9f8e59573a479bff23df345b2b4d2345.tar.xz musl-a39f20bf9f8e59573a479bff23df345b2b4d2345.zip |
add iso-2022-jp support (decoding only) to iconv
this implementation aims to match the baseline defined by rfc1468 (the
original mime charset definition) plus the halfwidth katakana
extension included in the whatwg definition of the charset. rejection
of si/so controls and newlines in doublebyte state is not currently
enforced. the jis x 0201 mode is currently interpreted as having the
yen sign and overline character in place of backslash and tilde; ascii
mode has the standard ascii characters in those slots.
-rw-r--r-- | src/locale/iconv.c | 47 |
1 files changed, 45 insertions, 2 deletions
diff --git a/src/locale/iconv.c b/src/locale/iconv.c index 0696b555..2107b055 100644 --- a/src/locale/iconv.c +++ b/src/locale/iconv.c @@ -18,6 +18,7 @@ #define UTF_8 0310 #define EUC_JP 0320 #define SHIFT_JIS 0321 +#define ISO2022_JP 0322 #define GB18030 0330 #define GBK 0331 #define GB2312 0332 @@ -41,6 +42,7 @@ static const unsigned char charmaps[] = "ascii\0usascii\0iso646\0iso646us\0\0\307" "eucjp\0\0\320" "shiftjis\0sjis\0\0\321" +"iso2022jp\0\0\322" "gb18030\0\0\330" "gbk\0\0\331" "gb2312\0\0\332" @@ -123,6 +125,7 @@ static size_t extract_to(iconv_t cd) iconv_t iconv_open(const char *to, const char *from) { size_t f, t; + struct stateful_cd *scd; if ((t = find_charmap(to))==-1 || (f = find_charmap(from))==-1 @@ -132,8 +135,9 @@ iconv_t iconv_open(const char *to, const char *from) } iconv_t cd = combine_to_from(t, f); - if (0) { - struct stateful_cd *scd = malloc(sizeof *scd); + switch (charmaps[f]) { + case ISO2022_JP: + scd = malloc(sizeof *scd); if (!scd) return (iconv_t)-1; scd->base_cd = cd; scd->state = 0; @@ -294,6 +298,45 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri c = jis0208[c][d]; if (!c) goto ilseq; break; + case ISO2022_JP: + if (c >= 128) goto ilseq; + if (c == '\033') { + l = 3; + if (*inb < 3) goto starved; + c = *((unsigned char *)*in + 1); + d = *((unsigned char *)*in + 2); + if (c != '(' && c != '$') goto ilseq; + switch (128*(c=='$') + d) { + case 'B': scd->state=0; continue; + case 'J': scd->state=1; continue; + case 'I': scd->state=4; continue; + case 128+'@': scd->state=2; continue; + case 128+'B': scd->state=3; continue; + } + goto ilseq; + } + switch (scd->state) { + case 1: + if (c=='\\') c = 0xa5; + if (c=='~') c = 0x203e; + break; + case 2: + case 3: + l = 2; + if (*inb < 2) goto starved; + d = *((unsigned char *)*in + 1); + c -= 0x21; + d -= 0x21; + if (c >= 84 || d >= 94) goto ilseq; + c = jis0208[c][d]; + if (!c) goto ilseq; + break; + case 4: + if (c-0x60 < 0x1f) goto ilseq; + if (c-0x21 
< 0x5e) c += 0xff61-0x21; + break; + } + break; case GB2312: if (c < 128) break; if (c < 0xa1) goto ilseq; |