diff options
author | Rich Felker <dalias@aerifal.cx> | 2011-07-12 20:30:04 -0400 |
---|---|---|
committer | Rich Felker <dalias@aerifal.cx> | 2011-07-12 20:30:04 -0400 |
commit | 0e2331c9b6e0c0b4f24019d4062f4c655d28cbaf (patch) | |
tree | faefa6b692ff1937935f61f033c570dd2d84dcfb /src/locale/iconv.c | |
parent | c3c5e88c31b78f7b32b3d8b5c2450d75fa858951 (diff) | |
download | musl-0e2331c9b6e0c0b4f24019d4062f4c655d28cbaf.tar.gz musl-0e2331c9b6e0c0b4f24019d4062f4c655d28cbaf.tar.bz2 musl-0e2331c9b6e0c0b4f24019d4062f4c655d28cbaf.tar.xz musl-0e2331c9b6e0c0b4f24019d4062f4c655d28cbaf.zip |
gb18030 support in iconv (only from, not to)
also support (and restrict to subsets) older chinese sets, and
explicitly refuse to convert to cjk (since there's no code for it yet)
Diffstat (limited to 'src/locale/iconv.c')
-rw-r--r-- | src/locale/iconv.c | 53 |
1 files changed, 51 insertions, 2 deletions
diff --git a/src/locale/iconv.c b/src/locale/iconv.c index fb982921..a7d4fd9f 100644 --- a/src/locale/iconv.c +++ b/src/locale/iconv.c @@ -17,6 +17,9 @@ #define UTF_8 0310 #define EUC_JP 0320 #define SHIFT_JIS 0321 +#define GB18030 0330 +#define GBK 0331 +#define GB2312 0332 /* FIXME: these are not implemented yet * EUC: A1-FE A1-FE @@ -41,6 +44,9 @@ static const unsigned char charmaps[] = "ascii\0usascii\0iso646\0iso646us\0\0\306" "eucjp\0\0\320" "shiftjis\0sjis\0\0\321" +"gb18030\0\0\330" +"gbk\0\0\331" +"gb2312\0\0\332" #include "codepages.h" ; @@ -52,6 +58,10 @@ static const unsigned short jis0208[84][94] = { #include "jis0208.h" }; +static const unsigned short gb18030[126][190] = { +#include "gb18030.h" +}; + static int fuzzycmp(const unsigned char *a, const unsigned char *b) { for (; *a && *b; a++, b++) { @@ -82,7 +92,9 @@ iconv_t iconv_open(const char *to, const char *from) { size_t f, t; - if ((t = find_charmap(to))==-1 || (f = find_charmap(from))==-1) { + if ((t = find_charmap(to))==-1 + || (f = find_charmap(from))==-1 + || (t >= 0320)) { errno = EINVAL; return (iconv_t)-1; } @@ -127,7 +139,6 @@ static void put_32(unsigned char *s, unsigned c, int e) #define mbrtowc_utf8 mbrtowc #define wctomb_utf8 wctomb -#include <stdio.h> size_t iconv(iconv_t cd0, char **in, size_t *inb, char **out, size_t *outb) { size_t x=0; @@ -229,6 +240,44 @@ size_t iconv(iconv_t cd0, char **in, size_t *inb, char **out, size_t *outb) c = jis0208[c][d]; if (!c) goto ilseq; break; + case GB2312: + if (c < 0xa1) goto ilseq; + case GBK: + case GB18030: + c -= 0x81; + if (c >= 126) goto ilseq; + l = 2; + if (*inb < 2) goto starved; + d = *((unsigned char *)*in + 1); + if (d < 0xa1 && type == GB2312) goto ilseq; + if (d-0x40>=191 || d==127) { + if (d-'0'>9 || type != GB18030) + goto ilseq; + l = 4; + if (*inb < 4) goto starved; + c = (10*c + d-'0') * 1260; + d = *((unsigned char *)*in + 2); + if (d-0x81>126) goto ilseq; + c += 10*(d-0x81); + d = *((unsigned char *)*in + 3); + if (d-'0'>9) goto ilseq; + c += d-'0'; + c += 128; + for (d=0; d<=c; ) { + k = 0; + for (int i=0; i<126; i++) + for (int j=0; j<190; j++) + if (gb18030[i][j]-d <= c-d) + k++; + d = c+1; + c += k; + } + break; + } + d -= 0x40; + if (d>63) d--; + c = gb18030[c][d]; + break; default: if (c < 128+type) break; c -= 128+type; |