summary | refs | log | tree | commit | diff
path: root/src/locale/iconv.c
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2011-07-12 20:30:04 -0400
committer Rich Felker <dalias@aerifal.cx> 2011-07-12 20:30:04 -0400
commit 0e2331c9b6e0c0b4f24019d4062f4c655d28cbaf (patch)
tree faefa6b692ff1937935f61f033c570dd2d84dcfb /src/locale/iconv.c
parent c3c5e88c31b78f7b32b3d8b5c2450d75fa858951 (diff)
download musl-0e2331c9b6e0c0b4f24019d4062f4c655d28cbaf.tar.gz
musl-0e2331c9b6e0c0b4f24019d4062f4c655d28cbaf.tar.bz2
musl-0e2331c9b6e0c0b4f24019d4062f4c655d28cbaf.tar.xz
musl-0e2331c9b6e0c0b4f24019d4062f4c655d28cbaf.zip
GB18030 support in iconv (conversion from GB18030 only, not to it)
Also support the older Chinese character sets (GBK and GB2312), restricting each to its own subset of GB18030, and explicitly refuse to convert to any CJK encoding (since there is no code for it yet).
Diffstat (limited to 'src/locale/iconv.c')
-rw-r--r-- src/locale/iconv.c 53
1 files changed, 51 insertions, 2 deletions
diff --git a/src/locale/iconv.c b/src/locale/iconv.c
index fb982921..a7d4fd9f 100644
--- a/src/locale/iconv.c
+++ b/src/locale/iconv.c
@@ -17,6 +17,9 @@
#define UTF_8 0310
#define EUC_JP 0320
#define SHIFT_JIS 0321
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
/* FIXME: these are not implemented yet
* EUC: A1-FE A1-FE
@@ -41,6 +44,9 @@ static const unsigned char charmaps[] =
"ascii\0usascii\0iso646\0iso646us\0\0\306"
"eucjp\0\0\320"
"shiftjis\0sjis\0\0\321"
+"gb18030\0\0\330"
+"gbk\0\0\331"
+"gb2312\0\0\332"
#include "codepages.h"
;
@@ -52,6 +58,10 @@ static const unsigned short jis0208[84][94] = {
#include "jis0208.h"
};
+static const unsigned short gb18030[126][190] = {
+#include "gb18030.h"
+};
+
static int fuzzycmp(const unsigned char *a, const unsigned char *b)
{
for (; *a && *b; a++, b++) {
@@ -82,7 +92,9 @@ iconv_t iconv_open(const char *to, const char *from)
{
size_t f, t;
- if ((t = find_charmap(to))==-1 || (f = find_charmap(from))==-1) {
+ if ((t = find_charmap(to))==-1
+ || (f = find_charmap(from))==-1
+ || (t >= 0320)) {
errno = EINVAL;
return (iconv_t)-1;
}
@@ -127,7 +139,6 @@ static void put_32(unsigned char *s, unsigned c, int e)
#define mbrtowc_utf8 mbrtowc
#define wctomb_utf8 wctomb
-#include <stdio.h>
size_t iconv(iconv_t cd0, char **in, size_t *inb, char **out, size_t *outb)
{
size_t x=0;
@@ -229,6 +240,44 @@ size_t iconv(iconv_t cd0, char **in, size_t *inb, char **out, size_t *outb)
c = jis0208[c][d];
if (!c) goto ilseq;
break;
+ case GB2312:
+ if (c < 0xa1) goto ilseq;
+ case GBK:
+ case GB18030:
+ c -= 0x81;
+ if (c >= 126) goto ilseq;
+ l = 2;
+ if (*inb < 2) goto starved;
+ d = *((unsigned char *)*in + 1);
+ if (d < 0xa1 && type == GB2312) goto ilseq;
+ if (d-0x40>=191 || d==127) {
+ if (d-'0'>9 || type != GB18030)
+ goto ilseq;
+ l = 4;
+ if (*inb < 4) goto starved;
+ c = (10*c + d-'0') * 1260;
+ d = *((unsigned char *)*in + 2);
+ if (d-0x81>126) goto ilseq;
+ c += 10*(d-0x81);
+ d = *((unsigned char *)*in + 3);
+ if (d-'0'>9) goto ilseq;
+ c += d-'0';
+ c += 128;
+ for (d=0; d<=c; ) {
+ k = 0;
+ for (int i=0; i<126; i++)
+ for (int j=0; j<190; j++)
+ if (gb18030[i][j]-d <= c-d)
+ k++;
+ d = c+1;
+ c += k;
+ }
+ break;
+ }
+ d -= 0x40;
+ if (d>63) d--;
+ c = gb18030[c][d];
+ break;
default:
if (c < 128+type) break;
c -= 128+type;