summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Rich Felker <dalias@aerifal.cx> 2012-04-23 19:19:26 -0400
committer: Rich Felker <dalias@aerifal.cx> 2012-04-23 19:19:26 -0400
commit: 1a63a9fc30e7a1f1239e3cedcb5041e5ec1c5351 (patch)
tree: 9ab2438310a30b91aa19962205d0a6fe065d868e
parent: 38b5d7d0526be24fba0f0789407727e4e7950e63 (diff)
download: musl-1a63a9fc30e7a1f1239e3cedcb5041e5ec1c5351.tar.gz
musl-1a63a9fc30e7a1f1239e3cedcb5041e5ec1c5351.tar.bz2
musl-1a63a9fc30e7a1f1239e3cedcb5041e5ec1c5351.tar.xz
musl-1a63a9fc30e7a1f1239e3cedcb5041e5ec1c5351.zip
sync case mappings with unicode 6.1
also special-case ß (U+00DF) as lowercase even though it does not have a mapping to uppercase. unicode added an uppercase version of this character but does not map it, presumably because the uppercase version is not actually used except for some obscure purpose...
-rw-r--r-- src/ctype/iswlower.c 2
-rw-r--r-- src/ctype/towctrans.c 36
2 files changed, 30 insertions, 8 deletions
diff --git a/src/ctype/iswlower.c b/src/ctype/iswlower.c
index 0a568e77..438fe26a 100644
--- a/src/ctype/iswlower.c
+++ b/src/ctype/iswlower.c
@@ -2,5 +2,5 @@
int iswlower(wint_t wc)
{
- return towupper(wc) != wc;
+ return towupper(wc) != wc || wc == 0xdf;
}
diff --git a/src/ctype/towctrans.c b/src/ctype/towctrans.c
index 0b1eed04..2842d690 100644
--- a/src/ctype/towctrans.c
+++ b/src/ctype/towctrans.c
@@ -30,6 +30,7 @@ static const struct {
CASELACE(0x4c1,0x4cd),
CASELACE(0x4d0,0x50e),
+ CASELACE(0x514,0x526),
CASEMAP(0x531,0x556,0x561),
CASELACE(0x01a0,0x01a4),
@@ -69,12 +70,19 @@ static const struct {
CASEMAP(0x2c00,0x2c2e,0x2c30),
CASELACE(0x2c67,0x2c6b),
CASELACE(0x2c80,0x2ce2),
+ CASELACE(0x2ceb,0x2ced),
+
+ CASELACE(0xa640,0xa66c),
+ CASELACE(0xa680,0xa696),
CASELACE(0xa722,0xa72e),
CASELACE(0xa732,0xa76e),
CASELACE(0xa779,0xa77b),
CASELACE(0xa77e,0xa786),
+ CASELACE(0xa790,0xa792),
+ CASELACE(0xa7a0,0xa7a8),
+
CASEMAP(0xff21,0xff3a,0xff41),
{ 0,0,0 }
};
@@ -144,6 +152,8 @@ static const unsigned short pairs[][2] = {
{ 0x03f7, 0x03f8 },
{ 0x03fa, 0x03fb },
{ 0x1e60, 0x1e9b },
+ { 0xdf, 0xdf },
+ { 0x1e9e, 0xdf },
{ 0x1f59, 0x1f51 },
{ 0x1f5b, 0x1f53 },
@@ -181,10 +191,20 @@ static const unsigned short pairs[][2] = {
{ 0x2c6d, 0x251 },
{ 0x2c6e, 0x271 },
{ 0x2c6f, 0x250 },
+ { 0x2c70, 0x252 },
{ 0x2c72, 0x2c73 },
{ 0x2c75, 0x2c76 },
+ { 0x2c7e, 0x23f },
+ { 0x2c7f, 0x240 },
+ { 0x2cf2, 0x2cf3 },
{ 0xa77d, 0x1d79 },
+ { 0xa78b, 0xa78c },
+ { 0xa78d, 0x265 },
+ { 0xa7aa, 0x266 },
+
+ { 0x10c7, 0x2d27 },
+ { 0x10cd, 0x2d2d },
/* bogus greek 'symbol' letters */
{ 0x376, 0x377 },
@@ -207,17 +227,19 @@ static wchar_t __towcase(wchar_t wc, int lower)
int i;
int lmul = 2*lower-1;
int lmask = lower-1;
- if ((unsigned)wc - 0x10400 < 0x50)
- return wc + lmul*0x28;
/* no letters with case in these large ranges */
if (!iswalpha(wc)
|| (unsigned)wc - 0x0600 <= 0x0fff-0x0600
- || (unsigned)wc - 0x2e00 <= 0xa6ff-0x2e00
+ || (unsigned)wc - 0x2e00 <= 0xa63f-0x2e00
|| (unsigned)wc - 0xa800 <= 0xfeff-0xa800)
return wc;
/* special case because the diff between upper/lower is too big */
- if ((unsigned)wc - 0x10a0 < 0x26 || (unsigned)wc - 0x2d00 < 0x26)
- return wc + lmul*(0x2d00-0x10a0);
+ if (lower && (unsigned)wc - 0x10a0 < 0x2e)
+ if (wc>0x10c5 && wc != 0x10c7 && wc != 0x10cd) return wc;
+ else return wc + 0x2d00 - 0x10a0;
+ if (!lower && (unsigned)wc - 0x2d00 < 0x26)
+ if (wc>0x2d25 && wc != 0x2d27 && wc != 0x2d2d) return wc;
+ else return wc + 0x10a0 - 0x2d00;
for (i=0; casemaps[i].len; i++) {
int base = casemaps[i].upper + (lmask & casemaps[i].lower);
if ((unsigned)wc-base < casemaps[i].len) {
@@ -230,8 +252,8 @@ static wchar_t __towcase(wchar_t wc, int lower)
if (pairs[i][1-lower] == wc)
return pairs[i][lower];
}
- if ((unsigned)wc - 0x10428 + (lower<<5) + (lower<<3) < 0x28)
- return wc - 0x28 + (lower<<10) + (lower<<6);
+ if ((unsigned)wc - (0x10428 - 0x28*lower) < 0x28)
+ return wc - 0x28 + 0x50*lower;
return wc;
}