summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2015-06-16 05:35:31 +0000
committer Rich Felker <dalias@aerifal.cx> 2015-06-16 06:10:29 +0000
commit 16f18d036d9a7bf590ee6eb86785c0a9658220b6 (patch)
tree deb18b50c750e47dce2e0de0061f6de93cb11957
parent 1507ebf837334e9e07cfab1ca1c2e88449069a80 (diff)
download musl-16f18d036d9a7bf590ee6eb86785c0a9658220b6.tar.gz
musl-16f18d036d9a7bf590ee6eb86785c0a9658220b6.tar.bz2
musl-16f18d036d9a7bf590ee6eb86785c0a9658220b6.tar.xz
musl-16f18d036d9a7bf590ee6eb86785c0a9658220b6.zip
byte-based C locale, phase 2: stdio and iconv (multibyte callers)
this patch adjusts libc components which use the multibyte functions internally, and which depend on them operating in a particular encoding, to make the appropriate locale changes before calling them and restore the calling thread's locale afterwards. activating the byte-based C locale without these changes would cause regressions in stdio and iconv. in the case of iconv, the current implementation was simply using the multibyte functions as UTF-8 conversions. setting a multibyte UTF-8 locale for the duration of the iconv operation allows the code to continue working. in the case of stdio, POSIX requires that FILE streams have an encoding rule bound at the time of setting wide orientation. as long as all locales, including the C locale, used the same encoding, treating high bytes as UTF-8, there was no need to store an encoding rule as part of the stream's state. a new locale field in the FILE structure points to the locale that should be made active during fgetwc/fputwc/ungetwc on the stream. it cannot point to the locale active at the time the stream becomes oriented, because this locale could be mutable (the global locale) or could be destroyed (locale_t objects produced by newlocale) before the stream is closed. instead, a pointer to the static C or C.UTF-8 locale object added in commit aeeac9ca5490d7d90fe061ab72da446c01ddf746 is used. this is valid since categories other than LC_CTYPE will not affect these functions.
-rw-r--r-- src/internal/stdio_impl.h 1
-rw-r--r-- src/locale/iconv.c 6
-rw-r--r-- src/stdio/fgetwc.c 15
-rw-r--r-- src/stdio/fputwc.c 5
-rw-r--r-- src/stdio/fputws.c 5
-rw-r--r-- src/stdio/fwide.c 11
-rw-r--r-- src/stdio/ungetwc.c 5
7 files changed, 40 insertions, 8 deletions
diff --git a/src/internal/stdio_impl.h b/src/internal/stdio_impl.h
index e1325fe1..72c55192 100644
--- a/src/internal/stdio_impl.h
+++ b/src/internal/stdio_impl.h
@@ -47,6 +47,7 @@ struct _IO_FILE {
unsigned char *shend;
off_t shlim, shcnt;
FILE *prev_locked, *next_locked;
+ struct __locale_struct *locale;
};
size_t __stdio_read(FILE *, unsigned char *, size_t);
diff --git a/src/locale/iconv.c b/src/locale/iconv.c
index e6121aea..1eeea94e 100644
--- a/src/locale/iconv.c
+++ b/src/locale/iconv.c
@@ -5,6 +5,7 @@
#include <stdlib.h>
#include <limits.h>
#include <stdint.h>
+#include "locale_impl.h"
#define UTF_32BE 0300
#define UTF_16LE 0301
@@ -165,9 +166,12 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
int err;
unsigned char type = map[-1];
unsigned char totype = tomap[-1];
+ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
if (!in || !*in || !*inb) return 0;
+ *ploc = UTF8_LOCALE;
+
for (; *inb; *in+=l, *inb-=l) {
c = *(unsigned char *)*in;
l = 1;
@@ -431,6 +435,7 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
break;
}
}
+ *ploc = loc;
return x;
ilseq:
err = EILSEQ;
@@ -445,5 +450,6 @@ starved:
x = -1;
end:
errno = err;
+ *ploc = loc;
return x;
}
diff --git a/src/stdio/fgetwc.c b/src/stdio/fgetwc.c
index b261b44f..e455cfec 100644
--- a/src/stdio/fgetwc.c
+++ b/src/stdio/fgetwc.c
@@ -1,8 +1,9 @@
#include "stdio_impl.h"
+#include "locale_impl.h"
#include <wchar.h>
#include <errno.h>
-wint_t __fgetwc_unlocked(FILE *f)
+static wint_t __fgetwc_unlocked_internal(FILE *f)
{
mbstate_t st = { 0 };
wchar_t wc;
@@ -10,8 +11,6 @@ wint_t __fgetwc_unlocked(FILE *f)
unsigned char b;
size_t l;
- if (f->mode <= 0) fwide(f, 1);
-
/* Convert character from buffer if possible */
if (f->rpos < f->rend) {
l = mbrtowc(&wc, (void *)f->rpos, f->rend - f->rpos, &st);
@@ -39,6 +38,16 @@ wint_t __fgetwc_unlocked(FILE *f)
return wc;
}
+wint_t __fgetwc_unlocked(FILE *f)
+{
+ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
+ if (f->mode <= 0) fwide(f, 1);
+ *ploc = f->locale;
+ wchar_t wc = __fgetwc_unlocked_internal(f);
+ *ploc = loc;
+ return wc;
+}
+
wint_t fgetwc(FILE *f)
{
wint_t c;
diff --git a/src/stdio/fputwc.c b/src/stdio/fputwc.c
index 1bf165bf..789fe9c9 100644
--- a/src/stdio/fputwc.c
+++ b/src/stdio/fputwc.c
@@ -1,4 +1,5 @@
#include "stdio_impl.h"
+#include "locale_impl.h"
#include <wchar.h>
#include <limits.h>
#include <ctype.h>
@@ -7,8 +8,10 @@ wint_t __fputwc_unlocked(wchar_t c, FILE *f)
{
char mbc[MB_LEN_MAX];
int l;
+ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
if (f->mode <= 0) fwide(f, 1);
+ *ploc = f->locale;
if (isascii(c)) {
c = putc_unlocked(c, f);
@@ -20,6 +23,8 @@ wint_t __fputwc_unlocked(wchar_t c, FILE *f)
l = wctomb(mbc, c);
if (l < 0 || __fwritex((void *)mbc, l, f) < l) c = WEOF;
}
+ if (c==WEOF) f->flags |= F_ERR;
+ *ploc = loc;
return c;
}
diff --git a/src/stdio/fputws.c b/src/stdio/fputws.c
index 317d65f1..0ed02f1c 100644
--- a/src/stdio/fputws.c
+++ b/src/stdio/fputws.c
@@ -1,23 +1,28 @@
#include "stdio_impl.h"
+#include "locale_impl.h"
#include <wchar.h>
int fputws(const wchar_t *restrict ws, FILE *restrict f)
{
unsigned char buf[BUFSIZ];
size_t l=0;
+ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
FLOCK(f);
fwide(f, 1);
+ *ploc = f->locale;
while (ws && (l = wcsrtombs((void *)buf, (void*)&ws, sizeof buf, 0))+1 > 1)
if (__fwritex(buf, l, f) < l) {
FUNLOCK(f);
+ *ploc = loc;
return -1;
}
FUNLOCK(f);
+ *ploc = loc;
return l; /* 0 or -1 */
}
diff --git a/src/stdio/fwide.c b/src/stdio/fwide.c
index 8088e7ad..8410b153 100644
--- a/src/stdio/fwide.c
+++ b/src/stdio/fwide.c
@@ -1,13 +1,14 @@
-#include <wchar.h>
#include "stdio_impl.h"
-
-#define SH (8*sizeof(int)-1)
-#define NORMALIZE(x) ((x)>>SH | -((-(x))>>SH))
+#include "locale_impl.h"
int fwide(FILE *f, int mode)
{
FLOCK(f);
- if (!f->mode) f->mode = NORMALIZE(mode);
+ if (mode) {
+ if (!f->locale) f->locale = MB_CUR_MAX==1
+ ? C_LOCALE : UTF8_LOCALE;
+ if (!f->mode) f->mode = mode>0 ? 1 : -1;
+ }
mode = f->mode;
FUNLOCK(f);
return mode;
diff --git a/src/stdio/ungetwc.c b/src/stdio/ungetwc.c
index d4c7de39..80d6e203 100644
--- a/src/stdio/ungetwc.c
+++ b/src/stdio/ungetwc.c
@@ -1,4 +1,5 @@
#include "stdio_impl.h"
+#include "locale_impl.h"
#include <wchar.h>
#include <limits.h>
#include <ctype.h>
@@ -8,15 +9,18 @@ wint_t ungetwc(wint_t c, FILE *f)
{
unsigned char mbc[MB_LEN_MAX];
int l=1;
+ locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
FLOCK(f);
if (f->mode <= 0) fwide(f, 1);
+ *ploc = f->locale;
if (!f->rpos) __toread(f);
if (!f->rpos || f->rpos < f->buf - UNGET + l || c == WEOF ||
(!isascii(c) && (l = wctomb((void *)mbc, c)) < 0)) {
FUNLOCK(f);
+ *ploc = loc;
return WEOF;
}
@@ -26,5 +30,6 @@ wint_t ungetwc(wint_t c, FILE *f)
f->flags &= ~F_EOF;
FUNLOCK(f);
+ *ploc = loc;
return c;
}