diff options
author | Rich Felker <dalias@aerifal.cx> | 2011-02-12 00:22:29 -0500 |
---|---|---|
committer | Rich Felker <dalias@aerifal.cx> | 2011-02-12 00:22:29 -0500 |
commit | 0b44a0315b47dd8eced9f3b7f31580cf14bbfc01 (patch) | |
tree | 6eaef0d8a720fa3da580de87b647fff796fe80b3 /src/multibyte/decode.c | |
download | musl-0b44a0315b47dd8eced9f3b7f31580cf14bbfc01.tar.gz musl-0b44a0315b47dd8eced9f3b7f31580cf14bbfc01.tar.bz2 musl-0b44a0315b47dd8eced9f3b7f31580cf14bbfc01.tar.xz musl-0b44a0315b47dd8eced9f3b7f31580cf14bbfc01.zip |
initial check-in, version 0.5.0v0.5.0
Diffstat (limited to 'src/multibyte/decode.c')
-rw-r--r-- | src/multibyte/decode.c | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/src/multibyte/decode.c b/src/multibyte/decode.c new file mode 100644 index 00000000..8d3d3c0b --- /dev/null +++ b/src/multibyte/decode.c @@ -0,0 +1,47 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#include <stdlib.h> +#include <inttypes.h> +#include <wchar.h> +#include <errno.h> + +#include "internal.h" + +/* Decodes UTF-8 byte-by-byte. The c argument must be initialized to 0 + * to begin decoding; when finished it will contain the Unicode scalar + * value decoded. Return value is 1 if finished, 0 if in-progress, and + * -1 if an invalid sequence was encountered. After an invalid sequence, + * the state (in c) automatically resets to 0 if a continuation byte was + * expected to facilitate a calling idiom of immediately retrying a + * failed decode call after processing the invalid sequence. If the + * second try fails, the byte is invalid as a starter as well. + * + * A trivial usage idiom is: + * while (src<end && (n=decode(dst, *src))>=0) 1[dst+=n]=0, src++; + */ + +int decode(unsigned *c, unsigned b) +{ + if (!*c) { + if (b < 0x80) { + *c = b; + return 1; + } else if (b-SA >= SB-SA) { + *c = FAILSTATE; + return -1; + } + *c = bittab[b-SA]; + return 0; + } + + if (OOB(*c,b)) { + *c = 0; + return -1; + } + *c = *c<<6 | b-0x80; + return !(*c&(1U<<31)); +} |