// mblaze/safe_u8putstr.c

#include <stdio.h>
#include <stdint.h>
/*
 * safe_u8putstr - tty-safe output of a byte string, with relaxed UTF-8
 * semantics.
 *
 *   s0     - first byte of the input; it is not required to be
 *            NUL-terminated, exactly l bytes are read
 *   l      - number of input bytes
 *   stream - stream the result is written to with fputc()
 *
 * Translation rules:
 * - C0 controls other than TAB, LF and CR are written as the UTF-8
 *   encoding of U+2400 + byte (Unicode "control picture" symbols)
 * - DEL (0x7f) is written as U+2421
 * - CRLF is translated to LF; a CR not followed by LF is written as is
 * - a byte in 0x80..0xbf that is not part of a recognised multi-byte
 *   sequence is written as U+241B followed by U+2400 + (byte - 0x80)
 * - a lead byte followed by the required number of continuation bytes
 *   (2-, 3- or 4-byte form) is copied through unchanged; no check for
 *   overlong forms or surrogates is made
 * - any other byte >= 0xc0 is written as is (probably latin1)
 *
 * The return values of fputc() are not checked.
 */
void
safe_u8putstr(char *s0, size_t l, FILE *stream)
{
	unsigned char *s = (unsigned char *)s0;
	unsigned char *e = s + l;

	while (s < e) {
		if ((*s & 0x80) == 0) {
			if (*s < 32 && *s != '\t' && *s != '\n' && *s != '\r') {
				// C0: 0xe2 0x90 0x80+c is UTF-8 for U+2400+c
				fputc(0xe2, stream);
				fputc(0x90, stream);
				fputc(0x80 + *s, stream);
			} else if (*s == 127) {
				// DEL: U+2421
				fputc(0xe2, stream);
				fputc(0x90, stream);
				fputc(0xa1, stream);
			} else if (*s == '\r') {
				// CRLF: skip the CR so only the LF is written
				if (e - s > 1 && s[1] == '\n')
					s++;
				fputc(*s, stream);
			} else {
				// safe ASCII
				fputc(*s, stream);
			}
		} else if ((*s & 0xc0) == 0x80) {
			// stray continuation byte / raw C1: U+241B first ...
			fputc(0xe2, stream);
			fputc(0x90, stream);
			fputc(0x80 + 0x1b, stream);
			// ... then the byte itself reused as the last byte of
			// a U+24xx sequence
			fputc(0xe2, stream);
			fputc(0x90, stream);
			fputc(*s, stream);
		} else {
			// Pack up to four bytes big-endian into f; positions
			// past the end of the input stay zero and therefore
			// never look like continuation bytes.
			size_t avail = (size_t)(e - s);
			if (avail > 4)
				avail = 4;

			uint32_t f = 0;
			for (size_t i = 0; i < avail; i++) {
				// widen before shifting: s[i] << 24 on a
				// promoted int would overflow (undefined)
				f |= (uint32_t)s[i] << (24 - 8 * i);
			}

			size_t len;
			if ((f & 0xe0c00000u) == 0xc0800000u)
				len = 2;
			else if ((f & 0xf0c0c000u) == 0xe0808000u)
				len = 3;
			else if ((f & 0xf8c0c0c0u) == 0xf0808080u)
				len = 4;
			else
				len = 1;	// invalid utf8, emit one byte

			for (size_t i = 0; i < len; i++)
				fputc(s[i], stream);

			// the last byte of the sequence is stepped over by
			// the common s++ below
			s += len - 1;
		}
		s++;
	}
}