From 564bb223eff77c087f06d4048b109cbf4bf16612 Mon Sep 17 00:00:00 2001
From: Leah Neukirchen
Date: Mon, 13 Mar 2017 15:50:41 +0100
Subject: [PATCH] mshow: print plain text safely

---
Review notes (everything between the "---" line above and the first
"diff --git" line is commentary and is not part of the commit).

  What the patch does

  * safe_u8putstr.c (new file): safe_u8putstr(s0, l, stream) writes the
    l bytes at s0 to stream, byte by byte:
      - bytes below 0x20 other than TAB, LF and CR are written as the
        three bytes E2 90 (80+byte), i.e. U+2400 + byte;
      - 0x7F (DEL) is written as E2 90 A1 (U+2421);
      - a CR that is directly followed by LF is dropped, so CRLF becomes
        LF; a lone CR is written unchanged;
      - a byte in 0x80..0xBF that does not follow a lead byte is written
        as E2 90 9B followed by E2 90 <byte>;
      - for any other byte >= 0xC0, up to four bytes are inspected; if
        they match the bit pattern of a 2-, 3- or 4-byte UTF-8 sequence
        the whole sequence is copied verbatim, otherwise the single byte
        is copied verbatim.  Only the lead/continuation bit patterns are
        checked (no overlong or range checks).
  * mshow.c: adds the file-local flag safe_output, which main() sets when
    neither rflag nor Oflag is set.  print_ascii() now calls
    safe_u8putstr() on stdout when safe_output is set and falls back to a
    plain fwrite() otherwise; its former CR handling is removed.
    printhdr() stops copying the header name at the first byte outside
    0x20..0x7E and prints the remainder through print_ascii().
    print_date_header(), print_decode_header() and the raw-body path of
    show() go through print_ascii() instead of printf("%s")/fwrite().
  * blaze822.h: declares safe_u8putstr().
  * Makefile: links the new code into mshow.

  Changes made during review (all line-for-line, so hunk headers and the
  diffstat are unchanged)

  * Makefile: the new prerequisite was "safe_u8putstr.c"; every other
    prerequisite in this hunk is an object file, so it is now
    "safe_u8putstr.o".
  * blaze822.h, safe_u8putstr.c: the header names after "#include" were
    missing.  FILE and fputc() need <stdio.h>; uint32_t needs <stdint.h>.
  * safe_u8putstr.c: s[0] is an unsigned char that promotes to int, and
    in this branch s[0] >= 0xC0, so "s[0]<<24" is not representable in
    int (undefined behaviour).  The operand is cast to uint32_t first.
  * safe_u8putstr.c: the comment said "translate CRLF to CR"; the code
    emits the LF, so the comment now says LF.

  Caveats

  * NOTE(review): this file had lost its line breaks and indentation.
    Line structure was rebuilt so that every hunk matches its header
    counts; indentation was rebuilt as tabs (two spaces on the Makefile
    continuation line).  Confirm against the tree, or apply with
    "git apply --ignore-whitespace".
  * NOTE(review): the "index" lines are the ones from the submitted
    patch and do not reflect the review changes above.
  * NOTE(review): the From: header carries no e-mail address; add it
    before feeding this to "git am".

 Makefile        |  2 +-
 blaze822.h      |  5 ++++
 mshow.c         | 57 +++++++++++++++++++++++------------------
 safe_u8putstr.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 106 insertions(+), 26 deletions(-)
 create mode 100644 safe_u8putstr.c

diff --git a/Makefile b/Makefile
index 8953d3d..1ab619f 100644
--- a/Makefile
+++ b/Makefile
@@ -24,7 +24,7 @@ maddr magrep mexport mflag mgenmid mhdr mlist mpick mscan msed mseq mshow msort
   mthread : seq.o slurp.o
 maddr magrep mhdr mpick mscan mshow : rfc2047.o
 magrep mshow : rfc2045.o
-mshow : filter.o
+mshow : filter.o safe_u8putstr.o
 msort : mystrverscmp.o
 mmime : slurp.o
 
diff --git a/blaze822.h b/blaze822.h
index 053ef0a..e767114 100644
--- a/blaze822.h
+++ b/blaze822.h
@@ -82,3 +82,8 @@ time_t tm_to_secs(const struct tm *tm);
 // slurp.c
 
 int slurp(char *filename, char **bufo, off_t *leno);
+
+// safe_u8putstr.c
+
+#include <stdio.h>
+void safe_u8putstr(char *s0, size_t l, FILE *stream);
diff --git a/mshow.c b/mshow.c
index 38242f9..da553bd 100644
--- a/mshow.c
+++ b/mshow.c
@@ -30,43 +30,43 @@ static char *Oflag;
 struct message *filters;
 
 static int mimecount;
+static int safe_output;
 
 static char defaultAflags[] = "text/plain:text/html";
 static char *Aflag = defaultAflags;
 
+static int
+printable(int c)
+{
+	return (unsigned)c-0x20 < 0x5f;
+}
+
+int
+print_ascii(char *body, size_t bodylen)
+{
+	if (safe_output) {
+		safe_u8putstr(body, bodylen, stdout);
+		return bodylen;
+	} else {
+		return fwrite(body, 1, bodylen, stdout);
+	}
+}
+
 void
 printhdr(char *hdr)
 {
 	int uc = 1;
 
-	while (*hdr && *hdr != ':') {
+	while (*hdr && *hdr != ':' && printable(*hdr)) {
 		putc(uc ? toupper(*hdr) : *hdr, stdout);
 		uc = (*hdr == '-');
 		hdr++;
 	}
 
-	if (*hdr)
-		printf("%s\n", hdr);
-}
-
-int
-print_ascii(char *body, size_t bodylen)
-{
-	if (!memchr(body, '\r', bodylen))
-		return fwrite(body, 1, bodylen, stdout);
-
-	// crlf normalization required
-	size_t i;
-	for (i = 0; i < bodylen; i++) {
-		if (body[i] == '\r') {
-			if (!(i+1 < bodylen && body[i+1] == '\n'))
-				putc_unlocked('\n', stdout);
-			continue;
-		}
-		putc_unlocked(body[i], stdout);
+	if (*hdr) {
+		print_ascii(hdr, strlen(hdr));
+		fputc('\n', stdout);
 	}
-
-	return bodylen;
 }
 
 void
@@ -529,7 +529,8 @@ print_date_header(char *v)
 		now = time(0);
 	}
 
-	printf("Date: %s", v);
+	printf("Date: ");
+	print_ascii(v, strlen(v));
 
 	time_t t = blaze822_date(v);
 	if (t == -1) {
@@ -608,7 +609,10 @@ print_decode_header(char *h, char *v)
 	char d[4096];
 	blaze822_decode_rfc2047(d, v, sizeof d, "UTF-8");
 	printhdr(h);
-	printf(": %s\n", d);
+	fputc(':', stdout);
+	fputc(' ', stdout);
+	print_ascii(d, strlen(d));
+	fputc('\n', stdout);
 }
 
 void
@@ -674,7 +678,7 @@ show(char *file)
 		printf("\n");
 
 	if (rflag || !blaze822_check_mime(msg)) { // raw body
-		fwrite(blaze822_body(msg), 1, blaze822_bodylen(msg), stdout);
+		print_ascii(blaze822_body(msg), blaze822_bodylen(msg));
 		goto done;
 	}
 
@@ -713,6 +717,9 @@ main(int argc, char *argv[])
 		exit(1);
 	}
 
+	if (!rflag && !Oflag)
+		safe_output = 1;
+
 	if (xflag) { // extract
 		extract(xflag, argc-optind, argv+optind, 0);
 	} else if (Oflag) { // extract to stdout
diff --git a/safe_u8putstr.c b/safe_u8putstr.c
new file mode 100644
index 0000000..758c5be
--- /dev/null
+++ b/safe_u8putstr.c
@@ -0,0 +1,68 @@
+#include <stdint.h>
+#include <stdio.h>
+
+void
+safe_u8putstr(char *s0, size_t l, FILE *stream)
+{
+	// tty-safe output of s, with relaxed utf-8 semantics:
+	// - C0 and C1 are displayed as escape sequences
+	// - valid utf8 is printed as is
+	// - rest is printed bytewise as is (probably latin1)
+	// - translate CRLF to LF
+
+	unsigned char *s = (unsigned char* )s0;
+	unsigned char *e = s + l;
+
+	while (s < e) {
+		if ((*s & 0x80) == 0) {
+			if (*s < 32 &&
+			    *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r') {
+				// C0
+				fputc(0xe2, stream);
+				fputc(0x90, stream);
+				fputc(0x80+*s, stream);
+			} else if (*s == 127) {
+				// DEL
+				fputc(0xe2, stream);
+				fputc(0x90, stream);
+				fputc(0xa1, stream);
+			} else if (*s == '\r') {
+				if (e - s > 1 && s[1] == '\n')
+					s++;
+				fputc(*s, stream);
+			} else {
+				// safe ASCII
+				fputc(*s, stream);
+			}
+		} else if ((*s & 0xc0) == 0x80) {
+			// C1
+			fputc(0xe2, stream);
+			fputc(0x90, stream);
+			fputc(0x80+0x1b, stream);
+
+			fputc(0xe2, stream);
+			fputc(0x90, stream);
+			fputc(*s, stream);
+		} else {
+			uint32_t f = 0;
+			if (e - s >= 4)
+				f = ((uint32_t)s[0]<<24) | (s[1]<<16) | (s[2]<<8) | s[3];
+			else if (e - s == 3)
+				f = ((uint32_t)s[0]<<24) | (s[1]<<16) | (s[2]<<8);
+			else if (e - s == 2)
+				f = ((uint32_t)s[0]<<24) | (s[1]<<16);
+			else if (e - s == 1)
+				f = ((uint32_t)s[0]<<24);
+
+			if ((f & 0xe0c00000) == 0xc0800000) goto u2;
+			else if ((f & 0xf0c0c000) == 0xe0808000) goto u3;
+			else if ((f & 0xf8c0c0c0) == 0xf0808080) goto u4;
+			else /* invalid utf8 */ goto u1;
+u4:			fputc(*s++, stream);
+u3:			fputc(*s++, stream);
+u2:			fputc(*s++, stream);
+u1:			fputc(*s, stream);
+		}
+		s++;
+	}
+}