Page MenuHomeFreeBSD

D42067.id128404.diff
No OneTemporary

D42067.id128404.diff

diff --git a/sys/kern/tty_ttydisc.c b/sys/kern/tty_ttydisc.c
--- a/sys/kern/tty_ttydisc.c
+++ b/sys/kern/tty_ttydisc.c
@@ -43,6 +43,9 @@
#include <sys/uio.h>
#include <sys/vnode.h>
+#include <teken/teken.h>
+#include <teken/teken_wcwidth.h>
+
/*
* Standard TTYDISC `termios' line discipline.
*/
@@ -78,8 +81,13 @@
/* Character is alphanumeric. */
#define CTL_ALNUM(c) (((c) >= '0' && (c) <= '9') || \
((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
+/* Byte is part of a multibyte UTF-8 sequence (high bit set). */
+#define CTL_UTF8(c) (!!((c) & 0x80))
+/* Character is a UTF8 continuation byte. */
+#define CTL_UTF8_CONT(c) (((c) & 0xc0) == 0x80)
#define TTY_STACKBUF 256
+#define UTF8_STACKBUF 4
void
ttydisc_open(struct tty *tp)
@@ -800,6 +808,72 @@
ttyoutq_write_nofrag(&tp->t_outq,
"\b\b\b\b\b\b\b\b", tablen);
return (0);
+ } else if ((tp->t_termios.c_iflag & IUTF8) != 0 &&
+ CTL_UTF8(c)) {
+ uint8_t bytes[UTF8_STACKBUF] = { 0 };
+ int curidx = UTF8_STACKBUF - 1, cwidth = 1,
+ nb = 0;
+ teken_char_t codepoint;
+
+ /* Save current byte. */
+ bytes[curidx] = c;
+ curidx--;
+ nb++;
+ /* Loop back through inq until we hit the
+ * leading byte. */
+ while (CTL_UTF8_CONT(c) && nb < UTF8_STACKBUF) {
+ ttyinq_peekchar(&tp->t_inq, &c, &quote);
+ ttyinq_unputchar(&tp->t_inq);
+ bytes[curidx] = c;
+ curidx--;
+ nb++;
+ }
+ /*
+ * Shift array so that the leading
+ * byte ends up at idx 0.
+ */
+ if (nb < UTF8_STACKBUF)
+ memmove(&bytes[0], &bytes[curidx + 1],
+ nb * sizeof(uint8_t));
+ /* Check for malformed UTF8 characters. */
+ if (nb == UTF8_STACKBUF &&
+ CTL_UTF8_CONT(bytes[0])) {
+ /*
+ * Place all bytes back into the inq and
+ * delete the last byte only.
+ */
+ ttyinq_write(&tp->t_inq, bytes,
+ UTF8_STACKBUF, 0);
+ } else {
+ /* Find codepoint and width. */
+ codepoint =
+ teken_utf8_bytes_to_codepoint(bytes,
+ nb);
+ if (codepoint !=
+ TEKEN_UTF8_INVALID_CODEPOINT) {
+ cwidth = teken_wcwidth(
+ codepoint);
+ } else {
+ /*
+ * Place all bytes back into the
+ * inq and fall back to
+ * default behaviour.
+ */
+ ttyinq_write(&tp->t_inq, bytes,
+ nb, 0);
+ }
+ }
+ tp->t_column -= cwidth;
+ /*
+ * Delete character by punching
+ * 'cwidth' spaces over it.
+ */
+ if (cwidth == 1)
+ ttyoutq_write_nofrag(&tp->t_outq,
+ "\b \b", 3);
+ else if (cwidth == 2)
+ ttyoutq_write_nofrag(&tp->t_outq,
+ "\b\b \b\b", 6);
} else {
/*
* Remove a regular character by
diff --git a/sys/teken/teken_wcwidth.h b/sys/teken/teken_wcwidth.h
--- a/sys/teken/teken_wcwidth.h
+++ b/sys/teken/teken_wcwidth.h
@@ -8,6 +8,8 @@
* Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
*/
+#define TEKEN_UTF8_INVALID_CODEPOINT (-1) /* not a valid codepoint */
+
struct interval {
teken_char_t first;
teken_char_t last;
@@ -116,3 +118,31 @@
(ucs >= 0x20000 && ucs <= 0x2fffd) ||
(ucs >= 0x30000 && ucs <= 0x3fffd)));
}
+
+/*
+ * Converts a UTF-8 byte sequence to a codepoint as specified in
+ * https://datatracker.ietf.org/doc/html/rfc3629#section-3 . 'bytes' must start
+ * with the leading byte; returns TEKEN_UTF8_INVALID_CODEPOINT if malformed.
+ */
+static teken_char_t
+teken_utf8_bytes_to_codepoint(uint8_t bytes[4], int nbytes)
+{
+	/* Lead-byte prefix mask and expected prefix, indexed by nbytes - 1. */
+	static const uint8_t lmask[] = { 0x80, 0xe0, 0xf0, 0xf8 };
+	static const uint8_t lbits[] = { 0x00, 0xc0, 0xe0, 0xf0 };
+	teken_char_t cp;
+	int i;
+
+	if (nbytes < 1 || nbytes > 4 ||
+	    (bytes[0] & lmask[nbytes - 1]) != lbits[nbytes - 1])
+		return (TEKEN_UTF8_INVALID_CODEPOINT);
+
+	/* Payload bits of the lead byte are those outside the prefix mask. */
+	cp = bytes[0] & ~lmask[nbytes - 1];
+	for (i = 1; i < nbytes; i++) {
+		if ((bytes[i] & 0xc0) != 0x80)
+			return (TEKEN_UTF8_INVALID_CODEPOINT);
+		cp = cp << 6 | (bytes[i] & 0x3f);
+	}
+	return (cp);
+}

File Metadata

Mime Type
text/plain
Expires
Wed, Sep 25, 9:45 AM (15 h, 23 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
12752507
Default Alt Text
D42067.id128404.diff (3 KB)

Event Timeline