Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F96374210
D42067.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
3 KB
Referenced Files
None
Subscribers
None
D42067.diff
View Options
diff --git a/sys/kern/tty_ttydisc.c b/sys/kern/tty_ttydisc.c
--- a/sys/kern/tty_ttydisc.c
+++ b/sys/kern/tty_ttydisc.c
@@ -43,6 +43,9 @@
#include <sys/uio.h>
#include <sys/vnode.h>
+#include <teken/teken.h>
+#include <teken/teken_wcwidth.h>
+
/*
* Standard TTYDISC `termios' line discipline.
*/
@@ -78,8 +81,13 @@
/* Character is alphanumeric. */
#define CTL_ALNUM(c) (((c) >= '0' && (c) <= '9') || \
((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
+/* Character is UTF8-encoded. */
+#define CTL_UTF8(c) (!!((c) & 0x80))
+/* Character is a UTF8 continuation byte. */
+#define CTL_UTF8_CONT(c) (((c) & 0xc0) == 0x80)
#define TTY_STACKBUF 256
+#define UTF8_STACKBUF 4
void
ttydisc_open(struct tty *tp)
@@ -800,6 +808,72 @@
ttyoutq_write_nofrag(&tp->t_outq,
"\b\b\b\b\b\b\b\b", tablen);
return (0);
+ } else if ((tp->t_termios.c_iflag & IUTF8) != 0 &&
+ CTL_UTF8(c)) {
+ uint8_t bytes[UTF8_STACKBUF] = { 0 };
+ int curidx = UTF8_STACKBUF - 1, cwidth = 1,
+ nb = 0;
+ teken_char_t codepoint;
+
+ /* Save current byte. */
+ bytes[curidx] = c;
+ curidx--;
+ nb++;
+ /* Loop back through inq until we hit the
+ * leading byte. */
+ while (CTL_UTF8_CONT(c) && nb < UTF8_STACKBUF) {
+ ttyinq_peekchar(&tp->t_inq, &c, &quote);
+ ttyinq_unputchar(&tp->t_inq);
+ bytes[curidx] = c;
+ curidx--;
+ nb++;
+ }
+ /*
+ * Shift array so that the leading
+ * byte ends up at idx 0.
+ */
+ if (nb < UTF8_STACKBUF)
+ memmove(&bytes[0], &bytes[curidx + 1],
+ nb * sizeof(uint8_t));
+ /* Check for malformed UTF8 characters. */
+ if (nb == UTF8_STACKBUF &&
+ CTL_UTF8_CONT(bytes[0])) {
+ /*
+ * Place all bytes back into the inq and
+ * delete the last byte only.
+ */
+ ttyinq_write(&tp->t_inq, bytes,
+ UTF8_STACKBUF, 0);
+ } else {
+ /* Find codepoint and width. */
+ codepoint =
+ teken_utf8_bytes_to_codepoint(bytes,
+ nb);
+ if (codepoint !=
+ TEKEN_UTF8_INVALID_CODEPOINT) {
+ cwidth = teken_wcwidth(
+ codepoint);
+ } else {
+ /*
+ * Place all bytes back into the
+ * inq and fall back to
+ * default behaviour.
+ */
+ ttyinq_write(&tp->t_inq, bytes,
+ nb, 0);
+ }
+ }
+ tp->t_column -= cwidth;
+ /*
+ * Delete character by punching
+ * 'cwidth' spaces over it.
+ */
+ if (cwidth == 1)
+ ttyoutq_write_nofrag(&tp->t_outq,
+ "\b \b", 3);
+ else if (cwidth == 2)
+ ttyoutq_write_nofrag(&tp->t_outq,
+ "\b\b \b\b", 6);
} else {
/*
* Remove a regular character by
diff --git a/sys/teken/teken_wcwidth.h b/sys/teken/teken_wcwidth.h
--- a/sys/teken/teken_wcwidth.h
+++ b/sys/teken/teken_wcwidth.h
@@ -8,6 +8,8 @@
* Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
*/
+#define TEKEN_UTF8_INVALID_CODEPOINT -1
+
struct interval {
teken_char_t first;
teken_char_t last;
@@ -116,3 +118,31 @@
(ucs >= 0x20000 && ucs <= 0x2fffd) ||
(ucs >= 0x30000 && ucs <= 0x3fffd)));
}
+
+/*
+ * Converts a UTF-8 byte sequence to a codepoint as specified in
+ * https://datatracker.ietf.org/doc/html/rfc3629#section-3 . The function
+ * expects the 'bytes' array to start with the leading byte and 'nbytes' to
+ * be the total length of the sequence (1 to 4).
+ *
+ * Returns the decoded codepoint, or TEKEN_UTF8_INVALID_CODEPOINT when the
+ * leading byte does not announce exactly 'nbytes' bytes, when one of the
+ * trailing bytes is not a continuation byte, or when 'nbytes' is out of
+ * range.
+ */
+static teken_char_t
+teken_utf8_bytes_to_codepoint(uint8_t bytes[4], int nbytes)
+{
+	teken_char_t codepoint;
+	int i;
+
+	/*
+	 * Match the leading byte against the bit pattern required for the
+	 * given length.  Counting the set bits of the high nibble is not
+	 * sufficient: a 2-byte leader in the range 0xd0-0xdf has three bits
+	 * set there and would be rejected although it is well-formed.
+	 */
+	switch (nbytes) {
+	case 1:
+		/* 0xxxxxxx: seven payload bits. */
+		if ((bytes[0] & 0x80) != 0x00)
+			return (TEKEN_UTF8_INVALID_CODEPOINT);
+		codepoint = bytes[0] & 0x7f;
+		break;
+	case 2:
+		/* 110xxxxx: five payload bits, hence the 0x1f mask. */
+		if ((bytes[0] & 0xe0) != 0xc0)
+			return (TEKEN_UTF8_INVALID_CODEPOINT);
+		codepoint = bytes[0] & 0x1f;
+		break;
+	case 3:
+		/* 1110xxxx: four payload bits. */
+		if ((bytes[0] & 0xf0) != 0xe0)
+			return (TEKEN_UTF8_INVALID_CODEPOINT);
+		codepoint = bytes[0] & 0x0f;
+		break;
+	case 4:
+		/* 11110xxx: three payload bits. */
+		if ((bytes[0] & 0xf8) != 0xf0)
+			return (TEKEN_UTF8_INVALID_CODEPOINT);
+		codepoint = bytes[0] & 0x07;
+		break;
+	default:
+		return (TEKEN_UTF8_INVALID_CODEPOINT);
+	}
+
+	/* Every trailing byte must be 10xxxxxx and adds six payload bits. */
+	for (i = 1; i < nbytes; i++) {
+		if ((bytes[i] & 0xc0) != 0x80)
+			return (TEKEN_UTF8_INVALID_CODEPOINT);
+		codepoint = (codepoint << 6) | (bytes[i] & 0x3f);
+	}
+
+	return (codepoint);
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Sep 25, 6:22 PM (21 h, 52 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
12752507
Default Alt Text
D42067.diff (3 KB)
Attached To
Mode
D42067: tty: fix improper backspace behaviour for UTF8 characters when in canonical mode
Attached
Detach File
Event Timeline
Log In to Comment