Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F102546297
D42147.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
2 KB
Referenced Files
None
Subscribers
None
D42147.diff
View Options
diff --git a/sys/kern/tty_ttydisc.c b/sys/kern/tty_ttydisc.c
--- a/sys/kern/tty_ttydisc.c
+++ b/sys/kern/tty_ttydisc.c
@@ -844,23 +844,25 @@
*/
ttyinq_write(&tp->t_inq, bytes,
UTF8_STACKBUF, 0);
+ ttyinq_unputchar(&tp->t_inq);
} else {
/* Find codepoint and width. */
codepoint =
teken_utf8_bytes_to_codepoint(bytes,
nb);
- if (codepoint !=
- TEKEN_UTF8_INVALID_CODEPOINT) {
- cwidth = teken_wcwidth(
- codepoint);
- } else {
+ if (codepoint ==
+ TEKEN_UTF8_INVALID_CODEPOINT ||
+ (cwidth = teken_wcwidth(
+ codepoint)) == -1) {
/*
* Place all bytes back into the
* inq and fall back to
* default behaviour.
*/
+ cwidth = 1;
ttyinq_write(&tp->t_inq, bytes,
nb, 0);
+ ttyinq_unputchar(&tp->t_inq);
}
}
tp->t_column -= cwidth;
diff --git a/sys/teken/teken_wcwidth.h b/sys/teken/teken_wcwidth.h
--- a/sys/teken/teken_wcwidth.h
+++ b/sys/teken/teken_wcwidth.h
@@ -128,15 +128,32 @@
teken_utf8_bytes_to_codepoint(uint8_t bytes[4], int nbytes)
{
- /* Check for malformed characters. */
- if (__bitcount(bytes[0] & 0xf0) != nbytes)
+ /*
+ * Check for malformed characters by comparing 'nbytes'
+ * to the byte length of the character.
+ *
+ * The table in section 3 of RFC 3629 defines 4 different
+ * values indicating the length of a UTF-8 byte sequence.
+ *
+ * 0xxxxxxx -> 1 byte
+ * 110xxxxx -> 2 bytes
+ * 1110xxxx -> 3 bytes
+ * 11110xxx -> 4 bytes
+ *
+ * The length is determined by the number of leading 1 bits in
+ * the leading octet (except in the first case, where an MSB
+ * of 0 means a byte length of 1). Here we mask off the upper 4
+ * bits, invert them and count the leading zeros using
+ * __builtin_clz() to determine the number of bytes.
+ */
+ if (__builtin_clz(~(bytes[0] & 0xf0) << 24) != nbytes)
return (TEKEN_UTF8_INVALID_CODEPOINT);
switch (nbytes) {
case 1:
return (bytes[0] & 0x7f);
case 2:
- return (bytes[0] & 0xf) << 6 | (bytes[1] & 0x3f);
+ return (bytes[0] & 0x1f) << 6 | (bytes[1] & 0x3f);
case 3:
return (bytes[0] & 0xf) << 12 | (bytes[1] & 0x3f) << 6 | (bytes[2] & 0x3f);
case 4:
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Nov 14, 9:30 PM (7 h, 50 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14633981
Default Alt Text
D42147.diff (2 KB)
Attached To
Mode
D42147: tty/teken: fix UTF8 sequence validation logic
Attached
Detach File
Event Timeline
Log In to Comment