speakup: Turn i18n files utf-8
author Samuel Thibault <samuel.thibault@ens-lyon.org>
Wed, 27 Mar 2024 11:50:51 +0000 (12:50 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 11 Apr 2024 12:30:29 +0000 (14:30 +0200)
i18n currently assumes latin1 encoding, which is not enough for most
languages.

This separates out the utf-8 processing of /dev/synthu, and uses it for
a new synth_writeu, which we make synth_printf now use. This has the
effect of making all the i18n messages be processed as utf-8.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20240327115051.ng7xqnhozyii4ik2@begin
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/accessibility/speakup/devsynth.c
drivers/accessibility/speakup/speakup.h
drivers/accessibility/speakup/synth.c

index cb7e1114e8ebe2df99d783cd795956384997f0bf..e3d909bd048048a05e1d84d140c3e8f128bad9a9 100644 (file)
@@ -39,13 +39,13 @@ static ssize_t speakup_file_write(struct file *fp, const char __user *buffer,
 static ssize_t speakup_file_writeu(struct file *fp, const char __user *buffer,
                                   size_t nbytes, loff_t *ppos)
 {
-       size_t count = nbytes, want;
+       size_t count = nbytes, consumed, want;
        const char __user *ptr = buffer;
        size_t bytes;
        unsigned long flags;
        unsigned char buf[256];
        u16 ubuf[256];
-       size_t in, in2, out;
+       size_t in, out;
 
        if (!synth)
                return -ENODEV;
@@ -58,57 +58,24 @@ static ssize_t speakup_file_writeu(struct file *fp, const char __user *buffer,
                        return -EFAULT;
 
                /* Convert to u16 */
-               for (in = 0, out = 0; in < bytes; in++) {
-                       unsigned char c = buf[in];
-                       int nbytes = 8 - fls(c ^ 0xff);
-                       u32 value;
-
-                       switch (nbytes) {
-                       case 8: /* 0xff */
-                       case 7: /* 0xfe */
-                       case 1: /* 0x80 */
-                               /* Invalid, drop */
-                               goto drop;
-
-                       case 0:
-                               /* ASCII, copy */
-                               ubuf[out++] = c;
-                               continue;
+               for (in = 0, out = 0; in < bytes; in += consumed) {
+                       s32 value;
 
-                       default:
-                               /* 2..6-byte UTF-8 */
+                       value = synth_utf8_get(buf + in, bytes - in, &consumed, &want);
+                       if (value == -1) {
+                               /* Invalid or incomplete */
 
-                               if (bytes - in < nbytes) {
+                               if (want > bytes - in)
                                        /* We don't have it all yet, stop here
                                         * and wait for the rest
                                         */
                                        bytes = in;
-                                       want = nbytes;
-                                       continue;
-                               }
-
-                               /* First byte */
-                               value = c & ((1u << (7 - nbytes)) - 1);
-
-                               /* Other bytes */
-                               for (in2 = 2; in2 <= nbytes; in2++) {
-                                       c = buf[in + 1];
-                                       if ((c & 0xc0) != 0x80) {
-                                               /* Invalid, drop the head */
-                                               want = 1;
-                                               goto drop;
-                                       }
-                                       value = (value << 6) | (c & 0x3f);
-                                       in++;
-                               }
-
-                               if (value < 0x10000)
-                                       ubuf[out++] = value;
-                               want = 1;
-                               break;
+
+                               continue;
                        }
-drop:
-                       /* empty statement */;
+
+                       if (value < 0x10000)
+                               ubuf[out++] = value;
                }
 
                count -= bytes;
index 364fde99749ed187bbba8d6e4a25ba7039efdb8f..54f1226ea061913b7f4568cc13bc6aea11b5df21 100644 (file)
@@ -76,7 +76,9 @@ int speakup_paste_selection(struct tty_struct *tty);
 void speakup_cancel_paste(void);
 void speakup_register_devsynth(void);
 void speakup_unregister_devsynth(void);
+s32 synth_utf8_get(const char *buf, size_t count, size_t *consumed, size_t *want);
 void synth_write(const char *buf, size_t count);
+void synth_writeu(const char *buf, size_t count);
 int synth_supports_indexing(void);
 
 extern struct vc_data *spk_sel_cons;
index 45f90610313382cac117da4cbe93d1d782a96054..85062e605d796de523da0f901d6d441a2eaf7200 100644 (file)
@@ -217,10 +217,95 @@ void synth_write(const char *_buf, size_t count)
        synth_start();
 }
 
+/* Consume one utf-8 character from buf (that contains up to count bytes),
+ * returns the unicode codepoint if valid, -1 otherwise.
+ * In all cases, returns the number of consumed bytes in *consumed,
+ * and the minimum number of bytes that would be needed for the next character
+ * in *want.
+ */
+s32 synth_utf8_get(const char *buf, size_t count, size_t *consumed, size_t *want)
+{
+       unsigned char c = buf[0];
+       int nbytes = 8 - fls(c ^ 0xff);
+       u32 value;
+       size_t i;
+
+       switch (nbytes) {
+       case 8: /* 0xff */
+       case 7: /* 0xfe */
+       case 1: /* 0x80 */
+               /* Invalid, drop */
+               *consumed = 1;
+               *want = 1;
+               return -1;
+
+       case 0:
+               /* ASCII, take as such */
+               *consumed = 1;
+               *want = 1;
+               return c;
+
+       default:
+               /* 2..6-byte UTF-8 */
+
+               if (count < nbytes) {
+                       /* We don't have it all */
+                       *consumed = 0;
+                       *want = nbytes;
+                       return -1;
+               }
+
+               /* First byte */
+               value = c & ((1u << (7 - nbytes)) - 1);
+
+               /* Other bytes */
+               for (i = 1; i < nbytes; i++) {
+                       c = buf[i];
+                       if ((c & 0xc0) != 0x80) {
+                               /* Invalid, drop the head */
+                               *consumed = i;
+                               *want = 1;
+                               return -1;
+                       }
+                       value = (value << 6) | (c & 0x3f);
+               }
+
+               *consumed = nbytes;
+               *want = 1;
+               return value;
+       }
+}
+
+void synth_writeu(const char *buf, size_t count)
+{
+       size_t i, consumed, want;
+
+       /* Convert to u16 */
+       for (i = 0; i < count; i++) {
+               s32 value;
+
+               value = synth_utf8_get(buf + i, count - i, &consumed, &want);
+               if (value == -1) {
+                       /* Invalid or incomplete */
+
+                       if (want > count - i)
+                               /* We don't have it all, stop */
+                               count = i;
+
+                       continue;
+               }
+
+               if (value < 0x10000)
+                       synth_buffer_add(value);
+       }
+
+       synth_start();
+}
+
 void synth_printf(const char *fmt, ...)
 {
        va_list args;
-       unsigned char buf[160], *p;
+       unsigned char buf[160];
        int r;
 
        va_start(args, fmt);
@@ -229,10 +314,7 @@ void synth_printf(const char *fmt, ...)
        if (r > sizeof(buf) - 1)
                r = sizeof(buf) - 1;
 
-       p = buf;
-       while (r--)
-               synth_buffer_add(*p++);
-       synth_start();
+       synth_writeu(buf, r);
 }
 EXPORT_SYMBOL_GPL(synth_printf);