Procházet zdrojové kódy

add `wcwidth` and `wcswidth`

blueloveTH před 3 měsíci
rodič
revize
6f9b7943d4

+ 3 - 0
include/typings/picoterm.pyi

@@ -3,3 +3,6 @@ def enable_full_buffering_mode() -> None:
 
 def split_ansi_escaped_string(s: str) -> list[str]:
     """Perform split on ANSI escaped string."""
+
+# Display width, in terminal columns, of the code point `c` (pass `ord(ch)`):
+# 0 for code points below 32, 2 for East Asian Fullwidth/Wide code points,
+# 1 for printable ASCII.
+def wcwidth(c: int) -> int: ...
+# Total display width of `s`, summed per code point over its text tokens.
+# Raises ValueError if `s` contains invalid ANSI escape sequences.
+def wcswidth(s: str) -> int: ...

+ 53 - 1
src/modules/picoterm.c

@@ -1,8 +1,20 @@
+#include "pocketpy/common/str.h"
 #include "pocketpy/pocketpy.h"
 #include "pocketpy/objects/base.h"
-#include <stdio.h>
 #include "pocketpy/common/vector.h"
 
+#include <stdio.h>
+
+const char* c11__u32_east_asian_width(int c);
+
+// Return the number of terminal columns occupied by the code point `c`.
+//   0 - control characters: anything below 0x20 (including negative input),
+//       DEL (0x7f) and the C1 control range 0x80..0x9f
+//   2 - code points whose East Asian Width class is exactly "F" or "W"
+//   1 - everything else
+static int c11__wcwidth(int c) {
+    if(c < 32) return 0;
+    if(c < 0x7f) return 1;  // printable ASCII, no table lookup needed
+    // DEL and C1 controls are non-printing, just like the C0 controls above;
+    // without this check they fell through to the width table and counted as 1.
+    if(c < 0xa0) return 0;
+    const char* w = c11__u32_east_asian_width(c);
+    bool fullwidth = (w[0] == 'F' && w[1] == '\0') || (w[0] == 'W' && w[1] == '\0');
+    return fullwidth ? 2 : 1;
+}
+
 static bool picoterm_enable_full_buffering_mode(int argc, py_Ref argv) {
     PY_CHECK_ARGC(0);
     static char buf[1024 * 32];  // 32KB
@@ -39,11 +51,51 @@ static bool picoterm_split_ansi_escaped_string(int argc, py_Ref argv) {
     return true;
 }
 
+// picoterm.wcwidth(c: int) -> int
+// Takes exactly one int argument (a code point) and stores its column width,
+// as computed by c11__wcwidth, in the return-value slot.
+static bool picoterm_wcwidth(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    PY_CHECK_ARG_TYPE(0, tp_int);
+    int codepoint = py_toint(py_arg(0));
+    int width = c11__wcwidth(codepoint);
+    py_newint(py_retval(), width);
+    return true;
+}
+
+// picoterm.wcswidth(s: str) -> int
+// Splits `s` with split_ansi_escaped_string, then sums c11__wcwidth over every
+// code point of the tokens whose `suffix` is '\0'. The sum is stored in the
+// return-value slot.
+// Raises ValueError when the split fails or when a byte is not a valid UTF-8
+// header. The token vector is released on every exit path.
+static bool picoterm_wcswidth(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    PY_CHECK_ARG_TYPE(0, tp_str);
+    c11_sv sv = py_tosv(py_arg(0));
+    c11_vector /*T=AnsiEscapedToken*/ tokens;
+    c11_vector__ctor(&tokens, sizeof(AnsiEscapedToken));
+    if(!split_ansi_escaped_string(sv, &tokens)) {
+        c11_vector__dtor(&tokens);
+        return ValueError("invalid ANSI escape sequences");
+    }
+    int total_width = 0;
+    for(int i = 0; i < tokens.length; i++) {
+        AnsiEscapedToken* p_token = c11__at(AnsiEscapedToken, &tokens, i);
+        // Tokens with a non-zero suffix add no width
+        // (presumably these are the escape sequences themselves; confirm against AnsiEscapedToken).
+        if(p_token->suffix != '\0') continue;
+        const char* curr_char = p_token->text.data;
+        const char* end_char = p_token->text.data + p_token->text.size;
+        while(curr_char < end_char) {
+            unsigned char c = *curr_char;
+            int u8bytes = c11__u8_header(c, true);
+            if(u8bytes == 0) {
+                // Free the token vector before raising; this path used to leak it.
+                c11_vector__dtor(&tokens);
+                return ValueError("invalid utf-8 header: %d", (int)c);
+            }
+            int value = c11__u8_value(u8bytes, curr_char);
+            total_width += c11__wcwidth(value);
+            curr_char += u8bytes;
+        }
+    }
+    c11_vector__dtor(&tokens);
+    py_newint(py_retval(), total_width);
+    return true;
+}
+
+// Create the `picoterm` module and bind its native functions by name.
 void pk__add_module_picoterm() {
     py_Ref mod = py_newmodule("picoterm");
 
     py_bindfunc(mod, "enable_full_buffering_mode", picoterm_enable_full_buffering_mode);
     py_bindfunc(mod, "split_ansi_escaped_string", picoterm_split_ansi_escaped_string);
+    // Display-width helpers: one for a single code point, one for a whole string.
+    py_bindfunc(mod, "wcwidth", picoterm_wcwidth);
+    py_bindfunc(mod, "wcswidth", picoterm_wcswidth);
 }
 
 static bool split_ansi_escaped_string(c11_sv sv, c11_vector* out_tokens) {

+ 1 - 1
src/modules/unicodedata.c

@@ -1022,7 +1022,7 @@ const static c11_u32_range kEastAsianWidthRanges[] = {
 };
 // clang-format on
 
-const static char* c11__u32_east_asian_width(int c) {
+const char* c11__u32_east_asian_width(int c) {
     const char* data =
         c11__search_u32_ranges(c,
                                kEastAsianWidthRanges,

+ 8 - 1
tests/92_picoterm.py

@@ -20,4 +20,11 @@ cpnts = picoterm.split_ansi_escaped_string(text)
 assert cpnts == ['\x1b[3m', '\x1b[38;2;200;200;0m', '\x1b[48;2;78;118;164m', 'hello, ', '\n', 'world', '\x1b[0m', '\x1b[0m', '\x1b[0m', '123']
 
 cpnts_join = ''.join(cpnts)
-assert cpnts_join == text
+assert cpnts_join == text
+
+# wcwidth takes a code point: a newline occupies 0 columns, an ASCII letter 1,
+# and a CJK ideograph or emoji 2.
+assert picoterm.wcwidth(ord('\n')) == 0
+assert picoterm.wcwidth(ord('a')) == 1
+assert picoterm.wcwidth(ord('测')) == 2
+assert picoterm.wcwidth(ord('👀')) == 2
+
+# "hello, " is 7 columns, "a" is 1, and the six double-width characters are 12;
+# the trailing "\n" adds 0.
+assert picoterm.wcswidth("hello, 测试a测试👀测\n") == 7 + 1 + 12