Sfoglia il codice sorgente

add `unicodedata` module

blueloveTH 10 mesi fa
parent
commit
3879903d73

+ 15 - 0
docs/modules/unicodedata.md

@@ -0,0 +1,15 @@
+---
+icon: package
+label: unicodedata
+---
+
+### `unicodedata.east_asian_width(char: str) -> str`
+
+Returns the East Asian Width property (Unicode Standard Annex #11) of a Unicode character, as one of the following strings:
+
+- `F`: Fullwidth
+- `H`: Halfwidth
+- `N`: Neutral
+- `Na`: Narrow
+- `W`: Wide
+- `A`: Ambiguous

+ 7 - 0
include/pocketpy/common/str.h

@@ -21,6 +21,12 @@ typedef struct c11_bytes {
     unsigned char data[];  // flexible array member
 } c11_bytes;
 
+typedef struct {   // one row of a code-point range table (rows are emitted by scripts/gen_unicodedata.py)
+    int start;     // first code point of the range, inclusive
+    int end;       // last code point of the range, inclusive
+    char data[4];  // short NUL-terminated tag for the range (e.g. "Na"); zero-filled when a row has no value
+} c11_u32_range;
+
 bool c11_bytes__eq(c11_bytes* self, c11_bytes* other);
 
 int c11_sv__cmp(c11_sv self, c11_sv other);
@@ -66,6 +72,7 @@ int c11__unicode_index_to_byte(const char* data, int i);
 int c11__byte_index_to_unicode(const char* data, int n);
 
 bool c11__is_unicode_Lo_char(int c);
+const char* c11__search_u32_ranges(int c, const c11_u32_range* p, int n_ranges);
 int c11__u8_header(unsigned char c, bool suppress);
 int c11__u8_value(int u8bytes, const char* data);
 int c11__u32_to_u8(uint32_t utf32_char, char utf8_output[4]);

+ 1 - 0
include/pocketpy/interpreter/modules.h

@@ -17,6 +17,7 @@ void pk__add_module_inspect();
 void pk__add_module_pickle();
 void pk__add_module_base64();
 void pk__add_module_importlib();
+void pk__add_module_unicodedata();
 
 void pk__add_module_vmath();
 void pk__add_module_array2d();

+ 46 - 0
scripts/gen_unicodedata.py

@@ -0,0 +1,46 @@
+import unicodedata
+from tqdm import trange
+from typing import Literal
+
+info = []  # one (code_point, category, east_asian_width) tuple per code point; list index == code point
+
+for i in trange(0x110000):  # every code point U+0000..U+10FFFF (trange = range with a progress bar)
+    char = chr(i)
+    category = unicodedata.category(char)  # general category string, e.g. 'Lo'
+    east_asian_width = unicodedata.east_asian_width(char)  # width code as reported by this interpreter's Unicode database
+    info.append((i, category, east_asian_width))
+
+def merge(index: Literal[1, 2], filter):  # NOTE(review): `filter` shadows the builtin; kept because it is part of the signature
+    # Collapse the module-level `info` list into runs of consecutive code points sharing one property value.
+    # index = 1 selects the category, index = 2 the east_asian_width; returns (start, end, value) runs for which filter(value) is truthy.
+    result: list[tuple[int, int, str]] = []  # every run found, before filtering; start and end are inclusive
+    last_value = None  # value of the run currently being extended (None until the first item is seen)
+    last_start = None  # index in `info` where the current run began
+    for i in range(len(info)):
+        value = info[i][index]
+        if value != last_value:  # the value changed, so the previous run ended at i - 1
+            if last_value is not None:  # nothing to close on the very first item
+                result.append((last_start, i - 1, last_value))
+            last_value = value
+            last_start = i
+    if last_value is not None:  # close the final run; skipped only when `info` is empty
+        result.append((last_start, len(info) - 1, last_value))
+    return [x for x in result if filter(x[2])]  # the predicate receives only the run's value, not its bounds
+
+df_category = merge(1, lambda x: x == 'Lo')  # keep only the runs whose category is 'Lo'
+df_east_asian_width = merge(2, lambda x: x != 'N')  # keep every width run except those valued 'N'
+
+def to_c11(ranges, name, with_value=True):  # write `ranges` as a C array of c11_u32_range named `name` into the file '<name>.c'
+    with open(f'{name}.c', 'wt', encoding='utf-8', newline='\n') as f:  # relative path, so the file lands in the current working directory; newline='\n' keeps LF endings on every platform
+        f.write(f'const static c11_u32_range {name}[] = {{\n')  # '{{' is an escaped literal brace; NOTE(review): `static const` is the conventional specifier order
+        for start, end, value in ranges:  # each item is a (start, end, value) tuple as produced by merge()
+            if with_value:
+                f.write(f'    {{ {start}, {end}, "{value}\\0" }},\n')  # emits a literal \0 inside the C string, so a 2-char value plus the implicit terminator is exactly 4 bytes
+            else:
+                f.write(f'    {{ {start}, {end} }},\n')  # value omitted; the third struct member is left to C's zero-initialization
+        f.write(f'}};\n')  # close the array initializer
+
+to_c11(df_category, 'kLoRanges', with_value=False)  # writes kLoRanges.c with bounds only; the value is dropped
+to_c11(df_east_asian_width, 'kEastAsianWidthRanges', with_value=True)  # writes kEastAsianWidthRanges.c with each run's width code
+
+

File diff suppressed because it is too large
+ 0 - 1
src/common/str.c


+ 1 - 0
src/interpreter/vm.c

@@ -231,6 +231,7 @@ void VM__ctor(VM* self) {
     pk__add_module_pickle();
     pk__add_module_base64();
     pk__add_module_importlib();
+    pk__add_module_unicodedata();
 
     pk__add_module_conio();
     pk__add_module_lz4();    // optional

+ 1051 - 0
src/modules/unicodedata.c

@@ -0,0 +1,1051 @@
+#include "pocketpy/common/str.h"
+#include "pocketpy/pocketpy.h"
+
+const static c11_u32_range kEastAsianWidthRanges[] = {
+    {32,      126,     "Na\0"},
+    {161,     161,     "A\0" },
+    {162,     163,     "Na\0"},
+    {164,     164,     "A\0" },
+    {165,     166,     "Na\0"},
+    {167,     168,     "A\0" },
+    {170,     170,     "A\0" },
+    {172,     172,     "Na\0"},
+    {173,     174,     "A\0" },
+    {175,     175,     "Na\0"},
+    {176,     180,     "A\0" },
+    {182,     186,     "A\0" },
+    {188,     191,     "A\0" },
+    {198,     198,     "A\0" },
+    {208,     208,     "A\0" },
+    {215,     216,     "A\0" },
+    {222,     225,     "A\0" },
+    {230,     230,     "A\0" },
+    {232,     234,     "A\0" },
+    {236,     237,     "A\0" },
+    {240,     240,     "A\0" },
+    {242,     243,     "A\0" },
+    {247,     250,     "A\0" },
+    {252,     252,     "A\0" },
+    {254,     254,     "A\0" },
+    {257,     257,     "A\0" },
+    {273,     273,     "A\0" },
+    {275,     275,     "A\0" },
+    {283,     283,     "A\0" },
+    {294,     295,     "A\0" },
+    {299,     299,     "A\0" },
+    {305,     307,     "A\0" },
+    {312,     312,     "A\0" },
+    {319,     322,     "A\0" },
+    {324,     324,     "A\0" },
+    {328,     331,     "A\0" },
+    {333,     333,     "A\0" },
+    {338,     339,     "A\0" },
+    {358,     359,     "A\0" },
+    {363,     363,     "A\0" },
+    {462,     462,     "A\0" },
+    {464,     464,     "A\0" },
+    {466,     466,     "A\0" },
+    {468,     468,     "A\0" },
+    {470,     470,     "A\0" },
+    {472,     472,     "A\0" },
+    {474,     474,     "A\0" },
+    {476,     476,     "A\0" },
+    {593,     593,     "A\0" },
+    {609,     609,     "A\0" },
+    {708,     708,     "A\0" },
+    {711,     711,     "A\0" },
+    {713,     715,     "A\0" },
+    {717,     717,     "A\0" },
+    {720,     720,     "A\0" },
+    {728,     731,     "A\0" },
+    {733,     733,     "A\0" },
+    {735,     735,     "A\0" },
+    {768,     879,     "A\0" },
+    {888,     889,     "F\0" },
+    {896,     899,     "F\0" },
+    {907,     907,     "F\0" },
+    {909,     909,     "F\0" },
+    {913,     929,     "A\0" },
+    {930,     930,     "F\0" },
+    {931,     937,     "A\0" },
+    {945,     961,     "A\0" },
+    {963,     969,     "A\0" },
+    {1025,    1025,    "A\0" },
+    {1040,    1103,    "A\0" },
+    {1105,    1105,    "A\0" },
+    {1328,    1328,    "F\0" },
+    {1367,    1368,    "F\0" },
+    {1419,    1420,    "F\0" },
+    {1424,    1424,    "F\0" },
+    {1480,    1487,    "F\0" },
+    {1515,    1518,    "F\0" },
+    {1525,    1535,    "F\0" },
+    {1806,    1806,    "F\0" },
+    {1867,    1868,    "F\0" },
+    {1970,    1983,    "F\0" },
+    {2043,    2044,    "F\0" },
+    {2094,    2095,    "F\0" },
+    {2111,    2111,    "F\0" },
+    {2140,    2141,    "F\0" },
+    {2143,    2143,    "F\0" },
+    {2155,    2159,    "F\0" },
+    {2191,    2191,    "F\0" },
+    {2194,    2199,    "F\0" },
+    {2436,    2436,    "F\0" },
+    {2445,    2446,    "F\0" },
+    {2449,    2450,    "F\0" },
+    {2473,    2473,    "F\0" },
+    {2481,    2481,    "F\0" },
+    {2483,    2485,    "F\0" },
+    {2490,    2491,    "F\0" },
+    {2501,    2502,    "F\0" },
+    {2505,    2506,    "F\0" },
+    {2511,    2518,    "F\0" },
+    {2520,    2523,    "F\0" },
+    {2526,    2526,    "F\0" },
+    {2532,    2533,    "F\0" },
+    {2559,    2560,    "F\0" },
+    {2564,    2564,    "F\0" },
+    {2571,    2574,    "F\0" },
+    {2577,    2578,    "F\0" },
+    {2601,    2601,    "F\0" },
+    {2609,    2609,    "F\0" },
+    {2612,    2612,    "F\0" },
+    {2615,    2615,    "F\0" },
+    {2618,    2619,    "F\0" },
+    {2621,    2621,    "F\0" },
+    {2627,    2630,    "F\0" },
+    {2633,    2634,    "F\0" },
+    {2638,    2640,    "F\0" },
+    {2642,    2648,    "F\0" },
+    {2653,    2653,    "F\0" },
+    {2655,    2661,    "F\0" },
+    {2679,    2688,    "F\0" },
+    {2692,    2692,    "F\0" },
+    {2702,    2702,    "F\0" },
+    {2706,    2706,    "F\0" },
+    {2729,    2729,    "F\0" },
+    {2737,    2737,    "F\0" },
+    {2740,    2740,    "F\0" },
+    {2746,    2747,    "F\0" },
+    {2758,    2758,    "F\0" },
+    {2762,    2762,    "F\0" },
+    {2766,    2767,    "F\0" },
+    {2769,    2783,    "F\0" },
+    {2788,    2789,    "F\0" },
+    {2802,    2808,    "F\0" },
+    {2816,    2816,    "F\0" },
+    {2820,    2820,    "F\0" },
+    {2829,    2830,    "F\0" },
+    {2833,    2834,    "F\0" },
+    {2857,    2857,    "F\0" },
+    {2865,    2865,    "F\0" },
+    {2868,    2868,    "F\0" },
+    {2874,    2875,    "F\0" },
+    {2885,    2886,    "F\0" },
+    {2889,    2890,    "F\0" },
+    {2894,    2900,    "F\0" },
+    {2904,    2907,    "F\0" },
+    {2910,    2910,    "F\0" },
+    {2916,    2917,    "F\0" },
+    {2936,    2945,    "F\0" },
+    {2948,    2948,    "F\0" },
+    {2955,    2957,    "F\0" },
+    {2961,    2961,    "F\0" },
+    {2966,    2968,    "F\0" },
+    {2971,    2971,    "F\0" },
+    {2973,    2973,    "F\0" },
+    {2976,    2978,    "F\0" },
+    {2981,    2983,    "F\0" },
+    {2987,    2989,    "F\0" },
+    {3002,    3005,    "F\0" },
+    {3011,    3013,    "F\0" },
+    {3017,    3017,    "F\0" },
+    {3022,    3023,    "F\0" },
+    {3025,    3030,    "F\0" },
+    {3032,    3045,    "F\0" },
+    {3067,    3071,    "F\0" },
+    {3085,    3085,    "F\0" },
+    {3089,    3089,    "F\0" },
+    {3113,    3113,    "F\0" },
+    {3130,    3131,    "F\0" },
+    {3141,    3141,    "F\0" },
+    {3145,    3145,    "F\0" },
+    {3150,    3156,    "F\0" },
+    {3159,    3159,    "F\0" },
+    {3163,    3164,    "F\0" },
+    {3166,    3167,    "F\0" },
+    {3172,    3173,    "F\0" },
+    {3184,    3190,    "F\0" },
+    {3213,    3213,    "F\0" },
+    {3217,    3217,    "F\0" },
+    {3241,    3241,    "F\0" },
+    {3252,    3252,    "F\0" },
+    {3258,    3259,    "F\0" },
+    {3269,    3269,    "F\0" },
+    {3273,    3273,    "F\0" },
+    {3278,    3284,    "F\0" },
+    {3287,    3292,    "F\0" },
+    {3295,    3295,    "F\0" },
+    {3300,    3301,    "F\0" },
+    {3312,    3312,    "F\0" },
+    {3315,    3327,    "F\0" },
+    {3341,    3341,    "F\0" },
+    {3345,    3345,    "F\0" },
+    {3397,    3397,    "F\0" },
+    {3401,    3401,    "F\0" },
+    {3408,    3411,    "F\0" },
+    {3428,    3429,    "F\0" },
+    {3456,    3456,    "F\0" },
+    {3460,    3460,    "F\0" },
+    {3479,    3481,    "F\0" },
+    {3506,    3506,    "F\0" },
+    {3516,    3516,    "F\0" },
+    {3518,    3519,    "F\0" },
+    {3527,    3529,    "F\0" },
+    {3531,    3534,    "F\0" },
+    {3541,    3541,    "F\0" },
+    {3543,    3543,    "F\0" },
+    {3552,    3557,    "F\0" },
+    {3568,    3569,    "F\0" },
+    {3573,    3584,    "F\0" },
+    {3643,    3646,    "F\0" },
+    {3676,    3712,    "F\0" },
+    {3715,    3715,    "F\0" },
+    {3717,    3717,    "F\0" },
+    {3723,    3723,    "F\0" },
+    {3748,    3748,    "F\0" },
+    {3750,    3750,    "F\0" },
+    {3774,    3775,    "F\0" },
+    {3781,    3781,    "F\0" },
+    {3783,    3783,    "F\0" },
+    {3790,    3791,    "F\0" },
+    {3802,    3803,    "F\0" },
+    {3808,    3839,    "F\0" },
+    {3912,    3912,    "F\0" },
+    {3949,    3952,    "F\0" },
+    {3992,    3992,    "F\0" },
+    {4029,    4029,    "F\0" },
+    {4045,    4045,    "F\0" },
+    {4059,    4095,    "F\0" },
+    {4294,    4294,    "F\0" },
+    {4296,    4300,    "F\0" },
+    {4302,    4303,    "F\0" },
+    {4352,    4447,    "W\0" },
+    {4681,    4681,    "F\0" },
+    {4686,    4687,    "F\0" },
+    {4695,    4695,    "F\0" },
+    {4697,    4697,    "F\0" },
+    {4702,    4703,    "F\0" },
+    {4745,    4745,    "F\0" },
+    {4750,    4751,    "F\0" },
+    {4785,    4785,    "F\0" },
+    {4790,    4791,    "F\0" },
+    {4799,    4799,    "F\0" },
+    {4801,    4801,    "F\0" },
+    {4806,    4807,    "F\0" },
+    {4823,    4823,    "F\0" },
+    {4881,    4881,    "F\0" },
+    {4886,    4887,    "F\0" },
+    {4955,    4956,    "F\0" },
+    {4989,    4991,    "F\0" },
+    {5018,    5023,    "F\0" },
+    {5110,    5111,    "F\0" },
+    {5118,    5119,    "F\0" },
+    {5789,    5791,    "F\0" },
+    {5881,    5887,    "F\0" },
+    {5910,    5918,    "F\0" },
+    {5943,    5951,    "F\0" },
+    {5972,    5983,    "F\0" },
+    {5997,    5997,    "F\0" },
+    {6001,    6001,    "F\0" },
+    {6004,    6015,    "F\0" },
+    {6110,    6111,    "F\0" },
+    {6122,    6127,    "F\0" },
+    {6138,    6143,    "F\0" },
+    {6170,    6175,    "F\0" },
+    {6265,    6271,    "F\0" },
+    {6315,    6319,    "F\0" },
+    {6390,    6399,    "F\0" },
+    {6431,    6431,    "F\0" },
+    {6444,    6447,    "F\0" },
+    {6460,    6463,    "F\0" },
+    {6465,    6467,    "F\0" },
+    {6510,    6511,    "F\0" },
+    {6517,    6527,    "F\0" },
+    {6572,    6575,    "F\0" },
+    {6602,    6607,    "F\0" },
+    {6619,    6621,    "F\0" },
+    {6684,    6685,    "F\0" },
+    {6751,    6751,    "F\0" },
+    {6781,    6782,    "F\0" },
+    {6794,    6799,    "F\0" },
+    {6810,    6815,    "F\0" },
+    {6830,    6831,    "F\0" },
+    {6863,    6911,    "F\0" },
+    {6989,    6991,    "F\0" },
+    {7039,    7039,    "F\0" },
+    {7156,    7163,    "F\0" },
+    {7224,    7226,    "F\0" },
+    {7242,    7244,    "F\0" },
+    {7305,    7311,    "F\0" },
+    {7355,    7356,    "F\0" },
+    {7368,    7375,    "F\0" },
+    {7419,    7423,    "F\0" },
+    {7958,    7959,    "F\0" },
+    {7966,    7967,    "F\0" },
+    {8006,    8007,    "F\0" },
+    {8014,    8015,    "F\0" },
+    {8024,    8024,    "F\0" },
+    {8026,    8026,    "F\0" },
+    {8028,    8028,    "F\0" },
+    {8030,    8030,    "F\0" },
+    {8062,    8063,    "F\0" },
+    {8117,    8117,    "F\0" },
+    {8133,    8133,    "F\0" },
+    {8148,    8149,    "F\0" },
+    {8156,    8156,    "F\0" },
+    {8176,    8177,    "F\0" },
+    {8181,    8181,    "F\0" },
+    {8191,    8191,    "F\0" },
+    {8208,    8208,    "A\0" },
+    {8211,    8214,    "A\0" },
+    {8216,    8217,    "A\0" },
+    {8220,    8221,    "A\0" },
+    {8224,    8226,    "A\0" },
+    {8228,    8231,    "A\0" },
+    {8240,    8240,    "A\0" },
+    {8242,    8243,    "A\0" },
+    {8245,    8245,    "A\0" },
+    {8251,    8251,    "A\0" },
+    {8254,    8254,    "A\0" },
+    {8293,    8293,    "F\0" },
+    {8306,    8307,    "F\0" },
+    {8308,    8308,    "A\0" },
+    {8319,    8319,    "A\0" },
+    {8321,    8324,    "A\0" },
+    {8335,    8335,    "F\0" },
+    {8349,    8351,    "F\0" },
+    {8361,    8361,    "H\0" },
+    {8364,    8364,    "A\0" },
+    {8385,    8399,    "F\0" },
+    {8433,    8447,    "F\0" },
+    {8451,    8451,    "A\0" },
+    {8453,    8453,    "A\0" },
+    {8457,    8457,    "A\0" },
+    {8467,    8467,    "A\0" },
+    {8470,    8470,    "A\0" },
+    {8481,    8482,    "A\0" },
+    {8486,    8486,    "A\0" },
+    {8491,    8491,    "A\0" },
+    {8531,    8532,    "A\0" },
+    {8539,    8542,    "A\0" },
+    {8544,    8555,    "A\0" },
+    {8560,    8569,    "A\0" },
+    {8585,    8585,    "A\0" },
+    {8588,    8591,    "F\0" },
+    {8592,    8601,    "A\0" },
+    {8632,    8633,    "A\0" },
+    {8658,    8658,    "A\0" },
+    {8660,    8660,    "A\0" },
+    {8679,    8679,    "A\0" },
+    {8704,    8704,    "A\0" },
+    {8706,    8707,    "A\0" },
+    {8711,    8712,    "A\0" },
+    {8715,    8715,    "A\0" },
+    {8719,    8719,    "A\0" },
+    {8721,    8721,    "A\0" },
+    {8725,    8725,    "A\0" },
+    {8730,    8730,    "A\0" },
+    {8733,    8736,    "A\0" },
+    {8739,    8739,    "A\0" },
+    {8741,    8741,    "A\0" },
+    {8743,    8748,    "A\0" },
+    {8750,    8750,    "A\0" },
+    {8756,    8759,    "A\0" },
+    {8764,    8765,    "A\0" },
+    {8776,    8776,    "A\0" },
+    {8780,    8780,    "A\0" },
+    {8786,    8786,    "A\0" },
+    {8800,    8801,    "A\0" },
+    {8804,    8807,    "A\0" },
+    {8810,    8811,    "A\0" },
+    {8814,    8815,    "A\0" },
+    {8834,    8835,    "A\0" },
+    {8838,    8839,    "A\0" },
+    {8853,    8853,    "A\0" },
+    {8857,    8857,    "A\0" },
+    {8869,    8869,    "A\0" },
+    {8895,    8895,    "A\0" },
+    {8978,    8978,    "A\0" },
+    {8986,    8987,    "W\0" },
+    {9001,    9002,    "W\0" },
+    {9193,    9196,    "W\0" },
+    {9200,    9200,    "W\0" },
+    {9203,    9203,    "W\0" },
+    {9255,    9279,    "F\0" },
+    {9291,    9311,    "F\0" },
+    {9312,    9449,    "A\0" },
+    {9451,    9547,    "A\0" },
+    {9552,    9587,    "A\0" },
+    {9600,    9615,    "A\0" },
+    {9618,    9621,    "A\0" },
+    {9632,    9633,    "A\0" },
+    {9635,    9641,    "A\0" },
+    {9650,    9651,    "A\0" },
+    {9654,    9655,    "A\0" },
+    {9660,    9661,    "A\0" },
+    {9664,    9665,    "A\0" },
+    {9670,    9672,    "A\0" },
+    {9675,    9675,    "A\0" },
+    {9678,    9681,    "A\0" },
+    {9698,    9701,    "A\0" },
+    {9711,    9711,    "A\0" },
+    {9725,    9726,    "W\0" },
+    {9733,    9734,    "A\0" },
+    {9737,    9737,    "A\0" },
+    {9742,    9743,    "A\0" },
+    {9748,    9749,    "W\0" },
+    {9756,    9756,    "A\0" },
+    {9758,    9758,    "A\0" },
+    {9792,    9792,    "A\0" },
+    {9794,    9794,    "A\0" },
+    {9800,    9811,    "W\0" },
+    {9824,    9825,    "A\0" },
+    {9827,    9829,    "A\0" },
+    {9831,    9834,    "A\0" },
+    {9836,    9837,    "A\0" },
+    {9839,    9839,    "A\0" },
+    {9855,    9855,    "W\0" },
+    {9875,    9875,    "W\0" },
+    {9886,    9887,    "A\0" },
+    {9889,    9889,    "W\0" },
+    {9898,    9899,    "W\0" },
+    {9917,    9918,    "W\0" },
+    {9919,    9919,    "A\0" },
+    {9924,    9925,    "W\0" },
+    {9926,    9933,    "A\0" },
+    {9934,    9934,    "W\0" },
+    {9935,    9939,    "A\0" },
+    {9940,    9940,    "W\0" },
+    {9941,    9953,    "A\0" },
+    {9955,    9955,    "A\0" },
+    {9960,    9961,    "A\0" },
+    {9962,    9962,    "W\0" },
+    {9963,    9969,    "A\0" },
+    {9970,    9971,    "W\0" },
+    {9972,    9972,    "A\0" },
+    {9973,    9973,    "W\0" },
+    {9974,    9977,    "A\0" },
+    {9978,    9978,    "W\0" },
+    {9979,    9980,    "A\0" },
+    {9981,    9981,    "W\0" },
+    {9982,    9983,    "A\0" },
+    {9989,    9989,    "W\0" },
+    {9994,    9995,    "W\0" },
+    {10024,   10024,   "W\0" },
+    {10045,   10045,   "A\0" },
+    {10060,   10060,   "W\0" },
+    {10062,   10062,   "W\0" },
+    {10067,   10069,   "W\0" },
+    {10071,   10071,   "W\0" },
+    {10102,   10111,   "A\0" },
+    {10133,   10135,   "W\0" },
+    {10160,   10160,   "W\0" },
+    {10175,   10175,   "W\0" },
+    {10214,   10221,   "Na\0"},
+    {10629,   10630,   "Na\0"},
+    {11035,   11036,   "W\0" },
+    {11088,   11088,   "W\0" },
+    {11093,   11093,   "W\0" },
+    {11094,   11097,   "A\0" },
+    {11124,   11125,   "F\0" },
+    {11158,   11158,   "F\0" },
+    {11508,   11512,   "F\0" },
+    {11558,   11558,   "F\0" },
+    {11560,   11564,   "F\0" },
+    {11566,   11567,   "F\0" },
+    {11624,   11630,   "F\0" },
+    {11633,   11646,   "F\0" },
+    {11671,   11679,   "F\0" },
+    {11687,   11687,   "F\0" },
+    {11695,   11695,   "F\0" },
+    {11703,   11703,   "F\0" },
+    {11711,   11711,   "F\0" },
+    {11719,   11719,   "F\0" },
+    {11727,   11727,   "F\0" },
+    {11735,   11735,   "F\0" },
+    {11743,   11743,   "F\0" },
+    {11870,   11903,   "F\0" },
+    {11904,   11929,   "W\0" },
+    {11930,   11930,   "F\0" },
+    {11931,   12019,   "W\0" },
+    {12020,   12031,   "F\0" },
+    {12032,   12245,   "W\0" },
+    {12246,   12271,   "F\0" },
+    {12272,   12283,   "W\0" },
+    {12284,   12288,   "F\0" },
+    {12289,   12350,   "W\0" },
+    {12352,   12352,   "F\0" },
+    {12353,   12438,   "W\0" },
+    {12439,   12440,   "F\0" },
+    {12441,   12543,   "W\0" },
+    {12544,   12548,   "F\0" },
+    {12549,   12591,   "W\0" },
+    {12592,   12592,   "F\0" },
+    {12593,   12686,   "W\0" },
+    {12687,   12687,   "F\0" },
+    {12688,   12771,   "W\0" },
+    {12772,   12783,   "F\0" },
+    {12784,   12830,   "W\0" },
+    {12831,   12831,   "F\0" },
+    {12832,   12871,   "W\0" },
+    {12872,   12879,   "A\0" },
+    {12880,   19903,   "W\0" },
+    {19968,   42124,   "W\0" },
+    {42125,   42127,   "F\0" },
+    {42128,   42182,   "W\0" },
+    {42183,   42191,   "F\0" },
+    {42540,   42559,   "F\0" },
+    {42744,   42751,   "F\0" },
+    {42955,   42959,   "F\0" },
+    {42962,   42962,   "F\0" },
+    {42964,   42964,   "F\0" },
+    {42970,   42993,   "F\0" },
+    {43053,   43055,   "F\0" },
+    {43066,   43071,   "F\0" },
+    {43128,   43135,   "F\0" },
+    {43206,   43213,   "F\0" },
+    {43226,   43231,   "F\0" },
+    {43348,   43358,   "F\0" },
+    {43360,   43388,   "W\0" },
+    {43389,   43391,   "F\0" },
+    {43470,   43470,   "F\0" },
+    {43482,   43485,   "F\0" },
+    {43519,   43519,   "F\0" },
+    {43575,   43583,   "F\0" },
+    {43598,   43599,   "F\0" },
+    {43610,   43611,   "F\0" },
+    {43715,   43738,   "F\0" },
+    {43767,   43776,   "F\0" },
+    {43783,   43784,   "F\0" },
+    {43791,   43792,   "F\0" },
+    {43799,   43807,   "F\0" },
+    {43815,   43815,   "F\0" },
+    {43823,   43823,   "F\0" },
+    {43884,   43887,   "F\0" },
+    {44014,   44015,   "F\0" },
+    {44026,   44031,   "F\0" },
+    {44032,   55203,   "W\0" },
+    {55204,   55215,   "F\0" },
+    {55239,   55242,   "F\0" },
+    {55292,   55295,   "F\0" },
+    {57344,   63743,   "A\0" },
+    {63744,   64109,   "W\0" },
+    {64110,   64111,   "F\0" },
+    {64112,   64217,   "W\0" },
+    {64218,   64255,   "F\0" },
+    {64263,   64274,   "F\0" },
+    {64280,   64284,   "F\0" },
+    {64311,   64311,   "F\0" },
+    {64317,   64317,   "F\0" },
+    {64319,   64319,   "F\0" },
+    {64322,   64322,   "F\0" },
+    {64325,   64325,   "F\0" },
+    {64451,   64466,   "F\0" },
+    {64912,   64913,   "F\0" },
+    {64968,   64974,   "F\0" },
+    {64976,   65007,   "F\0" },
+    {65024,   65039,   "A\0" },
+    {65040,   65049,   "W\0" },
+    {65050,   65055,   "F\0" },
+    {65072,   65106,   "W\0" },
+    {65107,   65107,   "F\0" },
+    {65108,   65126,   "W\0" },
+    {65127,   65127,   "F\0" },
+    {65128,   65131,   "W\0" },
+    {65132,   65135,   "F\0" },
+    {65141,   65141,   "F\0" },
+    {65277,   65278,   "F\0" },
+    {65280,   65376,   "F\0" },
+    {65377,   65470,   "H\0" },
+    {65471,   65473,   "F\0" },
+    {65474,   65479,   "H\0" },
+    {65480,   65481,   "F\0" },
+    {65482,   65487,   "H\0" },
+    {65488,   65489,   "F\0" },
+    {65490,   65495,   "H\0" },
+    {65496,   65497,   "F\0" },
+    {65498,   65500,   "H\0" },
+    {65501,   65511,   "F\0" },
+    {65512,   65518,   "H\0" },
+    {65519,   65528,   "F\0" },
+    {65533,   65533,   "A\0" },
+    {65534,   65535,   "F\0" },
+    {65548,   65548,   "F\0" },
+    {65575,   65575,   "F\0" },
+    {65595,   65595,   "F\0" },
+    {65598,   65598,   "F\0" },
+    {65614,   65615,   "F\0" },
+    {65630,   65663,   "F\0" },
+    {65787,   65791,   "F\0" },
+    {65795,   65798,   "F\0" },
+    {65844,   65846,   "F\0" },
+    {65935,   65935,   "F\0" },
+    {65949,   65951,   "F\0" },
+    {65953,   65999,   "F\0" },
+    {66046,   66175,   "F\0" },
+    {66205,   66207,   "F\0" },
+    {66257,   66271,   "F\0" },
+    {66300,   66303,   "F\0" },
+    {66340,   66348,   "F\0" },
+    {66379,   66383,   "F\0" },
+    {66427,   66431,   "F\0" },
+    {66462,   66462,   "F\0" },
+    {66500,   66503,   "F\0" },
+    {66518,   66559,   "F\0" },
+    {66718,   66719,   "F\0" },
+    {66730,   66735,   "F\0" },
+    {66772,   66775,   "F\0" },
+    {66812,   66815,   "F\0" },
+    {66856,   66863,   "F\0" },
+    {66916,   66926,   "F\0" },
+    {66939,   66939,   "F\0" },
+    {66955,   66955,   "F\0" },
+    {66963,   66963,   "F\0" },
+    {66966,   66966,   "F\0" },
+    {66978,   66978,   "F\0" },
+    {66994,   66994,   "F\0" },
+    {67002,   67002,   "F\0" },
+    {67005,   67071,   "F\0" },
+    {67383,   67391,   "F\0" },
+    {67414,   67423,   "F\0" },
+    {67432,   67455,   "F\0" },
+    {67462,   67462,   "F\0" },
+    {67505,   67505,   "F\0" },
+    {67515,   67583,   "F\0" },
+    {67590,   67591,   "F\0" },
+    {67593,   67593,   "F\0" },
+    {67638,   67638,   "F\0" },
+    {67641,   67643,   "F\0" },
+    {67645,   67646,   "F\0" },
+    {67670,   67670,   "F\0" },
+    {67743,   67750,   "F\0" },
+    {67760,   67807,   "F\0" },
+    {67827,   67827,   "F\0" },
+    {67830,   67834,   "F\0" },
+    {67868,   67870,   "F\0" },
+    {67898,   67902,   "F\0" },
+    {67904,   67967,   "F\0" },
+    {68024,   68027,   "F\0" },
+    {68048,   68049,   "F\0" },
+    {68100,   68100,   "F\0" },
+    {68103,   68107,   "F\0" },
+    {68116,   68116,   "F\0" },
+    {68120,   68120,   "F\0" },
+    {68150,   68151,   "F\0" },
+    {68155,   68158,   "F\0" },
+    {68169,   68175,   "F\0" },
+    {68185,   68191,   "F\0" },
+    {68256,   68287,   "F\0" },
+    {68327,   68330,   "F\0" },
+    {68343,   68351,   "F\0" },
+    {68406,   68408,   "F\0" },
+    {68438,   68439,   "F\0" },
+    {68467,   68471,   "F\0" },
+    {68498,   68504,   "F\0" },
+    {68509,   68520,   "F\0" },
+    {68528,   68607,   "F\0" },
+    {68681,   68735,   "F\0" },
+    {68787,   68799,   "F\0" },
+    {68851,   68857,   "F\0" },
+    {68904,   68911,   "F\0" },
+    {68922,   69215,   "F\0" },
+    {69247,   69247,   "F\0" },
+    {69290,   69290,   "F\0" },
+    {69294,   69295,   "F\0" },
+    {69298,   69375,   "F\0" },
+    {69416,   69423,   "F\0" },
+    {69466,   69487,   "F\0" },
+    {69514,   69551,   "F\0" },
+    {69580,   69599,   "F\0" },
+    {69623,   69631,   "F\0" },
+    {69710,   69713,   "F\0" },
+    {69750,   69758,   "F\0" },
+    {69827,   69836,   "F\0" },
+    {69838,   69839,   "F\0" },
+    {69865,   69871,   "F\0" },
+    {69882,   69887,   "F\0" },
+    {69941,   69941,   "F\0" },
+    {69960,   69967,   "F\0" },
+    {70007,   70015,   "F\0" },
+    {70112,   70112,   "F\0" },
+    {70133,   70143,   "F\0" },
+    {70162,   70162,   "F\0" },
+    {70207,   70271,   "F\0" },
+    {70279,   70279,   "F\0" },
+    {70281,   70281,   "F\0" },
+    {70286,   70286,   "F\0" },
+    {70302,   70302,   "F\0" },
+    {70314,   70319,   "F\0" },
+    {70379,   70383,   "F\0" },
+    {70394,   70399,   "F\0" },
+    {70404,   70404,   "F\0" },
+    {70413,   70414,   "F\0" },
+    {70417,   70418,   "F\0" },
+    {70441,   70441,   "F\0" },
+    {70449,   70449,   "F\0" },
+    {70452,   70452,   "F\0" },
+    {70458,   70458,   "F\0" },
+    {70469,   70470,   "F\0" },
+    {70473,   70474,   "F\0" },
+    {70478,   70479,   "F\0" },
+    {70481,   70486,   "F\0" },
+    {70488,   70492,   "F\0" },
+    {70500,   70501,   "F\0" },
+    {70509,   70511,   "F\0" },
+    {70517,   70655,   "F\0" },
+    {70748,   70748,   "F\0" },
+    {70754,   70783,   "F\0" },
+    {70856,   70863,   "F\0" },
+    {70874,   71039,   "F\0" },
+    {71094,   71095,   "F\0" },
+    {71134,   71167,   "F\0" },
+    {71237,   71247,   "F\0" },
+    {71258,   71263,   "F\0" },
+    {71277,   71295,   "F\0" },
+    {71354,   71359,   "F\0" },
+    {71370,   71423,   "F\0" },
+    {71451,   71452,   "F\0" },
+    {71468,   71471,   "F\0" },
+    {71495,   71679,   "F\0" },
+    {71740,   71839,   "F\0" },
+    {71923,   71934,   "F\0" },
+    {71943,   71944,   "F\0" },
+    {71946,   71947,   "F\0" },
+    {71956,   71956,   "F\0" },
+    {71959,   71959,   "F\0" },
+    {71990,   71990,   "F\0" },
+    {71993,   71994,   "F\0" },
+    {72007,   72015,   "F\0" },
+    {72026,   72095,   "F\0" },
+    {72104,   72105,   "F\0" },
+    {72152,   72153,   "F\0" },
+    {72165,   72191,   "F\0" },
+    {72264,   72271,   "F\0" },
+    {72355,   72367,   "F\0" },
+    {72441,   72703,   "F\0" },
+    {72713,   72713,   "F\0" },
+    {72759,   72759,   "F\0" },
+    {72774,   72783,   "F\0" },
+    {72813,   72815,   "F\0" },
+    {72848,   72849,   "F\0" },
+    {72872,   72872,   "F\0" },
+    {72887,   72959,   "F\0" },
+    {72967,   72967,   "F\0" },
+    {72970,   72970,   "F\0" },
+    {73015,   73017,   "F\0" },
+    {73019,   73019,   "F\0" },
+    {73022,   73022,   "F\0" },
+    {73032,   73039,   "F\0" },
+    {73050,   73055,   "F\0" },
+    {73062,   73062,   "F\0" },
+    {73065,   73065,   "F\0" },
+    {73103,   73103,   "F\0" },
+    {73106,   73106,   "F\0" },
+    {73113,   73119,   "F\0" },
+    {73130,   73439,   "F\0" },
+    {73465,   73647,   "F\0" },
+    {73649,   73663,   "F\0" },
+    {73714,   73726,   "F\0" },
+    {74650,   74751,   "F\0" },
+    {74863,   74863,   "F\0" },
+    {74869,   74879,   "F\0" },
+    {75076,   77711,   "F\0" },
+    {77811,   77823,   "F\0" },
+    {78895,   78895,   "F\0" },
+    {78905,   82943,   "F\0" },
+    {83527,   92159,   "F\0" },
+    {92729,   92735,   "F\0" },
+    {92767,   92767,   "F\0" },
+    {92778,   92781,   "F\0" },
+    {92863,   92863,   "F\0" },
+    {92874,   92879,   "F\0" },
+    {92910,   92911,   "F\0" },
+    {92918,   92927,   "F\0" },
+    {92998,   93007,   "F\0" },
+    {93018,   93018,   "F\0" },
+    {93026,   93026,   "F\0" },
+    {93048,   93052,   "F\0" },
+    {93072,   93759,   "F\0" },
+    {93851,   93951,   "F\0" },
+    {94027,   94030,   "F\0" },
+    {94088,   94094,   "F\0" },
+    {94112,   94175,   "F\0" },
+    {94176,   94180,   "W\0" },
+    {94181,   94191,   "F\0" },
+    {94192,   94193,   "W\0" },
+    {94194,   94207,   "F\0" },
+    {94208,   100343,  "W\0" },
+    {100344,  100351,  "F\0" },
+    {100352,  101589,  "W\0" },
+    {101590,  101631,  "F\0" },
+    {101632,  101640,  "W\0" },
+    {101641,  110575,  "F\0" },
+    {110576,  110579,  "W\0" },
+    {110580,  110580,  "F\0" },
+    {110581,  110587,  "W\0" },
+    {110588,  110588,  "F\0" },
+    {110589,  110590,  "W\0" },
+    {110591,  110591,  "F\0" },
+    {110592,  110882,  "W\0" },
+    {110883,  110927,  "F\0" },
+    {110928,  110930,  "W\0" },
+    {110931,  110947,  "F\0" },
+    {110948,  110951,  "W\0" },
+    {110952,  110959,  "F\0" },
+    {110960,  111355,  "W\0" },
+    {111356,  113663,  "F\0" },
+    {113771,  113775,  "F\0" },
+    {113789,  113791,  "F\0" },
+    {113801,  113807,  "F\0" },
+    {113818,  113819,  "F\0" },
+    {113828,  118527,  "F\0" },
+    {118574,  118575,  "F\0" },
+    {118599,  118607,  "F\0" },
+    {118724,  118783,  "F\0" },
+    {119030,  119039,  "F\0" },
+    {119079,  119080,  "F\0" },
+    {119275,  119295,  "F\0" },
+    {119366,  119519,  "F\0" },
+    {119540,  119551,  "F\0" },
+    {119639,  119647,  "F\0" },
+    {119673,  119807,  "F\0" },
+    {119893,  119893,  "F\0" },
+    {119965,  119965,  "F\0" },
+    {119968,  119969,  "F\0" },
+    {119971,  119972,  "F\0" },
+    {119975,  119976,  "F\0" },
+    {119981,  119981,  "F\0" },
+    {119994,  119994,  "F\0" },
+    {119996,  119996,  "F\0" },
+    {120004,  120004,  "F\0" },
+    {120070,  120070,  "F\0" },
+    {120075,  120076,  "F\0" },
+    {120085,  120085,  "F\0" },
+    {120093,  120093,  "F\0" },
+    {120122,  120122,  "F\0" },
+    {120127,  120127,  "F\0" },
+    {120133,  120133,  "F\0" },
+    {120135,  120137,  "F\0" },
+    {120145,  120145,  "F\0" },
+    {120486,  120487,  "F\0" },
+    {120780,  120781,  "F\0" },
+    {121484,  121498,  "F\0" },
+    {121504,  121504,  "F\0" },
+    {121520,  122623,  "F\0" },
+    {122655,  122879,  "F\0" },
+    {122887,  122887,  "F\0" },
+    {122905,  122906,  "F\0" },
+    {122914,  122914,  "F\0" },
+    {122917,  122917,  "F\0" },
+    {122923,  123135,  "F\0" },
+    {123181,  123183,  "F\0" },
+    {123198,  123199,  "F\0" },
+    {123210,  123213,  "F\0" },
+    {123216,  123535,  "F\0" },
+    {123567,  123583,  "F\0" },
+    {123642,  123646,  "F\0" },
+    {123648,  124895,  "F\0" },
+    {124903,  124903,  "F\0" },
+    {124908,  124908,  "F\0" },
+    {124911,  124911,  "F\0" },
+    {124927,  124927,  "F\0" },
+    {125125,  125126,  "F\0" },
+    {125143,  125183,  "F\0" },
+    {125260,  125263,  "F\0" },
+    {125274,  125277,  "F\0" },
+    {125280,  126064,  "F\0" },
+    {126133,  126208,  "F\0" },
+    {126270,  126463,  "F\0" },
+    {126468,  126468,  "F\0" },
+    {126496,  126496,  "F\0" },
+    {126499,  126499,  "F\0" },
+    {126501,  126502,  "F\0" },
+    {126504,  126504,  "F\0" },
+    {126515,  126515,  "F\0" },
+    {126520,  126520,  "F\0" },
+    {126522,  126522,  "F\0" },
+    {126524,  126529,  "F\0" },
+    {126531,  126534,  "F\0" },
+    {126536,  126536,  "F\0" },
+    {126538,  126538,  "F\0" },
+    {126540,  126540,  "F\0" },
+    {126544,  126544,  "F\0" },
+    {126547,  126547,  "F\0" },
+    {126549,  126550,  "F\0" },
+    {126552,  126552,  "F\0" },
+    {126554,  126554,  "F\0" },
+    {126556,  126556,  "F\0" },
+    {126558,  126558,  "F\0" },
+    {126560,  126560,  "F\0" },
+    {126563,  126563,  "F\0" },
+    {126565,  126566,  "F\0" },
+    {126571,  126571,  "F\0" },
+    {126579,  126579,  "F\0" },
+    {126584,  126584,  "F\0" },
+    {126589,  126589,  "F\0" },
+    {126591,  126591,  "F\0" },
+    {126602,  126602,  "F\0" },
+    {126620,  126624,  "F\0" },
+    {126628,  126628,  "F\0" },
+    {126634,  126634,  "F\0" },
+    {126652,  126703,  "F\0" },
+    {126706,  126975,  "F\0" },
+    {126980,  126980,  "W\0" },
+    {127020,  127023,  "F\0" },
+    {127124,  127135,  "F\0" },
+    {127151,  127152,  "F\0" },
+    {127168,  127168,  "F\0" },
+    {127183,  127183,  "W\0" },
+    {127184,  127184,  "F\0" },
+    {127222,  127231,  "F\0" },
+    {127232,  127242,  "A\0" },
+    {127248,  127277,  "A\0" },
+    {127280,  127337,  "A\0" },
+    {127344,  127373,  "A\0" },
+    {127374,  127374,  "W\0" },
+    {127375,  127376,  "A\0" },
+    {127377,  127386,  "W\0" },
+    {127387,  127404,  "A\0" },
+    {127406,  127461,  "F\0" },
+    {127488,  127490,  "W\0" },
+    {127491,  127503,  "F\0" },
+    {127504,  127547,  "W\0" },
+    {127548,  127551,  "F\0" },
+    {127552,  127560,  "W\0" },
+    {127561,  127567,  "F\0" },
+    {127568,  127569,  "W\0" },
+    {127570,  127583,  "F\0" },
+    {127584,  127589,  "W\0" },
+    {127590,  127743,  "F\0" },
+    {127744,  127776,  "W\0" },
+    {127789,  127797,  "W\0" },
+    {127799,  127868,  "W\0" },
+    {127870,  127891,  "W\0" },
+    {127904,  127946,  "W\0" },
+    {127951,  127955,  "W\0" },
+    {127968,  127984,  "W\0" },
+    {127988,  127988,  "W\0" },
+    {127992,  128062,  "W\0" },
+    {128064,  128064,  "W\0" },
+    {128066,  128252,  "W\0" },
+    {128255,  128317,  "W\0" },
+    {128331,  128334,  "W\0" },
+    {128336,  128359,  "W\0" },
+    {128378,  128378,  "W\0" },
+    {128405,  128406,  "W\0" },
+    {128420,  128420,  "W\0" },
+    {128507,  128591,  "W\0" },
+    {128640,  128709,  "W\0" },
+    {128716,  128716,  "W\0" },
+    {128720,  128722,  "W\0" },
+    {128725,  128727,  "W\0" },
+    {128728,  128732,  "F\0" },
+    {128733,  128735,  "W\0" },
+    {128747,  128748,  "W\0" },
+    {128749,  128751,  "F\0" },
+    {128756,  128764,  "W\0" },
+    {128765,  128767,  "F\0" },
+    {128884,  128895,  "F\0" },
+    {128985,  128991,  "F\0" },
+    {128992,  129003,  "W\0" },
+    {129004,  129007,  "F\0" },
+    {129008,  129008,  "W\0" },
+    {129009,  129023,  "F\0" },
+    {129036,  129039,  "F\0" },
+    {129096,  129103,  "F\0" },
+    {129114,  129119,  "F\0" },
+    {129160,  129167,  "F\0" },
+    {129198,  129199,  "F\0" },
+    {129202,  129279,  "F\0" },
+    {129292,  129338,  "W\0" },
+    {129340,  129349,  "W\0" },
+    {129351,  129535,  "W\0" },
+    {129620,  129631,  "F\0" },
+    {129646,  129647,  "F\0" },
+    {129648,  129652,  "W\0" },
+    {129653,  129655,  "F\0" },
+    {129656,  129660,  "W\0" },
+    {129661,  129663,  "F\0" },
+    {129664,  129670,  "W\0" },
+    {129671,  129679,  "F\0" },
+    {129680,  129708,  "W\0" },
+    {129709,  129711,  "F\0" },
+    {129712,  129722,  "W\0" },
+    {129723,  129727,  "F\0" },
+    {129728,  129733,  "W\0" },
+    {129734,  129743,  "F\0" },
+    {129744,  129753,  "W\0" },
+    {129754,  129759,  "F\0" },
+    {129760,  129767,  "W\0" },
+    {129768,  129775,  "F\0" },
+    {129776,  129782,  "W\0" },
+    {129783,  129791,  "F\0" },
+    {129939,  129939,  "F\0" },
+    {129995,  130031,  "F\0" },
+    {130042,  131071,  "F\0" },
+    {131072,  173791,  "W\0" },
+    {173792,  173823,  "F\0" },
+    {173824,  177976,  "W\0" },
+    {177977,  177983,  "F\0" },
+    {177984,  178205,  "W\0" },
+    {178206,  178207,  "F\0" },
+    {178208,  183969,  "W\0" },
+    {183970,  183983,  "F\0" },
+    {183984,  191456,  "W\0" },
+    {191457,  194559,  "F\0" },
+    {194560,  195101,  "W\0" },
+    {195102,  196607,  "F\0" },
+    {196608,  201546,  "W\0" },
+    {201547,  917504,  "F\0" },
+    {917506,  917535,  "F\0" },
+    {917632,  917759,  "F\0" },
+    {917760,  917999,  "A\0" },
+    {918000,  983039,  "F\0" },
+    {983040,  1048573, "A\0" },
+    {1048574, 1048575, "F\0" },
+    {1048576, 1114109, "A\0" },
+    {1114110, 1114111, "F\0" },
+};
+
+// Look up the East Asian Width property string for the code point `c`.
+// Returns the string that `c11__search_u32_ranges` yields for `c` within
+// `kEastAsianWidthRanges`; when that search returns NULL (presumably: no
+// listed range contains `c`), falls back to "N" (Neutral).
+static const char* c11__u32_east_asian_width(int c) {
+    // Derive the element count from the array itself so it stays correct
+    // even if the table's element type is ever changed.
+    const int n_ranges = (int)(sizeof(kEastAsianWidthRanges) / sizeof(kEastAsianWidthRanges[0]));
+    const char* data = c11__search_u32_ranges(c, kEastAsianWidthRanges, n_ranges);
+    return data != NULL ? data : "N";
+}
+
+// Binding for `unicodedata.east_asian_width(char)`.
+// Expects exactly one `str` argument holding a single Unicode character.
+// Raises TypeError when the string does not contain exactly one character and
+// ValueError when its first byte is not a valid UTF-8 lead byte. On success
+// the width string is stored in the return slot and `true` is returned.
+static bool unicodedata_east_asian_width(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    PY_CHECK_ARG_TYPE(0, tp_str);
+    c11_sv text = py_tosv(py_arg(0));
+    int n_chars = c11_sv__u8_length(text);
+    if(n_chars != 1) {
+        return TypeError("east_asian_width() expected a character, but string of length %d found",
+                         n_chars);
+    }
+    // Number of bytes in the UTF-8 sequence; 0 signals an invalid lead byte.
+    int n_bytes = c11__u8_header(text.data[0], true);
+    if(n_bytes == 0) return ValueError("invalid utf-8 char: %c", text.data[0]);
+    int codepoint = c11__u8_value(n_bytes, text.data);
+    py_newstr(py_retval(), c11__u32_east_asian_width(codepoint));
+    return true;
+}
+
+// Create the `unicodedata` module and bind its `east_asian_width` function.
+void pk__add_module_unicodedata() {
+    py_Ref module = py_newmodule("unicodedata");
+    py_bindfunc(module, "east_asian_width", unicodedata_east_asian_width);
+}

+ 1 - 1
src/public/modules.c

@@ -472,7 +472,7 @@ static bool builtins_ord(int argc, py_Ref argv) {
                          c11_sv__u8_length(sv));
     }
     int u8bytes = c11__u8_header(sv.data[0], true);
-    if(u8bytes == 0) { return ValueError("invalid char: %c", sv.data[0]); }
+    if(u8bytes == 0) return ValueError("invalid utf-8 char: %c", sv.data[0]);
     int value = c11__u8_value(u8bytes, sv.data);
     py_newint(py_retval(), value);
     return true;

+ 17 - 0
tests/83_unicodedata.py

@@ -0,0 +1,17 @@
+from unicodedata import east_asian_width
+
+# full width
+assert east_asian_width("A") == "F"
+# half width
+assert east_asian_width("サ") == "H"
+# narrow
+assert east_asian_width("a") == "Na"
+# wide
+assert east_asian_width("测") == "W"
+assert east_asian_width("🥕") == "W"
+assert east_asian_width("。") == "W"
+# ambiguous
+assert east_asian_width("°") == "A"
+# neutral
+assert east_asian_width("\n") == "N"
+

Some files were not shown because too many files changed in this diff