Răsfoiți Sursa

add `pickle` module

blueloveTH 1 an în urmă
părinte
comite
5e5f2525b4

+ 0 - 1
include/pocketpy/common/_generated.h

@@ -12,6 +12,5 @@ extern const char kPythonLibs_datetime[];
 extern const char kPythonLibs_functools[];
 extern const char kPythonLibs_heapq[];
 extern const char kPythonLibs_operator[];
-extern const char kPythonLibs_pickle[];
 extern const char kPythonLibs_this[];
 extern const char kPythonLibs_typing[];

+ 1 - 0
include/pocketpy/interpreter/modules.h

@@ -12,6 +12,7 @@ void pk__add_module_easing();
 void pk__add_module_traceback();
 void pk__add_module_enum();
 void pk__add_module_inspect();
+void pk__add_module_pickle();
 
 void pk__add_module_linalg();
 void pk__add_module_array2d();

+ 4 - 1
include/pocketpy/pocketpy.h

@@ -580,7 +580,10 @@ PK_API bool py_len(py_Ref val) PY_RAISE PY_RETURN;
 PK_API bool py_json_dumps(py_Ref val) PY_RAISE PY_RETURN;
 /// Python equivalent to `json.loads(val)`.
 PK_API bool py_json_loads(const char* source) PY_RAISE PY_RETURN;
-
+/// Python equivalent to `pickle.dumps(val)`.
+PK_API bool py_pickle_dumps(py_Ref val) PY_RAISE PY_RETURN;
+/// Python equivalent to `pickle.loads(b)`, where `b` is the `size` bytes starting at `data`.
+PK_API bool py_pickle_loads(const unsigned char* data, int size) PY_RAISE PY_RETURN;
 /************* Unchecked Functions *************/
 
 PK_API py_ObjectRef py_tuple_data(py_Ref self);

+ 0 - 182
python/pickle.py

@@ -1,182 +0,0 @@
-import json
-import builtins
-
-_BASIC_TYPES = [int, float, str, bool, type(None)]
-_MOD_T_SEP = "@"
-
-def _find_class(path: str):
-    if _MOD_T_SEP not in path:
-        return builtins.__dict__[path]
-    modpath, name = path.split(_MOD_T_SEP)
-    return __import__(modpath).__dict__[name]
-
-class _Pickler:
-    def __init__(self, obj) -> None:
-        self.obj = obj
-        self.raw_memo = {}  # id -> int
-        self.memo = []      # int -> object
-
-    @staticmethod
-    def _type_id(t: type):
-        assert type(t) is type
-        name = t.__name__
-        mod = t.__module__
-        if mod is not None:
-            name = mod + _MOD_T_SEP + name
-        return name
-
-    def wrap(self, o):
-        o_t = type(o)
-        if o_t in _BASIC_TYPES:
-            return o
-        if o_t is type:
-            return ["type", self._type_id(o)]
-
-        index = self.raw_memo.get(id(o), None)
-        if index is not None:
-            return [index]
-        
-        ret = []
-        index = len(self.memo)
-        self.memo.append(ret)
-        self.raw_memo[id(o)] = index
-
-        if o_t is tuple:
-            ret.append("tuple")
-            ret.append([self.wrap(i) for i in o])
-            return [index]
-        if o_t is bytes:
-            ret.append("bytes")
-            ret.append([o[j] for j in range(len(o))])
-            return [index]
-        if o_t is list:
-            ret.append("list")
-            ret.append([self.wrap(i) for i in o])
-            return [index]
-        if o_t is dict:
-            ret.append("dict")
-            ret.append([[self.wrap(k), self.wrap(v)] for k,v in o.items()])
-            return [index]
-        
-        _0 = self._type_id(o_t)
-
-        if getattr(o_t, '__struct__', False):
-            ret.append(_0)
-            ret.append(o.tostruct().hex())
-            return [index]
-
-        if hasattr(o, "__getnewargs__"):
-            _1 = o.__getnewargs__()     # an iterable
-            _1 = [self.wrap(i) for i in _1]
-        else:
-            _1 = None
-
-        if o.__dict__ is None:
-            _2 = None
-        else:
-            _2 = {k: self.wrap(v) for k,v in o.__dict__.items()}
-
-        ret.append(_0)  # type id
-        ret.append(_1)  # newargs
-        ret.append(_2)  # state
-        return [index]
-    
-    def run_pipe(self):
-        o = self.wrap(self.obj)
-        return [o, self.memo]
-
-
-
-class _Unpickler:
-    def __init__(self, obj, memo: list) -> None:
-        self.obj = obj
-        self.memo = memo
-        self._unwrapped = [None] * len(memo)
-
-    def tag(self, index, o):
-        assert self._unwrapped[index] is None
-        self._unwrapped[index] = o
-
-    def unwrap(self, o, index=None):
-        if type(o) in _BASIC_TYPES:
-            return o
-        assert type(o) is list
-
-        if o[0] == "type":
-            return _find_class(o[1])
-
-        # reference
-        if type(o[0]) is int:
-            assert index is None    # index should be None
-            index = o[0]
-            if self._unwrapped[index] is None:
-                o = self.memo[index]
-                assert type(o) is list
-                assert type(o[0]) is str
-                self.unwrap(o, index)
-                assert self._unwrapped[index] is not None
-            return self._unwrapped[index]
-        
-        # concrete reference type
-        if o[0] == "tuple":
-            ret = tuple([self.unwrap(i) for i in o[1]])
-            self.tag(index, ret)
-            return ret
-        if o[0] == "bytes":
-            ret = bytes(o[1])
-            self.tag(index, ret)
-            return ret
-        if o[0] == "list":
-            ret = []
-            self.tag(index, ret)
-            for i in o[1]:
-                ret.append(self.unwrap(i))
-            return ret
-        if o[0] == "dict":
-            ret = {}
-            self.tag(index, ret)
-            for k,v in o[1]:
-                ret[self.unwrap(k)] = self.unwrap(v)
-            return ret
-        
-        # generic object
-        cls = _find_class(o[0])
-        # if getattr(cls, '__struct__', False):
-        if False:
-            inst = cls.fromstruct(struct.fromhex(o[1]))
-            self.tag(index, inst)
-            return inst
-        else:
-            _, newargs, state = o
-            # create uninitialized instance
-            new_f = getattr(cls, "__new__")
-            if newargs is not None:
-                newargs = [self.unwrap(i) for i in newargs]
-                inst = new_f(cls, *newargs)
-            else:
-                inst = new_f(cls)
-            self.tag(index, inst)
-            # restore state
-            if state is not None:
-                for k,v in state.items():
-                    setattr(inst, k, self.unwrap(v))
-            return inst
-
-    def run_pipe(self):
-        return self.unwrap(self.obj)
-
-
-def _wrap(o):
-    return _Pickler(o).run_pipe()
-
-def _unwrap(packed: list):
-    return _Unpickler(*packed).run_pipe()
-
-def dumps(o) -> bytes:
-    o = _wrap(o)
-    return json.dumps(o).encode()
-
-def loads(b) -> object:
-    assert type(b) is bytes
-    o = json.loads(b.decode())
-    return _unwrap(o)

Fișier diff suprimat deoarece este prea mare
+ 0 - 0
src/common/_generated.c


+ 1 - 0
src/interpreter/vm.c

@@ -216,6 +216,7 @@ void VM__ctor(VM* self) {
     pk__add_module_traceback();
     pk__add_module_enum();
     pk__add_module_inspect();
+    pk__add_module_pickle();
 
     pk__add_module_conio();
     pk__add_module_pkpy();

+ 428 - 0
src/modules/pickle.c

@@ -0,0 +1,428 @@
+#include "pocketpy/common/vector.h"
+#include "pocketpy/pocketpy.h"
+
+#include "pocketpy/common/utils.h"
+#include "pocketpy/common/sstream.h"
+#include "pocketpy/interpreter/vm.h"
+#include <stdint.h>
+
+// Opcodes of the pickle byte stream. Each opcode is written as a single `char`
+// by pkl__emit_op(), so the numeric values are part of the serialized format:
+// reordering or inserting enumerators changes how existing data decodes.
+//
+// Payload that follows each opcode, as produced by pickle__write_object():
+//   PKL_INT8/16/32/64   1/2/4/8 raw bytes of the integer
+//   PKL_FLOAT32/64      4/8 raw bytes of the float
+//   PKL_STRING/BYTES    an integer length (itself a PKL_INT* record), then the raw bytes
+//   PKL_BUILD_*         an integer count; the elements are emitted BEFORE the opcode
+//   PKL_VEC2/VEC3       the raw bytes of the c11_vec2 / c11_vec3 struct
+//   PKL_VEC2I/VEC3I     two / three integers (PKL_INT* records)
+//   PKL_TYPE            a NUL-terminated "module@name" path
+//   PKL_NONE/TRUE/FALSE/EOF   no payload
+typedef enum {
+    // clang-format off
+    PKL_NONE,
+    PKL_INT8, PKL_INT16, PKL_INT32, PKL_INT64,
+    PKL_FLOAT32, PKL_FLOAT64,
+    PKL_TRUE, PKL_FALSE,
+    PKL_STRING, PKL_BYTES,
+    PKL_BUILD_LIST,
+    PKL_BUILD_TUPLE,
+    PKL_BUILD_DICT,
+    PKL_VEC2, PKL_VEC3,
+    PKL_VEC2I, PKL_VEC3I,
+    PKL_TYPE,
+    PKL_EOF,
+    // clang-format on
+} PickleOp;
+
+// Output buffer used while serializing: a growable vector of chars that
+// accumulates opcodes and their payloads in emission order.
+typedef struct {
+    c11_vector /*T=char*/ codes;
+} PickleObject;
+
+// Initializes `self` as an empty buffer whose elements are single chars.
+static void PickleObject__ctor(PickleObject* self) {
+    c11_vector__ctor(&self->codes, sizeof(char));
+}
+
+// Destroys the underlying vector held by `self`.
+static void PickleObject__dtor(PickleObject* self) {
+    c11_vector__dtor(&self->codes);
+}
+
+// Copies the accumulated byte stream into a new `bytes` object stored at `out`
+// and then destroys the internal vector. `self` must not be written to again
+// without re-running PickleObject__ctor().
+//
+// The previous version detached the buffer with c11_vector__submit(), copied
+// it, and never released it, leaking the whole stream on every call.
+static void PickleObject__py_submit(PickleObject* self, py_OutRef out) {
+    int size = self->codes.length;
+    unsigned char* out_data = py_newbytes(out, size);
+    // guard the empty case: memcpy from a NULL data pointer is undefined
+    if(size > 0) memcpy(out_data, self->codes.data, size);
+    // the bytes object owns its own copy, so the scratch storage can go
+    c11_vector__dtor(&self->codes);
+}
+
+// Appends `size` raw bytes starting at `data` to the end of the stream.
+static void PickleObject__write_bytes(PickleObject* buf, const void* data, int size) {
+    c11_vector__extend(char, &buf->codes, data, size);
+}
+
+// Appends one opcode to the stream; it is stored as a single char.
+static void pkl__emit_op(PickleObject* buf, PickleOp op) {
+    c11_vector__push(char, &buf->codes, op);
+}
+
+// Writes `val` as the narrowest of PKL_INT8/16/32/64 that can represent it:
+// one opcode followed by the first 1/2/4/8 bytes of `val` in memory.
+// Taking the leading bytes yields the low-order bytes only on a little-endian
+// host, which pk__add_module_pickle() enforces.
+static void pkl__emit_int(PickleObject* buf, py_i64 val) {
+    PickleOp op = PKL_INT64;
+    int width = 8;
+    if(val >= INT8_MIN && val <= INT8_MAX) {
+        op = PKL_INT8;
+        width = 1;
+    } else if(val >= INT16_MIN && val <= INT16_MAX) {
+        op = PKL_INT16;
+        width = 2;
+    } else if(val >= INT32_MIN && val <= INT32_MAX) {
+        op = PKL_INT32;
+        width = 4;
+    }
+    pkl__emit_op(buf, op);
+    PickleObject__write_bytes(buf, &val, width);
+}
+
+// Copies sizeof(*p_val) bytes from `p_buf` into `*p_val` and advances `p_buf`
+// past them. memcpy is used instead of a typed load because the stream gives
+// no alignment guarantee. `p_buf` must be a modifiable pointer lvalue; both
+// arguments are evaluated more than once. No bounds checking is performed.
+#define UNALIGNED_READ(p_val, p_buf)                                                               \
+    do {                                                                                           \
+        memcpy((p_val), (p_buf), sizeof(*(p_val)));                                                \
+        (p_buf) += sizeof(*(p_val));                                                               \
+    } while(0)
+
+// Decodes one integer record (a PKL_INT* opcode plus its payload) at `*p`,
+// advances `*p` past it and returns the sign-extended value.
+// Aborts the process if the opcode is not one of PKL_INT8/16/32/64.
+// No bounds checking: the caller must ensure the record is fully in the buffer.
+static py_i64 pkl__read_int(const unsigned char** p) {
+    PickleOp op = (PickleOp) * *p;
+    *p += 1;
+    py_i64 result = 0;
+    switch(op) {
+        case PKL_INT8: {
+            int8_t narrow;
+            UNALIGNED_READ(&narrow, *p);
+            result = narrow;
+            break;
+        }
+        case PKL_INT16: {
+            int16_t narrow;
+            UNALIGNED_READ(&narrow, *p);
+            result = narrow;
+            break;
+        }
+        case PKL_INT32: {
+            int32_t narrow;
+            UNALIGNED_READ(&narrow, *p);
+            result = narrow;
+            break;
+        }
+        case PKL_INT64: {
+            int64_t wide;
+            UNALIGNED_READ(&wide, *p);
+            result = wide;
+            break;
+        }
+        default: c11__abort("pkl__read_int(): invalid op: %d", op);
+    }
+    return result;
+}
+
+// Returns the NUL-terminated string that starts at `*p` and advances `*p` to
+// the byte after its terminator. The returned pointer aliases the input
+// buffer. The caller must guarantee that a '\0' exists before the end of the
+// buffer; this function does not check.
+static const char* pkl__read_cstr(const unsigned char** p) {
+    const char* p_str = (const char*)*p;
+    size_t length = strlen(p_str);  // size_t: avoid narrowing strlen's result
+    *p += length + 1;               // include '\0'
+    return p_str;
+}
+
+// Binding for `pickle.loads(b)`: requires exactly one argument of type
+// `bytes` and forwards its buffer and length to py_pickle_loads().
+static bool pickle_loads(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    PY_CHECK_ARG_TYPE(0, tp_bytes);
+    int size;
+    const unsigned char* data = py_tobytes(argv, &size);
+    return py_pickle_loads(data, size);
+}
+
+// Binding for `pickle.dumps(o)`: requires exactly one argument and forwards
+// it to py_pickle_dumps().
+static bool pickle_dumps(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    return py_pickle_dumps(argv);
+}
+
+// Creates the `pickle` module and binds `loads` and `dumps` on it.
+// Aborts on a big-endian host: pkl__emit_int() writes the leading bytes of
+// the value in memory, which are the low-order bytes only on little-endian.
+void pk__add_module_pickle() {
+    py_Ref mod = py_newmodule("pickle");
+
+    // the first byte of the int 1 is 1 only on a little-endian machine
+    const int probe = 1;
+    if(*(const char*)&probe != 1) c11__abort("is_little_endian != true");
+
+    py_bindfunc(mod, "loads", pickle_loads);
+    py_bindfunc(mod, "dumps", pickle_dumps);
+}
+
+static bool pickle__write_object(PickleObject* buf, py_TValue* obj);
+
+// Serializes the `length` values starting at `arr` in order, then emits `op`
+// followed by the element count. Stops and returns false as soon as one
+// element fails to serialize (nothing is emitted for `op` in that case).
+static bool pickle__write_array(PickleObject* buf, PickleOp op, py_TValue* arr, int length) {
+    for(py_TValue* item = arr; item < arr + length; item++) {
+        if(!pickle__write_object(buf, item)) return false;
+    }
+    pkl__emit_op(buf, op);
+    pkl__emit_int(buf, length);
+    return true;
+}
+
+// py_dict_apply() callback: serializes the key and then the value into the
+// PickleObject passed through `ctx`. Returns false if either one fails; the
+// value is not attempted when the key fails.
+static bool pickle__write_dict_kv(py_Ref k, py_Ref v, void* ctx) {
+    PickleObject* out = (PickleObject*)ctx;
+    return pickle__write_object(out, k) && pickle__write_object(out, v);
+}
+
+// Serializes `obj` into `buf`, dispatching on its type.
+// Containers are written in postfix form: every element first, then a
+// PKL_BUILD_* opcode with the element count, so the loader can rebuild them
+// from a value stack.
+// Returns true on success. Raises TypeError and returns false for any type
+// without a case below (including one nested inside a container).
+static bool pickle__write_object(PickleObject* buf, py_TValue* obj) {
+    switch(obj->type) {
+        case tp_NoneType: {
+            pkl__emit_op(buf, PKL_NONE);
+            return true;
+        }
+        case tp_int: {
+            py_i64 val = obj->_i64;
+            pkl__emit_int(buf, val);
+            return true;
+        }
+        case tp_float: {
+            py_f64 val = obj->_f64;
+            float val32 = (float)val;
+            // use the 4-byte form only when the value survives the round trip
+            // through `float` exactly; NaN never compares equal, so it takes
+            // the 8-byte branch
+            if(val == val32) {
+                pkl__emit_op(buf, PKL_FLOAT32);
+                PickleObject__write_bytes(buf, &val32, 4);
+            } else {
+                pkl__emit_op(buf, PKL_FLOAT64);
+                PickleObject__write_bytes(buf, &val, 8);
+            }
+            return true;
+        }
+        case tp_bool: {
+            bool val = obj->_bool;
+            pkl__emit_op(buf, val ? PKL_TRUE : PKL_FALSE);
+            return true;
+        }
+        case tp_str: {
+            // opcode, length record, then the raw bytes (no terminator)
+            pkl__emit_op(buf, PKL_STRING);
+            c11_sv sv = py_tosv(obj);
+            pkl__emit_int(buf, sv.size);
+            PickleObject__write_bytes(buf, sv.data, sv.size);
+            return true;
+        }
+        case tp_bytes: {
+            // same layout as tp_str, under a different opcode
+            pkl__emit_op(buf, PKL_BYTES);
+            int size;
+            unsigned char* data = py_tobytes(obj, &size);
+            pkl__emit_int(buf, size);
+            PickleObject__write_bytes(buf, data, size);
+            return true;
+        }
+        case tp_list: {
+            return pickle__write_array(buf, PKL_BUILD_LIST, py_list_data(obj), py_list_len(obj));
+        }
+        case tp_tuple: {
+            return pickle__write_array(buf, PKL_BUILD_TUPLE, py_tuple_data(obj), py_tuple_len(obj));
+        }
+        case tp_dict: {
+            // each entry is written as key then value; the count that follows
+            // PKL_BUILD_DICT is the number of entries, not of values written
+            bool ok = py_dict_apply(obj, pickle__write_dict_kv, (void*)buf);
+            if(!ok) return false;
+            pkl__emit_op(buf, PKL_BUILD_DICT);
+            pkl__emit_int(buf, py_dict_len(obj));
+            return true;
+        }
+        case tp_vec2: {
+            // float vectors are dumped as the raw bytes of the C struct
+            c11_vec2 val = py_tovec2(obj);
+            pkl__emit_op(buf, PKL_VEC2);
+            PickleObject__write_bytes(buf, &val, sizeof(c11_vec2));
+            return true;
+        }
+        case tp_vec3: {
+            c11_vec3 val = py_tovec3(obj);
+            pkl__emit_op(buf, PKL_VEC3);
+            PickleObject__write_bytes(buf, &val, sizeof(c11_vec3));
+            return true;
+        }
+        case tp_vec2i: {
+            // integer vectors are written component by component as
+            // variable-width integer records
+            c11_vec2i val = py_tovec2i(obj);
+            pkl__emit_op(buf, PKL_VEC2I);
+            pkl__emit_int(buf, val.x);
+            pkl__emit_int(buf, val.y);
+            return true;
+        }
+        case tp_vec3i: {
+            c11_vec3i val = py_tovec3i(obj);
+            pkl__emit_op(buf, PKL_VEC3I);
+            pkl__emit_int(buf, val.x);
+            pkl__emit_int(buf, val.y);
+            pkl__emit_int(buf, val.z);
+            return true;
+        }
+        case tp_type: {
+            // a type is stored by name as the NUL-terminated path "module@name"
+            pkl__emit_op(buf, PKL_TYPE);
+            py_TypeInfo* ti = pk__type_info(py_totype(obj));
+            // NOTE(review): assumes ti->module is a module object that has a
+            // `__name__` entry -- confirm this holds for builtin types
+            const char* mod_name = py_tostr(py_getdict(&ti->module, __name__));
+            c11_sbuf path_buf;
+            c11_sbuf__ctor(&path_buf);
+            c11_sbuf__write_cstr(&path_buf, mod_name);
+            c11_sbuf__write_cstr(&path_buf, "@");
+            c11_sbuf__write_cstr(&path_buf, py_name2str(ti->name));
+            c11_string* path = c11_sbuf__submit(&path_buf);
+            // include '\0'
+            PickleObject__write_bytes(buf, path->data, path->size + 1);
+            c11_string__delete(path);
+            return true;
+        }
+        default: return TypeError("'%t' object is not picklable", obj->type);
+    }
+}
+
+// Serializes `val` and stores the resulting `bytes` object in py_retval().
+// The stream is terminated with PKL_EOF. Returns false, with the exception
+// raised by pickle__write_object(), when `val` or something nested in it
+// cannot be pickled; the scratch buffer is destroyed in that case.
+bool py_pickle_dumps(py_Ref val) {
+    PickleObject buf;
+    PickleObject__ctor(&buf);
+    if(pickle__write_object(&buf, val)) {
+        pkl__emit_op(&buf, PKL_EOF);
+        PickleObject__py_submit(&buf, py_retval());
+        return true;
+    }
+    PickleObject__dtor(&buf);
+    return false;
+}
+
+bool py_pickle_loads(const unsigned char* data, int size) {
+    py_StackRef p0 = py_peek(0);
+    const unsigned char* p = data;
+    while(true) {
+        PickleOp op = (PickleOp)*p;
+        p++;
+        switch(op) {
+            case PKL_NONE: {
+                py_pushnone();
+                break;
+            }
+            case PKL_INT8: {
+                int8_t val;
+                UNALIGNED_READ(&val, p);
+                py_newint(py_pushtmp(), val);
+                break;
+            }
+            case PKL_INT16: {
+                int16_t val;
+                UNALIGNED_READ(&val, p);
+                py_newint(py_pushtmp(), val);
+                break;
+            }
+            case PKL_INT32: {
+                int32_t val;
+                UNALIGNED_READ(&val, p);
+                py_newint(py_pushtmp(), val);
+                break;
+            }
+            case PKL_INT64: {
+                int64_t val;
+                UNALIGNED_READ(&val, p);
+                py_newint(py_pushtmp(), val);
+                break;
+            }
+            case PKL_FLOAT32: {
+                float val;
+                UNALIGNED_READ(&val, p);
+                py_newfloat(py_pushtmp(), val);
+                break;
+            }
+            case PKL_FLOAT64: {
+                double val;
+                UNALIGNED_READ(&val, p);
+                py_newfloat(py_pushtmp(), val);
+                break;
+            }
+            case PKL_TRUE: {
+                py_newbool(py_pushtmp(), true);
+                break;
+            }
+            case PKL_FALSE: {
+                py_newbool(py_pushtmp(), false);
+                break;
+            }
+            case PKL_STRING: {
+                int size = pkl__read_int(&p);
+                char* dst = py_newstrn(py_pushtmp(), size);
+                memcpy(dst, p, size);
+                p += size;
+                break;
+            }
+            case PKL_BYTES: {
+                int size = pkl__read_int(&p);
+                unsigned char* dst = py_newbytes(py_pushtmp(), size);
+                memcpy(dst, p, size);
+                p += size;
+                break;
+            }
+            case PKL_BUILD_LIST: {
+                int length = pkl__read_int(&p);
+                py_OutRef val = py_retval();
+                py_newlistn(val, length);
+                for(int i = length - 1; i >= 0; i--) {
+                    py_StackRef item = py_peek(-1);
+                    py_list_setitem(val, i, item);
+                    py_pop();
+                }
+                py_push(val);
+                break;
+            }
+            case PKL_BUILD_TUPLE: {
+                int length = pkl__read_int(&p);
+                py_OutRef val = py_retval();
+                py_newtuple(val, length);
+                for(int i = length - 1; i >= 0; i--) {
+                    py_StackRef item = py_peek(-1);
+                    py_tuple_setitem(val, i, item);
+                    py_pop();
+                }
+                py_push(val);
+                break;
+            }
+            case PKL_BUILD_DICT: {
+                int length = pkl__read_int(&p);
+                py_OutRef val = py_pushtmp();
+                py_newdict(val);
+                py_StackRef begin = py_peek(-1) - 2 * length;
+                py_StackRef end = py_peek(-1);
+                for(py_StackRef i = begin; i < end; i += 2) {
+                    py_StackRef k = i;
+                    py_StackRef v = i + 1;
+                    bool ok = py_dict_setitem(val, k, v);
+                    if(!ok) return false;
+                }
+                py_assign(py_retval(), val);
+                py_shrink(2 * length + 1);
+                py_push(py_retval());
+                break;
+            }
+            case PKL_VEC2: {
+                c11_vec2 val;
+                UNALIGNED_READ(&val, p);
+                py_newvec2(py_pushtmp(), val);
+                break;
+            }
+            case PKL_VEC3: {
+                c11_vec3 val;
+                UNALIGNED_READ(&val, p);
+                py_newvec3(py_pushtmp(), val);
+                break;
+            }
+            case PKL_VEC2I: {
+                c11_vec2i val;
+                val.x = pkl__read_int(&p);
+                val.y = pkl__read_int(&p);
+                py_newvec2i(py_pushtmp(), val);
+                break;
+            }
+            case PKL_VEC3I: {
+                c11_vec3i val;
+                val.x = pkl__read_int(&p);
+                val.y = pkl__read_int(&p);
+                val.z = pkl__read_int(&p);
+                py_newvec3i(py_pushtmp(), val);
+                break;
+            }
+            case PKL_TYPE: {
+                const char* path = pkl__read_cstr(&p);
+                char* sep_index = strchr(path, '@');
+                assert(sep_index != NULL);
+                *sep_index = '\0';
+                const char* mod_name = path;
+                const char* type_name = sep_index + 1;
+                py_Type t = py_gettype(mod_name, py_name(type_name));
+                *sep_index = '@';
+                if(t == 0) {
+                    return ImportError("cannot import '%s' from '%s'", type_name, mod_name);
+                }
+                py_push(py_tpobject(t));
+                break;
+            }
+            case PKL_EOF: {
+                if(py_peek(0) - p0 != 1) { return ValueError("invalid pickle data"); }
+                py_assign(py_retval(), p0);
+                py_pop();
+                return true;
+            }
+            default: c11__unreachable();
+        }
+    }
+    return true;
+}
+
+#undef UNALIGNED_READ

+ 41 - 0
tests/90_pickle.py

@@ -1,3 +1,44 @@
+import pickle as pkl
+
+# Round-trip helper: serializes `data` with pkl.dumps(), prints the encoded
+# bytes, decodes them with pkl.loads(), prints the result and asserts that it
+# compares equal to the original.
+def test(data): # type: ignore
+    print('-'*50)
+    b = pkl.dumps(data)
+    print(b)
+    o = pkl.loads(b)
+    print(o)
+    assert data == o
+
+test(None)                      # PKL_NONE
+test(1)                         # PKL_INT8
+test(277)                       # PKL_INT16
+test(-66666)                    # PKL_INT32
+test(0xffffffffffff)            # PKL_INT64
+test(1.0)                       # PKL_FLOAT32
+test(1.12312434234)             # PKL_FLOAT64
+test(True)                      # PKL_TRUE
+test(False)                     # PKL_FALSE
+test("hello")                   # PKL_STRING
+test(b"hello")                  # PKL_BYTES
+
+from linalg import vec2, vec3, vec2i, vec3i
+
+test(vec2(2/3, 1.0))            # PKL_VEC2
+test(vec3(2/3, 1.0, 3.0))       # PKL_VEC3
+test(vec2i(1, 2))               # PKL_VEC2I
+test(vec3i(1, 2, 3))            # PKL_VEC3I
+
+test(vec3i)                     # PKL_TYPE
+
+test([1, 2, 3])                 # PKL_BUILD_LIST
+test((1, 2, 3))                 # PKL_BUILD_TUPLE
+test({1: 2, 3: 4})              # PKL_BUILD_DICT
+
+# test complex data
+test([1, '2', True])
+test([1, '2', 3.0, True])
+test([1, '2', True, {'key': 4}])
+test([1, '2', 3.0, True, {'k1': 4, 'k2': [b'xxxx']}])
+
 exit()
 
 from pickle import dumps, loads, _wrap, _unwrap

Unele fișiere nu au fost afișate deoarece prea multe fișiere au fost modificate în acest diff