| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373 |
- #include "pocketpy/objects/codeobject.h"
- #include "pocketpy/common/serialize.h"
- #include "pocketpy/common/utils.h"
// Header constants for serialized CodeObject data.
// CodeObject__dumps() hands MAGIC / VER_MAJOR / VER_MINOR to the serializer
// constructor; CodeObject__loads() hands MAGIC / VER_MAJOR / VER_MINOR_MIN to
// the deserializer's header check.
#define CODEOBJECT_VER_MAJOR 1
#define CODEOBJECT_VER_MINOR 0
// Passed as the last argument of the header check (presumably the oldest
// minor version still accepted -- confirm against c11_deserializer__check_header).
#define CODEOBJECT_VER_MINOR_MIN 0
// ASCII "CO": 'C' = 0x43, 'O' = 0x4F
#define CODEOBJECT_MAGIC 0x434F
- // Forward declarations
- static void FuncDecl__serialize(c11_serializer* s,
- const FuncDecl* decl,
- const struct SourceData* parent_src);
- static FuncDecl_ FuncDecl__deserialize(c11_deserializer* d, SourceData_ embedded_src);
- static void CodeObject__serialize(c11_serializer* s,
- const CodeObject* co,
- const struct SourceData* parent_src);
- static CodeObject CodeObject__deserialize(c11_deserializer* d, SourceData_ embedded_src);
- // Serialize a py_TValue constant
- // Supported types: None, int, float, bool, str, bytes, tuple, Ellipsis
- static void TValue__serialize(c11_serializer* s, py_Ref val) {
- c11_serializer__write_type(s, val->type);
- // 1. co_consts: int | float | str
- // 2. function defaults: see `read_literal()` in compiler.c
- switch(val->type) {
- case tp_int: c11_serializer__write_i64(s, val->_i64); break;
- case tp_float: c11_serializer__write_f64(s, val->_f64); break;
- case tp_str: {
- c11_sv sv = py_tosv((py_Ref)val);
- c11_serializer__write_i32(s, sv.size);
- c11_serializer__write_bytes(s, sv.data, sv.size);
- break;
- }
- case tp_bool: {
- bool value = py_tobool(val);
- c11_serializer__write_i8(s, value ? 1 : 0);
- break;
- }
- case tp_NoneType: break;
- case tp_ellipsis: break;
- case tp_tuple: {
- int len = py_tuple_len(val);
- c11_serializer__write_i32(s, len);
- for(int i = 0; i < len; i++) {
- py_Ref item = py_tuple_getitem(val, i);
- TValue__serialize(s, item);
- }
- break;
- }
- default: c11__abort("TValue__serialize: invalid type '%s'", py_tpname(val->type));
- }
- }
- // Deserialize a py_TValue constant
- static void TValue__deserialize(c11_deserializer* d, py_OutRef out) {
- py_Type type = c11_deserializer__read_type(d);
- switch(type) {
- case tp_int: {
- py_i64 v = c11_deserializer__read_i64(d);
- py_newint(out, v);
- break;
- }
- case tp_float: {
- py_f64 v = c11_deserializer__read_f64(d);
- py_newfloat(out, v);
- break;
- }
- case tp_str: {
- int size = c11_deserializer__read_i32(d);
- char* dst = py_newstrn(out, size);
- char* src = c11_deserializer__read_bytes(d, size);
- memcpy(dst, src, size);
- break;
- }
- case tp_bool: {
- bool v = c11_deserializer__read_i8(d) != 0;
- py_newbool(out, v);
- break;
- }
- case tp_NoneType: {
- py_newnone(out);
- break;
- }
- case tp_ellipsis: {
- py_newellipsis(out);
- break;
- }
- case tp_tuple: {
- int len = c11_deserializer__read_i32(d);
- py_newtuple(out, len);
- for(int i = 0; i < len; i++) {
- py_ItemRef item = py_tuple_getitem(out, i);
- TValue__deserialize(d, item);
- }
- break;
- }
- default:
- c11__abort("TValue__deserialize: invalid type '%s'", py_tpname(type));
- }
- }
- // Serialize CodeObject
- static void CodeObject__serialize(c11_serializer* s,
- const CodeObject* co,
- const struct SourceData* parent_src) {
- // SourceData
- if(!parent_src) {
- c11_serializer__write_i8(s, (int8_t)co->src->mode);
- c11_serializer__write_i8(s, co->src->is_dynamic ? 1 : 0);
- c11_serializer__write_cstr(s, co->src->filename->data);
- } else {
- c11__rtassert(co->src == parent_src);
- }
- // name
- c11_serializer__write_cstr(s, co->name->data);
- // codes
- _Static_assert(sizeof(Bytecode) == sizeof(uint16_t) * 2, "");
- c11_serializer__write_i32(s, co->codes.length);
- c11_serializer__write_bytes(s, co->codes.data, co->codes.length * sizeof(Bytecode));
- // codes_ex
- _Static_assert(sizeof(BytecodeEx) == sizeof(int32_t) * 2, "");
- c11_serializer__write_i32(s, co->codes_ex.length);
- c11_serializer__write_bytes(s, co->codes_ex.data, co->codes_ex.length * sizeof(BytecodeEx));
- // consts
- c11_serializer__write_i32(s, co->consts.length);
- for(int i = 0; i < co->consts.length; i++) {
- py_Ref val = c11__at(py_TValue, &co->consts, i);
- TValue__serialize(s, val);
- }
- // varnames (as cstr via py_name2str)
- c11_serializer__write_i32(s, co->varnames.length);
- for(int i = 0; i < co->varnames.length; i++) {
- py_Name name = c11__getitem(py_Name, &co->varnames, i);
- c11_serializer__write_cstr(s, py_name2str(name));
- }
- // names (as cstr via py_name2str)
- c11_serializer__write_i32(s, co->names.length);
- for(int i = 0; i < co->names.length; i++) {
- py_Name name = c11__getitem(py_Name, &co->names, i);
- c11_serializer__write_cstr(s, py_name2str(name));
- }
- // nlocals
- c11_serializer__write_i32(s, co->nlocals);
- // blocks
- _Static_assert(sizeof(CodeBlock) == sizeof(int32_t) * 5, "");
- c11_serializer__write_i32(s, co->blocks.length);
- c11_serializer__write_bytes(s, co->blocks.data, co->blocks.length * sizeof(CodeBlock));
- // func_decls
- c11_serializer__write_i32(s, co->func_decls.length);
- for(int i = 0; i < co->func_decls.length; i++) {
- const FuncDecl* decl = c11__getitem(FuncDecl_, &co->func_decls, i);
- FuncDecl__serialize(s, decl, co->src);
- }
- // start_line, end_line
- c11_serializer__write_i32(s, co->start_line);
- c11_serializer__write_i32(s, co->end_line);
- }
- // Deserialize CodeObject (initialize co before calling)
- static CodeObject CodeObject__deserialize(c11_deserializer* d, SourceData_ embedded_src) {
- CodeObject co;
- // SourceData
- SourceData_ src;
- if(embedded_src != NULL) {
- src = embedded_src;
- PK_INCREF(src);
- } else {
- enum py_CompileMode mode = (enum py_CompileMode)c11_deserializer__read_i8(d);
- bool is_dynamic = c11_deserializer__read_i8(d) != 0;
- const char* filename = c11_deserializer__read_cstr(d);
- src = SourceData__rcnew(NULL, filename, mode, is_dynamic);
- }
- // name
- const char* name = c11_deserializer__read_cstr(d);
- c11_sv name_sv = {name, strlen(name)};
- // Initialize the CodeObject
- CodeObject__ctor(&co, src, name_sv);
- PK_DECREF(src); // CodeObject__ctor increments ref count
- // Clear the default root block that CodeObject__ctor adds
- c11_vector__clear(&co.blocks);
- // codes
- int codes_len = c11_deserializer__read_i32(d);
- c11_vector__extend(&co.codes,
- c11_deserializer__read_bytes(d, codes_len * sizeof(Bytecode)),
- codes_len);
- // codes_ex
- int codes_ex_len = c11_deserializer__read_i32(d);
- c11_vector__extend(&co.codes_ex,
- c11_deserializer__read_bytes(d, codes_ex_len * sizeof(BytecodeEx)),
- codes_ex_len);
- // consts
- int consts_len = c11_deserializer__read_i32(d);
- for(int i = 0; i < consts_len; i++) {
- py_Ref p_val = c11_vector__emplace(&co.consts);
- TValue__deserialize(d, p_val);
- }
- // varnames
- int varnames_len = c11_deserializer__read_i32(d);
- for(int i = 0; i < varnames_len; i++) {
- const char* s = c11_deserializer__read_cstr(d);
- py_Name n = py_name(s);
- c11_vector__push(py_Name, &co.varnames, n);
- c11_smallmap_n2d__set(&co.varnames_inv, n, i);
- }
- // names
- int names_len = c11_deserializer__read_i32(d);
- for(int i = 0; i < names_len; i++) {
- const char* s = c11_deserializer__read_cstr(d);
- py_Name n = py_name(s);
- c11_vector__push(py_Name, &co.names, n);
- c11_smallmap_n2d__set(&co.names_inv, n, i);
- }
- // nlocals
- co.nlocals = c11_deserializer__read_i32(d);
- // blocks
- int blocks_len = c11_deserializer__read_i32(d);
- c11_vector__extend(&co.blocks,
- c11_deserializer__read_bytes(d, blocks_len * sizeof(CodeBlock)),
- blocks_len);
- // func_decls
- int func_decls_len = c11_deserializer__read_i32(d);
- for(int i = 0; i < func_decls_len; i++) {
- FuncDecl_ decl = FuncDecl__deserialize(d, src);
- c11_vector__push(FuncDecl_, &co.func_decls, decl);
- }
- // start_line, end_line
- co.start_line = c11_deserializer__read_i32(d);
- co.end_line = c11_deserializer__read_i32(d);
- return co;
- }
- // Serialize FuncDecl
- static void FuncDecl__serialize(c11_serializer* s,
- const FuncDecl* decl,
- const struct SourceData* parent_src) {
- // CodeObject (embedded)
- CodeObject__serialize(s, &decl->code, parent_src);
- // args
- c11_serializer__write_i32(s, decl->args.length);
- c11_serializer__write_bytes(s, decl->args.data, decl->args.length * sizeof(int32_t));
- // kwargs
- c11_serializer__write_i32(s, decl->kwargs.length);
- for(int i = 0; i < decl->kwargs.length; i++) {
- FuncDeclKwArg* kw = c11__at(FuncDeclKwArg, &decl->kwargs, i);
- c11_serializer__write_i32(s, kw->index);
- c11_serializer__write_cstr(s, py_name2str(kw->key));
- TValue__serialize(s, &kw->value);
- }
- // starred_arg, starred_kwarg
- c11_serializer__write_i32(s, decl->starred_arg);
- c11_serializer__write_i32(s, decl->starred_kwarg);
- // nested
- c11_serializer__write_i8(s, decl->nested ? 1 : 0);
- // docstring
- int has_docstring = decl->docstring != NULL ? 1 : 0;
- c11_serializer__write_i8(s, has_docstring);
- if(has_docstring) c11_serializer__write_cstr(s, decl->docstring);
- // type
- c11_serializer__write_i8(s, (int8_t)decl->type);
- }
- // Deserialize FuncDecl
- static FuncDecl_ FuncDecl__deserialize(c11_deserializer* d, SourceData_ embedded_src) {
- FuncDecl* self = PK_MALLOC(sizeof(FuncDecl));
- self->rc.count = 1;
- self->rc.dtor = (void (*)(void*))FuncDecl__dtor;
- c11_vector__ctor(&self->args, sizeof(int32_t));
- c11_vector__ctor(&self->kwargs, sizeof(FuncDeclKwArg));
- c11_smallmap_n2d__ctor(&self->kw_to_index);
- // CodeObject (embedded)
- self->code = CodeObject__deserialize(d, embedded_src);
- // args
- int args_len = c11_deserializer__read_i32(d);
- c11_vector__extend(&self->args,
- c11_deserializer__read_bytes(d, args_len * sizeof(int32_t)),
- args_len);
- // kwargs
- int kwargs_len = c11_deserializer__read_i32(d);
- for(int i = 0; i < kwargs_len; i++) {
- FuncDeclKwArg* kw = c11_vector__emplace(&self->kwargs);
- kw->index = c11_deserializer__read_i32(d);
- const char* key_str = c11_deserializer__read_cstr(d);
- kw->key = py_name(key_str);
- TValue__deserialize(d, &kw->value);
- c11_smallmap_n2d__set(&self->kw_to_index, kw->key, kw->index);
- }
- // starred_arg
- self->starred_arg = c11_deserializer__read_i32(d);
- // starred_kwarg
- self->starred_kwarg = c11_deserializer__read_i32(d);
- // nested
- self->nested = c11_deserializer__read_i8(d) != 0;
- // docstring
- int has_docstring = c11_deserializer__read_i8(d);
- if(has_docstring) {
- const char* docstring = c11_deserializer__read_cstr(d);
- self->docstring = c11_strdup(docstring);
- } else {
- self->docstring = NULL;
- }
- // type
- self->type = (FuncType)c11_deserializer__read_i8(d);
- return self;
- }
- // Public API: Serialize CodeObject to bytes
- void* CodeObject__dumps(const CodeObject* co, int* size) {
- c11_serializer s;
- c11_serializer__ctor(&s, CODEOBJECT_MAGIC, CODEOBJECT_VER_MAJOR, CODEOBJECT_VER_MINOR);
- CodeObject__serialize(&s, co, NULL);
- return c11_serializer__submit(&s, size);
- }
- // Public API: Deserialize CodeObject from bytes
- // Returns error message or NULL on success
- char* CodeObject__loads(const void* data, int size, CodeObject* out) {
- c11_deserializer d;
- c11_deserializer__ctor(&d, data, size);
- if(!c11_deserializer__check_header(&d,
- CODEOBJECT_MAGIC,
- CODEOBJECT_VER_MAJOR,
- CODEOBJECT_VER_MINOR_MIN)) {
- char* error_msg = c11_strdup(d.error_msg);
- c11_deserializer__dtor(&d);
- return error_msg;
- }
- *out = CodeObject__deserialize(&d, NULL);
- c11_deserializer__dtor(&d);
- return NULL;
- }
// The format constants are private to the code above; remove them so they are
// not visible to anything that follows in this translation unit.
#undef CODEOBJECT_VER_MINOR_MIN
#undef CODEOBJECT_VER_MINOR
#undef CODEOBJECT_VER_MAJOR
#undef CODEOBJECT_MAGIC
|