codeobject_ser.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. #include "pocketpy/objects/codeobject.h"
  2. #include "pocketpy/common/serialize.h"
  3. #include "pocketpy/common/utils.h"
  // Header constants for the serialized CodeObject format.
  // The magic value 0x434F is the ASCII pair "CO".
  #define CODEOBJECT_MAGIC 0x434F
  // Format version (major.minor) handed to the serializer by CodeObject__dumps.
  #define CODEOBJECT_VER_MAJOR 1
  #define CODEOBJECT_VER_MINOR 0
  // Minor-version value handed to the header check in CodeObject__loads
  // (presumably the oldest minor version still accepted -- confirm against
  // c11_deserializer__check_header).
  #define CODEOBJECT_VER_MINOR_MIN 0
  9. // Forward declarations
  10. static void FuncDecl__serialize(c11_serializer* s,
  11. const FuncDecl* decl,
  12. const struct SourceData* parent_src);
  13. static FuncDecl_ FuncDecl__deserialize(c11_deserializer* d, SourceData_ embedded_src);
  14. static void CodeObject__serialize(c11_serializer* s,
  15. const CodeObject* co,
  16. const struct SourceData* parent_src);
  17. static CodeObject CodeObject__deserialize(c11_deserializer* d, SourceData_ embedded_src);
  18. // Serialize a py_TValue constant
  19. // Supported types: None, int, float, bool, str, bytes, tuple, Ellipsis
  20. static void TValue__serialize(c11_serializer* s, py_Ref val) {
  21. c11_serializer__write_type(s, val->type);
  22. // 1. co_consts: int | float | str
  23. // 2. function defaults: see `read_literal()` in compiler.c
  24. switch(val->type) {
  25. case tp_int: c11_serializer__write_i64(s, val->_i64); break;
  26. case tp_float: c11_serializer__write_f64(s, val->_f64); break;
  27. case tp_str: {
  28. c11_sv sv = py_tosv((py_Ref)val);
  29. c11_serializer__write_i32(s, sv.size);
  30. c11_serializer__write_bytes(s, sv.data, sv.size);
  31. break;
  32. }
  33. case tp_bool: {
  34. bool value = py_tobool(val);
  35. c11_serializer__write_i8(s, value ? 1 : 0);
  36. break;
  37. }
  38. case tp_NoneType: break;
  39. case tp_ellipsis: break;
  40. case tp_tuple: {
  41. int len = py_tuple_len(val);
  42. c11_serializer__write_i32(s, len);
  43. for(int i = 0; i < len; i++) {
  44. py_Ref item = py_tuple_getitem(val, i);
  45. TValue__serialize(s, item);
  46. }
  47. break;
  48. }
  49. default: c11__abort("TValue__serialize: invalid type '%s'", py_tpname(val->type));
  50. }
  51. }
  52. // Deserialize a py_TValue constant
  53. static void TValue__deserialize(c11_deserializer* d, py_OutRef out) {
  54. py_Type type = c11_deserializer__read_type(d);
  55. switch(type) {
  56. case tp_int: {
  57. py_i64 v = c11_deserializer__read_i64(d);
  58. py_newint(out, v);
  59. break;
  60. }
  61. case tp_float: {
  62. py_f64 v = c11_deserializer__read_f64(d);
  63. py_newfloat(out, v);
  64. break;
  65. }
  66. case tp_str: {
  67. int size = c11_deserializer__read_i32(d);
  68. char* dst = py_newstrn(out, size);
  69. char* src = c11_deserializer__read_bytes(d, size);
  70. memcpy(dst, src, size);
  71. break;
  72. }
  73. case tp_bool: {
  74. bool v = c11_deserializer__read_i8(d) != 0;
  75. py_newbool(out, v);
  76. break;
  77. }
  78. case tp_NoneType: {
  79. py_newnone(out);
  80. break;
  81. }
  82. case tp_ellipsis: {
  83. py_newellipsis(out);
  84. break;
  85. }
  86. case tp_tuple: {
  87. int len = c11_deserializer__read_i32(d);
  88. py_newtuple(out, len);
  89. for(int i = 0; i < len; i++) {
  90. py_ItemRef item = py_tuple_getitem(out, i);
  91. TValue__deserialize(d, item);
  92. }
  93. break;
  94. }
  95. default:
  96. c11__abort("TValue__deserialize: invalid type '%s'", py_tpname(type));
  97. }
  98. }
  99. // Serialize CodeObject
  100. static void CodeObject__serialize(c11_serializer* s,
  101. const CodeObject* co,
  102. const struct SourceData* parent_src) {
  103. // SourceData
  104. if(!parent_src) {
  105. c11_serializer__write_i8(s, (int8_t)co->src->mode);
  106. c11_serializer__write_i8(s, co->src->is_dynamic ? 1 : 0);
  107. c11_serializer__write_cstr(s, co->src->filename->data);
  108. } else {
  109. c11__rtassert(co->src == parent_src);
  110. }
  111. // name
  112. c11_serializer__write_cstr(s, co->name->data);
  113. // codes
  114. _Static_assert(sizeof(Bytecode) == sizeof(uint16_t) * 2, "");
  115. c11_serializer__write_i32(s, co->codes.length);
  116. c11_serializer__write_bytes(s, co->codes.data, co->codes.length * sizeof(Bytecode));
  117. // codes_ex
  118. _Static_assert(sizeof(BytecodeEx) == sizeof(int32_t) * 2, "");
  119. c11_serializer__write_i32(s, co->codes_ex.length);
  120. c11_serializer__write_bytes(s, co->codes_ex.data, co->codes_ex.length * sizeof(BytecodeEx));
  121. // consts
  122. c11_serializer__write_i32(s, co->consts.length);
  123. for(int i = 0; i < co->consts.length; i++) {
  124. py_Ref val = c11__at(py_TValue, &co->consts, i);
  125. TValue__serialize(s, val);
  126. }
  127. // varnames (as cstr via py_name2str)
  128. c11_serializer__write_i32(s, co->varnames.length);
  129. for(int i = 0; i < co->varnames.length; i++) {
  130. py_Name name = c11__getitem(py_Name, &co->varnames, i);
  131. c11_serializer__write_cstr(s, py_name2str(name));
  132. }
  133. // names (as cstr via py_name2str)
  134. c11_serializer__write_i32(s, co->names.length);
  135. for(int i = 0; i < co->names.length; i++) {
  136. py_Name name = c11__getitem(py_Name, &co->names, i);
  137. c11_serializer__write_cstr(s, py_name2str(name));
  138. }
  139. // nlocals
  140. c11_serializer__write_i32(s, co->nlocals);
  141. // blocks
  142. _Static_assert(sizeof(CodeBlock) == sizeof(int32_t) * 5, "");
  143. c11_serializer__write_i32(s, co->blocks.length);
  144. c11_serializer__write_bytes(s, co->blocks.data, co->blocks.length * sizeof(CodeBlock));
  145. // func_decls
  146. c11_serializer__write_i32(s, co->func_decls.length);
  147. for(int i = 0; i < co->func_decls.length; i++) {
  148. const FuncDecl* decl = c11__getitem(FuncDecl_, &co->func_decls, i);
  149. FuncDecl__serialize(s, decl, co->src);
  150. }
  151. // start_line, end_line
  152. c11_serializer__write_i32(s, co->start_line);
  153. c11_serializer__write_i32(s, co->end_line);
  154. }
  155. // Deserialize CodeObject (initialize co before calling)
  156. static CodeObject CodeObject__deserialize(c11_deserializer* d, SourceData_ embedded_src) {
  157. CodeObject co;
  158. // SourceData
  159. SourceData_ src;
  160. if(embedded_src != NULL) {
  161. src = embedded_src;
  162. PK_INCREF(src);
  163. } else {
  164. enum py_CompileMode mode = (enum py_CompileMode)c11_deserializer__read_i8(d);
  165. bool is_dynamic = c11_deserializer__read_i8(d) != 0;
  166. const char* filename = c11_deserializer__read_cstr(d);
  167. src = SourceData__rcnew(NULL, filename, mode, is_dynamic);
  168. }
  169. // name
  170. const char* name = c11_deserializer__read_cstr(d);
  171. c11_sv name_sv = {name, strlen(name)};
  172. // Initialize the CodeObject
  173. CodeObject__ctor(&co, src, name_sv);
  174. PK_DECREF(src); // CodeObject__ctor increments ref count
  175. // Clear the default root block that CodeObject__ctor adds
  176. c11_vector__clear(&co.blocks);
  177. // codes
  178. int codes_len = c11_deserializer__read_i32(d);
  179. c11_vector__extend(&co.codes,
  180. c11_deserializer__read_bytes(d, codes_len * sizeof(Bytecode)),
  181. codes_len);
  182. // codes_ex
  183. int codes_ex_len = c11_deserializer__read_i32(d);
  184. c11_vector__extend(&co.codes_ex,
  185. c11_deserializer__read_bytes(d, codes_ex_len * sizeof(BytecodeEx)),
  186. codes_ex_len);
  187. // consts
  188. int consts_len = c11_deserializer__read_i32(d);
  189. for(int i = 0; i < consts_len; i++) {
  190. py_Ref p_val = c11_vector__emplace(&co.consts);
  191. TValue__deserialize(d, p_val);
  192. }
  193. // varnames
  194. int varnames_len = c11_deserializer__read_i32(d);
  195. for(int i = 0; i < varnames_len; i++) {
  196. const char* s = c11_deserializer__read_cstr(d);
  197. py_Name n = py_name(s);
  198. c11_vector__push(py_Name, &co.varnames, n);
  199. c11_smallmap_n2d__set(&co.varnames_inv, n, i);
  200. }
  201. // names
  202. int names_len = c11_deserializer__read_i32(d);
  203. for(int i = 0; i < names_len; i++) {
  204. const char* s = c11_deserializer__read_cstr(d);
  205. py_Name n = py_name(s);
  206. c11_vector__push(py_Name, &co.names, n);
  207. c11_smallmap_n2d__set(&co.names_inv, n, i);
  208. }
  209. // nlocals
  210. co.nlocals = c11_deserializer__read_i32(d);
  211. // blocks
  212. int blocks_len = c11_deserializer__read_i32(d);
  213. c11_vector__extend(&co.blocks,
  214. c11_deserializer__read_bytes(d, blocks_len * sizeof(CodeBlock)),
  215. blocks_len);
  216. // func_decls
  217. int func_decls_len = c11_deserializer__read_i32(d);
  218. for(int i = 0; i < func_decls_len; i++) {
  219. FuncDecl_ decl = FuncDecl__deserialize(d, src);
  220. c11_vector__push(FuncDecl_, &co.func_decls, decl);
  221. }
  222. // start_line, end_line
  223. co.start_line = c11_deserializer__read_i32(d);
  224. co.end_line = c11_deserializer__read_i32(d);
  225. return co;
  226. }
  227. // Serialize FuncDecl
  228. static void FuncDecl__serialize(c11_serializer* s,
  229. const FuncDecl* decl,
  230. const struct SourceData* parent_src) {
  231. // CodeObject (embedded)
  232. CodeObject__serialize(s, &decl->code, parent_src);
  233. // args
  234. c11_serializer__write_i32(s, decl->args.length);
  235. c11_serializer__write_bytes(s, decl->args.data, decl->args.length * sizeof(int32_t));
  236. // kwargs
  237. c11_serializer__write_i32(s, decl->kwargs.length);
  238. for(int i = 0; i < decl->kwargs.length; i++) {
  239. FuncDeclKwArg* kw = c11__at(FuncDeclKwArg, &decl->kwargs, i);
  240. c11_serializer__write_i32(s, kw->index);
  241. c11_serializer__write_cstr(s, py_name2str(kw->key));
  242. TValue__serialize(s, &kw->value);
  243. }
  244. // starred_arg, starred_kwarg
  245. c11_serializer__write_i32(s, decl->starred_arg);
  246. c11_serializer__write_i32(s, decl->starred_kwarg);
  247. // nested
  248. c11_serializer__write_i8(s, decl->nested ? 1 : 0);
  249. // docstring
  250. int has_docstring = decl->docstring != NULL ? 1 : 0;
  251. c11_serializer__write_i8(s, has_docstring);
  252. if(has_docstring) c11_serializer__write_cstr(s, decl->docstring);
  253. // type
  254. c11_serializer__write_i8(s, (int8_t)decl->type);
  255. }
  256. // Deserialize FuncDecl
  257. static FuncDecl_ FuncDecl__deserialize(c11_deserializer* d, SourceData_ embedded_src) {
  258. FuncDecl* self = PK_MALLOC(sizeof(FuncDecl));
  259. self->rc.count = 1;
  260. self->rc.dtor = (void (*)(void*))FuncDecl__dtor;
  261. c11_vector__ctor(&self->args, sizeof(int32_t));
  262. c11_vector__ctor(&self->kwargs, sizeof(FuncDeclKwArg));
  263. c11_smallmap_n2d__ctor(&self->kw_to_index);
  264. // CodeObject (embedded)
  265. self->code = CodeObject__deserialize(d, embedded_src);
  266. // args
  267. int args_len = c11_deserializer__read_i32(d);
  268. c11_vector__extend(&self->args,
  269. c11_deserializer__read_bytes(d, args_len * sizeof(int32_t)),
  270. args_len);
  271. // kwargs
  272. int kwargs_len = c11_deserializer__read_i32(d);
  273. for(int i = 0; i < kwargs_len; i++) {
  274. FuncDeclKwArg* kw = c11_vector__emplace(&self->kwargs);
  275. kw->index = c11_deserializer__read_i32(d);
  276. const char* key_str = c11_deserializer__read_cstr(d);
  277. kw->key = py_name(key_str);
  278. TValue__deserialize(d, &kw->value);
  279. c11_smallmap_n2d__set(&self->kw_to_index, kw->key, kw->index);
  280. }
  281. // starred_arg
  282. self->starred_arg = c11_deserializer__read_i32(d);
  283. // starred_kwarg
  284. self->starred_kwarg = c11_deserializer__read_i32(d);
  285. // nested
  286. self->nested = c11_deserializer__read_i8(d) != 0;
  287. // docstring
  288. int has_docstring = c11_deserializer__read_i8(d);
  289. if(has_docstring) {
  290. const char* docstring = c11_deserializer__read_cstr(d);
  291. self->docstring = c11_strdup(docstring);
  292. } else {
  293. self->docstring = NULL;
  294. }
  295. // type
  296. self->type = (FuncType)c11_deserializer__read_i8(d);
  297. return self;
  298. }
  299. // Public API: Serialize CodeObject to bytes
  300. void* CodeObject__dumps(const CodeObject* co, int* size) {
  301. c11_serializer s;
  302. c11_serializer__ctor(&s, CODEOBJECT_MAGIC, CODEOBJECT_VER_MAJOR, CODEOBJECT_VER_MINOR);
  303. CodeObject__serialize(&s, co, NULL);
  304. return c11_serializer__submit(&s, size);
  305. }
  306. // Public API: Deserialize CodeObject from bytes
  307. // Returns error message or NULL on success
  308. char* CodeObject__loads(const void* data, int size, CodeObject* out) {
  309. c11_deserializer d;
  310. c11_deserializer__ctor(&d, data, size);
  311. if(!c11_deserializer__check_header(&d,
  312. CODEOBJECT_MAGIC,
  313. CODEOBJECT_VER_MAJOR,
  314. CODEOBJECT_VER_MINOR_MIN)) {
  315. char* error_msg = c11_strdup(d.error_msg);
  316. c11_deserializer__dtor(&d);
  317. return error_msg;
  318. }
  319. *out = CodeObject__deserialize(&d, NULL);
  320. c11_deserializer__dtor(&d);
  321. return NULL;
  322. }
  // Drop the file-local format constants so they are no longer defined
  // past this point.
  #undef CODEOBJECT_VER_MINOR_MIN
  #undef CODEOBJECT_VER_MINOR
  #undef CODEOBJECT_VER_MAJOR
  #undef CODEOBJECT_MAGIC