pickle.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  #include "pocketpy/common/vector.h"
  #include "pocketpy/pocketpy.h"
  #include "pocketpy/common/utils.h"
  #include "pocketpy/common/sstream.h"
  #include "pocketpy/interpreter/vm.h"
  #include <stdint.h>
  #include <string.h>
  // Opcodes of the pickle byte stream.
  // The numeric value of every enumerator is written into the stream as a single byte,
  // so the declaration order below is part of the serialized format.
  typedef enum {
      PKL_NONE,         // None, no payload
      PKL_INT8,         // integer stored in 1 raw byte
      PKL_INT16,        // integer stored in 2 raw bytes
      PKL_INT32,        // integer stored in 4 raw bytes
      PKL_INT64,        // integer stored in 8 raw bytes
      PKL_FLOAT32,      // float stored as 4 raw bytes
      PKL_FLOAT64,      // float stored as 8 raw bytes
      PKL_TRUE,         // bool True, no payload
      PKL_FALSE,        // bool False, no payload
      PKL_STRING,       // encoded int length, then the raw characters
      PKL_BYTES,        // encoded int length, then the raw bytes
      PKL_BUILD_LIST,   // encoded int count; collects that many previously decoded values
      PKL_BUILD_TUPLE,  // encoded int count; collects that many previously decoded values
      PKL_BUILD_DICT,   // encoded int pair count; collects 2 * count previously decoded values
      PKL_VEC2,         // raw bytes of a c11_vec2
      PKL_VEC3,         // raw bytes of a c11_vec3
      PKL_VEC2I,        // two encoded ints (x, y)
      PKL_VEC3I,        // three encoded ints (x, y, z)
      PKL_TYPE,         // NUL-terminated "<module>@<type name>" path
      PKL_EOF,          // end of stream
  } PickleOp;
  23. typedef struct {
  24. c11_vector /*T=char*/ codes;
  25. } PickleObject;
  26. static void PickleObject__ctor(PickleObject* self) { c11_vector__ctor(&self->codes, sizeof(char)); }
  27. static void PickleObject__dtor(PickleObject* self) { c11_vector__dtor(&self->codes); }
  28. static void PickleObject__py_submit(PickleObject* self, py_OutRef out) {
  29. int size;
  30. unsigned char* data = c11_vector__submit(&self->codes, &size);
  31. unsigned char* out_data = py_newbytes(out, size);
  32. memcpy(out_data, data, size);
  33. }
  34. static void PickleObject__write_bytes(PickleObject* buf, const void* data, int size) {
  35. c11_vector__extend(char, &buf->codes, data, size);
  36. }
  37. static void pkl__emit_op(PickleObject* buf, PickleOp op) {
  38. c11_vector__push(char, &buf->codes, op);
  39. }
  40. static void pkl__emit_int(PickleObject* buf, py_i64 val) {
  41. if((int8_t)val == val) {
  42. pkl__emit_op(buf, PKL_INT8);
  43. PickleObject__write_bytes(buf, &val, 1);
  44. } else if((int16_t)val == val) {
  45. pkl__emit_op(buf, PKL_INT16);
  46. PickleObject__write_bytes(buf, &val, 2);
  47. } else if((int32_t)val == val) {
  48. pkl__emit_op(buf, PKL_INT32);
  49. PickleObject__write_bytes(buf, &val, 4);
  50. } else {
  51. pkl__emit_op(buf, PKL_INT64);
  52. PickleObject__write_bytes(buf, &val, 8);
  53. }
  54. }
  // Reads one value from a byte cursor without requiring the cursor to be aligned.
  //   p_val: pointer to the destination object; its type decides how many bytes are read
  //   p_buf: byte-pointer lvalue; it is advanced past the bytes that were consumed
  // The bytes are transferred with memcpy, so the destination receives them verbatim.
  // Note that both arguments are expanded more than once.
  #define UNALIGNED_READ(p_val, p_buf)                                                           \
      do {                                                                                       \
          memcpy((p_val), (p_buf), sizeof(*(p_val)));                                            \
          (p_buf) += sizeof(*(p_val));                                                           \
      } while(0)
  60. static py_i64 pkl__read_int(const unsigned char** p) {
  61. PickleOp op = (PickleOp) * *p;
  62. (*p)++;
  63. switch(op) {
  64. case PKL_INT8: {
  65. int8_t val;
  66. UNALIGNED_READ(&val, *p);
  67. return val;
  68. }
  69. case PKL_INT16: {
  70. int16_t val;
  71. UNALIGNED_READ(&val, *p);
  72. return val;
  73. }
  74. case PKL_INT32: {
  75. int32_t val;
  76. UNALIGNED_READ(&val, *p);
  77. return val;
  78. }
  79. case PKL_INT64: {
  80. int64_t val;
  81. UNALIGNED_READ(&val, *p);
  82. return val;
  83. }
  84. default: c11__abort("pkl__read_int(): invalid op: %d", op);
  85. }
  86. }
  // Reads a NUL-terminated string located at the cursor.
  // Returns a pointer to the string inside the input buffer (nothing is copied) and
  // advances `*p` past the terminating '\0'.
  // The caller must guarantee that a terminator exists within the buffer; this function
  // has no way to check it.
  static const char* pkl__read_cstr(const unsigned char** p) {
      const char* p_str = (const char*)*p;
      // size_t keeps the full strlen() result instead of truncating it into an int
      size_t length = strlen(p_str);
      *p += length + 1;  // include '\0'
      return p_str;
  }
  93. static bool pickle_loads(int argc, py_Ref argv) {
  94. PY_CHECK_ARGC(1);
  95. PY_CHECK_ARG_TYPE(0, tp_bytes);
  96. int size;
  97. const unsigned char* data = py_tobytes(argv, &size);
  98. return py_pickle_loads(data, size);
  99. }
  100. static bool pickle_dumps(int argc, py_Ref argv) {
  101. PY_CHECK_ARGC(1);
  102. return py_pickle_dumps(argv);
  103. }
  104. void pk__add_module_pickle() {
  105. py_Ref mod = py_newmodule("pickle");
  106. py_bindfunc(mod, "loads", pickle_loads);
  107. py_bindfunc(mod, "dumps", pickle_dumps);
  108. }
  109. static bool pickle__write_object(PickleObject* buf, py_TValue* obj);
  110. static bool pickle__write_array(PickleObject* buf, PickleOp op, py_TValue* arr, int length) {
  111. for(int i = 0; i < length; i++) {
  112. bool ok = pickle__write_object(buf, arr + i);
  113. if(!ok) return false;
  114. }
  115. pkl__emit_op(buf, op);
  116. pkl__emit_int(buf, length);
  117. return true;
  118. }
  119. static bool pickle__write_dict_kv(py_Ref k, py_Ref v, void* ctx) {
  120. PickleObject* buf = (PickleObject*)ctx;
  121. if(!pickle__write_object(buf, k)) return false;
  122. if(!pickle__write_object(buf, v)) return false;
  123. return true;
  124. }
  125. static bool pickle__write_object(PickleObject* buf, py_TValue* obj) {
  126. switch(obj->type) {
  127. case tp_NoneType: {
  128. pkl__emit_op(buf, PKL_NONE);
  129. return true;
  130. }
  131. case tp_int: {
  132. py_i64 val = obj->_i64;
  133. pkl__emit_int(buf, val);
  134. return true;
  135. }
  136. case tp_float: {
  137. py_f64 val = obj->_f64;
  138. float val32 = (float)val;
  139. if(val == val32) {
  140. pkl__emit_op(buf, PKL_FLOAT32);
  141. PickleObject__write_bytes(buf, &val32, 4);
  142. } else {
  143. pkl__emit_op(buf, PKL_FLOAT64);
  144. PickleObject__write_bytes(buf, &val, 8);
  145. }
  146. return true;
  147. }
  148. case tp_bool: {
  149. bool val = obj->_bool;
  150. pkl__emit_op(buf, val ? PKL_TRUE : PKL_FALSE);
  151. return true;
  152. }
  153. case tp_str: {
  154. pkl__emit_op(buf, PKL_STRING);
  155. c11_sv sv = py_tosv(obj);
  156. pkl__emit_int(buf, sv.size);
  157. PickleObject__write_bytes(buf, sv.data, sv.size);
  158. return true;
  159. }
  160. case tp_bytes: {
  161. pkl__emit_op(buf, PKL_BYTES);
  162. int size;
  163. unsigned char* data = py_tobytes(obj, &size);
  164. pkl__emit_int(buf, size);
  165. PickleObject__write_bytes(buf, data, size);
  166. return true;
  167. }
  168. case tp_list: {
  169. return pickle__write_array(buf, PKL_BUILD_LIST, py_list_data(obj), py_list_len(obj));
  170. }
  171. case tp_tuple: {
  172. return pickle__write_array(buf, PKL_BUILD_TUPLE, py_tuple_data(obj), py_tuple_len(obj));
  173. }
  174. case tp_dict: {
  175. bool ok = py_dict_apply(obj, pickle__write_dict_kv, (void*)buf);
  176. if(!ok) return false;
  177. pkl__emit_op(buf, PKL_BUILD_DICT);
  178. pkl__emit_int(buf, py_dict_len(obj));
  179. return true;
  180. }
  181. case tp_vec2: {
  182. c11_vec2 val = py_tovec2(obj);
  183. pkl__emit_op(buf, PKL_VEC2);
  184. PickleObject__write_bytes(buf, &val, sizeof(c11_vec2));
  185. return true;
  186. }
  187. case tp_vec3: {
  188. c11_vec3 val = py_tovec3(obj);
  189. pkl__emit_op(buf, PKL_VEC3);
  190. PickleObject__write_bytes(buf, &val, sizeof(c11_vec3));
  191. return true;
  192. }
  193. case tp_vec2i: {
  194. c11_vec2i val = py_tovec2i(obj);
  195. pkl__emit_op(buf, PKL_VEC2I);
  196. pkl__emit_int(buf, val.x);
  197. pkl__emit_int(buf, val.y);
  198. return true;
  199. }
  200. case tp_vec3i: {
  201. c11_vec3i val = py_tovec3i(obj);
  202. pkl__emit_op(buf, PKL_VEC3I);
  203. pkl__emit_int(buf, val.x);
  204. pkl__emit_int(buf, val.y);
  205. pkl__emit_int(buf, val.z);
  206. return true;
  207. }
  208. case tp_type: {
  209. pkl__emit_op(buf, PKL_TYPE);
  210. py_TypeInfo* ti = pk__type_info(py_totype(obj));
  211. const char* mod_name = py_tostr(py_getdict(&ti->module, __name__));
  212. c11_sbuf path_buf;
  213. c11_sbuf__ctor(&path_buf);
  214. c11_sbuf__write_cstr(&path_buf, mod_name);
  215. c11_sbuf__write_cstr(&path_buf, "@");
  216. c11_sbuf__write_cstr(&path_buf, py_name2str(ti->name));
  217. c11_string* path = c11_sbuf__submit(&path_buf);
  218. // include '\0'
  219. PickleObject__write_bytes(buf, path->data, path->size + 1);
  220. c11_string__delete(path);
  221. return true;
  222. }
  223. default: return TypeError("'%t' object is not picklable", obj->type);
  224. }
  225. }
  226. bool py_pickle_dumps(py_Ref val) {
  227. PickleObject buf;
  228. PickleObject__ctor(&buf);
  229. bool ok = pickle__write_object(&buf, val);
  230. if(!ok) {
  231. PickleObject__dtor(&buf);
  232. return false;
  233. }
  234. pkl__emit_op(&buf, PKL_EOF);
  235. PickleObject__py_submit(&buf, py_retval());
  236. return true;
  237. }
  238. bool py_pickle_loads(const unsigned char* data, int size) {
  239. py_StackRef p0 = py_peek(0);
  240. const unsigned char* p = data;
  241. while(true) {
  242. PickleOp op = (PickleOp)*p;
  243. p++;
  244. switch(op) {
  245. case PKL_NONE: {
  246. py_pushnone();
  247. break;
  248. }
  249. case PKL_INT8: {
  250. int8_t val;
  251. UNALIGNED_READ(&val, p);
  252. py_newint(py_pushtmp(), val);
  253. break;
  254. }
  255. case PKL_INT16: {
  256. int16_t val;
  257. UNALIGNED_READ(&val, p);
  258. py_newint(py_pushtmp(), val);
  259. break;
  260. }
  261. case PKL_INT32: {
  262. int32_t val;
  263. UNALIGNED_READ(&val, p);
  264. py_newint(py_pushtmp(), val);
  265. break;
  266. }
  267. case PKL_INT64: {
  268. int64_t val;
  269. UNALIGNED_READ(&val, p);
  270. py_newint(py_pushtmp(), val);
  271. break;
  272. }
  273. case PKL_FLOAT32: {
  274. float val;
  275. UNALIGNED_READ(&val, p);
  276. py_newfloat(py_pushtmp(), val);
  277. break;
  278. }
  279. case PKL_FLOAT64: {
  280. double val;
  281. UNALIGNED_READ(&val, p);
  282. py_newfloat(py_pushtmp(), val);
  283. break;
  284. }
  285. case PKL_TRUE: {
  286. py_newbool(py_pushtmp(), true);
  287. break;
  288. }
  289. case PKL_FALSE: {
  290. py_newbool(py_pushtmp(), false);
  291. break;
  292. }
  293. case PKL_STRING: {
  294. int size = pkl__read_int(&p);
  295. char* dst = py_newstrn(py_pushtmp(), size);
  296. memcpy(dst, p, size);
  297. p += size;
  298. break;
  299. }
  300. case PKL_BYTES: {
  301. int size = pkl__read_int(&p);
  302. unsigned char* dst = py_newbytes(py_pushtmp(), size);
  303. memcpy(dst, p, size);
  304. p += size;
  305. break;
  306. }
  307. case PKL_BUILD_LIST: {
  308. int length = pkl__read_int(&p);
  309. py_OutRef val = py_retval();
  310. py_newlistn(val, length);
  311. for(int i = length - 1; i >= 0; i--) {
  312. py_StackRef item = py_peek(-1);
  313. py_list_setitem(val, i, item);
  314. py_pop();
  315. }
  316. py_push(val);
  317. break;
  318. }
  319. case PKL_BUILD_TUPLE: {
  320. int length = pkl__read_int(&p);
  321. py_OutRef val = py_retval();
  322. py_newtuple(val, length);
  323. for(int i = length - 1; i >= 0; i--) {
  324. py_StackRef item = py_peek(-1);
  325. py_tuple_setitem(val, i, item);
  326. py_pop();
  327. }
  328. py_push(val);
  329. break;
  330. }
  331. case PKL_BUILD_DICT: {
  332. int length = pkl__read_int(&p);
  333. py_OutRef val = py_pushtmp();
  334. py_newdict(val);
  335. py_StackRef begin = py_peek(-1) - 2 * length;
  336. py_StackRef end = py_peek(-1);
  337. for(py_StackRef i = begin; i < end; i += 2) {
  338. py_StackRef k = i;
  339. py_StackRef v = i + 1;
  340. bool ok = py_dict_setitem(val, k, v);
  341. if(!ok) return false;
  342. }
  343. py_assign(py_retval(), val);
  344. py_shrink(2 * length + 1);
  345. py_push(py_retval());
  346. break;
  347. }
  348. case PKL_VEC2: {
  349. c11_vec2 val;
  350. UNALIGNED_READ(&val, p);
  351. py_newvec2(py_pushtmp(), val);
  352. break;
  353. }
  354. case PKL_VEC3: {
  355. c11_vec3 val;
  356. UNALIGNED_READ(&val, p);
  357. py_newvec3(py_pushtmp(), val);
  358. break;
  359. }
  360. case PKL_VEC2I: {
  361. c11_vec2i val;
  362. val.x = pkl__read_int(&p);
  363. val.y = pkl__read_int(&p);
  364. py_newvec2i(py_pushtmp(), val);
  365. break;
  366. }
  367. case PKL_VEC3I: {
  368. c11_vec3i val;
  369. val.x = pkl__read_int(&p);
  370. val.y = pkl__read_int(&p);
  371. val.z = pkl__read_int(&p);
  372. py_newvec3i(py_pushtmp(), val);
  373. break;
  374. }
  375. case PKL_TYPE: {
  376. const char* path = pkl__read_cstr(&p);
  377. char* sep_index = strchr(path, '@');
  378. assert(sep_index != NULL);
  379. *sep_index = '\0';
  380. const char* mod_name = path;
  381. const char* type_name = sep_index + 1;
  382. py_Type t = py_gettype(mod_name, py_name(type_name));
  383. *sep_index = '@';
  384. if(t == 0) {
  385. return ImportError("cannot import '%s' from '%s'", type_name, mod_name);
  386. }
  387. py_push(py_tpobject(t));
  388. break;
  389. }
  390. case PKL_EOF: {
  391. if(py_peek(0) - p0 != 1) { return ValueError("invalid pickle data"); }
  392. py_assign(py_retval(), p0);
  393. py_pop();
  394. return true;
  395. }
  396. default: c11__unreachable();
  397. }
  398. }
  399. return true;
  400. }
  401. #undef UNALIGNED_READ