py_dict.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552
  1. #include "pocketpy/pocketpy.h"
  2. #include "pocketpy/common/utils.h"
  3. #include "pocketpy/common/sstream.h"
  4. #include "pocketpy/objects/object.h"
  5. #include "pocketpy/interpreter/vm.h"
  6. #define PK_DICT_MAX_COLLISION 3
  7. typedef struct {
  8. py_i64 hash;
  9. py_TValue key;
  10. py_TValue val;
  11. } DictEntry;
  12. typedef struct {
  13. int _[PK_DICT_MAX_COLLISION];
  14. } DictIndex;
  15. typedef struct {
  16. int length;
  17. int capacity;
  18. DictIndex* indices;
  19. c11_vector /*T=DictEntry*/ entries;
  20. } Dict;
  21. typedef struct {
  22. DictEntry* curr;
  23. DictEntry* end;
  24. } DictIterator;
  25. static void Dict__ctor(Dict* self, int capacity) {
  26. self->length = 0;
  27. self->capacity = capacity;
  28. self->indices = malloc(self->capacity * sizeof(DictIndex));
  29. memset(self->indices, -1, self->capacity * sizeof(DictIndex));
  30. c11_vector__ctor(&self->entries, sizeof(DictEntry));
  31. c11_vector__reserve(&self->entries, capacity);
  32. }
  33. static void Dict__dtor(Dict* self) {
  34. self->length = 0;
  35. self->capacity = 0;
  36. free(self->indices);
  37. c11_vector__dtor(&self->entries);
  38. }
  39. static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) {
  40. py_i64 hash;
  41. if(!py_hash(key, &hash)) return false;
  42. int idx = hash & (self->capacity - 1);
  43. for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
  44. int idx2 = self->indices[idx]._[i];
  45. if(idx2 == -1) continue;
  46. DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
  47. int res = py_equal(&entry->key, key);
  48. if(res == 1) {
  49. *out = entry;
  50. return true;
  51. }
  52. if(res == -1) return false; // error
  53. }
  54. *out = NULL;
  55. return true;
  56. }
  57. static void Dict__clear(Dict* self) {
  58. memset(self->indices, -1, self->capacity * sizeof(DictIndex));
  59. c11_vector__clear(&self->entries);
  60. self->length = 0;
  61. }
  62. static void Dict__rehash_2x(Dict* self) {
  63. Dict old_dict = *self;
  64. int new_capacity = self->capacity * 2;
  65. assert(new_capacity <= 1073741824);
  66. do {
  67. Dict__ctor(self, new_capacity);
  68. for(int i = 0; i < old_dict.entries.count; i++) {
  69. DictEntry* entry = c11__at(DictEntry, &old_dict.entries, i);
  70. if(py_isnil(&entry->key)) continue;
  71. int idx = entry->hash & (new_capacity - 1);
  72. bool success = false;
  73. for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
  74. int idx2 = self->indices[idx]._[i];
  75. if(idx2 == -1) {
  76. // insert new entry (empty slot)
  77. c11_vector__push(DictEntry, &self->entries, *entry);
  78. self->indices[idx]._[i] = self->entries.count - 1;
  79. self->length++;
  80. success = true;
  81. break;
  82. }
  83. }
  84. if(!success) {
  85. Dict__dtor(self);
  86. new_capacity *= 2;
  87. continue;
  88. }
  89. }
  90. // resize complete
  91. Dict__dtor(&old_dict);
  92. return;
  93. } while(1);
  94. }
  95. static void Dict__compact_entries(Dict* self) {
  96. int* mappings = malloc(self->entries.count * sizeof(int));
  97. int n = 0;
  98. for(int i = 0; i < self->entries.count; i++) {
  99. DictEntry* entry = c11__at(DictEntry, &self->entries, i);
  100. if(py_isnil(&entry->key)) continue;
  101. mappings[i] = n;
  102. if(i != n) {
  103. DictEntry* new_entry = c11__at(DictEntry, &self->entries, n);
  104. *new_entry = *entry;
  105. }
  106. n++;
  107. }
  108. self->entries.count = n;
  109. // update indices
  110. for(int i = 0; i < self->capacity; i++) {
  111. for(int j = 0; j < PK_DICT_MAX_COLLISION; j++) {
  112. int idx = self->indices[i]._[j];
  113. if(idx == -1) continue;
  114. self->indices[i]._[j] = mappings[idx];
  115. }
  116. }
  117. free(mappings);
  118. }
  119. static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) {
  120. py_i64 hash;
  121. if(!py_hash(key, &hash)) return false;
  122. int idx = hash & (self->capacity - 1);
  123. for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
  124. int idx2 = self->indices[idx]._[i];
  125. if(idx2 == -1) {
  126. // insert new entry
  127. DictEntry* new_entry = c11_vector__emplace(&self->entries);
  128. new_entry->hash = hash;
  129. new_entry->key = *key;
  130. new_entry->val = *val;
  131. self->indices[idx]._[i] = self->entries.count - 1;
  132. self->length++;
  133. return true;
  134. }
  135. // update existing entry
  136. DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
  137. int res = py_equal(&entry->key, key);
  138. if(res == 1) {
  139. entry->val = *val;
  140. return true;
  141. }
  142. if(res == -1) return false; // error
  143. }
  144. // no empty slot found
  145. Dict__rehash_2x(self);
  146. return Dict__set(self, key, val);
  147. }
  148. /// Delete an entry from the dict.
  149. /// If the key is found, `py_retval()` is set to the value.
  150. /// If the key is not found, `py_retval()` is set to `nil`.
  151. /// Returns false on error.
  152. static bool Dict__pop(Dict* self, py_Ref key) {
  153. py_i64 hash;
  154. if(!py_hash(key, &hash)) return false;
  155. int idx = hash & (self->capacity - 1);
  156. for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
  157. int idx2 = self->indices[idx]._[i];
  158. if(idx2 == -1) continue;
  159. DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
  160. int res = py_equal(&entry->key, key);
  161. if(res == 1) {
  162. *py_retval() = entry->val;
  163. py_newnil(&entry->key);
  164. self->indices[idx]._[i] = -1;
  165. self->length--;
  166. if(self->length < self->entries.count / 2) Dict__compact_entries(self);
  167. return true;
  168. }
  169. if(res == -1) return false; // error
  170. }
  171. py_newnil(py_retval());
  172. return true;
  173. }
  174. static void DictIterator__ctor(DictIterator* self, Dict* dict) {
  175. self->curr = dict->entries.data;
  176. self->end = self->curr + dict->entries.count;
  177. }
  178. static DictEntry* DictIterator__next(DictIterator* self) {
  179. DictEntry* retval;
  180. do {
  181. if(self->curr == self->end) return NULL;
  182. retval = self->curr++;
  183. } while(py_isnil(&retval->key));
  184. return retval;
  185. }
  186. ///////////////////////////////
  187. static bool dict__new__(int argc, py_Ref argv) {
  188. py_Type cls = py_totype(argv);
  189. int slots = cls == tp_dict ? 0 : -1;
  190. Dict* ud = py_newobject(py_retval(), cls, slots, sizeof(Dict));
  191. Dict__ctor(ud, 8);
  192. return true;
  193. }
  194. void py_newdict(py_Ref out) {
  195. Dict* ud = py_newobject(out, tp_dict, 0, sizeof(Dict));
  196. Dict__ctor(ud, 8);
  197. }
  198. static bool dict__init__(int argc, py_Ref argv) {
  199. py_newnone(py_retval());
  200. if(argc > 2) return TypeError("dict.__init__() takes at most 2 arguments (%d given)", argc);
  201. if(argc == 1) return true;
  202. assert(argc == 2);
  203. PY_CHECK_ARG_TYPE(1, tp_list);
  204. Dict* self = py_touserdata(argv);
  205. py_Ref list = py_arg(1);
  206. for(int i = 0; i < py_list_len(list); i++) {
  207. py_Ref tuple = py_list_getitem(list, i);
  208. if(!py_istuple(tuple) || py_tuple_len(tuple) != 2) {
  209. return TypeError("dict.__init__() argument must be a list of tuple-2");
  210. }
  211. py_Ref key = py_tuple_getitem(tuple, 0);
  212. py_Ref val = py_tuple_getitem(tuple, 1);
  213. if(!Dict__set(self, key, val)) return false;
  214. }
  215. return true;
  216. }
  217. static bool dict__getitem__(int argc, py_Ref argv) {
  218. PY_CHECK_ARGC(2);
  219. Dict* self = py_touserdata(argv);
  220. DictEntry* entry;
  221. if(!Dict__try_get(self, py_arg(1), &entry)) return false;
  222. if(entry) {
  223. *py_retval() = entry->val;
  224. return true;
  225. }
  226. // try __missing__
  227. py_Ref missing = py_tpfindmagic(argv->type, __missing__);
  228. if(missing) return py_call(missing, argc, argv);
  229. return KeyError(py_arg(1));
  230. }
  231. static bool dict__setitem__(int argc, py_Ref argv) {
  232. PY_CHECK_ARGC(3);
  233. Dict* self = py_touserdata(argv);
  234. return Dict__set(self, py_arg(1), py_arg(2));
  235. }
  236. static bool dict__delitem__(int argc, py_Ref argv) {
  237. PY_CHECK_ARGC(2);
  238. Dict* self = py_touserdata(argv);
  239. if(!Dict__pop(self, py_arg(1))) return false;
  240. return true;
  241. }
  242. static bool dict__contains__(int argc, py_Ref argv) {
  243. PY_CHECK_ARGC(2);
  244. Dict* self = py_touserdata(argv);
  245. DictEntry* entry;
  246. if(!Dict__try_get(self, py_arg(1), &entry)) return false;
  247. py_newbool(py_retval(), entry != NULL);
  248. return true;
  249. }
  250. static bool dict__len__(int argc, py_Ref argv) {
  251. PY_CHECK_ARGC(1);
  252. Dict* self = py_touserdata(argv);
  253. py_newint(py_retval(), self->length);
  254. return true;
  255. }
  256. static bool dict__repr__(int argc, py_Ref argv) {
  257. PY_CHECK_ARGC(1);
  258. Dict* self = py_touserdata(argv);
  259. c11_sbuf buf;
  260. c11_sbuf__ctor(&buf);
  261. c11_sbuf__write_char(&buf, '{');
  262. bool is_first = true;
  263. for(int i = 0; i < self->entries.count; i++) {
  264. DictEntry* entry = c11__at(DictEntry, &self->entries, i);
  265. if(py_isnil(&entry->key)) continue;
  266. if(!is_first) c11_sbuf__write_cstr(&buf, ", ");
  267. if(!py_repr(&entry->key)) return false;
  268. c11_sbuf__write_sv(&buf, py_tosv(py_retval()));
  269. c11_sbuf__write_cstr(&buf, ": ");
  270. if(!py_repr(&entry->val)) return false;
  271. c11_sbuf__write_sv(&buf, py_tosv(py_retval()));
  272. is_first = false;
  273. }
  274. c11_sbuf__write_char(&buf, '}');
  275. c11_sbuf__py_submit(&buf, py_retval());
  276. return true;
  277. }
  278. static bool dict__eq__(int argc, py_Ref argv) {
  279. PY_CHECK_ARGC(2);
  280. Dict* self = py_touserdata(py_arg(0));
  281. if(!py_isdict(py_arg(1))) {
  282. py_newnotimplemented(py_retval());
  283. return true;
  284. }
  285. Dict* other = py_touserdata(py_arg(1));
  286. if(self->length != other->length) {
  287. py_newbool(py_retval(), false);
  288. return true;
  289. }
  290. DictIterator iter;
  291. DictIterator__ctor(&iter, self);
  292. // for each self key
  293. while(1) {
  294. DictEntry* entry = DictIterator__next(&iter);
  295. if(!entry) break;
  296. DictEntry* other_entry;
  297. if(!Dict__try_get(other, &entry->key, &other_entry)) return false;
  298. if(!other_entry) {
  299. py_newbool(py_retval(), false);
  300. return true;
  301. }
  302. int res = py_equal(&entry->val, &other_entry->val);
  303. if(res == -1) return false;
  304. if(!res) {
  305. py_newbool(py_retval(), false);
  306. return true;
  307. }
  308. }
  309. py_newbool(py_retval(), true);
  310. return true;
  311. }
  312. static bool dict__ne__(int argc, py_Ref argv) {
  313. if(!dict__eq__(argc, argv)) return false;
  314. if(py_isbool(py_retval())) {
  315. bool res = py_tobool(py_retval());
  316. py_newbool(py_retval(), !res);
  317. }
  318. return true;
  319. }
  320. static bool dict_clear(int argc, py_Ref argv) {
  321. PY_CHECK_ARGC(1);
  322. Dict* self = py_touserdata(argv);
  323. Dict__clear(self);
  324. py_newnone(py_retval());
  325. return true;
  326. }
  327. static bool dict_copy(int argc, py_Ref argv) {
  328. PY_CHECK_ARGC(1);
  329. Dict* self = py_touserdata(argv);
  330. Dict* new_dict = py_newobject(py_retval(), tp_dict, 0, sizeof(Dict));
  331. new_dict->capacity = self->capacity;
  332. new_dict->length = self->length;
  333. new_dict->entries = c11_vector__copy(&self->entries);
  334. // copy indices
  335. new_dict->indices = malloc(new_dict->capacity * sizeof(DictIndex));
  336. memcpy(new_dict->indices, self->indices, new_dict->capacity * sizeof(DictIndex));
  337. return true;
  338. }
  339. static bool dict_update(int argc, py_Ref argv) {
  340. PY_CHECK_ARGC(2);
  341. PY_CHECK_ARG_TYPE(1, tp_dict);
  342. Dict* self = py_touserdata(argv);
  343. Dict* other = py_touserdata(py_arg(1));
  344. for(int i = 0; i < other->entries.count; i++) {
  345. DictEntry* entry = c11__at(DictEntry, &other->entries, i);
  346. if(py_isnil(&entry->key)) continue;
  347. if(!Dict__set(self, &entry->key, &entry->val)) return false;
  348. }
  349. py_newnone(py_retval());
  350. return true;
  351. }
  352. static bool dict_get(int argc, py_Ref argv) {
  353. Dict* self = py_touserdata(argv);
  354. if(argc > 3) return TypeError("get() takes at most 3 arguments (%d given)", argc);
  355. py_Ref default_val = argc == 3 ? py_arg(2) : py_None;
  356. DictEntry* entry;
  357. if(!Dict__try_get(self, py_arg(1), &entry)) return false;
  358. *py_retval() = entry ? entry->val : *default_val;
  359. return true;
  360. }
  361. static bool dict_pop(int argc, py_Ref argv) {
  362. Dict* self = py_touserdata(argv);
  363. if(argc < 2 || argc > 3) return TypeError("pop() takes 2 or 3 arguments (%d given)", argc);
  364. py_Ref default_val = argc == 3 ? py_arg(2) : py_None;
  365. if(!Dict__pop(self, py_arg(1))) return false;
  366. if(py_isnil(py_retval())) *py_retval() = *default_val;
  367. return true;
  368. }
  369. static bool dict_items(int argc, py_Ref argv) {
  370. PY_CHECK_ARGC(1);
  371. Dict* self = py_touserdata(argv);
  372. DictIterator* ud = py_newobject(py_retval(), tp_dict_items, 1, sizeof(DictIterator));
  373. DictIterator__ctor(ud, self);
  374. py_setslot(py_retval(), 0, argv); // keep a reference to the dict
  375. return true;
  376. }
  377. static bool dict_keys(int argc, py_Ref argv) {
  378. PY_CHECK_ARGC(1);
  379. Dict* self = py_touserdata(argv);
  380. py_newtuple(py_retval(), self->length);
  381. DictIterator iter;
  382. DictIterator__ctor(&iter, self);
  383. int i = 0;
  384. while(1) {
  385. DictEntry* entry = DictIterator__next(&iter);
  386. if(!entry) break;
  387. py_tuple_setitem(py_retval(), i++, &entry->key);
  388. }
  389. assert(i == self->length);
  390. return true;
  391. }
  392. static bool dict_values(int argc, py_Ref argv) {
  393. PY_CHECK_ARGC(1);
  394. Dict* self = py_touserdata(argv);
  395. py_newtuple(py_retval(), self->length);
  396. DictIterator iter;
  397. DictIterator__ctor(&iter, self);
  398. int i = 0;
  399. while(1) {
  400. DictEntry* entry = DictIterator__next(&iter);
  401. if(!entry) break;
  402. py_tuple_setitem(py_retval(), i++, &entry->val);
  403. }
  404. assert(i == self->length);
  405. return true;
  406. }
  407. static void dict__gc_mark(void* ud) {
  408. Dict* self = ud;
  409. for(int i = 0; i < self->entries.count; i++) {
  410. DictEntry* entry = c11__at(DictEntry, &self->entries, i);
  411. if(py_isnil(&entry->key)) continue;
  412. pk__mark_value(&entry->key);
  413. pk__mark_value(&entry->val);
  414. }
  415. }
  416. py_Type pk_dict__register() {
  417. py_Type type = pk_newtype("dict", tp_object, NULL, (void (*)(void*))Dict__dtor, false, false);
  418. pk__tp_set_marker(type, dict__gc_mark);
  419. py_bindmagic(type, __new__, dict__new__);
  420. py_bindmagic(type, __init__, dict__init__);
  421. py_bindmagic(type, __getitem__, dict__getitem__);
  422. py_bindmagic(type, __setitem__, dict__setitem__);
  423. py_bindmagic(type, __delitem__, dict__delitem__);
  424. py_bindmagic(type, __contains__, dict__contains__);
  425. py_bindmagic(type, __len__, dict__len__);
  426. py_bindmagic(type, __repr__, dict__repr__);
  427. py_bindmagic(type, __eq__, dict__eq__);
  428. py_bindmagic(type, __ne__, dict__ne__);
  429. py_bindmethod(type, "clear", dict_clear);
  430. py_bindmethod(type, "copy", dict_copy);
  431. py_bindmethod(type, "update", dict_update);
  432. py_bindmethod(type, "get", dict_get);
  433. py_bindmethod(type, "pop", dict_pop);
  434. py_bindmethod(type, "items", dict_items);
  435. py_bindmethod(type, "keys", dict_keys);
  436. py_bindmethod(type, "values", dict_values);
  437. py_setdict(py_tpobject(type), __hash__, py_None);
  438. return type;
  439. }
  440. //////////////////////////
  441. static bool dict_items__next__(int argc, py_Ref argv) {
  442. PY_CHECK_ARGC(1);
  443. DictIterator* iter = py_touserdata(py_arg(0));
  444. DictEntry* entry = (DictIterator__next(iter));
  445. if(entry) {
  446. py_newtuple(py_retval(), 2);
  447. py_tuple_setitem(py_retval(), 0, &entry->key);
  448. py_tuple_setitem(py_retval(), 1, &entry->val);
  449. return true;
  450. }
  451. return StopIteration();
  452. }
  453. py_Type pk_dict_items__register() {
  454. py_Type type = pk_newtype("dict_items", tp_object, NULL, NULL, false, true);
  455. py_bindmagic(type, __iter__, pk_wrapper__self);
  456. py_bindmagic(type, __next__, dict_items__next__);
  457. return type;
  458. }
  459. //////////////////////////
  460. py_Ref py_dict_getitem(py_Ref self, py_Ref key) {
  461. assert(py_isdict(self));
  462. Dict* ud = py_touserdata(self);
  463. DictEntry* entry;
  464. if(!Dict__try_get(ud, key, &entry)) return NULL;
  465. if(entry) return &entry->val;
  466. return NULL;
  467. }
  468. void py_dict_delitem(py_Ref self, py_Ref key) {
  469. assert(py_isdict(self));
  470. Dict* ud = py_touserdata(self);
  471. Dict__pop(ud, key);
  472. }
  473. void py_dict_setitem(py_Ref self, py_Ref key, py_Ref val) {
  474. assert(py_isdict(self));
  475. Dict* ud = py_touserdata(self);
  476. Dict__set(ud, key, val);
  477. }
  478. bool py_dict_contains(py_Ref self, py_Ref key) {
  479. assert(py_isdict(self));
  480. Dict* ud = py_touserdata(self);
  481. DictEntry* entry;
  482. bool ok = Dict__try_get(ud, key, &entry);
  483. return ok && entry != NULL;
  484. }
  485. int py_dict_len(py_Ref self) {
  486. assert(py_isdict(self));
  487. Dict* ud = py_touserdata(self);
  488. return ud->length;
  489. }
  490. bool py_dict_apply(py_Ref self, bool (*f)(py_Ref, py_Ref, void*), void* ctx) {
  491. Dict* ud = py_touserdata(self);
  492. for(int i = 0; i < ud->entries.count; i++) {
  493. DictEntry* entry = c11__at(DictEntry, &ud->entries, i);
  494. if(py_isnil(&entry->key)) continue;
  495. if(!f(&entry->key, &entry->val, ctx)) return false;
  496. }
  497. return true;
  498. }